summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/9p/client.c10
-rw-r--r--net/9p/protocol.c6
-rw-r--r--net/9p/trans_common.c10
-rw-r--r--net/9p/trans_fd.c2
-rw-r--r--net/9p/trans_rdma.c6
-rw-r--r--net/9p/trans_virtio.c72
-rw-r--r--net/9p/util.c2
-rw-r--r--net/appletalk/ddp.c8
-rw-r--r--net/atm/common.c1
-rw-r--r--net/bridge/br_if.c6
-rw-r--r--net/bridge/br_multicast.c4
-rw-r--r--net/bridge/br_private.h2
-rw-r--r--net/bridge/br_stp_if.c11
-rw-r--r--net/can/af_can.c9
-rw-r--r--net/can/bcm.c4
-rw-r--r--net/can/raw.c4
-rw-r--r--net/ceph/armor.c4
-rw-r--r--net/ceph/ceph_common.c1
-rw-r--r--net/ceph/osd_client.c624
-rw-r--r--net/core/dev.c98
-rw-r--r--net/core/drop_monitor.c2
-rw-r--r--net/core/ethtool.c19
-rw-r--r--net/core/pktgen.c5
-rw-r--r--net/ipv4/arp.c3
-rw-r--r--net/ipv4/devinet.c30
-rw-r--r--net/ipv4/fib_frontend.c114
-rw-r--r--net/ipv4/fib_lookup.h3
-rw-r--r--net/ipv4/fib_semantics.c47
-rw-r--r--net/ipv4/fib_trie.c18
-rw-r--r--net/ipv4/ip_options.c6
-rw-r--r--net/ipv4/raw.c1
-rw-r--r--net/ipv4/route.c8
-rw-r--r--net/ipv4/tcp_input.c22
-rw-r--r--net/ipv6/ip6mr.c2
-rw-r--r--net/ipv6/ndisc.c2
-rw-r--r--net/ipv6/route.c2
-rw-r--r--net/ipv6/sysctl_net_ipv6.c18
-rw-r--r--net/ipx/af_ipx.c2
-rw-r--r--net/irda/iriap.c6
-rw-r--r--net/irda/irnet/irnet_ppp.c3
-rw-r--r--net/l2tp/l2tp_eth.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_app.c23
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c2
-rw-r--r--net/rds/cong.c9
-rw-r--r--net/rose/af_rose.c8
-rw-r--r--net/rose/rose_loopback.c13
-rw-r--r--net/rose/rose_route.c20
-rw-r--r--net/rose/rose_subr.c101
-rw-r--r--net/sctp/protocol.c4
-rw-r--r--net/sunrpc/auth_gss/gss_mech_switch.c38
-rw-r--r--net/sunrpc/sched.c4
-rw-r--r--net/sunrpc/svcauth_unix.c18
-rw-r--r--net/sunrpc/xprtsock.c2
-rw-r--r--net/xfrm/xfrm_input.c4
-rw-r--r--net/xfrm/xfrm_output.c4
-rw-r--r--net/xfrm/xfrm_replay.c17
-rw-r--r--net/xfrm/xfrm_state.c21
-rw-r--r--net/xfrm/xfrm_user.c26
58 files changed, 1076 insertions, 437 deletions
diff --git a/net/9p/client.c b/net/9p/client.c
index 347ec0cd271..2ccbf04d37d 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -223,7 +223,7 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
req = &c->reqs[row][col];
if (!req->tc) {
- req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL);
+ req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_NOFS);
if (!req->wq) {
printk(KERN_ERR "Couldn't grow tag array\n");
return ERR_PTR(-ENOMEM);
@@ -233,17 +233,17 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
P9_TRANS_PREF_PAYLOAD_SEP) {
int alloc_msize = min(c->msize, 4096);
req->tc = kmalloc(sizeof(struct p9_fcall)+alloc_msize,
- GFP_KERNEL);
+ GFP_NOFS);
req->tc->capacity = alloc_msize;
req->rc = kmalloc(sizeof(struct p9_fcall)+alloc_msize,
- GFP_KERNEL);
+ GFP_NOFS);
req->rc->capacity = alloc_msize;
} else {
req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize,
- GFP_KERNEL);
+ GFP_NOFS);
req->tc->capacity = c->msize;
req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize,
- GFP_KERNEL);
+ GFP_NOFS);
req->rc->capacity = c->msize;
}
if ((!req->tc) || (!req->rc)) {
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 2ce515b859b..8a4084fa8b5 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -205,7 +205,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
if (errcode)
break;
- *sptr = kmalloc(len + 1, GFP_KERNEL);
+ *sptr = kmalloc(len + 1, GFP_NOFS);
if (*sptr == NULL) {
errcode = -EFAULT;
break;
@@ -273,7 +273,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
if (!errcode) {
*wnames =
kmalloc(sizeof(char *) * *nwname,
- GFP_KERNEL);
+ GFP_NOFS);
if (!*wnames)
errcode = -ENOMEM;
}
@@ -317,7 +317,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
*wqids =
kmalloc(*nwqid *
sizeof(struct p9_qid),
- GFP_KERNEL);
+ GFP_NOFS);
if (*wqids == NULL)
errcode = -ENOMEM;
}
diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
index d62b9aa58df..9172ab78fcb 100644
--- a/net/9p/trans_common.c
+++ b/net/9p/trans_common.c
@@ -41,9 +41,9 @@ EXPORT_SYMBOL(p9_release_req_pages);
int
p9_nr_pages(struct p9_req_t *req)
{
- int start_page, end_page;
- start_page = (unsigned long long)req->tc->pubuf >> PAGE_SHIFT;
- end_page = ((unsigned long long)req->tc->pubuf + req->tc->pbuf_size +
+ unsigned long start_page, end_page;
+ start_page = (unsigned long)req->tc->pubuf >> PAGE_SHIFT;
+ end_page = ((unsigned long)req->tc->pubuf + req->tc->pbuf_size +
PAGE_SIZE - 1) >> PAGE_SHIFT;
return end_page - start_page;
}
@@ -69,8 +69,8 @@ p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len,
*pdata_off = (size_t)req->tc->pubuf & (PAGE_SIZE-1);
if (*pdata_off)
- first_page_bytes = min((PAGE_SIZE - *pdata_off),
- req->tc->pbuf_size);
+ first_page_bytes = min(((size_t)PAGE_SIZE - *pdata_off),
+ req->tc->pbuf_size);
rpinfo = req->tc->private;
pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf,
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index a30471e5174..aa5672b15ea 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -350,7 +350,7 @@ static void p9_read_work(struct work_struct *work)
if (m->req->rc == NULL) {
m->req->rc = kmalloc(sizeof(struct p9_fcall) +
- m->client->msize, GFP_KERNEL);
+ m->client->msize, GFP_NOFS);
if (!m->req->rc) {
m->req = NULL;
err = -ENOMEM;
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 29a54ccd213..150e0c4bbf4 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -424,7 +424,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
struct p9_rdma_context *rpl_context = NULL;
/* Allocate an fcall for the reply */
- rpl_context = kmalloc(sizeof *rpl_context, GFP_KERNEL);
+ rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
if (!rpl_context) {
err = -ENOMEM;
goto err_close;
@@ -437,7 +437,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
*/
if (!req->rc) {
req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize,
- GFP_KERNEL);
+ GFP_NOFS);
if (req->rc) {
req->rc->sdata = (char *) req->rc +
sizeof(struct p9_fcall);
@@ -468,7 +468,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
req->rc = NULL;
/* Post the request */
- c = kmalloc(sizeof *c, GFP_KERNEL);
+ c = kmalloc(sizeof *c, GFP_NOFS);
if (!c) {
err = -ENOMEM;
goto err_free1;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 9b550ed9c71..e8f046b0718 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -43,6 +43,7 @@
#include <net/9p/client.h>
#include <net/9p/transport.h>
#include <linux/scatterlist.h>
+#include <linux/swap.h>
#include <linux/virtio.h>
#include <linux/virtio_9p.h>
#include "trans_common.h"
@@ -51,6 +52,8 @@
/* a single mutex to manage channel initialization and attachment */
static DEFINE_MUTEX(virtio_9p_lock);
+static DECLARE_WAIT_QUEUE_HEAD(vp_wq);
+static atomic_t vp_pinned = ATOMIC_INIT(0);
/**
* struct virtio_chan - per-instance transport information
@@ -78,7 +81,10 @@ struct virtio_chan {
struct virtqueue *vq;
int ring_bufs_avail;
wait_queue_head_t *vc_wq;
-
+ /* This is global limit. Since we don't have a global structure,
+ * will be placing it in each channel.
+ */
+ int p9_max_pages;
/* Scatterlist: can be too big for stack. */
struct scatterlist sg[VIRTQUEUE_NUM];
@@ -141,34 +147,36 @@ static void req_done(struct virtqueue *vq)
P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n");
- do {
+ while (1) {
spin_lock_irqsave(&chan->lock, flags);
rc = virtqueue_get_buf(chan->vq, &len);
- if (rc != NULL) {
- if (!chan->ring_bufs_avail) {
- chan->ring_bufs_avail = 1;
- wake_up(chan->vc_wq);
- }
- spin_unlock_irqrestore(&chan->lock, flags);
- P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
- P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n",
- rc->tag);
- req = p9_tag_lookup(chan->client, rc->tag);
- req->status = REQ_STATUS_RCVD;
- if (req->tc->private) {
- struct trans_rpage_info *rp = req->tc->private;
- /*Release pages */
- p9_release_req_pages(rp);
- if (rp->rp_alloc)
- kfree(rp);
- req->tc->private = NULL;
- }
- p9_client_cb(chan->client, req);
- } else {
+ if (rc == NULL) {
spin_unlock_irqrestore(&chan->lock, flags);
+ break;
+ }
+
+ chan->ring_bufs_avail = 1;
+ spin_unlock_irqrestore(&chan->lock, flags);
+ /* Wakeup if anyone waiting for VirtIO ring space. */
+ wake_up(chan->vc_wq);
+ P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
+ P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
+ req = p9_tag_lookup(chan->client, rc->tag);
+ if (req->tc->private) {
+ struct trans_rpage_info *rp = req->tc->private;
+ int p = rp->rp_nr_pages;
+ /*Release pages */
+ p9_release_req_pages(rp);
+ atomic_sub(p, &vp_pinned);
+ wake_up(&vp_wq);
+ if (rp->rp_alloc)
+ kfree(rp);
+ req->tc->private = NULL;
}
- } while (rc != NULL);
+ req->status = REQ_STATUS_RCVD;
+ p9_client_cb(chan->client, req);
+ }
}
/**
@@ -263,7 +271,6 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");
-req_retry:
req->status = REQ_STATUS_SENT;
if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) {
@@ -271,6 +278,14 @@ req_retry:
int rpinfo_size = sizeof(struct trans_rpage_info) +
sizeof(struct page *) * nr_pages;
+ if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
+ err = wait_event_interruptible(vp_wq,
+ atomic_read(&vp_pinned) < chan->p9_max_pages);
+ if (err == -ERESTARTSYS)
+ return err;
+ P9_DPRINTK(P9_DEBUG_TRANS, "9p: May gup pages now.\n");
+ }
+
if (rpinfo_size <= (req->tc->capacity - req->tc->size)) {
/* We can use sdata */
req->tc->private = req->tc->sdata + req->tc->size;
@@ -293,9 +308,12 @@ req_retry:
if (rpinfo->rp_alloc)
kfree(rpinfo);
return err;
+ } else {
+ atomic_add(rpinfo->rp_nr_pages, &vp_pinned);
}
}
+req_retry_pinned:
spin_lock_irqsave(&chan->lock, flags);
/* Handle out VirtIO ring buffers */
@@ -356,7 +374,7 @@ req_retry:
return err;
P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n");
- goto req_retry;
+ goto req_retry_pinned;
} else {
spin_unlock_irqrestore(&chan->lock, flags);
P9_DPRINTK(P9_DEBUG_TRANS,
@@ -453,6 +471,8 @@ static int p9_virtio_probe(struct virtio_device *vdev)
}
init_waitqueue_head(chan->vc_wq);
chan->ring_bufs_avail = 1;
+ /* Ceiling limit to avoid denial of service attacks */
+ chan->p9_max_pages = nr_free_buffer_pages()/4;
mutex_lock(&virtio_9p_lock);
list_add_tail(&chan->chan_list, &virtio_chan_list);
diff --git a/net/9p/util.c b/net/9p/util.c
index e048701a72d..b84619b5ba2 100644
--- a/net/9p/util.c
+++ b/net/9p/util.c
@@ -92,7 +92,7 @@ int p9_idpool_get(struct p9_idpool *p)
unsigned long flags;
retry:
- if (idr_pre_get(&p->pool, GFP_KERNEL) == 0)
+ if (idr_pre_get(&p->pool, GFP_NOFS) == 0)
return 0;
spin_lock_irqsave(&p->lock, flags);
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 3d4f4b04340..956a5302002 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1051,13 +1051,17 @@ static int atalk_release(struct socket *sock)
{
struct sock *sk = sock->sk;
- lock_sock(sk);
if (sk) {
+ sock_hold(sk);
+ lock_sock(sk);
+
sock_orphan(sk);
sock->sk = NULL;
atalk_destroy_socket(sk);
+
+ release_sock(sk);
+ sock_put(sk);
}
- release_sock(sk);
return 0;
}
diff --git a/net/atm/common.c b/net/atm/common.c
index 1b9c52a02cd..22b963d06a1 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -252,6 +252,7 @@ void atm_dev_release_vccs(struct atm_dev *dev)
}
write_unlock_irq(&vcc_sklist_lock);
}
+EXPORT_SYMBOL(atm_dev_release_vccs);
static int adjust_tp(struct atm_trafprm *tp, unsigned char aal)
{
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index dce8f0009a1..718b60366df 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -389,6 +389,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
{
struct net_bridge_port *p;
int err = 0;
+ bool changed_addr;
/* Don't allow bridging non-ethernet like devices */
if ((dev->flags & IFF_LOOPBACK) ||
@@ -446,7 +447,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
list_add_rcu(&p->list, &br->port_list);
spin_lock_bh(&br->lock);
- br_stp_recalculate_bridge_id(br);
+ changed_addr = br_stp_recalculate_bridge_id(br);
br_features_recompute(br);
if ((dev->flags & IFF_UP) && netif_carrier_ok(dev) &&
@@ -456,6 +457,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
br_ifinfo_notify(RTM_NEWLINK, p);
+ if (changed_addr)
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+
dev_set_mtu(br->dev, br_min_mtu(br));
kobject_uevent(&p->kobj, KOBJ_ADD);
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 030a002ff8e..59660c909a7 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -445,9 +445,9 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
ip6h->payload_len = htons(8 + sizeof(*mldq));
ip6h->nexthdr = IPPROTO_HOPOPTS;
ip6h->hop_limit = 1;
+ ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
&ip6h->saddr);
- ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
hopopt = (u8 *)(ip6h + 1);
@@ -1475,7 +1475,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
ip6h->payload_len == 0)
return 0;
- len = ntohs(ip6h->payload_len);
+ len = ntohs(ip6h->payload_len) + sizeof(*ip6h);
if (skb->len < len)
return -EINVAL;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 19e2f46ed08..387013d3374 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -497,7 +497,7 @@ extern void br_stp_disable_bridge(struct net_bridge *br);
extern void br_stp_set_enabled(struct net_bridge *br, unsigned long val);
extern void br_stp_enable_port(struct net_bridge_port *p);
extern void br_stp_disable_port(struct net_bridge_port *p);
-extern void br_stp_recalculate_bridge_id(struct net_bridge *br);
+extern bool br_stp_recalculate_bridge_id(struct net_bridge *br);
extern void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *a);
extern void br_stp_set_bridge_priority(struct net_bridge *br,
u16 newprio);
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 79372d4a405..9b61d09de9b 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -204,7 +204,7 @@ void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr)
static const unsigned short br_mac_zero_aligned[ETH_ALEN >> 1];
/* called under bridge lock */
-void br_stp_recalculate_bridge_id(struct net_bridge *br)
+bool br_stp_recalculate_bridge_id(struct net_bridge *br)
{
const unsigned char *br_mac_zero =
(const unsigned char *)br_mac_zero_aligned;
@@ -213,7 +213,7 @@ void br_stp_recalculate_bridge_id(struct net_bridge *br)
/* user has chosen a value so keep it */
if (br->flags & BR_SET_MAC_ADDR)
- return;
+ return false;
list_for_each_entry(p, &br->port_list, list) {
if (addr == br_mac_zero ||
@@ -222,8 +222,11 @@ void br_stp_recalculate_bridge_id(struct net_bridge *br)
}
- if (compare_ether_addr(br->bridge_id.addr, addr))
- br_stp_change_bridge_id(br, addr);
+ if (compare_ether_addr(br->bridge_id.addr, addr) == 0)
+ return false; /* no change */
+
+ br_stp_change_bridge_id(br, addr);
+ return true;
}
/* called under bridge lock */
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 702be5a2c95..733d66f1b05 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -95,7 +95,7 @@ struct s_pstats can_pstats; /* receive list statistics */
* af_can socket functions
*/
-static int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
struct sock *sk = sock->sk;
@@ -108,6 +108,7 @@ static int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
return -ENOIOCTLCMD;
}
}
+EXPORT_SYMBOL(can_ioctl);
static void can_sock_destruct(struct sock *sk)
{
@@ -698,13 +699,9 @@ int can_proto_register(struct can_proto *cp)
printk(KERN_ERR "can: protocol %d already registered\n",
proto);
err = -EBUSY;
- } else {
+ } else
proto_tab[proto] = cp;
- /* use generic ioctl function if not defined by module */
- if (!cp->ops->ioctl)
- cp->ops->ioctl = can_ioctl;
- }
spin_unlock(&proto_tab_lock);
if (err < 0)
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 092dc88a7c6..871a0ad5102 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1569,7 +1569,7 @@ static int bcm_recvmsg(struct kiocb *iocb, struct socket *sock,
return size;
}
-static struct proto_ops bcm_ops __read_mostly = {
+static const struct proto_ops bcm_ops = {
.family = PF_CAN,
.release = bcm_release,
.bind = sock_no_bind,
@@ -1578,7 +1578,7 @@ static struct proto_ops bcm_ops __read_mostly = {
.accept = sock_no_accept,
.getname = sock_no_getname,
.poll = datagram_poll,
- .ioctl = NULL, /* use can_ioctl() from af_can.c */
+ .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = sock_no_setsockopt,
diff --git a/net/can/raw.c b/net/can/raw.c
index 883e9d74fdd..649acfa7c70 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -742,7 +742,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct socket *sock,
return size;
}
-static struct proto_ops raw_ops __read_mostly = {
+static const struct proto_ops raw_ops = {
.family = PF_CAN,
.release = raw_release,
.bind = raw_bind,
@@ -751,7 +751,7 @@ static struct proto_ops raw_ops __read_mostly = {
.accept = sock_no_accept,
.getname = raw_getname,
.poll = datagram_poll,
- .ioctl = NULL, /* use can_ioctl() from af_can.c */
+ .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = raw_setsockopt,
diff --git a/net/ceph/armor.c b/net/ceph/armor.c
index eb2a666b0be..1fc1ee11dfa 100644
--- a/net/ceph/armor.c
+++ b/net/ceph/armor.c
@@ -78,8 +78,10 @@ int ceph_unarmor(char *dst, const char *src, const char *end)
while (src < end) {
int a, b, c, d;
- if (src < end && src[0] == '\n')
+ if (src[0] == '\n') {
src++;
+ continue;
+ }
if (src + 4 > end)
return -EINVAL;
a = decode_bits(src[0]);
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index f3e4a13fea0..95f96ab94bb 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -62,6 +62,7 @@ const char *ceph_msg_type_name(int type)
case CEPH_MSG_OSD_MAP: return "osd_map";
case CEPH_MSG_OSD_OP: return "osd_op";
case CEPH_MSG_OSD_OPREPLY: return "osd_opreply";
+ case CEPH_MSG_WATCH_NOTIFY: return "watch_notify";
default: return "unknown";
}
}
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 3e20a122ffa..02212ed5085 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -22,10 +22,15 @@
#define OSD_OPREPLY_FRONT_LEN 512
static const struct ceph_connection_operations osd_con_ops;
-static int __kick_requests(struct ceph_osd_client *osdc,
- struct ceph_osd *kickosd);
-static void kick_requests(struct ceph_osd_client *osdc, struct ceph_osd *osd);
+static void send_queued(struct ceph_osd_client *osdc);
+static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd);
+static void __register_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req);
+static void __unregister_linger_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req);
+static int __send_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req);
static int op_needs_trail(int op)
{
@@ -34,6 +39,7 @@ static int op_needs_trail(int op)
case CEPH_OSD_OP_SETXATTR:
case CEPH_OSD_OP_CMPXATTR:
case CEPH_OSD_OP_CALL:
+ case CEPH_OSD_OP_NOTIFY:
return 1;
default:
return 0;
@@ -209,6 +215,8 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
init_completion(&req->r_completion);
init_completion(&req->r_safe_completion);
INIT_LIST_HEAD(&req->r_unsafe_item);
+ INIT_LIST_HEAD(&req->r_linger_item);
+ INIT_LIST_HEAD(&req->r_linger_osd);
req->r_flags = flags;
WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0);
@@ -315,6 +323,24 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
break;
case CEPH_OSD_OP_STARTSYNC:
break;
+ case CEPH_OSD_OP_NOTIFY:
+ {
+ __le32 prot_ver = cpu_to_le32(src->watch.prot_ver);
+ __le32 timeout = cpu_to_le32(src->watch.timeout);
+
+ BUG_ON(!req->r_trail);
+
+ ceph_pagelist_append(req->r_trail,
+ &prot_ver, sizeof(prot_ver));
+ ceph_pagelist_append(req->r_trail,
+ &timeout, sizeof(timeout));
+ }
+ case CEPH_OSD_OP_NOTIFY_ACK:
+ case CEPH_OSD_OP_WATCH:
+ dst->watch.cookie = cpu_to_le64(src->watch.cookie);
+ dst->watch.ver = cpu_to_le64(src->watch.ver);
+ dst->watch.flag = src->watch.flag;
+ break;
default:
pr_err("unrecognized osd opcode %d\n", dst->op);
WARN_ON(1);
@@ -529,6 +555,45 @@ __lookup_request_ge(struct ceph_osd_client *osdc,
return NULL;
}
+/*
+ * Resubmit requests pending on the given osd.
+ */
+static void __kick_osd_requests(struct ceph_osd_client *osdc,
+ struct ceph_osd *osd)
+{
+ struct ceph_osd_request *req, *nreq;
+ int err;
+
+ dout("__kick_osd_requests osd%d\n", osd->o_osd);
+ err = __reset_osd(osdc, osd);
+ if (err == -EAGAIN)
+ return;
+
+ list_for_each_entry(req, &osd->o_requests, r_osd_item) {
+ list_move(&req->r_req_lru_item, &osdc->req_unsent);
+ dout("requeued %p tid %llu osd%d\n", req, req->r_tid,
+ osd->o_osd);
+ if (!req->r_linger)
+ req->r_flags |= CEPH_OSD_FLAG_RETRY;
+ }
+
+ list_for_each_entry_safe(req, nreq, &osd->o_linger_requests,
+ r_linger_osd) {
+ __unregister_linger_request(osdc, req);
+ __register_request(osdc, req);
+ list_move(&req->r_req_lru_item, &osdc->req_unsent);
+ dout("requeued lingering %p tid %llu osd%d\n", req, req->r_tid,
+ osd->o_osd);
+ }
+}
+
+static void kick_osd_requests(struct ceph_osd_client *osdc,
+ struct ceph_osd *kickosd)
+{
+ mutex_lock(&osdc->request_mutex);
+ __kick_osd_requests(osdc, kickosd);
+ mutex_unlock(&osdc->request_mutex);
+}
/*
* If the osd connection drops, we need to resubmit all requests.
@@ -543,7 +608,8 @@ static void osd_reset(struct ceph_connection *con)
dout("osd_reset osd%d\n", osd->o_osd);
osdc = osd->o_osdc;
down_read(&osdc->map_sem);
- kick_requests(osdc, osd);
+ kick_osd_requests(osdc, osd);
+ send_queued(osdc);
up_read(&osdc->map_sem);
}
@@ -561,6 +627,7 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc)
atomic_set(&osd->o_ref, 1);
osd->o_osdc = osdc;
INIT_LIST_HEAD(&osd->o_requests);
+ INIT_LIST_HEAD(&osd->o_linger_requests);
INIT_LIST_HEAD(&osd->o_osd_lru);
osd->o_incarnation = 1;
@@ -650,7 +717,8 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
int ret = 0;
dout("__reset_osd %p osd%d\n", osd, osd->o_osd);
- if (list_empty(&osd->o_requests)) {
+ if (list_empty(&osd->o_requests) &&
+ list_empty(&osd->o_linger_requests)) {
__remove_osd(osdc, osd);
} else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd],
&osd->o_con.peer_addr,
@@ -723,10 +791,9 @@ static void __cancel_osd_timeout(struct ceph_osd_client *osdc)
* Register request, assign tid. If this is the first request, set up
* the timeout event.
*/
-static void register_request(struct ceph_osd_client *osdc,
- struct ceph_osd_request *req)
+static void __register_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req)
{
- mutex_lock(&osdc->request_mutex);
req->r_tid = ++osdc->last_tid;
req->r_request->hdr.tid = cpu_to_le64(req->r_tid);
INIT_LIST_HEAD(&req->r_req_lru_item);
@@ -740,6 +807,13 @@ static void register_request(struct ceph_osd_client *osdc,
dout(" first request, scheduling timeout\n");
__schedule_osd_timeout(osdc);
}
+}
+
+static void register_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req)
+{
+ mutex_lock(&osdc->request_mutex);
+ __register_request(osdc, req);
mutex_unlock(&osdc->request_mutex);
}
@@ -758,9 +832,14 @@ static void __unregister_request(struct ceph_osd_client *osdc,
ceph_con_revoke(&req->r_osd->o_con, req->r_request);
list_del_init(&req->r_osd_item);
- if (list_empty(&req->r_osd->o_requests))
+ if (list_empty(&req->r_osd->o_requests) &&
+ list_empty(&req->r_osd->o_linger_requests)) {
+ dout("moving osd to %p lru\n", req->r_osd);
__move_osd_to_lru(osdc, req->r_osd);
- req->r_osd = NULL;
+ }
+ if (list_empty(&req->r_osd_item) &&
+ list_empty(&req->r_linger_item))
+ req->r_osd = NULL;
}
ceph_osdc_put_request(req);
@@ -781,20 +860,72 @@ static void __cancel_request(struct ceph_osd_request *req)
ceph_con_revoke(&req->r_osd->o_con, req->r_request);
req->r_sent = 0;
}
- list_del_init(&req->r_req_lru_item);
}
+static void __register_linger_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req)
+{
+ dout("__register_linger_request %p\n", req);
+ list_add_tail(&req->r_linger_item, &osdc->req_linger);
+ list_add_tail(&req->r_linger_osd, &req->r_osd->o_linger_requests);
+}
+
+static void __unregister_linger_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req)
+{
+ dout("__unregister_linger_request %p\n", req);
+ if (req->r_osd) {
+ list_del_init(&req->r_linger_item);
+ list_del_init(&req->r_linger_osd);
+
+ if (list_empty(&req->r_osd->o_requests) &&
+ list_empty(&req->r_osd->o_linger_requests)) {
+ dout("moving osd to %p lru\n", req->r_osd);
+ __move_osd_to_lru(osdc, req->r_osd);
+ }
+ req->r_osd = NULL;
+ }
+}
+
+void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req)
+{
+ mutex_lock(&osdc->request_mutex);
+ if (req->r_linger) {
+ __unregister_linger_request(osdc, req);
+ ceph_osdc_put_request(req);
+ }
+ mutex_unlock(&osdc->request_mutex);
+}
+EXPORT_SYMBOL(ceph_osdc_unregister_linger_request);
+
+void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req)
+{
+ if (!req->r_linger) {
+ dout("set_request_linger %p\n", req);
+ req->r_linger = 1;
+ /*
+ * caller is now responsible for calling
+ * unregister_linger_request
+ */
+ ceph_osdc_get_request(req);
+ }
+}
+EXPORT_SYMBOL(ceph_osdc_set_request_linger);
+
/*
* Pick an osd (the first 'up' osd in the pg), allocate the osd struct
* (as needed), and set the request r_osd appropriately. If there is
- * no up osd, set r_osd to NULL.
+ * no up osd, set r_osd to NULL. Move the request to the appropiate list
+ * (unsent, homeless) or leave on in-flight lru.
*
* Return 0 if unchanged, 1 if changed, or negative on error.
*
* Caller should hold map_sem for read and request_mutex.
*/
-static int __map_osds(struct ceph_osd_client *osdc,
- struct ceph_osd_request *req)
+static int __map_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req)
{
struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
struct ceph_pg pgid;
@@ -802,11 +933,13 @@ static int __map_osds(struct ceph_osd_client *osdc,
int o = -1, num = 0;
int err;
- dout("map_osds %p tid %lld\n", req, req->r_tid);
+ dout("map_request %p tid %lld\n", req, req->r_tid);
err = ceph_calc_object_layout(&reqhead->layout, req->r_oid,
&req->r_file_layout, osdc->osdmap);
- if (err)
+ if (err) {
+ list_move(&req->r_req_lru_item, &osdc->req_notarget);
return err;
+ }
pgid = reqhead->layout.ol_pgid;
req->r_pgid = pgid;
@@ -823,7 +956,7 @@ static int __map_osds(struct ceph_osd_client *osdc,
(req->r_osd == NULL && o == -1))
return 0; /* no change */
- dout("map_osds tid %llu pgid %d.%x osd%d (was osd%d)\n",
+ dout("map_request tid %llu pgid %d.%x osd%d (was osd%d)\n",
req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o,
req->r_osd ? req->r_osd->o_osd : -1);
@@ -841,10 +974,12 @@ static int __map_osds(struct ceph_osd_client *osdc,
if (!req->r_osd && o >= 0) {
err = -ENOMEM;
req->r_osd = create_osd(osdc);
- if (!req->r_osd)
+ if (!req->r_osd) {
+ list_move(&req->r_req_lru_item, &osdc->req_notarget);
goto out;
+ }
- dout("map_osds osd %p is osd%d\n", req->r_osd, o);
+ dout("map_request osd %p is osd%d\n", req->r_osd, o);
req->r_osd->o_osd = o;
req->r_osd->o_con.peer_name.num = cpu_to_le64(o);
__insert_osd(osdc, req->r_osd);
@@ -855,6 +990,9 @@ static int __map_osds(struct ceph_osd_client *osdc,
if (req->r_osd) {
__remove_osd_from_lru(req->r_osd);
list_add(&req->r_osd_item, &req->r_osd->o_requests);
+ list_move(&req->r_req_lru_item, &osdc->req_unsent);
+ } else {
+ list_move(&req->r_req_lru_item, &osdc->req_notarget);
}
err = 1; /* osd or pg changed */
@@ -869,16 +1007,6 @@ static int __send_request(struct ceph_osd_client *osdc,
struct ceph_osd_request *req)
{
struct ceph_osd_request_head *reqhead;
- int err;
-
- err = __map_osds(osdc, req);
- if (err < 0)
- return err;
- if (req->r_osd == NULL) {
- dout("send_request %p no up osds in pg\n", req);
- ceph_monc_request_next_osdmap(&osdc->client->monc);
- return 0;
- }
dout("send_request %p tid %llu to osd%d flags %d\n",
req, req->r_tid, req->r_osd->o_osd, req->r_flags);
@@ -898,6 +1026,21 @@ static int __send_request(struct ceph_osd_client *osdc,
}
/*
+ * Send any requests in the queue (req_unsent).
+ */
+static void send_queued(struct ceph_osd_client *osdc)
+{
+ struct ceph_osd_request *req, *tmp;
+
+ dout("send_queued\n");
+ mutex_lock(&osdc->request_mutex);
+ list_for_each_entry_safe(req, tmp, &osdc->req_unsent, r_req_lru_item) {
+ __send_request(osdc, req);
+ }
+ mutex_unlock(&osdc->request_mutex);
+}
+
+/*
* Timeout callback, called every N seconds when 1 or more osd
* requests has been active for more than N seconds. When this
* happens, we ping all OSDs with requests who have timed out to
@@ -916,30 +1059,13 @@ static void handle_timeout(struct work_struct *work)
unsigned long keepalive =
osdc->client->options->osd_keepalive_timeout * HZ;
unsigned long last_stamp = 0;
- struct rb_node *p;
struct list_head slow_osds;
-
dout("timeout\n");
down_read(&osdc->map_sem);
ceph_monc_request_next_osdmap(&osdc->client->monc);
mutex_lock(&osdc->request_mutex);
- for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
- req = rb_entry(p, struct ceph_osd_request, r_node);
-
- if (req->r_resend) {
- int err;
-
- dout("osdc resending prev failed %lld\n", req->r_tid);
- err = __send_request(osdc, req);
- if (err)
- dout("osdc failed again on %lld\n", req->r_tid);
- else
- req->r_resend = false;
- continue;
- }
- }
/*
* reset osds that appear to be _really_ unresponsive. this
@@ -963,7 +1089,7 @@ static void handle_timeout(struct work_struct *work)
BUG_ON(!osd);
pr_warning(" tid %llu timed out on osd%d, will reset osd\n",
req->r_tid, osd->o_osd);
- __kick_requests(osdc, osd);
+ __kick_osd_requests(osdc, osd);
}
/*
@@ -991,7 +1117,7 @@ static void handle_timeout(struct work_struct *work)
__schedule_osd_timeout(osdc);
mutex_unlock(&osdc->request_mutex);
-
+ send_queued(osdc);
up_read(&osdc->map_sem);
}
@@ -1035,7 +1161,6 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
numops * sizeof(struct ceph_osd_op))
goto bad;
dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result);
-
/* lookup */
mutex_lock(&osdc->request_mutex);
req = __lookup_request(osdc, tid);
@@ -1079,6 +1204,9 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
dout("handle_reply tid %llu flags %d\n", tid, flags);
+ if (req->r_linger && (flags & CEPH_OSD_FLAG_ONDISK))
+ __register_linger_request(osdc, req);
+
/* either this is a read, or we got the safe response */
if (result < 0 ||
(flags & CEPH_OSD_FLAG_ONDISK) ||
@@ -1099,6 +1227,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
}
done:
+ dout("req=%p req->r_linger=%d\n", req, req->r_linger);
ceph_osdc_put_request(req);
return;
@@ -1109,108 +1238,83 @@ bad:
ceph_msg_dump(msg);
}
-
-static int __kick_requests(struct ceph_osd_client *osdc,
- struct ceph_osd *kickosd)
+static void reset_changed_osds(struct ceph_osd_client *osdc)
{
- struct ceph_osd_request *req;
struct rb_node *p, *n;
- int needmap = 0;
- int err;
- dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1);
- if (kickosd) {
- err = __reset_osd(osdc, kickosd);
- if (err == -EAGAIN)
- return 1;
- } else {
- for (p = rb_first(&osdc->osds); p; p = n) {
- struct ceph_osd *osd =
- rb_entry(p, struct ceph_osd, o_node);
-
- n = rb_next(p);
- if (!ceph_osd_is_up(osdc->osdmap, osd->o_osd) ||
- memcmp(&osd->o_con.peer_addr,
- ceph_osd_addr(osdc->osdmap,
- osd->o_osd),
- sizeof(struct ceph_entity_addr)) != 0)
- __reset_osd(osdc, osd);
- }
+ for (p = rb_first(&osdc->osds); p; p = n) {
+ struct ceph_osd *osd = rb_entry(p, struct ceph_osd, o_node);
+
+ n = rb_next(p);
+ if (!ceph_osd_is_up(osdc->osdmap, osd->o_osd) ||
+ memcmp(&osd->o_con.peer_addr,
+ ceph_osd_addr(osdc->osdmap,
+ osd->o_osd),
+ sizeof(struct ceph_entity_addr)) != 0)
+ __reset_osd(osdc, osd);
}
+}
+
+/*
+ * Requeue requests whose mapping to an OSD has changed. If requests map to
+ * no osd, request a new map.
+ *
+ * Caller should hold map_sem for read and request_mutex.
+ */
+static void kick_requests(struct ceph_osd_client *osdc)
+{
+ struct ceph_osd_request *req, *nreq;
+ struct rb_node *p;
+ int needmap = 0;
+ int err;
+ dout("kick_requests\n");
+ mutex_lock(&osdc->request_mutex);
for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
req = rb_entry(p, struct ceph_osd_request, r_node);
-
- if (req->r_resend) {
- dout(" r_resend set on tid %llu\n", req->r_tid);
- __cancel_request(req);
- goto kick;
- }
- if (req->r_osd && kickosd == req->r_osd) {
- __cancel_request(req);
- goto kick;
+ err = __map_request(osdc, req);
+ if (err < 0)
+ continue; /* error */
+ if (req->r_osd == NULL) {
+ dout("%p tid %llu maps to no osd\n", req, req->r_tid);
+ needmap++; /* request a newer map */
+ } else if (err > 0) {
+ dout("%p tid %llu requeued on osd%d\n", req, req->r_tid,
+ req->r_osd ? req->r_osd->o_osd : -1);
+ if (!req->r_linger)
+ req->r_flags |= CEPH_OSD_FLAG_RETRY;
}
+ }
+
+ list_for_each_entry_safe(req, nreq, &osdc->req_linger,
+ r_linger_item) {
+ dout("linger req=%p req->r_osd=%p\n", req, req->r_osd);
- err = __map_osds(osdc, req);
+ err = __map_request(osdc, req);
if (err == 0)
- continue; /* no change */
- if (err < 0) {
- /*
- * FIXME: really, we should set the request
- * error and fail if this isn't a 'nofail'
- * request, but that's a fair bit more
- * complicated to do. So retry!
- */
- dout(" setting r_resend on %llu\n", req->r_tid);
- req->r_resend = true;
- continue;
- }
+ continue; /* no change and no osd was specified */
+ if (err < 0)
+ continue; /* hrm! */
if (req->r_osd == NULL) {
dout("tid %llu maps to no valid osd\n", req->r_tid);
needmap++; /* request a newer map */
continue;
}
-kick:
- dout("kicking %p tid %llu osd%d\n", req, req->r_tid,
+ dout("kicking lingering %p tid %llu osd%d\n", req, req->r_tid,
req->r_osd ? req->r_osd->o_osd : -1);
- req->r_flags |= CEPH_OSD_FLAG_RETRY;
- err = __send_request(osdc, req);
- if (err) {
- dout(" setting r_resend on %llu\n", req->r_tid);
- req->r_resend = true;
- }
+ __unregister_linger_request(osdc, req);
+ __register_request(osdc, req);
}
-
- return needmap;
-}
-
-/*
- * Resubmit osd requests whose osd or osd address has changed. Request
- * a new osd map if osds are down, or we are otherwise unable to determine
- * how to direct a request.
- *
- * Close connections to down osds.
- *
- * If @who is specified, resubmit requests for that specific osd.
- *
- * Caller should hold map_sem for read and request_mutex.
- */
-static void kick_requests(struct ceph_osd_client *osdc,
- struct ceph_osd *kickosd)
-{
- int needmap;
-
- mutex_lock(&osdc->request_mutex);
- needmap = __kick_requests(osdc, kickosd);
mutex_unlock(&osdc->request_mutex);
if (needmap) {
dout("%d requests for down osds, need new map\n", needmap);
ceph_monc_request_next_osdmap(&osdc->client->monc);
}
-
}
+
+
/*
* Process updated osd map.
*
@@ -1263,6 +1367,8 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
ceph_osdmap_destroy(osdc->osdmap);
osdc->osdmap = newmap;
}
+ kick_requests(osdc);
+ reset_changed_osds(osdc);
} else {
dout("ignoring incremental map %u len %d\n",
epoch, maplen);
@@ -1300,6 +1406,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
osdc->osdmap = newmap;
if (oldmap)
ceph_osdmap_destroy(oldmap);
+ kick_requests(osdc);
}
p += maplen;
nr_maps--;
@@ -1308,8 +1415,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
done:
downgrade_write(&osdc->map_sem);
ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch);
- if (newmap)
- kick_requests(osdc, NULL);
+ send_queued(osdc);
up_read(&osdc->map_sem);
wake_up_all(&osdc->client->auth_wq);
return;
@@ -1322,6 +1428,223 @@ bad:
}
/*
+ * watch/notify callback event infrastructure
+ *
+ * These callbacks are used both for watch and notify operations.
+ */
+static void __release_event(struct kref *kref)
+{
+ struct ceph_osd_event *event =
+ container_of(kref, struct ceph_osd_event, kref);
+
+ dout("__release_event %p\n", event);
+ kfree(event);
+}
+
+static void get_event(struct ceph_osd_event *event)
+{
+ kref_get(&event->kref);
+}
+
+void ceph_osdc_put_event(struct ceph_osd_event *event)
+{
+ kref_put(&event->kref, __release_event);
+}
+EXPORT_SYMBOL(ceph_osdc_put_event);
+
+static void __insert_event(struct ceph_osd_client *osdc,
+ struct ceph_osd_event *new)
+{
+ struct rb_node **p = &osdc->event_tree.rb_node;
+ struct rb_node *parent = NULL;
+ struct ceph_osd_event *event = NULL;
+
+ while (*p) {
+ parent = *p;
+ event = rb_entry(parent, struct ceph_osd_event, node);
+ if (new->cookie < event->cookie)
+ p = &(*p)->rb_left;
+ else if (new->cookie > event->cookie)
+ p = &(*p)->rb_right;
+ else
+ BUG();
+ }
+
+ rb_link_node(&new->node, parent, p);
+ rb_insert_color(&new->node, &osdc->event_tree);
+}
+
+static struct ceph_osd_event *__find_event(struct ceph_osd_client *osdc,
+ u64 cookie)
+{
+ struct rb_node **p = &osdc->event_tree.rb_node;
+ struct rb_node *parent = NULL;
+ struct ceph_osd_event *event = NULL;
+
+ while (*p) {
+ parent = *p;
+ event = rb_entry(parent, struct ceph_osd_event, node);
+ if (cookie < event->cookie)
+ p = &(*p)->rb_left;
+ else if (cookie > event->cookie)
+ p = &(*p)->rb_right;
+ else
+ return event;
+ }
+ return NULL;
+}
+
+static void __remove_event(struct ceph_osd_event *event)
+{
+ struct ceph_osd_client *osdc = event->osdc;
+
+ if (!RB_EMPTY_NODE(&event->node)) {
+ dout("__remove_event removed %p\n", event);
+ rb_erase(&event->node, &osdc->event_tree);
+ ceph_osdc_put_event(event);
+ } else {
+ dout("__remove_event didn't remove %p\n", event);
+ }
+}
+
+int ceph_osdc_create_event(struct ceph_osd_client *osdc,
+ void (*event_cb)(u64, u64, u8, void *),
+ int one_shot, void *data,
+ struct ceph_osd_event **pevent)
+{
+ struct ceph_osd_event *event;
+
+ event = kmalloc(sizeof(*event), GFP_NOIO);
+ if (!event)
+ return -ENOMEM;
+
+ dout("create_event %p\n", event);
+ event->cb = event_cb;
+ event->one_shot = one_shot;
+ event->data = data;
+ event->osdc = osdc;
+ INIT_LIST_HEAD(&event->osd_node);
+ kref_init(&event->kref); /* one ref for us */
+ kref_get(&event->kref); /* one ref for the caller */
+ init_completion(&event->completion);
+
+ spin_lock(&osdc->event_lock);
+ event->cookie = ++osdc->event_count;
+ __insert_event(osdc, event);
+ spin_unlock(&osdc->event_lock);
+
+ *pevent = event;
+ return 0;
+}
+EXPORT_SYMBOL(ceph_osdc_create_event);
+
+void ceph_osdc_cancel_event(struct ceph_osd_event *event)
+{
+ struct ceph_osd_client *osdc = event->osdc;
+
+ dout("cancel_event %p\n", event);
+ spin_lock(&osdc->event_lock);
+ __remove_event(event);
+ spin_unlock(&osdc->event_lock);
+ ceph_osdc_put_event(event); /* caller's */
+}
+EXPORT_SYMBOL(ceph_osdc_cancel_event);
+
+
+static void do_event_work(struct work_struct *work)
+{
+ struct ceph_osd_event_work *event_work =
+ container_of(work, struct ceph_osd_event_work, work);
+ struct ceph_osd_event *event = event_work->event;
+ u64 ver = event_work->ver;
+ u64 notify_id = event_work->notify_id;
+ u8 opcode = event_work->opcode;
+
+ dout("do_event_work completing %p\n", event);
+ event->cb(ver, notify_id, opcode, event->data);
+ complete(&event->completion);
+ dout("do_event_work completed %p\n", event);
+ ceph_osdc_put_event(event);
+ kfree(event_work);
+}
+
+
+/*
+ * Process osd watch notifications
+ */
+void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg)
+{
+ void *p, *end;
+ u8 proto_ver;
+ u64 cookie, ver, notify_id;
+ u8 opcode;
+ struct ceph_osd_event *event;
+ struct ceph_osd_event_work *event_work;
+
+ p = msg->front.iov_base;
+ end = p + msg->front.iov_len;
+
+ ceph_decode_8_safe(&p, end, proto_ver, bad);
+ ceph_decode_8_safe(&p, end, opcode, bad);
+ ceph_decode_64_safe(&p, end, cookie, bad);
+ ceph_decode_64_safe(&p, end, ver, bad);
+ ceph_decode_64_safe(&p, end, notify_id, bad);
+
+ spin_lock(&osdc->event_lock);
+ event = __find_event(osdc, cookie);
+ if (event) {
+ get_event(event);
+ if (event->one_shot)
+ __remove_event(event);
+ }
+ spin_unlock(&osdc->event_lock);
+ dout("handle_watch_notify cookie %lld ver %lld event %p\n",
+ cookie, ver, event);
+ if (event) {
+ event_work = kmalloc(sizeof(*event_work), GFP_NOIO);
+ INIT_WORK(&event_work->work, do_event_work);
+ if (!event_work) {
+ dout("ERROR: could not allocate event_work\n");
+ goto done_err;
+ }
+ event_work->event = event;
+ event_work->ver = ver;
+ event_work->notify_id = notify_id;
+ event_work->opcode = opcode;
+ if (!queue_work(osdc->notify_wq, &event_work->work)) {
+ dout("WARNING: failed to queue notify event work\n");
+ goto done_err;
+ }
+ }
+
+ return;
+
+done_err:
+ complete(&event->completion);
+ ceph_osdc_put_event(event);
+ return;
+
+bad:
+ pr_err("osdc handle_watch_notify corrupt msg\n");
+ return;
+}
+
+int ceph_osdc_wait_event(struct ceph_osd_event *event, unsigned long timeout)
+{
+ int err;
+
+ dout("wait_event %p\n", event);
+ err = wait_for_completion_interruptible_timeout(&event->completion,
+ timeout * HZ);
+ ceph_osdc_put_event(event);
+ if (err > 0)
+ err = 0;
+ dout("wait_event %p returns %d\n", event, err);
+ return err;
+}
+EXPORT_SYMBOL(ceph_osdc_wait_event);
+
+/*
* Register request, send initial attempt.
*/
int ceph_osdc_start_request(struct ceph_osd_client *osdc,
@@ -1347,15 +1670,22 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
* the request still han't been touched yet.
*/
if (req->r_sent == 0) {
- rc = __send_request(osdc, req);
- if (rc) {
- if (nofail) {
- dout("osdc_start_request failed send, "
- " marking %lld\n", req->r_tid);
- req->r_resend = true;
- rc = 0;
- } else {
- __unregister_request(osdc, req);
+ rc = __map_request(osdc, req);
+ if (rc < 0)
+ return rc;
+ if (req->r_osd == NULL) {
+ dout("send_request %p no up osds in pg\n", req);
+ ceph_monc_request_next_osdmap(&osdc->client->monc);
+ } else {
+ rc = __send_request(osdc, req);
+ if (rc) {
+ if (nofail) {
+ dout("osdc_start_request failed send, "
+ " will retry %lld\n", req->r_tid);
+ rc = 0;
+ } else {
+ __unregister_request(osdc, req);
+ }
}
}
}
@@ -1441,9 +1771,15 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
INIT_LIST_HEAD(&osdc->osd_lru);
osdc->requests = RB_ROOT;
INIT_LIST_HEAD(&osdc->req_lru);
+ INIT_LIST_HEAD(&osdc->req_unsent);
+ INIT_LIST_HEAD(&osdc->req_notarget);
+ INIT_LIST_HEAD(&osdc->req_linger);
osdc->num_requests = 0;
INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout);
INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout);
+ spin_lock_init(&osdc->event_lock);
+ osdc->event_tree = RB_ROOT;
+ osdc->event_count = 0;
schedule_delayed_work(&osdc->osds_timeout_work,
round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ));
@@ -1463,6 +1799,13 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
"osd_op_reply");
if (err < 0)
goto out_msgpool;
+
+ osdc->notify_wq = create_singlethread_workqueue("ceph-watch-notify");
+ if (IS_ERR(osdc->notify_wq)) {
+ err = PTR_ERR(osdc->notify_wq);
+ osdc->notify_wq = NULL;
+ goto out_msgpool;
+ }
return 0;
out_msgpool:
@@ -1476,6 +1819,8 @@ EXPORT_SYMBOL(ceph_osdc_init);
void ceph_osdc_stop(struct ceph_osd_client *osdc)
{
+ flush_workqueue(osdc->notify_wq);
+ destroy_workqueue(osdc->notify_wq);
cancel_delayed_work_sync(&osdc->timeout_work);
cancel_delayed_work_sync(&osdc->osds_timeout_work);
if (osdc->osdmap) {
@@ -1483,6 +1828,7 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc)
osdc->osdmap = NULL;
}
remove_old_osds(osdc, 1);
+ WARN_ON(!RB_EMPTY_ROOT(&osdc->osds));
mempool_destroy(osdc->req_mempool);
ceph_msgpool_destroy(&osdc->msgpool_op);
ceph_msgpool_destroy(&osdc->msgpool_op_reply);
@@ -1591,6 +1937,9 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
case CEPH_MSG_OSD_OPREPLY:
handle_reply(osdc, msg, con);
break;
+ case CEPH_MSG_WATCH_NOTIFY:
+ handle_watch_notify(osdc, msg);
+ break;
default:
pr_err("received unknown message type %d %s\n", type,
@@ -1684,6 +2033,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con,
switch (type) {
case CEPH_MSG_OSD_MAP:
+ case CEPH_MSG_WATCH_NOTIFY:
return ceph_msg_new(type, front, GFP_NOFS);
case CEPH_MSG_OSD_OPREPLY:
return get_reply(con, hdr, skip);
diff --git a/net/core/dev.c b/net/core/dev.c
index 0b88eba97da..3da9fb06d47 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1140,9 +1140,6 @@ static int __dev_open(struct net_device *dev)
ASSERT_RTNL();
- /*
- * Is it even present?
- */
if (!netif_device_present(dev))
return -ENODEV;
@@ -1151,9 +1148,6 @@ static int __dev_open(struct net_device *dev)
if (ret)
return ret;
- /*
- * Call device private open method
- */
set_bit(__LINK_STATE_START, &dev->state);
if (ops->ndo_validate_addr)
@@ -1162,31 +1156,12 @@ static int __dev_open(struct net_device *dev)
if (!ret && ops->ndo_open)
ret = ops->ndo_open(dev);
- /*
- * If it went open OK then:
- */
-
if (ret)
clear_bit(__LINK_STATE_START, &dev->state);
else {
- /*
- * Set the flags.
- */
dev->flags |= IFF_UP;
-
- /*
- * Enable NET_DMA
- */
net_dmaengine_get();
-
- /*
- * Initialize multicasting status
- */
dev_set_rx_mode(dev);
-
- /*
- * Wakeup transmit queue engine
- */
dev_activate(dev);
}
@@ -1209,22 +1184,13 @@ int dev_open(struct net_device *dev)
{
int ret;
- /*
- * Is it already up?
- */
if (dev->flags & IFF_UP)
return 0;
- /*
- * Open device
- */
ret = __dev_open(dev);
if (ret < 0)
return ret;
- /*
- * ... and announce new interface.
- */
rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
call_netdevice_notifiers(NETDEV_UP, dev);
@@ -1240,10 +1206,6 @@ static int __dev_close_many(struct list_head *head)
might_sleep();
list_for_each_entry(dev, head, unreg_list) {
- /*
- * Tell people we are going down, so that they can
- * prepare to death, when device is still operating.
- */
call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
clear_bit(__LINK_STATE_START, &dev->state);
@@ -1272,15 +1234,7 @@ static int __dev_close_many(struct list_head *head)
if (ops->ndo_stop)
ops->ndo_stop(dev);
- /*
- * Device is now down.
- */
-
dev->flags &= ~IFF_UP;
-
- /*
- * Shutdown NET_DMA
- */
net_dmaengine_put();
}
@@ -1309,9 +1263,6 @@ static int dev_close_many(struct list_head *head)
__dev_close_many(head);
- /*
- * Tell people we are down
- */
list_for_each_entry(dev, head, unreg_list) {
rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
call_netdevice_notifiers(NETDEV_DOWN, dev);
@@ -1353,14 +1304,17 @@ EXPORT_SYMBOL(dev_close);
*/
void dev_disable_lro(struct net_device *dev)
{
- if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
- dev->ethtool_ops->set_flags) {
- u32 flags = dev->ethtool_ops->get_flags(dev);
- if (flags & ETH_FLAG_LRO) {
- flags &= ~ETH_FLAG_LRO;
- dev->ethtool_ops->set_flags(dev, flags);
- }
- }
+ u32 flags;
+
+ if (dev->ethtool_ops && dev->ethtool_ops->get_flags)
+ flags = dev->ethtool_ops->get_flags(dev);
+ else
+ flags = ethtool_op_get_flags(dev);
+
+ if (!(flags & ETH_FLAG_LRO))
+ return;
+
+ __ethtool_set_flags(dev, flags & ~ETH_FLAG_LRO);
WARN_ON(dev->features & NETIF_F_LRO);
}
EXPORT_SYMBOL(dev_disable_lro);
@@ -1368,11 +1322,6 @@ EXPORT_SYMBOL(dev_disable_lro);
static int dev_boot_phase = 1;
-/*
- * Device change register/unregister. These are not inline or static
- * as we export them to the world.
- */
-
/**
* register_netdevice_notifier - register a network notifier block
* @nb: notifier
@@ -1474,6 +1423,7 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
ASSERT_RTNL();
return raw_notifier_call_chain(&netdev_chain, val, dev);
}
+EXPORT_SYMBOL(call_netdevice_notifiers);
/* When > 0 there are consumers of rx skb time stamps */
static atomic_t netstamp_needed = ATOMIC_INIT(0);
@@ -1504,6 +1454,27 @@ static inline void net_timestamp_check(struct sk_buff *skb)
__net_timestamp(skb);
}
+static inline bool is_skb_forwardable(struct net_device *dev,
+ struct sk_buff *skb)
+{
+ unsigned int len;
+
+ if (!(dev->flags & IFF_UP))
+ return false;
+
+ len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
+ if (skb->len <= len)
+ return true;
+
+ /* if TSO is enabled, we don't care about the length as the packet
+ * could be forwarded without being segmented before
+ */
+ if (skb_is_gso(skb))
+ return true;
+
+ return false;
+}
+
/**
* dev_forward_skb - loopback an skb to another netif
*
@@ -1527,8 +1498,7 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
skb_orphan(skb);
nf_reset(skb);
- if (unlikely(!(dev->flags & IFF_UP) ||
- (skb->len > (dev->mtu + dev->hard_header_len + VLAN_HLEN)))) {
+ if (unlikely(!is_skb_forwardable(dev, skb))) {
atomic_long_inc(&dev->rx_dropped);
kfree_skb(skb);
return NET_RX_DROP;
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 36e603c78ce..706502ff64a 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -350,7 +350,7 @@ static int __init init_net_drop_monitor(void)
struct per_cpu_dm_data *data;
int cpu, rc;
- printk(KERN_INFO "Initalizing network drop monitor service\n");
+ printk(KERN_INFO "Initializing network drop monitor service\n");
if (sizeof(void *) > 8) {
printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n");
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index a1086fb0c0c..74ead9eca12 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -141,9 +141,24 @@ u32 ethtool_op_get_flags(struct net_device *dev)
}
EXPORT_SYMBOL(ethtool_op_get_flags);
+/* Check if device can enable (or disable) particular feature coded in "data"
+ * argument. Flags "supported" describe features that can be toggled by device.
+ * If feature can not be toggled, it state (enabled or disabled) must match
+ * hardcoded device features state, otherwise flags are marked as invalid.
+ */
+bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported)
+{
+ u32 features = dev->features & flags_dup_features;
+ /* "data" can contain only flags_dup_features bits,
+ * see __ethtool_set_flags */
+
+ return (features & ~supported) != (data & ~supported);
+}
+EXPORT_SYMBOL(ethtool_invalid_flags);
+
int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
{
- if (data & ~supported)
+ if (ethtool_invalid_flags(dev, data, supported))
return -EINVAL;
dev->features = ((dev->features & ~flags_dup_features) |
@@ -513,7 +528,7 @@ static int ethtool_set_one_feature(struct net_device *dev,
}
}
-static int __ethtool_set_flags(struct net_device *dev, u32 data)
+int __ethtool_set_flags(struct net_device *dev, u32 data)
{
u32 changed;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 0c55eaa70e3..aeeece72b72 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3761,7 +3761,10 @@ static int __init pktgen_create_thread(int cpu)
list_add_tail(&t->th_list, &pktgen_threads);
init_completion(&t->start_done);
- p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu);
+ p = kthread_create_on_node(pktgen_thread_worker,
+ t,
+ cpu_to_node(cpu),
+ "kpktgend_%d", cpu);
if (IS_ERR(p)) {
pr_err("kernel_thread() failed for cpu %d\n", t->cpu);
list_del(&t->th_list);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 090d273d786..1b74d3b6437 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -215,6 +215,9 @@ int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir)
case ARPHRD_INFINIBAND:
ip_ib_mc_map(addr, dev->broadcast, haddr);
return 0;
+ case ARPHRD_IPGRE:
+ ip_ipgre_mc_map(addr, dev->broadcast, haddr);
+ return 0;
default:
if (dir) {
memcpy(haddr, dev->broadcast, dev->addr_len);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 6d85800daeb..5345b0bee6d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -64,6 +64,8 @@
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
+#include "fib_lookup.h"
+
static struct ipv4_devconf ipv4_devconf = {
.data = {
[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
@@ -151,6 +153,20 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
break;
}
}
+ if (!result) {
+ struct flowi4 fl4 = { .daddr = addr };
+ struct fib_result res = { 0 };
+ struct fib_table *local;
+
+ /* Fallback to FIB local table so that communication
+ * over loopback subnets work.
+ */
+ local = fib_get_table(net, RT_TABLE_LOCAL);
+ if (local &&
+ !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
+ res.type == RTN_LOCAL)
+ result = FIB_RES_DEV(res);
+ }
if (result && devref)
dev_hold(result);
rcu_read_unlock();
@@ -345,6 +361,17 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
}
}
+ /* On promotion all secondaries from subnet are changing
+ * the primary IP, we must remove all their routes silently
+ * and later to add them back with new prefsrc. Do this
+ * while all addresses are on the device list.
+ */
+ for (ifa = promote; ifa; ifa = ifa->ifa_next) {
+ if (ifa1->ifa_mask == ifa->ifa_mask &&
+ inet_ifa_match(ifa1->ifa_address, ifa))
+ fib_del_ifaddr(ifa, ifa1);
+ }
+
/* 2. Unlink it */
*ifap = ifa1->ifa_next;
@@ -364,6 +391,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
if (promote) {
+ struct in_ifaddr *next_sec = promote->ifa_next;
if (prev_prom) {
prev_prom->ifa_next = promote->ifa_next;
@@ -375,7 +403,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
blocking_notifier_call_chain(&inetaddr_chain,
NETDEV_UP, promote);
- for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
+ for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
if (ifa1->ifa_mask != ifa->ifa_mask ||
!inet_ifa_match(ifa1->ifa_address, ifa))
continue;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index a373a259253..451088330bb 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -228,7 +228,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
if (res.type != RTN_LOCAL || !accept_local)
goto e_inval;
}
- *spec_dst = FIB_RES_PREFSRC(res);
+ *spec_dst = FIB_RES_PREFSRC(net, res);
fib_combine_itag(itag, &res);
dev_match = false;
@@ -258,7 +258,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
ret = 0;
if (fib_lookup(net, &fl4, &res) == 0) {
if (res.type == RTN_UNICAST) {
- *spec_dst = FIB_RES_PREFSRC(res);
+ *spec_dst = FIB_RES_PREFSRC(net, res);
ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
}
}
@@ -722,12 +722,17 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
}
}
-static void fib_del_ifaddr(struct in_ifaddr *ifa)
+/* Delete primary or secondary address.
+ * Optionally, on secondary address promotion consider the addresses
+ * from subnet iprim as deleted, even if they are in device list.
+ * In this case the secondary ifa can be in device list.
+ */
+void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
{
struct in_device *in_dev = ifa->ifa_dev;
struct net_device *dev = in_dev->dev;
struct in_ifaddr *ifa1;
- struct in_ifaddr *prim = ifa;
+ struct in_ifaddr *prim = ifa, *prim1 = NULL;
__be32 brd = ifa->ifa_address | ~ifa->ifa_mask;
__be32 any = ifa->ifa_address & ifa->ifa_mask;
#define LOCAL_OK 1
@@ -735,17 +740,26 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
#define BRD0_OK 4
#define BRD1_OK 8
unsigned ok = 0;
+ int subnet = 0; /* Primary network */
+ int gone = 1; /* Address is missing */
+ int same_prefsrc = 0; /* Another primary with same IP */
- if (!(ifa->ifa_flags & IFA_F_SECONDARY))
- fib_magic(RTM_DELROUTE,
- dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
- any, ifa->ifa_prefixlen, prim);
- else {
+ if (ifa->ifa_flags & IFA_F_SECONDARY) {
prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
if (prim == NULL) {
printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
return;
}
+ if (iprim && iprim != prim) {
+ printk(KERN_WARNING "fib_del_ifaddr: bug: iprim != prim\n");
+ return;
+ }
+ } else if (!ipv4_is_zeronet(any) &&
+ (any != ifa->ifa_local || ifa->ifa_prefixlen < 32)) {
+ fib_magic(RTM_DELROUTE,
+ dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
+ any, ifa->ifa_prefixlen, prim);
+ subnet = 1;
}
/* Deletion is more complicated than add.
@@ -755,6 +769,49 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
*/
for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
+ if (ifa1 == ifa) {
+ /* promotion, keep the IP */
+ gone = 0;
+ continue;
+ }
+ /* Ignore IFAs from our subnet */
+ if (iprim && ifa1->ifa_mask == iprim->ifa_mask &&
+ inet_ifa_match(ifa1->ifa_address, iprim))
+ continue;
+
+ /* Ignore ifa1 if it uses different primary IP (prefsrc) */
+ if (ifa1->ifa_flags & IFA_F_SECONDARY) {
+ /* Another address from our subnet? */
+ if (ifa1->ifa_mask == prim->ifa_mask &&
+ inet_ifa_match(ifa1->ifa_address, prim))
+ prim1 = prim;
+ else {
+ /* We reached the secondaries, so
+ * same_prefsrc should be determined.
+ */
+ if (!same_prefsrc)
+ continue;
+ /* Search new prim1 if ifa1 is not
+ * using the current prim1
+ */
+ if (!prim1 ||
+ ifa1->ifa_mask != prim1->ifa_mask ||
+ !inet_ifa_match(ifa1->ifa_address, prim1))
+ prim1 = inet_ifa_byprefix(in_dev,
+ ifa1->ifa_address,
+ ifa1->ifa_mask);
+ if (!prim1)
+ continue;
+ if (prim1->ifa_local != prim->ifa_local)
+ continue;
+ }
+ } else {
+ if (prim->ifa_local != ifa1->ifa_local)
+ continue;
+ prim1 = ifa1;
+ if (prim != prim1)
+ same_prefsrc = 1;
+ }
if (ifa->ifa_local == ifa1->ifa_local)
ok |= LOCAL_OK;
if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
@@ -763,19 +820,37 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
ok |= BRD1_OK;
if (any == ifa1->ifa_broadcast)
ok |= BRD0_OK;
+ /* primary has network specific broadcasts */
+ if (prim1 == ifa1 && ifa1->ifa_prefixlen < 31) {
+ __be32 brd1 = ifa1->ifa_address | ~ifa1->ifa_mask;
+ __be32 any1 = ifa1->ifa_address & ifa1->ifa_mask;
+
+ if (!ipv4_is_zeronet(any1)) {
+ if (ifa->ifa_broadcast == brd1 ||
+ ifa->ifa_broadcast == any1)
+ ok |= BRD_OK;
+ if (brd == brd1 || brd == any1)
+ ok |= BRD1_OK;
+ if (any == brd1 || any == any1)
+ ok |= BRD0_OK;
+ }
+ }
}
if (!(ok & BRD_OK))
fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
- if (!(ok & BRD1_OK))
- fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
- if (!(ok & BRD0_OK))
- fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
+ if (subnet && ifa->ifa_prefixlen < 31) {
+ if (!(ok & BRD1_OK))
+ fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
+ if (!(ok & BRD0_OK))
+ fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
+ }
if (!(ok & LOCAL_OK)) {
fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
/* Check, that this local address finally disappeared. */
- if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
+ if (gone &&
+ inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
/* And the last, but not the least thing.
* We must flush stray FIB entries.
*
@@ -885,6 +960,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
{
struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
struct net_device *dev = ifa->ifa_dev->dev;
+ struct net *net = dev_net(dev);
switch (event) {
case NETDEV_UP:
@@ -892,12 +968,12 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
#ifdef CONFIG_IP_ROUTE_MULTIPATH
fib_sync_up(dev);
#endif
- fib_update_nh_saddrs(dev);
+ atomic_inc(&net->ipv4.dev_addr_genid);
rt_cache_flush(dev_net(dev), -1);
break;
case NETDEV_DOWN:
- fib_del_ifaddr(ifa);
- fib_update_nh_saddrs(dev);
+ fib_del_ifaddr(ifa, NULL);
+ atomic_inc(&net->ipv4.dev_addr_genid);
if (ifa->ifa_dev->ifa_list == NULL) {
/* Last address was deleted from this interface.
* Disable IP.
@@ -915,6 +991,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
{
struct net_device *dev = ptr;
struct in_device *in_dev = __in_dev_get_rtnl(dev);
+ struct net *net = dev_net(dev);
if (event == NETDEV_UNREGISTER) {
fib_disable_ip(dev, 2, -1);
@@ -932,6 +1009,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
#ifdef CONFIG_IP_ROUTE_MULTIPATH
fib_sync_up(dev);
#endif
+ atomic_inc(&net->ipv4.dev_addr_genid);
rt_cache_flush(dev_net(dev), -1);
break;
case NETDEV_DOWN:
@@ -990,6 +1068,7 @@ static void ip_fib_net_exit(struct net *net)
fib4_rules_exit(net);
#endif
+ rtnl_lock();
for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
struct fib_table *tb;
struct hlist_head *head;
@@ -1002,6 +1081,7 @@ static void ip_fib_net_exit(struct net *net)
fib_free_table(tb);
}
}
+ rtnl_unlock();
kfree(net->ipv4.fib_table_hash);
}
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 4ec323875a0..af0f14aba16 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -10,7 +10,6 @@ struct fib_alias {
struct fib_info *fa_info;
u8 fa_tos;
u8 fa_type;
- u8 fa_scope;
u8 fa_state;
struct rcu_head rcu;
};
@@ -29,7 +28,7 @@ extern void fib_release_info(struct fib_info *);
extern struct fib_info *fib_create_info(struct fib_config *cfg);
extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi);
extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
- u32 tb_id, u8 type, u8 scope, __be32 dst,
+ u32 tb_id, u8 type, __be32 dst,
int dst_len, u8 tos, struct fib_info *fi,
unsigned int);
extern void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 622ac4c9502..641a5a2a9f9 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -222,7 +222,7 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
unsigned int mask = (fib_info_hash_size - 1);
unsigned int val = fi->fib_nhs;
- val ^= fi->fib_protocol;
+ val ^= (fi->fib_protocol << 8) | fi->fib_scope;
val ^= (__force u32)fi->fib_prefsrc;
val ^= fi->fib_priority;
for_nexthops(fi) {
@@ -248,10 +248,11 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
if (fi->fib_nhs != nfi->fib_nhs)
continue;
if (nfi->fib_protocol == fi->fib_protocol &&
+ nfi->fib_scope == fi->fib_scope &&
nfi->fib_prefsrc == fi->fib_prefsrc &&
nfi->fib_priority == fi->fib_priority &&
memcmp(nfi->fib_metrics, fi->fib_metrics,
- sizeof(fi->fib_metrics)) == 0 &&
+ sizeof(u32) * RTAX_MAX) == 0 &&
((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 &&
(nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
return fi;
@@ -328,7 +329,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
goto errout;
err = fib_dump_info(skb, info->pid, seq, event, tb_id,
- fa->fa_type, fa->fa_scope, key, dst_len,
+ fa->fa_type, key, dst_len,
fa->fa_tos, fa->fa_info, nlm_flags);
if (err < 0) {
/* -EMSGSIZE implies BUG in fib_nlmsg_size() */
@@ -695,6 +696,16 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
fib_info_hash_free(old_laddrhash, bytes);
}
+__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh)
+{
+ nh->nh_saddr = inet_select_addr(nh->nh_dev,
+ nh->nh_gw,
+ nh->nh_parent->fib_scope);
+ nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid);
+
+ return nh->nh_saddr;
+}
+
struct fib_info *fib_create_info(struct fib_config *cfg)
{
int err;
@@ -753,6 +764,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
fi->fib_net = hold_net(net);
fi->fib_protocol = cfg->fc_protocol;
+ fi->fib_scope = cfg->fc_scope;
fi->fib_flags = cfg->fc_flags;
fi->fib_priority = cfg->fc_priority;
fi->fib_prefsrc = cfg->fc_prefsrc;
@@ -854,10 +866,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
}
change_nexthops(fi) {
- nexthop_nh->nh_cfg_scope = cfg->fc_scope;
- nexthop_nh->nh_saddr = inet_select_addr(nexthop_nh->nh_dev,
- nexthop_nh->nh_gw,
- nexthop_nh->nh_cfg_scope);
+ fib_info_update_nh_saddr(net, nexthop_nh);
} endfor_nexthops(fi)
link_it:
@@ -906,7 +915,7 @@ failure:
}
int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
- u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
+ u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos,
struct fib_info *fi, unsigned int flags)
{
struct nlmsghdr *nlh;
@@ -928,7 +937,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
NLA_PUT_U32(skb, RTA_TABLE, tb_id);
rtm->rtm_type = type;
rtm->rtm_flags = fi->fib_flags;
- rtm->rtm_scope = scope;
+ rtm->rtm_scope = fi->fib_scope;
rtm->rtm_protocol = fi->fib_protocol;
if (rtm->rtm_dst_len)
@@ -1084,7 +1093,7 @@ void fib_select_default(struct fib_result *res)
list_for_each_entry_rcu(fa, fa_head, fa_list) {
struct fib_info *next_fi = fa->fa_info;
- if (fa->fa_scope != res->scope ||
+ if (next_fi->fib_scope != res->scope ||
fa->fa_type != RTN_UNICAST)
continue;
@@ -1128,24 +1137,6 @@ out:
return;
}
-void fib_update_nh_saddrs(struct net_device *dev)
-{
- struct hlist_head *head;
- struct hlist_node *node;
- struct fib_nh *nh;
- unsigned int hash;
-
- hash = fib_devindex_hashfn(dev->ifindex);
- head = &fib_info_devhash[hash];
- hlist_for_each_entry(nh, node, head, nh_hash) {
- if (nh->nh_dev != dev)
- continue;
- nh->nh_saddr = inet_select_addr(nh->nh_dev,
- nh->nh_gw,
- nh->nh_cfg_scope);
- }
-}
-
#ifdef CONFIG_IP_ROUTE_MULTIPATH
/*
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 3d28a35c2e1..b92c86f6e9b 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1245,7 +1245,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
if (fa->fa_info->fib_priority != fi->fib_priority)
break;
if (fa->fa_type == cfg->fc_type &&
- fa->fa_scope == cfg->fc_scope &&
fa->fa_info == fi) {
fa_match = fa;
break;
@@ -1271,7 +1270,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
new_fa->fa_tos = fa->fa_tos;
new_fa->fa_info = fi;
new_fa->fa_type = cfg->fc_type;
- new_fa->fa_scope = cfg->fc_scope;
state = fa->fa_state;
new_fa->fa_state = state & ~FA_S_ACCESSED;
@@ -1308,7 +1306,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
new_fa->fa_info = fi;
new_fa->fa_tos = tos;
new_fa->fa_type = cfg->fc_type;
- new_fa->fa_scope = cfg->fc_scope;
new_fa->fa_state = 0;
/*
* Insert new entry to the list.
@@ -1362,15 +1359,15 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l,
if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
continue;
- if (fa->fa_scope < flp->flowi4_scope)
+ if (fa->fa_info->fib_scope < flp->flowi4_scope)
continue;
fib_alias_accessed(fa);
err = fib_props[fa->fa_type].error;
if (err) {
#ifdef CONFIG_IP_FIB_TRIE_STATS
- t->stats.semantic_match_miss++;
+ t->stats.semantic_match_passed++;
#endif
- return 1;
+ return err;
}
if (fi->fib_flags & RTNH_F_DEAD)
continue;
@@ -1388,7 +1385,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l,
res->prefixlen = plen;
res->nh_sel = nhsel;
res->type = fa->fa_type;
- res->scope = fa->fa_scope;
+ res->scope = fa->fa_info->fib_scope;
res->fi = fi;
res->table = tb;
res->fa_head = &li->falh;
@@ -1664,7 +1661,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
(cfg->fc_scope == RT_SCOPE_NOWHERE ||
- fa->fa_scope == cfg->fc_scope) &&
+ fa->fa_info->fib_scope == cfg->fc_scope) &&
+ (!cfg->fc_prefsrc ||
+ fi->fib_prefsrc == cfg->fc_prefsrc) &&
(!cfg->fc_protocol ||
fi->fib_protocol == cfg->fc_protocol) &&
fib_nh_match(cfg, fi) == 0) {
@@ -1861,7 +1860,6 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
RTM_NEWROUTE,
tb->tb_id,
fa->fa_type,
- fa->fa_scope,
xkey,
plen,
fa->fa_tos,
@@ -2382,7 +2380,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
seq_indent(seq, iter->depth+1);
seq_printf(seq, " /%d %s %s", li->plen,
rtn_scope(buf1, sizeof(buf1),
- fa->fa_scope),
+ fa->fa_info->fib_scope),
rtn_type(buf2, sizeof(buf2),
fa->fa_type));
if (fa->fa_tos)
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 1906fa35860..28a736f3442 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -140,11 +140,11 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
} else {
dopt->ts_needtime = 0;
- if (soffset + 8 <= optlen) {
+ if (soffset + 7 <= optlen) {
__be32 addr;
- memcpy(&addr, sptr+soffset-1, 4);
- if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_LOCAL) {
+ memcpy(&addr, dptr+soffset-1, 4);
+ if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_UNICAST) {
dopt->ts_needtime = 1;
soffset += 8;
}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index e837ffd3edc..2d3c72e5bbb 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -569,6 +569,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
+ rt = NULL;
goto done;
}
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 870b5182ddd..4b0c8118080 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1593,8 +1593,6 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
rt->rt_peer_genid = rt_peer_genid();
}
check_peer_pmtu(dst, peer);
-
- inet_putpeer(peer);
}
}
@@ -1720,7 +1718,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
rcu_read_lock();
if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
- src = FIB_RES_PREFSRC(res);
+ src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
else
src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
RT_SCOPE_UNIVERSE);
@@ -2617,7 +2615,7 @@ static struct rtable *ip_route_output_slow(struct net *net,
fib_select_default(&res);
if (!fl4.saddr)
- fl4.saddr = FIB_RES_PREFSRC(res);
+ fl4.saddr = FIB_RES_PREFSRC(net, res);
dev_out = FIB_RES_DEV(res);
fl4.flowi4_oif = dev_out->ifindex;
@@ -3221,6 +3219,8 @@ static __net_init int rt_genid_init(struct net *net)
{
get_random_bytes(&net->ipv4.rt_genid,
sizeof(net->ipv4.rt_genid));
+ get_random_bytes(&net->ipv4.dev_addr_genid,
+ sizeof(net->ipv4.dev_addr_genid));
return 0;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index da782e7ab16..bef9f04c22b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2659,7 +2659,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
#define DBGUNDO(x...) do { } while (0)
#endif
-static void tcp_undo_cwr(struct sock *sk, const int undo)
+static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -2671,14 +2671,13 @@ static void tcp_undo_cwr(struct sock *sk, const int undo)
else
tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
- if (undo && tp->prior_ssthresh > tp->snd_ssthresh) {
+ if (undo_ssthresh && tp->prior_ssthresh > tp->snd_ssthresh) {
tp->snd_ssthresh = tp->prior_ssthresh;
TCP_ECN_withdraw_cwr(tp);
}
} else {
tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
}
- tcp_moderate_cwnd(tp);
tp->snd_cwnd_stamp = tcp_time_stamp;
}
@@ -2699,7 +2698,7 @@ static int tcp_try_undo_recovery(struct sock *sk)
* or our original transmission succeeded.
*/
DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
- tcp_undo_cwr(sk, 1);
+ tcp_undo_cwr(sk, true);
if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
mib_idx = LINUX_MIB_TCPLOSSUNDO;
else
@@ -2726,7 +2725,7 @@ static void tcp_try_undo_dsack(struct sock *sk)
if (tp->undo_marker && !tp->undo_retrans) {
DBGUNDO(sk, "D-SACK");
- tcp_undo_cwr(sk, 1);
+ tcp_undo_cwr(sk, true);
tp->undo_marker = 0;
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
}
@@ -2779,7 +2778,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked)
tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
DBGUNDO(sk, "Hoe");
- tcp_undo_cwr(sk, 0);
+ tcp_undo_cwr(sk, false);
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
/* So... Do not make Hoe's retransmit yet.
@@ -2808,7 +2807,7 @@ static int tcp_try_undo_loss(struct sock *sk)
DBGUNDO(sk, "partial loss");
tp->lost_out = 0;
- tcp_undo_cwr(sk, 1);
+ tcp_undo_cwr(sk, true);
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
inet_csk(sk)->icsk_retransmits = 0;
tp->undo_marker = 0;
@@ -2822,8 +2821,11 @@ static int tcp_try_undo_loss(struct sock *sk)
static inline void tcp_complete_cwr(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
- tp->snd_cwnd_stamp = tcp_time_stamp;
+ /* Do not moderate cwnd if it's already undone in cwr or recovery */
+ if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) {
+ tp->snd_cwnd = tp->snd_ssthresh;
+ tp->snd_cwnd_stamp = tcp_time_stamp;
+ }
tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
}
@@ -3494,7 +3496,7 @@ static void tcp_undo_spur_to_response(struct sock *sk, int flag)
if (flag & FLAG_ECE)
tcp_ratehalving_spur_to_response(sk);
else
- tcp_undo_cwr(sk, 1);
+ tcp_undo_cwr(sk, true);
}
/* F-RTO spurious RTO detection algorithm (RFC4138)
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 7ff0343e05c..29e48593bf2 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -663,7 +663,7 @@ static int pim6_rcv(struct sk_buff *skb)
skb_pull(skb, (u8 *)encap - skb->data);
skb_reset_network_header(skb);
skb->protocol = htons(ETH_P_IPV6);
- skb->ip_summed = 0;
+ skb->ip_summed = CHECKSUM_NONE;
skb->pkt_type = PACKET_HOST;
skb_tunnel_rx(skb, reg_dev);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 0e49c9db3c9..92f952d093d 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -341,6 +341,8 @@ int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int d
case ARPHRD_INFINIBAND:
ipv6_ib_mc_map(addr, dev->broadcast, buf);
return 0;
+ case ARPHRD_IPGRE:
+ return ipv6_ipgre_mc_map(addr, dev->broadcast, buf);
default:
if (dir) {
memcpy(buf, dev->broadcast, dev->addr_len);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 6814c8722fa..843406f14d7 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -854,7 +854,7 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table
return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
}
-struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
+struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
struct flowi6 *fl6)
{
int flags = 0;
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 7cb65ef79f9..6dcf5e7d661 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -17,6 +17,16 @@
static struct ctl_table empty[1];
+static ctl_table ipv6_static_skeleton[] = {
+ {
+ .procname = "neigh",
+ .maxlen = 0,
+ .mode = 0555,
+ .child = empty,
+ },
+ { }
+};
+
static ctl_table ipv6_table_template[] = {
{
.procname = "route",
@@ -37,12 +47,6 @@ static ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
- {
- .procname = "neigh",
- .maxlen = 0,
- .mode = 0555,
- .child = empty,
- },
{ }
};
@@ -160,7 +164,7 @@ static struct ctl_table_header *ip6_base;
int ipv6_static_sysctl_register(void)
{
- ip6_base = register_sysctl_paths(net_ipv6_ctl_path, empty);
+ ip6_base = register_sysctl_paths(net_ipv6_ctl_path, ipv6_static_skeleton);
if (ip6_base == NULL)
return -ENOMEM;
return 0;
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 2731b51923d..9680226640e 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -148,7 +148,6 @@ static void ipx_destroy_socket(struct sock *sk)
ipx_remove_socket(sk);
skb_queue_purge(&sk->sk_receive_queue);
sk_refcnt_debug_dec(sk);
- sock_put(sk);
}
/*
@@ -1404,6 +1403,7 @@ static int ipx_release(struct socket *sock)
sk_refcnt_debug_release(sk);
ipx_destroy_socket(sk);
release_sock(sk);
+ sock_put(sk);
out:
return 0;
}
diff --git a/net/irda/iriap.c b/net/irda/iriap.c
index 5b743bdd89b..36477538cea 100644
--- a/net/irda/iriap.c
+++ b/net/irda/iriap.c
@@ -656,10 +656,16 @@ static void iriap_getvaluebyclass_indication(struct iriap_cb *self,
n = 1;
name_len = fp[n++];
+
+ IRDA_ASSERT(name_len < IAS_MAX_CLASSNAME + 1, return;);
+
memcpy(name, fp+n, name_len); n+=name_len;
name[name_len] = '\0';
attr_len = fp[n++];
+
+ IRDA_ASSERT(attr_len < IAS_MAX_ATTRIBNAME + 1, return;);
+
memcpy(attr, fp+n, attr_len); n+=attr_len;
attr[attr_len] = '\0';
diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
index 7c567b8aa89..2bb2beb6a37 100644
--- a/net/irda/irnet/irnet_ppp.c
+++ b/net/irda/irnet/irnet_ppp.c
@@ -105,6 +105,9 @@ irnet_ctrl_write(irnet_socket * ap,
while(isspace(start[length - 1]))
length--;
+ DABORT(length < 5 || length > NICKNAME_MAX_LEN + 5,
+ -EINVAL, CTRL_ERROR, "Invalid nickname.\n");
+
/* Copy the name for later reuse */
memcpy(ap->rname, start + 5, length - 5);
ap->rname[length - 5] = '\0';
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 8d9ce0accc9..a8193f52c13 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -283,7 +283,7 @@ static __net_init int l2tp_eth_init_net(struct net *net)
return 0;
}
-static __net_initdata struct pernet_operations l2tp_eth_net_ops = {
+static struct pernet_operations l2tp_eth_net_ops = {
.init = l2tp_eth_init_net,
.id = &l2tp_eth_net_id,
.size = sizeof(struct l2tp_eth_net),
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 5c48ffb60c2..2dc6de13ac1 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -43,6 +43,8 @@ EXPORT_SYMBOL(register_ip_vs_app);
EXPORT_SYMBOL(unregister_ip_vs_app);
EXPORT_SYMBOL(register_ip_vs_app_inc);
+static DEFINE_MUTEX(__ip_vs_app_mutex);
+
/*
* Get an ip_vs_app object
*/
@@ -167,14 +169,13 @@ int
register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
__u16 port)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
int result;
- mutex_lock(&ipvs->app_mutex);
+ mutex_lock(&__ip_vs_app_mutex);
result = ip_vs_app_inc_new(net, app, proto, port);
- mutex_unlock(&ipvs->app_mutex);
+ mutex_unlock(&__ip_vs_app_mutex);
return result;
}
@@ -189,11 +190,11 @@ int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
/* increase the module use count */
ip_vs_use_count_inc();
- mutex_lock(&ipvs->app_mutex);
+ mutex_lock(&__ip_vs_app_mutex);
list_add(&app->a_list, &ipvs->app_list);
- mutex_unlock(&ipvs->app_mutex);
+ mutex_unlock(&__ip_vs_app_mutex);
return 0;
}
@@ -205,10 +206,9 @@ int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
*/
void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_app *inc, *nxt;
- mutex_lock(&ipvs->app_mutex);
+ mutex_lock(&__ip_vs_app_mutex);
list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
ip_vs_app_inc_release(net, inc);
@@ -216,7 +216,7 @@ void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
list_del(&app->a_list);
- mutex_unlock(&ipvs->app_mutex);
+ mutex_unlock(&__ip_vs_app_mutex);
/* decrease the module use count */
ip_vs_use_count_dec();
@@ -501,7 +501,7 @@ static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
struct net *net = seq_file_net(seq);
struct netns_ipvs *ipvs = net_ipvs(net);
- mutex_lock(&ipvs->app_mutex);
+ mutex_lock(&__ip_vs_app_mutex);
return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
}
@@ -535,9 +535,7 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
{
- struct netns_ipvs *ipvs = net_ipvs(seq_file_net(seq));
-
- mutex_unlock(&ipvs->app_mutex);
+ mutex_unlock(&__ip_vs_app_mutex);
}
static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
@@ -583,7 +581,6 @@ static int __net_init __ip_vs_app_init(struct net *net)
struct netns_ipvs *ipvs = net_ipvs(net);
INIT_LIST_HEAD(&ipvs->app_list);
- __mutex_init(&ipvs->app_mutex, "ipvs->app_mutex", &ipvs->app_key);
proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops);
return 0;
}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index b799cea31f9..33733c8872e 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3605,7 +3605,7 @@ int __net_init __ip_vs_control_init(struct net *net)
/* procfs stats */
ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
- if (ipvs->tot_stats.cpustats) {
+ if (!ipvs->tot_stats.cpustats) {
pr_err("%s(): alloc_percpu.\n", __func__);
return -ENOMEM;
}
diff --git a/net/rds/cong.c b/net/rds/cong.c
index 75ea686f27d..6daaa49d133 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -33,8 +33,7 @@
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/rbtree.h>
-
-#include <asm-generic/bitops/le.h>
+#include <linux/bitops.h>
#include "rds.h"
@@ -285,7 +284,7 @@ void rds_cong_set_bit(struct rds_cong_map *map, __be16 port)
i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
- generic___set_le_bit(off, (void *)map->m_page_addrs[i]);
+ __set_bit_le(off, (void *)map->m_page_addrs[i]);
}
void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port)
@@ -299,7 +298,7 @@ void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port)
i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
- generic___clear_le_bit(off, (void *)map->m_page_addrs[i]);
+ __clear_bit_le(off, (void *)map->m_page_addrs[i]);
}
static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port)
@@ -310,7 +309,7 @@ static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port)
i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
- return generic_test_le_bit(off, (void *)map->m_page_addrs[i]);
+ return test_bit_le(off, (void *)map->m_page_addrs[i]);
}
void rds_cong_add_socket(struct rds_sock *rs)
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 5ee0c62046a..a80aef6e3d1 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -978,7 +978,7 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros
struct sock *make;
struct rose_sock *make_rose;
struct rose_facilities_struct facilities;
- int n, len;
+ int n;
skb->sk = NULL; /* Initially we don't know who it's for */
@@ -987,9 +987,9 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros
*/
memset(&facilities, 0x00, sizeof(struct rose_facilities_struct));
- len = (((skb->data[3] >> 4) & 0x0F) + 1) >> 1;
- len += (((skb->data[3] >> 0) & 0x0F) + 1) >> 1;
- if (!rose_parse_facilities(skb->data + len + 4, &facilities)) {
+ if (!rose_parse_facilities(skb->data + ROSE_CALL_REQ_FACILITIES_OFF,
+ skb->len - ROSE_CALL_REQ_FACILITIES_OFF,
+ &facilities)) {
rose_transmit_clear_request(neigh, lci, ROSE_INVALID_FACILITY, 76);
return 0;
}
diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c
index ae4a9d99aec..344456206b7 100644
--- a/net/rose/rose_loopback.c
+++ b/net/rose/rose_loopback.c
@@ -73,9 +73,20 @@ static void rose_loopback_timer(unsigned long param)
unsigned int lci_i, lci_o;
while ((skb = skb_dequeue(&loopback_queue)) != NULL) {
+ if (skb->len < ROSE_MIN_LEN) {
+ kfree_skb(skb);
+ continue;
+ }
lci_i = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF);
frametype = skb->data[2];
- dest = (rose_address *)(skb->data + 4);
+ if (frametype == ROSE_CALL_REQUEST &&
+ (skb->len <= ROSE_CALL_REQ_FACILITIES_OFF ||
+ skb->data[ROSE_CALL_REQ_ADDR_LEN_OFF] !=
+ ROSE_CALL_REQ_ADDR_LEN_VAL)) {
+ kfree_skb(skb);
+ continue;
+ }
+ dest = (rose_address *)(skb->data + ROSE_CALL_REQ_DEST_ADDR_OFF);
lci_o = ROSE_DEFAULT_MAXVC + 1 - lci_i;
skb_reset_transport_header(skb);
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 88a77e90e7e..08dcd2f29cd 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -861,7 +861,7 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
unsigned int lci, new_lci;
unsigned char cause, diagnostic;
struct net_device *dev;
- int len, res = 0;
+ int res = 0;
char buf[11];
#if 0
@@ -869,10 +869,17 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
return res;
#endif
+ if (skb->len < ROSE_MIN_LEN)
+ return res;
frametype = skb->data[2];
lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF);
- src_addr = (rose_address *)(skb->data + 9);
- dest_addr = (rose_address *)(skb->data + 4);
+ if (frametype == ROSE_CALL_REQUEST &&
+ (skb->len <= ROSE_CALL_REQ_FACILITIES_OFF ||
+ skb->data[ROSE_CALL_REQ_ADDR_LEN_OFF] !=
+ ROSE_CALL_REQ_ADDR_LEN_VAL))
+ return res;
+ src_addr = (rose_address *)(skb->data + ROSE_CALL_REQ_SRC_ADDR_OFF);
+ dest_addr = (rose_address *)(skb->data + ROSE_CALL_REQ_DEST_ADDR_OFF);
spin_lock_bh(&rose_neigh_list_lock);
spin_lock_bh(&rose_route_list_lock);
@@ -1010,12 +1017,11 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
goto out;
}
- len = (((skb->data[3] >> 4) & 0x0F) + 1) >> 1;
- len += (((skb->data[3] >> 0) & 0x0F) + 1) >> 1;
-
memset(&facilities, 0x00, sizeof(struct rose_facilities_struct));
- if (!rose_parse_facilities(skb->data + len + 4, &facilities)) {
+ if (!rose_parse_facilities(skb->data + ROSE_CALL_REQ_FACILITIES_OFF,
+ skb->len - ROSE_CALL_REQ_FACILITIES_OFF,
+ &facilities)) {
rose_transmit_clear_request(rose_neigh, lci, ROSE_INVALID_FACILITY, 76);
goto out;
}
diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c
index 1734abba26a..f6c71caa94b 100644
--- a/net/rose/rose_subr.c
+++ b/net/rose/rose_subr.c
@@ -142,7 +142,7 @@ void rose_write_internal(struct sock *sk, int frametype)
*dptr++ = ROSE_GFI | lci1;
*dptr++ = lci2;
*dptr++ = frametype;
- *dptr++ = 0xAA;
+ *dptr++ = ROSE_CALL_REQ_ADDR_LEN_VAL;
memcpy(dptr, &rose->dest_addr, ROSE_ADDR_LEN);
dptr += ROSE_ADDR_LEN;
memcpy(dptr, &rose->source_addr, ROSE_ADDR_LEN);
@@ -246,12 +246,16 @@ static int rose_parse_national(unsigned char *p, struct rose_facilities_struct *
do {
switch (*p & 0xC0) {
case 0x00:
+ if (len < 2)
+ return -1;
p += 2;
n += 2;
len -= 2;
break;
case 0x40:
+ if (len < 3)
+ return -1;
if (*p == FAC_NATIONAL_RAND)
facilities->rand = ((p[1] << 8) & 0xFF00) + ((p[2] << 0) & 0x00FF);
p += 3;
@@ -260,40 +264,61 @@ static int rose_parse_national(unsigned char *p, struct rose_facilities_struct *
break;
case 0x80:
+ if (len < 4)
+ return -1;
p += 4;
n += 4;
len -= 4;
break;
case 0xC0:
+ if (len < 2)
+ return -1;
l = p[1];
+ if (len < 2 + l)
+ return -1;
if (*p == FAC_NATIONAL_DEST_DIGI) {
if (!fac_national_digis_received) {
+ if (l < AX25_ADDR_LEN)
+ return -1;
memcpy(&facilities->source_digis[0], p + 2, AX25_ADDR_LEN);
facilities->source_ndigis = 1;
}
}
else if (*p == FAC_NATIONAL_SRC_DIGI) {
if (!fac_national_digis_received) {
+ if (l < AX25_ADDR_LEN)
+ return -1;
memcpy(&facilities->dest_digis[0], p + 2, AX25_ADDR_LEN);
facilities->dest_ndigis = 1;
}
}
else if (*p == FAC_NATIONAL_FAIL_CALL) {
+ if (l < AX25_ADDR_LEN)
+ return -1;
memcpy(&facilities->fail_call, p + 2, AX25_ADDR_LEN);
}
else if (*p == FAC_NATIONAL_FAIL_ADD) {
+ if (l < 1 + ROSE_ADDR_LEN)
+ return -1;
memcpy(&facilities->fail_addr, p + 3, ROSE_ADDR_LEN);
}
else if (*p == FAC_NATIONAL_DIGIS) {
+ if (l % AX25_ADDR_LEN)
+ return -1;
fac_national_digis_received = 1;
facilities->source_ndigis = 0;
facilities->dest_ndigis = 0;
for (pt = p + 2, lg = 0 ; lg < l ; pt += AX25_ADDR_LEN, lg += AX25_ADDR_LEN) {
- if (pt[6] & AX25_HBIT)
+ if (pt[6] & AX25_HBIT) {
+ if (facilities->dest_ndigis >= ROSE_MAX_DIGIS)
+ return -1;
memcpy(&facilities->dest_digis[facilities->dest_ndigis++], pt, AX25_ADDR_LEN);
- else
+ } else {
+ if (facilities->source_ndigis >= ROSE_MAX_DIGIS)
+ return -1;
memcpy(&facilities->source_digis[facilities->source_ndigis++], pt, AX25_ADDR_LEN);
+ }
}
}
p += l + 2;
@@ -314,25 +339,38 @@ static int rose_parse_ccitt(unsigned char *p, struct rose_facilities_struct *fac
do {
switch (*p & 0xC0) {
case 0x00:
+ if (len < 2)
+ return -1;
p += 2;
n += 2;
len -= 2;
break;
case 0x40:
+ if (len < 3)
+ return -1;
p += 3;
n += 3;
len -= 3;
break;
case 0x80:
+ if (len < 4)
+ return -1;
p += 4;
n += 4;
len -= 4;
break;
case 0xC0:
+ if (len < 2)
+ return -1;
l = p[1];
+
+ /* Prevent overflows*/
+ if (l < 10 || l > 20)
+ return -1;
+
if (*p == FAC_CCITT_DEST_NSAP) {
memcpy(&facilities->source_addr, p + 7, ROSE_ADDR_LEN);
memcpy(callsign, p + 12, l - 10);
@@ -355,45 +393,44 @@ static int rose_parse_ccitt(unsigned char *p, struct rose_facilities_struct *fac
return n;
}
-int rose_parse_facilities(unsigned char *p,
+int rose_parse_facilities(unsigned char *p, unsigned packet_len,
struct rose_facilities_struct *facilities)
{
int facilities_len, len;
facilities_len = *p++;
- if (facilities_len == 0)
+ if (facilities_len == 0 || (unsigned)facilities_len > packet_len)
return 0;
- while (facilities_len > 0) {
- if (*p == 0x00) {
- facilities_len--;
- p++;
-
- switch (*p) {
- case FAC_NATIONAL: /* National */
- len = rose_parse_national(p + 1, facilities, facilities_len - 1);
- facilities_len -= len + 1;
- p += len + 1;
- break;
-
- case FAC_CCITT: /* CCITT */
- len = rose_parse_ccitt(p + 1, facilities, facilities_len - 1);
- facilities_len -= len + 1;
- p += len + 1;
- break;
-
- default:
- printk(KERN_DEBUG "ROSE: rose_parse_facilities - unknown facilities family %02X\n", *p);
- facilities_len--;
- p++;
- break;
- }
- } else
- break; /* Error in facilities format */
+ while (facilities_len >= 3 && *p == 0x00) {
+ facilities_len--;
+ p++;
+
+ switch (*p) {
+ case FAC_NATIONAL: /* National */
+ len = rose_parse_national(p + 1, facilities, facilities_len - 1);
+ break;
+
+ case FAC_CCITT: /* CCITT */
+ len = rose_parse_ccitt(p + 1, facilities, facilities_len - 1);
+ break;
+
+ default:
+ printk(KERN_DEBUG "ROSE: rose_parse_facilities - unknown facilities family %02X\n", *p);
+ len = 1;
+ break;
+ }
+
+ if (len < 0)
+ return 0;
+ if (WARN_ON(len >= facilities_len))
+ return 0;
+ facilities_len -= len + 1;
+ p += len + 1;
}
- return 1;
+ return facilities_len == 0;
}
static int rose_create_facilities(unsigned char *buffer, struct rose_sock *rose)
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 152976ec0b7..d5bf91d04f6 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1205,7 +1205,7 @@ SCTP_STATIC __init int sctp_init(void)
if ((sctp_assoc_hashsize > (64 * 1024)) && order > 0)
continue;
sctp_assoc_hashtable = (struct sctp_hashbucket *)
- __get_free_pages(GFP_ATOMIC, order);
+ __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, order);
} while (!sctp_assoc_hashtable && --order > 0);
if (!sctp_assoc_hashtable) {
pr_err("Failed association hash alloc\n");
@@ -1238,7 +1238,7 @@ SCTP_STATIC __init int sctp_init(void)
if ((sctp_port_hashsize > (64 * 1024)) && order > 0)
continue;
sctp_port_hashtable = (struct sctp_bind_hashbucket *)
- __get_free_pages(GFP_ATOMIC, order);
+ __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, order);
} while (!sctp_port_hashtable && --order > 0);
if (!sctp_port_hashtable) {
pr_err("Failed bind hash alloc\n");
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 8b4061049d7..e3c36a27441 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -160,6 +160,28 @@ gss_mech_get_by_name(const char *name)
EXPORT_SYMBOL_GPL(gss_mech_get_by_name);
+struct gss_api_mech *
+gss_mech_get_by_OID(struct xdr_netobj *obj)
+{
+ struct gss_api_mech *pos, *gm = NULL;
+
+ spin_lock(&registered_mechs_lock);
+ list_for_each_entry(pos, &registered_mechs, gm_list) {
+ if (obj->len == pos->gm_oid.len) {
+ if (0 == memcmp(obj->data, pos->gm_oid.data, obj->len)) {
+ if (try_module_get(pos->gm_owner))
+ gm = pos;
+ break;
+ }
+ }
+ }
+ spin_unlock(&registered_mechs_lock);
+ return gm;
+
+}
+
+EXPORT_SYMBOL_GPL(gss_mech_get_by_OID);
+
static inline int
mech_supports_pseudoflavor(struct gss_api_mech *gm, u32 pseudoflavor)
{
@@ -193,6 +215,22 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor)
EXPORT_SYMBOL_GPL(gss_mech_get_by_pseudoflavor);
+int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr)
+{
+ struct gss_api_mech *pos = NULL;
+ int i = 0;
+
+ spin_lock(&registered_mechs_lock);
+ list_for_each_entry(pos, &registered_mechs, gm_list) {
+ array_ptr[i] = pos->gm_pfs->pseudoflavor;
+ i++;
+ }
+ spin_unlock(&registered_mechs_lock);
+ return i;
+}
+
+EXPORT_SYMBOL_GPL(gss_mech_list_pseudoflavors);
+
u32
gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service)
{
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index ffb687671da..6b43ee7221d 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -860,8 +860,10 @@ static void rpc_release_resources_task(struct rpc_task *task)
{
if (task->tk_rqstp)
xprt_release(task);
- if (task->tk_msg.rpc_cred)
+ if (task->tk_msg.rpc_cred) {
put_rpccred(task->tk_msg.rpc_cred);
+ task->tk_msg.rpc_cred = NULL;
+ }
rpc_task_release_client(task);
}
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 30916b06c12..c8e10216c11 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -38,6 +38,14 @@ struct unix_domain {
extern struct auth_ops svcauth_unix;
+static void svcauth_unix_domain_release(struct auth_domain *dom)
+{
+ struct unix_domain *ud = container_of(dom, struct unix_domain, h);
+
+ kfree(dom->name);
+ kfree(ud);
+}
+
struct auth_domain *unix_domain_find(char *name)
{
struct auth_domain *rv;
@@ -47,7 +55,7 @@ struct auth_domain *unix_domain_find(char *name)
while(1) {
if (rv) {
if (new && rv != &new->h)
- auth_domain_put(&new->h);
+ svcauth_unix_domain_release(&new->h);
if (rv->flavour != &svcauth_unix) {
auth_domain_put(rv);
@@ -74,14 +82,6 @@ struct auth_domain *unix_domain_find(char *name)
}
EXPORT_SYMBOL_GPL(unix_domain_find);
-static void svcauth_unix_domain_release(struct auth_domain *dom)
-{
- struct unix_domain *ud = container_of(dom, struct unix_domain, h);
-
- kfree(dom->name);
- kfree(ud);
-}
-
/**************************************************
* cache for IP address to unix_domain
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index be96d429b47..1e336a06d3e 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -710,6 +710,8 @@ static void xs_reset_transport(struct sock_xprt *transport)
if (sk == NULL)
return;
+ transport->srcport = 0;
+
write_lock_bh(&sk->sk_callback_lock);
transport->inet = NULL;
transport->sock = NULL;
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 872065ca7f8..a026b0ef244 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -173,7 +173,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
goto drop_unlock;
}
- if (x->props.replay_window && x->repl->check(x, skb, seq)) {
+ if (x->repl->check(x, skb, seq)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
goto drop_unlock;
}
@@ -190,6 +190,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
XFRM_SKB_CB(skb)->seq.input.low = seq;
XFRM_SKB_CB(skb)->seq.input.hi = seq_hi;
+ skb_dst_force(skb);
+
nexthdr = x->type->input(x, skb);
if (nexthdr == -EINPROGRESS)
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 1aba03f449c..47bacd8c025 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -78,6 +78,8 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
spin_unlock_bh(&x->lock);
+ skb_dst_force(skb);
+
err = x->type->output(x, skb);
if (err == -EINPROGRESS)
goto out_exit;
@@ -94,7 +96,7 @@ resume:
err = -EHOSTUNREACH;
goto error_nolock;
}
- skb_dst_set(skb, dst_clone(dst));
+ skb_dst_set(skb, dst);
x = dst->xfrm;
} while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL));
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 2f5be5b1574..f218385950c 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -118,6 +118,9 @@ static int xfrm_replay_check(struct xfrm_state *x,
u32 diff;
u32 seq = ntohl(net_seq);
+ if (!x->props.replay_window)
+ return 0;
+
if (unlikely(seq == 0))
goto err;
@@ -193,9 +196,14 @@ static int xfrm_replay_check_bmp(struct xfrm_state *x,
{
unsigned int bitnr, nr;
struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
+ u32 pos;
u32 seq = ntohl(net_seq);
u32 diff = replay_esn->seq - seq;
- u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window;
+
+ if (!replay_esn->replay_window)
+ return 0;
+
+ pos = (replay_esn->seq - 1) % replay_esn->replay_window;
if (unlikely(seq == 0))
goto err;
@@ -373,12 +381,17 @@ static int xfrm_replay_check_esn(struct xfrm_state *x,
unsigned int bitnr, nr;
u32 diff;
struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
+ u32 pos;
u32 seq = ntohl(net_seq);
- u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window;
u32 wsize = replay_esn->replay_window;
u32 top = replay_esn->seq;
u32 bottom = top - wsize + 1;
+ if (!wsize)
+ return 0;
+
+ pos = (replay_esn->seq - 1) % replay_esn->replay_window;
+
if (unlikely(seq == 0 && replay_esn->seq_hi == 0 &&
(replay_esn->seq < replay_esn->replay_window - 1)))
goto err;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index d575f053486..dd78536d40d 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1181,6 +1181,12 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
goto error;
}
+ if (orig->replay_esn) {
+ err = xfrm_replay_clone(x, orig);
+ if (err)
+ goto error;
+ }
+
memcpy(&x->mark, &orig->mark, sizeof(x->mark));
err = xfrm_init_state(x);
@@ -1907,7 +1913,7 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu)
return res;
}
-int xfrm_init_state(struct xfrm_state *x)
+int __xfrm_init_state(struct xfrm_state *x, bool init_replay)
{
struct xfrm_state_afinfo *afinfo;
struct xfrm_mode *inner_mode;
@@ -1980,12 +1986,25 @@ int xfrm_init_state(struct xfrm_state *x)
if (x->outer_mode == NULL)
goto error;
+ if (init_replay) {
+ err = xfrm_init_replay(x);
+ if (err)
+ goto error;
+ }
+
x->km.state = XFRM_STATE_VALID;
error:
return err;
}
+EXPORT_SYMBOL(__xfrm_init_state);
+
+int xfrm_init_state(struct xfrm_state *x)
+{
+ return __xfrm_init_state(x, true);
+}
+
EXPORT_SYMBOL(xfrm_init_state);
int __net_init xfrm_state_init(struct net *net)
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 706385ae3e4..3d15d3e1b2c 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -127,6 +127,9 @@ static inline int verify_replay(struct xfrm_usersa_info *p,
if (!rt)
return 0;
+ if (p->id.proto != IPPROTO_ESP)
+ return -EINVAL;
+
if (p->replay_window != 0)
return -EINVAL;
@@ -360,6 +363,23 @@ static int attach_aead(struct xfrm_algo_aead **algpp, u8 *props,
return 0;
}
+static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_esn,
+ struct nlattr *rp)
+{
+ struct xfrm_replay_state_esn *up;
+
+ if (!replay_esn || !rp)
+ return 0;
+
+ up = nla_data(rp);
+
+ if (xfrm_replay_state_esn_len(replay_esn) !=
+ xfrm_replay_state_esn_len(up))
+ return -EINVAL;
+
+ return 0;
+}
+
static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn,
struct xfrm_replay_state_esn **preplay_esn,
struct nlattr *rta)
@@ -511,7 +531,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
xfrm_mark_get(attrs, &x->mark);
- err = xfrm_init_state(x);
+ err = __xfrm_init_state(x, false);
if (err)
goto error;
@@ -1766,6 +1786,10 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
if (x->km.state != XFRM_STATE_VALID)
goto out;
+ err = xfrm_replay_verify_len(x->replay_esn, rp);
+ if (err)
+ goto out;
+
spin_lock_bh(&x->lock);
xfrm_update_ae_params(x, attrs);
spin_unlock_bh(&x->lock);