318 files changed, 15693 insertions, 5543 deletions
diff --git a/net/802/Kconfig b/net/802/Kconfig
index be33d27c8e6..80d4bf78905 100644
--- a/net/802/Kconfig
+++ b/net/802/Kconfig
@@ -5,3 +5,6 @@ config STP
 config GARP
 	tristate
 	select STP
+
+config MRP
+	tristate
diff --git a/net/802/Makefile b/net/802/Makefile
index a30d6e385ae..37e654d6615 100644
--- a/net/802/Makefile
+++ b/net/802/Makefile
@@ -11,3 +11,4 @@ obj-$(CONFIG_IPX)	+= p8022.o psnap.o p8023.o
 obj-$(CONFIG_ATALK)	+= p8022.o psnap.o
 obj-$(CONFIG_STP)	+= stp.o
 obj-$(CONFIG_GARP)	+= garp.o
+obj-$(CONFIG_MRP)	+= mrp.o
diff --git a/net/802/mrp.c b/net/802/mrp.c
new file mode 100644
index 00000000000..a4cc3229952
--- /dev/null
+++ b/net/802/mrp.c
@@ -0,0 +1,895 @@
+/*
+ *	IEEE 802.1Q Multiple Registration Protocol (MRP)
+ *
+ *	Copyright (c) 2012 Massachusetts Institute of Technology
+ *
+ *	Adapted from code in net/802/garp.c
+ *	Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	version 2 as published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/timer.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <net/mrp.h>
+#include <asm/unaligned.h>
+
+static unsigned int mrp_join_time __read_mostly = 200;
+module_param(mrp_join_time, uint, 0644);
+MODULE_PARM_DESC(mrp_join_time, "Join time in ms (default 200ms)");
+MODULE_LICENSE("GPL");
+
+static const u8
+mrp_applicant_state_table[MRP_APPLICANT_MAX + 1][MRP_EVENT_MAX + 1] = {
+	[MRP_APPLICANT_VO] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_VO,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_VO,
+	},
+	[MRP_APPLICANT_VP] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_VO,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_AA,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_VP,
+	},
+	[MRP_APPLICANT_VN] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_LA,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_AN,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_VN,
+	},
+	[MRP_APPLICANT_AN] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_AN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_AN,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_LA,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_AN,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_AN,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_AN,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_AN,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_AN,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_AN,
+	},
+	[MRP_APPLICANT_AA] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_LA,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_AA,
+	},
+	[MRP_APPLICANT_QA] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_QA,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_LA,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_AA,
+	},
+	[MRP_APPLICANT_LA] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_LA,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_LA,
+	},
+	[MRP_APPLICANT_AO] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_AO,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_QO,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_AO,
+	},
+	[MRP_APPLICANT_QO] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_QP,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_QO,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_QO,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_QO,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_QO,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_QO,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_QO,
+	},
+	[MRP_APPLICANT_AP] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_AO,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_QP,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_AP,
+	},
+	[MRP_APPLICANT_QP] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_QP,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_QO,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_QP,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_QP,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_QP,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_QP,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_AP,
+	},
+};
+
+static const u8
+mrp_tx_action_table[MRP_APPLICANT_MAX + 1] = {
+	[MRP_APPLICANT_VO] = MRP_TX_ACTION_S_IN_OPTIONAL,
+	[MRP_APPLICANT_VP] = MRP_TX_ACTION_S_JOIN_IN,
+	[MRP_APPLICANT_VN] = MRP_TX_ACTION_S_NEW,
+	[MRP_APPLICANT_AN] = MRP_TX_ACTION_S_NEW,
+	[MRP_APPLICANT_AA] = MRP_TX_ACTION_S_JOIN_IN,
+	[MRP_APPLICANT_QA] = MRP_TX_ACTION_S_JOIN_IN_OPTIONAL,
+	[MRP_APPLICANT_LA] = MRP_TX_ACTION_S_LV,
+	[MRP_APPLICANT_AO] = MRP_TX_ACTION_S_IN_OPTIONAL,
+	[MRP_APPLICANT_QO] = MRP_TX_ACTION_S_IN_OPTIONAL,
+	[MRP_APPLICANT_AP] = MRP_TX_ACTION_S_JOIN_IN,
+	[MRP_APPLICANT_QP] = MRP_TX_ACTION_S_IN_OPTIONAL,
+};
+
+static void mrp_attrvalue_inc(void *value, u8 len)
+{
+	u8 *v = (u8 *)value;
+
+	/* Add 1 to the last byte. If it becomes zero,
+	 * go to the previous byte and repeat.
+	 */
+	while (len > 0 && !++v[--len])
+		;
+}
+
+static int mrp_attr_cmp(const struct mrp_attr *attr,
+			 const void *value, u8 len, u8 type)
+{
+	if (attr->type != type)
+		return attr->type - type;
+	if (attr->len != len)
+		return attr->len - len;
+	return memcmp(attr->value, value, len);
+}
+
+static struct mrp_attr *mrp_attr_lookup(const struct mrp_applicant *app,
+					const void *value, u8 len, u8 type)
+{
+	struct rb_node *parent = app->mad.rb_node;
+	struct mrp_attr *attr;
+	int d;
+
+	while (parent) {
+		attr = rb_entry(parent, struct mrp_attr, node);
+		d = mrp_attr_cmp(attr, value, len, type);
+		if (d > 0)
+			parent = parent->rb_left;
+		else if (d < 0)
+			parent = parent->rb_right;
+		else
+			return attr;
+	}
+	return NULL;
+}
+
+static struct mrp_attr *mrp_attr_create(struct mrp_applicant *app,
+					const void *value, u8 len, u8 type)
+{
+	struct rb_node *parent = NULL, **p = &app->mad.rb_node;
+	struct mrp_attr *attr;
+	int d;
+
+	while (*p) {
+		parent = *p;
+		attr = rb_entry(parent, struct mrp_attr, node);
+		d = mrp_attr_cmp(attr, value, len, type);
+		if (d > 0)
+			p = &parent->rb_left;
+		else if (d < 0)
+			p = &parent->rb_right;
+		else {
+			/* The attribute already exists; re-use it. */
+			return attr;
+		}
+	}
+	attr = kmalloc(sizeof(*attr) + len, GFP_ATOMIC);
+	if (!attr)
+		return attr;
+	attr->state = MRP_APPLICANT_VO;
+	attr->type  = type;
+	attr->len   = len;
+	memcpy(attr->value, value, len);
+
+	rb_link_node(&attr->node, parent, p);
+	rb_insert_color(&attr->node, &app->mad);
+	return attr;
+}
+
+static void mrp_attr_destroy(struct mrp_applicant *app, struct mrp_attr *attr)
+{
+	rb_erase(&attr->node, &app->mad);
+	kfree(attr);
+}
+
+static int mrp_pdu_init(struct mrp_applicant *app)
+{
+	struct sk_buff *skb;
+	struct mrp_pdu_hdr *ph;
+
+	skb = alloc_skb(app->dev->mtu + LL_RESERVED_SPACE(app->dev),
+			GFP_ATOMIC);
+	if (!skb)
+		return -ENOMEM;
+
+	skb->dev = app->dev;
+	skb->protocol = app->app->pkttype.type;
+	skb_reserve(skb, LL_RESERVED_SPACE(app->dev));
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+
+	ph = (struct mrp_pdu_hdr *)__skb_put(skb, sizeof(*ph));
+	ph->version = app->app->version;
+
+	app->pdu = skb;
+	return 0;
+}
+
+static int mrp_pdu_append_end_mark(struct mrp_applicant *app)
+{
+	__be16 *endmark;
+
+	if (skb_tailroom(app->pdu) < sizeof(*endmark))
+		return -1;
+	endmark = (__be16 *)__skb_put(app->pdu, sizeof(*endmark));
+	put_unaligned(MRP_END_MARK, endmark);
+	return 0;
+}
+
+static void mrp_pdu_queue(struct mrp_applicant *app)
+{
+	if (!app->pdu)
+		return;
+
+	if (mrp_cb(app->pdu)->mh)
+		mrp_pdu_append_end_mark(app);
+	mrp_pdu_append_end_mark(app);
+
+	dev_hard_header(app->pdu, app->dev, ntohs(app->app->pkttype.type),
+			app->app->group_address, app->dev->dev_addr,
+			app->pdu->len);
+
+	skb_queue_tail(&app->queue, app->pdu);
+	app->pdu = NULL;
+}
+
+static void mrp_queue_xmit(struct mrp_applicant *app)
+{
+	struct sk_buff *skb;
+
+	while ((skb = skb_dequeue(&app->queue)))
+		dev_queue_xmit(skb);
+}
+
+static int mrp_pdu_append_msg_hdr(struct mrp_applicant *app,
+				  u8 attrtype, u8 attrlen)
+{
+	struct mrp_msg_hdr *mh;
+
+	if (mrp_cb(app->pdu)->mh) {
+		if (mrp_pdu_append_end_mark(app) < 0)
+			return -1;
+		mrp_cb(app->pdu)->mh = NULL;
+		mrp_cb(app->pdu)->vah = NULL;
+	}
+
+	if (skb_tailroom(app->pdu) < sizeof(*mh))
+		return -1;
+	mh = (struct mrp_msg_hdr *)__skb_put(app->pdu, sizeof(*mh));
+	mh->attrtype = attrtype;
+	mh->attrlen = attrlen;
+	mrp_cb(app->pdu)->mh = mh;
+	return 0;
+}
+
+static int mrp_pdu_append_vecattr_hdr(struct mrp_applicant *app,
+				      const void *firstattrvalue, u8 attrlen)
+{
+	struct mrp_vecattr_hdr *vah;
+
+	if (skb_tailroom(app->pdu) < sizeof(*vah) + attrlen)
+		return -1;
+	vah = (struct mrp_vecattr_hdr *)__skb_put(app->pdu,
+						  sizeof(*vah) + attrlen);
+	put_unaligned(0, &vah->lenflags);
+	memcpy(vah->firstattrvalue, firstattrvalue, attrlen);
+	mrp_cb(app->pdu)->vah = vah;
+	memcpy(mrp_cb(app->pdu)->attrvalue, firstattrvalue, attrlen);
+	return 0;
+}
+
+static int mrp_pdu_append_vecattr_event(struct mrp_applicant *app,
+					const struct mrp_attr *attr,
+					enum mrp_vecattr_event vaevent)
+{
+	u16 len, pos;
+	u8 *vaevents;
+	int err;
+again:
+	if (!app->pdu) {
+		err = mrp_pdu_init(app);
+		if (err < 0)
+			return err;
+	}
+
+	/* If there is no Message header in the PDU, or the Message header is
+	 * for a different attribute type, add an EndMark (if necessary) and a
+	 * new Message header to the PDU.
+	 */
+	if (!mrp_cb(app->pdu)->mh ||
+	    mrp_cb(app->pdu)->mh->attrtype != attr->type ||
+	    mrp_cb(app->pdu)->mh->attrlen != attr->len) {
+		if (mrp_pdu_append_msg_hdr(app, attr->type, attr->len) < 0)
+			goto queue;
+	}
+
+	/* If there is no VectorAttribute header for this Message in the PDU,
+	 * or this attribute's value does not sequentially follow the previous
+	 * attribute's value, add a new VectorAttribute header to the PDU.
+	 */
+	if (!mrp_cb(app->pdu)->vah ||
+	    memcmp(mrp_cb(app->pdu)->attrvalue, attr->value, attr->len)) {
+		if (mrp_pdu_append_vecattr_hdr(app, attr->value, attr->len) < 0)
+			goto queue;
+	}
+
+	len = be16_to_cpu(get_unaligned(&mrp_cb(app->pdu)->vah->lenflags));
+	pos = len % 3;
+
+	/* Events are packed into Vectors in the PDU, three to a byte. Add a
+	 * byte to the end of the Vector if necessary.
+	 */
+	if (!pos) {
+		if (skb_tailroom(app->pdu) < sizeof(u8))
+			goto queue;
+		vaevents = (u8 *)__skb_put(app->pdu, sizeof(u8));
+	} else {
+		vaevents = (u8 *)(skb_tail_pointer(app->pdu) - sizeof(u8));
+	}
+
+	switch (pos) {
+	case 0:
+		*vaevents = vaevent * (__MRP_VECATTR_EVENT_MAX *
+				       __MRP_VECATTR_EVENT_MAX);
+		break;
+	case 1:
+		*vaevents += vaevent * __MRP_VECATTR_EVENT_MAX;
+		break;
+	case 2:
+		*vaevents += vaevent;
+		break;
+	default:
+		WARN_ON(1);
+	}
+
+	/* Increment the length of the VectorAttribute in the PDU, as well as
+	 * the value of the next attribute that would continue its Vector.
+	 */
+	put_unaligned(cpu_to_be16(++len), &mrp_cb(app->pdu)->vah->lenflags);
+	mrp_attrvalue_inc(mrp_cb(app->pdu)->attrvalue, attr->len);
+
+	return 0;
+
+queue:
+	mrp_pdu_queue(app);
+	goto again;
+}
+
+static void mrp_attr_event(struct mrp_applicant *app,
+			   struct mrp_attr *attr, enum mrp_event event)
+{
+	enum mrp_applicant_state state;
+
+	state = mrp_applicant_state_table[attr->state][event];
+	if (state == MRP_APPLICANT_INVALID) {
+		WARN_ON(1);
+		return;
+	}
+
+	if (event == MRP_EVENT_TX) {
+		/* When appending the attribute fails, don't update its state
+		 * in order to retry at the next TX event.
+		 */
+
+		switch (mrp_tx_action_table[attr->state]) {
+		case MRP_TX_ACTION_NONE:
+		case MRP_TX_ACTION_S_JOIN_IN_OPTIONAL:
+		case MRP_TX_ACTION_S_IN_OPTIONAL:
+			break;
+		case MRP_TX_ACTION_S_NEW:
+			if (mrp_pdu_append_vecattr_event(
+				    app, attr, MRP_VECATTR_EVENT_NEW) < 0)
+				return;
+			break;
+		case MRP_TX_ACTION_S_JOIN_IN:
+			if (mrp_pdu_append_vecattr_event(
+				    app, attr, MRP_VECATTR_EVENT_JOIN_IN) < 0)
+				return;
+			break;
+		case MRP_TX_ACTION_S_LV:
+			if (mrp_pdu_append_vecattr_event(
+				    app, attr, MRP_VECATTR_EVENT_LV) < 0)
+				return;
+			/* As a pure applicant, sending a leave message
+			 * implies that the attribute was unregistered and
+			 * can be destroyed.
+			 */
+			mrp_attr_destroy(app, attr);
+			return;
+		default:
+			WARN_ON(1);
+		}
+	}
+
+	attr->state = state;
+}
+
+int mrp_request_join(const struct net_device *dev,
+		     const struct mrp_application *appl,
+		     const void *value, u8 len, u8 type)
+{
+	struct mrp_port *port = rtnl_dereference(dev->mrp_port);
+	struct mrp_applicant *app = rtnl_dereference(
+		port->applicants[appl->type]);
+	struct mrp_attr *attr;
+
+	if (sizeof(struct mrp_skb_cb) + len >
+	    FIELD_SIZEOF(struct sk_buff, cb))
+		return -ENOMEM;
+
+	spin_lock_bh(&app->lock);
+	attr = mrp_attr_create(app, value, len, type);
+	if (!attr) {
+		spin_unlock_bh(&app->lock);
+		return -ENOMEM;
+	}
+	mrp_attr_event(app, attr, MRP_EVENT_JOIN);
+	spin_unlock_bh(&app->lock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mrp_request_join);
+
+void mrp_request_leave(const struct net_device *dev,
+		       const struct mrp_application *appl,
+		       const void *value, u8 len, u8 type)
+{
+	struct mrp_port *port = rtnl_dereference(dev->mrp_port);
+	struct mrp_applicant *app = rtnl_dereference(
+		port->applicants[appl->type]);
+	struct mrp_attr *attr;
+
+	if (sizeof(struct mrp_skb_cb) + len >
+	    FIELD_SIZEOF(struct sk_buff, cb))
+		return;
+
+	spin_lock_bh(&app->lock);
+	attr = mrp_attr_lookup(app, value, len, type);
+	if (!attr) {
+		spin_unlock_bh(&app->lock);
+		return;
+	}
+	mrp_attr_event(app, attr, MRP_EVENT_LV);
+	spin_unlock_bh(&app->lock);
+}
+EXPORT_SYMBOL_GPL(mrp_request_leave);
+
+static void mrp_mad_event(struct mrp_applicant *app, enum mrp_event event)
+{
+	struct rb_node *node, *next;
+	struct mrp_attr *attr;
+
+	for (node = rb_first(&app->mad);
+	     next = node ? rb_next(node) : NULL, node != NULL;
+	     node = next) {
+		attr = rb_entry(node, struct mrp_attr, node);
+		mrp_attr_event(app, attr, event);
+	}
+}
+
+static void mrp_join_timer_arm(struct mrp_applicant *app)
+{
+	unsigned long delay;
+
+	delay = (u64)msecs_to_jiffies(mrp_join_time) * net_random() >> 32;
+	mod_timer(&app->join_timer, jiffies + delay);
+}
+
+static void mrp_join_timer(unsigned long data)
+{
+	struct mrp_applicant *app = (struct mrp_applicant *)data;
+
+	spin_lock(&app->lock);
+	mrp_mad_event(app, MRP_EVENT_TX);
+	mrp_pdu_queue(app);
+	spin_unlock(&app->lock);
+
+	mrp_queue_xmit(app);
+	mrp_join_timer_arm(app);
+}
+
+static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset)
+{
+	__be16 endmark;
+
+	if (skb_copy_bits(skb, *offset, &endmark, sizeof(endmark)) < 0)
+		return -1;
+	if (endmark == MRP_END_MARK) {
+		*offset += sizeof(endmark);
+		return -1;
+	}
+	return 0;
+}
+
+static void mrp_pdu_parse_vecattr_event(struct mrp_applicant *app,
+					struct sk_buff *skb,
+					enum mrp_vecattr_event vaevent)
+{
+	struct mrp_attr *attr;
+	enum mrp_event event;
+
+	attr = mrp_attr_lookup(app, mrp_cb(skb)->attrvalue,
+			       mrp_cb(skb)->mh->attrlen,
+			       mrp_cb(skb)->mh->attrtype);
+	if (attr == NULL)
+		return;
+
+	switch (vaevent) {
+	case MRP_VECATTR_EVENT_NEW:
+		event = MRP_EVENT_R_NEW;
+		break;
+	case MRP_VECATTR_EVENT_JOIN_IN:
+		event = MRP_EVENT_R_JOIN_IN;
+		break;
+	case MRP_VECATTR_EVENT_IN:
+		event = MRP_EVENT_R_IN;
+		break;
+	case MRP_VECATTR_EVENT_JOIN_MT:
+		event = MRP_EVENT_R_JOIN_MT;
+		break;
+	case MRP_VECATTR_EVENT_MT:
+		event = MRP_EVENT_R_MT;
+		break;
+	case MRP_VECATTR_EVENT_LV:
+		event = MRP_EVENT_R_LV;
+		break;
+	default:
+		return;
+	}
+
+	mrp_attr_event(app, attr, event);
+}
+
+static int mrp_pdu_parse_vecattr(struct mrp_applicant *app,
+				 struct sk_buff *skb, int *offset)
+{
+	struct mrp_vecattr_hdr _vah;
+	u16 valen;
+	u8 vaevents, vaevent;
+
+	mrp_cb(skb)->vah = skb_header_pointer(skb, *offset, sizeof(_vah),
+					      &_vah);
+	if (!mrp_cb(skb)->vah)
+		return -1;
+	*offset += sizeof(_vah);
+
+	if (get_unaligned(&mrp_cb(skb)->vah->lenflags) &
+	    MRP_VECATTR_HDR_FLAG_LA)
+		mrp_mad_event(app, MRP_EVENT_R_LA);
+	valen = be16_to_cpu(get_unaligned(&mrp_cb(skb)->vah->lenflags) &
+			    MRP_VECATTR_HDR_LEN_MASK);
+
+	/* The VectorAttribute structure in a PDU carries event information
+	 * about one or more attributes having consecutive values. Only the
+	 * value for the first attribute is contained in the structure. So
+	 * we make a copy of that value, and then increment it each time we
+	 * advance to the next event in its Vector.
+	 */
+	if (sizeof(struct mrp_skb_cb) + mrp_cb(skb)->mh->attrlen >
+	    FIELD_SIZEOF(struct sk_buff, cb))
+		return -1;
+	if (skb_copy_bits(skb, *offset, mrp_cb(skb)->attrvalue,
+			  mrp_cb(skb)->mh->attrlen) < 0)
+		return -1;
+	*offset += mrp_cb(skb)->mh->attrlen;
+
+	/* In a VectorAttribute, the Vector contains events which are packed
+	 * three to a byte. We process one byte of the Vector at a time.
+	 */
+	while (valen > 0) {
+		if (skb_copy_bits(skb, *offset, &vaevents,
+				  sizeof(vaevents)) < 0)
+			return -1;
+		*offset += sizeof(vaevents);
+
+		/* Extract and process the first event. */
+		vaevent = vaevents / (__MRP_VECATTR_EVENT_MAX *
+				      __MRP_VECATTR_EVENT_MAX);
+		if (vaevent >= __MRP_VECATTR_EVENT_MAX) {
+			/* The byte is malformed; stop processing. */
+			return -1;
+		}
+		mrp_pdu_parse_vecattr_event(app, skb, vaevent);
+
+		/* If present, extract and process the second event. */
+		if (!--valen)
+			break;
+		mrp_attrvalue_inc(mrp_cb(skb)->attrvalue,
+				  mrp_cb(skb)->mh->attrlen);
+		vaevents %= (__MRP_VECATTR_EVENT_MAX *
+			     __MRP_VECATTR_EVENT_MAX);
+		vaevent = vaevents / __MRP_VECATTR_EVENT_MAX;
+		mrp_pdu_parse_vecattr_event(app, skb, vaevent);
+
+		/* If present, extract and process the third event. */
+		if (!--valen)
+			break;
+		mrp_attrvalue_inc(mrp_cb(skb)->attrvalue,
+				  mrp_cb(skb)->mh->attrlen);
+		vaevents %= __MRP_VECATTR_EVENT_MAX;
+		vaevent = vaevents;
+		mrp_pdu_parse_vecattr_event(app, skb, vaevent);
+	}
+	return 0;
+}
+
+static int mrp_pdu_parse_msg(struct mrp_applicant *app, struct sk_buff *skb,
+			     int *offset)
+{
+	struct mrp_msg_hdr _mh;
+
+	mrp_cb(skb)->mh = skb_header_pointer(skb, *offset, sizeof(_mh), &_mh);
+	if (!mrp_cb(skb)->mh)
+		return -1;
+	*offset += sizeof(_mh);
+
+	if (mrp_cb(skb)->mh->attrtype == 0 ||
+	    mrp_cb(skb)->mh->attrtype > app->app->maxattr ||
+	    mrp_cb(skb)->mh->attrlen == 0)
+		return -1;
+
+	while (skb->len > *offset) {
+		if (mrp_pdu_parse_end_mark(skb, offset) < 0)
+			break;
+		if (mrp_pdu_parse_vecattr(app, skb, offset) < 0)
+			return -1;
+	}
+	return 0;
+}
+
+static int mrp_rcv(struct sk_buff *skb, struct net_device *dev,
+		   struct packet_type *pt, struct net_device *orig_dev)
+{
+	struct mrp_application *appl = container_of(pt, struct mrp_application,
+						    pkttype);
+	struct mrp_port *port;
+	struct mrp_applicant *app;
+	struct mrp_pdu_hdr _ph;
+	const struct mrp_pdu_hdr *ph;
+	int offset = skb_network_offset(skb);
+
+	/* If the interface is in promiscuous mode, drop the packet if
+	 * it was unicast to another host.
+	 */
+	if (unlikely(skb->pkt_type == PACKET_OTHERHOST))
+		goto out;
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (unlikely(!skb))
+		goto out;
+	port = rcu_dereference(dev->mrp_port);
+	if (unlikely(!port))
+		goto out;
+	app = rcu_dereference(port->applicants[appl->type]);
+	if (unlikely(!app))
+		goto out;
+
+	ph = skb_header_pointer(skb, offset, sizeof(_ph), &_ph);
+	if (!ph)
+		goto out;
+	offset += sizeof(_ph);
+
+	if (ph->version != app->app->version)
+		goto out;
+
+	spin_lock(&app->lock);
+	while (skb->len > offset) {
+		if (mrp_pdu_parse_end_mark(skb, &offset) < 0)
+			break;
+		if (mrp_pdu_parse_msg(app, skb, &offset) < 0)
+			break;
+	}
+	spin_unlock(&app->lock);
+out:
+	kfree_skb(skb);
+	return 0;
+}
+
+static int mrp_init_port(struct net_device *dev)
+{
+	struct mrp_port *port;
+
+	port = kzalloc(sizeof(*port), GFP_KERNEL);
+	if (!port)
+		return -ENOMEM;
+	rcu_assign_pointer(dev->mrp_port, port);
+	return 0;
+}
+
+static void mrp_release_port(struct net_device *dev)
+{
+	struct mrp_port *port = rtnl_dereference(dev->mrp_port);
+	unsigned int i;
+
+	for (i = 0; i <= MRP_APPLICATION_MAX; i++) {
+		if (rtnl_dereference(port->applicants[i]))
+			return;
+	}
+	RCU_INIT_POINTER(dev->mrp_port, NULL);
+	kfree_rcu(port, rcu);
+}
+
+int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl)
+{
+	struct mrp_applicant *app;
+	int err;
+
+	ASSERT_RTNL();
+
+	if (!rtnl_dereference(dev->mrp_port)) {
+		err = mrp_init_port(dev);
+		if (err < 0)
+			goto err1;
+	}
+
+	err = -ENOMEM;
+	app = kzalloc(sizeof(*app), GFP_KERNEL);
+	if (!app)
+		goto err2;
+
+	err = dev_mc_add(dev, appl->group_address);
+	if (err < 0)
+		goto err3;
+
+	app->dev = dev;
+	app->app = appl;
+	app->mad = RB_ROOT;
+	spin_lock_init(&app->lock);
+	skb_queue_head_init(&app->queue);
+	rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app);
+	setup_timer(&app->join_timer, mrp_join_timer, (unsigned long)app);
+	mrp_join_timer_arm(app);
+	return 0;
+
+err3:
+	kfree(app);
+err2:
+	mrp_release_port(dev);
+err1:
+	return err;
+}
+EXPORT_SYMBOL_GPL(mrp_init_applicant);
+
+void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl)
+{
+	struct mrp_port *port = rtnl_dereference(dev->mrp_port);
+	struct mrp_applicant *app = rtnl_dereference(
+		port->applicants[appl->type]);
+
+	ASSERT_RTNL();
+
+	RCU_INIT_POINTER(port->applicants[appl->type], NULL);
+
+	/* Delete timer and generate a final TX event to flush out
+	 * all pending messages before the applicant is gone.
+	 */
+	del_timer_sync(&app->join_timer);
+	mrp_mad_event(app, MRP_EVENT_TX);
+	mrp_pdu_queue(app);
+	mrp_queue_xmit(app);
+
+	dev_mc_del(dev, appl->group_address);
+	kfree_rcu(app, rcu);
+	mrp_release_port(dev);
+}
+EXPORT_SYMBOL_GPL(mrp_uninit_applicant);
+
+int mrp_register_application(struct mrp_application *appl)
+{
+	appl->pkttype.func = mrp_rcv;
+	dev_add_pack(&appl->pkttype);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mrp_register_application);
+
+void mrp_unregister_application(struct mrp_application *appl)
+{
+	dev_remove_pack(&appl->pkttype);
+}
+EXPORT_SYMBOL_GPL(mrp_unregister_application);
diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig
index fa073a54963..8f7517df41a 100644
--- a/net/8021q/Kconfig
+++ b/net/8021q/Kconfig
@@ -27,3 +27,14 @@ config VLAN_8021Q_GVRP
 	  automatic propagation of registered VLANs to switches.
 
 	  If unsure, say N.
+
+config VLAN_8021Q_MVRP
+	bool "MVRP (Multiple VLAN Registration Protocol) support"
+	depends on VLAN_8021Q
+	select MRP
+	help
+	  Select this to enable MVRP end-system support. MVRP is used for
+	  automatic propagation of registered VLANs to switches; it
+	  supersedes GVRP and is not backwards-compatible.
+
+	  If unsure, say N.
diff --git a/net/8021q/Makefile b/net/8021q/Makefile
index 9f4f174ead1..7bc8db08d7e 100644
--- a/net/8021q/Makefile
+++ b/net/8021q/Makefile
@@ -6,5 +6,6 @@ obj-$(CONFIG_VLAN_8021Q)		+= 8021q.o
 
 8021q-y					:= vlan.o vlan_dev.o vlan_netlink.o
 8021q-$(CONFIG_VLAN_8021Q_GVRP)		+= vlan_gvrp.o
+8021q-$(CONFIG_VLAN_8021Q_MVRP)		+= vlan_mvrp.o
 8021q-$(CONFIG_PROC_FS)			+= vlanproc.o
 
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index a292e8050ef..a18714469bf 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -95,6 +95,8 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 
 	grp->nr_vlan_devs--;
 
+	if (vlan->flags & VLAN_FLAG_MVRP)
+		vlan_mvrp_request_leave(dev);
 	if (vlan->flags & VLAN_FLAG_GVRP)
 		vlan_gvrp_request_leave(dev);
 
@@ -105,8 +107,12 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	 */
 	unregister_netdevice_queue(dev, head);
 
-	if (grp->nr_vlan_devs == 0)
+	netdev_upper_dev_unlink(real_dev, dev);
+
+	if (grp->nr_vlan_devs == 0) {
+		vlan_mvrp_uninit_applicant(real_dev);
 		vlan_gvrp_uninit_applicant(real_dev);
+	}
 
 	/* Get rid of the vlan's reference to real_dev */
 	dev_put(real_dev);
@@ -115,19 +121,12 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id)
 {
 	const char *name = real_dev->name;
-	const struct net_device_ops *ops = real_dev->netdev_ops;
 
 	if (real_dev->features & NETIF_F_VLAN_CHALLENGED) {
 		pr_info("VLANs not supported on %s\n", name);
 		return -EOPNOTSUPP;
 	}
 
-	if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) &&
-	    (!ops->ndo_vlan_rx_add_vid || !ops->ndo_vlan_rx_kill_vid)) {
-		pr_info("Device %s has buggy VLAN hw accel\n", name);
-		return -EOPNOTSUPP;
-	}
-
 	if (vlan_find_dev(real_dev, vlan_id) != NULL)
 		return -EEXIST;
 
@@ -156,15 +155,22 @@ int register_vlan_dev(struct net_device *dev)
 		err = vlan_gvrp_init_applicant(real_dev);
 		if (err < 0)
 			goto out_vid_del;
+		err = vlan_mvrp_init_applicant(real_dev);
+		if (err < 0)
+			goto out_uninit_gvrp;
 	}
 
 	err = vlan_group_prealloc_vid(grp, vlan_id);
 	if (err < 0)
-		goto out_uninit_applicant;
+		goto out_uninit_mvrp;
+
+	err = netdev_upper_dev_link(real_dev, dev);
+	if (err)
+		goto out_uninit_mvrp;
 
 	err = register_netdevice(dev);
 	if (err < 0)
-		goto out_uninit_applicant;
+		goto out_upper_dev_unlink;
 
 	/* Account for reference in struct vlan_dev_priv */
 	dev_hold(real_dev);
@@ -180,7 +186,12 @@ int register_vlan_dev(struct net_device *dev)
 
 	return 0;
 
-out_uninit_applicant:
+out_upper_dev_unlink:
+	netdev_upper_dev_unlink(real_dev, dev);
+out_uninit_mvrp:
+	if (grp->nr_vlan_devs == 0)
+		vlan_mvrp_uninit_applicant(real_dev);
+out_uninit_gvrp:
 	if (grp->nr_vlan_devs == 0)
 		vlan_gvrp_uninit_applicant(real_dev);
 out_vid_del:
@@ -654,13 +665,19 @@ static int __init vlan_proto_init(void)
 	if (err < 0)
 		goto err3;
 
-	err = vlan_netlink_init();
+	err = vlan_mvrp_init();
 	if (err < 0)
 		goto err4;
 
+	err = vlan_netlink_init();
+	if (err < 0)
+		goto err5;
+
 	vlan_ioctl_set(vlan_ioctl_handler);
 	return 0;
 
+err5:
+	vlan_mvrp_uninit();
 err4:
 	vlan_gvrp_uninit();
 err3:
@@ -681,6 +698,7 @@ static void __exit vlan_cleanup_module(void)
 	unregister_pernet_subsys(&vlan_net_ops);
 	rcu_barrier(); /* Wait for completion of call_rcu()'s */
 
+	vlan_mvrp_uninit();
 	vlan_gvrp_uninit();
 }
 
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index a4886d94c40..670f1e8cfc0 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -171,6 +171,22 @@ static inline int vlan_gvrp_init(void) { return 0; }
 static inline void vlan_gvrp_uninit(void) {}
 #endif
 
+#ifdef CONFIG_VLAN_8021Q_MVRP
+extern int vlan_mvrp_request_join(const struct net_device *dev);
+extern void vlan_mvrp_request_leave(const struct net_device *dev);
+extern int vlan_mvrp_init_applicant(struct net_device *dev);
+extern void vlan_mvrp_uninit_applicant(struct net_device *dev);
+extern int vlan_mvrp_init(void);
+extern void vlan_mvrp_uninit(void);
+#else
+static inline int vlan_mvrp_request_join(const struct net_device *dev) { return 0; }
+static inline void vlan_mvrp_request_leave(const struct net_device *dev) {}
+static inline int vlan_mvrp_init_applicant(struct net_device *dev) { return 0; }
+static inline void vlan_mvrp_uninit_applicant(struct net_device *dev) {}
+static inline int vlan_mvrp_init(void) { return 0; }
+static inline void vlan_mvrp_uninit(void) {}
+#endif
+
 extern const char vlan_fullname[];
 extern const char vlan_version[];
 extern int vlan_netlink_init(void);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 65e06abe023..f3b6f515eba 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -60,21 +60,25 @@ bool vlan_do_receive(struct sk_buff **skbp)
 	return true;
 }
 
-/* Must be invoked with rcu_read_lock or with RTNL. */
-struct net_device *__vlan_find_dev_deep(struct net_device *real_dev,
+/* Must be invoked with rcu_read_lock. */
+struct net_device *__vlan_find_dev_deep(struct net_device *dev,
 					u16 vlan_id)
 {
-	struct vlan_info *vlan_info = rcu_dereference_rtnl(real_dev->vlan_info);
+	struct vlan_info *vlan_info = rcu_dereference(dev->vlan_info);
 
 	if (vlan_info) {
 		return vlan_group_get_device(&vlan_info->grp, vlan_id);
 	} else {
 		/*
-		 * Bonding slaves do not have grp assigned to themselves.
-		 * Grp is assigned to bonding master instead.
+		 * Lower devices of master uppers (bonding, team) do not have
+		 * grp assigned to themselves. Grp is assigned to upper device
+		 * instead.
 		 */
-		if (netif_is_bond_slave(real_dev))
-			return __vlan_find_dev_deep(real_dev->master, vlan_id);
+		struct net_device *upper_dev;
+
+		upper_dev = netdev_master_upper_dev_get_rcu(dev);
+		if (upper_dev)
+			return __vlan_find_dev_deep(upper_dev, vlan_id);
 	}
 
 	return NULL;
@@ -140,6 +144,7 @@ err_free:
 	kfree_skb(skb);
 	return NULL;
 }
+EXPORT_SYMBOL(vlan_untag);
 
 
 /*
@@ -220,8 +225,7 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid,
 	if (!vid_info)
 		return -ENOMEM;
 
-	if ((dev->features & NETIF_F_HW_VLAN_FILTER) &&
-	    ops->ndo_vlan_rx_add_vid) {
+	if (dev->features & NETIF_F_HW_VLAN_FILTER) {
 		err =  ops->ndo_vlan_rx_add_vid(dev, vid);
 		if (err) {
 			kfree(vid_info);
@@ -278,8 +282,7 @@ static void __vlan_vid_del(struct vlan_info *vlan_info,
 	unsigned short vid = vid_info->vid;
 	int err;
 
-	if ((dev->features & NETIF_F_HW_VLAN_FILTER) &&
-	     ops->ndo_vlan_rx_kill_vid) {
+	if (dev->features & NETIF_F_HW_VLAN_FILTER) {
 		err = ops->ndo_vlan_rx_kill_vid(dev, vid);
 		if (err) {
 			pr_warn("failed to kill vid %d for device %s\n",
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 4a6d31a082b..19cf81bf9f6 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -261,7 +261,7 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask)
 	u32 old_flags = vlan->flags;
 
 	if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP |
-		     VLAN_FLAG_LOOSE_BINDING))
+		     VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP))
 		return -EINVAL;
 
 	vlan->flags = (old_flags & ~mask) | (flags & mask);
@@ -272,6 +272,13 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask)
 		else
 			vlan_gvrp_request_leave(dev);
 	}
+
+	if (netif_running(dev) && (vlan->flags ^ old_flags) & VLAN_FLAG_MVRP) {
+		if (vlan->flags & VLAN_FLAG_MVRP)
+			vlan_mvrp_request_join(dev);
+		else
+			vlan_mvrp_request_leave(dev);
+	}
 	return 0;
 }
 
@@ -312,6 +319,9 @@ static int vlan_dev_open(struct net_device *dev)
 	if (vlan->flags & VLAN_FLAG_GVRP)
 		vlan_gvrp_request_join(dev);
 
+	if (vlan->flags & VLAN_FLAG_MVRP)
+		vlan_mvrp_request_join(dev);
+
 	if (netif_carrier_ok(real_dev))
 		netif_carrier_on(dev);
 	return 0;
@@ -640,9 +650,9 @@ static int vlan_ethtool_get_settings(struct net_device *dev,
 static void vlan_ethtool_get_drvinfo(struct net_device *dev,
 				     struct ethtool_drvinfo *info)
 {
-	strcpy(info->driver, vlan_fullname);
-	strcpy(info->version, vlan_version);
-	strcpy(info->fw_version, "N/A");
+	strlcpy(info->driver, vlan_fullname, sizeof(info->driver));
+	strlcpy(info->version, vlan_version, sizeof(info->version));
+	strlcpy(info->fw_version, "N/A", sizeof(info->fw_version));
 }
 
 static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
@@ -723,7 +733,7 @@ static void vlan_dev_netpoll_cleanup(struct net_device *dev)
 
 	vlan->netpoll = NULL;
 
-	__netpoll_free_rcu(netpoll);
+	__netpoll_free_async(netpoll);
 }
 #endif /* CONFIG_NET_POLL_CONTROLLER */
 
diff --git a/net/8021q/vlan_mvrp.c b/net/8021q/vlan_mvrp.c
new file mode 100644
index 00000000000..d9ec1d5964a
--- /dev/null
+++ b/net/8021q/vlan_mvrp.c
@@ -0,0 +1,72 @@
+/*
+ *	IEEE 802.1Q Multiple VLAN Registration Protocol (MVRP)
+ *
+ *	Copyright (c) 2012 Massachusetts Institute of Technology
+ *
+ *	Adapted from code in net/8021q/vlan_gvrp.c
+ *	Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	version 2 as published by the Free Software Foundation.
+ */
+#include <linux/types.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <net/mrp.h>
+#include "vlan.h"
+
+#define MRP_MVRP_ADDRESS	{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x21 }
+
+enum mvrp_attributes {
+	MVRP_ATTR_INVALID,
+	MVRP_ATTR_VID,
+	__MVRP_ATTR_MAX
+};
+#define MVRP_ATTR_MAX	(__MVRP_ATTR_MAX - 1)
+
+static struct mrp_application vlan_mrp_app __read_mostly = {
+	.type		= MRP_APPLICATION_MVRP,
+	.maxattr	= MVRP_ATTR_MAX,
+	.pkttype.type	= htons(ETH_P_MVRP),
+	.group_address	= MRP_MVRP_ADDRESS,
+	.version	= 0,
+};
+
+int vlan_mvrp_request_join(const struct net_device *dev)
+{
+	const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
+	__be16 vlan_id = htons(vlan->vlan_id);
+
+	return mrp_request_join(vlan->real_dev, &vlan_mrp_app,
+				&vlan_id, sizeof(vlan_id), MVRP_ATTR_VID);
+}
+
+void vlan_mvrp_request_leave(const struct net_device *dev)
+{
+	const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
+	__be16 vlan_id = htons(vlan->vlan_id);
+
+	mrp_request_leave(vlan->real_dev, &vlan_mrp_app,
+			  &vlan_id, sizeof(vlan_id), MVRP_ATTR_VID);
+}
+
+int vlan_mvrp_init_applicant(struct net_device *dev)
+{
+	return mrp_init_applicant(dev, &vlan_mrp_app);
+}
+
+void vlan_mvrp_uninit_applicant(struct net_device *dev)
+{
+	mrp_uninit_applicant(dev, &vlan_mrp_app);
+}
+
+int __init vlan_mvrp_init(void)
+{
+	return mrp_register_application(&vlan_mrp_app);
+}
+
+void vlan_mvrp_uninit(void)
+{
+	mrp_unregister_application(&vlan_mrp_app);
+}
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 708c80ea187..1789658b7cd 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -62,7 +62,7 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
 		flags = nla_data(data[IFLA_VLAN_FLAGS]);
 		if ((flags->flags & flags->mask) &
 		    ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP |
-		      VLAN_FLAG_LOOSE_BINDING))
+		      VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP))
 			return -EINVAL;
 	}
 
diff --git a/net/Kconfig b/net/Kconfig
index 30b48f52313..5a1888bb036 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -209,7 +209,6 @@ source "net/ipx/Kconfig"
 source "drivers/net/appletalk/Kconfig"
 source "net/x25/Kconfig"
 source "net/lapb/Kconfig"
-source "net/wanrouter/Kconfig"
 source "net/phonet/Kconfig"
 source "net/ieee802154/Kconfig"
 source "net/mac802154/Kconfig"
@@ -218,6 +217,7 @@ source "net/dcb/Kconfig"
 source "net/dns_resolver/Kconfig"
 source "net/batman-adv/Kconfig"
 source "net/openvswitch/Kconfig"
+source "net/vmw_vsock/Kconfig"
 
 config RPS
 	boolean
@@ -232,7 +232,7 @@ config RFS_ACCEL
 
 config XPS
 	boolean
-	depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
+	depends on SMP && USE_GENERIC_SMP_HELPERS
 	default y
 
 config NETPRIO_CGROUP
diff --git a/net/Makefile b/net/Makefile
index 4f4ee083064..091e7b04f30 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -26,7 +26,6 @@ obj-$(CONFIG_BRIDGE)		+= bridge/
 obj-$(CONFIG_NET_DSA)		+= dsa/
 obj-$(CONFIG_IPX)		+= ipx/
 obj-$(CONFIG_ATALK)		+= appletalk/
-obj-$(CONFIG_WAN_ROUTER)	+= wanrouter/
 obj-$(CONFIG_X25)		+= x25/
 obj-$(CONFIG_LAPB)		+= lapb/
 obj-$(CONFIG_NETROM)		+= netrom/
@@ -70,3 +69,4 @@ obj-$(CONFIG_CEPH_LIB)		+= ceph/
 obj-$(CONFIG_BATMAN_ADV)	+= batman-adv/
 obj-$(CONFIG_NFC)		+= nfc/
 obj-$(CONFIG_OPENVSWITCH)	+= openvswitch/
+obj-$(CONFIG_VSOCKETS)	+= vmw_vsock/
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index a0ba3bff9b3..a4808c29ea3 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 9f3925a85aa..72fe1bbf772 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -123,7 +123,7 @@ batadv_iv_ogm_emit_send_time(const struct batadv_priv *bat_priv)
 	unsigned int msecs;
 
 	msecs = atomic_read(&bat_priv->orig_interval) - BATADV_JITTER;
-	msecs += (random32() % 2 * BATADV_JITTER);
+	msecs += prandom_u32() % (2 * BATADV_JITTER);
 
 	return jiffies + msecs_to_jiffies(msecs);
 }
@@ -131,7 +131,7 @@ batadv_iv_ogm_emit_send_time(const struct batadv_priv *bat_priv)
 /* when do we schedule a ogm packet to be sent */
 static unsigned long batadv_iv_ogm_fwd_send_time(void)
 {
-	return jiffies + msecs_to_jiffies(random32() % (BATADV_JITTER / 2));
+	return jiffies + msecs_to_jiffies(prandom_u32() % (BATADV_JITTER / 2));
 }
 
 /* apply hop penalty for a normal link */
@@ -183,7 +183,6 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet,
 	/* adjust all flags and log packets */
 	while (batadv_iv_ogm_aggr_packet(buff_pos, forw_packet->packet_len,
 					 batadv_ogm_packet->tt_num_changes)) {
-
 		/* we might have aggregated direct link packets with an
 		 * ordinary base packet
 		 */
@@ -261,7 +260,6 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet)
 	 */
 	if ((directlink && (batadv_ogm_packet->header.ttl == 1)) ||
 	    (forw_packet->own && (forw_packet->if_incoming != primary_if))) {
-
 		/* FIXME: what about aggregated packets ? */
 		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
 			   "%s packet (originator %pM, seqno %u, TTL %d) on interface %s [%pM]\n",
@@ -325,7 +323,6 @@ batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet,
 	if (time_before(send_time, forw_packet->send_time) &&
 	    time_after_eq(aggregation_end_time, forw_packet->send_time) &&
 	    (aggregated_bytes <= BATADV_MAX_AGGREGATION_BYTES)) {
-
 		/* check aggregation compatibility
 		 * -> direct link packets are broadcasted on
 		 *    their interface only
@@ -815,7 +812,6 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(tmp_neigh_node, node,
 				 &orig_neigh_node->neigh_list, list) {
-
 		if (!batadv_compare_eth(tmp_neigh_node->addr,
 					orig_neigh_node->orig))
 			continue;
@@ -949,7 +945,6 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(tmp_neigh_node, node,
 				 &orig_node->neigh_list, list) {
-
 		is_duplicate |= batadv_test_bit(tmp_neigh_node->real_bits,
 						orig_node->last_real_seqno,
 						seqno);
@@ -1033,7 +1028,7 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
 		is_single_hop_neigh = true;
 
 	batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
-		   "Received BATMAN packet via NB: %pM, IF: %s [%pM] (from OG: %pM, via prev OG: %pM, seqno %u, ttvn %u, crc %u, changes %u, td %d, TTL %d, V %d, IDF %d)\n",
+		   "Received BATMAN packet via NB: %pM, IF: %s [%pM] (from OG: %pM, via prev OG: %pM, seqno %u, ttvn %u, crc %#.4x, changes %u, tq %d, TTL %d, V %d, IDF %d)\n",
 		   ethhdr->h_source, if_incoming->net_dev->name,
 		   if_incoming->net_dev->dev_addr, batadv_ogm_packet->orig,
 		   batadv_ogm_packet->prev_sender,
@@ -1223,7 +1218,6 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
 
 	/* is single hop (direct) neighbor */
 	if (is_single_hop_neigh) {
-
 		/* mark direct link on incoming interface */
 		batadv_iv_ogm_forward(orig_node, ethhdr, batadv_ogm_packet,
 				      is_single_hop_neigh,
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index 5453b17d8df..973982414d5 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2006-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2013 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
  *
diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h
index cebaae7e148..a81b9322e38 100644
--- a/net/batman-adv/bitarray.h
+++ b/net/batman-adv/bitarray.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2006-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2013 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
  *
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 5aebe9327d6..30f46526cbb 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2013 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich
  *
@@ -34,13 +34,14 @@
 static const uint8_t batadv_announce_mac[4] = {0x43, 0x05, 0x43, 0x05};
 
 static void batadv_bla_periodic_work(struct work_struct *work);
-static void batadv_bla_send_announce(struct batadv_priv *bat_priv,
-				     struct batadv_backbone_gw *backbone_gw);
+static void
+batadv_bla_send_announce(struct batadv_priv *bat_priv,
+			 struct batadv_bla_backbone_gw *backbone_gw);
 
 /* return the index of the claim */
 static inline uint32_t batadv_choose_claim(const void *data, uint32_t size)
 {
-	struct batadv_claim *claim = (struct batadv_claim *)data;
+	struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data;
 	uint32_t hash = 0;
 
 	hash = batadv_hash_bytes(hash, &claim->addr, sizeof(claim->addr));
@@ -57,7 +58,7 @@ static inline uint32_t batadv_choose_claim(const void *data, uint32_t size)
 static inline uint32_t batadv_choose_backbone_gw(const void *data,
 						 uint32_t size)
 {
-	struct batadv_claim *claim = (struct batadv_claim *)data;
+	const struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data;
 	uint32_t hash = 0;
 
 	hash = batadv_hash_bytes(hash, &claim->addr, sizeof(claim->addr));
@@ -75,9 +76,9 @@ static inline uint32_t batadv_choose_backbone_gw(const void *data,
 static int batadv_compare_backbone_gw(const struct hlist_node *node,
 				      const void *data2)
 {
-	const void *data1 = container_of(node, struct batadv_backbone_gw,
+	const void *data1 = container_of(node, struct batadv_bla_backbone_gw,
 					 hash_entry);
-	const struct batadv_backbone_gw *gw1 = data1, *gw2 = data2;
+	const struct batadv_bla_backbone_gw *gw1 = data1, *gw2 = data2;
 
 	if (!batadv_compare_eth(gw1->orig, gw2->orig))
 		return 0;
@@ -92,9 +93,9 @@ static int batadv_compare_backbone_gw(const struct hlist_node *node,
 static int batadv_compare_claim(const struct hlist_node *node,
 				const void *data2)
 {
-	const void *data1 = container_of(node, struct batadv_claim,
+	const void *data1 = container_of(node, struct batadv_bla_claim,
 					 hash_entry);
-	const struct batadv_claim *cl1 = data1, *cl2 = data2;
+	const struct batadv_bla_claim *cl1 = data1, *cl2 = data2;
 
 	if (!batadv_compare_eth(cl1->addr, cl2->addr))
 		return 0;
@@ -106,7 +107,8 @@ static int batadv_compare_claim(const struct hlist_node *node,
 }
 
 /* free a backbone gw */
-static void batadv_backbone_gw_free_ref(struct batadv_backbone_gw *backbone_gw)
+static void
+batadv_backbone_gw_free_ref(struct batadv_bla_backbone_gw *backbone_gw)
 {
 	if (atomic_dec_and_test(&backbone_gw->refcount))
 		kfree_rcu(backbone_gw, rcu);
@@ -115,16 +117,16 @@ static void batadv_backbone_gw_free_ref(struct batadv_backbone_gw *backbone_gw)
 /* finally deinitialize the claim */
 static void batadv_claim_free_rcu(struct rcu_head *rcu)
 {
-	struct batadv_claim *claim;
+	struct batadv_bla_claim *claim;
 
-	claim = container_of(rcu, struct batadv_claim, rcu);
+	claim = container_of(rcu, struct batadv_bla_claim, rcu);
 
 	batadv_backbone_gw_free_ref(claim->backbone_gw);
 	kfree(claim);
 }
 
 /* free a claim, call claim_free_rcu if its the last reference */
-static void batadv_claim_free_ref(struct batadv_claim *claim)
+static void batadv_claim_free_ref(struct batadv_bla_claim *claim)
 {
 	if (atomic_dec_and_test(&claim->refcount))
 		call_rcu(&claim->rcu, batadv_claim_free_rcu);
@@ -136,14 +138,15 @@ static void batadv_claim_free_ref(struct batadv_claim *claim)
  * looks for a claim in the hash, and returns it if found
  * or NULL otherwise.
  */
-static struct batadv_claim *batadv_claim_hash_find(struct batadv_priv *bat_priv,
-						   struct batadv_claim *data)
+static struct batadv_bla_claim
+*batadv_claim_hash_find(struct batadv_priv *bat_priv,
+			struct batadv_bla_claim *data)
 {
 	struct batadv_hashtable *hash = bat_priv->bla.claim_hash;
 	struct hlist_head *head;
 	struct hlist_node *node;
-	struct batadv_claim *claim;
-	struct batadv_claim *claim_tmp = NULL;
+	struct batadv_bla_claim *claim;
+	struct batadv_bla_claim *claim_tmp = NULL;
 	int index;
 
 	if (!hash)
@@ -176,15 +179,15 @@ static struct batadv_claim *batadv_claim_hash_find(struct batadv_priv *bat_priv,
  *
  * Returns claim if found or NULL otherwise.
  */
-static struct batadv_backbone_gw *
+static struct batadv_bla_backbone_gw *
 batadv_backbone_hash_find(struct batadv_priv *bat_priv,
 			  uint8_t *addr, short vid)
 {
 	struct batadv_hashtable *hash = bat_priv->bla.backbone_hash;
 	struct hlist_head *head;
 	struct hlist_node *node;
-	struct batadv_backbone_gw search_entry, *backbone_gw;
-	struct batadv_backbone_gw *backbone_gw_tmp = NULL;
+	struct batadv_bla_backbone_gw search_entry, *backbone_gw;
+	struct batadv_bla_backbone_gw *backbone_gw_tmp = NULL;
 	int index;
 
 	if (!hash)
@@ -215,12 +218,12 @@ batadv_backbone_hash_find(struct batadv_priv *bat_priv,
 
 /* delete all claims for a backbone */
 static void
-batadv_bla_del_backbone_claims(struct batadv_backbone_gw *backbone_gw)
+batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw)
 {
 	struct batadv_hashtable *hash;
 	struct hlist_node *node, *node_tmp;
 	struct hlist_head *head;
-	struct batadv_claim *claim;
+	struct batadv_bla_claim *claim;
 	int i;
 	spinlock_t *list_lock;	/* protects write access to the hash lists */
 
@@ -235,7 +238,6 @@ batadv_bla_del_backbone_claims(struct batadv_backbone_gw *backbone_gw)
 		spin_lock_bh(list_lock);
 		hlist_for_each_entry_safe(claim, node, node_tmp,
 					  head, hash_entry) {
-
 			if (claim->backbone_gw != backbone_gw)
 				continue;
 
@@ -338,7 +340,6 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac,
 			   "bla_send_claim(): REQUEST of %pM to %pMon vid %d\n",
 			   ethhdr->h_source, ethhdr->h_dest, vid);
 		break;
-
 	}
 
 	if (vid != -1)
@@ -366,11 +367,11 @@ out:
  * searches for the backbone gw or creates a new one if it could not
  * be found.
  */
-static struct batadv_backbone_gw *
+static struct batadv_bla_backbone_gw *
 batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig,
 			   short vid, bool own_backbone)
 {
-	struct batadv_backbone_gw *entry;
+	struct batadv_bla_backbone_gw *entry;
 	struct batadv_orig_node *orig_node;
 	int hash_added;
 
@@ -437,7 +438,7 @@ batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv,
 				  struct batadv_hard_iface *primary_if,
 				  short vid)
 {
-	struct batadv_backbone_gw *backbone_gw;
+	struct batadv_bla_backbone_gw *backbone_gw;
 
 	backbone_gw = batadv_bla_get_backbone_gw(bat_priv,
 						 primary_if->net_dev->dev_addr,
@@ -462,8 +463,8 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv,
 	struct hlist_node *node;
 	struct hlist_head *head;
 	struct batadv_hashtable *hash;
-	struct batadv_claim *claim;
-	struct batadv_backbone_gw *backbone_gw;
+	struct batadv_bla_claim *claim;
+	struct batadv_bla_backbone_gw *backbone_gw;
 	int i;
 
 	batadv_dbg(BATADV_DBG_BLA, bat_priv,
@@ -502,7 +503,7 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv,
  * After the request, it will repeat all of his own claims and finally
  * send an announcement claim with which we can check again.
  */
-static void batadv_bla_send_request(struct batadv_backbone_gw *backbone_gw)
+static void batadv_bla_send_request(struct batadv_bla_backbone_gw *backbone_gw)
 {
 	/* first, remove all old entries */
 	batadv_bla_del_backbone_claims(backbone_gw);
@@ -528,7 +529,7 @@ static void batadv_bla_send_request(struct batadv_backbone_gw *backbone_gw)
  * places.
  */
 static void batadv_bla_send_announce(struct batadv_priv *bat_priv,
-				     struct batadv_backbone_gw *backbone_gw)
+				     struct batadv_bla_backbone_gw *backbone_gw)
 {
 	uint8_t mac[ETH_ALEN];
 	__be16 crc;
@@ -539,7 +540,6 @@ static void batadv_bla_send_announce(struct batadv_priv *bat_priv,
 
 	batadv_bla_send_claim(bat_priv, mac, backbone_gw->vid,
 			      BATADV_CLAIM_TYPE_ANNOUNCE);
-
 }
 
 /**
@@ -551,10 +551,10 @@ static void batadv_bla_send_announce(struct batadv_priv *bat_priv,
  */
 static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
 				 const uint8_t *mac, const short vid,
-				 struct batadv_backbone_gw *backbone_gw)
+				 struct batadv_bla_backbone_gw *backbone_gw)
 {
-	struct batadv_claim *claim;
-	struct batadv_claim search_claim;
+	struct batadv_bla_claim *claim;
+	struct batadv_bla_claim search_claim;
 	int hash_added;
 
 	memcpy(search_claim.addr, mac, ETH_ALEN);
@@ -598,7 +598,6 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
 
 		claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN);
 		batadv_backbone_gw_free_ref(claim->backbone_gw);
-
 	}
 	/* set (new) backbone gw */
 	atomic_inc(&backbone_gw->refcount);
@@ -617,7 +616,7 @@ claim_free_ref:
 static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
 				 const uint8_t *mac, const short vid)
 {
-	struct batadv_claim search_claim, *claim;
+	struct batadv_bla_claim search_claim, *claim;
 
 	memcpy(search_claim.addr, mac, ETH_ALEN);
 	search_claim.vid = vid;
@@ -643,7 +642,7 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv,
 				  uint8_t *an_addr, uint8_t *backbone_addr,
 				  short vid)
 {
-	struct batadv_backbone_gw *backbone_gw;
+	struct batadv_bla_backbone_gw *backbone_gw;
 	uint16_t crc;
 
 	if (memcmp(an_addr, batadv_announce_mac, 4) != 0)
@@ -661,12 +660,12 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv,
 	crc = ntohs(*((__be16 *)(&an_addr[4])));
 
 	batadv_dbg(BATADV_DBG_BLA, bat_priv,
-		   "handle_announce(): ANNOUNCE vid %d (sent by %pM)... CRC = %04x\n",
+		   "handle_announce(): ANNOUNCE vid %d (sent by %pM)... CRC = %#.4x\n",
 		   vid, backbone_gw->orig, crc);
 
 	if (backbone_gw->crc != crc) {
 		batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv,
-			   "handle_announce(): CRC FAILED for %pM/%d (my = %04x, sent = %04x)\n",
+			   "handle_announce(): CRC FAILED for %pM/%d (my = %#.4x, sent = %#.4x)\n",
 			   backbone_gw->orig, backbone_gw->vid,
 			   backbone_gw->crc, crc);
 
@@ -715,7 +714,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv,
 				 uint8_t *backbone_addr,
 				 uint8_t *claim_addr, short vid)
 {
-	struct batadv_backbone_gw *backbone_gw;
+	struct batadv_bla_backbone_gw *backbone_gw;
 
 	/* unclaim in any case if it is our own */
 	if (primary_if && batadv_compare_eth(backbone_addr,
@@ -744,7 +743,7 @@ static int batadv_handle_claim(struct batadv_priv *bat_priv,
 			       uint8_t *backbone_addr, uint8_t *claim_addr,
 			       short vid)
 {
-	struct batadv_backbone_gw *backbone_gw;
+	struct batadv_bla_backbone_gw *backbone_gw;
 
 	/* register the gateway if not yet available, and add the claim. */
 
@@ -835,7 +834,7 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv,
 	/* if our mesh friends mac is bigger, use it for ourselves. */
 	if (ntohs(bla_dst->group) > ntohs(bla_dst_own->group)) {
 		batadv_dbg(BATADV_DBG_BLA, bat_priv,
-			   "taking other backbones claim group: %04x\n",
+			   "taking other backbones claim group: %#.4x\n",
 			   ntohs(bla_dst->group));
 		bla_dst_own->group = bla_dst->group;
 	}
@@ -958,7 +957,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
  */
 static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now)
 {
-	struct batadv_backbone_gw *backbone_gw;
+	struct batadv_bla_backbone_gw *backbone_gw;
 	struct hlist_node *node, *node_tmp;
 	struct hlist_head *head;
 	struct batadv_hashtable *hash;
@@ -1013,7 +1012,7 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv,
 				    struct batadv_hard_iface *primary_if,
 				    int now)
 {
-	struct batadv_claim *claim;
+	struct batadv_bla_claim *claim;
 	struct hlist_node *node;
 	struct hlist_head *head;
 	struct batadv_hashtable *hash;
@@ -1062,7 +1061,7 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv,
 				    struct batadv_hard_iface *primary_if,
 				    struct batadv_hard_iface *oldif)
 {
-	struct batadv_backbone_gw *backbone_gw;
+	struct batadv_bla_backbone_gw *backbone_gw;
 	struct hlist_node *node;
 	struct hlist_head *head;
 	struct batadv_hashtable *hash;
@@ -1104,16 +1103,6 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv,
 	}
 }
 
-
-
-/* (re)start the timer */
-static void batadv_bla_start_timer(struct batadv_priv *bat_priv)
-{
-	INIT_DELAYED_WORK(&bat_priv->bla.work, batadv_bla_periodic_work);
-	queue_delayed_work(batadv_event_workqueue, &bat_priv->bla.work,
-			   msecs_to_jiffies(BATADV_BLA_PERIOD_LENGTH));
-}
-
 /* periodic work to do:
  *  * purge structures when they are too old
  *  * send announcements
@@ -1125,7 +1114,7 @@ static void batadv_bla_periodic_work(struct work_struct *work)
 	struct batadv_priv_bla *priv_bla;
 	struct hlist_node *node;
 	struct hlist_head *head;
-	struct batadv_backbone_gw *backbone_gw;
+	struct batadv_bla_backbone_gw *backbone_gw;
 	struct batadv_hashtable *hash;
 	struct batadv_hard_iface *primary_if;
 	int i;
@@ -1184,7 +1173,8 @@ out:
 	if (primary_if)
 		batadv_hardif_free_ref(primary_if);
 
-	batadv_bla_start_timer(bat_priv);
+	queue_delayed_work(batadv_event_workqueue, &bat_priv->bla.work,
+			   msecs_to_jiffies(BATADV_BLA_PERIOD_LENGTH));
 }
 
 /* The hash for claim and backbone hash receive the same key because they
@@ -1242,7 +1232,10 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
 
 	batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla hashes initialized\n");
 
-	batadv_bla_start_timer(bat_priv);
+	INIT_DELAYED_WORK(&bat_priv->bla.work, batadv_bla_periodic_work);
+
+	queue_delayed_work(batadv_event_workqueue, &bat_priv->bla.work,
+			   msecs_to_jiffies(BATADV_BLA_PERIOD_LENGTH));
 	return 0;
 }
 
@@ -1330,7 +1323,7 @@ int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig)
 	struct batadv_hashtable *hash = bat_priv->bla.backbone_hash;
 	struct hlist_head *head;
 	struct hlist_node *node;
-	struct batadv_backbone_gw *backbone_gw;
+	struct batadv_bla_backbone_gw *backbone_gw;
 	int i;
 
 	if (!atomic_read(&bat_priv->bridge_loop_avoidance))
@@ -1371,7 +1364,7 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb,
 {
 	struct ethhdr *ethhdr;
 	struct vlan_ethhdr *vhdr;
-	struct batadv_backbone_gw *backbone_gw;
+	struct batadv_bla_backbone_gw *backbone_gw;
 	short vid = -1;
 
 	if (!atomic_read(&orig_node->bat_priv->bridge_loop_avoidance))
@@ -1442,7 +1435,7 @@ int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid,
 		  bool is_bcast)
 {
 	struct ethhdr *ethhdr;
-	struct batadv_claim search_claim, *claim = NULL;
+	struct batadv_bla_claim search_claim, *claim = NULL;
 	struct batadv_hard_iface *primary_if;
 	int ret;
 
@@ -1536,7 +1529,7 @@ out:
 int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid)
 {
 	struct ethhdr *ethhdr;
-	struct batadv_claim search_claim, *claim = NULL;
+	struct batadv_bla_claim search_claim, *claim = NULL;
 	struct batadv_hard_iface *primary_if;
 	int ret = 0;
 
@@ -1612,7 +1605,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
 	struct net_device *net_dev = (struct net_device *)seq->private;
 	struct batadv_priv *bat_priv = netdev_priv(net_dev);
 	struct batadv_hashtable *hash = bat_priv->bla.claim_hash;
-	struct batadv_claim *claim;
+	struct batadv_bla_claim *claim;
 	struct batadv_hard_iface *primary_if;
 	struct hlist_node *node;
 	struct hlist_head *head;
@@ -1626,10 +1619,10 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
 
 	primary_addr = primary_if->net_dev->dev_addr;
 	seq_printf(seq,
-		   "Claims announced for the mesh %s (orig %pM, group id %04x)\n",
+		   "Claims announced for the mesh %s (orig %pM, group id %#.4x)\n",
 		   net_dev->name, primary_addr,
 		   ntohs(bat_priv->bla.claim_dest.group));
-	seq_printf(seq, "   %-17s    %-5s    %-17s [o] (%-4s)\n",
+	seq_printf(seq, "   %-17s    %-5s    %-17s [o] (%-6s)\n",
 		   "Client", "VID", "Originator", "CRC");
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
@@ -1638,7 +1631,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
 		hlist_for_each_entry_rcu(claim, node, head, hash_entry) {
 			is_own = batadv_compare_eth(claim->backbone_gw->orig,
 						    primary_addr);
-			seq_printf(seq,	" * %pM on % 5d by %pM [%c] (%04x)\n",
+			seq_printf(seq,	" * %pM on % 5d by %pM [%c] (%#.4x)\n",
 				   claim->addr, claim->vid,
 				   claim->backbone_gw->orig,
 				   (is_own ? 'x' : ' '),
@@ -1657,7 +1650,7 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
 	struct net_device *net_dev = (struct net_device *)seq->private;
 	struct batadv_priv *bat_priv = netdev_priv(net_dev);
 	struct batadv_hashtable *hash = bat_priv->bla.backbone_hash;
-	struct batadv_backbone_gw *backbone_gw;
+	struct batadv_bla_backbone_gw *backbone_gw;
 	struct batadv_hard_iface *primary_if;
 	struct hlist_node *node;
 	struct hlist_head *head;
@@ -1672,10 +1665,10 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
 
 	primary_addr = primary_if->net_dev->dev_addr;
 	seq_printf(seq,
-		   "Backbones announced for the mesh %s (orig %pM, group id %04x)\n",
+		   "Backbones announced for the mesh %s (orig %pM, group id %#.4x)\n",
 		   net_dev->name, primary_addr,
 		   ntohs(bat_priv->bla.claim_dest.group));
-	seq_printf(seq, "   %-17s    %-5s %-9s (%-4s)\n",
+	seq_printf(seq, "   %-17s    %-5s %-9s (%-6s)\n",
 		   "Originator", "VID", "last seen", "CRC");
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
@@ -1693,7 +1686,7 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
 				continue;
 
 			seq_printf(seq,
-				   " * %pM on % 5d % 4i.%03is (%04x)\n",
+				   " * %pM on % 5d % 4i.%03is (%#.4x)\n",
 				   backbone_gw->orig, backbone_gw->vid,
 				   secs, msecs, backbone_gw->crc);
 		}
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 196d9a0254b..dea2fbc5d98 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2013 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich
  *
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 6f58ddd53bf..6ae86516db4 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2010-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
@@ -40,13 +40,14 @@ static struct dentry *batadv_debugfs;
 
 static const int batadv_log_buff_len = BATADV_LOG_BUF_LEN;
 
-static char *batadv_log_char_addr(struct batadv_debug_log *debug_log,
+static char *batadv_log_char_addr(struct batadv_priv_debug_log *debug_log,
 				  size_t idx)
 {
 	return &debug_log->log_buff[idx & BATADV_LOG_BUFF_MASK];
 }
 
-static void batadv_emit_log_char(struct batadv_debug_log *debug_log, char c)
+static void batadv_emit_log_char(struct batadv_priv_debug_log *debug_log,
+				 char c)
 {
 	char *char_addr;
 
@@ -59,7 +60,7 @@ static void batadv_emit_log_char(struct batadv_debug_log *debug_log, char c)
 }
 
 __printf(2, 3)
-static int batadv_fdebug_log(struct batadv_debug_log *debug_log,
+static int batadv_fdebug_log(struct batadv_priv_debug_log *debug_log,
 			     const char *fmt, ...)
 {
 	va_list args;
@@ -114,7 +115,7 @@ static int batadv_log_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static int batadv_log_empty(struct batadv_debug_log *debug_log)
+static int batadv_log_empty(struct batadv_priv_debug_log *debug_log)
 {
 	return !(debug_log->log_start - debug_log->log_end);
 }
@@ -123,7 +124,7 @@ static ssize_t batadv_log_read(struct file *file, char __user *buf,
 			       size_t count, loff_t *ppos)
 {
 	struct batadv_priv *bat_priv = file->private_data;
-	struct batadv_debug_log *debug_log = bat_priv->debug_log;
+	struct batadv_priv_debug_log *debug_log = bat_priv->debug_log;
 	int error, i = 0;
 	char *char_addr;
 	char c;
@@ -164,7 +165,6 @@ static ssize_t batadv_log_read(struct file *file, char __user *buf,
 
 		buf++;
 		i++;
-
 	}
 
 	spin_unlock_bh(&debug_log->lock);
@@ -178,7 +178,7 @@ static ssize_t batadv_log_read(struct file *file, char __user *buf,
 static unsigned int batadv_log_poll(struct file *file, poll_table *wait)
 {
 	struct batadv_priv *bat_priv = file->private_data;
-	struct batadv_debug_log *debug_log = bat_priv->debug_log;
+	struct batadv_priv_debug_log *debug_log = bat_priv->debug_log;
 
 	poll_wait(file, &debug_log->queue_wait, wait);
 
@@ -230,7 +230,6 @@ static void batadv_debug_log_cleanup(struct batadv_priv *bat_priv)
 #else /* CONFIG_BATMAN_ADV_DEBUG */
 static int batadv_debug_log_setup(struct batadv_priv *bat_priv)
 {
-	bat_priv->debug_log = NULL;
 	return 0;
 }
 
@@ -397,10 +396,8 @@ err:
 
 void batadv_debugfs_destroy(void)
 {
-	if (batadv_debugfs) {
-		debugfs_remove_recursive(batadv_debugfs);
-		batadv_debugfs = NULL;
-	}
+	debugfs_remove_recursive(batadv_debugfs);
+	batadv_debugfs = NULL;
 }
 
 int batadv_debugfs_add_meshif(struct net_device *dev)
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index 3319e1f21f5..f8c3849edff 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2010-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 8e1d89d2b1c..ea0bd31d41c 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2013 B.A.T.M.A.N. contributors:
  *
  * Antonio Quartulli
  *
@@ -738,6 +738,7 @@ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv,
 	struct arphdr *arphdr;
 	struct ethhdr *ethhdr;
 	__be32 ip_src, ip_dst;
+	uint8_t *hw_src, *hw_dst;
 	uint16_t type = 0;
 
 	/* pull the ethernet header */
@@ -777,9 +778,23 @@ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv,
 	ip_src = batadv_arp_ip_src(skb, hdr_size);
 	ip_dst = batadv_arp_ip_dst(skb, hdr_size);
 	if (ipv4_is_loopback(ip_src) || ipv4_is_multicast(ip_src) ||
-	    ipv4_is_loopback(ip_dst) || ipv4_is_multicast(ip_dst))
+	    ipv4_is_loopback(ip_dst) || ipv4_is_multicast(ip_dst) ||
+	    ipv4_is_zeronet(ip_src) || ipv4_is_lbcast(ip_src) ||
+	    ipv4_is_zeronet(ip_dst) || ipv4_is_lbcast(ip_dst))
 		goto out;
 
+	hw_src = batadv_arp_hw_src(skb, hdr_size);
+	if (is_zero_ether_addr(hw_src) || is_multicast_ether_addr(hw_src))
+		goto out;
+
+	/* we don't care about the destination MAC address in ARP requests */
+	if (arphdr->ar_op != htons(ARPOP_REQUEST)) {
+		hw_dst = batadv_arp_hw_dst(skb, hdr_size);
+		if (is_zero_ether_addr(hw_dst) ||
+		    is_multicast_ether_addr(hw_dst))
+			goto out;
+	}
+
 	type = ntohs(arphdr->ar_op);
 out:
 	return type;
@@ -1012,6 +1027,8 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
 	 */
 	ret = !batadv_is_my_client(bat_priv, hw_dst);
 out:
+	if (ret)
+		kfree_skb(skb);
 	/* if ret == false -> packet has to be delivered to the interface */
 	return ret;
 }
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
index d060c033e7d..125c8c6fcfa 100644
--- a/net/batman-adv/distributed-arp-table.h
+++ b/net/batman-adv/distributed-arp-table.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2013 B.A.T.M.A.N. contributors:
  *
  * Antonio Quartulli
  *
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index dd07c7e3654..074107f2cfa 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2009-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index f0d129e323c..039902dca4a 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2009-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 9001208d175..84bb2b18d71 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2009-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index 13697f6e711..509b2bf8c2f 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2009-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index f1d37cd8181..368219e026a 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -457,6 +457,24 @@ out:
 		batadv_hardif_free_ref(primary_if);
 }
 
+/**
+ * batadv_hardif_remove_interface_finish - cleans up the remains of a hardif
+ * @work: work queue item
+ *
+ * Free the parts of the hard interface which can not be removed under
+ * rtnl lock (to prevent deadlock situations).
+ */
+static void batadv_hardif_remove_interface_finish(struct work_struct *work)
+{
+	struct batadv_hard_iface *hard_iface;
+
+	hard_iface = container_of(work, struct batadv_hard_iface,
+				  cleanup_work);
+
+	batadv_sysfs_del_hardif(&hard_iface->hardif_obj);
+	batadv_hardif_free_ref(hard_iface);
+}
+
 static struct batadv_hard_iface *
 batadv_hardif_add_interface(struct net_device *net_dev)
 {
@@ -484,6 +502,9 @@ batadv_hardif_add_interface(struct net_device *net_dev)
 	hard_iface->soft_iface = NULL;
 	hard_iface->if_status = BATADV_IF_NOT_IN_USE;
 	INIT_LIST_HEAD(&hard_iface->list);
+	INIT_WORK(&hard_iface->cleanup_work,
+		  batadv_hardif_remove_interface_finish);
+
 	/* extra reference for return */
 	atomic_set(&hard_iface->refcount, 2);
 
@@ -518,8 +539,7 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface)
 		return;
 
 	hard_iface->if_status = BATADV_IF_TO_BE_REMOVED;
-	batadv_sysfs_del_hardif(&hard_iface->hardif_obj);
-	batadv_hardif_free_ref(hard_iface);
+	queue_work(batadv_event_workqueue, &hard_iface->cleanup_work);
 }
 
 void batadv_hardif_remove_interfaces(void)
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 3732366e744..308437d52e2 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c
index 15a849c2d41..7198dafd3bf 100644
--- a/net/batman-adv/hash.c
+++ b/net/batman-adv/hash.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2006-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2013 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
  *
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index e05333905af..1b4da72f209 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2006-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2013 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
  *
@@ -89,7 +89,7 @@ static inline void batadv_hash_delete(struct batadv_hashtable *hash,
  *
  *	Returns the new hash value.
  */
-static inline uint32_t batadv_hash_bytes(uint32_t hash, void *data,
+static inline uint32_t batadv_hash_bytes(uint32_t hash, const void *data,
 					 uint32_t size)
 {
 	const unsigned char *key = data;
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 87ca8095b01..0ba6c899b2d 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h
index 29443a1dbb5..1fcca37b622 100644
--- a/net/batman-adv/icmp_socket.h
+++ b/net/batman-adv/icmp_socket.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index f65a222b7b8..21fe6987733 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 2f85577086a..ced08b936a9 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -26,7 +26,7 @@
 #define BATADV_DRIVER_DEVICE "batman-adv"
 
 #ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2012.5.0"
+#define BATADV_SOURCE_VERSION "2013.1.0"
 #endif
 
 /* B.A.T.M.A.N. parameters */
@@ -41,9 +41,11 @@
  * -> TODO: check influence on BATADV_TQ_LOCAL_WINDOW_SIZE
  */
 #define BATADV_PURGE_TIMEOUT 200000 /* 200 seconds */
-#define BATADV_TT_LOCAL_TIMEOUT 3600000 /* in milliseconds */
+#define BATADV_TT_LOCAL_TIMEOUT 600000 /* in milliseconds */
 #define BATADV_TT_CLIENT_ROAM_TIMEOUT 600000 /* in milliseconds */
 #define BATADV_TT_CLIENT_TEMP_TIMEOUT 600000 /* in milliseconds */
+#define BATADV_TT_WORK_PERIOD 5000 /* 5 seconds */
+#define BATADV_ORIG_WORK_PERIOD 1000 /* 1 second */
 #define BATADV_DAT_ENTRY_TIMEOUT (5*60000) /* 5 mins in milliseconds */
 /* sliding packet range of received originator messages in sequence numbers
  * (should be a multiple of our word size)
@@ -276,9 +278,7 @@ static inline bool batadv_has_timed_out(unsigned long timestamp,
 static inline void batadv_add_counter(struct batadv_priv *bat_priv, size_t idx,
 				      size_t count)
 {
-	int cpu = get_cpu();
-	per_cpu_ptr(bat_priv->bat_counters, cpu)[idx] += count;
-	put_cpu();
+	this_cpu_add(bat_priv->bat_counters[idx], count);
 }
 
 #define batadv_inc_counter(b, i) batadv_add_counter(b, i, 1)
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 8c32cf1c2de..457ea445217 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2009-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -29,14 +29,10 @@
 #include "soft-interface.h"
 #include "bridge_loop_avoidance.h"
 
-static void batadv_purge_orig(struct work_struct *work);
+/* hash class keys */
+static struct lock_class_key batadv_orig_hash_lock_class_key;
 
-static void batadv_start_purge_timer(struct batadv_priv *bat_priv)
-{
-	INIT_DELAYED_WORK(&bat_priv->orig_work, batadv_purge_orig);
-	queue_delayed_work(batadv_event_workqueue,
-			   &bat_priv->orig_work, msecs_to_jiffies(1000));
-}
+static void batadv_purge_orig(struct work_struct *work);
 
 /* returns 1 if they are the same originator */
 static int batadv_compare_orig(const struct hlist_node *node, const void *data2)
@@ -57,7 +53,14 @@ int batadv_originator_init(struct batadv_priv *bat_priv)
 	if (!bat_priv->orig_hash)
 		goto err;
 
-	batadv_start_purge_timer(bat_priv);
+	batadv_hash_set_lock_class(bat_priv->orig_hash,
+				   &batadv_orig_hash_lock_class_key);
+
+	INIT_DELAYED_WORK(&bat_priv->orig_work, batadv_purge_orig);
+	queue_delayed_work(batadv_event_workqueue,
+			   &bat_priv->orig_work,
+			   msecs_to_jiffies(BATADV_ORIG_WORK_PERIOD));
+
 	return 0;
 
 err:
@@ -178,7 +181,6 @@ void batadv_originator_free(struct batadv_priv *bat_priv)
 		spin_lock_bh(list_lock);
 		hlist_for_each_entry_safe(orig_node, node, node_tmp,
 					  head, hash_entry) {
-
 			hlist_del_rcu(node);
 			batadv_orig_node_free_ref(orig_node);
 		}
@@ -285,7 +287,6 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv,
 	/* for all neighbors towards this originator ... */
 	hlist_for_each_entry_safe(neigh_node, node, node_tmp,
 				  &orig_node->neigh_list, list) {
-
 		last_seen = neigh_node->last_seen;
 		if_incoming = neigh_node->if_incoming;
 
@@ -293,7 +294,6 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv,
 		    (if_incoming->if_status == BATADV_IF_INACTIVE) ||
 		    (if_incoming->if_status == BATADV_IF_NOT_IN_USE) ||
 		    (if_incoming->if_status == BATADV_IF_TO_BE_REMOVED)) {
-
 			if ((if_incoming->if_status == BATADV_IF_INACTIVE) ||
 			    (if_incoming->if_status == BATADV_IF_NOT_IN_USE) ||
 			    (if_incoming->if_status == BATADV_IF_TO_BE_REMOVED))
@@ -393,7 +393,9 @@ static void batadv_purge_orig(struct work_struct *work)
 	delayed_work = container_of(work, struct delayed_work, work);
 	bat_priv = container_of(delayed_work, struct batadv_priv, orig_work);
 	_batadv_purge_orig(bat_priv);
-	batadv_start_purge_timer(bat_priv);
+	queue_delayed_work(batadv_event_workqueue,
+			   &bat_priv->orig_work,
+			   msecs_to_jiffies(BATADV_ORIG_WORK_PERIOD));
 }
 
 void batadv_purge_orig_ref(struct batadv_priv *bat_priv)
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 9778e656dec..286bf743e76 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index cb6405bf755..ed0aa89bbf8 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/ring_buffer.c b/net/batman-adv/ring_buffer.c
index c8f61e395b7..ccab0bbdbb5 100644
--- a/net/batman-adv/ring_buffer.c
+++ b/net/batman-adv/ring_buffer.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/ring_buffer.h b/net/batman-adv/ring_buffer.h
index fda8c17df27..3f92ae248e8 100644
--- a/net/batman-adv/ring_buffer.h
+++ b/net/batman-adv/ring_buffer.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 1aa1722d018..60ba03fc839 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -80,7 +80,6 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
 
 	/* route added */
 	} else if ((!curr_router) && (neigh_node)) {
-
 		batadv_dbg(BATADV_DBG_ROUTES, bat_priv,
 			   "Adding route towards: %pM (via %pM)\n",
 			   orig_node->orig, neigh_node->addr);
@@ -172,7 +171,6 @@ void batadv_bonding_candidate_add(struct batadv_orig_node *orig_node,
 	 */
 	hlist_for_each_entry_rcu(tmp_neigh_node, node,
 				 &orig_node->neigh_list, list) {
-
 		if (tmp_neigh_node == neigh_node)
 			continue;
 
@@ -836,7 +834,6 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
 	if (unicast_packet->header.packet_type == BATADV_UNICAST_FRAG &&
 	    batadv_frag_can_reassemble(skb,
 				       neigh_node->if_incoming->net_dev->mtu)) {
-
 		ret = batadv_frag_reassemble_skb(skb, bat_priv, &new_skb);
 
 		if (ret == NET_RX_DROP)
@@ -1103,7 +1100,6 @@ int batadv_recv_ucast_frag_packet(struct sk_buff *skb,
 
 	/* packet for me */
 	if (batadv_is_my_mac(unicast_packet->dest)) {
-
 		ret = batadv_frag_reassemble_skb(skb, bat_priv, &new_skb);
 
 		if (ret == NET_RX_DROP)
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index 9262279ea66..99eeafaba40 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 4425af9dad4..80ca65fc89a 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -155,8 +155,6 @@ _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
 	spin_unlock_bh(&bat_priv->forw_bcast_list_lock);
 
 	/* start timer for this packet */
-	INIT_DELAYED_WORK(&forw_packet->delayed_work,
-			  batadv_send_outstanding_bcast_packet);
 	queue_delayed_work(batadv_event_workqueue, &forw_packet->delayed_work,
 			   send_time);
 }
@@ -210,6 +208,9 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
 	/* how often did we send the bcast packet ? */
 	forw_packet->num_packets = 0;
 
+	INIT_DELAYED_WORK(&forw_packet->delayed_work,
+			  batadv_send_outstanding_bcast_packet);
+
 	_batadv_add_bcast_packet_to_list(bat_priv, forw_packet, delay);
 	return NETDEV_TX_OK;
 
@@ -330,7 +331,6 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
 	spin_lock_bh(&bat_priv->forw_bcast_list_lock);
 	hlist_for_each_entry_safe(forw_packet, tmp_node, safe_tmp_node,
 				  &bat_priv->forw_bcast_list, list) {
-
 		/* if purge_outstanding_packets() was called with an argument
 		 * we delete only packets belonging to the given interface
 		 */
@@ -357,7 +357,6 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
 	spin_lock_bh(&bat_priv->forw_bat_list_lock);
 	hlist_for_each_entry_safe(forw_packet, tmp_node, safe_tmp_node,
 				  &bat_priv->forw_bat_list, list) {
-
 		/* if purge_outstanding_packets() was called with an argument
 		 * we delete only packets belonging to the given interface
 		 */
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 0078dece1ab..38e662f619a 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 6b548fde8e0..2711e870f55 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -124,7 +124,6 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p)
 		batadv_tt_local_add(dev, addr->sa_data, BATADV_NULL_IFINDEX);
 	}
 
-	dev->addr_assign_type &= ~NET_ADDR_RANDOM;
 	return 0;
 }
 
@@ -181,7 +180,8 @@ static int batadv_interface_tx(struct sk_buff *skb,
 		goto dropped;
 
 	/* Register the client MAC in the transtable */
-	batadv_tt_local_add(soft_iface, ethhdr->h_source, skb->skb_iif);
+	if (!is_multicast_ether_addr(ethhdr->h_source))
+		batadv_tt_local_add(soft_iface, ethhdr->h_source, skb->skb_iif);
 
 	/* don't accept stp packets. STP does not help in meshes.
 	 * better use the bridge loop avoidance ...
@@ -449,6 +449,30 @@ static void batadv_interface_setup(struct net_device *dev)
 	memset(priv, 0, sizeof(*priv));
 }
 
+/**
+ * batadv_softif_destroy_finish - cleans up the remains of a softif
+ * @work: work queue item
+ *
+ * Free the parts of the soft interface which can not be removed under
+ * rtnl lock (to prevent deadlock situations).
+ */
+static void batadv_softif_destroy_finish(struct work_struct *work)
+{
+	struct batadv_priv *bat_priv;
+	struct net_device *soft_iface;
+
+	bat_priv = container_of(work, struct batadv_priv,
+				cleanup_work);
+	soft_iface = bat_priv->soft_iface;
+
+	batadv_debugfs_del_meshif(soft_iface);
+	batadv_sysfs_del_meshif(soft_iface);
+
+	rtnl_lock();
+	unregister_netdevice(soft_iface);
+	rtnl_unlock();
+}
+
 struct net_device *batadv_softif_create(const char *name)
 {
 	struct net_device *soft_iface;
@@ -463,6 +487,8 @@ struct net_device *batadv_softif_create(const char *name)
 		goto out;
 
 	bat_priv = netdev_priv(soft_iface);
+	bat_priv->soft_iface = soft_iface;
+	INIT_WORK(&bat_priv->cleanup_work, batadv_softif_destroy_finish);
 
 	/* batadv_interface_stats() needs to be available as soon as
 	 * register_netdevice() has been called
@@ -480,7 +506,9 @@ struct net_device *batadv_softif_create(const char *name)
 
 	atomic_set(&bat_priv->aggregated_ogms, 1);
 	atomic_set(&bat_priv->bonding, 0);
+#ifdef CONFIG_BATMAN_ADV_BLA
 	atomic_set(&bat_priv->bridge_loop_avoidance, 0);
+#endif
 #ifdef CONFIG_BATMAN_ADV_DAT
 	atomic_set(&bat_priv->distributed_arp_table, 1);
 #endif
@@ -491,7 +519,9 @@ struct net_device *batadv_softif_create(const char *name)
 	atomic_set(&bat_priv->gw_bandwidth, 41);
 	atomic_set(&bat_priv->orig_interval, 1000);
 	atomic_set(&bat_priv->hop_penalty, 30);
+#ifdef CONFIG_BATMAN_ADV_DEBUG
 	atomic_set(&bat_priv->log_level, 0);
+#endif
 	atomic_set(&bat_priv->fragmentation, 1);
 	atomic_set(&bat_priv->bcast_queue_left, BATADV_BCAST_QUEUE_LEN);
 	atomic_set(&bat_priv->batman_queue_left, BATADV_BATMAN_QUEUE_LEN);
@@ -547,10 +577,10 @@ out:
 
 void batadv_softif_destroy(struct net_device *soft_iface)
 {
-	batadv_debugfs_del_meshif(soft_iface);
-	batadv_sysfs_del_meshif(soft_iface);
+	struct batadv_priv *bat_priv = netdev_priv(soft_iface);
+
 	batadv_mesh_free(soft_iface);
-	unregister_netdevice(soft_iface);
+	queue_work(batadv_event_workqueue, &bat_priv->cleanup_work);
 }
 
 int batadv_softif_is_valid(const struct net_device *net_dev)
@@ -581,10 +611,10 @@ static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 static void batadv_get_drvinfo(struct net_device *dev,
 			       struct ethtool_drvinfo *info)
 {
-	strcpy(info->driver, "B.A.T.M.A.N. advanced");
-	strcpy(info->version, BATADV_SOURCE_VERSION);
-	strcpy(info->fw_version, "N/A");
-	strcpy(info->bus_info, "batman");
+	strlcpy(info->driver, "B.A.T.M.A.N. advanced", sizeof(info->driver));
+	strlcpy(info->version, BATADV_SOURCE_VERSION, sizeof(info->version));
+	strlcpy(info->fw_version, "N/A", sizeof(info->fw_version));
+	strlcpy(info->bus_info, "batman", sizeof(info->bus_info));
 }
 
 static u32 batadv_get_msglevel(struct net_device *dev)
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 07a08fed28b..43182e5e603 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 84a55cb19b0..afbba319d73 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2010-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h
index 3fd1412b062..479acf4c16f 100644
--- a/net/batman-adv/sysfs.h
+++ b/net/batman-adv/sysfs.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2010-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 22457a7952b..d44672f4a34 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich, Antonio Quartulli
  *
@@ -29,6 +29,10 @@
 
 #include <linux/crc16.h>
 
+/* hash class keys */
+static struct lock_class_key batadv_tt_local_hash_lock_class_key;
+static struct lock_class_key batadv_tt_global_hash_lock_class_key;
+
 static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client,
 				 struct batadv_orig_node *orig_node);
 static void batadv_tt_purge(struct work_struct *work);
@@ -48,13 +52,6 @@ static int batadv_compare_tt(const struct hlist_node *node, const void *data2)
 	return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0);
 }
 
-static void batadv_tt_start_timer(struct batadv_priv *bat_priv)
-{
-	INIT_DELAYED_WORK(&bat_priv->tt.work, batadv_tt_purge);
-	queue_delayed_work(batadv_event_workqueue, &bat_priv->tt.work,
-			   msecs_to_jiffies(5000));
-}
-
 static struct batadv_tt_common_entry *
 batadv_tt_hash_find(struct batadv_hashtable *hash, const void *data)
 {
@@ -112,7 +109,6 @@ batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const void *data)
 					       struct batadv_tt_global_entry,
 					       common);
 	return tt_global_entry;
-
 }
 
 static void
@@ -235,6 +231,9 @@ static int batadv_tt_local_init(struct batadv_priv *bat_priv)
 	if (!bat_priv->tt.local_hash)
 		return -ENOMEM;
 
+	batadv_hash_set_lock_class(bat_priv->tt.local_hash,
+				   &batadv_tt_local_hash_lock_class_key);
+
 	return 0;
 }
 
@@ -249,7 +248,6 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv,
 	batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt,
 			   batadv_choose_orig, tt_global->common.addr);
 	batadv_tt_global_entry_free_ref(tt_global);
-
 }
 
 void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
@@ -305,7 +303,11 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
 		   (uint8_t)atomic_read(&bat_priv->tt.vn));
 
 	memcpy(tt_local->common.addr, addr, ETH_ALEN);
-	tt_local->common.flags = BATADV_NO_FLAGS;
+	/* The local entry has to be marked as NEW to avoid to send it in
+	 * a full table response going out before the next ttvn increment
+	 * (consistency check)
+	 */
+	tt_local->common.flags = BATADV_TT_CLIENT_NEW;
 	if (batadv_is_wifi_iface(ifindex))
 		tt_local->common.flags |= BATADV_TT_CLIENT_WIFI;
 	atomic_set(&tt_local->common.refcount, 2);
@@ -316,12 +318,6 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
 	if (batadv_compare_eth(addr, soft_iface->dev_addr))
 		tt_local->common.flags |= BATADV_TT_CLIENT_NOPURGE;
 
-	/* The local entry has to be marked as NEW to avoid to send it in
-	 * a full table response going out before the next ttvn increment
-	 * (consistency check)
-	 */
-	tt_local->common.flags |= BATADV_TT_CLIENT_NEW;
-
 	hash_added = batadv_hash_add(bat_priv->tt.local_hash, batadv_compare_tt,
 				     batadv_choose_orig, &tt_local->common,
 				     &tt_local->common.hash_entry);
@@ -472,18 +468,27 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
 	struct batadv_priv *bat_priv = netdev_priv(net_dev);
 	struct batadv_hashtable *hash = bat_priv->tt.local_hash;
 	struct batadv_tt_common_entry *tt_common_entry;
+	struct batadv_tt_local_entry *tt_local;
 	struct batadv_hard_iface *primary_if;
 	struct hlist_node *node;
 	struct hlist_head *head;
 	uint32_t i;
+	int last_seen_secs;
+	int last_seen_msecs;
+	unsigned long last_seen_jiffies;
+	bool no_purge;
+	uint16_t np_flag = BATADV_TT_CLIENT_NOPURGE;
 
 	primary_if = batadv_seq_print_text_primary_if_get(seq);
 	if (!primary_if)
 		goto out;
 
 	seq_printf(seq,
-		   "Locally retrieved addresses (from %s) announced via TT (TTVN: %u):\n",
-		   net_dev->name, (uint8_t)atomic_read(&bat_priv->tt.vn));
+		   "Locally retrieved addresses (from %s) announced via TT (TTVN: %u CRC: %#.4x):\n",
+		   net_dev->name, (uint8_t)atomic_read(&bat_priv->tt.vn),
+		   bat_priv->tt.local_crc);
+	seq_printf(seq, "       %-13s %-7s %-10s\n", "Client", "Flags",
+		   "Last seen");
 
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
@@ -491,18 +496,29 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
 		rcu_read_lock();
 		hlist_for_each_entry_rcu(tt_common_entry, node,
 					 head, hash_entry) {
-			seq_printf(seq, " * %pM [%c%c%c%c%c]\n",
+			tt_local = container_of(tt_common_entry,
+						struct batadv_tt_local_entry,
+						common);
+			last_seen_jiffies = jiffies - tt_local->last_seen;
+			last_seen_msecs = jiffies_to_msecs(last_seen_jiffies);
+			last_seen_secs = last_seen_msecs / 1000;
+			last_seen_msecs = last_seen_msecs % 1000;
+
+			no_purge = tt_common_entry->flags & np_flag;
+
+			seq_printf(seq, " * %pM [%c%c%c%c%c] %3u.%03u\n",
 				   tt_common_entry->addr,
 				   (tt_common_entry->flags &
 				    BATADV_TT_CLIENT_ROAM ? 'R' : '.'),
-				   (tt_common_entry->flags &
-				    BATADV_TT_CLIENT_NOPURGE ? 'P' : '.'),
+				   no_purge ? 'P' : '.',
 				   (tt_common_entry->flags &
 				    BATADV_TT_CLIENT_NEW ? 'N' : '.'),
 				   (tt_common_entry->flags &
 				    BATADV_TT_CLIENT_PENDING ? 'X' : '.'),
 				   (tt_common_entry->flags &
-				    BATADV_TT_CLIENT_WIFI ? 'W' : '.'));
+				    BATADV_TT_CLIENT_WIFI ? 'W' : '.'),
+				   no_purge ? 0 : last_seen_secs,
+				   no_purge ? 0 : last_seen_msecs);
 		}
 		rcu_read_unlock();
 	}
@@ -627,7 +643,6 @@ static void batadv_tt_local_purge(struct batadv_priv *bat_priv)
 		batadv_tt_local_purge_list(bat_priv, head);
 		spin_unlock_bh(list_lock);
 	}
-
 }
 
 static void batadv_tt_local_table_free(struct batadv_priv *bat_priv)
@@ -676,6 +691,9 @@ static int batadv_tt_global_init(struct batadv_priv *bat_priv)
 	if (!bat_priv->tt.global_hash)
 		return -ENOMEM;
 
+	batadv_hash_set_lock_class(bat_priv->tt.global_hash,
+				   &batadv_tt_global_hash_lock_class_key);
+
 	return 0;
 }
 
@@ -967,10 +985,11 @@ batadv_tt_global_print_entry(struct batadv_tt_global_entry *tt_global_entry,
 	best_entry = batadv_transtable_best_orig(tt_global_entry);
 	if (best_entry) {
 		last_ttvn = atomic_read(&best_entry->orig_node->last_ttvn);
-		seq_printf(seq,	" %c %pM  (%3u) via %pM     (%3u)   [%c%c%c]\n",
+		seq_printf(seq,
+			   " %c %pM  (%3u) via %pM     (%3u)   (%#.4x) [%c%c%c]\n",
 			   '*', tt_global_entry->common.addr,
 			   best_entry->ttvn, best_entry->orig_node->orig,
-			   last_ttvn,
+			   last_ttvn, best_entry->orig_node->tt_crc,
 			   (flags & BATADV_TT_CLIENT_ROAM ? 'R' : '.'),
 			   (flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.'),
 			   (flags & BATADV_TT_CLIENT_TEMP ? 'T' : '.'));
@@ -1012,8 +1031,9 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset)
 	seq_printf(seq,
 		   "Globally announced TT entries received via the mesh %s\n",
 		   net_dev->name);
-	seq_printf(seq, "       %-13s %s       %-15s %s %s\n",
-		   "Client", "(TTVN)", "Originator", "(Curr TTVN)", "Flags");
+	seq_printf(seq, "       %-13s %s       %-15s %s (%-6s) %s\n",
+		   "Client", "(TTVN)", "Originator", "(Curr TTVN)", "CRC",
+		   "Flags");
 
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
@@ -1049,7 +1069,6 @@ batadv_tt_global_del_orig_list(struct batadv_tt_global_entry *tt_global_entry)
 		batadv_tt_orig_list_entry_free_ref(orig_entry);
 	}
 	spin_unlock_bh(&tt_global_entry->list_lock);
-
 }
 
 static void
@@ -1825,7 +1844,6 @@ out:
 	if (!ret)
 		kfree_skb(skb);
 	return ret;
-
 }
 
 static bool
@@ -2111,7 +2129,9 @@ int batadv_tt_init(struct batadv_priv *bat_priv)
 	if (ret < 0)
 		return ret;
 
-	batadv_tt_start_timer(bat_priv);
+	INIT_DELAYED_WORK(&bat_priv->tt.work, batadv_tt_purge);
+	queue_delayed_work(batadv_event_workqueue, &bat_priv->tt.work,
+			   msecs_to_jiffies(BATADV_TT_WORK_PERIOD));
 
 	return 1;
 }
@@ -2261,7 +2281,8 @@ static void batadv_tt_purge(struct work_struct *work)
 	batadv_tt_req_purge(bat_priv);
 	batadv_tt_roam_purge(bat_priv);
 
-	batadv_tt_start_timer(bat_priv);
+	queue_delayed_work(batadv_event_workqueue, &bat_priv->tt.work,
+			   msecs_to_jiffies(BATADV_TT_WORK_PERIOD));
 }
 
 void batadv_tt_free(struct batadv_priv *bat_priv)
@@ -2352,7 +2373,6 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
 		}
 		spin_unlock_bh(list_lock);
 	}
-
 }
 
 static int batadv_tt_commit_changes(struct batadv_priv *bat_priv,
@@ -2496,7 +2516,7 @@ void batadv_tt_update_orig(struct batadv_priv *bat_priv,
 		    orig_node->tt_crc != tt_crc) {
 request_table:
 			batadv_dbg(BATADV_DBG_TT, bat_priv,
-				   "TT inconsistency for %pM. Need to retrieve the correct information (ttvn: %u last_ttvn: %u crc: %u last_crc: %u num_changes: %u)\n",
+				   "TT inconsistency for %pM. Need to retrieve the correct information (ttvn: %u last_ttvn: %u crc: %#.4x last_crc: %#.4x num_changes: %u)\n",
 				   orig_node->orig, ttvn, orig_ttvn, tt_crc,
 				   orig_node->tt_crc, tt_num_changes);
 			batadv_send_tt_request(bat_priv, orig_node, ttvn,
@@ -2549,7 +2569,6 @@ bool batadv_tt_local_client_is_roaming(struct batadv_priv *bat_priv,
 	batadv_tt_local_entry_free_ref(tt_local_entry);
 out:
 	return ret;
-
 }
 
 bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv,
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index 46d4451a59e..ab8e683b402 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich, Antonio Quartulli
  *
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index ae9ac9aca8c..4cd87a0b5b8 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -24,6 +24,9 @@
 #include "bitarray.h"
 #include <linux/kernel.h>
 
+/**
+ * Maximum overhead for the encapsulation for a payload packet
+ */
 #define BATADV_HEADER_LEN \
 	(ETH_HLEN + max(sizeof(struct batadv_unicast_packet), \
 			sizeof(struct batadv_bcast_packet)))
@@ -51,6 +54,22 @@ struct batadv_hard_iface_bat_iv {
 	atomic_t ogm_seqno;
 };
 
+/**
+ * struct batadv_hard_iface - network device known to batman-adv
+ * @list: list node for batadv_hardif_list
+ * @if_num: identificator of the interface
+ * @if_status: status of the interface for batman-adv
+ * @net_dev: pointer to the net_device
+ * @frag_seqno: last fragment sequence number sent by this interface
+ * @hardif_obj: kobject of the per interface sysfs "mesh" directory
+ * @refcount: number of contexts the object is used
+ * @batman_adv_ptype: packet type describing packets that should be processed by
+ *  batman-adv for this interface
+ * @soft_iface: the batman-adv interface which uses this network interface
+ * @rcu: struct used for freeing in an RCU-safe manner
+ * @bat_iv: BATMAN IV specific per hard interface data
+ * @cleanup_work: work queue callback item for hard interface deinit
+ */
 struct batadv_hard_iface {
 	struct list_head list;
 	int16_t if_num;
@@ -63,22 +82,52 @@ struct batadv_hard_iface {
 	struct net_device *soft_iface;
 	struct rcu_head rcu;
 	struct batadv_hard_iface_bat_iv bat_iv;
+	struct work_struct cleanup_work;
 };
 
 /**
- *	struct batadv_orig_node - structure for orig_list maintaining nodes of mesh
- *	@primary_addr: hosts primary interface address
- *	@last_seen: when last packet from this node was received
- *	@bcast_seqno_reset: time when the broadcast seqno window was reset
- *	@batman_seqno_reset: time when the batman seqno window was reset
- *	@gw_flags: flags related to gateway class
- *	@flags: for now only VIS_SERVER flag
- *	@last_real_seqno: last and best known sequence number
- *	@last_ttl: ttl of last received packet
- *	@last_bcast_seqno: last broadcast sequence number received by this host
- *
- *	@candidates: how many candidates are available
- *	@selected: next bonding candidate
+ * struct batadv_orig_node - structure for orig_list maintaining nodes of mesh
+ * @orig: originator ethernet address
+ * @primary_addr: hosts primary interface address
+ * @router: router that should be used to reach this originator
+ * @batadv_dat_addr_t:  address of the orig node in the distributed hash
+ * @bcast_own: bitfield containing the number of our OGMs this orig_node
+ *  rebroadcasted "back" to us (relative to last_real_seqno)
+ * @bcast_own_sum: counted result of bcast_own
+ * @last_seen: time when last packet from this node was received
+ * @bcast_seqno_reset: time when the broadcast seqno window was reset
+ * @batman_seqno_reset: time when the batman seqno window was reset
+ * @gw_flags: flags related to gateway class
+ * @flags: for now only VIS_SERVER flag
+ * @last_ttvn: last seen translation table version number
+ * @tt_crc: CRC of the translation table
+ * @tt_buff: last tt changeset this node received from the orig node
+ * @tt_buff_len: length of the last tt changeset this node received from the
+ *  orig node
+ * @tt_buff_lock: lock that protects tt_buff and tt_buff_len
+ * @tt_size: number of global TT entries announced by the orig node
+ * @tt_initialised: bool keeping track of whether or not this node have received
+ *  any translation table information from the orig node yet
+ * @last_real_seqno: last and best known sequence number
+ * @last_ttl: ttl of last received packet
+ * @bcast_bits: bitfield containing the info which payload broadcast originated
+ *  from this orig node this host already has seen (relative to
+ *  last_bcast_seqno)
+ * @last_bcast_seqno: last broadcast sequence number received by this host
+ * @neigh_list: list of potential next hop neighbor towards this orig node
+ * @frag_list: fragmentation buffer list for fragment re-assembly
+ * @last_frag_packet: time when last fragmented packet from this node was
+ *  received
+ * @neigh_list_lock: lock protecting neigh_list, router and bonding_list
+ * @hash_entry: hlist node for batadv_priv::orig_hash
+ * @bat_priv: pointer to soft_iface this orig node belongs to
+ * @ogm_cnt_lock: lock protecting bcast_own, bcast_own_sum,
+ *  neigh_node->real_bits & neigh_node->real_packet_count
+ * @bcast_seqno_lock: lock protecting bcast_bits & last_bcast_seqno
+ * @bond_candidates: how many candidates are available
+ * @bond_list: list of bonding candidates
+ * @refcount: number of contexts the object is used
+ * @rcu: struct used for freeing in an RCU-safe manner
  */
 struct batadv_orig_node {
 	uint8_t orig[ETH_ALEN];
@@ -94,11 +143,11 @@ struct batadv_orig_node {
 	unsigned long batman_seqno_reset;
 	uint8_t gw_flags;
 	uint8_t flags;
-	atomic_t last_ttvn; /* last seen translation table version number */
+	atomic_t last_ttvn;
 	uint16_t tt_crc;
 	unsigned char *tt_buff;
 	int16_t tt_buff_len;
-	spinlock_t tt_buff_lock; /* protects tt_buff */
+	spinlock_t tt_buff_lock; /* protects tt_buff & tt_buff_len */
 	atomic_t tt_size;
 	bool tt_initialised;
 	uint32_t last_real_seqno;
@@ -107,23 +156,31 @@ struct batadv_orig_node {
 	uint32_t last_bcast_seqno;
 	struct hlist_head neigh_list;
 	struct list_head frag_list;
-	spinlock_t neigh_list_lock; /* protects neigh_list and router */
-	atomic_t refcount;
-	struct rcu_head rcu;
+	unsigned long last_frag_packet;
+	/* neigh_list_lock protects: neigh_list, router & bonding_list */
+	spinlock_t neigh_list_lock;
 	struct hlist_node hash_entry;
 	struct batadv_priv *bat_priv;
-	unsigned long last_frag_packet;
 	/* ogm_cnt_lock protects: bcast_own, bcast_own_sum,
-	 * neigh_node->real_bits, neigh_node->real_packet_count
+	 * neigh_node->real_bits & neigh_node->real_packet_count
 	 */
 	spinlock_t ogm_cnt_lock;
-	/* bcast_seqno_lock protects bcast_bits, last_bcast_seqno */
+	/* bcast_seqno_lock protects: bcast_bits & last_bcast_seqno */
 	spinlock_t bcast_seqno_lock;
-	spinlock_t tt_list_lock; /* protects tt_list */
 	atomic_t bond_candidates;
 	struct list_head bond_list;
+	atomic_t refcount;
+	struct rcu_head rcu;
 };
 
+/**
+ * struct batadv_gw_node - structure for orig nodes announcing gw capabilities
+ * @list: list node for batadv_priv_gw::list
+ * @orig_node: pointer to corresponding orig node
+ * @deleted: this struct is scheduled for deletion
+ * @refcount: number of contexts the object is used
+ * @rcu: struct used for freeing in an RCU-safe manner
+ */
 struct batadv_gw_node {
 	struct hlist_node list;
 	struct batadv_orig_node *orig_node;
@@ -132,13 +189,28 @@ struct batadv_gw_node {
 	struct rcu_head rcu;
 };
 
-/*	batadv_neigh_node
- *	@last_seen: when last packet via this neighbor was received
+/**
+ * struct batadv_neigh_node - structure for single hop neighbors
+ * @list: list node for batadv_orig_node::neigh_list
+ * @addr: mac address of neigh node
+ * @tq_recv: ring buffer of received TQ values from this neigh node
+ * @tq_index: ring buffer index
+ * @tq_avg: averaged tq of all tq values in the ring buffer (tq_recv)
+ * @last_ttl: last received ttl from this neigh node
+ * @bonding_list: list node for batadv_orig_node::bond_list
+ * @last_seen: when last packet via this neighbor was received
+ * @real_bits: bitfield containing the number of OGMs received from this neigh
+ *  node (relative to orig_node->last_real_seqno)
+ * @real_packet_count: counted result of real_bits
+ * @orig_node: pointer to corresponding orig_node
+ * @if_incoming: pointer to incoming hard interface
+ * @lq_update_lock: lock protecting tq_recv & tq_index
+ * @refcount: number of contexts the object is used
+ * @rcu: struct used for freeing in an RCU-safe manner
  */
 struct batadv_neigh_node {
 	struct hlist_node list;
 	uint8_t addr[ETH_ALEN];
-	uint8_t real_packet_count;
 	uint8_t tq_recv[BATADV_TQ_GLOBAL_WINDOW_SIZE];
 	uint8_t tq_index;
 	uint8_t tq_avg;
@@ -146,13 +218,20 @@ struct batadv_neigh_node {
 	struct list_head bonding_list;
 	unsigned long last_seen;
 	DECLARE_BITMAP(real_bits, BATADV_TQ_LOCAL_WINDOW_SIZE);
-	atomic_t refcount;
-	struct rcu_head rcu;
+	uint8_t real_packet_count;
 	struct batadv_orig_node *orig_node;
 	struct batadv_hard_iface *if_incoming;
-	spinlock_t lq_update_lock;	/* protects: tq_recv, tq_index */
+	spinlock_t lq_update_lock; /* protects tq_recv & tq_index */
+	atomic_t refcount;
+	struct rcu_head rcu;
 };
 
+/**
+ * struct batadv_bcast_duplist_entry - structure for LAN broadcast suppression
+ * @orig[ETH_ALEN]: mac address of orig node orginating the broadcast
+ * @crc: crc32 checksum of broadcast payload
+ * @entrytime: time when the broadcast packet was received
+ */
 #ifdef CONFIG_BATMAN_ADV_BLA
 struct batadv_bcast_duplist_entry {
 	uint8_t orig[ETH_ALEN];
@@ -161,6 +240,33 @@ struct batadv_bcast_duplist_entry {
 };
 #endif
 
+/**
+ * enum batadv_counters - indices for traffic counters
+ * @BATADV_CNT_TX: transmitted payload traffic packet counter
+ * @BATADV_CNT_TX_BYTES: transmitted payload traffic bytes counter
+ * @BATADV_CNT_TX_DROPPED: dropped transmission payload traffic packet counter
+ * @BATADV_CNT_RX: received payload traffic packet counter
+ * @BATADV_CNT_RX_BYTES: received payload traffic bytes counter
+ * @BATADV_CNT_FORWARD: forwarded payload traffic packet counter
+ * @BATADV_CNT_FORWARD_BYTES: forwarded payload traffic bytes counter
+ * @BATADV_CNT_MGMT_TX: transmitted routing protocol traffic packet counter
+ * @BATADV_CNT_MGMT_TX_BYTES: transmitted routing protocol traffic bytes counter
+ * @BATADV_CNT_MGMT_RX: received routing protocol traffic packet counter
+ * @BATADV_CNT_MGMT_RX_BYTES: received routing protocol traffic bytes counter
+ * @BATADV_CNT_TT_REQUEST_TX: transmitted tt req traffic packet counter
+ * @BATADV_CNT_TT_REQUEST_RX: received tt req traffic packet counter
+ * @BATADV_CNT_TT_RESPONSE_TX: transmitted tt resp traffic packet counter
+ * @BATADV_CNT_TT_RESPONSE_RX: received tt resp traffic packet counter
+ * @BATADV_CNT_TT_ROAM_ADV_TX: transmitted tt roam traffic packet counter
+ * @BATADV_CNT_TT_ROAM_ADV_RX: received tt roam traffic packet counter
+ * @BATADV_CNT_DAT_GET_TX: transmitted dht GET traffic packet counter
+ * @BATADV_CNT_DAT_GET_RX: received dht GET traffic packet counter
+ * @BATADV_CNT_DAT_PUT_TX: transmitted dht PUT traffic packet counter
+ * @BATADV_CNT_DAT_PUT_RX: received dht PUT traffic packet counter
+ * @BATADV_CNT_DAT_CACHED_REPLY_TX: transmitted dat cache reply traffic packet
+ *  counter
+ * @BATADV_CNT_NUM: number of traffic counters
+ */
 enum batadv_counters {
 	BATADV_CNT_TX,
 	BATADV_CNT_TX_BYTES,
@@ -192,14 +298,23 @@ enum batadv_counters {
 /**
  * struct batadv_priv_tt - per mesh interface translation table data
  * @vn: translation table version number
+ * @ogm_append_cnt: counter of number of OGMs containing the local tt diff
  * @local_changes: changes registered in an originator interval
- * @poss_change: Detect an ongoing roaming phase. If true, then this node
- *  received a roaming_adv and has to inspect every packet directed to it to
- *  check whether it still is the true destination or not. This flag will be
- *  reset to false as soon as the this node's ttvn is increased
  * @changes_list: tracks tt local changes within an originator interval
- * @req_list: list of pending tt_requests
+ * @local_hash: local translation table hash table
+ * @global_hash: global translation table hash table
+ * @req_list: list of pending & unanswered tt_requests
+ * @roam_list: list of the last roaming events of each client limiting the
+ *  number of roaming events to avoid route flapping
+ * @changes_list_lock: lock protecting changes_list
+ * @req_list_lock: lock protecting req_list
+ * @roam_list_lock: lock protecting roam_list
+ * @local_entry_num: number of entries in the local hash table
  * @local_crc: Checksum of the local table, recomputed before sending a new OGM
+ * @last_changeset: last tt changeset this host has generated
+ * @last_changeset_len: length of last tt changeset this host has generated
+ * @last_changeset_lock: lock protecting last_changeset & last_changeset_len
+ * @work: work queue callback item for translation table purging
  */
 struct batadv_priv_tt {
 	atomic_t vn;
@@ -217,36 +332,83 @@ struct batadv_priv_tt {
 	uint16_t local_crc;
 	unsigned char *last_changeset;
 	int16_t last_changeset_len;
-	spinlock_t last_changeset_lock; /* protects last_changeset */
+	/* protects last_changeset & last_changeset_len */
+	spinlock_t last_changeset_lock;
 	struct delayed_work work;
 };
 
+/**
+ * struct batadv_priv_bla - per mesh interface bridge loope avoidance data
+ * @num_requests; number of bla requests in flight
+ * @claim_hash: hash table containing mesh nodes this host has claimed
+ * @backbone_hash: hash table containing all detected backbone gateways
+ * @bcast_duplist: recently received broadcast packets array (for broadcast
+ *  duplicate suppression)
+ * @bcast_duplist_curr: index of last broadcast packet added to bcast_duplist
+ * @bcast_duplist_lock: lock protecting bcast_duplist & bcast_duplist_curr
+ * @claim_dest: local claim data (e.g. claim group)
+ * @work: work queue callback item for cleanups & bla announcements
+ */
 #ifdef CONFIG_BATMAN_ADV_BLA
 struct batadv_priv_bla {
-	atomic_t num_requests; /* number of bla requests in flight */
+	atomic_t num_requests;
 	struct batadv_hashtable *claim_hash;
 	struct batadv_hashtable *backbone_hash;
 	struct batadv_bcast_duplist_entry bcast_duplist[BATADV_DUPLIST_SIZE];
 	int bcast_duplist_curr;
-	/* protects bcast_duplist and bcast_duplist_curr */
+	/* protects bcast_duplist & bcast_duplist_curr */
 	spinlock_t bcast_duplist_lock;
 	struct batadv_bla_claim_dst claim_dest;
 	struct delayed_work work;
 };
 #endif
 
+/**
+ * struct batadv_debug_log - debug logging data
+ * @log_buff: buffer holding the logs (ring bufer)
+ * @log_start: index of next character to read
+ * @log_end: index of next character to write
+ * @lock: lock protecting log_buff, log_start & log_end
+ * @queue_wait: log reader's wait queue
+ */
+#ifdef CONFIG_BATMAN_ADV_DEBUG
+struct batadv_priv_debug_log {
+	char log_buff[BATADV_LOG_BUF_LEN];
+	unsigned long log_start;
+	unsigned long log_end;
+	spinlock_t lock; /* protects log_buff, log_start and log_end */
+	wait_queue_head_t queue_wait;
+};
+#endif
+
+/**
+ * struct batadv_priv_gw - per mesh interface gateway data
+ * @list: list of available gateway nodes
+ * @list_lock: lock protecting gw_list & curr_gw
+ * @curr_gw: pointer to currently selected gateway node
+ * @reselect: bool indicating a gateway re-selection is in progress
+ */
 struct batadv_priv_gw {
 	struct hlist_head list;
-	spinlock_t list_lock; /* protects gw_list and curr_gw */
+	spinlock_t list_lock; /* protects gw_list & curr_gw */
 	struct batadv_gw_node __rcu *curr_gw;  /* rcu protected pointer */
 	atomic_t reselect;
 };
 
+/**
+ * struct batadv_priv_vis - per mesh interface vis data
+ * @send_list: list of batadv_vis_info packets to sent
+ * @hash: hash table containing vis data from other nodes in the network
+ * @hash_lock: lock protecting the hash table
+ * @list_lock: lock protecting my_info::recv_list
+ * @work: work queue callback item for vis packet sending
+ * @my_info: holds this node's vis data sent on a regular basis
+ */
 struct batadv_priv_vis {
 	struct list_head send_list;
 	struct batadv_hashtable *hash;
 	spinlock_t hash_lock; /* protects hash */
-	spinlock_t list_lock; /* protects info::recv_list */
+	spinlock_t list_lock; /* protects my_info::recv_list */
 	struct delayed_work work;
 	struct batadv_vis_info *my_info;
 };
@@ -265,30 +427,78 @@ struct batadv_priv_dat {
 };
 #endif
 
+/**
+ * struct batadv_priv - per mesh interface data
+ * @mesh_state: current status of the mesh (inactive/active/deactivating)
+ * @soft_iface: net device which holds this struct as private data
+ * @stats: structure holding the data for the ndo_get_stats() call
+ * @bat_counters: mesh internal traffic statistic counters (see batadv_counters)
+ * @aggregated_ogms: bool indicating whether OGM aggregation is enabled
+ * @bonding: bool indicating whether traffic bonding is enabled
+ * @fragmentation: bool indicating whether traffic fragmentation is enabled
+ * @ap_isolation: bool indicating whether ap isolation is enabled
+ * @bridge_loop_avoidance: bool indicating whether bridge loop avoidance is
+ *  enabled
+ * @distributed_arp_table: bool indicating whether distributed ARP table is
+ *  enabled
+ * @vis_mode: vis operation: client or server (see batadv_vis_packettype)
+ * @gw_mode: gateway operation: off, client or server (see batadv_gw_modes)
+ * @gw_sel_class: gateway selection class (applies if gw_mode client)
+ * @gw_bandwidth: gateway announced bandwidth (applies if gw_mode server)
+ * @orig_interval: OGM broadcast interval in milliseconds
+ * @hop_penalty: penalty which will be applied to an OGM's tq-field on every hop
+ * @log_level: configured log level (see batadv_dbg_level)
+ * @bcast_seqno: last sent broadcast packet sequence number
+ * @bcast_queue_left: number of remaining buffered broadcast packet slots
+ * @batman_queue_left: number of remaining OGM packet slots
+ * @num_ifaces: number of interfaces assigned to this mesh interface
+ * @mesh_obj: kobject for sysfs mesh subdirectory
+ * @debug_dir: dentry for debugfs batman-adv subdirectory
+ * @forw_bat_list: list of aggregated OGMs that will be forwarded
+ * @forw_bcast_list: list of broadcast packets that will be rebroadcasted
+ * @orig_hash: hash table containing mesh participants (orig nodes)
+ * @forw_bat_list_lock: lock protecting forw_bat_list
+ * @forw_bcast_list_lock: lock protecting forw_bcast_list
+ * @orig_work: work queue callback item for orig node purging
+ * @cleanup_work: work queue callback item for soft interface deinit
+ * @primary_if: one of the hard interfaces assigned to this mesh interface
+ *  becomes the primary interface
+ * @bat_algo_ops: routing algorithm used by this mesh interface
+ * @bla: bridge loope avoidance data
+ * @debug_log: holding debug logging relevant data
+ * @gw: gateway data
+ * @tt: translation table data
+ * @vis: vis data
+ * @dat: distributed arp table data
+ */
 struct batadv_priv {
 	atomic_t mesh_state;
+	struct net_device *soft_iface;
 	struct net_device_stats stats;
 	uint64_t __percpu *bat_counters; /* Per cpu counters */
-	atomic_t aggregated_ogms;	/* boolean */
-	atomic_t bonding;		/* boolean */
-	atomic_t fragmentation;		/* boolean */
-	atomic_t ap_isolation;		/* boolean */
-	atomic_t bridge_loop_avoidance;	/* boolean */
+	atomic_t aggregated_ogms;
+	atomic_t bonding;
+	atomic_t fragmentation;
+	atomic_t ap_isolation;
+#ifdef CONFIG_BATMAN_ADV_BLA
+	atomic_t bridge_loop_avoidance;
+#endif
 #ifdef CONFIG_BATMAN_ADV_DAT
-	atomic_t distributed_arp_table;	/* boolean */
+	atomic_t distributed_arp_table;
+#endif
+	atomic_t vis_mode;
+	atomic_t gw_mode;
+	atomic_t gw_sel_class;
+	atomic_t gw_bandwidth;
+	atomic_t orig_interval;
+	atomic_t hop_penalty;
+#ifdef CONFIG_BATMAN_ADV_DEBUG
+	atomic_t log_level;
 #endif
-	atomic_t vis_mode;		/* VIS_TYPE_* */
-	atomic_t gw_mode;		/* GW_MODE_* */
-	atomic_t gw_sel_class;		/* uint */
-	atomic_t gw_bandwidth;		/* gw bandwidth */
-	atomic_t orig_interval;		/* uint */
-	atomic_t hop_penalty;		/* uint */
-	atomic_t log_level;		/* uint */
 	atomic_t bcast_seqno;
 	atomic_t bcast_queue_left;
 	atomic_t batman_queue_left;
 	char num_ifaces;
-	struct batadv_debug_log *debug_log;
 	struct kobject *mesh_obj;
 	struct dentry *debug_dir;
 	struct hlist_head forw_bat_list;
@@ -297,11 +507,15 @@ struct batadv_priv {
 	spinlock_t forw_bat_list_lock; /* protects forw_bat_list */
 	spinlock_t forw_bcast_list_lock; /* protects forw_bcast_list */
 	struct delayed_work orig_work;
+	struct work_struct cleanup_work;
 	struct batadv_hard_iface __rcu *primary_if;  /* rcu protected pointer */
 	struct batadv_algo_ops *bat_algo_ops;
 #ifdef CONFIG_BATMAN_ADV_BLA
 	struct batadv_priv_bla bla;
 #endif
+#ifdef CONFIG_BATMAN_ADV_DEBUG
+	struct batadv_priv_debug_log *debug_log;
+#endif
 	struct batadv_priv_gw gw;
 	struct batadv_priv_tt tt;
 	struct batadv_priv_vis vis;
@@ -310,21 +524,97 @@ struct batadv_priv {
 #endif
 };
 
+/**
+ * struct batadv_socket_client - layer2 icmp socket client data
+ * @queue_list: packet queue for packets destined for this socket client
+ * @queue_len: number of packets in the packet queue (queue_list)
+ * @index: socket client's index in the batadv_socket_client_hash
+ * @lock: lock protecting queue_list, queue_len & index
+ * @queue_wait: socket client's wait queue
+ * @bat_priv: pointer to soft_iface this client belongs to
+ */
 struct batadv_socket_client {
 	struct list_head queue_list;
 	unsigned int queue_len;
 	unsigned char index;
-	spinlock_t lock; /* protects queue_list, queue_len, index */
+	spinlock_t lock; /* protects queue_list, queue_len & index */
 	wait_queue_head_t queue_wait;
 	struct batadv_priv *bat_priv;
 };
 
+/**
+ * struct batadv_socket_packet - layer2 icmp packet for socket client
+ * @list: list node for batadv_socket_client::queue_list
+ * @icmp_len: size of the layer2 icmp packet
+ * @icmp_packet: layer2 icmp packet
+ */
 struct batadv_socket_packet {
 	struct list_head list;
 	size_t icmp_len;
 	struct batadv_icmp_packet_rr icmp_packet;
 };
 
+/**
+ * struct batadv_bla_backbone_gw - batman-adv gateway bridged into the LAN
+ * @orig: originator address of backbone node (mac address of primary iface)
+ * @vid: vlan id this gateway was detected on
+ * @hash_entry: hlist node for batadv_priv_bla::backbone_hash
+ * @bat_priv: pointer to soft_iface this backbone gateway belongs to
+ * @lasttime: last time we heard of this backbone gw
+ * @wait_periods: grace time for bridge forward delays and bla group forming at
+ *  bootup phase - no bcast traffic is formwared until it has elapsed
+ * @request_sent: if this bool is set to true we are out of sync with this
+ *  backbone gateway - no bcast traffic is formwared until the situation was
+ *  resolved
+ * @crc: crc16 checksum over all claims
+ * @refcount: number of contexts the object is used
+ * @rcu: struct used for freeing in an RCU-safe manner
+ */
+#ifdef CONFIG_BATMAN_ADV_BLA
+struct batadv_bla_backbone_gw {
+	uint8_t orig[ETH_ALEN];
+	short vid;
+	struct hlist_node hash_entry;
+	struct batadv_priv *bat_priv;
+	unsigned long lasttime;
+	atomic_t wait_periods;
+	atomic_t request_sent;
+	uint16_t crc;
+	atomic_t refcount;
+	struct rcu_head rcu;
+};
+
+/**
+ * struct batadv_bla_claim - claimed non-mesh client structure
+ * @addr: mac address of claimed non-mesh client
+ * @vid: vlan id this client was detected on
+ * @batadv_bla_backbone_gw: pointer to backbone gw claiming this client
+ * @lasttime: last time we heard of claim (locals only)
+ * @hash_entry: hlist node for batadv_priv_bla::claim_hash
+ * @refcount: number of contexts the object is used
+ * @rcu: struct used for freeing in an RCU-safe manner
+ */
+struct batadv_bla_claim {
+	uint8_t addr[ETH_ALEN];
+	short vid;
+	struct batadv_bla_backbone_gw *backbone_gw;
+	unsigned long lasttime;
+	struct hlist_node hash_entry;
+	struct rcu_head rcu;
+	atomic_t refcount;
+};
+#endif
+
+/**
+ * struct batadv_tt_common_entry - tt local & tt global common data
+ * @addr: mac address of non-mesh client
+ * @hash_entry: hlist node for batadv_priv_tt::local_hash or for
+ *  batadv_priv_tt::global_hash
+ * @flags: various state handling flags (see batadv_tt_client_flags)
+ * @added_at: timestamp used for purging stale tt common entries
+ * @refcount: number of contexts the object is used
+ * @rcu: struct used for freeing in an RCU-safe manner
+ */
 struct batadv_tt_common_entry {
 	uint8_t addr[ETH_ALEN];
 	struct hlist_node hash_entry;
@@ -334,62 +624,76 @@ struct batadv_tt_common_entry {
 	struct rcu_head rcu;
 };
 
+/**
+ * struct batadv_tt_local_entry - translation table local entry data
+ * @common: general translation table data
+ * @last_seen: timestamp used for purging stale tt local entries
+ */
 struct batadv_tt_local_entry {
 	struct batadv_tt_common_entry common;
 	unsigned long last_seen;
 };
 
+/**
+ * struct batadv_tt_global_entry - translation table global entry data
+ * @common: general translation table data
+ * @orig_list: list of orig nodes announcing this non-mesh client
+ * @list_lock: lock protecting orig_list
+ * @roam_at: time at which TT_GLOBAL_ROAM was set
+ */
 struct batadv_tt_global_entry {
 	struct batadv_tt_common_entry common;
 	struct hlist_head orig_list;
-	spinlock_t list_lock;	/* protects the list */
-	unsigned long roam_at; /* time at which TT_GLOBAL_ROAM was set */
+	spinlock_t list_lock;	/* protects orig_list */
+	unsigned long roam_at;
 };
 
+/**
+ * struct batadv_tt_orig_list_entry - orig node announcing a non-mesh client
+ * @orig_node: pointer to orig node announcing this non-mesh client
+ * @ttvn: translation table version number which added the non-mesh client
+ * @list: list node for batadv_tt_global_entry::orig_list
+ * @refcount: number of contexts the object is used
+ * @rcu: struct used for freeing in an RCU-safe manner
+ */
 struct batadv_tt_orig_list_entry {
 	struct batadv_orig_node *orig_node;
 	uint8_t ttvn;
-	atomic_t refcount;
-	struct rcu_head rcu;
 	struct hlist_node list;
-};
-
-#ifdef CONFIG_BATMAN_ADV_BLA
-struct batadv_backbone_gw {
-	uint8_t orig[ETH_ALEN];
-	short vid;		/* used VLAN ID */
-	struct hlist_node hash_entry;
-	struct batadv_priv *bat_priv;
-	unsigned long lasttime;	/* last time we heard of this backbone gw */
-	atomic_t wait_periods;
-	atomic_t request_sent;
 	atomic_t refcount;
 	struct rcu_head rcu;
-	uint16_t crc;		/* crc checksum over all claims */
-};
-
-struct batadv_claim {
-	uint8_t addr[ETH_ALEN];
-	short vid;
-	struct batadv_backbone_gw *backbone_gw;
-	unsigned long lasttime;	/* last time we heard of claim (locals only) */
-	struct rcu_head rcu;
-	atomic_t refcount;
-	struct hlist_node hash_entry;
 };
-#endif
 
+/**
+ * struct batadv_tt_change_node - structure for tt changes occured
+ * @list: list node for batadv_priv_tt::changes_list
+ * @change: holds the actual translation table diff data
+ */
 struct batadv_tt_change_node {
 	struct list_head list;
 	struct batadv_tt_change change;
 };
 
+/**
+ * struct batadv_tt_req_node - data to keep track of the tt requests in flight
+ * @addr: mac address address of the originator this request was sent to
+ * @issued_at: timestamp used for purging stale tt requests
+ * @list: list node for batadv_priv_tt::req_list
+ */
 struct batadv_tt_req_node {
 	uint8_t addr[ETH_ALEN];
 	unsigned long issued_at;
 	struct list_head list;
 };
 
+/**
+ * struct batadv_tt_roam_node - roaming client data
+ * @addr: mac address of the client in the roaming phase
+ * @counter: number of allowed roaming events per client within a single
+ *  OGM interval (changes are committed with each OGM)
+ * @first_time: timestamp used for purging stale roaming node entries
+ * @list: list node for batadv_priv_tt::roam_list
+ */
 struct batadv_tt_roam_node {
 	uint8_t addr[ETH_ALEN];
 	atomic_t counter;
@@ -397,8 +701,19 @@ struct batadv_tt_roam_node {
 	struct list_head list;
 };
 
-/*	forw_packet - structure for forw_list maintaining packets to be
- *	              send/forwarded
+/**
+ * struct batadv_forw_packet - structure for bcast packets to be sent/forwarded
+ * @list: list node for batadv_socket_client::queue_list
+ * @send_time: execution time for delayed_work (packet sending)
+ * @own: bool for locally generated packets (local OGMs are re-scheduled after
+ *  sending)
+ * @skb: bcast packet's skb buffer
+ * @packet_len: size of aggregated OGM packet inside the skb buffer
+ * @direct_link_flags: direct link flags for aggregated OGM packets
+ * @num_packets: counter for bcast packet retransmission
+ * @delayed_work: work queue callback item for packet sending
+ * @if_incoming: pointer incoming hard-iface or primary iface if locally
+ *  generated packet
  */
 struct batadv_forw_packet {
 	struct hlist_node list;
@@ -412,72 +727,98 @@ struct batadv_forw_packet {
 	struct batadv_hard_iface *if_incoming;
 };
 
-/* While scanning for vis-entries of a particular vis-originator
- * this list collects its interfaces to create a subgraph/cluster
- * out of them later
+/**
+ * struct batadv_frag_packet_list_entry - storage for fragment packet
+ * @list: list node for orig_node::frag_list
+ * @seqno: sequence number of the fragment
+ * @skb: fragment's skb buffer
  */
-struct batadv_if_list_entry {
-	uint8_t addr[ETH_ALEN];
-	bool primary;
-	struct hlist_node list;
-};
-
-struct batadv_debug_log {
-	char log_buff[BATADV_LOG_BUF_LEN];
-	unsigned long log_start;
-	unsigned long log_end;
-	spinlock_t lock; /* protects log_buff, log_start and log_end */
-	wait_queue_head_t queue_wait;
-};
-
 struct batadv_frag_packet_list_entry {
 	struct list_head list;
 	uint16_t seqno;
 	struct sk_buff *skb;
 };
 
+/**
+ * struct batadv_vis_info - local data for vis information
+ * @first_seen: timestamp used for purging stale vis info entries
+ * @recv_list: List of server-neighbors we have received this packet from. This
+ *  packet should not be re-forward to them again. List elements are struct
+ *  batadv_vis_recvlist_node
+ * @send_list: list of packets to be forwarded
+ * @refcount: number of contexts the object is used
+ * @hash_entry: hlist node for batadv_priv_vis::hash
+ * @bat_priv: pointer to soft_iface this orig node belongs to
+ * @skb_packet: contains the vis packet
+ */
 struct batadv_vis_info {
 	unsigned long first_seen;
-	/* list of server-neighbors we received a vis-packet
-	 * from.  we should not reply to them.
-	 */
 	struct list_head recv_list;
 	struct list_head send_list;
 	struct kref refcount;
 	struct hlist_node hash_entry;
 	struct batadv_priv *bat_priv;
-	/* this packet might be part of the vis send queue. */
 	struct sk_buff *skb_packet;
-	/* vis_info may follow here */
 } __packed;
 
+/**
+ * struct batadv_vis_info_entry - contains link information for vis
+ * @src: source MAC of the link, all zero for local TT entry
+ * @dst: destination MAC of the link, client mac address for local TT entry
+ * @quality: transmission quality of the link, or 0 for local TT entry
+ */
 struct batadv_vis_info_entry {
 	uint8_t  src[ETH_ALEN];
 	uint8_t  dest[ETH_ALEN];
-	uint8_t  quality;	/* quality = 0 client */
+	uint8_t  quality;
 } __packed;
 
-struct batadv_recvlist_node {
+/**
+ * struct batadv_vis_recvlist_node - list entry for batadv_vis_info::recv_list
+ * @list: list node for batadv_vis_info::recv_list
+ * @mac: MAC address of the originator from where the vis_info was received
+ */
+struct batadv_vis_recvlist_node {
 	struct list_head list;
 	uint8_t mac[ETH_ALEN];
 };
 
+/**
+ * struct batadv_vis_if_list_entry - auxiliary data for vis data generation
+ * @addr: MAC address of the interface
+ * @primary: true if this interface is the primary interface
+ * @list: list node the interface list
+ *
+ * While scanning for vis-entries of a particular vis-originator
+ * this list collects its interfaces to create a subgraph/cluster
+ * out of them later
+ */
+struct batadv_vis_if_list_entry {
+	uint8_t addr[ETH_ALEN];
+	bool primary;
+	struct hlist_node list;
+};
+
+/**
+ * struct batadv_algo_ops - mesh algorithm callbacks
+ * @list: list node for the batadv_algo_list
+ * @name: name of the algorithm
+ * @bat_iface_enable: init routing info when hard-interface is enabled
+ * @bat_iface_disable: de-init routing info when hard-interface is disabled
+ * @bat_iface_update_mac: (re-)init mac addresses of the protocol information
+ *  belonging to this hard-interface
+ * @bat_primary_iface_set: called when primary interface is selected / changed
+ * @bat_ogm_schedule: prepare a new outgoing OGM for the send queue
+ * @bat_ogm_emit: send scheduled OGM
+ */
 struct batadv_algo_ops {
 	struct hlist_node list;
 	char *name;
-	/* init routing info when hard-interface is enabled */
 	int (*bat_iface_enable)(struct batadv_hard_iface *hard_iface);
-	/* de-init routing info when hard-interface is disabled */
 	void (*bat_iface_disable)(struct batadv_hard_iface *hard_iface);
-	/* (re-)init mac addresses of the protocol information
-	 * belonging to this hard-interface
-	 */
 	void (*bat_iface_update_mac)(struct batadv_hard_iface *hard_iface);
-	/* called when primary interface is selected / changed */
 	void (*bat_primary_iface_set)(struct batadv_hard_iface *hard_iface);
-	/* prepare a new outgoing OGM for the send queue */
 	void (*bat_ogm_schedule)(struct batadv_hard_iface *hard_iface);
-	/* send scheduled OGM */
 	void (*bat_ogm_emit)(struct batadv_forw_packet *forw_packet);
 };
 
diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c
index 10aff49fcf2..50e079f00be 100644
--- a/net/batman-adv/unicast.c
+++ b/net/batman-adv/unicast.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2010-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors:
  *
  * Andreas Langer
  *
@@ -133,7 +133,6 @@ batadv_frag_search_packet(struct list_head *head,
 	is_head = !!(up->flags & BATADV_UNI_FRAG_HEAD);
 
 	list_for_each_entry(tfp, head, list) {
-
 		if (!tfp->skb)
 			continue;
 
@@ -162,7 +161,6 @@ void batadv_frag_list_free(struct list_head *head)
 	struct batadv_frag_packet_list_entry *pf, *tmp_pf;
 
 	if (!list_empty(head)) {
-
 		list_for_each_entry_safe(pf, tmp_pf, head, list) {
 			kfree_skb(pf->skb);
 			list_del(&pf->list);
diff --git a/net/batman-adv/unicast.h b/net/batman-adv/unicast.h
index 61abba58bd8..429cf8a4a31 100644
--- a/net/batman-adv/unicast.h
+++ b/net/batman-adv/unicast.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2010-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors:
  *
  * Andreas Langer
  *
diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c
index 0f65a9de5f7..22d2785177d 100644
--- a/net/batman-adv/vis.c
+++ b/net/batman-adv/vis.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2008-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2008-2013 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich
  *
@@ -28,14 +28,15 @@
 
 #define BATADV_MAX_VIS_PACKET_SIZE 1000
 
-static void batadv_start_vis_timer(struct batadv_priv *bat_priv);
+/* hash class keys */
+static struct lock_class_key batadv_vis_hash_lock_class_key;
 
 /* free the info */
 static void batadv_free_info(struct kref *ref)
 {
 	struct batadv_vis_info *info;
 	struct batadv_priv *bat_priv;
-	struct batadv_recvlist_node *entry, *tmp;
+	struct batadv_vis_recvlist_node *entry, *tmp;
 
 	info = container_of(ref, struct batadv_vis_info, refcount);
 	bat_priv = info->bat_priv;
@@ -126,7 +127,7 @@ static void batadv_vis_data_insert_interface(const uint8_t *interface,
 					     struct hlist_head *if_list,
 					     bool primary)
 {
-	struct batadv_if_list_entry *entry;
+	struct batadv_vis_if_list_entry *entry;
 	struct hlist_node *pos;
 
 	hlist_for_each_entry(entry, pos, if_list, list) {
@@ -146,7 +147,7 @@ static void batadv_vis_data_insert_interface(const uint8_t *interface,
 static void batadv_vis_data_read_prim_sec(struct seq_file *seq,
 					  const struct hlist_head *if_list)
 {
-	struct batadv_if_list_entry *entry;
+	struct batadv_vis_if_list_entry *entry;
 	struct hlist_node *pos;
 
 	hlist_for_each_entry(entry, pos, if_list, list) {
@@ -196,7 +197,7 @@ static void batadv_vis_data_read_entries(struct seq_file *seq,
 					 struct batadv_vis_info_entry *entries)
 {
 	int i;
-	struct batadv_if_list_entry *entry;
+	struct batadv_vis_if_list_entry *entry;
 	struct hlist_node *pos;
 
 	hlist_for_each_entry(entry, pos, list, list) {
@@ -222,7 +223,7 @@ static void batadv_vis_seq_print_text_bucket(struct seq_file *seq,
 	struct batadv_vis_packet *packet;
 	uint8_t *entries_pos;
 	struct batadv_vis_info_entry *entries;
-	struct batadv_if_list_entry *entry;
+	struct batadv_vis_if_list_entry *entry;
 	struct hlist_node *pos, *n;
 
 	HLIST_HEAD(vis_if_list);
@@ -304,7 +305,7 @@ static void batadv_send_list_del(struct batadv_vis_info *info)
 static void batadv_recv_list_add(struct batadv_priv *bat_priv,
 				 struct list_head *recv_list, const char *mac)
 {
-	struct batadv_recvlist_node *entry;
+	struct batadv_vis_recvlist_node *entry;
 
 	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
 	if (!entry)
@@ -321,7 +322,7 @@ static int batadv_recv_list_is_in(struct batadv_priv *bat_priv,
 				  const struct list_head *recv_list,
 				  const char *mac)
 {
-	const struct batadv_recvlist_node *entry;
+	const struct batadv_vis_recvlist_node *entry;
 
 	spin_lock_bh(&bat_priv->vis.list_lock);
 	list_for_each_entry(entry, recv_list, list) {
@@ -827,7 +828,9 @@ static void batadv_send_vis_packets(struct work_struct *work)
 		kref_put(&info->refcount, batadv_free_info);
 	}
 	spin_unlock_bh(&bat_priv->vis.hash_lock);
-	batadv_start_vis_timer(bat_priv);
+
+	queue_delayed_work(batadv_event_workqueue, &bat_priv->vis.work,
+			   msecs_to_jiffies(BATADV_VIS_INTERVAL));
 }
 
 /* init the vis server. this may only be called when if_list is already
@@ -852,6 +855,9 @@ int batadv_vis_init(struct batadv_priv *bat_priv)
 		goto err;
 	}
 
+	batadv_hash_set_lock_class(bat_priv->vis.hash,
+				   &batadv_vis_hash_lock_class_key);
+
 	bat_priv->vis.my_info = kmalloc(BATADV_MAX_VIS_PACKET_SIZE, GFP_ATOMIC);
 	if (!bat_priv->vis.my_info)
 		goto err;
@@ -894,7 +900,11 @@ int batadv_vis_init(struct batadv_priv *bat_priv)
 	}
 
 	spin_unlock_bh(&bat_priv->vis.hash_lock);
-	batadv_start_vis_timer(bat_priv);
+
+	INIT_DELAYED_WORK(&bat_priv->vis.work, batadv_send_vis_packets);
+	queue_delayed_work(batadv_event_workqueue, &bat_priv->vis.work,
+			   msecs_to_jiffies(BATADV_VIS_INTERVAL));
+
 	return 0;
 
 free_info:
@@ -931,11 +941,3 @@ void batadv_vis_quit(struct batadv_priv *bat_priv)
 	bat_priv->vis.my_info = NULL;
 	spin_unlock_bh(&bat_priv->vis.hash_lock);
 }
-
-/* schedule packets for (re)transmission */
-static void batadv_start_vis_timer(struct batadv_priv *bat_priv)
-{
-	INIT_DELAYED_WORK(&bat_priv->vis.work, batadv_send_vis_packets);
-	queue_delayed_work(batadv_event_workqueue, &bat_priv->vis.work,
-			   msecs_to_jiffies(BATADV_VIS_INTERVAL));
-}
diff --git a/net/batman-adv/vis.h b/net/batman-adv/vis.h
index 873282fa86d..ad92b0e3c23 100644
--- a/net/batman-adv/vis.h
+++ b/net/batman-adv/vis.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2008-2012 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2008-2013 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
  *
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 201fdf73720..b23e2713fea 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -257,7 +257,7 @@ static void rfcomm_session_clear_timer(struct rfcomm_session *s)
 {
 	BT_DBG("session %p state %ld", s, s->state);
 
-	if (timer_pending(&s->timer) && del_timer(&s->timer))
+	if (del_timer(&s->timer))
 		rfcomm_session_put(s);
 }
 
@@ -285,7 +285,7 @@ static void rfcomm_dlc_clear_timer(struct rfcomm_dlc *d)
 {
 	BT_DBG("dlc %p state %ld", d, d->state);
 
-	if (timer_pending(&d->timer) && del_timer(&d->timer))
+	if (del_timer(&d->timer))
 		rfcomm_dlc_put(d);
 }
 
diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig
index 6dee7bf648a..aa0d3b2f1bb 100644
--- a/net/bridge/Kconfig
+++ b/net/bridge/Kconfig
@@ -46,3 +46,17 @@ config BRIDGE_IGMP_SNOOPING
 	  Say N to exclude this support and reduce the binary size.
 
 	  If unsure, say Y.
+
+config BRIDGE_VLAN_FILTERING
+	bool "VLAN filtering"
+	depends on BRIDGE
+	depends on VLAN_8021Q
+	default n
+	---help---
+	  If you say Y here, then the Ethernet bridge will be able selectively
+	  receive and forward traffic based on VLAN information in the packet
+	  any VLAN information configured on the bridge port or bridge device.
+
+	  Say N to exclude this support and reduce the binary size.
+
+	  If unsure, say Y.
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index e859098f5ee..e85498b2f16 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -14,4 +14,6 @@ bridge-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o
 
 bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o
 
+bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o
+
 obj-$(CONFIG_BRIDGE_NF_EBTABLES) += netfilter/
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 7c78e264019..d5f1d3fd4b2 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -30,6 +30,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct net_bridge_fdb_entry *dst;
 	struct net_bridge_mdb_entry *mdst;
 	struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
+	u16 vid = 0;
 
 	rcu_read_lock();
 #ifdef CONFIG_BRIDGE_NETFILTER
@@ -45,6 +46,9 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	brstats->tx_bytes += skb->len;
 	u64_stats_update_end(&brstats->syncp);
 
+	if (!br_allowed_ingress(br, br_get_vlan_info(br), skb, &vid))
+		goto out;
+
 	BR_INPUT_SKB_CB(skb)->brdev = dev;
 
 	skb_reset_mac_header(skb);
@@ -67,7 +71,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 			br_multicast_deliver(mdst, skb);
 		else
 			br_flood_deliver(br, skb);
-	} else if ((dst = __br_fdb_get(br, dest)) != NULL)
+	} else if ((dst = __br_fdb_get(br, dest, vid)) != NULL)
 		br_deliver(dst->dst, skb);
 	else
 		br_flood_deliver(br, skb);
@@ -172,12 +176,10 @@ static int br_set_mac_address(struct net_device *dev, void *p)
 
 	spin_lock_bh(&br->lock);
 	if (!ether_addr_equal(dev->dev_addr, addr->sa_data)) {
-		dev->addr_assign_type &= ~NET_ADDR_RANDOM;
 		memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
 		br_fdb_change_mac_address(br, addr->sa_data);
 		br_stp_change_bridge_id(br, addr->sa_data);
 	}
-	br->flags |= BR_SET_MAC_ADDR;
 	spin_unlock_bh(&br->lock);
 
 	return 0;
@@ -185,10 +187,10 @@ static int br_set_mac_address(struct net_device *dev, void *p)
 
 static void br_getinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 {
-	strcpy(info->driver, "bridge");
-	strcpy(info->version, BR_VERSION);
-	strcpy(info->fw_version, "N/A");
-	strcpy(info->bus_info, "N/A");
+	strlcpy(info->driver, "bridge", sizeof(info->driver));
+	strlcpy(info->version, BR_VERSION, sizeof(info->version));
+	strlcpy(info->fw_version, "N/A", sizeof(info->fw_version));
+	strlcpy(info->bus_info, "N/A", sizeof(info->bus_info));
 }
 
 static netdev_features_t br_fix_features(struct net_device *dev,
@@ -267,7 +269,7 @@ void br_netpoll_disable(struct net_bridge_port *p)
 
 	p->np = NULL;
 
-	__netpoll_free_rcu(np);
+	__netpoll_free_async(np);
 }
 
 #endif
@@ -315,6 +317,7 @@ static const struct net_device_ops br_netdev_ops = {
 	.ndo_fdb_dump		 = br_fdb_dump,
 	.ndo_bridge_getlink	 = br_getlink,
 	.ndo_bridge_setlink	 = br_setlink,
+	.ndo_bridge_dellink	 = br_dellink,
 };
 
 static void br_dev_free(struct net_device *dev)
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index d9576e6de2b..8117900af4d 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -23,11 +23,12 @@
 #include <linux/slab.h>
 #include <linux/atomic.h>
 #include <asm/unaligned.h>
+#include <linux/if_vlan.h>
 #include "br_private.h"
 
 static struct kmem_cache *br_fdb_cache __read_mostly;
 static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
-		      const unsigned char *addr);
+		      const unsigned char *addr, u16 vid);
 static void fdb_notify(struct net_bridge *br,
 		       const struct net_bridge_fdb_entry *, int);
 
@@ -67,11 +68,11 @@ static inline int has_expired(const struct net_bridge *br,
 		time_before_eq(fdb->updated + hold_time(br), jiffies);
 }
 
-static inline int br_mac_hash(const unsigned char *mac)
+static inline int br_mac_hash(const unsigned char *mac, __u16 vid)
 {
-	/* use 1 byte of OUI cnd 3 bytes of NIC */
+	/* use 1 byte of OUI and 3 bytes of NIC */
 	u32 key = get_unaligned((u32 *)(mac + 2));
-	return jhash_1word(key, fdb_salt) & (BR_HASH_SIZE - 1);
+	return jhash_2words(key, vid, fdb_salt) & (BR_HASH_SIZE - 1);
 }
 
 static void fdb_rcu_free(struct rcu_head *head)
@@ -91,6 +92,7 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
 void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
 {
 	struct net_bridge *br = p->br;
+	bool no_vlan = (nbp_get_vlan_info(p) == NULL) ? true : false;
 	int i;
 
 	spin_lock_bh(&br->hash_lock);
@@ -105,10 +107,12 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
 			if (f->dst == p && f->is_local) {
 				/* maybe another port has same hw addr? */
 				struct net_bridge_port *op;
+				u16 vid = f->vlan_id;
 				list_for_each_entry(op, &br->port_list, list) {
 					if (op != p &&
 					    ether_addr_equal(op->dev->dev_addr,
-							     f->addr.addr)) {
+							     f->addr.addr) &&
+					    nbp_vlan_find(op, vid)) {
 						f->dst = op;
 						goto insert;
 					}
@@ -116,27 +120,55 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
 
 				/* delete old one */
 				fdb_delete(br, f);
-				goto insert;
+insert:
+				/* insert new address,  may fail if invalid
+				 * address or dup.
+				 */
+				fdb_insert(br, p, newaddr, vid);
+
+				/* if this port has no vlan information
+				 * configured, we can safely be done at
+				 * this point.
+				 */
+				if (no_vlan)
+					goto done;
 			}
 		}
 	}
- insert:
-	/* insert new address,  may fail if invalid address or dup. */
-	fdb_insert(br, p, newaddr);
 
+done:
 	spin_unlock_bh(&br->hash_lock);
 }
 
 void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
 {
 	struct net_bridge_fdb_entry *f;
+	struct net_port_vlans *pv;
+	u16 vid = 0;
 
 	/* If old entry was unassociated with any port, then delete it. */
-	f = __br_fdb_get(br, br->dev->dev_addr);
+	f = __br_fdb_get(br, br->dev->dev_addr, 0);
 	if (f && f->is_local && !f->dst)
 		fdb_delete(br, f);
 
-	fdb_insert(br, NULL, newaddr);
+	fdb_insert(br, NULL, newaddr, 0);
+
+	/* Now remove and add entries for every VLAN configured on the
+	 * bridge.  This function runs under RTNL so the bitmap will not
+	 * change from under us.
+	 */
+	pv = br_get_vlan_info(br);
+	if (!pv)
+		return;
+
+	for (vid = find_next_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN, vid);
+	     vid < BR_VLAN_BITMAP_LEN;
+	     vid = find_next_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN, vid+1)) {
+		f = __br_fdb_get(br, br->dev->dev_addr, vid);
+		if (f && f->is_local && !f->dst)
+			fdb_delete(br, f);
+		fdb_insert(br, NULL, newaddr, vid);
+	}
 }
 
 void br_fdb_cleanup(unsigned long _data)
@@ -231,13 +263,16 @@ void br_fdb_delete_by_port(struct net_bridge *br,
 
 /* No locking or refcounting, assumes caller has rcu_read_lock */
 struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
-					  const unsigned char *addr)
+					  const unsigned char *addr,
+					  __u16 vid)
 {
 	struct hlist_node *h;
 	struct net_bridge_fdb_entry *fdb;
 
-	hlist_for_each_entry_rcu(fdb, h, &br->hash[br_mac_hash(addr)], hlist) {
-		if (ether_addr_equal(fdb->addr.addr, addr)) {
+	hlist_for_each_entry_rcu(fdb, h,
+				&br->hash[br_mac_hash(addr, vid)], hlist) {
+		if (ether_addr_equal(fdb->addr.addr, addr) &&
+		    fdb->vlan_id == vid) {
 			if (unlikely(has_expired(br, fdb)))
 				break;
 			return fdb;
@@ -261,7 +296,7 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr)
 	if (!port)
 		ret = 0;
 	else {
-		fdb = __br_fdb_get(port->br, addr);
+		fdb = __br_fdb_get(port->br, addr, 0);
 		ret = fdb && fdb->dst && fdb->dst->dev != dev &&
 			fdb->dst->state == BR_STATE_FORWARDING;
 	}
@@ -325,26 +360,30 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf,
 }
 
 static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head,
-					     const unsigned char *addr)
+					     const unsigned char *addr,
+					     __u16 vid)
 {
 	struct hlist_node *h;
 	struct net_bridge_fdb_entry *fdb;
 
 	hlist_for_each_entry(fdb, h, head, hlist) {
-		if (ether_addr_equal(fdb->addr.addr, addr))
+		if (ether_addr_equal(fdb->addr.addr, addr) &&
+		    fdb->vlan_id == vid)
 			return fdb;
 	}
 	return NULL;
 }
 
 static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head,
-						 const unsigned char *addr)
+						 const unsigned char *addr,
+						 __u16 vid)
 {
 	struct hlist_node *h;
 	struct net_bridge_fdb_entry *fdb;
 
 	hlist_for_each_entry_rcu(fdb, h, head, hlist) {
-		if (ether_addr_equal(fdb->addr.addr, addr))
+		if (ether_addr_equal(fdb->addr.addr, addr) &&
+		    fdb->vlan_id == vid)
 			return fdb;
 	}
 	return NULL;
@@ -352,7 +391,8 @@ static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head,
 
 static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
 					       struct net_bridge_port *source,
-					       const unsigned char *addr)
+					       const unsigned char *addr,
+					       __u16 vid)
 {
 	struct net_bridge_fdb_entry *fdb;
 
@@ -360,6 +400,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
 	if (fdb) {
 		memcpy(fdb->addr.addr, addr, ETH_ALEN);
 		fdb->dst = source;
+		fdb->vlan_id = vid;
 		fdb->is_local = 0;
 		fdb->is_static = 0;
 		fdb->updated = fdb->used = jiffies;
@@ -369,15 +410,15 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
 }
 
 static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
-		  const unsigned char *addr)
+		  const unsigned char *addr, u16 vid)
 {
-	struct hlist_head *head = &br->hash[br_mac_hash(addr)];
+	struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
 	struct net_bridge_fdb_entry *fdb;
 
 	if (!is_valid_ether_addr(addr))
 		return -EINVAL;
 
-	fdb = fdb_find(head, addr);
+	fdb = fdb_find(head, addr, vid);
 	if (fdb) {
 		/* it is okay to have multiple ports with same
 		 * address, just use the first one.
@@ -390,7 +431,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
 		fdb_delete(br, fdb);
 	}
 
-	fdb = fdb_create(head, source, addr);
+	fdb = fdb_create(head, source, addr, vid);
 	if (!fdb)
 		return -ENOMEM;
 
@@ -401,20 +442,20 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
 
 /* Add entry for local address of interface */
 int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
-		  const unsigned char *addr)
+		  const unsigned char *addr, u16 vid)
 {
 	int ret;
 
 	spin_lock_bh(&br->hash_lock);
-	ret = fdb_insert(br, source, addr);
+	ret = fdb_insert(br, source, addr, vid);
 	spin_unlock_bh(&br->hash_lock);
 	return ret;
 }
 
 void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
-		   const unsigned char *addr)
+		   const unsigned char *addr, u16 vid)
 {
-	struct hlist_head *head = &br->hash[br_mac_hash(addr)];
+	struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
 	struct net_bridge_fdb_entry *fdb;
 
 	/* some users want to always flood. */
@@ -426,7 +467,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 	      source->state == BR_STATE_FORWARDING))
 		return;
 
-	fdb = fdb_find_rcu(head, addr);
+	fdb = fdb_find_rcu(head, addr, vid);
 	if (likely(fdb)) {
 		/* attempt to update an entry for a local interface */
 		if (unlikely(fdb->is_local)) {
@@ -441,8 +482,8 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 		}
 	} else {
 		spin_lock(&br->hash_lock);
-		if (likely(!fdb_find(head, addr))) {
-			fdb = fdb_create(head, source, addr);
+		if (likely(!fdb_find(head, addr, vid))) {
+			fdb = fdb_create(head, source, addr, vid);
 			if (fdb)
 				fdb_notify(br, fdb, RTM_NEWNEIGH);
 		}
@@ -495,6 +536,10 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
 	ci.ndm_refcnt	 = 0;
 	if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
 		goto nla_put_failure;
+
+	if (nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id))
+		goto nla_put_failure;
+
 	return nlmsg_end(skb, nlh);
 
 nla_put_failure:
@@ -506,6 +551,7 @@ static inline size_t fdb_nlmsg_size(void)
 {
 	return NLMSG_ALIGN(sizeof(struct ndmsg))
 		+ nla_total_size(ETH_ALEN) /* NDA_LLADDR */
+		+ nla_total_size(sizeof(u16)) /* NDA_VLAN */
 		+ nla_total_size(sizeof(struct nda_cacheinfo));
 }
 
@@ -571,18 +617,18 @@ out:
 
 /* Update (create or replace) forwarding database entry */
 static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
-			 __u16 state, __u16 flags)
+			 __u16 state, __u16 flags, __u16 vid)
 {
 	struct net_bridge *br = source->br;
-	struct hlist_head *head = &br->hash[br_mac_hash(addr)];
+	struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
 	struct net_bridge_fdb_entry *fdb;
 
-	fdb = fdb_find(head, addr);
+	fdb = fdb_find(head, addr, vid);
 	if (fdb == NULL) {
 		if (!(flags & NLM_F_CREATE))
 			return -ENOENT;
 
-		fdb = fdb_create(head, source, addr);
+		fdb = fdb_create(head, source, addr, vid);
 		if (!fdb)
 			return -ENOMEM;
 		fdb_notify(br, fdb, RTM_NEWNEIGH);
@@ -607,6 +653,25 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
 	return 0;
 }
 
+static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p,
+	       const unsigned char *addr, u16 nlh_flags, u16 vid)
+{
+	int err = 0;
+
+	if (ndm->ndm_flags & NTF_USE) {
+		rcu_read_lock();
+		br_fdb_update(p->br, p, addr, vid);
+		rcu_read_unlock();
+	} else {
+		spin_lock_bh(&p->br->hash_lock);
+		err = fdb_add_entry(p, addr, ndm->ndm_state,
+				    nlh_flags, vid);
+		spin_unlock_bh(&p->br->hash_lock);
+	}
+
+	return err;
+}
+
 /* Add new permanent fdb entry with RTM_NEWNEIGH */
 int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 	       struct net_device *dev,
@@ -614,12 +679,29 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 {
 	struct net_bridge_port *p;
 	int err = 0;
+	struct net_port_vlans *pv;
+	unsigned short vid = VLAN_N_VID;
 
 	if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) {
 		pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state);
 		return -EINVAL;
 	}
 
+	if (tb[NDA_VLAN]) {
+		if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) {
+			pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n");
+			return -EINVAL;
+		}
+
+		vid = nla_get_u16(tb[NDA_VLAN]);
+
+		if (vid >= VLAN_N_VID) {
+			pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n",
+				vid);
+			return -EINVAL;
+		}
+	}
+
 	p = br_port_get_rtnl(dev);
 	if (p == NULL) {
 		pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n",
@@ -627,40 +709,90 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 		return -EINVAL;
 	}
 
-	if (ndm->ndm_flags & NTF_USE) {
-		rcu_read_lock();
-		br_fdb_update(p->br, p, addr);
-		rcu_read_unlock();
+	pv = nbp_get_vlan_info(p);
+	if (vid != VLAN_N_VID) {
+		if (!pv || !test_bit(vid, pv->vlan_bitmap)) {
+			pr_info("bridge: RTM_NEWNEIGH with unconfigured "
+				"vlan %d on port %s\n", vid, dev->name);
+			return -EINVAL;
+		}
+
+		/* VID was specified, so use it. */
+		err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
 	} else {
-		spin_lock_bh(&p->br->hash_lock);
-		err = fdb_add_entry(p, addr, ndm->ndm_state, nlh_flags);
-		spin_unlock_bh(&p->br->hash_lock);
+		if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) {
+			err = __br_fdb_add(ndm, p, addr, nlh_flags, 0);
+			goto out;
+		}
+
+		/* We have vlans configured on this port and user didn't
+		 * specify a VLAN.  To be nice, add/update entry for every
+		 * vlan on this port.
+		 */
+		vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN);
+		while (vid < BR_VLAN_BITMAP_LEN) {
+			err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
+			if (err)
+				goto out;
+			vid = find_next_bit(pv->vlan_bitmap,
+					    BR_VLAN_BITMAP_LEN, vid+1);
+		}
 	}
 
+out:
 	return err;
 }
 
-static int fdb_delete_by_addr(struct net_bridge_port *p, const u8 *addr)
+int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr,
+		       u16 vlan)
 {
-	struct net_bridge *br = p->br;
-	struct hlist_head *head = &br->hash[br_mac_hash(addr)];
+	struct hlist_head *head = &br->hash[br_mac_hash(addr, vlan)];
 	struct net_bridge_fdb_entry *fdb;
 
-	fdb = fdb_find(head, addr);
+	fdb = fdb_find(head, addr, vlan);
 	if (!fdb)
 		return -ENOENT;
 
-	fdb_delete(p->br, fdb);
+	fdb_delete(br, fdb);
 	return 0;
 }
 
+static int __br_fdb_delete(struct net_bridge_port *p,
+			   const unsigned char *addr, u16 vid)
+{
+	int err;
+
+	spin_lock_bh(&p->br->hash_lock);
+	err = fdb_delete_by_addr(p->br, addr, vid);
+	spin_unlock_bh(&p->br->hash_lock);
+
+	return err;
+}
+
 /* Remove neighbor entry with RTM_DELNEIGH */
-int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev,
+int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
+		  struct net_device *dev,
 		  const unsigned char *addr)
 {
 	struct net_bridge_port *p;
 	int err;
+	struct net_port_vlans *pv;
+	unsigned short vid = VLAN_N_VID;
+
+	if (tb[NDA_VLAN]) {
+		if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) {
+			pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n");
+			return -EINVAL;
+		}
+
+		vid = nla_get_u16(tb[NDA_VLAN]);
 
+		if (vid >= VLAN_N_VID) {
+			pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n",
+				vid);
+			return -EINVAL;
+		}
+	}
 	p = br_port_get_rtnl(dev);
 	if (p == NULL) {
 		pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n",
@@ -668,9 +800,33 @@ int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev,
 		return -EINVAL;
 	}
 
-	spin_lock_bh(&p->br->hash_lock);
-	err = fdb_delete_by_addr(p, addr);
-	spin_unlock_bh(&p->br->hash_lock);
+	pv = nbp_get_vlan_info(p);
+	if (vid != VLAN_N_VID) {
+		if (!pv || !test_bit(vid, pv->vlan_bitmap)) {
+			pr_info("bridge: RTM_DELNEIGH with unconfigured "
+				"vlan %d on port %s\n", vid, dev->name);
+			return -EINVAL;
+		}
+
+		err = __br_fdb_delete(p, addr, vid);
+	} else {
+		if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) {
+			err = __br_fdb_delete(p, addr, 0);
+			goto out;
+		}
 
+		/* We have vlans configured on this port and user didn't
+		 * specify a VLAN.  To be nice, add/update entry for every
+		 * vlan on this port.
+		 */
+		err = -ENOENT;
+		vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN);
+		while (vid < BR_VLAN_BITMAP_LEN) {
+			err &= __br_fdb_delete(p, addr, vid);
+			vid = find_next_bit(pv->vlan_bitmap,
+					    BR_VLAN_BITMAP_LEN, vid+1);
+		}
+	}
+out:
 	return err;
 }
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 02015a505d2..092b20e4ee4 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -31,6 +31,7 @@ static inline int should_deliver(const struct net_bridge_port *p,
 				 const struct sk_buff *skb)
 {
 	return (((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) &&
+		br_allowed_egress(p->br, nbp_get_vlan_info(p), skb) &&
 		p->state == BR_STATE_FORWARDING);
 }
 
@@ -63,6 +64,10 @@ int br_forward_finish(struct sk_buff *skb)
 
 static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
 {
+	skb = br_handle_vlan(to->br, nbp_get_vlan_info(to), skb);
+	if (!skb)
+		return;
+
 	skb->dev = to->dev;
 
 	if (unlikely(netpoll_tx_running(to->br->dev))) {
@@ -88,6 +93,10 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
 		return;
 	}
 
+	skb = br_handle_vlan(to->br, nbp_get_vlan_info(to), skb);
+	if (!skb)
+		return;
+
 	indev = skb->dev;
 	skb->dev = to->dev;
 	skb_forward_csum(skb);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 1c8fdc3558c..ef1b91431c6 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -23,6 +23,7 @@
 #include <linux/if_ether.h>
 #include <linux/slab.h>
 #include <net/sock.h>
+#include <linux/if_vlan.h>
 
 #include "br_private.h"
 
@@ -66,14 +67,14 @@ void br_port_carrier_check(struct net_bridge_port *p)
 	struct net_device *dev = p->dev;
 	struct net_bridge *br = p->br;
 
-	if (netif_running(dev) && netif_carrier_ok(dev))
+	if (netif_running(dev) && netif_oper_up(dev))
 		p->path_cost = port_cost(dev);
 
 	if (!netif_running(br->dev))
 		return;
 
 	spin_lock_bh(&br->lock);
-	if (netif_running(dev) && netif_carrier_ok(dev)) {
+	if (netif_running(dev) && netif_oper_up(dev)) {
 		if (p->state == BR_STATE_DISABLED)
 			br_stp_enable_port(p);
 	} else {
@@ -139,6 +140,7 @@ static void del_nbp(struct net_bridge_port *p)
 
 	br_ifinfo_notify(RTM_DELLINK, p);
 
+	nbp_vlan_flush(p);
 	br_fdb_delete_by_port(br, p, 1);
 
 	list_del_rcu(&p->list);
@@ -148,7 +150,7 @@ static void del_nbp(struct net_bridge_port *p)
 	netdev_rx_handler_unregister(dev);
 	synchronize_net();
 
-	netdev_set_master(dev, NULL);
+	netdev_upper_dev_unlink(dev, br->dev);
 
 	br_multicast_del_port(p);
 
@@ -364,13 +366,13 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	if (br_netpoll_info(br) && ((err = br_netpoll_enable(p, GFP_KERNEL))))
 		goto err3;
 
-	err = netdev_set_master(dev, br->dev);
+	err = netdev_master_upper_dev_link(dev, br->dev);
 	if (err)
-		goto err3;
+		goto err4;
 
 	err = netdev_rx_handler_register(dev, br_handle_frame, p);
 	if (err)
-		goto err4;
+		goto err5;
 
 	dev->priv_flags |= IFF_BRIDGE_PORT;
 
@@ -383,7 +385,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	spin_lock_bh(&br->lock);
 	changed_addr = br_stp_recalculate_bridge_id(br);
 
-	if ((dev->flags & IFF_UP) && netif_carrier_ok(dev) &&
+	if (netif_running(dev) && netif_oper_up(dev) &&
 	    (br->dev->flags & IFF_UP))
 		br_stp_enable_port(p);
 	spin_unlock_bh(&br->lock);
@@ -395,15 +397,17 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 
 	dev_set_mtu(br->dev, br_min_mtu(br));
 
-	if (br_fdb_insert(br, p, dev->dev_addr))
+	if (br_fdb_insert(br, p, dev->dev_addr, 0))
 		netdev_err(dev, "failed insert local address bridge forwarding table\n");
 
 	kobject_uevent(&p->kobj, KOBJ_ADD);
 
 	return 0;
 
+err5:
+	netdev_upper_dev_unlink(dev, br->dev);
 err4:
-	netdev_set_master(dev, NULL);
+	br_netpoll_disable(p);
 err3:
 	sysfs_remove_link(br->ifobj, p->dev->name);
 err2:
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 4b34207419b..48033015189 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -17,6 +17,7 @@
 #include <linux/etherdevice.h>
 #include <linux/netfilter_bridge.h>
 #include <linux/export.h>
+#include <linux/rculist.h>
 #include "br_private.h"
 
 /* Hook for brouter */
@@ -34,6 +35,20 @@ static int br_pass_frame_up(struct sk_buff *skb)
 	brstats->rx_bytes += skb->len;
 	u64_stats_update_end(&brstats->syncp);
 
+	/* Bridge is just like any other port.  Make sure the
+	 * packet is allowed except in promisc modue when someone
+	 * may be running packet capture.
+	 */
+	if (!(brdev->flags & IFF_PROMISC) &&
+	    !br_allowed_egress(br, br_get_vlan_info(br), skb)) {
+		kfree_skb(skb);
+		return NET_RX_DROP;
+	}
+
+	skb = br_handle_vlan(br, br_get_vlan_info(br), skb);
+	if (!skb)
+		return NET_RX_DROP;
+
 	indev = skb->dev;
 	skb->dev = brdev;
 
@@ -50,13 +65,17 @@ int br_handle_frame_finish(struct sk_buff *skb)
 	struct net_bridge_fdb_entry *dst;
 	struct net_bridge_mdb_entry *mdst;
 	struct sk_buff *skb2;
+	u16 vid = 0;
 
 	if (!p || p->state == BR_STATE_DISABLED)
 		goto drop;
 
+	if (!br_allowed_ingress(p->br, nbp_get_vlan_info(p), skb, &vid))
+		goto drop;
+
 	/* insert into forwarding database after filtering to avoid spoofing */
 	br = p->br;
-	br_fdb_update(br, p, eth_hdr(skb)->h_source);
+	br_fdb_update(br, p, eth_hdr(skb)->h_source, vid);
 
 	if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) &&
 	    br_multicast_rcv(br, p, skb))
@@ -91,7 +110,8 @@ int br_handle_frame_finish(struct sk_buff *skb)
 			skb2 = skb;
 
 		br->dev->stats.multicast++;
-	} else if ((dst = __br_fdb_get(br, dest)) && dst->is_local) {
+	} else if ((dst = __br_fdb_get(br, dest, vid)) &&
+			dst->is_local) {
 		skb2 = skb;
 		/* Do not forward the packet since it's local. */
 		skb = NULL;
@@ -119,8 +139,10 @@ drop:
 static int br_handle_local_finish(struct sk_buff *skb)
 {
 	struct net_bridge_port *p = br_port_get_rcu(skb->dev);
+	u16 vid = 0;
 
-	br_fdb_update(p->br, p, eth_hdr(skb)->h_source);
+	br_vlan_get_tag(skb, &vid);
+	br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid);
 	return 0;	 /* process further */
 }
 
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index acc9f4cc18f..38991e03646 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -272,9 +272,6 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct net_device *dev;
 	int err;
 
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
 	err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY, NULL);
 	if (err < 0)
 		return err;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 5391ca43336..7d886b0a8b7 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -39,6 +39,8 @@ static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b)
 {
 	if (a->proto != b->proto)
 		return 0;
+	if (a->vid != b->vid)
+		return 0;
 	switch (a->proto) {
 	case htons(ETH_P_IP):
 		return a->u.ip4 == b->u.ip4;
@@ -50,16 +52,19 @@ static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b)
 	return 0;
 }
 
-static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip)
+static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip,
+				__u16 vid)
 {
-	return jhash_1word(mdb->secret, (__force u32)ip) & (mdb->max - 1);
+	return jhash_2words((__force u32)ip, vid, mdb->secret) & (mdb->max - 1);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
 static inline int __br_ip6_hash(struct net_bridge_mdb_htable *mdb,
-				const struct in6_addr *ip)
+				const struct in6_addr *ip,
+				__u16 vid)
 {
-	return jhash2((__force u32 *)ip->s6_addr32, 4, mdb->secret) & (mdb->max - 1);
+	return jhash_2words(ipv6_addr_hash(ip), vid,
+			    mdb->secret) & (mdb->max - 1);
 }
 #endif
 
@@ -68,10 +73,10 @@ static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb,
 {
 	switch (ip->proto) {
 	case htons(ETH_P_IP):
-		return __br_ip4_hash(mdb, ip->u.ip4);
+		return __br_ip4_hash(mdb, ip->u.ip4, ip->vid);
 #if IS_ENABLED(CONFIG_IPV6)
 	case htons(ETH_P_IPV6):
-		return __br_ip6_hash(mdb, &ip->u.ip6);
+		return __br_ip6_hash(mdb, &ip->u.ip6, ip->vid);
 #endif
 	}
 	return 0;
@@ -101,24 +106,27 @@ struct net_bridge_mdb_entry *br_mdb_ip_get(struct net_bridge_mdb_htable *mdb,
 }
 
 static struct net_bridge_mdb_entry *br_mdb_ip4_get(
-	struct net_bridge_mdb_htable *mdb, __be32 dst)
+	struct net_bridge_mdb_htable *mdb, __be32 dst, __u16 vid)
 {
 	struct br_ip br_dst;
 
 	br_dst.u.ip4 = dst;
 	br_dst.proto = htons(ETH_P_IP);
+	br_dst.vid = vid;
 
 	return br_mdb_ip_get(mdb, &br_dst);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
 static struct net_bridge_mdb_entry *br_mdb_ip6_get(
-	struct net_bridge_mdb_htable *mdb, const struct in6_addr *dst)
+	struct net_bridge_mdb_htable *mdb, const struct in6_addr *dst,
+	__u16 vid)
 {
 	struct br_ip br_dst;
 
 	br_dst.u.ip6 = *dst;
 	br_dst.proto = htons(ETH_P_IPV6);
+	br_dst.vid = vid;
 
 	return br_mdb_ip_get(mdb, &br_dst);
 }
@@ -694,7 +702,8 @@ err:
 
 static int br_ip4_multicast_add_group(struct net_bridge *br,
 				      struct net_bridge_port *port,
-				      __be32 group)
+				      __be32 group,
+				      __u16 vid)
 {
 	struct br_ip br_group;
 
@@ -703,6 +712,7 @@ static int br_ip4_multicast_add_group(struct net_bridge *br,
 
 	br_group.u.ip4 = group;
 	br_group.proto = htons(ETH_P_IP);
+	br_group.vid = vid;
 
 	return br_multicast_add_group(br, port, &br_group);
 }
@@ -710,7 +720,8 @@ static int br_ip4_multicast_add_group(struct net_bridge *br,
 #if IS_ENABLED(CONFIG_IPV6)
 static int br_ip6_multicast_add_group(struct net_bridge *br,
 				      struct net_bridge_port *port,
-				      const struct in6_addr *group)
+				      const struct in6_addr *group,
+				      __u16 vid)
 {
 	struct br_ip br_group;
 
@@ -719,6 +730,7 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
 
 	br_group.u.ip6 = *group;
 	br_group.proto = htons(ETH_P_IPV6);
+	br_group.vid = vid;
 
 	return br_multicast_add_group(br, port, &br_group);
 }
@@ -895,10 +907,12 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
 	int type;
 	int err = 0;
 	__be32 group;
+	u16 vid = 0;
 
 	if (!pskb_may_pull(skb, sizeof(*ih)))
 		return -EINVAL;
 
+	br_vlan_get_tag(skb, &vid);
 	ih = igmpv3_report_hdr(skb);
 	num = ntohs(ih->ngrec);
 	len = sizeof(*ih);
@@ -930,7 +944,7 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
 			continue;
 		}
 
-		err = br_ip4_multicast_add_group(br, port, group);
+		err = br_ip4_multicast_add_group(br, port, group, vid);
 		if (err)
 			break;
 	}
@@ -949,10 +963,12 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
 	int len;
 	int num;
 	int err = 0;
+	u16 vid = 0;
 
 	if (!pskb_may_pull(skb, sizeof(*icmp6h)))
 		return -EINVAL;
 
+	br_vlan_get_tag(skb, &vid);
 	icmp6h = icmp6_hdr(skb);
 	num = ntohs(icmp6h->icmp6_dataun.un_data16[1]);
 	len = sizeof(*icmp6h);
@@ -990,7 +1006,8 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
 			continue;
 		}
 
-		err = br_ip6_multicast_add_group(br, port, &grec->grec_mca);
+		err = br_ip6_multicast_add_group(br, port, &grec->grec_mca,
+						 vid);
 		if (!err)
 			break;
 	}
@@ -1074,6 +1091,7 @@ static int br_ip4_multicast_query(struct net_bridge *br,
 	unsigned long now = jiffies;
 	__be32 group;
 	int err = 0;
+	u16 vid = 0;
 
 	spin_lock(&br->multicast_lock);
 	if (!netif_running(br->dev) ||
@@ -1108,7 +1126,8 @@ static int br_ip4_multicast_query(struct net_bridge *br,
 	if (!group)
 		goto out;
 
-	mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group);
+	br_vlan_get_tag(skb, &vid);
+	mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group, vid);
 	if (!mp)
 		goto out;
 
@@ -1149,6 +1168,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
 	unsigned long now = jiffies;
 	const struct in6_addr *group = NULL;
 	int err = 0;
+	u16 vid = 0;
 
 	spin_lock(&br->multicast_lock);
 	if (!netif_running(br->dev) ||
@@ -1180,7 +1200,8 @@ static int br_ip6_multicast_query(struct net_bridge *br,
 	if (!group)
 		goto out;
 
-	mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group);
+	br_vlan_get_tag(skb, &vid);
+	mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group, vid);
 	if (!mp)
 		goto out;
 
@@ -1286,7 +1307,8 @@ out:
 
 static void br_ip4_multicast_leave_group(struct net_bridge *br,
 					 struct net_bridge_port *port,
-					 __be32 group)
+					 __be32 group,
+					 __u16 vid)
 {
 	struct br_ip br_group;
 
@@ -1295,6 +1317,7 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br,
 
 	br_group.u.ip4 = group;
 	br_group.proto = htons(ETH_P_IP);
+	br_group.vid = vid;
 
 	br_multicast_leave_group(br, port, &br_group);
 }
@@ -1302,7 +1325,8 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br,
 #if IS_ENABLED(CONFIG_IPV6)
 static void br_ip6_multicast_leave_group(struct net_bridge *br,
 					 struct net_bridge_port *port,
-					 const struct in6_addr *group)
+					 const struct in6_addr *group,
+					 __u16 vid)
 {
 	struct br_ip br_group;
 
@@ -1311,6 +1335,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
 
 	br_group.u.ip6 = *group;
 	br_group.proto = htons(ETH_P_IPV6);
+	br_group.vid = vid;
 
 	br_multicast_leave_group(br, port, &br_group);
 }
@@ -1326,6 +1351,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
 	unsigned int len;
 	unsigned int offset;
 	int err;
+	u16 vid = 0;
 
 	/* We treat OOM as packet loss for now. */
 	if (!pskb_may_pull(skb, sizeof(*iph)))
@@ -1386,6 +1412,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
 
 	err = 0;
 
+	br_vlan_get_tag(skb2, &vid);
 	BR_INPUT_SKB_CB(skb)->igmp = 1;
 	ih = igmp_hdr(skb2);
 
@@ -1393,7 +1420,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
 	case IGMP_HOST_MEMBERSHIP_REPORT:
 	case IGMPV2_HOST_MEMBERSHIP_REPORT:
 		BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
-		err = br_ip4_multicast_add_group(br, port, ih->group);
+		err = br_ip4_multicast_add_group(br, port, ih->group, vid);
 		break;
 	case IGMPV3_HOST_MEMBERSHIP_REPORT:
 		err = br_ip4_multicast_igmp3_report(br, port, skb2);
@@ -1402,7 +1429,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
 		err = br_ip4_multicast_query(br, port, skb2);
 		break;
 	case IGMP_HOST_LEAVE_MESSAGE:
-		br_ip4_multicast_leave_group(br, port, ih->group);
+		br_ip4_multicast_leave_group(br, port, ih->group, vid);
 		break;
 	}
 
@@ -1427,6 +1454,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
 	unsigned int len;
 	int offset;
 	int err;
+	u16 vid = 0;
 
 	if (!pskb_may_pull(skb, sizeof(*ip6h)))
 		return -EINVAL;
@@ -1510,6 +1538,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
 
 	err = 0;
 
+	br_vlan_get_tag(skb, &vid);
 	BR_INPUT_SKB_CB(skb)->igmp = 1;
 
 	switch (icmp6_type) {
@@ -1522,7 +1551,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
 		}
 		mld = (struct mld_msg *)skb_transport_header(skb2);
 		BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
-		err = br_ip6_multicast_add_group(br, port, &mld->mld_mca);
+		err = br_ip6_multicast_add_group(br, port, &mld->mld_mca, vid);
 		break;
 	    }
 	case ICMPV6_MLD2_REPORT:
@@ -1539,7 +1568,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
 			goto out;
 		}
 		mld = (struct mld_msg *)skb_transport_header(skb2);
-		br_ip6_multicast_leave_group(br, port, &mld->mld_mca);
+		br_ip6_multicast_leave_group(br, port, &mld->mld_mca, vid);
 	    }
 	}
 
@@ -1608,7 +1637,6 @@ void br_multicast_init(struct net_bridge *br)
 		    br_multicast_querier_expired, (unsigned long)br);
 	setup_timer(&br->multicast_query_timer, br_multicast_query_expired,
 		    (unsigned long)br);
-	br_mdb_init();
 }
 
 void br_multicast_open(struct net_bridge *br)
@@ -1633,7 +1661,6 @@ void br_multicast_stop(struct net_bridge *br)
 	del_timer_sync(&br->multicast_querier_timer);
 	del_timer_sync(&br->multicast_query_timer);
 
-	br_mdb_uninit();
 	spin_lock_bh(&br->multicast_lock);
 	mdb = mlock_dereference(br->mdb, br);
 	if (!mdb)
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 97ba0189c6f..27aa3ee517c 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -16,6 +16,7 @@
 #include <net/rtnetlink.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
+#include <uapi/linux/if_bridge.h>
 
 #include "br_private.h"
 #include "br_private_stp.h"
@@ -64,15 +65,21 @@ static int br_port_fill_attrs(struct sk_buff *skb,
  * Create one netlink message for one interface
  * Contains port and master info as well as carrier and bridge state.
  */
-static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *port,
-			  u32 pid, u32 seq, int event, unsigned int flags)
+static int br_fill_ifinfo(struct sk_buff *skb,
+			  const struct net_bridge_port *port,
+			  u32 pid, u32 seq, int event, unsigned int flags,
+			  u32 filter_mask, const struct net_device *dev)
 {
-	const struct net_bridge *br = port->br;
-	const struct net_device *dev = port->dev;
+	const struct net_bridge *br;
 	struct ifinfomsg *hdr;
 	struct nlmsghdr *nlh;
 	u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
 
+	if (port)
+		br = port->br;
+	else
+		br = netdev_priv(dev);
+
 	br_debug(br, "br_fill_info event %d port %s master %s\n",
 		     event, dev->name, br->dev->name);
 
@@ -98,7 +105,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por
 	     nla_put_u32(skb, IFLA_LINK, dev->iflink)))
 		goto nla_put_failure;
 
-	if (event == RTM_NEWLINK) {
+	if (event == RTM_NEWLINK && port) {
 		struct nlattr *nest
 			= nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED);
 
@@ -107,6 +114,48 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por
 		nla_nest_end(skb, nest);
 	}
 
+	/* Check if  the VID information is requested */
+	if (filter_mask & RTEXT_FILTER_BRVLAN) {
+		struct nlattr *af;
+		const struct net_port_vlans *pv;
+		struct bridge_vlan_info vinfo;
+		u16 vid;
+		u16 pvid;
+
+		if (port)
+			pv = nbp_get_vlan_info(port);
+		else
+			pv = br_get_vlan_info(br);
+
+		if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN))
+			goto done;
+
+		af = nla_nest_start(skb, IFLA_AF_SPEC);
+		if (!af)
+			goto nla_put_failure;
+
+		pvid = br_get_pvid(pv);
+		for (vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN);
+		     vid < BR_VLAN_BITMAP_LEN;
+		     vid = find_next_bit(pv->vlan_bitmap,
+					 BR_VLAN_BITMAP_LEN, vid+1)) {
+			vinfo.vid = vid;
+			vinfo.flags = 0;
+			if (vid == pvid)
+				vinfo.flags |= BRIDGE_VLAN_INFO_PVID;
+
+			if (test_bit(vid, pv->untagged_bitmap))
+				vinfo.flags |= BRIDGE_VLAN_INFO_UNTAGGED;
+
+			if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO,
+				    sizeof(vinfo), &vinfo))
+				goto nla_put_failure;
+		}
+
+		nla_nest_end(skb, af);
+	}
+
+done:
 	return nlmsg_end(skb, nlh);
 
 nla_put_failure:
@@ -119,10 +168,14 @@ nla_put_failure:
  */
 void br_ifinfo_notify(int event, struct net_bridge_port *port)
 {
-	struct net *net = dev_net(port->dev);
+	struct net *net;
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
 
+	if (!port)
+		return;
+
+	net = dev_net(port->dev);
 	br_debug(port->br, "port %u(%s) event %d\n",
 		 (unsigned int)port->port_no, port->dev->name, event);
 
@@ -130,7 +183,7 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port)
 	if (skb == NULL)
 		goto errout;
 
-	err = br_fill_ifinfo(skb, port, 0, 0, event, 0);
+	err = br_fill_ifinfo(skb, port, 0, 0, event, 0, 0, port->dev);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in br_nlmsg_size() */
 		WARN_ON(err == -EMSGSIZE);
@@ -144,24 +197,85 @@ errout:
 		rtnl_set_sk_err(net, RTNLGRP_LINK, err);
 }
 
+
 /*
  * Dump information about all ports, in response to GETLINK
  */
 int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
-	       struct net_device *dev)
+	       struct net_device *dev, u32 filter_mask)
 {
 	int err = 0;
 	struct net_bridge_port *port = br_port_get_rcu(dev);
 
-	/* not a bridge port */
-	if (!port)
+	/* not a bridge port and  */
+	if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN))
 		goto out;
 
-	err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI);
+	err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI,
+			     filter_mask, dev);
 out:
 	return err;
 }
 
+static const struct nla_policy ifla_br_policy[IFLA_MAX+1] = {
+	[IFLA_BRIDGE_FLAGS]	= { .type = NLA_U16 },
+	[IFLA_BRIDGE_MODE]	= { .type = NLA_U16 },
+	[IFLA_BRIDGE_VLAN_INFO]	= { .type = NLA_BINARY,
+				    .len = sizeof(struct bridge_vlan_info), },
+};
+
+static int br_afspec(struct net_bridge *br,
+		     struct net_bridge_port *p,
+		     struct nlattr *af_spec,
+		     int cmd)
+{
+	struct nlattr *tb[IFLA_BRIDGE_MAX+1];
+	int err = 0;
+
+	err = nla_parse_nested(tb, IFLA_BRIDGE_MAX, af_spec, ifla_br_policy);
+	if (err)
+		return err;
+
+	if (tb[IFLA_BRIDGE_VLAN_INFO]) {
+		struct bridge_vlan_info *vinfo;
+
+		vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]);
+
+		if (vinfo->vid >= VLAN_N_VID)
+			return -EINVAL;
+
+		switch (cmd) {
+		case RTM_SETLINK:
+			if (p) {
+				err = nbp_vlan_add(p, vinfo->vid, vinfo->flags);
+				if (err)
+					break;
+
+				if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER)
+					err = br_vlan_add(p->br, vinfo->vid,
+							  vinfo->flags);
+			} else
+				err = br_vlan_add(br, vinfo->vid, vinfo->flags);
+
+			if (err)
+				break;
+
+			break;
+
+		case RTM_DELLINK:
+			if (p) {
+				nbp_vlan_delete(p, vinfo->vid);
+				if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER)
+					br_vlan_delete(p->br, vinfo->vid);
+			} else
+				br_vlan_delete(br, vinfo->vid);
+			break;
+		}
+	}
+
+	return err;
+}
+
 static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = {
 	[IFLA_BRPORT_STATE]	= { .type = NLA_U8 },
 	[IFLA_BRPORT_COST]	= { .type = NLA_U32 },
@@ -181,8 +295,11 @@ static int br_set_port_state(struct net_bridge_port *p, u8 state)
 	if (p->br->stp_enabled == BR_KERNEL_STP)
 		return -EBUSY;
 
+	/* if device is not up, change is not allowed
+	 * if link is not present, only allowable state is disabled
+	 */
 	if (!netif_running(p->dev) ||
-	    (!netif_carrier_ok(p->dev) && state != BR_STATE_DISABLED))
+	    (!netif_oper_up(p->dev) && state != BR_STATE_DISABLED))
 		return -ENETDOWN;
 
 	p->state = state;
@@ -238,6 +355,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh)
 {
 	struct ifinfomsg *ifm;
 	struct nlattr *protinfo;
+	struct nlattr *afspec;
 	struct net_bridge_port *p;
 	struct nlattr *tb[IFLA_BRPORT_MAX + 1];
 	int err;
@@ -245,38 +363,76 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh)
 	ifm = nlmsg_data(nlh);
 
 	protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO);
-	if (!protinfo)
+	afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC);
+	if (!protinfo && !afspec)
 		return 0;
 
 	p = br_port_get_rtnl(dev);
-	if (!p)
+	/* We want to accept dev as bridge itself if the AF_SPEC
+	 * is set to see if someone is setting vlan info on the brigde
+	 */
+	if (!p && ((dev->priv_flags & IFF_EBRIDGE) && !afspec))
 		return -EINVAL;
 
-	if (protinfo->nla_type & NLA_F_NESTED) {
-		err = nla_parse_nested(tb, IFLA_BRPORT_MAX,
-				       protinfo, ifla_brport_policy);
+	if (p && protinfo) {
+		if (protinfo->nla_type & NLA_F_NESTED) {
+			err = nla_parse_nested(tb, IFLA_BRPORT_MAX,
+					       protinfo, ifla_brport_policy);
+			if (err)
+				return err;
+
+			spin_lock_bh(&p->br->lock);
+			err = br_setport(p, tb);
+			spin_unlock_bh(&p->br->lock);
+		} else {
+			/* Binary compatability with old RSTP */
+			if (nla_len(protinfo) < sizeof(u8))
+				return -EINVAL;
+
+			spin_lock_bh(&p->br->lock);
+			err = br_set_port_state(p, nla_get_u8(protinfo));
+			spin_unlock_bh(&p->br->lock);
+		}
 		if (err)
-			return err;
-
-		spin_lock_bh(&p->br->lock);
-		err = br_setport(p, tb);
-		spin_unlock_bh(&p->br->lock);
-	} else {
-		/* Binary compatability with old RSTP */
-		if (nla_len(protinfo) < sizeof(u8))
-			return -EINVAL;
+			goto out;
+	}
 
-		spin_lock_bh(&p->br->lock);
-		err = br_set_port_state(p, nla_get_u8(protinfo));
-		spin_unlock_bh(&p->br->lock);
+	if (afspec) {
+		err = br_afspec((struct net_bridge *)netdev_priv(dev), p,
+				afspec, RTM_SETLINK);
 	}
 
 	if (err == 0)
 		br_ifinfo_notify(RTM_NEWLINK, p);
 
+out:
 	return err;
 }
 
+/* Delete port information */
+int br_dellink(struct net_device *dev, struct nlmsghdr *nlh)
+{
+	struct ifinfomsg *ifm;
+	struct nlattr *afspec;
+	struct net_bridge_port *p;
+	int err;
+
+	ifm = nlmsg_data(nlh);
+
+	afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC);
+	if (!afspec)
+		return 0;
+
+	p = br_port_get_rtnl(dev);
+	/* We want to accept dev as bridge itself as well */
+	if (!p && !(dev->priv_flags & IFF_EBRIDGE))
+		return -EINVAL;
+
+	err = br_afspec((struct net_bridge *)netdev_priv(dev), p,
+			afspec, RTM_DELLINK);
+
+	return err;
+}
 static int br_validate(struct nlattr *tb[], struct nlattr *data[])
 {
 	if (tb[IFLA_ADDRESS]) {
@@ -289,6 +445,29 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[])
 	return 0;
 }
 
+static size_t br_get_link_af_size(const struct net_device *dev)
+{
+	struct net_port_vlans *pv;
+
+	if (br_port_exists(dev))
+		pv = nbp_get_vlan_info(br_port_get_rcu(dev));
+	else if (dev->priv_flags & IFF_EBRIDGE)
+		pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev));
+	else
+		return 0;
+
+	if (!pv)
+		return 0;
+
+	/* Each VLAN is returned in bridge_vlan_info along with flags */
+	return pv->num_vlans * nla_total_size(sizeof(struct bridge_vlan_info));
+}
+
+static struct rtnl_af_ops br_af_ops = {
+	.family			= AF_BRIDGE,
+	.get_link_af_size	= br_get_link_af_size,
+};
+
 struct rtnl_link_ops br_link_ops __read_mostly = {
 	.kind		= "bridge",
 	.priv_size	= sizeof(struct net_bridge),
@@ -299,10 +478,29 @@ struct rtnl_link_ops br_link_ops __read_mostly = {
 
 int __init br_netlink_init(void)
 {
-	return rtnl_link_register(&br_link_ops);
+	int err;
+
+	br_mdb_init();
+	err = rtnl_af_register(&br_af_ops);
+	if (err)
+		goto out;
+
+	err = rtnl_link_register(&br_link_ops);
+	if (err)
+		goto out_af;
+
+	return 0;
+
+out_af:
+	rtnl_af_unregister(&br_af_ops);
+out:
+	br_mdb_uninit();
+	return err;
 }
 
 void __exit br_netlink_fini(void)
 {
+	br_mdb_uninit();
+	rtnl_af_unregister(&br_af_ops);
 	rtnl_link_unregister(&br_link_ops);
 }
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index a76b6213555..1644b3e1f94 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -82,7 +82,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
 		break;
 
 	case NETDEV_UP:
-		if (netif_carrier_ok(dev) && (br->dev->flags & IFF_UP)) {
+		if (netif_running(br->dev) && netif_oper_up(dev)) {
 			spin_lock_bh(&br->lock);
 			br_stp_enable_port(p);
 			spin_unlock_bh(&br->lock);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 8d83be5ffed..6d314c4e6bc 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -18,6 +18,7 @@
 #include <linux/netpoll.h>
 #include <linux/u64_stats_sync.h>
 #include <net/route.h>
+#include <linux/if_vlan.h>
 
 #define BR_HASH_BITS 8
 #define BR_HASH_SIZE (1 << BR_HASH_BITS)
@@ -26,6 +27,7 @@
 
 #define BR_PORT_BITS	10
 #define BR_MAX_PORTS	(1<<BR_PORT_BITS)
+#define BR_VLAN_BITMAP_LEN	BITS_TO_LONGS(VLAN_N_VID)
 
 #define BR_VERSION	"2.3"
 
@@ -61,6 +63,20 @@ struct br_ip
 #endif
 	} u;
 	__be16		proto;
+	__u16		vid;
+};
+
+struct net_port_vlans {
+	u16				port_idx;
+	u16				pvid;
+	union {
+		struct net_bridge_port		*port;
+		struct net_bridge		*br;
+	}				parent;
+	struct rcu_head			rcu;
+	unsigned long			vlan_bitmap[BR_VLAN_BITMAP_LEN];
+	unsigned long			untagged_bitmap[BR_VLAN_BITMAP_LEN];
+	u16				num_vlans;
 };
 
 struct net_bridge_fdb_entry
@@ -74,6 +90,7 @@ struct net_bridge_fdb_entry
 	mac_addr			addr;
 	unsigned char			is_local;
 	unsigned char			is_static;
+	__u16				vlan_id;
 };
 
 struct net_bridge_port_group {
@@ -156,6 +173,9 @@ struct net_bridge_port
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	struct netpoll			*np;
 #endif
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
+	struct net_port_vlans __rcu	*vlan_info;
+#endif
 };
 
 #define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT)
@@ -197,9 +217,6 @@ struct net_bridge
 	bool				nf_call_ip6tables;
 	bool				nf_call_arptables;
 #endif
-	unsigned long			flags;
-#define BR_SET_MAC_ADDR		0x00000001
-
 	u16				group_fwd_mask;
 
 	/* STP */
@@ -260,6 +277,10 @@ struct net_bridge
 	struct timer_list		topology_change_timer;
 	struct timer_list		gc_timer;
 	struct kobject			*ifobj;
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
+	u8				vlan_enabled;
+	struct net_port_vlans __rcu	*vlan_info;
+#endif
 };
 
 struct br_input_skb_cb {
@@ -355,18 +376,22 @@ extern void br_fdb_cleanup(unsigned long arg);
 extern void br_fdb_delete_by_port(struct net_bridge *br,
 				  const struct net_bridge_port *p, int do_all);
 extern struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
-						 const unsigned char *addr);
+						 const unsigned char *addr,
+						 __u16 vid);
 extern int br_fdb_test_addr(struct net_device *dev, unsigned char *addr);
 extern int br_fdb_fillbuf(struct net_bridge *br, void *buf,
 			  unsigned long count, unsigned long off);
 extern int br_fdb_insert(struct net_bridge *br,
 			 struct net_bridge_port *source,
-			 const unsigned char *addr);
+			 const unsigned char *addr,
+			 u16 vid);
 extern void br_fdb_update(struct net_bridge *br,
 			  struct net_bridge_port *source,
-			  const unsigned char *addr);
+			  const unsigned char *addr,
+			  u16 vid);
+extern int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, u16 vid);
 
-extern int br_fdb_delete(struct ndmsg *ndm,
+extern int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
 			 struct net_device *dev,
 			 const unsigned char *addr);
 extern int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[],
@@ -526,6 +551,148 @@ static inline bool br_multicast_is_router(struct net_bridge *br)
 {
 	return 0;
 }
+static inline void br_mdb_init(void)
+{
+}
+static inline void br_mdb_uninit(void)
+{
+}
+#endif
+
+/* br_vlan.c */
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
+extern bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
+			       struct sk_buff *skb, u16 *vid);
+extern bool br_allowed_egress(struct net_bridge *br,
+			      const struct net_port_vlans *v,
+			      const struct sk_buff *skb);
+extern struct sk_buff *br_handle_vlan(struct net_bridge *br,
+				      const struct net_port_vlans *v,
+				      struct sk_buff *skb);
+extern int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags);
+extern int br_vlan_delete(struct net_bridge *br, u16 vid);
+extern void br_vlan_flush(struct net_bridge *br);
+extern int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val);
+extern int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags);
+extern int nbp_vlan_delete(struct net_bridge_port *port, u16 vid);
+extern void nbp_vlan_flush(struct net_bridge_port *port);
+extern bool nbp_vlan_find(struct net_bridge_port *port, u16 vid);
+
+static inline struct net_port_vlans *br_get_vlan_info(
+						const struct net_bridge *br)
+{
+	return rcu_dereference_rtnl(br->vlan_info);
+}
+
+static inline struct net_port_vlans *nbp_get_vlan_info(
+						const struct net_bridge_port *p)
+{
+	return rcu_dereference_rtnl(p->vlan_info);
+}
+
+/* Since bridge now depends on 8021Q module, but the time bridge sees the
+ * skb, the vlan tag will always be present if the frame was tagged.
+ */
+static inline int br_vlan_get_tag(const struct sk_buff *skb, u16 *vid)
+{
+	int err = 0;
+
+	if (vlan_tx_tag_present(skb))
+		*vid = vlan_tx_tag_get(skb) & VLAN_VID_MASK;
+	else {
+		*vid = 0;
+		err = -EINVAL;
+	}
+
+	return err;
+}
+
+static inline u16 br_get_pvid(const struct net_port_vlans *v)
+{
+	/* Return just the VID if it is set, or VLAN_N_VID (invalid vid) if
+	 * vid wasn't set
+	 */
+	smp_rmb();
+	return (v->pvid & VLAN_TAG_PRESENT) ?
+			(v->pvid & ~VLAN_TAG_PRESENT) :
+			VLAN_N_VID;
+}
+
+#else
+static inline bool br_allowed_ingress(struct net_bridge *br,
+				      struct net_port_vlans *v,
+				      struct sk_buff *skb,
+				      u16 *vid)
+{
+	return true;
+}
+
+static inline bool br_allowed_egress(struct net_bridge *br,
+				     const struct net_port_vlans *v,
+				     const struct sk_buff *skb)
+{
+	return true;
+}
+
+static inline struct sk_buff *br_handle_vlan(struct net_bridge *br,
+					     const struct net_port_vlans *v,
+					     struct sk_buff *skb)
+{
+	return skb;
+}
+
+static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int br_vlan_delete(struct net_bridge *br, u16 vid)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void br_vlan_flush(struct net_bridge *br)
+{
+}
+
+static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int nbp_vlan_delete(struct net_bridge_port *port, u16 vid)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void nbp_vlan_flush(struct net_bridge_port *port)
+{
+}
+
+static inline struct net_port_vlans *br_get_vlan_info(
+						const struct net_bridge *br)
+{
+	return NULL;
+}
+static inline struct net_port_vlans *nbp_get_vlan_info(
+						const struct net_bridge_port *p)
+{
+	return NULL;
+}
+
+static inline bool nbp_vlan_find(struct net_bridge_port *port, u16 vid)
+{
+	return false;
+}
+
+static inline u16 br_vlan_get_tag(const struct sk_buff *skb, u16 *tag)
+{
+	return 0;
+}
+static inline u16 br_get_pvid(const struct net_port_vlans *v)
+{
+	return VLAN_N_VID;	/* Returns invalid vid */
+}
 #endif
 
 /* br_netfilter.c */
@@ -588,8 +755,9 @@ extern int br_netlink_init(void);
 extern void br_netlink_fini(void);
 extern void br_ifinfo_notify(int event, struct net_bridge_port *port);
 extern int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg);
+extern int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg);
 extern int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
-		      struct net_device *dev);
+		      struct net_device *dev, u32 filter_mask);
 
 #ifdef CONFIG_SYSFS
 /* br_sysfs_if.c */
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index 7f884e3fb95..8660ea3be70 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -16,6 +16,7 @@
 #include <linux/etherdevice.h>
 #include <linux/llc.h>
 #include <linux/slab.h>
+#include <linux/pkt_sched.h>
 #include <net/net_namespace.h>
 #include <net/llc.h>
 #include <net/llc_pdu.h>
@@ -40,6 +41,7 @@ static void br_send_bpdu(struct net_bridge_port *p,
 
 	skb->dev = p->dev;
 	skb->protocol = htons(ETH_P_802_2);
+	skb->priority = TC_PRIO_CONTROL;
 
 	skb_reserve(skb, LLC_RESERVE);
 	memcpy(__skb_put(skb, length), data, length);
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 9d5a414a394..0bdb4ebd362 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -54,7 +54,7 @@ void br_stp_enable_bridge(struct net_bridge *br)
 	br_config_bpdu_generation(br);
 
 	list_for_each_entry(p, &br->port_list, list) {
-		if ((p->dev->flags & IFF_UP) && netif_carrier_ok(p->dev))
+		if (netif_running(p->dev) && netif_oper_up(p->dev))
 			br_stp_enable_port(p);
 
 	}
@@ -216,7 +216,7 @@ bool br_stp_recalculate_bridge_id(struct net_bridge *br)
 	struct net_bridge_port *p;
 
 	/* user has chosen a value so keep it */
-	if (br->flags & BR_SET_MAC_ADDR)
+	if (br->dev->addr_assign_type == NET_ADDR_SET)
 		return false;
 
 	list_for_each_entry(p, &br->port_list, list) {
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 5913a3a0047..8baa9c08e1a 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -691,6 +691,24 @@ static ssize_t store_nf_call_arptables(
 static DEVICE_ATTR(nf_call_arptables, S_IRUGO | S_IWUSR,
 		   show_nf_call_arptables, store_nf_call_arptables);
 #endif
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
+static ssize_t show_vlan_filtering(struct device *d,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	struct net_bridge *br = to_bridge(d);
+	return sprintf(buf, "%d\n", br->vlan_enabled);
+}
+
+static ssize_t store_vlan_filtering(struct device *d,
+				    struct device_attribute *attr,
+				    const char *buf, size_t len)
+{
+	return store_bridge_parm(d, buf, len, br_vlan_filter_toggle);
+}
+static DEVICE_ATTR(vlan_filtering, S_IRUGO | S_IWUSR,
+		   show_vlan_filtering, store_vlan_filtering);
+#endif
 
 static struct attribute *bridge_attrs[] = {
 	&dev_attr_forward_delay.attr,
@@ -732,6 +750,9 @@ static struct attribute *bridge_attrs[] = {
 	&dev_attr_nf_call_ip6tables.attr,
 	&dev_attr_nf_call_arptables.attr,
 #endif
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
+	&dev_attr_vlan_filtering.attr,
+#endif
 	NULL
 };
 
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
new file mode 100644
index 00000000000..93dde75923f
--- /dev/null
+++ b/net/bridge/br_vlan.c
@@ -0,0 +1,415 @@
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+
+#include "br_private.h"
+
+static void __vlan_add_pvid(struct net_port_vlans *v, u16 vid)
+{
+	if (v->pvid == vid)
+		return;
+
+	smp_wmb();
+	v->pvid = vid;
+}
+
+static void __vlan_delete_pvid(struct net_port_vlans *v, u16 vid)
+{
+	if (v->pvid != vid)
+		return;
+
+	smp_wmb();
+	v->pvid = 0;
+}
+
+static void __vlan_add_flags(struct net_port_vlans *v, u16 vid, u16 flags)
+{
+	if (flags & BRIDGE_VLAN_INFO_PVID)
+		__vlan_add_pvid(v, vid);
+
+	if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
+		set_bit(vid, v->untagged_bitmap);
+}
+
+static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
+{
+	struct net_bridge_port *p = NULL;
+	struct net_bridge *br;
+	struct net_device *dev;
+	int err;
+
+	if (test_bit(vid, v->vlan_bitmap)) {
+		__vlan_add_flags(v, vid, flags);
+		return 0;
+	}
+
+	if (vid) {
+		if (v->port_idx) {
+			p = v->parent.port;
+			br = p->br;
+			dev = p->dev;
+		} else {
+			br = v->parent.br;
+			dev = br->dev;
+		}
+
+		if (p && (dev->features & NETIF_F_HW_VLAN_FILTER)) {
+			/* Add VLAN to the device filter if it is supported.
+			 * Stricly speaking, this is not necessary now, since
+			 * devices are made promiscuous by the bridge, but if
+			 * that ever changes this code will allow tagged
+			 * traffic to enter the bridge.
+			 */
+			err = dev->netdev_ops->ndo_vlan_rx_add_vid(dev, vid);
+			if (err)
+				return err;
+		}
+
+		err = br_fdb_insert(br, p, dev->dev_addr, vid);
+		if (err) {
+			br_err(br, "failed insert local address into bridge "
+			       "forwarding table\n");
+			goto out_filt;
+		}
+
+	}
+
+	set_bit(vid, v->vlan_bitmap);
+	v->num_vlans++;
+	__vlan_add_flags(v, vid, flags);
+
+	return 0;
+
+out_filt:
+	if (p && (dev->features & NETIF_F_HW_VLAN_FILTER))
+		dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, vid);
+	return err;
+}
+
+static int __vlan_del(struct net_port_vlans *v, u16 vid)
+{
+	if (!test_bit(vid, v->vlan_bitmap))
+		return -EINVAL;
+
+	__vlan_delete_pvid(v, vid);
+	clear_bit(vid, v->untagged_bitmap);
+
+	if (v->port_idx && vid) {
+		struct net_device *dev = v->parent.port->dev;
+
+		if (dev->features & NETIF_F_HW_VLAN_FILTER)
+			dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, vid);
+	}
+
+	clear_bit(vid, v->vlan_bitmap);
+	v->num_vlans--;
+	if (bitmap_empty(v->vlan_bitmap, BR_VLAN_BITMAP_LEN)) {
+		if (v->port_idx)
+			rcu_assign_pointer(v->parent.port->vlan_info, NULL);
+		else
+			rcu_assign_pointer(v->parent.br->vlan_info, NULL);
+		kfree_rcu(v, rcu);
+	}
+	return 0;
+}
+
+static void __vlan_flush(struct net_port_vlans *v)
+{
+	smp_wmb();
+	v->pvid = 0;
+	bitmap_zero(v->vlan_bitmap, BR_VLAN_BITMAP_LEN);
+	if (v->port_idx)
+		rcu_assign_pointer(v->parent.port->vlan_info, NULL);
+	else
+		rcu_assign_pointer(v->parent.br->vlan_info, NULL);
+	kfree_rcu(v, rcu);
+}
+
+/* Strip the tag from the packet.  Will return skb with tci set 0.  */
+static struct sk_buff *br_vlan_untag(struct sk_buff *skb)
+{
+	if (skb->protocol != htons(ETH_P_8021Q)) {
+		skb->vlan_tci = 0;
+		return skb;
+	}
+
+	skb->vlan_tci = 0;
+	skb = vlan_untag(skb);
+	if (skb)
+		skb->vlan_tci = 0;
+
+	return skb;
+}
+
+struct sk_buff *br_handle_vlan(struct net_bridge *br,
+			       const struct net_port_vlans *pv,
+			       struct sk_buff *skb)
+{
+	u16 vid;
+
+	if (!br->vlan_enabled)
+		goto out;
+
+	/* At this point, we know that the frame was filtered and contains
+	 * a valid vlan id.  If the vlan id is set in the untagged bitmap,
+	 * send untagged; otherwise, send taged.
+	 */
+	br_vlan_get_tag(skb, &vid);
+	if (test_bit(vid, pv->untagged_bitmap))
+		skb = br_vlan_untag(skb);
+	else {
+		/* Egress policy says "send tagged".  If output device
+		 * is the  bridge, we need to add the VLAN header
+		 * ourselves since we'll be going through the RX path.
+		 * Sending to ports puts the frame on the TX path and
+		 * we let dev_hard_start_xmit() add the header.
+		 */
+		if (skb->protocol != htons(ETH_P_8021Q) &&
+		    pv->port_idx == 0) {
+			/* vlan_put_tag expects skb->data to point to
+			 * mac header.
+			 */
+			skb_push(skb, ETH_HLEN);
+			skb = __vlan_put_tag(skb, skb->vlan_tci);
+			if (!skb)
+				goto out;
+			/* put skb->data back to where it was */
+			skb_pull(skb, ETH_HLEN);
+			skb->vlan_tci = 0;
+		}
+	}
+
+out:
+	return skb;
+}
+
+/* Called under RCU */
+bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
+			struct sk_buff *skb, u16 *vid)
+{
+	/* If VLAN filtering is disabled on the bridge, all packets are
+	 * permitted.
+	 */
+	if (!br->vlan_enabled)
+		return true;
+
+	/* If there are no vlan in the permitted list, all packets are
+	 * rejected.
+	 */
+	if (!v)
+		return false;
+
+	if (br_vlan_get_tag(skb, vid)) {
+		u16 pvid = br_get_pvid(v);
+
+		/* Frame did not have a tag.  See if pvid is set
+		 * on this port.  That tells us which vlan untagged
+		 * traffic belongs to.
+		 */
+		if (pvid == VLAN_N_VID)
+			return false;
+
+		/* PVID is set on this port.  Any untagged ingress
+		 * frame is considered to belong to this vlan.
+		 */
+		__vlan_hwaccel_put_tag(skb, pvid);
+		return true;
+	}
+
+	/* Frame had a valid vlan tag.  See if vlan is allowed */
+	if (test_bit(*vid, v->vlan_bitmap))
+		return true;
+
+	return false;
+}
+
+/* Called under RCU. */
+bool br_allowed_egress(struct net_bridge *br,
+		       const struct net_port_vlans *v,
+		       const struct sk_buff *skb)
+{
+	u16 vid;
+
+	if (!br->vlan_enabled)
+		return true;
+
+	if (!v)
+		return false;
+
+	br_vlan_get_tag(skb, &vid);
+	if (test_bit(vid, v->vlan_bitmap))
+		return true;
+
+	return false;
+}
+
+/* Must be protected by RTNL */
+int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
+{
+	struct net_port_vlans *pv = NULL;
+	int err;
+
+	ASSERT_RTNL();
+
+	pv = rtnl_dereference(br->vlan_info);
+	if (pv)
+		return __vlan_add(pv, vid, flags);
+
+	/* Create port vlan infomration
+	 */
+	pv = kzalloc(sizeof(*pv), GFP_KERNEL);
+	if (!pv)
+		return -ENOMEM;
+
+	pv->parent.br = br;
+	err = __vlan_add(pv, vid, flags);
+	if (err)
+		goto out;
+
+	rcu_assign_pointer(br->vlan_info, pv);
+	return 0;
+out:
+	kfree(pv);
+	return err;
+}
+
+/* Must be protected by RTNL */
+int br_vlan_delete(struct net_bridge *br, u16 vid)
+{
+	struct net_port_vlans *pv;
+
+	ASSERT_RTNL();
+
+	pv = rtnl_dereference(br->vlan_info);
+	if (!pv)
+		return -EINVAL;
+
+	if (vid) {
+		/* If the VID !=0 remove fdb for this vid. VID 0 is special
+		 * in that it's the default and is always there in the fdb.
+		 */
+		spin_lock_bh(&br->hash_lock);
+		fdb_delete_by_addr(br, br->dev->dev_addr, vid);
+		spin_unlock_bh(&br->hash_lock);
+	}
+
+	__vlan_del(pv, vid);
+	return 0;
+}
+
+void br_vlan_flush(struct net_bridge *br)
+{
+	struct net_port_vlans *pv;
+
+	ASSERT_RTNL();
+	pv = rtnl_dereference(br->vlan_info);
+	if (!pv)
+		return;
+
+	__vlan_flush(pv);
+}
+
+int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val)
+{
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	if (br->vlan_enabled == val)
+		goto unlock;
+
+	br->vlan_enabled = val;
+
+unlock:
+	rtnl_unlock();
+	return 0;
+}
+
+/* Must be protected by RTNL */
+int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
+{
+	struct net_port_vlans *pv = NULL;
+	int err;
+
+	ASSERT_RTNL();
+
+	pv = rtnl_dereference(port->vlan_info);
+	if (pv)
+		return __vlan_add(pv, vid, flags);
+
+	/* Create port vlan infomration
+	 */
+	pv = kzalloc(sizeof(*pv), GFP_KERNEL);
+	if (!pv) {
+		err = -ENOMEM;
+		goto clean_up;
+	}
+
+	pv->port_idx = port->port_no;
+	pv->parent.port = port;
+	err = __vlan_add(pv, vid, flags);
+	if (err)
+		goto clean_up;
+
+	rcu_assign_pointer(port->vlan_info, pv);
+	return 0;
+
+clean_up:
+	kfree(pv);
+	return err;
+}
+
+/* Must be protected by RTNL */
+int nbp_vlan_delete(struct net_bridge_port *port, u16 vid)
+{
+	struct net_port_vlans *pv;
+
+	ASSERT_RTNL();
+
+	pv = rtnl_dereference(port->vlan_info);
+	if (!pv)
+		return -EINVAL;
+
+	if (vid) {
+		/* If the VID !=0 remove fdb for this vid. VID 0 is special
+		 * in that it's the default and is always there in the fdb.
+		 */
+		spin_lock_bh(&port->br->hash_lock);
+		fdb_delete_by_addr(port->br, port->dev->dev_addr, vid);
+		spin_unlock_bh(&port->br->hash_lock);
+	}
+
+	return __vlan_del(pv, vid);
+}
+
+void nbp_vlan_flush(struct net_bridge_port *port)
+{
+	struct net_port_vlans *pv;
+
+	ASSERT_RTNL();
+
+	pv = rtnl_dereference(port->vlan_info);
+	if (!pv)
+		return;
+
+	__vlan_flush(pv);
+}
+
+bool nbp_vlan_find(struct net_bridge_port *port, u16 vid)
+{
+	struct net_port_vlans *pv;
+	bool found = false;
+
+	rcu_read_lock();
+	pv = rcu_dereference(port->vlan_info);
+
+	if (!pv)
+		goto out;
+
+	if (test_bit(vid, pv->vlan_bitmap))
+		found = true;
+
+out:
+	rcu_read_unlock();
+	return found;
+}
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 3476ec46974..3bf43f7bb9d 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -70,8 +70,7 @@ static void ulog_send(unsigned int nlgroup)
 {
 	ebt_ulog_buff_t *ub = &ulog_buffers[nlgroup];
 
-	if (timer_pending(&ub->timer))
-		del_timer(&ub->timer);
+	del_timer(&ub->timer);
 
 	if (!ub->skb)
 		return;
@@ -319,8 +318,7 @@ static void __exit ebt_ulog_fini(void)
 	xt_unregister_target(&ebt_ulog_tg_reg);
 	for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
 		ub = &ulog_buffers[i];
-		if (timer_pending(&ub->timer))
-			del_timer(&ub->timer);
+		del_timer(&ub->timer);
 		spin_lock_bh(&ub->lock);
 		if (ub->skb) {
 			kfree_skb(ub->skb);
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 5fe2ff3b01e..8d493c91a56 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1472,16 +1472,17 @@ static int do_ebt_set_ctl(struct sock *sk,
 	int cmd, void __user *user, unsigned int len)
 {
 	int ret;
+	struct net *net = sock_net(sk);
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 
 	switch(cmd) {
 	case EBT_SO_SET_ENTRIES:
-		ret = do_replace(sock_net(sk), user, len);
+		ret = do_replace(net, user, len);
 		break;
 	case EBT_SO_SET_COUNTERS:
-		ret = update_counters(sock_net(sk), user, len);
+		ret = update_counters(net, user, len);
 		break;
 	default:
 		ret = -EINVAL;
@@ -1494,14 +1495,15 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 	int ret;
 	struct ebt_replace tmp;
 	struct ebt_table *t;
+	struct net *net = sock_net(sk);
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 
 	if (copy_from_user(&tmp, user, sizeof(tmp)))
 		return -EFAULT;
 
-	t = find_table_lock(sock_net(sk), tmp.name, &ret, &ebt_mutex);
+	t = find_table_lock(net, tmp.name, &ret, &ebt_mutex);
 	if (!t)
 		return ret;
 
@@ -2279,16 +2281,17 @@ static int compat_do_ebt_set_ctl(struct sock *sk,
 		int cmd, void __user *user, unsigned int len)
 {
 	int ret;
+	struct net *net = sock_net(sk);
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 
 	switch (cmd) {
 	case EBT_SO_SET_ENTRIES:
-		ret = compat_do_replace(sock_net(sk), user, len);
+		ret = compat_do_replace(net, user, len);
 		break;
 	case EBT_SO_SET_COUNTERS:
-		ret = compat_update_counters(sock_net(sk), user, len);
+		ret = compat_update_counters(net, user, len);
 		break;
 	default:
 		ret = -EINVAL;
@@ -2302,8 +2305,9 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd,
 	int ret;
 	struct compat_ebt_replace tmp;
 	struct ebt_table *t;
+	struct net *net = sock_net(sk);
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 
 	/* try real handler in case userland supplied needed padding */
@@ -2314,7 +2318,7 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd,
 	if (copy_from_user(&tmp, user, sizeof(tmp)))
 		return -EFAULT;
 
-	t = find_table_lock(sock_net(sk), tmp.name, &ret, &ebt_mutex);
+	t = find_table_lock(net, tmp.name, &ret, &ebt_mutex);
 	if (!t)
 		return ret;
 
diff --git a/net/can/Kconfig b/net/can/Kconfig
index 03200699d27..a15c0e0d1fc 100644
--- a/net/can/Kconfig
+++ b/net/can/Kconfig
@@ -16,10 +16,11 @@ menuconfig CAN
 	  If you want CAN support you should say Y here and also to the
 	  specific driver for your controller(s) below.
 
+if CAN
+
 config CAN_RAW
 	tristate "Raw CAN Protocol (raw access with CAN-ID filtering)"
-	depends on CAN
-	default N
+	default y
 	---help---
 	  The raw CAN protocol option offers access to the CAN bus via
 	  the BSD socket API. You probably want to use the raw socket in
@@ -29,8 +30,7 @@ config CAN_RAW
 
 config CAN_BCM
 	tristate "Broadcast Manager CAN Protocol (with content filtering)"
-	depends on CAN
-	default N
+	default y
 	---help---
 	  The Broadcast Manager offers content filtering, timeout monitoring,
 	  sending of RTR frames, and cyclic CAN messages without permanent user
@@ -42,8 +42,7 @@ config CAN_BCM
 
 config CAN_GW
 	tristate "CAN Gateway/Router (with netlink configuration)"
-	depends on CAN
-	default N
+	default y
 	---help---
 	  The CAN Gateway/Router is used to route (and modify) CAN frames.
 	  It is based on the PF_CAN core infrastructure for msg filtering and
@@ -53,3 +52,5 @@ config CAN_GW
 	  by the netlink configuration interface known e.g. from iptables.
 
 source "drivers/net/can/Kconfig"
+
+endif
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 969b7cdff59..28e12d18f0f 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -54,6 +54,7 @@
 #include <linux/skbuff.h>
 #include <linux/can.h>
 #include <linux/can/core.h>
+#include <linux/can/skb.h>
 #include <linux/can/bcm.h>
 #include <linux/slab.h>
 #include <net/sock.h>
@@ -256,10 +257,13 @@ static void bcm_can_tx(struct bcm_op *op)
 		return;
 	}
 
-	skb = alloc_skb(CFSIZ, gfp_any());
+	skb = alloc_skb(CFSIZ + sizeof(struct can_skb_priv), gfp_any());
 	if (!skb)
 		goto out;
 
+	can_skb_reserve(skb);
+	can_skb_prv(skb)->ifindex = dev->ifindex;
+
 	memcpy(skb_put(skb, CFSIZ), cf, CFSIZ);
 
 	/* send with loopback */
@@ -1199,11 +1203,12 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
 	if (!ifindex)
 		return -ENODEV;
 
-	skb = alloc_skb(CFSIZ, GFP_KERNEL);
-
+	skb = alloc_skb(CFSIZ + sizeof(struct can_skb_priv), GFP_KERNEL);
 	if (!skb)
 		return -ENOMEM;
 
+	can_skb_reserve(skb);
+
 	err = memcpy_fromiovec(skb_put(skb, CFSIZ), msg->msg_iov, CFSIZ);
 	if (err < 0) {
 		kfree_skb(skb);
@@ -1216,6 +1221,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
 		return -ENODEV;
 	}
 
+	can_skb_prv(skb)->ifindex = dev->ifindex;
 	skb->dev = dev;
 	skb->sk  = sk;
 	err = can_send(skb, 1); /* send with loopback */
diff --git a/net/can/gw.c b/net/can/gw.c
index 574dda78eb0..c185fcd5e82 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -42,6 +42,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/types.h>
+#include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/rcupdate.h>
@@ -52,19 +53,31 @@
 #include <linux/skbuff.h>
 #include <linux/can.h>
 #include <linux/can/core.h>
+#include <linux/can/skb.h>
 #include <linux/can/gw.h>
 #include <net/rtnetlink.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
 
-#define CAN_GW_VERSION "20101209"
-static __initconst const char banner[] =
-	KERN_INFO "can: netlink gateway (rev " CAN_GW_VERSION ")\n";
+#define CAN_GW_VERSION "20130117"
+#define CAN_GW_NAME "can-gw"
 
 MODULE_DESCRIPTION("PF_CAN netlink gateway");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Oliver Hartkopp <oliver.hartkopp@volkswagen.de>");
-MODULE_ALIAS("can-gw");
+MODULE_ALIAS(CAN_GW_NAME);
+
+#define CGW_MIN_HOPS 1
+#define CGW_MAX_HOPS 6
+#define CGW_DEFAULT_HOPS 1
+
+static unsigned int max_hops __read_mostly = CGW_DEFAULT_HOPS;
+module_param(max_hops, uint, S_IRUGO);
+MODULE_PARM_DESC(max_hops,
+		 "maximum " CAN_GW_NAME " routing hops for CAN frames "
+		 "(valid values: " __stringify(CGW_MIN_HOPS) "-"
+		 __stringify(CGW_MAX_HOPS) " hops, "
+		 "default: " __stringify(CGW_DEFAULT_HOPS) ")");
 
 static HLIST_HEAD(cgw_list);
 static struct notifier_block notifier;
@@ -118,6 +131,7 @@ struct cgw_job {
 	struct rcu_head rcu;
 	u32 handled_frames;
 	u32 dropped_frames;
+	u32 deleted_frames;
 	struct cf_mod mod;
 	union {
 		/* CAN frame data source */
@@ -338,15 +352,38 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
 	struct sk_buff *nskb;
 	int modidx = 0;
 
-	/* do not handle already routed frames - see comment below */
-	if (skb_mac_header_was_set(skb))
+	/*
+	 * Do not handle CAN frames routed more than 'max_hops' times.
+	 * In general we should never catch this delimiter which is intended
+	 * to cover a misconfiguration protection (e.g. circular CAN routes).
+	 *
+	 * The Controller Area Network controllers only accept CAN frames with
+	 * correct CRCs - which are not visible in the controller registers.
+	 * According to skbuff.h documentation the csum_start element for IP
+	 * checksums is undefined/unsued when ip_summed == CHECKSUM_UNNECESSARY.
+	 * Only CAN skbs can be processed here which already have this property.
+	 */
+
+#define cgw_hops(skb) ((skb)->csum_start)
+
+	BUG_ON(skb->ip_summed != CHECKSUM_UNNECESSARY);
+
+	if (cgw_hops(skb) >= max_hops) {
+		/* indicate deleted frames due to misconfiguration */
+		gwj->deleted_frames++;
 		return;
+	}
 
 	if (!(gwj->dst.dev->flags & IFF_UP)) {
 		gwj->dropped_frames++;
 		return;
 	}
 
+	/* is sending the skb back to the incoming interface not allowed? */
+	if (!(gwj->flags & CGW_FLAGS_CAN_IIF_TX_OK) &&
+	    can_skb_prv(skb)->ifindex == gwj->dst.dev->ifindex)
+		return;
+
 	/*
 	 * clone the given skb, which has not been done in can_rcv()
 	 *
@@ -363,15 +400,8 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
 		return;
 	}
 
-	/*
-	 * Mark routed frames by setting some mac header length which is
-	 * not relevant for the CAN frames located in the skb->data section.
-	 *
-	 * As dev->header_ops is not set in CAN netdevices no one is ever
-	 * accessing the various header offsets in the CAN skbuffs anyway.
-	 * E.g. using the packet socket to read CAN frames is still working.
-	 */
-	skb_set_mac_header(nskb, 8);
+	/* put the incremented hop counter in the cloned skb */
+	cgw_hops(nskb) = cgw_hops(skb) + 1;
 	nskb->dev = gwj->dst.dev;
 
 	/* pointer to modifiable CAN frame */
@@ -472,6 +502,11 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type,
 			goto cancel;
 	}
 
+	if (gwj->deleted_frames) {
+		if (nla_put_u32(skb, CGW_DELETED, gwj->deleted_frames) < 0)
+			goto cancel;
+	}
+
 	/* check non default settings of attributes */
 
 	if (gwj->mod.modtype.and) {
@@ -771,6 +806,7 @@ static int cgw_create_job(struct sk_buff *skb,  struct nlmsghdr *nlh,
 
 	gwj->handled_frames = 0;
 	gwj->dropped_frames = 0;
+	gwj->deleted_frames = 0;
 	gwj->flags = r->flags;
 	gwj->gwtype = r->gwtype;
 
@@ -895,7 +931,11 @@ static int cgw_remove_job(struct sk_buff *skb,  struct nlmsghdr *nlh, void *arg)
 
 static __init int cgw_module_init(void)
 {
-	printk(banner);
+	/* sanitize given module parameter */
+	max_hops = clamp_t(unsigned int, max_hops, CGW_MIN_HOPS, CGW_MAX_HOPS);
+
+	pr_info("can: netlink gateway (rev " CAN_GW_VERSION ") max_hops=%d\n",
+		max_hops);
 
 	cgw_cache = kmem_cache_create("can_gw", sizeof(struct cgw_job),
 				      0, 0, NULL);
diff --git a/net/can/raw.c b/net/can/raw.c
index 5b0e3e330d9..c1764e41dda 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -50,6 +50,7 @@
 #include <linux/skbuff.h>
 #include <linux/can.h>
 #include <linux/can/core.h>
+#include <linux/can/skb.h>
 #include <linux/can/raw.h>
 #include <net/sock.h>
 #include <net/net_namespace.h>
@@ -699,11 +700,14 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
 	if (!dev)
 		return -ENXIO;
 
-	skb = sock_alloc_send_skb(sk, size, msg->msg_flags & MSG_DONTWAIT,
-				  &err);
+	skb = sock_alloc_send_skb(sk, size + sizeof(struct can_skb_priv),
+				  msg->msg_flags & MSG_DONTWAIT, &err);
 	if (!skb)
 		goto put_dev;
 
+	can_skb_reserve(skb);
+	can_skb_prv(skb)->ifindex = dev->ifindex;
+
 	err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
 	if (err < 0)
 		goto free_skb;
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 4d111fd2b49..5ccf87ed8d6 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -506,6 +506,7 @@ static void reset_connection(struct ceph_connection *con)
 {
 	/* reset connection, out_queue, msg_ and connect_seq */
 	/* discard existing out_queue and msg_seq */
+	dout("reset_connection %p\n", con);
 	ceph_msg_remove_list(&con->out_queue);
 	ceph_msg_remove_list(&con->out_sent);
 
@@ -561,7 +562,7 @@ void ceph_con_open(struct ceph_connection *con,
 	mutex_lock(&con->mutex);
 	dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr));
 
-	BUG_ON(con->state != CON_STATE_CLOSED);
+	WARN_ON(con->state != CON_STATE_CLOSED);
 	con->state = CON_STATE_PREOPEN;
 
 	con->peer_name.type = (__u8) entity_type;
@@ -1506,13 +1507,6 @@ static int process_banner(struct ceph_connection *con)
 	return 0;
 }
 
-static void fail_protocol(struct ceph_connection *con)
-{
-	reset_connection(con);
-	BUG_ON(con->state != CON_STATE_NEGOTIATING);
-	con->state = CON_STATE_CLOSED;
-}
-
 static int process_connect(struct ceph_connection *con)
 {
 	u64 sup_feat = con->msgr->supported_features;
@@ -1530,7 +1524,7 @@ static int process_connect(struct ceph_connection *con)
 		       ceph_pr_addr(&con->peer_addr.in_addr),
 		       sup_feat, server_feat, server_feat & ~sup_feat);
 		con->error_msg = "missing required protocol features";
-		fail_protocol(con);
+		reset_connection(con);
 		return -1;
 
 	case CEPH_MSGR_TAG_BADPROTOVER:
@@ -1541,7 +1535,7 @@ static int process_connect(struct ceph_connection *con)
 		       le32_to_cpu(con->out_connect.protocol_version),
 		       le32_to_cpu(con->in_reply.protocol_version));
 		con->error_msg = "protocol version mismatch";
-		fail_protocol(con);
+		reset_connection(con);
 		return -1;
 
 	case CEPH_MSGR_TAG_BADAUTHORIZER:
@@ -1631,11 +1625,11 @@ static int process_connect(struct ceph_connection *con)
 			       ceph_pr_addr(&con->peer_addr.in_addr),
 			       req_feat, server_feat, req_feat & ~server_feat);
 			con->error_msg = "missing required protocol features";
-			fail_protocol(con);
+			reset_connection(con);
 			return -1;
 		}
 
-		BUG_ON(con->state != CON_STATE_NEGOTIATING);
+		WARN_ON(con->state != CON_STATE_NEGOTIATING);
 		con->state = CON_STATE_OPEN;
 
 		con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
@@ -2132,7 +2126,6 @@ more:
 		if (ret < 0)
 			goto out;
 
-		BUG_ON(con->state != CON_STATE_CONNECTING);
 		con->state = CON_STATE_NEGOTIATING;
 
 		/*
@@ -2160,7 +2153,7 @@ more:
 		goto more;
 	}
 
-	BUG_ON(con->state != CON_STATE_OPEN);
+	WARN_ON(con->state != CON_STATE_OPEN);
 
 	if (con->in_base_pos < 0) {
 		/*
@@ -2382,7 +2375,7 @@ static void ceph_fault(struct ceph_connection *con)
 	dout("fault %p state %lu to peer %s\n",
 	     con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
 
-	BUG_ON(con->state != CON_STATE_CONNECTING &&
+	WARN_ON(con->state != CON_STATE_CONNECTING &&
 	       con->state != CON_STATE_NEGOTIATING &&
 	       con->state != CON_STATE_OPEN);
 
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 780caf6b049..eb9a4447876 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1270,7 +1270,7 @@ static void reset_changed_osds(struct ceph_osd_client *osdc)
  * Requeue requests whose mapping to an OSD has changed.  If requests map to
  * no osd, request a new map.
  *
- * Caller should hold map_sem for read and request_mutex.
+ * Caller should hold map_sem for read.
  */
 static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
 {
@@ -1284,6 +1284,24 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
 	for (p = rb_first(&osdc->requests); p; ) {
 		req = rb_entry(p, struct ceph_osd_request, r_node);
 		p = rb_next(p);
+
+		/*
+		 * For linger requests that have not yet been
+		 * registered, move them to the linger list; they'll
+		 * be sent to the osd in the loop below.  Unregister
+		 * the request before re-registering it as a linger
+		 * request to ensure the __map_request() below
+		 * will decide it needs to be sent.
+		 */
+		if (req->r_linger && list_empty(&req->r_linger_item)) {
+			dout("%p tid %llu restart on osd%d\n",
+			     req, req->r_tid,
+			     req->r_osd ? req->r_osd->o_osd : -1);
+			__unregister_request(osdc, req);
+			__register_linger_request(osdc, req);
+			continue;
+		}
+
 		err = __map_request(osdc, req, force_resend);
 		if (err < 0)
 			continue;  /* error */
@@ -1298,17 +1316,6 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
 				req->r_flags |= CEPH_OSD_FLAG_RETRY;
 			}
 		}
-		if (req->r_linger && list_empty(&req->r_linger_item)) {
-			/*
-			 * register as a linger so that we will
-			 * re-submit below and get a new tid
-			 */
-			dout("%p tid %llu restart on osd%d\n",
-			     req, req->r_tid,
-			     req->r_osd ? req->r_osd->o_osd : -1);
-			__register_linger_request(osdc, req);
-			__unregister_request(osdc, req);
-		}
 	}
 
 	list_for_each_entry_safe(req, nreq, &osdc->req_linger,
@@ -1316,6 +1323,7 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
 		dout("linger req=%p req->r_osd=%p\n", req, req->r_osd);
 
 		err = __map_request(osdc, req, force_resend);
+		dout("__map_request returned %d\n", err);
 		if (err == 0)
 			continue;  /* no change and no osd was specified */
 		if (err < 0)
@@ -1337,6 +1345,7 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
 		dout("%d requests for down osds, need new map\n", needmap);
 		ceph_monc_request_next_osdmap(&osdc->client->monc);
 	}
+	reset_changed_osds(osdc);
 }
 
 
@@ -1393,7 +1402,6 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
 				osdc->osdmap = newmap;
 			}
 			kick_requests(osdc, 0);
-			reset_changed_osds(osdc);
 		} else {
 			dout("ignoring incremental map %u len %d\n",
 			     epoch, maplen);
diff --git a/net/core/Makefile b/net/core/Makefile
index 674641b13ae..0c5e3618c80 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 
 obj-y		     += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
 			neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
-			sock_diag.o
+			sock_diag.o dev_ioctl.o
 
 obj-$(CONFIG_XFRM) += flow.o
 obj-y += net-sysfs.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 0337e2b7686..368f9c3f9dc 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -187,7 +187,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
 		skb_queue_walk(queue, skb) {
 			*peeked = skb->peeked;
 			if (flags & MSG_PEEK) {
-				if (*off >= skb->len) {
+				if (*off >= skb->len && skb->len) {
 					*off -= skb->len;
 					continue;
 				}
diff --git a/net/core/dev.c b/net/core/dev.c
index d0cbc93fcf3..1932d351ed7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -106,7 +106,6 @@
 #include <net/xfrm.h>
 #include <linux/highmem.h>
 #include <linux/init.h>
-#include <linux/kmod.h>
 #include <linux/module.h>
 #include <linux/netpoll.h>
 #include <linux/rcupdate.h>
@@ -132,9 +131,7 @@
 #include <linux/pci.h>
 #include <linux/inetdevice.h>
 #include <linux/cpu_rmap.h>
-#include <linux/net_tstamp.h>
 #include <linux/static_key.h>
-#include <net/flow_keys.h>
 
 #include "net-sysfs.h"
 
@@ -203,7 +200,7 @@ static struct list_head offload_base __read_mostly;
 DEFINE_RWLOCK(dev_base_lock);
 EXPORT_SYMBOL(dev_base_lock);
 
-DEFINE_SEQLOCK(devnet_rename_seq);
+seqcount_t devnet_rename_seq;
 
 static inline void dev_base_seq_inc(struct net *net)
 {
@@ -1093,10 +1090,10 @@ int dev_change_name(struct net_device *dev, const char *newname)
 	if (dev->flags & IFF_UP)
 		return -EBUSY;
 
-	write_seqlock(&devnet_rename_seq);
+	write_seqcount_begin(&devnet_rename_seq);
 
 	if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
-		write_sequnlock(&devnet_rename_seq);
+		write_seqcount_end(&devnet_rename_seq);
 		return 0;
 	}
 
@@ -1104,7 +1101,7 @@ int dev_change_name(struct net_device *dev, const char *newname)
 
 	err = dev_get_valid_name(net, dev, newname);
 	if (err < 0) {
-		write_sequnlock(&devnet_rename_seq);
+		write_seqcount_end(&devnet_rename_seq);
 		return err;
 	}
 
@@ -1112,11 +1109,11 @@ rollback:
 	ret = device_rename(&dev->dev, dev->name);
 	if (ret) {
 		memcpy(dev->name, oldname, IFNAMSIZ);
-		write_sequnlock(&devnet_rename_seq);
+		write_seqcount_end(&devnet_rename_seq);
 		return ret;
 	}
 
-	write_sequnlock(&devnet_rename_seq);
+	write_seqcount_end(&devnet_rename_seq);
 
 	write_lock_bh(&dev_base_lock);
 	hlist_del_rcu(&dev->name_hlist);
@@ -1135,7 +1132,7 @@ rollback:
 		/* err >= 0 after dev_alloc_name() or stores the first errno */
 		if (err >= 0) {
 			err = ret;
-			write_seqlock(&devnet_rename_seq);
+			write_seqcount_begin(&devnet_rename_seq);
 			memcpy(dev->name, oldname, IFNAMSIZ);
 			goto rollback;
 		} else {
@@ -1227,36 +1224,6 @@ void netdev_notify_peers(struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_notify_peers);
 
-/**
- *	dev_load 	- load a network module
- *	@net: the applicable net namespace
- *	@name: name of interface
- *
- *	If a network interface is not present and the process has suitable
- *	privileges this function loads the module. If module loading is not
- *	available in this kernel then it becomes a nop.
- */
-
-void dev_load(struct net *net, const char *name)
-{
-	struct net_device *dev;
-	int no_module;
-
-	rcu_read_lock();
-	dev = dev_get_by_name_rcu(net, name);
-	rcu_read_unlock();
-
-	no_module = !dev;
-	if (no_module && capable(CAP_NET_ADMIN))
-		no_module = request_module("netdev-%s", name);
-	if (no_module && capable(CAP_SYS_MODULE)) {
-		if (!request_module("%s", name))
-			pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated).  Use CAP_NET_ADMIN and alias netdev-%s instead.\n",
-				name);
-	}
-}
-EXPORT_SYMBOL(dev_load);
-
 static int __dev_open(struct net_device *dev)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
@@ -1267,6 +1234,14 @@ static int __dev_open(struct net_device *dev)
 	if (!netif_device_present(dev))
 		return -ENODEV;
 
+	/* Block netpoll from trying to do any rx path servicing.
+	 * If we don't do this there is a chance ndo_poll_controller
+	 * or ndo_poll may be running while we open the device
+	 */
+	ret = netpoll_rx_disable(dev);
+	if (ret)
+		return ret;
+
 	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
 	ret = notifier_to_errno(ret);
 	if (ret)
@@ -1280,6 +1255,8 @@ static int __dev_open(struct net_device *dev)
 	if (!ret && ops->ndo_open)
 		ret = ops->ndo_open(dev);
 
+	netpoll_rx_enable(dev);
+
 	if (ret)
 		clear_bit(__LINK_STATE_START, &dev->state);
 	else {
@@ -1371,9 +1348,16 @@ static int __dev_close(struct net_device *dev)
 	int retval;
 	LIST_HEAD(single);
 
+	/* Temporarily disable netpoll until the interface is down */
+	retval = netpoll_rx_disable(dev);
+	if (retval)
+		return retval;
+
 	list_add(&dev->unreg_list, &single);
 	retval = __dev_close_many(&single);
 	list_del(&single);
+
+	netpoll_rx_enable(dev);
 	return retval;
 }
 
@@ -1409,14 +1393,22 @@ static int dev_close_many(struct list_head *head)
  */
 int dev_close(struct net_device *dev)
 {
+	int ret = 0;
 	if (dev->flags & IFF_UP) {
 		LIST_HEAD(single);
 
+		/* Block netpoll rx while the interface is going down */
+		ret = netpoll_rx_disable(dev);
+		if (ret)
+			return ret;
+
 		list_add(&dev->unreg_list, &single);
 		dev_close_many(&single);
 		list_del(&single);
+
+		netpoll_rx_enable(dev);
 	}
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL(dev_close);
 
@@ -1621,57 +1613,6 @@ static inline void net_timestamp_set(struct sk_buff *skb)
 			__net_timestamp(SKB);		\
 	}						\
 
-static int net_hwtstamp_validate(struct ifreq *ifr)
-{
-	struct hwtstamp_config cfg;
-	enum hwtstamp_tx_types tx_type;
-	enum hwtstamp_rx_filters rx_filter;
-	int tx_type_valid = 0;
-	int rx_filter_valid = 0;
-
-	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
-		return -EFAULT;
-
-	if (cfg.flags) /* reserved for future extensions */
-		return -EINVAL;
-
-	tx_type = cfg.tx_type;
-	rx_filter = cfg.rx_filter;
-
-	switch (tx_type) {
-	case HWTSTAMP_TX_OFF:
-	case HWTSTAMP_TX_ON:
-	case HWTSTAMP_TX_ONESTEP_SYNC:
-		tx_type_valid = 1;
-		break;
-	}
-
-	switch (rx_filter) {
-	case HWTSTAMP_FILTER_NONE:
-	case HWTSTAMP_FILTER_ALL:
-	case HWTSTAMP_FILTER_SOME:
-	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
-	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
-	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
-	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
-	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
-	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
-	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
-	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
-	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
-	case HWTSTAMP_FILTER_PTP_V2_EVENT:
-	case HWTSTAMP_FILTER_PTP_V2_SYNC:
-	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
-		rx_filter_valid = 1;
-		break;
-	}
-
-	if (!tx_type_valid || !rx_filter_valid)
-		return -ERANGE;
-
-	return 0;
-}
-
 static inline bool is_skb_forwardable(struct net_device *dev,
 				      struct sk_buff *skb)
 {
@@ -1857,6 +1798,228 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq)
 	}
 }
 
+#ifdef CONFIG_XPS
+static DEFINE_MUTEX(xps_map_mutex);
+#define xmap_dereference(P)		\
+	rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
+
+static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps,
+					int cpu, u16 index)
+{
+	struct xps_map *map = NULL;
+	int pos;
+
+	if (dev_maps)
+		map = xmap_dereference(dev_maps->cpu_map[cpu]);
+
+	for (pos = 0; map && pos < map->len; pos++) {
+		if (map->queues[pos] == index) {
+			if (map->len > 1) {
+				map->queues[pos] = map->queues[--map->len];
+			} else {
+				RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL);
+				kfree_rcu(map, rcu);
+				map = NULL;
+			}
+			break;
+		}
+	}
+
+	return map;
+}
+
+static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
+{
+	struct xps_dev_maps *dev_maps;
+	int cpu, i;
+	bool active = false;
+
+	mutex_lock(&xps_map_mutex);
+	dev_maps = xmap_dereference(dev->xps_maps);
+
+	if (!dev_maps)
+		goto out_no_maps;
+
+	for_each_possible_cpu(cpu) {
+		for (i = index; i < dev->num_tx_queues; i++) {
+			if (!remove_xps_queue(dev_maps, cpu, i))
+				break;
+		}
+		if (i == dev->num_tx_queues)
+			active = true;
+	}
+
+	if (!active) {
+		RCU_INIT_POINTER(dev->xps_maps, NULL);
+		kfree_rcu(dev_maps, rcu);
+	}
+
+	for (i = index; i < dev->num_tx_queues; i++)
+		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
+					     NUMA_NO_NODE);
+
+out_no_maps:
+	mutex_unlock(&xps_map_mutex);
+}
+
+static struct xps_map *expand_xps_map(struct xps_map *map,
+				      int cpu, u16 index)
+{
+	struct xps_map *new_map;
+	int alloc_len = XPS_MIN_MAP_ALLOC;
+	int i, pos;
+
+	for (pos = 0; map && pos < map->len; pos++) {
+		if (map->queues[pos] != index)
+			continue;
+		return map;
+	}
+
+	/* Need to add queue to this CPU's existing map */
+	if (map) {
+		if (pos < map->alloc_len)
+			return map;
+
+		alloc_len = map->alloc_len * 2;
+	}
+
+	/* Need to allocate new map to store queue on this CPU's map */
+	new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
+			       cpu_to_node(cpu));
+	if (!new_map)
+		return NULL;
+
+	for (i = 0; i < pos; i++)
+		new_map->queues[i] = map->queues[i];
+	new_map->alloc_len = alloc_len;
+	new_map->len = pos;
+
+	return new_map;
+}
+
+int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index)
+{
+	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
+	struct xps_map *map, *new_map;
+	int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES);
+	int cpu, numa_node_id = -2;
+	bool active = false;
+
+	mutex_lock(&xps_map_mutex);
+
+	dev_maps = xmap_dereference(dev->xps_maps);
+
+	/* allocate memory for queue storage */
+	for_each_online_cpu(cpu) {
+		if (!cpumask_test_cpu(cpu, mask))
+			continue;
+
+		if (!new_dev_maps)
+			new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
+		if (!new_dev_maps)
+			return -ENOMEM;
+
+		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
+				 NULL;
+
+		map = expand_xps_map(map, cpu, index);
+		if (!map)
+			goto error;
+
+		RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
+	}
+
+	if (!new_dev_maps)
+		goto out_no_new_maps;
+
+	for_each_possible_cpu(cpu) {
+		if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
+			/* add queue to CPU maps */
+			int pos = 0;
+
+			map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
+			while ((pos < map->len) && (map->queues[pos] != index))
+				pos++;
+
+			if (pos == map->len)
+				map->queues[map->len++] = index;
+#ifdef CONFIG_NUMA
+			if (numa_node_id == -2)
+				numa_node_id = cpu_to_node(cpu);
+			else if (numa_node_id != cpu_to_node(cpu))
+				numa_node_id = -1;
+#endif
+		} else if (dev_maps) {
+			/* fill in the new device map from the old device map */
+			map = xmap_dereference(dev_maps->cpu_map[cpu]);
+			RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
+		}
+
+	}
+
+	rcu_assign_pointer(dev->xps_maps, new_dev_maps);
+
+	/* Cleanup old maps */
+	if (dev_maps) {
+		for_each_possible_cpu(cpu) {
+			new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
+			map = xmap_dereference(dev_maps->cpu_map[cpu]);
+			if (map && map != new_map)
+				kfree_rcu(map, rcu);
+		}
+
+		kfree_rcu(dev_maps, rcu);
+	}
+
+	dev_maps = new_dev_maps;
+	active = true;
+
+out_no_new_maps:
+	/* update Tx queue numa node */
+	netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
+				     (numa_node_id >= 0) ? numa_node_id :
+				     NUMA_NO_NODE);
+
+	if (!dev_maps)
+		goto out_no_maps;
+
+	/* removes queue from unused CPUs */
+	for_each_possible_cpu(cpu) {
+		if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu))
+			continue;
+
+		if (remove_xps_queue(dev_maps, cpu, index))
+			active = true;
+	}
+
+	/* free map if not active */
+	if (!active) {
+		RCU_INIT_POINTER(dev->xps_maps, NULL);
+		kfree_rcu(dev_maps, rcu);
+	}
+
+out_no_maps:
+	mutex_unlock(&xps_map_mutex);
+
+	return 0;
+error:
+	/* remove any maps that we added */
+	for_each_possible_cpu(cpu) {
+		new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
+		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
+				 NULL;
+		if (new_map && new_map != map)
+			kfree(new_map);
+	}
+
+	mutex_unlock(&xps_map_mutex);
+
+	kfree(new_dev_maps);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(netif_set_xps_queue);
+
+#endif
 /*
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
  * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
@@ -1880,8 +2043,12 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 		if (dev->num_tc)
 			netif_setup_tc(dev, txq);
 
-		if (txq < dev->real_num_tx_queues)
+		if (txq < dev->real_num_tx_queues) {
 			qdisc_reset_all_tx_gt(dev, txq);
+#ifdef CONFIG_XPS
+			netif_reset_xps_queues_gt(dev, txq);
+#endif
+		}
 	}
 
 	dev->real_num_tx_queues = txq;
@@ -2046,6 +2213,15 @@ int skb_checksum_help(struct sk_buff *skb)
 		return -EINVAL;
 	}
 
+	/* Before computing a checksum, we should make sure no frag could
+	 * be modified by an external entity : checksum could be wrong.
+	 */
+	if (skb_has_shared_frag(skb)) {
+		ret = __skb_linearize(skb);
+		if (ret)
+			goto out;
+	}
+
 	offset = skb_checksum_start_offset(skb);
 	BUG_ON(offset >= skb_headlen(skb));
 	csum = skb_checksum(skb, offset, skb->len - offset, 0);
@@ -2069,25 +2245,19 @@ out:
 EXPORT_SYMBOL(skb_checksum_help);
 
 /**
- *	skb_gso_segment - Perform segmentation on skb.
+ *	skb_mac_gso_segment - mac layer segmentation handler.
  *	@skb: buffer to segment
  *	@features: features for the output path (see dev->features)
- *
- *	This function segments the given skb and returns a list of segments.
- *
- *	It may return NULL if the skb requires no segmentation.  This is
- *	only possible when GSO is used for verifying header integrity.
  */
-struct sk_buff *skb_gso_segment(struct sk_buff *skb,
-	netdev_features_t features)
+struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
+				    netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
 	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
-	int vlan_depth = ETH_HLEN;
-	int err;
 
 	while (type == htons(ETH_P_8021Q)) {
+		int vlan_depth = ETH_HLEN;
 		struct vlan_hdr *vh;
 
 		if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
@@ -2098,22 +2268,14 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
 		vlan_depth += VLAN_HLEN;
 	}
 
-	skb_reset_mac_header(skb);
-	skb->mac_len = skb->network_header - skb->mac_header;
 	__skb_pull(skb, skb->mac_len);
 
-	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
-		skb_warn_bad_offload(skb);
-
-		if (skb_header_cloned(skb) &&
-		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
-			return ERR_PTR(err);
-	}
-
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, &offload_base, list) {
 		if (ptype->type == type && ptype->callbacks.gso_segment) {
 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
+				int err;
+
 				err = ptype->callbacks.gso_send_check(skb);
 				segs = ERR_PTR(err);
 				if (err || skb_gso_ok(skb, features))
@@ -2131,7 +2293,50 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
 
 	return segs;
 }
-EXPORT_SYMBOL(skb_gso_segment);
+EXPORT_SYMBOL(skb_mac_gso_segment);
+
+
+/* openvswitch calls this on rx path, so we need a different check.
+ */
+static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
+{
+	if (tx_path)
+		return skb->ip_summed != CHECKSUM_PARTIAL;
+	else
+		return skb->ip_summed == CHECKSUM_NONE;
+}
+
+/**
+ *	__skb_gso_segment - Perform segmentation on skb.
+ *	@skb: buffer to segment
+ *	@features: features for the output path (see dev->features)
+ *	@tx_path: whether it is called in TX path
+ *
+ *	This function segments the given skb and returns a list of segments.
+ *
+ *	It may return NULL if the skb requires no segmentation.  This is
+ *	only possible when GSO is used for verifying header integrity.
+ */
+struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
+				  netdev_features_t features, bool tx_path)
+{
+	if (unlikely(skb_needs_check(skb, tx_path))) {
+		int err;
+
+		skb_warn_bad_offload(skb);
+
+		if (skb_header_cloned(skb) &&
+		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+			return ERR_PTR(err);
+	}
+
+	SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
+	skb_reset_mac_header(skb);
+	skb_reset_mac_len(skb);
+
+	return skb_mac_gso_segment(skb, features);
+}
+EXPORT_SYMBOL(__skb_gso_segment);
 
 /* Take action when hardware reception checksum errors are detected. */
 #ifdef CONFIG_BUG
@@ -2410,126 +2615,28 @@ out:
 	return rc;
 }
 
-static u32 hashrnd __read_mostly;
-
-/*
- * Returns a Tx hash based on the given packet descriptor a Tx queues' number
- * to be used as a distribution range.
- */
-u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
-		  unsigned int num_tx_queues)
+static void qdisc_pkt_len_init(struct sk_buff *skb)
 {
-	u32 hash;
-	u16 qoffset = 0;
-	u16 qcount = num_tx_queues;
-
-	if (skb_rx_queue_recorded(skb)) {
-		hash = skb_get_rx_queue(skb);
-		while (unlikely(hash >= num_tx_queues))
-			hash -= num_tx_queues;
-		return hash;
-	}
-
-	if (dev->num_tc) {
-		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
-		qoffset = dev->tc_to_txq[tc].offset;
-		qcount = dev->tc_to_txq[tc].count;
-	}
-
-	if (skb->sk && skb->sk->sk_hash)
-		hash = skb->sk->sk_hash;
-	else
-		hash = (__force u16) skb->protocol;
-	hash = jhash_1word(hash, hashrnd);
+	const struct skb_shared_info *shinfo = skb_shinfo(skb);
 
-	return (u16) (((u64) hash * qcount) >> 32) + qoffset;
-}
-EXPORT_SYMBOL(__skb_tx_hash);
-
-static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
-{
-	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
-		net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n",
-				     dev->name, queue_index,
-				     dev->real_num_tx_queues);
-		return 0;
-	}
-	return queue_index;
-}
-
-static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
-{
-#ifdef CONFIG_XPS
-	struct xps_dev_maps *dev_maps;
-	struct xps_map *map;
-	int queue_index = -1;
-
-	rcu_read_lock();
-	dev_maps = rcu_dereference(dev->xps_maps);
-	if (dev_maps) {
-		map = rcu_dereference(
-		    dev_maps->cpu_map[raw_smp_processor_id()]);
-		if (map) {
-			if (map->len == 1)
-				queue_index = map->queues[0];
-			else {
-				u32 hash;
-				if (skb->sk && skb->sk->sk_hash)
-					hash = skb->sk->sk_hash;
-				else
-					hash = (__force u16) skb->protocol ^
-					    skb->rxhash;
-				hash = jhash_1word(hash, hashrnd);
-				queue_index = map->queues[
-				    ((u64)hash * map->len) >> 32];
-			}
-			if (unlikely(queue_index >= dev->real_num_tx_queues))
-				queue_index = -1;
-		}
-	}
-	rcu_read_unlock();
-
-	return queue_index;
-#else
-	return -1;
-#endif
-}
-
-struct netdev_queue *netdev_pick_tx(struct net_device *dev,
-				    struct sk_buff *skb)
-{
-	int queue_index;
-	const struct net_device_ops *ops = dev->netdev_ops;
-
-	if (dev->real_num_tx_queues == 1)
-		queue_index = 0;
-	else if (ops->ndo_select_queue) {
-		queue_index = ops->ndo_select_queue(dev, skb);
-		queue_index = dev_cap_txqueue(dev, queue_index);
-	} else {
-		struct sock *sk = skb->sk;
-		queue_index = sk_tx_queue_get(sk);
-
-		if (queue_index < 0 || skb->ooo_okay ||
-		    queue_index >= dev->real_num_tx_queues) {
-			int old_index = queue_index;
+	qdisc_skb_cb(skb)->pkt_len = skb->len;
 
-			queue_index = get_xps_queue(dev, skb);
-			if (queue_index < 0)
-				queue_index = skb_tx_hash(dev, skb);
+	/* To get more precise estimation of bytes sent on wire,
+	 * we add to pkt_len the headers size of all segments
+	 */
+	if (shinfo->gso_size)  {
+		unsigned int hdr_len;
 
-			if (queue_index != old_index && sk) {
-				struct dst_entry *dst =
-				    rcu_dereference_check(sk->sk_dst_cache, 1);
+		/* mac layer + network layer */
+		hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
 
-				if (dst && skb_dst(skb) == dst)
-					sk_tx_queue_set(sk, queue_index);
-			}
-		}
+		/* + transport layer */
+		if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
+			hdr_len += tcp_hdrlen(skb);
+		else
+			hdr_len += sizeof(struct udphdr);
+		qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len;
 	}
-
-	skb_set_queue_mapping(skb, queue_index);
-	return netdev_get_tx_queue(dev, queue_index);
 }
 
 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
@@ -2540,7 +2647,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	bool contended;
 	int rc;
 
-	qdisc_skb_cb(skb)->pkt_len = skb->len;
+	qdisc_pkt_len_init(skb);
 	qdisc_calculate_pkt_len(skb, q);
 	/*
 	 * Heuristic to force contended enqueues to serialize on a
@@ -2663,6 +2770,8 @@ int dev_queue_xmit(struct sk_buff *skb)
 	struct Qdisc *q;
 	int rc = -ENOMEM;
 
+	skb_reset_mac_header(skb);
+
 	/* Disable soft irqs for various locks below. Also
 	 * stops preemption for RCU.
 	 */
@@ -2757,41 +2866,6 @@ static inline void ____napi_schedule(struct softnet_data *sd,
 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 }
 
-/*
- * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
- * and src/dst port numbers.  Sets rxhash in skb to non-zero hash value
- * on success, zero indicates no valid hash.  Also, sets l4_rxhash in skb
- * if hash is a canonical 4-tuple hash over transport ports.
- */
-void __skb_get_rxhash(struct sk_buff *skb)
-{
-	struct flow_keys keys;
-	u32 hash;
-
-	if (!skb_flow_dissect(skb, &keys))
-		return;
-
-	if (keys.ports)
-		skb->l4_rxhash = 1;
-
-	/* get a consistent hash (same value on both flow directions) */
-	if (((__force u32)keys.dst < (__force u32)keys.src) ||
-	    (((__force u32)keys.dst == (__force u32)keys.src) &&
-	     ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) {
-		swap(keys.dst, keys.src);
-		swap(keys.port16[0], keys.port16[1]);
-	}
-
-	hash = jhash_3words((__force u32)keys.dst,
-			    (__force u32)keys.src,
-			    (__force u32)keys.ports, hashrnd);
-	if (!hash)
-		hash = 1;
-
-	skb->rxhash = hash;
-}
-EXPORT_SYMBOL(__skb_get_rxhash);
-
 #ifdef CONFIG_RPS
 
 /* One global table that all flow-based protocols share. */
@@ -3318,7 +3392,7 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
 	}
 }
 
-static int __netif_receive_skb(struct sk_buff *skb)
+static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
 {
 	struct packet_type *ptype, *pt_prev;
 	rx_handler_func_t *rx_handler;
@@ -3327,24 +3401,11 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	bool deliver_exact = false;
 	int ret = NET_RX_DROP;
 	__be16 type;
-	unsigned long pflags = current->flags;
 
 	net_timestamp_check(!netdev_tstamp_prequeue, skb);
 
 	trace_netif_receive_skb(skb);
 
-	/*
-	 * PFMEMALLOC skbs are special, they should
-	 * - be delivered to SOCK_MEMALLOC sockets only
-	 * - stay away from userspace
-	 * - have bounded memory usage
-	 *
-	 * Use PF_MEMALLOC as this saves us from propagating the allocation
-	 * context down to all allocation sites.
-	 */
-	if (sk_memalloc_socks() && skb_pfmemalloc(skb))
-		current->flags |= PF_MEMALLOC;
-
 	/* if we've gotten here through NAPI, check netpoll */
 	if (netpoll_receive_skb(skb))
 		goto out;
@@ -3352,7 +3413,8 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	orig_dev = skb->dev;
 
 	skb_reset_network_header(skb);
-	skb_reset_transport_header(skb);
+	if (!skb_transport_header_was_set(skb))
+		skb_reset_transport_header(skb);
 	skb_reset_mac_len(skb);
 
 	pt_prev = NULL;
@@ -3377,7 +3439,7 @@ another_round:
 	}
 #endif
 
-	if (sk_memalloc_socks() && skb_pfmemalloc(skb))
+	if (pfmemalloc)
 		goto skip_taps;
 
 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
@@ -3396,8 +3458,7 @@ skip_taps:
 ncls:
 #endif
 
-	if (sk_memalloc_socks() && skb_pfmemalloc(skb)
-				&& !skb_pfmemalloc_protocol(skb))
+	if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
 		goto drop;
 
 	if (vlan_tx_tag_present(skb)) {
@@ -3467,7 +3528,31 @@ drop:
 unlock:
 	rcu_read_unlock();
 out:
-	tsk_restore_flags(current, pflags, PF_MEMALLOC);
+	return ret;
+}
+
+static int __netif_receive_skb(struct sk_buff *skb)
+{
+	int ret;
+
+	if (sk_memalloc_socks() && skb_pfmemalloc(skb)) {
+		unsigned long pflags = current->flags;
+
+		/*
+		 * PFMEMALLOC skbs are special, they should
+		 * - be delivered to SOCK_MEMALLOC sockets only
+		 * - stay away from userspace
+		 * - have bounded memory usage
+		 *
+		 * Use PF_MEMALLOC as this saves us from propagating the allocation
+		 * context down to all allocation sites.
+		 */
+		current->flags |= PF_MEMALLOC;
+		ret = __netif_receive_skb_core(skb, true);
+		tsk_restore_flags(current, pflags, PF_MEMALLOC);
+	} else
+		ret = __netif_receive_skb_core(skb, false);
+
 	return ret;
 }
 
@@ -3634,7 +3719,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	__be16 type = skb->protocol;
 	struct list_head *head = &offload_base;
 	int same_flow;
-	int mac_len;
 	enum gro_result ret;
 
 	if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
@@ -3651,8 +3735,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 			continue;
 
 		skb_set_network_header(skb, skb_gro_offset(skb));
-		mac_len = skb->network_header - skb->mac_header;
-		skb->mac_len = mac_len;
+		skb_reset_mac_len(skb);
 		NAPI_GRO_CB(skb)->same_flow = 0;
 		NAPI_GRO_CB(skb)->flush = 0;
 		NAPI_GRO_CB(skb)->free = 0;
@@ -4134,127 +4217,6 @@ softnet_break:
 	goto out;
 }
 
-static gifconf_func_t *gifconf_list[NPROTO];
-
-/**
- *	register_gifconf	-	register a SIOCGIF handler
- *	@family: Address family
- *	@gifconf: Function handler
- *
- *	Register protocol dependent address dumping routines. The handler
- *	that is passed must not be freed or reused until it has been replaced
- *	by another handler.
- */
-int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
-{
-	if (family >= NPROTO)
-		return -EINVAL;
-	gifconf_list[family] = gifconf;
-	return 0;
-}
-EXPORT_SYMBOL(register_gifconf);
-
-
-/*
- *	Map an interface index to its name (SIOCGIFNAME)
- */
-
-/*
- *	We need this ioctl for efficient implementation of the
- *	if_indextoname() function required by the IPv6 API.  Without
- *	it, we would have to search all the interfaces to find a
- *	match.  --pb
- */
-
-static int dev_ifname(struct net *net, struct ifreq __user *arg)
-{
-	struct net_device *dev;
-	struct ifreq ifr;
-	unsigned seq;
-
-	/*
-	 *	Fetch the caller's info block.
-	 */
-
-	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
-		return -EFAULT;
-
-retry:
-	seq = read_seqbegin(&devnet_rename_seq);
-	rcu_read_lock();
-	dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
-	if (!dev) {
-		rcu_read_unlock();
-		return -ENODEV;
-	}
-
-	strcpy(ifr.ifr_name, dev->name);
-	rcu_read_unlock();
-	if (read_seqretry(&devnet_rename_seq, seq))
-		goto retry;
-
-	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
-		return -EFAULT;
-	return 0;
-}
-
-/*
- *	Perform a SIOCGIFCONF call. This structure will change
- *	size eventually, and there is nothing I can do about it.
- *	Thus we will need a 'compatibility mode'.
- */
-
-static int dev_ifconf(struct net *net, char __user *arg)
-{
-	struct ifconf ifc;
-	struct net_device *dev;
-	char __user *pos;
-	int len;
-	int total;
-	int i;
-
-	/*
-	 *	Fetch the caller's info block.
-	 */
-
-	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
-		return -EFAULT;
-
-	pos = ifc.ifc_buf;
-	len = ifc.ifc_len;
-
-	/*
-	 *	Loop over the interfaces, and write an info block for each.
-	 */
-
-	total = 0;
-	for_each_netdev(net, dev) {
-		for (i = 0; i < NPROTO; i++) {
-			if (gifconf_list[i]) {
-				int done;
-				if (!pos)
-					done = gifconf_list[i](dev, NULL, 0);
-				else
-					done = gifconf_list[i](dev, pos + total,
-							       len - total);
-				if (done < 0)
-					return -EFAULT;
-				total += done;
-			}
-		}
-	}
-
-	/*
-	 *	All done.  Write the updated control block back to the caller.
-	 */
-	ifc.ifc_len = total;
-
-	/*
-	 * 	Both BSD and Solaris return 0 here, so we do too.
-	 */
-	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
-}
-
 #ifdef CONFIG_PROC_FS
 
 #define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
@@ -4600,64 +4562,231 @@ static int __init dev_proc_init(void)
 #endif	/* CONFIG_PROC_FS */
 
 
+struct netdev_upper {
+	struct net_device *dev;
+	bool master;
+	struct list_head list;
+	struct rcu_head rcu;
+	struct list_head search_list;
+};
+
+static void __append_search_uppers(struct list_head *search_list,
+				   struct net_device *dev)
+{
+	struct netdev_upper *upper;
+
+	list_for_each_entry(upper, &dev->upper_dev_list, list) {
+		/* check if this upper is not already in search list */
+		if (list_empty(&upper->search_list))
+			list_add_tail(&upper->search_list, search_list);
+	}
+}
+
+static bool __netdev_search_upper_dev(struct net_device *dev,
+				      struct net_device *upper_dev)
+{
+	LIST_HEAD(search_list);
+	struct netdev_upper *upper;
+	struct netdev_upper *tmp;
+	bool ret = false;
+
+	__append_search_uppers(&search_list, dev);
+	list_for_each_entry(upper, &search_list, search_list) {
+		if (upper->dev == upper_dev) {
+			ret = true;
+			break;
+		}
+		__append_search_uppers(&search_list, upper->dev);
+	}
+	list_for_each_entry_safe(upper, tmp, &search_list, search_list)
+		INIT_LIST_HEAD(&upper->search_list);
+	return ret;
+}
+
+static struct netdev_upper *__netdev_find_upper(struct net_device *dev,
+						struct net_device *upper_dev)
+{
+	struct netdev_upper *upper;
+
+	list_for_each_entry(upper, &dev->upper_dev_list, list) {
+		if (upper->dev == upper_dev)
+			return upper;
+	}
+	return NULL;
+}
+
 /**
- *	netdev_set_master	-	set up master pointer
- *	@slave: slave device
- *	@master: new master device
+ * netdev_has_upper_dev - Check if device is linked to an upper device
+ * @dev: device
+ * @upper_dev: upper device to check
  *
- *	Changes the master device of the slave. Pass %NULL to break the
- *	bonding. The caller must hold the RTNL semaphore. On a failure
- *	a negative errno code is returned. On success the reference counts
- *	are adjusted and the function returns zero.
+ * Find out if a device is linked to specified upper device and return true
+ * in case it is. Note that this checks only immediate upper device,
+ * not through a complete stack of devices. The caller must hold the RTNL lock.
  */
-int netdev_set_master(struct net_device *slave, struct net_device *master)
+bool netdev_has_upper_dev(struct net_device *dev,
+			  struct net_device *upper_dev)
 {
-	struct net_device *old = slave->master;
+	ASSERT_RTNL();
 
+	return __netdev_find_upper(dev, upper_dev);
+}
+EXPORT_SYMBOL(netdev_has_upper_dev);
+
+/**
+ * netdev_has_any_upper_dev - Check if device is linked to some device
+ * @dev: device
+ *
+ * Find out if a device is linked to an upper device and return true in case
+ * it is. The caller must hold the RTNL lock.
+ */
+bool netdev_has_any_upper_dev(struct net_device *dev)
+{
 	ASSERT_RTNL();
 
-	if (master) {
-		if (old)
-			return -EBUSY;
-		dev_hold(master);
-	}
+	return !list_empty(&dev->upper_dev_list);
+}
+EXPORT_SYMBOL(netdev_has_any_upper_dev);
 
-	slave->master = master;
+/**
+ * netdev_master_upper_dev_get - Get master upper device
+ * @dev: device
+ *
+ * Find a master upper device and return pointer to it or NULL in case
+ * it's not there. The caller must hold the RTNL lock.
+ */
+struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
+{
+	struct netdev_upper *upper;
 
-	if (old)
-		dev_put(old);
-	return 0;
+	ASSERT_RTNL();
+
+	if (list_empty(&dev->upper_dev_list))
+		return NULL;
+
+	upper = list_first_entry(&dev->upper_dev_list,
+				 struct netdev_upper, list);
+	if (likely(upper->master))
+		return upper->dev;
+	return NULL;
 }
-EXPORT_SYMBOL(netdev_set_master);
+EXPORT_SYMBOL(netdev_master_upper_dev_get);
 
 /**
- *	netdev_set_bond_master	-	set up bonding master/slave pair
- *	@slave: slave device
- *	@master: new master device
+ * netdev_master_upper_dev_get_rcu - Get master upper device
+ * @dev: device
  *
- *	Changes the master device of the slave. Pass %NULL to break the
- *	bonding. The caller must hold the RTNL semaphore. On a failure
- *	a negative errno code is returned. On success %RTM_NEWLINK is sent
- *	to the routing socket and the function returns zero.
+ * Find a master upper device and return pointer to it or NULL in case
+ * it's not there. The caller must hold the RCU read lock.
  */
-int netdev_set_bond_master(struct net_device *slave, struct net_device *master)
+struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
 {
-	int err;
+	struct netdev_upper *upper;
+
+	upper = list_first_or_null_rcu(&dev->upper_dev_list,
+				       struct netdev_upper, list);
+	if (upper && likely(upper->master))
+		return upper->dev;
+	return NULL;
+}
+EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
+
+static int __netdev_upper_dev_link(struct net_device *dev,
+				   struct net_device *upper_dev, bool master)
+{
+	struct netdev_upper *upper;
 
 	ASSERT_RTNL();
 
-	err = netdev_set_master(slave, master);
-	if (err)
-		return err;
+	if (dev == upper_dev)
+		return -EBUSY;
+
+	/* To prevent loops, check if dev is not upper device to upper_dev. */
+	if (__netdev_search_upper_dev(upper_dev, dev))
+		return -EBUSY;
+
+	if (__netdev_find_upper(dev, upper_dev))
+		return -EEXIST;
+
+	if (master && netdev_master_upper_dev_get(dev))
+		return -EBUSY;
+
+	upper = kmalloc(sizeof(*upper), GFP_KERNEL);
+	if (!upper)
+		return -ENOMEM;
+
+	upper->dev = upper_dev;
+	upper->master = master;
+	INIT_LIST_HEAD(&upper->search_list);
+
+	/* Ensure that master upper link is always the first item in list. */
 	if (master)
-		slave->flags |= IFF_SLAVE;
+		list_add_rcu(&upper->list, &dev->upper_dev_list);
 	else
-		slave->flags &= ~IFF_SLAVE;
+		list_add_tail_rcu(&upper->list, &dev->upper_dev_list);
+	dev_hold(upper_dev);
 
-	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
 	return 0;
 }
-EXPORT_SYMBOL(netdev_set_bond_master);
+
+/**
+ * netdev_upper_dev_link - Add a link to the upper device
+ * @dev: device
+ * @upper_dev: new upper device
+ *
+ * Adds a link to device which is upper to this one. The caller must hold
+ * the RTNL lock. On a failure a negative errno code is returned.
+ * On success the reference counts are adjusted and the function
+ * returns zero.
+ */
+int netdev_upper_dev_link(struct net_device *dev,
+			  struct net_device *upper_dev)
+{
+	return __netdev_upper_dev_link(dev, upper_dev, false);
+}
+EXPORT_SYMBOL(netdev_upper_dev_link);
+
+/**
+ * netdev_master_upper_dev_link - Add a master link to the upper device
+ * @dev: device
+ * @upper_dev: new upper device
+ *
+ * Adds a link to device which is upper to this one. In this case, only
+ * one master upper device can be linked, although other non-master devices
+ * might be linked as well. The caller must hold the RTNL lock.
+ * On a failure a negative errno code is returned. On success the reference
+ * counts are adjusted and the function returns zero.
+ */
+int netdev_master_upper_dev_link(struct net_device *dev,
+				 struct net_device *upper_dev)
+{
+	return __netdev_upper_dev_link(dev, upper_dev, true);
+}
+EXPORT_SYMBOL(netdev_master_upper_dev_link);
+
+/**
+ * netdev_upper_dev_unlink - Removes a link to upper device
+ * @dev: device
+ * @upper_dev: new upper device
+ *
+ * Removes a link to device which is upper to this one. The caller must hold
+ * the RTNL lock.
+ */
+void netdev_upper_dev_unlink(struct net_device *dev,
+			     struct net_device *upper_dev)
+{
+	struct netdev_upper *upper;
+
+	ASSERT_RTNL();
+
+	upper = __netdev_find_upper(dev, upper_dev);
+	if (!upper)
+		return;
+	list_del_rcu(&upper->list);
+	dev_put(upper_dev);
+	kfree_rcu(upper, rcu);
+}
+EXPORT_SYMBOL(netdev_upper_dev_unlink);
 
 static void dev_change_rx_flags(struct net_device *dev, int flags)
 {
@@ -5020,381 +5149,33 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
 	if (!netif_device_present(dev))
 		return -ENODEV;
 	err = ops->ndo_set_mac_address(dev, sa);
-	if (!err)
-		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+	if (err)
+		return err;
+	dev->addr_assign_type = NET_ADDR_SET;
+	call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
 	add_device_randomness(dev->dev_addr, dev->addr_len);
-	return err;
+	return 0;
 }
 EXPORT_SYMBOL(dev_set_mac_address);
 
-/*
- *	Perform the SIOCxIFxxx calls, inside rcu_read_lock()
- */
-static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
-{
-	int err;
-	struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
-
-	if (!dev)
-		return -ENODEV;
-
-	switch (cmd) {
-	case SIOCGIFFLAGS:	/* Get interface flags */
-		ifr->ifr_flags = (short) dev_get_flags(dev);
-		return 0;
-
-	case SIOCGIFMETRIC:	/* Get the metric on the interface
-				   (currently unused) */
-		ifr->ifr_metric = 0;
-		return 0;
-
-	case SIOCGIFMTU:	/* Get the MTU of a device */
-		ifr->ifr_mtu = dev->mtu;
-		return 0;
-
-	case SIOCGIFHWADDR:
-		if (!dev->addr_len)
-			memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
-		else
-			memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
-			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
-		ifr->ifr_hwaddr.sa_family = dev->type;
-		return 0;
-
-	case SIOCGIFSLAVE:
-		err = -EINVAL;
-		break;
-
-	case SIOCGIFMAP:
-		ifr->ifr_map.mem_start = dev->mem_start;
-		ifr->ifr_map.mem_end   = dev->mem_end;
-		ifr->ifr_map.base_addr = dev->base_addr;
-		ifr->ifr_map.irq       = dev->irq;
-		ifr->ifr_map.dma       = dev->dma;
-		ifr->ifr_map.port      = dev->if_port;
-		return 0;
-
-	case SIOCGIFINDEX:
-		ifr->ifr_ifindex = dev->ifindex;
-		return 0;
-
-	case SIOCGIFTXQLEN:
-		ifr->ifr_qlen = dev->tx_queue_len;
-		return 0;
-
-	default:
-		/* dev_ioctl() should ensure this case
-		 * is never reached
-		 */
-		WARN_ON(1);
-		err = -ENOTTY;
-		break;
-
-	}
-	return err;
-}
-
-/*
- *	Perform the SIOCxIFxxx calls, inside rtnl_lock()
- */
-static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
-{
-	int err;
-	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
-	const struct net_device_ops *ops;
-
-	if (!dev)
-		return -ENODEV;
-
-	ops = dev->netdev_ops;
-
-	switch (cmd) {
-	case SIOCSIFFLAGS:	/* Set interface flags */
-		return dev_change_flags(dev, ifr->ifr_flags);
-
-	case SIOCSIFMETRIC:	/* Set the metric on the interface
-				   (currently unused) */
-		return -EOPNOTSUPP;
-
-	case SIOCSIFMTU:	/* Set the MTU of a device */
-		return dev_set_mtu(dev, ifr->ifr_mtu);
-
-	case SIOCSIFHWADDR:
-		return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
-
-	case SIOCSIFHWBROADCAST:
-		if (ifr->ifr_hwaddr.sa_family != dev->type)
-			return -EINVAL;
-		memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
-		       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
-		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
-		return 0;
-
-	case SIOCSIFMAP:
-		if (ops->ndo_set_config) {
-			if (!netif_device_present(dev))
-				return -ENODEV;
-			return ops->ndo_set_config(dev, &ifr->ifr_map);
-		}
-		return -EOPNOTSUPP;
-
-	case SIOCADDMULTI:
-		if (!ops->ndo_set_rx_mode ||
-		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
-			return -EINVAL;
-		if (!netif_device_present(dev))
-			return -ENODEV;
-		return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
-
-	case SIOCDELMULTI:
-		if (!ops->ndo_set_rx_mode ||
-		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
-			return -EINVAL;
-		if (!netif_device_present(dev))
-			return -ENODEV;
-		return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
-
-	case SIOCSIFTXQLEN:
-		if (ifr->ifr_qlen < 0)
-			return -EINVAL;
-		dev->tx_queue_len = ifr->ifr_qlen;
-		return 0;
-
-	case SIOCSIFNAME:
-		ifr->ifr_newname[IFNAMSIZ-1] = '\0';
-		return dev_change_name(dev, ifr->ifr_newname);
-
-	case SIOCSHWTSTAMP:
-		err = net_hwtstamp_validate(ifr);
-		if (err)
-			return err;
-		/* fall through */
-
-	/*
-	 *	Unknown or private ioctl
-	 */
-	default:
-		if ((cmd >= SIOCDEVPRIVATE &&
-		    cmd <= SIOCDEVPRIVATE + 15) ||
-		    cmd == SIOCBONDENSLAVE ||
-		    cmd == SIOCBONDRELEASE ||
-		    cmd == SIOCBONDSETHWADDR ||
-		    cmd == SIOCBONDSLAVEINFOQUERY ||
-		    cmd == SIOCBONDINFOQUERY ||
-		    cmd == SIOCBONDCHANGEACTIVE ||
-		    cmd == SIOCGMIIPHY ||
-		    cmd == SIOCGMIIREG ||
-		    cmd == SIOCSMIIREG ||
-		    cmd == SIOCBRADDIF ||
-		    cmd == SIOCBRDELIF ||
-		    cmd == SIOCSHWTSTAMP ||
-		    cmd == SIOCWANDEV) {
-			err = -EOPNOTSUPP;
-			if (ops->ndo_do_ioctl) {
-				if (netif_device_present(dev))
-					err = ops->ndo_do_ioctl(dev, ifr, cmd);
-				else
-					err = -ENODEV;
-			}
-		} else
-			err = -EINVAL;
-
-	}
-	return err;
-}
-
-/*
- *	This function handles all "interface"-type I/O control requests. The actual
- *	'doing' part of this is dev_ifsioc above.
- */
-
 /**
- *	dev_ioctl	-	network device ioctl
- *	@net: the applicable net namespace
- *	@cmd: command to issue
- *	@arg: pointer to a struct ifreq in user space
+ *	dev_change_carrier - Change device carrier
+ *	@dev: device
+ *	@new_carries: new value
  *
- *	Issue ioctl functions to devices. This is normally called by the
- *	user space syscall interfaces but can sometimes be useful for
- *	other purposes. The return value is the return from the syscall if
- *	positive or a negative errno code on error.
+ *	Change device carrier
  */
-
-int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+int dev_change_carrier(struct net_device *dev, bool new_carrier)
 {
-	struct ifreq ifr;
-	int ret;
-	char *colon;
-
-	/* One special case: SIOCGIFCONF takes ifconf argument
-	   and requires shared lock, because it sleeps writing
-	   to user space.
-	 */
-
-	if (cmd == SIOCGIFCONF) {
-		rtnl_lock();
-		ret = dev_ifconf(net, (char __user *) arg);
-		rtnl_unlock();
-		return ret;
-	}
-	if (cmd == SIOCGIFNAME)
-		return dev_ifname(net, (struct ifreq __user *)arg);
-
-	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
-		return -EFAULT;
-
-	ifr.ifr_name[IFNAMSIZ-1] = 0;
-
-	colon = strchr(ifr.ifr_name, ':');
-	if (colon)
-		*colon = 0;
-
-	/*
-	 *	See which interface the caller is talking about.
-	 */
-
-	switch (cmd) {
-	/*
-	 *	These ioctl calls:
-	 *	- can be done by all.
-	 *	- atomic and do not require locking.
-	 *	- return a value
-	 */
-	case SIOCGIFFLAGS:
-	case SIOCGIFMETRIC:
-	case SIOCGIFMTU:
-	case SIOCGIFHWADDR:
-	case SIOCGIFSLAVE:
-	case SIOCGIFMAP:
-	case SIOCGIFINDEX:
-	case SIOCGIFTXQLEN:
-		dev_load(net, ifr.ifr_name);
-		rcu_read_lock();
-		ret = dev_ifsioc_locked(net, &ifr, cmd);
-		rcu_read_unlock();
-		if (!ret) {
-			if (colon)
-				*colon = ':';
-			if (copy_to_user(arg, &ifr,
-					 sizeof(struct ifreq)))
-				ret = -EFAULT;
-		}
-		return ret;
-
-	case SIOCETHTOOL:
-		dev_load(net, ifr.ifr_name);
-		rtnl_lock();
-		ret = dev_ethtool(net, &ifr);
-		rtnl_unlock();
-		if (!ret) {
-			if (colon)
-				*colon = ':';
-			if (copy_to_user(arg, &ifr,
-					 sizeof(struct ifreq)))
-				ret = -EFAULT;
-		}
-		return ret;
-
-	/*
-	 *	These ioctl calls:
-	 *	- require superuser power.
-	 *	- require strict serialization.
-	 *	- return a value
-	 */
-	case SIOCGMIIPHY:
-	case SIOCGMIIREG:
-	case SIOCSIFNAME:
-		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
-			return -EPERM;
-		dev_load(net, ifr.ifr_name);
-		rtnl_lock();
-		ret = dev_ifsioc(net, &ifr, cmd);
-		rtnl_unlock();
-		if (!ret) {
-			if (colon)
-				*colon = ':';
-			if (copy_to_user(arg, &ifr,
-					 sizeof(struct ifreq)))
-				ret = -EFAULT;
-		}
-		return ret;
-
-	/*
-	 *	These ioctl calls:
-	 *	- require superuser power.
-	 *	- require strict serialization.
-	 *	- do not return a value
-	 */
-	case SIOCSIFMAP:
-	case SIOCSIFTXQLEN:
-		if (!capable(CAP_NET_ADMIN))
-			return -EPERM;
-		/* fall through */
-	/*
-	 *	These ioctl calls:
-	 *	- require local superuser power.
-	 *	- require strict serialization.
-	 *	- do not return a value
-	 */
-	case SIOCSIFFLAGS:
-	case SIOCSIFMETRIC:
-	case SIOCSIFMTU:
-	case SIOCSIFHWADDR:
-	case SIOCSIFSLAVE:
-	case SIOCADDMULTI:
-	case SIOCDELMULTI:
-	case SIOCSIFHWBROADCAST:
-	case SIOCSMIIREG:
-	case SIOCBONDENSLAVE:
-	case SIOCBONDRELEASE:
-	case SIOCBONDSETHWADDR:
-	case SIOCBONDCHANGEACTIVE:
-	case SIOCBRADDIF:
-	case SIOCBRDELIF:
-	case SIOCSHWTSTAMP:
-		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
-			return -EPERM;
-		/* fall through */
-	case SIOCBONDSLAVEINFOQUERY:
-	case SIOCBONDINFOQUERY:
-		dev_load(net, ifr.ifr_name);
-		rtnl_lock();
-		ret = dev_ifsioc(net, &ifr, cmd);
-		rtnl_unlock();
-		return ret;
-
-	case SIOCGIFMEM:
-		/* Get the per device memory space. We can add this but
-		 * currently do not support it */
-	case SIOCSIFMEM:
-		/* Set the per device memory buffer space.
-		 * Not applicable in our case */
-	case SIOCSIFLINK:
-		return -ENOTTY;
+	const struct net_device_ops *ops = dev->netdev_ops;
 
-	/*
-	 *	Unknown or private ioctl.
-	 */
-	default:
-		if (cmd == SIOCWANDEV ||
-		    (cmd >= SIOCDEVPRIVATE &&
-		     cmd <= SIOCDEVPRIVATE + 15)) {
-			dev_load(net, ifr.ifr_name);
-			rtnl_lock();
-			ret = dev_ifsioc(net, &ifr, cmd);
-			rtnl_unlock();
-			if (!ret && copy_to_user(arg, &ifr,
-						 sizeof(struct ifreq)))
-				ret = -EFAULT;
-			return ret;
-		}
-		/* Take care of Wireless Extensions */
-		if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
-			return wext_handle_ioctl(net, &ifr, cmd, arg);
-		return -ENOTTY;
-	}
+	if (!ops->ndo_change_carrier)
+		return -EOPNOTSUPP;
+	if (!netif_device_present(dev))
+		return -ENODEV;
+	return ops->ndo_change_carrier(dev, new_carrier);
 }
-
+EXPORT_SYMBOL(dev_change_carrier);
 
 /**
  *	dev_new_index	-	allocate an ifindex
@@ -5482,11 +5263,15 @@ static void rollback_registered_many(struct list_head *head)
 		if (dev->netdev_ops->ndo_uninit)
 			dev->netdev_ops->ndo_uninit(dev);
 
-		/* Notifier chain MUST detach us from master device. */
-		WARN_ON(dev->master);
+		/* Notifier chain MUST detach us all upper devices. */
+		WARN_ON(netdev_has_any_upper_dev(dev));
 
 		/* Remove entries from kobject tree */
 		netdev_unregister_kobject(dev);
+#ifdef CONFIG_XPS
+		/* Remove XPS queueing entries */
+		netif_reset_xps_queues_gt(dev, 0);
+#endif
 	}
 
 	synchronize_net();
@@ -5664,10 +5449,9 @@ static int netif_alloc_rx_queues(struct net_device *dev)
 	BUG_ON(count < 1);
 
 	rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
-	if (!rx) {
-		pr_err("netdev: Unable to allocate %u rx queues\n", count);
+	if (!rx)
 		return -ENOMEM;
-	}
+
 	dev->_rx = rx;
 
 	for (i = 0; i < count; i++)
@@ -5698,10 +5482,9 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
 	BUG_ON(count < 1);
 
 	tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
-	if (!tx) {
-		pr_err("netdev: Unable to allocate %u tx queues\n", count);
+	if (!tx)
 		return -ENOMEM;
-	}
+
 	dev->_tx = tx;
 
 	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
@@ -5760,6 +5543,14 @@ int register_netdevice(struct net_device *dev)
 		}
 	}
 
+	if (((dev->hw_features | dev->features) & NETIF_F_HW_VLAN_FILTER) &&
+	    (!dev->netdev_ops->ndo_vlan_rx_add_vid ||
+	     !dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
+		netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
+		ret = -EINVAL;
+		goto err_uninit;
+	}
+
 	ret = -EBUSY;
 	if (!dev->ifindex)
 		dev->ifindex = dev_new_index(net);
@@ -5815,6 +5606,13 @@ int register_netdevice(struct net_device *dev)
 	list_netdevice(dev);
 	add_device_randomness(dev->dev_addr, dev->addr_len);
 
+	/* If the device has permanent device address, driver should
+	 * set dev_addr and also addr_assign_type should be set to
+	 * NET_ADDR_PERM (default value).
+	 */
+	if (dev->addr_assign_type == NET_ADDR_PERM)
+		memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
+
 	/* Notify protocols, that a new device appeared. */
 	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
 	ret = notifier_to_errno(ret);
@@ -6121,6 +5919,14 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
 
 static const struct ethtool_ops default_ethtool_ops;
 
+void netdev_set_default_ethtool_ops(struct net_device *dev,
+				    const struct ethtool_ops *ops)
+{
+	if (dev->ethtool_ops == &default_ethtool_ops)
+		dev->ethtool_ops = ops;
+}
+EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
+
 /**
  *	alloc_netdev_mqs - allocate network device
  *	@sizeof_priv:	size of private data to allocate space for
@@ -6165,10 +5971,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	alloc_size += NETDEV_ALIGN - 1;
 
 	p = kzalloc(alloc_size, GFP_KERNEL);
-	if (!p) {
-		pr_err("alloc_netdev: Unable to allocate device\n");
+	if (!p)
 		return NULL;
-	}
 
 	dev = PTR_ALIGN(p, NETDEV_ALIGN);
 	dev->padded = (char *)dev - (char *)p;
@@ -6191,6 +5995,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	INIT_LIST_HEAD(&dev->napi_list);
 	INIT_LIST_HEAD(&dev->unreg_list);
 	INIT_LIST_HEAD(&dev->link_watch_list);
+	INIT_LIST_HEAD(&dev->upper_dev_list);
 	dev->priv_flags = IFF_XMIT_DST_RELEASE;
 	setup(dev);
 
@@ -6841,12 +6646,3 @@ out:
 }
 
 subsys_initcall(net_dev_init);
-
-static int __init initialize_hashrnd(void)
-{
-	get_random_bytes(&hashrnd, sizeof(hashrnd));
-	return 0;
-}
-
-late_initcall_sync(initialize_hashrnd);
-
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
new file mode 100644
index 00000000000..6cc0481faad
--- /dev/null
+++ b/net/core/dev_ioctl.c
@@ -0,0 +1,576 @@
+#include <linux/kmod.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/net_tstamp.h>
+#include <linux/wireless.h>
+#include <net/wext.h>
+
+/*
+ *	Map an interface index to its name (SIOCGIFNAME)
+ */
+
+/*
+ *	We need this ioctl for efficient implementation of the
+ *	if_indextoname() function required by the IPv6 API.  Without
+ *	it, we would have to search all the interfaces to find a
+ *	match.  --pb
+ */
+
+static int dev_ifname(struct net *net, struct ifreq __user *arg)
+{
+	struct net_device *dev;
+	struct ifreq ifr;
+	unsigned seq;
+
+	/*
+	 *	Fetch the caller's info block.
+	 */
+
+	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+		return -EFAULT;
+
+retry:
+	seq = read_seqcount_begin(&devnet_rename_seq);
+	rcu_read_lock();
+	dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
+	if (!dev) {
+		rcu_read_unlock();
+		return -ENODEV;
+	}
+
+	strcpy(ifr.ifr_name, dev->name);
+	rcu_read_unlock();
+	if (read_seqcount_retry(&devnet_rename_seq, seq))
+		goto retry;
+
+	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+		return -EFAULT;
+	return 0;
+}
+
+static gifconf_func_t *gifconf_list[NPROTO];
+
+/**
+ *	register_gifconf	-	register a SIOCGIF handler
+ *	@family: Address family
+ *	@gifconf: Function handler
+ *
+ *	Register protocol dependent address dumping routines. The handler
+ *	that is passed must not be freed or reused until it has been replaced
+ *	by another handler.
+ */
+int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
+{
+	if (family >= NPROTO)
+		return -EINVAL;
+	gifconf_list[family] = gifconf;
+	return 0;
+}
+EXPORT_SYMBOL(register_gifconf);
+
+/*
+ *	Perform a SIOCGIFCONF call. This structure will change
+ *	size eventually, and there is nothing I can do about it.
+ *	Thus we will need a 'compatibility mode'.
+ */
+
+static int dev_ifconf(struct net *net, char __user *arg)
+{
+	struct ifconf ifc;
+	struct net_device *dev;
+	char __user *pos;
+	int len;
+	int total;
+	int i;
+
+	/*
+	 *	Fetch the caller's info block.
+	 */
+
+	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
+		return -EFAULT;
+
+	pos = ifc.ifc_buf;
+	len = ifc.ifc_len;
+
+	/*
+	 *	Loop over the interfaces, and write an info block for each.
+	 */
+
+	total = 0;
+	for_each_netdev(net, dev) {
+		for (i = 0; i < NPROTO; i++) {
+			if (gifconf_list[i]) {
+				int done;
+				if (!pos)
+					done = gifconf_list[i](dev, NULL, 0);
+				else
+					done = gifconf_list[i](dev, pos + total,
+							       len - total);
+				if (done < 0)
+					return -EFAULT;
+				total += done;
+			}
+		}
+	}
+
+	/*
+	 *	All done.  Write the updated control block back to the caller.
+	 */
+	ifc.ifc_len = total;
+
+	/*
+	 * 	Both BSD and Solaris return 0 here, so we do too.
+	 */
+	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
+}
+
+/*
+ *	Perform the SIOCxIFxxx calls, inside rcu_read_lock()
+ */
+static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
+{
+	int err;
+	struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
+
+	if (!dev)
+		return -ENODEV;
+
+	switch (cmd) {
+	case SIOCGIFFLAGS:	/* Get interface flags */
+		ifr->ifr_flags = (short) dev_get_flags(dev);
+		return 0;
+
+	case SIOCGIFMETRIC:	/* Get the metric on the interface
+				   (currently unused) */
+		ifr->ifr_metric = 0;
+		return 0;
+
+	case SIOCGIFMTU:	/* Get the MTU of a device */
+		ifr->ifr_mtu = dev->mtu;
+		return 0;
+
+	case SIOCGIFHWADDR:
+		if (!dev->addr_len)
+			memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
+		else
+			memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
+			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
+		ifr->ifr_hwaddr.sa_family = dev->type;
+		return 0;
+
+	case SIOCGIFSLAVE:
+		err = -EINVAL;
+		break;
+
+	case SIOCGIFMAP:
+		ifr->ifr_map.mem_start = dev->mem_start;
+		ifr->ifr_map.mem_end   = dev->mem_end;
+		ifr->ifr_map.base_addr = dev->base_addr;
+		ifr->ifr_map.irq       = dev->irq;
+		ifr->ifr_map.dma       = dev->dma;
+		ifr->ifr_map.port      = dev->if_port;
+		return 0;
+
+	case SIOCGIFINDEX:
+		ifr->ifr_ifindex = dev->ifindex;
+		return 0;
+
+	case SIOCGIFTXQLEN:
+		ifr->ifr_qlen = dev->tx_queue_len;
+		return 0;
+
+	default:
+		/* dev_ioctl() should ensure this case
+		 * is never reached
+		 */
+		WARN_ON(1);
+		err = -ENOTTY;
+		break;
+
+	}
+	return err;
+}
+
+static int net_hwtstamp_validate(struct ifreq *ifr)
+{
+	struct hwtstamp_config cfg;
+	enum hwtstamp_tx_types tx_type;
+	enum hwtstamp_rx_filters rx_filter;
+	int tx_type_valid = 0;
+	int rx_filter_valid = 0;
+
+	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
+		return -EFAULT;
+
+	if (cfg.flags) /* reserved for future extensions */
+		return -EINVAL;
+
+	tx_type = cfg.tx_type;
+	rx_filter = cfg.rx_filter;
+
+	switch (tx_type) {
+	case HWTSTAMP_TX_OFF:
+	case HWTSTAMP_TX_ON:
+	case HWTSTAMP_TX_ONESTEP_SYNC:
+		tx_type_valid = 1;
+		break;
+	}
+
+	switch (rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+	case HWTSTAMP_FILTER_ALL:
+	case HWTSTAMP_FILTER_SOME:
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+		rx_filter_valid = 1;
+		break;
+	}
+
+	if (!tx_type_valid || !rx_filter_valid)
+		return -ERANGE;
+
+	return 0;
+}
+
+/*
+ *	Perform the SIOCxIFxxx calls, inside rtnl_lock()
+ */
+static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
+{
+	int err;
+	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
+	const struct net_device_ops *ops;
+
+	if (!dev)
+		return -ENODEV;
+
+	ops = dev->netdev_ops;
+
+	switch (cmd) {
+	case SIOCSIFFLAGS:	/* Set interface flags */
+		return dev_change_flags(dev, ifr->ifr_flags);
+
+	case SIOCSIFMETRIC:	/* Set the metric on the interface
+				   (currently unused) */
+		return -EOPNOTSUPP;
+
+	case SIOCSIFMTU:	/* Set the MTU of a device */
+		return dev_set_mtu(dev, ifr->ifr_mtu);
+
+	case SIOCSIFHWADDR:
+		return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
+
+	case SIOCSIFHWBROADCAST:
+		if (ifr->ifr_hwaddr.sa_family != dev->type)
+			return -EINVAL;
+		memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
+		       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+		return 0;
+
+	case SIOCSIFMAP:
+		if (ops->ndo_set_config) {
+			if (!netif_device_present(dev))
+				return -ENODEV;
+			return ops->ndo_set_config(dev, &ifr->ifr_map);
+		}
+		return -EOPNOTSUPP;
+
+	case SIOCADDMULTI:
+		if (!ops->ndo_set_rx_mode ||
+		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
+			return -EINVAL;
+		if (!netif_device_present(dev))
+			return -ENODEV;
+		return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
+
+	case SIOCDELMULTI:
+		if (!ops->ndo_set_rx_mode ||
+		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
+			return -EINVAL;
+		if (!netif_device_present(dev))
+			return -ENODEV;
+		return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
+
+	case SIOCSIFTXQLEN:
+		if (ifr->ifr_qlen < 0)
+			return -EINVAL;
+		dev->tx_queue_len = ifr->ifr_qlen;
+		return 0;
+
+	case SIOCSIFNAME:
+		ifr->ifr_newname[IFNAMSIZ-1] = '\0';
+		return dev_change_name(dev, ifr->ifr_newname);
+
+	case SIOCSHWTSTAMP:
+		err = net_hwtstamp_validate(ifr);
+		if (err)
+			return err;
+		/* fall through */
+
+	/*
+	 *	Unknown or private ioctl
+	 */
+	default:
+		if ((cmd >= SIOCDEVPRIVATE &&
+		    cmd <= SIOCDEVPRIVATE + 15) ||
+		    cmd == SIOCBONDENSLAVE ||
+		    cmd == SIOCBONDRELEASE ||
+		    cmd == SIOCBONDSETHWADDR ||
+		    cmd == SIOCBONDSLAVEINFOQUERY ||
+		    cmd == SIOCBONDINFOQUERY ||
+		    cmd == SIOCBONDCHANGEACTIVE ||
+		    cmd == SIOCGMIIPHY ||
+		    cmd == SIOCGMIIREG ||
+		    cmd == SIOCSMIIREG ||
+		    cmd == SIOCBRADDIF ||
+		    cmd == SIOCBRDELIF ||
+		    cmd == SIOCSHWTSTAMP ||
+		    cmd == SIOCWANDEV) {
+			err = -EOPNOTSUPP;
+			if (ops->ndo_do_ioctl) {
+				if (netif_device_present(dev))
+					err = ops->ndo_do_ioctl(dev, ifr, cmd);
+				else
+					err = -ENODEV;
+			}
+		} else
+			err = -EINVAL;
+
+	}
+	return err;
+}
+
+/**
+ *	dev_load 	- load a network module
+ *	@net: the applicable net namespace
+ *	@name: name of interface
+ *
+ *	If a network interface is not present and the process has suitable
+ *	privileges this function loads the module. If module loading is not
+ *	available in this kernel then it becomes a nop.
+ */
+
+void dev_load(struct net *net, const char *name)
+{
+	struct net_device *dev;
+	int no_module;
+
+	rcu_read_lock();
+	dev = dev_get_by_name_rcu(net, name);
+	rcu_read_unlock();
+
+	no_module = !dev;
+	if (no_module && capable(CAP_NET_ADMIN))
+		no_module = request_module("netdev-%s", name);
+	if (no_module && capable(CAP_SYS_MODULE)) {
+		if (!request_module("%s", name))
+			pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated).  Use CAP_NET_ADMIN and alias netdev-%s instead.\n",
+				name);
+	}
+}
+EXPORT_SYMBOL(dev_load);
+
+/*
+ *	This function handles all "interface"-type I/O control requests. The actual
+ *	'doing' part of this is dev_ifsioc above.
+ */
+
+/**
+ *	dev_ioctl	-	network device ioctl
+ *	@net: the applicable net namespace
+ *	@cmd: command to issue
+ *	@arg: pointer to a struct ifreq in user space
+ *
+ *	Issue ioctl functions to devices. This is normally called by the
+ *	user space syscall interfaces but can sometimes be useful for
+ *	other purposes. The return value is the return from the syscall if
+ *	positive or a negative errno code on error.
+ */
+
+int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+{
+	struct ifreq ifr;
+	int ret;
+	char *colon;
+
+	/* One special case: SIOCGIFCONF takes ifconf argument
+	   and requires shared lock, because it sleeps writing
+	   to user space.
+	 */
+
+	if (cmd == SIOCGIFCONF) {
+		rtnl_lock();
+		ret = dev_ifconf(net, (char __user *) arg);
+		rtnl_unlock();
+		return ret;
+	}
+	if (cmd == SIOCGIFNAME)
+		return dev_ifname(net, (struct ifreq __user *)arg);
+
+	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+		return -EFAULT;
+
+	ifr.ifr_name[IFNAMSIZ-1] = 0;
+
+	colon = strchr(ifr.ifr_name, ':');
+	if (colon)
+		*colon = 0;
+
+	/*
+	 *	See which interface the caller is talking about.
+	 */
+
+	switch (cmd) {
+	/*
+	 *	These ioctl calls:
+	 *	- can be done by all.
+	 *	- atomic and do not require locking.
+	 *	- return a value
+	 */
+	case SIOCGIFFLAGS:
+	case SIOCGIFMETRIC:
+	case SIOCGIFMTU:
+	case SIOCGIFHWADDR:
+	case SIOCGIFSLAVE:
+	case SIOCGIFMAP:
+	case SIOCGIFINDEX:
+	case SIOCGIFTXQLEN:
+		dev_load(net, ifr.ifr_name);
+		rcu_read_lock();
+		ret = dev_ifsioc_locked(net, &ifr, cmd);
+		rcu_read_unlock();
+		if (!ret) {
+			if (colon)
+				*colon = ':';
+			if (copy_to_user(arg, &ifr,
+					 sizeof(struct ifreq)))
+				ret = -EFAULT;
+		}
+		return ret;
+
+	case SIOCETHTOOL:
+		dev_load(net, ifr.ifr_name);
+		rtnl_lock();
+		ret = dev_ethtool(net, &ifr);
+		rtnl_unlock();
+		if (!ret) {
+			if (colon)
+				*colon = ':';
+			if (copy_to_user(arg, &ifr,
+					 sizeof(struct ifreq)))
+				ret = -EFAULT;
+		}
+		return ret;
+
+	/*
+	 *	These ioctl calls:
+	 *	- require superuser power.
+	 *	- require strict serialization.
+	 *	- return a value
+	 */
+	case SIOCGMIIPHY:
+	case SIOCGMIIREG:
+	case SIOCSIFNAME:
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+			return -EPERM;
+		dev_load(net, ifr.ifr_name);
+		rtnl_lock();
+		ret = dev_ifsioc(net, &ifr, cmd);
+		rtnl_unlock();
+		if (!ret) {
+			if (colon)
+				*colon = ':';
+			if (copy_to_user(arg, &ifr,
+					 sizeof(struct ifreq)))
+				ret = -EFAULT;
+		}
+		return ret;
+
+	/*
+	 *	These ioctl calls:
+	 *	- require superuser power.
+	 *	- require strict serialization.
+	 *	- do not return a value
+	 */
+	case SIOCSIFMAP:
+	case SIOCSIFTXQLEN:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		/* fall through */
+	/*
+	 *	These ioctl calls:
+	 *	- require local superuser power.
+	 *	- require strict serialization.
+	 *	- do not return a value
+	 */
+	case SIOCSIFFLAGS:
+	case SIOCSIFMETRIC:
+	case SIOCSIFMTU:
+	case SIOCSIFHWADDR:
+	case SIOCSIFSLAVE:
+	case SIOCADDMULTI:
+	case SIOCDELMULTI:
+	case SIOCSIFHWBROADCAST:
+	case SIOCSMIIREG:
+	case SIOCBONDENSLAVE:
+	case SIOCBONDRELEASE:
+	case SIOCBONDSETHWADDR:
+	case SIOCBONDCHANGEACTIVE:
+	case SIOCBRADDIF:
+	case SIOCBRDELIF:
+	case SIOCSHWTSTAMP:
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+			return -EPERM;
+		/* fall through */
+	case SIOCBONDSLAVEINFOQUERY:
+	case SIOCBONDINFOQUERY:
+		dev_load(net, ifr.ifr_name);
+		rtnl_lock();
+		ret = dev_ifsioc(net, &ifr, cmd);
+		rtnl_unlock();
+		return ret;
+
+	case SIOCGIFMEM:
+		/* Get the per device memory space. We can add this but
+		 * currently do not support it */
+	case SIOCSIFMEM:
+		/* Set the per device memory buffer space.
+		 * Not applicable in our case */
+	case SIOCSIFLINK:
+		return -ENOTTY;
+
+	/*
+	 *	Unknown or private ioctl.
+	 */
+	default:
+		if (cmd == SIOCWANDEV ||
+		    (cmd >= SIOCDEVPRIVATE &&
+		     cmd <= SIOCDEVPRIVATE + 15)) {
+			dev_load(net, ifr.ifr_name);
+			rtnl_lock();
+			ret = dev_ifsioc(net, &ifr, cmd);
+			rtnl_unlock();
+			if (!ret && copy_to_user(arg, &ifr,
+						 sizeof(struct ifreq)))
+				ret = -EFAULT;
+			return ret;
+		}
+		/* Take care of Wireless Extensions */
+		if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
+			return wext_handle_ioctl(net, &ifr, cmd, arg);
+		return -ENOTTY;
+	}
+}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index a8705432e4b..3e9b2c3e30f 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -77,6 +77,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
 	[NETIF_F_TSO_ECN_BIT] =          "tx-tcp-ecn-segmentation",
 	[NETIF_F_TSO6_BIT] =             "tx-tcp6-segmentation",
 	[NETIF_F_FSO_BIT] =              "tx-fcoe-segmentation",
+	[NETIF_F_GSO_GRE_BIT] =		 "tx-gre-segmentation",
 
 	[NETIF_F_FCOE_CRC_BIT] =         "tx-checksum-fcoe-crc",
 	[NETIF_F_SCTP_CSUM_BIT] =        "tx-checksum-sctp",
@@ -175,7 +176,7 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset)
 	if (sset == ETH_SS_FEATURES)
 		return ARRAY_SIZE(netdev_features_strings);
 
-	if (ops && ops->get_sset_count && ops->get_strings)
+	if (ops->get_sset_count && ops->get_strings)
 		return ops->get_sset_count(dev, sset);
 	else
 		return -EOPNOTSUPP;
@@ -311,7 +312,7 @@ int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
 	ASSERT_RTNL();
 
-	if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings)
+	if (!dev->ethtool_ops->get_settings)
 		return -EOPNOTSUPP;
 
 	memset(cmd, 0, sizeof(struct ethtool_cmd));
@@ -355,7 +356,7 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
 
 	memset(&info, 0, sizeof(info));
 	info.cmd = ETHTOOL_GDRVINFO;
-	if (ops && ops->get_drvinfo) {
+	if (ops->get_drvinfo) {
 		ops->get_drvinfo(dev, &info);
 	} else if (dev->dev.parent && dev->dev.parent->driver) {
 		strlcpy(info.bus_info, dev_name(dev->dev.parent),
@@ -370,7 +371,7 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
 	 * this method of obtaining string set info is deprecated;
 	 * Use ETHTOOL_GSSET_INFO instead.
 	 */
-	if (ops && ops->get_sset_count) {
+	if (ops->get_sset_count) {
 		int rc;
 
 		rc = ops->get_sset_count(dev, ETH_SS_TEST);
@@ -383,9 +384,9 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
 		if (rc >= 0)
 			info.n_priv_flags = rc;
 	}
-	if (ops && ops->get_regs_len)
+	if (ops->get_regs_len)
 		info.regdump_len = ops->get_regs_len(dev);
-	if (ops && ops->get_eeprom_len)
+	if (ops->get_eeprom_len)
 		info.eedump_len = ops->get_eeprom_len(dev);
 
 	if (copy_to_user(useraddr, &info, sizeof(info)))
@@ -590,13 +591,14 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
 	struct ethtool_rxnfc rx_rings;
 	u32 user_size, dev_size, i;
 	u32 *indir;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
 	int ret;
 
-	if (!dev->ethtool_ops->get_rxfh_indir_size ||
-	    !dev->ethtool_ops->set_rxfh_indir ||
-	    !dev->ethtool_ops->get_rxnfc)
+	if (!ops->get_rxfh_indir_size || !ops->set_rxfh_indir ||
+	    !ops->get_rxnfc)
 		return -EOPNOTSUPP;
-	dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
+
+	dev_size = ops->get_rxfh_indir_size(dev);
 	if (dev_size == 0)
 		return -EOPNOTSUPP;
 
@@ -613,7 +615,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
 		return -ENOMEM;
 
 	rx_rings.cmd = ETHTOOL_GRXRINGS;
-	ret = dev->ethtool_ops->get_rxnfc(dev, &rx_rings, NULL);
+	ret = ops->get_rxnfc(dev, &rx_rings, NULL);
 	if (ret)
 		goto out;
 
@@ -639,7 +641,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
 		}
 	}
 
-	ret = dev->ethtool_ops->set_rxfh_indir(dev, indir);
+	ret = ops->set_rxfh_indir(dev, indir);
 
 out:
 	kfree(indir);
@@ -1082,9 +1084,10 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_value id;
 	static bool busy;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
 	int rc;
 
-	if (!dev->ethtool_ops->set_phys_id)
+	if (!ops->set_phys_id)
 		return -EOPNOTSUPP;
 
 	if (busy)
@@ -1093,7 +1096,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
 	if (copy_from_user(&id, useraddr, sizeof(id)))
 		return -EFAULT;
 
-	rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE);
+	rc = ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE);
 	if (rc < 0)
 		return rc;
 
@@ -1118,7 +1121,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
 			i = n;
 			do {
 				rtnl_lock();
-				rc = dev->ethtool_ops->set_phys_id(dev,
+				rc = ops->set_phys_id(dev,
 				    (i & 1) ? ETHTOOL_ID_OFF : ETHTOOL_ID_ON);
 				rtnl_unlock();
 				if (rc)
@@ -1133,7 +1136,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
 	dev_put(dev);
 	busy = false;
 
-	(void)dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE);
+	(void) ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE);
 	return rc;
 }
 
@@ -1275,7 +1278,7 @@ static int ethtool_get_dump_flag(struct net_device *dev,
 	struct ethtool_dump dump;
 	const struct ethtool_ops *ops = dev->ethtool_ops;
 
-	if (!dev->ethtool_ops->get_dump_flag)
+	if (!ops->get_dump_flag)
 		return -EOPNOTSUPP;
 
 	if (copy_from_user(&dump, useraddr, sizeof(dump)))
@@ -1299,8 +1302,7 @@ static int ethtool_get_dump_data(struct net_device *dev,
 	const struct ethtool_ops *ops = dev->ethtool_ops;
 	void *data = NULL;
 
-	if (!dev->ethtool_ops->get_dump_data ||
-		!dev->ethtool_ops->get_dump_flag)
+	if (!ops->get_dump_data || !ops->get_dump_flag)
 		return -EOPNOTSUPP;
 
 	if (copy_from_user(&dump, useraddr, sizeof(dump)))
@@ -1346,13 +1348,9 @@ static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr)
 	info.cmd = ETHTOOL_GET_TS_INFO;
 
 	if (phydev && phydev->drv && phydev->drv->ts_info) {
-
 		err = phydev->drv->ts_info(phydev, &info);
-
-	} else if (dev->ethtool_ops && dev->ethtool_ops->get_ts_info) {
-
+	} else if (ops->get_ts_info) {
 		err = ops->get_ts_info(dev, &info);
-
 	} else {
 		info.so_timestamping =
 			SOF_TIMESTAMPING_RX_SOFTWARE |
diff --git a/net/core/filter.c b/net/core/filter.c
index c23543cba13..2e20b55a783 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -532,6 +532,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
 		[BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
 	};
 	int pc;
+	bool anc_found;
 
 	if (flen == 0 || flen > BPF_MAXINSNS)
 		return -EINVAL;
@@ -592,8 +593,10 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
 		case BPF_S_LD_W_ABS:
 		case BPF_S_LD_H_ABS:
 		case BPF_S_LD_B_ABS:
+			anc_found = false;
 #define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE:	\
 				code = BPF_S_ANC_##CODE;	\
+				anc_found = true;		\
 				break
 			switch (ftest->k) {
 			ANCILLARY(PROTOCOL);
@@ -610,6 +613,10 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
 			ANCILLARY(VLAN_TAG);
 			ANCILLARY(VLAN_TAG_PRESENT);
 			}
+
+			/* ancillary operation unknown or unsupported */
+			if (anc_found == false && ftest->k >= SKF_AD_OFF)
+				return -EINVAL;
 		}
 		ftest->code = code;
 	}
@@ -714,6 +721,9 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 	unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
 	int err;
 
+	if (sock_flag(sk, SOCK_FILTER_LOCKED))
+		return -EPERM;
+
 	/* Make sure new filter is there and in the right amounts. */
 	if (fprog->filter == NULL)
 		return -EINVAL;
@@ -750,6 +760,9 @@ int sk_detach_filter(struct sock *sk)
 	int ret = -ENOENT;
 	struct sk_filter *filter;
 
+	if (sock_flag(sk, SOCK_FILTER_LOCKED))
+		return -EPERM;
+
 	filter = rcu_dereference_protected(sk->sk_filter,
 					   sock_owned_by_user(sk));
 	if (filter) {
diff --git a/net/core/flow.c b/net/core/flow.c
index b0901ee5a00..43f7495df27 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -286,7 +286,7 @@ nocache:
 		else
 			fle->genid--;
 	} else {
-		if (flo && !IS_ERR(flo))
+		if (!IS_ERR_OR_NULL(flo))
 			flo->ops->delete(flo);
 	}
 ret_object:
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 466820b6e34..9d4c7201400 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -143,3 +143,176 @@ ipv6:
 	return true;
 }
 EXPORT_SYMBOL(skb_flow_dissect);
+
+static u32 hashrnd __read_mostly;
+
+/*
+ * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
+ * and src/dst port numbers.  Sets rxhash in skb to non-zero hash value
+ * on success, zero indicates no valid hash.  Also, sets l4_rxhash in skb
+ * if hash is a canonical 4-tuple hash over transport ports.
+ */
+void __skb_get_rxhash(struct sk_buff *skb)
+{
+	struct flow_keys keys;
+	u32 hash;
+
+	if (!skb_flow_dissect(skb, &keys))
+		return;
+
+	if (keys.ports)
+		skb->l4_rxhash = 1;
+
+	/* get a consistent hash (same value on both flow directions) */
+	if (((__force u32)keys.dst < (__force u32)keys.src) ||
+	    (((__force u32)keys.dst == (__force u32)keys.src) &&
+	     ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) {
+		swap(keys.dst, keys.src);
+		swap(keys.port16[0], keys.port16[1]);
+	}
+
+	hash = jhash_3words((__force u32)keys.dst,
+			    (__force u32)keys.src,
+			    (__force u32)keys.ports, hashrnd);
+	if (!hash)
+		hash = 1;
+
+	skb->rxhash = hash;
+}
+EXPORT_SYMBOL(__skb_get_rxhash);
+
+/*
+ * Returns a Tx hash based on the given packet descriptor a Tx queues' number
+ * to be used as a distribution range.
+ */
+u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
+		  unsigned int num_tx_queues)
+{
+	u32 hash;
+	u16 qoffset = 0;
+	u16 qcount = num_tx_queues;
+
+	if (skb_rx_queue_recorded(skb)) {
+		hash = skb_get_rx_queue(skb);
+		while (unlikely(hash >= num_tx_queues))
+			hash -= num_tx_queues;
+		return hash;
+	}
+
+	if (dev->num_tc) {
+		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+		qoffset = dev->tc_to_txq[tc].offset;
+		qcount = dev->tc_to_txq[tc].count;
+	}
+
+	if (skb->sk && skb->sk->sk_hash)
+		hash = skb->sk->sk_hash;
+	else
+		hash = (__force u16) skb->protocol;
+	hash = jhash_1word(hash, hashrnd);
+
+	return (u16) (((u64) hash * qcount) >> 32) + qoffset;
+}
+EXPORT_SYMBOL(__skb_tx_hash);
+
+static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
+{
+	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
+		net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n",
+				     dev->name, queue_index,
+				     dev->real_num_tx_queues);
+		return 0;
+	}
+	return queue_index;
+}
+
+static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+{
+#ifdef CONFIG_XPS
+	struct xps_dev_maps *dev_maps;
+	struct xps_map *map;
+	int queue_index = -1;
+
+	rcu_read_lock();
+	dev_maps = rcu_dereference(dev->xps_maps);
+	if (dev_maps) {
+		map = rcu_dereference(
+		    dev_maps->cpu_map[raw_smp_processor_id()]);
+		if (map) {
+			if (map->len == 1)
+				queue_index = map->queues[0];
+			else {
+				u32 hash;
+				if (skb->sk && skb->sk->sk_hash)
+					hash = skb->sk->sk_hash;
+				else
+					hash = (__force u16) skb->protocol ^
+					    skb->rxhash;
+				hash = jhash_1word(hash, hashrnd);
+				queue_index = map->queues[
+				    ((u64)hash * map->len) >> 32];
+			}
+			if (unlikely(queue_index >= dev->real_num_tx_queues))
+				queue_index = -1;
+		}
+	}
+	rcu_read_unlock();
+
+	return queue_index;
+#else
+	return -1;
+#endif
+}
+
+u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+	int queue_index = sk_tx_queue_get(sk);
+
+	if (queue_index < 0 || skb->ooo_okay ||
+	    queue_index >= dev->real_num_tx_queues) {
+		int new_index = get_xps_queue(dev, skb);
+		if (new_index < 0)
+			new_index = skb_tx_hash(dev, skb);
+
+		if (queue_index != new_index && sk) {
+			struct dst_entry *dst =
+				    rcu_dereference_check(sk->sk_dst_cache, 1);
+
+			if (dst && skb_dst(skb) == dst)
+				sk_tx_queue_set(sk, queue_index);
+
+		}
+
+		queue_index = new_index;
+	}
+
+	return queue_index;
+}
+EXPORT_SYMBOL(__netdev_pick_tx);
+
+struct netdev_queue *netdev_pick_tx(struct net_device *dev,
+				    struct sk_buff *skb)
+{
+	int queue_index = 0;
+
+	if (dev->real_num_tx_queues != 1) {
+		const struct net_device_ops *ops = dev->netdev_ops;
+		if (ops->ndo_select_queue)
+			queue_index = ops->ndo_select_queue(dev, skb);
+		else
+			queue_index = __netdev_pick_tx(dev, skb);
+		queue_index = dev_cap_txqueue(dev, queue_index);
+	}
+
+	skb_set_queue_mapping(skb, queue_index);
+	return netdev_get_tx_queue(dev, queue_index);
+}
+
+static int __init initialize_hashrnd(void)
+{
+	get_random_bytes(&hashrnd, sizeof(hashrnd));
+	return 0;
+}
+
+late_initcall_sync(initialize_hashrnd);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index c815f285e5a..3863b8f639c 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -290,15 +290,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device
 			goto out_entries;
 	}
 
-	if (tbl->entry_size)
-		n = kzalloc(tbl->entry_size, GFP_ATOMIC);
-	else {
-		int sz = sizeof(*n) + tbl->key_len;
-
-		sz = ALIGN(sz, NEIGH_PRIV_ALIGN);
-		sz += dev->neigh_priv_len;
-		n = kzalloc(sz, GFP_ATOMIC);
-	}
+	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
 	if (!n)
 		goto out_entries;
 
@@ -778,6 +770,9 @@ static void neigh_periodic_work(struct work_struct *work)
 	nht = rcu_dereference_protected(tbl->nht,
 					lockdep_is_held(&tbl->lock));
 
+	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
+		goto out;
+
 	/*
 	 *	periodically recompute ReachableTime from random function
 	 */
@@ -832,6 +827,7 @@ next_elt:
 		nht = rcu_dereference_protected(tbl->nht,
 						lockdep_is_held(&tbl->lock));
 	}
+out:
 	/* Cycle through all hash buckets every base_reachable_time/2 ticks.
 	 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
 	 * base_reachable_time.
@@ -1542,6 +1538,12 @@ static void neigh_table_init_no_netlink(struct neigh_table *tbl)
 	if (!tbl->nht || !tbl->phash_buckets)
 		panic("cannot allocate neighbour cache hashes");
 
+	if (!tbl->entry_size)
+		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
+					tbl->key_len, NEIGH_PRIV_ALIGN);
+	else
+		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
+
 	rwlock_init(&tbl->lock);
 	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
 	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 334efd5d67a..a5b89a6fec6 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -126,6 +126,19 @@ static ssize_t show_broadcast(struct device *dev,
 	return -EINVAL;
 }
 
+static int change_carrier(struct net_device *net, unsigned long new_carrier)
+{
+	if (!netif_running(net))
+		return -EINVAL;
+	return dev_change_carrier(net, (bool) new_carrier);
+}
+
+static ssize_t store_carrier(struct device *dev, struct device_attribute *attr,
+			 const char *buf, size_t len)
+{
+	return netdev_store(dev, attr, buf, len, change_carrier);
+}
+
 static ssize_t show_carrier(struct device *dev,
 			    struct device_attribute *attr, char *buf)
 {
@@ -331,7 +344,7 @@ static struct device_attribute net_class_attributes[] = {
 	__ATTR(link_mode, S_IRUGO, show_link_mode, NULL),
 	__ATTR(address, S_IRUGO, show_address, NULL),
 	__ATTR(broadcast, S_IRUGO, show_broadcast, NULL),
-	__ATTR(carrier, S_IRUGO, show_carrier, NULL),
+	__ATTR(carrier, S_IRUGO | S_IWUSR, show_carrier, store_carrier),
 	__ATTR(speed, S_IRUGO, show_speed, NULL),
 	__ATTR(duplex, S_IRUGO, show_duplex, NULL),
 	__ATTR(dormant, S_IRUGO, show_dormant, NULL),
@@ -989,68 +1002,14 @@ static ssize_t show_xps_map(struct netdev_queue *queue,
 	return len;
 }
 
-static DEFINE_MUTEX(xps_map_mutex);
-#define xmap_dereference(P)		\
-	rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
-
-static void xps_queue_release(struct netdev_queue *queue)
-{
-	struct net_device *dev = queue->dev;
-	struct xps_dev_maps *dev_maps;
-	struct xps_map *map;
-	unsigned long index;
-	int i, pos, nonempty = 0;
-
-	index = get_netdev_queue_index(queue);
-
-	mutex_lock(&xps_map_mutex);
-	dev_maps = xmap_dereference(dev->xps_maps);
-
-	if (dev_maps) {
-		for_each_possible_cpu(i) {
-			map = xmap_dereference(dev_maps->cpu_map[i]);
-			if (!map)
-				continue;
-
-			for (pos = 0; pos < map->len; pos++)
-				if (map->queues[pos] == index)
-					break;
-
-			if (pos < map->len) {
-				if (map->len > 1)
-					map->queues[pos] =
-					    map->queues[--map->len];
-				else {
-					RCU_INIT_POINTER(dev_maps->cpu_map[i],
-					    NULL);
-					kfree_rcu(map, rcu);
-					map = NULL;
-				}
-			}
-			if (map)
-				nonempty = 1;
-		}
-
-		if (!nonempty) {
-			RCU_INIT_POINTER(dev->xps_maps, NULL);
-			kfree_rcu(dev_maps, rcu);
-		}
-	}
-	mutex_unlock(&xps_map_mutex);
-}
-
 static ssize_t store_xps_map(struct netdev_queue *queue,
 		      struct netdev_queue_attribute *attribute,
 		      const char *buf, size_t len)
 {
 	struct net_device *dev = queue->dev;
-	cpumask_var_t mask;
-	int err, i, cpu, pos, map_len, alloc_len, need_set;
 	unsigned long index;
-	struct xps_map *map, *new_map;
-	struct xps_dev_maps *dev_maps, *new_dev_maps;
-	int nonempty = 0;
-	int numa_node_id = -2;
+	cpumask_var_t mask;
+	int err;
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
@@ -1066,105 +1025,11 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
 		return err;
 	}
 
-	new_dev_maps = kzalloc(max_t(unsigned int,
-	    XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL);
-	if (!new_dev_maps) {
-		free_cpumask_var(mask);
-		return -ENOMEM;
-	}
-
-	mutex_lock(&xps_map_mutex);
-
-	dev_maps = xmap_dereference(dev->xps_maps);
-
-	for_each_possible_cpu(cpu) {
-		map = dev_maps ?
-			xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
-		new_map = map;
-		if (map) {
-			for (pos = 0; pos < map->len; pos++)
-				if (map->queues[pos] == index)
-					break;
-			map_len = map->len;
-			alloc_len = map->alloc_len;
-		} else
-			pos = map_len = alloc_len = 0;
-
-		need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu);
-#ifdef CONFIG_NUMA
-		if (need_set) {
-			if (numa_node_id == -2)
-				numa_node_id = cpu_to_node(cpu);
-			else if (numa_node_id != cpu_to_node(cpu))
-				numa_node_id = -1;
-		}
-#endif
-		if (need_set && pos >= map_len) {
-			/* Need to add queue to this CPU's map */
-			if (map_len >= alloc_len) {
-				alloc_len = alloc_len ?
-				    2 * alloc_len : XPS_MIN_MAP_ALLOC;
-				new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len),
-						       GFP_KERNEL,
-						       cpu_to_node(cpu));
-				if (!new_map)
-					goto error;
-				new_map->alloc_len = alloc_len;
-				for (i = 0; i < map_len; i++)
-					new_map->queues[i] = map->queues[i];
-				new_map->len = map_len;
-			}
-			new_map->queues[new_map->len++] = index;
-		} else if (!need_set && pos < map_len) {
-			/* Need to remove queue from this CPU's map */
-			if (map_len > 1)
-				new_map->queues[pos] =
-				    new_map->queues[--new_map->len];
-			else
-				new_map = NULL;
-		}
-		RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], new_map);
-	}
-
-	/* Cleanup old maps */
-	for_each_possible_cpu(cpu) {
-		map = dev_maps ?
-			xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
-		if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map)
-			kfree_rcu(map, rcu);
-		if (new_dev_maps->cpu_map[cpu])
-			nonempty = 1;
-	}
-
-	if (nonempty) {
-		rcu_assign_pointer(dev->xps_maps, new_dev_maps);
-	} else {
-		kfree(new_dev_maps);
-		RCU_INIT_POINTER(dev->xps_maps, NULL);
-	}
-
-	if (dev_maps)
-		kfree_rcu(dev_maps, rcu);
-
-	netdev_queue_numa_node_write(queue, (numa_node_id >= 0) ? numa_node_id :
-					    NUMA_NO_NODE);
-
-	mutex_unlock(&xps_map_mutex);
+	err = netif_set_xps_queue(dev, mask, index);
 
 	free_cpumask_var(mask);
-	return len;
 
-error:
-	mutex_unlock(&xps_map_mutex);
-
-	if (new_dev_maps)
-		for_each_possible_cpu(i)
-			kfree(rcu_dereference_protected(
-				new_dev_maps->cpu_map[i],
-				1));
-	kfree(new_dev_maps);
-	free_cpumask_var(mask);
-	return -ENOMEM;
+	return err ? : len;
 }
 
 static struct netdev_queue_attribute xps_cpus_attribute =
@@ -1183,10 +1048,6 @@ static void netdev_queue_release(struct kobject *kobj)
 {
 	struct netdev_queue *queue = to_netdev_queue(kobj);
 
-#ifdef CONFIG_XPS
-	xps_queue_release(queue);
-#endif
-
 	memset(kobj, 0, sizeof(*kobj));
 	dev_put(queue->dev);
 }
@@ -1334,7 +1195,6 @@ struct kobj_ns_type_operations net_ns_type_operations = {
 };
 EXPORT_SYMBOL_GPL(net_ns_type_operations);
 
-#ifdef CONFIG_HOTPLUG
 static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
 {
 	struct net_device *dev = to_net_dev(d);
@@ -1353,7 +1213,6 @@ static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
 exit:
 	return retval;
 }
-#endif
 
 /*
  *	netdev_release -- destroy and free a dead device.
@@ -1382,9 +1241,7 @@ static struct class net_class = {
 #ifdef CONFIG_SYSFS
 	.dev_attrs = net_class_attributes,
 #endif /* CONFIG_SYSFS */
-#ifdef CONFIG_HOTPLUG
 	.dev_uevent = netdev_uevent,
-#endif
 	.ns_type = &net_ns_type_operations,
 	.namespace = net_namespace,
 };
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 3151acf5ec1..fa32899006a 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -29,6 +29,9 @@
 #include <linux/if_vlan.h>
 #include <net/tcp.h>
 #include <net/udp.h>
+#include <net/addrconf.h>
+#include <net/ndisc.h>
+#include <net/ip6_checksum.h>
 #include <asm/unaligned.h>
 #include <trace/events/napi.h>
 
@@ -44,6 +47,8 @@ static struct sk_buff_head skb_pool;
 
 static atomic_t trapped;
 
+static struct srcu_struct netpoll_srcu;
+
 #define USEC_PER_POLL	50
 #define NETPOLL_RX_ENABLED  1
 #define NETPOLL_RX_DROP     2
@@ -55,7 +60,8 @@ static atomic_t trapped;
 	 MAX_UDP_CHUNK)
 
 static void zap_completion_queue(void);
-static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
+static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
+static void netpoll_async_cleanup(struct work_struct *work);
 
 static unsigned int carrier_timeout = 4;
 module_param(carrier_timeout, uint, 0644);
@@ -181,13 +187,13 @@ static void poll_napi(struct net_device *dev)
 	}
 }
 
-static void service_arp_queue(struct netpoll_info *npi)
+static void service_neigh_queue(struct netpoll_info *npi)
 {
 	if (npi) {
 		struct sk_buff *skb;
 
-		while ((skb = skb_dequeue(&npi->arp_tx)))
-			netpoll_arp_reply(skb, npi);
+		while ((skb = skb_dequeue(&npi->neigh_tx)))
+			netpoll_neigh_reply(skb, npi);
 	}
 }
 
@@ -196,35 +202,76 @@ static void netpoll_poll_dev(struct net_device *dev)
 	const struct net_device_ops *ops;
 	struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
 
-	if (!dev || !netif_running(dev))
+	/* Don't do any rx activity if the dev_lock mutex is held
+	 * the dev_open/close paths use this to block netpoll activity
+	 * while changing device state
+	 */
+	if (!mutex_trylock(&ni->dev_lock))
 		return;
 
+	if (!netif_running(dev)) {
+		mutex_unlock(&ni->dev_lock);
+		return;
+	}
+
 	ops = dev->netdev_ops;
-	if (!ops->ndo_poll_controller)
+	if (!ops->ndo_poll_controller) {
+		mutex_unlock(&ni->dev_lock);
 		return;
+	}
 
 	/* Process pending work on NIC */
 	ops->ndo_poll_controller(dev);
 
 	poll_napi(dev);
 
+	mutex_unlock(&ni->dev_lock);
+
 	if (dev->flags & IFF_SLAVE) {
 		if (ni) {
-			struct net_device *bond_dev = dev->master;
+			struct net_device *bond_dev;
 			struct sk_buff *skb;
-			struct netpoll_info *bond_ni = rcu_dereference_bh(bond_dev->npinfo);
-			while ((skb = skb_dequeue(&ni->arp_tx))) {
+			struct netpoll_info *bond_ni;
+
+			bond_dev = netdev_master_upper_dev_get_rcu(dev);
+			bond_ni = rcu_dereference_bh(bond_dev->npinfo);
+			while ((skb = skb_dequeue(&ni->neigh_tx))) {
 				skb->dev = bond_dev;
-				skb_queue_tail(&bond_ni->arp_tx, skb);
+				skb_queue_tail(&bond_ni->neigh_tx, skb);
 			}
 		}
 	}
 
-	service_arp_queue(ni);
+	service_neigh_queue(ni);
 
 	zap_completion_queue();
 }
 
+int netpoll_rx_disable(struct net_device *dev)
+{
+	struct netpoll_info *ni;
+	int idx;
+	might_sleep();
+	idx = srcu_read_lock(&netpoll_srcu);
+	ni = srcu_dereference(dev->npinfo, &netpoll_srcu);
+	if (ni)
+		mutex_lock(&ni->dev_lock);
+	srcu_read_unlock(&netpoll_srcu, idx);
+	return 0;
+}
+EXPORT_SYMBOL(netpoll_rx_disable);
+
+void netpoll_rx_enable(struct net_device *dev)
+{
+	struct netpoll_info *ni;
+	rcu_read_lock();
+	ni = rcu_dereference(dev->npinfo);
+	if (ni)
+		mutex_unlock(&ni->dev_lock);
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(netpoll_rx_enable);
+
 static void refill_skbs(void)
 {
 	struct sk_buff *skb;
@@ -381,9 +428,14 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 	struct iphdr *iph;
 	struct ethhdr *eth;
 	static atomic_t ip_ident;
+	struct ipv6hdr *ip6h;
 
 	udp_len = len + sizeof(*udph);
-	ip_len = udp_len + sizeof(*iph);
+	if (np->ipv6)
+		ip_len = udp_len + sizeof(*ip6h);
+	else
+		ip_len = udp_len + sizeof(*iph);
+
 	total_len = ip_len + LL_RESERVED_SPACE(np->dev);
 
 	skb = find_skb(np, total_len + np->dev->needed_tailroom,
@@ -400,34 +452,66 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 	udph->source = htons(np->local_port);
 	udph->dest = htons(np->remote_port);
 	udph->len = htons(udp_len);
-	udph->check = 0;
-	udph->check = csum_tcpudp_magic(np->local_ip,
-					np->remote_ip,
-					udp_len, IPPROTO_UDP,
-					csum_partial(udph, udp_len, 0));
-	if (udph->check == 0)
-		udph->check = CSUM_MANGLED_0;
-
-	skb_push(skb, sizeof(*iph));
-	skb_reset_network_header(skb);
-	iph = ip_hdr(skb);
-
-	/* iph->version = 4; iph->ihl = 5; */
-	put_unaligned(0x45, (unsigned char *)iph);
-	iph->tos      = 0;
-	put_unaligned(htons(ip_len), &(iph->tot_len));
-	iph->id       = htons(atomic_inc_return(&ip_ident));
-	iph->frag_off = 0;
-	iph->ttl      = 64;
-	iph->protocol = IPPROTO_UDP;
-	iph->check    = 0;
-	put_unaligned(np->local_ip, &(iph->saddr));
-	put_unaligned(np->remote_ip, &(iph->daddr));
-	iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);
-
-	eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
-	skb_reset_mac_header(skb);
-	skb->protocol = eth->h_proto = htons(ETH_P_IP);
+
+	if (np->ipv6) {
+		udph->check = 0;
+		udph->check = csum_ipv6_magic(&np->local_ip.in6,
+					      &np->remote_ip.in6,
+					      udp_len, IPPROTO_UDP,
+					      csum_partial(udph, udp_len, 0));
+		if (udph->check == 0)
+			udph->check = CSUM_MANGLED_0;
+
+		skb_push(skb, sizeof(*ip6h));
+		skb_reset_network_header(skb);
+		ip6h = ipv6_hdr(skb);
+
+		/* ip6h->version = 6; ip6h->priority = 0; */
+		put_unaligned(0x60, (unsigned char *)ip6h);
+		ip6h->flow_lbl[0] = 0;
+		ip6h->flow_lbl[1] = 0;
+		ip6h->flow_lbl[2] = 0;
+
+		ip6h->payload_len = htons(sizeof(struct udphdr) + len);
+		ip6h->nexthdr = IPPROTO_UDP;
+		ip6h->hop_limit = 32;
+		ip6h->saddr = np->local_ip.in6;
+		ip6h->daddr = np->remote_ip.in6;
+
+		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
+		skb_reset_mac_header(skb);
+		skb->protocol = eth->h_proto = htons(ETH_P_IPV6);
+	} else {
+		udph->check = 0;
+		udph->check = csum_tcpudp_magic(np->local_ip.ip,
+						np->remote_ip.ip,
+						udp_len, IPPROTO_UDP,
+						csum_partial(udph, udp_len, 0));
+		if (udph->check == 0)
+			udph->check = CSUM_MANGLED_0;
+
+		skb_push(skb, sizeof(*iph));
+		skb_reset_network_header(skb);
+		iph = ip_hdr(skb);
+
+		/* iph->version = 4; iph->ihl = 5; */
+		put_unaligned(0x45, (unsigned char *)iph);
+		iph->tos      = 0;
+		put_unaligned(htons(ip_len), &(iph->tot_len));
+		iph->id       = htons(atomic_inc_return(&ip_ident));
+		iph->frag_off = 0;
+		iph->ttl      = 64;
+		iph->protocol = IPPROTO_UDP;
+		iph->check    = 0;
+		put_unaligned(np->local_ip.ip, &(iph->saddr));
+		put_unaligned(np->remote_ip.ip, &(iph->daddr));
+		iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);
+
+		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
+		skb_reset_mac_header(skb);
+		skb->protocol = eth->h_proto = htons(ETH_P_IP);
+	}
+
 	memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN);
 	memcpy(eth->h_dest, np->remote_mac, ETH_ALEN);
 
@@ -437,18 +521,16 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 }
 EXPORT_SYMBOL(netpoll_send_udp);
 
-static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
+static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
 {
-	struct arphdr *arp;
-	unsigned char *arp_ptr;
-	int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
+	int size, type = ARPOP_REPLY;
 	__be32 sip, tip;
 	unsigned char *sha;
 	struct sk_buff *send_skb;
 	struct netpoll *np, *tmp;
 	unsigned long flags;
 	int hlen, tlen;
-	int hits = 0;
+	int hits = 0, proto;
 
 	if (list_empty(&npinfo->rx_np))
 		return;
@@ -466,94 +548,214 @@ static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
 	if (!hits)
 		return;
 
-	/* No arp on this interface */
-	if (skb->dev->flags & IFF_NOARP)
-		return;
-
-	if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
-		return;
+	proto = ntohs(eth_hdr(skb)->h_proto);
+	if (proto == ETH_P_IP) {
+		struct arphdr *arp;
+		unsigned char *arp_ptr;
+		/* No arp on this interface */
+		if (skb->dev->flags & IFF_NOARP)
+			return;
 
-	skb_reset_network_header(skb);
-	skb_reset_transport_header(skb);
-	arp = arp_hdr(skb);
+		if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
+			return;
 
-	if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
-	     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
-	    arp->ar_pro != htons(ETH_P_IP) ||
-	    arp->ar_op != htons(ARPOP_REQUEST))
-		return;
+		skb_reset_network_header(skb);
+		skb_reset_transport_header(skb);
+		arp = arp_hdr(skb);
 
-	arp_ptr = (unsigned char *)(arp+1);
-	/* save the location of the src hw addr */
-	sha = arp_ptr;
-	arp_ptr += skb->dev->addr_len;
-	memcpy(&sip, arp_ptr, 4);
-	arp_ptr += 4;
-	/* If we actually cared about dst hw addr,
-	   it would get copied here */
-	arp_ptr += skb->dev->addr_len;
-	memcpy(&tip, arp_ptr, 4);
-
-	/* Should we ignore arp? */
-	if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
-		return;
+		if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
+		     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
+		    arp->ar_pro != htons(ETH_P_IP) ||
+		    arp->ar_op != htons(ARPOP_REQUEST))
+			return;
 
-	size = arp_hdr_len(skb->dev);
+		arp_ptr = (unsigned char *)(arp+1);
+		/* save the location of the src hw addr */
+		sha = arp_ptr;
+		arp_ptr += skb->dev->addr_len;
+		memcpy(&sip, arp_ptr, 4);
+		arp_ptr += 4;
+		/* If we actually cared about dst hw addr,
+		   it would get copied here */
+		arp_ptr += skb->dev->addr_len;
+		memcpy(&tip, arp_ptr, 4);
 
-	spin_lock_irqsave(&npinfo->rx_lock, flags);
-	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
-		if (tip != np->local_ip)
-			continue;
+		/* Should we ignore arp? */
+		if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
+			return;
 
-		hlen = LL_RESERVED_SPACE(np->dev);
-		tlen = np->dev->needed_tailroom;
-		send_skb = find_skb(np, size + hlen + tlen, hlen);
-		if (!send_skb)
-			continue;
+		size = arp_hdr_len(skb->dev);
 
-		skb_reset_network_header(send_skb);
-		arp = (struct arphdr *) skb_put(send_skb, size);
-		send_skb->dev = skb->dev;
-		send_skb->protocol = htons(ETH_P_ARP);
+		spin_lock_irqsave(&npinfo->rx_lock, flags);
+		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
+			if (tip != np->local_ip.ip)
+				continue;
+
+			hlen = LL_RESERVED_SPACE(np->dev);
+			tlen = np->dev->needed_tailroom;
+			send_skb = find_skb(np, size + hlen + tlen, hlen);
+			if (!send_skb)
+				continue;
+
+			skb_reset_network_header(send_skb);
+			arp = (struct arphdr *) skb_put(send_skb, size);
+			send_skb->dev = skb->dev;
+			send_skb->protocol = htons(ETH_P_ARP);
+
+			/* Fill the device header for the ARP frame */
+			if (dev_hard_header(send_skb, skb->dev, ETH_P_ARP,
+					    sha, np->dev->dev_addr,
+					    send_skb->len) < 0) {
+				kfree_skb(send_skb);
+				continue;
+			}
 
-		/* Fill the device header for the ARP frame */
-		if (dev_hard_header(send_skb, skb->dev, ptype,
-				    sha, np->dev->dev_addr,
-				    send_skb->len) < 0) {
-			kfree_skb(send_skb);
-			continue;
+			/*
+			 * Fill out the arp protocol part.
+			 *
+			 * we only support ethernet device type,
+			 * which (according to RFC 1390) should
+			 * always equal 1 (Ethernet).
+			 */
+
+			arp->ar_hrd = htons(np->dev->type);
+			arp->ar_pro = htons(ETH_P_IP);
+			arp->ar_hln = np->dev->addr_len;
+			arp->ar_pln = 4;
+			arp->ar_op = htons(type);
+
+			arp_ptr = (unsigned char *)(arp + 1);
+			memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
+			arp_ptr += np->dev->addr_len;
+			memcpy(arp_ptr, &tip, 4);
+			arp_ptr += 4;
+			memcpy(arp_ptr, sha, np->dev->addr_len);
+			arp_ptr += np->dev->addr_len;
+			memcpy(arp_ptr, &sip, 4);
+
+			netpoll_send_skb(np, send_skb);
+
+			/* If there are several rx_hooks for the same address,
+			   we're fine by sending a single reply */
+			break;
 		}
+		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+	} else if( proto == ETH_P_IPV6) {
+#if IS_ENABLED(CONFIG_IPV6)
+		struct nd_msg *msg;
+		u8 *lladdr = NULL;
+		struct ipv6hdr *hdr;
+		struct icmp6hdr *icmp6h;
+		const struct in6_addr *saddr;
+		const struct in6_addr *daddr;
+		struct inet6_dev *in6_dev = NULL;
+		struct in6_addr *target;
+
+		in6_dev = in6_dev_get(skb->dev);
+		if (!in6_dev || !in6_dev->cnf.accept_ra)
+			return;
 
-		/*
-		 * Fill out the arp protocol part.
-		 *
-		 * we only support ethernet device type,
-		 * which (according to RFC 1390) should
-		 * always equal 1 (Ethernet).
-		 */
+		if (!pskb_may_pull(skb, skb->len))
+			return;
 
-		arp->ar_hrd = htons(np->dev->type);
-		arp->ar_pro = htons(ETH_P_IP);
-		arp->ar_hln = np->dev->addr_len;
-		arp->ar_pln = 4;
-		arp->ar_op = htons(type);
+		msg = (struct nd_msg *)skb_transport_header(skb);
 
-		arp_ptr = (unsigned char *)(arp + 1);
-		memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
-		arp_ptr += np->dev->addr_len;
-		memcpy(arp_ptr, &tip, 4);
-		arp_ptr += 4;
-		memcpy(arp_ptr, sha, np->dev->addr_len);
-		arp_ptr += np->dev->addr_len;
-		memcpy(arp_ptr, &sip, 4);
+		__skb_push(skb, skb->data - skb_transport_header(skb));
 
-		netpoll_send_skb(np, send_skb);
+		if (ipv6_hdr(skb)->hop_limit != 255)
+			return;
+		if (msg->icmph.icmp6_code != 0)
+			return;
+		if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
+			return;
+
+		saddr = &ipv6_hdr(skb)->saddr;
+		daddr = &ipv6_hdr(skb)->daddr;
 
-		/* If there are several rx_hooks for the same address,
-		   we're fine by sending a single reply */
-		break;
+		size = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
+
+		spin_lock_irqsave(&npinfo->rx_lock, flags);
+		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
+			if (!ipv6_addr_equal(daddr, &np->local_ip.in6))
+				continue;
+
+			hlen = LL_RESERVED_SPACE(np->dev);
+			tlen = np->dev->needed_tailroom;
+			send_skb = find_skb(np, size + hlen + tlen, hlen);
+			if (!send_skb)
+				continue;
+
+			send_skb->protocol = htons(ETH_P_IPV6);
+			send_skb->dev = skb->dev;
+
+			skb_reset_network_header(send_skb);
+			skb_put(send_skb, sizeof(struct ipv6hdr));
+			hdr = ipv6_hdr(send_skb);
+
+			*(__be32*)hdr = htonl(0x60000000);
+
+			hdr->payload_len = htons(size);
+			hdr->nexthdr = IPPROTO_ICMPV6;
+			hdr->hop_limit = 255;
+			hdr->saddr = *saddr;
+			hdr->daddr = *daddr;
+
+			send_skb->transport_header = send_skb->tail;
+			skb_put(send_skb, size);
+
+			icmp6h = (struct icmp6hdr *)skb_transport_header(skb);
+			icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
+			icmp6h->icmp6_router = 0;
+			icmp6h->icmp6_solicited = 1;
+			target = (struct in6_addr *)(skb_transport_header(send_skb) + sizeof(struct icmp6hdr));
+			*target = msg->target;
+			icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size,
+							      IPPROTO_ICMPV6,
+							      csum_partial(icmp6h,
+									   size, 0));
+
+			if (dev_hard_header(send_skb, skb->dev, ETH_P_IPV6,
+					    lladdr, np->dev->dev_addr,
+					    send_skb->len) < 0) {
+				kfree_skb(send_skb);
+				continue;
+			}
+
+			netpoll_send_skb(np, send_skb);
+
+			/* If there are several rx_hooks for the same address,
+			   we're fine by sending a single reply */
+			break;
+		}
+		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+#endif
 	}
-	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+}
+
+static bool pkt_is_ns(struct sk_buff *skb)
+{
+	struct nd_msg *msg;
+	struct ipv6hdr *hdr;
+
+	if (skb->protocol != htons(ETH_P_ARP))
+		return false;
+	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg)))
+		return false;
+
+	msg = (struct nd_msg *)skb_transport_header(skb);
+	__skb_push(skb, skb->data - skb_transport_header(skb));
+	hdr = ipv6_hdr(skb);
+
+	if (hdr->nexthdr != IPPROTO_ICMPV6)
+		return false;
+	if (hdr->hop_limit != 255)
+		return false;
+	if (msg->icmph.icmp6_code != 0)
+		return false;
+	if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
+		return false;
+
+	return true;
 }
 
 int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
@@ -571,9 +773,11 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 		goto out;
 
 	/* check if netpoll clients need ARP */
-	if (skb->protocol == htons(ETH_P_ARP) &&
-	    atomic_read(&trapped)) {
-		skb_queue_tail(&npinfo->arp_tx, skb);
+	if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) {
+		skb_queue_tail(&npinfo->neigh_tx, skb);
+		return 1;
+	} else if (pkt_is_ns(skb) && atomic_read(&trapped)) {
+		skb_queue_tail(&npinfo->neigh_tx, skb);
 		return 1;
 	}
 
@@ -584,60 +788,100 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 	}
 
 	proto = ntohs(eth_hdr(skb)->h_proto);
-	if (proto != ETH_P_IP)
+	if (proto != ETH_P_IP && proto != ETH_P_IPV6)
 		goto out;
 	if (skb->pkt_type == PACKET_OTHERHOST)
 		goto out;
 	if (skb_shared(skb))
 		goto out;
 
-	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
-		goto out;
-	iph = (struct iphdr *)skb->data;
-	if (iph->ihl < 5 || iph->version != 4)
-		goto out;
-	if (!pskb_may_pull(skb, iph->ihl*4))
-		goto out;
-	iph = (struct iphdr *)skb->data;
-	if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
-		goto out;
-
-	len = ntohs(iph->tot_len);
-	if (skb->len < len || len < iph->ihl*4)
-		goto out;
+	if (proto == ETH_P_IP) {
+		if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+			goto out;
+		iph = (struct iphdr *)skb->data;
+		if (iph->ihl < 5 || iph->version != 4)
+			goto out;
+		if (!pskb_may_pull(skb, iph->ihl*4))
+			goto out;
+		iph = (struct iphdr *)skb->data;
+		if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
+			goto out;
 
-	/*
-	 * Our transport medium may have padded the buffer out.
-	 * Now We trim to the true length of the frame.
-	 */
-	if (pskb_trim_rcsum(skb, len))
-		goto out;
+		len = ntohs(iph->tot_len);
+		if (skb->len < len || len < iph->ihl*4)
+			goto out;
 
-	iph = (struct iphdr *)skb->data;
-	if (iph->protocol != IPPROTO_UDP)
-		goto out;
+		/*
+		 * Our transport medium may have padded the buffer out.
+		 * Now We trim to the true length of the frame.
+		 */
+		if (pskb_trim_rcsum(skb, len))
+			goto out;
 
-	len -= iph->ihl*4;
-	uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
-	ulen = ntohs(uh->len);
+		iph = (struct iphdr *)skb->data;
+		if (iph->protocol != IPPROTO_UDP)
+			goto out;
 
-	if (ulen != len)
-		goto out;
-	if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
-		goto out;
+		len -= iph->ihl*4;
+		uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
+		ulen = ntohs(uh->len);
 
-	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
-		if (np->local_ip && np->local_ip != iph->daddr)
-			continue;
-		if (np->remote_ip && np->remote_ip != iph->saddr)
-			continue;
-		if (np->local_port && np->local_port != ntohs(uh->dest))
-			continue;
+		if (ulen != len)
+			goto out;
+		if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
+			goto out;
+		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
+			if (np->local_ip.ip && np->local_ip.ip != iph->daddr)
+				continue;
+			if (np->remote_ip.ip && np->remote_ip.ip != iph->saddr)
+				continue;
+			if (np->local_port && np->local_port != ntohs(uh->dest))
+				continue;
+
+			np->rx_hook(np, ntohs(uh->source),
+				       (char *)(uh+1),
+				       ulen - sizeof(struct udphdr));
+			hits++;
+		}
+	} else {
+#if IS_ENABLED(CONFIG_IPV6)
+		const struct ipv6hdr *ip6h;
 
-		np->rx_hook(np, ntohs(uh->source),
-			       (char *)(uh+1),
-			       ulen - sizeof(struct udphdr));
-		hits++;
+		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+			goto out;
+		ip6h = (struct ipv6hdr *)skb->data;
+		if (ip6h->version != 6)
+			goto out;
+		len = ntohs(ip6h->payload_len);
+		if (!len)
+			goto out;
+		if (len + sizeof(struct ipv6hdr) > skb->len)
+			goto out;
+		if (pskb_trim_rcsum(skb, len + sizeof(struct ipv6hdr)))
+			goto out;
+		ip6h = ipv6_hdr(skb);
+		if (!pskb_may_pull(skb, sizeof(struct udphdr)))
+			goto out;
+		uh = udp_hdr(skb);
+		ulen = ntohs(uh->len);
+		if (ulen != skb->len)
+			goto out;
+		if (udp6_csum_init(skb, uh, IPPROTO_UDP))
+			goto out;
+		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
+			if (!ipv6_addr_equal(&np->local_ip.in6, &ip6h->daddr))
+				continue;
+			if (!ipv6_addr_equal(&np->remote_ip.in6, &ip6h->saddr))
+				continue;
+			if (np->local_port && np->local_port != ntohs(uh->dest))
+				continue;
+
+			np->rx_hook(np, ntohs(uh->source),
+				       (char *)(uh+1),
+				       ulen - sizeof(struct udphdr));
+			hits++;
+		}
+#endif
 	}
 
 	if (!hits)
@@ -658,17 +902,44 @@ out:
 void netpoll_print_options(struct netpoll *np)
 {
 	np_info(np, "local port %d\n", np->local_port);
-	np_info(np, "local IP %pI4\n", &np->local_ip);
+	if (np->ipv6)
+		np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6);
+	else
+		np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip);
 	np_info(np, "interface '%s'\n", np->dev_name);
 	np_info(np, "remote port %d\n", np->remote_port);
-	np_info(np, "remote IP %pI4\n", &np->remote_ip);
+	if (np->ipv6)
+		np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6);
+	else
+		np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip);
 	np_info(np, "remote ethernet address %pM\n", np->remote_mac);
 }
 EXPORT_SYMBOL(netpoll_print_options);
 
+static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr)
+{
+	const char *end;
+
+	if (!strchr(str, ':') &&
+	    in4_pton(str, -1, (void *)addr, -1, &end) > 0) {
+		if (!*end)
+			return 0;
+	}
+	if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) {
+#if IS_ENABLED(CONFIG_IPV6)
+		if (!*end)
+			return 1;
+#else
+		return -1;
+#endif
+	}
+	return -1;
+}
+
 int netpoll_parse_options(struct netpoll *np, char *opt)
 {
 	char *cur=opt, *delim;
+	int ipv6;
 
 	if (*cur != '@') {
 		if ((delim = strchr(cur, '@')) == NULL)
@@ -684,7 +955,11 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
 		if ((delim = strchr(cur, '/')) == NULL)
 			goto parse_failed;
 		*delim = 0;
-		np->local_ip = in_aton(cur);
+		ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip);
+		if (ipv6 < 0)
+			goto parse_failed;
+		else
+			np->ipv6 = (bool)ipv6;
 		cur = delim;
 	}
 	cur++;
@@ -716,7 +991,13 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
 	if ((delim = strchr(cur, '/')) == NULL)
 		goto parse_failed;
 	*delim = 0;
-	np->remote_ip = in_aton(cur);
+	ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
+	if (ipv6 < 0)
+		goto parse_failed;
+	else if (np->ipv6 != (bool)ipv6)
+		goto parse_failed;
+	else
+		np->ipv6 = (bool)ipv6;
 	cur = delim + 1;
 
 	if (*cur != 0) {
@@ -744,6 +1025,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
 
 	np->dev = ndev;
 	strlcpy(np->dev_name, ndev->name, IFNAMSIZ);
+	INIT_WORK(&np->cleanup_work, netpoll_async_cleanup);
 
 	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
 	    !ndev->netdev_ops->ndo_poll_controller) {
@@ -764,7 +1046,8 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
 		INIT_LIST_HEAD(&npinfo->rx_np);
 
 		spin_lock_init(&npinfo->rx_lock);
-		skb_queue_head_init(&npinfo->arp_tx);
+		mutex_init(&npinfo->dev_lock);
+		skb_queue_head_init(&npinfo->neigh_tx);
 		skb_queue_head_init(&npinfo->txq);
 		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
 
@@ -777,7 +1060,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
 				goto free_npinfo;
 		}
 	} else {
-		npinfo = ndev->npinfo;
+		npinfo = rtnl_dereference(ndev->npinfo);
 		atomic_inc(&npinfo->refcnt);
 	}
 
@@ -808,14 +1091,19 @@ int netpoll_setup(struct netpoll *np)
 	struct in_device *in_dev;
 	int err;
 
-	if (np->dev_name)
-		ndev = dev_get_by_name(&init_net, np->dev_name);
+	rtnl_lock();
+	if (np->dev_name) {
+		struct net *net = current->nsproxy->net_ns;
+		ndev = __dev_get_by_name(net, np->dev_name);
+	}
 	if (!ndev) {
 		np_err(np, "%s doesn't exist, aborting\n", np->dev_name);
-		return -ENODEV;
+		err = -ENODEV;
+		goto unlock;
 	}
+	dev_hold(ndev);
 
-	if (ndev->master) {
+	if (netdev_master_upper_dev_get(ndev)) {
 		np_err(np, "%s is a slave device, aborting\n", np->dev_name);
 		err = -EBUSY;
 		goto put;
@@ -826,15 +1114,14 @@ int netpoll_setup(struct netpoll *np)
 
 		np_info(np, "device %s not up yet, forcing it\n", np->dev_name);
 
-		rtnl_lock();
 		err = dev_open(ndev);
-		rtnl_unlock();
 
 		if (err) {
 			np_err(np, "failed to open %s\n", ndev->name);
 			goto put;
 		}
 
+		rtnl_unlock();
 		atleast = jiffies + HZ/10;
 		atmost = jiffies + carrier_timeout * HZ;
 		while (!netif_carrier_ok(ndev)) {
@@ -854,39 +1141,70 @@ int netpoll_setup(struct netpoll *np)
 			np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n");
 			msleep(4000);
 		}
+		rtnl_lock();
 	}
 
-	if (!np->local_ip) {
-		rcu_read_lock();
-		in_dev = __in_dev_get_rcu(ndev);
+	if (!np->local_ip.ip) {
+		if (!np->ipv6) {
+			in_dev = __in_dev_get_rtnl(ndev);
+
+			if (!in_dev || !in_dev->ifa_list) {
+				np_err(np, "no IP address for %s, aborting\n",
+				       np->dev_name);
+				err = -EDESTADDRREQ;
+				goto put;
+			}
+
+			np->local_ip.ip = in_dev->ifa_list->ifa_local;
+			np_info(np, "local IP %pI4\n", &np->local_ip.ip);
+		} else {
+#if IS_ENABLED(CONFIG_IPV6)
+			struct inet6_dev *idev;
 
-		if (!in_dev || !in_dev->ifa_list) {
-			rcu_read_unlock();
-			np_err(np, "no IP address for %s, aborting\n",
-			       np->dev_name);
 			err = -EDESTADDRREQ;
+			idev = __in6_dev_get(ndev);
+			if (idev) {
+				struct inet6_ifaddr *ifp;
+
+				read_lock_bh(&idev->lock);
+				list_for_each_entry(ifp, &idev->addr_list, if_list) {
+					if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)
+						continue;
+					np->local_ip.in6 = ifp->addr;
+					err = 0;
+					break;
+				}
+				read_unlock_bh(&idev->lock);
+			}
+			if (err) {
+				np_err(np, "no IPv6 address for %s, aborting\n",
+				       np->dev_name);
+				goto put;
+			} else
+				np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6);
+#else
+			np_err(np, "IPv6 is not supported %s, aborting\n",
+			       np->dev_name);
+			err = -EINVAL;
 			goto put;
+#endif
 		}
-
-		np->local_ip = in_dev->ifa_list->ifa_local;
-		rcu_read_unlock();
-		np_info(np, "local IP %pI4\n", &np->local_ip);
 	}
 
 	/* fill up the skb queue */
 	refill_skbs();
 
-	rtnl_lock();
 	err = __netpoll_setup(np, ndev, GFP_KERNEL);
-	rtnl_unlock();
-
 	if (err)
 		goto put;
 
+	rtnl_unlock();
 	return 0;
 
 put:
 	dev_put(ndev);
+unlock:
+	rtnl_unlock();
 	return err;
 }
 EXPORT_SYMBOL(netpoll_setup);
@@ -894,6 +1212,7 @@ EXPORT_SYMBOL(netpoll_setup);
 static int __init netpoll_init(void)
 {
 	skb_queue_head_init(&skb_pool);
+	init_srcu_struct(&netpoll_srcu);
 	return 0;
 }
 core_initcall(netpoll_init);
@@ -903,7 +1222,7 @@ static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
 	struct netpoll_info *npinfo =
 			container_of(rcu_head, struct netpoll_info, rcu);
 
-	skb_queue_purge(&npinfo->arp_tx);
+	skb_queue_purge(&npinfo->neigh_tx);
 	skb_queue_purge(&npinfo->txq);
 
 	/* we can't call cancel_delayed_work_sync here, as we are in softirq */
@@ -921,7 +1240,11 @@ void __netpoll_cleanup(struct netpoll *np)
 	struct netpoll_info *npinfo;
 	unsigned long flags;
 
-	npinfo = np->dev->npinfo;
+	/* rtnl_dereference would be preferable here but
+	 * rcu_cleanup_netpoll path can put us in here safely without
+	 * holding the rtnl, so plain rcu_dereference it is
+	 */
+	npinfo = rtnl_dereference(np->dev->npinfo);
 	if (!npinfo)
 		return;
 
@@ -933,6 +1256,8 @@ void __netpoll_cleanup(struct netpoll *np)
 		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 	}
 
+	synchronize_srcu(&netpoll_srcu);
+
 	if (atomic_dec_and_test(&npinfo->refcnt)) {
 		const struct net_device_ops *ops;
 
@@ -940,25 +1265,27 @@ void __netpoll_cleanup(struct netpoll *np)
 		if (ops->ndo_netpoll_cleanup)
 			ops->ndo_netpoll_cleanup(np->dev);
 
-		RCU_INIT_POINTER(np->dev->npinfo, NULL);
+		rcu_assign_pointer(np->dev->npinfo, NULL);
 		call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
 	}
 }
 EXPORT_SYMBOL_GPL(__netpoll_cleanup);
 
-static void rcu_cleanup_netpoll(struct rcu_head *rcu_head)
+static void netpoll_async_cleanup(struct work_struct *work)
 {
-	struct netpoll *np = container_of(rcu_head, struct netpoll, rcu);
+	struct netpoll *np = container_of(work, struct netpoll, cleanup_work);
 
+	rtnl_lock();
 	__netpoll_cleanup(np);
+	rtnl_unlock();
 	kfree(np);
 }
 
-void __netpoll_free_rcu(struct netpoll *np)
+void __netpoll_free_async(struct netpoll *np)
 {
-	call_rcu_bh(&np->rcu, rcu_cleanup_netpoll);
+	schedule_work(&np->cleanup_work);
 }
-EXPORT_SYMBOL_GPL(__netpoll_free_rcu);
+EXPORT_SYMBOL_GPL(__netpoll_free_async);
 
 void netpoll_cleanup(struct netpoll *np)
 {
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 5e67defe2cb..0777d0aa18c 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -69,10 +69,8 @@ static int extend_netdev_table(struct net_device *dev, u32 target_idx)
 
 	/* allocate & copy */
 	new = kzalloc(new_sz, GFP_KERNEL);
-	if (!new) {
-		pr_warn("Unable to alloc new priomap!\n");
+	if (!new)
 		return -ENOMEM;
-	}
 
 	if (old)
 		memcpy(new->priomap, old->priomap,
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index b29dacf900f..2201e699ad6 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -164,6 +164,7 @@
 #ifdef CONFIG_XFRM
 #include <net/xfrm.h>
 #endif
+#include <net/netns/generic.h>
 #include <asm/byteorder.h>
 #include <linux/rcupdate.h>
 #include <linux/bitops.h>
@@ -212,7 +213,6 @@
 #define PKTGEN_MAGIC 0xbe9be955
 #define PG_PROC_DIR "pktgen"
 #define PGCTRL	    "pgctrl"
-static struct proc_dir_entry *pg_proc_dir;
 
 #define MAX_CFLOWS  65536
 
@@ -397,7 +397,15 @@ struct pktgen_hdr {
 	__be32 tv_usec;
 };
 
-static bool pktgen_exiting __read_mostly;
+
+static int pg_net_id __read_mostly;
+
+struct pktgen_net {
+	struct net		*net;
+	struct proc_dir_entry	*proc_dir;
+	struct list_head	pktgen_threads;
+	bool			pktgen_exiting;
+};
 
 struct pktgen_thread {
 	spinlock_t if_lock;		/* for list of devices */
@@ -414,6 +422,7 @@ struct pktgen_thread {
 
 	wait_queue_head_t queue;
 	struct completion start_done;
+	struct pktgen_net *net;
 };
 
 #define REMOVE 1
@@ -428,9 +437,9 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname);
 static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
 					  const char *ifname, bool exact);
 static int pktgen_device_event(struct notifier_block *, unsigned long, void *);
-static void pktgen_run_all_threads(void);
-static void pktgen_reset_all_threads(void);
-static void pktgen_stop_all_threads_ifs(void);
+static void pktgen_run_all_threads(struct pktgen_net *pn);
+static void pktgen_reset_all_threads(struct pktgen_net *pn);
+static void pktgen_stop_all_threads_ifs(struct pktgen_net *pn);
 
 static void pktgen_stop(struct pktgen_thread *t);
 static void pktgen_clear_counters(struct pktgen_dev *pkt_dev);
@@ -442,7 +451,6 @@ static int pg_clone_skb_d  __read_mostly;
 static int debug  __read_mostly;
 
 static DEFINE_MUTEX(pktgen_thread_lock);
-static LIST_HEAD(pktgen_threads);
 
 static struct notifier_block pktgen_notifier_block = {
 	.notifier_call = pktgen_device_event,
@@ -464,6 +472,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf,
 {
 	int err = 0;
 	char data[128];
+	struct pktgen_net *pn = net_generic(current->nsproxy->net_ns, pg_net_id);
 
 	if (!capable(CAP_NET_ADMIN)) {
 		err = -EPERM;
@@ -480,13 +489,13 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf,
 	data[count - 1] = 0;	/* Make string */
 
 	if (!strcmp(data, "stop"))
-		pktgen_stop_all_threads_ifs();
+		pktgen_stop_all_threads_ifs(pn);
 
 	else if (!strcmp(data, "start"))
-		pktgen_run_all_threads();
+		pktgen_run_all_threads(pn);
 
 	else if (!strcmp(data, "reset"))
-		pktgen_reset_all_threads();
+		pktgen_reset_all_threads(pn);
 
 	else
 		pr_warning("Unknown command: %s\n", data);
@@ -1781,10 +1790,13 @@ static ssize_t pktgen_thread_write(struct file *file,
 			return -EFAULT;
 		i += len;
 		mutex_lock(&pktgen_thread_lock);
-		pktgen_add_device(t, f);
+		ret = pktgen_add_device(t, f);
 		mutex_unlock(&pktgen_thread_lock);
-		ret = count;
-		sprintf(pg_result, "OK: add_device=%s", f);
+		if (!ret) {
+			ret = count;
+			sprintf(pg_result, "OK: add_device=%s", f);
+		} else
+			sprintf(pg_result, "ERROR: can not add device %s", f);
 		goto out;
 	}
 
@@ -1824,13 +1836,14 @@ static const struct file_operations pktgen_thread_fops = {
 };
 
 /* Think find or remove for NN */
-static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove)
+static struct pktgen_dev *__pktgen_NN_threads(const struct pktgen_net *pn,
+					      const char *ifname, int remove)
 {
 	struct pktgen_thread *t;
 	struct pktgen_dev *pkt_dev = NULL;
 	bool exact = (remove == FIND);
 
-	list_for_each_entry(t, &pktgen_threads, th_list) {
+	list_for_each_entry(t, &pn->pktgen_threads, th_list) {
 		pkt_dev = pktgen_find_dev(t, ifname, exact);
 		if (pkt_dev) {
 			if (remove) {
@@ -1848,7 +1861,7 @@ static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove)
 /*
  * mark a device for removal
  */
-static void pktgen_mark_device(const char *ifname)
+static void pktgen_mark_device(const struct pktgen_net *pn, const char *ifname)
 {
 	struct pktgen_dev *pkt_dev = NULL;
 	const int max_tries = 10, msec_per_try = 125;
@@ -1859,7 +1872,7 @@ static void pktgen_mark_device(const char *ifname)
 
 	while (1) {
 
-		pkt_dev = __pktgen_NN_threads(ifname, REMOVE);
+		pkt_dev = __pktgen_NN_threads(pn, ifname, REMOVE);
 		if (pkt_dev == NULL)
 			break;	/* success */
 
@@ -1880,21 +1893,21 @@ static void pktgen_mark_device(const char *ifname)
 	mutex_unlock(&pktgen_thread_lock);
 }
 
-static void pktgen_change_name(struct net_device *dev)
+static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *dev)
 {
 	struct pktgen_thread *t;
 
-	list_for_each_entry(t, &pktgen_threads, th_list) {
+	list_for_each_entry(t, &pn->pktgen_threads, th_list) {
 		struct pktgen_dev *pkt_dev;
 
 		list_for_each_entry(pkt_dev, &t->if_list, list) {
 			if (pkt_dev->odev != dev)
 				continue;
 
-			remove_proc_entry(pkt_dev->entry->name, pg_proc_dir);
+			remove_proc_entry(pkt_dev->entry->name, pn->proc_dir);
 
 			pkt_dev->entry = proc_create_data(dev->name, 0600,
-							  pg_proc_dir,
+							  pn->proc_dir,
 							  &pktgen_if_fops,
 							  pkt_dev);
 			if (!pkt_dev->entry)
@@ -1909,8 +1922,9 @@ static int pktgen_device_event(struct notifier_block *unused,
 			       unsigned long event, void *ptr)
 {
 	struct net_device *dev = ptr;
+	struct pktgen_net *pn = net_generic(dev_net(dev), pg_net_id);
 
-	if (!net_eq(dev_net(dev), &init_net) || pktgen_exiting)
+	if (pn->pktgen_exiting)
 		return NOTIFY_DONE;
 
 	/* It is OK that we do not hold the group lock right now,
@@ -1919,18 +1933,19 @@ static int pktgen_device_event(struct notifier_block *unused,
 
 	switch (event) {
 	case NETDEV_CHANGENAME:
-		pktgen_change_name(dev);
+		pktgen_change_name(pn, dev);
 		break;
 
 	case NETDEV_UNREGISTER:
-		pktgen_mark_device(dev->name);
+		pktgen_mark_device(pn, dev->name);
 		break;
 	}
 
 	return NOTIFY_DONE;
 }
 
-static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev,
+static struct net_device *pktgen_dev_get_by_name(const struct pktgen_net *pn,
+						 struct pktgen_dev *pkt_dev,
 						 const char *ifname)
 {
 	char b[IFNAMSIZ+5];
@@ -1944,13 +1959,14 @@ static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev,
 	}
 	b[i] = 0;
 
-	return dev_get_by_name(&init_net, b);
+	return dev_get_by_name(pn->net, b);
 }
 
 
 /* Associate pktgen_dev with a device. */
 
-static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname)
+static int pktgen_setup_dev(const struct pktgen_net *pn,
+			    struct pktgen_dev *pkt_dev, const char *ifname)
 {
 	struct net_device *odev;
 	int err;
@@ -1961,7 +1977,7 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname)
 		pkt_dev->odev = NULL;
 	}
 
-	odev = pktgen_dev_get_by_name(pkt_dev, ifname);
+	odev = pktgen_dev_get_by_name(pn, pkt_dev, ifname);
 	if (!odev) {
 		pr_err("no such netdevice: \"%s\"\n", ifname);
 		return -ENODEV;
@@ -2203,9 +2219,10 @@ static inline int f_pick(struct pktgen_dev *pkt_dev)
 static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
 {
 	struct xfrm_state *x = pkt_dev->flows[flow].x;
+	struct pktgen_net *pn = net_generic(dev_net(pkt_dev->odev), pg_net_id);
 	if (!x) {
 		/*slow path: we dont already have xfrm_state*/
-		x = xfrm_stateonly_find(&init_net, DUMMY_MARK,
+		x = xfrm_stateonly_find(pn->net, DUMMY_MARK,
 					(xfrm_address_t *)&pkt_dev->cur_daddr,
 					(xfrm_address_t *)&pkt_dev->cur_saddr,
 					AF_INET,
@@ -2912,7 +2929,7 @@ static void pktgen_run(struct pktgen_thread *t)
 		t->control &= ~(T_STOP);
 }
 
-static void pktgen_stop_all_threads_ifs(void)
+static void pktgen_stop_all_threads_ifs(struct pktgen_net *pn)
 {
 	struct pktgen_thread *t;
 
@@ -2920,7 +2937,7 @@ static void pktgen_stop_all_threads_ifs(void)
 
 	mutex_lock(&pktgen_thread_lock);
 
-	list_for_each_entry(t, &pktgen_threads, th_list)
+	list_for_each_entry(t, &pn->pktgen_threads, th_list)
 		t->control |= T_STOP;
 
 	mutex_unlock(&pktgen_thread_lock);
@@ -2956,28 +2973,28 @@ signal:
 	return 0;
 }
 
-static int pktgen_wait_all_threads_run(void)
+static int pktgen_wait_all_threads_run(struct pktgen_net *pn)
 {
 	struct pktgen_thread *t;
 	int sig = 1;
 
 	mutex_lock(&pktgen_thread_lock);
 
-	list_for_each_entry(t, &pktgen_threads, th_list) {
+	list_for_each_entry(t, &pn->pktgen_threads, th_list) {
 		sig = pktgen_wait_thread_run(t);
 		if (sig == 0)
 			break;
 	}
 
 	if (sig == 0)
-		list_for_each_entry(t, &pktgen_threads, th_list)
+		list_for_each_entry(t, &pn->pktgen_threads, th_list)
 			t->control |= (T_STOP);
 
 	mutex_unlock(&pktgen_thread_lock);
 	return sig;
 }
 
-static void pktgen_run_all_threads(void)
+static void pktgen_run_all_threads(struct pktgen_net *pn)
 {
 	struct pktgen_thread *t;
 
@@ -2985,7 +3002,7 @@ static void pktgen_run_all_threads(void)
 
 	mutex_lock(&pktgen_thread_lock);
 
-	list_for_each_entry(t, &pktgen_threads, th_list)
+	list_for_each_entry(t, &pn->pktgen_threads, th_list)
 		t->control |= (T_RUN);
 
 	mutex_unlock(&pktgen_thread_lock);
@@ -2993,10 +3010,10 @@ static void pktgen_run_all_threads(void)
 	/* Propagate thread->control  */
 	schedule_timeout_interruptible(msecs_to_jiffies(125));
 
-	pktgen_wait_all_threads_run();
+	pktgen_wait_all_threads_run(pn);
 }
 
-static void pktgen_reset_all_threads(void)
+static void pktgen_reset_all_threads(struct pktgen_net *pn)
 {
 	struct pktgen_thread *t;
 
@@ -3004,7 +3021,7 @@ static void pktgen_reset_all_threads(void)
 
 	mutex_lock(&pktgen_thread_lock);
 
-	list_for_each_entry(t, &pktgen_threads, th_list)
+	list_for_each_entry(t, &pn->pktgen_threads, th_list)
 		t->control |= (T_REMDEVALL);
 
 	mutex_unlock(&pktgen_thread_lock);
@@ -3012,7 +3029,7 @@ static void pktgen_reset_all_threads(void)
 	/* Propagate thread->control  */
 	schedule_timeout_interruptible(msecs_to_jiffies(125));
 
-	pktgen_wait_all_threads_run();
+	pktgen_wait_all_threads_run(pn);
 }
 
 static void show_results(struct pktgen_dev *pkt_dev, int nr_frags)
@@ -3154,9 +3171,7 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
 static void pktgen_rem_thread(struct pktgen_thread *t)
 {
 	/* Remove from the thread list */
-
-	remove_proc_entry(t->tsk->comm, pg_proc_dir);
-
+	remove_proc_entry(t->tsk->comm, t->net->proc_dir);
 }
 
 static void pktgen_resched(struct pktgen_dev *pkt_dev)
@@ -3302,7 +3317,7 @@ static int pktgen_thread_worker(void *arg)
 		pkt_dev = next_to_run(t);
 
 		if (unlikely(!pkt_dev && t->control == 0)) {
-			if (pktgen_exiting)
+			if (t->net->pktgen_exiting)
 				break;
 			wait_event_interruptible_timeout(t->queue,
 							 t->control != 0,
@@ -3424,7 +3439,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
 
 	/* We don't allow a device to be on several threads */
 
-	pkt_dev = __pktgen_NN_threads(ifname, FIND);
+	pkt_dev = __pktgen_NN_threads(t->net, ifname, FIND);
 	if (pkt_dev) {
 		pr_err("ERROR: interface already used\n");
 		return -EBUSY;
@@ -3459,13 +3474,13 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
 	pkt_dev->svlan_id = 0xffff;
 	pkt_dev->node = -1;
 
-	err = pktgen_setup_dev(pkt_dev, ifname);
+	err = pktgen_setup_dev(t->net, pkt_dev, ifname);
 	if (err)
 		goto out1;
 	if (pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)
 		pkt_dev->clone_skb = pg_clone_skb_d;
 
-	pkt_dev->entry = proc_create_data(ifname, 0600, pg_proc_dir,
+	pkt_dev->entry = proc_create_data(ifname, 0600, t->net->proc_dir,
 					  &pktgen_if_fops, pkt_dev);
 	if (!pkt_dev->entry) {
 		pr_err("cannot create %s/%s procfs entry\n",
@@ -3490,7 +3505,7 @@ out1:
 	return err;
 }
 
-static int __init pktgen_create_thread(int cpu)
+static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn)
 {
 	struct pktgen_thread *t;
 	struct proc_dir_entry *pe;
@@ -3508,7 +3523,7 @@ static int __init pktgen_create_thread(int cpu)
 
 	INIT_LIST_HEAD(&t->if_list);
 
-	list_add_tail(&t->th_list, &pktgen_threads);
+	list_add_tail(&t->th_list, &pn->pktgen_threads);
 	init_completion(&t->start_done);
 
 	p = kthread_create_on_node(pktgen_thread_worker,
@@ -3524,7 +3539,7 @@ static int __init pktgen_create_thread(int cpu)
 	kthread_bind(p, cpu);
 	t->tsk = p;
 
-	pe = proc_create_data(t->tsk->comm, 0600, pg_proc_dir,
+	pe = proc_create_data(t->tsk->comm, 0600, pn->proc_dir,
 			      &pktgen_thread_fops, t);
 	if (!pe) {
 		pr_err("cannot create %s/%s procfs entry\n",
@@ -3535,6 +3550,7 @@ static int __init pktgen_create_thread(int cpu)
 		return -EINVAL;
 	}
 
+	t->net = pn;
 	wake_up_process(p);
 	wait_for_completion(&t->start_done);
 
@@ -3560,6 +3576,7 @@ static void _rem_dev_from_if_list(struct pktgen_thread *t,
 static int pktgen_remove_device(struct pktgen_thread *t,
 				struct pktgen_dev *pkt_dev)
 {
+	struct pktgen_net *pn = t->net;
 
 	pr_debug("remove_device pkt_dev=%p\n", pkt_dev);
 
@@ -3580,7 +3597,7 @@ static int pktgen_remove_device(struct pktgen_thread *t,
 	_rem_dev_from_if_list(t, pkt_dev);
 
 	if (pkt_dev->entry)
-		remove_proc_entry(pkt_dev->entry->name, pg_proc_dir);
+		remove_proc_entry(pkt_dev->entry->name, pn->proc_dir);
 
 #ifdef CONFIG_XFRM
 	free_SAs(pkt_dev);
@@ -3592,63 +3609,63 @@ static int pktgen_remove_device(struct pktgen_thread *t,
 	return 0;
 }
 
-static int __init pg_init(void)
+static int __net_init pg_net_init(struct net *net)
 {
-	int cpu;
+	struct pktgen_net *pn = net_generic(net, pg_net_id);
 	struct proc_dir_entry *pe;
-	int ret = 0;
-
-	pr_info("%s", version);
-
-	pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net);
-	if (!pg_proc_dir)
+	int cpu, ret = 0;
+
+	pn->net = net;
+	INIT_LIST_HEAD(&pn->pktgen_threads);
+	pn->pktgen_exiting = false;
+	pn->proc_dir = proc_mkdir(PG_PROC_DIR, pn->net->proc_net);
+	if (!pn->proc_dir) {
+		pr_warn("cannot create /proc/net/%s\n", PG_PROC_DIR);
 		return -ENODEV;
-
-	pe = proc_create(PGCTRL, 0600, pg_proc_dir, &pktgen_fops);
+	}
+	pe = proc_create(PGCTRL, 0600, pn->proc_dir, &pktgen_fops);
 	if (pe == NULL) {
-		pr_err("ERROR: cannot create %s procfs entry\n", PGCTRL);
+		pr_err("cannot create %s procfs entry\n", PGCTRL);
 		ret = -EINVAL;
-		goto remove_dir;
+		goto remove;
 	}
 
-	register_netdevice_notifier(&pktgen_notifier_block);
-
 	for_each_online_cpu(cpu) {
 		int err;
 
-		err = pktgen_create_thread(cpu);
+		err = pktgen_create_thread(cpu, pn);
 		if (err)
-			pr_warning("WARNING: Cannot create thread for cpu %d (%d)\n",
+			pr_warn("Cannot create thread for cpu %d (%d)\n",
 				   cpu, err);
 	}
 
-	if (list_empty(&pktgen_threads)) {
-		pr_err("ERROR: Initialization failed for all threads\n");
+	if (list_empty(&pn->pktgen_threads)) {
+		pr_err("Initialization failed for all threads\n");
 		ret = -ENODEV;
-		goto unregister;
+		goto remove_entry;
 	}
 
 	return 0;
 
- unregister:
-	unregister_netdevice_notifier(&pktgen_notifier_block);
-	remove_proc_entry(PGCTRL, pg_proc_dir);
- remove_dir:
-	proc_net_remove(&init_net, PG_PROC_DIR);
+remove_entry:
+	remove_proc_entry(PGCTRL, pn->proc_dir);
+remove:
+	proc_net_remove(pn->net, PG_PROC_DIR);
 	return ret;
 }
 
-static void __exit pg_cleanup(void)
+static void __net_exit pg_net_exit(struct net *net)
 {
+	struct pktgen_net *pn = net_generic(net, pg_net_id);
 	struct pktgen_thread *t;
 	struct list_head *q, *n;
 	LIST_HEAD(list);
 
 	/* Stop all interfaces & threads */
-	pktgen_exiting = true;
+	pn->pktgen_exiting = true;
 
 	mutex_lock(&pktgen_thread_lock);
-	list_splice_init(&pktgen_threads, &list);
+	list_splice_init(&pn->pktgen_threads, &list);
 	mutex_unlock(&pktgen_thread_lock);
 
 	list_for_each_safe(q, n, &list) {
@@ -3658,12 +3675,36 @@ static void __exit pg_cleanup(void)
 		kfree(t);
 	}
 
-	/* Un-register us from receiving netdevice events */
-	unregister_netdevice_notifier(&pktgen_notifier_block);
+	remove_proc_entry(PGCTRL, pn->proc_dir);
+	proc_net_remove(pn->net, PG_PROC_DIR);
+}
+
+static struct pernet_operations pg_net_ops = {
+	.init = pg_net_init,
+	.exit = pg_net_exit,
+	.id   = &pg_net_id,
+	.size = sizeof(struct pktgen_net),
+};
+
+static int __init pg_init(void)
+{
+	int ret = 0;
 
-	/* Clean up proc file system */
-	remove_proc_entry(PGCTRL, pg_proc_dir);
-	proc_net_remove(&init_net, PG_PROC_DIR);
+	pr_info("%s", version);
+	ret = register_pernet_subsys(&pg_net_ops);
+	if (ret)
+		return ret;
+	ret = register_netdevice_notifier(&pktgen_notifier_block);
+	if (ret)
+		unregister_pernet_subsys(&pg_net_ops);
+
+	return ret;
+}
+
+static void __exit pg_cleanup(void)
+{
+	unregister_netdevice_notifier(&pktgen_notifier_block);
+	unregister_pernet_subsys(&pg_net_ops);
 }
 
 module_init(pg_init);
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index c31d9e8668c..4425148d2b5 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -186,8 +186,6 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
 	struct fastopen_queue *fastopenq =
 	    inet_csk(lsk)->icsk_accept_queue.fastopenq;
 
-	BUG_ON(!spin_is_locked(&sk->sk_lock.slock) && !sock_owned_by_user(sk));
-
 	tcp_sk(sk)->fastopen_rsk = NULL;
 	spin_lock_bh(&fastopenq->lock);
 	fastopenq->qlen--;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 1868625af25..d8aa20f6a46 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -780,6 +780,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
 	       + nla_total_size(4) /* IFLA_MTU */
 	       + nla_total_size(4) /* IFLA_LINK */
 	       + nla_total_size(4) /* IFLA_MASTER */
+	       + nla_total_size(1) /* IFLA_CARRIER */
 	       + nla_total_size(4) /* IFLA_PROMISCUITY */
 	       + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */
 	       + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */
@@ -879,6 +880,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	const struct rtnl_link_stats64 *stats;
 	struct nlattr *attr, *af_spec;
 	struct rtnl_af_ops *af_ops;
+	struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
 
 	ASSERT_RTNL();
 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
@@ -907,8 +909,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 #endif
 	    (dev->ifindex != dev->iflink &&
 	     nla_put_u32(skb, IFLA_LINK, dev->iflink)) ||
-	    (dev->master &&
-	     nla_put_u32(skb, IFLA_MASTER, dev->master->ifindex)) ||
+	    (upper_dev &&
+	     nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex)) ||
+	    nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
 	    (dev->qdisc &&
 	     nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) ||
 	    (dev->ifalias &&
@@ -1108,6 +1111,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_MTU]		= { .type = NLA_U32 },
 	[IFLA_LINK]		= { .type = NLA_U32 },
 	[IFLA_MASTER]		= { .type = NLA_U32 },
+	[IFLA_CARRIER]		= { .type = NLA_U8 },
 	[IFLA_TXQLEN]		= { .type = NLA_U32 },
 	[IFLA_WEIGHT]		= { .type = NLA_U32 },
 	[IFLA_OPERSTATE]	= { .type = NLA_U8 },
@@ -1270,16 +1274,16 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
 
 static int do_set_master(struct net_device *dev, int ifindex)
 {
-	struct net_device *master_dev;
+	struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
 	const struct net_device_ops *ops;
 	int err;
 
-	if (dev->master) {
-		if (dev->master->ifindex == ifindex)
+	if (upper_dev) {
+		if (upper_dev->ifindex == ifindex)
 			return 0;
-		ops = dev->master->netdev_ops;
+		ops = upper_dev->netdev_ops;
 		if (ops->ndo_del_slave) {
-			err = ops->ndo_del_slave(dev->master, dev);
+			err = ops->ndo_del_slave(upper_dev, dev);
 			if (err)
 				return err;
 		} else {
@@ -1288,12 +1292,12 @@ static int do_set_master(struct net_device *dev, int ifindex)
 	}
 
 	if (ifindex) {
-		master_dev = __dev_get_by_index(dev_net(dev), ifindex);
-		if (!master_dev)
+		upper_dev = __dev_get_by_index(dev_net(dev), ifindex);
+		if (!upper_dev)
 			return -EINVAL;
-		ops = master_dev->netdev_ops;
+		ops = upper_dev->netdev_ops;
 		if (ops->ndo_add_slave) {
-			err = ops->ndo_add_slave(master_dev, dev);
+			err = ops->ndo_add_slave(upper_dev, dev);
 			if (err)
 				return err;
 		} else {
@@ -1307,7 +1311,6 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 		      struct nlattr **tb, char *ifname, int modified)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
-	int send_addr_notify = 0;
 	int err;
 
 	if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) {
@@ -1360,16 +1363,6 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 		struct sockaddr *sa;
 		int len;
 
-		if (!ops->ndo_set_mac_address) {
-			err = -EOPNOTSUPP;
-			goto errout;
-		}
-
-		if (!netif_device_present(dev)) {
-			err = -ENODEV;
-			goto errout;
-		}
-
 		len = sizeof(sa_family_t) + dev->addr_len;
 		sa = kmalloc(len, GFP_KERNEL);
 		if (!sa) {
@@ -1379,13 +1372,11 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 		sa->sa_family = dev->type;
 		memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]),
 		       dev->addr_len);
-		err = ops->ndo_set_mac_address(dev, sa);
+		err = dev_set_mac_address(dev, sa);
 		kfree(sa);
 		if (err)
 			goto errout;
-		send_addr_notify = 1;
 		modified = 1;
-		add_device_randomness(dev->dev_addr, dev->addr_len);
 	}
 
 	if (tb[IFLA_MTU]) {
@@ -1422,7 +1413,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 
 	if (tb[IFLA_BROADCAST]) {
 		nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len);
-		send_addr_notify = 1;
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
 	}
 
 	if (ifm->ifi_flags || ifm->ifi_change) {
@@ -1438,6 +1429,13 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 		modified = 1;
 	}
 
+	if (tb[IFLA_CARRIER]) {
+		err = dev_change_carrier(dev, nla_get_u8(tb[IFLA_CARRIER]));
+		if (err)
+			goto errout;
+		modified = 1;
+	}
+
 	if (tb[IFLA_TXQLEN])
 		dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
 
@@ -1536,9 +1534,6 @@ errout:
 		net_warn_ratelimited("A link change request failed with some changes committed already. Interface %s may have been left with an inconsistent configuration, please check.\n",
 				     dev->name);
 
-	if (send_addr_notify)
-		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
-
 	return err;
 }
 
@@ -1672,9 +1667,11 @@ struct net_device *rtnl_create_link(struct net *net,
 
 	if (tb[IFLA_MTU])
 		dev->mtu = nla_get_u32(tb[IFLA_MTU]);
-	if (tb[IFLA_ADDRESS])
+	if (tb[IFLA_ADDRESS]) {
 		memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]),
 				nla_len(tb[IFLA_ADDRESS]));
+		dev->addr_assign_type = NET_ADDR_SET;
+	}
 	if (tb[IFLA_BROADCAST])
 		memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]),
 				nla_len(tb[IFLA_BROADCAST]));
@@ -1992,6 +1989,7 @@ errout:
 	if (err < 0)
 		rtnl_set_sk_err(net, RTNLGRP_LINK, err);
 }
+EXPORT_SYMBOL(rtmsg_ifinfo);
 
 static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
 				   struct net_device *dev,
@@ -2054,16 +2052,12 @@ errout:
 static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct net *net = sock_net(skb->sk);
-	struct net_device *master = NULL;
 	struct ndmsg *ndm;
 	struct nlattr *tb[NDA_MAX+1];
 	struct net_device *dev;
 	u8 *addr;
 	int err;
 
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
 	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
 	if (err < 0)
 		return err;
@@ -2096,10 +2090,10 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	/* Support fdb on master device the net/bridge default case */
 	if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) &&
 	    (dev->priv_flags & IFF_BRIDGE_PORT)) {
-		master = dev->master;
-		err = master->netdev_ops->ndo_fdb_add(ndm, tb,
-						      dev, addr,
-						      nlh->nlmsg_flags);
+		struct net_device *br_dev = netdev_master_upper_dev_get(dev);
+		const struct net_device_ops *ops = br_dev->netdev_ops;
+
+		err = ops->ndo_fdb_add(ndm, tb, dev, addr, nlh->nlmsg_flags);
 		if (err)
 			goto out;
 		else
@@ -2125,7 +2119,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct net *net = sock_net(skb->sk);
 	struct ndmsg *ndm;
-	struct nlattr *llattr;
+	struct nlattr *tb[NDA_MAX+1];
 	struct net_device *dev;
 	int err = -EINVAL;
 	__u8 *addr;
@@ -2133,8 +2127,9 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 
-	if (nlmsg_len(nlh) < sizeof(*ndm))
-		return -EINVAL;
+	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
+	if (err < 0)
+		return err;
 
 	ndm = nlmsg_data(nlh);
 	if (ndm->ndm_ifindex == 0) {
@@ -2148,22 +2143,27 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		return -ENODEV;
 	}
 
-	llattr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_LLADDR);
-	if (llattr == NULL || nla_len(llattr) != ETH_ALEN) {
-		pr_info("PF_BRIGDE: RTM_DELNEIGH with invalid address\n");
+	if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
+		pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid address\n");
+		return -EINVAL;
+	}
+
+	addr = nla_data(tb[NDA_LLADDR]);
+	if (!is_valid_ether_addr(addr)) {
+		pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ether address\n");
 		return -EINVAL;
 	}
 
-	addr = nla_data(llattr);
 	err = -EOPNOTSUPP;
 
 	/* Support fdb on master device the net/bridge default case */
 	if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) &&
 	    (dev->priv_flags & IFF_BRIDGE_PORT)) {
-		struct net_device *master = dev->master;
+		struct net_device *br_dev = netdev_master_upper_dev_get(dev);
+		const struct net_device_ops *ops = br_dev->netdev_ops;
 
-		if (master->netdev_ops->ndo_fdb_del)
-			err = master->netdev_ops->ndo_fdb_del(ndm, dev, addr);
+		if (ops->ndo_fdb_del)
+			err = ops->ndo_fdb_del(ndm, tb, dev, addr);
 
 		if (err)
 			goto out;
@@ -2173,7 +2173,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 
 	/* Embedded bridge, macvlan, and any other device support */
 	if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_del) {
-		err = dev->netdev_ops->ndo_fdb_del(ndm, dev, addr);
+		err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr);
 
 		if (!err) {
 			rtnl_fdb_notify(dev, addr, RTM_DELNEIGH);
@@ -2247,9 +2247,11 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	rcu_read_lock();
 	for_each_netdev_rcu(net, dev) {
 		if (dev->priv_flags & IFF_BRIDGE_PORT) {
-			struct net_device *master = dev->master;
-			const struct net_device_ops *ops = master->netdev_ops;
+			struct net_device *br_dev;
+			const struct net_device_ops *ops;
 
+			br_dev = netdev_master_upper_dev_get(dev);
+			ops = br_dev->netdev_ops;
 			if (ops->ndo_fdb_dump)
 				idx = ops->ndo_fdb_dump(skb, cb, dev, idx);
 		}
@@ -2270,6 +2272,7 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
 	struct ifinfomsg *ifm;
 	struct nlattr *br_afspec;
 	u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
+	struct net_device *br_dev = netdev_master_upper_dev_get(dev);
 
 	nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), NLM_F_MULTI);
 	if (nlh == NULL)
@@ -2287,8 +2290,8 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
 	if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
 	    nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
 	    nla_put_u8(skb, IFLA_OPERSTATE, operstate) ||
-	    (dev->master &&
-	     nla_put_u32(skb, IFLA_MASTER, dev->master->ifindex)) ||
+	    (br_dev &&
+	     nla_put_u32(skb, IFLA_MASTER, br_dev->ifindex)) ||
 	    (dev->addr_len &&
 	     nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) ||
 	    (dev->ifindex != dev->iflink &&
@@ -2320,23 +2323,31 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
 	int idx = 0;
 	u32 portid = NETLINK_CB(cb->skb).portid;
 	u32 seq = cb->nlh->nlmsg_seq;
+	struct nlattr *extfilt;
+	u32 filter_mask = 0;
+
+	extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct rtgenmsg),
+				  IFLA_EXT_MASK);
+	if (extfilt)
+		filter_mask = nla_get_u32(extfilt);
 
 	rcu_read_lock();
 	for_each_netdev_rcu(net, dev) {
 		const struct net_device_ops *ops = dev->netdev_ops;
-		struct net_device *master = dev->master;
+		struct net_device *br_dev = netdev_master_upper_dev_get(dev);
 
-		if (master && master->netdev_ops->ndo_bridge_getlink) {
+		if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) {
 			if (idx >= cb->args[0] &&
-			    master->netdev_ops->ndo_bridge_getlink(
-				    skb, portid, seq, dev) < 0)
+			    br_dev->netdev_ops->ndo_bridge_getlink(
+				    skb, portid, seq, dev, filter_mask) < 0)
 				break;
 			idx++;
 		}
 
 		if (ops->ndo_bridge_getlink) {
 			if (idx >= cb->args[0] &&
-			    ops->ndo_bridge_getlink(skb, portid, seq, dev) < 0)
+			    ops->ndo_bridge_getlink(skb, portid, seq, dev,
+						    filter_mask) < 0)
 				break;
 			idx++;
 		}
@@ -2365,7 +2376,7 @@ static inline size_t bridge_nlmsg_size(void)
 static int rtnl_bridge_notify(struct net_device *dev, u16 flags)
 {
 	struct net *net = dev_net(dev);
-	struct net_device *master = dev->master;
+	struct net_device *br_dev = netdev_master_upper_dev_get(dev);
 	struct sk_buff *skb;
 	int err = -EOPNOTSUPP;
 
@@ -2376,15 +2387,15 @@ static int rtnl_bridge_notify(struct net_device *dev, u16 flags)
 	}
 
 	if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) &&
-	    master && master->netdev_ops->ndo_bridge_getlink) {
-		err = master->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev);
+	    br_dev && br_dev->netdev_ops->ndo_bridge_getlink) {
+		err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0);
 		if (err < 0)
 			goto errout;
 	}
 
 	if ((flags & BRIDGE_FLAGS_SELF) &&
 	    dev->netdev_ops->ndo_bridge_getlink) {
-		err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev);
+		err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0);
 		if (err < 0)
 			goto errout;
 	}
@@ -2436,13 +2447,14 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
 	oflags = flags;
 
 	if (!flags || (flags & BRIDGE_FLAGS_MASTER)) {
-		if (!dev->master ||
-		    !dev->master->netdev_ops->ndo_bridge_setlink) {
+		struct net_device *br_dev = netdev_master_upper_dev_get(dev);
+
+		if (!br_dev || !br_dev->netdev_ops->ndo_bridge_setlink) {
 			err = -EOPNOTSUPP;
 			goto out;
 		}
 
-		err = dev->master->netdev_ops->ndo_bridge_setlink(dev, nlh);
+		err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh);
 		if (err)
 			goto out;
 
@@ -2468,6 +2480,77 @@ out:
 	return err;
 }
 
+static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
+			       void *arg)
+{
+	struct net *net = sock_net(skb->sk);
+	struct ifinfomsg *ifm;
+	struct net_device *dev;
+	struct nlattr *br_spec, *attr = NULL;
+	int rem, err = -EOPNOTSUPP;
+	u16 oflags, flags = 0;
+	bool have_flags = false;
+
+	if (nlmsg_len(nlh) < sizeof(*ifm))
+		return -EINVAL;
+
+	ifm = nlmsg_data(nlh);
+	if (ifm->ifi_family != AF_BRIDGE)
+		return -EPFNOSUPPORT;
+
+	dev = __dev_get_by_index(net, ifm->ifi_index);
+	if (!dev) {
+		pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n");
+		return -ENODEV;
+	}
+
+	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+	if (br_spec) {
+		nla_for_each_nested(attr, br_spec, rem) {
+			if (nla_type(attr) == IFLA_BRIDGE_FLAGS) {
+				have_flags = true;
+				flags = nla_get_u16(attr);
+				break;
+			}
+		}
+	}
+
+	oflags = flags;
+
+	if (!flags || (flags & BRIDGE_FLAGS_MASTER)) {
+		struct net_device *br_dev = netdev_master_upper_dev_get(dev);
+
+		if (!br_dev || !br_dev->netdev_ops->ndo_bridge_dellink) {
+			err = -EOPNOTSUPP;
+			goto out;
+		}
+
+		err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh);
+		if (err)
+			goto out;
+
+		flags &= ~BRIDGE_FLAGS_MASTER;
+	}
+
+	if ((flags & BRIDGE_FLAGS_SELF)) {
+		if (!dev->netdev_ops->ndo_bridge_dellink)
+			err = -EOPNOTSUPP;
+		else
+			err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh);
+
+		if (!err)
+			flags &= ~BRIDGE_FLAGS_SELF;
+	}
+
+	if (have_flags)
+		memcpy(nla_data(attr), &flags, sizeof(flags));
+	/* Generate event to notify upper layer of bridge change */
+	if (!err)
+		err = rtnl_bridge_notify(dev, oflags);
+out:
+	return err;
+}
+
 /* Protected by RTNL sempahore.  */
 static struct rtattr **rta_buf;
 static int rtattr_max;
@@ -2651,6 +2734,7 @@ void __init rtnetlink_init(void)
 	rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL);
 
 	rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL);
+	rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL);
 	rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL);
 }
 
diff --git a/net/core/scm.c b/net/core/scm.c
index 57fb1ee6649..905dcc6ad1e 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -35,6 +35,7 @@
 #include <net/sock.h>
 #include <net/compat.h>
 #include <net/scm.h>
+#include <net/cls_cgroup.h>
 
 
 /*
@@ -302,8 +303,10 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
 		}
 		/* Bump the usage count and install the file. */
 		sock = sock_from_file(fp[i], &err);
-		if (sock)
+		if (sock) {
 			sock_update_netprioidx(sock->sk, current);
+			sock_update_classid(sock->sk, current);
+		}
 		fd_install(new_fd, get_file(fp[i]));
 	}
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3ab989b0de4..2a3ca33c30a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -104,47 +104,37 @@ static const struct pipe_buf_operations sock_pipe_buf_ops = {
 	.get = sock_pipe_buf_get,
 };
 
-/*
- *	Keep out-of-line to prevent kernel bloat.
- *	__builtin_return_address is not used because it is not always
- *	reliable.
- */
-
 /**
- *	skb_over_panic	- 	private function
- *	@skb: buffer
- *	@sz: size
- *	@here: address
- *
- *	Out of line support code for skb_put(). Not user callable.
+ *	skb_panic - private function for out-of-line support
+ *	@skb:	buffer
+ *	@sz:	size
+ *	@addr:	address
+ *	@msg:	skb_over_panic or skb_under_panic
+ *
+ *	Out-of-line support for skb_put() and skb_push().
+ *	Called via the wrapper skb_over_panic() or skb_under_panic().
+ *	Keep out of line to prevent kernel bloat.
+ *	__builtin_return_address is not used because it is not always reliable.
  */
-static void skb_over_panic(struct sk_buff *skb, int sz, void *here)
+static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr,
+		      const char msg[])
 {
 	pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n",
-		 __func__, here, skb->len, sz, skb->head, skb->data,
+		 msg, addr, skb->len, sz, skb->head, skb->data,
 		 (unsigned long)skb->tail, (unsigned long)skb->end,
 		 skb->dev ? skb->dev->name : "<NULL>");
 	BUG();
 }
 
-/**
- *	skb_under_panic	- 	private function
- *	@skb: buffer
- *	@sz: size
- *	@here: address
- *
- *	Out of line support code for skb_push(). Not user callable.
- */
-
-static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
+static void skb_over_panic(struct sk_buff *skb, unsigned int sz, void *addr)
 {
-	pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n",
-		 __func__, here, skb->len, sz, skb->head, skb->data,
-		 (unsigned long)skb->tail, (unsigned long)skb->end,
-		 skb->dev ? skb->dev->name : "<NULL>");
-	BUG();
+	skb_panic(skb, sz, addr, __func__);
 }
 
+static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
+{
+	skb_panic(skb, sz, addr, __func__);
+}
 
 /*
  * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
@@ -155,8 +145,9 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  */
 #define kmalloc_reserve(size, gfp, node, pfmemalloc) \
 	 __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc)
-void *__kmalloc_reserve(size_t size, gfp_t flags, int node, unsigned long ip,
-			 bool *pfmemalloc)
+
+static void *__kmalloc_reserve(size_t size, gfp_t flags, int node,
+			       unsigned long ip, bool *pfmemalloc)
 {
 	void *obj;
 	bool ret_pfmemalloc = false;
@@ -259,6 +250,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	skb->end = skb->tail + size;
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
 	skb->mac_header = ~0U;
+	skb->transport_header = ~0U;
 #endif
 
 	/* make sure we initialize shinfo sequentially */
@@ -327,6 +319,7 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
 	skb->end = skb->tail + size;
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
 	skb->mac_header = ~0U;
+	skb->transport_header = ~0U;
 #endif
 
 	/* make sure we initialize shinfo sequentially */
@@ -348,10 +341,6 @@ struct netdev_alloc_cache {
 };
 static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
 
-#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768)
-#define NETDEV_FRAG_PAGE_MAX_SIZE  (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
-#define NETDEV_PAGECNT_MAX_BIAS	   NETDEV_FRAG_PAGE_MAX_SIZE
-
 static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
 	struct netdev_alloc_cache *nc;
@@ -683,7 +672,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->network_header	= old->network_header;
 	new->mac_header		= old->mac_header;
 	new->inner_transport_header = old->inner_transport_header;
-	new->inner_network_header = old->inner_transport_header;
+	new->inner_network_header = old->inner_network_header;
 	skb_dst_copy(new, old);
 	new->rxhash		= old->rxhash;
 	new->ooo_okay		= old->ooo_okay;
@@ -1649,7 +1638,7 @@ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
 
 static struct page *linear_to_page(struct page *page, unsigned int *len,
 				   unsigned int *offset,
-				   struct sk_buff *skb, struct sock *sk)
+				   struct sock *sk)
 {
 	struct page_frag *pfrag = sk_page_frag(sk);
 
@@ -1682,14 +1671,14 @@ static bool spd_can_coalesce(const struct splice_pipe_desc *spd,
 static bool spd_fill_page(struct splice_pipe_desc *spd,
 			  struct pipe_inode_info *pipe, struct page *page,
 			  unsigned int *len, unsigned int offset,
-			  struct sk_buff *skb, bool linear,
+			  bool linear,
 			  struct sock *sk)
 {
 	if (unlikely(spd->nr_pages == MAX_SKB_FRAGS))
 		return true;
 
 	if (linear) {
-		page = linear_to_page(page, len, &offset, skb, sk);
+		page = linear_to_page(page, len, &offset, sk);
 		if (!page)
 			return true;
 	}
@@ -1706,23 +1695,9 @@ static bool spd_fill_page(struct splice_pipe_desc *spd,
 	return false;
 }
 
-static inline void __segment_seek(struct page **page, unsigned int *poff,
-				  unsigned int *plen, unsigned int off)
-{
-	unsigned long n;
-
-	*poff += off;
-	n = *poff / PAGE_SIZE;
-	if (n)
-		*page = nth_page(*page, n);
-
-	*poff = *poff % PAGE_SIZE;
-	*plen -= off;
-}
-
 static bool __splice_segment(struct page *page, unsigned int poff,
 			     unsigned int plen, unsigned int *off,
-			     unsigned int *len, struct sk_buff *skb,
+			     unsigned int *len,
 			     struct splice_pipe_desc *spd, bool linear,
 			     struct sock *sk,
 			     struct pipe_inode_info *pipe)
@@ -1737,23 +1712,19 @@ static bool __splice_segment(struct page *page, unsigned int poff,
 	}
 
 	/* ignore any bits we already processed */
-	if (*off) {
-		__segment_seek(&page, &poff, &plen, *off);
-		*off = 0;
-	}
+	poff += *off;
+	plen -= *off;
+	*off = 0;
 
 	do {
 		unsigned int flen = min(*len, plen);
 
-		/* the linear region may spread across several pages  */
-		flen = min_t(unsigned int, flen, PAGE_SIZE - poff);
-
-		if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk))
+		if (spd_fill_page(spd, pipe, page, &flen, poff,
+				  linear, sk))
 			return true;
-
-		__segment_seek(&page, &poff, &plen, flen);
+		poff += flen;
+		plen -= flen;
 		*len -= flen;
-
 	} while (*len && plen);
 
 	return false;
@@ -1777,7 +1748,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
 	if (__splice_segment(virt_to_page(skb->data),
 			     (unsigned long) skb->data & (PAGE_SIZE - 1),
 			     skb_headlen(skb),
-			     offset, len, skb, spd,
+			     offset, len, spd,
 			     skb_head_is_locked(skb),
 			     sk, pipe))
 		return true;
@@ -1790,7 +1761,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
 
 		if (__splice_segment(skb_frag_page(f),
 				     f->page_offset, skb_frag_size(f),
-				     offset, len, skb, spd, false, sk, pipe))
+				     offset, len, spd, false, sk, pipe))
 			return true;
 	}
 
@@ -2355,6 +2326,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
 {
 	int pos = skb_headlen(skb);
 
+	skb_shinfo(skb)->tx_flags = skb_shinfo(skb1)->tx_flags & SKBTX_SHARED_FRAG;
 	if (len < pos)	/* Split line is inside header. */
 		skb_split_inside_header(skb, skb1, len, pos);
 	else		/* Second chunk has no header, nothing to copy. */
@@ -2686,48 +2658,37 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
 					int len, int odd, struct sk_buff *skb),
 			void *from, int length)
 {
-	int frg_cnt = 0;
-	skb_frag_t *frag = NULL;
-	struct page *page = NULL;
-	int copy, left;
+	int frg_cnt = skb_shinfo(skb)->nr_frags;
+	int copy;
 	int offset = 0;
 	int ret;
+	struct page_frag *pfrag = &current->task_frag;
 
 	do {
 		/* Return error if we don't have space for new frag */
-		frg_cnt = skb_shinfo(skb)->nr_frags;
 		if (frg_cnt >= MAX_SKB_FRAGS)
-			return -EFAULT;
+			return -EMSGSIZE;
 
-		/* allocate a new page for next frag */
-		page = alloc_pages(sk->sk_allocation, 0);
-
-		/* If alloc_page fails just return failure and caller will
-		 * free previous allocated pages by doing kfree_skb()
-		 */
-		if (page == NULL)
+		if (!sk_page_frag_refill(sk, pfrag))
 			return -ENOMEM;
 
-		/* initialize the next frag */
-		skb_fill_page_desc(skb, frg_cnt, page, 0, 0);
-		skb->truesize += PAGE_SIZE;
-		atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
-
-		/* get the new initialized frag */
-		frg_cnt = skb_shinfo(skb)->nr_frags;
-		frag = &skb_shinfo(skb)->frags[frg_cnt - 1];
-
 		/* copy the user data to page */
-		left = PAGE_SIZE - frag->page_offset;
-		copy = (length > left)? left : length;
+		copy = min_t(int, length, pfrag->size - pfrag->offset);
 
-		ret = getfrag(from, skb_frag_address(frag) + skb_frag_size(frag),
-			    offset, copy, 0, skb);
+		ret = getfrag(from, page_address(pfrag->page) + pfrag->offset,
+			      offset, copy, 0, skb);
 		if (ret < 0)
 			return -EFAULT;
 
 		/* copy was successful so update the size parameters */
-		skb_frag_size_add(frag, copy);
+		skb_fill_page_desc(skb, frg_cnt, pfrag->page, pfrag->offset,
+				   copy);
+		frg_cnt++;
+		pfrag->offset += copy;
+		get_page(pfrag->page);
+
+		skb->truesize += copy;
+		atomic_add(copy, &sk->sk_wmem_alloc);
 		skb->len += copy;
 		skb->data_len += copy;
 		offset += copy;
@@ -2777,6 +2738,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 	unsigned int mss = skb_shinfo(skb)->gso_size;
 	unsigned int doffset = skb->data - skb_mac_header(skb);
 	unsigned int offset = doffset;
+	unsigned int tnl_hlen = skb_tnl_header_len(skb);
 	unsigned int headroom;
 	unsigned int len;
 	int sg = !!(features & NETIF_F_SG);
@@ -2853,7 +2815,10 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 		skb_set_network_header(nskb, skb->mac_len);
 		nskb->transport_header = (nskb->network_header +
 					  skb_network_header_len(skb));
-		skb_copy_from_linear_data(skb, nskb->data, doffset);
+
+		skb_copy_from_linear_data_offset(skb, -tnl_hlen,
+						 nskb->data - tnl_hlen,
+						 doffset + tnl_hlen);
 
 		if (fskb != skb_shinfo(skb)->frag_list)
 			continue;
@@ -2871,6 +2836,8 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 		skb_copy_from_linear_data_offset(skb, offset,
 						 skb_put(nskb, hsize), hsize);
 
+		skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG;
+
 		while (pos < offset + len && i < nfrags) {
 			*frag = skb_shinfo(skb)->frags[i];
 			__skb_frag_ref(frag);
diff --git a/net/core/sock.c b/net/core/sock.c
index a692ef49c9b..f1e14e20d18 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -583,7 +583,7 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval,
 		goto out;
 
 retry:
-	seq = read_seqbegin(&devnet_rename_seq);
+	seq = read_seqcount_begin(&devnet_rename_seq);
 	rcu_read_lock();
 	dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
 	ret = -ENODEV;
@@ -594,7 +594,7 @@ retry:
 
 	strcpy(devname, dev->name);
 	rcu_read_unlock();
-	if (read_seqretry(&devnet_rename_seq, seq))
+	if (read_seqcount_retry(&devnet_rename_seq, seq))
 		goto retry;
 
 	len = strlen(devname) + 1;
@@ -665,6 +665,9 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 	case SO_REUSEADDR:
 		sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
 		break;
+	case SO_REUSEPORT:
+		sk->sk_reuseport = valbool;
+		break;
 	case SO_TYPE:
 	case SO_PROTOCOL:
 	case SO_DOMAIN:
@@ -861,6 +864,13 @@ set_rcvbuf:
 		ret = sk_detach_filter(sk);
 		break;
 
+	case SO_LOCK_FILTER:
+		if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
+			ret = -EPERM;
+		else
+			sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
+		break;
+
 	case SO_PASSSEC:
 		if (valbool)
 			set_bit(SOCK_PASSSEC, &sock->flags);
@@ -965,6 +975,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sk->sk_reuse;
 		break;
 
+	case SO_REUSEPORT:
+		v.val = sk->sk_reuseport;
+		break;
+
 	case SO_KEEPALIVE:
 		v.val = sock_flag(sk, SOCK_KEEPOPEN);
 		break;
@@ -1140,6 +1154,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 
 		goto lenout;
 
+	case SO_LOCK_FILTER:
+		v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -2212,7 +2230,7 @@ EXPORT_SYMBOL(sk_reset_timer);
 
 void sk_stop_timer(struct sock *sk, struct timer_list* timer)
 {
-	if (timer_pending(timer) && del_timer(timer))
+	if (del_timer(timer))
 		__sock_put(sk);
 }
 EXPORT_SYMBOL(sk_stop_timer);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index d1b08045a9d..cfdb46ab3a7 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -20,6 +20,8 @@
 #include <net/sock.h>
 #include <net/net_ratelimit.h>
 
+static int one = 1;
+
 #ifdef CONFIG_RPS
 static int rps_sock_flow_sysctl(ctl_table *table, int write,
 				void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -92,28 +94,32 @@ static struct ctl_table net_core_table[] = {
 		.data		= &sysctl_wmem_max,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
 	},
 	{
 		.procname	= "rmem_max",
 		.data		= &sysctl_rmem_max,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
 	},
 	{
 		.procname	= "wmem_default",
 		.data		= &sysctl_wmem_default,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
 	},
 	{
 		.procname	= "rmem_default",
 		.data		= &sysctl_rmem_default,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
 	},
 	{
 		.procname	= "dev_weight",
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 307c322d53b..64d9843f9e0 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -909,6 +909,7 @@ static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen,
 	struct dn_scp *scp = DN_SK(sk);
 	int err = -EISCONN;
 	struct flowidn fld;
+	struct dst_entry *dst;
 
 	if (sock->state == SS_CONNECTED)
 		goto out;
@@ -955,10 +956,11 @@ static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen,
 	fld.flowidn_proto = DNPROTO_NSP;
 	if (dn_route_output_sock(&sk->sk_dst_cache, &fld, sk, flags) < 0)
 		goto out;
-	sk->sk_route_caps = sk->sk_dst_cache->dev->features;
+	dst = __sk_dst_get(sk);
+	sk->sk_route_caps = dst->dev->features;
 	sock->state = SS_CONNECTING;
 	scp->state = DN_CI;
-	scp->segsize_loc = dst_metric_advmss(sk->sk_dst_cache);
+	scp->segsize_loc = dst_metric_advmss(dst);
 
 	dn_nsp_send_conninit(sk, NSP_CI);
 	err = -EINPROGRESS;
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 3aede1b459f..856636addd7 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -95,7 +95,7 @@ static u32 dn_neigh_hash(const void *pkey,
 
 struct neigh_table dn_neigh_table = {
 	.family =			PF_DECnet,
-	.entry_size =			sizeof(struct dn_neigh),
+	.entry_size =			NEIGH_ENTRY_SIZE(sizeof(struct dn_neigh)),
 	.key_len =			sizeof(__le16),
 	.hash =				dn_neigh_hash,
 	.constructor =			dn_neigh_construct,
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index 8a96047c7c9..1aaa51ebbda 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -598,7 +598,7 @@ void dn_nsp_send_disc(struct sock *sk, unsigned char msgflg,
 	if (reason == 0)
 		reason = le16_to_cpu(scp->discdata_out.opt_status);
 
-	dn_nsp_do_disc(sk, msgflg, reason, gfp, sk->sk_dst_cache, ddl,
+	dn_nsp_do_disc(sk, msgflg, reason, gfp, __sk_dst_get(sk), ddl,
 		scp->discdata_out.opt_data, scp->addrrem, scp->addrloc);
 }
 
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index b57419cc41a..1550028fcd8 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1282,7 +1282,7 @@ static int dn_route_output_key(struct dst_entry **pprt, struct flowidn *flp, int
 	return err;
 }
 
-int dn_route_output_sock(struct dst_entry **pprt, struct flowidn *fl, struct sock *sk, int flags)
+int dn_route_output_sock(struct dst_entry __rcu **pprt, struct flowidn *fl, struct sock *sk, int flags)
 {
 	int err;
 
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 45295ca0957..2bc62ea857c 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -80,6 +80,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
 	int ret;
 	char *name;
 	int i;
+	bool valid_name_found = false;
 
 	/*
 	 * Probe for switch model.
@@ -131,8 +132,13 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
 		} else {
 			ds->phys_port_mask |= 1 << i;
 		}
+		valid_name_found = true;
 	}
 
+	if (!valid_name_found && i == DSA_MAX_PORTS) {
+		ret = -EINVAL;
+		goto out;
+	}
 
 	/*
 	 * If the CPU connects to this switch, set the switch tree
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index e32083d5d8f..6ebd8fbd928 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -41,8 +41,8 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds)
 	ds->slave_mii_bus->name = "dsa slave smi";
 	ds->slave_mii_bus->read = dsa_slave_phy_read;
 	ds->slave_mii_bus->write = dsa_slave_phy_write;
-	snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "%s:%.2x",
-			ds->master_mii_bus->id, ds->pd->sw_addr);
+	snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d:%.2x",
+			ds->index, ds->pd->sw_addr);
 	ds->slave_mii_bus->parent = &ds->master_mii_bus->dev;
 }
 
@@ -203,10 +203,10 @@ dsa_slave_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 static void dsa_slave_get_drvinfo(struct net_device *dev,
 				  struct ethtool_drvinfo *drvinfo)
 {
-	strncpy(drvinfo->driver, "dsa", 32);
-	strncpy(drvinfo->version, dsa_driver_version, 32);
-	strncpy(drvinfo->fw_version, "N/A", 32);
-	strncpy(drvinfo->bus_info, "platform", 32);
+	strlcpy(drvinfo->driver, "dsa", sizeof(drvinfo->driver));
+	strlcpy(drvinfo->version, dsa_driver_version, sizeof(drvinfo->version));
+	strlcpy(drvinfo->fw_version, "N/A", sizeof(drvinfo->fw_version));
+	strlcpy(drvinfo->bus_info, "platform", sizeof(drvinfo->bus_info));
 }
 
 static int dsa_slave_nway_reset(struct net_device *dev)
@@ -391,7 +391,7 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent,
 
 	if (p->phy != NULL) {
 		phy_attach(slave_dev, dev_name(&p->phy->dev),
-			   0, PHY_INTERFACE_MODE_GMII);
+			   PHY_INTERFACE_MODE_GMII);
 
 		p->phy->autoneg = AUTONEG_ENABLE;
 		p->phy->speed = 0;
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 4efad533e5f..a36c85eab5b 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -272,6 +272,36 @@ void eth_header_cache_update(struct hh_cache *hh,
 EXPORT_SYMBOL(eth_header_cache_update);
 
 /**
+ * eth_prepare_mac_addr_change - prepare for mac change
+ * @dev: network device
+ * @p: socket address
+ */
+int eth_prepare_mac_addr_change(struct net_device *dev, void *p)
+{
+	struct sockaddr *addr = p;
+
+	if (!(dev->priv_flags & IFF_LIVE_ADDR_CHANGE) && netif_running(dev))
+		return -EBUSY;
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+	return 0;
+}
+EXPORT_SYMBOL(eth_prepare_mac_addr_change);
+
+/**
+ * eth_commit_mac_addr_change - commit mac change
+ * @dev: network device
+ * @p: socket address
+ */
+void eth_commit_mac_addr_change(struct net_device *dev, void *p)
+{
+	struct sockaddr *addr = p;
+
+	memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
+}
+EXPORT_SYMBOL(eth_commit_mac_addr_change);
+
+/**
  * eth_mac_addr - set new Ethernet hardware address
  * @dev: network device
  * @p: socket address
@@ -283,15 +313,12 @@ EXPORT_SYMBOL(eth_header_cache_update);
  */
 int eth_mac_addr(struct net_device *dev, void *p)
 {
-	struct sockaddr *addr = p;
+	int ret;
 
-	if (!(dev->priv_flags & IFF_LIVE_ADDR_CHANGE) && netif_running(dev))
-		return -EBUSY;
-	if (!is_valid_ether_addr(addr->sa_data))
-		return -EADDRNOTAVAIL;
-	memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
-	/* if device marked as NET_ADDR_RANDOM, reset it */
-	dev->addr_assign_type &= ~NET_ADDR_RANDOM;
+	ret = eth_prepare_mac_addr_change(dev, p);
+	if (ret < 0)
+		return ret;
+	eth_commit_mac_addr_change(dev, p);
 	return 0;
 }
 EXPORT_SYMBOL(eth_mac_addr);
diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index f651da60f16..43b95ca6111 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -377,17 +377,14 @@ static int lowpan_header_create(struct sk_buff *skb,
 	struct ipv6hdr *hdr;
 	const u8 *saddr = _saddr;
 	const u8 *daddr = _daddr;
-	u8 *head;
+	u8 head[100];
 	struct ieee802154_addr sa, da;
 
+	/* TODO:
+	 * if this package isn't ipv6 one, where should it be routed?
+	 */
 	if (type != ETH_P_IPV6)
 		return 0;
-		/* TODO:
-		 * if this package isn't ipv6 one, where should it be routed?
-		 */
-	head = kzalloc(100, GFP_KERNEL);
-	if (head == NULL)
-		return -ENOMEM;
 
 	hdr = ipv6_hdr(skb);
 	hc06_ptr = head + 2;
@@ -561,8 +558,6 @@ static int lowpan_header_create(struct sk_buff *skb,
 	skb_pull(skb, sizeof(struct ipv6hdr));
 	memcpy(skb_push(skb, hc06_ptr - head), head, hc06_ptr - head);
 
-	kfree(head);
-
 	lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data,
 				skb->len);
 
@@ -594,10 +589,32 @@ static int lowpan_header_create(struct sk_buff *skb,
 	}
 }
 
+static int lowpan_give_skb_to_devices(struct sk_buff *skb)
+{
+	struct lowpan_dev_record *entry;
+	struct sk_buff *skb_cp;
+	int stat = NET_RX_SUCCESS;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(entry, &lowpan_devices, list)
+		if (lowpan_dev_info(entry->ldev)->real_dev == skb->dev) {
+			skb_cp = skb_copy(skb, GFP_ATOMIC);
+			if (!skb_cp) {
+				stat = -ENOMEM;
+				break;
+			}
+
+			skb_cp->dev = entry->ldev;
+			stat = netif_rx(skb_cp);
+		}
+	rcu_read_unlock();
+
+	return stat;
+}
+
 static int lowpan_skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr)
 {
 	struct sk_buff *new;
-	struct lowpan_dev_record *entry;
 	int stat = NET_RX_SUCCESS;
 
 	new = skb_copy_expand(skb, sizeof(struct ipv6hdr), skb_tailroom(skb),
@@ -614,19 +631,7 @@ static int lowpan_skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr)
 	new->protocol = htons(ETH_P_IPV6);
 	new->pkt_type = PACKET_HOST;
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(entry, &lowpan_devices, list)
-		if (lowpan_dev_info(entry->ldev)->real_dev == new->dev) {
-			skb = skb_copy(new, GFP_ATOMIC);
-			if (!skb) {
-				stat = -ENOMEM;
-				break;
-			}
-
-			skb->dev = entry->ldev;
-			stat = netif_rx(skb);
-		}
-	rcu_read_unlock();
+	stat = lowpan_give_skb_to_devices(new);
 
 	kfree_skb(new);
 
@@ -1137,19 +1142,42 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev,
 		goto drop;
 
 	/* check that it's our buffer */
-	switch (skb->data[0] & 0xe0) {
-	case LOWPAN_DISPATCH_IPHC:	/* ipv6 datagram */
-	case LOWPAN_DISPATCH_FRAG1:	/* first fragment header */
-	case LOWPAN_DISPATCH_FRAGN:	/* next fragments headers */
-		local_skb = skb_clone(skb, GFP_ATOMIC);
+	if (skb->data[0] == LOWPAN_DISPATCH_IPV6) {
+		/* Copy the packet so that the IPv6 header is
+		 * properly aligned.
+		 */
+		local_skb = skb_copy_expand(skb, NET_SKB_PAD - 1,
+					    skb_tailroom(skb), GFP_ATOMIC);
 		if (!local_skb)
 			goto drop;
-		lowpan_process_data(local_skb);
 
+		local_skb->protocol = htons(ETH_P_IPV6);
+		local_skb->pkt_type = PACKET_HOST;
+
+		/* Pull off the 1-byte of 6lowpan header. */
+		skb_pull(local_skb, 1);
+		skb_reset_network_header(local_skb);
+		skb_set_transport_header(local_skb, sizeof(struct ipv6hdr));
+
+		lowpan_give_skb_to_devices(local_skb);
+
+		kfree_skb(local_skb);
 		kfree_skb(skb);
-		break;
-	default:
-		break;
+	} else {
+		switch (skb->data[0] & 0xe0) {
+		case LOWPAN_DISPATCH_IPHC:	/* ipv6 datagram */
+		case LOWPAN_DISPATCH_FRAG1:	/* first fragment header */
+		case LOWPAN_DISPATCH_FRAGN:	/* next fragments headers */
+			local_skb = skb_clone(skb, GFP_ATOMIC);
+			if (!local_skb)
+				goto drop;
+			lowpan_process_data(local_skb);
+
+			kfree_skb(skb);
+			break;
+		default:
+			break;
+		}
 	}
 
 	return NET_RX_SUCCESS;
@@ -1234,7 +1262,7 @@ static inline int __init lowpan_netlink_init(void)
 	return rtnl_link_register(&lowpan_link_ops);
 }
 
-static inline void __init lowpan_netlink_fini(void)
+static inline void lowpan_netlink_fini(void)
 {
 	rtnl_link_unregister(&lowpan_link_ops);
 }
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 24b384b7903..e225a4e5b57 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -263,21 +263,6 @@ void build_ehash_secret(void)
 }
 EXPORT_SYMBOL(build_ehash_secret);
 
-static inline int inet_netns_ok(struct net *net, __u8 protocol)
-{
-	const struct net_protocol *ipprot;
-
-	if (net_eq(net, &init_net))
-		return 1;
-
-	ipprot = rcu_dereference(inet_protos[protocol]);
-	if (ipprot == NULL) {
-		/* raw IP is OK */
-		return 1;
-	}
-	return ipprot->netns_ok;
-}
-
 /*
  *	Create an inet socket.
  */
@@ -350,10 +335,6 @@ lookup_protocol:
 	    !ns_capable(net->user_ns, CAP_NET_RAW))
 		goto out_rcu_unlock;
 
-	err = -EAFNOSUPPORT;
-	if (!inet_netns_ok(net, protocol))
-		goto out_rcu_unlock;
-
 	sock->ops = answer->ops;
 	answer_prot = answer->prot;
 	answer_no_check = answer->no_check;
@@ -1306,6 +1287,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_UDP |
 		       SKB_GSO_DODGY |
 		       SKB_GSO_TCP_ECN |
+		       SKB_GSO_GRE |
 		       0)))
 		goto out;
 
@@ -1333,7 +1315,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 		segs = ops->callbacks.gso_segment(skb, features);
 	rcu_read_unlock();
 
-	if (!segs || IS_ERR(segs))
+	if (IS_ERR_OR_NULL(segs))
 		goto out;
 
 	skb = segs;
@@ -1705,12 +1687,11 @@ static struct packet_type ip_packet_type __read_mostly = {
 
 static int __init inet_init(void)
 {
-	struct sk_buff *dummy_skb;
 	struct inet_protosw *q;
 	struct list_head *r;
 	int rc = -EINVAL;
 
-	BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb));
+	BUILD_BUG_ON(sizeof(struct inet_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb));
 
 	sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
 	if (!sysctl_local_reserved_ports)
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index a0d8392491c..2e7f1948216 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -269,7 +269,11 @@ static void ah_input_done(struct crypto_async_request *base, int err)
 	skb->network_header += ah_hlen;
 	memcpy(skb_network_header(skb), work_iph, ihl);
 	__skb_pull(skb, ah_hlen + ihl);
-	skb_set_transport_header(skb, -ihl);
+
+	if (x->props.mode == XFRM_MODE_TUNNEL)
+		skb_reset_transport_header(skb);
+	else
+		skb_set_transport_header(skb, -ihl);
 out:
 	kfree(AH_SKB_CB(skb)->tmp);
 	xfrm_input_resume(skb, err);
@@ -317,8 +321,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	/* We are going to _remove_ AH header to keep sockets happy,
 	 * so... Later this can change. */
-	if (skb_cloned(skb) &&
-	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+	if (skb_unclone(skb, GFP_ATOMIC))
 		goto out;
 
 	skb->ip_summed = CHECKSUM_NONE;
@@ -381,7 +384,10 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 	skb->network_header += ah_hlen;
 	memcpy(skb_network_header(skb), work_iph, ihl);
 	__skb_pull(skb, ah_hlen + ihl);
-	skb_set_transport_header(skb, -ihl);
+	if (x->props.mode == XFRM_MODE_TUNNEL)
+		skb_reset_transport_header(skb);
+	else
+		skb_set_transport_header(skb, -ihl);
 
 	err = nexthdr;
 
@@ -413,9 +419,12 @@ static void ah4_err(struct sk_buff *skb, u32 info)
 	if (!x)
 		return;
 
-	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
+	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) {
+		atomic_inc(&flow_cache_genid);
+		rt_genid_bump(net);
+
 		ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0);
-	else
+	} else
 		ipv4_redirect(skb, net, 0, 0, IPPROTO_AH, 0);
 	xfrm_state_put(x);
 }
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index ce6fbdfd40b..ded146b217f 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -321,7 +321,7 @@ static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb)
 static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
 {
 	__be32 saddr = 0;
-	u8  *dst_ha = NULL;
+	u8 dst_ha[MAX_ADDR_LEN], *dst_hw = NULL;
 	struct net_device *dev = neigh->dev;
 	__be32 target = *(__be32 *)neigh->primary_key;
 	int probes = atomic_read(&neigh->probes);
@@ -363,8 +363,8 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
 	if (probes < 0) {
 		if (!(neigh->nud_state & NUD_VALID))
 			pr_debug("trying to ucast probe in NUD_INVALID\n");
-		dst_ha = neigh->ha;
-		read_lock_bh(&neigh->lock);
+		neigh_ha_snapshot(dst_ha, neigh, dev);
+		dst_hw = dst_ha;
 	} else {
 		probes -= neigh->parms->app_probes;
 		if (probes < 0) {
@@ -376,9 +376,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
 	}
 
 	arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr,
-		 dst_ha, dev->dev_addr, NULL);
-	if (dst_ha)
-		read_unlock_bh(&neigh->lock);
+		 dst_hw, dev->dev_addr, NULL);
 }
 
 static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip)
@@ -930,24 +928,25 @@ static void parp_redo(struct sk_buff *skb)
 static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
 		   struct packet_type *pt, struct net_device *orig_dev)
 {
-	struct arphdr *arp;
+	const struct arphdr *arp;
+
+	if (dev->flags & IFF_NOARP ||
+	    skb->pkt_type == PACKET_OTHERHOST ||
+	    skb->pkt_type == PACKET_LOOPBACK)
+		goto freeskb;
+
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (!skb)
+		goto out_of_mem;
 
 	/* ARP header, plus 2 device addresses, plus 2 IP addresses.  */
 	if (!pskb_may_pull(skb, arp_hdr_len(dev)))
 		goto freeskb;
 
 	arp = arp_hdr(skb);
-	if (arp->ar_hln != dev->addr_len ||
-	    dev->flags & IFF_NOARP ||
-	    skb->pkt_type == PACKET_OTHERHOST ||
-	    skb->pkt_type == PACKET_LOOPBACK ||
-	    arp->ar_pln != 4)
+	if (arp->ar_hln != dev->addr_len || arp->ar_pln != 4)
 		goto freeskb;
 
-	skb = skb_share_check(skb, GFP_ATOMIC);
-	if (skb == NULL)
-		goto out_of_mem;
-
 	memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
 
 	return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, skb, dev, NULL, arp_process);
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 424fafbc8cb..b28e863fe0a 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -85,3 +85,28 @@ out:
 	return err;
 }
 EXPORT_SYMBOL(ip4_datagram_connect);
+
+void ip4_datagram_release_cb(struct sock *sk)
+{
+	const struct inet_sock *inet = inet_sk(sk);
+	const struct ip_options_rcu *inet_opt;
+	__be32 daddr = inet->inet_daddr;
+	struct flowi4 fl4;
+	struct rtable *rt;
+
+	if (! __sk_dst_get(sk) || __sk_dst_check(sk, 0))
+		return;
+
+	rcu_read_lock();
+	inet_opt = rcu_dereference(inet->inet_opt);
+	if (inet_opt && inet_opt->opt.srr)
+		daddr = inet_opt->opt.faddr;
+	rt = ip_route_output_ports(sock_net(sk), &fl4, sk, daddr,
+				   inet->inet_saddr, inet->inet_dport,
+				   inet->inet_sport, sk->sk_protocol,
+				   RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
+	if (!IS_ERR(rt))
+		__sk_dst_set(sk, &rt->dst);
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(ip4_datagram_release_cb);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index cc06a47f121..5281314886c 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -63,6 +63,7 @@
 #include <net/ip_fib.h>
 #include <net/rtnetlink.h>
 #include <net/net_namespace.h>
+#include <net/addrconf.h>
 
 #include "fib_lookup.h"
 
@@ -93,6 +94,7 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
+	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
 };
 
 #define IN4_ADDR_HSIZE_SHIFT	8
@@ -417,6 +419,10 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
 }
 
+static void check_lifetime(struct work_struct *work);
+
+static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
+
 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 			     u32 portid)
 {
@@ -462,6 +468,9 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 
 	inet_hash_insert(dev_net(in_dev->dev), ifa);
 
+	cancel_delayed_work(&check_lifetime_work);
+	schedule_delayed_work(&check_lifetime_work, 0);
+
 	/* Send message first, then call notifier.
 	   Notifier will trigger FIB update, so that
 	   listeners of netlink will know about new ifaddr */
@@ -573,7 +582,107 @@ errout:
 	return err;
 }
 
-static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
+#define INFINITY_LIFE_TIME	0xFFFFFFFF
+
+static void check_lifetime(struct work_struct *work)
+{
+	unsigned long now, next, next_sec, next_sched;
+	struct in_ifaddr *ifa;
+	struct hlist_node *node;
+	int i;
+
+	now = jiffies;
+	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
+
+	rcu_read_lock();
+	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
+		hlist_for_each_entry_rcu(ifa, node,
+					 &inet_addr_lst[i], hash) {
+			unsigned long age;
+
+			if (ifa->ifa_flags & IFA_F_PERMANENT)
+				continue;
+
+			/* We try to batch several events at once. */
+			age = (now - ifa->ifa_tstamp +
+			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
+
+			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
+			    age >= ifa->ifa_valid_lft) {
+				struct in_ifaddr **ifap ;
+
+				rtnl_lock();
+				for (ifap = &ifa->ifa_dev->ifa_list;
+				     *ifap != NULL; ifap = &ifa->ifa_next) {
+					if (*ifap == ifa)
+						inet_del_ifa(ifa->ifa_dev,
+							     ifap, 1);
+				}
+				rtnl_unlock();
+			} else if (ifa->ifa_preferred_lft ==
+				   INFINITY_LIFE_TIME) {
+				continue;
+			} else if (age >= ifa->ifa_preferred_lft) {
+				if (time_before(ifa->ifa_tstamp +
+						ifa->ifa_valid_lft * HZ, next))
+					next = ifa->ifa_tstamp +
+					       ifa->ifa_valid_lft * HZ;
+
+				if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) {
+					ifa->ifa_flags |= IFA_F_DEPRECATED;
+					rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
+				}
+			} else if (time_before(ifa->ifa_tstamp +
+					       ifa->ifa_preferred_lft * HZ,
+					       next)) {
+				next = ifa->ifa_tstamp +
+				       ifa->ifa_preferred_lft * HZ;
+			}
+		}
+	}
+	rcu_read_unlock();
+
+	next_sec = round_jiffies_up(next);
+	next_sched = next;
+
+	/* If rounded timeout is accurate enough, accept it. */
+	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
+		next_sched = next_sec;
+
+	now = jiffies;
+	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
+	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
+		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
+
+	schedule_delayed_work(&check_lifetime_work, next_sched - now);
+}
+
+static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
+			     __u32 prefered_lft)
+{
+	unsigned long timeout;
+
+	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
+
+	timeout = addrconf_timeout_fixup(valid_lft, HZ);
+	if (addrconf_finite_timeout(timeout))
+		ifa->ifa_valid_lft = timeout;
+	else
+		ifa->ifa_flags |= IFA_F_PERMANENT;
+
+	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
+	if (addrconf_finite_timeout(timeout)) {
+		if (timeout == 0)
+			ifa->ifa_flags |= IFA_F_DEPRECATED;
+		ifa->ifa_preferred_lft = timeout;
+	}
+	ifa->ifa_tstamp = jiffies;
+	if (!ifa->ifa_cstamp)
+		ifa->ifa_cstamp = ifa->ifa_tstamp;
+}
+
+static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
+				       __u32 *pvalid_lft, __u32 *pprefered_lft)
 {
 	struct nlattr *tb[IFA_MAX+1];
 	struct in_ifaddr *ifa;
@@ -633,24 +742,73 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
 	else
 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
 
+	if (tb[IFA_CACHEINFO]) {
+		struct ifa_cacheinfo *ci;
+
+		ci = nla_data(tb[IFA_CACHEINFO]);
+		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
+			err = -EINVAL;
+			goto errout;
+		}
+		*pvalid_lft = ci->ifa_valid;
+		*pprefered_lft = ci->ifa_prefered;
+	}
+
 	return ifa;
 
 errout:
 	return ERR_PTR(err);
 }
 
+static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
+{
+	struct in_device *in_dev = ifa->ifa_dev;
+	struct in_ifaddr *ifa1, **ifap;
+
+	if (!ifa->ifa_local)
+		return NULL;
+
+	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
+	     ifap = &ifa1->ifa_next) {
+		if (ifa1->ifa_mask == ifa->ifa_mask &&
+		    inet_ifa_match(ifa1->ifa_address, ifa) &&
+		    ifa1->ifa_local == ifa->ifa_local)
+			return ifa1;
+	}
+	return NULL;
+}
+
 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct net *net = sock_net(skb->sk);
 	struct in_ifaddr *ifa;
+	struct in_ifaddr *ifa_existing;
+	__u32 valid_lft = INFINITY_LIFE_TIME;
+	__u32 prefered_lft = INFINITY_LIFE_TIME;
 
 	ASSERT_RTNL();
 
-	ifa = rtm_to_ifaddr(net, nlh);
+	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
 	if (IS_ERR(ifa))
 		return PTR_ERR(ifa);
 
-	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
+	ifa_existing = find_matching_ifa(ifa);
+	if (!ifa_existing) {
+		/* It would be best to check for !NLM_F_CREATE here but
+		 * userspace alreay relies on not having to provide this.
+		 */
+		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
+		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
+	} else {
+		inet_free_ifa(ifa);
+
+		if (nlh->nlmsg_flags & NLM_F_EXCL ||
+		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
+			return -EEXIST;
+
+		set_ifa_lifetime(ifa_existing, valid_lft, prefered_lft);
+	}
+	return 0;
 }
 
 /*
@@ -823,9 +981,9 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 		if (!ifa) {
 			ret = -ENOBUFS;
 			ifa = inet_alloc_ifa();
-			INIT_HLIST_NODE(&ifa->hash);
 			if (!ifa)
 				break;
+			INIT_HLIST_NODE(&ifa->hash);
 			if (colon)
 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
 			else
@@ -852,6 +1010,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 			ifa->ifa_prefixlen = 32;
 			ifa->ifa_mask = inet_make_mask(32);
 		}
+		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
 		ret = inet_set_ifa(dev, ifa);
 		break;
 
@@ -1190,6 +1349,8 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 				ifa->ifa_dev = in_dev;
 				ifa->ifa_scope = RT_SCOPE_HOST;
 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
+				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
+						 INFINITY_LIFE_TIME);
 				inet_insert_ifa(ifa);
 			}
 		}
@@ -1246,11 +1407,30 @@ static size_t inet_nlmsg_size(void)
 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
 }
 
+static inline u32 cstamp_delta(unsigned long cstamp)
+{
+	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
+}
+
+static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
+			 unsigned long tstamp, u32 preferred, u32 valid)
+{
+	struct ifa_cacheinfo ci;
+
+	ci.cstamp = cstamp_delta(cstamp);
+	ci.tstamp = cstamp_delta(tstamp);
+	ci.ifa_prefered = preferred;
+	ci.ifa_valid = valid;
+
+	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
+}
+
 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
 			    u32 portid, u32 seq, int event, unsigned int flags)
 {
 	struct ifaddrmsg *ifm;
 	struct nlmsghdr  *nlh;
+	u32 preferred, valid;
 
 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
 	if (nlh == NULL)
@@ -1259,10 +1439,31 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
 	ifm = nlmsg_data(nlh);
 	ifm->ifa_family = AF_INET;
 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
-	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
+	ifm->ifa_flags = ifa->ifa_flags;
 	ifm->ifa_scope = ifa->ifa_scope;
 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
 
+	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
+		preferred = ifa->ifa_preferred_lft;
+		valid = ifa->ifa_valid_lft;
+		if (preferred != INFINITY_LIFE_TIME) {
+			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
+
+			if (preferred > tval)
+				preferred -= tval;
+			else
+				preferred = 0;
+			if (valid != INFINITY_LIFE_TIME) {
+				if (valid > tval)
+					valid -= tval;
+				else
+					valid = 0;
+			}
+		}
+	} else {
+		preferred = INFINITY_LIFE_TIME;
+		valid = INFINITY_LIFE_TIME;
+	}
 	if ((ifa->ifa_address &&
 	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
 	    (ifa->ifa_local &&
@@ -1270,7 +1471,9 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
 	    (ifa->ifa_broadcast &&
 	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
 	    (ifa->ifa_label[0] &&
-	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)))
+	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
+	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
+			  preferred, valid))
 		goto nla_put_failure;
 
 	return nlmsg_end(skb, nlh);
@@ -1988,6 +2191,8 @@ void __init devinet_init(void)
 	register_gifconf(PF_INET, inet_gifconf);
 	register_netdevice_notifier(&ip_netdev_notifier);
 
+	schedule_delayed_work(&check_lifetime_work, 0);
+
 	rtnl_af_register(&inet_af_ops);
 
 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index b61e9deb7c7..3b4f0cd2e63 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -346,7 +346,10 @@ static int esp_input_done2(struct sk_buff *skb, int err)
 
 	pskb_trim(skb, skb->len - alen - padlen - 2);
 	__skb_pull(skb, hlen);
-	skb_set_transport_header(skb, -ihl);
+	if (x->props.mode == XFRM_MODE_TUNNEL)
+		skb_reset_transport_header(skb);
+	else
+		skb_set_transport_header(skb, -ihl);
 
 	err = nexthdr[1];
 
@@ -499,9 +502,12 @@ static void esp4_err(struct sk_buff *skb, u32 info)
 	if (!x)
 		return;
 
-	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
+	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) {
+		atomic_inc(&flow_cache_genid);
+		rt_genid_bump(net);
+
 		ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0);
-	else
+	} else
 		ipv4_redirect(skb, net, 0, 0, IPPROTO_ESP, 0);
 	xfrm_state_put(x);
 }
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 5cd75e2dab2..99f00d39d10 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -974,7 +974,7 @@ static void nl_fib_input(struct sk_buff *skb)
 
 	nl_fib_lookup(frn, tb);
 
-	portid = NETLINK_CB(skb).portid;      /* pid of sending process */
+	portid = NETLINK_CB(skb).portid;      /* netlink portid */
 	NETLINK_CB(skb).portid = 0;        /* from kernel */
 	NETLINK_CB(skb).dst_group = 0;  /* unicast */
 	netlink_unicast(net->ipv4.fibnl, skb, portid, MSG_DONTWAIT);
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index 42a491055c7..7a4c710c4cd 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -19,6 +19,7 @@
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/netdevice.h>
+#include <linux/if_tunnel.h>
 #include <linux/spinlock.h>
 #include <net/protocol.h>
 #include <net/gre.h>
@@ -26,6 +27,11 @@
 
 static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
 static DEFINE_SPINLOCK(gre_proto_lock);
+struct gre_base_hdr {
+	__be16 flags;
+	__be16 protocol;
+};
+#define GRE_HEADER_SECTION 4
 
 int gre_add_protocol(const struct gre_protocol *proto, u8 version)
 {
@@ -112,12 +118,117 @@ static void gre_err(struct sk_buff *skb, u32 info)
 	rcu_read_unlock();
 }
 
+static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
+				       netdev_features_t features)
+{
+	struct sk_buff *segs = ERR_PTR(-EINVAL);
+	netdev_features_t enc_features;
+	int ghl = GRE_HEADER_SECTION;
+	struct gre_base_hdr *greh;
+	int mac_len = skb->mac_len;
+	int tnl_hlen;
+	bool csum;
+
+	if (unlikely(skb_shinfo(skb)->gso_type &
+				~(SKB_GSO_TCPV4 |
+				  SKB_GSO_TCPV6 |
+				  SKB_GSO_UDP |
+				  SKB_GSO_DODGY |
+				  SKB_GSO_TCP_ECN |
+				  SKB_GSO_GRE)))
+		goto out;
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(*greh))))
+		goto out;
+
+	greh = (struct gre_base_hdr *)skb_transport_header(skb);
+
+	if (greh->flags & GRE_KEY)
+		ghl += GRE_HEADER_SECTION;
+	if (greh->flags & GRE_SEQ)
+		ghl += GRE_HEADER_SECTION;
+	if (greh->flags & GRE_CSUM) {
+		ghl += GRE_HEADER_SECTION;
+		csum = true;
+	} else
+		csum = false;
+
+	/* setup inner skb. */
+	if (greh->protocol == htons(ETH_P_TEB)) {
+		struct ethhdr *eth = eth_hdr(skb);
+		skb->protocol = eth->h_proto;
+	} else {
+		skb->protocol = greh->protocol;
+	}
+
+	skb->encapsulation = 0;
+
+	if (unlikely(!pskb_may_pull(skb, ghl)))
+		goto out;
+	__skb_pull(skb, ghl);
+	skb_reset_mac_header(skb);
+	skb_set_network_header(skb, skb_inner_network_offset(skb));
+	skb->mac_len = skb_inner_network_offset(skb);
+
+	/* segment inner packet. */
+	enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
+	segs = skb_mac_gso_segment(skb, enc_features);
+	if (!segs || IS_ERR(segs))
+		goto out;
+
+	skb = segs;
+	tnl_hlen = skb_tnl_header_len(skb);
+	do {
+		__skb_push(skb, ghl);
+		if (csum) {
+			__be32 *pcsum;
+
+			if (skb_has_shared_frag(skb)) {
+				int err;
+
+				err = __skb_linearize(skb);
+				if (err) {
+					kfree_skb(segs);
+					segs = ERR_PTR(err);
+					goto out;
+				}
+			}
+
+			greh = (struct gre_base_hdr *)(skb->data);
+			pcsum = (__be32 *)(greh + 1);
+			*pcsum = 0;
+			*(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0));
+		}
+		__skb_push(skb, tnl_hlen - ghl);
+
+		skb_reset_mac_header(skb);
+		skb_set_network_header(skb, mac_len);
+		skb->mac_len = mac_len;
+	} while ((skb = skb->next));
+out:
+	return segs;
+}
+
+static int gre_gso_send_check(struct sk_buff *skb)
+{
+	if (!skb->encapsulation)
+		return -EINVAL;
+	return 0;
+}
+
 static const struct net_protocol net_gre_protocol = {
 	.handler     = gre_rcv,
 	.err_handler = gre_err,
 	.netns_ok    = 1,
 };
 
+static const struct net_offload gre_offload = {
+	.callbacks = {
+		.gso_send_check =	gre_gso_send_check,
+		.gso_segment    =	gre_gso_segment,
+	},
+};
+
 static int __init gre_init(void)
 {
 	pr_info("GRE over IPv4 demultiplexor driver\n");
@@ -127,11 +238,18 @@ static int __init gre_init(void)
 		return -EAGAIN;
 	}
 
+	if (inet_add_offload(&gre_offload, IPPROTO_GRE)) {
+		pr_err("can't add protocol offload\n");
+		inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
+		return -EAGAIN;
+	}
+
 	return 0;
 }
 
 static void __exit gre_exit(void)
 {
+	inet_del_offload(&gre_offload, IPPROTO_GRE);
 	inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
 }
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index d0670f00d52..11cb4979a46 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -59,6 +59,8 @@ int inet_csk_bind_conflict(const struct sock *sk,
 	struct sock *sk2;
 	struct hlist_node *node;
 	int reuse = sk->sk_reuse;
+	int reuseport = sk->sk_reuseport;
+	kuid_t uid = sock_i_uid((struct sock *)sk);
 
 	/*
 	 * Unlike other sk lookup places we do not check
@@ -73,8 +75,11 @@ int inet_csk_bind_conflict(const struct sock *sk,
 		    (!sk->sk_bound_dev_if ||
 		     !sk2->sk_bound_dev_if ||
 		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
-			if (!reuse || !sk2->sk_reuse ||
-			    sk2->sk_state == TCP_LISTEN) {
+			if ((!reuse || !sk2->sk_reuse ||
+			    sk2->sk_state == TCP_LISTEN) &&
+			    (!reuseport || !sk2->sk_reuseport ||
+			    (sk2->sk_state != TCP_TIME_WAIT &&
+			     !uid_eq(uid, sock_i_uid(sk2))))) {
 				const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
 				if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
 				    sk2_rcv_saddr == sk_rcv_saddr(sk))
@@ -106,6 +111,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
 	int ret, attempts = 5;
 	struct net *net = sock_net(sk);
 	int smallest_size = -1, smallest_rover;
+	kuid_t uid = sock_i_uid(sk);
 
 	local_bh_disable();
 	if (!snum) {
@@ -125,9 +131,12 @@ again:
 			spin_lock(&head->lock);
 			inet_bind_bucket_for_each(tb, node, &head->chain)
 				if (net_eq(ib_net(tb), net) && tb->port == rover) {
-					if (tb->fastreuse > 0 &&
-					    sk->sk_reuse &&
-					    sk->sk_state != TCP_LISTEN &&
+					if (((tb->fastreuse > 0 &&
+					      sk->sk_reuse &&
+					      sk->sk_state != TCP_LISTEN) ||
+					     (tb->fastreuseport > 0 &&
+					      sk->sk_reuseport &&
+					      uid_eq(tb->fastuid, uid))) &&
 					    (tb->num_owners < smallest_size || smallest_size == -1)) {
 						smallest_size = tb->num_owners;
 						smallest_rover = rover;
@@ -185,14 +194,18 @@ tb_found:
 		if (sk->sk_reuse == SK_FORCE_REUSE)
 			goto success;
 
-		if (tb->fastreuse > 0 &&
-		    sk->sk_reuse && sk->sk_state != TCP_LISTEN &&
+		if (((tb->fastreuse > 0 &&
+		      sk->sk_reuse && sk->sk_state != TCP_LISTEN) ||
+		     (tb->fastreuseport > 0 &&
+		      sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
 		    smallest_size == -1) {
 			goto success;
 		} else {
 			ret = 1;
 			if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) {
-				if (sk->sk_reuse && sk->sk_state != TCP_LISTEN &&
+				if (((sk->sk_reuse && sk->sk_state != TCP_LISTEN) ||
+				     (tb->fastreuseport > 0 &&
+				      sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
 				    smallest_size != -1 && --attempts >= 0) {
 					spin_unlock(&head->lock);
 					goto again;
@@ -212,9 +225,19 @@ tb_not_found:
 			tb->fastreuse = 1;
 		else
 			tb->fastreuse = 0;
-	} else if (tb->fastreuse &&
-		   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
-		tb->fastreuse = 0;
+		if (sk->sk_reuseport) {
+			tb->fastreuseport = 1;
+			tb->fastuid = uid;
+		} else
+			tb->fastreuseport = 0;
+	} else {
+		if (tb->fastreuse &&
+		    (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
+			tb->fastreuse = 0;
+		if (tb->fastreuseport &&
+		    (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid)))
+			tb->fastreuseport = 0;
+	}
 success:
 	if (!inet_csk(sk)->icsk_bind_hash)
 		inet_bind_hash(sk, tb, snum);
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 4750d2b74d7..2e453bde699 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -73,8 +73,9 @@ EXPORT_SYMBOL(inet_frags_init);
 void inet_frags_init_net(struct netns_frags *nf)
 {
 	nf->nqueues = 0;
-	atomic_set(&nf->mem, 0);
+	init_frag_mem_limit(nf);
 	INIT_LIST_HEAD(&nf->lru_list);
+	spin_lock_init(&nf->lru_lock);
 }
 EXPORT_SYMBOL(inet_frags_init_net);
 
@@ -91,6 +92,8 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
 	local_bh_disable();
 	inet_frag_evictor(nf, f, true);
 	local_bh_enable();
+
+	percpu_counter_destroy(&nf->mem);
 }
 EXPORT_SYMBOL(inet_frags_exit_net);
 
@@ -98,9 +101,9 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
 {
 	write_lock(&f->lock);
 	hlist_del(&fq->list);
-	list_del(&fq->lru_list);
 	fq->net->nqueues--;
 	write_unlock(&f->lock);
+	inet_frag_lru_del(fq);
 }
 
 void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
@@ -117,12 +120,8 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
 EXPORT_SYMBOL(inet_frag_kill);
 
 static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f,
-		struct sk_buff *skb, int *work)
+		struct sk_buff *skb)
 {
-	if (work)
-		*work -= skb->truesize;
-
-	atomic_sub(skb->truesize, &nf->mem);
 	if (f->skb_free)
 		f->skb_free(skb);
 	kfree_skb(skb);
@@ -133,6 +132,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
 {
 	struct sk_buff *fp;
 	struct netns_frags *nf;
+	unsigned int sum, sum_truesize = 0;
 
 	WARN_ON(!(q->last_in & INET_FRAG_COMPLETE));
 	WARN_ON(del_timer(&q->timer) != 0);
@@ -143,13 +143,14 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
 	while (fp) {
 		struct sk_buff *xp = fp->next;
 
-		frag_kfree_skb(nf, f, fp, work);
+		sum_truesize += fp->truesize;
+		frag_kfree_skb(nf, f, fp);
 		fp = xp;
 	}
-
+	sum = sum_truesize + f->qsize;
 	if (work)
-		*work -= f->qsize;
-	atomic_sub(f->qsize, &nf->mem);
+		*work -= sum;
+	sub_frag_mem_limit(q, sum);
 
 	if (f->destructor)
 		f->destructor(q);
@@ -164,22 +165,23 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force)
 	int work, evicted = 0;
 
 	if (!force) {
-		if (atomic_read(&nf->mem) <= nf->high_thresh)
+		if (frag_mem_limit(nf) <= nf->high_thresh)
 			return 0;
 	}
 
-	work = atomic_read(&nf->mem) - nf->low_thresh;
+	work = frag_mem_limit(nf) - nf->low_thresh;
 	while (work > 0) {
-		read_lock(&f->lock);
+		spin_lock(&nf->lru_lock);
+
 		if (list_empty(&nf->lru_list)) {
-			read_unlock(&f->lock);
+			spin_unlock(&nf->lru_lock);
 			break;
 		}
 
 		q = list_first_entry(&nf->lru_list,
 				struct inet_frag_queue, lru_list);
 		atomic_inc(&q->refcnt);
-		read_unlock(&f->lock);
+		spin_unlock(&nf->lru_lock);
 
 		spin_lock(&q->lock);
 		if (!(q->last_in & INET_FRAG_COMPLETE))
@@ -233,9 +235,9 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
 
 	atomic_inc(&qp->refcnt);
 	hlist_add_head(&qp->list, &f->hash[hash]);
-	list_add_tail(&qp->lru_list, &nf->lru_list);
 	nf->nqueues++;
 	write_unlock(&f->lock);
+	inet_frag_lru_add(nf, qp);
 	return qp;
 }
 
@@ -250,7 +252,8 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
 
 	q->net = nf;
 	f->constructor(q, arg);
-	atomic_add(f->qsize, &nf->mem);
+	add_frag_mem_limit(q, f->qsize);
+
 	setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
 	spin_lock_init(&q->lock);
 	atomic_set(&q->refcnt, 1);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index fa3ae814871..0ce0595d986 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -39,6 +39,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
 		write_pnet(&tb->ib_net, hold_net(net));
 		tb->port      = snum;
 		tb->fastreuse = 0;
+		tb->fastreuseport = 0;
 		tb->num_owners = 0;
 		INIT_HLIST_HEAD(&tb->owners);
 		hlist_add_head(&tb->node, &head->chain);
@@ -151,16 +152,16 @@ static inline int compute_score(struct sock *sk, struct net *net,
 	if (net_eq(sock_net(sk), net) && inet->inet_num == hnum &&
 			!ipv6_only_sock(sk)) {
 		__be32 rcv_saddr = inet->inet_rcv_saddr;
-		score = sk->sk_family == PF_INET ? 1 : 0;
+		score = sk->sk_family == PF_INET ? 2 : 1;
 		if (rcv_saddr) {
 			if (rcv_saddr != daddr)
 				return -1;
-			score += 2;
+			score += 4;
 		}
 		if (sk->sk_bound_dev_if) {
 			if (sk->sk_bound_dev_if != dif)
 				return -1;
-			score += 2;
+			score += 4;
 		}
 	}
 	return score;
@@ -176,6 +177,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
 
 struct sock *__inet_lookup_listener(struct net *net,
 				    struct inet_hashinfo *hashinfo,
+				    const __be32 saddr, __be16 sport,
 				    const __be32 daddr, const unsigned short hnum,
 				    const int dif)
 {
@@ -183,17 +185,29 @@ struct sock *__inet_lookup_listener(struct net *net,
 	struct hlist_nulls_node *node;
 	unsigned int hash = inet_lhashfn(net, hnum);
 	struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
-	int score, hiscore;
+	int score, hiscore, matches = 0, reuseport = 0;
+	u32 phash = 0;
 
 	rcu_read_lock();
 begin:
 	result = NULL;
-	hiscore = -1;
+	hiscore = 0;
 	sk_nulls_for_each_rcu(sk, node, &ilb->head) {
 		score = compute_score(sk, net, hnum, daddr, dif);
 		if (score > hiscore) {
 			result = sk;
 			hiscore = score;
+			reuseport = sk->sk_reuseport;
+			if (reuseport) {
+				phash = inet_ehashfn(net, daddr, hnum,
+						     saddr, sport);
+				matches = 1;
+			}
+		} else if (score == hiscore && reuseport) {
+			matches++;
+			if (((u64)phash * matches) >> 32 == 0)
+				result = sk;
+			phash = next_pseudo_random32(phash);
 		}
 	}
 	/*
@@ -501,7 +515,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 			inet_bind_bucket_for_each(tb, node, &head->chain) {
 				if (net_eq(ib_net(tb), net) &&
 				    tb->port == port) {
-					if (tb->fastreuse >= 0)
+					if (tb->fastreuse >= 0 ||
+					    tb->fastreuseport >= 0)
 						goto next_port;
 					WARN_ON(hlist_empty(&tb->owners));
 					if (!check_established(death_row, sk,
@@ -518,6 +533,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 				break;
 			}
 			tb->fastreuse = -1;
+			tb->fastreuseport = -1;
 			goto ok;
 
 		next_port:
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index eb9d63a570c..b6d30acb600 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -122,7 +122,7 @@ int ip_frag_nqueues(struct net *net)
 
 int ip_frag_mem(struct net *net)
 {
-	return atomic_read(&net->ipv4.frags.mem);
+	return sum_frag_mem_limit(&net->ipv4.frags);
 }
 
 static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
@@ -161,13 +161,6 @@ static bool ip4_frag_match(struct inet_frag_queue *q, void *a)
 		qp->user == arg->user;
 }
 
-/* Memory Tracking Functions. */
-static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb)
-{
-	atomic_sub(skb->truesize, &nf->mem);
-	kfree_skb(skb);
-}
-
 static void ip4_frag_init(struct inet_frag_queue *q, void *a)
 {
 	struct ipq *qp = container_of(q, struct ipq, q);
@@ -340,6 +333,7 @@ static inline int ip_frag_too_far(struct ipq *qp)
 static int ip_frag_reinit(struct ipq *qp)
 {
 	struct sk_buff *fp;
+	unsigned int sum_truesize = 0;
 
 	if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) {
 		atomic_inc(&qp->q.refcnt);
@@ -349,9 +343,12 @@ static int ip_frag_reinit(struct ipq *qp)
 	fp = qp->q.fragments;
 	do {
 		struct sk_buff *xp = fp->next;
-		frag_kfree_skb(qp->q.net, fp);
+
+		sum_truesize += fp->truesize;
+		kfree_skb(fp);
 		fp = xp;
 	} while (fp);
+	sub_frag_mem_limit(&qp->q, sum_truesize);
 
 	qp->q.last_in = 0;
 	qp->q.len = 0;
@@ -496,7 +493,8 @@ found:
 				qp->q.fragments = next;
 
 			qp->q.meat -= free_it->len;
-			frag_kfree_skb(qp->q.net, free_it);
+			sub_frag_mem_limit(&qp->q, free_it->truesize);
+			kfree_skb(free_it);
 		}
 	}
 
@@ -519,7 +517,7 @@ found:
 	qp->q.stamp = skb->tstamp;
 	qp->q.meat += skb->len;
 	qp->ecn |= ecn;
-	atomic_add(skb->truesize, &qp->q.net->mem);
+	add_frag_mem_limit(&qp->q, skb->truesize);
 	if (offset == 0)
 		qp->q.last_in |= INET_FRAG_FIRST_IN;
 
@@ -531,9 +529,7 @@ found:
 	    qp->q.meat == qp->q.len)
 		return ip_frag_reasm(qp, prev, dev);
 
-	write_lock(&ip4_frags.lock);
-	list_move_tail(&qp->q.lru_list, &qp->q.net->lru_list);
-	write_unlock(&ip4_frags.lock);
+	inet_frag_lru_move(&qp->q);
 	return -EINPROGRESS;
 
 err:
@@ -594,7 +590,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 		goto out_oversize;
 
 	/* Head of list must not be cloned. */
-	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
+	if (skb_unclone(head, GFP_ATOMIC))
 		goto out_nomem;
 
 	/* If the first fragment is fragmented itself, we split
@@ -617,7 +613,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 		head->len -= clone->len;
 		clone->csum = 0;
 		clone->ip_summed = head->ip_summed;
-		atomic_add(clone->truesize, &qp->q.net->mem);
+		add_frag_mem_limit(&qp->q, clone->truesize);
 	}
 
 	skb_push(head, head->data - skb_network_header(head));
@@ -645,7 +641,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 		}
 		fp = next;
 	}
-	atomic_sub(sum_truesize, &qp->q.net->mem);
+	sub_frag_mem_limit(&qp->q, sum_truesize);
 
 	head->next = NULL;
 	head->dev = dev;
@@ -851,14 +847,22 @@ static inline void ip4_frags_ctl_register(void)
 
 static int __net_init ipv4_frags_init_net(struct net *net)
 {
-	/*
-	 * Fragment cache limits. We will commit 256K at one time. Should we
-	 * cross that limit we will prune down to 192K. This should cope with
-	 * even the most extreme cases without allowing an attacker to
-	 * measurably harm machine performance.
+	/* Fragment cache limits.
+	 *
+	 * The fragment memory accounting code, (tries to) account for
+	 * the real memory usage, by measuring both the size of frag
+	 * queue struct (inet_frag_queue (ipv4:ipq/ipv6:frag_queue))
+	 * and the SKB's truesize.
+	 *
+	 * A 64K fragment consumes 129736 bytes (44*2944)+200
+	 * (1500 truesize == 2944, sizeof(struct ipq) == 200)
+	 *
+	 * We will commit 4MB at one time. Should we cross that limit
+	 * we will prune down to 3MB, making room for approx 8 big 64K
+	 * fragments 8x128k.
 	 */
-	net->ipv4.frags.high_thresh = 256 * 1024;
-	net->ipv4.frags.low_thresh = 192 * 1024;
+	net->ipv4.frags.high_thresh = 4 * 1024 * 1024;
+	net->ipv4.frags.low_thresh  = 3 * 1024 * 1024;
 	/*
 	 * Important NOTE! Fragment queue must be destroyed before MSL expires.
 	 * RFC791 is wrong proposing to prolongate timer each fragment arrival
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index a85ae2f7a21..a56f1182c17 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -735,10 +735,35 @@ drop:
 	return 0;
 }
 
+static struct sk_buff *handle_offloads(struct sk_buff *skb)
+{
+	int err;
+
+	if (skb_is_gso(skb)) {
+		err = skb_unclone(skb, GFP_ATOMIC);
+		if (unlikely(err))
+			goto error;
+		skb_shinfo(skb)->gso_type |= SKB_GSO_GRE;
+		return skb;
+	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		err = skb_checksum_help(skb);
+		if (unlikely(err))
+			goto error;
+	}
+	skb->ip_summed = CHECKSUM_NONE;
+
+	return skb;
+
+error:
+	kfree_skb(skb);
+	return ERR_PTR(err);
+}
+
 static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 {
+	struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats);
 	struct ip_tunnel *tunnel = netdev_priv(dev);
-	const struct iphdr  *old_iph = ip_hdr(skb);
+	const struct iphdr  *old_iph;
 	const struct iphdr  *tiph;
 	struct flowi4 fl4;
 	u8     tos;
@@ -750,17 +775,32 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 	int    gre_hlen;
 	__be32 dst;
 	int    mtu;
+	u8     ttl;
+	int    err;
+	int    pkt_len;
 
-	if (skb->ip_summed == CHECKSUM_PARTIAL &&
-	    skb_checksum_help(skb))
-		goto tx_error;
+	skb = handle_offloads(skb);
+	if (IS_ERR(skb)) {
+		dev->stats.tx_dropped++;
+		return NETDEV_TX_OK;
+	}
+
+	if (!skb->encapsulation) {
+		skb_reset_inner_headers(skb);
+		skb->encapsulation = 1;
+	}
+
+	old_iph = ip_hdr(skb);
 
 	if (dev->type == ARPHRD_ETHER)
 		IPCB(skb)->flags = 0;
 
 	if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
 		gre_hlen = 0;
-		tiph = (const struct iphdr *)skb->data;
+		if (skb->protocol == htons(ETH_P_IP))
+			tiph = (const struct iphdr *)skb->data;
+		else
+			tiph = &tunnel->parms.iph;
 	} else {
 		gre_hlen = tunnel->hlen;
 		tiph = &tunnel->parms.iph;
@@ -812,9 +852,10 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			goto tx_error;
 	}
 
+	ttl = tiph->ttl;
 	tos = tiph->tos;
-	if (tos == 1) {
-		tos = 0;
+	if (tos & 0x1) {
+		tos &= ~0x1;
 		if (skb->protocol == htons(ETH_P_IP))
 			tos = old_iph->tos;
 		else if (skb->protocol == htons(ETH_P_IPV6))
@@ -848,7 +889,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 	if (skb->protocol == htons(ETH_P_IP)) {
 		df |= (old_iph->frag_off&htons(IP_DF));
 
-		if ((old_iph->frag_off&htons(IP_DF)) &&
+		if (!skb_is_gso(skb) &&
+		    (old_iph->frag_off&htons(IP_DF)) &&
 		    mtu < ntohs(old_iph->tot_len)) {
 			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 			ip_rt_put(rt);
@@ -868,7 +910,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			}
 		}
 
-		if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
+		if (!skb_is_gso(skb) &&
+		    mtu >= IPV6_MIN_MTU &&
+		    mtu < skb->len - tunnel->hlen + gre_hlen) {
 			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 			ip_rt_put(rt);
 			goto tx_error;
@@ -904,11 +948,12 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 		dev_kfree_skb(skb);
 		skb = new_skb;
 		old_iph = ip_hdr(skb);
+		/* Warning : tiph value might point to freed memory */
 	}
 
-	skb_reset_transport_header(skb);
 	skb_push(skb, gre_hlen);
 	skb_reset_network_header(skb);
+	skb_set_transport_header(skb, sizeof(*iph));
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 			      IPSKB_REROUTED);
@@ -927,8 +972,10 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 	iph->tos		=	ipgre_ecn_encapsulate(tos, old_iph, skb);
 	iph->daddr		=	fl4.daddr;
 	iph->saddr		=	fl4.saddr;
+	iph->ttl		=	ttl;
+	iph->id			=	0;
 
-	if ((iph->ttl = tiph->ttl) == 0) {
+	if (ttl == 0) {
 		if (skb->protocol == htons(ETH_P_IP))
 			iph->ttl = old_iph->ttl;
 #if IS_ENABLED(CONFIG_IPV6)
@@ -955,13 +1002,39 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			*ptr = tunnel->parms.o_key;
 			ptr--;
 		}
-		if (tunnel->parms.o_flags&GRE_CSUM) {
+		/* Skip GRE checksum if skb is getting offloaded. */
+		if (!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE) &&
+		    (tunnel->parms.o_flags&GRE_CSUM)) {
+			int offset = skb_transport_offset(skb);
+
+			if (skb_has_shared_frag(skb)) {
+				err = __skb_linearize(skb);
+				if (err) {
+					ip_rt_put(rt);
+					goto tx_error;
+				}
+			}
+
 			*ptr = 0;
-			*(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), skb->len - sizeof(struct iphdr));
+			*(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset,
+								 skb->len - offset,
+								 0));
 		}
 	}
 
-	iptunnel_xmit(skb, dev);
+	nf_reset(skb);
+
+	pkt_len = skb->len - skb_transport_offset(skb);
+	err = ip_local_out(skb);
+	if (likely(net_xmit_eval(err) == 0)) {
+		u64_stats_update_begin(&tstats->syncp);
+		tstats->tx_bytes += pkt_len;
+		tstats->tx_packets++;
+		u64_stats_update_end(&tstats->syncp);
+	} else {
+		dev->stats.tx_errors++;
+		dev->stats.tx_aborted_errors++;
+	}
 	return NETDEV_TX_OK;
 
 #if IS_ENABLED(CONFIG_IPV6)
@@ -1031,6 +1104,11 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
 		mtu = 68;
 
 	tunnel->hlen = addend;
+	/* TCP offload with GRE SEQ is not supported. */
+	if (!(tunnel->parms.o_flags & GRE_SEQ)) {
+		dev->features		|= NETIF_F_GSO_SOFTWARE;
+		dev->hw_features	|= NETIF_F_GSO_SOFTWARE;
+	}
 
 	return mtu;
 }
@@ -1580,6 +1658,9 @@ static void ipgre_tap_setup(struct net_device *dev)
 
 	dev->iflink		= 0;
 	dev->features		|= NETIF_F_NETNS_LOCAL;
+
+	dev->features		|= GRE_FEATURES;
+	dev->hw_features	|= GRE_FEATURES;
 }
 
 static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index f1395a6fb35..87abd3e2bd3 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -208,13 +208,6 @@ static int ip_local_deliver_finish(struct sk_buff *skb)
 		if (ipprot != NULL) {
 			int ret;
 
-			if (!net_eq(net, &init_net) && !ipprot->netns_ok) {
-				net_info_ratelimited("%s: proto %d isn't netns-ready\n",
-						     __func__, protocol);
-				kfree_skb(skb);
-				goto out;
-			}
-
 			if (!ipprot->no_policy) {
 				if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 					kfree_skb(skb);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 3e98ed2bff5..5e12dca7b3d 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -598,6 +598,7 @@ slow_path:
 	/* for offloaded checksums cleanup checksum before fragmentation */
 	if ((skb->ip_summed == CHECKSUM_PARTIAL) && skb_checksum_help(skb))
 		goto fail;
+	iph = ip_hdr(skb);
 
 	left = skb->len - hlen;		/* Space per frame */
 	ptr = hlen;		/* Where to start from */
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 3c9d2088028..d9c4f113d70 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -590,7 +590,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 	case IP_TTL:
 		if (optlen < 1)
 			goto e_inval;
-		if (val != -1 && (val < 0 || val > 255))
+		if (val != -1 && (val < 1 || val > 255))
 			goto e_inval;
 		inet->uc_ttl = val;
 		break;
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index d3ab47e19a8..f01d1b1aff7 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -47,9 +47,12 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
 	if (!x)
 		return;
 
-	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
+	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) {
+		atomic_inc(&flow_cache_genid);
+		rt_genid_bump(net);
+
 		ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0);
-	else
+	} else
 		ipv4_redirect(skb, net, 0, 0, IPPROTO_COMP, 0);
 	xfrm_state_put(x);
 }
@@ -160,6 +163,7 @@ static const struct net_protocol ipcomp4_protocol = {
 	.handler	=	xfrm4_rcv,
 	.err_handler	=	ipcomp4_err,
 	.no_policy	=	1,
+	.netns_ok	=	1,
 };
 
 static int __init ipcomp4_init(void)
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index d763701cff1..a2e50ae80b5 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -136,6 +136,8 @@ __be32 ic_myaddr = NONE;		/* My IP address */
 static __be32 ic_netmask = NONE;	/* Netmask for local subnet */
 __be32 ic_gateway = NONE;	/* Gateway IP address */
 
+__be32 ic_addrservaddr = NONE;	/* IP Address of the IP addresses'server */
+
 __be32 ic_servaddr = NONE;	/* Boot server IP address */
 
 __be32 root_server_addr = NONE;	/* Address of NFS server */
@@ -558,6 +560,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 	if (ic_myaddr == NONE)
 		ic_myaddr = tip;
 	ic_servaddr = sip;
+	ic_addrservaddr = sip;
 	ic_got_reply = IC_RARP;
 
 drop_unlock:
@@ -1068,7 +1071,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
 				ic_servaddr = server_id;
 #ifdef IPCONFIG_DEBUG
 				printk("DHCP: Offered address %pI4 by server %pI4\n",
-				       &ic_myaddr, &ic_servaddr);
+				       &ic_myaddr, &b->iph.saddr);
 #endif
 				/* The DHCP indicated server address takes
 				 * precedence over the bootp header one if
@@ -1113,6 +1116,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
 	ic_dev = dev;
 	ic_myaddr = b->your_ip;
 	ic_servaddr = b->server_ip;
+	ic_addrservaddr = b->iph.saddr;
 	if (ic_gateway == NONE && b->relay_ip)
 		ic_gateway = b->relay_ip;
 	if (ic_nameservers[0] == NONE)
@@ -1268,7 +1272,7 @@ static int __init ic_dynamic(void)
 	printk("IP-Config: Got %s answer from %pI4, ",
 		((ic_got_reply & IC_RARP) ? "RARP"
 		 : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"),
-	       &ic_servaddr);
+	       &ic_addrservaddr);
 	pr_cont("my address is %pI4\n", &ic_myaddr);
 
 	return 0;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 191fc24a745..8f024d41eef 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -472,7 +472,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	__be16 df = tiph->frag_off;
 	struct rtable *rt;     			/* Route to the other host */
 	struct net_device *tdev;		/* Device to other host */
-	const struct iphdr  *old_iph = ip_hdr(skb);
+	const struct iphdr  *old_iph;
 	struct iphdr  *iph;			/* Our new IP header */
 	unsigned int max_headroom;		/* The extra header space needed */
 	__be32 dst = tiph->daddr;
@@ -486,6 +486,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	    skb_checksum_help(skb))
 		goto tx_error;
 
+	old_iph = ip_hdr(skb);
+
 	if (tos & 1)
 		tos = old_iph->tos;
 
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index a9454cbd953..7085b9b51e7 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -828,6 +828,49 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
 	return NULL;
 }
 
+/* Look for a (*,*,oif) entry */
+static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt,
+						    int vifi)
+{
+	int line = MFC_HASH(htonl(INADDR_ANY), htonl(INADDR_ANY));
+	struct mfc_cache *c;
+
+	list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list)
+		if (c->mfc_origin == htonl(INADDR_ANY) &&
+		    c->mfc_mcastgrp == htonl(INADDR_ANY) &&
+		    c->mfc_un.res.ttls[vifi] < 255)
+			return c;
+
+	return NULL;
+}
+
+/* Look for a (*,G) entry */
+static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt,
+					     __be32 mcastgrp, int vifi)
+{
+	int line = MFC_HASH(mcastgrp, htonl(INADDR_ANY));
+	struct mfc_cache *c, *proxy;
+
+	if (mcastgrp == htonl(INADDR_ANY))
+		goto skip;
+
+	list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list)
+		if (c->mfc_origin == htonl(INADDR_ANY) &&
+		    c->mfc_mcastgrp == mcastgrp) {
+			if (c->mfc_un.res.ttls[vifi] < 255)
+				return c;
+
+			/* It's ok if the vifi is part of the static tree */
+			proxy = ipmr_cache_find_any_parent(mrt,
+							   c->mfc_parent);
+			if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
+				return c;
+		}
+
+skip:
+	return ipmr_cache_find_any_parent(mrt, vifi);
+}
+
 /*
  *	Allocate a multicast cache entry
  */
@@ -1053,7 +1096,7 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
  *	MFC cache manipulation by user space mroute daemon
  */
 
-static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
+static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
 {
 	int line;
 	struct mfc_cache *c, *next;
@@ -1062,7 +1105,8 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
 
 	list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
 		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
-		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
+		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr &&
+		    (parent == -1 || parent == c->mfc_parent)) {
 			list_del_rcu(&c->list);
 			mroute_netlink_event(mrt, c, RTM_DELROUTE);
 			ipmr_cache_free(c);
@@ -1073,7 +1117,7 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
 }
 
 static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
-			struct mfcctl *mfc, int mrtsock)
+			struct mfcctl *mfc, int mrtsock, int parent)
 {
 	bool found = false;
 	int line;
@@ -1086,7 +1130,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
 
 	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
 		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
-		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
+		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr &&
+		    (parent == -1 || parent == c->mfc_parent)) {
 			found = true;
 			break;
 		}
@@ -1103,7 +1148,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
 		return 0;
 	}
 
-	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
+	if (mfc->mfcc_mcastgrp.s_addr != htonl(INADDR_ANY) &&
+	    !ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
 		return -EINVAL;
 
 	c = ipmr_cache_alloc();
@@ -1218,7 +1264,7 @@ static void mrtsock_destruct(struct sock *sk)
 
 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
 {
-	int ret;
+	int ret, parent = 0;
 	struct vifctl vif;
 	struct mfcctl mfc;
 	struct net *net = sock_net(sk);
@@ -1287,16 +1333,22 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
 		 */
 	case MRT_ADD_MFC:
 	case MRT_DEL_MFC:
+		parent = -1;
+	case MRT_ADD_MFC_PROXY:
+	case MRT_DEL_MFC_PROXY:
 		if (optlen != sizeof(mfc))
 			return -EINVAL;
 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
 			return -EFAULT;
+		if (parent == 0)
+			parent = mfc.mfcc_parent;
 		rtnl_lock();
-		if (optname == MRT_DEL_MFC)
-			ret = ipmr_mfc_delete(mrt, &mfc);
+		if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY)
+			ret = ipmr_mfc_delete(mrt, &mfc, parent);
 		else
 			ret = ipmr_mfc_add(net, mrt, &mfc,
-					   sk == rtnl_dereference(mrt->mroute_sk));
+					   sk == rtnl_dereference(mrt->mroute_sk),
+					   parent);
 		rtnl_unlock();
 		return ret;
 		/*
@@ -1749,17 +1801,28 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt,
 {
 	int psend = -1;
 	int vif, ct;
+	int true_vifi = ipmr_find_vif(mrt, skb->dev);
 
 	vif = cache->mfc_parent;
 	cache->mfc_un.res.pkt++;
 	cache->mfc_un.res.bytes += skb->len;
 
+	if (cache->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
+		struct mfc_cache *cache_proxy;
+
+		/* For an (*,G) entry, we only check that the incomming
+		 * interface is part of the static tree.
+		 */
+		cache_proxy = ipmr_cache_find_any_parent(mrt, vif);
+		if (cache_proxy &&
+		    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+			goto forward;
+	}
+
 	/*
 	 * Wrong interface: drop packet and (maybe) send PIM assert.
 	 */
 	if (mrt->vif_table[vif].dev != skb->dev) {
-		int true_vifi;
-
 		if (rt_is_output_route(skb_rtable(skb))) {
 			/* It is our own packet, looped back.
 			 * Very complicated situation...
@@ -1776,7 +1839,6 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt,
 		}
 
 		cache->mfc_un.res.wrong_if++;
-		true_vifi = ipmr_find_vif(mrt, skb->dev);
 
 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
 		    /* pimsm uses asserts, when switching from RPT to SPT,
@@ -1794,15 +1856,34 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt,
 		goto dont_forward;
 	}
 
+forward:
 	mrt->vif_table[vif].pkt_in++;
 	mrt->vif_table[vif].bytes_in += skb->len;
 
 	/*
 	 *	Forward the frame
 	 */
+	if (cache->mfc_origin == htonl(INADDR_ANY) &&
+	    cache->mfc_mcastgrp == htonl(INADDR_ANY)) {
+		if (true_vifi >= 0 &&
+		    true_vifi != cache->mfc_parent &&
+		    ip_hdr(skb)->ttl >
+				cache->mfc_un.res.ttls[cache->mfc_parent]) {
+			/* It's an (*,*) entry and the packet is not coming from
+			 * the upstream: forward the packet to the upstream
+			 * only.
+			 */
+			psend = cache->mfc_parent;
+			goto last_forward;
+		}
+		goto dont_forward;
+	}
 	for (ct = cache->mfc_un.res.maxvif - 1;
 	     ct >= cache->mfc_un.res.minvif; ct--) {
-		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
+		/* For (*,G) entry, don't forward to the incoming interface */
+		if ((cache->mfc_origin != htonl(INADDR_ANY) ||
+		     ct != true_vifi) &&
+		    ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
 			if (psend != -1) {
 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 
@@ -1813,6 +1894,7 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt,
 			psend = ct;
 		}
 	}
+last_forward:
 	if (psend != -1) {
 		if (local) {
 			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
@@ -1902,6 +1984,13 @@ int ip_mr_input(struct sk_buff *skb)
 
 	/* already under rcu_read_lock() */
 	cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
+	if (cache == NULL) {
+		int vif = ipmr_find_vif(mrt, skb->dev);
+
+		if (vif >= 0)
+			cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr,
+						    vif);
+	}
 
 	/*
 	 *	No usable cache entry
@@ -2107,7 +2196,12 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
 
 	rcu_read_lock();
 	cache = ipmr_cache_find(mrt, saddr, daddr);
+	if (cache == NULL && skb->dev) {
+		int vif = ipmr_find_vif(mrt, skb->dev);
 
+		if (vif >= 0)
+			cache = ipmr_cache_find_any(mrt, daddr, vif);
+	}
 	if (cache == NULL) {
 		struct sk_buff *skb2;
 		struct iphdr *iph;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 3ea4127404d..7dc6a974359 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -901,7 +901,7 @@ static int get_info(struct net *net, void __user *user,
 #endif
 	t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name),
 				    "arptable_%s", name);
-	if (t && !IS_ERR(t)) {
+	if (!IS_ERR_OR_NULL(t)) {
 		struct arpt_getinfo info;
 		const struct xt_table_info *private = t->private;
 #ifdef CONFIG_COMPAT
@@ -958,7 +958,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
 	}
 
 	t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
-	if (t && !IS_ERR(t)) {
+	if (!IS_ERR_OR_NULL(t)) {
 		const struct xt_table_info *private = t->private;
 
 		duprintf("t->private->number = %u\n",
@@ -1001,7 +1001,7 @@ static int __do_replace(struct net *net, const char *name,
 
 	t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name),
 				    "arptable_%s", name);
-	if (!t || IS_ERR(t)) {
+	if (IS_ERR_OR_NULL(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
 		goto free_newinfo_counters_untrans;
 	}
@@ -1158,7 +1158,7 @@ static int do_add_counters(struct net *net, const void __user *user,
 	}
 
 	t = xt_find_table_lock(net, NFPROTO_ARP, name);
-	if (!t || IS_ERR(t)) {
+	if (IS_ERR_OR_NULL(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
 		goto free;
 	}
@@ -1646,7 +1646,7 @@ static int compat_get_entries(struct net *net,
 
 	xt_compat_lock(NFPROTO_ARP);
 	t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
-	if (t && !IS_ERR(t)) {
+	if (!IS_ERR_OR_NULL(t)) {
 		const struct xt_table_info *private = t->private;
 		struct xt_table_info info;
 
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 17c5e06da66..3efcf87400c 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1090,7 +1090,7 @@ static int get_info(struct net *net, void __user *user,
 #endif
 	t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
 				    "iptable_%s", name);
-	if (t && !IS_ERR(t)) {
+	if (!IS_ERR_OR_NULL(t)) {
 		struct ipt_getinfo info;
 		const struct xt_table_info *private = t->private;
 #ifdef CONFIG_COMPAT
@@ -1149,7 +1149,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr,
 	}
 
 	t = xt_find_table_lock(net, AF_INET, get.name);
-	if (t && !IS_ERR(t)) {
+	if (!IS_ERR_OR_NULL(t)) {
 		const struct xt_table_info *private = t->private;
 		duprintf("t->private->number = %u\n", private->number);
 		if (get.size == private->size)
@@ -1189,7 +1189,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 
 	t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
 				    "iptable_%s", name);
-	if (!t || IS_ERR(t)) {
+	if (IS_ERR_OR_NULL(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
 		goto free_newinfo_counters_untrans;
 	}
@@ -1347,7 +1347,7 @@ do_add_counters(struct net *net, const void __user *user,
 	}
 
 	t = xt_find_table_lock(net, AF_INET, name);
-	if (!t || IS_ERR(t)) {
+	if (IS_ERR_OR_NULL(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
 		goto free;
 	}
@@ -1931,7 +1931,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
 
 	xt_compat_lock(AF_INET);
 	t = xt_find_table_lock(net, AF_INET, get.name);
-	if (t && !IS_ERR(t)) {
+	if (!IS_ERR_OR_NULL(t)) {
 		const struct xt_table_info *private = t->private;
 		struct xt_table_info info;
 		duprintf("t->private->number = %u\n", private->number);
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 51f13f8ec72..04b18c1ac34 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -81,6 +81,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 	niph->saddr	= oiph->daddr;
 	niph->daddr	= oiph->saddr;
 
+	skb_reset_transport_header(nskb);
 	tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
 	memset(tcph, 0, sizeof(*tcph));
 	tcph->source	= oth->dest;
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index b5ef3cba225..7d168dcbd13 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -88,10 +88,8 @@ static void ulog_send(unsigned int nlgroupnum)
 {
 	ulog_buff_t *ub = &ulog_buffers[nlgroupnum];
 
-	if (timer_pending(&ub->timer)) {
-		pr_debug("ulog_send: timer was pending, deleting\n");
-		del_timer(&ub->timer);
-	}
+	pr_debug("ulog_send: timer is deleting\n");
+	del_timer(&ub->timer);
 
 	if (!ub->skb) {
 		pr_debug("ulog_send: nothing to send\n");
@@ -426,10 +424,8 @@ static void __exit ulog_tg_exit(void)
 	/* remove pending timers and free allocated skb's */
 	for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
 		ub = &ulog_buffers[i];
-		if (timer_pending(&ub->timer)) {
-			pr_debug("timer was pending, deleting\n");
-			del_timer(&ub->timer);
-		}
+		pr_debug("timer is deleting\n");
+		del_timer(&ub->timer);
 
 		if (ub->skb) {
 			kfree_skb(ub->skb);
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index da2c8a368f6..eeaff7e4acb 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -124,23 +124,28 @@ nf_nat_ipv4_fn(unsigned int hooknum,
 			ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
 			if (ret != NF_ACCEPT)
 				return ret;
-		} else
+		} else {
 			pr_debug("Already setup manip %s for ct %p\n",
 				 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
 				 ct);
+			if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
+				goto oif_changed;
+		}
 		break;
 
 	default:
 		/* ESTABLISHED */
 		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
 			     ctinfo == IP_CT_ESTABLISHED_REPLY);
-		if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) {
-			nf_ct_kill_acct(ct, ctinfo, skb);
-			return NF_DROP;
-		}
+		if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
+			goto oif_changed;
 	}
 
 	return nf_nat_packet(ct, ctinfo, hooknum, skb);
+
+oif_changed:
+	nf_ct_kill_acct(ct, ctinfo, skb);
+	return NF_DROP;
 }
 
 static unsigned int
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index fcdd0c2406e..48990ada0e1 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -420,54 +420,43 @@ static int ipv4_net_init(struct net *net)
 {
 	int ret = 0;
 
-	ret = nf_conntrack_l4proto_register(net,
-					    &nf_conntrack_l4proto_tcp4);
+	ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp4);
 	if (ret < 0) {
-		pr_err("nf_conntrack_l4proto_tcp4 :protocol register failed\n");
+		pr_err("nf_conntrack_tcp4: pernet registration failed\n");
 		goto out_tcp;
 	}
-	ret = nf_conntrack_l4proto_register(net,
-					    &nf_conntrack_l4proto_udp4);
+	ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp4);
 	if (ret < 0) {
-		pr_err("nf_conntrack_l4proto_udp4 :protocol register failed\n");
+		pr_err("nf_conntrack_udp4: pernet registration failed\n");
 		goto out_udp;
 	}
-	ret = nf_conntrack_l4proto_register(net,
-					    &nf_conntrack_l4proto_icmp);
+	ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmp);
 	if (ret < 0) {
-		pr_err("nf_conntrack_l4proto_icmp4 :protocol register failed\n");
+		pr_err("nf_conntrack_icmp4: pernet registration failed\n");
 		goto out_icmp;
 	}
-	ret = nf_conntrack_l3proto_register(net,
-					    &nf_conntrack_l3proto_ipv4);
+	ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv4);
 	if (ret < 0) {
-		pr_err("nf_conntrack_l3proto_ipv4 :protocol register failed\n");
+		pr_err("nf_conntrack_ipv4: pernet registration failed\n");
 		goto out_ipv4;
 	}
 	return 0;
 out_ipv4:
-	nf_conntrack_l4proto_unregister(net,
-					&nf_conntrack_l4proto_icmp);
+	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp);
 out_icmp:
-	nf_conntrack_l4proto_unregister(net,
-					&nf_conntrack_l4proto_udp4);
+	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4);
 out_udp:
-	nf_conntrack_l4proto_unregister(net,
-					&nf_conntrack_l4proto_tcp4);
+	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4);
 out_tcp:
 	return ret;
 }
 
 static void ipv4_net_exit(struct net *net)
 {
-	nf_conntrack_l3proto_unregister(net,
-					&nf_conntrack_l3proto_ipv4);
-	nf_conntrack_l4proto_unregister(net,
-					&nf_conntrack_l4proto_icmp);
-	nf_conntrack_l4proto_unregister(net,
-					&nf_conntrack_l4proto_udp4);
-	nf_conntrack_l4proto_unregister(net,
-					&nf_conntrack_l4proto_tcp4);
+	nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv4);
+	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp);
+	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4);
+	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4);
 }
 
 static struct pernet_operations ipv4_net_ops = {
@@ -500,16 +489,49 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
 		pr_err("nf_conntrack_ipv4: can't register hooks.\n");
 		goto cleanup_pernet;
 	}
+
+	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp4);
+	if (ret < 0) {
+		pr_err("nf_conntrack_ipv4: can't register tcp4 proto.\n");
+		goto cleanup_hooks;
+	}
+
+	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp4);
+	if (ret < 0) {
+		pr_err("nf_conntrack_ipv4: can't register udp4 proto.\n");
+		goto cleanup_tcp4;
+	}
+
+	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmp);
+	if (ret < 0) {
+		pr_err("nf_conntrack_ipv4: can't register icmpv4 proto.\n");
+		goto cleanup_udp4;
+	}
+
+	ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4);
+	if (ret < 0) {
+		pr_err("nf_conntrack_ipv4: can't register ipv4 proto.\n");
+		goto cleanup_icmpv4;
+	}
+
 #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
 	ret = nf_conntrack_ipv4_compat_init();
 	if (ret < 0)
-		goto cleanup_hooks;
+		goto cleanup_proto;
 #endif
 	return ret;
 #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
+ cleanup_proto:
+	nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
+#endif
+ cleanup_icmpv4:
+	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp);
+ cleanup_udp4:
+	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4);
+ cleanup_tcp4:
+	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
  cleanup_hooks:
 	nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
-#endif
  cleanup_pernet:
 	unregister_pernet_subsys(&ipv4_net_ops);
  cleanup_sockopt:
@@ -523,6 +545,10 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void)
 #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
 	nf_conntrack_ipv4_compat_fini();
 #endif
+	nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
+	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp);
+	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4);
+	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
 	nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
 	unregister_pernet_subsys(&ipv4_net_ops);
 	nf_unregister_sockopt(&so_getorigdst);
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 8f3d05424a3..6f9c07268cf 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -738,6 +738,7 @@ struct proto ping_prot = {
 	.recvmsg =	ping_recvmsg,
 	.bind =		ping_bind,
 	.backlog_rcv =	ping_queue_rcv_skb,
+	.release_cb =	ip4_datagram_release_cb,
 	.hash =		ping_v4_hash,
 	.unhash =	ping_v4_unhash,
 	.get_port =	ping_v4_get_port,
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 0f9d09f54bd..ce848461acb 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -37,6 +37,12 @@ const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly;
 
 int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
 {
+	if (!prot->netns_ok) {
+		pr_err("Protocol %u is not namespace aware, cannot register.\n",
+			protocol);
+		return -EINVAL;
+	}
+
 	return !cmpxchg((const struct net_protocol **)&inet_protos[protocol],
 			NULL, prot) ? 0 : -1;
 }
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 73d1e4df4bf..6f08991409c 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -894,6 +894,7 @@ struct proto raw_prot = {
 	.recvmsg	   = raw_recvmsg,
 	.bind		   = raw_bind,
 	.backlog_rcv	   = raw_rcv_skb,
+	.release_cb	   = ip4_datagram_release_cb,
 	.hash		   = raw_hash_sk,
 	.unhash		   = raw_unhash_sk,
 	.obj_size	   = sizeof(struct raw_sock),
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 844a9ef60db..a0fcc47fee7 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -912,6 +912,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
 	struct dst_entry *dst = &rt->dst;
 	struct fib_result res;
 
+	if (dst_metric_locked(dst, RTAX_MTU))
+		return;
+
 	if (dst->dev->mtu < mtu)
 		return;
 
@@ -962,7 +965,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
 }
 EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
 
-void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
+static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
 {
 	const struct iphdr *iph = (const struct iphdr *) skb->data;
 	struct flowi4 fl4;
@@ -975,6 +978,53 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
 		ip_rt_put(rt);
 	}
 }
+
+void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
+{
+	const struct iphdr *iph = (const struct iphdr *) skb->data;
+	struct flowi4 fl4;
+	struct rtable *rt;
+	struct dst_entry *dst;
+	bool new = false;
+
+	bh_lock_sock(sk);
+	rt = (struct rtable *) __sk_dst_get(sk);
+
+	if (sock_owned_by_user(sk) || !rt) {
+		__ipv4_sk_update_pmtu(skb, sk, mtu);
+		goto out;
+	}
+
+	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
+
+	if (!__sk_dst_check(sk, 0)) {
+		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
+		if (IS_ERR(rt))
+			goto out;
+
+		new = true;
+	}
+
+	__ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);
+
+	dst = dst_check(&rt->dst, 0);
+	if (!dst) {
+		if (new)
+			dst_release(&rt->dst);
+
+		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
+		if (IS_ERR(rt))
+			goto out;
+
+		new = true;
+	}
+
+	if (new)
+		__sk_dst_set(sk, &rt->dst);
+
+out:
+	bh_unlock_sock(sk);
+}
 EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
 
 void ipv4_redirect(struct sk_buff *skb, struct net *net,
@@ -1120,7 +1170,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
 	if (!mtu || time_after_eq(jiffies, rt->dst.expires))
 		mtu = dst_metric_raw(dst, RTAX_MTU);
 
-	if (mtu && rt_is_output_route(rt))
+	if (mtu)
 		return mtu;
 
 	mtu = dst->dev->mtu;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index b236ef04914..ef54377fb11 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -232,7 +232,8 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
  *
  * return false if we decode an option that should not be.
  */
-bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, bool *ecn_ok)
+bool cookie_check_timestamp(struct tcp_options_received *tcp_opt,
+			struct net *net, bool *ecn_ok)
 {
 	/* echoed timestamp, lowest bits contain options */
 	u32 options = tcp_opt->rcv_tsecr & TSMASK;
@@ -247,7 +248,7 @@ bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, bool *ecn_ok)
 
 	tcp_opt->sack_ok = (options & (1 << 4)) ? TCP_SACK_SEEN : 0;
 	*ecn_ok = (options >> 5) & 1;
-	if (*ecn_ok && !sysctl_tcp_ecn)
+	if (*ecn_ok && !net->ipv4.sysctl_tcp_ecn)
 		return false;
 
 	if (tcp_opt->sack_ok && !sysctl_tcp_sack)
@@ -295,7 +296,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
 	tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL);
 
-	if (!cookie_check_timestamp(&tcp_opt, &ecn_ok))
+	if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok))
 		goto out;
 
 	ret = NULL;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d84400b6504..960fd29d9b8 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -27,6 +27,7 @@
 #include <net/tcp_memcontrol.h>
 
 static int zero;
+static int one = 1;
 static int two = 2;
 static int tcp_retr1_max = 255;
 static int ip_local_port_range_min[] = { 1, 1 };
@@ -232,8 +233,8 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write,
 	return 0;
 }
 
-int proc_tcp_fastopen_key(ctl_table *ctl, int write, void __user *buffer,
-			  size_t *lenp, loff_t *ppos)
+static int proc_tcp_fastopen_key(ctl_table *ctl, int write, void __user *buffer,
+				 size_t *lenp, loff_t *ppos)
 {
 	ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
 	struct tcp_fastopen_context *ctxt;
@@ -538,13 +539,6 @@ static struct ctl_table ipv4_table[] = {
 		.proc_handler	= proc_dointvec
 	},
 	{
-		.procname	= "tcp_ecn",
-		.data		= &sysctl_tcp_ecn,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-	{
 		.procname	= "tcp_dsack",
 		.data		= &sysctl_tcp_dsack,
 		.maxlen		= sizeof(int),
@@ -556,14 +550,16 @@ static struct ctl_table ipv4_table[] = {
 		.data		= &sysctl_tcp_wmem,
 		.maxlen		= sizeof(sysctl_tcp_wmem),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
 	},
 	{
 		.procname	= "tcp_rmem",
 		.data		= &sysctl_tcp_rmem,
 		.maxlen		= sizeof(sysctl_tcp_rmem),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
 	},
 	{
 		.procname	= "tcp_app_win",
@@ -637,13 +633,6 @@ static struct ctl_table ipv4_table[] = {
 		.proc_handler	= proc_tcp_congestion_control,
 	},
 	{
-		.procname	= "tcp_abc",
-		.data		= &sysctl_tcp_abc,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
 		.procname	= "tcp_mtu_probing",
 		.data		= &sysctl_tcp_mtu_probing,
 		.maxlen		= sizeof(int),
@@ -786,7 +775,7 @@ static struct ctl_table ipv4_table[] = {
 		.maxlen		= sizeof(sysctl_udp_rmem_min),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero
+		.extra1		= &one
 	},
 	{
 		.procname	= "udp_wmem_min",
@@ -794,7 +783,7 @@ static struct ctl_table ipv4_table[] = {
 		.maxlen		= sizeof(sysctl_udp_wmem_min),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero
+		.extra1		= &one
 	},
 	{ }
 };
@@ -850,6 +839,13 @@ static struct ctl_table ipv4_net_table[] = {
 		.proc_handler	= ipv4_ping_group_range,
 	},
 	{
+		.procname	= "tcp_ecn",
+		.data		= &init_net.ipv4.sysctl_tcp_ecn,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
 		.procname	= "tcp_mem",
 		.maxlen		= sizeof(init_net.ipv4.sysctl_tcp_mem),
 		.mode		= 0644,
@@ -882,6 +878,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
 			&net->ipv4.sysctl_icmp_ratemask;
 		table[6].data =
 			&net->ipv4.sysctl_ping_group_range;
+		table[7].data =
+			&net->ipv4.sysctl_tcp_ecn;
 
 		/* Don't export sysctls to unprivileged users */
 		if (net->user_ns != &init_user_ns)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1ca253635f7..7a5ba48c2cc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -400,6 +400,8 @@ void tcp_init_sock(struct sock *sk)
 	tcp_enable_early_retrans(tp);
 	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
 
+	tp->tsoffset = 0;
+
 	sk->sk_state = TCP_CLOSE;
 
 	sk->sk_write_space = sk_stream_write_space;
@@ -895,6 +897,7 @@ new_segment:
 			get_page(page);
 			skb_fill_page_desc(skb, i, page, offset, copy);
 		}
+		skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
 
 		skb->len += copy;
 		skb->data_len += copy;
@@ -1428,12 +1431,12 @@ static void tcp_service_net_dma(struct sock *sk, bool wait)
 }
 #endif
 
-static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
+static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
 {
 	struct sk_buff *skb;
 	u32 offset;
 
-	skb_queue_walk(&sk->sk_receive_queue, skb) {
+	while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
 		offset = seq - TCP_SKB_CB(skb)->seq;
 		if (tcp_hdr(skb)->syn)
 			offset--;
@@ -1441,6 +1444,11 @@ static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
 			*off = offset;
 			return skb;
 		}
+		/* This looks weird, but this can happen if TCP collapsing
+		 * splitted a fat GRO packet, while we released socket lock
+		 * in skb_splice_bits()
+		 */
+		sk_eat_skb(sk, skb, false);
 	}
 	return NULL;
 }
@@ -1482,7 +1490,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 					break;
 			}
 			used = recv_actor(desc, skb, offset, len);
-			if (used < 0) {
+			if (used <= 0) {
 				if (!copied)
 					copied = used;
 				break;
@@ -1520,8 +1528,10 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 	tcp_rcv_space_adjust(sk);
 
 	/* Clean up data we have read: This will do ACK frames. */
-	if (copied > 0)
+	if (copied > 0) {
+		tcp_recv_skb(sk, seq, &offset);
 		tcp_cleanup_rbuf(sk, copied);
+	}
 	return copied;
 }
 EXPORT_SYMBOL(tcp_read_sock);
@@ -2280,7 +2290,6 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tp->packets_out = 0;
 	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
 	tp->snd_cwnd_cnt = 0;
-	tp->bytes_acked = 0;
 	tp->window_clamp = 0;
 	tcp_set_ca_state(sk, TCP_CA_Open);
 	tcp_clear_retrans(tp);
@@ -2704,6 +2713,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		else
 			err = -EINVAL;
 		break;
+	case TCP_TIMESTAMP:
+		if (!tp->repair)
+			err = -EPERM;
+		else
+			tp->tsoffset = val - tcp_time_stamp;
+		break;
 	default:
 		err = -ENOPROTOOPT;
 		break;
@@ -2952,6 +2967,9 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 	case TCP_USER_TIMEOUT:
 		val = jiffies_to_msecs(icsk->icsk_user_timeout);
 		break;
+	case TCP_TIMESTAMP:
+		val = tcp_time_stamp + tp->tsoffset;
+		break;
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -3025,6 +3043,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
 			       SKB_GSO_DODGY |
 			       SKB_GSO_TCP_ECN |
 			       SKB_GSO_TCPV6 |
+			       SKB_GSO_GRE |
 			       0) ||
 			     !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
 			goto out;
@@ -3236,7 +3255,7 @@ __tcp_alloc_md5sig_pool(struct sock *sk)
 		struct crypto_hash *hash;
 
 		hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
-		if (!hash || IS_ERR(hash))
+		if (IS_ERR_OR_NULL(hash))
 			goto out_free;
 
 		per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash;
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 291f2ed7cc3..019c2389a34 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -310,35 +310,24 @@ void tcp_slow_start(struct tcp_sock *tp)
 {
 	int cnt; /* increase in packets */
 	unsigned int delta = 0;
+	u32 snd_cwnd = tp->snd_cwnd;
 
-	/* RFC3465: ABC Slow start
-	 * Increase only after a full MSS of bytes is acked
-	 *
-	 * TCP sender SHOULD increase cwnd by the number of
-	 * previously unacknowledged bytes ACKed by each incoming
-	 * acknowledgment, provided the increase is not more than L
-	 */
-	if (sysctl_tcp_abc && tp->bytes_acked < tp->mss_cache)
-		return;
+	if (unlikely(!snd_cwnd)) {
+		pr_err_once("snd_cwnd is nul, please report this bug.\n");
+		snd_cwnd = 1U;
+	}
 
 	if (sysctl_tcp_max_ssthresh > 0 && tp->snd_cwnd > sysctl_tcp_max_ssthresh)
 		cnt = sysctl_tcp_max_ssthresh >> 1;	/* limited slow start */
 	else
-		cnt = tp->snd_cwnd;			/* exponential increase */
-
-	/* RFC3465: ABC
-	 * We MAY increase by 2 if discovered delayed ack
-	 */
-	if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache)
-		cnt <<= 1;
-	tp->bytes_acked = 0;
+		cnt = snd_cwnd;				/* exponential increase */
 
 	tp->snd_cwnd_cnt += cnt;
-	while (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
-		tp->snd_cwnd_cnt -= tp->snd_cwnd;
+	while (tp->snd_cwnd_cnt >= snd_cwnd) {
+		tp->snd_cwnd_cnt -= snd_cwnd;
 		delta++;
 	}
-	tp->snd_cwnd = min(tp->snd_cwnd + delta, tp->snd_cwnd_clamp);
+	tp->snd_cwnd = min(snd_cwnd + delta, tp->snd_cwnd_clamp);
 }
 EXPORT_SYMBOL_GPL(tcp_slow_start);
 
@@ -372,20 +361,9 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 	/* In "safe" area, increase. */
 	if (tp->snd_cwnd <= tp->snd_ssthresh)
 		tcp_slow_start(tp);
-
 	/* In dangerous area, increase slowly. */
-	else if (sysctl_tcp_abc) {
-		/* RFC3465: Appropriate Byte Count
-		 * increase once for each full cwnd acked
-		 */
-		if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) {
-			tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache;
-			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-				tp->snd_cwnd++;
-		}
-	} else {
+	else
 		tcp_cong_avoid_ai(tp, tp->snd_cwnd);
-	}
 }
 EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a13692560e6..a759e19496d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -81,8 +81,6 @@ int sysctl_tcp_sack __read_mostly = 1;
 int sysctl_tcp_fack __read_mostly = 1;
 int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
 EXPORT_SYMBOL(sysctl_tcp_reordering);
-int sysctl_tcp_ecn __read_mostly = 2;
-EXPORT_SYMBOL(sysctl_tcp_ecn);
 int sysctl_tcp_dsack __read_mostly = 1;
 int sysctl_tcp_app_win __read_mostly = 31;
 int sysctl_tcp_adv_win_scale __read_mostly = 1;
@@ -100,7 +98,6 @@ int sysctl_tcp_frto_response __read_mostly;
 int sysctl_tcp_thin_dupack __read_mostly;
 
 int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
-int sysctl_tcp_abc __read_mostly;
 int sysctl_tcp_early_retrans __read_mostly = 2;
 
 #define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
@@ -2009,7 +2006,6 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
 	tp->snd_cwnd_cnt = 0;
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 	tp->frto_counter = 0;
-	tp->bytes_acked = 0;
 
 	tp->reordering = min_t(unsigned int, tp->reordering,
 			       sysctl_tcp_reordering);
@@ -2058,7 +2054,6 @@ void tcp_enter_loss(struct sock *sk, int how)
 	tp->snd_cwnd_cnt   = 0;
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 
-	tp->bytes_acked = 0;
 	tcp_clear_retrans_partial(tp);
 
 	if (tcp_is_reno(tp))
@@ -2686,7 +2681,6 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	tp->high_seq = tp->snd_nxt;
-	tp->bytes_acked = 0;
 	tp->snd_cwnd_cnt = 0;
 	tp->prior_cwnd = tp->snd_cwnd;
 	tp->prr_delivered = 0;
@@ -2737,7 +2731,6 @@ void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	tp->prior_ssthresh = 0;
-	tp->bytes_acked = 0;
 	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
 		tp->undo_marker = 0;
 		tcp_init_cwnd_reduction(sk, set_ssthresh);
@@ -3419,7 +3412,6 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
 {
 	tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
 	tp->snd_cwnd_cnt = 0;
-	tp->bytes_acked = 0;
 	TCP_ECN_queue_cwr(tp);
 	tcp_moderate_cwnd(tp);
 }
@@ -3504,6 +3496,11 @@ static bool tcp_process_frto(struct sock *sk, int flag)
 		}
 	} else {
 		if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
+			if (!tcp_packets_in_flight(tp)) {
+				tcp_enter_frto_loss(sk, 2, flag);
+				return true;
+			}
+
 			/* Prevent sending of new data. */
 			tp->snd_cwnd = min(tp->snd_cwnd,
 					   tcp_packets_in_flight(tp));
@@ -3610,15 +3607,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	if (after(ack, prior_snd_una))
 		flag |= FLAG_SND_UNA_ADVANCED;
 
-	if (sysctl_tcp_abc) {
-		if (icsk->icsk_ca_state < TCP_CA_CWR)
-			tp->bytes_acked += ack - prior_snd_una;
-		else if (icsk->icsk_ca_state == TCP_CA_Loss)
-			/* we assume just one segment left network */
-			tp->bytes_acked += min(ack - prior_snd_una,
-					       tp->mss_cache);
-	}
-
 	prior_fackets = tp->fackets_out;
 	prior_in_flight = tcp_packets_in_flight(tp);
 
@@ -3872,7 +3860,7 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr
 		++ptr;
 		tp->rx_opt.rcv_tsval = ntohl(*ptr);
 		++ptr;
-		tp->rx_opt.rcv_tsecr = ntohl(*ptr);
+		tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset;
 		return true;
 	}
 	return false;
@@ -3896,7 +3884,11 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb,
 		if (tcp_parse_aligned_timestamp(tp, th))
 			return true;
 	}
+
 	tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL);
+	if (tp->rx_opt.saw_tstamp)
+		tp->rx_opt.rcv_tsecr -= tp->tsoffset;
+
 	return true;
 }
 
@@ -5543,6 +5535,9 @@ slow_path:
 	if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb))
 		goto csum_error;
 
+	if (!th->ack && !th->rst)
+		goto discard;
+
 	/*
 	 *	Standard slow path.
 	 */
@@ -5551,7 +5546,7 @@ slow_path:
 		return 0;
 
 step5:
-	if (th->ack && tcp_ack(sk, skb, FLAG_SLOWPATH) < 0)
+	if (tcp_ack(sk, skb, FLAG_SLOWPATH) < 0)
 		goto discard;
 
 	/* ts_recent update must be made after we are sure that the packet
@@ -5646,8 +5641,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 	 * the remote receives only the retransmitted (regular) SYNs: either
 	 * the original SYN-data or the corresponding SYN-ACK is lost.
 	 */
-	syn_drop = (cookie->len <= 0 && data &&
-		    inet_csk(sk)->icsk_retransmits);
+	syn_drop = (cookie->len <= 0 && data && tp->total_retrans);
 
 	tcp_fastopen_cache_set(sk, mss, cookie, syn_drop);
 
@@ -5675,6 +5669,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 	int saved_clamp = tp->rx_opt.mss_clamp;
 
 	tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, &foc);
+	if (tp->rx_opt.saw_tstamp)
+		tp->rx_opt.rcv_tsecr -= tp->tsoffset;
 
 	if (th->ack) {
 		/* rfc793:
@@ -5984,11 +5980,15 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		if (tcp_check_req(sk, skb, req, NULL, true) == NULL)
 			goto discard;
 	}
+
+	if (!th->ack && !th->rst)
+		goto discard;
+
 	if (!tcp_validate_incoming(sk, skb, th, 0))
 		return 0;
 
 	/* step 5: check the ACK field */
-	if (th->ack) {
+	if (true) {
 		int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0;
 
 		switch (sk->sk_state) {
@@ -6138,8 +6138,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 			}
 			break;
 		}
-	} else
-		goto discard;
+	}
 
 	/* ts_recent update must be made after we are sure that the packet
 	 * is in window.
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 54139fa514e..77f5050efc8 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -369,11 +369,10 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 	 * We do take care of PMTU discovery (RFC1191) special case :
 	 * we can receive locally generated ICMP messages while socket is held.
 	 */
-	if (sock_owned_by_user(sk) &&
-	    type != ICMP_DEST_UNREACH &&
-	    code != ICMP_FRAG_NEEDED)
-		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
-
+	if (sock_owned_by_user(sk)) {
+		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
+			NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
+	}
 	if (sk->sk_state == TCP_CLOSE)
 		goto out;
 
@@ -497,6 +496,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		 * errors returned from accept().
 		 */
 		inet_csk_reqsk_queue_drop(sk, req, prev);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
 		goto out;
 
 	case TCP_SYN_SENT:
@@ -657,7 +657,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 		 * no RST generated if md5 hash doesn't match.
 		 */
 		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
-					     &tcp_hashinfo, ip_hdr(skb)->daddr,
+					     &tcp_hashinfo, ip_hdr(skb)->saddr,
+					     th->source, ip_hdr(skb)->daddr,
 					     ntohs(th->source), inet_iif(skb));
 		/* don't send rst if it can't find key */
 		if (!sk1)
@@ -725,7 +726,7 @@ release_sk1:
  */
 
 static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
-			    u32 win, u32 ts, int oif,
+			    u32 win, u32 tsval, u32 tsecr, int oif,
 			    struct tcp_md5sig_key *key,
 			    int reply_flags, u8 tos)
 {
@@ -746,12 +747,12 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 
 	arg.iov[0].iov_base = (unsigned char *)&rep;
 	arg.iov[0].iov_len  = sizeof(rep.th);
-	if (ts) {
+	if (tsecr) {
 		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 				   (TCPOPT_TIMESTAMP << 8) |
 				   TCPOLEN_TIMESTAMP);
-		rep.opt[1] = htonl(tcp_time_stamp);
-		rep.opt[2] = htonl(ts);
+		rep.opt[1] = htonl(tsval);
+		rep.opt[2] = htonl(tsecr);
 		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
 	}
 
@@ -766,7 +767,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 
 #ifdef CONFIG_TCP_MD5SIG
 	if (key) {
-		int offset = (ts) ? 3 : 0;
+		int offset = (tsecr) ? 3 : 0;
 
 		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
 					  (TCPOPT_NOP << 16) |
@@ -801,6 +802,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 
 	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
+			tcp_time_stamp + tcptw->tw_ts_offset,
 			tcptw->tw_ts_recent,
 			tw->tw_bound_dev_if,
 			tcp_twsk_md5_key(tcptw),
@@ -820,6 +822,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 	tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
 			tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
+			tcp_time_stamp,
 			req->ts_recent,
 			0,
 			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
@@ -1501,8 +1504,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	 * clogging syn queue with openreqs with exponentially increasing
 	 * timeout.
 	 */
-	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
+	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
 		goto drop;
+	}
 
 	req = inet_reqsk_alloc(&tcp_request_sock_ops);
 	if (!req)
@@ -1568,7 +1573,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		goto drop_and_free;
 
 	if (!want_cookie || tmp_opt.tstamp_ok)
-		TCP_ECN_create_request(req, skb);
+		TCP_ECN_create_request(req, skb, sock_net(sk));
 
 	if (want_cookie) {
 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
@@ -1667,6 +1672,7 @@ drop_and_release:
 drop_and_free:
 	reqsk_free(req);
 drop:
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
 	return 0;
 }
 EXPORT_SYMBOL(tcp_v4_conn_request);
@@ -2074,6 +2080,7 @@ do_time_wait:
 	case TCP_TW_SYN: {
 		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
 							&tcp_hashinfo,
+							iph->saddr, th->source,
 							iph->daddr, th->dest,
 							inet_iif(skb));
 		if (sk2) {
@@ -2888,6 +2895,7 @@ EXPORT_SYMBOL(tcp_prot);
 
 static int __net_init tcp_sk_init(struct net *net)
 {
+	net->ipv4.sysctl_tcp_ecn = 2;
 	return 0;
 }
 
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f35f2dfb640..b83a49cc381 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -102,6 +102,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 		tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
 
 		if (tmp_opt.saw_tstamp) {
+			tmp_opt.rcv_tsecr	-= tcptw->tw_ts_offset;
 			tmp_opt.ts_recent	= tcptw->tw_ts_recent;
 			tmp_opt.ts_recent_stamp	= tcptw->tw_ts_recent_stamp;
 			paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
@@ -288,6 +289,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 		tcptw->tw_rcv_wnd	= tcp_receive_window(tp);
 		tcptw->tw_ts_recent	= tp->rx_opt.ts_recent;
 		tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
+		tcptw->tw_ts_offset	= tp->tsoffset;
 
 #if IS_ENABLED(CONFIG_IPV6)
 		if (tw->tw_family == PF_INET6) {
@@ -446,7 +448,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		 */
 		newtp->snd_cwnd = TCP_INIT_CWND;
 		newtp->snd_cwnd_cnt = 0;
-		newtp->bytes_acked = 0;
 
 		newtp->frto_counter = 0;
 		newtp->frto_highmark = 0;
@@ -500,6 +501,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 			newtp->rx_opt.ts_recent_stamp = 0;
 			newtp->tcp_header_len = sizeof(struct tcphdr);
 		}
+		newtp->tsoffset = 0;
 #ifdef CONFIG_TCP_MD5SIG
 		newtp->md5sig_info = NULL;	/*XXX*/
 		if (newtp->af_specific->md5_lookup(sk, newsk))
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5d451593ef1..fd0cea114b5 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -314,7 +314,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	tp->ecn_flags = 0;
-	if (sysctl_tcp_ecn == 1) {
+	if (sock_net(sk)->ipv4.sysctl_tcp_ecn == 1) {
 		TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
 		tp->ecn_flags = TCP_ECN_OK;
 	}
@@ -622,7 +622,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 
 	if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
 		opts->options |= OPTION_TS;
-		opts->tsval = TCP_SKB_CB(skb)->when;
+		opts->tsval = TCP_SKB_CB(skb)->when + tp->tsoffset;
 		opts->tsecr = tp->rx_opt.ts_recent;
 		remaining -= TCPOLEN_TSTAMP_ALIGNED;
 	}
@@ -806,7 +806,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
 
 	if (likely(tp->rx_opt.tstamp_ok)) {
 		opts->options |= OPTION_TS;
-		opts->tsval = tcb ? tcb->when : 0;
+		opts->tsval = tcb ? tcb->when + tp->tsoffset : 0;
 		opts->tsecr = tp->rx_opt.ts_recent;
 		size += TCPOLEN_TSTAMP_ALIGNED;
 	}
@@ -1331,7 +1331,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
 /* Remove acked data from a packet in the transmit queue. */
 int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 {
-	if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+	if (skb_unclone(skb, GFP_ATOMIC))
 		return -ENOMEM;
 
 	__pskb_trim_head(skb, len);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 79c8dbe59b5..39a5e7a9a77 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -139,6 +139,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
 {
 	struct sock *sk2;
 	struct hlist_nulls_node *node;
+	kuid_t uid = sock_i_uid(sk);
 
 	sk_nulls_for_each(sk2, node, &hslot->head)
 		if (net_eq(sock_net(sk2), net) &&
@@ -147,6 +148,8 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
 		    (!sk2->sk_reuse || !sk->sk_reuse) &&
 		    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
 		     sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+		    (!sk2->sk_reuseport || !sk->sk_reuseport ||
+		      !uid_eq(uid, sock_i_uid(sk2))) &&
 		    (*saddr_comp)(sk, sk2)) {
 			if (bitmap)
 				__set_bit(udp_sk(sk2)->udp_port_hash >> log,
@@ -169,6 +172,7 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
 {
 	struct sock *sk2;
 	struct hlist_nulls_node *node;
+	kuid_t uid = sock_i_uid(sk);
 	int res = 0;
 
 	spin_lock(&hslot2->lock);
@@ -179,6 +183,8 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
 		    (!sk2->sk_reuse || !sk->sk_reuse) &&
 		    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
 		     sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+		    (!sk2->sk_reuseport || !sk->sk_reuseport ||
+		      !uid_eq(uid, sock_i_uid(sk2))) &&
 		    (*saddr_comp)(sk, sk2)) {
 			res = 1;
 			break;
@@ -337,26 +343,26 @@ static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr,
 			!ipv6_only_sock(sk)) {
 		struct inet_sock *inet = inet_sk(sk);
 
-		score = (sk->sk_family == PF_INET ? 1 : 0);
+		score = (sk->sk_family == PF_INET ? 2 : 1);
 		if (inet->inet_rcv_saddr) {
 			if (inet->inet_rcv_saddr != daddr)
 				return -1;
-			score += 2;
+			score += 4;
 		}
 		if (inet->inet_daddr) {
 			if (inet->inet_daddr != saddr)
 				return -1;
-			score += 2;
+			score += 4;
 		}
 		if (inet->inet_dport) {
 			if (inet->inet_dport != sport)
 				return -1;
-			score += 2;
+			score += 4;
 		}
 		if (sk->sk_bound_dev_if) {
 			if (sk->sk_bound_dev_if != dif)
 				return -1;
-			score += 2;
+			score += 4;
 		}
 	}
 	return score;
@@ -365,7 +371,6 @@ static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr,
 /*
  * In this second variant, we check (daddr, dport) matches (inet_rcv_sadd, inet_num)
  */
-#define SCORE2_MAX (1 + 2 + 2 + 2)
 static inline int compute_score2(struct sock *sk, struct net *net,
 				 __be32 saddr, __be16 sport,
 				 __be32 daddr, unsigned int hnum, int dif)
@@ -380,21 +385,21 @@ static inline int compute_score2(struct sock *sk, struct net *net,
 		if (inet->inet_num != hnum)
 			return -1;
 
-		score = (sk->sk_family == PF_INET ? 1 : 0);
+		score = (sk->sk_family == PF_INET ? 2 : 1);
 		if (inet->inet_daddr) {
 			if (inet->inet_daddr != saddr)
 				return -1;
-			score += 2;
+			score += 4;
 		}
 		if (inet->inet_dport) {
 			if (inet->inet_dport != sport)
 				return -1;
-			score += 2;
+			score += 4;
 		}
 		if (sk->sk_bound_dev_if) {
 			if (sk->sk_bound_dev_if != dif)
 				return -1;
-			score += 2;
+			score += 4;
 		}
 	}
 	return score;
@@ -409,19 +414,29 @@ static struct sock *udp4_lib_lookup2(struct net *net,
 {
 	struct sock *sk, *result;
 	struct hlist_nulls_node *node;
-	int score, badness;
+	int score, badness, matches = 0, reuseport = 0;
+	u32 hash = 0;
 
 begin:
 	result = NULL;
-	badness = -1;
+	badness = 0;
 	udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
 		score = compute_score2(sk, net, saddr, sport,
 				      daddr, hnum, dif);
 		if (score > badness) {
 			result = sk;
 			badness = score;
-			if (score == SCORE2_MAX)
-				goto exact_match;
+			reuseport = sk->sk_reuseport;
+			if (reuseport) {
+				hash = inet_ehashfn(net, daddr, hnum,
+						    saddr, htons(sport));
+				matches = 1;
+			}
+		} else if (score == badness && reuseport) {
+			matches++;
+			if (((u64)hash * matches) >> 32 == 0)
+				result = sk;
+			hash = next_pseudo_random32(hash);
 		}
 	}
 	/*
@@ -431,9 +446,7 @@ begin:
 	 */
 	if (get_nulls_value(node) != slot2)
 		goto begin;
-
 	if (result) {
-exact_match:
 		if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
 			result = NULL;
 		else if (unlikely(compute_score2(result, net, saddr, sport,
@@ -457,7 +470,8 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 	unsigned short hnum = ntohs(dport);
 	unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
 	struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
-	int score, badness;
+	int score, badness, matches = 0, reuseport = 0;
+	u32 hash = 0;
 
 	rcu_read_lock();
 	if (hslot->count > 10) {
@@ -486,13 +500,24 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 	}
 begin:
 	result = NULL;
-	badness = -1;
+	badness = 0;
 	sk_nulls_for_each_rcu(sk, node, &hslot->head) {
 		score = compute_score(sk, net, saddr, hnum, sport,
 				      daddr, dport, dif);
 		if (score > badness) {
 			result = sk;
 			badness = score;
+			reuseport = sk->sk_reuseport;
+			if (reuseport) {
+				hash = inet_ehashfn(net, daddr, hnum,
+						    saddr, htons(sport));
+				matches = 1;
+			}
+		} else if (score == badness && reuseport) {
+			matches++;
+			if (((u64)hash * matches) >> 32 == 0)
+				result = sk;
+			hash = next_pseudo_random32(hash);
 		}
 	}
 	/*
@@ -971,7 +996,7 @@ back_from_confirm:
 				  sizeof(struct udphdr), &ipc, &rt,
 				  msg->msg_flags);
 		err = PTR_ERR(skb);
-		if (skb && !IS_ERR(skb))
+		if (!IS_ERR_OR_NULL(skb))
 			err = udp_send_skb(skb, fl4);
 		goto out;
 	}
@@ -1952,6 +1977,7 @@ struct proto udp_prot = {
 	.recvmsg	   = udp_recvmsg,
 	.sendpage	   = udp_sendpage,
 	.backlog_rcv	   = __udp_queue_rcv_skb,
+	.release_cb	   = ip4_datagram_release_cb,
 	.hash		   = udp_lib_hash,
 	.unhash		   = udp_lib_unhash,
 	.rehash		   = udp_v4_rehash,
@@ -2279,7 +2305,8 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 		/* Packet is from an untrusted source, reset gso_segs. */
 		int type = skb_shinfo(skb)->gso_type;
 
-		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) ||
+		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
+				      SKB_GSO_GRE) ||
 			     !(type & (SKB_GSO_UDP))))
 			goto out;
 
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 06814b6216d..1f12c8b4586 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -132,7 +132,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	 * header and optional ESP marker bytes) and then modify the
 	 * protocol to ESP, and then call into the transform receiver.
 	 */
-	if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+	if (skb_unclone(skb, GFP_ATOMIC))
 		goto drop;
 
 	/* Now we can update and verify the packet length... */
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index ddee0a099a2..fe5189e2e11 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -142,8 +142,8 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 	for_each_input_rcu(rcv_notify_handlers, handler)
 		handler->handler(skb);
 
-	if (skb_cloned(skb) &&
-	    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+	err = skb_unclone(skb, GFP_ATOMIC);
+	if (err)
 		goto out;
 
 	if (x->props.flags & XFRM_STATE_DECAP_DSCP)
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 3be0ac2c192..9a459be24af 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -262,21 +262,56 @@ static struct ctl_table xfrm4_policy_table[] = {
 	{ }
 };
 
-static struct ctl_table_header *sysctl_hdr;
-#endif
-
-static void __init xfrm4_policy_init(void)
+static int __net_init xfrm4_net_init(struct net *net)
 {
-	xfrm_policy_register_afinfo(&xfrm4_policy_afinfo);
+	struct ctl_table *table;
+	struct ctl_table_header *hdr;
+
+	table = xfrm4_policy_table;
+	if (!net_eq(net, &init_net)) {
+		table = kmemdup(table, sizeof(xfrm4_policy_table), GFP_KERNEL);
+		if (!table)
+			goto err_alloc;
+
+		table[0].data = &net->xfrm.xfrm4_dst_ops.gc_thresh;
+	}
+
+	hdr = register_net_sysctl(net, "net/ipv4", table);
+	if (!hdr)
+		goto err_reg;
+
+	net->ipv4.xfrm4_hdr = hdr;
+	return 0;
+
+err_reg:
+	if (!net_eq(net, &init_net))
+		kfree(table);
+err_alloc:
+	return -ENOMEM;
 }
 
-static void __exit xfrm4_policy_fini(void)
+static void __net_exit xfrm4_net_exit(struct net *net)
 {
-#ifdef CONFIG_SYSCTL
-	if (sysctl_hdr)
-		unregister_net_sysctl_table(sysctl_hdr);
+	struct ctl_table *table;
+
+	if (net->ipv4.xfrm4_hdr == NULL)
+		return;
+
+	table = net->ipv4.xfrm4_hdr->ctl_table_arg;
+	unregister_net_sysctl_table(net->ipv4.xfrm4_hdr);
+	if (!net_eq(net, &init_net))
+		kfree(table);
+}
+
+static struct pernet_operations __net_initdata xfrm4_net_ops = {
+	.init	= xfrm4_net_init,
+	.exit	= xfrm4_net_exit,
+};
 #endif
-	xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo);
+
+static void __init xfrm4_policy_init(void)
+{
+	xfrm_policy_register_afinfo(&xfrm4_policy_afinfo);
 }
 
 void __init xfrm4_init(void)
@@ -286,8 +321,7 @@ void __init xfrm4_init(void)
 	xfrm4_state_init();
 	xfrm4_policy_init();
 #ifdef CONFIG_SYSCTL
-	sysctl_hdr = register_net_sysctl(&init_net, "net/ipv4",
-					 xfrm4_policy_table);
+	register_pernet_subsys(&xfrm4_net_ops);
 #endif
 }
 
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 4ea244891b5..309af19a0a0 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -40,7 +40,7 @@ obj-$(CONFIG_IPV6_SIT) += sit.o
 obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
 obj-$(CONFIG_IPV6_GRE) += ip6_gre.o
 
-obj-y += addrconf_core.o exthdrs_core.o
+obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o
 obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload)
 
 obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 408cac4ae00..86c235d05ab 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -110,10 +110,6 @@ static inline u32 cstamp_delta(unsigned long cstamp)
 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
 }
 
-#define ADDRCONF_TIMER_FUZZ_MINUS	(HZ > 50 ? HZ/50 : 1)
-#define ADDRCONF_TIMER_FUZZ		(HZ / 4)
-#define ADDRCONF_TIMER_FUZZ_MAX		(HZ)
-
 #ifdef CONFIG_SYSCTL
 static void addrconf_sysctl_register(struct inet6_dev *idev);
 static void addrconf_sysctl_unregister(struct inet6_dev *idev);
@@ -154,6 +150,11 @@ static void addrconf_type_change(struct net_device *dev,
 				 unsigned long event);
 static int addrconf_ifdown(struct net_device *dev, int how);
 
+static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
+						  int plen,
+						  const struct net_device *dev,
+						  u32 flags, u32 noflags);
+
 static void addrconf_dad_start(struct inet6_ifaddr *ifp);
 static void addrconf_dad_timer(unsigned long data);
 static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
@@ -243,6 +244,9 @@ const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
 const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
 const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
 const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
+const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT;
+const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT;
+const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT;
 
 /* Check if a valid qdisc is available */
 static inline bool addrconf_qdisc_ok(const struct net_device *dev)
@@ -250,12 +254,6 @@ static inline bool addrconf_qdisc_ok(const struct net_device *dev)
 	return !qdisc_tx_is_noop(dev);
 }
 
-/* Check if a route is valid prefix route */
-static inline int addrconf_is_prefix_route(const struct rt6_info *rt)
-{
-	return (rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0;
-}
-
 static void addrconf_del_timer(struct inet6_ifaddr *ifp)
 {
 	if (del_timer(&ifp->timer))
@@ -433,6 +431,9 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 	/* protected by rtnl_lock */
 	rcu_assign_pointer(dev->ip6_ptr, ndev);
 
+	/* Join interface-local all-node multicast group */
+	ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes);
+
 	/* Join all-node multicast group */
 	ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes);
 
@@ -616,10 +617,15 @@ static void dev_forward_change(struct inet6_dev *idev)
 	if (idev->cnf.forwarding)
 		dev_disable_lro(dev);
 	if (dev->flags & IFF_MULTICAST) {
-		if (idev->cnf.forwarding)
+		if (idev->cnf.forwarding) {
 			ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
-		else
+			ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allrouters);
+			ipv6_dev_mc_inc(dev, &in6addr_sitelocal_allrouters);
+		} else {
 			ipv6_dev_mc_dec(dev, &in6addr_linklocal_allrouters);
+			ipv6_dev_mc_dec(dev, &in6addr_interfacelocal_allrouters);
+			ipv6_dev_mc_dec(dev, &in6addr_sitelocal_allrouters);
+		}
 	}
 
 	list_for_each_entry(ifa, &idev->addr_list, if_list) {
@@ -941,17 +947,15 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 	if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) {
 		struct in6_addr prefix;
 		struct rt6_info *rt;
-		struct net *net = dev_net(ifp->idev->dev);
-		struct flowi6 fl6 = {};
 
 		ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len);
-		fl6.flowi6_oif = ifp->idev->dev->ifindex;
-		fl6.daddr = prefix;
-		rt = (struct rt6_info *)ip6_route_lookup(net, &fl6,
-							 RT6_LOOKUP_F_IFACE);
 
-		if (rt != net->ipv6.ip6_null_entry &&
-		    addrconf_is_prefix_route(rt)) {
+		rt = addrconf_get_prefix_route(&prefix,
+					       ifp->prefix_len,
+					       ifp->idev->dev,
+					       0, RTF_GATEWAY | RTF_DEFAULT);
+
+		if (rt) {
 			if (onlink == 0) {
 				ip6_del_rt(rt);
 				rt = NULL;
@@ -1054,7 +1058,7 @@ retry:
 		ipv6_add_addr(idev, &addr, tmp_plen,
 			      ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK,
 			      addr_flags) : NULL;
-	if (!ift || IS_ERR(ift)) {
+	if (IS_ERR_OR_NULL(ift)) {
 		in6_ifa_put(ifp);
 		in6_dev_put(idev);
 		pr_info("%s: retry temporary address regeneration\n", __func__);
@@ -1663,6 +1667,7 @@ static int addrconf_ifid_eui64(u8 *eui, struct net_device *dev)
 	if (dev->addr_len != IEEE802154_ADDR_LEN)
 		return -1;
 	memcpy(eui, dev->dev_addr, 8);
+	eui[0] ^= 2;
 	return 0;
 }
 
@@ -1877,7 +1882,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 			continue;
 		if ((rt->rt6i_flags & flags) != flags)
 			continue;
-		if ((noflags != 0) && ((rt->rt6i_flags & flags) != 0))
+		if ((rt->rt6i_flags & noflags) != 0)
 			continue;
 		dst_hold(&rt->dst);
 		break;
@@ -2082,7 +2087,7 @@ ok:
 						    addr_type&IPV6_ADDR_SCOPE_MASK,
 						    addr_flags);
 
-			if (!ifp || IS_ERR(ifp)) {
+			if (IS_ERR_OR_NULL(ifp)) {
 				in6_dev_put(in6_dev);
 				return;
 			}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index b043c60429b..6b793bfc0e1 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -811,11 +811,10 @@ static struct pernet_operations inet6_net_ops = {
 
 static int __init inet6_init(void)
 {
-	struct sk_buff *dummy_skb;
 	struct list_head *r;
 	int err = 0;
 
-	BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb));
+	BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb));
 
 	/* Register the socket-side information for inet6_create.  */
 	for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r)
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index ecc35b93314..bb02e176cb7 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -472,7 +472,10 @@ static void ah6_input_done(struct crypto_async_request *base, int err)
 	skb->network_header += ah_hlen;
 	memcpy(skb_network_header(skb), work_iph, hdr_len);
 	__skb_pull(skb, ah_hlen + hdr_len);
-	skb_set_transport_header(skb, -hdr_len);
+	if (x->props.mode == XFRM_MODE_TUNNEL)
+		skb_reset_transport_header(skb);
+	else
+		skb_set_transport_header(skb, -hdr_len);
 out:
 	kfree(AH_SKB_CB(skb)->tmp);
 	xfrm_input_resume(skb, err);
@@ -518,8 +521,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	/* We are going to _remove_ AH header to keep sockets happy,
 	 * so... Later this can change. */
-	if (skb_cloned(skb) &&
-	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+	if (skb_unclone(skb, GFP_ATOMIC))
 		goto out;
 
 	skb->ip_summed = CHECKSUM_NONE;
@@ -593,9 +595,13 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	skb->network_header += ah_hlen;
 	memcpy(skb_network_header(skb), work_iph, hdr_len);
-	skb->transport_header = skb->network_header;
 	__skb_pull(skb, ah_hlen + hdr_len);
 
+	if (x->props.mode == XFRM_MODE_TUNNEL)
+		skb_reset_transport_header(skb);
+	else
+		skb_set_transport_header(skb, -hdr_len);
+
 	err = nexthdr;
 
 out_free:
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 757a810d8f1..921b8b398a8 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -47,7 +47,7 @@
 static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
 
 /* Big ac list lock for all the sockets */
-static DEFINE_RWLOCK(ipv6_sk_ac_lock);
+static DEFINE_SPINLOCK(ipv6_sk_ac_lock);
 
 
 /*
@@ -128,10 +128,10 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 
 	err = ipv6_dev_ac_inc(dev, addr);
 	if (!err) {
-		write_lock_bh(&ipv6_sk_ac_lock);
+		spin_lock_bh(&ipv6_sk_ac_lock);
 		pac->acl_next = np->ipv6_ac_list;
 		np->ipv6_ac_list = pac;
-		write_unlock_bh(&ipv6_sk_ac_lock);
+		spin_unlock_bh(&ipv6_sk_ac_lock);
 		pac = NULL;
 	}
 
@@ -152,7 +152,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 	struct ipv6_ac_socklist *pac, *prev_pac;
 	struct net *net = sock_net(sk);
 
-	write_lock_bh(&ipv6_sk_ac_lock);
+	spin_lock_bh(&ipv6_sk_ac_lock);
 	prev_pac = NULL;
 	for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) {
 		if ((ifindex == 0 || pac->acl_ifindex == ifindex) &&
@@ -161,7 +161,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 		prev_pac = pac;
 	}
 	if (!pac) {
-		write_unlock_bh(&ipv6_sk_ac_lock);
+		spin_unlock_bh(&ipv6_sk_ac_lock);
 		return -ENOENT;
 	}
 	if (prev_pac)
@@ -169,7 +169,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 	else
 		np->ipv6_ac_list = pac->acl_next;
 
-	write_unlock_bh(&ipv6_sk_ac_lock);
+	spin_unlock_bh(&ipv6_sk_ac_lock);
 
 	rcu_read_lock();
 	dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
@@ -192,10 +192,10 @@ void ipv6_sock_ac_close(struct sock *sk)
 	if (!np->ipv6_ac_list)
 		return;
 
-	write_lock_bh(&ipv6_sk_ac_lock);
+	spin_lock_bh(&ipv6_sk_ac_lock);
 	pac = np->ipv6_ac_list;
 	np->ipv6_ac_list = NULL;
-	write_unlock_bh(&ipv6_sk_ac_lock);
+	spin_unlock_bh(&ipv6_sk_ac_lock);
 
 	prev_index = 0;
 	rcu_read_lock();
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 8edf2601065..f5a54782a34 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -30,6 +30,7 @@
 #include <net/transp_v6.h>
 #include <net/ip6_route.h>
 #include <net/tcp_states.h>
+#include <net/dsfield.h>
 
 #include <linux/errqueue.h>
 #include <asm/uaccess.h>
@@ -356,12 +357,11 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
 		sin->sin6_port = serr->port;
 		sin->sin6_scope_id = 0;
 		if (skb->protocol == htons(ETH_P_IPV6)) {
-			sin->sin6_addr =
-				*(struct in6_addr *)(nh + serr->addr_offset);
+			const struct ipv6hdr *ip6h = container_of((struct in6_addr *)(nh + serr->addr_offset),
+								  struct ipv6hdr, daddr);
+			sin->sin6_addr = ip6h->daddr;
 			if (np->sndflow)
-				sin->sin6_flowinfo =
-					(*(__be32 *)(nh + serr->addr_offset - 24) &
-					 IPV6_FLOWINFO_MASK);
+				sin->sin6_flowinfo = ip6_flowinfo(ip6h);
 			if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
 				sin->sin6_scope_id = IP6CB(skb)->iif;
 		} else {
@@ -380,7 +380,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
 		if (skb->protocol == htons(ETH_P_IPV6)) {
 			sin->sin6_addr = ipv6_hdr(skb)->saddr;
 			if (np->rxopt.all)
-				datagram_recv_ctl(sk, msg, skb);
+				ip6_datagram_recv_ctl(sk, msg, skb);
 			if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
 				sin->sin6_scope_id = IP6CB(skb)->iif;
 		} else {
@@ -468,7 +468,8 @@ out:
 }
 
 
-int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
+int ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
+			  struct sk_buff *skb)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct inet6_skb_parm *opt = IP6CB(skb);
@@ -488,13 +489,14 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 	}
 
 	if (np->rxopt.bits.rxtclass) {
-		int tclass = ipv6_tclass(ipv6_hdr(skb));
+		int tclass = ipv6_get_dsfield(ipv6_hdr(skb));
 		put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass);
 	}
 
-	if (np->rxopt.bits.rxflow && (*(__be32 *)nh & IPV6_FLOWINFO_MASK)) {
-		__be32 flowinfo = *(__be32 *)nh & IPV6_FLOWINFO_MASK;
-		put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo);
+	if (np->rxopt.bits.rxflow) {
+		__be32 flowinfo = ip6_flowinfo((struct ipv6hdr *)nh);
+		if (flowinfo)
+			put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo);
 	}
 
 	/* HbH is allowed only once */
@@ -597,11 +599,12 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 	}
 	return 0;
 }
+EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl);
 
-int datagram_send_ctl(struct net *net, struct sock *sk,
-		      struct msghdr *msg, struct flowi6 *fl6,
-		      struct ipv6_txoptions *opt,
-		      int *hlimit, int *tclass, int *dontfrag)
+int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
+			  struct msghdr *msg, struct flowi6 *fl6,
+			  struct ipv6_txoptions *opt,
+			  int *hlimit, int *tclass, int *dontfrag)
 {
 	struct in6_pktinfo *src_info;
 	struct cmsghdr *cmsg;
@@ -871,4 +874,4 @@ int datagram_send_ctl(struct net *net, struct sock *sk,
 exit_f:
 	return err;
 }
-EXPORT_SYMBOL_GPL(datagram_send_ctl);
+EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 282f3723ee1..40ffd72243a 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -300,7 +300,10 @@ static int esp_input_done2(struct sk_buff *skb, int err)
 
 	pskb_trim(skb, skb->len - alen - padlen - 2);
 	__skb_pull(skb, hlen);
-	skb_set_transport_header(skb, -hdr_len);
+	if (x->props.mode == XFRM_MODE_TUNNEL)
+		skb_reset_transport_header(skb);
+	else
+		skb_set_transport_header(skb, -hdr_len);
 
 	err = nexthdr[1];
 
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 473f628f9f2..07a7d65a7cb 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -553,7 +553,8 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff)
 	const unsigned char *nh = skb_network_header(skb);
 
 	if (nh[optoff + 1] == 2) {
-		IP6CB(skb)->ra = optoff;
+		IP6CB(skb)->flags |= IP6SKB_ROUTERALERT;
+		memcpy(&IP6CB(skb)->ra, nh + optoff + 2, sizeof(IP6CB(skb)->ra));
 		return true;
 	}
 	LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n",
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index b4a9fd51dae..fff5bdd8b68 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -81,10 +81,22 @@ static inline struct sock *icmpv6_sk(struct net *net)
 	return net->ipv6.icmp_sk[smp_processor_id()];
 }
 
+static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+		       u8 type, u8 code, int offset, __be32 info)
+{
+	struct net *net = dev_net(skb->dev);
+
+	if (type == ICMPV6_PKT_TOOBIG)
+		ip6_update_pmtu(skb, net, info, 0, 0);
+	else if (type == NDISC_REDIRECT)
+		ip6_redirect(skb, net, 0, 0);
+}
+
 static int icmpv6_rcv(struct sk_buff *skb);
 
 static const struct inet6_protocol icmpv6_protocol = {
 	.handler	=	icmpv6_rcv,
+	.err_handler	=	icmpv6_err,
 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
 };
 
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 30647857a37..b386a2ce4c6 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -32,6 +32,9 @@ int inet6_csk_bind_conflict(const struct sock *sk,
 {
 	const struct sock *sk2;
 	const struct hlist_node *node;
+	int reuse = sk->sk_reuse;
+	int reuseport = sk->sk_reuseport;
+	kuid_t uid = sock_i_uid((struct sock *)sk);
 
 	/* We must walk the whole port owner list in this case. -DaveM */
 	/*
@@ -42,11 +45,17 @@ int inet6_csk_bind_conflict(const struct sock *sk,
 		if (sk != sk2 &&
 		    (!sk->sk_bound_dev_if ||
 		     !sk2->sk_bound_dev_if ||
-		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
-		    (!sk->sk_reuse || !sk2->sk_reuse ||
-		     sk2->sk_state == TCP_LISTEN) &&
-		     ipv6_rcv_saddr_equal(sk, sk2))
-			break;
+		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
+			if ((!reuse || !sk2->sk_reuse ||
+			     sk2->sk_state == TCP_LISTEN) &&
+			    (!reuseport || !sk2->sk_reuseport ||
+			     (sk2->sk_state != TCP_TIME_WAIT &&
+			      !uid_eq(uid,
+				      sock_i_uid((struct sock *)sk2))))) {
+				if (ipv6_rcv_saddr_equal(sk, sk2))
+					break;
+			}
+		}
 	}
 
 	return node != NULL;
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index dea17fd28e5..32b4a1675d8 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -158,25 +158,38 @@ static inline int compute_score(struct sock *sk, struct net *net,
 }
 
 struct sock *inet6_lookup_listener(struct net *net,
-		struct inet_hashinfo *hashinfo, const struct in6_addr *daddr,
+		struct inet_hashinfo *hashinfo, const struct in6_addr *saddr,
+		const __be16 sport, const struct in6_addr *daddr,
 		const unsigned short hnum, const int dif)
 {
 	struct sock *sk;
 	const struct hlist_nulls_node *node;
 	struct sock *result;
-	int score, hiscore;
+	int score, hiscore, matches = 0, reuseport = 0;
+	u32 phash = 0;
 	unsigned int hash = inet_lhashfn(net, hnum);
 	struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
 
 	rcu_read_lock();
 begin:
 	result = NULL;
-	hiscore = -1;
+	hiscore = 0;
 	sk_nulls_for_each(sk, node, &ilb->head) {
 		score = compute_score(sk, net, hnum, daddr, dif);
 		if (score > hiscore) {
 			hiscore = score;
 			result = sk;
+			reuseport = sk->sk_reuseport;
+			if (reuseport) {
+				phash = inet6_ehashfn(net, daddr, hnum,
+						      saddr, sport);
+				matches = 1;
+			}
+		} else if (score == hiscore && reuseport) {
+			matches++;
+			if (((u64)phash * matches) >> 32 == 0)
+				result = sk;
+			phash = next_pseudo_random32(phash);
 		}
 	}
 	/*
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
new file mode 100644
index 00000000000..72d198b8e4d
--- /dev/null
+++ b/net/ipv6/ip6_checksum.c
@@ -0,0 +1,97 @@
+#include <net/ip.h>
+#include <net/udp.h>
+#include <net/udplite.h>
+#include <asm/checksum.h>
+
+#ifndef _HAVE_ARCH_IPV6_CSUM
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+			const struct in6_addr *daddr,
+			__u32 len, unsigned short proto,
+			__wsum csum)
+{
+
+	int carry;
+	__u32 ulen;
+	__u32 uproto;
+	__u32 sum = (__force u32)csum;
+
+	sum += (__force u32)saddr->s6_addr32[0];
+	carry = (sum < (__force u32)saddr->s6_addr32[0]);
+	sum += carry;
+
+	sum += (__force u32)saddr->s6_addr32[1];
+	carry = (sum < (__force u32)saddr->s6_addr32[1]);
+	sum += carry;
+
+	sum += (__force u32)saddr->s6_addr32[2];
+	carry = (sum < (__force u32)saddr->s6_addr32[2]);
+	sum += carry;
+
+	sum += (__force u32)saddr->s6_addr32[3];
+	carry = (sum < (__force u32)saddr->s6_addr32[3]);
+	sum += carry;
+
+	sum += (__force u32)daddr->s6_addr32[0];
+	carry = (sum < (__force u32)daddr->s6_addr32[0]);
+	sum += carry;
+
+	sum += (__force u32)daddr->s6_addr32[1];
+	carry = (sum < (__force u32)daddr->s6_addr32[1]);
+	sum += carry;
+
+	sum += (__force u32)daddr->s6_addr32[2];
+	carry = (sum < (__force u32)daddr->s6_addr32[2]);
+	sum += carry;
+
+	sum += (__force u32)daddr->s6_addr32[3];
+	carry = (sum < (__force u32)daddr->s6_addr32[3]);
+	sum += carry;
+
+	ulen = (__force u32)htonl((__u32) len);
+	sum += ulen;
+	carry = (sum < ulen);
+	sum += carry;
+
+	uproto = (__force u32)htonl(proto);
+	sum += uproto;
+	carry = (sum < uproto);
+	sum += carry;
+
+	return csum_fold((__force __wsum)sum);
+}
+EXPORT_SYMBOL(csum_ipv6_magic);
+#endif
+
+int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto)
+{
+	int err;
+
+	UDP_SKB_CB(skb)->partial_cov = 0;
+	UDP_SKB_CB(skb)->cscov = skb->len;
+
+	if (proto == IPPROTO_UDPLITE) {
+		err = udplite_checksum_init(skb, uh);
+		if (err)
+			return err;
+	}
+
+	if (uh->check == 0) {
+		/* RFC 2460 section 8.1 says that we SHOULD log
+		   this error. Well, it is reasonable.
+		 */
+		LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n");
+		return 1;
+	}
+	if (skb->ip_summed == CHECKSUM_COMPLETE &&
+	    !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
+			     skb->len, proto, skb->csum))
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	if (!skb_csum_unnecessary(skb))
+		skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+							 &ipv6_hdr(skb)->daddr,
+							 skb->len, proto, 0));
+
+	return 0;
+}
+EXPORT_SYMBOL(udp6_csum_init);
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 29124b7a04c..a7da2f47247 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -51,25 +51,38 @@
 #define FL_HASH(l)	(ntohl(l)&FL_HASH_MASK)
 
 static atomic_t fl_size = ATOMIC_INIT(0);
-static struct ip6_flowlabel *fl_ht[FL_HASH_MASK+1];
+static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1];
 
 static void ip6_fl_gc(unsigned long dummy);
 static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc, 0, 0);
 
 /* FL hash table lock: it protects only of GC */
 
-static DEFINE_RWLOCK(ip6_fl_lock);
+static DEFINE_SPINLOCK(ip6_fl_lock);
 
 /* Big socket sock */
 
-static DEFINE_RWLOCK(ip6_sk_fl_lock);
+static DEFINE_SPINLOCK(ip6_sk_fl_lock);
 
+#define for_each_fl_rcu(hash, fl)				\
+	for (fl = rcu_dereference_bh(fl_ht[(hash)]);		\
+	     fl != NULL;					\
+	     fl = rcu_dereference_bh(fl->next))
+#define for_each_fl_continue_rcu(fl)				\
+	for (fl = rcu_dereference_bh(fl->next);			\
+	     fl != NULL;					\
+	     fl = rcu_dereference_bh(fl->next))
+
+#define for_each_sk_fl_rcu(np, sfl)				\
+	for (sfl = rcu_dereference_bh(np->ipv6_fl_list);	\
+	     sfl != NULL;					\
+	     sfl = rcu_dereference_bh(sfl->next))
 
 static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label)
 {
 	struct ip6_flowlabel *fl;
 
-	for (fl=fl_ht[FL_HASH(label)]; fl; fl = fl->next) {
+	for_each_fl_rcu(FL_HASH(label), fl) {
 		if (fl->label == label && net_eq(fl->fl_net, net))
 			return fl;
 	}
@@ -80,11 +93,11 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
 {
 	struct ip6_flowlabel *fl;
 
-	read_lock_bh(&ip6_fl_lock);
+	rcu_read_lock_bh();
 	fl = __fl_lookup(net, label);
-	if (fl)
-		atomic_inc(&fl->users);
-	read_unlock_bh(&ip6_fl_lock);
+	if (fl && !atomic_inc_not_zero(&fl->users))
+		fl = NULL;
+	rcu_read_unlock_bh();
 	return fl;
 }
 
@@ -96,13 +109,13 @@ static void fl_free(struct ip6_flowlabel *fl)
 			put_pid(fl->owner.pid);
 		release_net(fl->fl_net);
 		kfree(fl->opt);
+		kfree_rcu(fl, rcu);
 	}
-	kfree(fl);
 }
 
 static void fl_release(struct ip6_flowlabel *fl)
 {
-	write_lock_bh(&ip6_fl_lock);
+	spin_lock_bh(&ip6_fl_lock);
 
 	fl->lastuse = jiffies;
 	if (atomic_dec_and_test(&fl->users)) {
@@ -119,7 +132,7 @@ static void fl_release(struct ip6_flowlabel *fl)
 		    time_after(ip6_fl_gc_timer.expires, ttd))
 			mod_timer(&ip6_fl_gc_timer, ttd);
 	}
-	write_unlock_bh(&ip6_fl_lock);
+	spin_unlock_bh(&ip6_fl_lock);
 }
 
 static void ip6_fl_gc(unsigned long dummy)
@@ -128,12 +141,13 @@ static void ip6_fl_gc(unsigned long dummy)
 	unsigned long now = jiffies;
 	unsigned long sched = 0;
 
-	write_lock(&ip6_fl_lock);
+	spin_lock(&ip6_fl_lock);
 
 	for (i=0; i<=FL_HASH_MASK; i++) {
 		struct ip6_flowlabel *fl, **flp;
 		flp = &fl_ht[i];
-		while ((fl=*flp) != NULL) {
+		while ((fl = rcu_dereference_protected(*flp,
+						       lockdep_is_held(&ip6_fl_lock))) != NULL) {
 			if (atomic_read(&fl->users) == 0) {
 				unsigned long ttd = fl->lastuse + fl->linger;
 				if (time_after(ttd, fl->expires))
@@ -156,18 +170,19 @@ static void ip6_fl_gc(unsigned long dummy)
 	if (sched) {
 		mod_timer(&ip6_fl_gc_timer, sched);
 	}
-	write_unlock(&ip6_fl_lock);
+	spin_unlock(&ip6_fl_lock);
 }
 
 static void __net_exit ip6_fl_purge(struct net *net)
 {
 	int i;
 
-	write_lock(&ip6_fl_lock);
+	spin_lock(&ip6_fl_lock);
 	for (i = 0; i <= FL_HASH_MASK; i++) {
 		struct ip6_flowlabel *fl, **flp;
 		flp = &fl_ht[i];
-		while ((fl = *flp) != NULL) {
+		while ((fl = rcu_dereference_protected(*flp,
+						       lockdep_is_held(&ip6_fl_lock))) != NULL) {
 			if (net_eq(fl->fl_net, net) &&
 			    atomic_read(&fl->users) == 0) {
 				*flp = fl->next;
@@ -178,7 +193,7 @@ static void __net_exit ip6_fl_purge(struct net *net)
 			flp = &fl->next;
 		}
 	}
-	write_unlock(&ip6_fl_lock);
+	spin_unlock(&ip6_fl_lock);
 }
 
 static struct ip6_flowlabel *fl_intern(struct net *net,
@@ -188,7 +203,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net,
 
 	fl->label = label & IPV6_FLOWLABEL_MASK;
 
-	write_lock_bh(&ip6_fl_lock);
+	spin_lock_bh(&ip6_fl_lock);
 	if (label == 0) {
 		for (;;) {
 			fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK;
@@ -210,16 +225,16 @@ static struct ip6_flowlabel *fl_intern(struct net *net,
 		lfl = __fl_lookup(net, fl->label);
 		if (lfl != NULL) {
 			atomic_inc(&lfl->users);
-			write_unlock_bh(&ip6_fl_lock);
+			spin_unlock_bh(&ip6_fl_lock);
 			return lfl;
 		}
 	}
 
 	fl->lastuse = jiffies;
 	fl->next = fl_ht[FL_HASH(fl->label)];
-	fl_ht[FL_HASH(fl->label)] = fl;
+	rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl);
 	atomic_inc(&fl_size);
-	write_unlock_bh(&ip6_fl_lock);
+	spin_unlock_bh(&ip6_fl_lock);
 	return NULL;
 }
 
@@ -234,17 +249,17 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label)
 
 	label &= IPV6_FLOWLABEL_MASK;
 
-	read_lock_bh(&ip6_sk_fl_lock);
-	for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) {
+	rcu_read_lock_bh();
+	for_each_sk_fl_rcu(np, sfl) {
 		struct ip6_flowlabel *fl = sfl->fl;
 		if (fl->label == label) {
 			fl->lastuse = jiffies;
 			atomic_inc(&fl->users);
-			read_unlock_bh(&ip6_sk_fl_lock);
+			rcu_read_unlock_bh();
 			return fl;
 		}
 	}
-	read_unlock_bh(&ip6_sk_fl_lock);
+	rcu_read_unlock_bh();
 	return NULL;
 }
 
@@ -255,11 +270,21 @@ void fl6_free_socklist(struct sock *sk)
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct ipv6_fl_socklist *sfl;
 
-	while ((sfl = np->ipv6_fl_list) != NULL) {
+	if (!rcu_access_pointer(np->ipv6_fl_list))
+		return;
+
+	spin_lock_bh(&ip6_sk_fl_lock);
+	while ((sfl = rcu_dereference_protected(np->ipv6_fl_list,
+						lockdep_is_held(&ip6_sk_fl_lock))) != NULL) {
 		np->ipv6_fl_list = sfl->next;
+		spin_unlock_bh(&ip6_sk_fl_lock);
+
 		fl_release(sfl->fl);
-		kfree(sfl);
+		kfree_rcu(sfl, rcu);
+
+		spin_lock_bh(&ip6_sk_fl_lock);
 	}
+	spin_unlock_bh(&ip6_sk_fl_lock);
 }
 
 /* Service routines */
@@ -365,8 +390,8 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
 		msg.msg_control = (void*)(fl->opt+1);
 		memset(&flowi6, 0, sizeof(flowi6));
 
-		err = datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt, &junk,
-					&junk, &junk);
+		err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt,
+					    &junk, &junk, &junk);
 		if (err)
 			goto done;
 		err = -EINVAL;
@@ -424,7 +449,7 @@ static int mem_check(struct sock *sk)
 	if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
 		return 0;
 
-	for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next)
+	for_each_sk_fl_rcu(np, sfl)
 		count++;
 
 	if (room <= 0 ||
@@ -467,11 +492,11 @@ static bool ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2)
 static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,
 		struct ip6_flowlabel *fl)
 {
-	write_lock_bh(&ip6_sk_fl_lock);
+	spin_lock_bh(&ip6_sk_fl_lock);
 	sfl->fl = fl;
 	sfl->next = np->ipv6_fl_list;
-	np->ipv6_fl_list = sfl;
-	write_unlock_bh(&ip6_sk_fl_lock);
+	rcu_assign_pointer(np->ipv6_fl_list, sfl);
+	spin_unlock_bh(&ip6_sk_fl_lock);
 }
 
 int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
@@ -493,31 +518,33 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 
 	switch (freq.flr_action) {
 	case IPV6_FL_A_PUT:
-		write_lock_bh(&ip6_sk_fl_lock);
-		for (sflp = &np->ipv6_fl_list; (sfl=*sflp)!=NULL; sflp = &sfl->next) {
+		spin_lock_bh(&ip6_sk_fl_lock);
+		for (sflp = &np->ipv6_fl_list;
+		     (sfl = rcu_dereference(*sflp))!=NULL;
+		     sflp = &sfl->next) {
 			if (sfl->fl->label == freq.flr_label) {
 				if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
 					np->flow_label &= ~IPV6_FLOWLABEL_MASK;
-				*sflp = sfl->next;
-				write_unlock_bh(&ip6_sk_fl_lock);
+				*sflp = rcu_dereference(sfl->next);
+				spin_unlock_bh(&ip6_sk_fl_lock);
 				fl_release(sfl->fl);
-				kfree(sfl);
+				kfree_rcu(sfl, rcu);
 				return 0;
 			}
 		}
-		write_unlock_bh(&ip6_sk_fl_lock);
+		spin_unlock_bh(&ip6_sk_fl_lock);
 		return -ESRCH;
 
 	case IPV6_FL_A_RENEW:
-		read_lock_bh(&ip6_sk_fl_lock);
-		for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
+		rcu_read_lock_bh();
+		for_each_sk_fl_rcu(np, sfl) {
 			if (sfl->fl->label == freq.flr_label) {
 				err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires);
-				read_unlock_bh(&ip6_sk_fl_lock);
+				rcu_read_unlock_bh();
 				return err;
 			}
 		}
-		read_unlock_bh(&ip6_sk_fl_lock);
+		rcu_read_unlock_bh();
 
 		if (freq.flr_share == IPV6_FL_S_NONE &&
 		    ns_capable(net->user_ns, CAP_NET_ADMIN)) {
@@ -541,11 +568,11 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 
 		if (freq.flr_label) {
 			err = -EEXIST;
-			read_lock_bh(&ip6_sk_fl_lock);
-			for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
+			rcu_read_lock_bh();
+			for_each_sk_fl_rcu(np, sfl) {
 				if (sfl->fl->label == freq.flr_label) {
 					if (freq.flr_flags&IPV6_FL_F_EXCL) {
-						read_unlock_bh(&ip6_sk_fl_lock);
+						rcu_read_unlock_bh();
 						goto done;
 					}
 					fl1 = sfl->fl;
@@ -553,7 +580,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 					break;
 				}
 			}
-			read_unlock_bh(&ip6_sk_fl_lock);
+			rcu_read_unlock_bh();
 
 			if (fl1 == NULL)
 				fl1 = fl_lookup(net, freq.flr_label);
@@ -641,13 +668,13 @@ static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq)
 	struct net *net = seq_file_net(seq);
 
 	for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) {
-		fl = fl_ht[state->bucket];
-
-		while (fl && !net_eq(fl->fl_net, net))
-			fl = fl->next;
-		if (fl)
-			break;
+		for_each_fl_rcu(state->bucket, fl) {
+			if (net_eq(fl->fl_net, net))
+				goto out;
+		}
 	}
+	fl = NULL;
+out:
 	return fl;
 }
 
@@ -656,18 +683,22 @@ static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flo
 	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
 	struct net *net = seq_file_net(seq);
 
-	fl = fl->next;
+	for_each_fl_continue_rcu(fl) {
+		if (net_eq(fl->fl_net, net))
+			goto out;
+	}
+
 try_again:
-	while (fl && !net_eq(fl->fl_net, net))
-		fl = fl->next;
-
-	while (!fl) {
-		if (++state->bucket <= FL_HASH_MASK) {
-			fl = fl_ht[state->bucket];
-			goto try_again;
-		} else
-			break;
+	if (++state->bucket <= FL_HASH_MASK) {
+		for_each_fl_rcu(state->bucket, fl) {
+			if (net_eq(fl->fl_net, net))
+				goto out;
+		}
+		goto try_again;
 	}
+	fl = NULL;
+
+out:
 	return fl;
 }
 
@@ -681,9 +712,9 @@ static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(ip6_fl_lock)
+	__acquires(RCU)
 {
-	read_lock_bh(&ip6_fl_lock);
+	rcu_read_lock_bh();
 	return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
 }
 
@@ -700,9 +731,9 @@ static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void ip6fl_seq_stop(struct seq_file *seq, void *v)
-	__releases(ip6_fl_lock)
+	__releases(RCU)
 {
-	read_unlock_bh(&ip6_fl_lock);
+	rcu_read_unlock_bh();
 }
 
 static int ip6fl_seq_show(struct seq_file *seq, void *v)
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 867466c96aa..e4efffe2522 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -758,8 +758,6 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
 		skb_dst_set_noref(skb, dst);
 	}
 
-	skb->transport_header = skb->network_header;
-
 	proto = NEXTHDR_GRE;
 	if (encap_limit >= 0) {
 		init_tel_txopt(&opt, encap_limit);
@@ -768,14 +766,13 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
 
 	skb_push(skb, gre_hlen);
 	skb_reset_network_header(skb);
+	skb_set_transport_header(skb, sizeof(*ipv6h));
 
 	/*
 	 *	Push down and install the IP header.
 	 */
 	ipv6h = ipv6_hdr(skb);
-	*(__be32 *)ipv6h = fl6->flowlabel | htonl(0x60000000);
-	dsfield = INET_ECN_encapsulate(0, dsfield);
-	ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
+	ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel);
 	ipv6h->hop_limit = tunnel->parms.hop_limit;
 	ipv6h->nexthdr = proto;
 	ipv6h->saddr = fl6->saddr;
@@ -961,7 +958,7 @@ static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb,
 	int ret;
 
 	if (!ip6_tnl_xmit_ctl(t))
-		return -1;
+		goto tx_err;
 
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
@@ -1241,7 +1238,7 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
 	struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen);
 	__be16 *p = (__be16 *)(ipv6h+1);
 
-	*(__be32 *)ipv6h = t->fl.u.ip6.flowlabel | htonl(0x60000000);
+	ip6_flow_hdr(ipv6h, 0, t->fl.u.ip6.flowlabel);
 	ipv6h->hop_limit = t->parms.hop_limit;
 	ipv6h->nexthdr = NEXTHDR_GRE;
 	ipv6h->saddr = t->parms.laddr;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index a52d864d562..5b10414e619 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -118,6 +118,15 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 	    ipv6_addr_loopback(&hdr->daddr))
 		goto err;
 
+	/* RFC4291 2.7
+	 * Nodes must not originate a packet to a multicast address whose scope
+	 * field contains the reserved value 0; if such a packet is received, it
+	 * must be silently dropped.
+	 */
+	if (ipv6_addr_is_multicast(&hdr->daddr) &&
+	    IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 0)
+		goto err;
+
 	/*
 	 * RFC4291 2.7
 	 * Multicast addresses must not be used as source addresses in IPv6
@@ -212,7 +221,7 @@ resubmit:
 			if (ipv6_addr_is_multicast(&hdr->daddr) &&
 			    !ipv6_chk_mcast_addr(skb->dev, &hdr->daddr,
 			    &hdr->saddr) &&
-			    !ipv6_is_mld(skb, nexthdr))
+			    !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb)))
 				goto discard;
 		}
 		if (!(ipprot->flags & INET6_PROTO_NOPOLICY) &&
@@ -280,10 +289,8 @@ int ip6_mc_input(struct sk_buff *skb)
 		struct inet6_skb_parm *opt = IP6CB(skb);
 
 		/* Check for MLD */
-		if (unlikely(opt->ra)) {
+		if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
 			/* Check if this is a mld message */
-			u8 *ptr = skb_network_header(skb) + opt->ra;
-			struct icmp6hdr *icmp6;
 			u8 nexthdr = hdr->nexthdr;
 			__be16 frag_off;
 			int offset;
@@ -291,7 +298,7 @@ int ip6_mc_input(struct sk_buff *skb)
 			/* Check if the value of Router Alert
 			 * is for MLD (0x0000).
 			 */
-			if ((ptr[2] | ptr[3]) == 0) {
+			if (opt->ra == htons(IPV6_OPT_ROUTERALERT_MLD)) {
 				deliver = false;
 
 				if (!ipv6_ext_hdr(nexthdr)) {
@@ -303,24 +310,10 @@ int ip6_mc_input(struct sk_buff *skb)
 				if (offset < 0)
 					goto out;
 
-				if (nexthdr != IPPROTO_ICMPV6)
+				if (!ipv6_is_mld(skb, nexthdr, offset))
 					goto out;
 
-				if (!pskb_may_pull(skb, (skb_network_header(skb) +
-						   offset + 1 - skb->data)))
-					goto out;
-
-				icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
-
-				switch (icmp6->icmp6_type) {
-				case ICMPV6_MGM_QUERY:
-				case ICMPV6_MGM_REPORT:
-				case ICMPV6_MGM_REDUCTION:
-				case ICMPV6_MLD2_REPORT:
-					deliver = true;
-					break;
-				}
-				goto out;
+				deliver = true;
 			}
 			/* unknown RA - process it normally */
 		}
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index f26f0da7f09..8234c1dcdf7 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -99,6 +99,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 		     ~(SKB_GSO_UDP |
 		       SKB_GSO_DODGY |
 		       SKB_GSO_TCP_ECN |
+		       SKB_GSO_GRE |
 		       SKB_GSO_TCPV6 |
 		       0)))
 		goto out;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5552d13ae92..155eccfa776 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -56,8 +56,6 @@
 #include <net/checksum.h>
 #include <linux/mroute6.h>
 
-int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
-
 int __ip6_local_out(struct sk_buff *skb)
 {
 	int len;
@@ -88,7 +86,8 @@ static int ip6_finish_output2(struct sk_buff *skb)
 	struct dst_entry *dst = skb_dst(skb);
 	struct net_device *dev = dst->dev;
 	struct neighbour *neigh;
-	struct rt6_info *rt;
+	struct in6_addr *nexthop;
+	int ret;
 
 	skb->protocol = htons(ETH_P_IPV6);
 	skb->dev = dev;
@@ -121,12 +120,26 @@ static int ip6_finish_output2(struct sk_buff *skb)
 
 		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
 				skb->len);
+
+		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
+		    IPV6_ADDR_SCOPE_NODELOCAL &&
+		    !(dev->flags & IFF_LOOPBACK)) {
+			kfree_skb(skb);
+			return 0;
+		}
 	}
 
-	rt = (struct rt6_info *) dst;
-	neigh = rt->n;
-	if (neigh)
-		return dst_neigh_output(dst, neigh, skb);
+	rcu_read_lock_bh();
+	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
+	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
+	if (unlikely(!neigh))
+		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
+	if (!IS_ERR(neigh)) {
+		ret = dst_neigh_output(dst, neigh, skb);
+		rcu_read_unlock_bh();
+		return ret;
+	}
+	rcu_read_unlock_bh();
 
 	IP6_INC_STATS_BH(dev_net(dst->dev),
 			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
@@ -216,7 +229,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 	if (hlimit < 0)
 		hlimit = ip6_dst_hoplimit(dst);
 
-	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;
+	ip6_flow_hdr(hdr, tclass, fl6->flowlabel);
 
 	hdr->payload_len = htons(seg_len);
 	hdr->nexthdr = proto;
@@ -236,9 +249,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 			       dst->dev, dst_output);
 	}
 
-	net_dbg_ratelimited("IPv6: sending pkt_too_big to self\n");
 	skb->dev = dst->dev;
-	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+	ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
 	kfree_skb(skb);
 	return -EMSGSIZE;
@@ -246,39 +258,6 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 
 EXPORT_SYMBOL(ip6_xmit);
 
-/*
- *	To avoid extra problems ND packets are send through this
- *	routine. It's code duplication but I really want to avoid
- *	extra checks since ipv6_build_header is used by TCP (which
- *	is for us performance critical)
- */
-
-int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
-	       const struct in6_addr *saddr, const struct in6_addr *daddr,
-	       int proto, int len)
-{
-	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct ipv6hdr *hdr;
-
-	skb->protocol = htons(ETH_P_IPV6);
-	skb->dev = dev;
-
-	skb_reset_network_header(skb);
-	skb_put(skb, sizeof(struct ipv6hdr));
-	hdr = ipv6_hdr(skb);
-
-	*(__be32*)hdr = htonl(0x60000000);
-
-	hdr->payload_len = htons(len);
-	hdr->nexthdr = proto;
-	hdr->hop_limit = np->hop_limit;
-
-	hdr->saddr = *saddr;
-	hdr->daddr = *daddr;
-
-	return 0;
-}
-
 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
 {
 	struct ip6_ra_chain *ra;
@@ -913,8 +892,12 @@ static int ip6_dst_lookup_tail(struct sock *sk,
 	 * dst entry of the nexthop router
 	 */
 	rt = (struct rt6_info *) *dst;
-	n = rt->n;
-	if (n && !(n->nud_state & NUD_VALID)) {
+	rcu_read_lock_bh();
+	n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr));
+	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
+	rcu_read_unlock_bh();
+
+	if (err) {
 		struct inet6_ifaddr *ifp;
 		struct flowi6 fl_gw6;
 		int redirect;
@@ -1213,10 +1196,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 		if (dst_allfrag(rt->dst.path))
 			cork->flags |= IPCORK_ALLFRAG;
 		cork->length = 0;
-		exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
+		exthdrlen = (opt ? opt->opt_flen : 0);
 		length += exthdrlen;
 		transhdrlen += exthdrlen;
-		dst_exthdrlen = rt->dst.header_len;
+		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
 	} else {
 		rt = (struct rt6_info *)cork->dst;
 		fl6 = &inet->cork.fl.u.ip6;
@@ -1548,9 +1531,7 @@ int ip6_push_pending_frames(struct sock *sk)
 	skb_reset_network_header(skb);
 	hdr = ipv6_hdr(skb);
 
-	*(__be32*)hdr = fl6->flowlabel |
-		     htonl(0x60000000 | ((int)np->cork.tclass << 20));
-
+	ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);
 	hdr->hop_limit = np->cork.hop_limit;
 	hdr->nexthdr = proto;
 	hdr->saddr = fl6->saddr;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index a14f28b280f..fff83cbc197 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1030,9 +1030,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
 	skb_push(skb, sizeof(struct ipv6hdr));
 	skb_reset_network_header(skb);
 	ipv6h = ipv6_hdr(skb);
-	*(__be32*)ipv6h = fl6->flowlabel | htonl(0x60000000);
-	dsfield = INET_ECN_encapsulate(0, dsfield);
-	ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
+	ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel);
 	ipv6h->hop_limit = t->parms.hop_limit;
 	ipv6h->nexthdr = proto;
 	ipv6h->saddr = fl6->saddr;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 26dcdec9e3a..351ce98e90d 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1017,6 +1017,50 @@ static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
 	return NULL;
 }
 
+/* Look for a (*,*,oif) entry */
+static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
+						      mifi_t mifi)
+{
+	int line = MFC6_HASH(&in6addr_any, &in6addr_any);
+	struct mfc6_cache *c;
+
+	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
+		if (ipv6_addr_any(&c->mf6c_origin) &&
+		    ipv6_addr_any(&c->mf6c_mcastgrp) &&
+		    (c->mfc_un.res.ttls[mifi] < 255))
+			return c;
+
+	return NULL;
+}
+
+/* Look for a (*,G) entry */
+static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
+					       struct in6_addr *mcastgrp,
+					       mifi_t mifi)
+{
+	int line = MFC6_HASH(mcastgrp, &in6addr_any);
+	struct mfc6_cache *c, *proxy;
+
+	if (ipv6_addr_any(mcastgrp))
+		goto skip;
+
+	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
+		if (ipv6_addr_any(&c->mf6c_origin) &&
+		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
+			if (c->mfc_un.res.ttls[mifi] < 255)
+				return c;
+
+			/* It's ok if the mifi is part of the static tree */
+			proxy = ip6mr_cache_find_any_parent(mrt,
+							    c->mf6c_parent);
+			if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
+				return c;
+		}
+
+skip:
+	return ip6mr_cache_find_any_parent(mrt, mifi);
+}
+
 /*
  *	Allocate a multicast cache entry
  */
@@ -1247,7 +1291,8 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
  *	MFC6 cache manipulation by user space
  */
 
-static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
+static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
+			    int parent)
 {
 	int line;
 	struct mfc6_cache *c, *next;
@@ -1256,7 +1301,9 @@ static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
 
 	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
-		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
+		    ipv6_addr_equal(&c->mf6c_mcastgrp,
+				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
+		    (parent == -1 || parent == c->mf6c_parent)) {
 			write_lock_bh(&mrt_lock);
 			list_del(&c->list);
 			write_unlock_bh(&mrt_lock);
@@ -1391,7 +1438,7 @@ void ip6_mr_cleanup(void)
 }
 
 static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
-			 struct mf6cctl *mfc, int mrtsock)
+			 struct mf6cctl *mfc, int mrtsock, int parent)
 {
 	bool found = false;
 	int line;
@@ -1413,7 +1460,9 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
 
 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
-		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
+		    ipv6_addr_equal(&c->mf6c_mcastgrp,
+				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
+		    (parent == -1 || parent == mfc->mf6cc_parent)) {
 			found = true;
 			break;
 		}
@@ -1430,7 +1479,8 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
 		return 0;
 	}
 
-	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
+	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
+	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
 		return -EINVAL;
 
 	c = ip6mr_cache_alloc();
@@ -1596,7 +1646,7 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
 
 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
 {
-	int ret;
+	int ret, parent = 0;
 	struct mif6ctl vif;
 	struct mf6cctl mfc;
 	mifi_t mifi;
@@ -1653,15 +1703,21 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 	 */
 	case MRT6_ADD_MFC:
 	case MRT6_DEL_MFC:
+		parent = -1;
+	case MRT6_ADD_MFC_PROXY:
+	case MRT6_DEL_MFC_PROXY:
 		if (optlen < sizeof(mfc))
 			return -EINVAL;
 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
 			return -EFAULT;
+		if (parent == 0)
+			parent = mfc.mf6cc_parent;
 		rtnl_lock();
-		if (optname == MRT6_DEL_MFC)
-			ret = ip6mr_mfc_delete(mrt, &mfc);
+		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
+			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
 		else
-			ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk);
+			ret = ip6mr_mfc_add(net, mrt, &mfc,
+					    sk == mrt->mroute6_sk, parent);
 		rtnl_unlock();
 		return ret;
 
@@ -1710,6 +1766,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 			return -EINVAL;
 		if (get_user(v, (u32 __user *)optval))
 			return -EFAULT;
+		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
+		if (v != RT_TABLE_DEFAULT && v >= 100000000)
+			return -EINVAL;
 		if (sk == mrt->mroute6_sk)
 			return -EBUSY;
 
@@ -2015,19 +2074,29 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
 {
 	int psend = -1;
 	int vif, ct;
+	int true_vifi = ip6mr_find_vif(mrt, skb->dev);
 
 	vif = cache->mf6c_parent;
 	cache->mfc_un.res.pkt++;
 	cache->mfc_un.res.bytes += skb->len;
 
+	if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
+		struct mfc6_cache *cache_proxy;
+
+		/* For an (*,G) entry, we only check that the incomming
+		 * interface is part of the static tree.
+		 */
+		cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
+		if (cache_proxy &&
+		    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+			goto forward;
+	}
+
 	/*
 	 * Wrong interface: drop packet and (maybe) send PIM assert.
 	 */
 	if (mrt->vif6_table[vif].dev != skb->dev) {
-		int true_vifi;
-
 		cache->mfc_un.res.wrong_if++;
-		true_vifi = ip6mr_find_vif(mrt, skb->dev);
 
 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
 		    /* pimsm uses asserts, when switching from RPT to SPT,
@@ -2045,14 +2114,32 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
 		goto dont_forward;
 	}
 
+forward:
 	mrt->vif6_table[vif].pkt_in++;
 	mrt->vif6_table[vif].bytes_in += skb->len;
 
 	/*
 	 *	Forward the frame
 	 */
+	if (ipv6_addr_any(&cache->mf6c_origin) &&
+	    ipv6_addr_any(&cache->mf6c_mcastgrp)) {
+		if (true_vifi >= 0 &&
+		    true_vifi != cache->mf6c_parent &&
+		    ipv6_hdr(skb)->hop_limit >
+				cache->mfc_un.res.ttls[cache->mf6c_parent]) {
+			/* It's an (*,*) entry and the packet is not coming from
+			 * the upstream: forward the packet to the upstream
+			 * only.
+			 */
+			psend = cache->mf6c_parent;
+			goto last_forward;
+		}
+		goto dont_forward;
+	}
 	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
-		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
+		/* For (*,G) entry, don't forward to the incoming interface */
+		if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
+		    ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
 			if (psend != -1) {
 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 				if (skb2)
@@ -2061,6 +2148,7 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
 			psend = ct;
 		}
 	}
+last_forward:
 	if (psend != -1) {
 		ip6mr_forward2(net, mrt, skb, cache, psend);
 		return 0;
@@ -2096,6 +2184,14 @@ int ip6_mr_input(struct sk_buff *skb)
 	read_lock(&mrt_lock);
 	cache = ip6mr_cache_find(mrt,
 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
+	if (cache == NULL) {
+		int vif = ip6mr_find_vif(mrt, skb->dev);
+
+		if (vif >= 0)
+			cache = ip6mr_cache_find_any(mrt,
+						     &ipv6_hdr(skb)->daddr,
+						     vif);
+	}
 
 	/*
 	 *	No usable cache entry
@@ -2183,6 +2279,13 @@ int ip6mr_get_route(struct net *net,
 
 	read_lock(&mrt_lock);
 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
+	if (!cache && skb->dev) {
+		int vif = ip6mr_find_vif(mrt, skb->dev);
+
+		if (vif >= 0)
+			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
+						     vif);
+	}
 
 	if (!cache) {
 		struct sk_buff *skb2;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index ee94d31c9d4..d1e2e8ef29c 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -476,8 +476,8 @@ sticky_done:
 		msg.msg_controllen = optlen;
 		msg.msg_control = (void*)(opt+1);
 
-		retv = datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk, &junk,
-					 &junk);
+		retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk,
+					     &junk, &junk);
 		if (retv)
 			goto done;
 update:
@@ -1002,7 +1002,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 		release_sock(sk);
 
 		if (skb) {
-			int err = datagram_recv_ctl(sk, &msg, skb);
+			int err = ip6_datagram_recv_ctl(sk, &msg, skb);
 			kfree_skb(skb);
 			if (err)
 				return err;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 28dfa5f3801..3a2849f660c 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -376,8 +376,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 			goto done;	/* err = -EADDRNOTAVAIL */
 		rv = !0;
 		for (i=0; i<psl->sl_count; i++) {
-			rv = memcmp(&psl->sl_addr[i], source,
-				sizeof(struct in6_addr));
+			rv = !ipv6_addr_equal(&psl->sl_addr[i], source);
 			if (rv == 0)
 				break;
 		}
@@ -427,12 +426,10 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 	}
 	rv = 1;	/* > 0 for insert logic below if sl_count is 0 */
 	for (i=0; i<psl->sl_count; i++) {
-		rv = memcmp(&psl->sl_addr[i], source, sizeof(struct in6_addr));
-		if (rv == 0)
-			break;
+		rv = !ipv6_addr_equal(&psl->sl_addr[i], source);
+		if (rv == 0) /* There is an error in the address. */
+			goto done;
 	}
-	if (rv == 0)		/* address already there is an error */
-		goto done;
 	for (j=psl->sl_count-1; j>=i; j--)
 		psl->sl_addr[j+1] = psl->sl_addr[j];
 	psl->sl_addr[i] = *source;
@@ -664,6 +661,10 @@ static void igmp6_group_added(struct ifmcaddr6 *mc)
 	struct net_device *dev = mc->idev->dev;
 	char buf[MAX_ADDR_LEN];
 
+	if (IPV6_ADDR_MC_SCOPE(&mc->mca_addr) <
+	    IPV6_ADDR_SCOPE_LINKLOCAL)
+		return;
+
 	spin_lock_bh(&mc->mca_lock);
 	if (!(mc->mca_flags&MAF_LOADED)) {
 		mc->mca_flags |= MAF_LOADED;
@@ -690,6 +691,10 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc)
 	struct net_device *dev = mc->idev->dev;
 	char buf[MAX_ADDR_LEN];
 
+	if (IPV6_ADDR_MC_SCOPE(&mc->mca_addr) <
+	    IPV6_ADDR_SCOPE_LINKLOCAL)
+		return;
+
 	spin_lock_bh(&mc->mca_lock);
 	if (mc->mca_flags&MAF_LOADED) {
 		mc->mca_flags &= ~MAF_LOADED;
@@ -935,33 +940,6 @@ int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr)
 }
 
 /*
- * identify MLD packets for MLD filter exceptions
- */
-bool ipv6_is_mld(struct sk_buff *skb, int nexthdr)
-{
-	struct icmp6hdr *pic;
-
-	if (nexthdr != IPPROTO_ICMPV6)
-		return false;
-
-	if (!pskb_may_pull(skb, sizeof(struct icmp6hdr)))
-		return false;
-
-	pic = icmp6_hdr(skb);
-
-	switch (pic->icmp6_type) {
-	case ICMPV6_MGM_QUERY:
-	case ICMPV6_MGM_REPORT:
-	case ICMPV6_MGM_REDUCTION:
-	case ICMPV6_MLD2_REPORT:
-		return true;
-	default:
-		break;
-	}
-	return false;
-}
-
-/*
  *	check if the interface/address pair is valid
  */
 bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
@@ -1340,6 +1318,31 @@ mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted)
 	return scount;
 }
 
+static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb,
+		       struct net_device *dev,
+		       const struct in6_addr *saddr,
+		       const struct in6_addr *daddr,
+		       int proto, int len)
+{
+	struct ipv6hdr *hdr;
+
+	skb->protocol = htons(ETH_P_IPV6);
+	skb->dev = dev;
+
+	skb_reset_network_header(skb);
+	skb_put(skb, sizeof(struct ipv6hdr));
+	hdr = ipv6_hdr(skb);
+
+	ip6_flow_hdr(hdr, 0, 0);
+
+	hdr->payload_len = htons(len);
+	hdr->nexthdr = proto;
+	hdr->hop_limit = inet6_sk(sk)->hop_limit;
+
+	hdr->saddr = *saddr;
+	hdr->daddr = *daddr;
+}
+
 static struct sk_buff *mld_newpack(struct net_device *dev, int size)
 {
 	struct net *net = dev_net(dev);
@@ -1375,7 +1378,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
 	} else
 		saddr = &addr_buf;
 
-	ip6_nd_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0);
+	ip6_mc_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0);
 
 	memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
 
@@ -1418,7 +1421,7 @@ static void mld_sendpack(struct sk_buff *skb)
 	icmpv6_flow_init(net->ipv6.igmp_sk, &fl6, ICMPV6_MLD2_REPORT,
 			 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
 			 skb->dev->ifindex);
-	dst = icmp6_dst_alloc(skb->dev, NULL, &fl6);
+	dst = icmp6_dst_alloc(skb->dev, &fl6);
 
 	err = 0;
 	if (IS_ERR(dst)) {
@@ -1767,7 +1770,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 	} else
 		saddr = &addr_buf;
 
-	ip6_nd_hdr(sk, skb, dev, saddr, snd_addr, NEXTHDR_HOP, payload_len);
+	ip6_mc_hdr(sk, skb, dev, saddr, snd_addr, NEXTHDR_HOP, payload_len);
 
 	memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
 
@@ -1786,7 +1789,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 	icmpv6_flow_init(sk, &fl6, type,
 			 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
 			 skb->dev->ifindex);
-	dst = icmp6_dst_alloc(skb->dev, NULL, &fl6);
+	dst = icmp6_dst_alloc(skb->dev, &fl6);
 	if (IS_ERR(dst)) {
 		err = PTR_ERR(dst);
 		goto err_out;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 6574175795d..76ef4353d51 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -143,16 +143,12 @@ struct neigh_table nd_tbl = {
 	.gc_thresh3 =	1024,
 };
 
-static inline int ndisc_opt_addr_space(struct net_device *dev)
+static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data)
 {
-	return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type));
-}
-
-static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len,
-				  unsigned short addr_type)
-{
-	int pad   = ndisc_addr_option_pad(addr_type);
-	int space = NDISC_OPT_SPACE(data_len + pad);
+	int pad   = ndisc_addr_option_pad(skb->dev->type);
+	int data_len = skb->dev->addr_len;
+	int space = ndisc_opt_addr_space(skb->dev);
+	u8 *opt = skb_put(skb, space);
 
 	opt[0] = type;
 	opt[1] = space>>3;
@@ -166,7 +162,6 @@ static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len,
 	opt += data_len;
 	if ((space -= data_len) > 0)
 		memset(opt, 0, space);
-	return opt + space;
 }
 
 static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
@@ -370,91 +365,88 @@ static void pndisc_destructor(struct pneigh_entry *n)
 	ipv6_dev_mc_dec(dev, &maddr);
 }
 
-static struct sk_buff *ndisc_build_skb(struct net_device *dev,
-				       const struct in6_addr *daddr,
-				       const struct in6_addr *saddr,
-				       struct icmp6hdr *icmp6h,
-				       const struct in6_addr *target,
-				       int llinfo)
+static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
+				       int len)
 {
-	struct net *net = dev_net(dev);
-	struct sock *sk = net->ipv6.ndisc_sk;
-	struct sk_buff *skb;
-	struct icmp6hdr *hdr;
 	int hlen = LL_RESERVED_SPACE(dev);
 	int tlen = dev->needed_tailroom;
-	int len;
+	struct sock *sk = dev_net(dev)->ipv6.ndisc_sk;
+	struct sk_buff *skb;
 	int err;
-	u8 *opt;
-
-	if (!dev->addr_len)
-		llinfo = 0;
-
-	len = sizeof(struct icmp6hdr) + (target ? sizeof(*target) : 0);
-	if (llinfo)
-		len += ndisc_opt_addr_space(dev);
 
 	skb = sock_alloc_send_skb(sk,
-				  (MAX_HEADER + sizeof(struct ipv6hdr) +
-				   len + hlen + tlen),
+				  hlen + sizeof(struct ipv6hdr) + len + tlen,
 				  1, &err);
 	if (!skb) {
-		ND_PRINTK(0, err, "ND: %s failed to allocate an skb, err=%d\n",
+		ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb, err=%d\n",
 			  __func__, err);
 		return NULL;
 	}
 
-	skb_reserve(skb, hlen);
-	ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
+	skb->protocol = htons(ETH_P_IPV6);
+	skb->dev = dev;
 
-	skb->transport_header = skb->tail;
-	skb_put(skb, len);
+	skb_reserve(skb, hlen + sizeof(struct ipv6hdr));
+	skb_reset_transport_header(skb);
 
-	hdr = (struct icmp6hdr *)skb_transport_header(skb);
-	memcpy(hdr, icmp6h, sizeof(*hdr));
+	return skb;
+}
 
-	opt = skb_transport_header(skb) + sizeof(struct icmp6hdr);
-	if (target) {
-		*(struct in6_addr *)opt = *target;
-		opt += sizeof(*target);
-	}
+static void ip6_nd_hdr(struct sk_buff *skb,
+		       const struct in6_addr *saddr,
+		       const struct in6_addr *daddr,
+		       int hop_limit, int len)
+{
+	struct ipv6hdr *hdr;
 
-	if (llinfo)
-		ndisc_fill_addr_option(opt, llinfo, dev->dev_addr,
-				       dev->addr_len, dev->type);
+	skb_push(skb, sizeof(*hdr));
+	skb_reset_network_header(skb);
+	hdr = ipv6_hdr(skb);
 
-	hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len,
-					   IPPROTO_ICMPV6,
-					   csum_partial(hdr,
-							len, 0));
+	ip6_flow_hdr(hdr, 0, 0);
 
-	return skb;
+	hdr->payload_len = htons(len);
+	hdr->nexthdr = IPPROTO_ICMPV6;
+	hdr->hop_limit = hop_limit;
+
+	hdr->saddr = *saddr;
+	hdr->daddr = *daddr;
 }
 
-static void ndisc_send_skb(struct sk_buff *skb, struct net_device *dev,
-			   struct neighbour *neigh,
+static void ndisc_send_skb(struct sk_buff *skb,
 			   const struct in6_addr *daddr,
-			   const struct in6_addr *saddr,
-			   struct icmp6hdr *icmp6h)
+			   const struct in6_addr *saddr)
 {
-	struct flowi6 fl6;
-	struct dst_entry *dst;
-	struct net *net = dev_net(dev);
+	struct dst_entry *dst = skb_dst(skb);
+	struct net *net = dev_net(skb->dev);
 	struct sock *sk = net->ipv6.ndisc_sk;
 	struct inet6_dev *idev;
 	int err;
+	struct icmp6hdr *icmp6h = icmp6_hdr(skb);
 	u8 type;
 
 	type = icmp6h->icmp6_type;
 
-	icmpv6_flow_init(sk, &fl6, type, saddr, daddr, dev->ifindex);
-	dst = icmp6_dst_alloc(dev, neigh, &fl6);
-	if (IS_ERR(dst)) {
-		kfree_skb(skb);
-		return;
+	if (!dst) {
+		struct sock *sk = net->ipv6.ndisc_sk;
+		struct flowi6 fl6;
+
+		icmpv6_flow_init(sk, &fl6, type, saddr, daddr, skb->dev->ifindex);
+		dst = icmp6_dst_alloc(skb->dev, &fl6);
+		if (IS_ERR(dst)) {
+			kfree_skb(skb);
+			return;
+		}
+
+		skb_dst_set(skb, dst);
 	}
 
-	skb_dst_set(skb, dst);
+	icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, skb->len,
+					      IPPROTO_ICMPV6,
+					      csum_partial(icmp6h,
+							   skb->len, 0));
+
+	ip6_nd_hdr(skb, saddr, daddr, inet6_sk(sk)->hop_limit, skb->len);
 
 	rcu_read_lock();
 	idev = __in6_dev_get(dst->dev);
@@ -470,36 +462,17 @@ static void ndisc_send_skb(struct sk_buff *skb, struct net_device *dev,
 	rcu_read_unlock();
 }
 
-/*
- *	Send a Neighbour Discover packet
- */
-static void __ndisc_send(struct net_device *dev,
-			 struct neighbour *neigh,
-			 const struct in6_addr *daddr,
-			 const struct in6_addr *saddr,
-			 struct icmp6hdr *icmp6h, const struct in6_addr *target,
-			 int llinfo)
-{
-	struct sk_buff *skb;
-
-	skb = ndisc_build_skb(dev, daddr, saddr, icmp6h, target, llinfo);
-	if (!skb)
-		return;
-
-	ndisc_send_skb(skb, dev, neigh, daddr, saddr, icmp6h);
-}
-
 static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
 			  const struct in6_addr *daddr,
 			  const struct in6_addr *solicited_addr,
-			  int router, int solicited, int override, int inc_opt)
+			  bool router, bool solicited, bool override, bool inc_opt)
 {
+	struct sk_buff *skb;
 	struct in6_addr tmpaddr;
 	struct inet6_ifaddr *ifp;
 	const struct in6_addr *src_addr;
-	struct icmp6hdr icmp6h = {
-		.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
-	};
+	struct nd_msg *msg;
+	int optlen = 0;
 
 	/* for anycast or proxy, solicited_addr != src_addr */
 	ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1);
@@ -517,13 +490,32 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
 		src_addr = &tmpaddr;
 	}
 
-	icmp6h.icmp6_router = router;
-	icmp6h.icmp6_solicited = solicited;
-	icmp6h.icmp6_override = override;
+	if (!dev->addr_len)
+		inc_opt = 0;
+	if (inc_opt)
+		optlen += ndisc_opt_addr_space(dev);
+
+	skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
+	if (!skb)
+		return;
 
-	__ndisc_send(dev, neigh, daddr, src_addr,
-		     &icmp6h, solicited_addr,
-		     inc_opt ? ND_OPT_TARGET_LL_ADDR : 0);
+	msg = (struct nd_msg *)skb_put(skb, sizeof(*msg));
+	*msg = (struct nd_msg) {
+		.icmph = {
+			.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
+			.icmp6_router = router,
+			.icmp6_solicited = solicited,
+			.icmp6_override = override,
+		},
+		.target = *solicited_addr,
+	};
+
+	if (inc_opt)
+		ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR,
+				       dev->dev_addr);
+
+
+	ndisc_send_skb(skb, daddr, src_addr);
 }
 
 static void ndisc_send_unsol_na(struct net_device *dev)
@@ -551,10 +543,11 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
 		   const struct in6_addr *solicit,
 		   const struct in6_addr *daddr, const struct in6_addr *saddr)
 {
+	struct sk_buff *skb;
 	struct in6_addr addr_buf;
-	struct icmp6hdr icmp6h = {
-		.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION,
-	};
+	int inc_opt = dev->addr_len;
+	int optlen = 0;
+	struct nd_msg *msg;
 
 	if (saddr == NULL) {
 		if (ipv6_get_lladdr(dev, &addr_buf,
@@ -563,18 +556,37 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
 		saddr = &addr_buf;
 	}
 
-	__ndisc_send(dev, neigh, daddr, saddr,
-		     &icmp6h, solicit,
-		     !ipv6_addr_any(saddr) ? ND_OPT_SOURCE_LL_ADDR : 0);
+	if (ipv6_addr_any(saddr))
+		inc_opt = 0;
+	if (inc_opt)
+		optlen += ndisc_opt_addr_space(dev);
+
+	skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
+	if (!skb)
+		return;
+
+	msg = (struct nd_msg *)skb_put(skb, sizeof(*msg));
+	*msg = (struct nd_msg) {
+		.icmph = {
+			.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION,
+		},
+		.target = *solicit,
+	};
+
+	if (inc_opt)
+		ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
+				       dev->dev_addr);
+
+	ndisc_send_skb(skb, daddr, saddr);
 }
 
 void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
 		   const struct in6_addr *daddr)
 {
-	struct icmp6hdr icmp6h = {
-		.icmp6_type = NDISC_ROUTER_SOLICITATION,
-	};
+	struct sk_buff *skb;
+	struct rs_msg *msg;
 	int send_sllao = dev->addr_len;
+	int optlen = 0;
 
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 	/*
@@ -598,9 +610,27 @@ void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
 		}
 	}
 #endif
-	__ndisc_send(dev, NULL, daddr, saddr,
-		     &icmp6h, NULL,
-		     send_sllao ? ND_OPT_SOURCE_LL_ADDR : 0);
+	if (!dev->addr_len)
+		send_sllao = 0;
+	if (send_sllao)
+		optlen += ndisc_opt_addr_space(dev);
+
+	skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
+	if (!skb)
+		return;
+
+	msg = (struct rs_msg *)skb_put(skb, sizeof(*msg));
+	*msg = (struct rs_msg) {
+		.icmph = {
+			.icmp6_type = NDISC_ROUTER_SOLICITATION,
+		},
+	};
+
+	if (send_sllao)
+		ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
+				       dev->dev_addr);
+
+	ndisc_send_skb(skb, daddr, saddr);
 }
 
 
@@ -676,6 +706,11 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 	bool inc;
 	int is_router = -1;
 
+	if (skb->len < sizeof(struct nd_msg)) {
+		ND_PRINTK(2, warn, "NS: packet too short\n");
+		return;
+	}
+
 	if (ipv6_addr_is_multicast(&msg->target)) {
 		ND_PRINTK(2, warn, "NS: multicast target address\n");
 		return;
@@ -685,11 +720,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 	 * RFC2461 7.1.1:
 	 * DAD has to be destined for solicited node multicast address.
 	 */
-	if (dad &&
-	    !(daddr->s6_addr32[0] == htonl(0xff020000) &&
-	      daddr->s6_addr32[1] == htonl(0x00000000) &&
-	      daddr->s6_addr32[2] == htonl(0x00000001) &&
-	      daddr->s6_addr [12] == 0xff )) {
+	if (dad && !ipv6_addr_is_solict_mult(daddr)) {
 		ND_PRINTK(2, warn, "NS: bad DAD packet (wrong destination)\n");
 		return;
 	}
@@ -780,11 +811,11 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 	}
 
 	if (is_router < 0)
-		is_router = !!idev->cnf.forwarding;
+		is_router = idev->cnf.forwarding;
 
 	if (dad) {
 		ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &msg->target,
-			      is_router, 0, (ifp != NULL), 1);
+			      !!is_router, false, (ifp != NULL), true);
 		goto out;
 	}
 
@@ -805,8 +836,8 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 			     NEIGH_UPDATE_F_OVERRIDE);
 	if (neigh || !dev->header_ops) {
 		ndisc_send_na(dev, neigh, saddr, &msg->target,
-			      is_router,
-			      1, (ifp != NULL && inc), inc);
+			      !!is_router,
+			      true, (ifp != NULL && inc), inc);
 		if (neigh)
 			neigh_release(neigh);
 	}
@@ -1350,25 +1381,34 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
 	icmpv6_notify(skb, NDISC_REDIRECT, 0, 0);
 }
 
+static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb,
+					   struct sk_buff *orig_skb,
+					   int rd_len)
+{
+	u8 *opt = skb_put(skb, rd_len);
+
+	memset(opt, 0, 8);
+	*(opt++) = ND_OPT_REDIRECT_HDR;
+	*(opt++) = (rd_len >> 3);
+	opt += 6;
+
+	memcpy(opt, ipv6_hdr(orig_skb), rd_len - 8);
+}
+
 void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
 {
 	struct net_device *dev = skb->dev;
 	struct net *net = dev_net(dev);
 	struct sock *sk = net->ipv6.ndisc_sk;
-	int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
+	int optlen = 0;
 	struct inet_peer *peer;
 	struct sk_buff *buff;
-	struct icmp6hdr *icmph;
+	struct rd_msg *msg;
 	struct in6_addr saddr_buf;
-	struct in6_addr *addrp;
 	struct rt6_info *rt;
 	struct dst_entry *dst;
-	struct inet6_dev *idev;
 	struct flowi6 fl6;
-	u8 *opt;
-	int hlen, tlen;
 	int rd_len;
-	int err;
 	u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
 	bool ret;
 
@@ -1424,7 +1464,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
 			memcpy(ha_buf, neigh->ha, dev->addr_len);
 			read_unlock_bh(&neigh->lock);
 			ha = ha_buf;
-			len += ndisc_opt_addr_space(dev);
+			optlen += ndisc_opt_addr_space(dev);
 		} else
 			read_unlock_bh(&neigh->lock);
 
@@ -1432,80 +1472,40 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
 	}
 
 	rd_len = min_t(unsigned int,
-		     IPV6_MIN_MTU-sizeof(struct ipv6hdr)-len, skb->len + 8);
+		       IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(*msg) - optlen,
+		       skb->len + 8);
 	rd_len &= ~0x7;
-	len += rd_len;
-
-	hlen = LL_RESERVED_SPACE(dev);
-	tlen = dev->needed_tailroom;
-	buff = sock_alloc_send_skb(sk,
-				   (MAX_HEADER + sizeof(struct ipv6hdr) +
-				    len + hlen + tlen),
-				   1, &err);
-	if (buff == NULL) {
-		ND_PRINTK(0, err,
-			  "Redirect: %s failed to allocate an skb, err=%d\n",
-			  __func__, err);
-		goto release;
-	}
-
-	skb_reserve(buff, hlen);
-	ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr,
-		   IPPROTO_ICMPV6, len);
-
-	skb_set_transport_header(buff, skb_tail_pointer(buff) - buff->data);
-	skb_put(buff, len);
-	icmph = icmp6_hdr(buff);
-
-	memset(icmph, 0, sizeof(struct icmp6hdr));
-	icmph->icmp6_type = NDISC_REDIRECT;
+	optlen += rd_len;
 
-	/*
-	 *	copy target and destination addresses
-	 */
-
-	addrp = (struct in6_addr *)(icmph + 1);
-	*addrp = *target;
-	addrp++;
-	*addrp = ipv6_hdr(skb)->daddr;
+	buff = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
+	if (!buff)
+		goto release;
 
-	opt = (u8*) (addrp + 1);
+	msg = (struct rd_msg *)skb_put(buff, sizeof(*msg));
+	*msg = (struct rd_msg) {
+		.icmph = {
+			.icmp6_type = NDISC_REDIRECT,
+		},
+		.target = *target,
+		.dest = ipv6_hdr(skb)->daddr,
+	};
 
 	/*
 	 *	include target_address option
 	 */
 
 	if (ha)
-		opt = ndisc_fill_addr_option(opt, ND_OPT_TARGET_LL_ADDR, ha,
-					     dev->addr_len, dev->type);
+		ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, ha);
 
 	/*
 	 *	build redirect option and copy skb over to the new packet.
 	 */
 
-	memset(opt, 0, 8);
-	*(opt++) = ND_OPT_REDIRECT_HDR;
-	*(opt++) = (rd_len >> 3);
-	opt += 6;
-
-	memcpy(opt, ipv6_hdr(skb), rd_len - 8);
-
-	icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr,
-					     len, IPPROTO_ICMPV6,
-					     csum_partial(icmph, len, 0));
+	if (rd_len)
+		ndisc_fill_redirect_hdr_option(buff, skb, rd_len);
 
 	skb_dst_set(buff, dst);
-	rcu_read_lock();
-	idev = __in6_dev_get(dst->dev);
-	IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
-	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev,
-		      dst_output);
-	if (!err) {
-		ICMP6MSGOUT_INC_STATS(net, idev, NDISC_REDIRECT);
-		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
-	}
-
-	rcu_read_unlock();
+	ndisc_send_skb(buff, &ipv6_hdr(skb)->saddr, &saddr_buf);
 	return;
 
 release:
@@ -1522,7 +1522,7 @@ int ndisc_rcv(struct sk_buff *skb)
 {
 	struct nd_msg *msg;
 
-	if (!pskb_may_pull(skb, skb->len))
+	if (skb_linearize(skb))
 		return 0;
 
 	msg = (struct nd_msg *)skb_transport_header(skb);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 125a90d6a79..341b54ade72 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1098,7 +1098,7 @@ static int get_info(struct net *net, void __user *user,
 #endif
 	t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name),
 				    "ip6table_%s", name);
-	if (t && !IS_ERR(t)) {
+	if (!IS_ERR_OR_NULL(t)) {
 		struct ip6t_getinfo info;
 		const struct xt_table_info *private = t->private;
 #ifdef CONFIG_COMPAT
@@ -1157,7 +1157,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
 	}
 
 	t = xt_find_table_lock(net, AF_INET6, get.name);
-	if (t && !IS_ERR(t)) {
+	if (!IS_ERR_OR_NULL(t)) {
 		struct xt_table_info *private = t->private;
 		duprintf("t->private->number = %u\n", private->number);
 		if (get.size == private->size)
@@ -1197,7 +1197,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 
 	t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name),
 				    "ip6table_%s", name);
-	if (!t || IS_ERR(t)) {
+	if (IS_ERR_OR_NULL(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
 		goto free_newinfo_counters_untrans;
 	}
@@ -1355,7 +1355,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
 	}
 
 	t = xt_find_table_lock(net, AF_INET6, name);
-	if (!t || IS_ERR(t)) {
+	if (IS_ERR_OR_NULL(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
 		goto free;
 	}
@@ -1939,7 +1939,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
 
 	xt_compat_lock(AF_INET6);
 	t = xt_find_table_lock(net, AF_INET6, get.name);
-	if (t && !IS_ERR(t)) {
+	if (!IS_ERR_OR_NULL(t)) {
 		const struct xt_table_info *private = t->private;
 		struct xt_table_info info;
 		duprintf("t->private->number = %u\n", private->number);
diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c
index e9486915eff..83acc1405a1 100644
--- a/net/ipv6/netfilter/ip6t_NPT.c
+++ b/net/ipv6/netfilter/ip6t_NPT.c
@@ -9,47 +9,38 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ipv6.h>
+#include <net/ipv6.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv6.h>
 #include <linux/netfilter_ipv6/ip6t_NPT.h>
 #include <linux/netfilter/x_tables.h>
 
-static __sum16 csum16_complement(__sum16 a)
-{
-	return (__force __sum16)(0xffff - (__force u16)a);
-}
-
-static __sum16 csum16_add(__sum16 a, __sum16 b)
-{
-	u16 sum;
-
-	sum = (__force u16)a + (__force u16)b;
-	sum += (__force u16)a < (__force u16)b;
-	return (__force __sum16)sum;
-}
-
-static __sum16 csum16_sub(__sum16 a, __sum16 b)
-{
-	return csum16_add(a, csum16_complement(b));
-}
-
 static int ip6t_npt_checkentry(const struct xt_tgchk_param *par)
 {
 	struct ip6t_npt_tginfo *npt = par->targinfo;
-	__sum16 src_sum = 0, dst_sum = 0;
+	__wsum src_sum = 0, dst_sum = 0;
+	struct in6_addr pfx;
 	unsigned int i;
 
 	if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64)
 		return -EINVAL;
 
+	/* Ensure that LSB of prefix is zero */
+	ipv6_addr_prefix(&pfx, &npt->src_pfx.in6, npt->src_pfx_len);
+	if (!ipv6_addr_equal(&pfx, &npt->src_pfx.in6))
+		return -EINVAL;
+	ipv6_addr_prefix(&pfx, &npt->dst_pfx.in6, npt->dst_pfx_len);
+	if (!ipv6_addr_equal(&pfx, &npt->dst_pfx.in6))
+		return -EINVAL;
+
 	for (i = 0; i < ARRAY_SIZE(npt->src_pfx.in6.s6_addr16); i++) {
-		src_sum = csum16_add(src_sum,
-				(__force __sum16)npt->src_pfx.in6.s6_addr16[i]);
-		dst_sum = csum16_add(dst_sum,
-				(__force __sum16)npt->dst_pfx.in6.s6_addr16[i]);
+		src_sum = csum_add(src_sum,
+				(__force __wsum)npt->src_pfx.in6.s6_addr16[i]);
+		dst_sum = csum_add(dst_sum,
+				(__force __wsum)npt->dst_pfx.in6.s6_addr16[i]);
 	}
 
-	npt->adjustment = csum16_sub(src_sum, dst_sum);
+	npt->adjustment = ~csum_fold(csum_sub(src_sum, dst_sum));
 	return 0;
 }
 
@@ -70,7 +61,7 @@ static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt,
 
 		idx = i / 32;
 		addr->s6_addr32[idx] &= mask;
-		addr->s6_addr32[idx] |= npt->dst_pfx.in6.s6_addr32[idx];
+		addr->s6_addr32[idx] |= ~mask & npt->dst_pfx.in6.s6_addr32[idx];
 	}
 
 	if (pfx_len <= 48)
@@ -85,8 +76,8 @@ static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt,
 			return false;
 	}
 
-	sum = csum16_add((__force __sum16)addr->s6_addr16[idx],
-			 npt->adjustment);
+	sum = ~csum_fold(csum_add(csum_unfold((__force __sum16)addr->s6_addr16[idx]),
+				  csum_unfold(npt->adjustment)));
 	if (sum == CSUM_MANGLED_0)
 		sum = 0;
 	*(__force __sum16 *)&addr->s6_addr16[idx] = sum;
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index fd4fb34c51c..ed3b427b284 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -126,12 +126,13 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
 	skb_put(nskb, sizeof(struct ipv6hdr));
 	skb_reset_network_header(nskb);
 	ip6h = ipv6_hdr(nskb);
-	*(__be32 *)ip6h =  htonl(0x60000000 | (tclass << 20));
+	ip6_flow_hdr(ip6h, tclass, 0);
 	ip6h->hop_limit = ip6_dst_hoplimit(dst);
 	ip6h->nexthdr = IPPROTO_TCP;
 	ip6h->saddr = oip6h->daddr;
 	ip6h->daddr = oip6h->saddr;
 
+	skb_reset_transport_header(nskb);
 	tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
 	/* Truncate to length (no data) */
 	tcph->doff = sizeof(struct tcphdr)/4;
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 7431121b87d..6134a1ebfb1 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/slab.h>
+#include <net/ipv6.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -60,8 +61,8 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out)
 			    dev_net(out)->ipv6.ip6table_mangle);
 
 	if (ret != NF_DROP && ret != NF_STOLEN &&
-	    (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) ||
-	     memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) ||
+	    (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) ||
+	     !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &daddr) ||
 	     skb->mark != mark ||
 	     ipv6_hdr(skb)->hop_limit != hop_limit ||
 	     flowlabel != *((u_int32_t *)ipv6_hdr(skb))))
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 6c8ae24b85e..e0e788d25b1 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -127,23 +127,28 @@ nf_nat_ipv6_fn(unsigned int hooknum,
 			ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
 			if (ret != NF_ACCEPT)
 				return ret;
-		} else
+		} else {
 			pr_debug("Already setup manip %s for ct %p\n",
 				 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
 				 ct);
+			if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
+				goto oif_changed;
+		}
 		break;
 
 	default:
 		/* ESTABLISHED */
 		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
 			     ctinfo == IP_CT_ESTABLISHED_REPLY);
-		if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) {
-			nf_ct_kill_acct(ct, ctinfo, skb);
-			return NF_DROP;
-		}
+		if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
+			goto oif_changed;
 	}
 
 	return nf_nat_packet(ct, ctinfo, hooknum, skb);
+
+oif_changed:
+	nf_ct_kill_acct(ct, ctinfo, skb);
+	return NF_DROP;
 }
 
 static unsigned int
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 00ee17c3e89..8a45bb20bed 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -81,8 +81,8 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 	}
 	protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off);
 	/*
-	 * (protoff == skb->len) mean that the packet doesn't have no data
-	 * except of IPv6 & ext headers. but it's tracked anyway. - YK
+	 * (protoff == skb->len) means the packet has not data, just
+	 * IPv6 and possibly extensions headers, but it is tracked anyway
 	 */
 	if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
 		pr_debug("ip6_conntrack_core: can't find proto in pkt\n");
@@ -421,54 +421,43 @@ static int ipv6_net_init(struct net *net)
 {
 	int ret = 0;
 
-	ret = nf_conntrack_l4proto_register(net,
-					    &nf_conntrack_l4proto_tcp6);
+	ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp6);
 	if (ret < 0) {
-		printk(KERN_ERR "nf_conntrack_l4proto_tcp6: protocol register failed\n");
+		pr_err("nf_conntrack_tcp6: pernet registration failed\n");
 		goto out;
 	}
-	ret = nf_conntrack_l4proto_register(net,
-					    &nf_conntrack_l4proto_udp6);
+	ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp6);
 	if (ret < 0) {
-		printk(KERN_ERR "nf_conntrack_l4proto_udp6: protocol register failed\n");
+		pr_err("nf_conntrack_udp6: pernet registration failed\n");
 		goto cleanup_tcp6;
 	}
-	ret = nf_conntrack_l4proto_register(net,
-					    &nf_conntrack_l4proto_icmpv6);
+	ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmpv6);
 	if (ret < 0) {
-		printk(KERN_ERR "nf_conntrack_l4proto_icmp6: protocol register failed\n");
+		pr_err("nf_conntrack_icmp6: pernet registration failed\n");
 		goto cleanup_udp6;
 	}
-	ret = nf_conntrack_l3proto_register(net,
-					    &nf_conntrack_l3proto_ipv6);
+	ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv6);
 	if (ret < 0) {
-		printk(KERN_ERR "nf_conntrack_l3proto_ipv6: protocol register failed\n");
+		pr_err("nf_conntrack_ipv6: pernet registration failed.\n");
 		goto cleanup_icmpv6;
 	}
 	return 0;
  cleanup_icmpv6:
-	nf_conntrack_l4proto_unregister(net,
-					&nf_conntrack_l4proto_icmpv6);
+	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6);
  cleanup_udp6:
-	nf_conntrack_l4proto_unregister(net,
-					&nf_conntrack_l4proto_udp6);
+	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6);
  cleanup_tcp6:
-	nf_conntrack_l4proto_unregister(net,
-					&nf_conntrack_l4proto_tcp6);
+	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6);
  out:
 	return ret;
 }
 
 static void ipv6_net_exit(struct net *net)
 {
-	nf_conntrack_l3proto_unregister(net,
-					&nf_conntrack_l3proto_ipv6);
-	nf_conntrack_l4proto_unregister(net,
-					&nf_conntrack_l4proto_icmpv6);
-	nf_conntrack_l4proto_unregister(net,
-					&nf_conntrack_l4proto_udp6);
-	nf_conntrack_l4proto_unregister(net,
-					&nf_conntrack_l4proto_tcp6);
+	nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv6);
+	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6);
+	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6);
+	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6);
 }
 
 static struct pernet_operations ipv6_net_ops = {
@@ -491,19 +480,52 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
 
 	ret = register_pernet_subsys(&ipv6_net_ops);
 	if (ret < 0)
-		goto cleanup_pernet;
+		goto cleanup_sockopt;
+
 	ret = nf_register_hooks(ipv6_conntrack_ops,
 				ARRAY_SIZE(ipv6_conntrack_ops));
 	if (ret < 0) {
 		pr_err("nf_conntrack_ipv6: can't register pre-routing defrag "
 		       "hook.\n");
-		goto cleanup_ipv6;
+		goto cleanup_pernet;
+	}
+
+	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp6);
+	if (ret < 0) {
+		pr_err("nf_conntrack_ipv6: can't register tcp6 proto.\n");
+		goto cleanup_hooks;
+	}
+
+	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp6);
+	if (ret < 0) {
+		pr_err("nf_conntrack_ipv6: can't register udp6 proto.\n");
+		goto cleanup_tcp6;
+	}
+
+	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmpv6);
+	if (ret < 0) {
+		pr_err("nf_conntrack_ipv6: can't register icmpv6 proto.\n");
+		goto cleanup_udp6;
+	}
+
+	ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv6);
+	if (ret < 0) {
+		pr_err("nf_conntrack_ipv6: can't register ipv6 proto.\n");
+		goto cleanup_icmpv6;
 	}
 	return ret;
 
- cleanup_ipv6:
-	unregister_pernet_subsys(&ipv6_net_ops);
+ cleanup_icmpv6:
+	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
+ cleanup_udp6:
+	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6);
+ cleanup_tcp6:
+	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
+ cleanup_hooks:
+	nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops));
  cleanup_pernet:
+	unregister_pernet_subsys(&ipv6_net_ops);
+ cleanup_sockopt:
 	nf_unregister_sockopt(&so_getorigdst6);
 	return ret;
 }
@@ -511,6 +533,10 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
 static void __exit nf_conntrack_l3proto_ipv6_fini(void)
 {
 	synchronize_net();
+	nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
+	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
+	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6);
+	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
 	nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops));
 	unregister_pernet_subsys(&ipv6_net_ops);
 	nf_unregister_sockopt(&so_getorigdst6);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 22c8ea95118..b89a8c3186c 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -311,12 +311,15 @@ found:
 	else
 		fq->q.fragments = skb;
 
-	skb->dev = NULL;
+	if (skb->dev) {
+		fq->iif = skb->dev->ifindex;
+		skb->dev = NULL;
+	}
 	fq->q.stamp = skb->tstamp;
 	fq->q.meat += skb->len;
 	if (payload_len > fq->q.max_size)
 		fq->q.max_size = payload_len;
-	atomic_add(skb->truesize, &fq->q.net->mem);
+	add_frag_mem_limit(&fq->q, skb->truesize);
 
 	/* The first fragment.
 	 * nhoffset is obtained from the first fragment, of course.
@@ -325,9 +328,8 @@ found:
 		fq->nhoffset = nhoff;
 		fq->q.last_in |= INET_FRAG_FIRST_IN;
 	}
-	write_lock(&nf_frags.lock);
-	list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list);
-	write_unlock(&nf_frags.lock);
+
+	inet_frag_lru_move(&fq->q);
 	return 0;
 
 discard_fq:
@@ -366,7 +368,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
 	}
 
 	/* Head of list must not be cloned. */
-	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) {
+	if (skb_unclone(head, GFP_ATOMIC)) {
 		pr_debug("skb is cloned but can't expand head");
 		goto out_oom;
 	}
@@ -395,7 +397,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
 		clone->ip_summed = head->ip_summed;
 
 		NFCT_FRAG6_CB(clone)->orig = NULL;
-		atomic_add(clone->truesize, &fq->q.net->mem);
+		add_frag_mem_limit(&fq->q, clone->truesize);
 	}
 
 	/* We have to remove fragment header from datagram and to relocate
@@ -419,7 +421,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
 	}
-	atomic_sub(head->truesize, &fq->q.net->mem);
+	sub_frag_mem_limit(&fq->q, head->truesize);
 
 	head->local_df = 1;
 	head->next = NULL;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 6cd29b1e8b9..70fa8144999 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -507,7 +507,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (np->rxopt.all)
-		datagram_recv_ctl(sk, msg, skb);
+		ip6_datagram_recv_ctl(sk, msg, skb);
 
 	err = copied;
 	if (flags & MSG_TRUNC)
@@ -822,8 +822,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		memset(opt, 0, sizeof(struct ipv6_txoptions));
 		opt->tot_len = sizeof(struct ipv6_txoptions);
 
-		err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
-					&hlimit, &tclass, &dontfrag);
+		err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
+					    &hlimit, &tclass, &dontfrag);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index e5253ec9e0f..9a6428aaa2d 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -79,20 +79,7 @@ unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
 {
 	u32 c;
 
-	c = jhash_3words((__force u32)saddr->s6_addr32[0],
-			 (__force u32)saddr->s6_addr32[1],
-			 (__force u32)saddr->s6_addr32[2],
-			 rnd);
-
-	c = jhash_3words((__force u32)saddr->s6_addr32[3],
-			 (__force u32)daddr->s6_addr32[0],
-			 (__force u32)daddr->s6_addr32[1],
-			 c);
-
-	c =  jhash_3words((__force u32)daddr->s6_addr32[2],
-			  (__force u32)daddr->s6_addr32[3],
-			  (__force u32)id,
-			  c);
+	c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), id, rnd);
 
 	return c & (INETFRAGS_HASHSZ - 1);
 }
@@ -327,7 +314,7 @@ found:
 	}
 	fq->q.stamp = skb->tstamp;
 	fq->q.meat += skb->len;
-	atomic_add(skb->truesize, &fq->q.net->mem);
+	add_frag_mem_limit(&fq->q, skb->truesize);
 
 	/* The first fragment.
 	 * nhoffset is obtained from the first fragment, of course.
@@ -341,9 +328,7 @@ found:
 	    fq->q.meat == fq->q.len)
 		return ip6_frag_reasm(fq, prev, dev);
 
-	write_lock(&ip6_frags.lock);
-	list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list);
-	write_unlock(&ip6_frags.lock);
+	inet_frag_lru_move(&fq->q);
 	return -1;
 
 discard_fq:
@@ -406,7 +391,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 		goto out_oversize;
 
 	/* Head of list must not be cloned. */
-	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
+	if (skb_unclone(head, GFP_ATOMIC))
 		goto out_oom;
 
 	/* If the first fragment is fragmented itself, we split
@@ -429,7 +414,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 		head->len -= clone->len;
 		clone->csum = 0;
 		clone->ip_summed = head->ip_summed;
-		atomic_add(clone->truesize, &fq->q.net->mem);
+		add_frag_mem_limit(&fq->q, clone->truesize);
 	}
 
 	/* We have to remove fragment header from datagram and to relocate
@@ -467,7 +452,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 		}
 		fp = next;
 	}
-	atomic_sub(sum_truesize, &fq->q.net->mem);
+	sub_frag_mem_limit(&fq->q, sum_truesize);
 
 	head->next = NULL;
 	head->dev = dev;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e229a3bc345..f3328bc1174 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -145,25 +145,12 @@ static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
 	struct neighbour *n;
 
 	daddr = choose_neigh_daddr(rt, skb, daddr);
-	n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
+	n = __ipv6_neigh_lookup(dst->dev, daddr);
 	if (n)
 		return n;
 	return neigh_create(&nd_tbl, daddr, dst->dev);
 }
 
-static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
-{
-	struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
-	if (!n) {
-		n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
-		if (IS_ERR(n))
-			return PTR_ERR(n);
-	}
-	rt->n = n;
-
-	return 0;
-}
-
 static struct dst_ops ip6_dst_ops_template = {
 	.family			=	AF_INET6,
 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
@@ -301,9 +288,6 @@ static void ip6_dst_destroy(struct dst_entry *dst)
 	struct rt6_info *rt = (struct rt6_info *)dst;
 	struct inet6_dev *idev = rt->rt6i_idev;
 
-	if (rt->n)
-		neigh_release(rt->n);
-
 	if (!(rt->dst.flags & DST_HOST))
 		dst_destroy_metrics_generic(dst);
 
@@ -354,11 +338,6 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 				in6_dev_put(idev);
 			}
 		}
-		if (rt->n && rt->n->dev == dev) {
-			rt->n->dev = loopback_dev;
-			dev_hold(loopback_dev);
-			dev_put(dev);
-		}
 	}
 }
 
@@ -388,15 +367,8 @@ static int rt6_info_hash_nhsfn(unsigned int candidate_count,
 {
 	unsigned int val = fl6->flowi6_proto;
 
-	val ^= (__force u32)fl6->daddr.s6_addr32[0];
-	val ^= (__force u32)fl6->daddr.s6_addr32[1];
-	val ^= (__force u32)fl6->daddr.s6_addr32[2];
-	val ^= (__force u32)fl6->daddr.s6_addr32[3];
-
-	val ^= (__force u32)fl6->saddr.s6_addr32[0];
-	val ^= (__force u32)fl6->saddr.s6_addr32[1];
-	val ^= (__force u32)fl6->saddr.s6_addr32[2];
-	val ^= (__force u32)fl6->saddr.s6_addr32[3];
+	val ^= ipv6_addr_hash(&fl6->daddr);
+	val ^= ipv6_addr_hash(&fl6->saddr);
 
 	/* Work only if this not encapsulated */
 	switch (fl6->flowi6_proto) {
@@ -505,24 +477,34 @@ static void rt6_probe(struct rt6_info *rt)
 	 * Router Reachability Probe MUST be rate-limited
 	 * to no more than one per minute.
 	 */
-	neigh = rt ? rt->n : NULL;
-	if (!neigh || (neigh->nud_state & NUD_VALID))
+	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
 		return;
-	read_lock_bh(&neigh->lock);
-	if (!(neigh->nud_state & NUD_VALID) &&
+	rcu_read_lock_bh();
+	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
+	if (neigh) {
+		write_lock(&neigh->lock);
+		if (neigh->nud_state & NUD_VALID)
+			goto out;
+	}
+
+	if (!neigh ||
 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
 		struct in6_addr mcaddr;
 		struct in6_addr *target;
 
-		neigh->updated = jiffies;
-		read_unlock_bh(&neigh->lock);
+		if (neigh) {
+			neigh->updated = jiffies;
+			write_unlock(&neigh->lock);
+		}
 
-		target = (struct in6_addr *)&neigh->primary_key;
+		target = (struct in6_addr *)&rt->rt6i_gateway;
 		addrconf_addr_solict_mult(target, &mcaddr);
 		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
 	} else {
-		read_unlock_bh(&neigh->lock);
+out:
+		write_unlock(&neigh->lock);
 	}
+	rcu_read_unlock_bh();
 }
 #else
 static inline void rt6_probe(struct rt6_info *rt)
@@ -549,20 +531,24 @@ static inline bool rt6_check_neigh(struct rt6_info *rt)
 	struct neighbour *neigh;
 	bool ret = false;
 
-	neigh = rt->n;
 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
 	    !(rt->rt6i_flags & RTF_GATEWAY))
-		ret = true;
-	else if (neigh) {
-		read_lock_bh(&neigh->lock);
+		return true;
+
+	rcu_read_lock_bh();
+	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
+	if (neigh) {
+		read_lock(&neigh->lock);
 		if (neigh->nud_state & NUD_VALID)
 			ret = true;
 #ifdef CONFIG_IPV6_ROUTER_PREF
 		else if (!(neigh->nud_state & NUD_FAILED))
 			ret = true;
 #endif
-		read_unlock_bh(&neigh->lock);
+		read_unlock(&neigh->lock);
 	}
+	rcu_read_unlock_bh();
+
 	return ret;
 }
 
@@ -838,8 +824,6 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
 	rt = ip6_rt_copy(ort, daddr);
 
 	if (rt) {
-		int attempts = !in_softirq();
-
 		if (!(rt->rt6i_flags & RTF_GATEWAY)) {
 			if (ort->rt6i_dst.plen != 128 &&
 			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
@@ -855,32 +839,6 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
 			rt->rt6i_src.plen = 128;
 		}
 #endif
-
-	retry:
-		if (rt6_bind_neighbour(rt, rt->dst.dev)) {
-			struct net *net = dev_net(rt->dst.dev);
-			int saved_rt_min_interval =
-				net->ipv6.sysctl.ip6_rt_gc_min_interval;
-			int saved_rt_elasticity =
-				net->ipv6.sysctl.ip6_rt_gc_elasticity;
-
-			if (attempts-- > 0) {
-				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
-				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
-
-				ip6_dst_gc(&net->ipv6.ip6_dst_ops);
-
-				net->ipv6.sysctl.ip6_rt_gc_elasticity =
-					saved_rt_elasticity;
-				net->ipv6.sysctl.ip6_rt_gc_min_interval =
-					saved_rt_min_interval;
-				goto retry;
-			}
-
-			net_warn_ratelimited("Neighbour table overflow\n");
-			dst_free(&rt->dst);
-			return NULL;
-		}
 	}
 
 	return rt;
@@ -891,10 +849,8 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
 {
 	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
 
-	if (rt) {
+	if (rt)
 		rt->rt6i_flags |= RTF_CACHE;
-		rt->n = neigh_clone(ort->n);
-	}
 	return rt;
 }
 
@@ -928,7 +884,7 @@ restart:
 	dst_hold(&rt->dst);
 	read_unlock_bh(&table->tb6_lock);
 
-	if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
+	if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
 		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
 	else if (!(rt->dst.flags & DST_HOST))
 		nrt = rt6_alloc_clone(rt, &fl6->daddr);
@@ -994,7 +950,7 @@ void ip6_route_input(struct sk_buff *skb)
 		.flowi6_iif = skb->dev->ifindex,
 		.daddr = iph->daddr,
 		.saddr = iph->saddr,
-		.flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
+		.flowlabel = ip6_flowinfo(iph),
 		.flowi6_mark = skb->mark,
 		.flowi6_proto = iph->nexthdr,
 	};
@@ -1159,7 +1115,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
 	fl6.flowi6_flags = 0;
 	fl6.daddr = iph->daddr;
 	fl6.saddr = iph->saddr;
-	fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
+	fl6.flowlabel = ip6_flowinfo(iph);
 
 	dst = ip6_route_output(net, NULL, &fl6);
 	if (!dst->error)
@@ -1187,7 +1143,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
 	fl6.flowi6_flags = 0;
 	fl6.daddr = iph->daddr;
 	fl6.saddr = iph->saddr;
-	fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
+	fl6.flowlabel = ip6_flowinfo(iph);
 
 	dst = ip6_route_output(net, NULL, &fl6);
 	if (!dst->error)
@@ -1247,7 +1203,6 @@ static struct dst_entry *icmp6_dst_gc_list;
 static DEFINE_SPINLOCK(icmp6_dst_lock);
 
 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
-				  struct neighbour *neigh,
 				  struct flowi6 *fl6)
 {
 	struct dst_entry *dst;
@@ -1265,20 +1220,8 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 		goto out;
 	}
 
-	if (neigh)
-		neigh_hold(neigh);
-	else {
-		neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
-		if (IS_ERR(neigh)) {
-			in6_dev_put(idev);
-			dst_free(&rt->dst);
-			return ERR_CAST(neigh);
-		}
-	}
-
 	rt->dst.flags |= DST_HOST;
 	rt->dst.output  = ip6_output;
-	rt->n = neigh;
 	atomic_set(&rt->dst.__refcnt, 1);
 	rt->rt6i_dst.addr = fl6->daddr;
 	rt->rt6i_dst.plen = 128;
@@ -1587,12 +1530,6 @@ int ip6_route_add(struct fib6_config *cfg)
 	} else
 		rt->rt6i_prefsrc.plen = 0;
 
-	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
-		err = rt6_bind_neighbour(rt, dev);
-		if (err)
-			goto out;
-	}
-
 	rt->rt6i_flags = cfg->fc_flags;
 
 install_route:
@@ -1705,37 +1642,32 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
 	struct net *net = dev_net(skb->dev);
 	struct netevent_redirect netevent;
 	struct rt6_info *rt, *nrt = NULL;
-	const struct in6_addr *target;
 	struct ndisc_options ndopts;
-	const struct in6_addr *dest;
-	struct neighbour *old_neigh;
 	struct inet6_dev *in6_dev;
 	struct neighbour *neigh;
-	struct icmp6hdr *icmph;
+	struct rd_msg *msg;
 	int optlen, on_link;
 	u8 *lladdr;
 
 	optlen = skb->tail - skb->transport_header;
-	optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
+	optlen -= sizeof(*msg);
 
 	if (optlen < 0) {
 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
 		return;
 	}
 
-	icmph = icmp6_hdr(skb);
-	target = (const struct in6_addr *) (icmph + 1);
-	dest = target + 1;
+	msg = (struct rd_msg *)icmp6_hdr(skb);
 
-	if (ipv6_addr_is_multicast(dest)) {
+	if (ipv6_addr_is_multicast(&msg->dest)) {
 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
 		return;
 	}
 
 	on_link = 0;
-	if (ipv6_addr_equal(dest, target)) {
+	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
 		on_link = 1;
-	} else if (ipv6_addr_type(target) !=
+	} else if (ipv6_addr_type(&msg->target) !=
 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
 		return;
@@ -1752,7 +1684,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
 	 *	first-hop router for the specified ICMP Destination Address.
 	 */
 
-	if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
+	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
 		return;
 	}
@@ -1779,15 +1711,10 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
 	 */
 	dst_confirm(&rt->dst);
 
-	neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
+	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
 	if (!neigh)
 		return;
 
-	/* Duplicate redirect: silently ignore. */
-	old_neigh = rt->n;
-	if (neigh == old_neigh)
-		goto out;
-
 	/*
 	 *	We have finally decided to accept it.
 	 */
@@ -1799,7 +1726,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
 				     NEIGH_UPDATE_F_ISROUTER))
 		     );
 
-	nrt = ip6_rt_copy(rt, dest);
+	nrt = ip6_rt_copy(rt, &msg->dest);
 	if (!nrt)
 		goto out;
 
@@ -1808,16 +1735,14 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
 		nrt->rt6i_flags &= ~RTF_GATEWAY;
 
 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
-	nrt->n = neigh_clone(neigh);
 
 	if (ip6_ins_rt(nrt))
 		goto out;
 
 	netevent.old = &rt->dst;
-	netevent.old_neigh = old_neigh;
 	netevent.new = &nrt->dst;
-	netevent.new_neigh = neigh;
-	netevent.daddr = dest;
+	netevent.daddr = &msg->dest;
+	netevent.neigh = neigh;
 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
 
 	if (rt->rt6i_flags & RTF_CACHE) {
@@ -2123,7 +2048,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 {
 	struct net *net = dev_net(idev->dev);
 	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
-	int err;
 
 	if (!rt) {
 		net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
@@ -2142,11 +2066,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 		rt->rt6i_flags |= RTF_ANYCAST;
 	else
 		rt->rt6i_flags |= RTF_LOCAL;
-	err = rt6_bind_neighbour(rt, rt->dst.dev);
-	if (err) {
-		dst_free(&rt->dst);
-		return ERR_PTR(err);
-	}
 
 	rt->rt6i_dst.addr = *addr;
 	rt->rt6i_dst.plen = 128;
@@ -2492,7 +2411,6 @@ static int rt6_fill_node(struct net *net,
 	struct nlmsghdr *nlh;
 	long expires;
 	u32 table;
-	struct neighbour *n;
 
 	if (prefix) {	/* user wants prefix routes only */
 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
@@ -2605,9 +2523,8 @@ static int rt6_fill_node(struct net *net,
 	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
 		goto nla_put_failure;
 
-	n = rt->n;
-	if (n) {
-		if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
+	if (rt->rt6i_flags & RTF_GATEWAY) {
+		if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
 			goto nla_put_failure;
 	}
 
@@ -2802,7 +2719,6 @@ struct rt6_proc_arg
 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
 {
 	struct seq_file *m = p_arg;
-	struct neighbour *n;
 
 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
 
@@ -2811,9 +2727,8 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
 #else
 	seq_puts(m, "00000000000000000000000000000000 00 ");
 #endif
-	n = rt->n;
-	if (n) {
-		seq_printf(m, "%pi6", n->primary_key);
+	if (rt->rt6i_flags & RTF_GATEWAY) {
+		seq_printf(m, "%pi6", &rt->rt6i_gateway);
 	} else {
 		seq_puts(m, "00000000000000000000000000000000");
 	}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index cfba99b2c2a..02f96dcbcf0 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -72,6 +72,8 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 static int ipip6_tunnel_init(struct net_device *dev);
 static void ipip6_tunnel_setup(struct net_device *dev);
 static void ipip6_dev_free(struct net_device *dev);
+static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst,
+		      __be32 *v4dst);
 static struct rtnl_link_ops sit_link_ops __read_mostly;
 
 static int sit_net_id __read_mostly;
@@ -590,17 +592,21 @@ out:
 	return err;
 }
 
+static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr,
+				  const struct in6_addr *v6addr)
+{
+	__be32 v4embed = 0;
+	if (check_6rd(tunnel, v6addr, &v4embed) && v4addr != v4embed)
+		return true;
+	return false;
+}
+
 static int ipip6_rcv(struct sk_buff *skb)
 {
-	const struct iphdr *iph;
+	const struct iphdr *iph = ip_hdr(skb);
 	struct ip_tunnel *tunnel;
 	int err;
 
-	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
-		goto out;
-
-	iph = ip_hdr(skb);
-
 	tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
 				     iph->saddr, iph->daddr);
 	if (tunnel != NULL) {
@@ -613,10 +619,19 @@ static int ipip6_rcv(struct sk_buff *skb)
 		skb->protocol = htons(ETH_P_IPV6);
 		skb->pkt_type = PACKET_HOST;
 
-		if ((tunnel->dev->priv_flags & IFF_ISATAP) &&
-		    !isatap_chksrc(skb, iph, tunnel)) {
-			tunnel->dev->stats.rx_errors++;
-			goto out;
+		if (tunnel->dev->priv_flags & IFF_ISATAP) {
+			if (!isatap_chksrc(skb, iph, tunnel)) {
+				tunnel->dev->stats.rx_errors++;
+				goto out;
+			}
+		} else {
+			if (is_spoofed_6rd(tunnel, iph->saddr,
+					   &ipv6_hdr(skb)->saddr) ||
+			    is_spoofed_6rd(tunnel, iph->daddr,
+					   &ipv6_hdr(skb)->daddr)) {
+				tunnel->dev->stats.rx_errors++;
+				goto out;
+			}
 		}
 
 		__skb_tunnel_rx(skb, tunnel->dev);
@@ -650,14 +665,12 @@ out:
 }
 
 /*
- * Returns the embedded IPv4 address if the IPv6 address
- * comes from 6rd / 6to4 (RFC 3056) addr space.
+ * If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function
+ * stores the embedded IPv4 address in v4dst and returns true.
  */
-static inline
-__be32 try_6rd(const struct in6_addr *v6dst, struct ip_tunnel *tunnel)
+static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst,
+		      __be32 *v4dst)
 {
-	__be32 dst = 0;
-
 #ifdef CONFIG_IPV6_SIT_6RD
 	if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix,
 			      tunnel->ip6rd.prefixlen)) {
@@ -676,14 +689,24 @@ __be32 try_6rd(const struct in6_addr *v6dst, struct ip_tunnel *tunnel)
 			d |= ntohl(v6dst->s6_addr32[pbw0 + 1]) >>
 			     (32 - pbi1);
 
-		dst = tunnel->ip6rd.relay_prefix | htonl(d);
+		*v4dst = tunnel->ip6rd.relay_prefix | htonl(d);
+		return true;
 	}
 #else
 	if (v6dst->s6_addr16[0] == htons(0x2002)) {
 		/* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */
-		memcpy(&dst, &v6dst->s6_addr16[1], 4);
+		memcpy(v4dst, &v6dst->s6_addr16[1], 4);
+		return true;
 	}
 #endif
+	return false;
+}
+
+static inline __be32 try_6rd(struct ip_tunnel *tunnel,
+			     const struct in6_addr *v6dst)
+{
+	__be32 dst = 0;
+	check_6rd(tunnel, v6dst, &dst);
 	return dst;
 }
 
@@ -744,7 +767,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 	}
 
 	if (!dst)
-		dst = try_6rd(&iph6->daddr, tunnel);
+		dst = try_6rd(tunnel, &iph6->daddr);
 
 	if (!dst) {
 		struct neighbour *neigh = NULL;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 40161977f7c..8a0848b60b3 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -179,7 +179,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
 	tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL);
 
-	if (!cookie_check_timestamp(&tcp_opt, &ecn_ok))
+	if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok))
 		goto out;
 
 	ret = NULL;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 93825dd3a7c..9b6460055df 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -423,6 +423,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		}
 
 		inet_csk_reqsk_queue_drop(sk, req, prev);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
 		goto out;
 
 	case TCP_SYN_SENT:
@@ -712,7 +713,8 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
 #endif
 
 static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
-				 u32 ts, struct tcp_md5sig_key *key, int rst, u8 tclass)
+				 u32 tsval, u32 tsecr,
+				 struct tcp_md5sig_key *key, int rst, u8 tclass)
 {
 	const struct tcphdr *th = tcp_hdr(skb);
 	struct tcphdr *t1;
@@ -724,7 +726,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
 	struct dst_entry *dst;
 	__be32 *topt;
 
-	if (ts)
+	if (tsecr)
 		tot_len += TCPOLEN_TSTAMP_ALIGNED;
 #ifdef CONFIG_TCP_MD5SIG
 	if (key)
@@ -754,11 +756,11 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
 
 	topt = (__be32 *)(t1 + 1);
 
-	if (ts) {
+	if (tsecr) {
 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
-		*topt++ = htonl(tcp_time_stamp);
-		*topt++ = htonl(ts);
+		*topt++ = htonl(tsval);
+		*topt++ = htonl(tsecr);
 	}
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -834,7 +836,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 		 * no RST generated if md5 hash doesn't match.
 		 */
 		sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
-					   &tcp_hashinfo, &ipv6h->daddr,
+					   &tcp_hashinfo, &ipv6h->saddr,
+					   th->source, &ipv6h->daddr,
 					   ntohs(th->source), inet6_iif(skb));
 		if (!sk1)
 			return;
@@ -858,7 +861,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
 			  (th->doff << 2);
 
-	tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0);
+	tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, key, 1, 0);
 
 #ifdef CONFIG_TCP_MD5SIG
 release_sk1:
@@ -869,10 +872,11 @@ release_sk1:
 #endif
 }
 
-static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
+static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
+			    u32 win, u32 tsval, u32 tsecr,
 			    struct tcp_md5sig_key *key, u8 tclass)
 {
-	tcp_v6_send_response(skb, seq, ack, win, ts, key, 0, tclass);
+	tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, key, 0, tclass);
 }
 
 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
@@ -882,6 +886,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
 
 	tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
+			tcp_time_stamp + tcptw->tw_ts_offset,
 			tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw),
 			tw->tw_tclass);
 
@@ -891,7 +896,8 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 				  struct request_sock *req)
 {
-	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent,
+	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1,
+			req->rcv_wnd, tcp_time_stamp, req->ts_recent,
 			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0);
 }
 
@@ -958,8 +964,10 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 			goto drop;
 	}
 
-	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
+	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
 		goto drop;
+	}
 
 	req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
 	if (req == NULL)
@@ -1027,7 +1035,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	treq->rmt_addr = ipv6_hdr(skb)->saddr;
 	treq->loc_addr = ipv6_hdr(skb)->daddr;
 	if (!want_cookie || tmp_opt.tstamp_ok)
-		TCP_ECN_create_request(req, skb);
+		TCP_ECN_create_request(req, skb, sock_net(sk));
 
 	treq->iif = sk->sk_bound_dev_if;
 
@@ -1108,6 +1116,7 @@ drop_and_release:
 drop_and_free:
 	reqsk_free(req);
 drop:
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
 	return 0; /* don't send reset */
 }
 
@@ -1163,7 +1172,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		newnp->opt	   = NULL;
 		newnp->mcast_oif   = inet6_iif(skb);
 		newnp->mcast_hops  = ipv6_hdr(skb)->hop_limit;
-		newnp->rcv_tclass  = ipv6_tclass(ipv6_hdr(skb));
+		newnp->rcv_tclass  = ipv6_get_dsfield(ipv6_hdr(skb));
 
 		/*
 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
@@ -1243,7 +1252,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	newnp->opt	  = NULL;
 	newnp->mcast_oif  = inet6_iif(skb);
 	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
-	newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
+	newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb));
 
 	/* Clone native IPv6 options from listening socket (if any)
 
@@ -1456,7 +1465,7 @@ ipv6_pktoptions:
 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
 			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
 		if (np->rxopt.bits.rxtclass)
-			np->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
+			np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb));
 		if (ipv6_opt_accepted(sk, opt_skb)) {
 			skb_set_owner_r(opt_skb, sk);
 			opt_skb = xchg(&np->pktoptions, opt_skb);
@@ -1598,6 +1607,7 @@ do_time_wait:
 		struct sock *sk2;
 
 		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
+					    &ipv6_hdr(skb)->saddr, th->source,
 					    &ipv6_hdr(skb)->daddr,
 					    ntohs(th->dest), inet6_iif(skb));
 		if (sk2 != NULL) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index dfaa29b8b29..599e1ba6d1c 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -45,6 +45,7 @@
 #include <net/tcp_states.h>
 #include <net/ip6_checksum.h>
 #include <net/xfrm.h>
+#include <net/inet6_hashtables.h>
 
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
@@ -203,7 +204,8 @@ static struct sock *udp6_lib_lookup2(struct net *net,
 {
 	struct sock *sk, *result;
 	struct hlist_nulls_node *node;
-	int score, badness;
+	int score, badness, matches = 0, reuseport = 0;
+	u32 hash = 0;
 
 begin:
 	result = NULL;
@@ -214,8 +216,18 @@ begin:
 		if (score > badness) {
 			result = sk;
 			badness = score;
-			if (score == SCORE2_MAX)
+			reuseport = sk->sk_reuseport;
+			if (reuseport) {
+				hash = inet6_ehashfn(net, daddr, hnum,
+						     saddr, sport);
+				matches = 1;
+			} else if (score == SCORE2_MAX)
 				goto exact_match;
+		} else if (score == badness && reuseport) {
+			matches++;
+			if (((u64)hash * matches) >> 32 == 0)
+				result = sk;
+			hash = next_pseudo_random32(hash);
 		}
 	}
 	/*
@@ -249,7 +261,8 @@ struct sock *__udp6_lib_lookup(struct net *net,
 	unsigned short hnum = ntohs(dport);
 	unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
 	struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
-	int score, badness;
+	int score, badness, matches = 0, reuseport = 0;
+	u32 hash = 0;
 
 	rcu_read_lock();
 	if (hslot->count > 10) {
@@ -284,6 +297,17 @@ begin:
 		if (score > badness) {
 			result = sk;
 			badness = score;
+			reuseport = sk->sk_reuseport;
+			if (reuseport) {
+				hash = inet6_ehashfn(net, daddr, hnum,
+						     saddr, sport);
+				matches = 1;
+			}
+		} else if (score == badness && reuseport) {
+			matches++;
+			if (((u64)hash * matches) >> 32 == 0)
+				result = sk;
+			hash = next_pseudo_random32(hash);
 		}
 	}
 	/*
@@ -443,7 +467,7 @@ try_again:
 			ip_cmsg_recv(msg, skb);
 	} else {
 		if (np->rxopt.all)
-			datagram_recv_ctl(sk, msg, skb);
+			ip6_datagram_recv_ctl(sk, msg, skb);
 	}
 
 	err = copied;
@@ -752,40 +776,6 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 	return 0;
 }
 
-static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh,
-				 int proto)
-{
-	int err;
-
-	UDP_SKB_CB(skb)->partial_cov = 0;
-	UDP_SKB_CB(skb)->cscov = skb->len;
-
-	if (proto == IPPROTO_UDPLITE) {
-		err = udplite_checksum_init(skb, uh);
-		if (err)
-			return err;
-	}
-
-	if (uh->check == 0) {
-		/* RFC 2460 section 8.1 says that we SHOULD log
-		   this error. Well, it is reasonable.
-		 */
-		LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n");
-		return 1;
-	}
-	if (skb->ip_summed == CHECKSUM_COMPLETE &&
-	    !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
-			     skb->len, proto, skb->csum))
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-
-	if (!skb_csum_unnecessary(skb))
-		skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
-							 &ipv6_hdr(skb)->daddr,
-							 skb->len, proto, 0));
-
-	return 0;
-}
-
 int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 		   int proto)
 {
@@ -1153,8 +1143,8 @@ do_udp_sendmsg:
 		memset(opt, 0, sizeof(struct ipv6_txoptions));
 		opt->tot_len = sizeof(*opt);
 
-		err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
-					&hlimit, &tclass, &dontfrag);
+		err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
+					    &hlimit, &tclass, &dontfrag);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 0c8934a317c..cf05cf073c5 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -56,7 +56,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 		/* Packet is from an untrusted source, reset gso_segs. */
 		int type = skb_shinfo(skb)->gso_type;
 
-		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) ||
+		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
+				      SKB_GSO_GRE) ||
 			     !(type & (SKB_GSO_UDP))))
 			goto out;
 
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 9f2095b19ad..9bf6a74a71d 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -69,8 +69,8 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 		goto out;
 
-	if (skb_cloned(skb) &&
-	    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+	err = skb_unclone(skb, GFP_ATOMIC);
+	if (err)
 		goto out;
 
 	if (x->props.flags & XFRM_STATE_DECAP_DSCP)
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index c9844135c9c..4ef7bdb6544 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -110,7 +110,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 
 	/* Sheit... I remember I did this right. Apparently,
 	 * it was magically lost, so this code needs audit */
-	xdst->u.rt6.n = neigh_clone(rt->n);
 	xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST |
 						   RTF_LOCAL);
 	xdst->u.rt6.rt6i_metric = rt->rt6i_metric;
@@ -321,7 +320,51 @@ static struct ctl_table xfrm6_policy_table[] = {
 	{ }
 };
 
-static struct ctl_table_header *sysctl_hdr;
+static int __net_init xfrm6_net_init(struct net *net)
+{
+	struct ctl_table *table;
+	struct ctl_table_header *hdr;
+
+	table = xfrm6_policy_table;
+	if (!net_eq(net, &init_net)) {
+		table = kmemdup(table, sizeof(xfrm6_policy_table), GFP_KERNEL);
+		if (!table)
+			goto err_alloc;
+
+		table[0].data = &net->xfrm.xfrm6_dst_ops.gc_thresh;
+	}
+
+	hdr = register_net_sysctl(net, "net/ipv6", table);
+	if (!hdr)
+		goto err_reg;
+
+	net->ipv6.sysctl.xfrm6_hdr = hdr;
+	return 0;
+
+err_reg:
+	if (!net_eq(net, &init_net))
+		kfree(table);
+err_alloc:
+	return -ENOMEM;
+}
+
+static void __net_exit xfrm6_net_exit(struct net *net)
+{
+	struct ctl_table *table;
+
+	if (net->ipv6.sysctl.xfrm6_hdr == NULL)
+		return;
+
+	table = net->ipv6.sysctl.xfrm6_hdr->ctl_table_arg;
+	unregister_net_sysctl_table(net->ipv6.sysctl.xfrm6_hdr);
+	if (!net_eq(net, &init_net))
+		kfree(table);
+}
+
+static struct pernet_operations xfrm6_net_ops = {
+	.init	= xfrm6_net_init,
+	.exit	= xfrm6_net_exit,
+};
 #endif
 
 int __init xfrm6_init(void)
@@ -340,8 +383,7 @@ int __init xfrm6_init(void)
 		goto out_policy;
 
 #ifdef CONFIG_SYSCTL
-	sysctl_hdr = register_net_sysctl(&init_net, "net/ipv6",
-					 xfrm6_policy_table);
+	register_pernet_subsys(&xfrm6_net_ops);
 #endif
 out:
 	return ret;
@@ -353,8 +395,7 @@ out_policy:
 void xfrm6_fini(void)
 {
 #ifdef CONFIG_SYSCTL
-	if (sysctl_hdr)
-		unregister_net_sysctl_table(sysctl_hdr);
+	unregister_pernet_subsys(&xfrm6_net_ops);
 #endif
 	xfrm6_policy_fini();
 	xfrm6_state_fini();
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index ee5a7065aac..6cc48012b73 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -72,7 +72,7 @@ static inline unsigned int xfrm6_tunnel_spi_hash_byaddr(const xfrm_address_t *ad
 {
 	unsigned int h;
 
-	h = (__force u32)(addr->a6[0] ^ addr->a6[1] ^ addr->a6[2] ^ addr->a6[3]);
+	h = ipv6_addr_hash((const struct in6_addr *)addr);
 	h ^= h >> 16;
 	h ^= h >> 8;
 	h &= XFRM6_TUNNEL_SPI_BYADDR_HSIZE - 1;
@@ -94,7 +94,7 @@ static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(struct net *net, const
 	hlist_for_each_entry_rcu(x6spi, pos,
 			     &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)],
 			     list_byaddr) {
-		if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0)
+		if (xfrm6_addr_equal(&x6spi->addr, saddr))
 			return x6spi;
 	}
 
@@ -211,7 +211,7 @@ static void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr)
 				  &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)],
 				  list_byaddr)
 	{
-		if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) {
+		if (xfrm6_addr_equal(&x6spi->addr, saddr)) {
 			if (atomic_dec_and_test(&x6spi->refcnt)) {
 				hlist_del_rcu(&x6spi->list_byaddr);
 				hlist_del_rcu(&x6spi->list_byspi);
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index b833677d83d..d07e3a62644 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -2567,8 +2567,7 @@ bed:
 						   err);
 
 			/* If watchdog is still activated, kill it! */
-			if(timer_pending(&(self->watchdog)))
-				del_timer(&(self->watchdog));
+			del_timer(&(self->watchdog));
 
 			IRDA_DEBUG(1, "%s(), ...waking up !\n", __func__);
 
diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
index 2bb2beb6a37..3c83a1e5ab0 100644
--- a/net/irda/irnet/irnet_ppp.c
+++ b/net/irda/irnet/irnet_ppp.c
@@ -214,8 +214,7 @@ irnet_get_discovery_log(irnet_socket *	ap)
  * After reading :    discoveries = NULL ; disco_index = Y ; disco_number = -1
  */
 static inline int
-irnet_read_discovery_log(irnet_socket *	ap,
-			 char *		event)
+irnet_read_discovery_log(irnet_socket *ap, char *event, int buf_size)
 {
   int		done_event = 0;
 
@@ -237,12 +236,13 @@ irnet_read_discovery_log(irnet_socket *	ap,
   if(ap->disco_index < ap->disco_number)
     {
       /* Write an event */
-      sprintf(event, "Found %08x (%s) behind %08x {hints %02X-%02X}\n",
-	      ap->discoveries[ap->disco_index].daddr,
-	      ap->discoveries[ap->disco_index].info,
-	      ap->discoveries[ap->disco_index].saddr,
-	      ap->discoveries[ap->disco_index].hints[0],
-	      ap->discoveries[ap->disco_index].hints[1]);
+      snprintf(event, buf_size,
+	       "Found %08x (%s) behind %08x {hints %02X-%02X}\n",
+	       ap->discoveries[ap->disco_index].daddr,
+	       ap->discoveries[ap->disco_index].info,
+	       ap->discoveries[ap->disco_index].saddr,
+	       ap->discoveries[ap->disco_index].hints[0],
+	       ap->discoveries[ap->disco_index].hints[1]);
       DEBUG(CTRL_INFO, "Writing discovery %d : %s\n",
 	    ap->disco_index, ap->discoveries[ap->disco_index].info);
 
@@ -282,27 +282,24 @@ irnet_ctrl_read(irnet_socket *	ap,
 		size_t		count)
 {
   DECLARE_WAITQUEUE(wait, current);
-  char		event[64];	/* Max event is 61 char */
+  char		event[75];
   ssize_t	ret = 0;
 
   DENTER(CTRL_TRACE, "(ap=0x%p, count=%Zd)\n", ap, count);
 
-  /* Check if we can write an event out in one go */
-  DABORT(count < sizeof(event), -EOVERFLOW, CTRL_ERROR, "Buffer to small.\n");
-
 #ifdef INITIAL_DISCOVERY
   /* Check if we have read the log */
-  if(irnet_read_discovery_log(ap, event))
+  if (irnet_read_discovery_log(ap, event, sizeof(event)))
     {
-      /* We have an event !!! Copy it to the user */
-      if(copy_to_user(buf, event, strlen(event)))
+      count = min(strlen(event), count);
+      if (copy_to_user(buf, event, count))
 	{
 	  DERROR(CTRL_ERROR, "Invalid user space pointer.\n");
 	  return -EFAULT;
 	}
 
       DEXIT(CTRL_TRACE, "\n");
-      return strlen(event);
+      return count;
     }
 #endif /* INITIAL_DISCOVERY */
 
@@ -339,79 +336,81 @@ irnet_ctrl_read(irnet_socket *	ap,
   switch(irnet_events.log[ap->event_index].event)
     {
     case IRNET_DISCOVER:
-      sprintf(event, "Discovered %08x (%s) behind %08x {hints %02X-%02X}\n",
-	      irnet_events.log[ap->event_index].daddr,
-	      irnet_events.log[ap->event_index].name,
-	      irnet_events.log[ap->event_index].saddr,
-	      irnet_events.log[ap->event_index].hints.byte[0],
-	      irnet_events.log[ap->event_index].hints.byte[1]);
+      snprintf(event, sizeof(event),
+	       "Discovered %08x (%s) behind %08x {hints %02X-%02X}\n",
+	       irnet_events.log[ap->event_index].daddr,
+	       irnet_events.log[ap->event_index].name,
+	       irnet_events.log[ap->event_index].saddr,
+	       irnet_events.log[ap->event_index].hints.byte[0],
+	       irnet_events.log[ap->event_index].hints.byte[1]);
       break;
     case IRNET_EXPIRE:
-      sprintf(event, "Expired %08x (%s) behind %08x {hints %02X-%02X}\n",
-	      irnet_events.log[ap->event_index].daddr,
-	      irnet_events.log[ap->event_index].name,
-	      irnet_events.log[ap->event_index].saddr,
-	      irnet_events.log[ap->event_index].hints.byte[0],
-	      irnet_events.log[ap->event_index].hints.byte[1]);
+      snprintf(event, sizeof(event),
+	       "Expired %08x (%s) behind %08x {hints %02X-%02X}\n",
+	       irnet_events.log[ap->event_index].daddr,
+	       irnet_events.log[ap->event_index].name,
+	       irnet_events.log[ap->event_index].saddr,
+	       irnet_events.log[ap->event_index].hints.byte[0],
+	       irnet_events.log[ap->event_index].hints.byte[1]);
       break;
     case IRNET_CONNECT_TO:
-      sprintf(event, "Connected to %08x (%s) on ppp%d\n",
-	      irnet_events.log[ap->event_index].daddr,
-	      irnet_events.log[ap->event_index].name,
-	      irnet_events.log[ap->event_index].unit);
+      snprintf(event, sizeof(event), "Connected to %08x (%s) on ppp%d\n",
+	       irnet_events.log[ap->event_index].daddr,
+	       irnet_events.log[ap->event_index].name,
+	       irnet_events.log[ap->event_index].unit);
       break;
     case IRNET_CONNECT_FROM:
-      sprintf(event, "Connection from %08x (%s) on ppp%d\n",
-	      irnet_events.log[ap->event_index].daddr,
-	      irnet_events.log[ap->event_index].name,
-	      irnet_events.log[ap->event_index].unit);
+      snprintf(event, sizeof(event), "Connection from %08x (%s) on ppp%d\n",
+	       irnet_events.log[ap->event_index].daddr,
+	       irnet_events.log[ap->event_index].name,
+	       irnet_events.log[ap->event_index].unit);
       break;
     case IRNET_REQUEST_FROM:
-      sprintf(event, "Request from %08x (%s) behind %08x\n",
-	      irnet_events.log[ap->event_index].daddr,
-	      irnet_events.log[ap->event_index].name,
-	      irnet_events.log[ap->event_index].saddr);
+      snprintf(event, sizeof(event), "Request from %08x (%s) behind %08x\n",
+	       irnet_events.log[ap->event_index].daddr,
+	       irnet_events.log[ap->event_index].name,
+	       irnet_events.log[ap->event_index].saddr);
       break;
     case IRNET_NOANSWER_FROM:
-      sprintf(event, "No-answer from %08x (%s) on ppp%d\n",
-	      irnet_events.log[ap->event_index].daddr,
-	      irnet_events.log[ap->event_index].name,
-	      irnet_events.log[ap->event_index].unit);
+      snprintf(event, sizeof(event), "No-answer from %08x (%s) on ppp%d\n",
+	       irnet_events.log[ap->event_index].daddr,
+	       irnet_events.log[ap->event_index].name,
+	       irnet_events.log[ap->event_index].unit);
       break;
     case IRNET_BLOCKED_LINK:
-      sprintf(event, "Blocked link with %08x (%s) on ppp%d\n",
-	      irnet_events.log[ap->event_index].daddr,
-	      irnet_events.log[ap->event_index].name,
-	      irnet_events.log[ap->event_index].unit);
+      snprintf(event, sizeof(event), "Blocked link with %08x (%s) on ppp%d\n",
+	       irnet_events.log[ap->event_index].daddr,
+	       irnet_events.log[ap->event_index].name,
+	       irnet_events.log[ap->event_index].unit);
       break;
     case IRNET_DISCONNECT_FROM:
-      sprintf(event, "Disconnection from %08x (%s) on ppp%d\n",
-	      irnet_events.log[ap->event_index].daddr,
-	      irnet_events.log[ap->event_index].name,
-	      irnet_events.log[ap->event_index].unit);
+      snprintf(event, sizeof(event), "Disconnection from %08x (%s) on ppp%d\n",
+	       irnet_events.log[ap->event_index].daddr,
+	       irnet_events.log[ap->event_index].name,
+	       irnet_events.log[ap->event_index].unit);
       break;
     case IRNET_DISCONNECT_TO:
-      sprintf(event, "Disconnected to %08x (%s)\n",
-	      irnet_events.log[ap->event_index].daddr,
-	      irnet_events.log[ap->event_index].name);
+      snprintf(event, sizeof(event), "Disconnected to %08x (%s)\n",
+	       irnet_events.log[ap->event_index].daddr,
+	       irnet_events.log[ap->event_index].name);
       break;
     default:
-      sprintf(event, "Bug\n");
+      snprintf(event, sizeof(event), "Bug\n");
     }
   /* Increment our event index */
   ap->event_index = (ap->event_index + 1) % IRNET_MAX_EVENTS;
 
   DEBUG(CTRL_INFO, "Event is :%s", event);
 
-  /* Copy it to the user */
-  if(copy_to_user(buf, event, strlen(event)))
+  count = min(strlen(event), count);
+  if (copy_to_user(buf, event, count))
     {
       DERROR(CTRL_ERROR, "Invalid user space pointer.\n");
       return -EFAULT;
     }
 
   DEXIT(CTRL_TRACE, "\n");
-  return strlen(event);
+  return count;
 }
 
 /*------------------------------------------------------------------*/
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 3ad1f9db5f8..df082508362 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -1806,7 +1806,7 @@ static void iucv_external_interrupt(struct ext_code ext_code,
 	struct iucv_irq_data *p;
 	struct iucv_irq_list *work;
 
-	kstat_cpu(smp_processor_id()).irqs[EXTINT_IUC]++;
+	inc_irq_stat(IRQEXT_IUC);
 	p = iucv_irq_data[smp_processor_id()];
 	if (p->ippathid >= iucv_max_pathid) {
 		WARN_ON(p->ippathid >= iucv_max_pathid);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 5b426a64654..7b3ba32ca67 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -203,7 +203,6 @@ static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2,
 	}
 	if (*skb2 != NULL) {
 		if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) {
-			skb_orphan(*skb2);
 			skb_set_owner_r(*skb2, sk);
 			skb_queue_tail(&sk->sk_receive_queue, *skb2);
 			sk->sk_data_ready(sk, (*skb2)->len);
@@ -762,7 +761,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x,
 	}
 
 	/* identity & sensitivity */
-	if (xfrm_addr_cmp(&x->sel.saddr, &x->props.saddr, x->props.family))
+	if (!xfrm_addr_equal(&x->sel.saddr, &x->props.saddr, x->props.family))
 		size += sizeof(struct sadb_address) + sockaddr_size;
 
 	if (add_keys) {
@@ -816,18 +815,21 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x,
 	sa->sadb_sa_auth = 0;
 	if (x->aalg) {
 		struct xfrm_algo_desc *a = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
-		sa->sadb_sa_auth = a ? a->desc.sadb_alg_id : 0;
+		sa->sadb_sa_auth = (a && a->pfkey_supported) ?
+					a->desc.sadb_alg_id : 0;
 	}
 	sa->sadb_sa_encrypt = 0;
 	BUG_ON(x->ealg && x->calg);
 	if (x->ealg) {
 		struct xfrm_algo_desc *a = xfrm_ealg_get_byname(x->ealg->alg_name, 0);
-		sa->sadb_sa_encrypt = a ? a->desc.sadb_alg_id : 0;
+		sa->sadb_sa_encrypt = (a && a->pfkey_supported) ?
+					a->desc.sadb_alg_id : 0;
 	}
 	/* KAME compatible: sadb_sa_encrypt is overloaded with calg id */
 	if (x->calg) {
 		struct xfrm_algo_desc *a = xfrm_calg_get_byname(x->calg->alg_name, 0);
-		sa->sadb_sa_encrypt = a ? a->desc.sadb_alg_id : 0;
+		sa->sadb_sa_encrypt = (a && a->pfkey_supported) ?
+					a->desc.sadb_alg_id : 0;
 	}
 
 	sa->sadb_sa_flags = 0;
@@ -909,8 +911,8 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x,
 	if (!addr->sadb_address_prefixlen)
 		BUG();
 
-	if (xfrm_addr_cmp(&x->sel.saddr, &x->props.saddr,
-			  x->props.family)) {
+	if (!xfrm_addr_equal(&x->sel.saddr, &x->props.saddr,
+			     x->props.family)) {
 		addr = (struct sadb_address*) skb_put(skb,
 			sizeof(struct sadb_address)+sockaddr_size);
 		addr->sadb_address_len =
@@ -1138,7 +1140,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 	if (sa->sadb_sa_auth) {
 		int keysize = 0;
 		struct xfrm_algo_desc *a = xfrm_aalg_get_byid(sa->sadb_sa_auth);
-		if (!a) {
+		if (!a || !a->pfkey_supported) {
 			err = -ENOSYS;
 			goto out;
 		}
@@ -1160,7 +1162,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 	if (sa->sadb_sa_encrypt) {
 		if (hdr->sadb_msg_satype == SADB_X_SATYPE_IPCOMP) {
 			struct xfrm_algo_desc *a = xfrm_calg_get_byid(sa->sadb_sa_encrypt);
-			if (!a) {
+			if (!a || !a->pfkey_supported) {
 				err = -ENOSYS;
 				goto out;
 			}
@@ -1172,7 +1174,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 		} else {
 			int keysize = 0;
 			struct xfrm_algo_desc *a = xfrm_ealg_get_byid(sa->sadb_sa_encrypt);
-			if (!a) {
+			if (!a || !a->pfkey_supported) {
 				err = -ENOSYS;
 				goto out;
 			}
@@ -1321,7 +1323,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_
 
 	if (hdr->sadb_msg_seq) {
 		x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq);
-		if (x && xfrm_addr_cmp(&x->id.daddr, xdaddr, family)) {
+		if (x && !xfrm_addr_equal(&x->id.daddr, xdaddr, family)) {
 			xfrm_state_put(x);
 			x = NULL;
 		}
@@ -1578,13 +1580,13 @@ static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig,
 	struct sadb_msg *hdr;
 	int len, auth_len, enc_len, i;
 
-	auth_len = xfrm_count_auth_supported();
+	auth_len = xfrm_count_pfkey_auth_supported();
 	if (auth_len) {
 		auth_len *= sizeof(struct sadb_alg);
 		auth_len += sizeof(struct sadb_supported);
 	}
 
-	enc_len = xfrm_count_enc_supported();
+	enc_len = xfrm_count_pfkey_enc_supported();
 	if (enc_len) {
 		enc_len *= sizeof(struct sadb_alg);
 		enc_len += sizeof(struct sadb_supported);
@@ -1615,6 +1617,8 @@ static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig,
 			struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i);
 			if (!aalg)
 				break;
+			if (!aalg->pfkey_supported)
+				continue;
 			if (aalg->available)
 				*ap++ = aalg->desc;
 		}
@@ -1634,6 +1638,8 @@ static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig,
 			struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i);
 			if (!ealg)
 				break;
+			if (!ealg->pfkey_supported)
+				continue;
 			if (ealg->available)
 				*ap++ = ealg->desc;
 		}
@@ -2825,6 +2831,8 @@ static int count_ah_combs(const struct xfrm_tmpl *t)
 		const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i);
 		if (!aalg)
 			break;
+		if (!aalg->pfkey_supported)
+			continue;
 		if (aalg_tmpl_set(t, aalg) && aalg->available)
 			sz += sizeof(struct sadb_comb);
 	}
@@ -2840,6 +2848,9 @@ static int count_esp_combs(const struct xfrm_tmpl *t)
 		if (!ealg)
 			break;
 
+		if (!ealg->pfkey_supported)
+			continue;
+
 		if (!(ealg_tmpl_set(t, ealg) && ealg->available))
 			continue;
 
@@ -2848,6 +2859,9 @@ static int count_esp_combs(const struct xfrm_tmpl *t)
 			if (!aalg)
 				break;
 
+			if (!aalg->pfkey_supported)
+				continue;
+
 			if (aalg_tmpl_set(t, aalg) && aalg->available)
 				sz += sizeof(struct sadb_comb);
 		}
@@ -2871,6 +2885,9 @@ static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
 		if (!aalg)
 			break;
 
+		if (!aalg->pfkey_supported)
+			continue;
+
 		if (aalg_tmpl_set(t, aalg) && aalg->available) {
 			struct sadb_comb *c;
 			c = (struct sadb_comb*)skb_put(skb, sizeof(struct sadb_comb));
@@ -2903,6 +2920,9 @@ static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
 		if (!ealg)
 			break;
 
+		if (!ealg->pfkey_supported)
+			continue;
+
 		if (!(ealg_tmpl_set(t, ealg) && ealg->available))
 			continue;
 
@@ -2911,6 +2931,8 @@ static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
 			const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k);
 			if (!aalg)
 				break;
+			if (!aalg->pfkey_supported)
+				continue;
 			if (!(aalg_tmpl_set(t, aalg) && aalg->available))
 				continue;
 			c = (struct sadb_comb*)skb_put(skb, sizeof(struct sadb_comb));
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 1a9f3723c13..dcfd64e83ab 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -101,6 +101,7 @@ struct l2tp_skb_cb {
 
 static atomic_t l2tp_tunnel_count;
 static atomic_t l2tp_session_count;
+static struct workqueue_struct *l2tp_wq;
 
 /* per-net private data for this module */
 static unsigned int l2tp_net_id;
@@ -122,7 +123,6 @@ static inline struct l2tp_net *l2tp_pernet(struct net *net)
 	return net_generic(net, l2tp_net_id);
 }
 
-
 /* Tunnel reference counts. Incremented per session that is added to
  * the tunnel.
  */
@@ -168,6 +168,51 @@ l2tp_session_id_hash_2(struct l2tp_net *pn, u32 session_id)
 
 }
 
+/* Lookup the tunnel socket, possibly involving the fs code if the socket is
+ * owned by userspace.  A struct sock returned from this function must be
+ * released using l2tp_tunnel_sock_put once you're done with it.
+ */
+struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel)
+{
+	int err = 0;
+	struct socket *sock = NULL;
+	struct sock *sk = NULL;
+
+	if (!tunnel)
+		goto out;
+
+	if (tunnel->fd >= 0) {
+		/* Socket is owned by userspace, who might be in the process
+		 * of closing it.  Look the socket up using the fd to ensure
+		 * consistency.
+		 */
+		sock = sockfd_lookup(tunnel->fd, &err);
+		if (sock)
+			sk = sock->sk;
+	} else {
+		/* Socket is owned by kernelspace */
+		sk = tunnel->sock;
+	}
+
+out:
+	return sk;
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_sock_lookup);
+
+/* Drop a reference to a tunnel socket obtained via. l2tp_tunnel_sock_put */
+void l2tp_tunnel_sock_put(struct sock *sk)
+{
+	struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk);
+	if (tunnel) {
+		if (tunnel->fd >= 0) {
+			/* Socket is owned by userspace */
+			sockfd_put(sk->sk_socket);
+		}
+		sock_put(sk);
+	}
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_sock_put);
+
 /* Lookup a session by id in the global session list
  */
 static struct l2tp_session *l2tp_session_find_2(struct net *net, u32 session_id)
@@ -1123,8 +1168,6 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
 	struct udphdr *uh;
 	struct inet_sock *inet;
 	__wsum csum;
-	int old_headroom;
-	int new_headroom;
 	int headroom;
 	int uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
 	int udp_len;
@@ -1136,16 +1179,12 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
 	 */
 	headroom = NET_SKB_PAD + sizeof(struct iphdr) +
 		uhlen + hdr_len;
-	old_headroom = skb_headroom(skb);
 	if (skb_cow_head(skb, headroom)) {
 		kfree_skb(skb);
 		return NET_XMIT_DROP;
 	}
 
-	new_headroom = skb_headroom(skb);
 	skb_orphan(skb);
-	skb->truesize += new_headroom - old_headroom;
-
 	/* Setup L2TP header */
 	session->build_header(session, __skb_push(skb, hdr_len));
 
@@ -1232,6 +1271,7 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb);
 static void l2tp_tunnel_destruct(struct sock *sk)
 {
 	struct l2tp_tunnel *tunnel;
+	struct l2tp_net *pn;
 
 	tunnel = sk->sk_user_data;
 	if (tunnel == NULL)
@@ -1239,9 +1279,8 @@ static void l2tp_tunnel_destruct(struct sock *sk)
 
 	l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: closing...\n", tunnel->name);
 
-	/* Close all sessions */
-	l2tp_tunnel_closeall(tunnel);
 
+	/* Disable udp encapsulation */
 	switch (tunnel->encap) {
 	case L2TP_ENCAPTYPE_UDP:
 		/* No longer an encapsulation socket. See net/ipv4/udp.c */
@@ -1253,17 +1292,23 @@ static void l2tp_tunnel_destruct(struct sock *sk)
 	}
 
 	/* Remove hooks into tunnel socket */
-	tunnel->sock = NULL;
 	sk->sk_destruct = tunnel->old_sk_destruct;
 	sk->sk_user_data = NULL;
+	tunnel->sock = NULL;
 
-	/* Call the original destructor */
-	if (sk->sk_destruct)
-		(*sk->sk_destruct)(sk);
+	/* Remove the tunnel struct from the tunnel list */
+	pn = l2tp_pernet(tunnel->l2tp_net);
+	spin_lock_bh(&pn->l2tp_tunnel_list_lock);
+	list_del_rcu(&tunnel->list);
+	spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
+	atomic_dec(&l2tp_tunnel_count);
 
-	/* We're finished with the socket */
+	l2tp_tunnel_closeall(tunnel);
 	l2tp_tunnel_dec_refcount(tunnel);
 
+	/* Call the original destructor */
+	if (sk->sk_destruct)
+		(*sk->sk_destruct)(sk);
 end:
 	return;
 }
@@ -1337,48 +1382,77 @@ again:
  */
 static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
 {
-	struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
-
 	BUG_ON(atomic_read(&tunnel->ref_count) != 0);
 	BUG_ON(tunnel->sock != NULL);
-
 	l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: free...\n", tunnel->name);
-
-	/* Remove from tunnel list */
-	spin_lock_bh(&pn->l2tp_tunnel_list_lock);
-	list_del_rcu(&tunnel->list);
 	kfree_rcu(tunnel, rcu);
-	spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
+}
 
-	atomic_dec(&l2tp_tunnel_count);
+/* Workqueue tunnel deletion function */
+static void l2tp_tunnel_del_work(struct work_struct *work)
+{
+	struct l2tp_tunnel *tunnel = NULL;
+	struct socket *sock = NULL;
+	struct sock *sk = NULL;
+
+	tunnel = container_of(work, struct l2tp_tunnel, del_work);
+	sk = l2tp_tunnel_sock_lookup(tunnel);
+	if (!sk)
+		return;
+
+	sock = sk->sk_socket;
+	BUG_ON(!sock);
+
+	/* If the tunnel socket was created directly by the kernel, use the
+	 * sk_* API to release the socket now.  Otherwise go through the
+	 * inet_* layer to shut the socket down, and let userspace close it.
+	 * In either case the tunnel resources are freed in the socket
+	 * destructor when the tunnel socket goes away.
+	 */
+	if (sock->file == NULL) {
+		kernel_sock_shutdown(sock, SHUT_RDWR);
+		sk_release_kernel(sk);
+	} else {
+		inet_shutdown(sock, 2);
+	}
+
+	l2tp_tunnel_sock_put(sk);
 }
 
 /* Create a socket for the tunnel, if one isn't set up by
  * userspace. This is used for static tunnels where there is no
  * managing L2TP daemon.
+ *
+ * Since we don't want these sockets to keep a namespace alive by
+ * themselves, we drop the socket's namespace refcount after creation.
+ * These sockets are freed when the namespace exits using the pernet
+ * exit hook.
  */
-static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct socket **sockp)
+static int l2tp_tunnel_sock_create(struct net *net,
+				u32 tunnel_id,
+				u32 peer_tunnel_id,
+				struct l2tp_tunnel_cfg *cfg,
+				struct socket **sockp)
 {
 	int err = -EINVAL;
-	struct sockaddr_in udp_addr;
+	struct socket *sock = NULL;
+	struct sockaddr_in udp_addr = {0};
+	struct sockaddr_l2tpip ip_addr = {0};
 #if IS_ENABLED(CONFIG_IPV6)
-	struct sockaddr_in6 udp6_addr;
-	struct sockaddr_l2tpip6 ip6_addr;
+	struct sockaddr_in6 udp6_addr = {0};
+	struct sockaddr_l2tpip6 ip6_addr = {0};
 #endif
-	struct sockaddr_l2tpip ip_addr;
-	struct socket *sock = NULL;
 
 	switch (cfg->encap) {
 	case L2TP_ENCAPTYPE_UDP:
 #if IS_ENABLED(CONFIG_IPV6)
 		if (cfg->local_ip6 && cfg->peer_ip6) {
-			err = sock_create(AF_INET6, SOCK_DGRAM, 0, sockp);
+			err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock);
 			if (err < 0)
 				goto out;
 
-			sock = *sockp;
+			sk_change_net(sock->sk, net);
 
-			memset(&udp6_addr, 0, sizeof(udp6_addr));
 			udp6_addr.sin6_family = AF_INET6;
 			memcpy(&udp6_addr.sin6_addr, cfg->local_ip6,
 			       sizeof(udp6_addr.sin6_addr));
@@ -1400,13 +1474,12 @@ static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2t
 		} else
 #endif
 		{
-			err = sock_create(AF_INET, SOCK_DGRAM, 0, sockp);
+			err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock);
 			if (err < 0)
 				goto out;
 
-			sock = *sockp;
+			sk_change_net(sock->sk, net);
 
-			memset(&udp_addr, 0, sizeof(udp_addr));
 			udp_addr.sin_family = AF_INET;
 			udp_addr.sin_addr = cfg->local_ip;
 			udp_addr.sin_port = htons(cfg->local_udp_port);
@@ -1433,14 +1506,13 @@ static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2t
 	case L2TP_ENCAPTYPE_IP:
 #if IS_ENABLED(CONFIG_IPV6)
 		if (cfg->local_ip6 && cfg->peer_ip6) {
-			err = sock_create(AF_INET6, SOCK_DGRAM, IPPROTO_L2TP,
-					  sockp);
+			err = sock_create_kern(AF_INET6, SOCK_DGRAM,
+					  IPPROTO_L2TP, &sock);
 			if (err < 0)
 				goto out;
 
-			sock = *sockp;
+			sk_change_net(sock->sk, net);
 
-			memset(&ip6_addr, 0, sizeof(ip6_addr));
 			ip6_addr.l2tp_family = AF_INET6;
 			memcpy(&ip6_addr.l2tp_addr, cfg->local_ip6,
 			       sizeof(ip6_addr.l2tp_addr));
@@ -1462,14 +1534,13 @@ static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2t
 		} else
 #endif
 		{
-			err = sock_create(AF_INET, SOCK_DGRAM, IPPROTO_L2TP,
-					  sockp);
+			err = sock_create_kern(AF_INET, SOCK_DGRAM,
+					  IPPROTO_L2TP, &sock);
 			if (err < 0)
 				goto out;
 
-			sock = *sockp;
+			sk_change_net(sock->sk, net);
 
-			memset(&ip_addr, 0, sizeof(ip_addr));
 			ip_addr.l2tp_family = AF_INET;
 			ip_addr.l2tp_addr = cfg->local_ip;
 			ip_addr.l2tp_conn_id = tunnel_id;
@@ -1493,8 +1564,10 @@ static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2t
 	}
 
 out:
+	*sockp = sock;
 	if ((err < 0) && sock) {
-		sock_release(sock);
+		kernel_sock_shutdown(sock, SHUT_RDWR);
+		sk_release_kernel(sock->sk);
 		*sockp = NULL;
 	}
 
@@ -1517,15 +1590,23 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 	 * kernel socket.
 	 */
 	if (fd < 0) {
-		err = l2tp_tunnel_sock_create(tunnel_id, peer_tunnel_id, cfg, &sock);
+		err = l2tp_tunnel_sock_create(net, tunnel_id, peer_tunnel_id,
+				cfg, &sock);
 		if (err < 0)
 			goto err;
 	} else {
-		err = -EBADF;
 		sock = sockfd_lookup(fd, &err);
 		if (!sock) {
-			pr_err("tunl %hu: sockfd_lookup(fd=%d) returned %d\n",
+			pr_err("tunl %u: sockfd_lookup(fd=%d) returned %d\n",
 			       tunnel_id, fd, err);
+			err = -EBADF;
+			goto err;
+		}
+
+		/* Reject namespace mismatches */
+		if (!net_eq(sock_net(sock->sk), net)) {
+			pr_err("tunl %u: netns mismatch\n", tunnel_id);
+			err = -EINVAL;
 			goto err;
 		}
 	}
@@ -1607,10 +1688,14 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 	tunnel->old_sk_destruct = sk->sk_destruct;
 	sk->sk_destruct = &l2tp_tunnel_destruct;
 	tunnel->sock = sk;
+	tunnel->fd = fd;
 	lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class, "l2tp_sock");
 
 	sk->sk_allocation = GFP_ATOMIC;
 
+	/* Init delete workqueue struct */
+	INIT_WORK(&tunnel->del_work, l2tp_tunnel_del_work);
+
 	/* Add tunnel to our list */
 	INIT_LIST_HEAD(&tunnel->list);
 	atomic_inc(&l2tp_tunnel_count);
@@ -1642,25 +1727,7 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
  */
 int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
 {
-	int err = 0;
-	struct socket *sock = tunnel->sock ? tunnel->sock->sk_socket : NULL;
-
-	/* Force the tunnel socket to close. This will eventually
-	 * cause the tunnel to be deleted via the normal socket close
-	 * mechanisms when userspace closes the tunnel socket.
-	 */
-	if (sock != NULL) {
-		err = inet_shutdown(sock, 2);
-
-		/* If the tunnel's socket was created by the kernel,
-		 * close the socket here since the socket was not
-		 * created by userspace.
-		 */
-		if (sock->file == NULL)
-			err = inet_release(sock);
-	}
-
-	return err;
+	return (false == queue_work(l2tp_wq, &tunnel->del_work));
 }
 EXPORT_SYMBOL_GPL(l2tp_tunnel_delete);
 
@@ -1844,8 +1911,21 @@ static __net_init int l2tp_init_net(struct net *net)
 	return 0;
 }
 
+static __net_exit void l2tp_exit_net(struct net *net)
+{
+	struct l2tp_net *pn = l2tp_pernet(net);
+	struct l2tp_tunnel *tunnel = NULL;
+
+	rcu_read_lock_bh();
+	list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
+		(void)l2tp_tunnel_delete(tunnel);
+	}
+	rcu_read_unlock_bh();
+}
+
 static struct pernet_operations l2tp_net_ops = {
 	.init = l2tp_init_net,
+	.exit = l2tp_exit_net,
 	.id   = &l2tp_net_id,
 	.size = sizeof(struct l2tp_net),
 };
@@ -1858,6 +1938,13 @@ static int __init l2tp_init(void)
 	if (rc)
 		goto out;
 
+	l2tp_wq = alloc_workqueue("l2tp", WQ_NON_REENTRANT | WQ_UNBOUND, 0);
+	if (!l2tp_wq) {
+		pr_err("alloc_workqueue failed\n");
+		rc = -ENOMEM;
+		goto out;
+	}
+
 	pr_info("L2TP core driver, %s\n", L2TP_DRV_VERSION);
 
 out:
@@ -1867,6 +1954,10 @@ out:
 static void __exit l2tp_exit(void)
 {
 	unregister_pernet_device(&l2tp_net_ops);
+	if (l2tp_wq) {
+		destroy_workqueue(l2tp_wq);
+		l2tp_wq = NULL;
+	}
 }
 
 module_init(l2tp_init);
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 56d583e083a..8eb8f1d47f3 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -188,7 +188,10 @@ struct l2tp_tunnel {
 	int (*recv_payload_hook)(struct sk_buff *skb);
 	void (*old_sk_destruct)(struct sock *);
 	struct sock		*sock;		/* Parent socket */
-	int			fd;
+	int			fd;		/* Parent fd, if tunnel socket
+						 * was created by userspace */
+
+	struct work_struct	del_work;
 
 	uint8_t			priv[0];	/* private data */
 };
@@ -228,6 +231,8 @@ out:
 	return tunnel;
 }
 
+extern struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel);
+extern void l2tp_tunnel_sock_put(struct sock *sk);
 extern struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id);
 extern struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth);
 extern struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname);
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 61d8b75d268..f7ac8f42fee 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -115,6 +115,7 @@ static inline struct sock *l2tp_ip_bind_lookup(struct net *net, __be32 laddr, in
  */
 static int l2tp_ip_recv(struct sk_buff *skb)
 {
+	struct net *net = dev_net(skb->dev);
 	struct sock *sk;
 	u32 session_id;
 	u32 tunnel_id;
@@ -142,7 +143,7 @@ static int l2tp_ip_recv(struct sk_buff *skb)
 	}
 
 	/* Ok, this is a data packet. Lookup the session. */
-	session = l2tp_session_find(&init_net, NULL, session_id);
+	session = l2tp_session_find(net, NULL, session_id);
 	if (session == NULL)
 		goto discard;
 
@@ -173,14 +174,14 @@ pass_up:
 		goto discard;
 
 	tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
-	tunnel = l2tp_tunnel_find(&init_net, tunnel_id);
+	tunnel = l2tp_tunnel_find(net, tunnel_id);
 	if (tunnel != NULL)
 		sk = tunnel->sock;
 	else {
 		struct iphdr *iph = (struct iphdr *) skb_network_header(skb);
 
 		read_lock_bh(&l2tp_ip_lock);
-		sk = __l2tp_ip_bind_lookup(&init_net, iph->daddr, 0, tunnel_id);
+		sk = __l2tp_ip_bind_lookup(net, iph->daddr, 0, tunnel_id);
 		read_unlock_bh(&l2tp_ip_lock);
 	}
 
@@ -239,6 +240,7 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct sockaddr_l2tpip *addr = (struct sockaddr_l2tpip *) uaddr;
+	struct net *net = sock_net(sk);
 	int ret;
 	int chk_addr_ret;
 
@@ -251,7 +253,8 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 
 	ret = -EADDRINUSE;
 	read_lock_bh(&l2tp_ip_lock);
-	if (__l2tp_ip_bind_lookup(&init_net, addr->l2tp_addr.s_addr, sk->sk_bound_dev_if, addr->l2tp_conn_id))
+	if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr,
+				  sk->sk_bound_dev_if, addr->l2tp_conn_id))
 		goto out_in_use;
 
 	read_unlock_bh(&l2tp_ip_lock);
@@ -260,7 +263,7 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_l2tpip))
 		goto out;
 
-	chk_addr_ret = inet_addr_type(&init_net, addr->l2tp_addr.s_addr);
+	chk_addr_ret = inet_addr_type(net, addr->l2tp_addr.s_addr);
 	ret = -EADDRNOTAVAIL;
 	if (addr->l2tp_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
 	    chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
@@ -369,7 +372,7 @@ static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb)
 	return 0;
 
 drop:
-	IP_INC_STATS(&init_net, IPSTATS_MIB_INDISCARDS);
+	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_INDISCARDS);
 	kfree_skb(skb);
 	return -1;
 }
@@ -605,6 +608,7 @@ static struct inet_protosw l2tp_ip_protosw = {
 
 static struct net_protocol l2tp_ip_protocol __read_mostly = {
 	.handler	= l2tp_ip_recv,
+	.netns_ok	= 1,
 };
 
 static int __init l2tp_ip_init(void)
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 927547171bc..8ee4a86ae99 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -554,8 +554,8 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		memset(opt, 0, sizeof(struct ipv6_txoptions));
 		opt->tot_len = sizeof(struct ipv6_txoptions);
 
-		err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
-					&hlimit, &tclass, &dontfrag);
+		err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
+					    &hlimit, &tclass, &dontfrag);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
@@ -646,7 +646,7 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk,
 			    struct msghdr *msg, size_t len, int noblock,
 			    int flags, int *addr_len)
 {
-	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sockaddr_l2tpip6 *lsa = (struct sockaddr_l2tpip6 *)msg->msg_name;
 	size_t copied = 0;
 	int err = -EOPNOTSUPP;
@@ -688,8 +688,8 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk,
 			lsa->l2tp_scope_id = IP6CB(skb)->iif;
 	}
 
-	if (inet->cmsg_flags)
-		ip_cmsg_recv(msg, skb);
+	if (np->rxopt.all)
+		ip6_datagram_recv_ctl(sk, msg, skb);
 
 	if (flags & MSG_TRUNC)
 		copied = skb->len;
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index bbba3a19e94..c1bab22db85 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -37,6 +37,7 @@ static struct genl_family l2tp_nl_family = {
 	.version	= L2TP_GENL_VERSION,
 	.hdrsize	= 0,
 	.maxattr	= L2TP_ATTR_MAX,
+	.netnsok	= true,
 };
 
 /* Accessed under genl lock */
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 286366ef893..716605c241f 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -388,8 +388,6 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 	struct l2tp_session *session;
 	struct l2tp_tunnel *tunnel;
 	struct pppol2tp_session *ps;
-	int old_headroom;
-	int new_headroom;
 	int uhlen, headroom;
 
 	if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
@@ -408,7 +406,6 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 	if (tunnel == NULL)
 		goto abort_put_sess;
 
-	old_headroom = skb_headroom(skb);
 	uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
 	headroom = NET_SKB_PAD +
 		   sizeof(struct iphdr) + /* IP header */
@@ -418,9 +415,6 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 	if (skb_cow_head(skb, headroom))
 		goto abort_put_sess_tun;
 
-	new_headroom = skb_headroom(skb);
-	skb->truesize += new_headroom - old_headroom;
-
 	/* Setup PPP header */
 	__skb_push(skb, sizeof(ppph));
 	skb->data[0] = ppph[0];
diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c
index 1191039c2b1..d20c6d3c247 100644
--- a/net/mac802154/wpan.c
+++ b/net/mac802154/wpan.c
@@ -41,7 +41,7 @@ static inline int mac802154_fetch_skb_u8(struct sk_buff *skb, u8 *val)
 		return -EINVAL;
 
 	*val = skb->data[0];
-	 skb_pull(skb, 1);
+	skb_pull(skb, 1);
 
 	return 0;
 }
@@ -137,16 +137,12 @@ static int mac802154_header_create(struct sk_buff *skb,
 	struct ieee802154_addr dev_addr;
 	struct mac802154_sub_if_data *priv = netdev_priv(dev);
 	int pos = 2;
-	u8 *head;
+	u8 head[MAC802154_FRAME_HARD_HEADER_LEN];
 	u16 fc;
 
 	if (!daddr)
 		return -EINVAL;
 
-	head = kzalloc(MAC802154_FRAME_HARD_HEADER_LEN, GFP_KERNEL);
-	if (head == NULL)
-		return -ENOMEM;
-
 	head[pos++] = mac_cb(skb)->seq; /* DSN/BSN */
 	fc = mac_cb_type(skb);
 
@@ -210,7 +206,6 @@ static int mac802154_header_create(struct sk_buff *skb,
 	head[1] = fc >> 8;
 
 	memcpy(skb_push(skb, pos), head, pos);
-	kfree(head);
 
 	return pos;
 }
@@ -389,7 +384,7 @@ void mac802154_wpan_setup(struct net_device *dev)
 
 static int mac802154_process_data(struct net_device *dev, struct sk_buff *skb)
 {
-	return netif_rx(skb);
+	return netif_rx_ni(skb);
 }
 
 static int
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index fefa514b991..eb2c8ebf6d9 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -124,6 +124,12 @@ config NF_CONNTRACK_TIMESTAMP
 
 	  If unsure, say `N'.
 
+config NF_CONNTRACK_LABELS
+	bool
+	help
+	  This option enables support for assigning user-defined flag bits
+	  to connection tracking entries.  It selected by the connlabel match.
+
 config NF_CT_PROTO_DCCP
 	tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)'
 	depends on EXPERIMENTAL
@@ -680,6 +686,13 @@ config NETFILTER_XT_TARGET_NFQUEUE
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_TARGET_NOTRACK
+	tristate  '"NOTRACK" target support (DEPRECATED)'
+	depends on NF_CONNTRACK
+	depends on IP_NF_RAW || IP6_NF_RAW
+	depends on NETFILTER_ADVANCED
+	select NETFILTER_XT_TARGET_CT
+
 config NETFILTER_XT_TARGET_RATEEST
 	tristate '"RATEEST" target support'
 	depends on NETFILTER_ADVANCED
@@ -798,6 +811,15 @@ config NETFILTER_XT_MATCH_ADDRTYPE
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
 
+config NETFILTER_XT_MATCH_BPF
+	tristate '"bpf" match support'
+	depends on NETFILTER_ADVANCED
+	help
+	  BPF matching applies a linux socket filter to each packet and
+	  accepts those for which the filter returns non-zero.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_MATCH_CLUSTER
 	tristate '"cluster" match support'
 	depends on NF_CONNTRACK
@@ -835,6 +857,18 @@ config NETFILTER_XT_MATCH_CONNBYTES
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
 
+config NETFILTER_XT_MATCH_CONNLABEL
+	tristate '"connlabel" match support'
+	select NF_CONNTRACK_LABELS
+	depends on NETFILTER_ADVANCED
+	---help---
+	  This match allows you to test and assign userspace-defined labels names
+	  to a connection.  The kernel only stores bit values - mapping
+	  names to bits is done by userspace.
+
+	  Unlike connmark, more than 32 flag bits may be assigned to a
+	  connection simultaneously.
+
 config NETFILTER_XT_MATCH_CONNLIMIT
 	tristate '"connlimit" match support"'
 	depends on NF_CONNTRACK
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 32596978df1..a1abf87d43b 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -4,6 +4,7 @@ nf_conntrack-y	:= nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_exp
 nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
+nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o
 
 obj-$(CONFIG_NETFILTER) = netfilter.o
 
@@ -98,9 +99,11 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o
 
 # matches
 obj-$(CONFIG_NETFILTER_XT_MATCH_ADDRTYPE) += xt_addrtype.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_BPF) += xt_bpf.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLABEL) += xt_connlabel.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index 5c0b78528e5..b7d4cb475ae 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -234,7 +234,7 @@ hash_ip6_data_equal(const struct hash_ip6_elem *ip1,
 		    const struct hash_ip6_elem *ip2,
 		    u32 *multi)
 {
-	return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0;
+	return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6);
 }
 
 static inline bool
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 6283351f4ee..d8f77bacae8 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -284,7 +284,7 @@ hash_ipport6_data_equal(const struct hash_ipport6_elem *ip1,
 			const struct hash_ipport6_elem *ip2,
 			u32 *multi)
 {
-	return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 &&
+	return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) &&
 	       ip1->port == ip2->port &&
 	       ip1->proto == ip2->proto;
 }
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 6a21271c8d5..1da1e955f38 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -294,8 +294,8 @@ hash_ipportip6_data_equal(const struct hash_ipportip6_elem *ip1,
 			  const struct hash_ipportip6_elem *ip2,
 			  u32 *multi)
 {
-	return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 &&
-	       ipv6_addr_cmp(&ip1->ip2.in6, &ip2->ip2.in6) == 0 &&
+	return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) &&
+	       ipv6_addr_equal(&ip1->ip2.in6, &ip2->ip2.in6) &&
 	       ip1->port == ip2->port &&
 	       ip1->proto == ip2->proto;
 }
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 2d5cd4ee30e..f2627226a08 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -388,8 +388,8 @@ hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1,
 			   const struct hash_ipportnet6_elem *ip2,
 			   u32 *multi)
 {
-	return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 &&
-	       ipv6_addr_cmp(&ip1->ip2.in6, &ip2->ip2.in6) == 0 &&
+	return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) &&
+	       ipv6_addr_equal(&ip1->ip2.in6, &ip2->ip2.in6) &&
 	       ip1->cidr == ip2->cidr &&
 	       ip1->port == ip2->port &&
 	       ip1->proto == ip2->proto;
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 29e94b981f3..4b677cf6bf7 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -286,7 +286,7 @@ hash_net6_data_equal(const struct hash_net6_elem *ip1,
 		     const struct hash_net6_elem *ip2,
 		     u32 *multi)
 {
-	return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 &&
+	return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) &&
 	       ip1->cidr == ip2->cidr;
 }
 
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 45a101439bc..6ba985f1c96 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -471,7 +471,7 @@ hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1,
 			  const struct hash_netiface6_elem *ip2,
 			  u32 *multi)
 {
-	return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 &&
+	return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) &&
 	       ip1->cidr == ip2->cidr &&
 	       (++*multi) &&
 	       ip1->physdev == ip2->physdev &&
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 7ef700de596..af20c0c5ced 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -350,7 +350,7 @@ hash_netport6_data_equal(const struct hash_netport6_elem *ip1,
 			 const struct hash_netport6_elem *ip2,
 			 u32 *multi)
 {
-	return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 &&
+	return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) &&
 	       ip1->port == ip2->port &&
 	       ip1->proto == ip2->proto &&
 	       ip1->cidr == ip2->cidr;
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 30e764ad021..68e368a4bee 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -796,8 +796,7 @@ static void ip_vs_conn_expire(unsigned long data)
 	 */
 	if (likely(atomic_read(&cp->refcnt) == 1)) {
 		/* delete the timer if it is activated by other users */
-		if (timer_pending(&cp->timer))
-			del_timer(&cp->timer);
+		del_timer(&cp->timer);
 
 		/* does anybody control me? */
 		if (cp->control)
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 746048b13ef..ae8ec6f2768 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -61,14 +61,27 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
 	return 1;
 }
 
+static void sctp_nat_csum(struct sk_buff *skb, sctp_sctphdr_t *sctph,
+			  unsigned int sctphoff)
+{
+	__u32 crc32;
+	struct sk_buff *iter;
+
+	crc32 = sctp_start_cksum((__u8 *)sctph, skb_headlen(skb) - sctphoff);
+	skb_walk_frags(skb, iter)
+		crc32 = sctp_update_cksum((u8 *) iter->data,
+					  skb_headlen(iter), crc32);
+	sctph->checksum = sctp_end_cksum(crc32);
+
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+}
+
 static int
 sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 		  struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
 {
 	sctp_sctphdr_t *sctph;
 	unsigned int sctphoff = iph->len;
-	struct sk_buff *iter;
-	__be32 crc32;
 
 #ifdef CONFIG_IP_VS_IPV6
 	if (cp->af == AF_INET6 && iph->fragoffs)
@@ -92,13 +105,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 	sctph = (void *) skb_network_header(skb) + sctphoff;
 	sctph->source = cp->vport;
 
-	/* Calculate the checksum */
-	crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff);
-	skb_walk_frags(skb, iter)
-		crc32 = sctp_update_cksum((u8 *) iter->data, skb_headlen(iter),
-				          crc32);
-	crc32 = sctp_end_cksum(crc32);
-	sctph->checksum = crc32;
+	sctp_nat_csum(skb, sctph, sctphoff);
 
 	return 1;
 }
@@ -109,8 +116,6 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 {
 	sctp_sctphdr_t *sctph;
 	unsigned int sctphoff = iph->len;
-	struct sk_buff *iter;
-	__be32 crc32;
 
 #ifdef CONFIG_IP_VS_IPV6
 	if (cp->af == AF_INET6 && iph->fragoffs)
@@ -134,13 +139,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 	sctph = (void *) skb_network_header(skb) + sctphoff;
 	sctph->dest = cp->dport;
 
-	/* Calculate the checksum */
-	crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff);
-	skb_walk_frags(skb, iter)
-		crc32 = sctp_update_cksum((u8 *) iter->data, skb_headlen(iter),
-					  crc32);
-	crc32 = sctp_end_cksum(crc32);
-	sctph->checksum = crc32;
+	sctp_nat_csum(skb, sctph, sctphoff);
 
 	return 1;
 }
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index effa10c9e4e..44fd10c539a 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1795,6 +1795,8 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
 					     GFP_KERNEL);
 			if (!tinfo->buf)
 				goto outtinfo;
+		} else {
+			tinfo->buf = NULL;
 		}
 		tinfo->id = id;
 
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index 7df424e2d10..2d3030ab5b6 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -106,36 +106,26 @@ static void nf_conntrack_acct_fini_sysctl(struct net *net)
 }
 #endif
 
-int nf_conntrack_acct_init(struct net *net)
+int nf_conntrack_acct_pernet_init(struct net *net)
 {
-	int ret;
-
 	net->ct.sysctl_acct = nf_ct_acct;
+	return nf_conntrack_acct_init_sysctl(net);
+}
 
-	if (net_eq(net, &init_net)) {
-		ret = nf_ct_extend_register(&acct_extend);
-		if (ret < 0) {
-			printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n");
-			goto out_extend_register;
-		}
-	}
+void nf_conntrack_acct_pernet_fini(struct net *net)
+{
+	nf_conntrack_acct_fini_sysctl(net);
+}
 
-	ret = nf_conntrack_acct_init_sysctl(net);
+int nf_conntrack_acct_init(void)
+{
+	int ret = nf_ct_extend_register(&acct_extend);
 	if (ret < 0)
-		goto out_sysctl;
-
-	return 0;
-
-out_sysctl:
-	if (net_eq(net, &init_net))
-		nf_ct_extend_unregister(&acct_extend);
-out_extend_register:
+		pr_err("nf_conntrack_acct: Unable to register extension\n");
 	return ret;
 }
 
-void nf_conntrack_acct_fini(struct net *net)
+void nf_conntrack_acct_fini(void)
 {
-	nf_conntrack_acct_fini_sysctl(net);
-	if (net_eq(net, &init_net))
-		nf_ct_extend_unregister(&acct_extend);
+	nf_ct_extend_unregister(&acct_extend);
 }
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 08cdc71d8e8..c8e001a9c45 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -45,6 +45,7 @@
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/nf_conntrack_timestamp.h>
 #include <net/netfilter/nf_conntrack_timeout.h>
+#include <net/netfilter/nf_conntrack_labels.h>
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_core.h>
 
@@ -763,6 +764,7 @@ void nf_conntrack_free(struct nf_conn *ct)
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_free);
 
+
 /* Allocate a new conntrack: we return -ENOMEM if classification
    failed due to stress.  Otherwise it really is unclassifiable. */
 static struct nf_conntrack_tuple_hash *
@@ -809,6 +811,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 
 	nf_ct_acct_ext_add(ct, GFP_ATOMIC);
 	nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
+	nf_ct_labels_ext_add(ct);
 
 	ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;
 	nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
@@ -1331,18 +1334,42 @@ static int untrack_refs(void)
 	return cnt;
 }
 
-static void nf_conntrack_cleanup_init_net(void)
+void nf_conntrack_cleanup_start(void)
 {
+	RCU_INIT_POINTER(ip_ct_attach, NULL);
+}
+
+void nf_conntrack_cleanup_end(void)
+{
+	RCU_INIT_POINTER(nf_ct_destroy, NULL);
 	while (untrack_refs() > 0)
 		schedule();
 
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 	nf_ct_extend_unregister(&nf_ct_zone_extend);
 #endif
+	nf_conntrack_proto_fini();
+	nf_conntrack_labels_fini();
+	nf_conntrack_helper_fini();
+	nf_conntrack_timeout_fini();
+	nf_conntrack_ecache_fini();
+	nf_conntrack_tstamp_fini();
+	nf_conntrack_acct_fini();
+	nf_conntrack_expect_fini();
 }
 
-static void nf_conntrack_cleanup_net(struct net *net)
+/*
+ * Mishearing the voices in his head, our hero wonders how he's
+ * supposed to kill the mall.
+ */
+void nf_conntrack_cleanup_net(struct net *net)
 {
+	/*
+	 * This makes sure all current packets have passed through
+	 *  netfilter framework.  Roll on, two-stage module
+	 *  delete...
+	 */
+	synchronize_net();
  i_see_dead_people:
 	nf_ct_iterate_cleanup(net, kill_all, NULL);
 	nf_ct_release_dying_list(net);
@@ -1352,37 +1379,17 @@ static void nf_conntrack_cleanup_net(struct net *net)
 	}
 
 	nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
-	nf_conntrack_helper_fini(net);
-	nf_conntrack_timeout_fini(net);
-	nf_conntrack_ecache_fini(net);
-	nf_conntrack_tstamp_fini(net);
-	nf_conntrack_acct_fini(net);
-	nf_conntrack_expect_fini(net);
+	nf_conntrack_proto_pernet_fini(net);
+	nf_conntrack_helper_pernet_fini(net);
+	nf_conntrack_ecache_pernet_fini(net);
+	nf_conntrack_tstamp_pernet_fini(net);
+	nf_conntrack_acct_pernet_fini(net);
+	nf_conntrack_expect_pernet_fini(net);
 	kmem_cache_destroy(net->ct.nf_conntrack_cachep);
 	kfree(net->ct.slabname);
 	free_percpu(net->ct.stat);
 }
 
-/* Mishearing the voices in his head, our hero wonders how he's
-   supposed to kill the mall. */
-void nf_conntrack_cleanup(struct net *net)
-{
-	if (net_eq(net, &init_net))
-		RCU_INIT_POINTER(ip_ct_attach, NULL);
-
-	/* This makes sure all current packets have passed through
-	   netfilter framework.  Roll on, two-stage module
-	   delete... */
-	synchronize_net();
-	nf_conntrack_proto_fini(net);
-	nf_conntrack_cleanup_net(net);
-
-	if (net_eq(net, &init_net)) {
-		RCU_INIT_POINTER(nf_ct_destroy, NULL);
-		nf_conntrack_cleanup_init_net();
-	}
-}
-
 void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
 {
 	struct hlist_nulls_head *hash;
@@ -1473,7 +1480,7 @@ void nf_ct_untracked_status_or(unsigned long bits)
 }
 EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
 
-static int nf_conntrack_init_init_net(void)
+int nf_conntrack_init_start(void)
 {
 	int max_factor = 8;
 	int ret, cpu;
@@ -1500,11 +1507,44 @@ static int nf_conntrack_init_init_net(void)
 	printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n",
 	       NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
 	       nf_conntrack_max);
+
+	ret = nf_conntrack_expect_init();
+	if (ret < 0)
+		goto err_expect;
+
+	ret = nf_conntrack_acct_init();
+	if (ret < 0)
+		goto err_acct;
+
+	ret = nf_conntrack_tstamp_init();
+	if (ret < 0)
+		goto err_tstamp;
+
+	ret = nf_conntrack_ecache_init();
+	if (ret < 0)
+		goto err_ecache;
+
+	ret = nf_conntrack_timeout_init();
+	if (ret < 0)
+		goto err_timeout;
+
+	ret = nf_conntrack_helper_init();
+	if (ret < 0)
+		goto err_helper;
+
+	ret = nf_conntrack_labels_init();
+	if (ret < 0)
+		goto err_labels;
+
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 	ret = nf_ct_extend_register(&nf_ct_zone_extend);
 	if (ret < 0)
 		goto err_extend;
 #endif
+	ret = nf_conntrack_proto_init();
+	if (ret < 0)
+		goto err_proto;
+
 	/* Set up fake conntrack: to never be deleted, not in any hashes */
 	for_each_possible_cpu(cpu) {
 		struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
@@ -1515,25 +1555,53 @@ static int nf_conntrack_init_init_net(void)
 	nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
 	return 0;
 
+err_proto:
 #ifdef CONFIG_NF_CONNTRACK_ZONES
+	nf_ct_extend_unregister(&nf_ct_zone_extend);
 err_extend:
 #endif
+	nf_conntrack_labels_fini();
+err_labels:
+	nf_conntrack_helper_fini();
+err_helper:
+	nf_conntrack_timeout_fini();
+err_timeout:
+	nf_conntrack_ecache_fini();
+err_ecache:
+	nf_conntrack_tstamp_fini();
+err_tstamp:
+	nf_conntrack_acct_fini();
+err_acct:
+	nf_conntrack_expect_fini();
+err_expect:
 	return ret;
 }
 
+void nf_conntrack_init_end(void)
+{
+	/* For use by REJECT target */
+	RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach);
+	RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack);
+
+	/* Howto get NAT offsets */
+	RCU_INIT_POINTER(nf_ct_nat_offset, NULL);
+}
+
 /*
  * We need to use special "null" values, not used in hash table
  */
 #define UNCONFIRMED_NULLS_VAL	((1<<30)+0)
 #define DYING_NULLS_VAL		((1<<30)+1)
+#define TEMPLATE_NULLS_VAL	((1<<30)+2)
 
-static int nf_conntrack_init_net(struct net *net)
+int nf_conntrack_init_net(struct net *net)
 {
 	int ret;
 
 	atomic_set(&net->ct.count, 0);
 	INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, UNCONFIRMED_NULLS_VAL);
 	INIT_HLIST_NULLS_HEAD(&net->ct.dying, DYING_NULLS_VAL);
+	INIT_HLIST_NULLS_HEAD(&net->ct.tmpl, TEMPLATE_NULLS_VAL);
 	net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
 	if (!net->ct.stat) {
 		ret = -ENOMEM;
@@ -1562,35 +1630,36 @@ static int nf_conntrack_init_net(struct net *net)
 		printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
 		goto err_hash;
 	}
-	ret = nf_conntrack_expect_init(net);
+	ret = nf_conntrack_expect_pernet_init(net);
 	if (ret < 0)
 		goto err_expect;
-	ret = nf_conntrack_acct_init(net);
+	ret = nf_conntrack_acct_pernet_init(net);
 	if (ret < 0)
 		goto err_acct;
-	ret = nf_conntrack_tstamp_init(net);
+	ret = nf_conntrack_tstamp_pernet_init(net);
 	if (ret < 0)
 		goto err_tstamp;
-	ret = nf_conntrack_ecache_init(net);
+	ret = nf_conntrack_ecache_pernet_init(net);
 	if (ret < 0)
 		goto err_ecache;
-	ret = nf_conntrack_timeout_init(net);
-	if (ret < 0)
-		goto err_timeout;
-	ret = nf_conntrack_helper_init(net);
+	ret = nf_conntrack_helper_pernet_init(net);
 	if (ret < 0)
 		goto err_helper;
+	ret = nf_conntrack_proto_pernet_init(net);
+	if (ret < 0)
+		goto err_proto;
 	return 0;
+
+err_proto:
+	nf_conntrack_helper_pernet_fini(net);
 err_helper:
-	nf_conntrack_timeout_fini(net);
-err_timeout:
-	nf_conntrack_ecache_fini(net);
+	nf_conntrack_ecache_pernet_fini(net);
 err_ecache:
-	nf_conntrack_tstamp_fini(net);
+	nf_conntrack_tstamp_pernet_fini(net);
 err_tstamp:
-	nf_conntrack_acct_fini(net);
+	nf_conntrack_acct_pernet_fini(net);
 err_acct:
-	nf_conntrack_expect_fini(net);
+	nf_conntrack_expect_pernet_fini(net);
 err_expect:
 	nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
 err_hash:
@@ -1607,38 +1676,3 @@ s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
 			enum ip_conntrack_dir dir,
 			u32 seq);
 EXPORT_SYMBOL_GPL(nf_ct_nat_offset);
-
-int nf_conntrack_init(struct net *net)
-{
-	int ret;
-
-	if (net_eq(net, &init_net)) {
-		ret = nf_conntrack_init_init_net();
-		if (ret < 0)
-			goto out_init_net;
-	}
-	ret = nf_conntrack_proto_init(net);
-	if (ret < 0)
-		goto out_proto;
-	ret = nf_conntrack_init_net(net);
-	if (ret < 0)
-		goto out_net;
-
-	if (net_eq(net, &init_net)) {
-		/* For use by REJECT target */
-		RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach);
-		RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack);
-
-		/* Howto get NAT offsets */
-		RCU_INIT_POINTER(nf_ct_nat_offset, NULL);
-	}
-	return 0;
-
-out_net:
-	nf_conntrack_proto_fini(net);
-out_proto:
-	if (net_eq(net, &init_net))
-		nf_conntrack_cleanup_init_net();
-out_init_net:
-	return ret;
-}
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index faa978f1714..b5d2eb8bf0d 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -233,38 +233,27 @@ static void nf_conntrack_event_fini_sysctl(struct net *net)
 }
 #endif /* CONFIG_SYSCTL */
 
-int nf_conntrack_ecache_init(struct net *net)
+int nf_conntrack_ecache_pernet_init(struct net *net)
 {
-	int ret;
-
 	net->ct.sysctl_events = nf_ct_events;
 	net->ct.sysctl_events_retry_timeout = nf_ct_events_retry_timeout;
+	return nf_conntrack_event_init_sysctl(net);
+}
 
-	if (net_eq(net, &init_net)) {
-		ret = nf_ct_extend_register(&event_extend);
-		if (ret < 0) {
-			printk(KERN_ERR "nf_ct_event: Unable to register "
-					"event extension.\n");
-			goto out_extend_register;
-		}
-	}
+void nf_conntrack_ecache_pernet_fini(struct net *net)
+{
+	nf_conntrack_event_fini_sysctl(net);
+}
 
-	ret = nf_conntrack_event_init_sysctl(net);
+int nf_conntrack_ecache_init(void)
+{
+	int ret = nf_ct_extend_register(&event_extend);
 	if (ret < 0)
-		goto out_sysctl;
-
-	return 0;
-
-out_sysctl:
-	if (net_eq(net, &init_net))
-		nf_ct_extend_unregister(&event_extend);
-out_extend_register:
+		pr_err("nf_ct_event: Unable to register event extension.\n");
 	return ret;
 }
 
-void nf_conntrack_ecache_fini(struct net *net)
+void nf_conntrack_ecache_fini(void)
 {
-	nf_conntrack_event_fini_sysctl(net);
-	if (net_eq(net, &init_net))
-		nf_ct_extend_unregister(&event_extend);
+	nf_ct_extend_unregister(&event_extend);
 }
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 527651a53a4..bdd341899ed 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -587,53 +587,50 @@ static void exp_proc_remove(struct net *net)
 
 module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);
 
-int nf_conntrack_expect_init(struct net *net)
+int nf_conntrack_expect_pernet_init(struct net *net)
 {
 	int err = -ENOMEM;
 
-	if (net_eq(net, &init_net)) {
-		if (!nf_ct_expect_hsize) {
-			nf_ct_expect_hsize = net->ct.htable_size / 256;
-			if (!nf_ct_expect_hsize)
-				nf_ct_expect_hsize = 1;
-		}
-		nf_ct_expect_max = nf_ct_expect_hsize * 4;
-	}
-
 	net->ct.expect_count = 0;
 	net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
 	if (net->ct.expect_hash == NULL)
 		goto err1;
 
-	if (net_eq(net, &init_net)) {
-		nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
-					sizeof(struct nf_conntrack_expect),
-					0, 0, NULL);
-		if (!nf_ct_expect_cachep)
-			goto err2;
-	}
-
 	err = exp_proc_init(net);
 	if (err < 0)
-		goto err3;
+		goto err2;
 
 	return 0;
-
-err3:
-	if (net_eq(net, &init_net))
-		kmem_cache_destroy(nf_ct_expect_cachep);
 err2:
 	nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
 err1:
 	return err;
 }
 
-void nf_conntrack_expect_fini(struct net *net)
+void nf_conntrack_expect_pernet_fini(struct net *net)
 {
 	exp_proc_remove(net);
-	if (net_eq(net, &init_net)) {
-		rcu_barrier(); /* Wait for call_rcu() before destroy */
-		kmem_cache_destroy(nf_ct_expect_cachep);
-	}
 	nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
 }
+
+int nf_conntrack_expect_init(void)
+{
+	if (!nf_ct_expect_hsize) {
+		nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
+		if (!nf_ct_expect_hsize)
+			nf_ct_expect_hsize = 1;
+	}
+	nf_ct_expect_max = nf_ct_expect_hsize * 4;
+	nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
+				sizeof(struct nf_conntrack_expect),
+				0, 0, NULL);
+	if (!nf_ct_expect_cachep)
+		return -ENOMEM;
+	return 0;
+}
+
+void nf_conntrack_expect_fini(void)
+{
+	rcu_barrier(); /* Wait for call_rcu() before destroy */
+	kmem_cache_destroy(nf_ct_expect_cachep);
+}
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 884f2b39319..2f380f73c4c 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -423,44 +423,41 @@ static struct nf_ct_ext_type helper_extend __read_mostly = {
 	.id	= NF_CT_EXT_HELPER,
 };
 
-int nf_conntrack_helper_init(struct net *net)
+int nf_conntrack_helper_pernet_init(struct net *net)
 {
-	int err;
-
 	net->ct.auto_assign_helper_warned = false;
 	net->ct.sysctl_auto_assign_helper = nf_ct_auto_assign_helper;
+	return nf_conntrack_helper_init_sysctl(net);
+}
 
-	if (net_eq(net, &init_net)) {
-		nf_ct_helper_hsize = 1; /* gets rounded up to use one page */
-		nf_ct_helper_hash =
-			nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0);
-		if (!nf_ct_helper_hash)
-			return -ENOMEM;
+void nf_conntrack_helper_pernet_fini(struct net *net)
+{
+	nf_conntrack_helper_fini_sysctl(net);
+}
 
-		err = nf_ct_extend_register(&helper_extend);
-		if (err < 0)
-			goto err1;
+int nf_conntrack_helper_init(void)
+{
+	int ret;
+	nf_ct_helper_hsize = 1; /* gets rounded up to use one page */
+	nf_ct_helper_hash =
+		nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0);
+	if (!nf_ct_helper_hash)
+		return -ENOMEM;
+
+	ret = nf_ct_extend_register(&helper_extend);
+	if (ret < 0) {
+		pr_err("nf_ct_helper: Unable to register helper extension.\n");
+		goto out_extend;
 	}
 
-	err = nf_conntrack_helper_init_sysctl(net);
-	if (err < 0)
-		goto out_sysctl;
-
 	return 0;
-
-out_sysctl:
-	if (net_eq(net, &init_net))
-		nf_ct_extend_unregister(&helper_extend);
-err1:
+out_extend:
 	nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
-	return err;
+	return ret;
 }
 
-void nf_conntrack_helper_fini(struct net *net)
+void nf_conntrack_helper_fini(void)
 {
-	nf_conntrack_helper_fini_sysctl(net);
-	if (net_eq(net, &init_net)) {
-		nf_ct_extend_unregister(&helper_extend);
-		nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
-	}
+	nf_ct_extend_unregister(&helper_extend);
+	nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
 }
diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c
new file mode 100644
index 00000000000..8fe2e99428b
--- /dev/null
+++ b/net/netfilter/nf_conntrack_labels.c
@@ -0,0 +1,112 @@
+/*
+ * test/set flag bits stored in conntrack extension area.
+ *
+ * (C) 2013 Astaro GmbH & Co KG
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/ctype.h>
+#include <linux/export.h>
+#include <linux/jhash.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_labels.h>
+
+static unsigned int label_bits(const struct nf_conn_labels *l)
+{
+	unsigned int longs = l->words;
+	return longs * BITS_PER_LONG;
+}
+
+bool nf_connlabel_match(const struct nf_conn *ct, u16 bit)
+{
+	struct nf_conn_labels *labels = nf_ct_labels_find(ct);
+
+	if (!labels)
+		return false;
+
+	return bit < label_bits(labels) && test_bit(bit, labels->bits);
+}
+EXPORT_SYMBOL_GPL(nf_connlabel_match);
+
+int nf_connlabel_set(struct nf_conn *ct, u16 bit)
+{
+	struct nf_conn_labels *labels = nf_ct_labels_find(ct);
+
+	if (!labels || bit >= label_bits(labels))
+		return -ENOSPC;
+
+	if (test_bit(bit, labels->bits))
+		return 0;
+
+	if (test_and_set_bit(bit, labels->bits))
+		nf_conntrack_event_cache(IPCT_LABEL, ct);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nf_connlabel_set);
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+static void replace_u32(u32 *address, u32 mask, u32 new)
+{
+	u32 old, tmp;
+
+	do {
+		old = *address;
+		tmp = (old & mask) ^ new;
+	} while (cmpxchg(address, old, tmp) != old);
+}
+
+int nf_connlabels_replace(struct nf_conn *ct,
+			  const u32 *data,
+			  const u32 *mask, unsigned int words32)
+{
+	struct nf_conn_labels *labels;
+	unsigned int size, i;
+	u32 *dst;
+
+	labels = nf_ct_labels_find(ct);
+	if (!labels)
+		return -ENOSPC;
+
+	size = labels->words * sizeof(long);
+	if (size < (words32 * sizeof(u32)))
+		words32 = size / sizeof(u32);
+
+	dst = (u32 *) labels->bits;
+	if (words32) {
+		for (i = 0; i < words32; i++)
+			replace_u32(&dst[i], mask ? ~mask[i] : 0, data[i]);
+	}
+
+	size /= sizeof(u32);
+	for (i = words32; i < size; i++) /* pad */
+		replace_u32(&dst[i], 0, 0);
+
+	nf_conntrack_event_cache(IPCT_LABEL, ct);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nf_connlabels_replace);
+#endif
+
+static struct nf_ct_ext_type labels_extend __read_mostly = {
+	.len    = sizeof(struct nf_conn_labels),
+	.align  = __alignof__(struct nf_conn_labels),
+	.id     = NF_CT_EXT_LABELS,
+};
+
+int nf_conntrack_labels_init(void)
+{
+	return nf_ct_extend_register(&labels_extend);
+}
+
+void nf_conntrack_labels_fini(void)
+{
+	nf_ct_extend_unregister(&labels_extend);
+}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 4e078cd84d8..2334cc5d2b1 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -43,6 +43,7 @@
 #include <net/netfilter/nf_conntrack_acct.h>
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/nf_conntrack_timestamp.h>
+#include <net/netfilter/nf_conntrack_labels.h>
 #ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_l4proto.h>
@@ -323,6 +324,40 @@ nla_put_failure:
 #define ctnetlink_dump_secctx(a, b) (0)
 #endif
 
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+static int ctnetlink_label_size(const struct nf_conn *ct)
+{
+	struct nf_conn_labels *labels = nf_ct_labels_find(ct);
+
+	if (!labels)
+		return 0;
+	return nla_total_size(labels->words * sizeof(long));
+}
+
+static int
+ctnetlink_dump_labels(struct sk_buff *skb, const struct nf_conn *ct)
+{
+	struct nf_conn_labels *labels = nf_ct_labels_find(ct);
+	unsigned int len, i;
+
+	if (!labels)
+		return 0;
+
+	len = labels->words * sizeof(long);
+	i = 0;
+	do {
+		if (labels->bits[i] != 0)
+			return nla_put(skb, CTA_LABELS, len, labels->bits);
+		i++;
+	} while (i < labels->words);
+
+	return 0;
+}
+#else
+#define ctnetlink_dump_labels(a, b) (0)
+#define ctnetlink_label_size(a)	(0)
+#endif
+
 #define master_tuple(ct) &(ct->master->tuplehash[IP_CT_DIR_ORIGINAL].tuple)
 
 static inline int
@@ -463,6 +498,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 	    ctnetlink_dump_helpinfo(skb, ct) < 0 ||
 	    ctnetlink_dump_mark(skb, ct) < 0 ||
 	    ctnetlink_dump_secctx(skb, ct) < 0 ||
+	    ctnetlink_dump_labels(skb, ct) < 0 ||
 	    ctnetlink_dump_id(skb, ct) < 0 ||
 	    ctnetlink_dump_use(skb, ct) < 0 ||
 	    ctnetlink_dump_master(skb, ct) < 0 ||
@@ -561,6 +597,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct)
 	       + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */
 #endif
 	       + ctnetlink_proto_size(ct)
+	       + ctnetlink_label_size(ct)
 	       ;
 }
 
@@ -662,6 +699,9 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 		    && ctnetlink_dump_secctx(skb, ct) < 0)
 			goto nla_put_failure;
 #endif
+		if (events & (1 << IPCT_LABEL) &&
+		     ctnetlink_dump_labels(skb, ct) < 0)
+			goto nla_put_failure;
 
 		if (events & (1 << IPCT_RELATED) &&
 		    ctnetlink_dump_master(skb, ct) < 0)
@@ -921,6 +961,7 @@ ctnetlink_parse_help(const struct nlattr *attr, char **helper_name,
 	return 0;
 }
 
+#define __CTA_LABELS_MAX_LENGTH ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE)
 static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
 	[CTA_TUPLE_ORIG]	= { .type = NLA_NESTED },
 	[CTA_TUPLE_REPLY]	= { .type = NLA_NESTED },
@@ -937,6 +978,10 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
 	[CTA_NAT_SEQ_ADJ_REPLY] = { .type = NLA_NESTED },
 	[CTA_ZONE]		= { .type = NLA_U16 },
 	[CTA_MARK_MASK]		= { .type = NLA_U32 },
+	[CTA_LABELS]		= { .type = NLA_BINARY,
+				    .len = __CTA_LABELS_MAX_LENGTH },
+	[CTA_LABELS_MASK]	= { .type = NLA_BINARY,
+				    .len = __CTA_LABELS_MAX_LENGTH },
 };
 
 static int
@@ -1465,6 +1510,31 @@ ctnetlink_change_nat_seq_adj(struct nf_conn *ct,
 #endif
 
 static int
+ctnetlink_attach_labels(struct nf_conn *ct, const struct nlattr * const cda[])
+{
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+	size_t len = nla_len(cda[CTA_LABELS]);
+	const void *mask = cda[CTA_LABELS_MASK];
+
+	if (len & (sizeof(u32)-1)) /* must be multiple of u32 */
+		return -EINVAL;
+
+	if (mask) {
+		if (nla_len(cda[CTA_LABELS_MASK]) == 0 ||
+		    nla_len(cda[CTA_LABELS_MASK]) != len)
+			return -EINVAL;
+		mask = nla_data(cda[CTA_LABELS_MASK]);
+	}
+
+	len /= sizeof(u32);
+
+	return nf_connlabels_replace(ct, nla_data(cda[CTA_LABELS]), mask, len);
+#else
+	return -EOPNOTSUPP;
+#endif
+}
+
+static int
 ctnetlink_change_conntrack(struct nf_conn *ct,
 			   const struct nlattr * const cda[])
 {
@@ -1510,6 +1580,11 @@ ctnetlink_change_conntrack(struct nf_conn *ct,
 			return err;
 	}
 #endif
+	if (cda[CTA_LABELS]) {
+		err = ctnetlink_attach_labels(ct, cda);
+		if (err < 0)
+			return err;
+	}
 
 	return 0;
 }
@@ -1598,6 +1673,8 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
 	nf_ct_acct_ext_add(ct, GFP_ATOMIC);
 	nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
 	nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC);
+	nf_ct_labels_ext_add(ct);
+
 	/* we must add conntrack extensions before confirmation. */
 	ct->status |= IPS_CONFIRMED;
 
@@ -1716,6 +1793,10 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 			else
 				events = IPCT_NEW;
 
+			if (cda[CTA_LABELS] &&
+			    ctnetlink_attach_labels(ct, cda) == 0)
+				events |= (1 << IPCT_LABEL);
+
 			nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
 						      (1 << IPCT_ASSURED) |
 						      (1 << IPCT_HELPER) |
@@ -1983,6 +2064,8 @@ ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct)
 	if (ct->mark && ctnetlink_dump_mark(skb, ct) < 0)
 		goto nla_put_failure;
 #endif
+	if (ctnetlink_dump_labels(skb, ct) < 0)
+		goto nla_put_failure;
 	rcu_read_unlock();
 	return 0;
 
@@ -2011,6 +2094,11 @@ ctnetlink_nfqueue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct)
 		if (err < 0)
 			return err;
 	}
+	if (cda[CTA_LABELS]) {
+		err = ctnetlink_attach_labels(ct, cda);
+		if (err < 0)
+			return err;
+	}
 #if defined(CONFIG_NF_CONNTRACK_MARK)
 	if (cda[CTA_MARK])
 		ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
@@ -2624,7 +2712,7 @@ ctnetlink_create_expect(struct net *net, u16 zone,
 	if (!help) {
 		if (!cda[CTA_EXPECT_TIMEOUT]) {
 			err = -EINVAL;
-			goto out;
+			goto err_out;
 		}
 		exp->timeout.expires =
 		  jiffies + ntohl(nla_get_be32(cda[CTA_EXPECT_TIMEOUT])) * HZ;
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 51e928db48c..58ab4050830 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -212,8 +212,7 @@ static void nf_ct_l3proto_unregister_sysctl(struct net *net,
 #endif
 }
 
-static int
-nf_conntrack_l3proto_register_net(struct nf_conntrack_l3proto *proto)
+int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto)
 {
 	int ret = 0;
 	struct nf_conntrack_l3proto *old;
@@ -242,8 +241,9 @@ out_unlock:
 	return ret;
 
 }
+EXPORT_SYMBOL_GPL(nf_ct_l3proto_register);
 
-int nf_conntrack_l3proto_register(struct net *net,
+int nf_ct_l3proto_pernet_register(struct net *net,
 				  struct nf_conntrack_l3proto *proto)
 {
 	int ret = 0;
@@ -254,22 +254,11 @@ int nf_conntrack_l3proto_register(struct net *net,
 			return ret;
 	}
 
-	ret = nf_ct_l3proto_register_sysctl(net, proto);
-	if (ret < 0)
-		return ret;
-
-	if (net == &init_net) {
-		ret = nf_conntrack_l3proto_register_net(proto);
-		if (ret < 0)
-			nf_ct_l3proto_unregister_sysctl(net, proto);
-	}
-
-	return ret;
+	return nf_ct_l3proto_register_sysctl(net, proto);
 }
-EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register);
+EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register);
 
-static void
-nf_conntrack_l3proto_unregister_net(struct nf_conntrack_l3proto *proto)
+void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto)
 {
 	BUG_ON(proto->l3proto >= AF_MAX);
 
@@ -283,19 +272,17 @@ nf_conntrack_l3proto_unregister_net(struct nf_conntrack_l3proto *proto)
 
 	synchronize_rcu();
 }
+EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister);
 
-void nf_conntrack_l3proto_unregister(struct net *net,
+void nf_ct_l3proto_pernet_unregister(struct net *net,
 				     struct nf_conntrack_l3proto *proto)
 {
-	if (net == &init_net)
-		nf_conntrack_l3proto_unregister_net(proto);
-
 	nf_ct_l3proto_unregister_sysctl(net, proto);
 
 	/* Remove all contrack entries for this protocol */
 	nf_ct_iterate_cleanup(net, kill_l3proto, proto);
 }
-EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister);
+EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister);
 
 static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
 					      struct nf_conntrack_l4proto *l4proto)
@@ -376,8 +363,7 @@ void nf_ct_l4proto_unregister_sysctl(struct net *net,
 
 /* FIXME: Allow NULL functions and sub in pointers to generic for
    them. --RR */
-static int
-nf_conntrack_l4proto_register_net(struct nf_conntrack_l4proto *l4proto)
+int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto)
 {
 	int ret = 0;
 
@@ -431,8 +417,9 @@ out_unlock:
 	mutex_unlock(&nf_ct_proto_mutex);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_register);
 
-int nf_conntrack_l4proto_register(struct net *net,
+int nf_ct_l4proto_pernet_register(struct net *net,
 				  struct nf_conntrack_l4proto *l4proto)
 {
 	int ret = 0;
@@ -452,22 +439,13 @@ int nf_conntrack_l4proto_register(struct net *net,
 	if (ret < 0)
 		goto out;
 
-	if (net == &init_net) {
-		ret = nf_conntrack_l4proto_register_net(l4proto);
-		if (ret < 0) {
-			nf_ct_l4proto_unregister_sysctl(net, pn, l4proto);
-			goto out;
-		}
-	}
-
 	pn->users++;
 out:
 	return ret;
 }
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register);
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register);
 
-static void
-nf_conntrack_l4proto_unregister_net(struct nf_conntrack_l4proto *l4proto)
+void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
 {
 	BUG_ON(l4proto->l3proto >= PF_MAX);
 
@@ -482,15 +460,13 @@ nf_conntrack_l4proto_unregister_net(struct nf_conntrack_l4proto *l4proto)
 
 	synchronize_rcu();
 }
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister);
 
-void nf_conntrack_l4proto_unregister(struct net *net,
+void nf_ct_l4proto_pernet_unregister(struct net *net,
 				     struct nf_conntrack_l4proto *l4proto)
 {
 	struct nf_proto_net *pn = NULL;
 
-	if (net == &init_net)
-		nf_conntrack_l4proto_unregister_net(l4proto);
-
 	pn = nf_ct_l4proto_net(net, l4proto);
 	if (pn == NULL)
 		return;
@@ -501,11 +477,10 @@ void nf_conntrack_l4proto_unregister(struct net *net,
 	/* Remove all contrack entries for this protocol */
 	nf_ct_iterate_cleanup(net, kill_l4proto, l4proto);
 }
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister);
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister);
 
-int nf_conntrack_proto_init(struct net *net)
+int nf_conntrack_proto_pernet_init(struct net *net)
 {
-	unsigned int i;
 	int err;
 	struct nf_proto_net *pn = nf_ct_l4proto_net(net,
 					&nf_conntrack_l4proto_generic);
@@ -520,19 +495,12 @@ int nf_conntrack_proto_init(struct net *net)
 	if (err < 0)
 		return err;
 
-	if (net == &init_net) {
-		for (i = 0; i < AF_MAX; i++)
-			rcu_assign_pointer(nf_ct_l3protos[i],
-					   &nf_conntrack_l3proto_generic);
-	}
-
 	pn->users++;
 	return 0;
 }
 
-void nf_conntrack_proto_fini(struct net *net)
+void nf_conntrack_proto_pernet_fini(struct net *net)
 {
-	unsigned int i;
 	struct nf_proto_net *pn = nf_ct_l4proto_net(net,
 					&nf_conntrack_l4proto_generic);
 
@@ -540,9 +508,21 @@ void nf_conntrack_proto_fini(struct net *net)
 	nf_ct_l4proto_unregister_sysctl(net,
 					pn,
 					&nf_conntrack_l4proto_generic);
-	if (net == &init_net) {
-		/* free l3proto protocol tables */
-		for (i = 0; i < PF_MAX; i++)
-			kfree(nf_ct_protos[i]);
-	}
+}
+
+int nf_conntrack_proto_init(void)
+{
+	unsigned int i;
+	for (i = 0; i < AF_MAX; i++)
+		rcu_assign_pointer(nf_ct_l3protos[i],
+				   &nf_conntrack_l3proto_generic);
+	return 0;
+}
+
+void nf_conntrack_proto_fini(void)
+{
+	unsigned int i;
+	/* free l3proto protocol tables */
+	for (i = 0; i < PF_MAX; i++)
+		kfree(nf_ct_protos[i]);
 }
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index a8ae287bc7a..432f9578000 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -935,32 +935,27 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = {
 static __net_init int dccp_net_init(struct net *net)
 {
 	int ret = 0;
-	ret = nf_conntrack_l4proto_register(net,
-					    &dccp_proto4);
+	ret = nf_ct_l4proto_pernet_register(net, &dccp_proto4);
 	if (ret < 0) {
-		pr_err("nf_conntrack_l4proto_dccp4 :protocol register failed.\n");
+		pr_err("nf_conntrack_dccp4: pernet registration failed.\n");
 		goto out;
 	}
-	ret = nf_conntrack_l4proto_register(net,
-					    &dccp_proto6);
+	ret = nf_ct_l4proto_pernet_register(net, &dccp_proto6);
 	if (ret < 0) {
-		pr_err("nf_conntrack_l4proto_dccp6 :protocol register failed.\n");
+		pr_err("nf_conntrack_dccp6: pernet registration failed.\n");
 		goto cleanup_dccp4;
 	}
 	return 0;
 cleanup_dccp4:
-	nf_conntrack_l4proto_unregister(net,
-					&dccp_proto4);
+	nf_ct_l4proto_pernet_unregister(net, &dccp_proto4);
 out:
 	return ret;
 }
 
 static __net_exit void dccp_net_exit(struct net *net)
 {
-	nf_conntrack_l4proto_unregister(net,
-					&dccp_proto6);
-	nf_conntrack_l4proto_unregister(net,
-					&dccp_proto4);
+	nf_ct_l4proto_pernet_unregister(net, &dccp_proto6);
+	nf_ct_l4proto_pernet_unregister(net, &dccp_proto4);
 }
 
 static struct pernet_operations dccp_net_ops = {
@@ -972,11 +967,33 @@ static struct pernet_operations dccp_net_ops = {
 
 static int __init nf_conntrack_proto_dccp_init(void)
 {
-	return register_pernet_subsys(&dccp_net_ops);
+	int ret;
+
+	ret = nf_ct_l4proto_register(&dccp_proto4);
+	if (ret < 0)
+		goto out_dccp4;
+
+	ret = nf_ct_l4proto_register(&dccp_proto6);
+	if (ret < 0)
+		goto out_dccp6;
+
+	ret = register_pernet_subsys(&dccp_net_ops);
+	if (ret < 0)
+		goto out_pernet;
+
+	return 0;
+out_pernet:
+	nf_ct_l4proto_unregister(&dccp_proto6);
+out_dccp6:
+	nf_ct_l4proto_unregister(&dccp_proto4);
+out_dccp4:
+	return ret;
 }
 
 static void __exit nf_conntrack_proto_dccp_fini(void)
 {
+	nf_ct_l4proto_unregister(&dccp_proto6);
+	nf_ct_l4proto_unregister(&dccp_proto4);
 	unregister_pernet_subsys(&dccp_net_ops);
 }
 
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index b09b7af7f6f..bd7d01d9c7e 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -397,15 +397,15 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = {
 static int proto_gre_net_init(struct net *net)
 {
 	int ret = 0;
-	ret = nf_conntrack_l4proto_register(net, &nf_conntrack_l4proto_gre4);
+	ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_gre4);
 	if (ret < 0)
-		pr_err("nf_conntrack_l4proto_gre4 :protocol register failed.\n");
+		pr_err("nf_conntrack_gre4: pernet registration failed.\n");
 	return ret;
 }
 
 static void proto_gre_net_exit(struct net *net)
 {
-	nf_conntrack_l4proto_unregister(net, &nf_conntrack_l4proto_gre4);
+	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_gre4);
 	nf_ct_gre_keymap_flush(net);
 }
 
@@ -418,11 +418,26 @@ static struct pernet_operations proto_gre_net_ops = {
 
 static int __init nf_ct_proto_gre_init(void)
 {
-	return register_pernet_subsys(&proto_gre_net_ops);
+	int ret;
+
+	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_gre4);
+	if (ret < 0)
+		goto out_gre4;
+
+	ret = register_pernet_subsys(&proto_gre_net_ops);
+	if (ret < 0)
+		goto out_pernet;
+
+	return 0;
+out_pernet:
+	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_gre4);
+out_gre4:
+	return ret;
 }
 
 static void __exit nf_ct_proto_gre_fini(void)
 {
+	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_gre4);
 	unregister_pernet_subsys(&proto_gre_net_ops);
 }
 
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index c746d61f83e..480f616d593 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -853,33 +853,28 @@ static int sctp_net_init(struct net *net)
 {
 	int ret = 0;
 
-	ret = nf_conntrack_l4proto_register(net,
-					    &nf_conntrack_l4proto_sctp4);
+	ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_sctp4);
 	if (ret < 0) {
-		pr_err("nf_conntrack_l4proto_sctp4 :protocol register failed.\n");
+		pr_err("nf_conntrack_sctp4: pernet registration failed.\n");
 		goto out;
 	}
-	ret = nf_conntrack_l4proto_register(net,
-					    &nf_conntrack_l4proto_sctp6);
+	ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_sctp6);
 	if (ret < 0) {
-		pr_err("nf_conntrack_l4proto_sctp6 :protocol register failed.\n");
+		pr_err("nf_conntrack_sctp6: pernet registration failed.\n");
 		goto cleanup_sctp4;
 	}
 	return 0;
 
 cleanup_sctp4:
-	nf_conntrack_l4proto_unregister(net,
-					&nf_conntrack_l4proto_sctp4);
+	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp4);
 out:
 	return ret;
 }
 
 static void sctp_net_exit(struct net *net)
 {
-	nf_conntrack_l4proto_unregister(net,
-					&nf_conntrack_l4proto_sctp6);
-	nf_conntrack_l4proto_unregister(net,
-					&nf_conntrack_l4proto_sctp4);
+	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp6);
+	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp4);
 }
 
 static struct pernet_operations sctp_net_ops = {
@@ -891,11 +886,33 @@ static struct pernet_operations sctp_net_ops = {
 
 static int __init nf_conntrack_proto_sctp_init(void)
 {
-	return register_pernet_subsys(&sctp_net_ops);
+	int ret;
+
+	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp4);
+	if (ret < 0)
+		goto out_sctp4;
+
+	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp6);
+	if (ret < 0)
+		goto out_sctp6;
+
+	ret = register_pernet_subsys(&sctp_net_ops);
+	if (ret < 0)
+		goto out_pernet;
+
+	return 0;
+out_pernet:
+	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp6);
+out_sctp6:
+	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4);
+out_sctp4:
+	return ret;
 }
 
 static void __exit nf_conntrack_proto_sctp_fini(void)
 {
+	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp6);
+	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4);
 	unregister_pernet_subsys(&sctp_net_ops);
 }
 
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 4b66df20928..157489581c3 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -336,30 +336,28 @@ static int udplite_net_init(struct net *net)
 {
 	int ret = 0;
 
-	ret = nf_conntrack_l4proto_register(net,
-					    &nf_conntrack_l4proto_udplite4);
+	ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udplite4);
 	if (ret < 0) {
-		pr_err("nf_conntrack_l4proto_udplite4 :protocol register failed.\n");
+		pr_err("nf_conntrack_udplite4: pernet registration failed.\n");
 		goto out;
 	}
-	ret = nf_conntrack_l4proto_register(net,
-					    &nf_conntrack_l4proto_udplite6);
+	ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udplite6);
 	if (ret < 0) {
-		pr_err("nf_conntrack_l4proto_udplite4 :protocol register failed.\n");
+		pr_err("nf_conntrack_udplite6: pernet registration failed.\n");
 		goto cleanup_udplite4;
 	}
 	return 0;
 
 cleanup_udplite4:
-	nf_conntrack_l4proto_unregister(net, &nf_conntrack_l4proto_udplite4);
+	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite4);
 out:
 	return ret;
 }
 
 static void udplite_net_exit(struct net *net)
 {
-	nf_conntrack_l4proto_unregister(net, &nf_conntrack_l4proto_udplite6);
-	nf_conntrack_l4proto_unregister(net, &nf_conntrack_l4proto_udplite4);
+	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite6);
+	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite4);
 }
 
 static struct pernet_operations udplite_net_ops = {
@@ -371,11 +369,33 @@ static struct pernet_operations udplite_net_ops = {
 
 static int __init nf_conntrack_proto_udplite_init(void)
 {
-	return register_pernet_subsys(&udplite_net_ops);
+	int ret;
+
+	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite4);
+	if (ret < 0)
+		goto out_udplite4;
+
+	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite6);
+	if (ret < 0)
+		goto out_udplite6;
+
+	ret = register_pernet_subsys(&udplite_net_ops);
+	if (ret < 0)
+		goto out_pernet;
+
+	return 0;
+out_pernet:
+	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite6);
+out_udplite6:
+	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4);
+out_udplite4:
+	return ret;
 }
 
 static void __exit nf_conntrack_proto_udplite_exit(void)
 {
+	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite6);
+	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4);
 	unregister_pernet_subsys(&udplite_net_ops);
 }
 
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index df8f4f28448..72a67bbe351 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1440,8 +1440,25 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff,
 {
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct);
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	unsigned int matchoff, matchlen;
 	unsigned int cseq, i;
+	union nf_inet_addr addr;
+	__be16 port;
+
+	/* Many Cisco IP phones use a high source port for SIP requests, but
+	 * listen for the response on port 5060.  If we are the local
+	 * router for one of these phones, save the port number from the
+	 * Via: header so that nf_nat_sip can redirect the responses to
+	 * the correct port.
+	 */
+	if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen,
+				    SIP_HDR_VIA_UDP, NULL, &matchoff,
+				    &matchlen, &addr, &port) > 0 &&
+	    port != ct->tuplehash[dir].tuple.src.u.udp.port &&
+	    nf_inet_addr_cmp(&addr, &ct->tuplehash[dir].tuple.src.u3))
+		ct_sip_info->forced_dport = port;
 
 	for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) {
 		const struct sip_handler *handler;
diff --git a/net/netfilter/nf_conntrack_snmp.c b/net/netfilter/nf_conntrack_snmp.c
index 6e545e26289..87b95a2c270 100644
--- a/net/netfilter/nf_conntrack_snmp.c
+++ b/net/netfilter/nf_conntrack_snmp.c
@@ -16,6 +16,7 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_expect.h>
+#include <linux/netfilter/nf_conntrack_snmp.h>
 
 #define SNMP_PORT	161
 
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 363285d544a..7936bf7f90b 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -472,13 +472,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
 {
 	struct ctl_table *table;
 
-	if (net_eq(net, &init_net)) {
-		nf_ct_netfilter_header =
-		       register_net_sysctl(&init_net, "net", nf_ct_netfilter_table);
-		if (!nf_ct_netfilter_header)
-			goto out;
-	}
-
 	table = kmemdup(nf_ct_sysctl_table, sizeof(nf_ct_sysctl_table),
 			GFP_KERNEL);
 	if (!table)
@@ -502,10 +495,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
 out_unregister_netfilter:
 	kfree(table);
 out_kmemdup:
-	if (net_eq(net, &init_net))
-		unregister_net_sysctl_table(nf_ct_netfilter_header);
-out:
-	printk(KERN_ERR "nf_conntrack: can't register to sysctl.\n");
 	return -ENOMEM;
 }
 
@@ -513,8 +502,6 @@ static void nf_conntrack_standalone_fini_sysctl(struct net *net)
 {
 	struct ctl_table *table;
 
-	if (net_eq(net, &init_net))
-		unregister_net_sysctl_table(nf_ct_netfilter_header);
 	table = net->ct.sysctl_header->ctl_table_arg;
 	unregister_net_sysctl_table(net->ct.sysctl_header);
 	kfree(table);
@@ -530,51 +517,86 @@ static void nf_conntrack_standalone_fini_sysctl(struct net *net)
 }
 #endif /* CONFIG_SYSCTL */
 
-static int nf_conntrack_net_init(struct net *net)
+static int nf_conntrack_pernet_init(struct net *net)
 {
 	int ret;
 
-	ret = nf_conntrack_init(net);
+	ret = nf_conntrack_init_net(net);
 	if (ret < 0)
 		goto out_init;
+
 	ret = nf_conntrack_standalone_init_proc(net);
 	if (ret < 0)
 		goto out_proc;
+
 	net->ct.sysctl_checksum = 1;
 	net->ct.sysctl_log_invalid = 0;
 	ret = nf_conntrack_standalone_init_sysctl(net);
 	if (ret < 0)
 		goto out_sysctl;
+
 	return 0;
 
 out_sysctl:
 	nf_conntrack_standalone_fini_proc(net);
 out_proc:
-	nf_conntrack_cleanup(net);
+	nf_conntrack_cleanup_net(net);
 out_init:
 	return ret;
 }
 
-static void nf_conntrack_net_exit(struct net *net)
+static void nf_conntrack_pernet_exit(struct net *net)
 {
 	nf_conntrack_standalone_fini_sysctl(net);
 	nf_conntrack_standalone_fini_proc(net);
-	nf_conntrack_cleanup(net);
+	nf_conntrack_cleanup_net(net);
 }
 
 static struct pernet_operations nf_conntrack_net_ops = {
-	.init = nf_conntrack_net_init,
-	.exit = nf_conntrack_net_exit,
+	.init = nf_conntrack_pernet_init,
+	.exit = nf_conntrack_pernet_exit,
 };
 
 static int __init nf_conntrack_standalone_init(void)
 {
-	return register_pernet_subsys(&nf_conntrack_net_ops);
+	int ret = nf_conntrack_init_start();
+	if (ret < 0)
+		goto out_start;
+
+#ifdef CONFIG_SYSCTL
+	nf_ct_netfilter_header =
+		register_net_sysctl(&init_net, "net", nf_ct_netfilter_table);
+	if (!nf_ct_netfilter_header) {
+		pr_err("nf_conntrack: can't register to sysctl.\n");
+		goto out_sysctl;
+	}
+#endif
+
+	ret = register_pernet_subsys(&nf_conntrack_net_ops);
+	if (ret < 0)
+		goto out_pernet;
+
+	nf_conntrack_init_end();
+	return 0;
+
+out_pernet:
+#ifdef CONFIG_SYSCTL
+	unregister_net_sysctl_table(nf_ct_netfilter_header);
+out_sysctl:
+#endif
+	nf_conntrack_cleanup_end();
+out_start:
+	return ret;
 }
 
 static void __exit nf_conntrack_standalone_fini(void)
 {
+	nf_conntrack_cleanup_start();
 	unregister_pernet_subsys(&nf_conntrack_net_ops);
+#ifdef CONFIG_SYSCTL
+	unregister_net_sysctl_table(nf_ct_netfilter_header);
+#endif
+	nf_conntrack_cleanup_end();
 }
 
 module_init(nf_conntrack_standalone_init);
diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c
index a878ce5b252..93da609d9d2 100644
--- a/net/netfilter/nf_conntrack_timeout.c
+++ b/net/netfilter/nf_conntrack_timeout.c
@@ -37,24 +37,15 @@ static struct nf_ct_ext_type timeout_extend __read_mostly = {
 	.id	= NF_CT_EXT_TIMEOUT,
 };
 
-int nf_conntrack_timeout_init(struct net *net)
+int nf_conntrack_timeout_init(void)
 {
-	int ret = 0;
-
-	if (net_eq(net, &init_net)) {
-		ret = nf_ct_extend_register(&timeout_extend);
-		if (ret < 0) {
-			printk(KERN_ERR "nf_ct_timeout: Unable to register "
-					"timeout extension.\n");
-			return ret;
-		}
-	}
-
-	return 0;
+	int ret = nf_ct_extend_register(&timeout_extend);
+	if (ret < 0)
+		pr_err("nf_ct_timeout: Unable to register timeout extension.\n");
+	return ret;
 }
 
-void nf_conntrack_timeout_fini(struct net *net)
+void nf_conntrack_timeout_fini(void)
 {
-	if (net_eq(net, &init_net))
-		nf_ct_extend_unregister(&timeout_extend);
+	nf_ct_extend_unregister(&timeout_extend);
 }
diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c
index 7ea8026f07c..902fb0a6b38 100644
--- a/net/netfilter/nf_conntrack_timestamp.c
+++ b/net/netfilter/nf_conntrack_timestamp.c
@@ -88,37 +88,28 @@ static void nf_conntrack_tstamp_fini_sysctl(struct net *net)
 }
 #endif
 
-int nf_conntrack_tstamp_init(struct net *net)
+int nf_conntrack_tstamp_pernet_init(struct net *net)
 {
-	int ret;
-
 	net->ct.sysctl_tstamp = nf_ct_tstamp;
+	return nf_conntrack_tstamp_init_sysctl(net);
+}
 
-	if (net_eq(net, &init_net)) {
-		ret = nf_ct_extend_register(&tstamp_extend);
-		if (ret < 0) {
-			printk(KERN_ERR "nf_ct_tstamp: Unable to register "
-					"extension\n");
-			goto out_extend_register;
-		}
-	}
+void nf_conntrack_tstamp_pernet_fini(struct net *net)
+{
+	nf_conntrack_tstamp_fini_sysctl(net);
+	nf_ct_extend_unregister(&tstamp_extend);
+}
 
-	ret = nf_conntrack_tstamp_init_sysctl(net);
+int nf_conntrack_tstamp_init(void)
+{
+	int ret;
+	ret = nf_ct_extend_register(&tstamp_extend);
 	if (ret < 0)
-		goto out_sysctl;
-
-	return 0;
-
-out_sysctl:
-	if (net_eq(net, &init_net))
-		nf_ct_extend_unregister(&tstamp_extend);
-out_extend_register:
+		pr_err("nf_ct_tstamp: Unable to register extension\n");
 	return ret;
 }
 
-void nf_conntrack_tstamp_fini(struct net *net)
+void nf_conntrack_tstamp_fini(void)
 {
-	nf_conntrack_tstamp_fini_sysctl(net);
-	if (net_eq(net, &init_net))
-		nf_ct_extend_unregister(&tstamp_extend);
+	nf_ct_extend_unregister(&tstamp_extend);
 }
diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c
index 16303c75221..5951146e768 100644
--- a/net/netfilter/nf_nat_sip.c
+++ b/net/netfilter/nf_nat_sip.c
@@ -95,6 +95,7 @@ static int map_addr(struct sk_buff *skb, unsigned int protoff,
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct);
 	char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")];
 	unsigned int buflen;
 	union nf_inet_addr newaddr;
@@ -107,7 +108,8 @@ static int map_addr(struct sk_buff *skb, unsigned int protoff,
 	} else if (nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, addr) &&
 		   ct->tuplehash[dir].tuple.dst.u.udp.port == port) {
 		newaddr = ct->tuplehash[!dir].tuple.src.u3;
-		newport = ct->tuplehash[!dir].tuple.src.u.udp.port;
+		newport = ct_sip_info->forced_dport ? :
+			  ct->tuplehash[!dir].tuple.src.u.udp.port;
 	} else
 		return 1;
 
@@ -144,6 +146,7 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff,
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct);
 	unsigned int coff, matchoff, matchlen;
 	enum sip_header_types hdr;
 	union nf_inet_addr addr;
@@ -258,6 +261,21 @@ next:
 	    !map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_TO))
 		return NF_DROP;
 
+	/* Mangle destination port for Cisco phones, then fix up checksums */
+	if (dir == IP_CT_DIR_REPLY && ct_sip_info->forced_dport) {
+		struct udphdr *uh;
+
+		if (!skb_make_writable(skb, skb->len))
+			return NF_DROP;
+
+		uh = (void *)skb->data + protoff;
+		uh->dest = ct_sip_info->forced_dport;
+
+		if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, protoff,
+					      0, 0, NULL, 0))
+			return NF_DROP;
+	}
+
 	return NF_ACCEPT;
 }
 
@@ -311,8 +329,10 @@ static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff,
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct);
 	union nf_inet_addr newaddr;
 	u_int16_t port;
+	__be16 srcport;
 	char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")];
 	unsigned int buflen;
 
@@ -326,8 +346,9 @@ static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff,
 	/* If the signalling port matches the connection's source port in the
 	 * original direction, try to use the destination port in the opposite
 	 * direction. */
-	if (exp->tuple.dst.u.udp.port ==
-	    ct->tuplehash[dir].tuple.src.u.udp.port)
+	srcport = ct_sip_info->forced_dport ? :
+		  ct->tuplehash[dir].tuple.src.u.udp.port;
+	if (exp->tuple.dst.u.udp.port == srcport)
 		port = ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port);
 	else
 		port = ntohs(exp->tuple.dst.u.udp.port);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 9f199f2e31f..92fd8eca0d3 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -13,6 +13,7 @@
  */
 #include <linux/module.h>
 #include <linux/skbuff.h>
+#include <linux/if_arp.h>
 #include <linux/init.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
@@ -384,6 +385,7 @@ __build_packet_message(struct nfulnl_instance *inst,
 	struct nfgenmsg *nfmsg;
 	sk_buff_data_t old_tail = inst->skb->tail;
 	struct sock *sk;
+	const unsigned char *hwhdrp;
 
 	nlh = nlmsg_put(inst->skb, 0, 0,
 			NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET,
@@ -485,9 +487,17 @@ __build_packet_message(struct nfulnl_instance *inst,
 	if (indev && skb_mac_header_was_set(skb)) {
 		if (nla_put_be16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) ||
 		    nla_put_be16(inst->skb, NFULA_HWLEN,
-				 htons(skb->dev->hard_header_len)) ||
-		    nla_put(inst->skb, NFULA_HWHEADER, skb->dev->hard_header_len,
-			    skb_mac_header(skb)))
+				 htons(skb->dev->hard_header_len)))
+			goto nla_put_failure;
+
+		hwhdrp = skb_mac_header(skb);
+
+		if (skb->dev->type == ARPHRD_SIT)
+			hwhdrp -= ETH_HLEN;
+
+		if (hwhdrp >= skb->head &&
+		    nla_put(inst->skb, NFULA_HWHEADER,
+			    skb->dev->hard_header_len, hwhdrp))
 			goto nla_put_failure;
 	}
 
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 8d987c3573f..7b3a9e5999c 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -345,19 +345,27 @@ int xt_find_revision(u8 af, const char *name, u8 revision, int target,
 }
 EXPORT_SYMBOL_GPL(xt_find_revision);
 
-static char *textify_hooks(char *buf, size_t size, unsigned int mask)
+static char *
+textify_hooks(char *buf, size_t size, unsigned int mask, uint8_t nfproto)
 {
-	static const char *const names[] = {
+	static const char *const inetbr_names[] = {
 		"PREROUTING", "INPUT", "FORWARD",
 		"OUTPUT", "POSTROUTING", "BROUTING",
 	};
-	unsigned int i;
+	static const char *const arp_names[] = {
+		"INPUT", "FORWARD", "OUTPUT",
+	};
+	const char *const *names;
+	unsigned int i, max;
 	char *p = buf;
 	bool np = false;
 	int res;
 
+	names = (nfproto == NFPROTO_ARP) ? arp_names : inetbr_names;
+	max   = (nfproto == NFPROTO_ARP) ? ARRAY_SIZE(arp_names) :
+	                                   ARRAY_SIZE(inetbr_names);
 	*p = '\0';
-	for (i = 0; i < ARRAY_SIZE(names); ++i) {
+	for (i = 0; i < max; ++i) {
 		if (!(mask & (1 << i)))
 			continue;
 		res = snprintf(p, size, "%s%s", np ? "/" : "", names[i]);
@@ -402,8 +410,10 @@ int xt_check_match(struct xt_mtchk_param *par,
 		pr_err("%s_tables: %s match: used from hooks %s, but only "
 		       "valid from %s\n",
 		       xt_prefix[par->family], par->match->name,
-		       textify_hooks(used, sizeof(used), par->hook_mask),
-		       textify_hooks(allow, sizeof(allow), par->match->hooks));
+		       textify_hooks(used, sizeof(used), par->hook_mask,
+		                     par->family),
+		       textify_hooks(allow, sizeof(allow), par->match->hooks,
+		                     par->family));
 		return -EINVAL;
 	}
 	if (par->match->proto && (par->match->proto != proto || inv_proto)) {
@@ -575,8 +585,10 @@ int xt_check_target(struct xt_tgchk_param *par,
 		pr_err("%s_tables: %s target: used from hooks %s, but only "
 		       "usable from %s\n",
 		       xt_prefix[par->family], par->target->name,
-		       textify_hooks(used, sizeof(used), par->hook_mask),
-		       textify_hooks(allow, sizeof(allow), par->target->hooks));
+		       textify_hooks(used, sizeof(used), par->hook_mask,
+		                     par->family),
+		       textify_hooks(allow, sizeof(allow), par->target->hooks,
+		                     par->family));
 		return -EINVAL;
 	}
 	if (par->target->proto && (par->target->proto != proto || inv_proto)) {
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index ae7f5daeee4..bde009ed8d3 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -109,7 +109,7 @@ static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par)
 	struct xt_ct_target_info *info = par->targinfo;
 	struct nf_conntrack_tuple t;
 	struct nf_conn *ct;
-	int ret;
+	int ret = -EOPNOTSUPP;
 
 	if (info->flags & ~XT_CT_NOTRACK)
 		return -EINVAL;
@@ -149,6 +149,10 @@ static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par)
 
 	__set_bit(IPS_TEMPLATE_BIT, &ct->status);
 	__set_bit(IPS_CONFIRMED_BIT, &ct->status);
+
+	/* Overload tuple linked list to put us in template list. */
+	hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
+				 &par->net->ct.tmpl);
 out:
 	info->ct = ct;
 	return 0;
@@ -243,7 +247,7 @@ static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par)
 	struct xt_ct_target_info_v1 *info = par->targinfo;
 	struct nf_conntrack_tuple t;
 	struct nf_conn *ct;
-	int ret;
+	int ret = -EOPNOTSUPP;
 
 	if (info->flags & ~XT_CT_NOTRACK)
 		return -EINVAL;
@@ -289,6 +293,10 @@ static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par)
 
 	__set_bit(IPS_TEMPLATE_BIT, &ct->status);
 	__set_bit(IPS_CONFIRMED_BIT, &ct->status);
+
+	/* Overload tuple linked list to put us in template list. */
+	hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
+				 &par->net->ct.tmpl);
 out:
 	info->ct = ct;
 	return 0;
@@ -377,14 +385,60 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = {
 	},
 };
 
+static unsigned int
+notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	/* Previously seen (loopback)? Ignore. */
+	if (skb->nfct != NULL)
+		return XT_CONTINUE;
+
+	skb->nfct = &nf_ct_untracked_get()->ct_general;
+	skb->nfctinfo = IP_CT_NEW;
+	nf_conntrack_get(skb->nfct);
+
+	return XT_CONTINUE;
+}
+
+static int notrack_chk(const struct xt_tgchk_param *par)
+{
+	if (!par->net->xt.notrack_deprecated_warning) {
+		pr_info("netfilter: NOTRACK target is deprecated, "
+			"use CT instead or upgrade iptables\n");
+		par->net->xt.notrack_deprecated_warning = true;
+	}
+	return 0;
+}
+
+static struct xt_target notrack_tg_reg __read_mostly = {
+	.name		= "NOTRACK",
+	.revision	= 0,
+	.family		= NFPROTO_UNSPEC,
+	.checkentry	= notrack_chk,
+	.target		= notrack_tg,
+	.table		= "raw",
+	.me		= THIS_MODULE,
+};
+
 static int __init xt_ct_tg_init(void)
 {
-	return xt_register_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg));
+	int ret;
+
+	ret = xt_register_target(&notrack_tg_reg);
+	if (ret < 0)
+		return ret;
+
+	ret = xt_register_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg));
+	if (ret < 0) {
+		xt_unregister_target(&notrack_tg_reg);
+		return ret;
+	}
+	return 0;
 }
 
 static void __exit xt_ct_tg_exit(void)
 {
 	xt_unregister_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg));
+	xt_unregister_target(&notrack_tg_reg);
 }
 
 module_init(xt_ct_tg_init);
@@ -394,3 +448,5 @@ MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Xtables: connection tracking target");
 MODULE_ALIAS("ipt_CT");
 MODULE_ALIAS("ip6t_CT");
+MODULE_ALIAS("ipt_NOTRACK");
+MODULE_ALIAS("ip6t_NOTRACK");
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
new file mode 100644
index 00000000000..12d4da8e6c7
--- /dev/null
+++ b/net/netfilter/xt_bpf.c
@@ -0,0 +1,73 @@
+/* Xtables module to match packets using a BPF filter.
+ * Copyright 2013 Google Inc.
+ * Written by Willem de Bruijn <willemb@google.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/filter.h>
+
+#include <linux/netfilter/xt_bpf.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_AUTHOR("Willem de Bruijn <willemb@google.com>");
+MODULE_DESCRIPTION("Xtables: BPF filter match");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_bpf");
+MODULE_ALIAS("ip6t_bpf");
+
+static int bpf_mt_check(const struct xt_mtchk_param *par)
+{
+	struct xt_bpf_info *info = par->matchinfo;
+	struct sock_fprog program;
+
+	program.len = info->bpf_program_num_elem;
+	program.filter = (struct sock_filter __user *) info->bpf_program;
+	if (sk_unattached_filter_create(&info->filter, &program)) {
+		pr_info("bpf: check failed: parse error\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_bpf_info *info = par->matchinfo;
+
+	return SK_RUN_FILTER(info->filter, skb);
+}
+
+static void bpf_mt_destroy(const struct xt_mtdtor_param *par)
+{
+	const struct xt_bpf_info *info = par->matchinfo;
+	sk_unattached_filter_destroy(info->filter);
+}
+
+static struct xt_match bpf_mt_reg __read_mostly = {
+	.name		= "bpf",
+	.revision	= 0,
+	.family		= NFPROTO_UNSPEC,
+	.checkentry	= bpf_mt_check,
+	.match		= bpf_mt,
+	.destroy	= bpf_mt_destroy,
+	.matchsize	= sizeof(struct xt_bpf_info),
+	.me		= THIS_MODULE,
+};
+
+static int __init bpf_mt_init(void)
+{
+	return xt_register_match(&bpf_mt_reg);
+}
+
+static void __exit bpf_mt_exit(void)
+{
+	xt_unregister_match(&bpf_mt_reg);
+}
+
+module_init(bpf_mt_init);
+module_exit(bpf_mt_exit);
diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c
new file mode 100644
index 00000000000..9f8719df200
--- /dev/null
+++ b/net/netfilter/xt_connlabel.c
@@ -0,0 +1,99 @@
+/*
+ * (C) 2013 Astaro GmbH & Co KG
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_labels.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_DESCRIPTION("Xtables: add/match connection trackling labels");
+MODULE_ALIAS("ipt_connlabel");
+MODULE_ALIAS("ip6t_connlabel");
+
+static bool
+connlabel_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_connlabel_mtinfo *info = par->matchinfo;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+	bool invert = info->options & XT_CONNLABEL_OP_INVERT;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct == NULL || nf_ct_is_untracked(ct))
+		return invert;
+
+	if (info->options & XT_CONNLABEL_OP_SET)
+		return (nf_connlabel_set(ct, info->bit) == 0) ^ invert;
+
+	return nf_connlabel_match(ct, info->bit) ^ invert;
+}
+
+static int connlabel_mt_check(const struct xt_mtchk_param *par)
+{
+	const int options = XT_CONNLABEL_OP_INVERT |
+			    XT_CONNLABEL_OP_SET;
+	struct xt_connlabel_mtinfo *info = par->matchinfo;
+	int ret;
+	size_t words;
+
+	if (info->bit > XT_CONNLABEL_MAXBIT)
+		return -ERANGE;
+
+	if (info->options & ~options) {
+		pr_err("Unknown options in mask %x\n", info->options);
+		return -EINVAL;
+	}
+
+	ret = nf_ct_l3proto_try_module_get(par->family);
+	if (ret < 0) {
+		pr_info("cannot load conntrack support for proto=%u\n",
+							par->family);
+		return ret;
+	}
+
+	par->net->ct.labels_used++;
+	words = BITS_TO_LONGS(info->bit+1);
+	if (words > par->net->ct.label_words)
+		par->net->ct.label_words = words;
+
+	return ret;
+}
+
+static void connlabel_mt_destroy(const struct xt_mtdtor_param *par)
+{
+	par->net->ct.labels_used--;
+	if (par->net->ct.labels_used == 0)
+		par->net->ct.label_words = 0;
+	nf_ct_l3proto_module_put(par->family);
+}
+
+static struct xt_match connlabels_mt_reg __read_mostly = {
+	.name           = "connlabel",
+	.family         = NFPROTO_UNSPEC,
+	.checkentry     = connlabel_mt_check,
+	.match          = connlabel_mt,
+	.matchsize      = sizeof(struct xt_connlabel_mtinfo),
+	.destroy        = connlabel_mt_destroy,
+	.me             = THIS_MODULE,
+};
+
+static int __init connlabel_mt_init(void)
+{
+	return xt_register_match(&connlabels_mt_reg);
+}
+
+static void __exit connlabel_mt_exit(void)
+{
+	xt_unregister_match(&connlabels_mt_reg);
+}
+
+module_init(connlabel_mt_init);
+module_exit(connlabel_mt_exit);
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 26a668a84aa..a9d7af953ce 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -157,11 +157,22 @@ dsthash_find(const struct xt_hashlimit_htable *ht,
 /* allocate dsthash_ent, initialize dst, put in htable and lock it */
 static struct dsthash_ent *
 dsthash_alloc_init(struct xt_hashlimit_htable *ht,
-		   const struct dsthash_dst *dst)
+		   const struct dsthash_dst *dst, bool *race)
 {
 	struct dsthash_ent *ent;
 
 	spin_lock(&ht->lock);
+
+	/* Two or more packets may race to create the same entry in the
+	 * hashtable, double check if this packet lost race.
+	 */
+	ent = dsthash_find(ht, dst);
+	if (ent != NULL) {
+		spin_unlock(&ht->lock);
+		*race = true;
+		return ent;
+	}
+
 	/* initialize hash with random val at the time we allocate
 	 * the first hashtable entry */
 	if (unlikely(!ht->rnd_initialized)) {
@@ -318,7 +329,10 @@ static void htable_destroy(struct xt_hashlimit_htable *hinfo)
 		parent = hashlimit_net->ipt_hashlimit;
 	else
 		parent = hashlimit_net->ip6t_hashlimit;
-	remove_proc_entry(hinfo->pde->name, parent);
+
+	if(parent != NULL)
+		remove_proc_entry(hinfo->pde->name, parent);
+
 	htable_selective_cleanup(hinfo, select_all);
 	vfree(hinfo);
 }
@@ -585,6 +599,7 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	unsigned long now = jiffies;
 	struct dsthash_ent *dh;
 	struct dsthash_dst dst;
+	bool race = false;
 	u32 cost;
 
 	if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
@@ -593,13 +608,18 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	rcu_read_lock_bh();
 	dh = dsthash_find(hinfo, &dst);
 	if (dh == NULL) {
-		dh = dsthash_alloc_init(hinfo, &dst);
+		dh = dsthash_alloc_init(hinfo, &dst, &race);
 		if (dh == NULL) {
 			rcu_read_unlock_bh();
 			goto hotdrop;
+		} else if (race) {
+			/* Already got an entry, update expiration timeout */
+			dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
+			rateinfo_recalc(dh, now, hinfo->cfg.mode);
+		} else {
+			dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire);
+			rateinfo_init(dh, hinfo);
 		}
-		dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire);
-		rateinfo_init(dh, hinfo);
 	} else {
 		/* update expiration timeout */
 		dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
@@ -856,6 +876,27 @@ static int __net_init hashlimit_proc_net_init(struct net *net)
 
 static void __net_exit hashlimit_proc_net_exit(struct net *net)
 {
+	struct xt_hashlimit_htable *hinfo;
+	struct hlist_node *pos;
+	struct proc_dir_entry *pde;
+	struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
+
+	/* recent_net_exit() is called before recent_mt_destroy(). Make sure
+	 * that the parent xt_recent proc entry is is empty before trying to
+	 * remove it.
+	 */
+	mutex_lock(&hashlimit_mutex);
+	pde = hashlimit_net->ipt_hashlimit;
+	if (pde == NULL)
+		pde = hashlimit_net->ip6t_hashlimit;
+
+	hlist_for_each_entry(hinfo, pos, &hashlimit_net->htables, node)
+		remove_proc_entry(hinfo->pde->name, pde);
+
+	hashlimit_net->ipt_hashlimit = NULL;
+	hashlimit_net->ip6t_hashlimit = NULL;
+	mutex_unlock(&hashlimit_mutex);
+
 	proc_net_remove(net, "ipt_hashlimit");
 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
 	proc_net_remove(net, "ip6t_hashlimit");
@@ -872,9 +913,6 @@ static int __net_init hashlimit_net_init(struct net *net)
 
 static void __net_exit hashlimit_net_exit(struct net *net)
 {
-	struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
-
-	BUG_ON(!hlist_empty(&hashlimit_net->htables));
 	hashlimit_proc_net_exit(net);
 }
 
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 4635c9b0045..978efc9b555 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -29,6 +29,7 @@
 #include <linux/skbuff.h>
 #include <linux/inet.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 
@@ -310,6 +311,14 @@ out:
 	return ret;
 }
 
+static void recent_table_free(void *addr)
+{
+	if (is_vmalloc_addr(addr))
+		vfree(addr);
+	else
+		kfree(addr);
+}
+
 static int recent_mt_check(const struct xt_mtchk_param *par,
 			   const struct xt_recent_mtinfo_v1 *info)
 {
@@ -322,6 +331,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
 #endif
 	unsigned int i;
 	int ret = -EINVAL;
+	size_t sz;
 
 	if (unlikely(!hash_rnd_inited)) {
 		get_random_bytes(&hash_rnd, sizeof(hash_rnd));
@@ -360,8 +370,11 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
 		goto out;
 	}
 
-	t = kzalloc(sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size,
-		    GFP_KERNEL);
+	sz = sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size;
+	if (sz <= PAGE_SIZE)
+		t = kzalloc(sz, GFP_KERNEL);
+	else
+		t = vzalloc(sz);
 	if (t == NULL) {
 		ret = -ENOMEM;
 		goto out;
@@ -377,14 +390,14 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
 	uid = make_kuid(&init_user_ns, ip_list_uid);
 	gid = make_kgid(&init_user_ns, ip_list_gid);
 	if (!uid_valid(uid) || !gid_valid(gid)) {
-		kfree(t);
+		recent_table_free(t);
 		ret = -EINVAL;
 		goto out;
 	}
 	pde = proc_create_data(t->name, ip_list_perms, recent_net->xt_recent,
 		  &recent_mt_fops, t);
 	if (pde == NULL) {
-		kfree(t);
+		recent_table_free(t);
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -431,10 +444,11 @@ static void recent_mt_destroy(const struct xt_mtdtor_param *par)
 		list_del(&t->list);
 		spin_unlock_bh(&recent_lock);
 #ifdef CONFIG_PROC_FS
-		remove_proc_entry(t->name, recent_net->xt_recent);
+		if (recent_net->xt_recent != NULL)
+			remove_proc_entry(t->name, recent_net->xt_recent);
 #endif
 		recent_table_flush(t);
-		kfree(t);
+		recent_table_free(t);
 	}
 	mutex_unlock(&recent_mutex);
 }
@@ -615,6 +629,20 @@ static int __net_init recent_proc_net_init(struct net *net)
 
 static void __net_exit recent_proc_net_exit(struct net *net)
 {
+	struct recent_net *recent_net = recent_pernet(net);
+	struct recent_table *t;
+
+	/* recent_net_exit() is called before recent_mt_destroy(). Make sure
+	 * that the parent xt_recent proc entry is is empty before trying to
+	 * remove it.
+	 */
+	spin_lock_bh(&recent_lock);
+	list_for_each_entry(t, &recent_net->tables, list)
+	        remove_proc_entry(t->name, recent_net->xt_recent);
+
+	recent_net->xt_recent = NULL;
+	spin_unlock_bh(&recent_lock);
+
 	proc_net_remove(net, "xt_recent");
 }
 #else
@@ -638,9 +666,6 @@ static int __net_init recent_net_init(struct net *net)
 
 static void __net_exit recent_net_exit(struct net *net)
 {
-	struct recent_net *recent_net = recent_pernet(net);
-
-	BUG_ON(!list_empty(&recent_net->tables));
 	recent_proc_net_exit(net);
 }
 
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index c0353d55d56..74827e3b26a 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2185,7 +2185,6 @@ static struct pernet_operations __net_initdata netlink_net_ops = {
 
 static int __init netlink_proto_init(void)
 {
-	struct sk_buff *dummy_skb;
 	int i;
 	unsigned long limit;
 	unsigned int order;
@@ -2194,7 +2193,7 @@ static int __init netlink_proto_init(void)
 	if (err != 0)
 		goto out;
 
-	BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb));
+	BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
 
 	nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
 	if (!nl_table)
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index f996db34324..9dc537df46c 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -301,7 +301,7 @@ static int queue_gso_packets(struct net *net, int dp_ifindex,
 	struct sk_buff *segs, *nskb;
 	int err;
 
-	segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
+	segs = __skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM, false);
 	if (IS_ERR(segs))
 		return PTR_ERR(segs);
 
@@ -1989,10 +1989,9 @@ static struct pernet_operations ovs_net_ops = {
 
 static int __init dp_init(void)
 {
-	struct sk_buff *dummy_skb;
 	int err;
 
-	BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb));
+	BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
 
 	pr_info("Open vSwitch switching datapath\n");
 
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 5d460c37df0..0531de6c7a4 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -69,7 +69,6 @@ static int internal_dev_mac_addr(struct net_device *dev, void *p)
 
 	if (!is_valid_ether_addr(addr->sa_data))
 		return -EADDRNOTAVAIL;
-	dev->addr_assign_type &= ~NET_ADDR_RANDOM;
 	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
 	return 0;
 }
@@ -98,7 +97,7 @@ static int internal_dev_stop(struct net_device *netdev)
 static void internal_dev_getinfo(struct net_device *netdev,
 				 struct ethtool_drvinfo *info)
 {
-	strcpy(info->driver, "openvswitch");
+	strlcpy(info->driver, "openvswitch", sizeof(info->driver));
 }
 
 static const struct ethtool_ops internal_dev_ethtool_ops = {
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index a9327e2e48c..670cbc3518d 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -35,10 +35,11 @@
 /* Must be called with rcu_read_lock. */
 static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
 {
-	if (unlikely(!vport)) {
-		kfree_skb(skb);
-		return;
-	}
+	if (unlikely(!vport))
+		goto error;
+
+	if (unlikely(skb_warn_if_lro(skb)))
+		goto error;
 
 	/* Make our own copy of the packet.  Otherwise we will mangle the
 	 * packet for anyone who came before us (e.g. tcpdump via AF_PACKET).
@@ -50,6 +51,10 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
 
 	skb_push(skb, ETH_HLEN);
 	ovs_vport_receive(vport, skb);
+	return;
+
+error:
+	kfree_skb(skb);
 }
 
 /* Called with rcu_read_lock and bottom-halves disabled. */
@@ -169,9 +174,6 @@ static int netdev_send(struct vport *vport, struct sk_buff *skb)
 		goto error;
 	}
 
-	if (unlikely(skb_warn_if_lro(skb)))
-		goto error;
-
 	skb->dev = netdev_vport->dev;
 	len = skb->len;
 	dev_queue_xmit(skb);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index e639645e8fe..c111bd0e083 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2361,13 +2361,15 @@ static int packet_release(struct socket *sock)
 
 	packet_flush_mclist(sk);
 
-	memset(&req_u, 0, sizeof(req_u));
-
-	if (po->rx_ring.pg_vec)
+	if (po->rx_ring.pg_vec) {
+		memset(&req_u, 0, sizeof(req_u));
 		packet_set_ring(sk, &req_u, 1, 0);
+	}
 
-	if (po->tx_ring.pg_vec)
+	if (po->tx_ring.pg_vec) {
+		memset(&req_u, 0, sizeof(req_u));
 		packet_set_ring(sk, &req_u, 1, 1);
+	}
 
 	fanout_release(sk);
 
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index a1e11627747..31b74f5e61a 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -434,12 +434,11 @@ static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event)
 		version = RDS_PROTOCOL_3_0;
 		while ((common >>= 1) != 0)
 			version++;
-	}
-	printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI4 using "
-			"incompatible protocol version %u.%u\n",
-			&dp->dp_saddr,
-			dp->dp_protocol_major,
-			dp->dp_protocol_minor);
+	} else
+		printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI4 using incompatible protocol version %u.%u\n",
+				&dp->dp_saddr,
+				dp->dp_protocol_major,
+				dp->dp_protocol_minor);
 	return version;
 }
 
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 8c5bc857f04..8eb9501e3d6 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -339,8 +339,8 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn,
 	sge->length = sizeof(struct rds_header);
 
 	sge = &recv->r_sge[1];
-	sge->addr = sg_dma_address(&recv->r_frag->f_sg);
-	sge->length = sg_dma_len(&recv->r_frag->f_sg);
+	sge->addr = ib_sg_dma_address(ic->i_cm_id->device, &recv->r_frag->f_sg);
+	sge->length = ib_sg_dma_len(ic->i_cm_id->device, &recv->r_frag->f_sg);
 
 	ret = 0;
 out:
@@ -381,7 +381,10 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill)
 		ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
 		rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv,
 			 recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
-			 (long) sg_dma_address(&recv->r_frag->f_sg), ret);
+			 (long) ib_sg_dma_address(
+				ic->i_cm_id->device,
+				&recv->r_frag->f_sg),
+			ret);
 		if (ret) {
 			rds_ib_conn_error(conn, "recv post on "
 			       "%pI4 returned %d, disconnecting and "
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 05996d0dd82..5b0fd291bab 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -10,6 +10,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/kernel.h>
 #include <linux/net.h>
 #include <linux/slab.h>
 #include <linux/skbuff.h>
@@ -792,10 +793,9 @@ static const struct net_proto_family rxrpc_family_ops = {
  */
 static int __init af_rxrpc_init(void)
 {
-	struct sk_buff *dummy_skb;
 	int ret = -1;
 
-	BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > sizeof(dummy_skb->cb));
+	BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > FIELD_SIZEOF(struct sk_buff, cb));
 
 	rxrpc_epoch = htonl(get_seconds());
 
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 65d240cbf74..8579c4bb20c 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -485,8 +485,9 @@ errout:
 	return err;
 }
 
-struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est,
-				    char *name, int ovr, int bind)
+struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
+				    struct nlattr *est, char *name, int ovr,
+				    int bind)
 {
 	struct tc_action *a;
 	struct tc_action_ops *a_o;
@@ -542,9 +543,9 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est,
 
 	/* backward compatibility for policer */
 	if (name == NULL)
-		err = a_o->init(tb[TCA_ACT_OPTIONS], est, a, ovr, bind);
+		err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, a, ovr, bind);
 	else
-		err = a_o->init(nla, est, a, ovr, bind);
+		err = a_o->init(net, nla, est, a, ovr, bind);
 	if (err < 0)
 		goto err_free;
 
@@ -566,8 +567,9 @@ err_out:
 	return ERR_PTR(err);
 }
 
-struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est,
-				  char *name, int ovr, int bind)
+struct tc_action *tcf_action_init(struct net *net, struct nlattr *nla,
+				  struct nlattr *est, char *name, int ovr,
+				  int bind)
 {
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
 	struct tc_action *head = NULL, *act, *act_prev = NULL;
@@ -579,7 +581,7 @@ struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est,
 		return ERR_PTR(err);
 
 	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
-		act = tcf_action_init_1(tb[i], est, name, ovr, bind);
+		act = tcf_action_init_1(net, tb[i], est, name, ovr, bind);
 		if (IS_ERR(act))
 			goto err;
 		act->order = i;
@@ -960,7 +962,7 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 	struct tc_action *a;
 	u32 seq = n->nlmsg_seq;
 
-	act = tcf_action_init(nla, NULL, NULL, ovr, 0);
+	act = tcf_action_init(net, nla, NULL, NULL, ovr, 0);
 	if (act == NULL)
 		goto done;
 	if (IS_ERR(act)) {
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 2c8ad7c86e4..08fa1e8a4ca 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -51,7 +51,7 @@ static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
 	[TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
 };
 
-static int tcf_csum_init(struct nlattr *nla, struct nlattr *est,
+static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est,
 			 struct tc_action *a, int ovr, int bind)
 {
 	struct nlattr *tb[TCA_CSUM_MAX + 1];
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 05d60859d8e..fd2b3cff5fa 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -58,8 +58,9 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
 	[TCA_GACT_PROB]		= { .len = sizeof(struct tc_gact_p) },
 };
 
-static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
-			 struct tc_action *a, int ovr, int bind)
+static int tcf_gact_init(struct net *net, struct nlattr *nla,
+			 struct nlattr *est, struct tc_action *a,
+			 int ovr, int bind)
 {
 	struct nlattr *tb[TCA_GACT_MAX + 1];
 	struct tc_gact *parm;
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 58fb3c7aab9..e0f6de64afe 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -102,7 +102,7 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
 	[TCA_IPT_TARG]	= { .len = sizeof(struct xt_entry_target) },
 };
 
-static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
+static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est,
 			struct tc_action *a, int ovr, int bind)
 {
 	struct nlattr *tb[TCA_IPT_MAX + 1];
@@ -207,10 +207,8 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
 	struct tcf_ipt *ipt = a->priv;
 	struct xt_action_param par;
 
-	if (skb_cloned(skb)) {
-		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
-			return TC_ACT_UNSPEC;
-	}
+	if (skb_unclone(skb, GFP_ATOMIC))
+		return TC_ACT_UNSPEC;
 
 	spin_lock(&ipt->tcf_lock);
 
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 9c0fd0c7881..5d676edc22a 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -62,8 +62,9 @@ static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
 	[TCA_MIRRED_PARMS]	= { .len = sizeof(struct tc_mirred) },
 };
 
-static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est,
-			   struct tc_action *a, int ovr, int bind)
+static int tcf_mirred_init(struct net *net, struct nlattr *nla,
+			   struct nlattr *est, struct tc_action *a, int ovr,
+			   int bind)
 {
 	struct nlattr *tb[TCA_MIRRED_MAX + 1];
 	struct tc_mirred *parm;
@@ -88,7 +89,7 @@ static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est,
 		return -EINVAL;
 	}
 	if (parm->ifindex) {
-		dev = __dev_get_by_index(&init_net, parm->ifindex);
+		dev = __dev_get_by_index(net, parm->ifindex);
 		if (dev == NULL)
 			return -ENODEV;
 		switch (dev->type) {
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index b5d029eb44f..876f0ef2969 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -44,7 +44,7 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
 	[TCA_NAT_PARMS]	= { .len = sizeof(struct tc_nat) },
 };
 
-static int tcf_nat_init(struct nlattr *nla, struct nlattr *est,
+static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
 			struct tc_action *a, int ovr, int bind)
 {
 	struct nlattr *tb[TCA_NAT_MAX + 1];
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 45c53ab067a..7ed78c9e505 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -38,8 +38,9 @@ static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = {
 	[TCA_PEDIT_PARMS]	= { .len = sizeof(struct tc_pedit) },
 };
 
-static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est,
-			  struct tc_action *a, int ovr, int bind)
+static int tcf_pedit_init(struct net *net, struct nlattr *nla,
+			  struct nlattr *est, struct tc_action *a,
+			  int ovr, int bind)
 {
 	struct nlattr *tb[TCA_PEDIT_MAX + 1];
 	struct tc_pedit *parm;
@@ -130,8 +131,7 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
 	int i, munged = 0;
 	unsigned int off;
 
-	if (skb_cloned(skb) &&
-	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+	if (skb_unclone(skb, GFP_ATOMIC))
 		return p->tcf_action;
 
 	off = skb_network_offset(skb);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index a9de23297d4..823463adbd2 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -22,8 +22,23 @@
 #include <net/act_api.h>
 #include <net/netlink.h>
 
-#define L2T(p, L)   qdisc_l2t((p)->tcfp_R_tab, L)
-#define L2T_P(p, L) qdisc_l2t((p)->tcfp_P_tab, L)
+struct tcf_police {
+	struct tcf_common	common;
+	int			tcfp_result;
+	u32			tcfp_ewma_rate;
+	s64			tcfp_burst;
+	u32			tcfp_mtu;
+	s64			tcfp_toks;
+	s64			tcfp_ptoks;
+	s64			tcfp_mtu_ptoks;
+	s64			tcfp_t_c;
+	struct psched_ratecfg	rate;
+	bool			rate_present;
+	struct psched_ratecfg	peak;
+	bool			peak_present;
+};
+#define to_police(pc)	\
+	container_of(pc, struct tcf_police, common)
 
 #define POL_TAB_MASK     15
 static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1];
@@ -108,10 +123,6 @@ static void tcf_police_destroy(struct tcf_police *p)
 			write_unlock_bh(&police_lock);
 			gen_kill_estimator(&p->tcf_bstats,
 					   &p->tcf_rate_est);
-			if (p->tcfp_R_tab)
-				qdisc_put_rtab(p->tcfp_R_tab);
-			if (p->tcfp_P_tab)
-				qdisc_put_rtab(p->tcfp_P_tab);
 			/*
 			 * gen_estimator est_timer() might access p->tcf_lock
 			 * or bstats, wait a RCU grace period before freeing p
@@ -130,8 +141,9 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
 	[TCA_POLICE_RESULT]	= { .type = NLA_U32 },
 };
 
-static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est,
-				 struct tc_action *a, int ovr, int bind)
+static int tcf_act_police_locate(struct net *net, struct nlattr *nla,
+				 struct nlattr *est, struct tc_action *a,
+				 int ovr, int bind)
 {
 	unsigned int h;
 	int ret = 0, err;
@@ -211,26 +223,36 @@ override:
 	}
 
 	/* No failure allowed after this point */
-	if (R_tab != NULL) {
-		qdisc_put_rtab(police->tcfp_R_tab);
-		police->tcfp_R_tab = R_tab;
+	police->tcfp_mtu = parm->mtu;
+	if (police->tcfp_mtu == 0) {
+		police->tcfp_mtu = ~0;
+		if (R_tab)
+			police->tcfp_mtu = 255 << R_tab->rate.cell_log;
+	}
+	if (R_tab) {
+		police->rate_present = true;
+		psched_ratecfg_precompute(&police->rate, R_tab->rate.rate);
+		qdisc_put_rtab(R_tab);
+	} else {
+		police->rate_present = false;
 	}
-	if (P_tab != NULL) {
-		qdisc_put_rtab(police->tcfp_P_tab);
-		police->tcfp_P_tab = P_tab;
+	if (P_tab) {
+		police->peak_present = true;
+		psched_ratecfg_precompute(&police->peak, P_tab->rate.rate);
+		qdisc_put_rtab(P_tab);
+	} else {
+		police->peak_present = false;
 	}
 
 	if (tb[TCA_POLICE_RESULT])
 		police->tcfp_result = nla_get_u32(tb[TCA_POLICE_RESULT]);
-	police->tcfp_toks = police->tcfp_burst = parm->burst;
-	police->tcfp_mtu = parm->mtu;
-	if (police->tcfp_mtu == 0) {
-		police->tcfp_mtu = ~0;
-		if (police->tcfp_R_tab)
-			police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log;
+	police->tcfp_burst = PSCHED_TICKS2NS(parm->burst);
+	police->tcfp_toks = police->tcfp_burst;
+	if (police->peak_present) {
+		police->tcfp_mtu_ptoks = (s64) psched_l2t_ns(&police->peak,
+							     police->tcfp_mtu);
+		police->tcfp_ptoks = police->tcfp_mtu_ptoks;
 	}
-	if (police->tcfp_P_tab)
-		police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu);
 	police->tcf_action = parm->action;
 
 	if (tb[TCA_POLICE_AVRATE])
@@ -240,7 +262,7 @@ override:
 	if (ret != ACT_P_CREATED)
 		return ret;
 
-	police->tcfp_t_c = psched_get_time();
+	police->tcfp_t_c = ktime_to_ns(ktime_get());
 	police->tcf_index = parm->index ? parm->index :
 		tcf_hash_new_index(&police_idx_gen, &police_hash_info);
 	h = tcf_hash(police->tcf_index, POL_TAB_MASK);
@@ -286,9 +308,9 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a,
 			  struct tcf_result *res)
 {
 	struct tcf_police *police = a->priv;
-	psched_time_t now;
-	long toks;
-	long ptoks = 0;
+	s64 now;
+	s64 toks;
+	s64 ptoks = 0;
 
 	spin_lock(&police->tcf_lock);
 
@@ -304,24 +326,25 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a,
 	}
 
 	if (qdisc_pkt_len(skb) <= police->tcfp_mtu) {
-		if (police->tcfp_R_tab == NULL) {
+		if (!police->rate_present) {
 			spin_unlock(&police->tcf_lock);
 			return police->tcfp_result;
 		}
 
-		now = psched_get_time();
-		toks = psched_tdiff_bounded(now, police->tcfp_t_c,
-					    police->tcfp_burst);
-		if (police->tcfp_P_tab) {
+		now = ktime_to_ns(ktime_get());
+		toks = min_t(s64, now - police->tcfp_t_c,
+			     police->tcfp_burst);
+		if (police->peak_present) {
 			ptoks = toks + police->tcfp_ptoks;
-			if (ptoks > (long)L2T_P(police, police->tcfp_mtu))
-				ptoks = (long)L2T_P(police, police->tcfp_mtu);
-			ptoks -= L2T_P(police, qdisc_pkt_len(skb));
+			if (ptoks > police->tcfp_mtu_ptoks)
+				ptoks = police->tcfp_mtu_ptoks;
+			ptoks -= (s64) psched_l2t_ns(&police->peak,
+						     qdisc_pkt_len(skb));
 		}
 		toks += police->tcfp_toks;
-		if (toks > (long)police->tcfp_burst)
+		if (toks > police->tcfp_burst)
 			toks = police->tcfp_burst;
-		toks -= L2T(police, qdisc_pkt_len(skb));
+		toks -= (s64) psched_l2t_ns(&police->rate, qdisc_pkt_len(skb));
 		if ((toks|ptoks) >= 0) {
 			police->tcfp_t_c = now;
 			police->tcfp_toks = toks;
@@ -347,15 +370,15 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 		.index = police->tcf_index,
 		.action = police->tcf_action,
 		.mtu = police->tcfp_mtu,
-		.burst = police->tcfp_burst,
+		.burst = PSCHED_NS2TICKS(police->tcfp_burst),
 		.refcnt = police->tcf_refcnt - ref,
 		.bindcnt = police->tcf_bindcnt - bind,
 	};
 
-	if (police->tcfp_R_tab)
-		opt.rate = police->tcfp_R_tab->rate;
-	if (police->tcfp_P_tab)
-		opt.peakrate = police->tcfp_P_tab->rate;
+	if (police->rate_present)
+		opt.rate.rate = psched_ratecfg_getrate(&police->rate);
+	if (police->peak_present)
+		opt.peakrate.rate = psched_ratecfg_getrate(&police->peak);
 	if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt))
 		goto nla_put_failure;
 	if (police->tcfp_result &&
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 3714f60f0b3..7725eb4ab75 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -95,8 +95,9 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
 	[TCA_DEF_DATA]	= { .type = NLA_STRING, .len = SIMP_MAX_DATA },
 };
 
-static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
-			 struct tc_action *a, int ovr, int bind)
+static int tcf_simp_init(struct net *net, struct nlattr *nla,
+			 struct nlattr *est, struct tc_action *a,
+			 int ovr, int bind)
 {
 	struct nlattr *tb[TCA_DEF_MAX + 1];
 	struct tc_defact *parm;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 476e0fac671..cb4221171f9 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -67,8 +67,9 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
 	[TCA_SKBEDIT_MARK]		= { .len = sizeof(u32) },
 };
 
-static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
-			 struct tc_action *a, int ovr, int bind)
+static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
+			    struct nlattr *est, struct tc_action *a,
+			    int ovr, int bind)
 {
 	struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
 	struct tc_skbedit *parm;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index ff55ed6c49b..964f5e4f4b8 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -321,7 +321,7 @@ replay:
 		}
 	}
 
-	err = tp->ops->change(skb, tp, cl, t->tcm_handle, tca, &fh);
+	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh);
 	if (err == 0) {
 		if (tp_created) {
 			spin_lock_bh(root_lock);
@@ -508,7 +508,7 @@ void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts)
 }
 EXPORT_SYMBOL(tcf_exts_destroy);
 
-int tcf_exts_validate(struct tcf_proto *tp, struct nlattr **tb,
+int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 		  struct nlattr *rate_tlv, struct tcf_exts *exts,
 		  const struct tcf_ext_map *map)
 {
@@ -519,7 +519,7 @@ int tcf_exts_validate(struct tcf_proto *tp, struct nlattr **tb,
 		struct tc_action *act;
 
 		if (map->police && tb[map->police]) {
-			act = tcf_action_init_1(tb[map->police], rate_tlv,
+			act = tcf_action_init_1(net, tb[map->police], rate_tlv,
 						"police", TCA_ACT_NOREPLACE,
 						TCA_ACT_BIND);
 			if (IS_ERR(act))
@@ -528,8 +528,9 @@ int tcf_exts_validate(struct tcf_proto *tp, struct nlattr **tb,
 			act->type = TCA_OLD_COMPAT;
 			exts->action = act;
 		} else if (map->action && tb[map->action]) {
-			act = tcf_action_init(tb[map->action], rate_tlv, NULL,
-					      TCA_ACT_NOREPLACE, TCA_ACT_BIND);
+			act = tcf_action_init(net, tb[map->action], rate_tlv,
+					      NULL, TCA_ACT_NOREPLACE,
+					      TCA_ACT_BIND);
 			if (IS_ERR(act))
 				return PTR_ERR(act);
 
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 344a11b342e..d76a35d0dc8 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -132,15 +132,16 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = {
 	[TCA_BASIC_EMATCHES]	= { .type = NLA_NESTED },
 };
 
-static int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f,
-			   unsigned long base, struct nlattr **tb,
+static int basic_set_parms(struct net *net, struct tcf_proto *tp,
+			   struct basic_filter *f, unsigned long base,
+			   struct nlattr **tb,
 			   struct nlattr *est)
 {
 	int err = -EINVAL;
 	struct tcf_exts e;
 	struct tcf_ematch_tree t;
 
-	err = tcf_exts_validate(tp, tb, est, &e, &basic_ext_map);
+	err = tcf_exts_validate(net, tp, tb, est, &e, &basic_ext_map);
 	if (err < 0)
 		return err;
 
@@ -162,7 +163,7 @@ errout:
 	return err;
 }
 
-static int basic_change(struct sk_buff *in_skb,
+static int basic_change(struct net *net, struct sk_buff *in_skb,
 			struct tcf_proto *tp, unsigned long base, u32 handle,
 			struct nlattr **tca, unsigned long *arg)
 {
@@ -182,7 +183,7 @@ static int basic_change(struct sk_buff *in_skb,
 	if (f != NULL) {
 		if (handle && f->handle != handle)
 			return -EINVAL;
-		return basic_set_parms(tp, f, base, tb, tca[TCA_RATE]);
+		return basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE]);
 	}
 
 	err = -ENOBUFS;
@@ -208,7 +209,7 @@ static int basic_change(struct sk_buff *in_skb,
 		f->handle = head->hgenerator;
 	}
 
-	err = basic_set_parms(tp, f, base, tb, tca[TCA_RATE]);
+	err = basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE]);
 	if (err < 0)
 		goto errout;
 
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 6db7855b902..3a294eb98d6 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -178,7 +178,7 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = {
 	[TCA_CGROUP_EMATCHES]	= { .type = NLA_NESTED },
 };
 
-static int cls_cgroup_change(struct sk_buff *in_skb,
+static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
 			     struct tcf_proto *tp, unsigned long base,
 			     u32 handle, struct nlattr **tca,
 			     unsigned long *arg)
@@ -215,7 +215,8 @@ static int cls_cgroup_change(struct sk_buff *in_skb,
 	if (err < 0)
 		return err;
 
-	err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &cgroup_ext_map);
+	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e,
+				&cgroup_ext_map);
 	if (err < 0)
 		return err;
 
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index ce82d0cb1b4..aa36a8c8b33 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -351,7 +351,7 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
 	[TCA_FLOW_PERTURB]	= { .type = NLA_U32 },
 };
 
-static int flow_change(struct sk_buff *in_skb, 
+static int flow_change(struct net *net, struct sk_buff *in_skb,
 		       struct tcf_proto *tp, unsigned long base,
 		       u32 handle, struct nlattr **tca,
 		       unsigned long *arg)
@@ -397,7 +397,7 @@ static int flow_change(struct sk_buff *in_skb,
 			return -EOPNOTSUPP;
 	}
 
-	err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &flow_ext_map);
+	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, &flow_ext_map);
 	if (err < 0)
 		return err;
 
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 4075a0aef2a..1135d8227f9 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -192,7 +192,7 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = {
 };
 
 static int
-fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f,
+fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
 	struct nlattr **tb, struct nlattr **tca, unsigned long base)
 {
 	struct fw_head *head = (struct fw_head *)tp->root;
@@ -200,7 +200,7 @@ fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f,
 	u32 mask;
 	int err;
 
-	err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &fw_ext_map);
+	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, &fw_ext_map);
 	if (err < 0)
 		return err;
 
@@ -233,7 +233,7 @@ errout:
 	return err;
 }
 
-static int fw_change(struct sk_buff *in_skb,
+static int fw_change(struct net *net, struct sk_buff *in_skb,
 		     struct tcf_proto *tp, unsigned long base,
 		     u32 handle,
 		     struct nlattr **tca,
@@ -255,7 +255,7 @@ static int fw_change(struct sk_buff *in_skb,
 	if (f != NULL) {
 		if (f->id != handle && handle)
 			return -EINVAL;
-		return fw_change_attrs(tp, f, tb, tca, base);
+		return fw_change_attrs(net, tp, f, tb, tca, base);
 	}
 
 	if (!handle)
@@ -282,7 +282,7 @@ static int fw_change(struct sk_buff *in_skb,
 
 	f->id = handle;
 
-	err = fw_change_attrs(tp, f, tb, tca, base);
+	err = fw_change_attrs(net, tp, f, tb, tca, base);
 	if (err < 0)
 		goto errout;
 
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index c10d57bf98f..37da567d833 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -335,9 +335,10 @@ static const struct nla_policy route4_policy[TCA_ROUTE4_MAX + 1] = {
 	[TCA_ROUTE4_IIF]	= { .type = NLA_U32 },
 };
 
-static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
-	struct route4_filter *f, u32 handle, struct route4_head *head,
-	struct nlattr **tb, struct nlattr *est, int new)
+static int route4_set_parms(struct net *net, struct tcf_proto *tp,
+			    unsigned long base, struct route4_filter *f,
+			    u32 handle, struct route4_head *head,
+			    struct nlattr **tb, struct nlattr *est, int new)
 {
 	int err;
 	u32 id = 0, to = 0, nhandle = 0x8000;
@@ -346,7 +347,7 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
 	struct route4_bucket *b;
 	struct tcf_exts e;
 
-	err = tcf_exts_validate(tp, tb, est, &e, &route_ext_map);
+	err = tcf_exts_validate(net, tp, tb, est, &e, &route_ext_map);
 	if (err < 0)
 		return err;
 
@@ -427,7 +428,7 @@ errout:
 	return err;
 }
 
-static int route4_change(struct sk_buff *in_skb,
+static int route4_change(struct net *net, struct sk_buff *in_skb,
 		       struct tcf_proto *tp, unsigned long base,
 		       u32 handle,
 		       struct nlattr **tca,
@@ -457,7 +458,7 @@ static int route4_change(struct sk_buff *in_skb,
 		if (f->bkt)
 			old_handle = f->handle;
 
-		err = route4_set_parms(tp, base, f, handle, head, tb,
+		err = route4_set_parms(net, tp, base, f, handle, head, tb,
 			tca[TCA_RATE], 0);
 		if (err < 0)
 			return err;
@@ -480,7 +481,7 @@ static int route4_change(struct sk_buff *in_skb,
 	if (f == NULL)
 		goto errout;
 
-	err = route4_set_parms(tp, base, f, handle, head, tb,
+	err = route4_set_parms(net, tp, base, f, handle, head, tb,
 		tca[TCA_RATE], 1);
 	if (err < 0)
 		goto errout;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 494bbb90924..252d8b05872 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -416,7 +416,7 @@ static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
 	[TCA_RSVP_PINFO]	= { .len = sizeof(struct tc_rsvp_pinfo) },
 };
 
-static int rsvp_change(struct sk_buff *in_skb,
+static int rsvp_change(struct net *net, struct sk_buff *in_skb,
 		       struct tcf_proto *tp, unsigned long base,
 		       u32 handle,
 		       struct nlattr **tca,
@@ -440,7 +440,7 @@ static int rsvp_change(struct sk_buff *in_skb,
 	if (err < 0)
 		return err;
 
-	err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map);
+	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map);
 	if (err < 0)
 		return err;
 
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index a1293b4ab7a..b86535a4016 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -197,9 +197,10 @@ static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = {
 };
 
 static int
-tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
-		  struct tcindex_data *p, struct tcindex_filter_result *r,
-		  struct nlattr **tb, struct nlattr *est)
+tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
+		  u32 handle, struct tcindex_data *p,
+		  struct tcindex_filter_result *r, struct nlattr **tb,
+		 struct nlattr *est)
 {
 	int err, balloc = 0;
 	struct tcindex_filter_result new_filter_result, *old_r = r;
@@ -208,7 +209,7 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
 	struct tcindex_filter *f = NULL; /* make gcc behave */
 	struct tcf_exts e;
 
-	err = tcf_exts_validate(tp, tb, est, &e, &tcindex_ext_map);
+	err = tcf_exts_validate(net, tp, tb, est, &e, &tcindex_ext_map);
 	if (err < 0)
 		return err;
 
@@ -332,7 +333,7 @@ errout:
 }
 
 static int
-tcindex_change(struct sk_buff *in_skb,
+tcindex_change(struct net *net, struct sk_buff *in_skb,
 	       struct tcf_proto *tp, unsigned long base, u32 handle,
 	       struct nlattr **tca, unsigned long *arg)
 {
@@ -353,7 +354,8 @@ tcindex_change(struct sk_buff *in_skb,
 	if (err < 0)
 		return err;
 
-	return tcindex_set_parms(tp, base, handle, p, r, tb, tca[TCA_RATE]);
+	return tcindex_set_parms(net, tp, base, handle, p, r, tb,
+				 tca[TCA_RATE]);
 }
 
 
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index c7c27bc91b5..eb07a1e536e 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -488,15 +488,15 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
 	[TCA_U32_MARK]		= { .len = sizeof(struct tc_u32_mark) },
 };
 
-static int u32_set_parms(struct tcf_proto *tp, unsigned long base,
-			 struct tc_u_hnode *ht,
+static int u32_set_parms(struct net *net, struct tcf_proto *tp,
+			 unsigned long base, struct tc_u_hnode *ht,
 			 struct tc_u_knode *n, struct nlattr **tb,
 			 struct nlattr *est)
 {
 	int err;
 	struct tcf_exts e;
 
-	err = tcf_exts_validate(tp, tb, est, &e, &u32_ext_map);
+	err = tcf_exts_validate(net, tp, tb, est, &e, &u32_ext_map);
 	if (err < 0)
 		return err;
 
@@ -544,7 +544,7 @@ errout:
 	return err;
 }
 
-static int u32_change(struct sk_buff *in_skb,
+static int u32_change(struct net *net, struct sk_buff *in_skb,
 		      struct tcf_proto *tp, unsigned long base, u32 handle,
 		      struct nlattr **tca,
 		      unsigned long *arg)
@@ -570,7 +570,8 @@ static int u32_change(struct sk_buff *in_skb,
 		if (TC_U32_KEY(n->handle) == 0)
 			return -EINVAL;
 
-		return u32_set_parms(tp, base, n->ht_up, n, tb, tca[TCA_RATE]);
+		return u32_set_parms(net, tp, base, n->ht_up, n, tb,
+				     tca[TCA_RATE]);
 	}
 
 	if (tb[TCA_U32_DIVISOR]) {
@@ -656,7 +657,7 @@ static int u32_change(struct sk_buff *in_skb,
 	}
 #endif
 
-	err = u32_set_parms(tp, base, ht, n, tb, tca[TCA_RATE]);
+	err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE]);
 	if (err == 0) {
 		struct tc_u_knode **ins;
 		for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index d84f7e734cd..fe1ba54b93f 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -493,7 +493,7 @@ void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
 }
 EXPORT_SYMBOL(qdisc_watchdog_init);
 
-void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
+void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
 {
 	if (test_bit(__QDISC_STATE_DEACTIVATED,
 		     &qdisc_root_sleeping(wd->qdisc)->state))
@@ -502,10 +502,10 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
 	qdisc_throttled(wd->qdisc);
 
 	hrtimer_start(&wd->timer,
-		      ns_to_ktime(PSCHED_TICKS2NS(expires)),
+		      ns_to_ktime(expires),
 		      HRTIMER_MODE_ABS);
 }
-EXPORT_SYMBOL(qdisc_watchdog_schedule);
+EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
 
 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
 {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 5d81a447851..ffad48109a2 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -25,6 +25,7 @@
 #include <linux/rcupdate.h>
 #include <linux/list.h>
 #include <linux/slab.h>
+#include <net/sch_generic.h>
 #include <net/pkt_sched.h>
 #include <net/dst.h>
 
@@ -896,3 +897,39 @@ void dev_shutdown(struct net_device *dev)
 
 	WARN_ON(timer_pending(&dev->watchdog_timer));
 }
+
+void psched_ratecfg_precompute(struct psched_ratecfg *r, u32 rate)
+{
+	u64 factor;
+	u64 mult;
+	int shift;
+
+	r->rate_bps = rate << 3;
+	r->shift = 0;
+	r->mult = 1;
+	/*
+	 * Calibrate mult, shift so that token counting is accurate
+	 * for smallest packet size (64 bytes).  Token (time in ns) is
+	 * computed as (bytes * 8) * NSEC_PER_SEC / rate_bps.  It will
+	 * work as long as the smallest packet transfer time can be
+	 * accurately represented in nanosec.
+	 */
+	if (r->rate_bps > 0) {
+		/*
+		 * Higher shift gives better accuracy.  Find the largest
+		 * shift such that mult fits in 32 bits.
+		 */
+		for (shift = 0; shift < 16; shift++) {
+			r->shift = shift;
+			factor = 8LLU * NSEC_PER_SEC * (1 << r->shift);
+			mult = div64_u64(factor, r->rate_bps);
+			if (mult > UINT_MAX)
+				break;
+		}
+
+		r->shift = shift - 1;
+		factor = 8LLU * NSEC_PER_SEC * (1 << r->shift);
+		r->mult = div64_u64(factor, r->rate_bps);
+	}
+}
+EXPORT_SYMBOL(psched_ratecfg_precompute);
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index d2922c0ef57..03c2692ca01 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -38,6 +38,7 @@
 #include <linux/workqueue.h>
 #include <linux/slab.h>
 #include <net/netlink.h>
+#include <net/sch_generic.h>
 #include <net/pkt_sched.h>
 
 /* HTB algorithm.
@@ -71,12 +72,6 @@ enum htb_cmode {
 	HTB_CAN_SEND		/* class can send */
 };
 
-struct htb_rate_cfg {
-	u64 rate_bps;
-	u32 mult;
-	u32 shift;
-};
-
 /* interior & leaf nodes; props specific to leaves are marked L: */
 struct htb_class {
 	struct Qdisc_class_common common;
@@ -124,8 +119,8 @@ struct htb_class {
 	int filter_cnt;
 
 	/* token bucket parameters */
-	struct htb_rate_cfg rate;
-	struct htb_rate_cfg ceil;
+	struct psched_ratecfg rate;
+	struct psched_ratecfg ceil;
 	s64 buffer, cbuffer;	/* token bucket depth/rate */
 	psched_tdiff_t mbuffer;	/* max wait time */
 	s64 tokens, ctokens;	/* current number of tokens */
@@ -168,45 +163,6 @@ struct htb_sched {
 	struct work_struct work;
 };
 
-static u64 l2t_ns(struct htb_rate_cfg *r, unsigned int len)
-{
-	return ((u64)len * r->mult) >> r->shift;
-}
-
-static void htb_precompute_ratedata(struct htb_rate_cfg *r)
-{
-	u64 factor;
-	u64 mult;
-	int shift;
-
-	r->shift = 0;
-	r->mult = 1;
-	/*
-	 * Calibrate mult, shift so that token counting is accurate
-	 * for smallest packet size (64 bytes).  Token (time in ns) is
-	 * computed as (bytes * 8) * NSEC_PER_SEC / rate_bps.  It will
-	 * work as long as the smallest packet transfer time can be
-	 * accurately represented in nanosec.
-	 */
-	if (r->rate_bps > 0) {
-		/*
-		 * Higher shift gives better accuracy.  Find the largest
-		 * shift such that mult fits in 32 bits.
-		 */
-		for (shift = 0; shift < 16; shift++) {
-			r->shift = shift;
-			factor = 8LLU * NSEC_PER_SEC * (1 << r->shift);
-			mult = div64_u64(factor, r->rate_bps);
-			if (mult > UINT_MAX)
-				break;
-		}
-
-		r->shift = shift - 1;
-		factor = 8LLU * NSEC_PER_SEC * (1 << r->shift);
-		r->mult = div64_u64(factor, r->rate_bps);
-	}
-}
-
 /* find class in global hash table using given handle */
 static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
 {
@@ -632,7 +588,7 @@ static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
 
 	if (toks > cl->buffer)
 		toks = cl->buffer;
-	toks -= (s64) l2t_ns(&cl->rate, bytes);
+	toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
 	if (toks <= -cl->mbuffer)
 		toks = 1 - cl->mbuffer;
 
@@ -645,7 +601,7 @@ static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
 
 	if (toks > cl->cbuffer)
 		toks = cl->cbuffer;
-	toks -= (s64) l2t_ns(&cl->ceil, bytes);
+	toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
 	if (toks <= -cl->mbuffer)
 		toks = 1 - cl->mbuffer;
 
@@ -919,7 +875,7 @@ ok:
 	q->now = ktime_to_ns(ktime_get());
 	start_at = jiffies;
 
-	next_event = q->now + 5 * NSEC_PER_SEC;
+	next_event = q->now + 5LLU * NSEC_PER_SEC;
 
 	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
 		/* common case optimization - skip event handler quickly */
@@ -1134,10 +1090,10 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 
 	memset(&opt, 0, sizeof(opt));
 
-	opt.rate.rate = cl->rate.rate_bps >> 3;
-	opt.buffer = cl->buffer;
-	opt.ceil.rate = cl->ceil.rate_bps >> 3;
-	opt.cbuffer = cl->cbuffer;
+	opt.rate.rate = psched_ratecfg_getrate(&cl->rate);
+	opt.buffer = PSCHED_NS2TICKS(cl->buffer);
+	opt.ceil.rate = psched_ratecfg_getrate(&cl->ceil);
+	opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
 	opt.quantum = cl->quantum;
 	opt.prio = cl->prio;
 	opt.level = cl->level;
@@ -1459,8 +1415,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		cl->parent = parent;
 
 		/* set class to be in HTB_CAN_SEND state */
-		cl->tokens = hopt->buffer;
-		cl->ctokens = hopt->cbuffer;
+		cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
+		cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
 		cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC;	/* 1min */
 		cl->t_c = psched_get_time();
 		cl->cmode = HTB_CAN_SEND;
@@ -1503,17 +1459,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 			cl->prio = TC_HTB_NUMPRIO - 1;
 	}
 
-	cl->buffer = hopt->buffer;
-	cl->cbuffer = hopt->cbuffer;
-
-	cl->rate.rate_bps = (u64)hopt->rate.rate << 3;
-	cl->ceil.rate_bps = (u64)hopt->ceil.rate << 3;
-
-	htb_precompute_ratedata(&cl->rate);
-	htb_precompute_ratedata(&cl->ceil);
+	psched_ratecfg_precompute(&cl->rate, hopt->rate.rate);
+	psched_ratecfg_precompute(&cl->ceil, hopt->ceil.rate);
 
-	cl->buffer = hopt->buffer << PSCHED_SHIFT;
-	cl->cbuffer = hopt->buffer << PSCHED_SHIFT;
+	cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
+	cl->cbuffer = PSCHED_TICKS2NS(hopt->buffer);
 
 	sch_tree_unlock(sch);
 
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 298c0ddfb57..3d2acc7a9c8 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -438,18 +438,18 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		if (q->rate) {
 			struct sk_buff_head *list = &sch->q;
 
-			delay += packet_len_2_sched_time(skb->len, q);
-
 			if (!skb_queue_empty(list)) {
 				/*
-				 * Last packet in queue is reference point (now).
-				 * First packet in queue is already in flight,
-				 * calculate this time bonus and substract
+				 * Last packet in queue is reference point (now),
+				 * calculate this time bonus and subtract
 				 * from delay.
 				 */
-				delay -= now - netem_skb_cb(skb_peek(list))->time_to_send;
+				delay -= netem_skb_cb(skb_peek_tail(list))->time_to_send - now;
+				delay = max_t(psched_tdiff_t, 0, delay);
 				now = netem_skb_cb(skb_peek_tail(list))->time_to_send;
 			}
+
+			delay += packet_len_2_sched_time(skb->len, q);
 		}
 
 		cb->time_to_send = now + delay;
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 4b056c15e90..c8388f3c342 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -19,6 +19,7 @@
 #include <linux/errno.h>
 #include <linux/skbuff.h>
 #include <net/netlink.h>
+#include <net/sch_generic.h>
 #include <net/pkt_sched.h>
 
 
@@ -100,23 +101,21 @@
 struct tbf_sched_data {
 /* Parameters */
 	u32		limit;		/* Maximal length of backlog: bytes */
-	u32		buffer;		/* Token bucket depth/rate: MUST BE >= MTU/B */
-	u32		mtu;
+	s64		buffer;		/* Token bucket depth/rate: MUST BE >= MTU/B */
+	s64		mtu;
 	u32		max_size;
-	struct qdisc_rate_table	*R_tab;
-	struct qdisc_rate_table	*P_tab;
+	struct psched_ratecfg rate;
+	struct psched_ratecfg peak;
+	bool peak_present;
 
 /* Variables */
-	long	tokens;			/* Current number of B tokens */
-	long	ptokens;		/* Current number of P tokens */
-	psched_time_t	t_c;		/* Time check-point */
+	s64	tokens;			/* Current number of B tokens */
+	s64	ptokens;		/* Current number of P tokens */
+	s64	t_c;			/* Time check-point */
 	struct Qdisc	*qdisc;		/* Inner qdisc, default - bfifo queue */
 	struct qdisc_watchdog watchdog;	/* Watchdog timer */
 };
 
-#define L2T(q, L)   qdisc_l2t((q)->R_tab, L)
-#define L2T_P(q, L) qdisc_l2t((q)->P_tab, L)
-
 static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct tbf_sched_data *q = qdisc_priv(sch);
@@ -156,24 +155,24 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
 	skb = q->qdisc->ops->peek(q->qdisc);
 
 	if (skb) {
-		psched_time_t now;
-		long toks;
-		long ptoks = 0;
+		s64 now;
+		s64 toks;
+		s64 ptoks = 0;
 		unsigned int len = qdisc_pkt_len(skb);
 
-		now = psched_get_time();
-		toks = psched_tdiff_bounded(now, q->t_c, q->buffer);
+		now = ktime_to_ns(ktime_get());
+		toks = min_t(s64, now - q->t_c, q->buffer);
 
-		if (q->P_tab) {
+		if (q->peak_present) {
 			ptoks = toks + q->ptokens;
-			if (ptoks > (long)q->mtu)
+			if (ptoks > q->mtu)
 				ptoks = q->mtu;
-			ptoks -= L2T_P(q, len);
+			ptoks -= (s64) psched_l2t_ns(&q->peak, len);
 		}
 		toks += q->tokens;
-		if (toks > (long)q->buffer)
+		if (toks > q->buffer)
 			toks = q->buffer;
-		toks -= L2T(q, len);
+		toks -= (s64) psched_l2t_ns(&q->rate, len);
 
 		if ((toks|ptoks) >= 0) {
 			skb = qdisc_dequeue_peeked(q->qdisc);
@@ -189,8 +188,8 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
 			return skb;
 		}
 
-		qdisc_watchdog_schedule(&q->watchdog,
-					now + max_t(long, -toks, -ptoks));
+		qdisc_watchdog_schedule_ns(&q->watchdog,
+					   now + max_t(long, -toks, -ptoks));
 
 		/* Maybe we have a shorter packet in the queue,
 		   which can be sent now. It sounds cool,
@@ -214,7 +213,7 @@ static void tbf_reset(struct Qdisc *sch)
 
 	qdisc_reset(q->qdisc);
 	sch->q.qlen = 0;
-	q->t_c = psched_get_time();
+	q->t_c = ktime_to_ns(ktime_get());
 	q->tokens = q->buffer;
 	q->ptokens = q->mtu;
 	qdisc_watchdog_cancel(&q->watchdog);
@@ -293,14 +292,19 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
 		q->qdisc = child;
 	}
 	q->limit = qopt->limit;
-	q->mtu = qopt->mtu;
+	q->mtu = PSCHED_TICKS2NS(qopt->mtu);
 	q->max_size = max_size;
-	q->buffer = qopt->buffer;
+	q->buffer = PSCHED_TICKS2NS(qopt->buffer);
 	q->tokens = q->buffer;
 	q->ptokens = q->mtu;
 
-	swap(q->R_tab, rtab);
-	swap(q->P_tab, ptab);
+	psched_ratecfg_precompute(&q->rate, rtab->rate.rate);
+	if (ptab) {
+		psched_ratecfg_precompute(&q->peak, ptab->rate.rate);
+		q->peak_present = true;
+	} else {
+		q->peak_present = false;
+	}
 
 	sch_tree_unlock(sch);
 	err = 0;
@@ -319,7 +323,7 @@ static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
 	if (opt == NULL)
 		return -EINVAL;
 
-	q->t_c = psched_get_time();
+	q->t_c = ktime_to_ns(ktime_get());
 	qdisc_watchdog_init(&q->watchdog, sch);
 	q->qdisc = &noop_qdisc;
 
@@ -331,12 +335,6 @@ static void tbf_destroy(struct Qdisc *sch)
 	struct tbf_sched_data *q = qdisc_priv(sch);
 
 	qdisc_watchdog_cancel(&q->watchdog);
-
-	if (q->P_tab)
-		qdisc_put_rtab(q->P_tab);
-	if (q->R_tab)
-		qdisc_put_rtab(q->R_tab);
-
 	qdisc_destroy(q->qdisc);
 }
 
@@ -352,13 +350,13 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
 		goto nla_put_failure;
 
 	opt.limit = q->limit;
-	opt.rate = q->R_tab->rate;
-	if (q->P_tab)
-		opt.peakrate = q->P_tab->rate;
+	opt.rate.rate = psched_ratecfg_getrate(&q->rate);
+	if (q->peak_present)
+		opt.peakrate.rate = psched_ratecfg_getrate(&q->peak);
 	else
 		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
-	opt.mtu = q->mtu;
-	opt.buffer = q->buffer;
+	opt.mtu = PSCHED_NS2TICKS(q->mtu);
+	opt.buffer = PSCHED_NS2TICKS(q->buffer);
 	if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
 
diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig
index c26210618e1..7521d944c0f 100644
--- a/net/sctp/Kconfig
+++ b/net/sctp/Kconfig
@@ -68,7 +68,7 @@ config SCTP_DBG_OBJCNT
 	  If unsure, say N
 choice
 	prompt "Default SCTP cookie HMAC encoding"
-	default SCTP_COOKIE_HMAC_MD5
+	default SCTP_DEFAULT_COOKIE_HMAC_MD5
 	help
 	  This option sets the default sctp cookie hmac algorithm
 	  when in doubt select 'md5'
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index b45ed1f9692..2f95f5a5145 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -434,8 +434,7 @@ void sctp_association_free(struct sctp_association *asoc)
 	 * on our state.
 	 */
 	for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) {
-		if (timer_pending(&asoc->timers[i]) &&
-		    del_timer(&asoc->timers[i]))
+		if (del_timer(&asoc->timers[i]))
 			sctp_association_put(asoc);
 	}
 
@@ -1497,7 +1496,7 @@ void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len)
 
 		/* Stop the SACK timer.  */
 		timer = &asoc->timers[SCTP_EVENT_TIMEOUT_SACK];
-		if (timer_pending(timer) && del_timer(timer))
+		if (del_timer(timer))
 			sctp_association_put(asoc);
 	}
 }
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 159b9bc5d63..ba1dfc3f8de 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -71,7 +71,7 @@ void sctp_auth_key_put(struct sctp_auth_bytes *key)
 		return;
 
 	if (atomic_dec_and_test(&key->refcnt)) {
-		kfree(key);
+		kzfree(key);
 		SCTP_DBG_OBJCNT_DEC(keys);
 	}
 }
@@ -200,27 +200,28 @@ static struct sctp_auth_bytes *sctp_auth_make_key_vector(
 	struct sctp_auth_bytes *new;
 	__u32	len;
 	__u32	offset = 0;
+	__u16	random_len, hmacs_len, chunks_len = 0;
 
-	len = ntohs(random->param_hdr.length) + ntohs(hmacs->param_hdr.length);
-        if (chunks)
-		len += ntohs(chunks->param_hdr.length);
+	random_len = ntohs(random->param_hdr.length);
+	hmacs_len = ntohs(hmacs->param_hdr.length);
+	if (chunks)
+		chunks_len = ntohs(chunks->param_hdr.length);
 
-	new = kmalloc(sizeof(struct sctp_auth_bytes) + len, gfp);
+	len = random_len + hmacs_len + chunks_len;
+
+	new = sctp_auth_create_key(len, gfp);
 	if (!new)
 		return NULL;
 
-	new->len = len;
-
-	memcpy(new->data, random, ntohs(random->param_hdr.length));
-	offset += ntohs(random->param_hdr.length);
+	memcpy(new->data, random, random_len);
+	offset += random_len;
 
 	if (chunks) {
-		memcpy(new->data + offset, chunks,
-			ntohs(chunks->param_hdr.length));
-		offset += ntohs(chunks->param_hdr.length);
+		memcpy(new->data + offset, chunks, chunks_len);
+		offset += chunks_len;
 	}
 
-	memcpy(new->data + offset, hmacs, ntohs(hmacs->param_hdr.length));
+	memcpy(new->data + offset, hmacs, hmacs_len);
 
 	return new;
 }
@@ -350,8 +351,8 @@ static struct sctp_auth_bytes *sctp_auth_asoc_create_secret(
 	secret = sctp_auth_asoc_set_secret(ep_key, first_vector, last_vector,
 					    gfp);
 out:
-	kfree(local_key_vector);
-	kfree(peer_key_vector);
+	sctp_auth_key_put(local_key_vector);
+	sctp_auth_key_put(peer_key_vector);
 
 	return secret;
 }
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 17a001bac2c..73aad3d16a4 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -151,9 +151,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 	ep->rcvbuf_policy = net->sctp.rcvbuf_policy;
 
 	/* Initialize the secret key used with cookie. */
-	get_random_bytes(&ep->secret_key[0], SCTP_SECRET_SIZE);
-	ep->last_key = ep->current_key = 0;
-	ep->key_changed_at = jiffies;
+	get_random_bytes(ep->secret_key, sizeof(ep->secret_key));
 
 	/* SCTP-AUTH extensions*/
 	INIT_LIST_HEAD(&ep->endpoint_shared_keys);
@@ -271,6 +269,8 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
 	sctp_inq_free(&ep->base.inqueue);
 	sctp_bind_addr_free(&ep->base.bind_addr);
 
+	memset(ep->secret_key, 0, sizeof(ep->secret_key));
+
 	/* Remove and free the port */
 	if (sctp_sk(ep->base.sk)->bind_hash)
 		sctp_put_port(ep->base.sk);
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 8bd3c279427..965bbbbe48d 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -468,8 +468,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk,
 	} else {
 		struct net *net = sock_net(sk);
 
-		if (timer_pending(&t->proto_unreach_timer) &&
-		    del_timer(&t->proto_unreach_timer))
+		if (del_timer(&t->proto_unreach_timer))
 			sctp_association_put(asoc);
 
 		sctp_do_sm(net, SCTP_EVENT_T_OTHER,
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 379c81dee9d..01dca753db1 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -224,7 +224,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
 
 /* Free the outqueue structure and any related pending chunks.
  */
-void sctp_outq_teardown(struct sctp_outq *q)
+static void __sctp_outq_teardown(struct sctp_outq *q)
 {
 	struct sctp_transport *transport;
 	struct list_head *lchunk, *temp;
@@ -277,8 +277,6 @@ void sctp_outq_teardown(struct sctp_outq *q)
 		sctp_chunk_free(chunk);
 	}
 
-	q->error = 0;
-
 	/* Throw away any leftover control chunks. */
 	list_for_each_entry_safe(chunk, tmp, &q->control_chunk_list, list) {
 		list_del_init(&chunk->list);
@@ -286,11 +284,17 @@ void sctp_outq_teardown(struct sctp_outq *q)
 	}
 }
 
+void sctp_outq_teardown(struct sctp_outq *q)
+{
+	__sctp_outq_teardown(q);
+	sctp_outq_init(q->asoc, q);
+}
+
 /* Free the outqueue structure and any related pending chunks.  */
 void sctp_outq_free(struct sctp_outq *q)
 {
 	/* Throw away leftover chunks. */
-	sctp_outq_teardown(q);
+	__sctp_outq_teardown(q);
 
 	/* If we were kmalloc()'d, free the memory.  */
 	if (q->malloced)
@@ -1696,10 +1700,8 @@ static void sctp_check_transmitted(struct sctp_outq *q,
 		 * address.
 		 */
 		if (!transport->flight_size) {
-			if (timer_pending(&transport->T3_rtx_timer) &&
-			    del_timer(&transport->T3_rtx_timer)) {
+			if (del_timer(&transport->T3_rtx_timer))
 				sctp_transport_put(transport);
-			}
 		} else if (restart_timer) {
 			if (!mod_timer(&transport->T3_rtx_timer,
 				       jiffies + transport->rto))
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
index 5f7518de2fd..261b7b9858a 100644
--- a/net/sctp/probe.c
+++ b/net/sctp/probe.c
@@ -122,12 +122,12 @@ static const struct file_operations sctpprobe_fops = {
 	.llseek = noop_llseek,
 };
 
-sctp_disposition_t jsctp_sf_eat_sack(struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
-				     void *arg,
-				     sctp_cmd_seq_t *commands)
+static sctp_disposition_t jsctp_sf_eat_sack(struct net *net,
+					    const struct sctp_endpoint *ep,
+					    const struct sctp_association *asoc,
+					    const sctp_subtype_t type,
+					    void *arg,
+					    sctp_cmd_seq_t *commands)
 {
 	struct sctp_transport *sp;
 	static __u32 lcwnd = 0;
@@ -183,6 +183,13 @@ static __init int sctpprobe_init(void)
 {
 	int ret = -ENOMEM;
 
+	/* Warning: if the function signature of sctp_sf_eat_sack_6_2,
+	 * has been changed, you also have to change the signature of
+	 * jsctp_sf_eat_sack, otherwise you end up right here!
+	 */
+	BUILD_BUG_ON(__same_type(sctp_sf_eat_sack_6_2,
+				 jsctp_sf_eat_sack) == 0);
+
 	init_waitqueue_head(&sctpw.wait);
 	spin_lock_init(&sctpw.lock);
 	if (kfifo_alloc(&sctpw.fifo, bufsize, GFP_KERNEL))
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index f898b1c58bd..1c2e46cb919 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -595,7 +595,7 @@ static void sctp_v4_ecn_capable(struct sock *sk)
 	INET_ECN_xmit(sk);
 }
 
-void sctp_addr_wq_timeout_handler(unsigned long arg)
+static void sctp_addr_wq_timeout_handler(unsigned long arg)
 {
 	struct net *net = (struct net *)arg;
 	struct sctp_sockaddr_entry *addrw, *temp;
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index e1c5fc2be6b..a193f3bc814 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1589,8 +1589,6 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
 	struct sctp_signed_cookie *cookie;
 	struct scatterlist sg;
 	int headersize, bodysize;
-	unsigned int keylen;
-	char *key;
 
 	/* Header size is static data prior to the actual cookie, including
 	 * any padding.
@@ -1650,12 +1648,11 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
 
 		/* Sign the message.  */
 		sg_init_one(&sg, &cookie->c, bodysize);
-		keylen = SCTP_SECRET_SIZE;
-		key = (char *)ep->secret_key[ep->current_key];
 		desc.tfm = sctp_sk(ep->base.sk)->hmac;
 		desc.flags = 0;
 
-		if (crypto_hash_setkey(desc.tfm, key, keylen) ||
+		if (crypto_hash_setkey(desc.tfm, ep->secret_key,
+				       sizeof(ep->secret_key)) ||
 		    crypto_hash_digest(&desc, &sg, bodysize, cookie->signature))
 			goto free_cookie;
 	}
@@ -1682,8 +1679,7 @@ struct sctp_association *sctp_unpack_cookie(
 	int headersize, bodysize, fixed_size;
 	__u8 *digest = ep->digest;
 	struct scatterlist sg;
-	unsigned int keylen, len;
-	char *key;
+	unsigned int len;
 	sctp_scope_t scope;
 	struct sk_buff *skb = chunk->skb;
 	struct timeval tv;
@@ -1718,34 +1714,21 @@ struct sctp_association *sctp_unpack_cookie(
 		goto no_hmac;
 
 	/* Check the signature.  */
-	keylen = SCTP_SECRET_SIZE;
 	sg_init_one(&sg, bear_cookie, bodysize);
-	key = (char *)ep->secret_key[ep->current_key];
 	desc.tfm = sctp_sk(ep->base.sk)->hmac;
 	desc.flags = 0;
 
 	memset(digest, 0x00, SCTP_SIGNATURE_SIZE);
-	if (crypto_hash_setkey(desc.tfm, key, keylen) ||
+	if (crypto_hash_setkey(desc.tfm, ep->secret_key,
+			       sizeof(ep->secret_key)) ||
 	    crypto_hash_digest(&desc, &sg, bodysize, digest)) {
 		*error = -SCTP_IERROR_NOMEM;
 		goto fail;
 	}
 
 	if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) {
-		/* Try the previous key. */
-		key = (char *)ep->secret_key[ep->last_key];
-		memset(digest, 0x00, SCTP_SIGNATURE_SIZE);
-		if (crypto_hash_setkey(desc.tfm, key, keylen) ||
-		    crypto_hash_digest(&desc, &sg, bodysize, digest)) {
-			*error = -SCTP_IERROR_NOMEM;
-			goto fail;
-		}
-
-		if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) {
-			/* Yikes!  Still bad signature! */
-			*error = -SCTP_IERROR_BAD_SIG;
-			goto fail;
-		}
+		*error = -SCTP_IERROR_BAD_SIG;
+		goto fail;
 	}
 
 no_hmac:
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index c9577754a70..8aab894aeab 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -674,10 +674,8 @@ static void sctp_cmd_t3_rtx_timers_stop(sctp_cmd_seq_t *cmds,
 
 	list_for_each_entry(t, &asoc->peer.transport_addr_list,
 			transports) {
-		if (timer_pending(&t->T3_rtx_timer) &&
-		    del_timer(&t->T3_rtx_timer)) {
+		if (del_timer(&t->T3_rtx_timer))
 			sctp_transport_put(t);
-		}
 	}
 }
 
@@ -1517,7 +1515,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
 
 		case SCTP_CMD_TIMER_STOP:
 			timer = &asoc->timers[cmd->obj.to];
-			if (timer_pending(timer) && del_timer(timer))
+			if (del_timer(timer))
 				sctp_association_put(asoc);
 			break;
 
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 618ec7e216c..5131fcfedb0 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1779,8 +1779,10 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net,
 
 	/* Update the content of current association. */
 	sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
-	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
 	sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+			SCTP_STATE(SCTP_STATE_ESTABLISHED));
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
 	return SCTP_DISPOSITION_CONSUME;
 
 nomem_ev:
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9e65758cb03..cedd9bf67b8 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3390,7 +3390,7 @@ static int sctp_setsockopt_auth_key(struct sock *sk,
 
 	ret = sctp_auth_set_key(sctp_sk(sk)->ep, asoc, authkey);
 out:
-	kfree(authkey);
+	kzfree(authkey);
 	return ret;
 }
 
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 043889ac86c..bf3c6e8fc40 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -366,7 +366,11 @@ int sctp_sysctl_net_register(struct net *net)
 
 void sctp_sysctl_net_unregister(struct net *net)
 {
+	struct ctl_table *table;
+
+	table = net->sctp.sysctl_header->ctl_table_arg;
 	unregister_net_sysctl_table(net->sctp.sysctl_header);
+	kfree(table);
 }
 
 static struct ctl_table_header * sctp_sysctl_header;
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 4e45bb68aef..fafd2a461ba 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -151,13 +151,11 @@ void sctp_transport_free(struct sctp_transport *transport)
 	 * structure hang around in memory since we know
 	 * the tranport is going away.
 	 */
-	if (timer_pending(&transport->T3_rtx_timer) &&
-	    del_timer(&transport->T3_rtx_timer))
+	if (del_timer(&transport->T3_rtx_timer))
 		sctp_transport_put(transport);
 
 	/* Delete the ICMP proto unreachable timer if it's active. */
-	if (timer_pending(&transport->proto_unreach_timer) &&
-	    del_timer(&transport->proto_unreach_timer))
+	if (del_timer(&transport->proto_unreach_timer))
 		sctp_association_put(transport->asoc);
 
 	sctp_transport_put(transport);
@@ -168,10 +166,6 @@ static void sctp_transport_destroy_rcu(struct rcu_head *head)
 	struct sctp_transport *transport;
 
 	transport = container_of(head, struct sctp_transport, rcu);
-	if (transport->asoc)
-		sctp_association_put(transport->asoc);
-
-	sctp_packet_free(&transport->packet);
 
 	dst_release(transport->dst);
 	kfree(transport);
@@ -186,6 +180,11 @@ static void sctp_transport_destroy(struct sctp_transport *transport)
 	SCTP_ASSERT(transport->dead, "Transport is not dead", return);
 
 	call_rcu(&transport->rcu, sctp_transport_destroy_rcu);
+
+	sctp_packet_free(&transport->packet);
+
+	if (transport->asoc)
+		sctp_association_put(transport->asoc);
 }
 
 /* Start T3_rtx timer if it is not already running and update the heartbeat
@@ -654,10 +653,9 @@ void sctp_transport_reset(struct sctp_transport *t)
 void sctp_transport_immediate_rtx(struct sctp_transport *t)
 {
 	/* Stop pending T3_rtx_timer */
-	if (timer_pending(&t->T3_rtx_timer)) {
-		(void)del_timer(&t->T3_rtx_timer);
+	if (del_timer(&t->T3_rtx_timer))
 		sctp_transport_put(t);
-	}
+
 	sctp_retransmit(&t->asoc->outqueue, t, SCTP_RTXR_T3_RTX);
 	if (!timer_pending(&t->T3_rtx_timer)) {
 		if (!mod_timer(&t->T3_rtx_timer, jiffies + t->rto))
diff --git a/net/socket.c b/net/socket.c
index 2ca51c719ef..ee0d029e513 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -69,7 +69,6 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/mutex.h>
-#include <linux/wanrouter.h>
 #include <linux/if_bridge.h>
 #include <linux/if_frad.h>
 #include <linux/if_vlan.h>
@@ -2838,7 +2837,7 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
 	}
 
 	ifr = compat_alloc_user_space(buf_size);
-	rxnfc = (void *)ifr + ALIGN(sizeof(struct ifreq), 8);
+	rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
 
 	if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
 		return -EFAULT;
@@ -2862,12 +2861,12 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
 			offsetof(struct ethtool_rxnfc, fs.ring_cookie));
 
 		if (copy_in_user(rxnfc, compat_rxnfc,
-				 (void *)(&rxnfc->fs.m_ext + 1) -
-				 (void *)rxnfc) ||
+				 (void __user *)(&rxnfc->fs.m_ext + 1) -
+				 (void __user *)rxnfc) ||
 		    copy_in_user(&rxnfc->fs.ring_cookie,
 				 &compat_rxnfc->fs.ring_cookie,
-				 (void *)(&rxnfc->fs.location + 1) -
-				 (void *)&rxnfc->fs.ring_cookie) ||
+				 (void __user *)(&rxnfc->fs.location + 1) -
+				 (void __user *)&rxnfc->fs.ring_cookie) ||
 		    copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
 				 sizeof(rxnfc->rule_cnt)))
 			return -EFAULT;
@@ -2879,12 +2878,12 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
 
 	if (convert_out) {
 		if (copy_in_user(compat_rxnfc, rxnfc,
-				 (const void *)(&rxnfc->fs.m_ext + 1) -
-				 (const void *)rxnfc) ||
+				 (const void __user *)(&rxnfc->fs.m_ext + 1) -
+				 (const void __user *)rxnfc) ||
 		    copy_in_user(&compat_rxnfc->fs.ring_cookie,
 				 &rxnfc->fs.ring_cookie,
-				 (const void *)(&rxnfc->fs.location + 1) -
-				 (const void *)&rxnfc->fs.ring_cookie) ||
+				 (const void __user *)(&rxnfc->fs.location + 1) -
+				 (const void __user *)&rxnfc->fs.ring_cookie) ||
 		    copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
 				 sizeof(rxnfc->rule_cnt)))
 			return -EFAULT;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 822f020fa7f..507b5e84fbd 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -555,7 +555,7 @@ EXPORT_SYMBOL_GPL(rpc_clone_client);
  * rpc_clone_client_set_auth - Clone an RPC client structure and set its auth
  *
  * @clnt: RPC client whose parameters are copied
- * @auth: security flavor for new client
+ * @flavor: security flavor for new client
  *
  * Returns a fresh RPC client or an ERR_PTR.
  */
@@ -610,11 +610,6 @@ EXPORT_SYMBOL_GPL(rpc_killall_tasks);
  */
 void rpc_shutdown_client(struct rpc_clnt *clnt)
 {
-	/*
-	 * To avoid deadlock, never call rpc_shutdown_client from a
-	 * workqueue context!
-	 */
-	WARN_ON_ONCE(current->flags & PF_WQ_WORKER);
 	might_sleep();
 
 	dprintk_rcu("RPC:       shutting down %s client for %s\n",
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index d17a704aaf5..fb20f25ddec 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -98,9 +98,25 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
 	list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list);
 }
 
+static void rpc_rotate_queue_owner(struct rpc_wait_queue *queue)
+{
+	struct list_head *q = &queue->tasks[queue->priority];
+	struct rpc_task *task;
+
+	if (!list_empty(q)) {
+		task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
+		if (task->tk_owner == queue->owner)
+			list_move_tail(&task->u.tk_wait.list, q);
+	}
+}
+
 static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
 {
-	queue->priority = priority;
+	if (queue->priority != priority) {
+		/* Fairness: rotate the list when changing priority */
+		rpc_rotate_queue_owner(queue);
+		queue->priority = priority;
+	}
 }
 
 static void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
@@ -934,16 +950,35 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data)
 	return task;
 }
 
+/*
+ * rpc_free_task - release rpc task and perform cleanups
+ *
+ * Note that we free up the rpc_task _after_ rpc_release_calldata()
+ * in order to work around a workqueue dependency issue.
+ *
+ * Tejun Heo states:
+ * "Workqueue currently considers two work items to be the same if they're
+ * on the same address and won't execute them concurrently - ie. it
+ * makes a work item which is queued again while being executed wait
+ * for the previous execution to complete.
+ *
+ * If a work function frees the work item, and then waits for an event
+ * which should be performed by another work item and *that* work item
+ * recycles the freed work item, it can create a false dependency loop.
+ * There really is no reliable way to detect this short of verifying
+ * every memory free."
+ *
+ */
 static void rpc_free_task(struct rpc_task *task)
 {
-	const struct rpc_call_ops *tk_ops = task->tk_ops;
-	void *calldata = task->tk_calldata;
+	unsigned short tk_flags = task->tk_flags;
+
+	rpc_release_calldata(task->tk_ops, task->tk_calldata);
 
-	if (task->tk_flags & RPC_TASK_DYNAMIC) {
+	if (tk_flags & RPC_TASK_DYNAMIC) {
 		dprintk("RPC: %5u freeing task\n", task->tk_pid);
 		mempool_free(task, rpc_task_mempool);
 	}
-	rpc_release_calldata(tk_ops, calldata);
 }
 
 static void rpc_async_release(struct work_struct *work)
@@ -953,8 +988,7 @@ static void rpc_async_release(struct work_struct *work)
 
 static void rpc_release_resources_task(struct rpc_task *task)
 {
-	if (task->tk_rqstp)
-		xprt_release(task);
+	xprt_release(task);
 	if (task->tk_msg.rpc_cred) {
 		put_rpccred(task->tk_msg.rpc_cred);
 		task->tk_msg.rpc_cred = NULL;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 0a148c9d2a5..0f679df7d07 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -465,7 +465,7 @@ static int svc_udp_get_dest_address4(struct svc_rqst *rqstp,
 }
 
 /*
- * See net/ipv6/datagram.c : datagram_recv_ctl
+ * See net/ipv6/datagram.c : ip6_datagram_recv_ctl
  */
 static int svc_udp_get_dest_address6(struct svc_rqst *rqstp,
 				     struct cmsghdr *cmh)
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index bd462a532ac..33811db8788 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1136,10 +1136,18 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
 void xprt_release(struct rpc_task *task)
 {
 	struct rpc_xprt	*xprt;
-	struct rpc_rqst	*req;
+	struct rpc_rqst	*req = task->tk_rqstp;
 
-	if (!(req = task->tk_rqstp))
+	if (req == NULL) {
+		if (task->tk_client) {
+			rcu_read_lock();
+			xprt = rcu_dereference(task->tk_client->cl_xprt);
+			if (xprt->snd_task == task)
+				xprt_release_write(xprt, task);
+			rcu_read_unlock();
+		}
 		return;
+	}
 
 	xprt = req->rq_xprt;
 	if (task->tk_ops->rpc_count_stats != NULL)
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 9b4e4833a48..a9622b6cd91 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -43,7 +43,8 @@
 #define SS_LISTENING	-1	/* socket is listening */
 #define SS_READY	-2	/* socket is connectionless */
 
-#define OVERLOAD_LIMIT_BASE	10000
+#define CONN_OVERLOAD_LIMIT	((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \
+				SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE))
 #define CONN_TIMEOUT_DEFAULT	8000	/* default connect timeout = 8s */
 
 struct tipc_sock {
@@ -129,19 +130,6 @@ static void advance_rx_queue(struct sock *sk)
 }
 
 /**
- * discard_rx_queue - discard all buffers in socket receive queue
- *
- * Caller must hold socket lock
- */
-static void discard_rx_queue(struct sock *sk)
-{
-	struct sk_buff *buf;
-
-	while ((buf = __skb_dequeue(&sk->sk_receive_queue)))
-		kfree_skb(buf);
-}
-
-/**
  * reject_rx_queue - reject all buffers in socket receive queue
  *
  * Caller must hold socket lock
@@ -215,7 +203,6 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol,
 
 	sock_init_data(sock, sk);
 	sk->sk_backlog_rcv = backlog_rcv;
-	sk->sk_rcvbuf = TIPC_FLOW_CONTROL_WIN * 2 * TIPC_MAX_USER_MSG_SIZE * 2;
 	sk->sk_data_ready = tipc_data_ready;
 	sk->sk_write_space = tipc_write_space;
 	tipc_sk(sk)->p = tp_ptr;
@@ -292,7 +279,7 @@ static int release(struct socket *sock)
 	res = tipc_deleteport(tport->ref);
 
 	/* Discard any remaining (connection-based) messages in receive queue */
-	discard_rx_queue(sk);
+	__skb_queue_purge(&sk->sk_receive_queue);
 
 	/* Reject any messages that accumulated in backlog queue */
 	sock->state = SS_DISCONNECTING;
@@ -516,8 +503,7 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
 	if (unlikely((m->msg_namelen < sizeof(*dest)) ||
 		     (dest->family != AF_TIPC)))
 		return -EINVAL;
-	if ((total_len > TIPC_MAX_USER_MSG_SIZE) ||
-	    (m->msg_iovlen > (unsigned int)INT_MAX))
+	if (total_len > TIPC_MAX_USER_MSG_SIZE)
 		return -EMSGSIZE;
 
 	if (iocb)
@@ -625,8 +611,7 @@ static int send_packet(struct kiocb *iocb, struct socket *sock,
 	if (unlikely(dest))
 		return send_msg(iocb, sock, m, total_len);
 
-	if ((total_len > TIPC_MAX_USER_MSG_SIZE) ||
-	    (m->msg_iovlen > (unsigned int)INT_MAX))
+	if (total_len > TIPC_MAX_USER_MSG_SIZE)
 		return -EMSGSIZE;
 
 	if (iocb)
@@ -711,8 +696,7 @@ static int send_stream(struct kiocb *iocb, struct socket *sock,
 		goto exit;
 	}
 
-	if ((total_len > (unsigned int)INT_MAX) ||
-	    (m->msg_iovlen > (unsigned int)INT_MAX)) {
+	if (total_len > (unsigned int)INT_MAX) {
 		res = -EMSGSIZE;
 		goto exit;
 	}
@@ -1155,34 +1139,6 @@ static void tipc_data_ready(struct sock *sk, int len)
 }
 
 /**
- * rx_queue_full - determine if receive queue can accept another message
- * @msg: message to be added to queue
- * @queue_size: current size of queue
- * @base: nominal maximum size of queue
- *
- * Returns 1 if queue is unable to accept message, 0 otherwise
- */
-static int rx_queue_full(struct tipc_msg *msg, u32 queue_size, u32 base)
-{
-	u32 threshold;
-	u32 imp = msg_importance(msg);
-
-	if (imp == TIPC_LOW_IMPORTANCE)
-		threshold = base;
-	else if (imp == TIPC_MEDIUM_IMPORTANCE)
-		threshold = base * 2;
-	else if (imp == TIPC_HIGH_IMPORTANCE)
-		threshold = base * 100;
-	else
-		return 0;
-
-	if (msg_connected(msg))
-		threshold *= 4;
-
-	return queue_size >= threshold;
-}
-
-/**
  * filter_connect - Handle all incoming messages for a connection-based socket
  * @tsock: TIPC socket
  * @msg: message
@@ -1260,6 +1216,36 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf)
 }
 
 /**
+ * rcvbuf_limit - get proper overload limit of socket receive queue
+ * @sk: socket
+ * @buf: message
+ *
+ * For all connection oriented messages, irrespective of importance,
+ * the default overload value (i.e. 67MB) is set as limit.
+ *
+ * For all connectionless messages, by default new queue limits are
+ * as belows:
+ *
+ * TIPC_LOW_IMPORTANCE       (5MB)
+ * TIPC_MEDIUM_IMPORTANCE    (10MB)
+ * TIPC_HIGH_IMPORTANCE      (20MB)
+ * TIPC_CRITICAL_IMPORTANCE  (40MB)
+ *
+ * Returns overload limit according to corresponding message importance
+ */
+static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf)
+{
+	struct tipc_msg *msg = buf_msg(buf);
+	unsigned int limit;
+
+	if (msg_connected(msg))
+		limit = CONN_OVERLOAD_LIMIT;
+	else
+		limit = sk->sk_rcvbuf << (msg_importance(msg) + 5);
+	return limit;
+}
+
+/**
  * filter_rcv - validate incoming message
  * @sk: socket
  * @buf: message
@@ -1275,7 +1261,7 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
 {
 	struct socket *sock = sk->sk_socket;
 	struct tipc_msg *msg = buf_msg(buf);
-	u32 recv_q_len;
+	unsigned int limit = rcvbuf_limit(sk, buf);
 	u32 res = TIPC_OK;
 
 	/* Reject message if it is wrong sort of message for socket */
@@ -1292,15 +1278,13 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
 	}
 
 	/* Reject message if there isn't room to queue it */
-	recv_q_len = skb_queue_len(&sk->sk_receive_queue);
-	if (unlikely(recv_q_len >= (OVERLOAD_LIMIT_BASE / 2))) {
-		if (rx_queue_full(msg, recv_q_len, OVERLOAD_LIMIT_BASE / 2))
-			return TIPC_ERR_OVERLOAD;
-	}
+	if (sk_rmem_alloc_get(sk) + buf->truesize >= limit)
+		return TIPC_ERR_OVERLOAD;
 
-	/* Enqueue message (finally!) */
+	/* Enqueue message */
 	TIPC_SKB_CB(buf)->handle = 0;
 	__skb_queue_tail(&sk->sk_receive_queue, buf);
+	skb_set_owner_r(buf, sk);
 
 	sk->sk_data_ready(sk, 0);
 	return TIPC_OK;
@@ -1349,7 +1333,7 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf)
 	if (!sock_owned_by_user(sk)) {
 		res = filter_rcv(sk, buf);
 	} else {
-		if (sk_add_backlog(sk, buf, sk->sk_rcvbuf))
+		if (sk_add_backlog(sk, buf, rcvbuf_limit(sk, buf)))
 			res = TIPC_ERR_OVERLOAD;
 		else
 			res = TIPC_OK;
@@ -1583,6 +1567,7 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags)
 	} else {
 		__skb_dequeue(&sk->sk_receive_queue);
 		__skb_queue_head(&new_sk->sk_receive_queue, buf);
+		skb_set_owner_r(buf, new_sk);
 	}
 	release_sock(new_sk);
 
@@ -1637,7 +1622,7 @@ restart:
 	case SS_DISCONNECTING:
 
 		/* Discard any unreceived messages */
-		discard_rx_queue(sk);
+		__skb_queue_purge(&sk->sk_receive_queue);
 
 		/* Wake up anyone sleeping in poll */
 		sk->sk_state_change(sk);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 5b5c876c80e..0c612361c15 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2426,9 +2426,8 @@ static struct pernet_operations unix_net_ops = {
 static int __init af_unix_init(void)
 {
 	int rc = -1;
-	struct sk_buff *dummy_skb;
 
-	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
+	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
 
 	rc = proto_register(&unix_proto, 1);
 	if (rc != 0) {
diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig
new file mode 100644
index 00000000000..b5fa7e40cdc
--- /dev/null
+++ b/net/vmw_vsock/Kconfig
@@ -0,0 +1,28 @@
+#
+# Vsock protocol
+#
+
+config VSOCKETS
+	tristate "Virtual Socket protocol"
+	help
+	  Virtual Socket Protocol is a socket protocol similar to TCP/IP
+	  allowing comunication between Virtual Machines and hypervisor
+	  or host.
+
+	  You should also select one or more hypervisor-specific transports
+	  below.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called vsock. If unsure, say N.
+
+config VMWARE_VMCI_VSOCKETS
+	tristate "VMware VMCI transport for Virtual Sockets"
+	depends on VSOCKETS && VMWARE_VMCI
+	help
+	  This module implements a VMCI transport for Virtual Sockets.
+
+	  Enable this transport if your Virtual Machine runs on a VMware
+	  hypervisor.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called vmw_vsock_vmci_transport. If unsure, say N.
diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile
new file mode 100644
index 00000000000..2ce52d70f22
--- /dev/null
+++ b/net/vmw_vsock/Makefile
@@ -0,0 +1,7 @@
+obj-$(CONFIG_VSOCKETS) += vsock.o
+obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o
+
+vsock-y += af_vsock.o vsock_addr.o
+
+vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \
+	vmci_transport_notify_qstate.o
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
new file mode 100644
index 00000000000..54bb7bdf92d
--- /dev/null
+++ b/net/vmw_vsock/af_vsock.c
@@ -0,0 +1,2015 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+/* Implementation notes:
+ *
+ * - There are two kinds of sockets: those created by user action (such as
+ * calling socket(2)) and those created by incoming connection request packets.
+ *
+ * - There are two "global" tables, one for bound sockets (sockets that have
+ * specified an address that they are responsible for) and one for connected
+ * sockets (sockets that have established a connection with another socket).
+ * These tables are "global" in that all sockets on the system are placed
+ * within them. - Note, though, that the bound table contains an extra entry
+ * for a list of unbound sockets and SOCK_DGRAM sockets will always remain in
+ * that list. The bound table is used solely for lookup of sockets when packets
+ * are received and that's not necessary for SOCK_DGRAM sockets since we create
+ * a datagram handle for each and need not perform a lookup.  Keeping SOCK_DGRAM
+ * sockets out of the bound hash buckets will reduce the chance of collisions
+ * when looking for SOCK_STREAM sockets and prevents us from having to check the
+ * socket type in the hash table lookups.
+ *
+ * - Sockets created by user action will either be "client" sockets that
+ * initiate a connection or "server" sockets that listen for connections; we do
+ * not support simultaneous connects (two "client" sockets connecting).
+ *
+ * - "Server" sockets are referred to as listener sockets throughout this
+ * implementation because they are in the SS_LISTEN state.  When a connection
+ * request is received (the second kind of socket mentioned above), we create a
+ * new socket and refer to it as a pending socket.  These pending sockets are
+ * placed on the pending connection list of the listener socket.  When future
+ * packets are received for the address the listener socket is bound to, we
+ * check if the source of the packet is from one that has an existing pending
+ * connection.  If it does, we process the packet for the pending socket.  When
+ * that socket reaches the connected state, it is removed from the listener
+ * socket's pending list and enqueued in the listener socket's accept queue.
+ * Callers of accept(2) will accept connected sockets from the listener socket's
+ * accept queue.  If the socket cannot be accepted for some reason then it is
+ * marked rejected.  Once the connection is accepted, it is owned by the user
+ * process and the responsibility for cleanup falls with that user process.
+ *
+ * - It is possible that these pending sockets will never reach the connected
+ * state; in fact, we may never receive another packet after the connection
+ * request.  Because of this, we must schedule a cleanup function to run in the
+ * future, after some amount of time passes where a connection should have been
+ * established.  This function ensures that the socket is off all lists so it
+ * cannot be retrieved, then drops all references to the socket so it is cleaned
+ * up (sock_put() -> sk_free() -> our sk_destruct implementation).  Note this
+ * function will also cleanup rejected sockets, those that reach the connected
+ * state but leave it before they have been accepted.
+ *
+ * - Sockets created by user action will be cleaned up when the user process
+ * calls close(2), causing our release implementation to be called. Our release
+ * implementation will perform some cleanup then drop the last reference so our
+ * sk_destruct implementation is invoked.  Our sk_destruct implementation will
+ * perform additional cleanup that's common for both types of sockets.
+ *
+ * - A socket's reference count is what ensures that the structure won't be
+ * freed.  Each entry in a list (such as the "global" bound and connected tables
+ * and the listener socket's pending list and connected queue) ensures a
+ * reference.  When we defer work until process context and pass a socket as our
+ * argument, we must ensure the reference count is increased to ensure the
+ * socket isn't freed before the function is run; the deferred function will
+ * then drop the reference.
+ */
+
+#include <linux/types.h>
+
+#define EXPORT_SYMTAB
+#include <linux/bitops.h>
+#include <linux/cred.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/kmod.h>
+#include <linux/list.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/net.h>
+#include <linux/poll.h>
+#include <linux/skbuff.h>
+#include <linux/smp.h>
+#include <linux/socket.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+#include <net/sock.h>
+
+#include "af_vsock.h"
+#include "vsock_version.h"
+
+static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
+static void vsock_sk_destruct(struct sock *sk);
+static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
+
+/* Protocol family. */
+static struct proto vsock_proto = {
+	.name = "AF_VSOCK",
+	.owner = THIS_MODULE,
+	.obj_size = sizeof(struct vsock_sock),
+};
+
+/* The default peer timeout indicates how long we will wait for a peer response
+ * to a control message.
+ */
+#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ)
+
+#define SS_LISTEN 255
+
+static const struct vsock_transport *transport;
+static DEFINE_MUTEX(vsock_register_mutex);
+
+/**** EXPORTS ****/
+
+/* Get the ID of the local context.  This is transport dependent. */
+
+int vm_sockets_get_local_cid(void)
+{
+	return transport->get_local_cid();
+}
+EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid);
+
+/**** UTILS ****/
+
+/* Each bound VSocket is stored in the bind hash table and each connected
+ * VSocket is stored in the connected hash table.
+ *
+ * Unbound sockets are all put on the same list attached to the end of the hash
+ * table (vsock_unbound_sockets).  Bound sockets are added to the hash table in
+ * the bucket that their local address hashes to (vsock_bound_sockets(addr)
+ * represents the list that addr hashes to).
+ *
+ * Specifically, we initialize the vsock_bind_table array to a size of
+ * VSOCK_HASH_SIZE + 1 so that vsock_bind_table[0] through
+ * vsock_bind_table[VSOCK_HASH_SIZE - 1] are for bound sockets and
+ * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets.  The hash function
+ * mods with VSOCK_HASH_SIZE - 1 to ensure this.
+ */
+#define VSOCK_HASH_SIZE         251
+#define MAX_PORT_RETRIES        24
+
+#define VSOCK_HASH(addr)        ((addr)->svm_port % (VSOCK_HASH_SIZE - 1))
+#define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)])
+#define vsock_unbound_sockets     (&vsock_bind_table[VSOCK_HASH_SIZE])
+
+/* XXX This can probably be implemented in a better way. */
+#define VSOCK_CONN_HASH(src, dst)				\
+	(((src)->svm_cid ^ (dst)->svm_port) % (VSOCK_HASH_SIZE - 1))
+#define vsock_connected_sockets(src, dst)		\
+	(&vsock_connected_table[VSOCK_CONN_HASH(src, dst)])
+#define vsock_connected_sockets_vsk(vsk)				\
+	vsock_connected_sockets(&(vsk)->remote_addr, &(vsk)->local_addr)
+
+static struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];
+static struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
+static DEFINE_SPINLOCK(vsock_table_lock);
+
+static __init void vsock_init_tables(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(vsock_bind_table); i++)
+		INIT_LIST_HEAD(&vsock_bind_table[i]);
+
+	for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++)
+		INIT_LIST_HEAD(&vsock_connected_table[i]);
+}
+
+static void __vsock_insert_bound(struct list_head *list,
+				 struct vsock_sock *vsk)
+{
+	sock_hold(&vsk->sk);
+	list_add(&vsk->bound_table, list);
+}
+
+static void __vsock_insert_connected(struct list_head *list,
+				     struct vsock_sock *vsk)
+{
+	sock_hold(&vsk->sk);
+	list_add(&vsk->connected_table, list);
+}
+
+static void __vsock_remove_bound(struct vsock_sock *vsk)
+{
+	list_del_init(&vsk->bound_table);
+	sock_put(&vsk->sk);
+}
+
+static void __vsock_remove_connected(struct vsock_sock *vsk)
+{
+	list_del_init(&vsk->connected_table);
+	sock_put(&vsk->sk);
+}
+
+static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr)
+{
+	struct vsock_sock *vsk;
+
+	list_for_each_entry(vsk, vsock_bound_sockets(addr), bound_table)
+		if (vsock_addr_equals_addr_any(addr, &vsk->local_addr))
+			return sk_vsock(vsk);
+
+	return NULL;
+}
+
+static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src,
+						  struct sockaddr_vm *dst)
+{
+	struct vsock_sock *vsk;
+
+	list_for_each_entry(vsk, vsock_connected_sockets(src, dst),
+			    connected_table) {
+		if (vsock_addr_equals_addr(src, &vsk->remote_addr)
+		    && vsock_addr_equals_addr(dst, &vsk->local_addr)) {
+			return sk_vsock(vsk);
+		}
+	}
+
+	return NULL;
+}
+
+static bool __vsock_in_bound_table(struct vsock_sock *vsk)
+{
+	return !list_empty(&vsk->bound_table);
+}
+
+static bool __vsock_in_connected_table(struct vsock_sock *vsk)
+{
+	return !list_empty(&vsk->connected_table);
+}
+
+static void vsock_insert_unbound(struct vsock_sock *vsk)
+{
+	spin_lock_bh(&vsock_table_lock);
+	__vsock_insert_bound(vsock_unbound_sockets, vsk);
+	spin_unlock_bh(&vsock_table_lock);
+}
+
+void vsock_insert_connected(struct vsock_sock *vsk)
+{
+	struct list_head *list = vsock_connected_sockets(
+		&vsk->remote_addr, &vsk->local_addr);
+
+	spin_lock_bh(&vsock_table_lock);
+	__vsock_insert_connected(list, vsk);
+	spin_unlock_bh(&vsock_table_lock);
+}
+EXPORT_SYMBOL_GPL(vsock_insert_connected);
+
+void vsock_remove_bound(struct vsock_sock *vsk)
+{
+	spin_lock_bh(&vsock_table_lock);
+	__vsock_remove_bound(vsk);
+	spin_unlock_bh(&vsock_table_lock);
+}
+EXPORT_SYMBOL_GPL(vsock_remove_bound);
+
+void vsock_remove_connected(struct vsock_sock *vsk)
+{
+	spin_lock_bh(&vsock_table_lock);
+	__vsock_remove_connected(vsk);
+	spin_unlock_bh(&vsock_table_lock);
+}
+EXPORT_SYMBOL_GPL(vsock_remove_connected);
+
+struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr)
+{
+	struct sock *sk;
+
+	spin_lock_bh(&vsock_table_lock);
+	sk = __vsock_find_bound_socket(addr);
+	if (sk)
+		sock_hold(sk);
+
+	spin_unlock_bh(&vsock_table_lock);
+
+	return sk;
+}
+EXPORT_SYMBOL_GPL(vsock_find_bound_socket);
+
+struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
+					 struct sockaddr_vm *dst)
+{
+	struct sock *sk;
+
+	spin_lock_bh(&vsock_table_lock);
+	sk = __vsock_find_connected_socket(src, dst);
+	if (sk)
+		sock_hold(sk);
+
+	spin_unlock_bh(&vsock_table_lock);
+
+	return sk;
+}
+EXPORT_SYMBOL_GPL(vsock_find_connected_socket);
+
+static bool vsock_in_bound_table(struct vsock_sock *vsk)
+{
+	bool ret;
+
+	spin_lock_bh(&vsock_table_lock);
+	ret = __vsock_in_bound_table(vsk);
+	spin_unlock_bh(&vsock_table_lock);
+
+	return ret;
+}
+
+static bool vsock_in_connected_table(struct vsock_sock *vsk)
+{
+	bool ret;
+
+	spin_lock_bh(&vsock_table_lock);
+	ret = __vsock_in_connected_table(vsk);
+	spin_unlock_bh(&vsock_table_lock);
+
+	return ret;
+}
+
+void vsock_for_each_connected_socket(void (*fn)(struct sock *sk))
+{
+	int i;
+
+	spin_lock_bh(&vsock_table_lock);
+
+	for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) {
+		struct vsock_sock *vsk;
+		list_for_each_entry(vsk, &vsock_connected_table[i],
+				    connected_table);
+			fn(sk_vsock(vsk));
+	}
+
+	spin_unlock_bh(&vsock_table_lock);
+}
+EXPORT_SYMBOL_GPL(vsock_for_each_connected_socket);
+
+void vsock_add_pending(struct sock *listener, struct sock *pending)
+{
+	struct vsock_sock *vlistener;
+	struct vsock_sock *vpending;
+
+	vlistener = vsock_sk(listener);
+	vpending = vsock_sk(pending);
+
+	sock_hold(pending);
+	sock_hold(listener);
+	list_add_tail(&vpending->pending_links, &vlistener->pending_links);
+}
+EXPORT_SYMBOL_GPL(vsock_add_pending);
+
+void vsock_remove_pending(struct sock *listener, struct sock *pending)
+{
+	struct vsock_sock *vpending = vsock_sk(pending);
+
+	list_del_init(&vpending->pending_links);
+	sock_put(listener);
+	sock_put(pending);
+}
+EXPORT_SYMBOL_GPL(vsock_remove_pending);
+
+void vsock_enqueue_accept(struct sock *listener, struct sock *connected)
+{
+	struct vsock_sock *vlistener;
+	struct vsock_sock *vconnected;
+
+	vlistener = vsock_sk(listener);
+	vconnected = vsock_sk(connected);
+
+	sock_hold(connected);
+	sock_hold(listener);
+	list_add_tail(&vconnected->accept_queue, &vlistener->accept_queue);
+}
+EXPORT_SYMBOL_GPL(vsock_enqueue_accept);
+
+static struct sock *vsock_dequeue_accept(struct sock *listener)
+{
+	struct vsock_sock *vlistener;
+	struct vsock_sock *vconnected;
+
+	vlistener = vsock_sk(listener);
+
+	if (list_empty(&vlistener->accept_queue))
+		return NULL;
+
+	vconnected = list_entry(vlistener->accept_queue.next,
+				struct vsock_sock, accept_queue);
+
+	list_del_init(&vconnected->accept_queue);
+	sock_put(listener);
+	/* The caller will need a reference on the connected socket so we let
+	 * it call sock_put().
+	 */
+
+	return sk_vsock(vconnected);
+}
+
+static bool vsock_is_accept_queue_empty(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+	return list_empty(&vsk->accept_queue);
+}
+
+static bool vsock_is_pending(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+	return !list_empty(&vsk->pending_links);
+}
+
+static int vsock_send_shutdown(struct sock *sk, int mode)
+{
+	return transport->shutdown(vsock_sk(sk), mode);
+}
+
+void vsock_pending_work(struct work_struct *work)
+{
+	struct sock *sk;
+	struct sock *listener;
+	struct vsock_sock *vsk;
+	bool cleanup;
+
+	vsk = container_of(work, struct vsock_sock, dwork.work);
+	sk = sk_vsock(vsk);
+	listener = vsk->listener;
+	cleanup = true;
+
+	lock_sock(listener);
+	lock_sock(sk);
+
+	if (vsock_is_pending(sk)) {
+		vsock_remove_pending(listener, sk);
+	} else if (!vsk->rejected) {
+		/* We are not on the pending list and accept() did not reject
+		 * us, so we must have been accepted by our user process.  We
+		 * just need to drop our references to the sockets and be on
+		 * our way.
+		 */
+		cleanup = false;
+		goto out;
+	}
+
+	listener->sk_ack_backlog--;
+
+	/* We need to remove ourself from the global connected sockets list so
+	 * incoming packets can't find this socket, and to reduce the reference
+	 * count.
+	 */
+	if (vsock_in_connected_table(vsk))
+		vsock_remove_connected(vsk);
+
+	sk->sk_state = SS_FREE;
+
+out:
+	release_sock(sk);
+	release_sock(listener);
+	if (cleanup)
+		sock_put(sk);
+
+	sock_put(sk);
+	sock_put(listener);
+}
+EXPORT_SYMBOL_GPL(vsock_pending_work);
+
+/**** SOCKET OPERATIONS ****/
+
+static int __vsock_bind_stream(struct vsock_sock *vsk,
+			       struct sockaddr_vm *addr)
+{
+	static u32 port = LAST_RESERVED_PORT + 1;
+	struct sockaddr_vm new_addr;
+
+	vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port);
+
+	if (addr->svm_port == VMADDR_PORT_ANY) {
+		bool found = false;
+		unsigned int i;
+
+		for (i = 0; i < MAX_PORT_RETRIES; i++) {
+			if (port <= LAST_RESERVED_PORT)
+				port = LAST_RESERVED_PORT + 1;
+
+			new_addr.svm_port = port++;
+
+			if (!__vsock_find_bound_socket(&new_addr)) {
+				found = true;
+				break;
+			}
+		}
+
+		if (!found)
+			return -EADDRNOTAVAIL;
+	} else {
+		/* If port is in reserved range, ensure caller
+		 * has necessary privileges.
+		 */
+		if (addr->svm_port <= LAST_RESERVED_PORT &&
+		    !capable(CAP_NET_BIND_SERVICE)) {
+			return -EACCES;
+		}
+
+		if (__vsock_find_bound_socket(&new_addr))
+			return -EADDRINUSE;
+	}
+
+	vsock_addr_init(&vsk->local_addr, new_addr.svm_cid, new_addr.svm_port);
+
+	/* Remove stream sockets from the unbound list and add them to the hash
+	 * table for easy lookup by its address.  The unbound list is simply an
+	 * extra entry at the end of the hash table, a trick used by AF_UNIX.
+	 */
+	__vsock_remove_bound(vsk);
+	__vsock_insert_bound(vsock_bound_sockets(&vsk->local_addr), vsk);
+
+	return 0;
+}
+
+static int __vsock_bind_dgram(struct vsock_sock *vsk,
+			      struct sockaddr_vm *addr)
+{
+	return transport->dgram_bind(vsk, addr);
+}
+
+static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+	u32 cid;
+	int retval;
+
+	/* First ensure this socket isn't already bound. */
+	if (vsock_addr_bound(&vsk->local_addr))
+		return -EINVAL;
+
+	/* Now bind to the provided address or select appropriate values if
+	 * none are provided (VMADDR_CID_ANY and VMADDR_PORT_ANY).  Note that
+	 * like AF_INET prevents binding to a non-local IP address (in most
+	 * cases), we only allow binding to the local CID.
+	 */
+	cid = transport->get_local_cid();
+	if (addr->svm_cid != cid && addr->svm_cid != VMADDR_CID_ANY)
+		return -EADDRNOTAVAIL;
+
+	switch (sk->sk_socket->type) {
+	case SOCK_STREAM:
+		spin_lock_bh(&vsock_table_lock);
+		retval = __vsock_bind_stream(vsk, addr);
+		spin_unlock_bh(&vsock_table_lock);
+		break;
+
+	case SOCK_DGRAM:
+		retval = __vsock_bind_dgram(vsk, addr);
+		break;
+
+	default:
+		retval = -EINVAL;
+		break;
+	}
+
+	return retval;
+}
+
+struct sock *__vsock_create(struct net *net,
+			    struct socket *sock,
+			    struct sock *parent,
+			    gfp_t priority,
+			    unsigned short type)
+{
+	struct sock *sk;
+	struct vsock_sock *psk;
+	struct vsock_sock *vsk;
+
+	sk = sk_alloc(net, AF_VSOCK, priority, &vsock_proto);
+	if (!sk)
+		return NULL;
+
+	sock_init_data(sock, sk);
+
+	/* sk->sk_type is normally set in sock_init_data, but only if sock is
+	 * non-NULL. We make sure that our sockets always have a type by
+	 * setting it here if needed.
+	 */
+	if (!sock)
+		sk->sk_type = type;
+
+	vsk = vsock_sk(sk);
+	vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+	vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+
+	sk->sk_destruct = vsock_sk_destruct;
+	sk->sk_backlog_rcv = vsock_queue_rcv_skb;
+	sk->sk_state = 0;
+	sock_reset_flag(sk, SOCK_DONE);
+
+	INIT_LIST_HEAD(&vsk->bound_table);
+	INIT_LIST_HEAD(&vsk->connected_table);
+	vsk->listener = NULL;
+	INIT_LIST_HEAD(&vsk->pending_links);
+	INIT_LIST_HEAD(&vsk->accept_queue);
+	vsk->rejected = false;
+	vsk->sent_request = false;
+	vsk->ignore_connecting_rst = false;
+	vsk->peer_shutdown = 0;
+
+	psk = parent ? vsock_sk(parent) : NULL;
+	if (parent) {
+		vsk->trusted = psk->trusted;
+		vsk->owner = get_cred(psk->owner);
+		vsk->connect_timeout = psk->connect_timeout;
+	} else {
+		vsk->trusted = capable(CAP_NET_ADMIN);
+		vsk->owner = get_current_cred();
+		vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT;
+	}
+
+	if (transport->init(vsk, psk) < 0) {
+		sk_free(sk);
+		return NULL;
+	}
+
+	if (sock)
+		vsock_insert_unbound(vsk);
+
+	return sk;
+}
+EXPORT_SYMBOL_GPL(__vsock_create);
+
+static void __vsock_release(struct sock *sk)
+{
+	if (sk) {
+		struct sk_buff *skb;
+		struct sock *pending;
+		struct vsock_sock *vsk;
+
+		vsk = vsock_sk(sk);
+		pending = NULL;	/* Compiler warning. */
+
+		if (vsock_in_bound_table(vsk))
+			vsock_remove_bound(vsk);
+
+		if (vsock_in_connected_table(vsk))
+			vsock_remove_connected(vsk);
+
+		transport->release(vsk);
+
+		lock_sock(sk);
+		sock_orphan(sk);
+		sk->sk_shutdown = SHUTDOWN_MASK;
+
+		while ((skb = skb_dequeue(&sk->sk_receive_queue)))
+			kfree_skb(skb);
+
+		/* Clean up any sockets that never were accepted. */
+		while ((pending = vsock_dequeue_accept(sk)) != NULL) {
+			__vsock_release(pending);
+			sock_put(pending);
+		}
+
+		release_sock(sk);
+		sock_put(sk);
+	}
+}
+
+static void vsock_sk_destruct(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	transport->destruct(vsk);
+
+	/* When clearing these addresses, there's no need to set the family and
+	 * possibly register the address family with the kernel.
+	 */
+	vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+	vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+
+	put_cred(vsk->owner);
+}
+
+static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+	int err;
+
+	err = sock_queue_rcv_skb(sk, skb);
+	if (err)
+		kfree_skb(skb);
+
+	return err;
+}
+
+s64 vsock_stream_has_data(struct vsock_sock *vsk)
+{
+	return transport->stream_has_data(vsk);
+}
+EXPORT_SYMBOL_GPL(vsock_stream_has_data);
+
+s64 vsock_stream_has_space(struct vsock_sock *vsk)
+{
+	return transport->stream_has_space(vsk);
+}
+EXPORT_SYMBOL_GPL(vsock_stream_has_space);
+
+static int vsock_release(struct socket *sock)
+{
+	__vsock_release(sock->sk);
+	sock->sk = NULL;
+	sock->state = SS_FREE;
+
+	return 0;
+}
+
+static int
+vsock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
+{
+	int err;
+	struct sock *sk;
+	struct sockaddr_vm *vm_addr;
+
+	sk = sock->sk;
+
+	if (vsock_addr_cast(addr, addr_len, &vm_addr) != 0)
+		return -EINVAL;
+
+	lock_sock(sk);
+	err = __vsock_bind(sk, vm_addr);
+	release_sock(sk);
+
+	return err;
+}
+
+static int vsock_getname(struct socket *sock,
+			 struct sockaddr *addr, int *addr_len, int peer)
+{
+	int err;
+	struct sock *sk;
+	struct vsock_sock *vsk;
+	struct sockaddr_vm *vm_addr;
+
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+	err = 0;
+
+	lock_sock(sk);
+
+	if (peer) {
+		if (sock->state != SS_CONNECTED) {
+			err = -ENOTCONN;
+			goto out;
+		}
+		vm_addr = &vsk->remote_addr;
+	} else {
+		vm_addr = &vsk->local_addr;
+	}
+
+	if (!vm_addr) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* sys_getsockname() and sys_getpeername() pass us a
+	 * MAX_SOCK_ADDR-sized buffer and don't set addr_len.  Unfortunately
+	 * that macro is defined in socket.c instead of .h, so we hardcode its
+	 * value here.
+	 */
+	BUILD_BUG_ON(sizeof(*vm_addr) > 128);
+	memcpy(addr, vm_addr, sizeof(*vm_addr));
+	*addr_len = sizeof(*vm_addr);
+
+out:
+	release_sock(sk);
+	return err;
+}
+
+static int vsock_shutdown(struct socket *sock, int mode)
+{
+	int err;
+	struct sock *sk;
+
+	/* User level uses SHUT_RD (0) and SHUT_WR (1), but the kernel uses
+	 * RCV_SHUTDOWN (1) and SEND_SHUTDOWN (2), so we must increment mode
+	 * here like the other address families do.  Note also that the
+	 * increment makes SHUT_RDWR (2) into RCV_SHUTDOWN | SEND_SHUTDOWN (3),
+	 * which is what we want.
+	 */
+	mode++;
+
+	if ((mode & ~SHUTDOWN_MASK) || !mode)
+		return -EINVAL;
+
+	/* If this is a STREAM socket and it is not connected then bail out
+	 * immediately.  If it is a DGRAM socket then we must first kick the
+	 * socket so that it wakes up from any sleeping calls, for example
+	 * recv(), and then afterwards return the error.
+	 */
+
+	sk = sock->sk;
+	if (sock->state == SS_UNCONNECTED) {
+		err = -ENOTCONN;
+		if (sk->sk_type == SOCK_STREAM)
+			return err;
+	} else {
+		sock->state = SS_DISCONNECTING;
+		err = 0;
+	}
+
+	/* Receive and send shutdowns are treated alike. */
+	mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN);
+	if (mode) {
+		lock_sock(sk);
+		sk->sk_shutdown |= mode;
+		sk->sk_state_change(sk);
+		release_sock(sk);
+
+		if (sk->sk_type == SOCK_STREAM) {
+			sock_reset_flag(sk, SOCK_DONE);
+			vsock_send_shutdown(sk, mode);
+		}
+	}
+
+	return err;
+}
+
+static unsigned int vsock_poll(struct file *file, struct socket *sock,
+			       poll_table *wait)
+{
+	struct sock *sk;
+	unsigned int mask;
+	struct vsock_sock *vsk;
+
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+
+	poll_wait(file, sk_sleep(sk), wait);
+	mask = 0;
+
+	if (sk->sk_err)
+		/* Signify that there has been an error on this socket. */
+		mask |= POLLERR;
+
+	/* INET sockets treat local write shutdown and peer write shutdown as a
+	 * case of POLLHUP set.
+	 */
+	if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
+	    ((sk->sk_shutdown & SEND_SHUTDOWN) &&
+	     (vsk->peer_shutdown & SEND_SHUTDOWN))) {
+		mask |= POLLHUP;
+	}
+
+	if (sk->sk_shutdown & RCV_SHUTDOWN ||
+	    vsk->peer_shutdown & SEND_SHUTDOWN) {
+		mask |= POLLRDHUP;
+	}
+
+	if (sock->type == SOCK_DGRAM) {
+		/* For datagram sockets we can read if there is something in
+		 * the queue and write as long as the socket isn't shutdown for
+		 * sending.
+		 */
+		if (!skb_queue_empty(&sk->sk_receive_queue) ||
+		    (sk->sk_shutdown & RCV_SHUTDOWN)) {
+			mask |= POLLIN | POLLRDNORM;
+		}
+
+		if (!(sk->sk_shutdown & SEND_SHUTDOWN))
+			mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+
+	} else if (sock->type == SOCK_STREAM) {
+		lock_sock(sk);
+
+		/* Listening sockets that have connections in their accept
+		 * queue can be read.
+		 */
+		if (sk->sk_state == SS_LISTEN
+		    && !vsock_is_accept_queue_empty(sk))
+			mask |= POLLIN | POLLRDNORM;
+
+		/* If there is something in the queue then we can read. */
+		if (transport->stream_is_active(vsk) &&
+		    !(sk->sk_shutdown & RCV_SHUTDOWN)) {
+			bool data_ready_now = false;
+			int ret = transport->notify_poll_in(
+					vsk, 1, &data_ready_now);
+			if (ret < 0) {
+				mask |= POLLERR;
+			} else {
+				if (data_ready_now)
+					mask |= POLLIN | POLLRDNORM;
+
+			}
+		}
+
+		/* Sockets whose connections have been closed, reset, or
+		 * terminated should also be considered read, and we check the
+		 * shutdown flag for that.
+		 */
+		if (sk->sk_shutdown & RCV_SHUTDOWN ||
+		    vsk->peer_shutdown & SEND_SHUTDOWN) {
+			mask |= POLLIN | POLLRDNORM;
+		}
+
+		/* Connected sockets that can produce data can be written. */
+		if (sk->sk_state == SS_CONNECTED) {
+			if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
+				bool space_avail_now = false;
+				int ret = transport->notify_poll_out(
+						vsk, 1, &space_avail_now);
+				if (ret < 0) {
+					mask |= POLLERR;
+				} else {
+					if (space_avail_now)
+						/* Remove POLLWRBAND since INET
+						 * sockets are not setting it.
+						 */
+						mask |= POLLOUT | POLLWRNORM;
+
+				}
+			}
+		}
+
+		/* Simulate INET socket poll behaviors, which sets
+		 * POLLOUT|POLLWRNORM when peer is closed and nothing to read,
+		 * but local send is not shutdown.
+		 */
+		if (sk->sk_state == SS_UNCONNECTED) {
+			if (!(sk->sk_shutdown & SEND_SHUTDOWN))
+				mask |= POLLOUT | POLLWRNORM;
+
+		}
+
+		release_sock(sk);
+	}
+
+	return mask;
+}
+
+static int vsock_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
+			       struct msghdr *msg, size_t len)
+{
+	int err;
+	struct sock *sk;
+	struct vsock_sock *vsk;
+	struct sockaddr_vm *remote_addr;
+
+	if (msg->msg_flags & MSG_OOB)
+		return -EOPNOTSUPP;
+
+	/* For now, MSG_DONTWAIT is always assumed... */
+	err = 0;
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+
+	lock_sock(sk);
+
+	if (!vsock_addr_bound(&vsk->local_addr)) {
+		struct sockaddr_vm local_addr;
+
+		vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+		err = __vsock_bind(sk, &local_addr);
+		if (err != 0)
+			goto out;
+
+	}
+
+	/* If the provided message contains an address, use that.  Otherwise
+	 * fall back on the socket's remote handle (if it has been connected).
+	 */
+	if (msg->msg_name &&
+	    vsock_addr_cast(msg->msg_name, msg->msg_namelen,
+			    &remote_addr) == 0) {
+		/* Ensure this address is of the right type and is a valid
+		 * destination.
+		 */
+
+		if (remote_addr->svm_cid == VMADDR_CID_ANY)
+			remote_addr->svm_cid = transport->get_local_cid();
+
+		if (!vsock_addr_bound(remote_addr)) {
+			err = -EINVAL;
+			goto out;
+		}
+	} else if (sock->state == SS_CONNECTED) {
+		remote_addr = &vsk->remote_addr;
+
+		if (remote_addr->svm_cid == VMADDR_CID_ANY)
+			remote_addr->svm_cid = transport->get_local_cid();
+
+		/* XXX Should connect() or this function ensure remote_addr is
+		 * bound?
+		 */
+		if (!vsock_addr_bound(&vsk->remote_addr)) {
+			err = -EINVAL;
+			goto out;
+		}
+	} else {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (!transport->dgram_allow(remote_addr->svm_cid,
+				    remote_addr->svm_port)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = transport->dgram_enqueue(vsk, remote_addr, msg->msg_iov, len);
+
+out:
+	release_sock(sk);
+	return err;
+}
+
+static int vsock_dgram_connect(struct socket *sock,
+			       struct sockaddr *addr, int addr_len, int flags)
+{
+	int err;
+	struct sock *sk;
+	struct vsock_sock *vsk;
+	struct sockaddr_vm *remote_addr;
+
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+
+	err = vsock_addr_cast(addr, addr_len, &remote_addr);
+	if (err == -EAFNOSUPPORT && remote_addr->svm_family == AF_UNSPEC) {
+		lock_sock(sk);
+		vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY,
+				VMADDR_PORT_ANY);
+		sock->state = SS_UNCONNECTED;
+		release_sock(sk);
+		return 0;
+	} else if (err != 0)
+		return -EINVAL;
+
+	lock_sock(sk);
+
+	if (!vsock_addr_bound(&vsk->local_addr)) {
+		struct sockaddr_vm local_addr;
+
+		vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+		err = __vsock_bind(sk, &local_addr);
+		if (err != 0)
+			goto out;
+
+	}
+
+	if (!transport->dgram_allow(remote_addr->svm_cid,
+				    remote_addr->svm_port)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	memcpy(&vsk->remote_addr, remote_addr, sizeof(vsk->remote_addr));
+	sock->state = SS_CONNECTED;
+
+out:
+	release_sock(sk);
+	return err;
+}
+
+static int vsock_dgram_recvmsg(struct kiocb *kiocb, struct socket *sock,
+			       struct msghdr *msg, size_t len, int flags)
+{
+	return transport->dgram_dequeue(kiocb, vsock_sk(sock->sk), msg, len,
+					flags);
+}
+
+static const struct proto_ops vsock_dgram_ops = {
+	.family = PF_VSOCK,
+	.owner = THIS_MODULE,
+	.release = vsock_release,
+	.bind = vsock_bind,
+	.connect = vsock_dgram_connect,
+	.socketpair = sock_no_socketpair,
+	.accept = sock_no_accept,
+	.getname = vsock_getname,
+	.poll = vsock_poll,
+	.ioctl = sock_no_ioctl,
+	.listen = sock_no_listen,
+	.shutdown = vsock_shutdown,
+	.setsockopt = sock_no_setsockopt,
+	.getsockopt = sock_no_getsockopt,
+	.sendmsg = vsock_dgram_sendmsg,
+	.recvmsg = vsock_dgram_recvmsg,
+	.mmap = sock_no_mmap,
+	.sendpage = sock_no_sendpage,
+};
+
+static void vsock_connect_timeout(struct work_struct *work)
+{
+	struct sock *sk;
+	struct vsock_sock *vsk;
+
+	vsk = container_of(work, struct vsock_sock, dwork.work);
+	sk = sk_vsock(vsk);
+
+	lock_sock(sk);
+	if (sk->sk_state == SS_CONNECTING &&
+	    (sk->sk_shutdown != SHUTDOWN_MASK)) {
+		sk->sk_state = SS_UNCONNECTED;
+		sk->sk_err = ETIMEDOUT;
+		sk->sk_error_report(sk);
+	}
+	release_sock(sk);
+
+	sock_put(sk);
+}
+
+static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
+				int addr_len, int flags)
+{
+	int err;
+	struct sock *sk;
+	struct vsock_sock *vsk;
+	struct sockaddr_vm *remote_addr;
+	long timeout;
+	DEFINE_WAIT(wait);
+
+	err = 0;
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+
+	lock_sock(sk);
+
+	/* XXX AF_UNSPEC should make us disconnect like AF_INET. */
+	switch (sock->state) {
+	case SS_CONNECTED:
+		err = -EISCONN;
+		goto out;
+	case SS_DISCONNECTING:
+		err = -EINVAL;
+		goto out;
+	case SS_CONNECTING:
+		/* This continues on so we can move sock into the SS_CONNECTED
+		 * state once the connection has completed (at which point err
+		 * will be set to zero also).  Otherwise, we will either wait
+		 * for the connection or return -EALREADY should this be a
+		 * non-blocking call.
+		 */
+		err = -EALREADY;
+		break;
+	default:
+		if ((sk->sk_state == SS_LISTEN) ||
+		    vsock_addr_cast(addr, addr_len, &remote_addr) != 0) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		/* The hypervisor and well-known contexts do not have socket
+		 * endpoints.
+		 */
+		if (!transport->stream_allow(remote_addr->svm_cid,
+					     remote_addr->svm_port)) {
+			err = -ENETUNREACH;
+			goto out;
+		}
+
+		/* Set the remote address that we are connecting to. */
+		memcpy(&vsk->remote_addr, remote_addr,
+		       sizeof(vsk->remote_addr));
+
+		/* Autobind this socket to the local address if necessary. */
+		if (!vsock_addr_bound(&vsk->local_addr)) {
+			struct sockaddr_vm local_addr;
+
+			vsock_addr_init(&local_addr, VMADDR_CID_ANY,
+					VMADDR_PORT_ANY);
+			err = __vsock_bind(sk, &local_addr);
+			if (err != 0)
+				goto out;
+
+		}
+
+		sk->sk_state = SS_CONNECTING;
+
+		err = transport->connect(vsk);
+		if (err < 0)
+			goto out;
+
+		/* Mark sock as connecting and set the error code to in
+		 * progress in case this is a non-blocking connect.
+		 */
+		sock->state = SS_CONNECTING;
+		err = -EINPROGRESS;
+	}
+
+	/* The receive path will handle all communication until we are able to
+	 * enter the connected state.  Here we wait for the connection to be
+	 * completed or a notification of an error.
+	 */
+	timeout = vsk->connect_timeout;
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+
+	while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) {
+		if (flags & O_NONBLOCK) {
+			/* If we're not going to block, we schedule a timeout
+			 * function to generate a timeout on the connection
+			 * attempt, in case the peer doesn't respond in a
+			 * timely manner. We hold on to the socket until the
+			 * timeout fires.
+			 */
+			sock_hold(sk);
+			INIT_DELAYED_WORK(&vsk->dwork,
+					  vsock_connect_timeout);
+			schedule_delayed_work(&vsk->dwork, timeout);
+
+			/* Skip ahead to preserve error code set above. */
+			goto out_wait;
+		}
+
+		release_sock(sk);
+		timeout = schedule_timeout(timeout);
+		lock_sock(sk);
+
+		if (signal_pending(current)) {
+			err = sock_intr_errno(timeout);
+			goto out_wait_error;
+		} else if (timeout == 0) {
+			err = -ETIMEDOUT;
+			goto out_wait_error;
+		}
+
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+	}
+
+	if (sk->sk_err) {
+		err = -sk->sk_err;
+		goto out_wait_error;
+	} else
+		err = 0;
+
+out_wait:
+	finish_wait(sk_sleep(sk), &wait);
+out:
+	release_sock(sk);
+	return err;
+
+out_wait_error:
+	sk->sk_state = SS_UNCONNECTED;
+	sock->state = SS_UNCONNECTED;
+	goto out_wait;
+}
+
+static int vsock_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+	struct sock *listener;
+	int err;
+	struct sock *connected;
+	struct vsock_sock *vconnected;
+	long timeout;
+	DEFINE_WAIT(wait);
+
+	err = 0;
+	listener = sock->sk;
+
+	lock_sock(listener);
+
+	if (sock->type != SOCK_STREAM) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (listener->sk_state != SS_LISTEN) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Wait for children sockets to appear; these are the new sockets
+	 * created upon connection establishment.
+	 */
+	timeout = sock_sndtimeo(listener, flags & O_NONBLOCK);
+	prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);
+
+	while ((connected = vsock_dequeue_accept(listener)) == NULL &&
+	       listener->sk_err == 0) {
+		release_sock(listener);
+		timeout = schedule_timeout(timeout);
+		lock_sock(listener);
+
+		if (signal_pending(current)) {
+			err = sock_intr_errno(timeout);
+			goto out_wait;
+		} else if (timeout == 0) {
+			err = -EAGAIN;
+			goto out_wait;
+		}
+
+		prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);
+	}
+
+	if (listener->sk_err)
+		err = -listener->sk_err;
+
+	if (connected) {
+		listener->sk_ack_backlog--;
+
+		lock_sock(connected);
+		vconnected = vsock_sk(connected);
+
+		/* If the listener socket has received an error, then we should
+		 * reject this socket and return.  Note that we simply mark the
+		 * socket rejected, drop our reference, and let the cleanup
+		 * function handle the cleanup; the fact that we found it in
+		 * the listener's accept queue guarantees that the cleanup
+		 * function hasn't run yet.
+		 */
+		if (err) {
+			vconnected->rejected = true;
+			release_sock(connected);
+			sock_put(connected);
+			goto out_wait;
+		}
+
+		newsock->state = SS_CONNECTED;
+		sock_graft(connected, newsock);
+		release_sock(connected);
+		sock_put(connected);
+	}
+
+out_wait:
+	finish_wait(sk_sleep(listener), &wait);
+out:
+	release_sock(listener);
+	return err;
+}
+
+static int vsock_listen(struct socket *sock, int backlog)
+{
+	int err;
+	struct sock *sk;
+	struct vsock_sock *vsk;
+
+	sk = sock->sk;
+
+	lock_sock(sk);
+
+	if (sock->type != SOCK_STREAM) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (sock->state != SS_UNCONNECTED) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	vsk = vsock_sk(sk);
+
+	if (!vsock_addr_bound(&vsk->local_addr)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	sk->sk_max_ack_backlog = backlog;
+	sk->sk_state = SS_LISTEN;
+
+	err = 0;
+
+out:
+	release_sock(sk);
+	return err;
+}
+
+static int vsock_stream_setsockopt(struct socket *sock,
+				   int level,
+				   int optname,
+				   char __user *optval,
+				   unsigned int optlen)
+{
+	int err;
+	struct sock *sk;
+	struct vsock_sock *vsk;
+	u64 val;
+
+	if (level != AF_VSOCK)
+		return -ENOPROTOOPT;
+
+#define COPY_IN(_v)                                       \
+	do {						  \
+		if (optlen < sizeof(_v)) {		  \
+			err = -EINVAL;			  \
+			goto exit;			  \
+		}					  \
+		if (copy_from_user(&_v, optval, sizeof(_v)) != 0) {	\
+			err = -EFAULT;					\
+			goto exit;					\
+		}							\
+	} while (0)
+
+	err = 0;
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+
+	lock_sock(sk);
+
+	switch (optname) {
+	case SO_VM_SOCKETS_BUFFER_SIZE:
+		COPY_IN(val);
+		transport->set_buffer_size(vsk, val);
+		break;
+
+	case SO_VM_SOCKETS_BUFFER_MAX_SIZE:
+		COPY_IN(val);
+		transport->set_max_buffer_size(vsk, val);
+		break;
+
+	case SO_VM_SOCKETS_BUFFER_MIN_SIZE:
+		COPY_IN(val);
+		transport->set_min_buffer_size(vsk, val);
+		break;
+
+	case SO_VM_SOCKETS_CONNECT_TIMEOUT: {
+		struct timeval tv;
+		COPY_IN(tv);
+		if (tv.tv_sec >= 0 && tv.tv_usec < USEC_PER_SEC &&
+		    tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)) {
+			vsk->connect_timeout = tv.tv_sec * HZ +
+			    DIV_ROUND_UP(tv.tv_usec, (1000000 / HZ));
+			if (vsk->connect_timeout == 0)
+				vsk->connect_timeout =
+				    VSOCK_DEFAULT_CONNECT_TIMEOUT;
+
+		} else {
+			err = -ERANGE;
+		}
+		break;
+	}
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	}
+
+#undef COPY_IN
+
+exit:
+	release_sock(sk);
+	return err;
+}
+
+static int vsock_stream_getsockopt(struct socket *sock,
+				   int level, int optname,
+				   char __user *optval,
+				   int __user *optlen)
+{
+	int err;
+	int len;
+	struct sock *sk;
+	struct vsock_sock *vsk;
+	u64 val;
+
+	if (level != AF_VSOCK)
+		return -ENOPROTOOPT;
+
+	err = get_user(len, optlen);
+	if (err != 0)
+		return err;
+
+#define COPY_OUT(_v)                            \
+	do {					\
+		if (len < sizeof(_v))		\
+			return -EINVAL;		\
+						\
+		len = sizeof(_v);		\
+		if (copy_to_user(optval, &_v, len) != 0)	\
+			return -EFAULT;				\
+								\
+	} while (0)
+
+	err = 0;
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+
+	switch (optname) {
+	case SO_VM_SOCKETS_BUFFER_SIZE:
+		val = transport->get_buffer_size(vsk);
+		COPY_OUT(val);
+		break;
+
+	case SO_VM_SOCKETS_BUFFER_MAX_SIZE:
+		val = transport->get_max_buffer_size(vsk);
+		COPY_OUT(val);
+		break;
+
+	case SO_VM_SOCKETS_BUFFER_MIN_SIZE:
+		val = transport->get_min_buffer_size(vsk);
+		COPY_OUT(val);
+		break;
+
+	case SO_VM_SOCKETS_CONNECT_TIMEOUT: {
+		struct timeval tv;
+		tv.tv_sec = vsk->connect_timeout / HZ;
+		tv.tv_usec =
+		    (vsk->connect_timeout -
+		     tv.tv_sec * HZ) * (1000000 / HZ);
+		COPY_OUT(tv);
+		break;
+	}
+	default:
+		return -ENOPROTOOPT;
+	}
+
+	err = put_user(len, optlen);
+	if (err != 0)
+		return -EFAULT;
+
+#undef COPY_OUT
+
+	return 0;
+}
+
+static int vsock_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
+				struct msghdr *msg, size_t len)
+{
+	struct sock *sk;
+	struct vsock_sock *vsk;
+	ssize_t total_written;
+	long timeout;
+	int err;
+	struct vsock_transport_send_notify_data send_data;
+
+	DEFINE_WAIT(wait);
+
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+	total_written = 0;
+	err = 0;
+
+	if (msg->msg_flags & MSG_OOB)
+		return -EOPNOTSUPP;
+
+	lock_sock(sk);
+
+	/* Callers should not provide a destination with stream sockets. */
+	if (msg->msg_namelen) {
+		err = sk->sk_state == SS_CONNECTED ? -EISCONN : -EOPNOTSUPP;
+		goto out;
+	}
+
+	/* Send data only if both sides are not shutdown in the direction. */
+	if (sk->sk_shutdown & SEND_SHUTDOWN ||
+	    vsk->peer_shutdown & RCV_SHUTDOWN) {
+		err = -EPIPE;
+		goto out;
+	}
+
+	if (sk->sk_state != SS_CONNECTED ||
+	    !vsock_addr_bound(&vsk->local_addr)) {
+		err = -ENOTCONN;
+		goto out;
+	}
+
+	if (!vsock_addr_bound(&vsk->remote_addr)) {
+		err = -EDESTADDRREQ;
+		goto out;
+	}
+
+	/* Wait for room in the produce queue to enqueue our user's data. */
+	timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+
+	err = transport->notify_send_init(vsk, &send_data);
+	if (err < 0)
+		goto out;
+
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+
+	while (total_written < len) {
+		ssize_t written;
+
+		while (vsock_stream_has_space(vsk) == 0 &&
+		       sk->sk_err == 0 &&
+		       !(sk->sk_shutdown & SEND_SHUTDOWN) &&
+		       !(vsk->peer_shutdown & RCV_SHUTDOWN)) {
+
+			/* Don't wait for non-blocking sockets. */
+			if (timeout == 0) {
+				err = -EAGAIN;
+				goto out_wait;
+			}
+
+			err = transport->notify_send_pre_block(vsk, &send_data);
+			if (err < 0)
+				goto out_wait;
+
+			release_sock(sk);
+			timeout = schedule_timeout(timeout);
+			lock_sock(sk);
+			if (signal_pending(current)) {
+				err = sock_intr_errno(timeout);
+				goto out_wait;
+			} else if (timeout == 0) {
+				err = -EAGAIN;
+				goto out_wait;
+			}
+
+			prepare_to_wait(sk_sleep(sk), &wait,
+					TASK_INTERRUPTIBLE);
+		}
+
+		/* These checks occur both as part of and after the loop
+		 * conditional since we need to check before and after
+		 * sleeping.
+		 */
+		if (sk->sk_err) {
+			err = -sk->sk_err;
+			goto out_wait;
+		} else if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
+			   (vsk->peer_shutdown & RCV_SHUTDOWN)) {
+			err = -EPIPE;
+			goto out_wait;
+		}
+
+		err = transport->notify_send_pre_enqueue(vsk, &send_data);
+		if (err < 0)
+			goto out_wait;
+
+		/* Note that enqueue will only write as many bytes as are free
+		 * in the produce queue, so we don't need to ensure len is
+		 * smaller than the queue size.  It is the caller's
+		 * responsibility to check how many bytes we were able to send.
+		 */
+
+		written = transport->stream_enqueue(
+				vsk, msg->msg_iov,
+				len - total_written);
+		if (written < 0) {
+			err = -ENOMEM;
+			goto out_wait;
+		}
+
+		total_written += written;
+
+		err = transport->notify_send_post_enqueue(
+				vsk, written, &send_data);
+		if (err < 0)
+			goto out_wait;
+
+	}
+
+out_wait:
+	if (total_written > 0)
+		err = total_written;
+	finish_wait(sk_sleep(sk), &wait);
+out:
+	release_sock(sk);
+	return err;
+}
+
+
+static int
+vsock_stream_recvmsg(struct kiocb *kiocb,
+		     struct socket *sock,
+		     struct msghdr *msg, size_t len, int flags)
+{
+	struct sock *sk;
+	struct vsock_sock *vsk;
+	int err;
+	size_t target;
+	ssize_t copied;
+	long timeout;
+	struct vsock_transport_recv_notify_data recv_data;
+
+	DEFINE_WAIT(wait);
+
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+	err = 0;
+
+	lock_sock(sk);
+
+	if (sk->sk_state != SS_CONNECTED) {
+		/* Recvmsg is supposed to return 0 if a peer performs an
+		 * orderly shutdown. Differentiate between that case and when a
+		 * peer has not connected or a local shutdown occured with the
+		 * SOCK_DONE flag.
+		 */
+		if (sock_flag(sk, SOCK_DONE))
+			err = 0;
+		else
+			err = -ENOTCONN;
+
+		goto out;
+	}
+
+	if (flags & MSG_OOB) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	/* We don't check peer_shutdown flag here since peer may actually shut
+	 * down, but there can be data in the queue that a local socket can
+	 * receive.
+	 */
+	if (sk->sk_shutdown & RCV_SHUTDOWN) {
+		err = 0;
+		goto out;
+	}
+
+	/* It is valid on Linux to pass in a zero-length receive buffer.  This
+	 * is not an error.  We may as well bail out now.
+	 */
+	if (!len) {
+		err = 0;
+		goto out;
+	}
+
+	/* We must not copy less than target bytes into the user's buffer
+	 * before returning successfully, so we wait for the consume queue to
+	 * have that much data to consume before dequeueing.  Note that this
+	 * makes it impossible to handle cases where target is greater than the
+	 * queue size.
+	 */
+	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
+	if (target >= transport->stream_rcvhiwat(vsk)) {
+		err = -ENOMEM;
+		goto out;
+	}
+	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+	copied = 0;
+
+	err = transport->notify_recv_init(vsk, target, &recv_data);
+	if (err < 0)
+		goto out;
+
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+
+	while (1) {
+		s64 ready = vsock_stream_has_data(vsk);
+
+		if (ready < 0) {
+			/* Invalid queue pair content. XXX This should be
+			 * changed to a connection reset in a later change.
+			 */
+
+			err = -ENOMEM;
+			goto out_wait;
+		} else if (ready > 0) {
+			ssize_t read;
+
+			err = transport->notify_recv_pre_dequeue(
+					vsk, target, &recv_data);
+			if (err < 0)
+				break;
+
+			read = transport->stream_dequeue(
+					vsk, msg->msg_iov,
+					len - copied, flags);
+			if (read < 0) {
+				err = -ENOMEM;
+				break;
+			}
+
+			copied += read;
+
+			err = transport->notify_recv_post_dequeue(
+					vsk, target, read,
+					!(flags & MSG_PEEK), &recv_data);
+			if (err < 0)
+				goto out_wait;
+
+			if (read >= target || flags & MSG_PEEK)
+				break;
+
+			target -= read;
+		} else {
+			if (sk->sk_err != 0 || (sk->sk_shutdown & RCV_SHUTDOWN)
+			    || (vsk->peer_shutdown & SEND_SHUTDOWN)) {
+				break;
+			}
+			/* Don't wait for non-blocking sockets. */
+			if (timeout == 0) {
+				err = -EAGAIN;
+				break;
+			}
+
+			err = transport->notify_recv_pre_block(
+					vsk, target, &recv_data);
+			if (err < 0)
+				break;
+
+			release_sock(sk);
+			timeout = schedule_timeout(timeout);
+			lock_sock(sk);
+
+			if (signal_pending(current)) {
+				err = sock_intr_errno(timeout);
+				break;
+			} else if (timeout == 0) {
+				err = -EAGAIN;
+				break;
+			}
+
+			prepare_to_wait(sk_sleep(sk), &wait,
+					TASK_INTERRUPTIBLE);
+		}
+	}
+
+	if (sk->sk_err)
+		err = -sk->sk_err;
+	else if (sk->sk_shutdown & RCV_SHUTDOWN)
+		err = 0;
+
+	if (copied > 0) {
+		/* We only do these additional bookkeeping/notification steps
+		 * if we actually copied something out of the queue pair
+		 * instead of just peeking ahead.
+		 */
+
+		if (!(flags & MSG_PEEK)) {
+			/* If the other side has shutdown for sending and there
+			 * is nothing more to read, then modify the socket
+			 * state.
+			 */
+			if (vsk->peer_shutdown & SEND_SHUTDOWN) {
+				if (vsock_stream_has_data(vsk) <= 0) {
+					sk->sk_state = SS_UNCONNECTED;
+					sock_set_flag(sk, SOCK_DONE);
+					sk->sk_state_change(sk);
+				}
+			}
+		}
+		err = copied;
+	}
+
+out_wait:
+	finish_wait(sk_sleep(sk), &wait);
+out:
+	release_sock(sk);
+	return err;
+}
+
+static const struct proto_ops vsock_stream_ops = {
+	.family = PF_VSOCK,
+	.owner = THIS_MODULE,
+	.release = vsock_release,
+	.bind = vsock_bind,
+	.connect = vsock_stream_connect,
+	.socketpair = sock_no_socketpair,
+	.accept = vsock_accept,
+	.getname = vsock_getname,
+	.poll = vsock_poll,
+	.ioctl = sock_no_ioctl,
+	.listen = vsock_listen,
+	.shutdown = vsock_shutdown,
+	.setsockopt = vsock_stream_setsockopt,
+	.getsockopt = vsock_stream_getsockopt,
+	.sendmsg = vsock_stream_sendmsg,
+	.recvmsg = vsock_stream_recvmsg,
+	.mmap = sock_no_mmap,
+	.sendpage = sock_no_sendpage,
+};
+
+static int vsock_create(struct net *net, struct socket *sock,
+			int protocol, int kern)
+{
+	if (!sock)
+		return -EINVAL;
+
+	if (protocol)
+		return -EPROTONOSUPPORT;
+
+	switch (sock->type) {
+	case SOCK_DGRAM:
+		sock->ops = &vsock_dgram_ops;
+		break;
+	case SOCK_STREAM:
+		sock->ops = &vsock_stream_ops;
+		break;
+	default:
+		return -ESOCKTNOSUPPORT;
+	}
+
+	sock->state = SS_UNCONNECTED;
+
+	return __vsock_create(net, sock, NULL, GFP_KERNEL, 0) ? 0 : -ENOMEM;
+}
+
+static const struct net_proto_family vsock_family_ops = {
+	.family = AF_VSOCK,
+	.create = vsock_create,
+	.owner = THIS_MODULE,
+};
+
+static long vsock_dev_do_ioctl(struct file *filp,
+			       unsigned int cmd, void __user *ptr)
+{
+	u32 __user *p = ptr;
+	int retval = 0;
+
+	switch (cmd) {
+	case IOCTL_VM_SOCKETS_GET_LOCAL_CID:
+		if (put_user(transport->get_local_cid(), p) != 0)
+			retval = -EFAULT;
+		break;
+
+	default:
+		pr_err("Unknown ioctl %d\n", cmd);
+		retval = -EINVAL;
+	}
+
+	return retval;
+}
+
+static long vsock_dev_ioctl(struct file *filp,
+			    unsigned int cmd, unsigned long arg)
+{
+	return vsock_dev_do_ioctl(filp, cmd, (void __user *)arg);
+}
+
+#ifdef CONFIG_COMPAT
+static long vsock_dev_compat_ioctl(struct file *filp,
+				   unsigned int cmd, unsigned long arg)
+{
+	return vsock_dev_do_ioctl(filp, cmd, compat_ptr(arg));
+}
+#endif
+
+static const struct file_operations vsock_device_ops = {
+	.owner		= THIS_MODULE,
+	.unlocked_ioctl	= vsock_dev_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= vsock_dev_compat_ioctl,
+#endif
+	.open		= nonseekable_open,
+};
+
+static struct miscdevice vsock_device = {
+	.name		= "vsock",
+	.minor		= MISC_DYNAMIC_MINOR,
+	.fops		= &vsock_device_ops,
+};
+
+static int __vsock_core_init(void)
+{
+	int err;
+
+	vsock_init_tables();
+
+	err = misc_register(&vsock_device);
+	if (err) {
+		pr_err("Failed to register misc device\n");
+		return -ENOENT;
+	}
+
+	err = proto_register(&vsock_proto, 1);	/* we want our slab */
+	if (err) {
+		pr_err("Cannot register vsock protocol\n");
+		goto err_misc_deregister;
+	}
+
+	err = sock_register(&vsock_family_ops);
+	if (err) {
+		pr_err("could not register af_vsock (%d) address family: %d\n",
+		       AF_VSOCK, err);
+		goto err_unregister_proto;
+	}
+
+	return 0;
+
+err_unregister_proto:
+	proto_unregister(&vsock_proto);
+err_misc_deregister:
+	misc_deregister(&vsock_device);
+	return err;
+}
+
+int vsock_core_init(const struct vsock_transport *t)
+{
+	int retval = mutex_lock_interruptible(&vsock_register_mutex);
+	if (retval)
+		return retval;
+
+	if (transport) {
+		retval = -EBUSY;
+		goto out;
+	}
+
+	transport = t;
+	retval = __vsock_core_init();
+	if (retval)
+		transport = NULL;
+
+out:
+	mutex_unlock(&vsock_register_mutex);
+	return retval;
+}
+EXPORT_SYMBOL_GPL(vsock_core_init);
+
+void vsock_core_exit(void)
+{
+	mutex_lock(&vsock_register_mutex);
+
+	misc_deregister(&vsock_device);
+	sock_unregister(AF_VSOCK);
+	proto_unregister(&vsock_proto);
+
+	/* We do not want the assignment below re-ordered. */
+	mb();
+	transport = NULL;
+
+	mutex_unlock(&vsock_register_mutex);
+}
+EXPORT_SYMBOL_GPL(vsock_core_exit);
+
+MODULE_AUTHOR("VMware, Inc.");
+MODULE_DESCRIPTION("VMware Virtual Socket Family");
+MODULE_VERSION(VSOCK_DRIVER_VERSION_STRING);
+MODULE_LICENSE("GPL v2");
diff --git a/net/vmw_vsock/af_vsock.h b/net/vmw_vsock/af_vsock.h
new file mode 100644
index 00000000000..7d64d3609ec
--- /dev/null
+++ b/net/vmw_vsock/af_vsock.h
@@ -0,0 +1,175 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __AF_VSOCK_H__
+#define __AF_VSOCK_H__
+
+#include <linux/kernel.h>
+#include <linux/workqueue.h>
+#include <linux/vm_sockets.h>
+
+#include "vsock_addr.h"
+
+#define LAST_RESERVED_PORT 1023
+
+#define vsock_sk(__sk)    ((struct vsock_sock *)__sk)
+#define sk_vsock(__vsk)   (&(__vsk)->sk)
+
+struct vsock_sock {
+	/* sk must be the first member. */
+	struct sock sk;
+	struct sockaddr_vm local_addr;
+	struct sockaddr_vm remote_addr;
+	/* Links for the global tables of bound and connected sockets. */
+	struct list_head bound_table;
+	struct list_head connected_table;
+	/* Accessed without the socket lock held. This means it can never be
+	 * modified outsided of socket create or destruct.
+	 */
+	bool trusted;
+	bool cached_peer_allow_dgram;	/* Dgram communication allowed to
+					 * cached peer?
+					 */
+	u32 cached_peer;  /* Context ID of last dgram destination check. */
+	const struct cred *owner;
+	/* Rest are SOCK_STREAM only. */
+	long connect_timeout;
+	/* Listening socket that this came from. */
+	struct sock *listener;
+	/* Used for pending list and accept queue during connection handshake.
+	 * The listening socket is the head for both lists.  Sockets created
+	 * for connection requests are placed in the pending list until they
+	 * are connected, at which point they are put in the accept queue list
+	 * so they can be accepted in accept().  If accept() cannot accept the
+	 * connection, it is marked as rejected so the cleanup function knows
+	 * to clean up the socket.
+	 */
+	struct list_head pending_links;
+	struct list_head accept_queue;
+	bool rejected;
+	struct delayed_work dwork;
+	u32 peer_shutdown;
+	bool sent_request;
+	bool ignore_connecting_rst;
+
+	/* Private to transport. */
+	void *trans;
+};
+
+s64 vsock_stream_has_data(struct vsock_sock *vsk);
+s64 vsock_stream_has_space(struct vsock_sock *vsk);
+void vsock_pending_work(struct work_struct *work);
+struct sock *__vsock_create(struct net *net,
+			    struct socket *sock,
+			    struct sock *parent,
+			    gfp_t priority, unsigned short type);
+
+/**** TRANSPORT ****/
+
+struct vsock_transport_recv_notify_data {
+	u64 data1; /* Transport-defined. */
+	u64 data2; /* Transport-defined. */
+	bool notify_on_block;
+};
+
+struct vsock_transport_send_notify_data {
+	u64 data1; /* Transport-defined. */
+	u64 data2; /* Transport-defined. */
+};
+
+struct vsock_transport {
+	/* Initialize/tear-down socket. */
+	int (*init)(struct vsock_sock *, struct vsock_sock *);
+	void (*destruct)(struct vsock_sock *);
+	void (*release)(struct vsock_sock *);
+
+	/* Connections. */
+	int (*connect)(struct vsock_sock *);
+
+	/* DGRAM. */
+	int (*dgram_bind)(struct vsock_sock *, struct sockaddr_vm *);
+	int (*dgram_dequeue)(struct kiocb *kiocb, struct vsock_sock *vsk,
+			     struct msghdr *msg, size_t len, int flags);
+	int (*dgram_enqueue)(struct vsock_sock *, struct sockaddr_vm *,
+			     struct iovec *, size_t len);
+	bool (*dgram_allow)(u32 cid, u32 port);
+
+	/* STREAM. */
+	/* TODO: stream_bind() */
+	ssize_t (*stream_dequeue)(struct vsock_sock *, struct iovec *,
+				  size_t len, int flags);
+	ssize_t (*stream_enqueue)(struct vsock_sock *, struct iovec *,
+				  size_t len);
+	s64 (*stream_has_data)(struct vsock_sock *);
+	s64 (*stream_has_space)(struct vsock_sock *);
+	u64 (*stream_rcvhiwat)(struct vsock_sock *);
+	bool (*stream_is_active)(struct vsock_sock *);
+	bool (*stream_allow)(u32 cid, u32 port);
+
+	/* Notification. */
+	int (*notify_poll_in)(struct vsock_sock *, size_t, bool *);
+	int (*notify_poll_out)(struct vsock_sock *, size_t, bool *);
+	int (*notify_recv_init)(struct vsock_sock *, size_t,
+		struct vsock_transport_recv_notify_data *);
+	int (*notify_recv_pre_block)(struct vsock_sock *, size_t,
+		struct vsock_transport_recv_notify_data *);
+	int (*notify_recv_pre_dequeue)(struct vsock_sock *, size_t,
+		struct vsock_transport_recv_notify_data *);
+	int (*notify_recv_post_dequeue)(struct vsock_sock *, size_t,
+		ssize_t, bool, struct vsock_transport_recv_notify_data *);
+	int (*notify_send_init)(struct vsock_sock *,
+		struct vsock_transport_send_notify_data *);
+	int (*notify_send_pre_block)(struct vsock_sock *,
+		struct vsock_transport_send_notify_data *);
+	int (*notify_send_pre_enqueue)(struct vsock_sock *,
+		struct vsock_transport_send_notify_data *);
+	int (*notify_send_post_enqueue)(struct vsock_sock *, ssize_t,
+		struct vsock_transport_send_notify_data *);
+
+	/* Shutdown. */
+	int (*shutdown)(struct vsock_sock *, int);
+
+	/* Buffer sizes. */
+	void (*set_buffer_size)(struct vsock_sock *, u64);
+	void (*set_min_buffer_size)(struct vsock_sock *, u64);
+	void (*set_max_buffer_size)(struct vsock_sock *, u64);
+	u64 (*get_buffer_size)(struct vsock_sock *);
+	u64 (*get_min_buffer_size)(struct vsock_sock *);
+	u64 (*get_max_buffer_size)(struct vsock_sock *);
+
+	/* Addressing. */
+	u32 (*get_local_cid)(void);
+};
+
+/**** CORE ****/
+
+int vsock_core_init(const struct vsock_transport *t);
+void vsock_core_exit(void);
+
+/**** UTILS ****/
+
+void vsock_release_pending(struct sock *pending);
+void vsock_add_pending(struct sock *listener, struct sock *pending);
+void vsock_remove_pending(struct sock *listener, struct sock *pending);
+void vsock_enqueue_accept(struct sock *listener, struct sock *connected);
+void vsock_insert_connected(struct vsock_sock *vsk);
+void vsock_remove_bound(struct vsock_sock *vsk);
+void vsock_remove_connected(struct vsock_sock *vsk);
+struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr);
+struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
+					 struct sockaddr_vm *dst);
+void vsock_for_each_connected_socket(void (*fn)(struct sock *sk));
+
+#endif /* __AF_VSOCK_H__ */
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
new file mode 100644
index 00000000000..e8a87cf3707
--- /dev/null
+++ b/net/vmw_vsock/vmci_transport.c
@@ -0,0 +1,2157 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+
+#define EXPORT_SYMTAB
+#include <linux/bitops.h>
+#include <linux/cred.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/kmod.h>
+#include <linux/list.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/net.h>
+#include <linux/poll.h>
+#include <linux/skbuff.h>
+#include <linux/smp.h>
+#include <linux/socket.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+#include <net/sock.h>
+
+#include "af_vsock.h"
+#include "vmci_transport_notify.h"
+
+static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg);
+static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg);
+static void vmci_transport_peer_attach_cb(u32 sub_id,
+					  const struct vmci_event_data *ed,
+					  void *client_data);
+static void vmci_transport_peer_detach_cb(u32 sub_id,
+					  const struct vmci_event_data *ed,
+					  void *client_data);
+static void vmci_transport_recv_pkt_work(struct work_struct *work);
+static int vmci_transport_recv_listen(struct sock *sk,
+				      struct vmci_transport_packet *pkt);
+static int vmci_transport_recv_connecting_server(
+					struct sock *sk,
+					struct sock *pending,
+					struct vmci_transport_packet *pkt);
+static int vmci_transport_recv_connecting_client(
+					struct sock *sk,
+					struct vmci_transport_packet *pkt);
+static int vmci_transport_recv_connecting_client_negotiate(
+					struct sock *sk,
+					struct vmci_transport_packet *pkt);
+static int vmci_transport_recv_connecting_client_invalid(
+					struct sock *sk,
+					struct vmci_transport_packet *pkt);
+static int vmci_transport_recv_connected(struct sock *sk,
+					 struct vmci_transport_packet *pkt);
+static bool vmci_transport_old_proto_override(bool *old_pkt_proto);
+static u16 vmci_transport_new_proto_supported_versions(void);
+static bool vmci_transport_proto_to_notify_struct(struct sock *sk, u16 *proto,
+						  bool old_pkt_proto);
+
+struct vmci_transport_recv_pkt_info {
+	struct work_struct work;
+	struct sock *sk;
+	struct vmci_transport_packet pkt;
+};
+
+static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID,
+							   VMCI_INVALID_ID };
+static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
+
+static int PROTOCOL_OVERRIDE = -1;
+
+#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN   128
+#define VMCI_TRANSPORT_DEFAULT_QP_SIZE       262144
+#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX   262144
+
+/* The default peer timeout indicates how long we will wait for a peer response
+ * to a control message.
+ */
+#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ)
+
+#define SS_LISTEN 255
+
+/* Helper function to convert from a VMCI error code to a VSock error code. */
+
+static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)
+{
+	int err;
+
+	switch (vmci_error) {
+	case VMCI_ERROR_NO_MEM:
+		err = ENOMEM;
+		break;
+	case VMCI_ERROR_DUPLICATE_ENTRY:
+	case VMCI_ERROR_ALREADY_EXISTS:
+		err = EADDRINUSE;
+		break;
+	case VMCI_ERROR_NO_ACCESS:
+		err = EPERM;
+		break;
+	case VMCI_ERROR_NO_RESOURCES:
+		err = ENOBUFS;
+		break;
+	case VMCI_ERROR_INVALID_RESOURCE:
+		err = EHOSTUNREACH;
+		break;
+	case VMCI_ERROR_INVALID_ARGS:
+	default:
+		err = EINVAL;
+	}
+
+	return err > 0 ? -err : err;
+}
+
+static inline void
+vmci_transport_packet_init(struct vmci_transport_packet *pkt,
+			   struct sockaddr_vm *src,
+			   struct sockaddr_vm *dst,
+			   u8 type,
+			   u64 size,
+			   u64 mode,
+			   struct vmci_transport_waiting_info *wait,
+			   u16 proto,
+			   struct vmci_handle handle)
+{
+	/* We register the stream control handler as an any cid handle so we
+	 * must always send from a source address of VMADDR_CID_ANY
+	 */
+	pkt->dg.src = vmci_make_handle(VMADDR_CID_ANY,
+				       VMCI_TRANSPORT_PACKET_RID);
+	pkt->dg.dst = vmci_make_handle(dst->svm_cid,
+				       VMCI_TRANSPORT_PACKET_RID);
+	pkt->dg.payload_size = sizeof(*pkt) - sizeof(pkt->dg);
+	pkt->version = VMCI_TRANSPORT_PACKET_VERSION;
+	pkt->type = type;
+	pkt->src_port = src->svm_port;
+	pkt->dst_port = dst->svm_port;
+	memset(&pkt->proto, 0, sizeof(pkt->proto));
+	memset(&pkt->_reserved2, 0, sizeof(pkt->_reserved2));
+
+	switch (pkt->type) {
+	case VMCI_TRANSPORT_PACKET_TYPE_INVALID:
+		pkt->u.size = 0;
+		break;
+
+	case VMCI_TRANSPORT_PACKET_TYPE_REQUEST:
+	case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE:
+		pkt->u.size = size;
+		break;
+
+	case VMCI_TRANSPORT_PACKET_TYPE_OFFER:
+	case VMCI_TRANSPORT_PACKET_TYPE_ATTACH:
+		pkt->u.handle = handle;
+		break;
+
+	case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
+	case VMCI_TRANSPORT_PACKET_TYPE_READ:
+	case VMCI_TRANSPORT_PACKET_TYPE_RST:
+		pkt->u.size = 0;
+		break;
+
+	case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN:
+		pkt->u.mode = mode;
+		break;
+
+	case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ:
+	case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE:
+		memcpy(&pkt->u.wait, wait, sizeof(pkt->u.wait));
+		break;
+
+	case VMCI_TRANSPORT_PACKET_TYPE_REQUEST2:
+	case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2:
+		pkt->u.size = size;
+		pkt->proto = proto;
+		break;
+	}
+}
+
+static inline void
+vmci_transport_packet_get_addresses(struct vmci_transport_packet *pkt,
+				    struct sockaddr_vm *local,
+				    struct sockaddr_vm *remote)
+{
+	vsock_addr_init(local, pkt->dg.dst.context, pkt->dst_port);
+	vsock_addr_init(remote, pkt->dg.src.context, pkt->src_port);
+}
+
+static int
+__vmci_transport_send_control_pkt(struct vmci_transport_packet *pkt,
+				  struct sockaddr_vm *src,
+				  struct sockaddr_vm *dst,
+				  enum vmci_transport_packet_type type,
+				  u64 size,
+				  u64 mode,
+				  struct vmci_transport_waiting_info *wait,
+				  u16 proto,
+				  struct vmci_handle handle,
+				  bool convert_error)
+{
+	int err;
+
+	vmci_transport_packet_init(pkt, src, dst, type, size, mode, wait,
+				   proto, handle);
+	err = vmci_datagram_send(&pkt->dg);
+	if (convert_error && (err < 0))
+		return vmci_transport_error_to_vsock_error(err);
+
+	return err;
+}
+
+static int
+vmci_transport_reply_control_pkt_fast(struct vmci_transport_packet *pkt,
+				      enum vmci_transport_packet_type type,
+				      u64 size,
+				      u64 mode,
+				      struct vmci_transport_waiting_info *wait,
+				      struct vmci_handle handle)
+{
+	struct vmci_transport_packet reply;
+	struct sockaddr_vm src, dst;
+
+	if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) {
+		return 0;
+	} else {
+		vmci_transport_packet_get_addresses(pkt, &src, &dst);
+		return __vmci_transport_send_control_pkt(&reply, &src, &dst,
+							 type,
+							 size, mode, wait,
+							 VSOCK_PROTO_INVALID,
+							 handle, true);
+	}
+}
+
+static int
+vmci_transport_send_control_pkt_bh(struct sockaddr_vm *src,
+				   struct sockaddr_vm *dst,
+				   enum vmci_transport_packet_type type,
+				   u64 size,
+				   u64 mode,
+				   struct vmci_transport_waiting_info *wait,
+				   struct vmci_handle handle)
+{
+	/* Note that it is safe to use a single packet across all CPUs since
+	 * two tasklets of the same type are guaranteed to not ever run
+	 * simultaneously. If that ever changes, or VMCI stops using tasklets,
+	 * we can use per-cpu packets.
+	 */
+	static struct vmci_transport_packet pkt;
+
+	return __vmci_transport_send_control_pkt(&pkt, src, dst, type,
+						 size, mode, wait,
+						 VSOCK_PROTO_INVALID, handle,
+						 false);
+}
+
+static int
+vmci_transport_send_control_pkt(struct sock *sk,
+				enum vmci_transport_packet_type type,
+				u64 size,
+				u64 mode,
+				struct vmci_transport_waiting_info *wait,
+				u16 proto,
+				struct vmci_handle handle)
+{
+	struct vmci_transport_packet *pkt;
+	struct vsock_sock *vsk;
+	int err;
+
+	vsk = vsock_sk(sk);
+
+	if (!vsock_addr_bound(&vsk->local_addr))
+		return -EINVAL;
+
+	if (!vsock_addr_bound(&vsk->remote_addr))
+		return -EINVAL;
+
+	pkt = kmalloc(sizeof(*pkt), GFP_KERNEL);
+	if (!pkt)
+		return -ENOMEM;
+
+	err = __vmci_transport_send_control_pkt(pkt, &vsk->local_addr,
+						&vsk->remote_addr, type, size,
+						mode, wait, proto, handle,
+						true);
+	kfree(pkt);
+
+	return err;
+}
+
+static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst,
+					struct sockaddr_vm *src,
+					struct vmci_transport_packet *pkt)
+{
+	if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST)
+		return 0;
+	return vmci_transport_send_control_pkt_bh(
+					dst, src,
+					VMCI_TRANSPORT_PACKET_TYPE_RST, 0,
+					0, NULL, VMCI_INVALID_HANDLE);
+}
+
+static int vmci_transport_send_reset(struct sock *sk,
+				     struct vmci_transport_packet *pkt)
+{
+	if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST)
+		return 0;
+	return vmci_transport_send_control_pkt(sk,
+					VMCI_TRANSPORT_PACKET_TYPE_RST,
+					0, 0, NULL, VSOCK_PROTO_INVALID,
+					VMCI_INVALID_HANDLE);
+}
+
+static int vmci_transport_send_negotiate(struct sock *sk, size_t size)
+{
+	return vmci_transport_send_control_pkt(
+					sk,
+					VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE,
+					size, 0, NULL,
+					VSOCK_PROTO_INVALID,
+					VMCI_INVALID_HANDLE);
+}
+
+static int vmci_transport_send_negotiate2(struct sock *sk, size_t size,
+					  u16 version)
+{
+	return vmci_transport_send_control_pkt(
+					sk,
+					VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2,
+					size, 0, NULL, version,
+					VMCI_INVALID_HANDLE);
+}
+
+static int vmci_transport_send_qp_offer(struct sock *sk,
+					struct vmci_handle handle)
+{
+	return vmci_transport_send_control_pkt(
+					sk, VMCI_TRANSPORT_PACKET_TYPE_OFFER, 0,
+					0, NULL,
+					VSOCK_PROTO_INVALID, handle);
+}
+
+static int vmci_transport_send_attach(struct sock *sk,
+				      struct vmci_handle handle)
+{
+	return vmci_transport_send_control_pkt(
+					sk, VMCI_TRANSPORT_PACKET_TYPE_ATTACH,
+					0, 0, NULL, VSOCK_PROTO_INVALID,
+					handle);
+}
+
+static int vmci_transport_reply_reset(struct vmci_transport_packet *pkt)
+{
+	return vmci_transport_reply_control_pkt_fast(
+						pkt,
+						VMCI_TRANSPORT_PACKET_TYPE_RST,
+						0, 0, NULL,
+						VMCI_INVALID_HANDLE);
+}
+
+static int vmci_transport_send_invalid_bh(struct sockaddr_vm *dst,
+					  struct sockaddr_vm *src)
+{
+	return vmci_transport_send_control_pkt_bh(
+					dst, src,
+					VMCI_TRANSPORT_PACKET_TYPE_INVALID,
+					0, 0, NULL, VMCI_INVALID_HANDLE);
+}
+
+int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst,
+				 struct sockaddr_vm *src)
+{
+	return vmci_transport_send_control_pkt_bh(
+					dst, src,
+					VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0,
+					0, NULL, VMCI_INVALID_HANDLE);
+}
+
+int vmci_transport_send_read_bh(struct sockaddr_vm *dst,
+				struct sockaddr_vm *src)
+{
+	return vmci_transport_send_control_pkt_bh(
+					dst, src,
+					VMCI_TRANSPORT_PACKET_TYPE_READ, 0,
+					0, NULL, VMCI_INVALID_HANDLE);
+}
+
+int vmci_transport_send_wrote(struct sock *sk)
+{
+	return vmci_transport_send_control_pkt(
+					sk, VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0,
+					0, NULL, VSOCK_PROTO_INVALID,
+					VMCI_INVALID_HANDLE);
+}
+
+int vmci_transport_send_read(struct sock *sk)
+{
+	return vmci_transport_send_control_pkt(
+					sk, VMCI_TRANSPORT_PACKET_TYPE_READ, 0,
+					0, NULL, VSOCK_PROTO_INVALID,
+					VMCI_INVALID_HANDLE);
+}
+
+int vmci_transport_send_waiting_write(struct sock *sk,
+				      struct vmci_transport_waiting_info *wait)
+{
+	return vmci_transport_send_control_pkt(
+				sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE,
+				0, 0, wait, VSOCK_PROTO_INVALID,
+				VMCI_INVALID_HANDLE);
+}
+
+int vmci_transport_send_waiting_read(struct sock *sk,
+				     struct vmci_transport_waiting_info *wait)
+{
+	return vmci_transport_send_control_pkt(
+				sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ,
+				0, 0, wait, VSOCK_PROTO_INVALID,
+				VMCI_INVALID_HANDLE);
+}
+
+static int vmci_transport_shutdown(struct vsock_sock *vsk, int mode)
+{
+	return vmci_transport_send_control_pkt(
+					&vsk->sk,
+					VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN,
+					0, mode, NULL,
+					VSOCK_PROTO_INVALID,
+					VMCI_INVALID_HANDLE);
+}
+
+static int vmci_transport_send_conn_request(struct sock *sk, size_t size)
+{
+	return vmci_transport_send_control_pkt(sk,
+					VMCI_TRANSPORT_PACKET_TYPE_REQUEST,
+					size, 0, NULL,
+					VSOCK_PROTO_INVALID,
+					VMCI_INVALID_HANDLE);
+}
+
+static int vmci_transport_send_conn_request2(struct sock *sk, size_t size,
+					     u16 version)
+{
+	return vmci_transport_send_control_pkt(
+					sk, VMCI_TRANSPORT_PACKET_TYPE_REQUEST2,
+					size, 0, NULL, version,
+					VMCI_INVALID_HANDLE);
+}
+
+static struct sock *vmci_transport_get_pending(
+					struct sock *listener,
+					struct vmci_transport_packet *pkt)
+{
+	struct vsock_sock *vlistener;
+	struct vsock_sock *vpending;
+	struct sock *pending;
+
+	vlistener = vsock_sk(listener);
+
+	list_for_each_entry(vpending, &vlistener->pending_links,
+			    pending_links) {
+		struct sockaddr_vm src;
+		struct sockaddr_vm dst;
+
+		vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port);
+		vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port);
+
+		if (vsock_addr_equals_addr(&src, &vpending->remote_addr) &&
+		    vsock_addr_equals_addr(&dst, &vpending->local_addr)) {
+			pending = sk_vsock(vpending);
+			sock_hold(pending);
+			goto found;
+		}
+	}
+
+	pending = NULL;
+found:
+	return pending;
+
+}
+
+static void vmci_transport_release_pending(struct sock *pending)
+{
+	sock_put(pending);
+}
+
+/* We allow two kinds of sockets to communicate with a restricted VM: 1)
+ * trusted sockets 2) sockets from applications running as the same user as the
+ * VM (this is only true for the host side and only when using hosted products)
+ */
+
+static bool vmci_transport_is_trusted(struct vsock_sock *vsock, u32 peer_cid)
+{
+	return vsock->trusted ||
+	       vmci_is_context_owner(peer_cid, vsock->owner->uid);
+}
+
+/* We allow sending datagrams to and receiving datagrams from a restricted VM
+ * only if it is trusted as described in vmci_transport_is_trusted.
+ */
+
+static bool vmci_transport_allow_dgram(struct vsock_sock *vsock, u32 peer_cid)
+{
+	if (vsock->cached_peer != peer_cid) {
+		vsock->cached_peer = peer_cid;
+		if (!vmci_transport_is_trusted(vsock, peer_cid) &&
+		    (vmci_context_get_priv_flags(peer_cid) &
+		     VMCI_PRIVILEGE_FLAG_RESTRICTED)) {
+			vsock->cached_peer_allow_dgram = false;
+		} else {
+			vsock->cached_peer_allow_dgram = true;
+		}
+	}
+
+	return vsock->cached_peer_allow_dgram;
+}
+
+static int
+vmci_transport_queue_pair_alloc(struct vmci_qp **qpair,
+				struct vmci_handle *handle,
+				u64 produce_size,
+				u64 consume_size,
+				u32 peer, u32 flags, bool trusted)
+{
+	int err = 0;
+
+	if (trusted) {
+		/* Try to allocate our queue pair as trusted. This will only
+		 * work if vsock is running in the host.
+		 */
+
+		err = vmci_qpair_alloc(qpair, handle, produce_size,
+				       consume_size,
+				       peer, flags,
+				       VMCI_PRIVILEGE_FLAG_TRUSTED);
+		if (err != VMCI_ERROR_NO_ACCESS)
+			goto out;
+
+	}
+
+	err = vmci_qpair_alloc(qpair, handle, produce_size, consume_size,
+			       peer, flags, VMCI_NO_PRIVILEGE_FLAGS);
+out:
+	if (err < 0) {
+		pr_err("Could not attach to queue pair with %d\n",
+		       err);
+		err = vmci_transport_error_to_vsock_error(err);
+	}
+
+	return err;
+}
+
+static int
+vmci_transport_datagram_create_hnd(u32 resource_id,
+				   u32 flags,
+				   vmci_datagram_recv_cb recv_cb,
+				   void *client_data,
+				   struct vmci_handle *out_handle)
+{
+	int err = 0;
+
+	/* Try to allocate our datagram handler as trusted. This will only work
+	 * if vsock is running in the host.
+	 */
+
+	err = vmci_datagram_create_handle_priv(resource_id, flags,
+					       VMCI_PRIVILEGE_FLAG_TRUSTED,
+					       recv_cb,
+					       client_data, out_handle);
+
+	if (err == VMCI_ERROR_NO_ACCESS)
+		err = vmci_datagram_create_handle(resource_id, flags,
+						  recv_cb, client_data,
+						  out_handle);
+
+	return err;
+}
+
+/* This is invoked as part of a tasklet that's scheduled when the VMCI
+ * interrupt fires.  This is run in bottom-half context and if it ever needs to
+ * sleep it should defer that work to a work queue.
+ */
+
+static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg)
+{
+	struct sock *sk;
+	size_t size;
+	struct sk_buff *skb;
+	struct vsock_sock *vsk;
+
+	sk = (struct sock *)data;
+
+	/* This handler is privileged when this module is running on the host.
+	 * We will get datagrams from all endpoints (even VMs that are in a
+	 * restricted context). If we get one from a restricted context then
+	 * the destination socket must be trusted.
+	 *
+	 * NOTE: We access the socket struct without holding the lock here.
+	 * This is ok because the field we are interested is never modified
+	 * outside of the create and destruct socket functions.
+	 */
+	vsk = vsock_sk(sk);
+	if (!vmci_transport_allow_dgram(vsk, dg->src.context))
+		return VMCI_ERROR_NO_ACCESS;
+
+	size = VMCI_DG_SIZE(dg);
+
+	/* Attach the packet to the socket's receive queue as an sk_buff. */
+	skb = alloc_skb(size, GFP_ATOMIC);
+	if (skb) {
+		/* sk_receive_skb() will do a sock_put(), so hold here. */
+		sock_hold(sk);
+		skb_put(skb, size);
+		memcpy(skb->data, dg, size);
+		sk_receive_skb(sk, skb, 0);
+	}
+
+	return VMCI_SUCCESS;
+}
+
+static bool vmci_transport_stream_allow(u32 cid, u32 port)
+{
+	static const u32 non_socket_contexts[] = {
+		VMADDR_CID_HYPERVISOR,
+		VMADDR_CID_RESERVED,
+	};
+	int i;
+
+	BUILD_BUG_ON(sizeof(cid) != sizeof(*non_socket_contexts));
+
+	for (i = 0; i < ARRAY_SIZE(non_socket_contexts); i++) {
+		if (cid == non_socket_contexts[i])
+			return false;
+	}
+
+	return true;
+}
+
+/* This is invoked as part of a tasklet that's scheduled when the VMCI
+ * interrupt fires.  This is run in bottom-half context but it defers most of
+ * its work to the packet handling work queue.
+ */
+
+static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg)
+{
+	struct sock *sk;
+	struct sockaddr_vm dst;
+	struct sockaddr_vm src;
+	struct vmci_transport_packet *pkt;
+	struct vsock_sock *vsk;
+	bool bh_process_pkt;
+	int err;
+
+	sk = NULL;
+	err = VMCI_SUCCESS;
+	bh_process_pkt = false;
+
+	/* Ignore incoming packets from contexts without sockets, or resources
+	 * that aren't vsock implementations.
+	 */
+
+	if (!vmci_transport_stream_allow(dg->src.context, -1)
+	    || VMCI_TRANSPORT_PACKET_RID != dg->src.resource)
+		return VMCI_ERROR_NO_ACCESS;
+
+	if (VMCI_DG_SIZE(dg) < sizeof(*pkt))
+		/* Drop datagrams that do not contain full VSock packets. */
+		return VMCI_ERROR_INVALID_ARGS;
+
+	pkt = (struct vmci_transport_packet *)dg;
+
+	/* Find the socket that should handle this packet.  First we look for a
+	 * connected socket and if there is none we look for a socket bound to
+	 * the destintation address.
+	 */
+	vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port);
+	vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port);
+
+	sk = vsock_find_connected_socket(&src, &dst);
+	if (!sk) {
+		sk = vsock_find_bound_socket(&dst);
+		if (!sk) {
+			/* We could not find a socket for this specified
+			 * address.  If this packet is a RST, we just drop it.
+			 * If it is another packet, we send a RST.  Note that
+			 * we do not send a RST reply to RSTs so that we do not
+			 * continually send RSTs between two endpoints.
+			 *
+			 * Note that since this is a reply, dst is src and src
+			 * is dst.
+			 */
+			if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0)
+				pr_err("unable to send reset\n");
+
+			err = VMCI_ERROR_NOT_FOUND;
+			goto out;
+		}
+	}
+
+	/* If the received packet type is beyond all types known to this
+	 * implementation, reply with an invalid message.  Hopefully this will
+	 * help when implementing backwards compatibility in the future.
+	 */
+	if (pkt->type >= VMCI_TRANSPORT_PACKET_TYPE_MAX) {
+		vmci_transport_send_invalid_bh(&dst, &src);
+		err = VMCI_ERROR_INVALID_ARGS;
+		goto out;
+	}
+
+	/* This handler is privileged when this module is running on the host.
+	 * We will get datagram connect requests from all endpoints (even VMs
+	 * that are in a restricted context). If we get one from a restricted
+	 * context then the destination socket must be trusted.
+	 *
+	 * NOTE: We access the socket struct without holding the lock here.
+	 * This is ok because the field we are interested is never modified
+	 * outside of the create and destruct socket functions.
+	 */
+	vsk = vsock_sk(sk);
+	if (!vmci_transport_allow_dgram(vsk, pkt->dg.src.context)) {
+		err = VMCI_ERROR_NO_ACCESS;
+		goto out;
+	}
+
+	/* We do most everything in a work queue, but let's fast path the
+	 * notification of reads and writes to help data transfer performance.
+	 * We can only do this if there is no process context code executing
+	 * for this socket since that may change the state.
+	 */
+	bh_lock_sock(sk);
+
+	if (!sock_owned_by_user(sk) && sk->sk_state == SS_CONNECTED)
+		vmci_trans(vsk)->notify_ops->handle_notify_pkt(
+				sk, pkt, true, &dst, &src,
+				&bh_process_pkt);
+
+	bh_unlock_sock(sk);
+
+	if (!bh_process_pkt) {
+		struct vmci_transport_recv_pkt_info *recv_pkt_info;
+
+		recv_pkt_info = kmalloc(sizeof(*recv_pkt_info), GFP_ATOMIC);
+		if (!recv_pkt_info) {
+			if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0)
+				pr_err("unable to send reset\n");
+
+			err = VMCI_ERROR_NO_MEM;
+			goto out;
+		}
+
+		recv_pkt_info->sk = sk;
+		memcpy(&recv_pkt_info->pkt, pkt, sizeof(recv_pkt_info->pkt));
+		INIT_WORK(&recv_pkt_info->work, vmci_transport_recv_pkt_work);
+
+		schedule_work(&recv_pkt_info->work);
+		/* Clear sk so that the reference count incremented by one of
+		 * the Find functions above is not decremented below.  We need
+		 * that reference count for the packet handler we've scheduled
+		 * to run.
+		 */
+		sk = NULL;
+	}
+
+out:
+	if (sk)
+		sock_put(sk);
+
+	return err;
+}
+
+static void vmci_transport_peer_attach_cb(u32 sub_id,
+					  const struct vmci_event_data *e_data,
+					  void *client_data)
+{
+	struct sock *sk = client_data;
+	const struct vmci_event_payload_qp *e_payload;
+	struct vsock_sock *vsk;
+
+	e_payload = vmci_event_data_const_payload(e_data);
+
+	vsk = vsock_sk(sk);
+
+	/* We don't ask for delayed CBs when we subscribe to this event (we
+	 * pass 0 as flags to vmci_event_subscribe()).  VMCI makes no
+	 * guarantees in that case about what context we might be running in,
+	 * so it could be BH or process, blockable or non-blockable.  So we
+	 * need to account for all possible contexts here.
+	 */
+	local_bh_disable();
+	bh_lock_sock(sk);
+
+	/* XXX This is lame, we should provide a way to lookup sockets by
+	 * qp_handle.
+	 */
+	if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle,
+				 e_payload->handle)) {
+		/* XXX This doesn't do anything, but in the future we may want
+		 * to set a flag here to verify the attach really did occur and
+		 * we weren't just sent a datagram claiming it was.
+		 */
+		goto out;
+	}
+
+out:
+	bh_unlock_sock(sk);
+	local_bh_enable();
+}
+
+static void vmci_transport_handle_detach(struct sock *sk)
+{
+	struct vsock_sock *vsk;
+
+	vsk = vsock_sk(sk);
+	if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) {
+		sock_set_flag(sk, SOCK_DONE);
+
+		/* On a detach the peer will not be sending or receiving
+		 * anymore.
+		 */
+		vsk->peer_shutdown = SHUTDOWN_MASK;
+
+		/* We should not be sending anymore since the peer won't be
+		 * there to receive, but we can still receive if there is data
+		 * left in our consume queue.
+		 */
+		if (vsock_stream_has_data(vsk) <= 0) {
+			if (sk->sk_state == SS_CONNECTING) {
+				/* The peer may detach from a queue pair while
+				 * we are still in the connecting state, i.e.,
+				 * if the peer VM is killed after attaching to
+				 * a queue pair, but before we complete the
+				 * handshake. In that case, we treat the detach
+				 * event like a reset.
+				 */
+
+				sk->sk_state = SS_UNCONNECTED;
+				sk->sk_err = ECONNRESET;
+				sk->sk_error_report(sk);
+				return;
+			}
+			sk->sk_state = SS_UNCONNECTED;
+		}
+		sk->sk_state_change(sk);
+	}
+}
+
+static void vmci_transport_peer_detach_cb(u32 sub_id,
+					  const struct vmci_event_data *e_data,
+					  void *client_data)
+{
+	struct sock *sk = client_data;
+	const struct vmci_event_payload_qp *e_payload;
+	struct vsock_sock *vsk;
+
+	e_payload = vmci_event_data_const_payload(e_data);
+	vsk = vsock_sk(sk);
+	if (vmci_handle_is_invalid(e_payload->handle))
+		return;
+
+	/* Same rules for locking as for peer_attach_cb(). */
+	local_bh_disable();
+	bh_lock_sock(sk);
+
+	/* XXX This is lame, we should provide a way to lookup sockets by
+	 * qp_handle.
+	 */
+	if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle,
+				 e_payload->handle))
+		vmci_transport_handle_detach(sk);
+
+	bh_unlock_sock(sk);
+	local_bh_enable();
+}
+
+static void vmci_transport_qp_resumed_cb(u32 sub_id,
+					 const struct vmci_event_data *e_data,
+					 void *client_data)
+{
+	vsock_for_each_connected_socket(vmci_transport_handle_detach);
+}
+
+static void vmci_transport_recv_pkt_work(struct work_struct *work)
+{
+	struct vmci_transport_recv_pkt_info *recv_pkt_info;
+	struct vmci_transport_packet *pkt;
+	struct sock *sk;
+
+	recv_pkt_info =
+		container_of(work, struct vmci_transport_recv_pkt_info, work);
+	sk = recv_pkt_info->sk;
+	pkt = &recv_pkt_info->pkt;
+
+	lock_sock(sk);
+
+	switch (sk->sk_state) {
+	case SS_LISTEN:
+		vmci_transport_recv_listen(sk, pkt);
+		break;
+	case SS_CONNECTING:
+		/* Processing of pending connections for servers goes through
+		 * the listening socket, so see vmci_transport_recv_listen()
+		 * for that path.
+		 */
+		vmci_transport_recv_connecting_client(sk, pkt);
+		break;
+	case SS_CONNECTED:
+		vmci_transport_recv_connected(sk, pkt);
+		break;
+	default:
+		/* Because this function does not run in the same context as
+		 * vmci_transport_recv_stream_cb it is possible that the
+		 * socket has closed. We need to let the other side know or it
+		 * could be sitting in a connect and hang forever. Send a
+		 * reset to prevent that.
+		 */
+		vmci_transport_send_reset(sk, pkt);
+		goto out;
+	}
+
+out:
+	release_sock(sk);
+	kfree(recv_pkt_info);
+	/* Release reference obtained in the stream callback when we fetched
+	 * this socket out of the bound or connected list.
+	 */
+	sock_put(sk);
+}
+
+static int vmci_transport_recv_listen(struct sock *sk,
+				      struct vmci_transport_packet *pkt)
+{
+	struct sock *pending;
+	struct vsock_sock *vpending;
+	int err;
+	u64 qp_size;
+	bool old_request = false;
+	bool old_pkt_proto = false;
+
+	err = 0;
+
+	/* Because we are in the listen state, we could be receiving a packet
+	 * for ourself or any previous connection requests that we received.
+	 * If it's the latter, we try to find a socket in our list of pending
+	 * connections and, if we do, call the appropriate handler for the
+	 * state that that socket is in.  Otherwise we try to service the
+	 * connection request.
+	 */
+	pending = vmci_transport_get_pending(sk, pkt);
+	if (pending) {
+		lock_sock(pending);
+		switch (pending->sk_state) {
+		case SS_CONNECTING:
+			err = vmci_transport_recv_connecting_server(sk,
+								    pending,
+								    pkt);
+			break;
+		default:
+			vmci_transport_send_reset(pending, pkt);
+			err = -EINVAL;
+		}
+
+		if (err < 0)
+			vsock_remove_pending(sk, pending);
+
+		release_sock(pending);
+		vmci_transport_release_pending(pending);
+
+		return err;
+	}
+
+	/* The listen state only accepts connection requests.  Reply with a
+	 * reset unless we received a reset.
+	 */
+
+	if (!(pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST ||
+	      pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2)) {
+		vmci_transport_reply_reset(pkt);
+		return -EINVAL;
+	}
+
+	if (pkt->u.size == 0) {
+		vmci_transport_reply_reset(pkt);
+		return -EINVAL;
+	}
+
+	/* If this socket can't accommodate this connection request, we send a
+	 * reset.  Otherwise we create and initialize a child socket and reply
+	 * with a connection negotiation.
+	 */
+	if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog) {
+		vmci_transport_reply_reset(pkt);
+		return -ECONNREFUSED;
+	}
+
+	pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL,
+				 sk->sk_type);
+	if (!pending) {
+		vmci_transport_send_reset(sk, pkt);
+		return -ENOMEM;
+	}
+
+	vpending = vsock_sk(pending);
+
+	vsock_addr_init(&vpending->local_addr, pkt->dg.dst.context,
+			pkt->dst_port);
+	vsock_addr_init(&vpending->remote_addr, pkt->dg.src.context,
+			pkt->src_port);
+
+	/* If the proposed size fits within our min/max, accept it. Otherwise
+	 * propose our own size.
+	 */
+	if (pkt->u.size >= vmci_trans(vpending)->queue_pair_min_size &&
+	    pkt->u.size <= vmci_trans(vpending)->queue_pair_max_size) {
+		qp_size = pkt->u.size;
+	} else {
+		qp_size = vmci_trans(vpending)->queue_pair_size;
+	}
+
+	/* Figure out if we are using old or new requests based on the
+	 * overrides pkt types sent by our peer.
+	 */
+	if (vmci_transport_old_proto_override(&old_pkt_proto)) {
+		old_request = old_pkt_proto;
+	} else {
+		if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST)
+			old_request = true;
+		else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2)
+			old_request = false;
+
+	}
+
+	if (old_request) {
+		/* Handle a REQUEST (or override) */
+		u16 version = VSOCK_PROTO_INVALID;
+		if (vmci_transport_proto_to_notify_struct(
+			pending, &version, true))
+			err = vmci_transport_send_negotiate(pending, qp_size);
+		else
+			err = -EINVAL;
+
+	} else {
+		/* Handle a REQUEST2 (or override) */
+		int proto_int = pkt->proto;
+		int pos;
+		u16 active_proto_version = 0;
+
+		/* The list of possible protocols is the intersection of all
+		 * protocols the client supports ... plus all the protocols we
+		 * support.
+		 */
+		proto_int &= vmci_transport_new_proto_supported_versions();
+
+		/* We choose the highest possible protocol version and use that
+		 * one.
+		 */
+		pos = fls(proto_int);
+		if (pos) {
+			active_proto_version = (1 << (pos - 1));
+			if (vmci_transport_proto_to_notify_struct(
+				pending, &active_proto_version, false))
+				err = vmci_transport_send_negotiate2(pending,
+							qp_size,
+							active_proto_version);
+			else
+				err = -EINVAL;
+
+		} else {
+			err = -EINVAL;
+		}
+	}
+
+	if (err < 0) {
+		vmci_transport_send_reset(sk, pkt);
+		sock_put(pending);
+		err = vmci_transport_error_to_vsock_error(err);
+		goto out;
+	}
+
+	vsock_add_pending(sk, pending);
+	sk->sk_ack_backlog++;
+
+	pending->sk_state = SS_CONNECTING;
+	vmci_trans(vpending)->produce_size =
+		vmci_trans(vpending)->consume_size = qp_size;
+	vmci_trans(vpending)->queue_pair_size = qp_size;
+
+	vmci_trans(vpending)->notify_ops->process_request(pending);
+
+	/* We might never receive another message for this socket and it's not
+	 * connected to any process, so we have to ensure it gets cleaned up
+	 * ourself.  Our delayed work function will take care of that.  Note
+	 * that we do not ever cancel this function since we have few
+	 * guarantees about its state when calling cancel_delayed_work().
+	 * Instead we hold a reference on the socket for that function and make
+	 * it capable of handling cases where it needs to do nothing but
+	 * release that reference.
+	 */
+	vpending->listener = sk;
+	sock_hold(sk);
+	sock_hold(pending);
+	INIT_DELAYED_WORK(&vpending->dwork, vsock_pending_work);
+	schedule_delayed_work(&vpending->dwork, HZ);
+
+out:
+	return err;
+}
+
+static int
+vmci_transport_recv_connecting_server(struct sock *listener,
+				      struct sock *pending,
+				      struct vmci_transport_packet *pkt)
+{
+	struct vsock_sock *vpending;
+	struct vmci_handle handle;
+	struct vmci_qp *qpair;
+	bool is_local;
+	u32 flags;
+	u32 detach_sub_id;
+	int err;
+	int skerr;
+
+	vpending = vsock_sk(pending);
+	detach_sub_id = VMCI_INVALID_ID;
+
+	switch (pkt->type) {
+	case VMCI_TRANSPORT_PACKET_TYPE_OFFER:
+		if (vmci_handle_is_invalid(pkt->u.handle)) {
+			vmci_transport_send_reset(pending, pkt);
+			skerr = EPROTO;
+			err = -EINVAL;
+			goto destroy;
+		}
+		break;
+	default:
+		/* Close and cleanup the connection. */
+		vmci_transport_send_reset(pending, pkt);
+		skerr = EPROTO;
+		err = pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST ? 0 : -EINVAL;
+		goto destroy;
+	}
+
+	/* In order to complete the connection we need to attach to the offered
+	 * queue pair and send an attach notification.  We also subscribe to the
+	 * detach event so we know when our peer goes away, and we do that
+	 * before attaching so we don't miss an event.  If all this succeeds,
+	 * we update our state and wakeup anything waiting in accept() for a
+	 * connection.
+	 */
+
+	/* We don't care about attach since we ensure the other side has
+	 * attached by specifying the ATTACH_ONLY flag below.
+	 */
+	err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH,
+				   vmci_transport_peer_detach_cb,
+				   pending, &detach_sub_id);
+	if (err < VMCI_SUCCESS) {
+		vmci_transport_send_reset(pending, pkt);
+		err = vmci_transport_error_to_vsock_error(err);
+		skerr = -err;
+		goto destroy;
+	}
+
+	vmci_trans(vpending)->detach_sub_id = detach_sub_id;
+
+	/* Now attach to the queue pair the client created. */
+	handle = pkt->u.handle;
+
+	/* vpending->local_addr always has a context id so we do not need to
+	 * worry about VMADDR_CID_ANY in this case.
+	 */
+	is_local =
+	    vpending->remote_addr.svm_cid == vpending->local_addr.svm_cid;
+	flags = VMCI_QPFLAG_ATTACH_ONLY;
+	flags |= is_local ? VMCI_QPFLAG_LOCAL : 0;
+
+	err = vmci_transport_queue_pair_alloc(
+					&qpair,
+					&handle,
+					vmci_trans(vpending)->produce_size,
+					vmci_trans(vpending)->consume_size,
+					pkt->dg.src.context,
+					flags,
+					vmci_transport_is_trusted(
+						vpending,
+						vpending->remote_addr.svm_cid));
+	if (err < 0) {
+		vmci_transport_send_reset(pending, pkt);
+		skerr = -err;
+		goto destroy;
+	}
+
+	vmci_trans(vpending)->qp_handle = handle;
+	vmci_trans(vpending)->qpair = qpair;
+
+	/* When we send the attach message, we must be ready to handle incoming
+	 * control messages on the newly connected socket. So we move the
+	 * pending socket to the connected state before sending the attach
+	 * message. Otherwise, an incoming packet triggered by the attach being
+	 * received by the peer may be processed concurrently with what happens
+	 * below after sending the attach message, and that incoming packet
+	 * will find the listening socket instead of the (currently) pending
+	 * socket. Note that enqueueing the socket increments the reference
+	 * count, so even if a reset comes before the connection is accepted,
+	 * the socket will be valid until it is removed from the queue.
+	 *
+	 * If we fail sending the attach below, we remove the socket from the
+	 * connected list and move the socket to SS_UNCONNECTED before
+	 * releasing the lock, so a pending slow path processing of an incoming
+	 * packet will not see the socket in the connected state in that case.
+	 */
+	pending->sk_state = SS_CONNECTED;
+
+	vsock_insert_connected(vpending);
+
+	/* Notify our peer of our attach. */
+	err = vmci_transport_send_attach(pending, handle);
+	if (err < 0) {
+		vsock_remove_connected(vpending);
+		pr_err("Could not send attach\n");
+		vmci_transport_send_reset(pending, pkt);
+		err = vmci_transport_error_to_vsock_error(err);
+		skerr = -err;
+		goto destroy;
+	}
+
+	/* We have a connection. Move the now connected socket from the
+	 * listener's pending list to the accept queue so callers of accept()
+	 * can find it.
+	 */
+	vsock_remove_pending(listener, pending);
+	vsock_enqueue_accept(listener, pending);
+
+	/* Callers of accept() will be be waiting on the listening socket, not
+	 * the pending socket.
+	 */
+	listener->sk_state_change(listener);
+
+	return 0;
+
+destroy:
+	pending->sk_err = skerr;
+	pending->sk_state = SS_UNCONNECTED;
+	/* As long as we drop our reference, all necessary cleanup will handle
+	 * when the cleanup function drops its reference and our destruct
+	 * implementation is called.  Note that since the listen handler will
+	 * remove pending from the pending list upon our failure, the cleanup
+	 * function won't drop the additional reference, which is why we do it
+	 * here.
+	 */
+	sock_put(pending);
+
+	return err;
+}
+
+static int
+vmci_transport_recv_connecting_client(struct sock *sk,
+				      struct vmci_transport_packet *pkt)
+{
+	struct vsock_sock *vsk;
+	int err;
+	int skerr;
+
+	vsk = vsock_sk(sk);
+
+	switch (pkt->type) {
+	case VMCI_TRANSPORT_PACKET_TYPE_ATTACH:
+		if (vmci_handle_is_invalid(pkt->u.handle) ||
+		    !vmci_handle_is_equal(pkt->u.handle,
+					  vmci_trans(vsk)->qp_handle)) {
+			skerr = EPROTO;
+			err = -EINVAL;
+			goto destroy;
+		}
+
+		/* Signify the socket is connected and wakeup the waiter in
+		 * connect(). Also place the socket in the connected table for
+		 * accounting (it can already be found since it's in the bound
+		 * table).
+		 */
+		sk->sk_state = SS_CONNECTED;
+		sk->sk_socket->state = SS_CONNECTED;
+		vsock_insert_connected(vsk);
+		sk->sk_state_change(sk);
+
+		break;
+	case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE:
+	case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2:
+		if (pkt->u.size == 0
+		    || pkt->dg.src.context != vsk->remote_addr.svm_cid
+		    || pkt->src_port != vsk->remote_addr.svm_port
+		    || !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)
+		    || vmci_trans(vsk)->qpair
+		    || vmci_trans(vsk)->produce_size != 0
+		    || vmci_trans(vsk)->consume_size != 0
+		    || vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID
+		    || vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) {
+			skerr = EPROTO;
+			err = -EINVAL;
+
+			goto destroy;
+		}
+
+		err = vmci_transport_recv_connecting_client_negotiate(sk, pkt);
+		if (err) {
+			skerr = -err;
+			goto destroy;
+		}
+
+		break;
+	case VMCI_TRANSPORT_PACKET_TYPE_INVALID:
+		err = vmci_transport_recv_connecting_client_invalid(sk, pkt);
+		if (err) {
+			skerr = -err;
+			goto destroy;
+		}
+
+		break;
+	case VMCI_TRANSPORT_PACKET_TYPE_RST:
+		/* Older versions of the linux code (WS 6.5 / ESX 4.0) used to
+		 * continue processing here after they sent an INVALID packet.
+		 * This meant that we got a RST after the INVALID. We ignore a
+		 * RST after an INVALID. The common code doesn't send the RST
+		 * ... so we can hang if an old version of the common code
+		 * fails between getting a REQUEST and sending an OFFER back.
+		 * Not much we can do about it... except hope that it doesn't
+		 * happen.
+		 */
+		if (vsk->ignore_connecting_rst) {
+			vsk->ignore_connecting_rst = false;
+		} else {
+			skerr = ECONNRESET;
+			err = 0;
+			goto destroy;
+		}
+
+		break;
+	default:
+		/* Close and cleanup the connection. */
+		skerr = EPROTO;
+		err = -EINVAL;
+		goto destroy;
+	}
+
+	return 0;
+
+destroy:
+	vmci_transport_send_reset(sk, pkt);
+
+	sk->sk_state = SS_UNCONNECTED;
+	sk->sk_err = skerr;
+	sk->sk_error_report(sk);
+	return err;
+}
+
+static int vmci_transport_recv_connecting_client_negotiate(
+					struct sock *sk,
+					struct vmci_transport_packet *pkt)
+{
+	int err;
+	struct vsock_sock *vsk;
+	struct vmci_handle handle;
+	struct vmci_qp *qpair;
+	u32 attach_sub_id;
+	u32 detach_sub_id;
+	bool is_local;
+	u32 flags;
+	bool old_proto = true;
+	bool old_pkt_proto;
+	u16 version;
+
+	vsk = vsock_sk(sk);
+	handle = VMCI_INVALID_HANDLE;
+	attach_sub_id = VMCI_INVALID_ID;
+	detach_sub_id = VMCI_INVALID_ID;
+
+	/* If we have gotten here then we should be past the point where old
+	 * linux vsock could have sent the bogus rst.
+	 */
+	vsk->sent_request = false;
+	vsk->ignore_connecting_rst = false;
+
+	/* Verify that we're OK with the proposed queue pair size */
+	if (pkt->u.size < vmci_trans(vsk)->queue_pair_min_size ||
+	    pkt->u.size > vmci_trans(vsk)->queue_pair_max_size) {
+		err = -EINVAL;
+		goto destroy;
+	}
+
+	/* At this point we know the CID the peer is using to talk to us. */
+
+	if (vsk->local_addr.svm_cid == VMADDR_CID_ANY)
+		vsk->local_addr.svm_cid = pkt->dg.dst.context;
+
+	/* Setup the notify ops to be the highest supported version that both
+	 * the server and the client support.
+	 */
+
+	if (vmci_transport_old_proto_override(&old_pkt_proto)) {
+		old_proto = old_pkt_proto;
+	} else {
+		if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE)
+			old_proto = true;
+		else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2)
+			old_proto = false;
+
+	}
+
+	if (old_proto)
+		version = VSOCK_PROTO_INVALID;
+	else
+		version = pkt->proto;
+
+	if (!vmci_transport_proto_to_notify_struct(sk, &version, old_proto)) {
+		err = -EINVAL;
+		goto destroy;
+	}
+
+	/* Subscribe to attach and detach events first.
+	 *
+	 * XXX We attach once for each queue pair created for now so it is easy
+	 * to find the socket (it's provided), but later we should only
+	 * subscribe once and add a way to lookup sockets by queue pair handle.
+	 */
+	err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_ATTACH,
+				   vmci_transport_peer_attach_cb,
+				   sk, &attach_sub_id);
+	if (err < VMCI_SUCCESS) {
+		err = vmci_transport_error_to_vsock_error(err);
+		goto destroy;
+	}
+
+	err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH,
+				   vmci_transport_peer_detach_cb,
+				   sk, &detach_sub_id);
+	if (err < VMCI_SUCCESS) {
+		err = vmci_transport_error_to_vsock_error(err);
+		goto destroy;
+	}
+
+	/* Make VMCI select the handle for us. */
+	handle = VMCI_INVALID_HANDLE;
+	is_local = vsk->remote_addr.svm_cid == vsk->local_addr.svm_cid;
+	flags = is_local ? VMCI_QPFLAG_LOCAL : 0;
+
+	err = vmci_transport_queue_pair_alloc(&qpair,
+					      &handle,
+					      pkt->u.size,
+					      pkt->u.size,
+					      vsk->remote_addr.svm_cid,
+					      flags,
+					      vmci_transport_is_trusted(
+						  vsk,
+						  vsk->
+						  remote_addr.svm_cid));
+	if (err < 0)
+		goto destroy;
+
+	err = vmci_transport_send_qp_offer(sk, handle);
+	if (err < 0) {
+		err = vmci_transport_error_to_vsock_error(err);
+		goto destroy;
+	}
+
+	vmci_trans(vsk)->qp_handle = handle;
+	vmci_trans(vsk)->qpair = qpair;
+
+	vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size =
+		pkt->u.size;
+
+	vmci_trans(vsk)->attach_sub_id = attach_sub_id;
+	vmci_trans(vsk)->detach_sub_id = detach_sub_id;
+
+	vmci_trans(vsk)->notify_ops->process_negotiate(sk);
+
+	return 0;
+
+destroy:
+	if (attach_sub_id != VMCI_INVALID_ID)
+		vmci_event_unsubscribe(attach_sub_id);
+
+	if (detach_sub_id != VMCI_INVALID_ID)
+		vmci_event_unsubscribe(detach_sub_id);
+
+	if (!vmci_handle_is_invalid(handle))
+		vmci_qpair_detach(&qpair);
+
+	return err;
+}
+
+static int
+vmci_transport_recv_connecting_client_invalid(struct sock *sk,
+					      struct vmci_transport_packet *pkt)
+{
+	int err = 0;
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	if (vsk->sent_request) {
+		vsk->sent_request = false;
+		vsk->ignore_connecting_rst = true;
+
+		err = vmci_transport_send_conn_request(
+			sk, vmci_trans(vsk)->queue_pair_size);
+		if (err < 0)
+			err = vmci_transport_error_to_vsock_error(err);
+		else
+			err = 0;
+
+	}
+
+	return err;
+}
+
+static int vmci_transport_recv_connected(struct sock *sk,
+					 struct vmci_transport_packet *pkt)
+{
+	struct vsock_sock *vsk;
+	bool pkt_processed = false;
+
+	/* In cases where we are closing the connection, it's sufficient to
+	 * mark the state change (and maybe error) and wake up any waiting
+	 * threads. Since this is a connected socket, it's owned by a user
+	 * process and will be cleaned up when the failure is passed back on
+	 * the current or next system call.  Our system call implementations
+	 * must therefore check for error and state changes on entry and when
+	 * being awoken.
+	 */
+	switch (pkt->type) {
+	case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN:
+		if (pkt->u.mode) {
+			vsk = vsock_sk(sk);
+
+			vsk->peer_shutdown |= pkt->u.mode;
+			sk->sk_state_change(sk);
+		}
+		break;
+
+	case VMCI_TRANSPORT_PACKET_TYPE_RST:
+		vsk = vsock_sk(sk);
+		/* It is possible that we sent our peer a message (e.g a
+		 * WAITING_READ) right before we got notified that the peer had
+		 * detached. If that happens then we can get a RST pkt back
+		 * from our peer even though there is data available for us to
+		 * read. In that case, don't shutdown the socket completely but
+		 * instead allow the local client to finish reading data off
+		 * the queuepair. Always treat a RST pkt in connected mode like
+		 * a clean shutdown.
+		 */
+		sock_set_flag(sk, SOCK_DONE);
+		vsk->peer_shutdown = SHUTDOWN_MASK;
+		if (vsock_stream_has_data(vsk) <= 0)
+			sk->sk_state = SS_DISCONNECTING;
+
+		sk->sk_state_change(sk);
+		break;
+
+	default:
+		vsk = vsock_sk(sk);
+		vmci_trans(vsk)->notify_ops->handle_notify_pkt(
+				sk, pkt, false, NULL, NULL,
+				&pkt_processed);
+		if (!pkt_processed)
+			return -EINVAL;
+
+		break;
+	}
+
+	return 0;
+}
+
+static int vmci_transport_socket_init(struct vsock_sock *vsk,
+				      struct vsock_sock *psk)
+{
+	vsk->trans = kmalloc(sizeof(struct vmci_transport), GFP_KERNEL);
+	if (!vsk->trans)
+		return -ENOMEM;
+
+	vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE;
+	vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE;
+	vmci_trans(vsk)->qpair = NULL;
+	vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = 0;
+	vmci_trans(vsk)->attach_sub_id = vmci_trans(vsk)->detach_sub_id =
+		VMCI_INVALID_ID;
+	vmci_trans(vsk)->notify_ops = NULL;
+	if (psk) {
+		vmci_trans(vsk)->queue_pair_size =
+			vmci_trans(psk)->queue_pair_size;
+		vmci_trans(vsk)->queue_pair_min_size =
+			vmci_trans(psk)->queue_pair_min_size;
+		vmci_trans(vsk)->queue_pair_max_size =
+			vmci_trans(psk)->queue_pair_max_size;
+	} else {
+		vmci_trans(vsk)->queue_pair_size =
+			VMCI_TRANSPORT_DEFAULT_QP_SIZE;
+		vmci_trans(vsk)->queue_pair_min_size =
+			 VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN;
+		vmci_trans(vsk)->queue_pair_max_size =
+			VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX;
+	}
+
+	return 0;
+}
+
+static void vmci_transport_destruct(struct vsock_sock *vsk)
+{
+	if (vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID) {
+		vmci_event_unsubscribe(vmci_trans(vsk)->attach_sub_id);
+		vmci_trans(vsk)->attach_sub_id = VMCI_INVALID_ID;
+	}
+
+	if (vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) {
+		vmci_event_unsubscribe(vmci_trans(vsk)->detach_sub_id);
+		vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID;
+	}
+
+	if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) {
+		vmci_qpair_detach(&vmci_trans(vsk)->qpair);
+		vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE;
+		vmci_trans(vsk)->produce_size = 0;
+		vmci_trans(vsk)->consume_size = 0;
+	}
+
+	if (vmci_trans(vsk)->notify_ops)
+		vmci_trans(vsk)->notify_ops->socket_destruct(vsk);
+
+	kfree(vsk->trans);
+	vsk->trans = NULL;
+}
+
+static void vmci_transport_release(struct vsock_sock *vsk)
+{
+	if (!vmci_handle_is_invalid(vmci_trans(vsk)->dg_handle)) {
+		vmci_datagram_destroy_handle(vmci_trans(vsk)->dg_handle);
+		vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE;
+	}
+}
+
+static int vmci_transport_dgram_bind(struct vsock_sock *vsk,
+				     struct sockaddr_vm *addr)
+{
+	u32 port;
+	u32 flags;
+	int err;
+
+	/* VMCI will select a resource ID for us if we provide
+	 * VMCI_INVALID_ID.
+	 */
+	port = addr->svm_port == VMADDR_PORT_ANY ?
+			VMCI_INVALID_ID : addr->svm_port;
+
+	if (port <= LAST_RESERVED_PORT && !capable(CAP_NET_BIND_SERVICE))
+		return -EACCES;
+
+	flags = addr->svm_cid == VMADDR_CID_ANY ?
+				VMCI_FLAG_ANYCID_DG_HND : 0;
+
+	err = vmci_transport_datagram_create_hnd(port, flags,
+						 vmci_transport_recv_dgram_cb,
+						 &vsk->sk,
+						 &vmci_trans(vsk)->dg_handle);
+	if (err < VMCI_SUCCESS)
+		return vmci_transport_error_to_vsock_error(err);
+	vsock_addr_init(&vsk->local_addr, addr->svm_cid,
+			vmci_trans(vsk)->dg_handle.resource);
+
+	return 0;
+}
+
+static int vmci_transport_dgram_enqueue(
+	struct vsock_sock *vsk,
+	struct sockaddr_vm *remote_addr,
+	struct iovec *iov,
+	size_t len)
+{
+	int err;
+	struct vmci_datagram *dg;
+
+	if (len > VMCI_MAX_DG_PAYLOAD_SIZE)
+		return -EMSGSIZE;
+
+	if (!vmci_transport_allow_dgram(vsk, remote_addr->svm_cid))
+		return -EPERM;
+
+	/* Allocate a buffer for the user's message and our packet header. */
+	dg = kmalloc(len + sizeof(*dg), GFP_KERNEL);
+	if (!dg)
+		return -ENOMEM;
+
+	memcpy_fromiovec(VMCI_DG_PAYLOAD(dg), iov, len);
+
+	dg->dst = vmci_make_handle(remote_addr->svm_cid,
+				   remote_addr->svm_port);
+	dg->src = vmci_make_handle(vsk->local_addr.svm_cid,
+				   vsk->local_addr.svm_port);
+	dg->payload_size = len;
+
+	err = vmci_datagram_send(dg);
+	kfree(dg);
+	if (err < 0)
+		return vmci_transport_error_to_vsock_error(err);
+
+	return err - sizeof(*dg);
+}
+
+static int vmci_transport_dgram_dequeue(struct kiocb *kiocb,
+					struct vsock_sock *vsk,
+					struct msghdr *msg, size_t len,
+					int flags)
+{
+	int err;
+	int noblock;
+	struct vmci_datagram *dg;
+	size_t payload_len;
+	struct sk_buff *skb;
+
+	noblock = flags & MSG_DONTWAIT;
+
+	if (flags & MSG_OOB || flags & MSG_ERRQUEUE)
+		return -EOPNOTSUPP;
+
+	/* Retrieve the head sk_buff from the socket's receive queue. */
+	err = 0;
+	skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err);
+	if (err)
+		return err;
+
+	if (!skb)
+		return -EAGAIN;
+
+	dg = (struct vmci_datagram *)skb->data;
+	if (!dg)
+		/* err is 0, meaning we read zero bytes. */
+		goto out;
+
+	payload_len = dg->payload_size;
+	/* Ensure the sk_buff matches the payload size claimed in the packet. */
+	if (payload_len != skb->len - sizeof(*dg)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (payload_len > len) {
+		payload_len = len;
+		msg->msg_flags |= MSG_TRUNC;
+	}
+
+	/* Place the datagram payload in the user's iovec. */
+	err = skb_copy_datagram_iovec(skb, sizeof(*dg), msg->msg_iov,
+		payload_len);
+	if (err)
+		goto out;
+
+	msg->msg_namelen = 0;
+	if (msg->msg_name) {
+		struct sockaddr_vm *vm_addr;
+
+		/* Provide the address of the sender. */
+		vm_addr = (struct sockaddr_vm *)msg->msg_name;
+		vsock_addr_init(vm_addr, dg->src.context, dg->src.resource);
+		msg->msg_namelen = sizeof(*vm_addr);
+	}
+	err = payload_len;
+
+out:
+	skb_free_datagram(&vsk->sk, skb);
+	return err;
+}
+
+static bool vmci_transport_dgram_allow(u32 cid, u32 port)
+{
+	if (cid == VMADDR_CID_HYPERVISOR) {
+		/* Registrations of PBRPC Servers do not modify VMX/Hypervisor
+		 * state and are allowed.
+		 */
+		return port == VMCI_UNITY_PBRPC_REGISTER;
+	}
+
+	return true;
+}
+
+static int vmci_transport_connect(struct vsock_sock *vsk)
+{
+	int err;
+	bool old_pkt_proto = false;
+	struct sock *sk = &vsk->sk;
+
+	if (vmci_transport_old_proto_override(&old_pkt_proto) &&
+		old_pkt_proto) {
+		err = vmci_transport_send_conn_request(
+			sk, vmci_trans(vsk)->queue_pair_size);
+		if (err < 0) {
+			sk->sk_state = SS_UNCONNECTED;
+			return err;
+		}
+	} else {
+		int supported_proto_versions =
+			vmci_transport_new_proto_supported_versions();
+		err = vmci_transport_send_conn_request2(
+				sk, vmci_trans(vsk)->queue_pair_size,
+				supported_proto_versions);
+		if (err < 0) {
+			sk->sk_state = SS_UNCONNECTED;
+			return err;
+		}
+
+		vsk->sent_request = true;
+	}
+
+	return err;
+}
+
+static ssize_t vmci_transport_stream_dequeue(
+	struct vsock_sock *vsk,
+	struct iovec *iov,
+	size_t len,
+	int flags)
+{
+	if (flags & MSG_PEEK)
+		return vmci_qpair_peekv(vmci_trans(vsk)->qpair, iov, len, 0);
+	else
+		return vmci_qpair_dequev(vmci_trans(vsk)->qpair, iov, len, 0);
+}
+
+static ssize_t vmci_transport_stream_enqueue(
+	struct vsock_sock *vsk,
+	struct iovec *iov,
+	size_t len)
+{
+	return vmci_qpair_enquev(vmci_trans(vsk)->qpair, iov, len, 0);
+}
+
+static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk)
+{
+	return vmci_qpair_consume_buf_ready(vmci_trans(vsk)->qpair);
+}
+
+static s64 vmci_transport_stream_has_space(struct vsock_sock *vsk)
+{
+	return vmci_qpair_produce_free_space(vmci_trans(vsk)->qpair);
+}
+
+static u64 vmci_transport_stream_rcvhiwat(struct vsock_sock *vsk)
+{
+	return vmci_trans(vsk)->consume_size;
+}
+
+static bool vmci_transport_stream_is_active(struct vsock_sock *vsk)
+{
+	return !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle);
+}
+
+static u64 vmci_transport_get_buffer_size(struct vsock_sock *vsk)
+{
+	return vmci_trans(vsk)->queue_pair_size;
+}
+
+static u64 vmci_transport_get_min_buffer_size(struct vsock_sock *vsk)
+{
+	return vmci_trans(vsk)->queue_pair_min_size;
+}
+
+static u64 vmci_transport_get_max_buffer_size(struct vsock_sock *vsk)
+{
+	return vmci_trans(vsk)->queue_pair_max_size;
+}
+
+static void vmci_transport_set_buffer_size(struct vsock_sock *vsk, u64 val)
+{
+	if (val < vmci_trans(vsk)->queue_pair_min_size)
+		vmci_trans(vsk)->queue_pair_min_size = val;
+	if (val > vmci_trans(vsk)->queue_pair_max_size)
+		vmci_trans(vsk)->queue_pair_max_size = val;
+	vmci_trans(vsk)->queue_pair_size = val;
+}
+
+static void vmci_transport_set_min_buffer_size(struct vsock_sock *vsk,
+					       u64 val)
+{
+	if (val > vmci_trans(vsk)->queue_pair_size)
+		vmci_trans(vsk)->queue_pair_size = val;
+	vmci_trans(vsk)->queue_pair_min_size = val;
+}
+
+static void vmci_transport_set_max_buffer_size(struct vsock_sock *vsk,
+					       u64 val)
+{
+	if (val < vmci_trans(vsk)->queue_pair_size)
+		vmci_trans(vsk)->queue_pair_size = val;
+	vmci_trans(vsk)->queue_pair_max_size = val;
+}
+
+static int vmci_transport_notify_poll_in(
+	struct vsock_sock *vsk,
+	size_t target,
+	bool *data_ready_now)
+{
+	return vmci_trans(vsk)->notify_ops->poll_in(
+			&vsk->sk, target, data_ready_now);
+}
+
+static int vmci_transport_notify_poll_out(
+	struct vsock_sock *vsk,
+	size_t target,
+	bool *space_available_now)
+{
+	return vmci_trans(vsk)->notify_ops->poll_out(
+			&vsk->sk, target, space_available_now);
+}
+
+static int vmci_transport_notify_recv_init(
+	struct vsock_sock *vsk,
+	size_t target,
+	struct vsock_transport_recv_notify_data *data)
+{
+	return vmci_trans(vsk)->notify_ops->recv_init(
+			&vsk->sk, target,
+			(struct vmci_transport_recv_notify_data *)data);
+}
+
+static int vmci_transport_notify_recv_pre_block(
+	struct vsock_sock *vsk,
+	size_t target,
+	struct vsock_transport_recv_notify_data *data)
+{
+	return vmci_trans(vsk)->notify_ops->recv_pre_block(
+			&vsk->sk, target,
+			(struct vmci_transport_recv_notify_data *)data);
+}
+
+static int vmci_transport_notify_recv_pre_dequeue(
+	struct vsock_sock *vsk,
+	size_t target,
+	struct vsock_transport_recv_notify_data *data)
+{
+	return vmci_trans(vsk)->notify_ops->recv_pre_dequeue(
+			&vsk->sk, target,
+			(struct vmci_transport_recv_notify_data *)data);
+}
+
+static int vmci_transport_notify_recv_post_dequeue(
+	struct vsock_sock *vsk,
+	size_t target,
+	ssize_t copied,
+	bool data_read,
+	struct vsock_transport_recv_notify_data *data)
+{
+	return vmci_trans(vsk)->notify_ops->recv_post_dequeue(
+			&vsk->sk, target, copied, data_read,
+			(struct vmci_transport_recv_notify_data *)data);
+}
+
+static int vmci_transport_notify_send_init(
+	struct vsock_sock *vsk,
+	struct vsock_transport_send_notify_data *data)
+{
+	return vmci_trans(vsk)->notify_ops->send_init(
+			&vsk->sk,
+			(struct vmci_transport_send_notify_data *)data);
+}
+
+static int vmci_transport_notify_send_pre_block(
+	struct vsock_sock *vsk,
+	struct vsock_transport_send_notify_data *data)
+{
+	return vmci_trans(vsk)->notify_ops->send_pre_block(
+			&vsk->sk,
+			(struct vmci_transport_send_notify_data *)data);
+}
+
+static int vmci_transport_notify_send_pre_enqueue(
+	struct vsock_sock *vsk,
+	struct vsock_transport_send_notify_data *data)
+{
+	return vmci_trans(vsk)->notify_ops->send_pre_enqueue(
+			&vsk->sk,
+			(struct vmci_transport_send_notify_data *)data);
+}
+
+static int vmci_transport_notify_send_post_enqueue(
+	struct vsock_sock *vsk,
+	ssize_t written,
+	struct vsock_transport_send_notify_data *data)
+{
+	return vmci_trans(vsk)->notify_ops->send_post_enqueue(
+			&vsk->sk, written,
+			(struct vmci_transport_send_notify_data *)data);
+}
+
+static bool vmci_transport_old_proto_override(bool *old_pkt_proto)
+{
+	if (PROTOCOL_OVERRIDE != -1) {
+		if (PROTOCOL_OVERRIDE == 0)
+			*old_pkt_proto = true;
+		else
+			*old_pkt_proto = false;
+
+		pr_info("Proto override in use\n");
+		return true;
+	}
+
+	return false;
+}
+
+static bool vmci_transport_proto_to_notify_struct(struct sock *sk,
+						  u16 *proto,
+						  bool old_pkt_proto)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	if (old_pkt_proto) {
+		if (*proto != VSOCK_PROTO_INVALID) {
+			pr_err("Can't set both an old and new protocol\n");
+			return false;
+		}
+		vmci_trans(vsk)->notify_ops = &vmci_transport_notify_pkt_ops;
+		goto exit;
+	}
+
+	switch (*proto) {
+	case VSOCK_PROTO_PKT_ON_NOTIFY:
+		vmci_trans(vsk)->notify_ops =
+			&vmci_transport_notify_pkt_q_state_ops;
+		break;
+	default:
+		pr_err("Unknown notify protocol version\n");
+		return false;
+	}
+
+exit:
+	vmci_trans(vsk)->notify_ops->socket_init(sk);
+	return true;
+}
+
+static u16 vmci_transport_new_proto_supported_versions(void)
+{
+	if (PROTOCOL_OVERRIDE != -1)
+		return PROTOCOL_OVERRIDE;
+
+	return VSOCK_PROTO_ALL_SUPPORTED;
+}
+
+static u32 vmci_transport_get_local_cid(void)
+{
+	return vmci_get_context_id();
+}
+
+static struct vsock_transport vmci_transport = {
+	.init = vmci_transport_socket_init,
+	.destruct = vmci_transport_destruct,
+	.release = vmci_transport_release,
+	.connect = vmci_transport_connect,
+	.dgram_bind = vmci_transport_dgram_bind,
+	.dgram_dequeue = vmci_transport_dgram_dequeue,
+	.dgram_enqueue = vmci_transport_dgram_enqueue,
+	.dgram_allow = vmci_transport_dgram_allow,
+	.stream_dequeue = vmci_transport_stream_dequeue,
+	.stream_enqueue = vmci_transport_stream_enqueue,
+	.stream_has_data = vmci_transport_stream_has_data,
+	.stream_has_space = vmci_transport_stream_has_space,
+	.stream_rcvhiwat = vmci_transport_stream_rcvhiwat,
+	.stream_is_active = vmci_transport_stream_is_active,
+	.stream_allow = vmci_transport_stream_allow,
+	.notify_poll_in = vmci_transport_notify_poll_in,
+	.notify_poll_out = vmci_transport_notify_poll_out,
+	.notify_recv_init = vmci_transport_notify_recv_init,
+	.notify_recv_pre_block = vmci_transport_notify_recv_pre_block,
+	.notify_recv_pre_dequeue = vmci_transport_notify_recv_pre_dequeue,
+	.notify_recv_post_dequeue = vmci_transport_notify_recv_post_dequeue,
+	.notify_send_init = vmci_transport_notify_send_init,
+	.notify_send_pre_block = vmci_transport_notify_send_pre_block,
+	.notify_send_pre_enqueue = vmci_transport_notify_send_pre_enqueue,
+	.notify_send_post_enqueue = vmci_transport_notify_send_post_enqueue,
+	.shutdown = vmci_transport_shutdown,
+	.set_buffer_size = vmci_transport_set_buffer_size,
+	.set_min_buffer_size = vmci_transport_set_min_buffer_size,
+	.set_max_buffer_size = vmci_transport_set_max_buffer_size,
+	.get_buffer_size = vmci_transport_get_buffer_size,
+	.get_min_buffer_size = vmci_transport_get_min_buffer_size,
+	.get_max_buffer_size = vmci_transport_get_max_buffer_size,
+	.get_local_cid = vmci_transport_get_local_cid,
+};
+
+static int __init vmci_transport_init(void)
+{
+	int err;
+
+	/* Create the datagram handle that we will use to send and receive all
+	 * VSocket control messages for this context.
+	 */
+	err = vmci_transport_datagram_create_hnd(VMCI_TRANSPORT_PACKET_RID,
+						 VMCI_FLAG_ANYCID_DG_HND,
+						 vmci_transport_recv_stream_cb,
+						 NULL,
+						 &vmci_transport_stream_handle);
+	if (err < VMCI_SUCCESS) {
+		pr_err("Unable to create datagram handle. (%d)\n", err);
+		return vmci_transport_error_to_vsock_error(err);
+	}
+
+	err = vmci_event_subscribe(VMCI_EVENT_QP_RESUMED,
+				   vmci_transport_qp_resumed_cb,
+				   NULL, &vmci_transport_qp_resumed_sub_id);
+	if (err < VMCI_SUCCESS) {
+		pr_err("Unable to subscribe to resumed event. (%d)\n", err);
+		err = vmci_transport_error_to_vsock_error(err);
+		vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
+		goto err_destroy_stream_handle;
+	}
+
+	err = vsock_core_init(&vmci_transport);
+	if (err < 0)
+		goto err_unsubscribe;
+
+	return 0;
+
+err_unsubscribe:
+	vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id);
+err_destroy_stream_handle:
+	vmci_datagram_destroy_handle(vmci_transport_stream_handle);
+	return err;
+}
+module_init(vmci_transport_init);
+
+static void __exit vmci_transport_exit(void)
+{
+	if (!vmci_handle_is_invalid(vmci_transport_stream_handle)) {
+		if (vmci_datagram_destroy_handle(
+			vmci_transport_stream_handle) != VMCI_SUCCESS)
+			pr_err("Couldn't destroy datagram handle\n");
+		vmci_transport_stream_handle = VMCI_INVALID_HANDLE;
+	}
+
+	if (vmci_transport_qp_resumed_sub_id != VMCI_INVALID_ID) {
+		vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id);
+		vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
+	}
+
+	vsock_core_exit();
+}
+module_exit(vmci_transport_exit);
+
+MODULE_AUTHOR("VMware, Inc.");
+MODULE_DESCRIPTION("VMCI transport for Virtual Sockets");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("vmware_vsock");
+MODULE_ALIAS_NETPROTO(PF_VSOCK);
diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h
new file mode 100644
index 00000000000..1bf991803ec
--- /dev/null
+++ b/net/vmw_vsock/vmci_transport.h
@@ -0,0 +1,139 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2013 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _VMCI_TRANSPORT_H_
+#define _VMCI_TRANSPORT_H_
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+
+#include "vsock_addr.h"
+#include "af_vsock.h"
+
+/* If the packet format changes in a release then this should change too. */
+#define VMCI_TRANSPORT_PACKET_VERSION 1
+
+/* The resource ID on which control packets are sent. */
+#define VMCI_TRANSPORT_PACKET_RID 1
+
+#define VSOCK_PROTO_INVALID        0
+#define VSOCK_PROTO_PKT_ON_NOTIFY (1 << 0)
+#define VSOCK_PROTO_ALL_SUPPORTED (VSOCK_PROTO_PKT_ON_NOTIFY)
+
+#define vmci_trans(_vsk) ((struct vmci_transport *)((_vsk)->trans))
+
+enum vmci_transport_packet_type {
+	VMCI_TRANSPORT_PACKET_TYPE_INVALID = 0,
+	VMCI_TRANSPORT_PACKET_TYPE_REQUEST,
+	VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE,
+	VMCI_TRANSPORT_PACKET_TYPE_OFFER,
+	VMCI_TRANSPORT_PACKET_TYPE_ATTACH,
+	VMCI_TRANSPORT_PACKET_TYPE_WROTE,
+	VMCI_TRANSPORT_PACKET_TYPE_READ,
+	VMCI_TRANSPORT_PACKET_TYPE_RST,
+	VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN,
+	VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE,
+	VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ,
+	VMCI_TRANSPORT_PACKET_TYPE_REQUEST2,
+	VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2,
+	VMCI_TRANSPORT_PACKET_TYPE_MAX
+};
+
+struct vmci_transport_waiting_info {
+	u64 generation;
+	u64 offset;
+};
+
+/* Control packet type for STREAM sockets.  DGRAMs have no control packets nor
+ * special packet header for data packets, they are just raw VMCI DGRAM
+ * messages.  For STREAMs, control packets are sent over the control channel
+ * while data is written and read directly from queue pairs with no packet
+ * format.
+ */
+struct vmci_transport_packet {
+	struct vmci_datagram dg;
+	u8 version;
+	u8 type;
+	u16 proto;
+	u32 src_port;
+	u32 dst_port;
+	u32 _reserved2;
+	union {
+		u64 size;
+		u64 mode;
+		struct vmci_handle handle;
+		struct vmci_transport_waiting_info wait;
+	} u;
+};
+
+struct vmci_transport_notify_pkt {
+	u64 write_notify_window;
+	u64 write_notify_min_window;
+	bool peer_waiting_read;
+	bool peer_waiting_write;
+	bool peer_waiting_write_detected;
+	bool sent_waiting_read;
+	bool sent_waiting_write;
+	struct vmci_transport_waiting_info peer_waiting_read_info;
+	struct vmci_transport_waiting_info peer_waiting_write_info;
+	u64 produce_q_generation;
+	u64 consume_q_generation;
+};
+
+struct vmci_transport_notify_pkt_q_state {
+	u64 write_notify_window;
+	u64 write_notify_min_window;
+	bool peer_waiting_write;
+	bool peer_waiting_write_detected;
+};
+
+union vmci_transport_notify {
+	struct vmci_transport_notify_pkt pkt;
+	struct vmci_transport_notify_pkt_q_state pkt_q_state;
+};
+
+/* Our transport-specific data. */
+struct vmci_transport {
+	/* For DGRAMs. */
+	struct vmci_handle dg_handle;
+	/* For STREAMs. */
+	struct vmci_handle qp_handle;
+	struct vmci_qp *qpair;
+	u64 produce_size;
+	u64 consume_size;
+	u64 queue_pair_size;
+	u64 queue_pair_min_size;
+	u64 queue_pair_max_size;
+	u32 attach_sub_id;
+	u32 detach_sub_id;
+	union vmci_transport_notify notify;
+	struct vmci_transport_notify_ops *notify_ops;
+};
+
+int vmci_transport_register(void);
+void vmci_transport_unregister(void);
+
+int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst,
+				 struct sockaddr_vm *src);
+int vmci_transport_send_read_bh(struct sockaddr_vm *dst,
+				struct sockaddr_vm *src);
+int vmci_transport_send_wrote(struct sock *sk);
+int vmci_transport_send_read(struct sock *sk);
+int vmci_transport_send_waiting_write(struct sock *sk,
+				      struct vmci_transport_waiting_info *wait);
+int vmci_transport_send_waiting_read(struct sock *sk,
+				     struct vmci_transport_waiting_info *wait);
+
+#endif
diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c
new file mode 100644
index 00000000000..9a730744e7b
--- /dev/null
+++ b/net/vmw_vsock/vmci_transport_notify.c
@@ -0,0 +1,680 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2009-2013 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/stddef.h>
+#include <net/sock.h>
+
+#include "vmci_transport_notify.h"
+
+#define PKT_FIELD(vsk, field_name) (vmci_trans(vsk)->notify.pkt.field_name)
+
+static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk)
+{
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	bool retval;
+	u64 notify_limit;
+
+	if (!PKT_FIELD(vsk, peer_waiting_write))
+		return false;
+
+#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
+	/* When the sender blocks, we take that as a sign that the sender is
+	 * faster than the receiver. To reduce the transmit rate of the sender,
+	 * we delay the sending of the read notification by decreasing the
+	 * write_notify_window. The notification is delayed until the number of
+	 * bytes used in the queue drops below the write_notify_window.
+	 */
+
+	if (!PKT_FIELD(vsk, peer_waiting_write_detected)) {
+		PKT_FIELD(vsk, peer_waiting_write_detected) = true;
+		if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) {
+			PKT_FIELD(vsk, write_notify_window) =
+			    PKT_FIELD(vsk, write_notify_min_window);
+		} else {
+			PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE;
+			if (PKT_FIELD(vsk, write_notify_window) <
+			    PKT_FIELD(vsk, write_notify_min_window))
+				PKT_FIELD(vsk, write_notify_window) =
+				    PKT_FIELD(vsk, write_notify_min_window);
+
+		}
+	}
+	notify_limit = vmci_trans(vsk)->consume_size -
+		PKT_FIELD(vsk, write_notify_window);
+#else
+	notify_limit = 0;
+#endif
+
+	/* For now we ignore the wait information and just see if the free
+	 * space exceeds the notify limit.  Note that improving this function
+	 * to be more intelligent will not require a protocol change and will
+	 * retain compatibility between endpoints with mixed versions of this
+	 * function.
+	 *
+	 * The notify_limit is used to delay notifications in the case where
+	 * flow control is enabled. Below the test is expressed in terms of
+	 * free space in the queue: if free_space > ConsumeSize -
+	 * write_notify_window then notify An alternate way of expressing this
+	 * is to rewrite the expression to use the data ready in the receive
+	 * queue: if write_notify_window > bufferReady then notify as
+	 * free_space == ConsumeSize - bufferReady.
+	 */
+	retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) >
+		notify_limit;
+#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
+	if (retval) {
+		/*
+		 * Once we notify the peer, we reset the detected flag so the
+		 * next wait will again cause a decrease in the window size.
+		 */
+
+		PKT_FIELD(vsk, peer_waiting_write_detected) = false;
+	}
+#endif
+	return retval;
+#else
+	return true;
+#endif
+}
+
+static bool vmci_transport_notify_waiting_read(struct vsock_sock *vsk)
+{
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	if (!PKT_FIELD(vsk, peer_waiting_read))
+		return false;
+
+	/* For now we ignore the wait information and just see if there is any
+	 * data for our peer to read.  Note that improving this function to be
+	 * more intelligent will not require a protocol change and will retain
+	 * compatibility between endpoints with mixed versions of this
+	 * function.
+	 */
+	return vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) > 0;
+#else
+	return true;
+#endif
+}
+
+static void
+vmci_transport_handle_waiting_read(struct sock *sk,
+				   struct vmci_transport_packet *pkt,
+				   bool bottom_half,
+				   struct sockaddr_vm *dst,
+				   struct sockaddr_vm *src)
+{
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	struct vsock_sock *vsk;
+
+	vsk = vsock_sk(sk);
+
+	PKT_FIELD(vsk, peer_waiting_read) = true;
+	memcpy(&PKT_FIELD(vsk, peer_waiting_read_info), &pkt->u.wait,
+	       sizeof(PKT_FIELD(vsk, peer_waiting_read_info)));
+
+	if (vmci_transport_notify_waiting_read(vsk)) {
+		bool sent;
+
+		if (bottom_half)
+			sent = vmci_transport_send_wrote_bh(dst, src) > 0;
+		else
+			sent = vmci_transport_send_wrote(sk) > 0;
+
+		if (sent)
+			PKT_FIELD(vsk, peer_waiting_read) = false;
+	}
+#endif
+}
+
+static void
+vmci_transport_handle_waiting_write(struct sock *sk,
+				    struct vmci_transport_packet *pkt,
+				    bool bottom_half,
+				    struct sockaddr_vm *dst,
+				    struct sockaddr_vm *src)
+{
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	struct vsock_sock *vsk;
+
+	vsk = vsock_sk(sk);
+
+	PKT_FIELD(vsk, peer_waiting_write) = true;
+	memcpy(&PKT_FIELD(vsk, peer_waiting_write_info), &pkt->u.wait,
+	       sizeof(PKT_FIELD(vsk, peer_waiting_write_info)));
+
+	if (vmci_transport_notify_waiting_write(vsk)) {
+		bool sent;
+
+		if (bottom_half)
+			sent = vmci_transport_send_read_bh(dst, src) > 0;
+		else
+			sent = vmci_transport_send_read(sk) > 0;
+
+		if (sent)
+			PKT_FIELD(vsk, peer_waiting_write) = false;
+	}
+#endif
+}
+
+static void
+vmci_transport_handle_read(struct sock *sk,
+			   struct vmci_transport_packet *pkt,
+			   bool bottom_half,
+			   struct sockaddr_vm *dst, struct sockaddr_vm *src)
+{
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	struct vsock_sock *vsk;
+
+	vsk = vsock_sk(sk);
+	PKT_FIELD(vsk, sent_waiting_write) = false;
+#endif
+
+	sk->sk_write_space(sk);
+}
+
+static bool send_waiting_read(struct sock *sk, u64 room_needed)
+{
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	struct vsock_sock *vsk;
+	struct vmci_transport_waiting_info waiting_info;
+	u64 tail;
+	u64 head;
+	u64 room_left;
+	bool ret;
+
+	vsk = vsock_sk(sk);
+
+	if (PKT_FIELD(vsk, sent_waiting_read))
+		return true;
+
+	if (PKT_FIELD(vsk, write_notify_window) <
+			vmci_trans(vsk)->consume_size)
+		PKT_FIELD(vsk, write_notify_window) =
+		    min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE,
+			vmci_trans(vsk)->consume_size);
+
+	vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair, &tail, &head);
+	room_left = vmci_trans(vsk)->consume_size - head;
+	if (room_needed >= room_left) {
+		waiting_info.offset = room_needed - room_left;
+		waiting_info.generation =
+		    PKT_FIELD(vsk, consume_q_generation) + 1;
+	} else {
+		waiting_info.offset = head + room_needed;
+		waiting_info.generation = PKT_FIELD(vsk, consume_q_generation);
+	}
+
+	ret = vmci_transport_send_waiting_read(sk, &waiting_info) > 0;
+	if (ret)
+		PKT_FIELD(vsk, sent_waiting_read) = true;
+
+	return ret;
+#else
+	return true;
+#endif
+}
+
+static bool send_waiting_write(struct sock *sk, u64 room_needed)
+{
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	struct vsock_sock *vsk;
+	struct vmci_transport_waiting_info waiting_info;
+	u64 tail;
+	u64 head;
+	u64 room_left;
+	bool ret;
+
+	vsk = vsock_sk(sk);
+
+	if (PKT_FIELD(vsk, sent_waiting_write))
+		return true;
+
+	vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair, &tail, &head);
+	room_left = vmci_trans(vsk)->produce_size - tail;
+	if (room_needed + 1 >= room_left) {
+		/* Wraps around to current generation. */
+		waiting_info.offset = room_needed + 1 - room_left;
+		waiting_info.generation = PKT_FIELD(vsk, produce_q_generation);
+	} else {
+		waiting_info.offset = tail + room_needed + 1;
+		waiting_info.generation =
+		    PKT_FIELD(vsk, produce_q_generation) - 1;
+	}
+
+	ret = vmci_transport_send_waiting_write(sk, &waiting_info) > 0;
+	if (ret)
+		PKT_FIELD(vsk, sent_waiting_write) = true;
+
+	return ret;
+#else
+	return true;
+#endif
+}
+
+static int vmci_transport_send_read_notification(struct sock *sk)
+{
+	struct vsock_sock *vsk;
+	bool sent_read;
+	unsigned int retries;
+	int err;
+
+	vsk = vsock_sk(sk);
+	sent_read = false;
+	retries = 0;
+	err = 0;
+
+	if (vmci_transport_notify_waiting_write(vsk)) {
+		/* Notify the peer that we have read, retrying the send on
+		 * failure up to our maximum value.  XXX For now we just log
+		 * the failure, but later we should schedule a work item to
+		 * handle the resend until it succeeds.  That would require
+		 * keeping track of work items in the vsk and cleaning them up
+		 * upon socket close.
+		 */
+		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
+		       !sent_read &&
+		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
+			err = vmci_transport_send_read(sk);
+			if (err >= 0)
+				sent_read = true;
+
+			retries++;
+		}
+
+		if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS)
+			pr_err("%p unable to send read notify to peer\n", sk);
+		else
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+			PKT_FIELD(vsk, peer_waiting_write) = false;
+#endif
+
+	}
+	return err;
+}
+
+static void
+vmci_transport_handle_wrote(struct sock *sk,
+			    struct vmci_transport_packet *pkt,
+			    bool bottom_half,
+			    struct sockaddr_vm *dst, struct sockaddr_vm *src)
+{
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	struct vsock_sock *vsk = vsock_sk(sk);
+	PKT_FIELD(vsk, sent_waiting_read) = false;
+#endif
+	sk->sk_data_ready(sk, 0);
+}
+
+static void vmci_transport_notify_pkt_socket_init(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE;
+	PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE;
+	PKT_FIELD(vsk, peer_waiting_read) = false;
+	PKT_FIELD(vsk, peer_waiting_write) = false;
+	PKT_FIELD(vsk, peer_waiting_write_detected) = false;
+	PKT_FIELD(vsk, sent_waiting_read) = false;
+	PKT_FIELD(vsk, sent_waiting_write) = false;
+	PKT_FIELD(vsk, produce_q_generation) = 0;
+	PKT_FIELD(vsk, consume_q_generation) = 0;
+
+	memset(&PKT_FIELD(vsk, peer_waiting_read_info), 0,
+	       sizeof(PKT_FIELD(vsk, peer_waiting_read_info)));
+	memset(&PKT_FIELD(vsk, peer_waiting_write_info), 0,
+	       sizeof(PKT_FIELD(vsk, peer_waiting_write_info)));
+}
+
+static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk)
+{
+}
+
+static int
+vmci_transport_notify_pkt_poll_in(struct sock *sk,
+				  size_t target, bool *data_ready_now)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	if (vsock_stream_has_data(vsk)) {
+		*data_ready_now = true;
+	} else {
+		/* We can't read right now because there is nothing in the
+		 * queue. Ask for notifications when there is something to
+		 * read.
+		 */
+		if (sk->sk_state == SS_CONNECTED) {
+			if (!send_waiting_read(sk, 1))
+				return -1;
+
+		}
+		*data_ready_now = false;
+	}
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_poll_out(struct sock *sk,
+				   size_t target, bool *space_avail_now)
+{
+	s64 produce_q_free_space;
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	produce_q_free_space = vsock_stream_has_space(vsk);
+	if (produce_q_free_space > 0) {
+		*space_avail_now = true;
+		return 0;
+	} else if (produce_q_free_space == 0) {
+		/* This is a connected socket but we can't currently send data.
+		 * Notify the peer that we are waiting if the queue is full. We
+		 * only send a waiting write if the queue is full because
+		 * otherwise we end up in an infinite WAITING_WRITE, READ,
+		 * WAITING_WRITE, READ, etc. loop. Treat failing to send the
+		 * notification as a socket error, passing that back through
+		 * the mask.
+		 */
+		if (!send_waiting_write(sk, 1))
+			return -1;
+
+		*space_avail_now = false;
+	}
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_recv_init(
+			struct sock *sk,
+			size_t target,
+			struct vmci_transport_recv_notify_data *data)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY
+	data->consume_head = 0;
+	data->produce_tail = 0;
+#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
+	data->notify_on_block = false;
+
+	if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) {
+		PKT_FIELD(vsk, write_notify_min_window) = target + 1;
+		if (PKT_FIELD(vsk, write_notify_window) <
+		    PKT_FIELD(vsk, write_notify_min_window)) {
+			/* If the current window is smaller than the new
+			 * minimal window size, we need to reevaluate whether
+			 * we need to notify the sender. If the number of ready
+			 * bytes are smaller than the new window, we need to
+			 * send a notification to the sender before we block.
+			 */
+
+			PKT_FIELD(vsk, write_notify_window) =
+			    PKT_FIELD(vsk, write_notify_min_window);
+			data->notify_on_block = true;
+		}
+	}
+#endif
+#endif
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_recv_pre_block(
+				struct sock *sk,
+				size_t target,
+				struct vmci_transport_recv_notify_data *data)
+{
+	int err = 0;
+
+	/* Notify our peer that we are waiting for data to read. */
+	if (!send_waiting_read(sk, target)) {
+		err = -EHOSTUNREACH;
+		return err;
+	}
+#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
+	if (data->notify_on_block) {
+		err = vmci_transport_send_read_notification(sk);
+		if (err < 0)
+			return err;
+
+		data->notify_on_block = false;
+	}
+#endif
+
+	return err;
+}
+
+static int
+vmci_transport_notify_pkt_recv_pre_dequeue(
+				struct sock *sk,
+				size_t target,
+				struct vmci_transport_recv_notify_data *data)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	/* Now consume up to len bytes from the queue.  Note that since we have
+	 * the socket locked we should copy at least ready bytes.
+	 */
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair,
+				       &data->produce_tail,
+				       &data->consume_head);
+#endif
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_recv_post_dequeue(
+				struct sock *sk,
+				size_t target,
+				ssize_t copied,
+				bool data_read,
+				struct vmci_transport_recv_notify_data *data)
+{
+	struct vsock_sock *vsk;
+	int err;
+
+	vsk = vsock_sk(sk);
+	err = 0;
+
+	if (data_read) {
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+		/* Detect a wrap-around to maintain queue generation.  Note
+		 * that this is safe since we hold the socket lock across the
+		 * two queue pair operations.
+		 */
+		if (copied >=
+			vmci_trans(vsk)->consume_size - data->consume_head)
+			PKT_FIELD(vsk, consume_q_generation)++;
+#endif
+
+		err = vmci_transport_send_read_notification(sk);
+		if (err < 0)
+			return err;
+
+	}
+	return err;
+}
+
+static int
+vmci_transport_notify_pkt_send_init(
+			struct sock *sk,
+			struct vmci_transport_send_notify_data *data)
+{
+#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY
+	data->consume_head = 0;
+	data->produce_tail = 0;
+#endif
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_send_pre_block(
+				struct sock *sk,
+				struct vmci_transport_send_notify_data *data)
+{
+	/* Notify our peer that we are waiting for room to write. */
+	if (!send_waiting_write(sk, 1))
+		return -EHOSTUNREACH;
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_send_pre_enqueue(
+				struct sock *sk,
+				struct vmci_transport_send_notify_data *data)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair,
+				       &data->produce_tail,
+				       &data->consume_head);
+#endif
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_send_post_enqueue(
+				struct sock *sk,
+				ssize_t written,
+				struct vmci_transport_send_notify_data *data)
+{
+	int err = 0;
+	struct vsock_sock *vsk;
+	bool sent_wrote = false;
+	int retries = 0;
+
+	vsk = vsock_sk(sk);
+
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	/* Detect a wrap-around to maintain queue generation.  Note that this
+	 * is safe since we hold the socket lock across the two queue pair
+	 * operations.
+	 */
+	if (written >= vmci_trans(vsk)->produce_size - data->produce_tail)
+		PKT_FIELD(vsk, produce_q_generation)++;
+
+#endif
+
+	if (vmci_transport_notify_waiting_read(vsk)) {
+		/* Notify the peer that we have written, retrying the send on
+		 * failure up to our maximum value. See the XXX comment for the
+		 * corresponding piece of code in StreamRecvmsg() for potential
+		 * improvements.
+		 */
+		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
+		       !sent_wrote &&
+		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
+			err = vmci_transport_send_wrote(sk);
+			if (err >= 0)
+				sent_wrote = true;
+
+			retries++;
+		}
+
+		if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
+			pr_err("%p unable to send wrote notify to peer\n", sk);
+			return err;
+		} else {
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+			PKT_FIELD(vsk, peer_waiting_read) = false;
+#endif
+		}
+	}
+	return err;
+}
+
+static void
+vmci_transport_notify_pkt_handle_pkt(
+			struct sock *sk,
+			struct vmci_transport_packet *pkt,
+			bool bottom_half,
+			struct sockaddr_vm *dst,
+			struct sockaddr_vm *src, bool *pkt_processed)
+{
+	bool processed = false;
+
+	switch (pkt->type) {
+	case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
+		vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src);
+		processed = true;
+		break;
+	case VMCI_TRANSPORT_PACKET_TYPE_READ:
+		vmci_transport_handle_read(sk, pkt, bottom_half, dst, src);
+		processed = true;
+		break;
+	case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE:
+		vmci_transport_handle_waiting_write(sk, pkt, bottom_half,
+						    dst, src);
+		processed = true;
+		break;
+
+	case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ:
+		vmci_transport_handle_waiting_read(sk, pkt, bottom_half,
+						   dst, src);
+		processed = true;
+		break;
+	}
+
+	if (pkt_processed)
+		*pkt_processed = processed;
+}
+
+static void vmci_transport_notify_pkt_process_request(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
+	if (vmci_trans(vsk)->consume_size <
+		PKT_FIELD(vsk, write_notify_min_window))
+		PKT_FIELD(vsk, write_notify_min_window) =
+			vmci_trans(vsk)->consume_size;
+}
+
+static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
+	if (vmci_trans(vsk)->consume_size <
+		PKT_FIELD(vsk, write_notify_min_window))
+		PKT_FIELD(vsk, write_notify_min_window) =
+			vmci_trans(vsk)->consume_size;
+}
+
+/* Socket control packet based operations. */
+struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = {
+	vmci_transport_notify_pkt_socket_init,
+	vmci_transport_notify_pkt_socket_destruct,
+	vmci_transport_notify_pkt_poll_in,
+	vmci_transport_notify_pkt_poll_out,
+	vmci_transport_notify_pkt_handle_pkt,
+	vmci_transport_notify_pkt_recv_init,
+	vmci_transport_notify_pkt_recv_pre_block,
+	vmci_transport_notify_pkt_recv_pre_dequeue,
+	vmci_transport_notify_pkt_recv_post_dequeue,
+	vmci_transport_notify_pkt_send_init,
+	vmci_transport_notify_pkt_send_pre_block,
+	vmci_transport_notify_pkt_send_pre_enqueue,
+	vmci_transport_notify_pkt_send_post_enqueue,
+	vmci_transport_notify_pkt_process_request,
+	vmci_transport_notify_pkt_process_negotiate,
+};
diff --git a/net/vmw_vsock/vmci_transport_notify.h b/net/vmw_vsock/vmci_transport_notify.h
new file mode 100644
index 00000000000..7df793249b6
--- /dev/null
+++ b/net/vmw_vsock/vmci_transport_notify.h
@@ -0,0 +1,83 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2009-2013 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __VMCI_TRANSPORT_NOTIFY_H__
+#define __VMCI_TRANSPORT_NOTIFY_H__
+
+#include <linux/types.h>
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+#include <linux/vm_sockets.h>
+
+#include "vmci_transport.h"
+
+/* Comment this out to compare with old protocol. */
+#define VSOCK_OPTIMIZATION_WAITING_NOTIFY 1
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+/* Comment this out to remove flow control for "new" protocol */
+#define VSOCK_OPTIMIZATION_FLOW_CONTROL 1
+#endif
+
+#define VMCI_TRANSPORT_MAX_DGRAM_RESENDS       10
+
+struct vmci_transport_recv_notify_data {
+	u64 consume_head;
+	u64 produce_tail;
+	bool notify_on_block;
+};
+
+struct vmci_transport_send_notify_data {
+	u64 consume_head;
+	u64 produce_tail;
+};
+
+/* Socket notification callbacks. */
+struct vmci_transport_notify_ops {
+	void (*socket_init) (struct sock *sk);
+	void (*socket_destruct) (struct vsock_sock *vsk);
+	int (*poll_in) (struct sock *sk, size_t target,
+			  bool *data_ready_now);
+	int (*poll_out) (struct sock *sk, size_t target,
+			   bool *space_avail_now);
+	void (*handle_notify_pkt) (struct sock *sk,
+				   struct vmci_transport_packet *pkt,
+				   bool bottom_half, struct sockaddr_vm *dst,
+				   struct sockaddr_vm *src,
+				   bool *pkt_processed);
+	int (*recv_init) (struct sock *sk, size_t target,
+			  struct vmci_transport_recv_notify_data *data);
+	int (*recv_pre_block) (struct sock *sk, size_t target,
+			       struct vmci_transport_recv_notify_data *data);
+	int (*recv_pre_dequeue) (struct sock *sk, size_t target,
+				 struct vmci_transport_recv_notify_data *data);
+	int (*recv_post_dequeue) (struct sock *sk, size_t target,
+				  ssize_t copied, bool data_read,
+				  struct vmci_transport_recv_notify_data *data);
+	int (*send_init) (struct sock *sk,
+			  struct vmci_transport_send_notify_data *data);
+	int (*send_pre_block) (struct sock *sk,
+			       struct vmci_transport_send_notify_data *data);
+	int (*send_pre_enqueue) (struct sock *sk,
+				 struct vmci_transport_send_notify_data *data);
+	int (*send_post_enqueue) (struct sock *sk, ssize_t written,
+				  struct vmci_transport_send_notify_data *data);
+	void (*process_request) (struct sock *sk);
+	void (*process_negotiate) (struct sock *sk);
+};
+
+extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops;
+extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops;
+
+#endif /* __VMCI_TRANSPORT_NOTIFY_H__ */
diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c
new file mode 100644
index 00000000000..622bd7aa101
--- /dev/null
+++ b/net/vmw_vsock/vmci_transport_notify_qstate.c
@@ -0,0 +1,438 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2009-2013 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/stddef.h>
+#include <net/sock.h>
+
+#include "vmci_transport_notify.h"
+
+#define PKT_FIELD(vsk, field_name) \
+	(vmci_trans(vsk)->notify.pkt_q_state.field_name)
+
+static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk)
+{
+	bool retval;
+	u64 notify_limit;
+
+	if (!PKT_FIELD(vsk, peer_waiting_write))
+		return false;
+
+	/* When the sender blocks, we take that as a sign that the sender is
+	 * faster than the receiver. To reduce the transmit rate of the sender,
+	 * we delay the sending of the read notification by decreasing the
+	 * write_notify_window. The notification is delayed until the number of
+	 * bytes used in the queue drops below the write_notify_window.
+	 */
+
+	if (!PKT_FIELD(vsk, peer_waiting_write_detected)) {
+		PKT_FIELD(vsk, peer_waiting_write_detected) = true;
+		if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) {
+			PKT_FIELD(vsk, write_notify_window) =
+			    PKT_FIELD(vsk, write_notify_min_window);
+		} else {
+			PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE;
+			if (PKT_FIELD(vsk, write_notify_window) <
+			    PKT_FIELD(vsk, write_notify_min_window))
+				PKT_FIELD(vsk, write_notify_window) =
+				    PKT_FIELD(vsk, write_notify_min_window);
+
+		}
+	}
+	notify_limit = vmci_trans(vsk)->consume_size -
+		PKT_FIELD(vsk, write_notify_window);
+
+	/* The notify_limit is used to delay notifications in the case where
+	 * flow control is enabled. Below the test is expressed in terms of
+	 * free space in the queue: if free_space > ConsumeSize -
+	 * write_notify_window then notify An alternate way of expressing this
+	 * is to rewrite the expression to use the data ready in the receive
+	 * queue: if write_notify_window > bufferReady then notify as
+	 * free_space == ConsumeSize - bufferReady.
+	 */
+
+	retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) >
+		notify_limit;
+
+	if (retval) {
+		/* Once we notify the peer, we reset the detected flag so the
+		 * next wait will again cause a decrease in the window size.
+		 */
+
+		PKT_FIELD(vsk, peer_waiting_write_detected) = false;
+	}
+	return retval;
+}
+
+static void
+vmci_transport_handle_read(struct sock *sk,
+			   struct vmci_transport_packet *pkt,
+			   bool bottom_half,
+			   struct sockaddr_vm *dst, struct sockaddr_vm *src)
+{
+	sk->sk_write_space(sk);
+}
+
+static void
+vmci_transport_handle_wrote(struct sock *sk,
+			    struct vmci_transport_packet *pkt,
+			    bool bottom_half,
+			    struct sockaddr_vm *dst, struct sockaddr_vm *src)
+{
+	sk->sk_data_ready(sk, 0);
+}
+
+static void vsock_block_update_write_window(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	if (PKT_FIELD(vsk, write_notify_window) < vmci_trans(vsk)->consume_size)
+		PKT_FIELD(vsk, write_notify_window) =
+		    min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE,
+			vmci_trans(vsk)->consume_size);
+}
+
+static int vmci_transport_send_read_notification(struct sock *sk)
+{
+	struct vsock_sock *vsk;
+	bool sent_read;
+	unsigned int retries;
+	int err;
+
+	vsk = vsock_sk(sk);
+	sent_read = false;
+	retries = 0;
+	err = 0;
+
+	if (vmci_transport_notify_waiting_write(vsk)) {
+		/* Notify the peer that we have read, retrying the send on
+		 * failure up to our maximum value.  XXX For now we just log
+		 * the failure, but later we should schedule a work item to
+		 * handle the resend until it succeeds.  That would require
+		 * keeping track of work items in the vsk and cleaning them up
+		 * upon socket close.
+		 */
+		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
+		       !sent_read &&
+		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
+			err = vmci_transport_send_read(sk);
+			if (err >= 0)
+				sent_read = true;
+
+			retries++;
+		}
+
+		if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_read)
+			pr_err("%p unable to send read notification to peer\n",
+			       sk);
+		else
+			PKT_FIELD(vsk, peer_waiting_write) = false;
+
+	}
+	return err;
+}
+
+static void vmci_transport_notify_pkt_socket_init(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE;
+	PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE;
+	PKT_FIELD(vsk, peer_waiting_write) = false;
+	PKT_FIELD(vsk, peer_waiting_write_detected) = false;
+}
+
+static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk)
+{
+	PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE;
+	PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE;
+	PKT_FIELD(vsk, peer_waiting_write) = false;
+	PKT_FIELD(vsk, peer_waiting_write_detected) = false;
+}
+
+static int
+vmci_transport_notify_pkt_poll_in(struct sock *sk,
+				  size_t target, bool *data_ready_now)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	if (vsock_stream_has_data(vsk)) {
+		*data_ready_now = true;
+	} else {
+		/* We can't read right now because there is nothing in the
+		 * queue. Ask for notifications when there is something to
+		 * read.
+		 */
+		if (sk->sk_state == SS_CONNECTED)
+			vsock_block_update_write_window(sk);
+		*data_ready_now = false;
+	}
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_poll_out(struct sock *sk,
+				   size_t target, bool *space_avail_now)
+{
+	s64 produce_q_free_space;
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	produce_q_free_space = vsock_stream_has_space(vsk);
+	if (produce_q_free_space > 0) {
+		*space_avail_now = true;
+		return 0;
+	} else if (produce_q_free_space == 0) {
+		/* This is a connected socket but we can't currently send data.
+		 * Nothing else to do.
+		 */
+		*space_avail_now = false;
+	}
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_recv_init(
+				struct sock *sk,
+				size_t target,
+				struct vmci_transport_recv_notify_data *data)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	data->consume_head = 0;
+	data->produce_tail = 0;
+	data->notify_on_block = false;
+
+	if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) {
+		PKT_FIELD(vsk, write_notify_min_window) = target + 1;
+		if (PKT_FIELD(vsk, write_notify_window) <
+		    PKT_FIELD(vsk, write_notify_min_window)) {
+			/* If the current window is smaller than the new
+			 * minimal window size, we need to reevaluate whether
+			 * we need to notify the sender. If the number of ready
+			 * bytes are smaller than the new window, we need to
+			 * send a notification to the sender before we block.
+			 */
+
+			PKT_FIELD(vsk, write_notify_window) =
+			    PKT_FIELD(vsk, write_notify_min_window);
+			data->notify_on_block = true;
+		}
+	}
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_recv_pre_block(
+				struct sock *sk,
+				size_t target,
+				struct vmci_transport_recv_notify_data *data)
+{
+	int err = 0;
+
+	vsock_block_update_write_window(sk);
+
+	if (data->notify_on_block) {
+		err = vmci_transport_send_read_notification(sk);
+		if (err < 0)
+			return err;
+		data->notify_on_block = false;
+	}
+
+	return err;
+}
+
+static int
+vmci_transport_notify_pkt_recv_post_dequeue(
+				struct sock *sk,
+				size_t target,
+				ssize_t copied,
+				bool data_read,
+				struct vmci_transport_recv_notify_data *data)
+{
+	struct vsock_sock *vsk;
+	int err;
+	bool was_full = false;
+	u64 free_space;
+
+	vsk = vsock_sk(sk);
+	err = 0;
+
+	if (data_read) {
+		smp_mb();
+
+		free_space =
+			vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair);
+		was_full = free_space == copied;
+
+		if (was_full)
+			PKT_FIELD(vsk, peer_waiting_write) = true;
+
+		err = vmci_transport_send_read_notification(sk);
+		if (err < 0)
+			return err;
+
+		/* See the comment in
+		 * vmci_transport_notify_pkt_send_post_enqueue().
+		 */
+		sk->sk_data_ready(sk, 0);
+	}
+
+	return err;
+}
+
+static int
+vmci_transport_notify_pkt_send_init(
+				struct sock *sk,
+				struct vmci_transport_send_notify_data *data)
+{
+	data->consume_head = 0;
+	data->produce_tail = 0;
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_send_post_enqueue(
+				struct sock *sk,
+				ssize_t written,
+				struct vmci_transport_send_notify_data *data)
+{
+	int err = 0;
+	struct vsock_sock *vsk;
+	bool sent_wrote = false;
+	bool was_empty;
+	int retries = 0;
+
+	vsk = vsock_sk(sk);
+
+	smp_mb();
+
+	was_empty =
+		vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) == written;
+	if (was_empty) {
+		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
+		       !sent_wrote &&
+		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
+			err = vmci_transport_send_wrote(sk);
+			if (err >= 0)
+				sent_wrote = true;
+
+			retries++;
+		}
+	}
+
+	if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_wrote) {
+		pr_err("%p unable to send wrote notification to peer\n",
+		       sk);
+		return err;
+	}
+
+	return err;
+}
+
+static void
+vmci_transport_notify_pkt_handle_pkt(
+				struct sock *sk,
+				struct vmci_transport_packet *pkt,
+				bool bottom_half,
+				struct sockaddr_vm *dst,
+				struct sockaddr_vm *src, bool *pkt_processed)
+{
+	bool processed = false;
+
+	switch (pkt->type) {
+	case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
+		vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src);
+		processed = true;
+		break;
+	case VMCI_TRANSPORT_PACKET_TYPE_READ:
+		vmci_transport_handle_read(sk, pkt, bottom_half, dst, src);
+		processed = true;
+		break;
+	}
+
+	if (pkt_processed)
+		*pkt_processed = processed;
+}
+
+static void vmci_transport_notify_pkt_process_request(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
+	if (vmci_trans(vsk)->consume_size <
+		PKT_FIELD(vsk, write_notify_min_window))
+		PKT_FIELD(vsk, write_notify_min_window) =
+			vmci_trans(vsk)->consume_size;
+}
+
+static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
+	if (vmci_trans(vsk)->consume_size <
+		PKT_FIELD(vsk, write_notify_min_window))
+		PKT_FIELD(vsk, write_notify_min_window) =
+			vmci_trans(vsk)->consume_size;
+}
+
+static int
+vmci_transport_notify_pkt_recv_pre_dequeue(
+				struct sock *sk,
+				size_t target,
+				struct vmci_transport_recv_notify_data *data)
+{
+	return 0; /* NOP for QState. */
+}
+
+static int
+vmci_transport_notify_pkt_send_pre_block(
+				struct sock *sk,
+				struct vmci_transport_send_notify_data *data)
+{
+	return 0; /* NOP for QState. */
+}
+
+static int
+vmci_transport_notify_pkt_send_pre_enqueue(
+				struct sock *sk,
+				struct vmci_transport_send_notify_data *data)
+{
+	return 0; /* NOP for QState. */
+}
+
+/* Socket always on control packet based operations. */
+struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = {
+	vmci_transport_notify_pkt_socket_init,
+	vmci_transport_notify_pkt_socket_destruct,
+	vmci_transport_notify_pkt_poll_in,
+	vmci_transport_notify_pkt_poll_out,
+	vmci_transport_notify_pkt_handle_pkt,
+	vmci_transport_notify_pkt_recv_init,
+	vmci_transport_notify_pkt_recv_pre_block,
+	vmci_transport_notify_pkt_recv_pre_dequeue,
+	vmci_transport_notify_pkt_recv_post_dequeue,
+	vmci_transport_notify_pkt_send_init,
+	vmci_transport_notify_pkt_send_pre_block,
+	vmci_transport_notify_pkt_send_pre_enqueue,
+	vmci_transport_notify_pkt_send_post_enqueue,
+	vmci_transport_notify_pkt_process_request,
+	vmci_transport_notify_pkt_process_negotiate,
+};
diff --git a/net/vmw_vsock/vsock_addr.c b/net/vmw_vsock/vsock_addr.c
new file mode 100644
index 00000000000..b7df1aea7c5
--- /dev/null
+++ b/net/vmw_vsock/vsock_addr.c
@@ -0,0 +1,86 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2007-2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/stddef.h>
+#include <net/sock.h>
+
+#include "vsock_addr.h"
+
+void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port)
+{
+	memset(addr, 0, sizeof(*addr));
+	addr->svm_family = AF_VSOCK;
+	addr->svm_cid = cid;
+	addr->svm_port = port;
+}
+EXPORT_SYMBOL_GPL(vsock_addr_init);
+
+int vsock_addr_validate(const struct sockaddr_vm *addr)
+{
+	if (!addr)
+		return -EFAULT;
+
+	if (addr->svm_family != AF_VSOCK)
+		return -EAFNOSUPPORT;
+
+	if (addr->svm_zero[0] != 0)
+		return -EINVAL;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vsock_addr_validate);
+
+bool vsock_addr_bound(const struct sockaddr_vm *addr)
+{
+	return addr->svm_port != VMADDR_PORT_ANY;
+}
+EXPORT_SYMBOL_GPL(vsock_addr_bound);
+
+void vsock_addr_unbind(struct sockaddr_vm *addr)
+{
+	vsock_addr_init(addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+}
+EXPORT_SYMBOL_GPL(vsock_addr_unbind);
+
+bool vsock_addr_equals_addr(const struct sockaddr_vm *addr,
+			    const struct sockaddr_vm *other)
+{
+	return addr->svm_cid == other->svm_cid &&
+		addr->svm_port == other->svm_port;
+}
+EXPORT_SYMBOL_GPL(vsock_addr_equals_addr);
+
+bool vsock_addr_equals_addr_any(const struct sockaddr_vm *addr,
+				const struct sockaddr_vm *other)
+{
+	return (addr->svm_cid == VMADDR_CID_ANY ||
+		other->svm_cid == VMADDR_CID_ANY ||
+		addr->svm_cid == other->svm_cid) &&
+	       addr->svm_port == other->svm_port;
+}
+EXPORT_SYMBOL_GPL(vsock_addr_equals_addr_any);
+
+int vsock_addr_cast(const struct sockaddr *addr,
+		    size_t len, struct sockaddr_vm **out_addr)
+{
+	if (len < sizeof(**out_addr))
+		return -EFAULT;
+
+	*out_addr = (struct sockaddr_vm *)addr;
+	return vsock_addr_validate(*out_addr);
+}
+EXPORT_SYMBOL_GPL(vsock_addr_cast);
diff --git a/net/vmw_vsock/vsock_addr.h b/net/vmw_vsock/vsock_addr.h
new file mode 100644
index 00000000000..cdfbcefdf84
--- /dev/null
+++ b/net/vmw_vsock/vsock_addr.h
@@ -0,0 +1,32 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _VSOCK_ADDR_H_
+#define _VSOCK_ADDR_H_
+
+#include <linux/vm_sockets.h>
+
+void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port);
+int vsock_addr_validate(const struct sockaddr_vm *addr);
+bool vsock_addr_bound(const struct sockaddr_vm *addr);
+void vsock_addr_unbind(struct sockaddr_vm *addr);
+bool vsock_addr_equals_addr(const struct sockaddr_vm *addr,
+			    const struct sockaddr_vm *other);
+bool vsock_addr_equals_addr_any(const struct sockaddr_vm *addr,
+				const struct sockaddr_vm *other);
+int vsock_addr_cast(const struct sockaddr *addr, size_t len,
+		    struct sockaddr_vm **out_addr);
+
+#endif
diff --git a/net/vmw_vsock/vsock_version.h b/net/vmw_vsock/vsock_version.h
new file mode 100644
index 00000000000..4df7f5e2151
--- /dev/null
+++ b/net/vmw_vsock/vsock_version.h
@@ -0,0 +1,22 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2011-2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _VSOCK_VERSION_H_
+#define _VSOCK_VERSION_H_
+
+#define VSOCK_DRIVER_VERSION_PARTS	{ 1, 0, 0, 0 }
+#define VSOCK_DRIVER_VERSION_STRING	"1.0.0.0-k"
+
+#endif /* _VSOCK_VERSION_H_ */
diff --git a/net/wanrouter/Kconfig b/net/wanrouter/Kconfig
deleted file mode 100644
index a157a2e64e1..00000000000
--- a/net/wanrouter/Kconfig
+++ /dev/null
@@ -1,27 +0,0 @@
-#
-# Configuration for WAN router
-#
-
-config WAN_ROUTER
-	tristate "WAN router (DEPRECATED)"
-	depends on EXPERIMENTAL
-	---help---
-	  Wide Area Networks (WANs), such as X.25, frame relay and leased
-	  lines, are used to interconnect Local Area Networks (LANs) over vast
-	  distances with data transfer rates significantly higher than those
-	  achievable with commonly used asynchronous modem connections.
-	  Usually, a quite expensive external device called a `WAN router' is
-	  needed to connect to a WAN.
-
-	  As an alternative, WAN routing can be built into the Linux kernel.
-	  With relatively inexpensive WAN interface cards available on the
-	  market, a perfectly usable router can be built for less than half
-	  the price of an external router.  If you have one of those cards and
-	  wish to use your Linux box as a WAN router, say Y here and also to
-	  the WAN driver for your card, below.  You will then need the
-	  wan-tools package which is available from <ftp://ftp.sangoma.com/>.
-
-	  To compile WAN routing support as a module, choose M here: the
-	  module will be called wanrouter.
-
-	  If unsure, say N.
diff --git a/net/wanrouter/Makefile b/net/wanrouter/Makefile
deleted file mode 100644
index 4da14bc4807..00000000000
--- a/net/wanrouter/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-#
-# Makefile for the Linux WAN router layer.
-#
-
-obj-$(CONFIG_WAN_ROUTER) += wanrouter.o
-
-wanrouter-y :=  wanproc.o wanmain.o
diff --git a/net/wanrouter/patchlevel b/net/wanrouter/patchlevel
deleted file mode 100644
index c043eea7767..00000000000
--- a/net/wanrouter/patchlevel
+++ /dev/null
@@ -1 +0,0 @@
-2.2.1
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
deleted file mode 100644
index 2ab785064b7..00000000000
--- a/net/wanrouter/wanmain.c
+++ /dev/null
@@ -1,782 +0,0 @@
-/*****************************************************************************
-* wanmain.c	WAN Multiprotocol Router Module. Main code.
-*
-*		This module is completely hardware-independent and provides
-*		the following common services for the WAN Link Drivers:
-*		 o WAN device management (registering, unregistering)
-*		 o Network interface management
-*		 o Physical connection management (dial-up, incoming calls)
-*		 o Logical connection management (switched virtual circuits)
-*		 o Protocol encapsulation/decapsulation
-*
-* Author:	Gideon Hack
-*
-* Copyright:	(c) 1995-1999 Sangoma Technologies Inc.
-*
-*		This program is free software; you can redistribute it and/or
-*		modify it under the terms of the GNU General Public License
-*		as published by the Free Software Foundation; either version
-*		2 of the License, or (at your option) any later version.
-* ============================================================================
-* Nov 24, 2000  Nenad Corbic	Updated for 2.4.X kernels
-* Nov 07, 2000  Nenad Corbic	Fixed the Mulit-Port PPP for kernels 2.2.16 and
-*  				greater.
-* Aug 2,  2000  Nenad Corbic	Block the Multi-Port PPP from running on
-*  			        kernels 2.2.16 or greater.  The SyncPPP
-*  			        has changed.
-* Jul 13, 2000  Nenad Corbic	Added SyncPPP support
-* 				Added extra debugging in device_setup().
-* Oct 01, 1999  Gideon Hack     Update for s514 PCI card
-* Dec 27, 1996	Gene Kozin	Initial version (based on Sangoma's WANPIPE)
-* Jan 16, 1997	Gene Kozin	router_devlist made public
-* Jan 31, 1997  Alan Cox	Hacked it about a bit for 2.1
-* Jun 27, 1997  Alan Cox	realigned with vendor code
-* Oct 15, 1997  Farhan Thawar   changed wan_encapsulate to add a pad byte of 0
-* Apr 20, 1998	Alan Cox	Fixed 2.1 symbols
-* May 17, 1998  K. Baranowski	Fixed SNAP encapsulation in wan_encapsulate
-* Dec 15, 1998  Arnaldo Melo    support for firmwares of up to 128000 bytes
-*                               check wandev->setup return value
-* Dec 22, 1998  Arnaldo Melo    vmalloc/vfree used in device_setup to allocate
-*                               kernel memory and copy configuration data to
-*                               kernel space (for big firmwares)
-* Jun 02, 1999  Gideon Hack	Updates for Linux 2.0.X and 2.2.X kernels.
-*****************************************************************************/
-
-#include <linux/stddef.h>	/* offsetof(), etc. */
-#include <linux/capability.h>
-#include <linux/errno.h>	/* return codes */
-#include <linux/kernel.h>
-#include <linux/module.h>	/* support for loadable modules */
-#include <linux/slab.h>		/* kmalloc(), kfree() */
-#include <linux/mutex.h>
-#include <linux/mm.h>
-#include <linux/string.h>	/* inline mem*, str* functions */
-
-#include <asm/byteorder.h>	/* htons(), etc. */
-#include <linux/wanrouter.h>	/* WAN router API definitions */
-
-#include <linux/vmalloc.h>	/* vmalloc, vfree */
-#include <asm/uaccess.h>        /* copy_to/from_user */
-#include <linux/init.h>         /* __initfunc et al. */
-
-#define DEV_TO_SLAVE(dev)	(*((struct net_device **)netdev_priv(dev)))
-
-/*
- * 	Function Prototypes
- */
-
-/*
- *	WAN device IOCTL handlers
- */
-
-static DEFINE_MUTEX(wanrouter_mutex);
-static int wanrouter_device_setup(struct wan_device *wandev,
-				  wandev_conf_t __user *u_conf);
-static int wanrouter_device_stat(struct wan_device *wandev,
-				 wandev_stat_t __user *u_stat);
-static int wanrouter_device_shutdown(struct wan_device *wandev);
-static int wanrouter_device_new_if(struct wan_device *wandev,
-				   wanif_conf_t __user *u_conf);
-static int wanrouter_device_del_if(struct wan_device *wandev,
-				   char __user *u_name);
-
-/*
- *	Miscellaneous
- */
-
-static struct wan_device *wanrouter_find_device(char *name);
-static int wanrouter_delete_interface(struct wan_device *wandev, char *name);
-static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags)
-	__acquires(lock);
-static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags)
-	__releases(lock);
-
-
-
-/*
- *	Global Data
- */
-
-static char wanrouter_fullname[]  = "Sangoma WANPIPE Router";
-static char wanrouter_copyright[] = "(c) 1995-2000 Sangoma Technologies Inc.";
-static char wanrouter_modname[] = ROUTER_NAME; /* short module name */
-struct wan_device* wanrouter_router_devlist; /* list of registered devices */
-
-/*
- *	Organize Unique Identifiers for encapsulation/decapsulation
- */
-
-#if 0
-static unsigned char wanrouter_oui_ether[] = { 0x00, 0x00, 0x00 };
-static unsigned char wanrouter_oui_802_2[] = { 0x00, 0x80, 0xC2 };
-#endif
-
-static int __init wanrouter_init(void)
-{
-	int err;
-
-	printk(KERN_INFO "%s v%u.%u %s\n",
-	       wanrouter_fullname, ROUTER_VERSION, ROUTER_RELEASE,
-	       wanrouter_copyright);
-
-	err = wanrouter_proc_init();
-	if (err)
-		printk(KERN_INFO "%s: can't create entry in proc filesystem!\n",
-		       wanrouter_modname);
-
-	return err;
-}
-
-static void __exit wanrouter_cleanup (void)
-{
-	wanrouter_proc_cleanup();
-}
-
-/*
- * This is just plain dumb.  We should move the bugger to drivers/net/wan,
- * slap it first in directory and make it module_init().  The only reason
- * for subsys_initcall() here is that net goes after drivers (why, BTW?)
- */
-subsys_initcall(wanrouter_init);
-module_exit(wanrouter_cleanup);
-
-/*
- * 	Kernel APIs
- */
-
-/*
- * 	Register WAN device.
- * 	o verify device credentials
- * 	o create an entry for the device in the /proc/net/router directory
- * 	o initialize internally maintained fields of the wan_device structure
- * 	o link device data space to a singly-linked list
- * 	o if it's the first device, then start kernel 'thread'
- * 	o increment module use count
- *
- * 	Return:
- *	0	Ok
- *	< 0	error.
- *
- * 	Context:	process
- */
-
-
-int register_wan_device(struct wan_device *wandev)
-{
-	int err, namelen;
-
-	if ((wandev == NULL) || (wandev->magic != ROUTER_MAGIC) ||
-	    (wandev->name == NULL))
-		return -EINVAL;
-
-	namelen = strlen(wandev->name);
-	if (!namelen || (namelen > WAN_DRVNAME_SZ))
-		return -EINVAL;
-
-	if (wanrouter_find_device(wandev->name))
-		return -EEXIST;
-
-#ifdef WANDEBUG
-	printk(KERN_INFO "%s: registering WAN device %s\n",
-	       wanrouter_modname, wandev->name);
-#endif
-
-	/*
-	 *	Register /proc directory entry
-	 */
-	err = wanrouter_proc_add(wandev);
-	if (err) {
-		printk(KERN_INFO
-			"%s: can't create /proc/net/router/%s entry!\n",
-			wanrouter_modname, wandev->name);
-		return err;
-	}
-
-	/*
-	 *	Initialize fields of the wan_device structure maintained by the
-	 *	router and update local data.
-	 */
-
-	wandev->ndev = 0;
-	wandev->dev  = NULL;
-	wandev->next = wanrouter_router_devlist;
-	wanrouter_router_devlist = wandev;
-	return 0;
-}
-
-/*
- *	Unregister WAN device.
- *	o shut down device
- *	o unlink device data space from the linked list
- *	o delete device entry in the /proc/net/router directory
- *	o decrement module use count
- *
- *	Return:		0	Ok
- *			<0	error.
- *	Context:	process
- */
-
-
-int unregister_wan_device(char *name)
-{
-	struct wan_device *wandev, *prev;
-
-	if (name == NULL)
-		return -EINVAL;
-
-	for (wandev = wanrouter_router_devlist, prev = NULL;
-		wandev && strcmp(wandev->name, name);
-		prev = wandev, wandev = wandev->next)
-		;
-	if (wandev == NULL)
-		return -ENODEV;
-
-#ifdef WANDEBUG
-	printk(KERN_INFO "%s: unregistering WAN device %s\n",
-	       wanrouter_modname, name);
-#endif
-
-	if (wandev->state != WAN_UNCONFIGURED)
-		wanrouter_device_shutdown(wandev);
-
-	if (prev)
-		prev->next = wandev->next;
-	else
-		wanrouter_router_devlist = wandev->next;
-
-	wanrouter_proc_delete(wandev);
-	return 0;
-}
-
-#if 0
-
-/*
- *	Encapsulate packet.
- *
- *	Return:	encapsulation header size
- *		< 0	- unsupported Ethertype
- *
- *	Notes:
- *	1. This function may be called on interrupt context.
- */
-
-
-int wanrouter_encapsulate(struct sk_buff *skb, struct net_device *dev,
-			  unsigned short type)
-{
-	int hdr_len = 0;
-
-	switch (type) {
-	case ETH_P_IP:		/* IP datagram encapsulation */
-		hdr_len += 1;
-		skb_push(skb, 1);
-		skb->data[0] = NLPID_IP;
-		break;
-
-	case ETH_P_IPX:		/* SNAP encapsulation */
-	case ETH_P_ARP:
-		hdr_len += 7;
-		skb_push(skb, 7);
-		skb->data[0] = 0;
-		skb->data[1] = NLPID_SNAP;
-		skb_copy_to_linear_data_offset(skb, 2, wanrouter_oui_ether,
-					       sizeof(wanrouter_oui_ether));
-		*((unsigned short*)&skb->data[5]) = htons(type);
-		break;
-
-	default:		/* Unknown packet type */
-		printk(KERN_INFO
-			"%s: unsupported Ethertype 0x%04X on interface %s!\n",
-			wanrouter_modname, type, dev->name);
-		hdr_len = -EINVAL;
-	}
-	return hdr_len;
-}
-
-
-/*
- *	Decapsulate packet.
- *
- *	Return:	Ethertype (in network order)
- *			0	unknown encapsulation
- *
- *	Notes:
- *	1. This function may be called on interrupt context.
- */
-
-
-__be16 wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev)
-{
-	int cnt = skb->data[0] ? 0 : 1;	/* there may be a pad present */
-	__be16 ethertype;
-
-	switch (skb->data[cnt]) {
-	case NLPID_IP:		/* IP datagramm */
-		ethertype = htons(ETH_P_IP);
-		cnt += 1;
-		break;
-
-	case NLPID_SNAP:	/* SNAP encapsulation */
-		if (memcmp(&skb->data[cnt + 1], wanrouter_oui_ether,
-			   sizeof(wanrouter_oui_ether))){
-			printk(KERN_INFO
-				"%s: unsupported SNAP OUI %02X-%02X-%02X "
-				"on interface %s!\n", wanrouter_modname,
-				skb->data[cnt+1], skb->data[cnt+2],
-				skb->data[cnt+3], dev->name);
-			return 0;
-		}
-		ethertype = *((__be16*)&skb->data[cnt+4]);
-		cnt += 6;
-		break;
-
-	/* add other protocols, e.g. CLNP, ESIS, ISIS, if needed */
-
-	default:
-		printk(KERN_INFO
-			"%s: unsupported NLPID 0x%02X on interface %s!\n",
-			wanrouter_modname, skb->data[cnt], dev->name);
-		return 0;
-	}
-	skb->protocol = ethertype;
-	skb->pkt_type = PACKET_HOST;	/*	Physically point to point */
-	skb_pull(skb, cnt);
-	skb_reset_mac_header(skb);
-	return ethertype;
-}
-
-#endif  /*  0  */
-
-/*
- *	WAN device IOCTL.
- *	o find WAN device associated with this node
- *	o execute requested action or pass command to the device driver
- */
-
-long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-	struct inode *inode = file->f_path.dentry->d_inode;
-	int err = 0;
-	struct proc_dir_entry *dent;
-	struct wan_device *wandev;
-	void __user *data = (void __user *)arg;
-
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
-	if ((cmd >> 8) != ROUTER_IOCTL)
-		return -EINVAL;
-
-	dent = PDE(inode);
-	if ((dent == NULL) || (dent->data == NULL))
-		return -EINVAL;
-
-	wandev = dent->data;
-	if (wandev->magic != ROUTER_MAGIC)
-		return -EINVAL;
-
-	mutex_lock(&wanrouter_mutex);
-	switch (cmd) {
-	case ROUTER_SETUP:
-		err = wanrouter_device_setup(wandev, data);
-		break;
-
-	case ROUTER_DOWN:
-		err = wanrouter_device_shutdown(wandev);
-		break;
-
-	case ROUTER_STAT:
-		err = wanrouter_device_stat(wandev, data);
-		break;
-
-	case ROUTER_IFNEW:
-		err = wanrouter_device_new_if(wandev, data);
-		break;
-
-	case ROUTER_IFDEL:
-		err = wanrouter_device_del_if(wandev, data);
-		break;
-
-	case ROUTER_IFSTAT:
-		break;
-
-	default:
-		if ((cmd >= ROUTER_USER) &&
-		    (cmd <= ROUTER_USER_MAX) &&
-		    wandev->ioctl)
-			err = wandev->ioctl(wandev, cmd, arg);
-		else err = -EINVAL;
-	}
-	mutex_unlock(&wanrouter_mutex);
-	return err;
-}
-
-/*
- *	WAN Driver IOCTL Handlers
- */
-
-/*
- *	Setup WAN link device.
- *	o verify user address space
- *	o allocate kernel memory and copy configuration data to kernel space
- *	o if configuration data includes extension, copy it to kernel space too
- *	o call driver's setup() entry point
- */
-
-static int wanrouter_device_setup(struct wan_device *wandev,
-				  wandev_conf_t __user *u_conf)
-{
-	void *data = NULL;
-	wandev_conf_t *conf;
-	int err = -EINVAL;
-
-	if (wandev->setup == NULL) {	/* Nothing to do ? */
-		printk(KERN_INFO "%s: ERROR, No setup script: wandev->setup()\n",
-				wandev->name);
-		return 0;
-	}
-
-	conf = kmalloc(sizeof(wandev_conf_t), GFP_KERNEL);
-	if (conf == NULL){
-		printk(KERN_INFO "%s: ERROR, Failed to allocate kernel memory !\n",
-				wandev->name);
-		return -ENOBUFS;
-	}
-
-	if (copy_from_user(conf, u_conf, sizeof(wandev_conf_t))) {
-		printk(KERN_INFO "%s: Failed to copy user config data to kernel space!\n",
-				wandev->name);
-		kfree(conf);
-		return -EFAULT;
-	}
-
-	if (conf->magic != ROUTER_MAGIC) {
-		kfree(conf);
-		printk(KERN_INFO "%s: ERROR, Invalid MAGIC Number\n",
-				wandev->name);
-		return -EINVAL;
-	}
-
-	if (conf->data_size && conf->data) {
-		if (conf->data_size > 128000) {
-			printk(KERN_INFO
-			    "%s: ERROR, Invalid firmware data size %i !\n",
-					wandev->name, conf->data_size);
-			kfree(conf);
-			return -EINVAL;
-		}
-
-		data = vmalloc(conf->data_size);
-		if (!data) {
-			printk(KERN_INFO
-				"%s: ERROR, Failed allocate kernel memory !\n",
-				wandev->name);
-			kfree(conf);
-			return -ENOBUFS;
-		}
-		if (!copy_from_user(data, conf->data, conf->data_size)) {
-			conf->data = data;
-			err = wandev->setup(wandev, conf);
-		} else {
-			printk(KERN_INFO
-			     "%s: ERROR, Failed to copy from user data !\n",
-			       wandev->name);
-			err = -EFAULT;
-		}
-		vfree(data);
-	} else {
-		printk(KERN_INFO
-		    "%s: ERROR, No firmware found ! Firmware size = %i !\n",
-				wandev->name, conf->data_size);
-	}
-
-	kfree(conf);
-	return err;
-}
-
-/*
- *	Shutdown WAN device.
- *	o delete all not opened logical channels for this device
- *	o call driver's shutdown() entry point
- */
-
-static int wanrouter_device_shutdown(struct wan_device *wandev)
-{
-	struct net_device *dev;
-	int err=0;
-
-	if (wandev->state == WAN_UNCONFIGURED)
-		return 0;
-
-	printk(KERN_INFO "\n%s: Shutting Down!\n",wandev->name);
-
-	for (dev = wandev->dev; dev;) {
-		err = wanrouter_delete_interface(wandev, dev->name);
-		if (err)
-			return err;
-		/* The above function deallocates the current dev
-		 * structure. Therefore, we cannot use netdev_priv(dev)
-		 * as the next element: wandev->dev points to the
-		 * next element */
-		dev = wandev->dev;
-	}
-
-	if (wandev->ndev)
-		return -EBUSY;	/* there are opened interfaces  */
-
-	if (wandev->shutdown)
-		err=wandev->shutdown(wandev);
-
-	return err;
-}
-
-/*
- *	Get WAN device status & statistics.
- */
-
-static int wanrouter_device_stat(struct wan_device *wandev,
-				 wandev_stat_t __user *u_stat)
-{
-	wandev_stat_t stat;
-
-	memset(&stat, 0, sizeof(stat));
-
-	/* Ask device driver to update device statistics */
-	if ((wandev->state != WAN_UNCONFIGURED) && wandev->update)
-		wandev->update(wandev);
-
-	/* Fill out structure */
-	stat.ndev  = wandev->ndev;
-	stat.state = wandev->state;
-
-	if (copy_to_user(u_stat, &stat, sizeof(stat)))
-		return -EFAULT;
-
-	return 0;
-}
-
-/*
- *	Create new WAN interface.
- *	o verify user address space
- *	o copy configuration data to kernel address space
- *	o allocate network interface data space
- *	o call driver's new_if() entry point
- *	o make sure there is no interface name conflict
- *	o register network interface
- */
-
-static int wanrouter_device_new_if(struct wan_device *wandev,
-				   wanif_conf_t __user *u_conf)
-{
-	wanif_conf_t *cnf;
-	struct net_device *dev = NULL;
-	int err;
-
-	if ((wandev->state == WAN_UNCONFIGURED) || (wandev->new_if == NULL))
-		return -ENODEV;
-
-	cnf = kmalloc(sizeof(wanif_conf_t), GFP_KERNEL);
-	if (!cnf)
-		return -ENOBUFS;
-
-	err = -EFAULT;
-	if (copy_from_user(cnf, u_conf, sizeof(wanif_conf_t)))
-		goto out;
-
-	err = -EINVAL;
-	if (cnf->magic != ROUTER_MAGIC)
-		goto out;
-
-	if (cnf->config_id == WANCONFIG_MPPP) {
-		printk(KERN_INFO "%s: Wanpipe Mulit-Port PPP support has not been compiled in!\n",
-				wandev->name);
-		err = -EPROTONOSUPPORT;
-		goto out;
-	} else {
-		err = wandev->new_if(wandev, dev, cnf);
-	}
-
-	if (!err) {
-		/* Register network interface. This will invoke init()
-		 * function supplied by the driver.  If device registered
-		 * successfully, add it to the interface list.
-		 */
-
-#ifdef WANDEBUG
-		printk(KERN_INFO "%s: registering interface %s...\n",
-		       wanrouter_modname, dev->name);
-#endif
-
-		err = register_netdev(dev);
-		if (!err) {
-			struct net_device *slave = NULL;
-			unsigned long smp_flags=0;
-
-			lock_adapter_irq(&wandev->lock, &smp_flags);
-
-			if (wandev->dev == NULL) {
-				wandev->dev = dev;
-			} else {
-				for (slave=wandev->dev;
-				     DEV_TO_SLAVE(slave);
-				     slave = DEV_TO_SLAVE(slave))
-					DEV_TO_SLAVE(slave) = dev;
-			}
-			++wandev->ndev;
-
-			unlock_adapter_irq(&wandev->lock, &smp_flags);
-			err = 0;	/* done !!! */
-			goto out;
-		}
-		if (wandev->del_if)
-			wandev->del_if(wandev, dev);
-		free_netdev(dev);
-	}
-
-out:
-	kfree(cnf);
-	return err;
-}
-
-
-/*
- *	Delete WAN logical channel.
- *	 o verify user address space
- *	 o copy configuration data to kernel address space
- */
-
-static int wanrouter_device_del_if(struct wan_device *wandev, char __user *u_name)
-{
-	char name[WAN_IFNAME_SZ + 1];
-	int err = 0;
-
-	if (wandev->state == WAN_UNCONFIGURED)
-		return -ENODEV;
-
-	memset(name, 0, sizeof(name));
-
-	if (copy_from_user(name, u_name, WAN_IFNAME_SZ))
-		return -EFAULT;
-
-	err = wanrouter_delete_interface(wandev, name);
-	if (err)
-		return err;
-
-	/* If last interface being deleted, shutdown card
-	 * This helps with administration at leaf nodes
-	 * (You can tell if the person at the other end of the phone
-	 * has an interface configured) and avoids DoS vulnerabilities
-	 * in binary driver files - this fixes a problem with the current
-	 * Sangoma driver going into strange states when all the network
-	 * interfaces are deleted and the link irrecoverably disconnected.
-	 */
-
-	if (!wandev->ndev && wandev->shutdown)
-		err = wandev->shutdown(wandev);
-
-	return err;
-}
-
-/*
- *	Miscellaneous Functions
- */
-
-/*
- *	Find WAN device by name.
- *	Return pointer to the WAN device data space or NULL if device not found.
- */
-
-static struct wan_device *wanrouter_find_device(char *name)
-{
-	struct wan_device *wandev;
-
-	for (wandev = wanrouter_router_devlist;
-	     wandev && strcmp(wandev->name, name);
-		wandev = wandev->next);
-	return wandev;
-}
-
-/*
- *	Delete WAN logical channel identified by its name.
- *	o find logical channel by its name
- *	o call driver's del_if() entry point
- *	o unregister network interface
- *	o unlink channel data space from linked list of channels
- *	o release channel data space
- *
- *	Return:	0		success
- *		-ENODEV		channel not found.
- *		-EBUSY		interface is open
- *
- *	Note: If (force != 0), then device will be destroyed even if interface
- *	associated with it is open. It's caller's responsibility to make
- *	sure that opened interfaces are not removed!
- */
-
-static int wanrouter_delete_interface(struct wan_device *wandev, char *name)
-{
-	struct net_device *dev = NULL, *prev = NULL;
-	unsigned long smp_flags=0;
-
-	lock_adapter_irq(&wandev->lock, &smp_flags);
-	dev = wandev->dev;
-	prev = NULL;
-	while (dev && strcmp(name, dev->name)) {
-		struct net_device **slave = netdev_priv(dev);
-		prev = dev;
-		dev = *slave;
-	}
-	unlock_adapter_irq(&wandev->lock, &smp_flags);
-
-	if (dev == NULL)
-		return -ENODEV;	/* interface not found */
-
-	if (netif_running(dev))
-		return -EBUSY;	/* interface in use */
-
-	if (wandev->del_if)
-		wandev->del_if(wandev, dev);
-
-	lock_adapter_irq(&wandev->lock, &smp_flags);
-	if (prev) {
-		struct net_device **prev_slave = netdev_priv(prev);
-		struct net_device **slave = netdev_priv(dev);
-
-		*prev_slave = *slave;
-	} else {
-		struct net_device **slave = netdev_priv(dev);
-		wandev->dev = *slave;
-	}
-	--wandev->ndev;
-	unlock_adapter_irq(&wandev->lock, &smp_flags);
-
-	printk(KERN_INFO "%s: unregistering '%s'\n", wandev->name, dev->name);
-
-	unregister_netdev(dev);
-
-	free_netdev(dev);
-
-	return 0;
-}
-
-static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags)
-	__acquires(lock)
-{
-	spin_lock_irqsave(lock, *smp_flags);
-}
-
-
-static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags)
-	__releases(lock)
-{
-	spin_unlock_irqrestore(lock, *smp_flags);
-}
-
-EXPORT_SYMBOL(register_wan_device);
-EXPORT_SYMBOL(unregister_wan_device);
-
-MODULE_LICENSE("GPL");
-
-/*
- *	End
- */
diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c
deleted file mode 100644
index c43612ee96b..00000000000
--- a/net/wanrouter/wanproc.c
+++ /dev/null
@@ -1,380 +0,0 @@
-/*****************************************************************************
-* wanproc.c	WAN Router Module. /proc filesystem interface.
-*
-*		This module is completely hardware-independent and provides
-*		access to the router using Linux /proc filesystem.
-*
-* Author: 	Gideon Hack
-*
-* Copyright:	(c) 1995-1999 Sangoma Technologies Inc.
-*
-*		This program is free software; you can redistribute it and/or
-*		modify it under the terms of the GNU General Public License
-*		as published by the Free Software Foundation; either version
-*		2 of the License, or (at your option) any later version.
-* ============================================================================
-* Jun 02, 1999  Gideon Hack	Updates for Linux 2.2.X kernels.
-* Jun 29, 1997	Alan Cox	Merged with 1.0.3 vendor code
-* Jan 29, 1997	Gene Kozin	v1.0.1. Implemented /proc read routines
-* Jan 30, 1997	Alan Cox	Hacked around for 2.1
-* Dec 13, 1996	Gene Kozin	Initial version (based on Sangoma's WANPIPE)
-*****************************************************************************/
-
-#include <linux/init.h>		/* __initfunc et al. */
-#include <linux/stddef.h>	/* offsetof(), etc. */
-#include <linux/errno.h>	/* return codes */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/wanrouter.h>	/* WAN router API definitions */
-#include <linux/seq_file.h>
-#include <linux/mutex.h>
-
-#include <net/net_namespace.h>
-#include <asm/io.h>
-
-#define PROC_STATS_FORMAT "%30s: %12lu\n"
-
-/****** Defines and Macros **************************************************/
-
-#define PROT_DECODE(prot) ((prot == WANCONFIG_FR) ? " FR" :\
-			      (prot == WANCONFIG_X25) ? " X25" : \
-				 (prot == WANCONFIG_PPP) ? " PPP" : \
-				    (prot == WANCONFIG_CHDLC) ? " CHDLC": \
-				       (prot == WANCONFIG_MPPP) ? " MPPP" : \
-					   " Unknown" )
-
-/****** Function Prototypes *************************************************/
-
-#ifdef CONFIG_PROC_FS
-
-/* Miscellaneous */
-
-/*
- *	Structures for interfacing with the /proc filesystem.
- *	Router creates its own directory /proc/net/router with the following
- *	entries:
- *	config		device configuration
- *	status		global device statistics
- *	<device>	entry for each WAN device
- */
-
-/*
- *	Generic /proc/net/router/<file> file and inode operations
- */
-
-/*
- *	/proc/net/router
- */
-
-static DEFINE_MUTEX(config_mutex);
-static struct proc_dir_entry *proc_router;
-
-/* Strings */
-
-/*
- *	Interface functions
- */
-
-/****** Proc filesystem entry points ****************************************/
-
-/*
- *	Iterator
- */
-static void *r_start(struct seq_file *m, loff_t *pos)
-{
-	struct wan_device *wandev;
-	loff_t l = *pos;
-
-	mutex_lock(&config_mutex);
-	if (!l--)
-		return SEQ_START_TOKEN;
-	for (wandev = wanrouter_router_devlist; l-- && wandev;
-	     wandev = wandev->next)
-		;
-	return wandev;
-}
-
-static void *r_next(struct seq_file *m, void *v, loff_t *pos)
-{
-	struct wan_device *wandev = v;
-	(*pos)++;
-	return (v == SEQ_START_TOKEN) ? wanrouter_router_devlist : wandev->next;
-}
-
-static void r_stop(struct seq_file *m, void *v)
-{
-	mutex_unlock(&config_mutex);
-}
-
-static int config_show(struct seq_file *m, void *v)
-{
-	struct wan_device *p = v;
-	if (v == SEQ_START_TOKEN) {
-		seq_puts(m, "Device name    | port |IRQ|DMA|  mem.addr  |"
-			    "mem.size|option1|option2|option3|option4\n");
-		return 0;
-	}
-	if (!p->state)
-		return 0;
-	seq_printf(m, "%-15s|0x%-4X|%3u|%3u| 0x%-8lX |0x%-6X|%7u|%7u|%7u|%7u\n",
-			p->name, p->ioport, p->irq, p->dma, p->maddr, p->msize,
-			p->hw_opt[0], p->hw_opt[1], p->hw_opt[2], p->hw_opt[3]);
-	return 0;
-}
-
-static int status_show(struct seq_file *m, void *v)
-{
-	struct wan_device *p = v;
-	if (v == SEQ_START_TOKEN) {
-		seq_puts(m, "Device name    |protocol|station|interface|"
-			    "clocking|baud rate| MTU |ndev|link state\n");
-		return 0;
-	}
-	if (!p->state)
-		return 0;
-	seq_printf(m, "%-15s|%-8s| %-7s| %-9s|%-8s|%9u|%5u|%3u |",
-		p->name,
-		PROT_DECODE(p->config_id),
-		p->config_id == WANCONFIG_FR ?
-			(p->station ? "Node" : "CPE") :
-			(p->config_id == WANCONFIG_X25 ?
-			(p->station ? "DCE" : "DTE") :
-			("N/A")),
-		p->interface ? "V.35" : "RS-232",
-		p->clocking ? "internal" : "external",
-		p->bps,
-		p->mtu,
-		p->ndev);
-
-	switch (p->state) {
-	case WAN_UNCONFIGURED:
-		seq_printf(m, "%-12s\n", "unconfigured");
-		break;
-	case WAN_DISCONNECTED:
-		seq_printf(m, "%-12s\n", "disconnected");
-		break;
-	case WAN_CONNECTING:
-		seq_printf(m, "%-12s\n", "connecting");
-		break;
-	case WAN_CONNECTED:
-		seq_printf(m, "%-12s\n", "connected");
-		break;
-	default:
-		seq_printf(m, "%-12s\n", "invalid");
-		break;
-	}
-	return 0;
-}
-
-static const struct seq_operations config_op = {
-	.start	= r_start,
-	.next	= r_next,
-	.stop	= r_stop,
-	.show	= config_show,
-};
-
-static const struct seq_operations status_op = {
-	.start	= r_start,
-	.next	= r_next,
-	.stop	= r_stop,
-	.show	= status_show,
-};
-
-static int config_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &config_op);
-}
-
-static int status_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &status_op);
-}
-
-static const struct file_operations config_fops = {
-	.owner	 = THIS_MODULE,
-	.open	 = config_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = seq_release,
-};
-
-static const struct file_operations status_fops = {
-	.owner	 = THIS_MODULE,
-	.open	 = status_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = seq_release,
-};
-
-static int wandev_show(struct seq_file *m, void *v)
-{
-	struct wan_device *wandev = m->private;
-
-	if (wandev->magic != ROUTER_MAGIC)
-		return 0;
-
-	if (!wandev->state) {
-		seq_puts(m, "device is not configured!\n");
-		return 0;
-	}
-
-	/* Update device statistics */
-	if (wandev->update) {
-		int err = wandev->update(wandev);
-		if (err == -EAGAIN) {
-			seq_puts(m, "Device is busy!\n");
-			return 0;
-		}
-		if (err) {
-			seq_puts(m, "Device is not configured!\n");
-			return 0;
-		}
-	}
-
-	seq_printf(m, PROC_STATS_FORMAT,
-		"total packets received", wandev->stats.rx_packets);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"total packets transmitted", wandev->stats.tx_packets);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"total bytes received", wandev->stats.rx_bytes);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"total bytes transmitted", wandev->stats.tx_bytes);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"bad packets received", wandev->stats.rx_errors);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"packet transmit problems", wandev->stats.tx_errors);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"received frames dropped", wandev->stats.rx_dropped);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"transmit frames dropped", wandev->stats.tx_dropped);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"multicast packets received", wandev->stats.multicast);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"transmit collisions", wandev->stats.collisions);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"receive length errors", wandev->stats.rx_length_errors);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"receiver overrun errors", wandev->stats.rx_over_errors);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"CRC errors", wandev->stats.rx_crc_errors);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"frame format errors (aborts)", wandev->stats.rx_frame_errors);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"receiver fifo overrun", wandev->stats.rx_fifo_errors);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"receiver missed packet", wandev->stats.rx_missed_errors);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"aborted frames transmitted", wandev->stats.tx_aborted_errors);
-	return 0;
-}
-
-static int wandev_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, wandev_show, PDE(inode)->data);
-}
-
-static const struct file_operations wandev_fops = {
-	.owner	 = THIS_MODULE,
-	.open	 = wandev_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = single_release,
-	.unlocked_ioctl  = wanrouter_ioctl,
-};
-
-/*
- *	Initialize router proc interface.
- */
-
-int __init wanrouter_proc_init(void)
-{
-	struct proc_dir_entry *p;
-	proc_router = proc_mkdir(ROUTER_NAME, init_net.proc_net);
-	if (!proc_router)
-		goto fail;
-
-	p = proc_create("config", S_IRUGO, proc_router, &config_fops);
-	if (!p)
-		goto fail_config;
-	p = proc_create("status", S_IRUGO, proc_router, &status_fops);
-	if (!p)
-		goto fail_stat;
-	return 0;
-fail_stat:
-	remove_proc_entry("config", proc_router);
-fail_config:
-	remove_proc_entry(ROUTER_NAME, init_net.proc_net);
-fail:
-	return -ENOMEM;
-}
-
-/*
- *	Clean up router proc interface.
- */
-
-void wanrouter_proc_cleanup(void)
-{
-	remove_proc_entry("config", proc_router);
-	remove_proc_entry("status", proc_router);
-	remove_proc_entry(ROUTER_NAME, init_net.proc_net);
-}
-
-/*
- *	Add directory entry for WAN device.
- */
-
-int wanrouter_proc_add(struct wan_device* wandev)
-{
-	if (wandev->magic != ROUTER_MAGIC)
-		return -EINVAL;
-
-	wandev->dent = proc_create(wandev->name, S_IRUGO,
-				   proc_router, &wandev_fops);
-	if (!wandev->dent)
-		return -ENOMEM;
-	wandev->dent->data	= wandev;
-	return 0;
-}
-
-/*
- *	Delete directory entry for WAN device.
- */
-int wanrouter_proc_delete(struct wan_device* wandev)
-{
-	if (wandev->magic != ROUTER_MAGIC)
-		return -EINVAL;
-	remove_proc_entry(wandev->name, proc_router);
-	return 0;
-}
-
-#else
-
-/*
- *	No /proc - output stubs
- */
-
-int __init wanrouter_proc_init(void)
-{
-	return 0;
-}
-
-void wanrouter_proc_cleanup(void)
-{
-}
-
-int wanrouter_proc_add(struct wan_device *wandev)
-{
-	return 0;
-}
-
-int wanrouter_proc_delete(struct wan_device *wandev)
-{
-	return 0;
-}
-
-#endif
-
-/*
- *	End
- */
-
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 33b75b9b8ef..5ffff039b01 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -875,8 +875,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
 		/* allow mac80211 to determine the timeout */
 		wdev->ps_timeout = -1;
 
-		if (!dev->ethtool_ops)
-			dev->ethtool_ops = &cfg80211_ethtool_ops;
+		netdev_set_default_ethtool_ops(dev, &cfg80211_ethtool_ops);
 
 		if ((wdev->iftype == NL80211_IFTYPE_STATION ||
 		     wdev->iftype == NL80211_IFTYPE_P2P_CLIENT ||
diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c
index 48c48ffafa1..e37862f1b12 100644
--- a/net/wireless/ethtool.c
+++ b/net/wireless/ethtool.c
@@ -15,10 +15,10 @@ static void cfg80211_get_drvinfo(struct net_device *dev,
 	strlcpy(info->version, init_utsname()->release, sizeof(info->version));
 
 	if (wdev->wiphy->fw_version[0])
-		strncpy(info->fw_version, wdev->wiphy->fw_version,
+		strlcpy(info->fw_version, wdev->wiphy->fw_version,
 			sizeof(info->fw_version));
 	else
-		strncpy(info->fw_version, "N/A", sizeof(info->fw_version));
+		strlcpy(info->fw_version, "N/A", sizeof(info->fw_version));
 
 	strlcpy(info->bus_info, dev_name(wiphy_dev(wdev->wiphy)),
 		sizeof(info->bus_info));
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 93ab840957a..98532c00242 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -2265,7 +2265,6 @@ int set_regdom(const struct ieee80211_regdomain *rd)
 	return r;
 }
 
-#ifdef CONFIG_HOTPLUG
 int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
 	struct regulatory_request *lr;
@@ -2285,12 +2284,6 @@ int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env)
 				      alpha2[0], alpha2[1]);
 	return 0;
 }
-#else
-int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env)
-{
-	return -ENODEV;
-}
-#endif /* CONFIG_HOTPLUG */
 
 void wiphy_regulatory_register(struct wiphy *wiphy)
 {
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 73bf39f1131..238ee49b386 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -77,13 +77,11 @@ static void wiphy_dev_release(struct device *dev)
 	cfg80211_dev_free(rdev);
 }
 
-#ifdef CONFIG_HOTPLUG
 static int wiphy_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
 	/* TODO, we probably need stuff here */
 	return 0;
 }
-#endif
 
 static int wiphy_suspend(struct device *dev, pm_message_t state)
 {
@@ -132,9 +130,7 @@ struct class ieee80211_class = {
 	.owner = THIS_MODULE,
 	.dev_release = wiphy_dev_release,
 	.dev_attrs = ieee80211_dev_attrs,
-#ifdef CONFIG_HOTPLUG
 	.dev_uevent = wiphy_uevent,
-#endif
 	.suspend = wiphy_suspend,
 	.resume = wiphy_resume,
 	.ns_type = &net_ns_type_operations,
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 4ce2d93162c..6fb9d00a75d 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -35,6 +35,8 @@ static struct xfrm_algo_desc aead_list[] = {
 		}
 	},
 
+	.pfkey_supported = 1,
+
 	.desc = {
 		.sadb_alg_id = SADB_X_EALG_AES_GCM_ICV8,
 		.sadb_alg_ivlen = 8,
@@ -51,6 +53,8 @@ static struct xfrm_algo_desc aead_list[] = {
 		}
 	},
 
+	.pfkey_supported = 1,
+
 	.desc = {
 		.sadb_alg_id = SADB_X_EALG_AES_GCM_ICV12,
 		.sadb_alg_ivlen = 8,
@@ -67,6 +71,8 @@ static struct xfrm_algo_desc aead_list[] = {
 		}
 	},
 
+	.pfkey_supported = 1,
+
 	.desc = {
 		.sadb_alg_id = SADB_X_EALG_AES_GCM_ICV16,
 		.sadb_alg_ivlen = 8,
@@ -83,6 +89,8 @@ static struct xfrm_algo_desc aead_list[] = {
 		}
 	},
 
+	.pfkey_supported = 1,
+
 	.desc = {
 		.sadb_alg_id = SADB_X_EALG_AES_CCM_ICV8,
 		.sadb_alg_ivlen = 8,
@@ -99,6 +107,8 @@ static struct xfrm_algo_desc aead_list[] = {
 		}
 	},
 
+	.pfkey_supported = 1,
+
 	.desc = {
 		.sadb_alg_id = SADB_X_EALG_AES_CCM_ICV12,
 		.sadb_alg_ivlen = 8,
@@ -115,6 +125,8 @@ static struct xfrm_algo_desc aead_list[] = {
 		}
 	},
 
+	.pfkey_supported = 1,
+
 	.desc = {
 		.sadb_alg_id = SADB_X_EALG_AES_CCM_ICV16,
 		.sadb_alg_ivlen = 8,
@@ -131,6 +143,8 @@ static struct xfrm_algo_desc aead_list[] = {
 		}
 	},
 
+	.pfkey_supported = 1,
+
 	.desc = {
 		.sadb_alg_id = SADB_X_EALG_NULL_AES_GMAC,
 		.sadb_alg_ivlen = 8,
@@ -151,6 +165,8 @@ static struct xfrm_algo_desc aalg_list[] = {
 		}
 	},
 
+	.pfkey_supported = 1,
+
 	.desc = {
 		.sadb_alg_id = SADB_X_AALG_NULL,
 		.sadb_alg_ivlen = 0,
@@ -169,6 +185,8 @@ static struct xfrm_algo_desc aalg_list[] = {
 		}
 	},
 
+	.pfkey_supported = 1,
+
 	.desc = {
 		.sadb_alg_id = SADB_AALG_MD5HMAC,
 		.sadb_alg_ivlen = 0,
@@ -187,6 +205,8 @@ static struct xfrm_algo_desc aalg_list[] = {
 		}
 	},
 
+	.pfkey_supported = 1,
+
 	.desc = {
 		.sadb_alg_id = SADB_AALG_SHA1HMAC,
 		.sadb_alg_ivlen = 0,
@@ -205,6 +225,8 @@ static struct xfrm_algo_desc aalg_list[] = {
 		}
 	},
 
+	.pfkey_supported = 1,
+
 	.desc = {
 		.sadb_alg_id = SADB_X_AALG_SHA2_256HMAC,
 		.sadb_alg_ivlen = 0,
@@ -222,6 +244,8 @@ static struct xfrm_algo_desc aalg_list[] = {
 		}
 	},
 
+	.pfkey_supported = 1,
+
 	.desc = {
 		.sadb_alg_id = SADB_X_AALG_SHA2_384HMAC,
 		.sadb_alg_ivlen = 0,
@@ -239,6 +263,8 @@ static struct xfrm_algo_desc aalg_list[] = {
 		}
 	},
 
+	.pfkey_supported = 1,
+
 	.desc = {
 		.sadb_alg_id = SADB_X_AALG_SHA2_512HMAC,
 		.sadb_alg_ivlen = 0,
@@ -257,6 +283,8 @@ static struct xfrm_algo_desc aalg_list[] = {
 		}
 	},
 
+	.pfkey_supported = 1,
+
 	.desc = {
 		.sadb_alg_id = SADB_X_AALG_RIPEMD160HMAC,
 		.sadb_alg_ivlen = 0,
@@ -274,6 +302,8 @@ static struct xfrm_algo_desc aalg_list[] = {
 		}
 	},
 
+	.pfkey_supported = 1,
+
 	.desc = {
 		.sadb_alg_id = SADB_X_AALG_AES_XCBC_MAC,
 		.sadb_alg_ivlen = 0,
@@ -295,6 +325,8 @@ static struct xfrm_algo_desc ealg_list[] = {
 		}
 	},
 
+	.pfkey_supported = 1,
+
 	.desc = {
 		.sadb_alg_id =	SADB_EALG_NULL,
 		.sadb_alg_ivlen = 0,
@@ -313,6 +345,8 @@ static struct xfrm_algo_desc ealg_list[] = {
 		}
 	},
 
+	.pfkey_supported = 1,
+
 	.desc = {
 		.sadb_alg_id = SADB_EALG_DESCBC,
 		.sadb_alg_ivlen = 8,
@@ -331,6 +365,8 @@ static struct xfrm_algo_desc ealg_list[] = {
 		}
 	},
 
+	.pfkey_supported = 1,
+
 	.desc = {
 		.sadb_alg_id = SADB_EALG_3DESCBC,
 		.sadb_alg_ivlen = 8,
@@ -349,6 +385,8 @@ static struct xfrm_algo_desc ealg_list[] = {
 		}
 	},
 
+	.pfkey_supported = 1,
+
 	.desc = {
 		.sadb_alg_id = SADB_X_EALG_CASTCBC,
 		.sadb_alg_ivlen = 8,
@@ -367,6 +405,8 @@ static struct xfrm_algo_desc ealg_list[] = {
 		}
 	},
 
+	.pfkey_supported = 1,
+
 	.desc = {
 		.sadb_alg_id = SADB_X_EALG_BLOWFISHCBC,
 		.sadb_alg_ivlen = 8,
@@ -385,6 +425,8 @@ static struct xfrm_algo_desc ealg_list[] = {
 		}
 	},
 
+	.pfkey_supported = 1,
+
 	.desc = {
 		.sadb_alg_id = SADB_X_EALG_AESCBC,
 		.sadb_alg_ivlen = 8,
@@ -403,6 +445,8 @@ static struct xfrm_algo_desc ealg_list[] = {
 		}
 	},
 
+	.pfkey_supported = 1,
+
 	.desc = {
 		.sadb_alg_id = SADB_X_EALG_SERPENTCBC,
 		.sadb_alg_ivlen = 8,
@@ -421,6 +465,8 @@ static struct xfrm_algo_desc ealg_list[] = {
 		}
 	},
 
+	.pfkey_supported = 1,
+
 	.desc = {
 		.sadb_alg_id = SADB_X_EALG_CAMELLIACBC,
 		.sadb_alg_ivlen = 8,
@@ -439,6 +485,8 @@ static struct xfrm_algo_desc ealg_list[] = {
 		}
 	},
 
+	.pfkey_supported = 1,
+
 	.desc = {
 		.sadb_alg_id = SADB_X_EALG_TWOFISHCBC,
 		.sadb_alg_ivlen = 8,
@@ -456,6 +504,8 @@ static struct xfrm_algo_desc ealg_list[] = {
 		}
 	},
 
+	.pfkey_supported = 1,
+
 	.desc = {
 		.sadb_alg_id = SADB_X_EALG_AESCTR,
 		.sadb_alg_ivlen	= 8,
@@ -473,6 +523,7 @@ static struct xfrm_algo_desc calg_list[] = {
 			.threshold = 90,
 		}
 	},
+	.pfkey_supported = 1,
 	.desc = { .sadb_alg_id = SADB_X_CALG_DEFLATE }
 },
 {
@@ -482,6 +533,7 @@ static struct xfrm_algo_desc calg_list[] = {
 			.threshold = 90,
 		}
 	},
+	.pfkey_supported = 1,
 	.desc = { .sadb_alg_id = SADB_X_CALG_LZS }
 },
 {
@@ -491,6 +543,7 @@ static struct xfrm_algo_desc calg_list[] = {
 			.threshold = 50,
 		}
 	},
+	.pfkey_supported = 1,
 	.desc = { .sadb_alg_id = SADB_X_CALG_LZJH }
 },
 };
@@ -700,8 +753,7 @@ void xfrm_probe_algs(void)
 	}
 
 	for (i = 0; i < ealg_entries(); i++) {
-		status = crypto_has_blkcipher(ealg_list[i].name, 0,
-					      CRYPTO_ALG_ASYNC);
+		status = crypto_has_ablkcipher(ealg_list[i].name, 0, 0);
 		if (ealg_list[i].available != status)
 			ealg_list[i].available = status;
 	}
@@ -715,27 +767,27 @@ void xfrm_probe_algs(void)
 }
 EXPORT_SYMBOL_GPL(xfrm_probe_algs);
 
-int xfrm_count_auth_supported(void)
+int xfrm_count_pfkey_auth_supported(void)
 {
 	int i, n;
 
 	for (i = 0, n = 0; i < aalg_entries(); i++)
-		if (aalg_list[i].available)
+		if (aalg_list[i].available && aalg_list[i].pfkey_supported)
 			n++;
 	return n;
 }
-EXPORT_SYMBOL_GPL(xfrm_count_auth_supported);
+EXPORT_SYMBOL_GPL(xfrm_count_pfkey_auth_supported);
 
-int xfrm_count_enc_supported(void)
+int xfrm_count_pfkey_enc_supported(void)
 {
 	int i, n;
 
 	for (i = 0, n = 0; i < ealg_entries(); i++)
-		if (ealg_list[i].available)
+		if (ealg_list[i].available && ealg_list[i].pfkey_supported)
 			n++;
 	return n;
 }
-EXPORT_SYMBOL_GPL(xfrm_count_enc_supported);
+EXPORT_SYMBOL_GPL(xfrm_count_pfkey_enc_supported);
 
 #if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)
 
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 95a338c89f9..bcfda8921b5 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -61,6 +61,12 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
 		}
 
 		spin_lock_bh(&x->lock);
+
+		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEINVALID);
+			goto error;
+		}
+
 		err = xfrm_state_check_expire(x);
 		if (err) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEEXPIRED);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 41eabc46f11..5b47180986f 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -35,6 +35,10 @@
 
 #include "xfrm_hash.h"
 
+#define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10))
+#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ))
+#define XFRM_MAX_QUEUE_LEN	100
+
 DEFINE_MUTEX(xfrm_cfg_mutex);
 EXPORT_SYMBOL(xfrm_cfg_mutex);
 
@@ -51,7 +55,7 @@ static struct kmem_cache *xfrm_dst_cache __read_mostly;
 static void xfrm_init_pmtu(struct dst_entry *dst);
 static int stale_bundle(struct dst_entry *dst);
 static int xfrm_bundle_ok(struct xfrm_dst *xdst);
-
+static void xfrm_policy_queue_process(unsigned long arg);
 
 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
 						int dir);
@@ -287,8 +291,11 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
 		INIT_HLIST_NODE(&policy->byidx);
 		rwlock_init(&policy->lock);
 		atomic_set(&policy->refcnt, 1);
+		skb_queue_head_init(&policy->polq.hold_queue);
 		setup_timer(&policy->timer, xfrm_policy_timer,
 				(unsigned long)policy);
+		setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process,
+			    (unsigned long)policy);
 		policy->flo.ops = &xfrm_policy_fc_ops;
 	}
 	return policy;
@@ -309,6 +316,16 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
 }
 EXPORT_SYMBOL(xfrm_policy_destroy);
 
+static void xfrm_queue_purge(struct sk_buff_head *list)
+{
+	struct sk_buff *skb;
+
+	while ((skb = skb_dequeue(list)) != NULL) {
+		dev_put(skb->dev);
+		kfree_skb(skb);
+	}
+}
+
 /* Rule must be locked. Release descentant resources, announce
  * entry dead. The rule must be unlinked from lists to the moment.
  */
@@ -319,6 +336,9 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
 
 	atomic_inc(&policy->genid);
 
+	del_timer(&policy->polq.hold_timer);
+	xfrm_queue_purge(&policy->polq.hold_queue);
+
 	if (del_timer(&policy->timer))
 		xfrm_pol_put(policy);
 
@@ -562,6 +582,46 @@ static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s
 	return 0;
 }
 
+static void xfrm_policy_requeue(struct xfrm_policy *old,
+				struct xfrm_policy *new)
+{
+	struct xfrm_policy_queue *pq = &old->polq;
+	struct sk_buff_head list;
+
+	__skb_queue_head_init(&list);
+
+	spin_lock_bh(&pq->hold_queue.lock);
+	skb_queue_splice_init(&pq->hold_queue, &list);
+	del_timer(&pq->hold_timer);
+	spin_unlock_bh(&pq->hold_queue.lock);
+
+	if (skb_queue_empty(&list))
+		return;
+
+	pq = &new->polq;
+
+	spin_lock_bh(&pq->hold_queue.lock);
+	skb_queue_splice(&list, &pq->hold_queue);
+	pq->timeout = XFRM_QUEUE_TMO_MIN;
+	mod_timer(&pq->hold_timer, jiffies);
+	spin_unlock_bh(&pq->hold_queue.lock);
+}
+
+static bool xfrm_policy_mark_match(struct xfrm_policy *policy,
+				   struct xfrm_policy *pol)
+{
+	u32 mark = policy->mark.v & policy->mark.m;
+
+	if (policy->mark.v == pol->mark.v && policy->mark.m == pol->mark.m)
+		return true;
+
+	if ((mark & pol->mark.m) == pol->mark.v &&
+	    policy->priority == pol->priority)
+		return true;
+
+	return false;
+}
+
 int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 {
 	struct net *net = xp_net(policy);
@@ -569,7 +629,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	struct xfrm_policy *delpol;
 	struct hlist_head *chain;
 	struct hlist_node *entry, *newpos;
-	u32 mark = policy->mark.v & policy->mark.m;
 
 	write_lock_bh(&xfrm_policy_lock);
 	chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
@@ -578,7 +637,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	hlist_for_each_entry(pol, entry, chain, bydst) {
 		if (pol->type == policy->type &&
 		    !selector_cmp(&pol->selector, &policy->selector) &&
-		    (mark & pol->mark.m) == pol->mark.v &&
+		    xfrm_policy_mark_match(policy, pol) &&
 		    xfrm_sec_ctx_match(pol->security, policy->security) &&
 		    !WARN_ON(delpol)) {
 			if (excl) {
@@ -603,8 +662,10 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	net->xfrm.policy_count[dir]++;
 	atomic_inc(&flow_cache_genid);
 	rt_genid_bump(net);
-	if (delpol)
+	if (delpol) {
+		xfrm_policy_requeue(delpol, policy);
 		__xfrm_policy_unlink(delpol, dir);
+	}
 	policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir);
 	hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index));
 	policy->curlft.add_time = get_seconds();
@@ -1115,11 +1176,15 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
 		pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir);
 		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
 	}
-	if (old_pol)
+	if (old_pol) {
+		if (pol)
+			xfrm_policy_requeue(old_pol, pol);
+
 		/* Unlinking succeeds always. This is the only function
 		 * allowed to delete or replace socket policy.
 		 */
 		__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
+	}
 	write_unlock_bh(&xfrm_policy_lock);
 
 	if (old_pol) {
@@ -1310,6 +1375,8 @@ static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *f
 		 * It means we need to try again resolving. */
 		if (xdst->num_xfrms > 0)
 			return NULL;
+	} else if (dst->flags & DST_XFRM_QUEUE) {
+		return NULL;
 	} else {
 		/* Real bundle */
 		if (stale_bundle(dst))
@@ -1673,6 +1740,171 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 	return xdst;
 }
 
+static void xfrm_policy_queue_process(unsigned long arg)
+{
+	int err = 0;
+	struct sk_buff *skb;
+	struct sock *sk;
+	struct dst_entry *dst;
+	struct net_device *dev;
+	struct xfrm_policy *pol = (struct xfrm_policy *)arg;
+	struct xfrm_policy_queue *pq = &pol->polq;
+	struct flowi fl;
+	struct sk_buff_head list;
+
+	spin_lock(&pq->hold_queue.lock);
+	skb = skb_peek(&pq->hold_queue);
+	dst = skb_dst(skb);
+	sk = skb->sk;
+	xfrm_decode_session(skb, &fl, dst->ops->family);
+	spin_unlock(&pq->hold_queue.lock);
+
+	dst_hold(dst->path);
+	dst = xfrm_lookup(xp_net(pol), dst->path, &fl,
+			  sk, 0);
+	if (IS_ERR(dst))
+		goto purge_queue;
+
+	if (dst->flags & DST_XFRM_QUEUE) {
+		dst_release(dst);
+
+		if (pq->timeout >= XFRM_QUEUE_TMO_MAX)
+			goto purge_queue;
+
+		pq->timeout = pq->timeout << 1;
+		mod_timer(&pq->hold_timer, jiffies + pq->timeout);
+		return;
+	}
+
+	dst_release(dst);
+
+	__skb_queue_head_init(&list);
+
+	spin_lock(&pq->hold_queue.lock);
+	pq->timeout = 0;
+	skb_queue_splice_init(&pq->hold_queue, &list);
+	spin_unlock(&pq->hold_queue.lock);
+
+	while (!skb_queue_empty(&list)) {
+		skb = __skb_dequeue(&list);
+
+		xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
+		dst_hold(skb_dst(skb)->path);
+		dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path,
+				  &fl, skb->sk, 0);
+		if (IS_ERR(dst)) {
+			dev_put(skb->dev);
+			kfree_skb(skb);
+			continue;
+		}
+
+		nf_reset(skb);
+		skb_dst_drop(skb);
+		skb_dst_set(skb, dst);
+
+		dev = skb->dev;
+		err = dst_output(skb);
+		dev_put(dev);
+	}
+
+	return;
+
+purge_queue:
+	pq->timeout = 0;
+	xfrm_queue_purge(&pq->hold_queue);
+}
+
+static int xdst_queue_output(struct sk_buff *skb)
+{
+	unsigned long sched_next;
+	struct dst_entry *dst = skb_dst(skb);
+	struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
+	struct xfrm_policy_queue *pq = &xdst->pols[0]->polq;
+
+	if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) {
+		kfree_skb(skb);
+		return -EAGAIN;
+	}
+
+	skb_dst_force(skb);
+	dev_hold(skb->dev);
+
+	spin_lock_bh(&pq->hold_queue.lock);
+
+	if (!pq->timeout)
+		pq->timeout = XFRM_QUEUE_TMO_MIN;
+
+	sched_next = jiffies + pq->timeout;
+
+	if (del_timer(&pq->hold_timer)) {
+		if (time_before(pq->hold_timer.expires, sched_next))
+			sched_next = pq->hold_timer.expires;
+	}
+
+	__skb_queue_tail(&pq->hold_queue, skb);
+	mod_timer(&pq->hold_timer, sched_next);
+
+	spin_unlock_bh(&pq->hold_queue.lock);
+
+	return 0;
+}
+
+static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
+						 struct dst_entry *dst,
+						 const struct flowi *fl,
+						 int num_xfrms,
+						 u16 family)
+{
+	int err;
+	struct net_device *dev;
+	struct dst_entry *dst1;
+	struct xfrm_dst *xdst;
+
+	xdst = xfrm_alloc_dst(net, family);
+	if (IS_ERR(xdst))
+		return xdst;
+
+	if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0 ||
+	    (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP))
+		return xdst;
+
+	dst1 = &xdst->u.dst;
+	dst_hold(dst);
+	xdst->route = dst;
+
+	dst_copy_metrics(dst1, dst);
+
+	dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
+	dst1->flags |= DST_HOST | DST_XFRM_QUEUE;
+	dst1->lastuse = jiffies;
+
+	dst1->input = dst_discard;
+	dst1->output = xdst_queue_output;
+
+	dst_hold(dst);
+	dst1->child = dst;
+	dst1->path = dst;
+
+	xfrm_init_path((struct xfrm_dst *)dst1, dst, 0);
+
+	err = -ENODEV;
+	dev = dst->dev;
+	if (!dev)
+		goto free_dst;
+
+	err = xfrm_fill_dst(xdst, dev, fl);
+	if (err)
+		goto free_dst;
+
+out:
+	return xdst;
+
+free_dst:
+	dst_release(dst1);
+	xdst = ERR_PTR(err);
+	goto out;
+}
+
 static struct flow_cache_object *
 xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
 		   struct flow_cache_object *oldflo, void *ctx)
@@ -1751,7 +1983,7 @@ make_dummy_bundle:
 	/* We found policies, but there's no bundles to instantiate:
 	 * either because the policy blocks, has no transformations or
 	 * we could not build template (no xfrm_states).*/
-	xdst = xfrm_alloc_dst(net, family);
+	xdst = xfrm_create_dummy_bundle(net, dst_orig, fl, num_xfrms, family);
 	if (IS_ERR(xdst)) {
 		xfrm_pols_put(pols, num_pols);
 		return ERR_CAST(xdst);
@@ -2359,6 +2591,9 @@ static int xfrm_bundle_ok(struct xfrm_dst *first)
 	    (dst->dev && !netif_running(dst->dev)))
 		return 0;
 
+	if (dst->flags & DST_XFRM_QUEUE)
+		return 1;
+
 	last = NULL;
 
 	do {
@@ -2656,7 +2891,7 @@ static void xfrm_policy_fini(struct net *net)
 		WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));
 
 		htab = &net->xfrm.policy_bydst[dir];
-		sz = (htab->hmask + 1);
+		sz = (htab->hmask + 1) * sizeof(struct hlist_head);
 		WARN_ON(!hlist_empty(htab->table));
 		xfrm_hash_free(htab->table, sz);
 	}
@@ -2786,10 +3021,10 @@ static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
 {
 	if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
 		if (sel_tgt->family == sel_cmp->family &&
-		    xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr,
-				  sel_cmp->family) == 0 &&
-		    xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr,
-				  sel_cmp->family) == 0 &&
+		    xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr,
+				    sel_cmp->family) &&
+		    xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr,
+				    sel_cmp->family) &&
 		    sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
 		    sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
 			return true;
@@ -2847,10 +3082,10 @@ static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tm
 		switch (t->mode) {
 		case XFRM_MODE_TUNNEL:
 		case XFRM_MODE_BEET:
-			if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr,
-					  m->old_family) == 0 &&
-			    xfrm_addr_cmp(&t->saddr, &m->old_saddr,
-					  m->old_family) == 0) {
+			if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr,
+					    m->old_family) &&
+			    xfrm_addr_equal(&t->saddr, &m->old_saddr,
+					    m->old_family)) {
 				match = 1;
 			}
 			break;
@@ -2916,10 +3151,10 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
 		return -EINVAL;
 
 	for (i = 0; i < num_migrate; i++) {
-		if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr,
-				   m[i].old_family) == 0) &&
-		    (xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr,
-				   m[i].old_family) == 0))
+		if (xfrm_addr_equal(&m[i].old_daddr, &m[i].new_daddr,
+				    m[i].old_family) &&
+		    xfrm_addr_equal(&m[i].old_saddr, &m[i].new_saddr,
+				    m[i].old_family))
 			return -EINVAL;
 		if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
 		    xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index d0a1af8ed58..603903853e8 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -43,6 +43,7 @@ static const struct snmp_mib xfrm_mib_list[] = {
 	SNMP_MIB_ITEM("XfrmOutPolDead", LINUX_MIB_XFRMOUTPOLDEAD),
 	SNMP_MIB_ITEM("XfrmOutPolError", LINUX_MIB_XFRMOUTPOLERROR),
 	SNMP_MIB_ITEM("XfrmFwdHdrError", LINUX_MIB_XFRMFWDHDRERROR),
+	SNMP_MIB_ITEM("XfrmOutStateInvalid", LINUX_MIB_XFRMOUTSTATEINVALID),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 765f6fe951e..35754cc8a9e 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -242,11 +242,13 @@ static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq)
 	u32 diff;
 	struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
 	u32 seq = ntohl(net_seq);
-	u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window;
+	u32 pos;
 
 	if (!replay_esn->replay_window)
 		return;
 
+	pos = (replay_esn->seq - 1) % replay_esn->replay_window;
+
 	if (seq > replay_esn->seq) {
 		diff = seq - replay_esn->seq;
 
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 3459692092e..ae01bdbcb29 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -158,8 +158,8 @@ out_unlock:
 	mutex_unlock(&hash_resize_mutex);
 }
 
-static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
-static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
+static DEFINE_SPINLOCK(xfrm_state_afinfo_lock);
+static struct xfrm_state_afinfo __rcu *xfrm_state_afinfo[NPROTO];
 
 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
 
@@ -168,58 +168,45 @@ int __xfrm_state_delete(struct xfrm_state *x);
 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
 void km_state_expired(struct xfrm_state *x, int hard, u32 portid);
 
-static struct xfrm_state_afinfo *xfrm_state_lock_afinfo(unsigned int family)
-{
-	struct xfrm_state_afinfo *afinfo;
-	if (unlikely(family >= NPROTO))
-		return NULL;
-	write_lock_bh(&xfrm_state_afinfo_lock);
-	afinfo = xfrm_state_afinfo[family];
-	if (unlikely(!afinfo))
-		write_unlock_bh(&xfrm_state_afinfo_lock);
-	return afinfo;
-}
-
-static void xfrm_state_unlock_afinfo(struct xfrm_state_afinfo *afinfo)
-	__releases(xfrm_state_afinfo_lock)
-{
-	write_unlock_bh(&xfrm_state_afinfo_lock);
-}
-
+static DEFINE_SPINLOCK(xfrm_type_lock);
 int xfrm_register_type(const struct xfrm_type *type, unsigned short family)
 {
-	struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family);
+	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
 	const struct xfrm_type **typemap;
 	int err = 0;
 
 	if (unlikely(afinfo == NULL))
 		return -EAFNOSUPPORT;
 	typemap = afinfo->type_map;
+	spin_lock_bh(&xfrm_type_lock);
 
 	if (likely(typemap[type->proto] == NULL))
 		typemap[type->proto] = type;
 	else
 		err = -EEXIST;
-	xfrm_state_unlock_afinfo(afinfo);
+	spin_unlock_bh(&xfrm_type_lock);
+	xfrm_state_put_afinfo(afinfo);
 	return err;
 }
 EXPORT_SYMBOL(xfrm_register_type);
 
 int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family)
 {
-	struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family);
+	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
 	const struct xfrm_type **typemap;
 	int err = 0;
 
 	if (unlikely(afinfo == NULL))
 		return -EAFNOSUPPORT;
 	typemap = afinfo->type_map;
+	spin_lock_bh(&xfrm_type_lock);
 
 	if (unlikely(typemap[type->proto] != type))
 		err = -ENOENT;
 	else
 		typemap[type->proto] = NULL;
-	xfrm_state_unlock_afinfo(afinfo);
+	spin_unlock_bh(&xfrm_type_lock);
+	xfrm_state_put_afinfo(afinfo);
 	return err;
 }
 EXPORT_SYMBOL(xfrm_unregister_type);
@@ -256,6 +243,7 @@ static void xfrm_put_type(const struct xfrm_type *type)
 	module_put(type->owner);
 }
 
+static DEFINE_SPINLOCK(xfrm_mode_lock);
 int xfrm_register_mode(struct xfrm_mode *mode, int family)
 {
 	struct xfrm_state_afinfo *afinfo;
@@ -265,12 +253,13 @@ int xfrm_register_mode(struct xfrm_mode *mode, int family)
 	if (unlikely(mode->encap >= XFRM_MODE_MAX))
 		return -EINVAL;
 
-	afinfo = xfrm_state_lock_afinfo(family);
+	afinfo = xfrm_state_get_afinfo(family);
 	if (unlikely(afinfo == NULL))
 		return -EAFNOSUPPORT;
 
 	err = -EEXIST;
 	modemap = afinfo->mode_map;
+	spin_lock_bh(&xfrm_mode_lock);
 	if (modemap[mode->encap])
 		goto out;
 
@@ -283,7 +272,8 @@ int xfrm_register_mode(struct xfrm_mode *mode, int family)
 	err = 0;
 
 out:
-	xfrm_state_unlock_afinfo(afinfo);
+	spin_unlock_bh(&xfrm_mode_lock);
+	xfrm_state_put_afinfo(afinfo);
 	return err;
 }
 EXPORT_SYMBOL(xfrm_register_mode);
@@ -297,19 +287,21 @@ int xfrm_unregister_mode(struct xfrm_mode *mode, int family)
 	if (unlikely(mode->encap >= XFRM_MODE_MAX))
 		return -EINVAL;
 
-	afinfo = xfrm_state_lock_afinfo(family);
+	afinfo = xfrm_state_get_afinfo(family);
 	if (unlikely(afinfo == NULL))
 		return -EAFNOSUPPORT;
 
 	err = -ENOENT;
 	modemap = afinfo->mode_map;
+	spin_lock_bh(&xfrm_mode_lock);
 	if (likely(modemap[mode->encap] == mode)) {
 		modemap[mode->encap] = NULL;
 		module_put(mode->afinfo->owner);
 		err = 0;
 	}
 
-	xfrm_state_unlock_afinfo(afinfo);
+	spin_unlock_bh(&xfrm_mode_lock);
+	xfrm_state_put_afinfo(afinfo);
 	return err;
 }
 EXPORT_SYMBOL(xfrm_unregister_mode);
@@ -699,7 +691,7 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
 		if (x->props.family != family ||
 		    x->id.spi       != spi ||
 		    x->id.proto     != proto ||
-		    xfrm_addr_cmp(&x->id.daddr, daddr, family))
+		    !xfrm_addr_equal(&x->id.daddr, daddr, family))
 			continue;
 
 		if ((mark & x->mark.m) != x->mark.v)
@@ -723,8 +715,8 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
 	hlist_for_each_entry(x, entry, net->xfrm.state_bysrc+h, bysrc) {
 		if (x->props.family != family ||
 		    x->id.proto     != proto ||
-		    xfrm_addr_cmp(&x->id.daddr, daddr, family) ||
-		    xfrm_addr_cmp(&x->props.saddr, saddr, family))
+		    !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
+		    !xfrm_addr_equal(&x->props.saddr, saddr, family))
 			continue;
 
 		if ((mark & x->mark.m) != x->mark.v)
@@ -989,8 +981,8 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
 		if (x->props.family	== family &&
 		    x->props.reqid	== reqid &&
 		    (mark & x->mark.m) == x->mark.v &&
-		    !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
-		    !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
+		    xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) &&
+		    xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family))
 			x->genid++;
 	}
 }
@@ -1024,8 +1016,8 @@ static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m,
 		    x->id.spi       != 0 ||
 		    x->id.proto	    != proto ||
 		    (mark & x->mark.m) != x->mark.v ||
-		    xfrm_addr_cmp(&x->id.daddr, daddr, family) ||
-		    xfrm_addr_cmp(&x->props.saddr, saddr, family))
+		    !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
+		    !xfrm_addr_equal(&x->props.saddr, saddr, family))
 			continue;
 
 		xfrm_state_hold(x);
@@ -1108,7 +1100,7 @@ int xfrm_state_add(struct xfrm_state *x)
 	if (use_spi && x->km.seq) {
 		x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq);
 		if (x1 && ((x1->id.proto != x->id.proto) ||
-		    xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) {
+		    !xfrm_addr_equal(&x1->id.daddr, &x->id.daddr, family))) {
 			to_put = x1;
 			x1 = NULL;
 		}
@@ -1234,10 +1226,10 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m)
 				continue;
 			if (m->reqid && x->props.reqid != m->reqid)
 				continue;
-			if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr,
-					  m->old_family) ||
-			    xfrm_addr_cmp(&x->props.saddr, &m->old_saddr,
-					  m->old_family))
+			if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
+					     m->old_family) ||
+			    !xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
+					     m->old_family))
 				continue;
 			xfrm_state_hold(x);
 			return x;
@@ -1249,10 +1241,10 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m)
 			if (x->props.mode != m->mode ||
 			    x->id.proto != m->proto)
 				continue;
-			if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr,
-					  m->old_family) ||
-			    xfrm_addr_cmp(&x->props.saddr, &m->old_saddr,
-					  m->old_family))
+			if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
+					     m->old_family) ||
+			    !xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
+					     m->old_family))
 				continue;
 			xfrm_state_hold(x);
 			return x;
@@ -1277,7 +1269,7 @@ struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x,
 	memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));
 
 	/* add state */
-	if (!xfrm_addr_cmp(&x->id.daddr, &m->new_daddr, m->new_family)) {
+	if (xfrm_addr_equal(&x->id.daddr, &m->new_daddr, m->new_family)) {
 		/* a care is needed when the destination address of the
 		   state is to be updated as it is a part of triplet */
 		xfrm_state_insert(xc);
@@ -1370,9 +1362,6 @@ int xfrm_state_check_expire(struct xfrm_state *x)
 	if (!x->curlft.use_time)
 		x->curlft.use_time = get_seconds();
 
-	if (x->km.state != XFRM_STATE_VALID)
-		return -EINVAL;
-
 	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
 	    x->curlft.packets >= x->lft.hard_packet_limit) {
 		x->km.state = XFRM_STATE_EXPIRED;
@@ -1648,27 +1637,26 @@ static void xfrm_replay_timer_handler(unsigned long data)
 }
 
 static LIST_HEAD(xfrm_km_list);
-static DEFINE_RWLOCK(xfrm_km_lock);
 
 void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
 {
 	struct xfrm_mgr *km;
 
-	read_lock(&xfrm_km_lock);
-	list_for_each_entry(km, &xfrm_km_list, list)
+	rcu_read_lock();
+	list_for_each_entry_rcu(km, &xfrm_km_list, list)
 		if (km->notify_policy)
 			km->notify_policy(xp, dir, c);
-	read_unlock(&xfrm_km_lock);
+	rcu_read_unlock();
 }
 
 void km_state_notify(struct xfrm_state *x, const struct km_event *c)
 {
 	struct xfrm_mgr *km;
-	read_lock(&xfrm_km_lock);
-	list_for_each_entry(km, &xfrm_km_list, list)
+	rcu_read_lock();
+	list_for_each_entry_rcu(km, &xfrm_km_list, list)
 		if (km->notify)
 			km->notify(x, c);
-	read_unlock(&xfrm_km_lock);
+	rcu_read_unlock();
 }
 
 EXPORT_SYMBOL(km_policy_notify);
@@ -1698,13 +1686,13 @@ int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
 	int err = -EINVAL, acqret;
 	struct xfrm_mgr *km;
 
-	read_lock(&xfrm_km_lock);
-	list_for_each_entry(km, &xfrm_km_list, list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
 		acqret = km->acquire(x, t, pol);
 		if (!acqret)
 			err = acqret;
 	}
-	read_unlock(&xfrm_km_lock);
+	rcu_read_unlock();
 	return err;
 }
 EXPORT_SYMBOL(km_query);
@@ -1714,14 +1702,14 @@ int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
 	int err = -EINVAL;
 	struct xfrm_mgr *km;
 
-	read_lock(&xfrm_km_lock);
-	list_for_each_entry(km, &xfrm_km_list, list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
 		if (km->new_mapping)
 			err = km->new_mapping(x, ipaddr, sport);
 		if (!err)
 			break;
 	}
-	read_unlock(&xfrm_km_lock);
+	rcu_read_unlock();
 	return err;
 }
 EXPORT_SYMBOL(km_new_mapping);
@@ -1750,15 +1738,15 @@ int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
 	int ret;
 	struct xfrm_mgr *km;
 
-	read_lock(&xfrm_km_lock);
-	list_for_each_entry(km, &xfrm_km_list, list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
 		if (km->migrate) {
 			ret = km->migrate(sel, dir, type, m, num_migrate, k);
 			if (!ret)
 				err = ret;
 		}
 	}
-	read_unlock(&xfrm_km_lock);
+	rcu_read_unlock();
 	return err;
 }
 EXPORT_SYMBOL(km_migrate);
@@ -1770,15 +1758,15 @@ int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address
 	int ret;
 	struct xfrm_mgr *km;
 
-	read_lock(&xfrm_km_lock);
-	list_for_each_entry(km, &xfrm_km_list, list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
 		if (km->report) {
 			ret = km->report(net, proto, sel, addr);
 			if (!ret)
 				err = ret;
 		}
 	}
-	read_unlock(&xfrm_km_lock);
+	rcu_read_unlock();
 	return err;
 }
 EXPORT_SYMBOL(km_report);
@@ -1802,14 +1790,14 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen
 		goto out;
 
 	err = -EINVAL;
-	read_lock(&xfrm_km_lock);
-	list_for_each_entry(km, &xfrm_km_list, list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
 		pol = km->compile_policy(sk, optname, data,
 					 optlen, &err);
 		if (err >= 0)
 			break;
 	}
-	read_unlock(&xfrm_km_lock);
+	rcu_read_unlock();
 
 	if (err >= 0) {
 		xfrm_sk_policy_insert(sk, err, pol);
@@ -1823,20 +1811,23 @@ out:
 }
 EXPORT_SYMBOL(xfrm_user_policy);
 
+static DEFINE_SPINLOCK(xfrm_km_lock);
+
 int xfrm_register_km(struct xfrm_mgr *km)
 {
-	write_lock_bh(&xfrm_km_lock);
-	list_add_tail(&km->list, &xfrm_km_list);
-	write_unlock_bh(&xfrm_km_lock);
+	spin_lock_bh(&xfrm_km_lock);
+	list_add_tail_rcu(&km->list, &xfrm_km_list);
+	spin_unlock_bh(&xfrm_km_lock);
 	return 0;
 }
 EXPORT_SYMBOL(xfrm_register_km);
 
 int xfrm_unregister_km(struct xfrm_mgr *km)
 {
-	write_lock_bh(&xfrm_km_lock);
-	list_del(&km->list);
-	write_unlock_bh(&xfrm_km_lock);
+	spin_lock_bh(&xfrm_km_lock);
+	list_del_rcu(&km->list);
+	spin_unlock_bh(&xfrm_km_lock);
+	synchronize_rcu();
 	return 0;
 }
 EXPORT_SYMBOL(xfrm_unregister_km);
@@ -1848,12 +1839,12 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
 		return -EINVAL;
 	if (unlikely(afinfo->family >= NPROTO))
 		return -EAFNOSUPPORT;
-	write_lock_bh(&xfrm_state_afinfo_lock);
+	spin_lock_bh(&xfrm_state_afinfo_lock);
 	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
 		err = -ENOBUFS;
 	else
-		xfrm_state_afinfo[afinfo->family] = afinfo;
-	write_unlock_bh(&xfrm_state_afinfo_lock);
+		rcu_assign_pointer(xfrm_state_afinfo[afinfo->family], afinfo);
+	spin_unlock_bh(&xfrm_state_afinfo_lock);
 	return err;
 }
 EXPORT_SYMBOL(xfrm_state_register_afinfo);
@@ -1865,14 +1856,15 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
 		return -EINVAL;
 	if (unlikely(afinfo->family >= NPROTO))
 		return -EAFNOSUPPORT;
-	write_lock_bh(&xfrm_state_afinfo_lock);
+	spin_lock_bh(&xfrm_state_afinfo_lock);
 	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
 		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
 			err = -EINVAL;
 		else
-			xfrm_state_afinfo[afinfo->family] = NULL;
+			RCU_INIT_POINTER(xfrm_state_afinfo[afinfo->family], NULL);
 	}
-	write_unlock_bh(&xfrm_state_afinfo_lock);
+	spin_unlock_bh(&xfrm_state_afinfo_lock);
+	synchronize_rcu();
 	return err;
 }
 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
@@ -1882,17 +1874,16 @@ static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
 	struct xfrm_state_afinfo *afinfo;
 	if (unlikely(family >= NPROTO))
 		return NULL;
-	read_lock(&xfrm_state_afinfo_lock);
-	afinfo = xfrm_state_afinfo[family];
+	rcu_read_lock();
+	afinfo = rcu_dereference(xfrm_state_afinfo[family]);
 	if (unlikely(!afinfo))
-		read_unlock(&xfrm_state_afinfo_lock);
+		rcu_read_unlock();
 	return afinfo;
 }
 
 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
-	__releases(xfrm_state_afinfo_lock)
 {
-	read_unlock(&xfrm_state_afinfo_lock);
+	rcu_read_unlock();
 }
 
 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index eb872b2e366..fbd9e6cd0fd 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1112,7 +1112,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
 	mark = xfrm_mark_get(attrs, &m);
 	if (p->info.seq) {
 		x = xfrm_find_acq_byseq(net, mark, p->info.seq);
-		if (x && xfrm_addr_cmp(&x->id.daddr, daddr, family)) {
+		if (x && !xfrm_addr_equal(&x->id.daddr, daddr, family)) {
 			xfrm_state_put(x);
 			x = NULL;
 		}