182 files changed, 6159 insertions, 3024 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 6e64f7c6a2e..7850412f52b 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -327,7 +327,7 @@ static void vlan_sync_address(struct net_device *dev,
 static void vlan_transfer_features(struct net_device *dev,
 				   struct net_device *vlandev)
 {
-	unsigned long old_features = vlandev->features;
+	u32 old_features = vlandev->features;
 
 	vlandev->features &= ~dev->vlan_features;
 	vlandev->features |= dev->features & dev->vlan_features;
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 17c5ba7551a..29a54ccd213 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -59,7 +59,6 @@
 						 * safely advertise a maxsize
 						 * of 64k */
 
-#define P9_RDMA_MAX_SGE (P9_RDMA_MAXSIZE >> PAGE_SHIFT)
 /**
  * struct p9_trans_rdma - RDMA transport instance
  *
diff --git a/net/Kconfig b/net/Kconfig
index 72840626284..79cabf1ee68 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -221,6 +221,12 @@ config RPS
 	depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
 	default y
 
+config RFS_ACCEL
+	boolean
+	depends on RPS && GENERIC_HARDIRQS
+	select CPU_RMAP
+	default y
+
 config XPS
 	boolean
 	depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c
index a77b773b086..7db9ad82cc0 100644
--- a/net/batman-adv/vis.c
+++ b/net/batman-adv/vis.c
@@ -64,6 +64,7 @@ static void free_info(struct kref *ref)
 
 	spin_unlock_bh(&bat_priv->vis_list_lock);
 	kfree_skb(info->skb_packet);
+	kfree(info);
 }
 
 /* Compare two vis packets, used by the hashing algorithm */
@@ -268,10 +269,10 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
 				buff_pos += sprintf(buff + buff_pos, "%pM,",
 						entry->addr);
 
-				for (i = 0; i < packet->entries; i++)
+				for (j = 0; j < packet->entries; j++)
 					buff_pos += vis_data_read_entry(
 							buff + buff_pos,
-							&entries[i],
+							&entries[j],
 							entry->addr,
 							entry->primary);
 
@@ -444,7 +445,7 @@ static struct vis_info *add_packet(struct bat_priv *bat_priv,
 			      info);
 	if (hash_added < 0) {
 		/* did not work (for some reason) */
-		kref_put(&old_info->refcount, free_info);
+		kref_put(&info->refcount, free_info);
 		info = NULL;
 	}
 
@@ -815,7 +816,7 @@ static void send_vis_packets(struct work_struct *work)
 		container_of(work, struct delayed_work, work);
 	struct bat_priv *bat_priv =
 		container_of(delayed_work, struct bat_priv, vis_work);
-	struct vis_info *info, *temp;
+	struct vis_info *info;
 
 	spin_lock_bh(&bat_priv->vis_hash_lock);
 	purge_vis_packets(bat_priv);
@@ -825,8 +826,9 @@ static void send_vis_packets(struct work_struct *work)
 		send_list_add(bat_priv, bat_priv->my_vis_info);
 	}
 
-	list_for_each_entry_safe(info, temp, &bat_priv->vis_send_list,
-				 send_list) {
+	while (!list_empty(&bat_priv->vis_send_list)) {
+		info = list_first_entry(&bat_priv->vis_send_list,
+					typeof(*info), send_list);
 
 		kref_get(&info->refcount);
 		spin_unlock_bh(&bat_priv->vis_hash_lock);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 6b90a419173..99cd8d9d891 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -379,14 +379,10 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8
 	hci_conn_hold(acl);
 
 	if (acl->state == BT_OPEN || acl->state == BT_CLOSED) {
-		acl->sec_level = sec_level;
+		acl->sec_level = BT_SECURITY_LOW;
+		acl->pending_sec_level = sec_level;
 		acl->auth_type = auth_type;
 		hci_acl_connect(acl);
-	} else {
-		if (acl->sec_level < sec_level)
-			acl->sec_level = sec_level;
-		if (acl->auth_type < auth_type)
-			acl->auth_type = auth_type;
 	}
 
 	if (type == ACL_LINK)
@@ -442,11 +438,17 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
 {
 	BT_DBG("conn %p", conn);
 
+	if (conn->pending_sec_level > sec_level)
+		sec_level = conn->pending_sec_level;
+
 	if (sec_level > conn->sec_level)
-		conn->sec_level = sec_level;
+		conn->pending_sec_level = sec_level;
 	else if (conn->link_mode & HCI_LM_AUTH)
 		return 1;
 
+	/* Make sure we preserve an existing MITM requirement */
+	auth_type |= (conn->auth_type & 0x01);
+
 	conn->auth_type = auth_type;
 
 	if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) {
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 8b602d881fd..9c4541bc488 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1011,6 +1011,10 @@ int hci_unregister_dev(struct hci_dev *hdev)
 
 	destroy_workqueue(hdev->workqueue);
 
+	hci_dev_lock_bh(hdev);
+	hci_blacklist_clear(hdev);
+	hci_dev_unlock_bh(hdev);
+
 	__hci_dev_put(hdev);
 
 	return 0;
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 38100170d38..a290854fdaa 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -692,13 +692,13 @@ static int hci_outgoing_auth_needed(struct hci_dev *hdev,
 	if (conn->state != BT_CONFIG || !conn->out)
 		return 0;
 
-	if (conn->sec_level == BT_SECURITY_SDP)
+	if (conn->pending_sec_level == BT_SECURITY_SDP)
 		return 0;
 
 	/* Only request authentication for SSP connections or non-SSP
 	 * devices with sec_level HIGH */
 	if (!(hdev->ssp_mode > 0 && conn->ssp_mode > 0) &&
-					conn->sec_level != BT_SECURITY_HIGH)
+				conn->pending_sec_level != BT_SECURITY_HIGH)
 		return 0;
 
 	return 1;
@@ -1095,9 +1095,10 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 
 	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
 	if (conn) {
-		if (!ev->status)
+		if (!ev->status) {
 			conn->link_mode |= HCI_LM_AUTH;
-		else
+			conn->sec_level = conn->pending_sec_level;
+		} else
 			conn->sec_level = BT_SECURITY_LOW;
 
 		clear_bit(HCI_CONN_AUTH_PEND, &conn->pend);
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index c791fcda7b2..7550abb0c96 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -305,33 +305,44 @@ static void l2cap_chan_del(struct sock *sk, int err)
 	}
 }
 
-/* Service level security */
-static inline int l2cap_check_security(struct sock *sk)
+static inline u8 l2cap_get_auth_type(struct sock *sk)
 {
-	struct l2cap_conn *conn = l2cap_pi(sk)->conn;
-	__u8 auth_type;
+	if (sk->sk_type == SOCK_RAW) {
+		switch (l2cap_pi(sk)->sec_level) {
+		case BT_SECURITY_HIGH:
+			return HCI_AT_DEDICATED_BONDING_MITM;
+		case BT_SECURITY_MEDIUM:
+			return HCI_AT_DEDICATED_BONDING;
+		default:
+			return HCI_AT_NO_BONDING;
+		}
+	} else if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) {
+		if (l2cap_pi(sk)->sec_level == BT_SECURITY_LOW)
+			l2cap_pi(sk)->sec_level = BT_SECURITY_SDP;
 
-	if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) {
 		if (l2cap_pi(sk)->sec_level == BT_SECURITY_HIGH)
-			auth_type = HCI_AT_NO_BONDING_MITM;
+			return HCI_AT_NO_BONDING_MITM;
 		else
-			auth_type = HCI_AT_NO_BONDING;
-
-		if (l2cap_pi(sk)->sec_level == BT_SECURITY_LOW)
-			l2cap_pi(sk)->sec_level = BT_SECURITY_SDP;
+			return HCI_AT_NO_BONDING;
 	} else {
 		switch (l2cap_pi(sk)->sec_level) {
 		case BT_SECURITY_HIGH:
-			auth_type = HCI_AT_GENERAL_BONDING_MITM;
-			break;
+			return HCI_AT_GENERAL_BONDING_MITM;
 		case BT_SECURITY_MEDIUM:
-			auth_type = HCI_AT_GENERAL_BONDING;
-			break;
+			return HCI_AT_GENERAL_BONDING;
 		default:
-			auth_type = HCI_AT_NO_BONDING;
-			break;
+			return HCI_AT_NO_BONDING;
 		}
 	}
+}
+
+/* Service level security */
+static inline int l2cap_check_security(struct sock *sk)
+{
+	struct l2cap_conn *conn = l2cap_pi(sk)->conn;
+	__u8 auth_type;
+
+	auth_type = l2cap_get_auth_type(sk);
 
 	return hci_conn_security(conn->hcon, l2cap_pi(sk)->sec_level,
 								auth_type);
@@ -1068,39 +1079,7 @@ static int l2cap_do_connect(struct sock *sk)
 
 	err = -ENOMEM;
 
-	if (sk->sk_type == SOCK_RAW) {
-		switch (l2cap_pi(sk)->sec_level) {
-		case BT_SECURITY_HIGH:
-			auth_type = HCI_AT_DEDICATED_BONDING_MITM;
-			break;
-		case BT_SECURITY_MEDIUM:
-			auth_type = HCI_AT_DEDICATED_BONDING;
-			break;
-		default:
-			auth_type = HCI_AT_NO_BONDING;
-			break;
-		}
-	} else if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) {
-		if (l2cap_pi(sk)->sec_level == BT_SECURITY_HIGH)
-			auth_type = HCI_AT_NO_BONDING_MITM;
-		else
-			auth_type = HCI_AT_NO_BONDING;
-
-		if (l2cap_pi(sk)->sec_level == BT_SECURITY_LOW)
-			l2cap_pi(sk)->sec_level = BT_SECURITY_SDP;
-	} else {
-		switch (l2cap_pi(sk)->sec_level) {
-		case BT_SECURITY_HIGH:
-			auth_type = HCI_AT_GENERAL_BONDING_MITM;
-			break;
-		case BT_SECURITY_MEDIUM:
-			auth_type = HCI_AT_GENERAL_BONDING;
-			break;
-		default:
-			auth_type = HCI_AT_NO_BONDING;
-			break;
-		}
-	}
+	auth_type = l2cap_get_auth_type(sk);
 
 	hcon = hci_connect(hdev, ACL_LINK, dst,
 					l2cap_pi(sk)->sec_level, auth_type);
@@ -1127,7 +1106,8 @@ static int l2cap_do_connect(struct sock *sk)
 		if (sk->sk_type != SOCK_SEQPACKET &&
 				sk->sk_type != SOCK_STREAM) {
 			l2cap_sock_clear_timer(sk);
-			sk->sk_state = BT_CONNECTED;
+			if (l2cap_check_security(sk))
+				sk->sk_state = BT_CONNECTED;
 		} else
 			l2cap_do_start(sk);
 	}
@@ -1893,8 +1873,8 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
 		if (pi->mode == L2CAP_MODE_STREAMING) {
 			l2cap_streaming_send(sk);
 		} else {
-			if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY &&
-					pi->conn_state && L2CAP_CONN_WAIT_F) {
+			if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) &&
+					(pi->conn_state & L2CAP_CONN_WAIT_F)) {
 				err = len;
 				break;
 			}
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index ff8aaa73665..6b83776534f 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -1164,7 +1164,8 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci)
 			 * initiator rfcomm_process_rx already calls
 			 * rfcomm_session_put() */
 			if (s->sock->sk->sk_state != BT_CLOSED)
-				rfcomm_session_put(s);
+				if (list_empty(&s->dlcs))
+					rfcomm_session_put(s);
 			break;
 		}
 	}
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index d9d1e2bac1d..2a6801d8b72 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -365,7 +365,7 @@ int br_min_mtu(const struct net_bridge *br)
 void br_features_recompute(struct net_bridge *br)
 {
 	struct net_bridge_port *p;
-	unsigned long features, mask;
+	u32 features, mask;
 
 	features = mask = br->feature_mask;
 	if (list_empty(&br->port_list))
@@ -379,7 +379,7 @@ void br_features_recompute(struct net_bridge *br)
 	}
 
 done:
-	br->dev->features = netdev_fix_features(features, NULL);
+	br->dev->features = netdev_fix_features(br->dev, features);
 }
 
 /* called with RTNL */
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 84aac7734bf..9f22898c535 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -182,7 +182,7 @@ struct net_bridge
 	struct br_cpu_netstats __percpu *stats;
 	spinlock_t			hash_lock;
 	struct hlist_head		hash[BR_HASH_SIZE];
-	unsigned long			feature_mask;
+	u32				feature_mask;
 #ifdef CONFIG_BRIDGE_NETFILTER
 	struct rtable 			fake_rtable;
 	bool				nf_call_iptables;
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 50a46afc2bc..2ed0056a39a 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -22,9 +22,15 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_ip6.h>
 
-struct tcpudphdr {
-	__be16 src;
-	__be16 dst;
+union pkthdr {
+	struct {
+		__be16 src;
+		__be16 dst;
+	} tcpudphdr;
+	struct {
+		u8 type;
+		u8 code;
+	} icmphdr;
 };
 
 static bool
@@ -33,8 +39,8 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	const struct ebt_ip6_info *info = par->matchinfo;
 	const struct ipv6hdr *ih6;
 	struct ipv6hdr _ip6h;
-	const struct tcpudphdr *pptr;
-	struct tcpudphdr _ports;
+	const union pkthdr *pptr;
+	union pkthdr _pkthdr;
 
 	ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h);
 	if (ih6 == NULL)
@@ -56,26 +62,34 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
 			return false;
 		if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO))
 			return false;
-		if (!(info->bitmask & EBT_IP6_DPORT) &&
-		    !(info->bitmask & EBT_IP6_SPORT))
+		if (!(info->bitmask & ( EBT_IP6_DPORT |
+					EBT_IP6_SPORT | EBT_IP6_ICMP6)))
 			return true;
-		pptr = skb_header_pointer(skb, offset_ph, sizeof(_ports),
-					  &_ports);
+
+		/* min ICMPv6 header size is 4, so sizeof(_pkthdr) is ok. */
+		pptr = skb_header_pointer(skb, offset_ph, sizeof(_pkthdr),
+					  &_pkthdr);
 		if (pptr == NULL)
 			return false;
 		if (info->bitmask & EBT_IP6_DPORT) {
-			u32 dst = ntohs(pptr->dst);
+			u16 dst = ntohs(pptr->tcpudphdr.dst);
 			if (FWINV(dst < info->dport[0] ||
 				  dst > info->dport[1], EBT_IP6_DPORT))
 				return false;
 		}
 		if (info->bitmask & EBT_IP6_SPORT) {
-			u32 src = ntohs(pptr->src);
+			u16 src = ntohs(pptr->tcpudphdr.src);
 			if (FWINV(src < info->sport[0] ||
 				  src > info->sport[1], EBT_IP6_SPORT))
 			return false;
 		}
-		return true;
+		if ((info->bitmask & EBT_IP6_ICMP6) &&
+		     FWINV(pptr->icmphdr.type < info->icmpv6_type[0] ||
+			   pptr->icmphdr.type > info->icmpv6_type[1] ||
+			   pptr->icmphdr.code < info->icmpv6_code[0] ||
+			   pptr->icmphdr.code > info->icmpv6_code[1],
+							EBT_IP6_ICMP6))
+			return false;
 	}
 	return true;
 }
@@ -103,6 +117,14 @@ static int ebt_ip6_mt_check(const struct xt_mtchk_param *par)
 		return -EINVAL;
 	if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1])
 		return -EINVAL;
+	if (info->bitmask & EBT_IP6_ICMP6) {
+		if ((info->invflags & EBT_IP6_PROTO) ||
+		     info->protocol != IPPROTO_ICMPV6)
+			return -EINVAL;
+		if (info->icmpv6_type[0] > info->icmpv6_type[1] ||
+		    info->icmpv6_code[0] > info->icmpv6_code[1])
+			return -EINVAL;
+	}
 	return 0;
 }
 
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 16df0532d4b..5f1825df9dc 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1764,6 +1764,7 @@ static int compat_table_info(const struct ebt_table_info *info,
 
 	newinfo->entries_size = size;
 
+	xt_compat_init_offsets(AF_INET, info->nentries);
 	return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info,
 							entries, newinfo);
 }
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index c665de778b6..f1f98d967d8 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -23,10 +23,8 @@
 #include <asm/atomic.h>
 
 #define MAX_PHY_LAYERS 7
-#define PHY_NAME_LEN 20
 
 #define container_obj(layr) container_of(layr, struct cfcnfg, layer)
-#define RFM_FRAGMENT_SIZE 4030
 
 /* Information about CAIF physical interfaces held by Config Module in order
  * to manage physical interfaces
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
index d3ed264ad6c..27dab26ad3b 100644
--- a/net/caif/cfdgml.c
+++ b/net/caif/cfdgml.c
@@ -18,7 +18,6 @@
 #define DGM_CMD_BIT  0x80
 #define DGM_FLOW_OFF 0x81
 #define DGM_FLOW_ON  0x80
-#define DGM_CTRL_PKT_SIZE 1
 #define DGM_MTU 1500
 
 static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt);
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index 9297f7dea9d..8303fe3ebf8 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -25,7 +25,6 @@ struct cfserl {
 	spinlock_t sync;
 	bool usestx;
 };
-#define STXLEN(layr) (layr->usestx ? 1 : 0)
 
 static int cfserl_receive(struct cflayer *layr, struct cfpkt *pkt);
 static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt);
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c
index efad410e4c8..315c0d60136 100644
--- a/net/caif/cfutill.c
+++ b/net/caif/cfutill.c
@@ -20,7 +20,7 @@
 #define UTIL_REMOTE_SHUTDOWN 0x82
 #define UTIL_FLOW_OFF 0x81
 #define UTIL_FLOW_ON  0x80
-#define UTIL_CTRL_PKT_SIZE 1
+
 static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt);
 static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt);
 
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
index 3b425b189a9..c3b1dec4acf 100644
--- a/net/caif/cfveil.c
+++ b/net/caif/cfveil.c
@@ -17,7 +17,7 @@
 #define VEI_FLOW_OFF 0x81
 #define VEI_FLOW_ON  0x80
 #define VEI_SET_PIN  0x82
-#define VEI_CTRL_PKT_SIZE 1
+
 #define container_obj(layr) container_of(layr, struct cfsrvl, layer)
 
 static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt);
diff --git a/net/core/dev.c b/net/core/dev.c
index 7c6a46f8037..9109e2648d4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -132,6 +132,7 @@
 #include <trace/events/skb.h>
 #include <linux/pci.h>
 #include <linux/inetdevice.h>
+#include <linux/cpu_rmap.h>
 
 #include "net-sysfs.h"
 
@@ -749,7 +750,8 @@ EXPORT_SYMBOL(dev_get_by_index);
  *	@ha: hardware address
  *
  *	Search for an interface by MAC address. Returns NULL if the device
- *	is not found or a pointer to the device. The caller must hold RCU
+ *	is not found or a pointer to the device.
+ *	The caller must hold RCU or RTNL.
  *	The returned device has not had its ref count increased
  *	and the caller must therefore be careful about locking
  *
@@ -1285,7 +1287,7 @@ static int __dev_close(struct net_device *dev)
 	return __dev_close_many(&single);
 }
 
-int dev_close_many(struct list_head *head)
+static int dev_close_many(struct list_head *head)
 {
 	struct net_device *dev, *tmp;
 	LIST_HEAD(tmp_list);
@@ -1593,6 +1595,48 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 	rcu_read_unlock();
 }
 
+/* netif_setup_tc - Handle tc mappings on real_num_tx_queues change
+ * @dev: Network device
+ * @txq: number of queues available
+ *
+ * If real_num_tx_queues is changed, the tc mappings may no longer be
+ * valid. Every priority from 1 to TC_BITMASK whose traffic class no
+ * longer fits within @txq queues is remapped to TC0, so the stale
+ * offset/count pair is no longer used. In the worst case TC0 itself
+ * is invalid; nothing can be done, so priority mappings are disabled.
+ * It is expected that drivers will fix this mapping if they can before
+ * calling netif_set_real_num_tx_queues.
+ */
+static void netif_setup_tc(struct net_device *dev, unsigned int txq)
+{
+	int i;
+	struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
+
+	/* If TC0 no longer fits in txq queues, disable TC mapping entirely */
+	if (tc->offset + tc->count > txq) {
+		pr_warning("Number of in use tx queues changed "
+			   "invalidating tc mappings. Priority "
+			   "traffic classification disabled!\n");
+		dev->num_tc = 0;
+		return;
+	}
+
+	/* Remap priorities whose tc no longer fits in txq queues to TC0 */
+	for (i = 1; i < TC_BITMASK + 1; i++) {
+		int q = netdev_get_prio_tc_map(dev, i);
+
+		tc = &dev->tc_to_txq[q];
+		if (tc->offset + tc->count > txq) {
+			pr_warning("Number of in use tx queues "
+				   "changed. Priority %i to tc "
+				   "mapping %i is no longer valid "
+				   "setting map to 0\n",
+				   i, q);
+			netdev_set_prio_tc_map(dev, i, 0);
+		}
+	}
+}
+
 /*
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
  * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
@@ -1612,6 +1656,9 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 		if (rc)
 			return rc;
 
+		if (dev->num_tc)
+			netif_setup_tc(dev, txq);
+
 		if (txq < dev->real_num_tx_queues)
 			qdisc_reset_all_tx_gt(dev, txq);
 	}
@@ -1811,7 +1858,7 @@ EXPORT_SYMBOL(skb_checksum_help);
  *	It may return NULL if the skb requires no segmentation.  This is
  *	only possible when GSO is used for verifying header integrity.
  */
-struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
+struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features)
 {
 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
 	struct packet_type *ptype;
@@ -1999,7 +2046,7 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
 		 protocol == htons(ETH_P_FCOE)));
 }
 
-static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features)
+static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features)
 {
 	if (!can_checksum_protocol(features, protocol)) {
 		features &= ~NETIF_F_ALL_CSUM;
@@ -2011,10 +2058,10 @@ static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features
 	return features;
 }
 
-int netif_skb_features(struct sk_buff *skb)
+u32 netif_skb_features(struct sk_buff *skb)
 {
 	__be16 protocol = skb->protocol;
-	int features = skb->dev->features;
+	u32 features = skb->dev->features;
 
 	if (protocol == htons(ETH_P_8021Q)) {
 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
@@ -2059,7 +2106,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 	int rc = NETDEV_TX_OK;
 
 	if (likely(!skb->next)) {
-		int features;
+		u32 features;
 
 		/*
 		 * If device doesnt need skb->dst, release it right now while
@@ -2161,6 +2208,8 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
 		  unsigned int num_tx_queues)
 {
 	u32 hash;
+	u16 qoffset = 0;
+	u16 qcount = num_tx_queues;
 
 	if (skb_rx_queue_recorded(skb)) {
 		hash = skb_get_rx_queue(skb);
@@ -2169,13 +2218,19 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
 		return hash;
 	}
 
+	if (dev->num_tc) {
+		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+		qoffset = dev->tc_to_txq[tc].offset;
+		qcount = dev->tc_to_txq[tc].count;
+	}
+
 	if (skb->sk && skb->sk->sk_hash)
 		hash = skb->sk->sk_hash;
 	else
 		hash = (__force u16) skb->protocol ^ skb->rxhash;
 	hash = jhash_1word(hash, hashrnd);
 
-	return (u16) (((u64) hash * num_tx_queues) >> 32);
+	return (u16) (((u64) hash * qcount) >> 32) + qoffset;
 }
 EXPORT_SYMBOL(__skb_tx_hash);
 
@@ -2272,15 +2327,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 				 struct netdev_queue *txq)
 {
 	spinlock_t *root_lock = qdisc_lock(q);
-	bool contended = qdisc_is_running(q);
+	bool contended;
 	int rc;
 
+	qdisc_skb_cb(skb)->pkt_len = skb->len;
+	qdisc_calculate_pkt_len(skb, q);
 	/*
 	 * Heuristic to force contended enqueues to serialize on a
 	 * separate lock before trying to get qdisc main lock.
 	 * This permits __QDISC_STATE_RUNNING owner to get the lock more often
 	 * and dequeue packets faster.
 	 */
+	contended = qdisc_is_running(q);
 	if (unlikely(contended))
 		spin_lock(&q->busylock);
 
@@ -2298,7 +2356,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
 			skb_dst_force(skb);
 
-		qdisc_skb_cb(skb)->pkt_len = skb->len;
 		qdisc_bstats_update(q, skb);
 
 		if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
@@ -2313,7 +2370,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		rc = NET_XMIT_SUCCESS;
 	} else {
 		skb_dst_force(skb);
-		rc = qdisc_enqueue_root(skb, q);
+		rc = q->enqueue(skb, q) & NET_XMIT_MASK;
 		if (qdisc_run_begin(q)) {
 			if (unlikely(contended)) {
 				spin_unlock(&q->busylock);
@@ -2532,6 +2589,53 @@ EXPORT_SYMBOL(__skb_get_rxhash);
 struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
 EXPORT_SYMBOL(rps_sock_flow_table);
 
+static struct rps_dev_flow *
+set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+	    struct rps_dev_flow *rflow, u16 next_cpu)
+{
+	u16 tcpu;
+
+	tcpu = rflow->cpu = next_cpu;	/* retarget this flow entry */
+	if (tcpu != RPS_NO_CPU) {
+#ifdef CONFIG_RFS_ACCEL
+		struct netdev_rx_queue *rxqueue;
+		struct rps_dev_flow_table *flow_table;
+		struct rps_dev_flow *old_rflow;
+		u32 flow_id;
+		u16 rxq_index;
+		int rc;
+
+		/* Should we steer this flow to a different hardware queue? */
+		if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap)
+			goto out;
+		rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
+		if (rxq_index == skb_get_rx_queue(skb))
+			goto out;	/* skb already came in on that queue */
+
+		rxqueue = dev->_rx + rxq_index;
+		flow_table = rcu_dereference(rxqueue->rps_flow_table);
+		if (!flow_table)
+			goto out;
+		flow_id = skb->rxhash & flow_table->mask;
+		rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
+							rxq_index, flow_id);
+		if (rc < 0)
+			goto out;	/* negative return: keep using rflow */
+		old_rflow = rflow;
+		rflow = &flow_table->flows[flow_id];	/* entry in the new queue's table */
+		rflow->cpu = next_cpu;
+		rflow->filter = rc;	/* non-negative rc is kept as the filter ID */
+		if (old_rflow->filter == rflow->filter)
+			old_rflow->filter = RPS_NO_FILTER;	/* old entry drops its claim */
+	out:
+#endif
+		rflow->last_qtail =
+			per_cpu(softnet_data, tcpu).input_queue_head;
+	}
+
+	return rflow;	/* may be a different entry than the one passed in */
+}
+
 /*
  * get_rps_cpu is called from netif_receive_skb and returns the target
  * CPU from the RPS map of the receiving queue for a given skb.
@@ -2602,12 +2706,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 		if (unlikely(tcpu != next_cpu) &&
 		    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
 		     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
-		      rflow->last_qtail)) >= 0)) {
-			tcpu = rflow->cpu = next_cpu;
-			if (tcpu != RPS_NO_CPU)
-				rflow->last_qtail = per_cpu(softnet_data,
-				    tcpu).input_queue_head;
-		}
+		      rflow->last_qtail)) >= 0))
+			rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
+
 		if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
 			*rflowp = rflow;
 			cpu = tcpu;
@@ -2628,6 +2729,46 @@ done:
 	return cpu;
 }
 
+#ifdef CONFIG_RFS_ACCEL
+
+/**
+ * rps_may_expire_flow - check whether an RFS hardware filter may be removed
+ * @dev: Device on which the filter was set
+ * @rxq_index: RX queue index
+ * @flow_id: Flow ID passed to ndo_rx_flow_steer()
+ * @filter_id: Filter ID returned by ndo_rx_flow_steer()
+ *
+ * Drivers that implement ndo_rx_flow_steer() should periodically call
+ * this function for each installed filter and remove the filters for
+ * which it returns %true.
+ */
+bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
+			 u32 flow_id, u16 filter_id)
+{
+	struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
+	struct rps_dev_flow_table *flow_table;
+	struct rps_dev_flow *rflow;
+	bool expire = true;	/* removable unless shown to be in use below */
+	int cpu;
+
+	rcu_read_lock();
+	flow_table = rcu_dereference(rxqueue->rps_flow_table);
+	if (flow_table && flow_id <= flow_table->mask) {
+		rflow = &flow_table->flows[flow_id];
+		cpu = ACCESS_ONCE(rflow->cpu);	/* read once; used twice below */
+		if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
+		    ((int)(per_cpu(softnet_data, cpu).input_queue_head -
+			   rflow->last_qtail) <
+		     (int)(10 * flow_table->mask)))
+			expire = false;	/* same filter, CPU set, head < 10 * mask past last_qtail */
+	}
+	rcu_read_unlock();
+	return expire;
+}
+EXPORT_SYMBOL(rps_may_expire_flow);
+
+#endif /* CONFIG_RFS_ACCEL */
+
 /* Called from hardirq (IPI) context */
 static void rps_trigger_softirq(void *data)
 {
@@ -3423,6 +3564,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
 	__skb_pull(skb, skb_headlen(skb));
 	skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
 	skb->vlan_tci = 0;
+	skb->dev = napi->dev;
 
 	napi->skb = skb;
 }
@@ -3910,12 +4052,15 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos)
 
 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct net_device *dev = (v == SEQ_START_TOKEN) ?
-				  first_net_device(seq_file_net(seq)) :
-				  next_net_device((struct net_device *)v);
+	struct net_device *dev = v;
+
+	if (v == SEQ_START_TOKEN)
+		dev = first_net_device_rcu(seq_file_net(seq));
+	else
+		dev = next_net_device_rcu(dev);
 
 	++*pos;
-	return rcu_dereference(dev);
+	return dev;
 }
 
 void dev_seq_stop(struct seq_file *seq, void *v)
@@ -4572,6 +4717,17 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
 EXPORT_SYMBOL(dev_set_mtu);
 
 /**
+ *	dev_set_group - Change the group this device belongs to
+ *	@dev: device
+ *	@new_group: group this device should belong to (stored as given)
+ */
+void dev_set_group(struct net_device *dev, int new_group)
+{
+	dev->group = new_group;
+}
+EXPORT_SYMBOL(dev_set_group);
+
+/**
  *	dev_set_mac_address - Change Media Access Control Address
  *	@dev: device
  *	@sa: new address
@@ -5061,41 +5217,49 @@ static void rollback_registered(struct net_device *dev)
 	rollback_registered_many(&single);
 }
 
-unsigned long netdev_fix_features(unsigned long features, const char *name)
+u32 netdev_fix_features(struct net_device *dev, u32 features)
 {
+	/* Fix illegal checksum combinations */
+	if ((features & NETIF_F_HW_CSUM) &&
+	    (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
+		netdev_info(dev, "mixed HW and IP checksum settings.\n");
+		features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
+	}
+
+	if ((features & NETIF_F_NO_CSUM) &&
+	    (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
+		netdev_info(dev, "mixed no checksumming and other settings.\n");
+		features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
+	}
+
 	/* Fix illegal SG+CSUM combinations. */
 	if ((features & NETIF_F_SG) &&
 	    !(features & NETIF_F_ALL_CSUM)) {
-		if (name)
-			printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
-			       "checksum feature.\n", name);
+		netdev_info(dev,
+			    "Dropping NETIF_F_SG since no checksum feature.\n");
 		features &= ~NETIF_F_SG;
 	}
 
 	/* TSO requires that SG is present as well. */
 	if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
-		if (name)
-			printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
-			       "SG feature.\n", name);
+		netdev_info(dev, "Dropping NETIF_F_TSO since no SG feature.\n");
 		features &= ~NETIF_F_TSO;
 	}
 
+	/* UFO needs SG and checksumming */
 	if (features & NETIF_F_UFO) {
 		/* maybe split UFO into V4 and V6? */
 		if (!((features & NETIF_F_GEN_CSUM) ||
 		    (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
 			    == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
-			if (name)
-				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
-				       "since no checksum offload features.\n",
-				       name);
+			netdev_info(dev,
+				"Dropping NETIF_F_UFO since no checksum offload features.\n");
 			features &= ~NETIF_F_UFO;
 		}
 
 		if (!(features & NETIF_F_SG)) {
-			if (name)
-				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
-				       "since no NETIF_F_SG feature.\n", name);
+			netdev_info(dev,
+				"Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
 			features &= ~NETIF_F_UFO;
 		}
 	}
@@ -5238,22 +5402,7 @@ int register_netdevice(struct net_device *dev)
 	if (dev->iflink == -1)
 		dev->iflink = dev->ifindex;
 
-	/* Fix illegal checksum combinations */
-	if ((dev->features & NETIF_F_HW_CSUM) &&
-	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
-		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
-		       dev->name);
-		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
-	}
-
-	if ((dev->features & NETIF_F_NO_CSUM) &&
-	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
-		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
-		       dev->name);
-		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
-	}
-
-	dev->features = netdev_fix_features(dev->features, dev->name);
+	dev->features = netdev_fix_features(dev, dev->features);
 
 	/* Enable software GSO if SG is supported. */
 	if (dev->features & NETIF_F_SG)
@@ -5678,6 +5827,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	dev->priv_flags = IFF_XMIT_DST_RELEASE;
 	setup(dev);
 	strcpy(dev->name, name);
+	dev->group = INIT_NETDEV_GROUP;
 	return dev;
 
 free_pcpu:
@@ -5988,8 +6138,7 @@ static int dev_cpu_callback(struct notifier_block *nfb,
  *	@one to the master device with current feature set @all.  Will not
  *	enable anything that is off in @mask. Returns the new feature set.
  */
-unsigned long netdev_increment_features(unsigned long all, unsigned long one,
-					unsigned long mask)
+u32 netdev_increment_features(u32 all, u32 one, u32 mask)
 {
 	/* If device needs checksumming, downgrade to it. */
 	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
diff --git a/net/core/dst.c b/net/core/dst.c
index b99c7c7ffce..c1674fde827 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -164,6 +164,8 @@ int dst_discard(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(dst_discard);
 
+const u32 dst_default_metrics[RTAX_MAX];
+
 void *dst_alloc(struct dst_ops *ops)
 {
 	struct dst_entry *dst;
@@ -180,6 +182,7 @@ void *dst_alloc(struct dst_ops *ops)
 	dst->lastuse = jiffies;
 	dst->path = dst;
 	dst->input = dst->output = dst_discard;
+	dst_init_metrics(dst, dst_default_metrics, true);
 #if RT_CACHE_DEBUG >= 2
 	atomic_inc(&dst_total);
 #endif
@@ -282,6 +285,42 @@ void dst_release(struct dst_entry *dst)
 }
 EXPORT_SYMBOL(dst_release);
 
+/* Install a freshly allocated copy of the metrics encoded in @old on @dst.
+ * Returns the array now in place, or NULL if the allocation failed or a racing
+ * update left read-only metrics installed.
+ */
+u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
+{
+	u32 *copy = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC);
+	unsigned long seen;
+
+	if (!copy)
+		return NULL;
+
+	memcpy(copy, __DST_METRICS_PTR(old), sizeof(u32) * RTAX_MAX);
+	seen = cmpxchg(&dst->_metrics, old, (unsigned long) copy);
+	if (seen == old)
+		return copy;
+
+	kfree(copy);	/* lost the race; report what is installed instead */
+	if (seen & DST_METRICS_READ_ONLY)
+		return NULL;
+	return __DST_METRICS_PTR(seen);
+}
+EXPORT_SYMBOL(dst_cow_metrics_generic);
+
+/* Caller asserts that dst_metrics_read_only(dst) is false.  Resets @dst to the
+ * defaults flagged read-only; frees @old if the swap won. */
+void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
+{
+	unsigned long new;
+
+	new = ((unsigned long) dst_default_metrics) | DST_METRICS_READ_ONLY;
+	if (cmpxchg(&dst->_metrics, old, new) == old)
+		kfree(__DST_METRICS_PTR(old));
+}
+EXPORT_SYMBOL(__dst_destroy_metrics_generic);
+
 /**
  * skb_dst_set_noref - sets skb dst, without a reference
  * @skb: buffer
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 17741782a34..5984ee0c713 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -817,7 +817,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
 	if (regs.len > reglen)
 		regs.len = reglen;
 
-	regbuf = vmalloc(reglen);
+	regbuf = vzalloc(reglen);
 	if (!regbuf)
 		return -ENOMEM;
 
@@ -1458,7 +1458,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	void __user *useraddr = ifr->ifr_data;
 	u32 ethcmd;
 	int rc;
-	unsigned long old_features;
+	u32 old_features;
 
 	if (!dev || !netif_device_present(dev))
 		return -ENODEV;
diff --git a/net/core/filter.c b/net/core/filter.c
index afc58374ca9..232b1873bb2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -142,14 +142,14 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
 	if (err)
 		return err;
 
-	rcu_read_lock_bh();
-	filter = rcu_dereference_bh(sk->sk_filter);
+	rcu_read_lock();
+	filter = rcu_dereference(sk->sk_filter);
 	if (filter) {
 		unsigned int pkt_len = sk_run_filter(skb, filter->insns);
 
 		err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
 	}
-	rcu_read_unlock_bh();
+	rcu_read_unlock();
 
 	return err;
 }
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 60a90291342..799f06e03a2 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -316,7 +316,7 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
 {
 	size_t size = entries * sizeof(struct neighbour *);
 	struct neigh_hash_table *ret;
-	struct neighbour **buckets;
+	struct neighbour __rcu **buckets;
 
 	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
 	if (!ret)
@@ -324,14 +324,14 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
 	if (size <= PAGE_SIZE)
 		buckets = kzalloc(size, GFP_ATOMIC);
 	else
-		buckets = (struct neighbour **)
+		buckets = (struct neighbour __rcu **)
 			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
 					   get_order(size));
 	if (!buckets) {
 		kfree(ret);
 		return NULL;
 	}
-	rcu_assign_pointer(ret->hash_buckets, buckets);
+	ret->hash_buckets = buckets;
 	ret->hash_mask = entries - 1;
 	get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
 	return ret;
@@ -343,7 +343,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
 						    struct neigh_hash_table,
 						    rcu);
 	size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *);
-	struct neighbour **buckets = nht->hash_buckets;
+	struct neighbour __rcu **buckets = nht->hash_buckets;
 
 	if (size <= PAGE_SIZE)
 		kfree(buckets);
@@ -1540,7 +1540,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
 		panic("cannot create neighbour proc dir entry");
 #endif
 
-	tbl->nht = neigh_hash_alloc(8);
+	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(8));
 
 	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
 	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
@@ -1602,7 +1602,8 @@ int neigh_table_clear(struct neigh_table *tbl)
 	}
 	write_unlock(&neigh_tbl_lock);
 
-	call_rcu(&tbl->nht->rcu, neigh_hash_free_rcu);
+	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
+		 neigh_hash_free_rcu);
 	tbl->nht = NULL;
 
 	kfree(tbl->phash_buckets);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index e23c01be5a5..2e4a393dfc3 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -99,7 +99,7 @@ NETDEVICE_SHOW(addr_assign_type, fmt_dec);
 NETDEVICE_SHOW(addr_len, fmt_dec);
 NETDEVICE_SHOW(iflink, fmt_dec);
 NETDEVICE_SHOW(ifindex, fmt_dec);
-NETDEVICE_SHOW(features, fmt_long_hex);
+NETDEVICE_SHOW(features, fmt_hex);
 NETDEVICE_SHOW(type, fmt_dec);
 NETDEVICE_SHOW(link_mode, fmt_dec);
 
@@ -295,6 +295,20 @@ static ssize_t show_ifalias(struct device *dev,
 	return ret;
 }
 
+NETDEVICE_SHOW(group, fmt_dec);
+
+static int change_group(struct net_device *net, unsigned long new_group)
+{
+	dev_set_group(net, (int) new_group);	/* narrows the value to int */
+	return 0;	/* always 0; passed to netdev_store() as its setter */
+}
+
+static ssize_t store_group(struct device *dev, struct device_attribute *attr,
+			 const char *buf, size_t len) /* sysfs "group" store */
+{
+	return netdev_store(dev, attr, buf, len, change_group);
+}
+
 static struct device_attribute net_class_attributes[] = {
 	__ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL),
 	__ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
@@ -316,6 +330,7 @@ static struct device_attribute net_class_attributes[] = {
 	__ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags),
 	__ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len,
 	       store_tx_queue_len),
+	__ATTR(group, S_IRUGO | S_IWUSR, show_group, store_group),
 	{}
 };
 
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index a9e7fc4c461..d73b77adb67 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -251,6 +251,7 @@ struct pktgen_dev {
 	int max_pkt_size;	/* = ETH_ZLEN; */
 	int pkt_overhead;	/* overhead for MPLS, VLANs, IPSEC etc */
 	int nfrags;
+	struct page *page;
 	u64 delay;		/* nano-seconds */
 
 	__u64 count;		/* Default No packets to send */
@@ -1134,6 +1135,10 @@ static ssize_t pktgen_if_write(struct file *file,
 		if (node_possible(value)) {
 			pkt_dev->node = value;
 			sprintf(pg_result, "OK: node=%d", pkt_dev->node);
+			if (pkt_dev->page) {
+				put_page(pkt_dev->page);
+				pkt_dev->page = NULL;
+			}
 		}
 		else
 			sprintf(pg_result, "ERROR: node not possible");
@@ -2605,6 +2610,90 @@ static inline __be16 build_tci(unsigned int id, unsigned int cfi,
 	return htons(id | (cfi << 12) | (prio << 13));
 }
 
+/* Append the pktgen header and the rest of @datalen as zeroed payload: all
+ * linear when nfrags <= 0, otherwise as frags referencing pkt_dev->page (with
+ * any excess kept linear); then stamp magic, seq_num and time in the header.
+ */
+static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
+				int datalen)
+{
+	struct timeval timestamp;
+	struct pktgen_hdr *pgh;
+
+	pgh = (struct pktgen_hdr *)skb_put(skb, sizeof(*pgh));
+	datalen -= sizeof(*pgh);
+
+	if (pkt_dev->nfrags <= 0) {
+		/* pgh must keep pointing at the header reserved above */
+		memset(skb_put(skb, datalen), 0, datalen);
+	} else {
+		int frags = pkt_dev->nfrags;
+		int i, len;
+
+		if (frags > MAX_SKB_FRAGS)
+			frags = MAX_SKB_FRAGS;
+		len = datalen - frags * PAGE_SIZE;
+		if (len > 0) {
+			memset(skb_put(skb, len), 0, len);
+			datalen = frags * PAGE_SIZE;
+		}
+
+		i = 0;
+		while (datalen > 0) {
+			if (unlikely(!pkt_dev->page)) {
+				int node = numa_node_id();
+
+				if (pkt_dev->node >= 0 && (pkt_dev->flags & F_NODE))
+					node = pkt_dev->node;
+				pkt_dev->page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
+				if (!pkt_dev->page)
+					break;
+			}
+			skb_shinfo(skb)->frags[i].page = pkt_dev->page;
+			get_page(pkt_dev->page);
+			skb_shinfo(skb)->frags[i].page_offset = 0;
+			skb_shinfo(skb)->frags[i].size =
+			    (datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
+			datalen -= skb_shinfo(skb)->frags[i].size;
+			skb->len += skb_shinfo(skb)->frags[i].size;
+			skb->data_len += skb_shinfo(skb)->frags[i].size;
+			i++;
+			skb_shinfo(skb)->nr_frags = i;
+		}
+
+		while (i < frags) {
+			int rem;
+
+			if (i == 0)
+				break;
+
+			rem = skb_shinfo(skb)->frags[i - 1].size / 2;
+			if (rem == 0)
+				break;
+
+			skb_shinfo(skb)->frags[i - 1].size -= rem;
+			skb_shinfo(skb)->frags[i] =
+			    skb_shinfo(skb)->frags[i - 1];
+			get_page(skb_shinfo(skb)->frags[i].page);
+			skb_shinfo(skb)->frags[i].page_offset +=
+			    skb_shinfo(skb)->frags[i - 1].size;
+			skb_shinfo(skb)->frags[i].size = rem;
+			i++;
+			skb_shinfo(skb)->nr_frags = i;
+		}
+	}
+
+	/* Stamp the time, and sequence number,
+	 * convert them to network byte order
+	 */
+	pgh->pgh_magic = htonl(PKTGEN_MAGIC);
+	pgh->seq_num = htonl(pkt_dev->seq_num);
+
+	do_gettimeofday(&timestamp);
+	pgh->tv_sec = htonl(timestamp.tv_sec);
+	pgh->tv_usec = htonl(timestamp.tv_usec);
+}
+
 static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 					struct pktgen_dev *pkt_dev)
 {
@@ -2613,7 +2702,6 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 	struct udphdr *udph;
 	int datalen, iplen;
 	struct iphdr *iph;
-	struct pktgen_hdr *pgh = NULL;
 	__be16 protocol = htons(ETH_P_IP);
 	__be32 *mpls;
 	__be16 *vlan_tci = NULL;                 /* Encapsulates priority and VLAN ID */
@@ -2729,76 +2817,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 			   pkt_dev->pkt_overhead);
 	skb->dev = odev;
 	skb->pkt_type = PACKET_HOST;
-
-	if (pkt_dev->nfrags <= 0) {
-		pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
-		memset(pgh + 1, 0, datalen - sizeof(struct pktgen_hdr));
-	} else {
-		int frags = pkt_dev->nfrags;
-		int i, len;
-
-		pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8);
-
-		if (frags > MAX_SKB_FRAGS)
-			frags = MAX_SKB_FRAGS;
-		if (datalen > frags * PAGE_SIZE) {
-			len = datalen - frags * PAGE_SIZE;
-			memset(skb_put(skb, len), 0, len);
-			datalen = frags * PAGE_SIZE;
-		}
-
-		i = 0;
-		while (datalen > 0) {
-			struct page *page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0);
-			skb_shinfo(skb)->frags[i].page = page;
-			skb_shinfo(skb)->frags[i].page_offset = 0;
-			skb_shinfo(skb)->frags[i].size =
-			    (datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
-			datalen -= skb_shinfo(skb)->frags[i].size;
-			skb->len += skb_shinfo(skb)->frags[i].size;
-			skb->data_len += skb_shinfo(skb)->frags[i].size;
-			i++;
-			skb_shinfo(skb)->nr_frags = i;
-		}
-
-		while (i < frags) {
-			int rem;
-
-			if (i == 0)
-				break;
-
-			rem = skb_shinfo(skb)->frags[i - 1].size / 2;
-			if (rem == 0)
-				break;
-
-			skb_shinfo(skb)->frags[i - 1].size -= rem;
-
-			skb_shinfo(skb)->frags[i] =
-			    skb_shinfo(skb)->frags[i - 1];
-			get_page(skb_shinfo(skb)->frags[i].page);
-			skb_shinfo(skb)->frags[i].page =
-			    skb_shinfo(skb)->frags[i - 1].page;
-			skb_shinfo(skb)->frags[i].page_offset +=
-			    skb_shinfo(skb)->frags[i - 1].size;
-			skb_shinfo(skb)->frags[i].size = rem;
-			i++;
-			skb_shinfo(skb)->nr_frags = i;
-		}
-	}
-
-	/* Stamp the time, and sequence number,
-	 * convert them to network byte order
-	 */
-	if (pgh) {
-		struct timeval timestamp;
-
-		pgh->pgh_magic = htonl(PKTGEN_MAGIC);
-		pgh->seq_num = htonl(pkt_dev->seq_num);
-
-		do_gettimeofday(&timestamp);
-		pgh->tv_sec = htonl(timestamp.tv_sec);
-		pgh->tv_usec = htonl(timestamp.tv_usec);
-	}
+	pktgen_finalize_skb(pkt_dev, skb, datalen);
 
 #ifdef CONFIG_XFRM
 	if (!process_ipsec(pkt_dev, skb, protocol))
@@ -2980,7 +2999,6 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	struct udphdr *udph;
 	int datalen;
 	struct ipv6hdr *iph;
-	struct pktgen_hdr *pgh = NULL;
 	__be16 protocol = htons(ETH_P_IPV6);
 	__be32 *mpls;
 	__be16 *vlan_tci = NULL;                 /* Encapsulates priority and VLAN ID */
@@ -3083,75 +3101,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	skb->dev = odev;
 	skb->pkt_type = PACKET_HOST;
 
-	if (pkt_dev->nfrags <= 0)
-		pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
-	else {
-		int frags = pkt_dev->nfrags;
-		int i;
-
-		pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8);
-
-		if (frags > MAX_SKB_FRAGS)
-			frags = MAX_SKB_FRAGS;
-		if (datalen > frags * PAGE_SIZE) {
-			skb_put(skb, datalen - frags * PAGE_SIZE);
-			datalen = frags * PAGE_SIZE;
-		}
-
-		i = 0;
-		while (datalen > 0) {
-			struct page *page = alloc_pages(GFP_KERNEL, 0);
-			skb_shinfo(skb)->frags[i].page = page;
-			skb_shinfo(skb)->frags[i].page_offset = 0;
-			skb_shinfo(skb)->frags[i].size =
-			    (datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
-			datalen -= skb_shinfo(skb)->frags[i].size;
-			skb->len += skb_shinfo(skb)->frags[i].size;
-			skb->data_len += skb_shinfo(skb)->frags[i].size;
-			i++;
-			skb_shinfo(skb)->nr_frags = i;
-		}
-
-		while (i < frags) {
-			int rem;
-
-			if (i == 0)
-				break;
-
-			rem = skb_shinfo(skb)->frags[i - 1].size / 2;
-			if (rem == 0)
-				break;
-
-			skb_shinfo(skb)->frags[i - 1].size -= rem;
-
-			skb_shinfo(skb)->frags[i] =
-			    skb_shinfo(skb)->frags[i - 1];
-			get_page(skb_shinfo(skb)->frags[i].page);
-			skb_shinfo(skb)->frags[i].page =
-			    skb_shinfo(skb)->frags[i - 1].page;
-			skb_shinfo(skb)->frags[i].page_offset +=
-			    skb_shinfo(skb)->frags[i - 1].size;
-			skb_shinfo(skb)->frags[i].size = rem;
-			i++;
-			skb_shinfo(skb)->nr_frags = i;
-		}
-	}
-
-	/* Stamp the time, and sequence number,
-	 * convert them to network byte order
-	 * should we update cloned packets too ?
-	 */
-	if (pgh) {
-		struct timeval timestamp;
-
-		pgh->pgh_magic = htonl(PKTGEN_MAGIC);
-		pgh->seq_num = htonl(pkt_dev->seq_num);
-
-		do_gettimeofday(&timestamp);
-		pgh->tv_sec = htonl(timestamp.tv_sec);
-		pgh->tv_usec = htonl(timestamp.tv_usec);
-	}
-	/* pkt_dev->seq_num++; FF: you really mean this? */
+	pktgen_finalize_skb(pkt_dev, skb, datalen);
 
 	return skb;
 }
@@ -3884,6 +3834,8 @@ static int pktgen_remove_device(struct pktgen_thread *t,
 	free_SAs(pkt_dev);
 #endif
 	vfree(pkt_dev->flows);
+	if (pkt_dev->page)
+		put_page(pkt_dev->page);
 	kfree(pkt_dev);
 	return 0;
 }
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 750db57f3bb..da0fe457c85 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -868,6 +868,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 		   netif_running(dev) ? dev->operstate : IF_OPER_DOWN);
 	NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode);
 	NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
+	NLA_PUT_U32(skb, IFLA_GROUP, dev->group);
 
 	if (dev->ifindex != dev->iflink)
 		NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);
@@ -1121,8 +1122,7 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
 				return -EOPNOTSUPP;
 
 			if (af_ops->validate_link_af) {
-				err = af_ops->validate_link_af(dev,
-							tb[IFLA_AF_SPEC]);
+				err = af_ops->validate_link_af(dev, af);
 				if (err < 0)
 					return err;
 			}
@@ -1265,6 +1265,11 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 		modified = 1;
 	}
 
+	if (tb[IFLA_GROUP]) {
+		dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
+		modified = 1;
+	}
+
 	/*
 	 * Interface selected by interface index but interface
 	 * name provided implies that a name change has been
@@ -1542,6 +1547,8 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net,
 		set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
 	if (tb[IFLA_LINKMODE])
 		dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
+	if (tb[IFLA_GROUP])
+		dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
 
 	return dev;
 
@@ -1552,6 +1559,24 @@ err:
 }
 EXPORT_SYMBOL(rtnl_create_link);
 
+/* Run do_setlink() on each device of @net in @group; stop on first error. */
+static int rtnl_group_changelink(struct net *net, int group,
+		struct ifinfomsg *ifm, struct nlattr **tb)
+{
+	struct net_device *cur;
+	int rc;
+
+	for_each_netdev(net, cur) {
+		if (cur->group != group)
+			continue;
+		rc = do_setlink(cur, ifm, tb, NULL, 0);
+		if (rc < 0)
+			return rc;
+	}
+
+	return 0;
+}
+
 static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct net *net = sock_net(skb->sk);
@@ -1579,10 +1604,12 @@ replay:
 	ifm = nlmsg_data(nlh);
 	if (ifm->ifi_index > 0)
 		dev = __dev_get_by_index(net, ifm->ifi_index);
-	else if (ifname[0])
-		dev = __dev_get_by_name(net, ifname);
-	else
-		dev = NULL;
+	else {
+		if (ifname[0])
+			dev = __dev_get_by_name(net, ifname);
+		else
+			dev = NULL;
+	}
 
 	err = validate_linkmsg(dev, tb);
 	if (err < 0)
@@ -1646,8 +1673,13 @@ replay:
 			return do_setlink(dev, ifm, tb, ifname, modified);
 		}
 
-		if (!(nlh->nlmsg_flags & NLM_F_CREATE))
+		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
+			if (ifm->ifi_index == 0 && tb[IFLA_GROUP])
+				return rtnl_group_changelink(net,
+						nla_get_u32(tb[IFLA_GROUP]),
+						ifm, tb);
 			return -ENODEV;
+		}
 
 		if (ifm->ifi_index)
 			return -EOPNOTSUPP;
@@ -1672,6 +1704,9 @@ replay:
 			snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
 
 		dest_net = rtnl_link_get_net(net, tb);
+		if (IS_ERR(dest_net))
+			return PTR_ERR(dest_net);
+
 		dev = rtnl_create_link(net, dest_net, ifname, ops, tb);
 
 		if (IS_ERR(dev))
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d31bb36ae0d..14cf560b4a3 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -210,6 +210,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	shinfo = skb_shinfo(skb);
 	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
 	atomic_set(&shinfo->dataref, 1);
+	kmemcheck_annotate_variable(shinfo->destructor_arg);
 
 	if (fclone) {
 		struct sk_buff *child = skb + 1;
@@ -2497,7 +2498,7 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum);
  *	a pointer to the first in a list of new skbs for the segments.
  *	In case of error it returns ERR_PTR(err).
  */
-struct sk_buff *skb_segment(struct sk_buff *skb, int features)
+struct sk_buff *skb_segment(struct sk_buff *skb, u32 features)
 {
 	struct sk_buff *segs = NULL;
 	struct sk_buff *tail = NULL;
@@ -2507,7 +2508,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 	unsigned int offset = doffset;
 	unsigned int headroom;
 	unsigned int len;
-	int sg = features & NETIF_F_SG;
+	int sg = !!(features & NETIF_F_SG);
 	int nfrags = skb_shinfo(skb)->nr_frags;
 	int err = -ENOMEM;
 	int i = 0;
@@ -2744,8 +2745,12 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 
 merge:
 	if (offset > headlen) {
-		skbinfo->frags[0].page_offset += offset - headlen;
-		skbinfo->frags[0].size -= offset - headlen;
+		unsigned int eat = offset - headlen;
+
+		skbinfo->frags[0].page_offset += eat;
+		skbinfo->frags[0].size -= eat;
+		skb->data_len -= eat;
+		skb->len -= eat;
 		offset = headlen;
 	}
 
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index d900ab99814..6b03f561cae 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -583,7 +583,7 @@ static int dcbnl_getapp(struct net_device *netdev, struct nlattr **tb,
 	u8 up, idtype;
 	int ret = -EINVAL;
 
-	if (!tb[DCB_ATTR_APP] || !netdev->dcbnl_ops->getapp)
+	if (!tb[DCB_ATTR_APP])
 		goto out;
 
 	ret = nla_parse_nested(app_tb, DCB_APP_ATTR_MAX, tb[DCB_ATTR_APP],
@@ -604,7 +604,16 @@ static int dcbnl_getapp(struct net_device *netdev, struct nlattr **tb,
 		goto out;
 
 	id = nla_get_u16(app_tb[DCB_APP_ATTR_ID]);
-	up = netdev->dcbnl_ops->getapp(netdev, idtype, id);
+
+	if (netdev->dcbnl_ops->getapp) {
+		up = netdev->dcbnl_ops->getapp(netdev, idtype, id);
+	} else {
+		struct dcb_app app = {
+					.selector = idtype,
+					.protocol = id,
+				     };
+		up = dcb_getapp(netdev, &app);
+	}
 
 	/* send this back */
 	dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 5e636365d33..42c9c62d341 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -112,6 +112,7 @@ static int dn_dst_gc(struct dst_ops *ops);
 static struct dst_entry *dn_dst_check(struct dst_entry *, __u32);
 static unsigned int dn_dst_default_advmss(const struct dst_entry *dst);
 static unsigned int dn_dst_default_mtu(const struct dst_entry *dst);
+static void dn_dst_destroy(struct dst_entry *);
 static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
 static void dn_dst_link_failure(struct sk_buff *);
 static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu);
@@ -133,11 +134,18 @@ static struct dst_ops dn_dst_ops = {
 	.check =		dn_dst_check,
 	.default_advmss =	dn_dst_default_advmss,
 	.default_mtu =		dn_dst_default_mtu,
+	.cow_metrics =		dst_cow_metrics_generic,
+	.destroy =		dn_dst_destroy,
 	.negative_advice =	dn_dst_negative_advice,
 	.link_failure =		dn_dst_link_failure,
 	.update_pmtu =		dn_dst_update_pmtu,
 };
 
+static void dn_dst_destroy(struct dst_entry *dst)
+{
+	dst_destroy_metrics_generic(dst);	/* dn_dst_ops.destroy hook */
+}
+
 static __inline__ unsigned dn_hash(__le16 src, __le16 dst)
 {
 	__u16 tmp = (__u16 __force)(src ^ dst);
@@ -814,14 +822,14 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
 {
 	struct dn_fib_info *fi = res->fi;
 	struct net_device *dev = rt->dst.dev;
+	unsigned int mss_metric;
 	struct neighbour *n;
-	unsigned int metric;
 
 	if (fi) {
 		if (DN_FIB_RES_GW(*res) &&
 		    DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
 			rt->rt_gateway = DN_FIB_RES_GW(*res);
-		dst_import_metrics(&rt->dst, fi->fib_metrics);
+		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
 	}
 	rt->rt_type = res->type;
 
@@ -834,10 +842,10 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
 
 	if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu)
 		dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu);
-	metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS);
-	if (metric) {
+	mss_metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS);
+	if (mss_metric) {
 		unsigned int mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst));
-		if (metric > mss)
+		if (mss_metric > mss)
 			dst_metric_set(&rt->dst, RTAX_ADVMSS, mss);
 	}
 	return 0;
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index f2abd375569..b66600b3f4b 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -59,7 +59,6 @@ struct dn_hash
 };
 
 #define dz_key_0(key)		((key).datum = 0)
-#define dz_prefix(key,dz)	((key).datum)
 
 #define for_nexthops(fi) { int nhsel; const struct dn_fib_nh *nh;\
 	for(nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 0c877a74e1f..3fb14b7c13c 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -428,7 +428,7 @@ static void __exit dsa_cleanup_module(void)
 }
 module_exit(dsa_cleanup_module);
 
-MODULE_AUTHOR("Lennert Buytenhek <buytenh@wantstofly.org>")
+MODULE_AUTHOR("Lennert Buytenhek <buytenh@wantstofly.org>");
 MODULE_DESCRIPTION("Driver for Distributed Switch Architecture switch chips");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:dsa");
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 15dcc1a586b..0c282633791 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -265,13 +265,13 @@ static void ec_tx_done(struct sk_buff *skb, int result)
 static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 			  struct msghdr *msg, size_t len)
 {
-	struct sock *sk = sock->sk;
 	struct sockaddr_ec *saddr=(struct sockaddr_ec *)msg->msg_name;
 	struct net_device *dev;
 	struct ec_addr addr;
 	int err;
 	unsigned char port, cb;
 #if defined(CONFIG_ECONET_AUNUDP) || defined(CONFIG_ECONET_NATIVE)
+	struct sock *sk = sock->sk;
 	struct sk_buff *skb;
 	struct ec_cb *eb;
 #endif
@@ -488,10 +488,10 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 
 error_free_buf:
 	vfree(userbuf);
+error:
 #else
 	err = -EPROTOTYPE;
 #endif
-	error:
 	mutex_unlock(&econet_mutex);
 
 	return err;
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index a5a1050595d..8949a05ac30 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -140,6 +140,9 @@ config IP_ROUTE_VERBOSE
 	  handled by the klogd daemon which is responsible for kernel messages
 	  ("man klogd").
 
+config IP_ROUTE_CLASSID
+	bool
+
 config IP_PNP
 	bool "IP: kernel level autoconfiguration"
 	help
@@ -657,4 +660,3 @@ config TCP_MD5SIG
 	  on the Internet.
 
 	  If unsure, say N.
-
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f2b61107df6..7ceb8044763 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -880,6 +880,19 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 }
 EXPORT_SYMBOL(inet_ioctl);
 
+#ifdef CONFIG_COMPAT
+/* Call the protocol's compat_ioctl hook; -ENOIOCTLCMD when it has none. */
+int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+
+	if (!sk->sk_prot->compat_ioctl)
+		return -ENOIOCTLCMD;
+
+	return sk->sk_prot->compat_ioctl(sk, cmd, arg);
+}
+#endif
+
 const struct proto_ops inet_stream_ops = {
 	.family		   = PF_INET,
 	.owner		   = THIS_MODULE,
@@ -903,6 +916,7 @@ const struct proto_ops inet_stream_ops = {
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_sock_common_setsockopt,
 	.compat_getsockopt = compat_sock_common_getsockopt,
+	.compat_ioctl	   = inet_compat_ioctl,
 #endif
 };
 EXPORT_SYMBOL(inet_stream_ops);
@@ -929,6 +943,7 @@ const struct proto_ops inet_dgram_ops = {
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_sock_common_setsockopt,
 	.compat_getsockopt = compat_sock_common_getsockopt,
+	.compat_ioctl	   = inet_compat_ioctl,
 #endif
 };
 EXPORT_SYMBOL(inet_dgram_ops);
@@ -959,6 +974,7 @@ static const struct proto_ops inet_sockraw_ops = {
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_sock_common_setsockopt,
 	.compat_getsockopt = compat_sock_common_getsockopt,
+	.compat_ioctl	   = inet_compat_ioctl,
 #endif
 };
 
@@ -1215,7 +1231,7 @@ out:
 	return err;
 }
 
-static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
+static struct sk_buff *inet_gso_segment(struct sk_buff *skb, u32 features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	struct iphdr *iph;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 04c8b69fd42..7927589813b 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1017,14 +1017,13 @@ static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on)
 		IPV4_DEVCONF_ALL(net, PROXY_ARP) = on;
 		return 0;
 	}
-	if (__in_dev_get_rcu(dev)) {
-		IN_DEV_CONF_SET(__in_dev_get_rcu(dev), PROXY_ARP, on);
+	if (__in_dev_get_rtnl(dev)) {
+		IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, on);
 		return 0;
 	}
 	return -ENXIO;
 }
 
-/* must be called with rcu_read_lock() */
 static int arp_req_set_public(struct net *net, struct arpreq *r,
 		struct net_device *dev)
 {
@@ -1233,10 +1232,10 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	if (!(r.arp_flags & ATF_NETMASK))
 		((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr =
 							   htonl(0xFFFFFFFFUL);
-	rcu_read_lock();
+	rtnl_lock();
 	if (r.arp_dev[0]) {
 		err = -ENODEV;
-		dev = dev_get_by_name_rcu(net, r.arp_dev);
+		dev = __dev_get_by_name(net, r.arp_dev);
 		if (dev == NULL)
 			goto out;
 
@@ -1263,7 +1262,7 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 		break;
 	}
 out:
-	rcu_read_unlock();
+	rtnl_unlock();
 	if (cmd == SIOCGARP && !err && copy_to_user(arg, &r, sizeof(r)))
 		err = -EFAULT;
 	return err;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 7981a24f5c7..9cefe72029c 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -41,12 +41,12 @@ struct fib4_rule {
 	__be32			srcmask;
 	__be32			dst;
 	__be32			dstmask;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	u32			tclassid;
 #endif
 };
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 u32 fib_rules_tclass(struct fib_result *res)
 {
 	return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0;
@@ -165,7 +165,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 	if (frh->dst_len)
 		rule4->dst = nla_get_be32(tb[FRA_DST]);
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	if (tb[FRA_FLOW])
 		rule4->tclassid = nla_get_u32(tb[FRA_FLOW]);
 #endif
@@ -195,7 +195,7 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 	if (frh->tos && (rule4->tos != frh->tos))
 		return 0;
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
 		return 0;
 #endif
@@ -224,7 +224,7 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 	if (rule4->src_len)
 		NLA_PUT_BE32(skb, FRA_SRC, rule4->src);
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	if (rule4->tclassid)
 		NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid);
 #endif
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 12d3dc3df1b..48e93a56007 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -152,6 +152,8 @@ static void free_fib_info_rcu(struct rcu_head *head)
 {
 	struct fib_info *fi = container_of(head, struct fib_info, rcu);
 
+	if (fi->fib_metrics != (u32 *) dst_default_metrics)
+		kfree(fi->fib_metrics);
 	kfree(fi);
 }
 
@@ -200,7 +202,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 		    nh->nh_weight != onh->nh_weight ||
 #endif
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 		    nh->nh_tclassid != onh->nh_tclassid ||
 #endif
 		    ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD))
@@ -422,7 +424,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
 
 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
 			nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 			nla = nla_find(attrs, attrlen, RTA_FLOW);
 			nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
 #endif
@@ -476,7 +478,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
 			if (nla && nla_get_be32(nla) != nh->nh_gw)
 				return 1;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 			nla = nla_find(attrs, attrlen, RTA_FLOW);
 			if (nla && nla_get_u32(nla) != nh->nh_tclassid)
 				return 1;
@@ -742,6 +744,12 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 	fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
 	if (fi == NULL)
 		goto failure;
+	if (cfg->fc_mx) {
+		fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
+		if (!fi->fib_metrics)
+			goto failure;
+	} else
+		fi->fib_metrics = (u32 *) dst_default_metrics;
 	fib_info_cnt++;
 
 	fi->fib_net = hold_net(net);
@@ -779,7 +787,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 			goto err_inval;
 		if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
 			goto err_inval;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 		if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
 			goto err_inval;
 #endif
@@ -792,7 +800,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 		nh->nh_oif = cfg->fc_oif;
 		nh->nh_gw = cfg->fc_gw;
 		nh->nh_flags = cfg->fc_flags;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 		nh->nh_tclassid = cfg->fc_flow;
 #endif
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -1002,7 +1010,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 
 		if (fi->fib_nh->nh_oif)
 			NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 		if (fi->fib_nh[0].nh_tclassid)
 			NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
 #endif
@@ -1027,7 +1035,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 
 			if (nh->nh_gw)
 				NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 			if (nh->nh_tclassid)
 				NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
 #endif
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index d9bc85751c7..b6513b13d72 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -475,7 +475,7 @@ static int cleanup_once(unsigned long ttl)
 struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
 {
 	struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
-	struct inet_peer_base *base = family_to_base(AF_INET);
+	struct inet_peer_base *base = family_to_base(daddr->family);
 	struct inet_peer *p;
 
 	/* Look up for the address quickly, lockless.
@@ -512,6 +512,7 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
 		atomic_set(&p->rid, 0);
 		atomic_set(&p->ip_id_count, secure_ip_id(daddr->a4));
 		p->tcp_ts_stamp = 0;
+		p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
 		INIT_LIST_HEAD(&p->unused);
 
 
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index d859bcc26cb..d7b2b0987a3 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -340,7 +340,7 @@ static int ip_rcv_finish(struct sk_buff *skb)
 		}
 	}
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	if (unlikely(skb_dst(skb)->tclassid)) {
 		struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct);
 		u32 idx = skb_dst(skb)->tclassid;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 3f3a9afd73e..7e41ac0b926 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -60,6 +60,7 @@
 #include <linux/notifier.h>
 #include <linux/if_arp.h>
 #include <linux/netfilter_ipv4.h>
+#include <linux/compat.h>
 #include <net/ipip.h>
 #include <net/checksum.h>
 #include <net/netlink.h>
@@ -1434,6 +1435,51 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
 	}
 }
 
+#ifdef CONFIG_COMPAT
+struct compat_sioc_sg_req {
+	struct in_addr src;
+	struct in_addr grp;
+	compat_ulong_t pktcnt;
+	compat_ulong_t bytecnt;
+	compat_ulong_t wrong_if;
+};
+/* Compat SIOCGETSGCNT: copy the counters via struct compat_sioc_sg_req. */
+int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
+{
+	struct compat_sioc_sg_req sr;	/* compat layout, not sioc_sg_req */
+	struct mfc_cache *c;
+	struct net *net = sock_net(sk);
+	struct mr_table *mrt;
+
+	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
+	if (mrt == NULL)
+		return -ENOENT;
+
+	switch (cmd) {
+	case SIOCGETSGCNT:
+		if (copy_from_user(&sr, arg, sizeof(sr)))
+			return -EFAULT;
+
+		rcu_read_lock();
+		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
+		if (c) {
+			sr.pktcnt = c->mfc_un.res.pkt;
+			sr.bytecnt = c->mfc_un.res.bytes;
+			sr.wrong_if = c->mfc_un.res.wrong_if;
+			rcu_read_unlock();
+
+			if (copy_to_user(arg, &sr, sizeof(sr)))
+				return -EFAULT;
+			return 0;
+		}
+		rcu_read_unlock();
+		return -EADDRNOTAVAIL;
+	default:
+		return -ENOIOCTLCMD;
+	}
+}
+#endif	/* CONFIG_COMPAT */
+
 
 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
 {
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index babd1a2bae5..f926a310075 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -206,8 +206,9 @@ config IP_NF_TARGET_REDIRECT
 
 config NF_NAT_SNMP_BASIC
 	tristate "Basic SNMP-ALG support"
-	depends on NF_NAT
+	depends on NF_CONNTRACK_SNMP && NF_NAT
 	depends on NETFILTER_ADVANCED
+	default NF_NAT && NF_CONNTRACK_SNMP
 	---help---
 
 	  This module implements an Application Layer Gateway (ALG) for
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index e855fffaed9..e95054c690c 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -866,6 +866,7 @@ static int compat_table_info(const struct xt_table_info *info,
 	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
 	newinfo->initial_entries = 0;
 	loc_cpu_entry = info->entries[raw_smp_processor_id()];
+	xt_compat_init_offsets(NFPROTO_ARP, info->number);
 	xt_entry_foreach(iter, loc_cpu_entry, info->size) {
 		ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
 		if (ret != 0)
@@ -1333,6 +1334,7 @@ static int translate_compat_table(const char *name,
 	duprintf("translate_compat_table: size %u\n", info->size);
 	j = 0;
 	xt_compat_lock(NFPROTO_ARP);
+	xt_compat_init_offsets(NFPROTO_ARP, number);
 	/* Walk through entries, checking offsets. */
 	xt_entry_foreach(iter0, entry0, total_size) {
 		ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 652efea013d..ef7d7b9680e 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1063,6 +1063,7 @@ static int compat_table_info(const struct xt_table_info *info,
 	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
 	newinfo->initial_entries = 0;
 	loc_cpu_entry = info->entries[raw_smp_processor_id()];
+	xt_compat_init_offsets(AF_INET, info->number);
 	xt_entry_foreach(iter, loc_cpu_entry, info->size) {
 		ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
 		if (ret != 0)
@@ -1664,6 +1665,7 @@ translate_compat_table(struct net *net,
 	duprintf("translate_compat_table: size %u\n", info->size);
 	j = 0;
 	xt_compat_lock(AF_INET);
+	xt_compat_init_offsets(AF_INET, number);
 	/* Walk through entries, checking offsets. */
 	xt_entry_foreach(iter0, entry0, total_size) {
 		ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 1e26a489765..403ca57f601 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -300,13 +300,8 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	 * that the ->target() function isn't called after ->destroy() */
 
 	ct = nf_ct_get(skb, &ctinfo);
-	if (ct == NULL) {
-		pr_info("no conntrack!\n");
-			/* FIXME: need to drop invalid ones, since replies
-			 * to outgoing connections of other nodes will be
-			 * marked as INVALID */
+	if (ct == NULL)
 		return NF_DROP;
-	}
 
 	/* special case: ICMP error handling. conntrack distinguishes between
 	 * error messages (RELATED) and information requests (see below) */
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 72ffc8fda2e..d76d6c9ed94 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -442,8 +442,7 @@ ipt_log_packet(u_int8_t pf,
 	}
 #endif
 
-	/* MAC logging for input path only. */
-	if (in && !out)
+	if (in != NULL)
 		dump_mac_header(m, loginfo, skb);
 
 	dump_packet(m, loginfo, skb, 0);
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 294a2a32f29..aef5d1fbe77 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -60,7 +60,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out)
 	ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out,
 			   dev_net(out)->ipv4.iptable_mangle);
 	/* Reroute for ANY change. */
-	if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) {
+	if (ret != NF_DROP && ret != NF_STOLEN) {
 		iph = ip_hdr(skb);
 
 		if (iph->saddr != saddr ||
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 63f60fc5d26..5585980fce2 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -20,6 +20,7 @@
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_acct.h>
+#include <linux/rculist_nulls.h>
 
 struct ct_iter_state {
 	struct seq_net_private p;
@@ -35,7 +36,8 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
 	for (st->bucket = 0;
 	     st->bucket < net->ct.htable_size;
 	     st->bucket++) {
-		n = rcu_dereference(net->ct.hash[st->bucket].first);
+		n = rcu_dereference(
+			hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
 		if (!is_a_nulls(n))
 			return n;
 	}
@@ -48,13 +50,14 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
 	struct net *net = seq_file_net(seq);
 	struct ct_iter_state *st = seq->private;
 
-	head = rcu_dereference(head->next);
+	head = rcu_dereference(hlist_nulls_next_rcu(head));
 	while (is_a_nulls(head)) {
 		if (likely(get_nulls_value(head) == st->bucket)) {
 			if (++st->bucket >= net->ct.htable_size)
 				return NULL;
 		}
-		head = rcu_dereference(net->ct.hash[st->bucket].first);
+		head = rcu_dereference(
+			hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
 	}
 	return head;
 }
@@ -217,7 +220,8 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
 	struct hlist_node *n;
 
 	for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
-		n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+		n = rcu_dereference(
+			hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
 		if (n)
 			return n;
 	}
@@ -230,11 +234,12 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
 	struct net *net = seq_file_net(seq);
 	struct ct_expect_iter_state *st = seq->private;
 
-	head = rcu_dereference(head->next);
+	head = rcu_dereference(hlist_next_rcu(head));
 	while (head == NULL) {
 		if (++st->bucket >= nf_ct_expect_hsize)
 			return NULL;
-		head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+		head = rcu_dereference(
+			hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
 	}
 	return head;
 }
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c
index 0f23b3f06df..703f366fd23 100644
--- a/net/ipv4/netfilter/nf_nat_amanda.c
+++ b/net/ipv4/netfilter/nf_nat_amanda.c
@@ -44,13 +44,13 @@ static unsigned int help(struct sk_buff *skb,
 
 	/* Try to get same port: if not, try to change it. */
 	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
-		int ret;
+		int res;
 
 		exp->tuple.dst.u.tcp.port = htons(port);
-		ret = nf_ct_expect_related(exp);
-		if (ret == 0)
+		res = nf_ct_expect_related(exp);
+		if (res == 0)
 			break;
-		else if (ret != -EBUSY) {
+		else if (res != -EBUSY) {
 			port = 0;
 			break;
 		}
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index c04787ce1a7..21bcf471b25 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -221,7 +221,14 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 	   manips not an issue.  */
 	if (maniptype == IP_NAT_MANIP_SRC &&
 	    !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
-		if (find_appropriate_src(net, zone, orig_tuple, tuple, range)) {
+		/* try the original tuple first */
+		if (in_range(orig_tuple, range)) {
+			if (!nf_nat_used_tuple(orig_tuple, ct)) {
+				*tuple = *orig_tuple;
+				return;
+			}
+		} else if (find_appropriate_src(net, zone, orig_tuple, tuple,
+			   range)) {
 			pr_debug("get_unique_tuple: Found current src map\n");
 			if (!nf_nat_used_tuple(tuple, ct))
 				return;
@@ -266,7 +273,6 @@ nf_nat_setup_info(struct nf_conn *ct,
 	struct net *net = nf_ct_net(ct);
 	struct nf_conntrack_tuple curr_tuple, new_tuple;
 	struct nf_conn_nat *nat;
-	int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK);
 
 	/* nat helper or nfctnetlink also setup binding */
 	nat = nfct_nat(ct);
@@ -306,8 +312,7 @@ nf_nat_setup_info(struct nf_conn *ct,
 			ct->status |= IPS_DST_NAT;
 	}
 
-	/* Place in source hash if this is the first time. */
-	if (have_to_hash) {
+	if (maniptype == IP_NAT_MANIP_SRC) {
 		unsigned int srchash;
 
 		srchash = hash_by_src(net, nf_ct_zone(ct),
@@ -323,9 +328,9 @@ nf_nat_setup_info(struct nf_conn *ct,
 
 	/* It's done. */
 	if (maniptype == IP_NAT_MANIP_DST)
-		set_bit(IPS_DST_NAT_DONE_BIT, &ct->status);
+		ct->status |= IPS_DST_NAT_DONE;
 	else
-		set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
+		ct->status |= IPS_SRC_NAT_DONE;
 
 	return NF_ACCEPT;
 }
@@ -502,7 +507,10 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto)
 	int ret = 0;
 
 	spin_lock_bh(&nf_nat_lock);
-	if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) {
+	if (rcu_dereference_protected(
+			nf_nat_protos[proto->protonum],
+			lockdep_is_held(&nf_nat_lock)
+			) != &nf_nat_unknown_protocol) {
 		ret = -EBUSY;
 		goto out;
 	}
@@ -532,7 +540,7 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
 	if (nat == NULL || nat->ct == NULL)
 		return;
 
-	NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK);
+	NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE);
 
 	spin_lock_bh(&nf_nat_lock);
 	hlist_del_rcu(&nat->bysource);
@@ -545,11 +553,10 @@ static void nf_nat_move_storage(void *new, void *old)
 	struct nf_conn_nat *old_nat = old;
 	struct nf_conn *ct = old_nat->ct;
 
-	if (!ct || !(ct->status & IPS_NAT_DONE_MASK))
+	if (!ct || !(ct->status & IPS_SRC_NAT_DONE))
 		return;
 
 	spin_lock_bh(&nf_nat_lock);
-	new_nat->ct = ct;
 	hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
 	spin_unlock_bh(&nf_nat_lock);
 }
@@ -679,8 +686,7 @@ static int __net_init nf_nat_net_init(struct net *net)
 {
 	/* Leave them the same for the moment. */
 	net->ipv4.nat_htable_size = net->ct.htable_size;
-	net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size,
-						       &net->ipv4.nat_vmalloced, 0);
+	net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0);
 	if (!net->ipv4.nat_bysource)
 		return -ENOMEM;
 	return 0;
@@ -702,8 +708,7 @@ static void __net_exit nf_nat_net_exit(struct net *net)
 {
 	nf_ct_iterate_cleanup(net, &clean_nat, NULL);
 	synchronize_rcu();
-	nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced,
-			     net->ipv4.nat_htable_size);
+	nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size);
 }
 
 static struct pernet_operations nf_nat_net_ops = {
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index ee5f419d0a5..8812a02078a 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -54,6 +54,7 @@
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_nat_helper.h>
+#include <linux/netfilter/nf_conntrack_snmp.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
@@ -1310,9 +1311,9 @@ static int __init nf_nat_snmp_basic_init(void)
 {
 	int ret = 0;
 
-	ret = nf_conntrack_helper_register(&snmp_helper);
-	if (ret < 0)
-		return ret;
+	BUG_ON(nf_nat_snmp_hook != NULL);
+	rcu_assign_pointer(nf_nat_snmp_hook, help);
+
 	ret = nf_conntrack_helper_register(&snmp_trap_helper);
 	if (ret < 0) {
 		nf_conntrack_helper_unregister(&snmp_helper);
@@ -1323,7 +1324,7 @@ static int __init nf_nat_snmp_basic_init(void)
 
 static void __exit nf_nat_snmp_basic_fini(void)
 {
-	nf_conntrack_helper_unregister(&snmp_helper);
+	rcu_assign_pointer(nf_nat_snmp_hook, NULL);
 	nf_conntrack_helper_unregister(&snmp_trap_helper);
 }
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index a3d5ab786e8..6390ba299b3 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -76,6 +76,7 @@
 #include <linux/seq_file.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
+#include <linux/compat.h>
 
 static struct raw_hashinfo raw_v4_hashinfo = {
 	.lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock),
@@ -838,6 +839,23 @@ static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg)
 	}
 }
 
+#ifdef CONFIG_COMPAT
+static int compat_raw_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg)
+{
+	switch (cmd) {
+	case SIOCOUTQ:
+	case SIOCINQ:
+		return -ENOIOCTLCMD;	/* not translated by this hook */
+	default:	/* everything else is offered to ipmr_compat_ioctl() */
+#ifdef CONFIG_IP_MROUTE
+		return ipmr_compat_ioctl(sk, cmd, compat_ptr(arg));
+#else	/* !CONFIG_IP_MROUTE: nothing to delegate to */
+		return -ENOIOCTLCMD;
+#endif	/* CONFIG_IP_MROUTE */
+	}
+}
+#endif	/* CONFIG_COMPAT */
+
 struct proto raw_prot = {
 	.name		   = "RAW",
 	.owner		   = THIS_MODULE,
@@ -860,6 +878,7 @@ struct proto raw_prot = {
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_raw_setsockopt,
 	.compat_getsockopt = compat_raw_getsockopt,
+	.compat_ioctl	   = compat_raw_ioctl,
 #endif
 };
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 351dc4e8524..b1e5d3ac346 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -152,6 +152,41 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 {
 }
 
+static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
+{
+	struct rtable *rt = (struct rtable *) dst;
+	struct inet_peer *peer;
+	u32 *p = NULL;
+	/* .cow_metrics hook: try to move dst->_metrics onto the peer's array. */
+	if (!rt->peer)
+		rt_bind_peer(rt, 1);
+
+	peer = rt->peer;
+	if (peer) {
+		u32 *old_p = __DST_METRICS_PTR(old);
+		unsigned long prev, new;
+		/* Only a peer still flagged new is seeded from the old metrics. */
+		p = peer->metrics;
+		if (inet_metrics_new(peer))
+			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
+		/* Install the peer array only if _metrics still holds 'old'. */
+		new = (unsigned long) p;
+		prev = cmpxchg(&dst->_metrics, old, new);
+		/* Lost race: return what is installed, or NULL if read-only. */
+		if (prev != old) {
+			p = __DST_METRICS_PTR(prev);
+			if (prev & DST_METRICS_READ_ONLY)
+				p = NULL;
+		} else {	/* swap done: release rt->fi, if any */
+			if (rt->fi) {
+				fib_info_put(rt->fi);
+				rt->fi = NULL;
+			}
+		}
+	}
+	return p;	/* stays NULL when no peer could be bound */
+}
+
 static struct dst_ops ipv4_dst_ops = {
 	.family =		AF_INET,
 	.protocol =		cpu_to_be16(ETH_P_IP),
@@ -159,6 +194,7 @@ static struct dst_ops ipv4_dst_ops = {
 	.check =		ipv4_dst_check,
 	.default_advmss =	ipv4_default_advmss,
 	.default_mtu =		ipv4_default_mtu,
+	.cow_metrics =		ipv4_cow_metrics,
 	.destroy =		ipv4_dst_destroy,
 	.ifdown =		ipv4_dst_ifdown,
 	.negative_advice =	ipv4_negative_advice,
@@ -514,7 +550,7 @@ static const struct file_operations rt_cpu_seq_fops = {
 	.release = seq_release,
 };
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 static int rt_acct_proc_show(struct seq_file *m, void *v)
 {
 	struct ip_rt_acct *dst, *src;
@@ -567,14 +603,14 @@ static int __net_init ip_rt_do_proc_init(struct net *net)
 	if (!pde)
 		goto err2;
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
 	if (!pde)
 		goto err3;
 #endif
 	return 0;
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 err3:
 	remove_proc_entry("rt_cache", net->proc_net_stat);
 #endif
@@ -588,7 +624,7 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net)
 {
 	remove_proc_entry("rt_cache", net->proc_net_stat);
 	remove_proc_entry("rt_cache", net->proc_net);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	remove_proc_entry("rt_acct", net->proc_net);
 #endif
 }
@@ -1441,6 +1477,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 
 				if (rt->peer)
 					atomic_inc(&rt->peer->refcnt);
+				if (rt->fi)
+					atomic_inc(&rt->fi->fib_clntref);
 
 				if (arp_bind_neighbour(&rt->dst) ||
 				    !(rt->dst.neighbour->nud_state &
@@ -1720,6 +1758,10 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
 	struct rtable *rt = (struct rtable *) dst;
 	struct inet_peer *peer = rt->peer;
 
+	if (rt->fi) {
+		fib_info_put(rt->fi);
+		rt->fi = NULL;
+	}
 	if (peer) {
 		rt->peer = NULL;
 		inet_putpeer(peer);
@@ -1775,7 +1817,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
 	memcpy(addr, &src, 4);
 }
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 static void set_class_tag(struct rtable *rt, u32 tag)
 {
 	if (!(rt->dst.tclassid & 0xFFFF))
@@ -1815,6 +1857,30 @@ static unsigned int ipv4_default_mtu(const struct dst_entry *dst)
 	return mtu;
 }
 
+static void rt_init_metrics(struct rtable *rt, struct fib_info *fi)
+{
+	if (!(rt->fl.flags & FLOWI_FLAG_PRECOW_METRICS)) {
+	no_cow:		/* also the fallback when no peer can be bound below */
+		if (fi->fib_metrics != (u32 *) dst_default_metrics) {
+			rt->fi = fi;	/* fi has its own array: take a ref */
+			atomic_inc(&fi->fib_clntref);
+		}
+		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
+	} else {
+		struct inet_peer *peer;
+		/* PRECOW flag set: point dst at the peer's metrics instead. */
+		if (!rt->peer)
+			rt_bind_peer(rt, 1);
+		peer = rt->peer;
+		if (!peer)
+			goto no_cow;
+		if (inet_metrics_new(peer))	/* seed a new peer from fi */
+			memcpy(peer->metrics, fi->fib_metrics,
+			       sizeof(u32) * RTAX_MAX);
+		dst_init_metrics(&rt->dst, peer->metrics, false);
+	}
+}
+
 static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
 {
 	struct dst_entry *dst = &rt->dst;
@@ -1824,8 +1890,8 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
 		if (FIB_RES_GW(*res) &&
 		    FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
 			rt->rt_gateway = FIB_RES_GW(*res);
-		dst_import_metrics(dst, fi->fib_metrics);
-#ifdef CONFIG_NET_CLS_ROUTE
+		rt_init_metrics(rt, fi);
+#ifdef CONFIG_IP_ROUTE_CLASSID
 		dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
 #endif
 	}
@@ -1835,7 +1901,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
 	if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40)
 		dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40);
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	set_class_tag(rt, fib_rules_tclass(res));
 #endif
@@ -1891,7 +1957,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	rth->fl.mark    = skb->mark;
 	rth->fl.fl4_src	= saddr;
 	rth->rt_src	= saddr;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	rth->dst.tclassid = itag;
 #endif
 	rth->rt_iif	=
@@ -2208,7 +2274,7 @@ local_input:
 	rth->fl.mark    = skb->mark;
 	rth->fl.fl4_src	= saddr;
 	rth->rt_src	= saddr;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	rth->dst.tclassid = itag;
 #endif
 	rth->rt_iif	=
@@ -2752,6 +2818,9 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
 		rt->peer = ort->peer;
 		if (rt->peer)
 			atomic_inc(&rt->peer->refcnt);
+		rt->fi = ort->fi;
+		if (rt->fi)
+			atomic_inc(&rt->fi->fib_clntref);
 
 		dst_free(new);
 	}
@@ -2828,7 +2897,7 @@ static int rt_fill_info(struct net *net,
 	}
 	if (rt->dst.dev)
 		NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	if (rt->dst.tclassid)
 		NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid);
 #endif
@@ -3249,9 +3318,9 @@ static __net_initdata struct pernet_operations rt_genid_ops = {
 };
 
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
-#endif /* CONFIG_NET_CLS_ROUTE */
+#endif /* CONFIG_IP_ROUTE_CLASSID */
 
 static __initdata unsigned long rhash_entries;
 static int __init set_rhash_entries(char *str)
@@ -3267,7 +3336,7 @@ int __init ip_rt_init(void)
 {
 	int rc = 0;
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
 	if (!ip_rt_acct)
 		panic("IP: failed to allocate ip_rt_acct\n");
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6c11eece262..f9867d2dbef 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2653,7 +2653,7 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
 EXPORT_SYMBOL(compat_tcp_getsockopt);
 #endif
 
-struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
+struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	struct tcphdr *th;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2549b29b062..eb7f82ebf4a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4399,7 +4399,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 			if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) {
 				tp->ucopy.len -= chunk;
 				tp->copied_seq += chunk;
-				eaten = (chunk == skb->len && !th->fin);
+				eaten = (chunk == skb->len);
 				tcp_rcv_space_adjust(sk);
 			}
 			local_bh_disable();
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 856f68466d4..02f583b3744 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1994,7 +1994,6 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
 				}
 				req = req->dl_next;
 			}
-			st->offset = 0;
 			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
 				break;
 get_req:
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 8157b17959e..d37baaa1dbe 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2199,7 +2199,7 @@ int udp4_ufo_send_check(struct sk_buff *skb)
 	return 0;
 }
 
-struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features)
+struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, u32 features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	unsigned int mss;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index b057d40adde..19fbdec6baa 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -196,8 +196,11 @@ static void xfrm4_dst_destroy(struct dst_entry *dst)
 {
 	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
 
+	dst_destroy_metrics_generic(dst);
+
 	if (likely(xdst->u.rt.peer))
 		inet_putpeer(xdst->u.rt.peer);
+
 	xfrm_dst_destroy(xdst);
 }
 
@@ -215,6 +218,7 @@ static struct dst_ops xfrm4_dst_ops = {
 	.protocol =		cpu_to_be16(ETH_P_IP),
 	.gc =			xfrm4_garbage_collect,
 	.update_pmtu =		xfrm4_update_pmtu,
+	.cow_metrics =		dst_cow_metrics_generic,
 	.destroy =		xfrm4_dst_destroy,
 	.ifdown =		xfrm4_dst_ifdown,
 	.local_out =		__ip_local_out,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 24a1cf110d8..fd6782e3a03 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2661,14 +2661,12 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 	struct net *net = dev_net(dev);
 	struct inet6_dev *idev;
 	struct inet6_ifaddr *ifa;
-	LIST_HEAD(keep_list);
-	int state;
+	int state, i;
 
 	ASSERT_RTNL();
 
-	/* Flush routes if device is being removed or it is not loopback */
-	if (how || !(dev->flags & IFF_LOOPBACK))
-		rt6_ifdown(net, dev);
+	rt6_ifdown(net, dev);
+	neigh_ifdown(&nd_tbl, dev);
 
 	idev = __in6_dev_get(dev);
 	if (idev == NULL)
@@ -2689,6 +2687,23 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 
 	}
 
+	/* Step 2: clear hash table */
+	for (i = 0; i < IN6_ADDR_HSIZE; i++) {
+		struct hlist_head *h = &inet6_addr_lst[i];
+		struct hlist_node *n;
+
+		spin_lock_bh(&addrconf_hash_lock);
+	restart:
+		hlist_for_each_entry_rcu(ifa, n, h, addr_lst) {
+			if (ifa->idev == idev) {
+				hlist_del_init_rcu(&ifa->addr_lst);
+				addrconf_del_timer(ifa);
+				goto restart;
+			}
+		}
+		spin_unlock_bh(&addrconf_hash_lock);
+	}
+
 	write_lock_bh(&idev->lock);
 
 	/* Step 2: clear flags for stateless addrconf */
@@ -2722,52 +2737,23 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 				       struct inet6_ifaddr, if_list);
 		addrconf_del_timer(ifa);
 
-		/* If just doing link down, and address is permanent
-		   and not link-local, then retain it. */
-		if (!how &&
-		    (ifa->flags&IFA_F_PERMANENT) &&
-		    !(ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) {
-			list_move_tail(&ifa->if_list, &keep_list);
-
-			/* If not doing DAD on this address, just keep it. */
-			if ((dev->flags&(IFF_NOARP|IFF_LOOPBACK)) ||
-			    idev->cnf.accept_dad <= 0 ||
-			    (ifa->flags & IFA_F_NODAD))
-				continue;
+		list_del(&ifa->if_list);
 
-			/* If it was tentative already, no need to notify */
-			if (ifa->flags & IFA_F_TENTATIVE)
-				continue;
+		write_unlock_bh(&idev->lock);
 
-			/* Flag it for later restoration when link comes up */
-			ifa->flags |= IFA_F_TENTATIVE;
-			ifa->state = INET6_IFADDR_STATE_DAD;
-		} else {
-			list_del(&ifa->if_list);
-
-			/* clear hash table */
-			spin_lock_bh(&addrconf_hash_lock);
-			hlist_del_init_rcu(&ifa->addr_lst);
-			spin_unlock_bh(&addrconf_hash_lock);
-
-			write_unlock_bh(&idev->lock);
-			spin_lock_bh(&ifa->state_lock);
-			state = ifa->state;
-			ifa->state = INET6_IFADDR_STATE_DEAD;
-			spin_unlock_bh(&ifa->state_lock);
-
-			if (state != INET6_IFADDR_STATE_DEAD) {
-				__ipv6_ifa_notify(RTM_DELADDR, ifa);
-				atomic_notifier_call_chain(&inet6addr_chain,
-							   NETDEV_DOWN, ifa);
-			}
+		spin_lock_bh(&ifa->state_lock);
+		state = ifa->state;
+		ifa->state = INET6_IFADDR_STATE_DEAD;
+		spin_unlock_bh(&ifa->state_lock);
 
-			in6_ifa_put(ifa);
-			write_lock_bh(&idev->lock);
+		if (state != INET6_IFADDR_STATE_DEAD) {
+			__ipv6_ifa_notify(RTM_DELADDR, ifa);
+			atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa);
 		}
-	}
+		in6_ifa_put(ifa);
 
-	list_splice(&keep_list, &idev->addr_list);
+		write_lock_bh(&idev->lock);
+	}
 
 	write_unlock_bh(&idev->lock);
 
@@ -4156,8 +4142,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 		addrconf_leave_solict(ifp->idev, &ifp->addr);
 		dst_hold(&ifp->rt->dst);
 
-		if (ifp->state == INET6_IFADDR_STATE_DEAD &&
-		    ip6_del_rt(ifp->rt))
+		if (ip6_del_rt(ifp->rt))
 			dst_free(&ifp->rt->dst);
 		break;
 	}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 978e80e2c4a..3194aa90987 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -772,7 +772,7 @@ out:
 	return err;
 }
 
-static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features)
+static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, u32 features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	struct ipv6hdr *ipv6h;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 7d227c644f7..47b7b8df7fa 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1076,6 +1076,7 @@ static int compat_table_info(const struct xt_table_info *info,
 	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
 	newinfo->initial_entries = 0;
 	loc_cpu_entry = info->entries[raw_smp_processor_id()];
+	xt_compat_init_offsets(AF_INET6, info->number);
 	xt_entry_foreach(iter, loc_cpu_entry, info->size) {
 		ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
 		if (ret != 0)
@@ -1679,6 +1680,7 @@ translate_compat_table(struct net *net,
 	duprintf("translate_compat_table: size %u\n", info->size);
 	j = 0;
 	xt_compat_lock(AF_INET6);
+	xt_compat_init_offsets(AF_INET6, number);
 	/* Walk through entries, checking offsets. */
 	xt_entry_foreach(iter0, entry0, total_size) {
 		ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 09c88891a75..05027b75372 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -452,8 +452,7 @@ ip6t_log_packet(u_int8_t pf,
 	       in ? in->name : "",
 	       out ? out->name : "");
 
-	/* MAC logging for input path only. */
-	if (in && !out)
+	if (in != NULL)
 		dump_mac_header(m, loginfo, skb);
 
 	dump_packet(m, loginfo, skb, skb_network_offset(skb), 1);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 79d43aa8fa8..08572726381 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -45,6 +45,7 @@
 #include <linux/netfilter_ipv6.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 
 
 struct nf_ct_frag6_skb_cb
@@ -73,7 +74,7 @@ static struct inet_frags nf_frags;
 static struct netns_frags nf_init_frags;
 
 #ifdef CONFIG_SYSCTL
-struct ctl_table nf_ct_frag6_sysctl_table[] = {
+static struct ctl_table nf_ct_frag6_sysctl_table[] = {
 	{
 		.procname	= "nf_conntrack_frag6_timeout",
 		.data		= &nf_init_frags.timeout,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 86c39526ba5..2bc6cd7bb8e 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -123,18 +123,18 @@ static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb)
 }
 
 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
-static int (*mh_filter)(struct sock *sock, struct sk_buff *skb);
+typedef int mh_filter_t(struct sock *sock, struct sk_buff *skb);
 
-int rawv6_mh_filter_register(int (*filter)(struct sock *sock,
-					   struct sk_buff *skb))
+static mh_filter_t __rcu *mh_filter __read_mostly;
+
+int rawv6_mh_filter_register(mh_filter_t filter)
 {
 	rcu_assign_pointer(mh_filter, filter);
 	return 0;
 }
 EXPORT_SYMBOL(rawv6_mh_filter_register);
 
-int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock,
-					     struct sk_buff *skb))
+int rawv6_mh_filter_unregister(mh_filter_t filter)
 {
 	rcu_assign_pointer(mh_filter, NULL);
 	synchronize_rcu();
@@ -192,10 +192,10 @@ static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 			 * policy is placed in rawv6_rcv() because it is
 			 * required for each socket.
 			 */
-			int (*filter)(struct sock *sock, struct sk_buff *skb);
+			mh_filter_t *filter;
 
 			filter = rcu_dereference(mh_filter);
-			filtered = filter ? filter(sk, skb) : 0;
+			filtered = filter ? (*filter)(sk, skb) : 0;
 			break;
 		}
 #endif
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 373bd0416f6..72609f1c615 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -72,8 +72,6 @@
 #define RT6_TRACE(x...) do { ; } while (0)
 #endif
 
-#define CLONE_OFFLINK_ROUTE 0
-
 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
@@ -99,6 +97,36 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
 					   struct in6_addr *gwaddr, int ifindex);
 #endif
 
+static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
+{
+	struct rt6_info *rt = (struct rt6_info *) dst;
+	struct inet_peer *peer;
+	u32 *p = NULL;
+
+	if (!rt->rt6i_peer)
+		rt6_bind_peer(rt, 1);
+
+	peer = rt->rt6i_peer;
+	if (peer) {
+		u32 *old_p = __DST_METRICS_PTR(old);
+		unsigned long prev, new;
+
+		p = peer->metrics;
+		if (inet_metrics_new(peer))
+			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
+
+		new = (unsigned long) p;
+		prev = cmpxchg(&dst->_metrics, old, new);
+
+		if (prev != old) {
+			p = __DST_METRICS_PTR(prev);
+			if (prev & DST_METRICS_READ_ONLY)
+				p = NULL;
+		}
+	}
+	return p;
+}
+
 static struct dst_ops ip6_dst_ops_template = {
 	.family			=	AF_INET6,
 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
@@ -107,6 +135,7 @@ static struct dst_ops ip6_dst_ops_template = {
 	.check			=	ip6_dst_check,
 	.default_advmss		=	ip6_default_advmss,
 	.default_mtu		=	ip6_default_mtu,
+	.cow_metrics		=	ipv6_cow_metrics,
 	.destroy		=	ip6_dst_destroy,
 	.ifdown			=	ip6_dst_ifdown,
 	.negative_advice	=	ip6_negative_advice,
@@ -127,6 +156,10 @@ static struct dst_ops ip6_dst_blackhole_ops = {
 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
 };
 
+static const u32 ip6_template_metrics[RTAX_MAX] = {
+	[RTAX_HOPLIMIT - 1] = 255,
+};
+
 static struct rt6_info ip6_null_entry_template = {
 	.dst = {
 		.__refcnt	= ATOMIC_INIT(1),
@@ -196,7 +229,6 @@ static void ip6_dst_destroy(struct dst_entry *dst)
 		in6_dev_put(idev);
 	}
 	if (peer) {
-		BUG_ON(!(rt->rt6i_flags & RTF_CACHE));
 		rt->rt6i_peer = NULL;
 		inet_putpeer(peer);
 	}
@@ -206,9 +238,6 @@ void rt6_bind_peer(struct rt6_info *rt, int create)
 {
 	struct inet_peer *peer;
 
-	if (WARN_ON(!(rt->rt6i_flags & RTF_CACHE)))
-		return;
-
 	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
 	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
 		inet_putpeer(peer);
@@ -738,13 +767,8 @@ restart:
 
 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
 		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
-	else {
-#if CLONE_OFFLINK_ROUTE
+	else
 		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
-#else
-		goto out2;
-#endif
-	}
 
 	dst_release(&rt->dst);
 	rt = nrt ? : net->ipv6.ip6_null_entry;
@@ -2688,7 +2712,8 @@ static int __net_init ip6_route_net_init(struct net *net)
 	net->ipv6.ip6_null_entry->dst.path =
 		(struct dst_entry *)net->ipv6.ip6_null_entry;
 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
-	dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255);
+	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
+			 ip6_template_metrics, true);
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
@@ -2699,7 +2724,8 @@ static int __net_init ip6_route_net_init(struct net *net)
 	net->ipv6.ip6_prohibit_entry->dst.path =
 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
-	dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255);
+	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
+			 ip6_template_metrics, true);
 
 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
@@ -2709,7 +2735,8 @@ static int __net_init ip6_route_net_init(struct net *net)
 	net->ipv6.ip6_blk_hole_entry->dst.path =
 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
-	dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255);
+	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
+			 ip6_template_metrics, true);
 #endif
 
 	net->ipv6.sysctl.flush_delay = 0;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 8ce38f10a54..b1599a345c1 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -412,7 +412,7 @@ static void prl_list_destroy_rcu(struct rcu_head *head)
 
 	p = container_of(head, struct ip_tunnel_prl_entry, rcu_head);
 	do {
-		n = p->next;
+		n = rcu_dereference_protected(p->next, 1);
 		kfree(p);
 		p = n;
 	} while (p);
@@ -421,15 +421,17 @@ static void prl_list_destroy_rcu(struct rcu_head *head)
 static int
 ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
 {
-	struct ip_tunnel_prl_entry *x, **p;
+	struct ip_tunnel_prl_entry *x;
+	struct ip_tunnel_prl_entry __rcu **p;
 	int err = 0;
 
 	ASSERT_RTNL();
 
 	if (a && a->addr != htonl(INADDR_ANY)) {
-		for (p = &t->prl; *p; p = &(*p)->next) {
-			if ((*p)->addr == a->addr) {
-				x = *p;
+		for (p = &t->prl;
+		     (x = rtnl_dereference(*p)) != NULL;
+		     p = &x->next) {
+			if (x->addr == a->addr) {
 				*p = x->next;
 				call_rcu(&x->rcu_head, prl_entry_destroy_rcu);
 				t->prl_count--;
@@ -438,9 +440,9 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
 		}
 		err = -ENXIO;
 	} else {
-		if (t->prl) {
+		x = rtnl_dereference(t->prl);
+		if (x) {
 			t->prl_count = 0;
-			x = t->prl;
 			call_rcu(&x->rcu_head, prl_list_destroy_rcu);
 			t->prl = NULL;
 		}
@@ -1179,7 +1181,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
 	if (!dev->tstats)
 		return -ENOMEM;
 	dev_hold(dev);
-	sitn->tunnels_wc[0]	= tunnel;
+	rcu_assign_pointer(sitn->tunnels_wc[0], tunnel);
 	return 0;
 }
 
@@ -1196,11 +1198,12 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea
 	for (prio = 1; prio < 4; prio++) {
 		int h;
 		for (h = 0; h < HASH_SIZE; h++) {
-			struct ip_tunnel *t = sitn->tunnels[prio][h];
+			struct ip_tunnel *t;
 
+			t = rtnl_dereference(sitn->tunnels[prio][h]);
 			while (t != NULL) {
 				unregister_netdevice_queue(t->dev, head);
-				t = t->next;
+				t = rtnl_dereference(t->next);
 			}
 		}
 	}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 9a009c66c8a..a419a787eb6 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1299,7 +1299,7 @@ static int udp6_ufo_send_check(struct sk_buff *skb)
 	return 0;
 }
 
-static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, int features)
+static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, u32 features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	unsigned int mss;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 7e74023ea6e..834dc02f1d4 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -98,6 +98,10 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	if (!xdst->u.rt6.rt6i_idev)
 		return -ENODEV;
 
+	xdst->u.rt6.rt6i_peer = rt->rt6i_peer;
+	if (rt->rt6i_peer)
+		atomic_inc(&rt->rt6i_peer->refcnt);
+
 	/* Sheit... I remember I did this right. Apparently,
 	 * it was magically lost, so this code needs audit */
 	xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST |
@@ -216,6 +220,9 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
 
 	if (likely(xdst->u.rt6.rt6i_idev))
 		in6_dev_put(xdst->u.rt6.rt6i_idev);
+	dst_destroy_metrics_generic(dst);
+	if (likely(xdst->u.rt6.rt6i_peer))
+		inet_putpeer(xdst->u.rt6.rt6i_peer);
 	xfrm_dst_destroy(xdst);
 }
 
@@ -251,6 +258,7 @@ static struct dst_ops xfrm6_dst_ops = {
 	.protocol =		cpu_to_be16(ETH_P_IPV6),
 	.gc =			xfrm6_garbage_collect,
 	.update_pmtu =		xfrm6_update_pmtu,
+	.cow_metrics =		dst_cow_metrics_generic,
 	.destroy =		xfrm6_dst_destroy,
 	.ifdown =		xfrm6_dst_ifdown,
 	.local_out =		__ip6_local_out,
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 227ca82eef7..0c9d0c07eae 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -76,7 +76,7 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
 #endif /* CONFIG_MAC80211_HT_DEBUG */
 
 	if (drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_STOP,
-			     &sta->sta, tid, NULL))
+			     &sta->sta, tid, NULL, 0))
 		printk(KERN_DEBUG "HW problem - can not stop rx "
 				"aggregation for tid %d\n", tid);
 
@@ -232,6 +232,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
 	if (buf_size == 0)
 		buf_size = IEEE80211_MAX_AMPDU_BUF;
 
+	/* make sure the size doesn't exceed the maximum supported by the hw */
+	if (buf_size > local->hw.max_rx_aggregation_subframes)
+		buf_size = local->hw.max_rx_aggregation_subframes;
 
 	/* examine state machine */
 	mutex_lock(&sta->ampdu_mlme.mtx);
@@ -287,7 +290,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
 	}
 
 	ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_START,
-			       &sta->sta, tid, &start_seq_num);
+			       &sta->sta, tid, &start_seq_num, 0);
 #ifdef CONFIG_MAC80211_HT_DEBUG
 	printk(KERN_DEBUG "Rx A-MPDU request on tid %d result %d\n", tid, ret);
 #endif /* CONFIG_MAC80211_HT_DEBUG */
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 9cc472c6a6a..63d852cb4ca 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -190,7 +190,7 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
 
 	ret = drv_ampdu_action(local, sta->sdata,
 			       IEEE80211_AMPDU_TX_STOP,
-			       &sta->sta, tid, NULL);
+			       &sta->sta, tid, NULL, 0);
 
 	/* HW shall not deny going back to legacy */
 	if (WARN_ON(ret)) {
@@ -311,7 +311,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
 	start_seq_num = sta->tid_seq[tid] >> 4;
 
 	ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START,
-			       &sta->sta, tid, &start_seq_num);
+			       &sta->sta, tid, &start_seq_num, 0);
 	if (ret) {
 #ifdef CONFIG_MAC80211_HT_DEBUG
 		printk(KERN_DEBUG "BA request denied - HW unavailable for"
@@ -342,7 +342,8 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
 	/* send AddBA request */
 	ieee80211_send_addba_request(sdata, sta->sta.addr, tid,
 				     tid_tx->dialog_token, start_seq_num,
-				     0x40, tid_tx->timeout);
+				     local->hw.max_tx_aggregation_subframes,
+				     tid_tx->timeout);
 }
 
 int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
@@ -487,7 +488,8 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local,
 
 	drv_ampdu_action(local, sta->sdata,
 			 IEEE80211_AMPDU_TX_OPERATIONAL,
-			 &sta->sta, tid, NULL);
+			 &sta->sta, tid, NULL,
+			 sta->ampdu_mlme.tid_tx[tid]->buf_size);
 
 	/*
 	 * synchronize with TX path, while splicing the TX path
@@ -742,9 +744,11 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
 {
 	struct tid_ampdu_tx *tid_tx;
 	u16 capab, tid;
+	u8 buf_size;
 
 	capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab);
 	tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
+	buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
 
 	mutex_lock(&sta->ampdu_mlme.mtx);
 
@@ -767,12 +771,23 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
 
 	if (le16_to_cpu(mgmt->u.action.u.addba_resp.status)
 			== WLAN_STATUS_SUCCESS) {
+		/*
+		 * IEEE 802.11-2007 7.3.1.14:
+		 * In an ADDBA Response frame, when the Status Code field
+		 * is set to 0, the Buffer Size subfield is set to a value
+		 * of at least 1.
+		 */
+		if (!buf_size)
+			goto out;
+
 		if (test_and_set_bit(HT_AGG_STATE_RESPONSE_RECEIVED,
 				     &tid_tx->state)) {
 			/* ignore duplicate response */
 			goto out;
 		}
 
+		tid_tx->buf_size = buf_size;
+
 		if (test_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state))
 			ieee80211_agg_tx_operational(local, sta, tid);
 
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 98d589960a4..78af32d4bc5 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -382,17 +382,17 @@ static inline int drv_ampdu_action(struct ieee80211_local *local,
 				   struct ieee80211_sub_if_data *sdata,
 				   enum ieee80211_ampdu_mlme_action action,
 				   struct ieee80211_sta *sta, u16 tid,
-				   u16 *ssn)
+				   u16 *ssn, u8 buf_size)
 {
 	int ret = -EOPNOTSUPP;
 
 	might_sleep();
 
-	trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn);
+	trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn, buf_size);
 
 	if (local->ops->ampdu_action)
 		ret = local->ops->ampdu_action(&local->hw, &sdata->vif, action,
-					       sta, tid, ssn);
+					       sta, tid, ssn, buf_size);
 
 	trace_drv_return_int(local, ret);
 
diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h
index 49c84218b2f..e5cce19a7d6 100644
--- a/net/mac80211/driver-trace.h
+++ b/net/mac80211/driver-trace.h
@@ -9,6 +9,11 @@
 #undef TRACE_EVENT
 #define TRACE_EVENT(name, proto, ...) \
 static inline void trace_ ## name(proto) {}
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(...)
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(evt_class, name, proto, ...) \
+static inline void trace_ ## name(proto) {}
 #endif
 
 #undef TRACE_SYSTEM
@@ -38,7 +43,7 @@ static inline void trace_ ## name(proto) {}
  * Tracing for driver callbacks.
  */
 
-TRACE_EVENT(drv_return_void,
+DECLARE_EVENT_CLASS(local_only_evt,
 	TP_PROTO(struct ieee80211_local *local),
 	TP_ARGS(local),
 	TP_STRUCT__entry(
@@ -50,6 +55,11 @@ TRACE_EVENT(drv_return_void,
 	TP_printk(LOCAL_PR_FMT, LOCAL_PR_ARG)
 );
 
+DEFINE_EVENT(local_only_evt, drv_return_void,
+	TP_PROTO(struct ieee80211_local *local),
+	TP_ARGS(local)
+);
+
 TRACE_EVENT(drv_return_int,
 	TP_PROTO(struct ieee80211_local *local, int ret),
 	TP_ARGS(local, ret),
@@ -78,40 +88,14 @@ TRACE_EVENT(drv_return_u64,
 	TP_printk(LOCAL_PR_FMT " - %llu", LOCAL_PR_ARG, __entry->ret)
 );
 
-TRACE_EVENT(drv_start,
+DEFINE_EVENT(local_only_evt, drv_start,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT, LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
-TRACE_EVENT(drv_stop,
+DEFINE_EVENT(local_only_evt, drv_stop,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT, LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
 TRACE_EVENT(drv_add_interface,
@@ -439,40 +423,14 @@ TRACE_EVENT(drv_hw_scan,
 	)
 );
 
-TRACE_EVENT(drv_sw_scan_start,
+DEFINE_EVENT(local_only_evt, drv_sw_scan_start,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT, LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
-TRACE_EVENT(drv_sw_scan_complete,
+DEFINE_EVENT(local_only_evt, drv_sw_scan_complete,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT, LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
 TRACE_EVENT(drv_get_stats,
@@ -702,23 +660,9 @@ TRACE_EVENT(drv_conf_tx,
 	)
 );
 
-TRACE_EVENT(drv_get_tsf,
+DEFINE_EVENT(local_only_evt, drv_get_tsf,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT,
-		LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
 TRACE_EVENT(drv_set_tsf,
@@ -742,41 +686,14 @@ TRACE_EVENT(drv_set_tsf,
 	)
 );
 
-TRACE_EVENT(drv_reset_tsf,
+DEFINE_EVENT(local_only_evt, drv_reset_tsf,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT, LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
-TRACE_EVENT(drv_tx_last_beacon,
+DEFINE_EVENT(local_only_evt, drv_tx_last_beacon,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT,
-		LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
 TRACE_EVENT(drv_ampdu_action,
@@ -784,9 +701,9 @@ TRACE_EVENT(drv_ampdu_action,
 		 struct ieee80211_sub_if_data *sdata,
 		 enum ieee80211_ampdu_mlme_action action,
 		 struct ieee80211_sta *sta, u16 tid,
-		 u16 *ssn),
+		 u16 *ssn, u8 buf_size),
 
-	TP_ARGS(local, sdata, action, sta, tid, ssn),
+	TP_ARGS(local, sdata, action, sta, tid, ssn, buf_size),
 
 	TP_STRUCT__entry(
 		LOCAL_ENTRY
@@ -794,6 +711,7 @@ TRACE_EVENT(drv_ampdu_action,
 		__field(u32, action)
 		__field(u16, tid)
 		__field(u16, ssn)
+		__field(u8, buf_size)
 		VIF_ENTRY
 	),
 
@@ -804,11 +722,13 @@ TRACE_EVENT(drv_ampdu_action,
 		__entry->action = action;
 		__entry->tid = tid;
 		__entry->ssn = ssn ? *ssn : 0;
+		__entry->buf_size = buf_size;
 	),
 
 	TP_printk(
-		LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d",
-		LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action, __entry->tid
+		LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d buf:%d",
+		LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action,
+		__entry->tid, __entry->buf_size
 	)
 );
 
@@ -959,22 +879,9 @@ TRACE_EVENT(drv_remain_on_channel,
 	)
 );
 
-TRACE_EVENT(drv_cancel_remain_on_channel,
+DEFINE_EVENT(local_only_evt, drv_cancel_remain_on_channel,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT, LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
 /*
@@ -1069,23 +976,9 @@ TRACE_EVENT(api_stop_tx_ba_cb,
 	)
 );
 
-TRACE_EVENT(api_restart_hw,
+DEFINE_EVENT(local_only_evt, api_restart_hw,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT,
-		LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
 TRACE_EVENT(api_beacon_loss,
@@ -1214,40 +1107,14 @@ TRACE_EVENT(api_chswitch_done,
 	)
 );
 
-TRACE_EVENT(api_ready_on_channel,
+DEFINE_EVENT(local_only_evt, api_ready_on_channel,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT, LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
-TRACE_EVENT(api_remain_on_channel_expired,
+DEFINE_EVENT(local_only_evt, api_remain_on_channel_expired,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT, LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
 /*
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 53c7077ffd4..775fb63471c 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -270,7 +270,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 	enum ieee80211_band band = rx_status->band;
 
 	if (elems->ds_params && elems->ds_params_len == 1)
-		freq = ieee80211_channel_to_frequency(elems->ds_params[0]);
+		freq = ieee80211_channel_to_frequency(elems->ds_params[0],
+						      band);
 	else
 		freq = rx_status->freq;
 
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index a46ff06d7cb..09a27449f3f 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -554,6 +554,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 	local->hw.queues = 1;
 	local->hw.max_rates = 1;
 	local->hw.max_report_rates = 0;
+	local->hw.max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
 	local->hw.conf.long_frame_max_tx_count = wiphy->retry_long;
 	local->hw.conf.short_frame_max_tx_count = wiphy->retry_short;
 	local->user_power_level = -1;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index ca3af4685b0..2a57cc02c61 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -574,7 +574,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
 			       &elems);
 
 	if (elems.ds_params && elems.ds_params_len == 1)
-		freq = ieee80211_channel_to_frequency(elems.ds_params[0]);
+		freq = ieee80211_channel_to_frequency(elems.ds_params[0], band);
 	else
 		freq = rx_status->freq;
 
@@ -645,7 +645,7 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata)
 	if (test_and_clear_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags))
 		mesh_mpath_table_grow();
 
-	if (test_and_clear_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags))
+	if (test_and_clear_bit(MESH_WORK_GROW_MPP_TABLE, &ifmsh->wrkq_flags))
 		mesh_mpp_table_grow();
 
 	if (test_and_clear_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags))
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 45fbb9e3374..32210695b8b 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -176,7 +176,7 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
 
 	/* check that channel matches the right operating channel */
 	if (local->hw.conf.channel->center_freq !=
-	    ieee80211_channel_to_frequency(hti->control_chan))
+	    ieee80211_channel_to_frequency(hti->control_chan, sband->band))
 		enable_ht = false;
 
 	if (enable_ht) {
@@ -429,7 +429,8 @@ void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 		container_of((void *)bss, struct cfg80211_bss, priv);
 	struct ieee80211_channel *new_ch;
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
-	int new_freq = ieee80211_channel_to_frequency(sw_elem->new_ch_num);
+	int new_freq = ieee80211_channel_to_frequency(sw_elem->new_ch_num,
+						      cbss->channel->band);
 
 	ASSERT_MGD_MTX(ifmgd);
 
@@ -1519,7 +1520,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 	}
 
 	if (elems->ds_params && elems->ds_params_len == 1)
-		freq = ieee80211_channel_to_frequency(elems->ds_params[0]);
+		freq = ieee80211_channel_to_frequency(elems->ds_params[0],
+						      rx_status->band);
 	else
 		freq = rx_status->freq;
 
@@ -1972,9 +1974,9 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 				wiphy_debug(local->hw.wiphy,
 					    "%s: No ack for nullfunc frame to"
-					    " AP %pM, try %d\n",
+					    " AP %pM, try %d/%i\n",
 					    sdata->name, bssid,
-					    ifmgd->probe_send_count);
+					    ifmgd->probe_send_count, max_tries);
 #endif
 				ieee80211_mgd_probe_ap_send(sdata);
 			} else {
@@ -2001,10 +2003,10 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 			wiphy_debug(local->hw.wiphy,
 				    "%s: No probe response from AP %pM"
-				    " after %dms, try %d\n",
+				    " after %dms, try %d/%i\n",
 				    sdata->name,
 				    bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ,
-				    ifmgd->probe_send_count);
+				    ifmgd->probe_send_count, max_tries);
 #endif
 			ieee80211_mgd_probe_ap_send(sdata);
 		} else {
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index a6701ed87f0..f36d70f5b06 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1556,17 +1556,36 @@ __ieee80211_data_to_8023(struct ieee80211_rx_data *rx)
 {
 	struct ieee80211_sub_if_data *sdata = rx->sdata;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
+	bool check_port_control = false;
+	struct ethhdr *ehdr;
+	int ret;
 
 	if (ieee80211_has_a4(hdr->frame_control) &&
 	    sdata->vif.type == NL80211_IFTYPE_AP_VLAN && !sdata->u.vlan.sta)
 		return -1;
 
+	if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+	    !!sdata->u.mgd.use_4addr != !!ieee80211_has_a4(hdr->frame_control)) {
+
+		if (!sdata->u.mgd.use_4addr)
+			return -1;
+		else
+			check_port_control = true;
+	}
+
 	if (is_multicast_ether_addr(hdr->addr1) &&
-	    ((sdata->vif.type == NL80211_IFTYPE_AP_VLAN && sdata->u.vlan.sta) ||
-	     (sdata->vif.type == NL80211_IFTYPE_STATION && sdata->u.mgd.use_4addr)))
+	    sdata->vif.type == NL80211_IFTYPE_AP_VLAN && sdata->u.vlan.sta)
 		return -1;
 
-	return ieee80211_data_to_8023(rx->skb, sdata->vif.addr, sdata->vif.type);
+	ret = ieee80211_data_to_8023(rx->skb, sdata->vif.addr, sdata->vif.type);
+	if (ret < 0 || !check_port_control)
+		return ret;
+
+	ehdr = (struct ethhdr *) rx->skb->data;
+	if (ehdr->h_proto != rx->sdata->control_port_protocol)
+		return -1;
+
+	return 0;
 }
 
 /*
@@ -2692,7 +2711,7 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
 		if (!skb) {
 			if (net_ratelimit())
 				wiphy_debug(local->hw.wiphy,
-					"failed to copy multicast frame for %s\n",
+					"failed to copy skb for %s\n",
 					sdata->name);
 			return true;
 		}
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index fb274db77e3..1ef73be76b2 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -196,7 +196,8 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb)
 	ieee802_11_parse_elems(elements, skb->len - baselen, &elems);
 
 	if (elems.ds_params && elems.ds_params_len == 1)
-		freq = ieee80211_channel_to_frequency(elems.ds_params[0]);
+		freq = ieee80211_channel_to_frequency(elems.ds_params[0],
+						      rx_status->band);
 	else
 		freq = rx_status->freq;
 
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index bbdd2a86a94..ca0b69060ef 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -82,6 +82,7 @@ enum ieee80211_sta_info_flags {
  * @state: session state (see above)
  * @stop_initiator: initiator of a session stop
  * @tx_stop: TX DelBA frame when stopping
+ * @buf_size: reorder buffer size at receiver
  *
  * This structure's lifetime is managed by RCU, assignments to
  * the array holding it must hold the aggregation mutex.
@@ -101,6 +102,7 @@ struct tid_ampdu_tx {
 	u8 dialog_token;
 	u8 stop_initiator;
 	bool tx_stop;
+	u8 buf_size;
 };
 
 /**
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 5950e3abead..ffc67491c38 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1750,7 +1750,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 	__le16 fc;
 	struct ieee80211_hdr hdr;
 	struct ieee80211s_hdr mesh_hdr __maybe_unused;
-	struct mesh_path *mppath = NULL;
+	struct mesh_path __maybe_unused *mppath = NULL;
 	const u8 *encaps_data;
 	int encaps_len, skip_header_bytes;
 	int nh_pos, h_pos;
@@ -1815,19 +1815,19 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 			mppath = mpp_path_lookup(skb->data, sdata);
 
 		/*
-		 * Do not use address extension, if it is a packet from
-		 * the same interface and the destination is not being
-		 * proxied by any other mest point.
+		 * Use address extension if it is a packet from
+		 * another interface or if we know the destination
+		 * is being proxied by a portal (i.e. portal address
+		 * differs from proxied address)
 		 */
 		if (compare_ether_addr(sdata->vif.addr,
 				       skb->data + ETH_ALEN) == 0 &&
-		    (!mppath || !compare_ether_addr(mppath->mpp, skb->data))) {
+		    !(mppath && compare_ether_addr(mppath->mpp, skb->data))) {
 			hdrlen = ieee80211_fill_mesh_addresses(&hdr, &fc,
 					skb->data, skb->data + ETH_ALEN);
 			meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr,
 					sdata, NULL, NULL);
 		} else {
-			/* packet from other interface */
 			int is_mesh_mcast = 1;
 			const u8 *mesh_da;
 
@@ -2230,6 +2230,9 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
 
 	sdata = vif_to_sdata(vif);
 
+	if (!ieee80211_sdata_running(sdata))
+		goto out;
+
 	if (tim_offset)
 		*tim_offset = 0;
 	if (tim_length)
@@ -2299,6 +2302,11 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
 		struct ieee80211_mgmt *mgmt;
 		u8 *pos;
 
+#ifdef CONFIG_MAC80211_MESH
+		if (!sdata->u.mesh.mesh_id_len)
+			goto out;
+#endif
+
 		/* headroom, head length, tail length and maximum TIM length */
 		skb = dev_alloc_skb(local->tx_headroom + 400 +
 				sdata->u.mesh.vendor_ie_len);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 1534f2b44ca..faf7412ea45 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -85,6 +85,17 @@ config NF_CONNTRACK_EVENTS
 
 	  If unsure, say `N'.
 
+config NF_CONNTRACK_TIMESTAMP
+	bool  'Connection tracking timestamping'
+	depends on NETFILTER_ADVANCED
+	help
+	  This option enables support for connection tracking timestamping.
+	  This allows you to store the flow start-time and to obtain
+	  the flow-stop time (once it has been destroyed) via Connection
+	  tracking events.
+
+	  If unsure, say `N'.
+
 config NF_CT_PROTO_DCCP
 	tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)'
 	depends on EXPERIMENTAL
@@ -185,9 +196,13 @@ config NF_CONNTRACK_IRC
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NF_CONNTRACK_BROADCAST
+	tristate
+
 config NF_CONNTRACK_NETBIOS_NS
 	tristate "NetBIOS name service protocol support"
 	depends on NETFILTER_ADVANCED
+	select NF_CONNTRACK_BROADCAST
 	help
 	  NetBIOS name service requests are sent as broadcast messages from an
 	  unprivileged port and responded to with unicast messages to the
@@ -204,6 +219,21 @@ config NF_CONNTRACK_NETBIOS_NS
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NF_CONNTRACK_SNMP
+	tristate "SNMP service protocol support"
+	depends on NETFILTER_ADVANCED
+	select NF_CONNTRACK_BROADCAST
+	help
+	  SNMP service requests are sent as broadcast messages from an
+	  unprivileged port and responded to with unicast messages to the
+	  same port. This makes them hard to firewall properly because connection
+	  tracking doesn't deal with broadcasts. This helper tracks locally
+	  originating SNMP service requests and the corresponding
+	  responses. It relies on correct IP address configuration, specifically
+	  netmask and broadcast address.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NF_CONNTRACK_PPTP
 	tristate "PPtP protocol support"
 	depends on NETFILTER_ADVANCED
@@ -326,6 +356,16 @@ config NETFILTER_XT_CONNMARK
 
 comment "Xtables targets"
 
+config NETFILTER_XT_TARGET_AUDIT
+	tristate "AUDIT target support"
+	depends on AUDIT
+	depends on NETFILTER_ADVANCED
+	---help---
+	  This option adds a 'AUDIT' target, which can be used to create
+	  audit records for packets dropped/accepted.
+
+	  To compile it as a module, choose M here. If unsure, say N.
+
 config NETFILTER_XT_TARGET_CHECKSUM
 	tristate "CHECKSUM target support"
 	depends on IP_NF_MANGLE || IP6_NF_MANGLE
@@ -477,6 +517,7 @@ config NETFILTER_XT_TARGET_NFLOG
 config NETFILTER_XT_TARGET_NFQUEUE
 	tristate '"NFQUEUE" target Support'
 	depends on NETFILTER_ADVANCED
+	select NETFILTER_NETLINK_QUEUE
 	help
 	  This target replaced the old obsolete QUEUE target.
 
@@ -886,7 +927,7 @@ config NETFILTER_XT_MATCH_RATEEST
 config NETFILTER_XT_MATCH_REALM
 	tristate  '"realm" match support'
 	depends on NETFILTER_ADVANCED
-	select NET_CLS_ROUTE
+	select IP_ROUTE_CLASSID
 	help
 	  This option adds a `realm' match, which allows you to use the realm
 	  key from the routing subsystem inside iptables.
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 441050f3111..9ae6878a85b 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,6 +1,7 @@
 netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
 
 nf_conntrack-y	:= nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o
+nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
 
 obj-$(CONFIG_NETFILTER) = netfilter.o
@@ -28,7 +29,9 @@ obj-$(CONFIG_NF_CONNTRACK_AMANDA) += nf_conntrack_amanda.o
 obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o
 obj-$(CONFIG_NF_CONNTRACK_H323) += nf_conntrack_h323.o
 obj-$(CONFIG_NF_CONNTRACK_IRC) += nf_conntrack_irc.o
+obj-$(CONFIG_NF_CONNTRACK_BROADCAST) += nf_conntrack_broadcast.o
 obj-$(CONFIG_NF_CONNTRACK_NETBIOS_NS) += nf_conntrack_netbios_ns.o
+obj-$(CONFIG_NF_CONNTRACK_SNMP) += nf_conntrack_snmp.o
 obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_conntrack_pptp.o
 obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o
 obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o
@@ -45,6 +48,7 @@ obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o
 obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o
 
 # targets
+obj-$(CONFIG_NETFILTER_XT_TARGET_AUDIT) += xt_AUDIT.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 32fcbe290c0..1e00bf7d27c 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -175,13 +175,21 @@ next_hook:
 		ret = 1;
 	} else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
 		kfree_skb(skb);
-		ret = -(verdict >> NF_VERDICT_BITS);
+		ret = NF_DROP_GETERR(verdict);
 		if (ret == 0)
 			ret = -EPERM;
 	} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
-		if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
-			      verdict >> NF_VERDICT_BITS))
-			goto next_hook;
+		ret = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
+			       verdict >> NF_VERDICT_QBITS);
+		if (ret < 0) {
+			if (ret == -ECANCELED)
+				goto next_hook;
+			if (ret == -ESRCH &&
+			   (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
+				goto next_hook;
+			kfree_skb(skb);
+		}
+		ret = 0;
 	}
 	rcu_read_unlock();
 	return ret;
@@ -214,7 +222,7 @@ EXPORT_SYMBOL(skb_make_writable);
 /* This does not belong here, but locally generated errors need it if connection
    tracking in use: without this, connection may not be in hash table, and hence
    manufactured ICMP or RST packets will not be associated with it. */
-void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
+void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu __read_mostly;
 EXPORT_SYMBOL(ip_ct_attach);
 
 void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
@@ -231,7 +239,7 @@ void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(nf_ct_attach);
 
-void (*nf_ct_destroy)(struct nf_conntrack *);
+void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly;
 EXPORT_SYMBOL(nf_ct_destroy);
 
 void nf_conntrack_destroy(struct nf_conntrack *nfct)
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index a475edee091..5c48ffb60c2 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -43,11 +43,6 @@ EXPORT_SYMBOL(register_ip_vs_app);
 EXPORT_SYMBOL(unregister_ip_vs_app);
 EXPORT_SYMBOL(register_ip_vs_app_inc);
 
-/* ipvs application list head */
-static LIST_HEAD(ip_vs_app_list);
-static DEFINE_MUTEX(__ip_vs_app_mutex);
-
-
 /*
  *	Get an ip_vs_app object
  */
@@ -67,7 +62,8 @@ static inline void ip_vs_app_put(struct ip_vs_app *app)
  *	Allocate/initialize app incarnation and register it in proto apps.
  */
 static int
-ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
+ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
+		  __u16 port)
 {
 	struct ip_vs_protocol *pp;
 	struct ip_vs_app *inc;
@@ -98,7 +94,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
 		}
 	}
 
-	ret = pp->register_app(inc);
+	ret = pp->register_app(net, inc);
 	if (ret)
 		goto out;
 
@@ -119,7 +115,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
  *	Release app incarnation
  */
 static void
-ip_vs_app_inc_release(struct ip_vs_app *inc)
+ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
 {
 	struct ip_vs_protocol *pp;
 
@@ -127,7 +123,7 @@ ip_vs_app_inc_release(struct ip_vs_app *inc)
 		return;
 
 	if (pp->unregister_app)
-		pp->unregister_app(inc);
+		pp->unregister_app(net, inc);
 
 	IP_VS_DBG(9, "%s App %s:%u unregistered\n",
 		  pp->name, inc->name, ntohs(inc->port));
@@ -168,15 +164,17 @@ void ip_vs_app_inc_put(struct ip_vs_app *inc)
  *	Register an application incarnation in protocol applications
  */
 int
-register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
+register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
+		       __u16 port)
 {
+	struct netns_ipvs *ipvs = net_ipvs(net);
 	int result;
 
-	mutex_lock(&__ip_vs_app_mutex);
+	mutex_lock(&ipvs->app_mutex);
 
-	result = ip_vs_app_inc_new(app, proto, port);
+	result = ip_vs_app_inc_new(net, app, proto, port);
 
-	mutex_unlock(&__ip_vs_app_mutex);
+	mutex_unlock(&ipvs->app_mutex);
 
 	return result;
 }
@@ -185,16 +183,17 @@ register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
 /*
  *	ip_vs_app registration routine
  */
-int register_ip_vs_app(struct ip_vs_app *app)
+int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
 {
+	struct netns_ipvs *ipvs = net_ipvs(net);
 	/* increase the module use count */
 	ip_vs_use_count_inc();
 
-	mutex_lock(&__ip_vs_app_mutex);
+	mutex_lock(&ipvs->app_mutex);
 
-	list_add(&app->a_list, &ip_vs_app_list);
+	list_add(&app->a_list, &ipvs->app_list);
 
-	mutex_unlock(&__ip_vs_app_mutex);
+	mutex_unlock(&ipvs->app_mutex);
 
 	return 0;
 }
@@ -204,19 +203,20 @@ int register_ip_vs_app(struct ip_vs_app *app)
  *	ip_vs_app unregistration routine
  *	We are sure there are no app incarnations attached to services
  */
-void unregister_ip_vs_app(struct ip_vs_app *app)
+void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
 {
+	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ip_vs_app *inc, *nxt;
 
-	mutex_lock(&__ip_vs_app_mutex);
+	mutex_lock(&ipvs->app_mutex);
 
 	list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
-		ip_vs_app_inc_release(inc);
+		ip_vs_app_inc_release(net, inc);
 	}
 
 	list_del(&app->a_list);
 
-	mutex_unlock(&__ip_vs_app_mutex);
+	mutex_unlock(&ipvs->app_mutex);
 
 	/* decrease the module use count */
 	ip_vs_use_count_dec();
@@ -226,7 +226,8 @@ void unregister_ip_vs_app(struct ip_vs_app *app)
 /*
  *	Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
  */
-int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp)
+int ip_vs_bind_app(struct ip_vs_conn *cp,
+		   struct ip_vs_protocol *pp)
 {
 	return pp->app_conn_bind(cp);
 }
@@ -481,11 +482,11 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
  *	/proc/net/ip_vs_app entry function
  */
 
-static struct ip_vs_app *ip_vs_app_idx(loff_t pos)
+static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos)
 {
 	struct ip_vs_app *app, *inc;
 
-	list_for_each_entry(app, &ip_vs_app_list, a_list) {
+	list_for_each_entry(app, &ipvs->app_list, a_list) {
 		list_for_each_entry(inc, &app->incs_list, a_list) {
 			if (pos-- == 0)
 				return inc;
@@ -497,19 +498,24 @@ static struct ip_vs_app *ip_vs_app_idx(loff_t pos)
 
 static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	mutex_lock(&__ip_vs_app_mutex);
+	struct net *net = seq_file_net(seq);
+	struct netns_ipvs *ipvs = net_ipvs(net);
 
-	return *pos ? ip_vs_app_idx(*pos - 1) : SEQ_START_TOKEN;
+	mutex_lock(&ipvs->app_mutex);
+
+	return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
 }
 
 static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct ip_vs_app *inc, *app;
 	struct list_head *e;
+	struct net *net = seq_file_net(seq);
+	struct netns_ipvs *ipvs = net_ipvs(net);
 
 	++*pos;
 	if (v == SEQ_START_TOKEN)
-		return ip_vs_app_idx(0);
+		return ip_vs_app_idx(ipvs, 0);
 
 	inc = v;
 	app = inc->app;
@@ -518,7 +524,7 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 		return list_entry(e, struct ip_vs_app, a_list);
 
 	/* go on to next application */
-	for (e = app->a_list.next; e != &ip_vs_app_list; e = e->next) {
+	for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) {
 		app = list_entry(e, struct ip_vs_app, a_list);
 		list_for_each_entry(inc, &app->incs_list, a_list) {
 			return inc;
@@ -529,7 +535,9 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 
 static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
 {
-	mutex_unlock(&__ip_vs_app_mutex);
+	struct netns_ipvs *ipvs = net_ipvs(seq_file_net(seq));
+
+	mutex_unlock(&ipvs->app_mutex);
 }
 
 static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
@@ -557,7 +565,8 @@ static const struct seq_operations ip_vs_app_seq_ops = {
 
 static int ip_vs_app_open(struct inode *inode, struct file *file)
 {
-	return seq_open(file, &ip_vs_app_seq_ops);
+	return seq_open_net(inode, file, &ip_vs_app_seq_ops,
+			    sizeof(struct seq_net_private));
 }
 
 static const struct file_operations ip_vs_app_fops = {
@@ -569,15 +578,36 @@ static const struct file_operations ip_vs_app_fops = {
 };
 #endif
 
-int __init ip_vs_app_init(void)
+static int __net_init __ip_vs_app_init(struct net *net)
 {
-	/* we will replace it with proc_net_ipvs_create() soon */
-	proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops);
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	INIT_LIST_HEAD(&ipvs->app_list);
+	__mutex_init(&ipvs->app_mutex, "ipvs->app_mutex", &ipvs->app_key);
+	proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops);
 	return 0;
 }
 
+static void __net_exit __ip_vs_app_cleanup(struct net *net)
+{
+	proc_net_remove(net, "ip_vs_app");
+}
+
+static struct pernet_operations ip_vs_app_ops = {
+	.init = __ip_vs_app_init,
+	.exit = __ip_vs_app_cleanup,
+};
+
+int __init ip_vs_app_init(void)
+{
+	int rv;
+
+	rv = register_pernet_subsys(&ip_vs_app_ops);
+	return rv;
+}
+
 
 void ip_vs_app_cleanup(void)
 {
-	proc_net_remove(&init_net, "ip_vs_app");
+	unregister_pernet_subsys(&ip_vs_app_ops);
 }
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index e9adecdc8ca..83233fe24a0 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -48,35 +48,32 @@
 /*
  * Connection hash size. Default is what was selected at compile time.
 */
-int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
+static int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
 module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444);
 MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size");
 
 /* size and mask values */
-int ip_vs_conn_tab_size;
-int ip_vs_conn_tab_mask;
+int ip_vs_conn_tab_size __read_mostly;
+static int ip_vs_conn_tab_mask __read_mostly;
 
 /*
  *  Connection hash table: for input and output packets lookups of IPVS
  */
-static struct list_head *ip_vs_conn_tab;
+static struct list_head *ip_vs_conn_tab __read_mostly;
 
 /*  SLAB cache for IPVS connections */
 static struct kmem_cache *ip_vs_conn_cachep __read_mostly;
 
-/*  counter for current IPVS connections */
-static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);
-
 /*  counter for no client port connections */
 static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
 
 /* random value for IPVS connection hash */
-static unsigned int ip_vs_conn_rnd;
+static unsigned int ip_vs_conn_rnd __read_mostly;
 
 /*
  *  Fine locking granularity for big connection hash table
  */
-#define CT_LOCKARRAY_BITS  4
+#define CT_LOCKARRAY_BITS  5
 #define CT_LOCKARRAY_SIZE  (1<<CT_LOCKARRAY_BITS)
 #define CT_LOCKARRAY_MASK  (CT_LOCKARRAY_SIZE-1)
 
@@ -133,19 +130,19 @@ static inline void ct_write_unlock_bh(unsigned key)
 /*
  *	Returns hash value for IPVS connection entry
  */
-static unsigned int ip_vs_conn_hashkey(int af, unsigned proto,
+static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned proto,
 				       const union nf_inet_addr *addr,
 				       __be16 port)
 {
 #ifdef CONFIG_IP_VS_IPV6
 	if (af == AF_INET6)
-		return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
-				    (__force u32)port, proto, ip_vs_conn_rnd)
-			& ip_vs_conn_tab_mask;
+		return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
+				    (__force u32)port, proto, ip_vs_conn_rnd) ^
+			((size_t)net>>8)) & ip_vs_conn_tab_mask;
 #endif
-	return jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
-			    ip_vs_conn_rnd)
-		& ip_vs_conn_tab_mask;
+	return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
+			    ip_vs_conn_rnd) ^
+		((size_t)net>>8)) & ip_vs_conn_tab_mask;
 }
 
 static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
@@ -166,18 +163,18 @@ static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
 		port = p->vport;
 	}
 
-	return ip_vs_conn_hashkey(p->af, p->protocol, addr, port);
+	return ip_vs_conn_hashkey(p->net, p->af, p->protocol, addr, port);
 }
 
 static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
 {
 	struct ip_vs_conn_param p;
 
-	ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport,
-			      NULL, 0, &p);
+	ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->af, cp->protocol,
+			      &cp->caddr, cp->cport, NULL, 0, &p);
 
-	if (cp->dest && cp->dest->svc->pe) {
-		p.pe = cp->dest->svc->pe;
+	if (cp->pe) {
+		p.pe = cp->pe;
 		p.pe_data = cp->pe_data;
 		p.pe_data_len = cp->pe_data_len;
 	}
@@ -186,7 +183,7 @@ static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
 }
 
 /*
- *	Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port.
+ *	Hashes ip_vs_conn in ip_vs_conn_tab by netns,proto,addr,port.
  *	returns bool success.
  */
 static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
@@ -269,11 +266,12 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
 
 	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
 		if (cp->af == p->af &&
+		    p->cport == cp->cport && p->vport == cp->vport &&
 		    ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
 		    ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
-		    p->cport == cp->cport && p->vport == cp->vport &&
 		    ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
-		    p->protocol == cp->protocol) {
+		    p->protocol == cp->protocol &&
+		    ip_vs_conn_net_eq(cp, p->net)) {
 			/* HIT */
 			atomic_inc(&cp->refcnt);
 			ct_read_unlock(hash);
@@ -313,23 +311,23 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
 			    struct ip_vs_conn_param *p)
 {
 	__be16 _ports[2], *pptr;
+	struct net *net = skb_net(skb);
 
 	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
 	if (pptr == NULL)
 		return 1;
 
 	if (likely(!inverse))
-		ip_vs_conn_fill_param(af, iph->protocol, &iph->saddr, pptr[0],
-				      &iph->daddr, pptr[1], p);
+		ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr,
+				      pptr[0], &iph->daddr, pptr[1], p);
 	else
-		ip_vs_conn_fill_param(af, iph->protocol, &iph->daddr, pptr[1],
-				      &iph->saddr, pptr[0], p);
+		ip_vs_conn_fill_param(net, af, iph->protocol, &iph->daddr,
+				      pptr[1], &iph->saddr, pptr[0], p);
 	return 0;
 }
 
 struct ip_vs_conn *
 ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
-			struct ip_vs_protocol *pp,
 			const struct ip_vs_iphdr *iph,
 			unsigned int proto_off, int inverse)
 {
@@ -353,8 +351,10 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
 	ct_read_lock(hash);
 
 	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+		if (!ip_vs_conn_net_eq(cp, p->net))
+			continue;
 		if (p->pe_data && p->pe->ct_match) {
-			if (p->pe->ct_match(p, cp))
+			if (p->pe == cp->pe && p->pe->ct_match(p, cp))
 				goto out;
 			continue;
 		}
@@ -404,10 +404,11 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
 
 	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
 		if (cp->af == p->af &&
+		    p->vport == cp->cport && p->cport == cp->dport &&
 		    ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
 		    ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
-		    p->vport == cp->cport && p->cport == cp->dport &&
-		    p->protocol == cp->protocol) {
+		    p->protocol == cp->protocol &&
+		    ip_vs_conn_net_eq(cp, p->net)) {
 			/* HIT */
 			atomic_inc(&cp->refcnt);
 			ret = cp;
@@ -428,7 +429,6 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
 
 struct ip_vs_conn *
 ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
-			 struct ip_vs_protocol *pp,
 			 const struct ip_vs_iphdr *iph,
 			 unsigned int proto_off, int inverse)
 {
@@ -611,9 +611,9 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
 	struct ip_vs_dest *dest;
 
 	if ((cp) && (!cp->dest)) {
-		dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport,
-				       &cp->vaddr, cp->vport,
-				       cp->protocol);
+		dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
+				       cp->dport, &cp->vaddr, cp->vport,
+				       cp->protocol, cp->fwmark);
 		ip_vs_bind_dest(cp, dest);
 		return dest;
 	} else
@@ -686,13 +686,14 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
 int ip_vs_check_template(struct ip_vs_conn *ct)
 {
 	struct ip_vs_dest *dest = ct->dest;
+	struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(ct));
 
 	/*
 	 * Checking the dest server status.
 	 */
 	if ((dest == NULL) ||
 	    !(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
-	    (sysctl_ip_vs_expire_quiescent_template &&
+	    (ipvs->sysctl_expire_quiescent_template &&
 	     (atomic_read(&dest->weight) == 0))) {
 		IP_VS_DBG_BUF(9, "check_template: dest not available for "
 			      "protocol %s s:%s:%d v:%s:%d "
@@ -730,6 +731,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
 static void ip_vs_conn_expire(unsigned long data)
 {
 	struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
+	struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
 
 	cp->timeout = 60*HZ;
 
@@ -765,13 +767,14 @@ static void ip_vs_conn_expire(unsigned long data)
 		if (cp->flags & IP_VS_CONN_F_NFCT)
 			ip_vs_conn_drop_conntrack(cp);
 
+		ip_vs_pe_put(cp->pe);
 		kfree(cp->pe_data);
 		if (unlikely(cp->app != NULL))
 			ip_vs_unbind_app(cp);
 		ip_vs_unbind_dest(cp);
 		if (cp->flags & IP_VS_CONN_F_NO_CPORT)
 			atomic_dec(&ip_vs_conn_no_cport_cnt);
-		atomic_dec(&ip_vs_conn_count);
+		atomic_dec(&ipvs->conn_count);
 
 		kmem_cache_free(ip_vs_conn_cachep, cp);
 		return;
@@ -802,10 +805,12 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
 struct ip_vs_conn *
 ip_vs_conn_new(const struct ip_vs_conn_param *p,
 	       const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
-	       struct ip_vs_dest *dest)
+	       struct ip_vs_dest *dest, __u32 fwmark)
 {
 	struct ip_vs_conn *cp;
-	struct ip_vs_protocol *pp = ip_vs_proto_get(p->protocol);
+	struct netns_ipvs *ipvs = net_ipvs(p->net);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net,
+							   p->protocol);
 
 	cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
 	if (cp == NULL) {
@@ -815,6 +820,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
 
 	INIT_LIST_HEAD(&cp->c_list);
 	setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
+	ip_vs_conn_net_set(cp, p->net);
 	cp->af		   = p->af;
 	cp->protocol	   = p->protocol;
 	ip_vs_addr_copy(p->af, &cp->caddr, p->caddr);
@@ -826,7 +832,10 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
 			&cp->daddr, daddr);
 	cp->dport          = dport;
 	cp->flags	   = flags;
-	if (flags & IP_VS_CONN_F_TEMPLATE && p->pe_data) {
+	cp->fwmark         = fwmark;
+	if (flags & IP_VS_CONN_F_TEMPLATE && p->pe) {
+		ip_vs_pe_get(p->pe);
+		cp->pe = p->pe;
 		cp->pe_data = p->pe_data;
 		cp->pe_data_len = p->pe_data_len;
 	}
@@ -842,7 +851,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
 	atomic_set(&cp->n_control, 0);
 	atomic_set(&cp->in_pkts, 0);
 
-	atomic_inc(&ip_vs_conn_count);
+	atomic_inc(&ipvs->conn_count);
 	if (flags & IP_VS_CONN_F_NO_CPORT)
 		atomic_inc(&ip_vs_conn_no_cport_cnt);
 
@@ -861,8 +870,8 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
 #endif
 		ip_vs_bind_xmit(cp);
 
-	if (unlikely(pp && atomic_read(&pp->appcnt)))
-		ip_vs_bind_app(cp, pp);
+	if (unlikely(pd && atomic_read(&pd->appcnt)))
+		ip_vs_bind_app(cp, pd->pp);
 
 	/*
 	 * Allow conntrack to be preserved. By default, conntrack
@@ -871,7 +880,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
 	 * IP_VS_CONN_F_ONE_PACKET too.
 	 */
 
-	if (ip_vs_conntrack_enabled())
+	if (ip_vs_conntrack_enabled(ipvs))
 		cp->flags |= IP_VS_CONN_F_NFCT;
 
 	/* Hash it in the ip_vs_conn_tab finally */
@@ -884,17 +893,22 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
  *	/proc/net/ip_vs_conn entries
  */
 #ifdef CONFIG_PROC_FS
+struct ip_vs_iter_state {
+	struct seq_net_private p;
+	struct list_head *l;
+};
 
 static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
 {
 	int idx;
 	struct ip_vs_conn *cp;
+	struct ip_vs_iter_state *iter = seq->private;
 
 	for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
 		ct_read_lock_bh(idx);
 		list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
 			if (pos-- == 0) {
-				seq->private = &ip_vs_conn_tab[idx];
+				iter->l = &ip_vs_conn_tab[idx];
 			return cp;
 			}
 		}
@@ -906,14 +920,17 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
 
 static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	seq->private = NULL;
+	struct ip_vs_iter_state *iter = seq->private;
+
+	iter->l = NULL;
 	return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN;
 }
 
 static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct ip_vs_conn *cp = v;
-	struct list_head *e, *l = seq->private;
+	struct ip_vs_iter_state *iter = seq->private;
+	struct list_head *e, *l = iter->l;
 	int idx;
 
 	++*pos;
@@ -930,18 +947,19 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	while (++idx < ip_vs_conn_tab_size) {
 		ct_read_lock_bh(idx);
 		list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
-			seq->private = &ip_vs_conn_tab[idx];
+			iter->l = &ip_vs_conn_tab[idx];
 			return cp;
 		}
 		ct_read_unlock_bh(idx);
 	}
-	seq->private = NULL;
+	iter->l = NULL;
 	return NULL;
 }
 
 static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
 {
-	struct list_head *l = seq->private;
+	struct ip_vs_iter_state *iter = seq->private;
+	struct list_head *l = iter->l;
 
 	if (l)
 		ct_read_unlock_bh(l - ip_vs_conn_tab);
@@ -955,18 +973,19 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
    "Pro FromIP   FPrt ToIP     TPrt DestIP   DPrt State       Expires PEName PEData\n");
 	else {
 		const struct ip_vs_conn *cp = v;
+		struct net *net = seq_file_net(seq);
 		char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3];
 		size_t len = 0;
 
-		if (cp->dest && cp->pe_data &&
-		    cp->dest->svc->pe->show_pe_data) {
+		if (!ip_vs_conn_net_eq(cp, net))
+			return 0;
+		if (cp->pe_data) {
 			pe_data[0] = ' ';
-			len = strlen(cp->dest->svc->pe->name);
-			memcpy(pe_data + 1, cp->dest->svc->pe->name, len);
+			len = strlen(cp->pe->name);
+			memcpy(pe_data + 1, cp->pe->name, len);
 			pe_data[len + 1] = ' ';
 			len += 2;
-			len += cp->dest->svc->pe->show_pe_data(cp,
-							       pe_data + len);
+			len += cp->pe->show_pe_data(cp, pe_data + len);
 		}
 		pe_data[len] = '\0';
 
@@ -1004,7 +1023,8 @@ static const struct seq_operations ip_vs_conn_seq_ops = {
 
 static int ip_vs_conn_open(struct inode *inode, struct file *file)
 {
-	return seq_open(file, &ip_vs_conn_seq_ops);
+	return seq_open_net(inode, file, &ip_vs_conn_seq_ops,
+			    sizeof(struct ip_vs_iter_state));
 }
 
 static const struct file_operations ip_vs_conn_fops = {
@@ -1031,6 +1051,10 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
    "Pro FromIP   FPrt ToIP     TPrt DestIP   DPrt State       Origin Expires\n");
 	else {
 		const struct ip_vs_conn *cp = v;
+		struct net *net = seq_file_net(seq);
+
+		if (!ip_vs_conn_net_eq(cp, net))
+			return 0;
 
 #ifdef CONFIG_IP_VS_IPV6
 		if (cp->af == AF_INET6)
@@ -1067,7 +1091,8 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {
 
 static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
 {
-	return seq_open(file, &ip_vs_conn_sync_seq_ops);
+	return seq_open_net(inode, file, &ip_vs_conn_sync_seq_ops,
+			    sizeof(struct ip_vs_iter_state));
 }
 
 static const struct file_operations ip_vs_conn_sync_fops = {
@@ -1113,7 +1138,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
 }
 
 /* Called from keventd and must protect itself from softirqs */
-void ip_vs_random_dropentry(void)
+void ip_vs_random_dropentry(struct net *net)
 {
 	int idx;
 	struct ip_vs_conn *cp;
@@ -1133,7 +1158,8 @@ void ip_vs_random_dropentry(void)
 			if (cp->flags & IP_VS_CONN_F_TEMPLATE)
 				/* connection template */
 				continue;
-
+			if (!ip_vs_conn_net_eq(cp, net))
+				continue;
 			if (cp->protocol == IPPROTO_TCP) {
 				switch(cp->state) {
 				case IP_VS_TCP_S_SYN_RECV:
@@ -1168,12 +1194,13 @@ void ip_vs_random_dropentry(void)
 /*
  *      Flush all the connection entries in the ip_vs_conn_tab
  */
-static void ip_vs_conn_flush(void)
+static void ip_vs_conn_flush(struct net *net)
 {
 	int idx;
 	struct ip_vs_conn *cp;
+	struct netns_ipvs *ipvs = net_ipvs(net);
 
-  flush_again:
+flush_again:
 	for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
 		/*
 		 *  Lock is actually needed in this loop.
@@ -1181,7 +1208,8 @@ static void ip_vs_conn_flush(void)
 		ct_write_lock_bh(idx);
 
 		list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
-
+			if (!ip_vs_conn_net_eq(cp, net))
+				continue;
 			IP_VS_DBG(4, "del connection\n");
 			ip_vs_conn_expire_now(cp);
 			if (cp->control) {
@@ -1194,16 +1222,41 @@ static void ip_vs_conn_flush(void)
 
 	/* the counter may be not NULL, because maybe some conn entries
 	   are run by slow timer handler or unhashed but still referred */
-	if (atomic_read(&ip_vs_conn_count) != 0) {
+	if (atomic_read(&ipvs->conn_count) != 0) {
 		schedule();
 		goto flush_again;
 	}
 }
+/*
+ * per netns init and exit
+ */
+int __net_init __ip_vs_conn_init(struct net *net)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	atomic_set(&ipvs->conn_count, 0);
+
+	proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops);
+	proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
+	return 0;
+}
 
+static void __net_exit __ip_vs_conn_cleanup(struct net *net)
+{
+	/* flush all the connection entries first */
+	ip_vs_conn_flush(net);
+	proc_net_remove(net, "ip_vs_conn");
+	proc_net_remove(net, "ip_vs_conn_sync");
+}
+static struct pernet_operations ipvs_conn_ops = {
+	.init = __ip_vs_conn_init,
+	.exit = __ip_vs_conn_cleanup,
+};
 
 int __init ip_vs_conn_init(void)
 {
 	int idx;
+	int retc;
 
 	/* Compute size and mask */
 	ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
@@ -1241,24 +1294,18 @@ int __init ip_vs_conn_init(void)
 		rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
 	}
 
-	proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops);
-	proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
+	retc = register_pernet_subsys(&ipvs_conn_ops);
 
 	/* calculate the random value for connection hash */
 	get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
 
-	return 0;
+	return retc;
 }
 
-
 void ip_vs_conn_cleanup(void)
 {
-	/* flush all the connection entries first */
-	ip_vs_conn_flush();
-
+	unregister_pernet_subsys(&ipvs_conn_ops);
 	/* Release the empty cache */
 	kmem_cache_destroy(ip_vs_conn_cachep);
-	proc_net_remove(&init_net, "ip_vs_conn");
-	proc_net_remove(&init_net, "ip_vs_conn_sync");
 	vfree(ip_vs_conn_tab);
 }
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index b4e51e9c5a0..d889f4f6be9 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -41,6 +41,7 @@
 #include <net/icmp.h>                   /* for icmp_send */
 #include <net/route.h>
 #include <net/ip6_checksum.h>
+#include <net/netns/generic.h>		/* net_generic() */
 
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
@@ -68,6 +69,12 @@ EXPORT_SYMBOL(ip_vs_conn_put);
 EXPORT_SYMBOL(ip_vs_get_debug_level);
 #endif
 
+int ip_vs_net_id __read_mostly;
+#ifdef IP_VS_GENERIC_NETNS
+EXPORT_SYMBOL(ip_vs_net_id);
+#endif
+/* netns cnt used for uniqueness */
+static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0);
 
 /* ID used in ICMP lookups */
 #define icmp_id(icmph)          (((icmph)->un).echo.id)
@@ -108,21 +115,28 @@ static inline void
 ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 {
 	struct ip_vs_dest *dest = cp->dest;
+	struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+
 	if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
-		spin_lock(&dest->stats.lock);
-		dest->stats.ustats.inpkts++;
-		dest->stats.ustats.inbytes += skb->len;
-		spin_unlock(&dest->stats.lock);
-
-		spin_lock(&dest->svc->stats.lock);
-		dest->svc->stats.ustats.inpkts++;
-		dest->svc->stats.ustats.inbytes += skb->len;
-		spin_unlock(&dest->svc->stats.lock);
-
-		spin_lock(&ip_vs_stats.lock);
-		ip_vs_stats.ustats.inpkts++;
-		ip_vs_stats.ustats.inbytes += skb->len;
-		spin_unlock(&ip_vs_stats.lock);
+		struct ip_vs_cpu_stats *s;
+
+		s = this_cpu_ptr(dest->stats.cpustats);
+		s->ustats.inpkts++;
+		u64_stats_update_begin(&s->syncp);
+		s->ustats.inbytes += skb->len;
+		u64_stats_update_end(&s->syncp);
+
+		s = this_cpu_ptr(dest->svc->stats.cpustats);
+		s->ustats.inpkts++;
+		u64_stats_update_begin(&s->syncp);
+		s->ustats.inbytes += skb->len;
+		u64_stats_update_end(&s->syncp);
+
+		s = this_cpu_ptr(ipvs->cpustats);
+		s->ustats.inpkts++;
+		u64_stats_update_begin(&s->syncp);
+		s->ustats.inbytes += skb->len;
+		u64_stats_update_end(&s->syncp);
 	}
 }
 
@@ -131,21 +145,28 @@ static inline void
 ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 {
 	struct ip_vs_dest *dest = cp->dest;
+	struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+
 	if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
-		spin_lock(&dest->stats.lock);
-		dest->stats.ustats.outpkts++;
-		dest->stats.ustats.outbytes += skb->len;
-		spin_unlock(&dest->stats.lock);
-
-		spin_lock(&dest->svc->stats.lock);
-		dest->svc->stats.ustats.outpkts++;
-		dest->svc->stats.ustats.outbytes += skb->len;
-		spin_unlock(&dest->svc->stats.lock);
-
-		spin_lock(&ip_vs_stats.lock);
-		ip_vs_stats.ustats.outpkts++;
-		ip_vs_stats.ustats.outbytes += skb->len;
-		spin_unlock(&ip_vs_stats.lock);
+		struct ip_vs_cpu_stats *s;
+
+		s = this_cpu_ptr(dest->stats.cpustats);
+		s->ustats.outpkts++;
+		u64_stats_update_begin(&s->syncp);
+		s->ustats.outbytes += skb->len;
+		u64_stats_update_end(&s->syncp);
+
+		s = this_cpu_ptr(dest->svc->stats.cpustats);
+		s->ustats.outpkts++;
+		u64_stats_update_begin(&s->syncp);
+		s->ustats.outbytes += skb->len;
+		u64_stats_update_end(&s->syncp);
+
+		s = this_cpu_ptr(ipvs->cpustats);
+		s->ustats.outpkts++;
+		u64_stats_update_begin(&s->syncp);
+		s->ustats.outbytes += skb->len;
+		u64_stats_update_end(&s->syncp);
 	}
 }
 
@@ -153,41 +174,44 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 static inline void
 ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
 {
-	spin_lock(&cp->dest->stats.lock);
-	cp->dest->stats.ustats.conns++;
-	spin_unlock(&cp->dest->stats.lock);
+	struct netns_ipvs *ipvs = net_ipvs(svc->net);
+	struct ip_vs_cpu_stats *s;
+
+	s = this_cpu_ptr(cp->dest->stats.cpustats);
+	s->ustats.conns++;
 
-	spin_lock(&svc->stats.lock);
-	svc->stats.ustats.conns++;
-	spin_unlock(&svc->stats.lock);
+	s = this_cpu_ptr(svc->stats.cpustats);
+	s->ustats.conns++;
 
-	spin_lock(&ip_vs_stats.lock);
-	ip_vs_stats.ustats.conns++;
-	spin_unlock(&ip_vs_stats.lock);
+	s = this_cpu_ptr(ipvs->cpustats);
+	s->ustats.conns++;
 }
 
 
 static inline int
 ip_vs_set_state(struct ip_vs_conn *cp, int direction,
 		const struct sk_buff *skb,
-		struct ip_vs_protocol *pp)
+		struct ip_vs_proto_data *pd)
 {
-	if (unlikely(!pp->state_transition))
+	if (unlikely(!pd->pp->state_transition))
 		return 0;
-	return pp->state_transition(cp, direction, skb, pp);
+	return pd->pp->state_transition(cp, direction, skb, pd);
 }
 
-static inline void
+static inline int
 ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
 			      struct sk_buff *skb, int protocol,
 			      const union nf_inet_addr *caddr, __be16 cport,
 			      const union nf_inet_addr *vaddr, __be16 vport,
 			      struct ip_vs_conn_param *p)
 {
-	ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p);
+	ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,
+			      vport, p);
 	p->pe = svc->pe;
 	if (p->pe && p->pe->fill_param)
-		p->pe->fill_param(p, skb);
+		return p->pe->fill_param(p, skb);
+
+	return 0;
 }
 
 /*
@@ -200,7 +224,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
 static struct ip_vs_conn *
 ip_vs_sched_persist(struct ip_vs_service *svc,
 		    struct sk_buff *skb,
-		    __be16 ports[2])
+		    __be16 src_port, __be16 dst_port, int *ignored)
 {
 	struct ip_vs_conn *cp = NULL;
 	struct ip_vs_iphdr iph;
@@ -224,8 +248,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 
 	IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
 		      "mnet %s\n",
-		      IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]),
-		      IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]),
+		      IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port),
+		      IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port),
 		      IP_VS_DBG_ADDR(svc->af, &snet));
 
 	/*
@@ -247,14 +271,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 		const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
 		__be16 vport = 0;
 
-		if (ports[1] == svc->port) {
+		if (dst_port == svc->port) {
 			/* non-FTP template:
 			 * <protocol, caddr, 0, vaddr, vport, daddr, dport>
 			 * FTP template:
 			 * <protocol, caddr, 0, vaddr, 0, daddr, 0>
 			 */
 			if (svc->port != FTPPORT)
-				vport = ports[1];
+				vport = dst_port;
 		} else {
 			/* Note: persistent fwmark-based services and
 			 * persistent port zero service are handled here.
@@ -268,24 +292,31 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 				vaddr = &fwmark;
 			}
 		}
-		ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
-					      vaddr, vport, &param);
+		/* return *ignored = -1 so NF_DROP can be used */
+		if (ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
+						  vaddr, vport, &param) < 0) {
+			*ignored = -1;
+			return NULL;
+		}
 	}
 
 	/* Check if a template already exists */
 	ct = ip_vs_ct_in_get(&param);
 	if (!ct || !ip_vs_check_template(ct)) {
-		/* No template found or the dest of the connection
+		/*
+		 * No template found or the dest of the connection
 		 * template is not available.
+		 * return *ignored=0 i.e. ICMP and NF_DROP
 		 */
 		dest = svc->scheduler->schedule(svc, skb);
 		if (!dest) {
 			IP_VS_DBG(1, "p-schedule: no dest found.\n");
 			kfree(param.pe_data);
+			*ignored = 0;
 			return NULL;
 		}
 
-		if (ports[1] == svc->port && svc->port != FTPPORT)
+		if (dst_port == svc->port && svc->port != FTPPORT)
 			dport = dest->port;
 
 		/* Create a template
@@ -293,9 +324,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 		 * and thus param.pe_data will be destroyed
 		 * when the template expires */
 		ct = ip_vs_conn_new(&param, &dest->addr, dport,
-				    IP_VS_CONN_F_TEMPLATE, dest);
+				    IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
 		if (ct == NULL) {
 			kfree(param.pe_data);
+			*ignored = -1;
 			return NULL;
 		}
 
@@ -306,7 +338,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 		kfree(param.pe_data);
 	}
 
-	dport = ports[1];
+	dport = dst_port;
 	if (dport == svc->port && dest->port)
 		dport = dest->port;
 
@@ -317,11 +349,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 	/*
 	 *    Create a new connection according to the template
 	 */
-	ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, ports[0],
-			      &iph.daddr, ports[1], &param);
-	cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest);
+	ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr,
+			      src_port, &iph.daddr, dst_port, &param);
+
+	cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark);
 	if (cp == NULL) {
 		ip_vs_conn_put(ct);
+		*ignored = -1;
 		return NULL;
 	}
 
@@ -341,11 +375,27 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
  *  It selects a server according to the virtual service, and
  *  creates a connection entry.
  *  Protocols supported: TCP, UDP
+ *
+ *  Usage of *ignored
+ *
+ * 1 :   protocol tried to schedule (eg. on SYN), found svc but the
+ *       svc/scheduler decides that this packet should be accepted with
+ *       NF_ACCEPT because it must not be scheduled.
+ *
+ * 0 :   scheduler can not find destination, so try bypass or
+ *       return ICMP and then NF_DROP (ip_vs_leave).
+ *
+ * -1 :  scheduler tried to schedule but fatal error occurred, eg.
+ *       ip_vs_conn_new failure (ENOMEM) or ip_vs_sip_fill_param
+ *       failure such as missing Call-ID, ENOMEM on skb_linearize
+ *       or pe_data. In this case we should return NF_DROP without
+ *       any attempts to send ICMP with ip_vs_leave.
  */
 struct ip_vs_conn *
 ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
-	       struct ip_vs_protocol *pp, int *ignored)
+	       struct ip_vs_proto_data *pd, int *ignored)
 {
+	struct ip_vs_protocol *pp = pd->pp;
 	struct ip_vs_conn *cp = NULL;
 	struct ip_vs_iphdr iph;
 	struct ip_vs_dest *dest;
@@ -371,12 +421,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
 	}
 
 	/*
-	 * Do not schedule replies from local real server. It is risky
-	 * for fwmark services but mostly for persistent services.
+	 *    Do not schedule replies from local real server.
 	 */
 	if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
-	    (svc->flags & IP_VS_SVC_F_PERSISTENT || svc->fwmark) &&
-	    (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) {
+	    (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) {
 		IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
 			      "Not scheduling reply for existing connection");
 		__ip_vs_conn_put(cp);
@@ -386,10 +434,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
 	/*
 	 *    Persistent service
 	 */
-	if (svc->flags & IP_VS_SVC_F_PERSISTENT) {
-		*ignored = 0;
-		return ip_vs_sched_persist(svc, skb, pptr);
-	}
+	if (svc->flags & IP_VS_SVC_F_PERSISTENT)
+		return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored);
+
+	*ignored = 0;
 
 	/*
 	 *    Non-persistent service
@@ -402,8 +450,6 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
 		return NULL;
 	}
 
-	*ignored = 0;
-
 	dest = svc->scheduler->schedule(svc, skb);
 	if (dest == NULL) {
 		IP_VS_DBG(1, "Schedule: no dest found.\n");
@@ -419,13 +465,17 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
 	 */
 	{
 		struct ip_vs_conn_param p;
-		ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr,
-				      pptr[0], &iph.daddr, pptr[1], &p);
+
+		ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
+				      &iph.saddr, pptr[0], &iph.daddr, pptr[1],
+				      &p);
 		cp = ip_vs_conn_new(&p, &dest->addr,
 				    dest->port ? dest->port : pptr[1],
-				    flags, dest);
-		if (!cp)
+				    flags, dest, skb->mark);
+		if (!cp) {
+			*ignored = -1;
 			return NULL;
+		}
 	}
 
 	IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
@@ -447,11 +497,14 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
  *  no destination is available for a new connection.
  */
 int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
-		struct ip_vs_protocol *pp)
+		struct ip_vs_proto_data *pd)
 {
+	struct net *net;
+	struct netns_ipvs *ipvs;
 	__be16 _ports[2], *pptr;
 	struct ip_vs_iphdr iph;
 	int unicast;
+
 	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
 
 	pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
@@ -459,18 +512,20 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 		ip_vs_service_put(svc);
 		return NF_DROP;
 	}
+	net = skb_net(skb);
 
 #ifdef CONFIG_IP_VS_IPV6
 	if (svc->af == AF_INET6)
 		unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST;
 	else
 #endif
-		unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST);
+		unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST);
 
 	/* if it is fwmark-based service, the cache_bypass sysctl is up
 	   and the destination is a non-local unicast, then create
 	   a cache_bypass connection entry */
-	if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
+	ipvs = net_ipvs(net);
+	if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) {
 		int ret, cs;
 		struct ip_vs_conn *cp;
 		unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
@@ -484,12 +539,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 		IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
 		{
 			struct ip_vs_conn_param p;
-			ip_vs_conn_fill_param(svc->af, iph.protocol,
+			ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
 					      &iph.saddr, pptr[0],
 					      &iph.daddr, pptr[1], &p);
 			cp = ip_vs_conn_new(&p, &daddr, 0,
 					    IP_VS_CONN_F_BYPASS | flags,
-					    NULL);
+					    NULL, skb->mark);
 			if (!cp)
 				return NF_DROP;
 		}
@@ -498,10 +553,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 		ip_vs_in_stats(cp, skb);
 
 		/* set state */
-		cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
+		cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
 
 		/* transmit the first SYN packet */
-		ret = cp->packet_xmit(skb, cp, pp);
+		ret = cp->packet_xmit(skb, cp, pd->pp);
 		/* do not touch skb anymore */
 
 		atomic_inc(&cp->in_pkts);
@@ -682,6 +737,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
 				struct ip_vs_protocol *pp,
 				unsigned int offset, unsigned int ihl)
 {
+	struct netns_ipvs *ipvs;
 	unsigned int verdict = NF_DROP;
 
 	if (IP_VS_FWD_METHOD(cp) != 0) {
@@ -703,6 +759,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
 	if (!skb_make_writable(skb, offset))
 		goto out;
 
+	ipvs = net_ipvs(skb_net(skb));
+
 #ifdef CONFIG_IP_VS_IPV6
 	if (af == AF_INET6)
 		ip_vs_nat_icmp_v6(skb, pp, cp, 1);
@@ -712,11 +770,11 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
 
 #ifdef CONFIG_IP_VS_IPV6
 	if (af == AF_INET6) {
-		if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
+		if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
 			goto out;
 	} else
 #endif
-		if ((sysctl_ip_vs_snat_reroute ||
+		if ((ipvs->sysctl_snat_reroute ||
 		     skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
 		    ip_route_me_harder(skb, RTN_LOCAL) != 0)
 			goto out;
@@ -808,7 +866,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
 
 	ip_vs_fill_iphdr(AF_INET, cih, &ciph);
 	/* The embedded headers contain source and dest in reverse order */
-	cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
+	cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1);
 	if (!cp)
 		return NF_ACCEPT;
 
@@ -885,7 +943,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
 
 	ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
 	/* The embedded headers contain source and dest in reverse order */
-	cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
+	cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1);
 	if (!cp)
 		return NF_ACCEPT;
 
@@ -924,9 +982,12 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
  * Used for NAT and local client.
  */
 static unsigned int
-handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
 		struct ip_vs_conn *cp, int ihl)
 {
+	struct ip_vs_protocol *pp = pd->pp;
+	struct netns_ipvs *ipvs;
+
 	IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
 
 	if (!skb_make_writable(skb, ihl))
@@ -961,13 +1022,15 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 	 * if it came from this machine itself.  So re-compute
 	 * the routing information.
 	 */
+	ipvs = net_ipvs(skb_net(skb));
+
 #ifdef CONFIG_IP_VS_IPV6
 	if (af == AF_INET6) {
-		if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
+		if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
 			goto drop;
 	} else
 #endif
-		if ((sysctl_ip_vs_snat_reroute ||
+		if ((ipvs->sysctl_snat_reroute ||
 		     skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
 		    ip_route_me_harder(skb, RTN_LOCAL) != 0)
 			goto drop;
@@ -975,7 +1038,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 	IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
 
 	ip_vs_out_stats(cp, skb);
-	ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
+	ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd);
 	skb->ipvs_property = 1;
 	if (!(cp->flags & IP_VS_CONN_F_NFCT))
 		ip_vs_notrack(skb);
@@ -999,9 +1062,12 @@ drop:
 static unsigned int
 ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 {
+	struct net *net = NULL;
 	struct ip_vs_iphdr iph;
 	struct ip_vs_protocol *pp;
+	struct ip_vs_proto_data *pd;
 	struct ip_vs_conn *cp;
+	struct netns_ipvs *ipvs;
 
 	EnterFunction(11);
 
@@ -1022,6 +1088,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 	if (unlikely(!skb_dst(skb)))
 		return NF_ACCEPT;
 
+	net = skb_net(skb);
 	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
 #ifdef CONFIG_IP_VS_IPV6
 	if (af == AF_INET6) {
@@ -1045,9 +1112,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 			ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
 		}
 
-	pp = ip_vs_proto_get(iph.protocol);
-	if (unlikely(!pp))
+	pd = ip_vs_proto_data_get(net, iph.protocol);
+	if (unlikely(!pd))
 		return NF_ACCEPT;
+	pp = pd->pp;
 
 	/* reassemble IP fragments */
 #ifdef CONFIG_IP_VS_IPV6
@@ -1073,11 +1141,12 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 	/*
 	 * Check if the packet belongs to an existing entry
 	 */
-	cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
+	cp = pp->conn_out_get(af, skb, &iph, iph.len, 0);
+	ipvs = net_ipvs(net);
 
 	if (likely(cp))
-		return handle_response(af, skb, pp, cp, iph.len);
-	if (sysctl_ip_vs_nat_icmp_send &&
+		return handle_response(af, skb, pd, cp, iph.len);
+	if (ipvs->sysctl_nat_icmp_send &&
 	    (pp->protocol == IPPROTO_TCP ||
 	     pp->protocol == IPPROTO_UDP ||
 	     pp->protocol == IPPROTO_SCTP)) {
@@ -1087,7 +1156,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 					  sizeof(_ports), _ports);
 		if (pptr == NULL)
 			return NF_ACCEPT;	/* Not for me */
-		if (ip_vs_lookup_real_service(af, iph.protocol,
+		if (ip_vs_lookup_real_service(net, af, iph.protocol,
 					      &iph.saddr,
 					      pptr[0])) {
 			/*
@@ -1202,12 +1271,14 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
 static int
 ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 {
+	struct net *net = NULL;
 	struct iphdr *iph;
 	struct icmphdr	_icmph, *ic;
 	struct iphdr	_ciph, *cih;	/* The ip header contained within the ICMP */
 	struct ip_vs_iphdr ciph;
 	struct ip_vs_conn *cp;
 	struct ip_vs_protocol *pp;
+	struct ip_vs_proto_data *pd;
 	unsigned int offset, ihl, verdict;
 	union nf_inet_addr snet;
 
@@ -1249,9 +1320,11 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 	if (cih == NULL)
 		return NF_ACCEPT; /* The packet looks wrong, ignore */
 
-	pp = ip_vs_proto_get(cih->protocol);
-	if (!pp)
+	net = skb_net(skb);
+	pd = ip_vs_proto_data_get(net, cih->protocol);
+	if (!pd)
 		return NF_ACCEPT;
+	pp = pd->pp;
 
 	/* Is the embedded protocol header present? */
 	if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
@@ -1265,10 +1338,10 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 
 	ip_vs_fill_iphdr(AF_INET, cih, &ciph);
 	/* The embedded headers contain source and dest in reverse order */
-	cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1);
+	cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1);
 	if (!cp) {
 		/* The packet could also belong to a local client */
-		cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
+		cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1);
 		if (cp) {
 			snet.ip = iph->saddr;
 			return handle_response_icmp(AF_INET, skb, &snet,
@@ -1312,6 +1385,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 static int
 ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
 {
+	struct net *net = NULL;
 	struct ipv6hdr *iph;
 	struct icmp6hdr	_icmph, *ic;
 	struct ipv6hdr	_ciph, *cih;	/* The ip header contained
@@ -1319,6 +1393,7 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
 	struct ip_vs_iphdr ciph;
 	struct ip_vs_conn *cp;
 	struct ip_vs_protocol *pp;
+	struct ip_vs_proto_data *pd;
 	unsigned int offset, verdict;
 	union nf_inet_addr snet;
 	struct rt6_info *rt;
@@ -1361,9 +1436,11 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
 	if (cih == NULL)
 		return NF_ACCEPT; /* The packet looks wrong, ignore */
 
-	pp = ip_vs_proto_get(cih->nexthdr);
-	if (!pp)
+	net = skb_net(skb);
+	pd = ip_vs_proto_data_get(net, cih->nexthdr);
+	if (!pd)
 		return NF_ACCEPT;
+	pp = pd->pp;
 
 	/* Is the embedded protocol header present? */
 	/* TODO: we don't support fragmentation at the moment anyways */
@@ -1377,10 +1454,10 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
 
 	ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
 	/* The embedded headers contain source and dest in reverse order */
-	cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1);
+	cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1);
 	if (!cp) {
 		/* The packet could also belong to a local client */
-		cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
+		cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1);
 		if (cp) {
 			ipv6_addr_copy(&snet.in6, &iph->saddr);
 			return handle_response_icmp(AF_INET6, skb, &snet,
@@ -1423,10 +1500,13 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
 static unsigned int
 ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 {
+	struct net *net;
 	struct ip_vs_iphdr iph;
 	struct ip_vs_protocol *pp;
+	struct ip_vs_proto_data *pd;
 	struct ip_vs_conn *cp;
 	int ret, restart, pkts;
+	struct netns_ipvs *ipvs;
 
 	/* Already marked as IPVS request or reply? */
 	if (skb->ipvs_property)
@@ -1480,20 +1560,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 			ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
 		}
 
+	net = skb_net(skb);
 	/* Protocol supported? */
-	pp = ip_vs_proto_get(iph.protocol);
-	if (unlikely(!pp))
+	pd = ip_vs_proto_data_get(net, iph.protocol);
+	if (unlikely(!pd))
 		return NF_ACCEPT;
-
+	pp = pd->pp;
 	/*
 	 * Check if the packet belongs to an existing connection entry
 	 */
-	cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0);
+	cp = pp->conn_in_get(af, skb, &iph, iph.len, 0);
 
 	if (unlikely(!cp)) {
 		int v;
 
-		if (!pp->conn_schedule(af, skb, pp, &v, &cp))
+		if (!pp->conn_schedule(af, skb, pd, &v, &cp))
 			return v;
 	}
 
@@ -1505,12 +1586,13 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 	}
 
 	IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
-
+	net = skb_net(skb);
+	ipvs = net_ipvs(net);
 	/* Check the server status */
 	if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
 		/* the destination server is not available */
 
-		if (sysctl_ip_vs_expire_nodest_conn) {
+		if (ipvs->sysctl_expire_nodest_conn) {
 			/* try to expire the connection immediately */
 			ip_vs_conn_expire_now(cp);
 		}
@@ -1521,7 +1603,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 	}
 
 	ip_vs_in_stats(cp, skb);
-	restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
+	restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
 	if (cp->packet_xmit)
 		ret = cp->packet_xmit(skb, cp, pp);
 		/* do not touch skb anymore */
@@ -1535,35 +1617,41 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 	 *
 	 * Sync connection if it is about to close to
 	 * encorage the standby servers to update the connections timeout
+	 *
+	 * For ONE_PKT let ip_vs_sync_conn() do the filter work.
 	 */
-	pkts = atomic_add_return(1, &cp->in_pkts);
-	if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+
+	if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+		pkts = ipvs->sysctl_sync_threshold[0];
+	else
+		pkts = atomic_add_return(1, &cp->in_pkts);
+
+	if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
 	    cp->protocol == IPPROTO_SCTP) {
 		if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
-			(pkts % sysctl_ip_vs_sync_threshold[1]
-			 == sysctl_ip_vs_sync_threshold[0])) ||
+			(pkts % ipvs->sysctl_sync_threshold[1]
+			 == ipvs->sysctl_sync_threshold[0])) ||
 				(cp->old_state != cp->state &&
 				 ((cp->state == IP_VS_SCTP_S_CLOSED) ||
 				  (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
 				  (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
-			ip_vs_sync_conn(cp);
+			ip_vs_sync_conn(net, cp);
 			goto out;
 		}
 	}
 
 	/* Keep this block last: TCP and others with pp->num_states <= 1 */
-	else if (af == AF_INET &&
-	    (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+	else if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
 	    (((cp->protocol != IPPROTO_TCP ||
 	       cp->state == IP_VS_TCP_S_ESTABLISHED) &&
-	      (pkts % sysctl_ip_vs_sync_threshold[1]
-	       == sysctl_ip_vs_sync_threshold[0])) ||
+	      (pkts % ipvs->sysctl_sync_threshold[1]
+	       == ipvs->sysctl_sync_threshold[0])) ||
 	     ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
 	      ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
 	       (cp->state == IP_VS_TCP_S_CLOSE) ||
 	       (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
 	       (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
-		ip_vs_sync_conn(cp);
+		ip_vs_sync_conn(net, cp);
 out:
 	cp->old_state = cp->state;
 
@@ -1782,7 +1870,39 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 	},
 #endif
 };
+/*
+ *	Per-netns init: bind the net_generic() netns_ipvs area to this net.
+ */
+static int __net_init __ip_vs_init(struct net *net)
+{
+	struct netns_ipvs *ipvs;
+
+	ipvs = net_generic(net, ip_vs_net_id);
+	if (ipvs == NULL) {
+		pr_err("%s(): no memory.\n", __func__);
+		return -ENOMEM;
+	}
+	ipvs->net = net;
+	/* Counters used for creating unique names */
+	ipvs->gen = atomic_read(&ipvs_netns_cnt);
+	atomic_inc(&ipvs_netns_cnt);
+	net->ipvs = ipvs;
+	printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n",
+			 sizeof(struct netns_ipvs), ipvs->gen);
+	return 0;
+}
 
+static void __net_exit __ip_vs_cleanup(struct net *net)
+{
+	IP_VS_DBG(10, "ipvs netns %d released\n", net_ipvs(net)->gen);
+}
+
+static struct pernet_operations ipvs_core_ops = {
+	.init = __ip_vs_init,
+	.exit = __ip_vs_cleanup,
+	.id   = &ip_vs_net_id,
+	.size = sizeof(struct netns_ipvs),
+};
 
 /*
  *	Initialize IP Virtual Server
@@ -1791,8 +1911,11 @@ static int __init ip_vs_init(void)
 {
 	int ret;
 
-	ip_vs_estimator_init();
+	ret = register_pernet_subsys(&ipvs_core_ops);	/* Alloc ip_vs struct */
+	if (ret < 0)
+		return ret;
 
+	ip_vs_estimator_init();
 	ret = ip_vs_control_init();
 	if (ret < 0) {
 		pr_err("can't setup control.\n");
@@ -1813,15 +1936,23 @@ static int __init ip_vs_init(void)
 		goto cleanup_app;
 	}
 
+	ret = ip_vs_sync_init();
+	if (ret < 0) {
+		pr_err("can't setup sync data.\n");
+		goto cleanup_conn;
+	}
+
 	ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
 	if (ret < 0) {
 		pr_err("can't register hooks.\n");
-		goto cleanup_conn;
+		goto cleanup_sync;
 	}
 
 	pr_info("ipvs loaded.\n");
 	return ret;
 
+cleanup_sync:
+	ip_vs_sync_cleanup();
   cleanup_conn:
 	ip_vs_conn_cleanup();
   cleanup_app:
@@ -1831,17 +1962,20 @@ static int __init ip_vs_init(void)
 	ip_vs_control_cleanup();
   cleanup_estimator:
 	ip_vs_estimator_cleanup();
+	unregister_pernet_subsys(&ipvs_core_ops);	/* free ip_vs struct */
 	return ret;
 }
 
 static void __exit ip_vs_cleanup(void)
 {
 	nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+	ip_vs_sync_cleanup();
 	ip_vs_conn_cleanup();
 	ip_vs_app_cleanup();
 	ip_vs_protocol_cleanup();
 	ip_vs_control_cleanup();
 	ip_vs_estimator_cleanup();
+	unregister_pernet_subsys(&ipvs_core_ops);	/* free ip_vs struct */
 	pr_info("ipvs unloaded.\n");
 }
 
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 22f7ad5101a..98df59a1245 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -38,6 +38,7 @@
 #include <linux/mutex.h>
 
 #include <net/net_namespace.h>
+#include <linux/nsproxy.h>
 #include <net/ip.h>
 #ifdef CONFIG_IP_VS_IPV6
 #include <net/ipv6.h>
@@ -57,42 +58,7 @@ static DEFINE_MUTEX(__ip_vs_mutex);
 /* lock for service table */
 static DEFINE_RWLOCK(__ip_vs_svc_lock);
 
-/* lock for table with the real services */
-static DEFINE_RWLOCK(__ip_vs_rs_lock);
-
-/* lock for state and timeout tables */
-static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
-
-/* lock for drop entry handling */
-static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
-
-/* lock for drop packet handling */
-static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
-
-/* 1/rate drop and drop-entry variables */
-int ip_vs_drop_rate = 0;
-int ip_vs_drop_counter = 0;
-static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
-
-/* number of virtual services */
-static int ip_vs_num_services = 0;
-
 /* sysctl variables */
-static int sysctl_ip_vs_drop_entry = 0;
-static int sysctl_ip_vs_drop_packet = 0;
-static int sysctl_ip_vs_secure_tcp = 0;
-static int sysctl_ip_vs_amemthresh = 1024;
-static int sysctl_ip_vs_am_droprate = 10;
-int sysctl_ip_vs_cache_bypass = 0;
-int sysctl_ip_vs_expire_nodest_conn = 0;
-int sysctl_ip_vs_expire_quiescent_template = 0;
-int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
-int sysctl_ip_vs_nat_icmp_send = 0;
-#ifdef CONFIG_IP_VS_NFCT
-int sysctl_ip_vs_conntrack;
-#endif
-int sysctl_ip_vs_snat_reroute = 1;
-
 
 #ifdef CONFIG_IP_VS_DEBUG
 static int sysctl_ip_vs_debug_level = 0;
@@ -105,7 +71,8 @@ int ip_vs_get_debug_level(void)
 
 #ifdef CONFIG_IP_VS_IPV6
 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
-static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
+static int __ip_vs_addr_is_local_v6(struct net *net,
+				    const struct in6_addr *addr)
 {
 	struct rt6_info *rt;
 	struct flowi fl = {
@@ -114,7 +81,7 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
 		.fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
 	};
 
-	rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+	rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl);
 	if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
 			return 1;
 
@@ -125,7 +92,7 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
  *	update_defense_level is called from keventd and from sysctl,
  *	so it needs to protect itself from softirqs
  */
-static void update_defense_level(void)
+static void update_defense_level(struct netns_ipvs *ipvs)
 {
 	struct sysinfo i;
 	static int old_secure_tcp = 0;
@@ -141,73 +108,73 @@ static void update_defense_level(void)
 	/* si_swapinfo(&i); */
 	/* availmem = availmem - (i.totalswap - i.freeswap); */
 
-	nomem = (availmem < sysctl_ip_vs_amemthresh);
+	nomem = (availmem < ipvs->sysctl_amemthresh);
 
 	local_bh_disable();
 
 	/* drop_entry */
-	spin_lock(&__ip_vs_dropentry_lock);
-	switch (sysctl_ip_vs_drop_entry) {
+	spin_lock(&ipvs->dropentry_lock);
+	switch (ipvs->sysctl_drop_entry) {
 	case 0:
-		atomic_set(&ip_vs_dropentry, 0);
+		atomic_set(&ipvs->dropentry, 0);
 		break;
 	case 1:
 		if (nomem) {
-			atomic_set(&ip_vs_dropentry, 1);
-			sysctl_ip_vs_drop_entry = 2;
+			atomic_set(&ipvs->dropentry, 1);
+			ipvs->sysctl_drop_entry = 2;
 		} else {
-			atomic_set(&ip_vs_dropentry, 0);
+			atomic_set(&ipvs->dropentry, 0);
 		}
 		break;
 	case 2:
 		if (nomem) {
-			atomic_set(&ip_vs_dropentry, 1);
+			atomic_set(&ipvs->dropentry, 1);
 		} else {
-			atomic_set(&ip_vs_dropentry, 0);
-			sysctl_ip_vs_drop_entry = 1;
+			atomic_set(&ipvs->dropentry, 0);
+			ipvs->sysctl_drop_entry = 1;
 		};
 		break;
 	case 3:
-		atomic_set(&ip_vs_dropentry, 1);
+		atomic_set(&ipvs->dropentry, 1);
 		break;
 	}
-	spin_unlock(&__ip_vs_dropentry_lock);
+	spin_unlock(&ipvs->dropentry_lock);
 
 	/* drop_packet */
-	spin_lock(&__ip_vs_droppacket_lock);
-	switch (sysctl_ip_vs_drop_packet) {
+	spin_lock(&ipvs->droppacket_lock);
+	switch (ipvs->sysctl_drop_packet) {
 	case 0:
-		ip_vs_drop_rate = 0;
+		ipvs->drop_rate = 0;
 		break;
 	case 1:
 		if (nomem) {
-			ip_vs_drop_rate = ip_vs_drop_counter
-				= sysctl_ip_vs_amemthresh /
-				(sysctl_ip_vs_amemthresh-availmem);
-			sysctl_ip_vs_drop_packet = 2;
+			ipvs->drop_rate = ipvs->drop_counter
+				= ipvs->sysctl_amemthresh /
+				(ipvs->sysctl_amemthresh-availmem);
+			ipvs->sysctl_drop_packet = 2;
 		} else {
-			ip_vs_drop_rate = 0;
+			ipvs->drop_rate = 0;
 		}
 		break;
 	case 2:
 		if (nomem) {
-			ip_vs_drop_rate = ip_vs_drop_counter
-				= sysctl_ip_vs_amemthresh /
-				(sysctl_ip_vs_amemthresh-availmem);
+			ipvs->drop_rate = ipvs->drop_counter
+				= ipvs->sysctl_amemthresh /
+				(ipvs->sysctl_amemthresh-availmem);
 		} else {
-			ip_vs_drop_rate = 0;
-			sysctl_ip_vs_drop_packet = 1;
+			ipvs->drop_rate = 0;
+			ipvs->sysctl_drop_packet = 1;
 		}
 		break;
 	case 3:
-		ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
+		ipvs->drop_rate = ipvs->sysctl_am_droprate;
 		break;
 	}
-	spin_unlock(&__ip_vs_droppacket_lock);
+	spin_unlock(&ipvs->droppacket_lock);
 
 	/* secure_tcp */
-	spin_lock(&ip_vs_securetcp_lock);
-	switch (sysctl_ip_vs_secure_tcp) {
+	spin_lock(&ipvs->securetcp_lock);
+	switch (ipvs->sysctl_secure_tcp) {
 	case 0:
 		if (old_secure_tcp >= 2)
 			to_change = 0;
@@ -216,7 +183,7 @@ static void update_defense_level(void)
 		if (nomem) {
 			if (old_secure_tcp < 2)
 				to_change = 1;
-			sysctl_ip_vs_secure_tcp = 2;
+			ipvs->sysctl_secure_tcp = 2;
 		} else {
 			if (old_secure_tcp >= 2)
 				to_change = 0;
@@ -229,7 +196,7 @@ static void update_defense_level(void)
 		} else {
 			if (old_secure_tcp >= 2)
 				to_change = 0;
-			sysctl_ip_vs_secure_tcp = 1;
+			ipvs->sysctl_secure_tcp = 1;
 		}
 		break;
 	case 3:
@@ -237,10 +204,11 @@ static void update_defense_level(void)
 			to_change = 1;
 		break;
 	}
-	old_secure_tcp = sysctl_ip_vs_secure_tcp;
+	old_secure_tcp = ipvs->sysctl_secure_tcp;
 	if (to_change >= 0)
-		ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
-	spin_unlock(&ip_vs_securetcp_lock);
+		ip_vs_protocol_timeout_change(ipvs,
+					      ipvs->sysctl_secure_tcp > 1);
+	spin_unlock(&ipvs->securetcp_lock);
 
 	local_bh_enable();
 }
@@ -250,16 +218,16 @@ static void update_defense_level(void)
  *	Timer for checking the defense
  */
 #define DEFENSE_TIMER_PERIOD	1*HZ
-static void defense_work_handler(struct work_struct *work);
-static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
 
 static void defense_work_handler(struct work_struct *work)
 {
-	update_defense_level();
-	if (atomic_read(&ip_vs_dropentry))
-		ip_vs_random_dropentry();
+	struct netns_ipvs *ipvs =
+		container_of(work, struct netns_ipvs, defense_work.work);
 
-	schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
+	update_defense_level(ipvs);
+	if (atomic_read(&ipvs->dropentry))
+		ip_vs_random_dropentry(ipvs->net);
+	schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
 }
 
 int
@@ -287,33 +255,13 @@ static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
 /* the service table hashed by fwmark */
 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
 
-/*
- *	Hash table: for real service lookups
- */
-#define IP_VS_RTAB_BITS 4
-#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
-#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
-
-static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
-
-/*
- *	Trash for destinations
- */
-static LIST_HEAD(ip_vs_dest_trash);
-
-/*
- *	FTP & NULL virtual service counters
- */
-static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
-static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
-
 
 /*
  *	Returns hash value for virtual service
  */
-static __inline__ unsigned
-ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
-		  __be16 port)
+static inline unsigned
+ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
+		  const union nf_inet_addr *addr, __be16 port)
 {
 	register unsigned porth = ntohs(port);
 	__be32 addr_fold = addr->ip;
@@ -323,6 +271,7 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
 		addr_fold = addr->ip6[0]^addr->ip6[1]^
 			    addr->ip6[2]^addr->ip6[3];
 #endif
+	addr_fold ^= ((size_t)net>>8);
 
 	return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
 		& IP_VS_SVC_TAB_MASK;
@@ -331,13 +280,13 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
 /*
  *	Returns hash value of fwmark for virtual service lookup
  */
-static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
+static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
 {
-	return fwmark & IP_VS_SVC_TAB_MASK;
+	return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
 }
 
 /*
- *	Hashes a service in the ip_vs_svc_table by <proto,addr,port>
+ *	Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
  *	or in the ip_vs_svc_fwm_table by fwmark.
  *	Should be called with locked tables.
  */
@@ -353,16 +302,16 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
 
 	if (svc->fwmark == 0) {
 		/*
-		 *  Hash it by <protocol,addr,port> in ip_vs_svc_table
+		 *  Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
 		 */
-		hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
-					 svc->port);
+		hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
+					 &svc->addr, svc->port);
 		list_add(&svc->s_list, &ip_vs_svc_table[hash]);
 	} else {
 		/*
-		 *  Hash it by fwmark in ip_vs_svc_fwm_table
+		 *  Hash it by fwmark in svc_fwm_table
 		 */
-		hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
+		hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
 		list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
 	}
 
@@ -374,7 +323,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
 
 
 /*
- *	Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
+ *	Unhashes a service from svc_table / svc_fwm_table.
  *	Should be called with locked tables.
  */
 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
@@ -386,10 +335,10 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
 	}
 
 	if (svc->fwmark == 0) {
-		/* Remove it from the ip_vs_svc_table table */
+		/* Remove it from the svc_table table */
 		list_del(&svc->s_list);
 	} else {
-		/* Remove it from the ip_vs_svc_fwm_table table */
+		/* Remove it from the svc_fwm_table table */
 		list_del(&svc->f_list);
 	}
 
@@ -400,23 +349,24 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
 
 
 /*
- *	Get service by {proto,addr,port} in the service table.
+ *	Get service by {netns, proto,addr,port} in the service table.
  */
 static inline struct ip_vs_service *
-__ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,
-		    __be16 vport)
+__ip_vs_service_find(struct net *net, int af, __u16 protocol,
+		     const union nf_inet_addr *vaddr, __be16 vport)
 {
 	unsigned hash;
 	struct ip_vs_service *svc;
 
 	/* Check for "full" addressed entries */
-	hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
+	hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
 
 	list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
 		if ((svc->af == af)
 		    && ip_vs_addr_equal(af, &svc->addr, vaddr)
 		    && (svc->port == vport)
-		    && (svc->protocol == protocol)) {
+		    && (svc->protocol == protocol)
+		    && net_eq(svc->net, net)) {
 			/* HIT */
 			return svc;
 		}
@@ -430,16 +380,17 @@ __ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,
  *	Get service by {fwmark} in the service table.
  */
 static inline struct ip_vs_service *
-__ip_vs_svc_fwm_find(int af, __u32 fwmark)
+__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
 {
 	unsigned hash;
 	struct ip_vs_service *svc;
 
 	/* Check for fwmark addressed entries */
-	hash = ip_vs_svc_fwm_hashkey(fwmark);
+	hash = ip_vs_svc_fwm_hashkey(net, fwmark);
 
 	list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
-		if (svc->fwmark == fwmark && svc->af == af) {
+		if (svc->fwmark == fwmark && svc->af == af
+		    && net_eq(svc->net, net)) {
 			/* HIT */
 			return svc;
 		}
@@ -449,42 +400,44 @@ __ip_vs_svc_fwm_find(int af, __u32 fwmark)
 }
 
 struct ip_vs_service *
-ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
 		  const union nf_inet_addr *vaddr, __be16 vport)
 {
 	struct ip_vs_service *svc;
+	struct netns_ipvs *ipvs = net_ipvs(net);
 
 	read_lock(&__ip_vs_svc_lock);
 
 	/*
 	 *	Check the table hashed by fwmark first
 	 */
-	if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark)))
+	svc = __ip_vs_svc_fwm_find(net, af, fwmark);
+	if (fwmark && svc)
 		goto out;
 
 	/*
 	 *	Check the table hashed by <protocol,addr,port>
 	 *	for "full" addressed entries
 	 */
-	svc = __ip_vs_service_find(af, protocol, vaddr, vport);
+	svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
 
 	if (svc == NULL
 	    && protocol == IPPROTO_TCP
-	    && atomic_read(&ip_vs_ftpsvc_counter)
+	    && atomic_read(&ipvs->ftpsvc_counter)
 	    && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
 		/*
 		 * Check if ftp service entry exists, the packet
 		 * might belong to FTP data connections.
 		 */
-		svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT);
+		svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
 	}
 
 	if (svc == NULL
-	    && atomic_read(&ip_vs_nullsvc_counter)) {
+	    && atomic_read(&ipvs->nullsvc_counter)) {
 		/*
 		 * Check if the catch-all port (port zero) exists
 		 */
-		svc = __ip_vs_service_find(af, protocol, vaddr, 0);
+		svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
 	}
 
   out:
@@ -519,6 +472,7 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest)
 			      svc->fwmark,
 			      IP_VS_DBG_ADDR(svc->af, &svc->addr),
 			      ntohs(svc->port), atomic_read(&svc->usecnt));
+		free_percpu(svc->stats.cpustats);
 		kfree(svc);
 	}
 }
@@ -545,10 +499,10 @@ static inline unsigned ip_vs_rs_hashkey(int af,
 }
 
 /*
- *	Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
+ *	Hashes ip_vs_dest in rs_table by <proto,addr,port>.
  *	should be called with locked tables.
  */
-static int ip_vs_rs_hash(struct ip_vs_dest *dest)
+static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
 {
 	unsigned hash;
 
@@ -562,19 +516,19 @@ static int ip_vs_rs_hash(struct ip_vs_dest *dest)
 	 */
 	hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
 
-	list_add(&dest->d_list, &ip_vs_rtable[hash]);
+	list_add(&dest->d_list, &ipvs->rs_table[hash]);
 
 	return 1;
 }
 
 /*
- *	UNhashes ip_vs_dest from ip_vs_rtable.
+ *	UNhashes ip_vs_dest from rs_table.
  *	should be called with locked tables.
  */
 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
 {
 	/*
-	 * Remove it from the ip_vs_rtable table.
+	 * Remove it from the rs_table table.
 	 */
 	if (!list_empty(&dest->d_list)) {
 		list_del(&dest->d_list);
@@ -588,10 +542,11 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
  *	Lookup real service by <proto,addr,port> in the real service table.
  */
 struct ip_vs_dest *
-ip_vs_lookup_real_service(int af, __u16 protocol,
+ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
 			  const union nf_inet_addr *daddr,
 			  __be16 dport)
 {
+	struct netns_ipvs *ipvs = net_ipvs(net);
 	unsigned hash;
 	struct ip_vs_dest *dest;
 
@@ -601,19 +556,19 @@ ip_vs_lookup_real_service(int af, __u16 protocol,
 	 */
 	hash = ip_vs_rs_hashkey(af, daddr, dport);
 
-	read_lock(&__ip_vs_rs_lock);
-	list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
+	read_lock(&ipvs->rs_lock);
+	list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
 		if ((dest->af == af)
 		    && ip_vs_addr_equal(af, &dest->addr, daddr)
 		    && (dest->port == dport)
 		    && ((dest->protocol == protocol) ||
 			dest->vfwmark)) {
 			/* HIT */
-			read_unlock(&__ip_vs_rs_lock);
+			read_unlock(&ipvs->rs_lock);
 			return dest;
 		}
 	}
-	read_unlock(&__ip_vs_rs_lock);
+	read_unlock(&ipvs->rs_lock);
 
 	return NULL;
 }
@@ -652,15 +607,16 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
  * ip_vs_lookup_real_service() looked promissing, but
  * seems not working as expected.
  */
-struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
+struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int af,
+				   const union nf_inet_addr *daddr,
 				   __be16 dport,
 				   const union nf_inet_addr *vaddr,
-				   __be16 vport, __u16 protocol)
+				   __be16 vport, __u16 protocol, __u32 fwmark)
 {
 	struct ip_vs_dest *dest;
 	struct ip_vs_service *svc;
 
-	svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
+	svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
 	if (!svc)
 		return NULL;
 	dest = ip_vs_lookup_dest(svc, daddr, dport);
@@ -685,11 +641,12 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
 		     __be16 dport)
 {
 	struct ip_vs_dest *dest, *nxt;
+	struct netns_ipvs *ipvs = net_ipvs(svc->net);
 
 	/*
 	 * Find the destination in trash
 	 */
-	list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
+	list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
 		IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
 			      "dest->refcnt=%d\n",
 			      dest->vfwmark,
@@ -720,6 +677,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
 			list_del(&dest->n_list);
 			ip_vs_dst_reset(dest);
 			__ip_vs_unbind_svc(dest);
+			free_percpu(dest->stats.cpustats);
 			kfree(dest);
 		}
 	}
@@ -737,14 +695,16 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
  *  are expired, and the refcnt of each destination in the trash must
  *  be 1, so we simply release them here.
  */
-static void ip_vs_trash_cleanup(void)
+static void ip_vs_trash_cleanup(struct net *net)
 {
 	struct ip_vs_dest *dest, *nxt;
+	struct netns_ipvs *ipvs = net_ipvs(net);
 
-	list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
+	list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
 		list_del(&dest->n_list);
 		ip_vs_dst_reset(dest);
 		__ip_vs_unbind_svc(dest);
+		free_percpu(dest->stats.cpustats);
 		kfree(dest);
 	}
 }
@@ -768,6 +728,7 @@ static void
 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
 		    struct ip_vs_dest_user_kern *udest, int add)
 {
+	struct netns_ipvs *ipvs = net_ipvs(svc->net);
 	int conn_flags;
 
 	/* set the weight and the flags */
@@ -780,12 +741,12 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
 		conn_flags |= IP_VS_CONN_F_NOOUTPUT;
 	} else {
 		/*
-		 *    Put the real service in ip_vs_rtable if not present.
+		 *    Put the real service in rs_table if not present.
 		 *    For now only for NAT!
 		 */
-		write_lock_bh(&__ip_vs_rs_lock);
-		ip_vs_rs_hash(dest);
-		write_unlock_bh(&__ip_vs_rs_lock);
+		write_lock_bh(&ipvs->rs_lock);
+		ip_vs_rs_hash(ipvs, dest);
+		write_unlock_bh(&ipvs->rs_lock);
 	}
 	atomic_set(&dest->conn_flags, conn_flags);
 
@@ -813,7 +774,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
 	spin_unlock(&dest->dst_lock);
 
 	if (add)
-		ip_vs_new_estimator(&dest->stats);
+		ip_vs_new_estimator(svc->net, &dest->stats);
 
 	write_lock_bh(&__ip_vs_svc_lock);
 
@@ -850,12 +811,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
 		atype = ipv6_addr_type(&udest->addr.in6);
 		if ((!(atype & IPV6_ADDR_UNICAST) ||
 			atype & IPV6_ADDR_LINKLOCAL) &&
-			!__ip_vs_addr_is_local_v6(&udest->addr.in6))
+			!__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
 			return -EINVAL;
 	} else
 #endif
 	{
-		atype = inet_addr_type(&init_net, udest->addr.ip);
+		atype = inet_addr_type(svc->net, udest->addr.ip);
 		if (atype != RTN_LOCAL && atype != RTN_UNICAST)
 			return -EINVAL;
 	}
@@ -865,6 +826,11 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
 		pr_err("%s(): no memory.\n", __func__);
 		return -ENOMEM;
 	}
+	dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+	if (!dest->stats.cpustats) {
+		pr_err("%s() alloc_percpu failed\n", __func__);
+		goto err_alloc;
+	}
 
 	dest->af = svc->af;
 	dest->protocol = svc->protocol;
@@ -888,6 +854,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
 
 	LeaveFunction(2);
 	return 0;
+
+err_alloc:
+	kfree(dest);
+	return -ENOMEM;
 }
 
 
@@ -1006,16 +976,18 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 /*
  *	Delete a destination (must be already unlinked from the service)
  */
-static void __ip_vs_del_dest(struct ip_vs_dest *dest)
+static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
 {
-	ip_vs_kill_estimator(&dest->stats);
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	ip_vs_kill_estimator(net, &dest->stats);
 
 	/*
 	 *  Remove it from the d-linked list with the real services.
 	 */
-	write_lock_bh(&__ip_vs_rs_lock);
+	write_lock_bh(&ipvs->rs_lock);
 	ip_vs_rs_unhash(dest);
-	write_unlock_bh(&__ip_vs_rs_lock);
+	write_unlock_bh(&ipvs->rs_lock);
 
 	/*
 	 *  Decrease the refcnt of the dest, and free the dest
@@ -1034,6 +1006,7 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
 		   and only one user context can update virtual service at a
 		   time, so the operation here is OK */
 		atomic_dec(&dest->svc->refcnt);
+		free_percpu(dest->stats.cpustats);
 		kfree(dest);
 	} else {
 		IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
@@ -1041,7 +1014,7 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
 			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
 			      ntohs(dest->port),
 			      atomic_read(&dest->refcnt));
-		list_add(&dest->n_list, &ip_vs_dest_trash);
+		list_add(&dest->n_list, &ipvs->dest_trash);
 		atomic_inc(&dest->refcnt);
 	}
 }
@@ -1105,7 +1078,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 	/*
 	 *	Delete the destination
 	 */
-	__ip_vs_del_dest(dest);
+	__ip_vs_del_dest(svc->net, dest);
 
 	LeaveFunction(2);
 
@@ -1117,13 +1090,14 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
  *	Add a service into the service hash table
  */
 static int
-ip_vs_add_service(struct ip_vs_service_user_kern *u,
+ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
 		  struct ip_vs_service **svc_p)
 {
 	int ret = 0;
 	struct ip_vs_scheduler *sched = NULL;
 	struct ip_vs_pe *pe = NULL;
 	struct ip_vs_service *svc = NULL;
+	struct netns_ipvs *ipvs = net_ipvs(net);
 
 	/* increase the module use count */
 	ip_vs_use_count_inc();
@@ -1137,7 +1111,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
 	}
 
 	if (u->pe_name && *u->pe_name) {
-		pe = ip_vs_pe_get(u->pe_name);
+		pe = ip_vs_pe_getbyname(u->pe_name);
 		if (pe == NULL) {
 			pr_info("persistence engine module ip_vs_pe_%s "
 				"not found\n", u->pe_name);
@@ -1159,6 +1133,13 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
 		ret = -ENOMEM;
 		goto out_err;
 	}
+	svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+	if (!svc->stats.cpustats) {
+		pr_err("%s() alloc_percpu failed\n", __func__);
+		/* ret is still 0 here: set an error before taking out_err */
+		ret = -ENOMEM;
+		goto out_err;
+	}
 
 	/* I'm the first user of the service */
 	atomic_set(&svc->usecnt, 0);
@@ -1172,6 +1151,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
 	svc->flags = u->flags;
 	svc->timeout = u->timeout * HZ;
 	svc->netmask = u->netmask;
+	svc->net = net;
 
 	INIT_LIST_HEAD(&svc->destinations);
 	rwlock_init(&svc->sched_lock);
@@ -1189,15 +1169,15 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
 
 	/* Update the virtual service counters */
 	if (svc->port == FTPPORT)
-		atomic_inc(&ip_vs_ftpsvc_counter);
+		atomic_inc(&ipvs->ftpsvc_counter);
 	else if (svc->port == 0)
-		atomic_inc(&ip_vs_nullsvc_counter);
+		atomic_inc(&ipvs->nullsvc_counter);
 
-	ip_vs_new_estimator(&svc->stats);
+	ip_vs_new_estimator(net, &svc->stats);
 
 	/* Count only IPv4 services for old get/setsockopt interface */
 	if (svc->af == AF_INET)
-		ip_vs_num_services++;
+		ipvs->num_services++;
 
 	/* Hash the service into the service table */
 	write_lock_bh(&__ip_vs_svc_lock);
@@ -1207,6 +1187,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
 	*svc_p = svc;
 	return 0;
 
+
  out_err:
 	if (svc != NULL) {
 		ip_vs_unbind_scheduler(svc);
@@ -1215,6 +1196,8 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
 			ip_vs_app_inc_put(svc->inc);
 			local_bh_enable();
 		}
+		if (svc->stats.cpustats)
+			free_percpu(svc->stats.cpustats);
 		kfree(svc);
 	}
 	ip_vs_scheduler_put(sched);
@@ -1248,7 +1231,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
 	old_sched = sched;
 
 	if (u->pe_name && *u->pe_name) {
-		pe = ip_vs_pe_get(u->pe_name);
+		pe = ip_vs_pe_getbyname(u->pe_name);
 		if (pe == NULL) {
 			pr_info("persistence engine module ip_vs_pe_%s "
 				"not found\n", u->pe_name);
@@ -1334,14 +1317,15 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
 	struct ip_vs_dest *dest, *nxt;
 	struct ip_vs_scheduler *old_sched;
 	struct ip_vs_pe *old_pe;
+	struct netns_ipvs *ipvs = net_ipvs(svc->net);
 
 	pr_info("%s: enter\n", __func__);
 
 	/* Count only IPv4 services for old get/setsockopt interface */
 	if (svc->af == AF_INET)
-		ip_vs_num_services--;
+		ipvs->num_services--;
 
-	ip_vs_kill_estimator(&svc->stats);
+	ip_vs_kill_estimator(svc->net, &svc->stats);
 
 	/* Unbind scheduler */
 	old_sched = svc->scheduler;
@@ -1364,16 +1348,16 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
 	 */
 	list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
 		__ip_vs_unlink_dest(svc, dest, 0);
-		__ip_vs_del_dest(dest);
+		__ip_vs_del_dest(svc->net, dest);
 	}
 
 	/*
 	 *    Update the virtual service counters
 	 */
 	if (svc->port == FTPPORT)
-		atomic_dec(&ip_vs_ftpsvc_counter);
+		atomic_dec(&ipvs->ftpsvc_counter);
 	else if (svc->port == 0)
-		atomic_dec(&ip_vs_nullsvc_counter);
+		atomic_dec(&ipvs->nullsvc_counter);
 
 	/*
 	 *    Free the service if nobody refers to it
@@ -1383,6 +1367,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
 			      svc->fwmark,
 			      IP_VS_DBG_ADDR(svc->af, &svc->addr),
 			      ntohs(svc->port), atomic_read(&svc->usecnt));
+		free_percpu(svc->stats.cpustats);
 		kfree(svc);
 	}
 
@@ -1428,17 +1413,19 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
 /*
  *	Flush all the virtual services
  */
-static int ip_vs_flush(void)
+static int ip_vs_flush(struct net *net)
 {
 	int idx;
 	struct ip_vs_service *svc, *nxt;
 
 	/*
-	 * Flush the service table hashed by <protocol,addr,port>
+	 * Flush the service table hashed by <netns,protocol,addr,port>
 	 */
 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
-		list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
-			ip_vs_unlink_service(svc);
+		list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
+					 s_list) {
+			if (net_eq(svc->net, net))
+				ip_vs_unlink_service(svc);
 		}
 	}
 
@@ -1448,7 +1435,8 @@ static int ip_vs_flush(void)
 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
 		list_for_each_entry_safe(svc, nxt,
 					 &ip_vs_svc_fwm_table[idx], f_list) {
-			ip_vs_unlink_service(svc);
+			if (net_eq(svc->net, net))
+				ip_vs_unlink_service(svc);
 		}
 	}
 
@@ -1472,24 +1460,26 @@ static int ip_vs_zero_service(struct ip_vs_service *svc)
 	return 0;
 }
 
-static int ip_vs_zero_all(void)
+static int ip_vs_zero_all(struct net *net)
 {
 	int idx;
 	struct ip_vs_service *svc;
 
 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
 		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
-			ip_vs_zero_service(svc);
+			if (net_eq(svc->net, net))
+				ip_vs_zero_service(svc);
 		}
 	}
 
 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
 		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
-			ip_vs_zero_service(svc);
+			if (net_eq(svc->net, net))
+				ip_vs_zero_service(svc);
 		}
 	}
 
-	ip_vs_zero_stats(&ip_vs_stats);
+	ip_vs_zero_stats(net_ipvs(net)->tot_stats);
 	return 0;
 }
 
@@ -1498,6 +1488,7 @@ static int
 proc_do_defense_mode(ctl_table *table, int write,
 		     void __user *buffer, size_t *lenp, loff_t *ppos)
 {
+	struct net *net = current->nsproxy->net_ns;
 	int *valp = table->data;
 	int val = *valp;
 	int rc;
@@ -1508,7 +1499,7 @@ proc_do_defense_mode(ctl_table *table, int write,
 			/* Restore the correct value */
 			*valp = val;
 		} else {
-			update_defense_level();
+			update_defense_level(net_ipvs(net));
 		}
 	}
 	return rc;
@@ -1534,45 +1525,54 @@ proc_do_sync_threshold(ctl_table *table, int write,
 	return rc;
 }
 
+static int
+proc_do_sync_mode(ctl_table *table, int write,
+		     void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int *valp = table->data;
+	int val = *valp;
+	int rc;
+
+	rc = proc_dointvec(table, write, buffer, lenp, ppos);
+	if (write && (*valp != val)) {
+		if ((*valp < 0) || (*valp > 1)) {
+			/* Restore the correct value */
+			*valp = val;
+		} else {
+			struct net *net = current->nsproxy->net_ns;
+			ip_vs_sync_switch_mode(net, val);
+		}
+	}
+	return rc;
+}
 
 /*
  *	IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
+ *	Do not reorder or insert entries without making the matching
+ *	change to the netns init in __ip_vs_control_init().
  */
 
 static struct ctl_table vs_vars[] = {
 	{
 		.procname	= "amemthresh",
-		.data		= &sysctl_ip_vs_amemthresh,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#ifdef CONFIG_IP_VS_DEBUG
-	{
-		.procname	= "debug_level",
-		.data		= &sysctl_ip_vs_debug_level,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
-#endif
 	{
 		.procname	= "am_droprate",
-		.data		= &sysctl_ip_vs_am_droprate,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
 	{
 		.procname	= "drop_entry",
-		.data		= &sysctl_ip_vs_drop_entry,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_do_defense_mode,
 	},
 	{
 		.procname	= "drop_packet",
-		.data		= &sysctl_ip_vs_drop_packet,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_do_defense_mode,
@@ -1580,7 +1580,6 @@ static struct ctl_table vs_vars[] = {
 #ifdef CONFIG_IP_VS_NFCT
 	{
 		.procname	= "conntrack",
-		.data		= &sysctl_ip_vs_conntrack,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
@@ -1588,18 +1587,62 @@ static struct ctl_table vs_vars[] = {
 #endif
 	{
 		.procname	= "secure_tcp",
-		.data		= &sysctl_ip_vs_secure_tcp,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_do_defense_mode,
 	},
 	{
 		.procname	= "snat_reroute",
-		.data		= &sysctl_ip_vs_snat_reroute,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.procname	= "sync_version",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_do_sync_mode,
+	},
+	{
+		.procname	= "cache_bypass",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "expire_nodest_conn",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "expire_quiescent_template",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "sync_threshold",
+		.maxlen		=
+			sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
+		.mode		= 0644,
+		.proc_handler	= proc_do_sync_threshold,
+	},
+	{
+		.procname	= "nat_icmp_send",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#ifdef CONFIG_IP_VS_DEBUG
+	{
+		.procname	= "debug_level",
+		.data		= &sysctl_ip_vs_debug_level,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
 #if 0
 	{
 		.procname	= "timeout_established",
@@ -1686,41 +1729,6 @@ static struct ctl_table vs_vars[] = {
 		.proc_handler	= proc_dointvec_jiffies,
 	},
 #endif
-	{
-		.procname	= "cache_bypass",
-		.data		= &sysctl_ip_vs_cache_bypass,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "expire_nodest_conn",
-		.data		= &sysctl_ip_vs_expire_nodest_conn,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "expire_quiescent_template",
-		.data		= &sysctl_ip_vs_expire_quiescent_template,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "sync_threshold",
-		.data		= &sysctl_ip_vs_sync_threshold,
-		.maxlen		= sizeof(sysctl_ip_vs_sync_threshold),
-		.mode		= 0644,
-		.proc_handler	= proc_do_sync_threshold,
-	},
-	{
-		.procname	= "nat_icmp_send",
-		.data		= &sysctl_ip_vs_nat_icmp_send,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
 	{ }
 };
 
@@ -1732,11 +1740,10 @@ const struct ctl_path net_vs_ctl_path[] = {
 };
 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
 
-static struct ctl_table_header * sysctl_header;
-
 #ifdef CONFIG_PROC_FS
 
 struct ip_vs_iter {
+	struct seq_net_private p;  /* Do not move this, netns depends upon it*/
 	struct list_head *table;
 	int bucket;
 };
@@ -1763,6 +1770,7 @@ static inline const char *ip_vs_fwd_name(unsigned flags)
 /* Get the Nth entry in the two lists */
 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
 {
+	struct net *net = seq_file_net(seq);
 	struct ip_vs_iter *iter = seq->private;
 	int idx;
 	struct ip_vs_service *svc;
@@ -1770,7 +1778,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
 	/* look in hash by protocol */
 	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
 		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
-			if (pos-- == 0){
+			if (net_eq(svc->net, net) && pos-- == 0) {
 				iter->table = ip_vs_svc_table;
 				iter->bucket = idx;
 				return svc;
@@ -1781,7 +1789,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
 	/* keep looking in fwmark */
 	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
 		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
-			if (pos-- == 0) {
+			if (net_eq(svc->net, net) && pos-- == 0) {
 				iter->table = ip_vs_svc_fwm_table;
 				iter->bucket = idx;
 				return svc;
@@ -1935,7 +1943,7 @@ static const struct seq_operations ip_vs_info_seq_ops = {
 
 static int ip_vs_info_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &ip_vs_info_seq_ops,
+	return seq_open_net(inode, file, &ip_vs_info_seq_ops,
 			sizeof(struct ip_vs_iter));
 }
 
@@ -1949,13 +1957,11 @@ static const struct file_operations ip_vs_info_fops = {
 
 #endif
 
-struct ip_vs_stats ip_vs_stats = {
-	.lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
-};
-
 #ifdef CONFIG_PROC_FS
 static int ip_vs_stats_show(struct seq_file *seq, void *v)
 {
+	struct net *net = seq_file_single_net(seq);
+	struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
 
 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
 	seq_puts(seq,
@@ -1963,29 +1969,29 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
 	seq_printf(seq,
 		   "   Conns  Packets  Packets            Bytes            Bytes\n");
 
-	spin_lock_bh(&ip_vs_stats.lock);
-	seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
-		   ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
-		   (unsigned long long) ip_vs_stats.ustats.inbytes,
-		   (unsigned long long) ip_vs_stats.ustats.outbytes);
+	spin_lock_bh(&tot_stats->lock);
+	seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns,
+		   tot_stats->ustats.inpkts, tot_stats->ustats.outpkts,
+		   (unsigned long long) tot_stats->ustats.inbytes,
+		   (unsigned long long) tot_stats->ustats.outbytes);
 
 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
 	seq_puts(seq,
 		   " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
 	seq_printf(seq,"%8X %8X %8X %16X %16X\n",
-			ip_vs_stats.ustats.cps,
-			ip_vs_stats.ustats.inpps,
-			ip_vs_stats.ustats.outpps,
-			ip_vs_stats.ustats.inbps,
-			ip_vs_stats.ustats.outbps);
-	spin_unlock_bh(&ip_vs_stats.lock);
+			tot_stats->ustats.cps,
+			tot_stats->ustats.inpps,
+			tot_stats->ustats.outpps,
+			tot_stats->ustats.inbps,
+			tot_stats->ustats.outbps);
+	spin_unlock_bh(&tot_stats->lock);
 
 	return 0;
 }
 
 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, ip_vs_stats_show, NULL);
+	return single_open_net(inode, file, ip_vs_stats_show);
 }
 
 static const struct file_operations ip_vs_stats_fops = {
@@ -1996,13 +2002,70 @@ static const struct file_operations ip_vs_stats_fops = {
 	.release = single_release,
 };
 
+static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
+{
+	struct net *net = seq_file_single_net(seq);
+	struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
+	int i;
+
+/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
+	seq_puts(seq,
+		 "       Total Incoming Outgoing         Incoming         Outgoing\n");
+	seq_printf(seq,
+		   "CPU    Conns  Packets  Packets            Bytes            Bytes\n");
+
+	for_each_possible_cpu(i) {
+		struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i);
+		seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
+			    i, u->ustats.conns, u->ustats.inpkts,
+			    u->ustats.outpkts, (__u64)u->ustats.inbytes,
+			    (__u64)u->ustats.outbytes);
+	}
+
+	spin_lock_bh(&tot_stats->lock);
+	seq_printf(seq, "  ~ %8X %8X %8X %16LX %16LX\n\n",
+		   tot_stats->ustats.conns, tot_stats->ustats.inpkts,
+		   tot_stats->ustats.outpkts,
+		   (unsigned long long) tot_stats->ustats.inbytes,
+		   (unsigned long long) tot_stats->ustats.outbytes);
+
+/*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+	seq_puts(seq,
+		   "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
+	seq_printf(seq, "    %8X %8X %8X %16X %16X\n",
+			tot_stats->ustats.cps,
+			tot_stats->ustats.inpps,
+			tot_stats->ustats.outpps,
+			tot_stats->ustats.inbps,
+			tot_stats->ustats.outbps);
+	spin_unlock_bh(&tot_stats->lock);
+
+	return 0;
+}
+
+static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
+{
+	return single_open_net(inode, file, ip_vs_stats_percpu_show);
+}
+
+static const struct file_operations ip_vs_stats_percpu_fops = {
+	.owner = THIS_MODULE,
+	.open = ip_vs_stats_percpu_seq_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
 #endif
 
 /*
  *	Set timeout values for tcp tcpfin udp in the timeout_table.
  */
-static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
+static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
 {
+#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
+	struct ip_vs_proto_data *pd;
+#endif
+
 	IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
 		  u->tcp_timeout,
 		  u->tcp_fin_timeout,
@@ -2010,19 +2073,22 @@ static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
 
 #ifdef CONFIG_IP_VS_PROTO_TCP
 	if (u->tcp_timeout) {
-		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
+		pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+		pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
 			= u->tcp_timeout * HZ;
 	}
 
 	if (u->tcp_fin_timeout) {
-		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
+		pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+		pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
 			= u->tcp_fin_timeout * HZ;
 	}
 #endif
 
 #ifdef CONFIG_IP_VS_PROTO_UDP
 	if (u->udp_timeout) {
-		ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
+		pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+		pd->timeout_table[IP_VS_UDP_S_NORMAL]
 			= u->udp_timeout * HZ;
 	}
 #endif
@@ -2087,6 +2153,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
 static int
 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 {
+	struct net *net = sock_net(sk);
 	int ret;
 	unsigned char arg[MAX_ARG_LEN];
 	struct ip_vs_service_user *usvc_compat;
@@ -2121,19 +2188,20 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 
 	if (cmd == IP_VS_SO_SET_FLUSH) {
 		/* Flush the virtual service */
-		ret = ip_vs_flush();
+		ret = ip_vs_flush(net);
 		goto out_unlock;
 	} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
 		/* Set timeout values for (tcp tcpfin udp) */
-		ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
+		ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
 		goto out_unlock;
 	} else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
 		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
-		ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
+		ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
+					dm->syncid);
 		goto out_unlock;
 	} else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
 		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
-		ret = stop_sync_thread(dm->state);
+		ret = stop_sync_thread(net, dm->state);
 		goto out_unlock;
 	}
 
@@ -2148,7 +2216,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 	if (cmd == IP_VS_SO_SET_ZERO) {
 		/* if no service address is set, zero counters in all */
 		if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
-			ret = ip_vs_zero_all();
+			ret = ip_vs_zero_all(net);
 			goto out_unlock;
 		}
 	}
@@ -2165,10 +2233,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 
 	/* Lookup the exact service by <protocol, addr, port> or fwmark */
 	if (usvc.fwmark == 0)
-		svc = __ip_vs_service_find(usvc.af, usvc.protocol,
+		svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
 					   &usvc.addr, usvc.port);
 	else
-		svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark);
+		svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
 
 	if (cmd != IP_VS_SO_SET_ADD
 	    && (svc == NULL || svc->protocol != usvc.protocol)) {
@@ -2181,7 +2249,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 		if (svc != NULL)
 			ret = -EEXIST;
 		else
-			ret = ip_vs_add_service(&usvc, &svc);
+			ret = ip_vs_add_service(net, &usvc, &svc);
 		break;
 	case IP_VS_SO_SET_EDIT:
 		ret = ip_vs_edit_service(svc, &usvc);
@@ -2241,7 +2309,8 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
 }
 
 static inline int
-__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
+__ip_vs_get_service_entries(struct net *net,
+			    const struct ip_vs_get_services *get,
 			    struct ip_vs_get_services __user *uptr)
 {
 	int idx, count=0;
@@ -2252,7 +2321,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
 	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
 		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
 			/* Only expose IPv4 entries to old interface */
-			if (svc->af != AF_INET)
+			if (svc->af != AF_INET || !net_eq(svc->net, net))
 				continue;
 
 			if (count >= get->num_services)
@@ -2271,7 +2340,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
 	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
 		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
 			/* Only expose IPv4 entries to old interface */
-			if (svc->af != AF_INET)
+			if (svc->af != AF_INET || !net_eq(svc->net, net))
 				continue;
 
 			if (count >= get->num_services)
@@ -2291,7 +2360,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
 }
 
 static inline int
-__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
+__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
 			 struct ip_vs_get_dests __user *uptr)
 {
 	struct ip_vs_service *svc;
@@ -2299,9 +2368,9 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
 	int ret = 0;
 
 	if (get->fwmark)
-		svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark);
+		svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
 	else
-		svc = __ip_vs_service_find(AF_INET, get->protocol, &addr,
+		svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
 					   get->port);
 
 	if (svc) {
@@ -2336,17 +2405,21 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
 }
 
 static inline void
-__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
+__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
 {
+#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
+	struct ip_vs_proto_data *pd;
+#endif
+
 #ifdef CONFIG_IP_VS_PROTO_TCP
-	u->tcp_timeout =
-		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
-	u->tcp_fin_timeout =
-		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
+	pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+	u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
+	u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
 #endif
 #ifdef CONFIG_IP_VS_PROTO_UDP
+	pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
 	u->udp_timeout =
-		ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
+			pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
 #endif
 }
 
@@ -2375,7 +2448,10 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 	unsigned char arg[128];
 	int ret = 0;
 	unsigned int copylen;
+	struct net *net = sock_net(sk);
+	struct netns_ipvs *ipvs = net_ipvs(net);
 
+	BUG_ON(!net);
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 
@@ -2418,7 +2494,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 		struct ip_vs_getinfo info;
 		info.version = IP_VS_VERSION_CODE;
 		info.size = ip_vs_conn_tab_size;
-		info.num_services = ip_vs_num_services;
+		info.num_services = ipvs->num_services;
 		if (copy_to_user(user, &info, sizeof(info)) != 0)
 			ret = -EFAULT;
 	}
@@ -2437,7 +2513,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 			ret = -EINVAL;
 			goto out;
 		}
-		ret = __ip_vs_get_service_entries(get, user);
+		ret = __ip_vs_get_service_entries(net, get, user);
 	}
 	break;
 
@@ -2450,10 +2526,11 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 		entry = (struct ip_vs_service_entry *)arg;
 		addr.ip = entry->addr;
 		if (entry->fwmark)
-			svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark);
+			svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
 		else
-			svc = __ip_vs_service_find(AF_INET, entry->protocol,
-						   &addr, entry->port);
+			svc = __ip_vs_service_find(net, AF_INET,
+						   entry->protocol, &addr,
+						   entry->port);
 		if (svc) {
 			ip_vs_copy_service(entry, svc);
 			if (copy_to_user(user, entry, sizeof(*entry)) != 0)
@@ -2476,7 +2553,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 			ret = -EINVAL;
 			goto out;
 		}
-		ret = __ip_vs_get_dest_entries(get, user);
+		ret = __ip_vs_get_dest_entries(net, get, user);
 	}
 	break;
 
@@ -2484,7 +2561,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 	{
 		struct ip_vs_timeout_user t;
 
-		__ip_vs_get_timeouts(&t);
+		__ip_vs_get_timeouts(net, &t);
 		if (copy_to_user(user, &t, sizeof(t)) != 0)
 			ret = -EFAULT;
 	}
@@ -2495,15 +2572,17 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 		struct ip_vs_daemon_user d[2];
 
 		memset(&d, 0, sizeof(d));
-		if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
+		if (ipvs->sync_state & IP_VS_STATE_MASTER) {
 			d[0].state = IP_VS_STATE_MASTER;
-			strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
-			d[0].syncid = ip_vs_master_syncid;
+			strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
+				sizeof(d[0].mcast_ifn));
+			d[0].syncid = ipvs->master_syncid;
 		}
-		if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
+		if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
 			d[1].state = IP_VS_STATE_BACKUP;
-			strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
-			d[1].syncid = ip_vs_backup_syncid;
+			strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
+				sizeof(d[1].mcast_ifn));
+			d[1].syncid = ipvs->backup_syncid;
 		}
 		if (copy_to_user(user, &d, sizeof(d)) != 0)
 			ret = -EFAULT;
@@ -2542,6 +2621,7 @@ static struct genl_family ip_vs_genl_family = {
 	.name		= IPVS_GENL_NAME,
 	.version	= IPVS_GENL_VERSION,
 	.maxattr	= IPVS_CMD_MAX,
+	.netnsok        = true,         /* Make ipvsadm to work on netns */
 };
 
 /* Policy used for first-level command attributes */
@@ -2696,11 +2776,12 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
 	int idx = 0, i;
 	int start = cb->args[0];
 	struct ip_vs_service *svc;
+	struct net *net = skb_sknet(skb);
 
 	mutex_lock(&__ip_vs_mutex);
 	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
 		list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
-			if (++idx <= start)
+			if (++idx <= start || !net_eq(svc->net, net))
 				continue;
 			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
 				idx--;
@@ -2711,7 +2792,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
 
 	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
 		list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
-			if (++idx <= start)
+			if (++idx <= start || !net_eq(svc->net, net))
 				continue;
 			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
 				idx--;
@@ -2727,7 +2808,8 @@ nla_put_failure:
 	return skb->len;
 }
 
-static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
+static int ip_vs_genl_parse_service(struct net *net,
+				    struct ip_vs_service_user_kern *usvc,
 				    struct nlattr *nla, int full_entry,
 				    struct ip_vs_service **ret_svc)
 {
@@ -2770,9 +2852,9 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
 	}
 
 	if (usvc->fwmark)
-		svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark);
+		svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
 	else
-		svc = __ip_vs_service_find(usvc->af, usvc->protocol,
+		svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
 					   &usvc->addr, usvc->port);
 	*ret_svc = svc;
 
@@ -2809,13 +2891,14 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
 	return 0;
 }
 
-static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
+static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
+						     struct nlattr *nla)
 {
 	struct ip_vs_service_user_kern usvc;
 	struct ip_vs_service *svc;
 	int ret;
 
-	ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc);
+	ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
 	return ret ? ERR_PTR(ret) : svc;
 }
 
@@ -2883,6 +2966,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
 	struct ip_vs_service *svc;
 	struct ip_vs_dest *dest;
 	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
+	struct net *net = skb_sknet(skb);
 
 	mutex_lock(&__ip_vs_mutex);
 
@@ -2891,7 +2975,8 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
 			IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
 		goto out_err;
 
-	svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
+
+	svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
 	if (IS_ERR(svc) || svc == NULL)
 		goto out_err;
 
@@ -3005,20 +3090,23 @@ nla_put_failure:
 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
 				   struct netlink_callback *cb)
 {
+	struct net *net = skb_sknet(skb); /* same lookup as other genl dumps */
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
 	mutex_lock(&__ip_vs_mutex);
-	if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
+	if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
 		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
-					   ip_vs_master_mcast_ifn,
-					   ip_vs_master_syncid, cb) < 0)
+					   ipvs->master_mcast_ifn,
+					   ipvs->master_syncid, cb) < 0)
 			goto nla_put_failure;
 
 		cb->args[0] = 1;
 	}
 
-	if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
+	if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
 		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
-					   ip_vs_backup_mcast_ifn,
-					   ip_vs_backup_syncid, cb) < 0)
+					   ipvs->backup_mcast_ifn,
+					   ipvs->backup_syncid, cb) < 0)
 			goto nla_put_failure;
 
 		cb->args[1] = 1;
@@ -3030,31 +3118,33 @@ nla_put_failure:
 	return skb->len;
 }
 
-static int ip_vs_genl_new_daemon(struct nlattr **attrs)
+static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
 {
 	if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
 	      attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
 	      attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
 		return -EINVAL;
 
-	return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
+	return start_sync_thread(net,
+				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
 				 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
 				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
 }
 
-static int ip_vs_genl_del_daemon(struct nlattr **attrs)
+static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
 {
 	if (!attrs[IPVS_DAEMON_ATTR_STATE])
 		return -EINVAL;
 
-	return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+	return stop_sync_thread(net,
+				nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
 }
 
-static int ip_vs_genl_set_config(struct nlattr **attrs)
+static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
 {
 	struct ip_vs_timeout_user t;
 
-	__ip_vs_get_timeouts(&t);
+	__ip_vs_get_timeouts(net, &t);
 
 	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
 		t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
@@ -3066,7 +3156,7 @@ static int ip_vs_genl_set_config(struct nlattr **attrs)
 	if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
 		t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
 
-	return ip_vs_set_timeout(&t);
+	return ip_vs_set_timeout(net, &t);
 }
 
 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
@@ -3076,16 +3166,20 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
 	struct ip_vs_dest_user_kern udest;
 	int ret = 0, cmd;
 	int need_full_svc = 0, need_full_dest = 0;
+	struct net *net;
+	struct netns_ipvs *ipvs;
 
+	net = skb_sknet(skb);
+	ipvs = net_ipvs(net);
 	cmd = info->genlhdr->cmd;
 
 	mutex_lock(&__ip_vs_mutex);
 
 	if (cmd == IPVS_CMD_FLUSH) {
-		ret = ip_vs_flush();
+		ret = ip_vs_flush(net);
 		goto out;
 	} else if (cmd == IPVS_CMD_SET_CONFIG) {
-		ret = ip_vs_genl_set_config(info->attrs);
+		ret = ip_vs_genl_set_config(net, info->attrs);
 		goto out;
 	} else if (cmd == IPVS_CMD_NEW_DAEMON ||
 		   cmd == IPVS_CMD_DEL_DAEMON) {
@@ -3101,13 +3195,13 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
 		}
 
 		if (cmd == IPVS_CMD_NEW_DAEMON)
-			ret = ip_vs_genl_new_daemon(daemon_attrs);
+			ret = ip_vs_genl_new_daemon(net, daemon_attrs);
 		else
-			ret = ip_vs_genl_del_daemon(daemon_attrs);
+			ret = ip_vs_genl_del_daemon(net, daemon_attrs);
 		goto out;
 	} else if (cmd == IPVS_CMD_ZERO &&
 		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
-		ret = ip_vs_zero_all();
+		ret = ip_vs_zero_all(net);
 		goto out;
 	}
 
@@ -3117,7 +3211,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
 	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
 		need_full_svc = 1;
 
-	ret = ip_vs_genl_parse_service(&usvc,
+	ret = ip_vs_genl_parse_service(net, &usvc,
 				       info->attrs[IPVS_CMD_ATTR_SERVICE],
 				       need_full_svc, &svc);
 	if (ret)
@@ -3147,7 +3241,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
 	switch (cmd) {
 	case IPVS_CMD_NEW_SERVICE:
 		if (svc == NULL)
-			ret = ip_vs_add_service(&usvc, &svc);
+			ret = ip_vs_add_service(net, &usvc, &svc);
 		else
 			ret = -EEXIST;
 		break;
@@ -3185,7 +3279,11 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
 	struct sk_buff *msg;
 	void *reply;
 	int ret, cmd, reply_cmd;
+	struct net *net;
+	struct netns_ipvs *ipvs;
 
+	net = skb_sknet(skb);
+	ipvs = net_ipvs(net);
 	cmd = info->genlhdr->cmd;
 
 	if (cmd == IPVS_CMD_GET_SERVICE)
@@ -3214,7 +3312,8 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
 	{
 		struct ip_vs_service *svc;
 
-		svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
+		svc = ip_vs_genl_find_service(net,
+					      info->attrs[IPVS_CMD_ATTR_SERVICE]);
 		if (IS_ERR(svc)) {
 			ret = PTR_ERR(svc);
 			goto out_err;
@@ -3234,7 +3333,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
 	{
 		struct ip_vs_timeout_user t;
 
-		__ip_vs_get_timeouts(&t);
+		__ip_vs_get_timeouts(net, &t);
 #ifdef CONFIG_IP_VS_PROTO_TCP
 		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
 		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
@@ -3380,62 +3479,172 @@ static void ip_vs_genl_unregister(void)
 
 /* End of Generic Netlink interface definitions */
 
+/*
+ * per netns init/exit func.  Init returns 0, or -ENOMEM with setup undone.
+ */
+int __net_init __ip_vs_control_init(struct net *net)
+{
+	int idx;
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ctl_table *tbl;
+
+	atomic_set(&ipvs->dropentry, 0);
+	spin_lock_init(&ipvs->dropentry_lock);
+	spin_lock_init(&ipvs->droppacket_lock);
+	spin_lock_init(&ipvs->securetcp_lock);
+	ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
+
+	/* Initialize rs_table */
+	for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
+		INIT_LIST_HEAD(&ipvs->rs_table[idx]);
+
+	INIT_LIST_HEAD(&ipvs->dest_trash);
+	atomic_set(&ipvs->ftpsvc_counter, 0);
+	atomic_set(&ipvs->nullsvc_counter, 0);
+
+	/* procfs stats */
+	ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
+	if (ipvs->tot_stats == NULL) {
+		pr_err("%s(): no memory.\n", __func__);
+		return -ENOMEM;
+	}
+	ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+	if (!ipvs->cpustats) {
+		pr_err("%s() alloc_percpu failed\n", __func__);
+		goto err_alloc;
+	}
+	spin_lock_init(&ipvs->tot_stats->lock);
+
+	proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
+	proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
+	proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
+			     &ip_vs_stats_percpu_fops);
+
+	if (!net_eq(net, &init_net)) {
+		tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
+		if (tbl == NULL)
+			goto err_dup;
+	} else
+		tbl = vs_vars;
+	/* Per-netns sysctl data and defaults; idx walks tbl in order */
+	idx = 0;
+	ipvs->sysctl_amemthresh = 1024;
+	tbl[idx++].data = &ipvs->sysctl_amemthresh;
+	ipvs->sysctl_am_droprate = 10;
+	tbl[idx++].data = &ipvs->sysctl_am_droprate;
+	tbl[idx++].data = &ipvs->sysctl_drop_entry;
+	tbl[idx++].data = &ipvs->sysctl_drop_packet;
+#ifdef CONFIG_IP_VS_NFCT
+	tbl[idx++].data = &ipvs->sysctl_conntrack;
+#endif
+	tbl[idx++].data = &ipvs->sysctl_secure_tcp;
+	ipvs->sysctl_snat_reroute = 1;
+	tbl[idx++].data = &ipvs->sysctl_snat_reroute;
+	ipvs->sysctl_sync_ver = 1;
+	tbl[idx++].data = &ipvs->sysctl_sync_ver;
+	tbl[idx++].data = &ipvs->sysctl_cache_bypass;
+	tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
+	tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
+	ipvs->sysctl_sync_threshold[0] = 3;
+	ipvs->sysctl_sync_threshold[1] = 50;
+	tbl[idx].data = &ipvs->sysctl_sync_threshold;
+	tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
+	tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
+
+	ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
+						     tbl);
+	if (ipvs->sysctl_hdr == NULL)
+		goto err_reg;
+	ip_vs_new_estimator(net, ipvs->tot_stats);
+	ipvs->sysctl_tbl = tbl;
+	/* Schedule defense work */
+	INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
+	schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
+	return 0;
+
+err_reg:
+	if (!net_eq(net, &init_net))
+		kfree(tbl);
+err_dup:
+	/* Drop the proc entries registered above before freeing the stats */
+	proc_net_remove(net, "ip_vs_stats_percpu");
+	proc_net_remove(net, "ip_vs_stats");
+	proc_net_remove(net, "ip_vs");
+	free_percpu(ipvs->cpustats);
+err_alloc:
+	kfree(ipvs->tot_stats);
+	return -ENOMEM;
+}
+
+static void __net_exit __ip_vs_control_cleanup(struct net *net)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	ip_vs_trash_cleanup(net);
+	ip_vs_kill_estimator(net, ipvs->tot_stats);
+	cancel_delayed_work_sync(&ipvs->defense_work);
+	cancel_work_sync(&ipvs->defense_work.work);
+	unregister_net_sysctl_table(ipvs->sysctl_hdr);
+	proc_net_remove(net, "ip_vs_stats_percpu");
+	proc_net_remove(net, "ip_vs_stats");
+	proc_net_remove(net, "ip_vs");
+	free_percpu(ipvs->cpustats);
+	kfree(ipvs->tot_stats);
+}
+
+static struct pernet_operations ipvs_control_ops = {
+	.init = __ip_vs_control_init,
+	.exit = __ip_vs_control_cleanup,
+};
 
 int __init ip_vs_control_init(void)
 {
-	int ret;
 	int idx;
+	int ret;
 
 	EnterFunction(2);
 
-	/* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
+	/* Initialize ip_vs_svc_table and ip_vs_svc_fwm_table */
 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
 		INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
 		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
 	}
-	for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
-		INIT_LIST_HEAD(&ip_vs_rtable[idx]);
+
+	ret = register_pernet_subsys(&ipvs_control_ops);
+	if (ret) {
+		pr_err("cannot register namespace.\n");
+		goto err;
 	}
-	smp_wmb();
+
+	smp_wmb();	/* Do we really need it now ? */
 
 	ret = nf_register_sockopt(&ip_vs_sockopts);
 	if (ret) {
 		pr_err("cannot register sockopt.\n");
-		return ret;
+		goto err_net;
 	}
 
 	ret = ip_vs_genl_register();
 	if (ret) {
 		pr_err("cannot register Generic Netlink interface.\n");
 		nf_unregister_sockopt(&ip_vs_sockopts);
-		return ret;
+		goto err_net;
 	}
 
-	proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
-	proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
-
-	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
-
-	ip_vs_new_estimator(&ip_vs_stats);
-
-	/* Hook the defense timer */
-	schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
-
 	LeaveFunction(2);
 	return 0;
+
+err_net:
+	unregister_pernet_subsys(&ipvs_control_ops);
+err:
+	return ret;
 }
 
 
 void ip_vs_control_cleanup(void)
 {
 	EnterFunction(2);
-	ip_vs_trash_cleanup();
-	cancel_delayed_work_sync(&defense_work);
-	cancel_work_sync(&defense_work.work);
-	ip_vs_kill_estimator(&ip_vs_stats);
-	unregister_sysctl_table(sysctl_header);
-	proc_net_remove(&init_net, "ip_vs_stats");
-	proc_net_remove(&init_net, "ip_vs");
+	unregister_pernet_subsys(&ipvs_control_ops);
 	ip_vs_genl_unregister();
 	nf_unregister_sockopt(&ip_vs_sockopts);
 	LeaveFunction(2);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index ff28801962e..f560a05c965 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -8,8 +8,12 @@
  *              as published by the Free Software Foundation; either version
  *              2 of the License, or (at your option) any later version.
  *
- * Changes:
- *
+ * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
+ *              Network name space (netns) aware.
+ *              Global data moved to netns i.e struct netns_ipvs
+ *              Affected data: est_list and est_lock.
+ *              estimation_timer() runs with timer per netns.
+ *              get_stats() does the per cpu summing.
  */
 
 #define KMSG_COMPONENT "IPVS"
@@ -48,11 +52,42 @@
  */
 
 
-static void estimation_timer(unsigned long arg);
+/*
+ * Make a summary from each cpu
+ */
+static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
+				 struct ip_vs_cpu_stats *stats)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
+		unsigned int start;
+		__u64 inbytes, outbytes;
+		if (i) {
+			sum->conns += s->ustats.conns;
+			sum->inpkts += s->ustats.inpkts;
+			sum->outpkts += s->ustats.outpkts;
+			do {
+				start = u64_stats_fetch_begin_bh(&s->syncp);
+				inbytes = s->ustats.inbytes;
+				outbytes = s->ustats.outbytes;
+			} while (u64_stats_fetch_retry_bh(&s->syncp, start));
+			sum->inbytes += inbytes;
+			sum->outbytes += outbytes;
+		} else {
+			sum->conns = s->ustats.conns;
+			sum->inpkts = s->ustats.inpkts;
+			sum->outpkts = s->ustats.outpkts;
+			do {
+				start = u64_stats_fetch_begin_bh(&s->syncp);
+				sum->inbytes = s->ustats.inbytes;
+				sum->outbytes = s->ustats.outbytes;
+			} while (u64_stats_fetch_retry_bh(&s->syncp, start));
+		}
+	}
+}
 
-static LIST_HEAD(est_list);
-static DEFINE_SPINLOCK(est_lock);
-static DEFINE_TIMER(est_timer, estimation_timer, 0, 0);
 
 static void estimation_timer(unsigned long arg)
 {
@@ -62,11 +97,16 @@ static void estimation_timer(unsigned long arg)
 	u32 n_inpkts, n_outpkts;
 	u64 n_inbytes, n_outbytes;
 	u32 rate;
+	struct net *net = (struct net *)arg;
+	struct netns_ipvs *ipvs;
 
-	spin_lock(&est_lock);
-	list_for_each_entry(e, &est_list, list) {
+	ipvs = net_ipvs(net);
+	ip_vs_read_cpu_stats(&ipvs->tot_stats->ustats, ipvs->cpustats);
+	spin_lock(&ipvs->est_lock);
+	list_for_each_entry(e, &ipvs->est_list, list) {
 		s = container_of(e, struct ip_vs_stats, est);
 
+		ip_vs_read_cpu_stats(&s->ustats, s->cpustats);
 		spin_lock(&s->lock);
 		n_conns = s->ustats.conns;
 		n_inpkts = s->ustats.inpkts;
@@ -75,38 +115,39 @@ static void estimation_timer(unsigned long arg)
 		n_outbytes = s->ustats.outbytes;
 
 		/* scaled by 2^10, but divided 2 seconds */
-		rate = (n_conns - e->last_conns)<<9;
+		rate = (n_conns - e->last_conns) << 9;
 		e->last_conns = n_conns;
-		e->cps += ((long)rate - (long)e->cps)>>2;
-		s->ustats.cps = (e->cps+0x1FF)>>10;
+		e->cps += ((long)rate - (long)e->cps) >> 2;
+		s->ustats.cps = (e->cps + 0x1FF) >> 10;
 
-		rate = (n_inpkts - e->last_inpkts)<<9;
+		rate = (n_inpkts - e->last_inpkts) << 9;
 		e->last_inpkts = n_inpkts;
-		e->inpps += ((long)rate - (long)e->inpps)>>2;
-		s->ustats.inpps = (e->inpps+0x1FF)>>10;
+		e->inpps += ((long)rate - (long)e->inpps) >> 2;
+		s->ustats.inpps = (e->inpps + 0x1FF) >> 10;
 
-		rate = (n_outpkts - e->last_outpkts)<<9;
+		rate = (n_outpkts - e->last_outpkts) << 9;
 		e->last_outpkts = n_outpkts;
-		e->outpps += ((long)rate - (long)e->outpps)>>2;
-		s->ustats.outpps = (e->outpps+0x1FF)>>10;
+		e->outpps += ((long)rate - (long)e->outpps) >> 2;
+		s->ustats.outpps = (e->outpps + 0x1FF) >> 10;
 
-		rate = (n_inbytes - e->last_inbytes)<<4;
+		rate = (n_inbytes - e->last_inbytes) << 4;
 		e->last_inbytes = n_inbytes;
-		e->inbps += ((long)rate - (long)e->inbps)>>2;
-		s->ustats.inbps = (e->inbps+0xF)>>5;
+		e->inbps += ((long)rate - (long)e->inbps) >> 2;
+		s->ustats.inbps = (e->inbps + 0xF) >> 5;
 
-		rate = (n_outbytes - e->last_outbytes)<<4;
+		rate = (n_outbytes - e->last_outbytes) << 4;
 		e->last_outbytes = n_outbytes;
-		e->outbps += ((long)rate - (long)e->outbps)>>2;
-		s->ustats.outbps = (e->outbps+0xF)>>5;
+		e->outbps += ((long)rate - (long)e->outbps) >> 2;
+		s->ustats.outbps = (e->outbps + 0xF) >> 5;
 		spin_unlock(&s->lock);
 	}
-	spin_unlock(&est_lock);
-	mod_timer(&est_timer, jiffies + 2*HZ);
+	spin_unlock(&ipvs->est_lock);
+	mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
 }
 
-void ip_vs_new_estimator(struct ip_vs_stats *stats)
+void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats)
 {
+	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ip_vs_estimator *est = &stats->est;
 
 	INIT_LIST_HEAD(&est->list);
@@ -126,18 +167,19 @@ void ip_vs_new_estimator(struct ip_vs_stats *stats)
 	est->last_outbytes = stats->ustats.outbytes;
 	est->outbps = stats->ustats.outbps<<5;
 
-	spin_lock_bh(&est_lock);
-	list_add(&est->list, &est_list);
-	spin_unlock_bh(&est_lock);
+	spin_lock_bh(&ipvs->est_lock);
+	list_add(&est->list, &ipvs->est_list);
+	spin_unlock_bh(&ipvs->est_lock);
 }
 
-void ip_vs_kill_estimator(struct ip_vs_stats *stats)
+void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats)
 {
+	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ip_vs_estimator *est = &stats->est;
 
-	spin_lock_bh(&est_lock);
+	spin_lock_bh(&ipvs->est_lock);
 	list_del(&est->list);
-	spin_unlock_bh(&est_lock);
+	spin_unlock_bh(&ipvs->est_lock);
 }
 
 void ip_vs_zero_estimator(struct ip_vs_stats *stats)
@@ -157,13 +199,35 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats)
 	est->outbps = 0;
 }
 
-int __init ip_vs_estimator_init(void)
+static int __net_init __ip_vs_estimator_init(struct net *net)
 {
-	mod_timer(&est_timer, jiffies + 2 * HZ);
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	INIT_LIST_HEAD(&ipvs->est_list);
+	spin_lock_init(&ipvs->est_lock);
+	setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)net);
+	mod_timer(&ipvs->est_timer, jiffies + 2 * HZ);
 	return 0;
 }
 
+static void __net_exit __ip_vs_estimator_exit(struct net *net)
+{
+	del_timer_sync(&net_ipvs(net)->est_timer);
+}
+static struct pernet_operations ip_vs_app_ops = {
+	.init = __ip_vs_estimator_init,
+	.exit = __ip_vs_estimator_exit,
+};
+
+int __init ip_vs_estimator_init(void)
+{
+	int rv;
+
+	rv = register_pernet_subsys(&ip_vs_app_ops);
+	return rv;
+}
+
 void ip_vs_estimator_cleanup(void)
 {
-	del_timer_sync(&est_timer);
+	unregister_pernet_subsys(&ip_vs_app_ops);
 }
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 75455000ad1..6b5dd6ddaae 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -157,6 +157,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 	int ret = 0;
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct;
+	struct net *net;
 
 #ifdef CONFIG_IP_VS_IPV6
 	/* This application helper doesn't work with IPv6 yet,
@@ -197,18 +198,20 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 		 */
 		{
 			struct ip_vs_conn_param p;
-			ip_vs_conn_fill_param(AF_INET, iph->protocol,
-					      &from, port, &cp->caddr, 0, &p);
+			ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
+					      iph->protocol, &from, port,
+					      &cp->caddr, 0, &p);
 			n_cp = ip_vs_conn_out_get(&p);
 		}
 		if (!n_cp) {
 			struct ip_vs_conn_param p;
-			ip_vs_conn_fill_param(AF_INET, IPPROTO_TCP, &cp->caddr,
+			ip_vs_conn_fill_param(ip_vs_conn_net(cp),
+					      AF_INET, IPPROTO_TCP, &cp->caddr,
 					      0, &cp->vaddr, port, &p);
 			n_cp = ip_vs_conn_new(&p, &from, port,
 					      IP_VS_CONN_F_NO_CPORT |
 					      IP_VS_CONN_F_NFCT,
-					      cp->dest);
+					      cp->dest, skb->mark);
 			if (!n_cp)
 				return 0;
 
@@ -257,8 +260,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 		 * would be adjusted twice.
 		 */
 
+		net = skb_net(skb);
 		cp->app_data = NULL;
-		ip_vs_tcp_conn_listen(n_cp);
+		ip_vs_tcp_conn_listen(net, n_cp);
 		ip_vs_conn_put(n_cp);
 		return ret;
 	}
@@ -287,6 +291,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 	union nf_inet_addr to;
 	__be16 port;
 	struct ip_vs_conn *n_cp;
+	struct net *net;
 
 #ifdef CONFIG_IP_VS_IPV6
 	/* This application helper doesn't work with IPv6 yet,
@@ -358,14 +363,15 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 
 	{
 		struct ip_vs_conn_param p;
-		ip_vs_conn_fill_param(AF_INET, iph->protocol, &to, port,
-				      &cp->vaddr, htons(ntohs(cp->vport)-1),
-				      &p);
+		ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
+				      iph->protocol, &to, port, &cp->vaddr,
+				      htons(ntohs(cp->vport)-1), &p);
 		n_cp = ip_vs_conn_in_get(&p);
 		if (!n_cp) {
 			n_cp = ip_vs_conn_new(&p, &cp->daddr,
 					      htons(ntohs(cp->dport)-1),
-					      IP_VS_CONN_F_NFCT, cp->dest);
+					      IP_VS_CONN_F_NFCT, cp->dest,
+					      skb->mark);
 			if (!n_cp)
 				return 0;
 
@@ -377,7 +383,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 	/*
 	 *	Move tunnel to listen state
 	 */
-	ip_vs_tcp_conn_listen(n_cp);
+	net = skb_net(skb);
+	ip_vs_tcp_conn_listen(net, n_cp);
 	ip_vs_conn_put(n_cp);
 
 	return 1;
@@ -398,23 +405,22 @@ static struct ip_vs_app ip_vs_ftp = {
 	.pkt_in =	ip_vs_ftp_in,
 };
 
-
 /*
- *	ip_vs_ftp initialization
+ *	per netns ip_vs_ftp initialization
  */
-static int __init ip_vs_ftp_init(void)
+static int __net_init __ip_vs_ftp_init(struct net *net)
 {
 	int i, ret;
 	struct ip_vs_app *app = &ip_vs_ftp;
 
-	ret = register_ip_vs_app(app);
+	ret = register_ip_vs_app(net, app);
 	if (ret)
 		return ret;
 
 	for (i=0; i<IP_VS_APP_MAX_PORTS; i++) {
 		if (!ports[i])
 			continue;
-		ret = register_ip_vs_app_inc(app, app->protocol, ports[i]);
+		ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]);
 		if (ret)
 			break;
 		pr_info("%s: loaded support on port[%d] = %d\n",
@@ -422,18 +428,39 @@ static int __init ip_vs_ftp_init(void)
 	}
 
 	if (ret)
-		unregister_ip_vs_app(app);
+		unregister_ip_vs_app(net, app);
 
 	return ret;
 }
+/*
+ *	per netns ip_vs_ftp exit: unregister the ftp app from this netns
+ */
+static void __net_exit __ip_vs_ftp_exit(struct net *net)
+{
+	struct ip_vs_app *app = &ip_vs_ftp;
+
+	unregister_ip_vs_app(net, app);
+}
+
+static struct pernet_operations ip_vs_ftp_ops = {
+	.init = __ip_vs_ftp_init,
+	.exit = __ip_vs_ftp_exit,
+};
 
+static int __init ip_vs_ftp_init(void)
+{
+	/*
+	 * The app is registered per netns through ip_vs_ftp_ops (.init).
+	 */
+	return register_pernet_subsys(&ip_vs_ftp_ops);
+}
 
 /*
  *	ip_vs_ftp finish.
  */
 static void __exit ip_vs_ftp_exit(void)
 {
-	unregister_ip_vs_app(&ip_vs_ftp);
+	unregister_pernet_subsys(&ip_vs_ftp_ops);
 }
 
 
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 9323f894419..d5bec337187 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -70,7 +70,6 @@
  *    entries that haven't been touched for a day.
  */
 #define COUNT_FOR_FULL_EXPIRATION   30
-static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ;
 
 
 /*
@@ -117,7 +116,7 @@ struct ip_vs_lblc_table {
 static ctl_table vs_vars_table[] = {
 	{
 		.procname	= "lblc_expiration",
-		.data		= &sysctl_ip_vs_lblc_expiration,
+		.data		= NULL,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
@@ -125,8 +124,6 @@ static ctl_table vs_vars_table[] = {
 	{ }
 };
 
-static struct ctl_table_header * sysctl_header;
-
 static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
 {
 	list_del(&en->list);
@@ -248,6 +245,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
 	struct ip_vs_lblc_entry *en, *nxt;
 	unsigned long now = jiffies;
 	int i, j;
+	struct netns_ipvs *ipvs = net_ipvs(svc->net);
 
 	for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
 		j = (j + 1) & IP_VS_LBLC_TAB_MASK;
@@ -255,7 +253,8 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
 		write_lock(&svc->sched_lock);
 		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
 			if (time_before(now,
-					en->lastuse + sysctl_ip_vs_lblc_expiration))
+					en->lastuse +
+					ipvs->sysctl_lblc_expiration))
 				continue;
 
 			ip_vs_lblc_free(en);
@@ -543,23 +542,73 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler =
 	.schedule =		ip_vs_lblc_schedule,
 };
 
+/* per netns init: register the "lblc_expiration" sysctl (default 24h) for
+ * @net; init_net uses vs_vars_table itself, other netns a kmemdup() copy.
+ */
+static int __net_init __ip_vs_lblc_init(struct net *net)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	if (!net_eq(net, &init_net)) {
+		ipvs->lblc_ctl_table = kmemdup(vs_vars_table,
+						sizeof(vs_vars_table),
+						GFP_KERNEL);
+		if (ipvs->lblc_ctl_table == NULL)
+			goto err_dup;
+	} else
+		ipvs->lblc_ctl_table = vs_vars_table;
+	ipvs->sysctl_lblc_expiration = 24*60*60*HZ;
+	ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration;
+
+	ipvs->lblc_ctl_header =
+		register_net_sysctl_table(net, net_vs_ctl_path,
+					  ipvs->lblc_ctl_table);
+	if (!ipvs->lblc_ctl_header)
+		goto err_reg;
+
+	return 0;
+
+err_reg:
+	if (!net_eq(net, &init_net))
+		kfree(ipvs->lblc_ctl_table);
+
+err_dup:
+	return -ENOMEM;
+}
+
+static void __net_exit __ip_vs_lblc_exit(struct net *net)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	unregister_net_sysctl_table(ipvs->lblc_ctl_header);
+	if (net_eq(net, &init_net))
+		return;	/* init_net registered the static vs_vars_table */
+	kfree(ipvs->lblc_ctl_table);
+}
+
+static struct pernet_operations ip_vs_lblc_ops = {
+	.init = __ip_vs_lblc_init,
+	.exit = __ip_vs_lblc_exit,
+};
 
 static int __init ip_vs_lblc_init(void)
 {
 	int ret;
 
-	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
+	ret = register_pernet_subsys(&ip_vs_lblc_ops);
+	if (ret)
+		return ret;
+
 	ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
 	if (ret)
-		unregister_sysctl_table(sysctl_header);
+		unregister_pernet_subsys(&ip_vs_lblc_ops);
 	return ret;
 }
 
-
 static void __exit ip_vs_lblc_cleanup(void)
 {
-	unregister_sysctl_table(sysctl_header);
 	unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
+	unregister_pernet_subsys(&ip_vs_lblc_ops);
 }
 
 
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index dbeed8ea421..61ae8cfcf0b 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -70,8 +70,6 @@
  *    entries that haven't been touched for a day.
  */
 #define COUNT_FOR_FULL_EXPIRATION   30
-static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ;
-
 
 /*
  *     for IPVS lblcr entry hash table
@@ -296,7 +294,7 @@ struct ip_vs_lblcr_table {
 static ctl_table vs_vars_table[] = {
 	{
 		.procname	= "lblcr_expiration",
-		.data		= &sysctl_ip_vs_lblcr_expiration,
+		.data		= NULL,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
@@ -304,8 +302,6 @@ static ctl_table vs_vars_table[] = {
 	{ }
 };
 
-static struct ctl_table_header * sysctl_header;
-
 static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
 {
 	list_del(&en->list);
@@ -425,14 +421,15 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
 	unsigned long now = jiffies;
 	int i, j;
 	struct ip_vs_lblcr_entry *en, *nxt;
+	struct netns_ipvs *ipvs = net_ipvs(svc->net);
 
 	for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
 		j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
 
 		write_lock(&svc->sched_lock);
 		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
-			if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration,
-				       now))
+			if (time_after(en->lastuse
+					+ ipvs->sysctl_lblcr_expiration, now))
 				continue;
 
 			ip_vs_lblcr_free(en);
@@ -664,6 +661,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	read_lock(&svc->sched_lock);
 	en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr);
 	if (en) {
+		struct netns_ipvs *ipvs = net_ipvs(svc->net);
 		/* We only hold a read lock, but this is atomic */
 		en->lastuse = jiffies;
 
@@ -675,7 +673,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 		/* More than one destination + enough time passed by, cleanup */
 		if (atomic_read(&en->set.size) > 1 &&
 				time_after(jiffies, en->set.lastmod +
-				sysctl_ip_vs_lblcr_expiration)) {
+				ipvs->sysctl_lblcr_expiration)) {
 			struct ip_vs_dest *m;
 
 			write_lock(&en->set.lock);
@@ -744,23 +742,73 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
 	.schedule =		ip_vs_lblcr_schedule,
 };
 
+/* per netns init: register the "lblcr_expiration" sysctl (default 24h) for
+ * @net; init_net uses vs_vars_table itself, other netns a kmemdup() copy.
+ */
+static int __net_init __ip_vs_lblcr_init(struct net *net)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	if (!net_eq(net, &init_net)) {
+		ipvs->lblcr_ctl_table = kmemdup(vs_vars_table,
+						sizeof(vs_vars_table),
+						GFP_KERNEL);
+		if (ipvs->lblcr_ctl_table == NULL)
+			goto err_dup;
+	} else
+		ipvs->lblcr_ctl_table = vs_vars_table;
+	ipvs->sysctl_lblcr_expiration = 24*60*60*HZ;
+	ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration;
+
+	ipvs->lblcr_ctl_header =
+		register_net_sysctl_table(net, net_vs_ctl_path,
+					  ipvs->lblcr_ctl_table);
+	if (!ipvs->lblcr_ctl_header)
+		goto err_reg;
+
+	return 0;
+
+err_reg:
+	if (!net_eq(net, &init_net))
+		kfree(ipvs->lblcr_ctl_table);
+
+err_dup:
+	return -ENOMEM;
+}
+
+static void __net_exit __ip_vs_lblcr_exit(struct net *net)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	unregister_net_sysctl_table(ipvs->lblcr_ctl_header);
+	if (net_eq(net, &init_net))
+		return;	/* init_net registered the static vs_vars_table */
+	kfree(ipvs->lblcr_ctl_table);
+}
+
+static struct pernet_operations ip_vs_lblcr_ops = {
+	.init = __ip_vs_lblcr_init,
+	.exit = __ip_vs_lblcr_exit,
+};
 
 static int __init ip_vs_lblcr_init(void)
 {
 	int ret;
 
-	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
+	ret = register_pernet_subsys(&ip_vs_lblcr_ops);
+	if (ret)
+		return ret;
+
 	ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
 	if (ret)
-		unregister_sysctl_table(sysctl_header);
+		unregister_pernet_subsys(&ip_vs_lblcr_ops);
 	return ret;
 }
 
-
 static void __exit ip_vs_lblcr_cleanup(void)
 {
-	unregister_sysctl_table(sysctl_header);
 	unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
+	unregister_pernet_subsys(&ip_vs_lblcr_ops);
 }
 
 
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index 4680647cd45..f454c80df0a 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -141,6 +141,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
 	struct nf_conntrack_tuple *orig, new_reply;
 	struct ip_vs_conn *cp;
 	struct ip_vs_conn_param p;
+	struct net *net = nf_ct_net(ct);
 
 	if (exp->tuple.src.l3num != PF_INET)
 		return;
@@ -155,7 +156,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
 
 	/* RS->CLIENT */
 	orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
-	ip_vs_conn_fill_param(exp->tuple.src.l3num, orig->dst.protonum,
+	ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum,
 			      &orig->src.u3, orig->src.u.tcp.port,
 			      &orig->dst.u3, orig->dst.u.tcp.port, &p);
 	cp = ip_vs_conn_out_get(&p);
@@ -268,7 +269,8 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
 		" for conn " FMT_CONN "\n",
 		__func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
 
-	h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple);
+	h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE,
+				  &tuple);
 	if (h) {
 		ct = nf_ct_tuplehash_to_ctrack(h);
 		/* Show what happens instead of calling nf_ct_kill() */
diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c
index 3414af70ee1..5cf859ccb31 100644
--- a/net/netfilter/ipvs/ip_vs_pe.c
+++ b/net/netfilter/ipvs/ip_vs_pe.c
@@ -29,12 +29,11 @@ void ip_vs_unbind_pe(struct ip_vs_service *svc)
 }
 
 /* Get pe in the pe list by name */
-static struct ip_vs_pe *
-ip_vs_pe_getbyname(const char *pe_name)
+struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
 {
 	struct ip_vs_pe *pe;
 
-	IP_VS_DBG(2, "%s(): pe_name \"%s\"\n", __func__,
+	IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__,
 		  pe_name);
 
 	spin_lock_bh(&ip_vs_pe_lock);
@@ -60,28 +59,22 @@ ip_vs_pe_getbyname(const char *pe_name)
 }
 
 /* Lookup pe and try to load it if it doesn't exist */
-struct ip_vs_pe *ip_vs_pe_get(const char *name)
+struct ip_vs_pe *ip_vs_pe_getbyname(const char *name)
 {
 	struct ip_vs_pe *pe;
 
 	/* Search for the pe by name */
-	pe = ip_vs_pe_getbyname(name);
+	pe = __ip_vs_pe_getbyname(name);
 
 	/* If pe not found, load the module and search again */
 	if (!pe) {
 		request_module("ip_vs_pe_%s", name);
-		pe = ip_vs_pe_getbyname(name);
+		pe = __ip_vs_pe_getbyname(name);
 	}
 
 	return pe;
 }
 
-void ip_vs_pe_put(struct ip_vs_pe *pe)
-{
-	if (pe && pe->module)
-		module_put(pe->module);
-}
-
 /* Register a pe in the pe list */
 int register_ip_vs_pe(struct ip_vs_pe *pe)
 {
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index b8b4e9620f3..0d83bc01fed 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -71,6 +71,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
 	struct ip_vs_iphdr iph;
 	unsigned int dataoff, datalen, matchoff, matchlen;
 	const char *dptr;
+	int retc;
 
 	ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph);
 
@@ -83,6 +84,8 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
 	if (dataoff >= skb->len)
 		return -EINVAL;
 
+	if ((retc=skb_linearize(skb)) < 0)
+		return retc;
 	dptr = skb->data + dataoff;
 	datalen = skb->len - dataoff;
 
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index c5399839087..17484a4416e 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -60,6 +60,35 @@ static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp)
 	return 0;
 }
 
+#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) || \
+    defined(CONFIG_IP_VS_PROTO_SCTP) || defined(CONFIG_IP_VS_PROTO_AH) || \
+    defined(CONFIG_IP_VS_PROTO_ESP)
+/*
+ *	register an ipvs protocol's netns related data (may sleep: GFP_KERNEL)
+ */
+static int
+register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	unsigned hash = IP_VS_PROTO_HASH(pp->protocol);
+	struct ip_vs_proto_data *pd =
+			kzalloc(sizeof(struct ip_vs_proto_data), GFP_KERNEL);
+
+	if (!pd) {
+		pr_err("%s(): no memory.\n", __func__);
+		return -ENOMEM;
+	}
+	pd->pp = pp;	/* For speed issues */
+	pd->next = ipvs->proto_data_table[hash];
+	ipvs->proto_data_table[hash] = pd;
+	atomic_set(&pd->appcnt, 0);	/* Init app counter */
+
+	if (pp->init_netns != NULL)
+		pp->init_netns(net, pd);
+
+	return 0;
+}
+#endif
 
 /*
  *	unregister an ipvs protocol
@@ -82,6 +111,29 @@ static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp)
 	return -ESRCH;
 }
 
+/*
+ *	unlink and free @pd (after pp->exit_netns); -ESRCH if it is not hashed
+ */
+static int
+unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_proto_data **pd_p;
+	unsigned hash = IP_VS_PROTO_HASH(pd->pp->protocol);
+
+	pd_p = &ipvs->proto_data_table[hash];
+	for (; *pd_p; pd_p = &(*pd_p)->next) {
+		if (*pd_p == pd) {
+			*pd_p = pd->next;
+			if (pd->pp->exit_netns != NULL)
+				pd->pp->exit_netns(net, pd);
+			kfree(pd);
+			return 0;
+		}
+	}
+
+	return -ESRCH;
+}
 
 /*
  *	get ip_vs_protocol object by its proto.
@@ -100,19 +152,44 @@ struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto)
 }
 EXPORT_SYMBOL(ip_vs_proto_get);
 
+/*
+ *	look up the per-netns data of protocol @proto; NULL if none registered
+ */
+struct ip_vs_proto_data *
+__ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
+{
+	struct ip_vs_proto_data *entry;
+
+	/* walk the chain of the bucket @proto hashes to */
+	entry = ipvs->proto_data_table[IP_VS_PROTO_HASH(proto)];
+	while (entry && entry->pp->protocol != proto)
+		entry = entry->next;
+
+	/* either the match or the NULL that ended the chain */
+	return entry;
+}
+
+struct ip_vs_proto_data *
+ip_vs_proto_data_get(struct net *net, unsigned short proto)
+{
+	/* Exported variant: callers hand in the struct net, the hash walk
+	 * itself is done by __ipvs_proto_data_get(). */
+	return __ipvs_proto_data_get(net_ipvs(net), proto);
+}
+EXPORT_SYMBOL(ip_vs_proto_data_get);
 
 /*
  *	Propagate event for state change to all protocols
  */
-void ip_vs_protocol_timeout_change(int flags)
+void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags)
 {
-	struct ip_vs_protocol *pp;
+	struct ip_vs_proto_data *pd;
 	int i;
 
 	for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
-		for (pp = ip_vs_proto_table[i]; pp; pp = pp->next) {
-			if (pp->timeout_change)
-				pp->timeout_change(pp, flags);
+		for (pd = ipvs->proto_data_table[i]; pd; pd = pd->next) {
+			if (pd->pp->timeout_change)
+				pd->pp->timeout_change(pd, flags);
 		}
 	}
 }
@@ -236,6 +313,46 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
 		ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg);
 }
 
+/* per netns init.  NOTE(review): register_ip_vs_proto_netns() can return
+ * -ENOMEM, but the result is dropped and this always reports success.
+ */
+static int __net_init __ip_vs_protocol_init(struct net *net)
+{
+#ifdef CONFIG_IP_VS_PROTO_TCP
+	register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+	register_ip_vs_proto_netns(net, &ip_vs_protocol_udp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_SCTP
+	register_ip_vs_proto_netns(net, &ip_vs_protocol_sctp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_AH
+	register_ip_vs_proto_netns(net, &ip_vs_protocol_ah);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_ESP
+	register_ip_vs_proto_netns(net, &ip_vs_protocol_esp);
+#endif
+	return 0;
+}
+
+static void __net_exit __ip_vs_protocol_cleanup(struct net *net)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	int slot;
+
+	/* each unregister unlinks the chain head, so drain until empty */
+	for (slot = 0; slot < IP_VS_PROTO_TAB_SIZE; slot++) {
+		while (ipvs->proto_data_table[slot] != NULL)
+			unregister_ip_vs_proto_netns(net,
+					ipvs->proto_data_table[slot]);
+	}
+}
+
+static struct pernet_operations ipvs_proto_ops = {
+	.init = __ip_vs_protocol_init,
+	.exit = __ip_vs_protocol_cleanup,
+};
 
 int __init ip_vs_protocol_init(void)
 {
@@ -265,6 +382,7 @@ int __init ip_vs_protocol_init(void)
 	REGISTER_PROTOCOL(&ip_vs_protocol_esp);
 #endif
 	pr_info("Registered protocols (%s)\n", &protocols[2]);
+	return register_pernet_subsys(&ipvs_proto_ops);
 
 	return 0;
 }
@@ -275,6 +393,7 @@ void ip_vs_protocol_cleanup(void)
 	struct ip_vs_protocol *pp;
 	int i;
 
+	unregister_pernet_subsys(&ipvs_proto_ops);
 	/* unregister all the ipvs protocols */
 	for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
 		while ((pp = ip_vs_proto_table[i]) != NULL)
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index 3a0461117d3..5b8eb8b12c3 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -41,28 +41,30 @@ struct isakmp_hdr {
 #define PORT_ISAKMP	500
 
 static void
-ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph,
-			     int inverse, struct ip_vs_conn_param *p)
+ah_esp_conn_fill_param_proto(struct net *net, int af,
+			     const struct ip_vs_iphdr *iph, int inverse,
+			     struct ip_vs_conn_param *p)
 {
 	if (likely(!inverse))
-		ip_vs_conn_fill_param(af, IPPROTO_UDP,
+		ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
 				      &iph->saddr, htons(PORT_ISAKMP),
 				      &iph->daddr, htons(PORT_ISAKMP), p);
 	else
-		ip_vs_conn_fill_param(af, IPPROTO_UDP,
+		ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
 				      &iph->daddr, htons(PORT_ISAKMP),
 				      &iph->saddr, htons(PORT_ISAKMP), p);
 }
 
 static struct ip_vs_conn *
-ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+ah_esp_conn_in_get(int af, const struct sk_buff *skb,
 		   const struct ip_vs_iphdr *iph, unsigned int proto_off,
 		   int inverse)
 {
 	struct ip_vs_conn *cp;
 	struct ip_vs_conn_param p;
+	struct net *net = skb_net(skb);
 
-	ah_esp_conn_fill_param_proto(af, iph, inverse, &p);
+	ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
 	cp = ip_vs_conn_in_get(&p);
 	if (!cp) {
 		/*
@@ -72,7 +74,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
 		IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet "
 			      "%s%s %s->%s\n",
 			      inverse ? "ICMP+" : "",
-			      pp->name,
+			      ip_vs_proto_get(iph->protocol)->name,
 			      IP_VS_DBG_ADDR(af, &iph->saddr),
 			      IP_VS_DBG_ADDR(af, &iph->daddr));
 	}
@@ -83,21 +85,21 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
 
 static struct ip_vs_conn *
 ah_esp_conn_out_get(int af, const struct sk_buff *skb,
-		    struct ip_vs_protocol *pp,
 		    const struct ip_vs_iphdr *iph,
 		    unsigned int proto_off,
 		    int inverse)
 {
 	struct ip_vs_conn *cp;
 	struct ip_vs_conn_param p;
+	struct net *net = skb_net(skb);
 
-	ah_esp_conn_fill_param_proto(af, iph, inverse, &p);
+	ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
 	cp = ip_vs_conn_out_get(&p);
 	if (!cp) {
 		IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
 			      "%s%s %s->%s\n",
 			      inverse ? "ICMP+" : "",
-			      pp->name,
+			      ip_vs_proto_get(iph->protocol)->name,
 			      IP_VS_DBG_ADDR(af, &iph->saddr),
 			      IP_VS_DBG_ADDR(af, &iph->daddr));
 	}
@@ -107,7 +109,7 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
 
 
 static int
-ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
 		     int *verdict, struct ip_vs_conn **cpp)
 {
 	/*
@@ -117,26 +119,14 @@ ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 	return 0;
 }
 
-static void ah_esp_init(struct ip_vs_protocol *pp)
-{
-	/* nothing to do now */
-}
-
-
-static void ah_esp_exit(struct ip_vs_protocol *pp)
-{
-	/* nothing to do now */
-}
-
-
 #ifdef CONFIG_IP_VS_PROTO_AH
 struct ip_vs_protocol ip_vs_protocol_ah = {
 	.name =			"AH",
 	.protocol =		IPPROTO_AH,
 	.num_states =		1,
 	.dont_defrag =		1,
-	.init =			ah_esp_init,
-	.exit =			ah_esp_exit,
+	.init =			NULL,
+	.exit =			NULL,
 	.conn_schedule =	ah_esp_conn_schedule,
 	.conn_in_get =		ah_esp_conn_in_get,
 	.conn_out_get =		ah_esp_conn_out_get,
@@ -149,7 +139,6 @@ struct ip_vs_protocol ip_vs_protocol_ah = {
 	.app_conn_bind =	NULL,
 	.debug_packet =		ip_vs_tcpudp_debug_packet,
 	.timeout_change =	NULL,		/* ISAKMP */
-	.set_state_timeout =	NULL,
 };
 #endif
 
@@ -159,8 +148,8 @@ struct ip_vs_protocol ip_vs_protocol_esp = {
 	.protocol =		IPPROTO_ESP,
 	.num_states =		1,
 	.dont_defrag =		1,
-	.init =			ah_esp_init,
-	.exit =			ah_esp_exit,
+	.init =			NULL,
+	.exit =			NULL,
 	.conn_schedule =	ah_esp_conn_schedule,
 	.conn_in_get =		ah_esp_conn_in_get,
 	.conn_out_get =		ah_esp_conn_out_get,
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 1ea96bcd342..fb2d04ac5d4 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -9,9 +9,10 @@
 #include <net/ip_vs.h>
 
 static int
-sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
 		   int *verdict, struct ip_vs_conn **cpp)
 {
+	struct net *net;
 	struct ip_vs_service *svc;
 	sctp_chunkhdr_t _schunkh, *sch;
 	sctp_sctphdr_t *sh, _sctph;
@@ -27,13 +28,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 				 sizeof(_schunkh), &_schunkh);
 	if (sch == NULL)
 		return 0;
-
+	net = skb_net(skb);
 	if ((sch->type == SCTP_CID_INIT) &&
-	    (svc = ip_vs_service_get(af, skb->mark, iph.protocol,
+	    (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
 				     &iph.daddr, sh->dest))) {
 		int ignored;
 
-		if (ip_vs_todrop()) {
+		if (ip_vs_todrop(net_ipvs(net))) {
 			/*
 			 * It seems that we are very loaded.
 			 * We have to drop this packet :(
@@ -46,14 +47,19 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 		 * Let the virtual server select a real server for the
 		 * incoming connection, and create a connection entry.
 		 */
-		*cpp = ip_vs_schedule(svc, skb, pp, &ignored);
-		if (!*cpp && !ignored) {
-			*verdict = ip_vs_leave(svc, skb, pp);
+		*cpp = ip_vs_schedule(svc, skb, pd, &ignored);
+		if (!*cpp && ignored <= 0) {
+			if (!ignored)
+				*verdict = ip_vs_leave(svc, skb, pd);
+			else {
+				ip_vs_service_put(svc);
+				*verdict = NF_DROP;
+			}
 			return 0;
 		}
 		ip_vs_service_put(svc);
 	}
-
+	/* NF_ACCEPT */
 	return 1;
 }
 
@@ -856,7 +862,7 @@ static struct ipvs_sctp_nextstate
 /*
  *      Timeout table[state]
  */
-static int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
+static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
 	[IP_VS_SCTP_S_NONE]         =     2 * HZ,
 	[IP_VS_SCTP_S_INIT_CLI]     =     1 * 60 * HZ,
 	[IP_VS_SCTP_S_INIT_SER]     =     1 * 60 * HZ,
@@ -900,20 +906,8 @@ static const char *sctp_state_name(int state)
 	return "?";
 }
 
-static void sctp_timeout_change(struct ip_vs_protocol *pp, int flags)
-{
-}
-
-static int
-sctp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
-{
-
-return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_SCTP_S_LAST,
-				sctp_state_name_table, sname, to);
-}
-
 static inline int
-set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
+set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
 		int direction, const struct sk_buff *skb)
 {
 	sctp_chunkhdr_t _sctpch, *sch;
@@ -971,7 +965,7 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
 
 		IP_VS_DBG_BUF(8, "%s %s  %s:%d->"
 				"%s:%d state: %s->%s conn->refcnt:%d\n",
-				pp->name,
+				pd->pp->name,
 				((direction == IP_VS_DIR_OUTPUT) ?
 				 "output " : "input "),
 				IP_VS_DBG_ADDR(cp->af, &cp->daddr),
@@ -995,75 +989,73 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
 			}
 		}
 	}
+	if (likely(pd))
+		cp->timeout = pd->timeout_table[cp->state = next_state];
+	else	/* What to do ? */
+		cp->timeout = sctp_timeouts[cp->state = next_state];
 
-	 cp->timeout = pp->timeout_table[cp->state = next_state];
-
-	 return 1;
+	return 1;
 }
 
 static int
 sctp_state_transition(struct ip_vs_conn *cp, int direction,
-		const struct sk_buff *skb, struct ip_vs_protocol *pp)
+		const struct sk_buff *skb, struct ip_vs_proto_data *pd)
 {
 	int ret = 0;
 
 	spin_lock(&cp->lock);
-	ret = set_sctp_state(pp, cp, direction, skb);
+	ret = set_sctp_state(pd, cp, direction, skb);
 	spin_unlock(&cp->lock);
 
 	return ret;
 }
 
-/*
- *      Hash table for SCTP application incarnations
- */
-#define SCTP_APP_TAB_BITS        4
-#define SCTP_APP_TAB_SIZE        (1 << SCTP_APP_TAB_BITS)
-#define SCTP_APP_TAB_MASK        (SCTP_APP_TAB_SIZE - 1)
-
-static struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(sctp_app_lock);
-
 static inline __u16 sctp_app_hashkey(__be16 port)
 {
 	return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port)
 		& SCTP_APP_TAB_MASK;
 }
 
-static int sctp_register_app(struct ip_vs_app *inc)
+static int sctp_register_app(struct net *net, struct ip_vs_app *inc)
 {
 	struct ip_vs_app *i;
 	__u16 hash;
 	__be16 port = inc->port;
 	int ret = 0;
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
 
 	hash = sctp_app_hashkey(port);
 
-	spin_lock_bh(&sctp_app_lock);
-	list_for_each_entry(i, &sctp_apps[hash], p_list) {
+	spin_lock_bh(&ipvs->sctp_app_lock);
+	list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
 		if (i->port == port) {
 			ret = -EEXIST;
 			goto out;
 		}
 	}
-	list_add(&inc->p_list, &sctp_apps[hash]);
-	atomic_inc(&ip_vs_protocol_sctp.appcnt);
+	list_add(&inc->p_list, &ipvs->sctp_apps[hash]);
+	atomic_inc(&pd->appcnt);
 out:
-	spin_unlock_bh(&sctp_app_lock);
+	spin_unlock_bh(&ipvs->sctp_app_lock);
 
 	return ret;
 }
 
-static void sctp_unregister_app(struct ip_vs_app *inc)
+static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
 {
-	spin_lock_bh(&sctp_app_lock);
-	atomic_dec(&ip_vs_protocol_sctp.appcnt);
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
+
+	spin_lock_bh(&ipvs->sctp_app_lock);
+	atomic_dec(&pd->appcnt);
 	list_del(&inc->p_list);
-	spin_unlock_bh(&sctp_app_lock);
+	spin_unlock_bh(&ipvs->sctp_app_lock);
 }
 
 static int sctp_app_conn_bind(struct ip_vs_conn *cp)
 {
+	struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
 	int hash;
 	struct ip_vs_app *inc;
 	int result = 0;
@@ -1074,12 +1066,12 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
 	/* Lookup application incarnations and bind the right one */
 	hash = sctp_app_hashkey(cp->vport);
 
-	spin_lock(&sctp_app_lock);
-	list_for_each_entry(inc, &sctp_apps[hash], p_list) {
+	spin_lock(&ipvs->sctp_app_lock);
+	list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) {
 		if (inc->port == cp->vport) {
 			if (unlikely(!ip_vs_app_inc_get(inc)))
 				break;
-			spin_unlock(&sctp_app_lock);
+			spin_unlock(&ipvs->sctp_app_lock);
 
 			IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
 					"%s:%u to app %s on port %u\n",
@@ -1095,43 +1087,50 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
 			goto out;
 		}
 	}
-	spin_unlock(&sctp_app_lock);
+	spin_unlock(&ipvs->sctp_app_lock);
 out:
 	return result;
 }
 
-static void ip_vs_sctp_init(struct ip_vs_protocol *pp)
+/*
+ * per netns SCTP init: empty app hash, its lock, and a timeout table made
+ * from sctp_timeouts (kfree'd on exit).  NOTE(review): NULL is not checked.
+ */
+static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
 {
-	IP_VS_INIT_HASH_TABLE(sctp_apps);
-	pp->timeout_table = sctp_timeouts;
-}
+	struct netns_ipvs *ipvs = net_ipvs(net);
 
+	ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
+	spin_lock_init(&ipvs->sctp_app_lock);
+	pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
+							sizeof(sctp_timeouts));
+}
 
-static void ip_vs_sctp_exit(struct ip_vs_protocol *pp)
+static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd)
 {
-
+	kfree(pd->timeout_table);
 }
 
 struct ip_vs_protocol ip_vs_protocol_sctp = {
-	.name = "SCTP",
-	.protocol = IPPROTO_SCTP,
-	.num_states = IP_VS_SCTP_S_LAST,
-	.dont_defrag = 0,
-	.appcnt = ATOMIC_INIT(0),
-	.init = ip_vs_sctp_init,
-	.exit = ip_vs_sctp_exit,
-	.register_app = sctp_register_app,
+	.name		= "SCTP",
+	.protocol	= IPPROTO_SCTP,
+	.num_states	= IP_VS_SCTP_S_LAST,
+	.dont_defrag	= 0,
+	.init		= NULL,
+	.exit		= NULL,
+	.init_netns	= __ip_vs_sctp_init,
+	.exit_netns	= __ip_vs_sctp_exit,
+	.register_app	= sctp_register_app,
 	.unregister_app = sctp_unregister_app,
-	.conn_schedule = sctp_conn_schedule,
-	.conn_in_get = ip_vs_conn_in_get_proto,
-	.conn_out_get = ip_vs_conn_out_get_proto,
-	.snat_handler = sctp_snat_handler,
-	.dnat_handler = sctp_dnat_handler,
-	.csum_check = sctp_csum_check,
-	.state_name = sctp_state_name,
+	.conn_schedule	= sctp_conn_schedule,
+	.conn_in_get	= ip_vs_conn_in_get_proto,
+	.conn_out_get	= ip_vs_conn_out_get_proto,
+	.snat_handler	= sctp_snat_handler,
+	.dnat_handler	= sctp_dnat_handler,
+	.csum_check	= sctp_csum_check,
+	.state_name	= sctp_state_name,
 	.state_transition = sctp_state_transition,
-	.app_conn_bind = sctp_app_conn_bind,
-	.debug_packet = ip_vs_tcpudp_debug_packet,
-	.timeout_change = sctp_timeout_change,
-	.set_state_timeout = sctp_set_state_timeout,
+	.app_conn_bind	= sctp_app_conn_bind,
+	.debug_packet	= ip_vs_tcpudp_debug_packet,
+	.timeout_change	= NULL,
 };
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index f6c5200e214..c0cc341b840 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -9,8 +9,12 @@
  *              as published by the Free Software Foundation; either version
  *              2 of the License, or (at your option) any later version.
  *
- * Changes:
+ * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
  *
+ *              Network name space (netns) aware.
+ *              Global data moved to netns i.e struct netns_ipvs
+ *              tcp_timeouts table has copy per netns in a hash table per
+ *              protocol ip_vs_proto_data and is handled by netns
  */
 
 #define KMSG_COMPONENT "IPVS"
@@ -28,9 +32,10 @@
 #include <net/ip_vs.h>
 
 static int
-tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
 		  int *verdict, struct ip_vs_conn **cpp)
 {
+	struct net *net;
 	struct ip_vs_service *svc;
 	struct tcphdr _tcph, *th;
 	struct ip_vs_iphdr iph;
@@ -42,14 +47,14 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 		*verdict = NF_DROP;
 		return 0;
 	}
-
+	net = skb_net(skb);
 	/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
 	if (th->syn &&
-	    (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr,
-				     th->dest))) {
+	    (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
+				     &iph.daddr, th->dest))) {
 		int ignored;
 
-		if (ip_vs_todrop()) {
+		if (ip_vs_todrop(net_ipvs(net))) {
 			/*
 			 * It seems that we are very loaded.
 			 * We have to drop this packet :(
@@ -63,13 +68,19 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 		 * Let the virtual server select a real server for the
 		 * incoming connection, and create a connection entry.
 		 */
-		*cpp = ip_vs_schedule(svc, skb, pp, &ignored);
-		if (!*cpp && !ignored) {
-			*verdict = ip_vs_leave(svc, skb, pp);
+		*cpp = ip_vs_schedule(svc, skb, pd, &ignored);
+		if (!*cpp && ignored <= 0) {
+			if (!ignored)
+				*verdict = ip_vs_leave(svc, skb, pd);
+			else {
+				ip_vs_service_put(svc);
+				*verdict = NF_DROP;
+			}
 			return 0;
 		}
 		ip_vs_service_put(svc);
 	}
+	/* NF_ACCEPT */
 	return 1;
 }
 
@@ -338,7 +349,7 @@ static const int tcp_state_off[IP_VS_DIR_LAST] = {
 /*
  *	Timeout table[state]
  */
-static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
+static const int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
 	[IP_VS_TCP_S_NONE]		=	2*HZ,
 	[IP_VS_TCP_S_ESTABLISHED]	=	15*60*HZ,
 	[IP_VS_TCP_S_SYN_SENT]		=	2*60*HZ,
@@ -437,10 +448,7 @@ static struct tcp_states_t tcp_states_dos [] = {
 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
 };
 
-static struct tcp_states_t *tcp_state_table = tcp_states;
-
-
-static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
+static void tcp_timeout_change(struct ip_vs_proto_data *pd, int flags)
 {
 	int on = (flags & 1);		/* secure_tcp */
 
@@ -450,14 +458,7 @@ static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
 	** for most if not for all of the applications. Something
 	** like "capabilities" (flags) for each object.
 	*/
-	tcp_state_table = (on? tcp_states_dos : tcp_states);
-}
-
-static int
-tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
-{
-	return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
-				       tcp_state_name_table, sname, to);
+	pd->tcp_state_table = (on ? tcp_states_dos : tcp_states);
 }
 
 static inline int tcp_state_idx(struct tcphdr *th)
@@ -474,7 +475,7 @@ static inline int tcp_state_idx(struct tcphdr *th)
 }
 
 static inline void
-set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
+set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
 	      int direction, struct tcphdr *th)
 {
 	int state_idx;
@@ -497,7 +498,8 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
 		goto tcp_state_out;
 	}
 
-	new_state = tcp_state_table[state_off+state_idx].next_state[cp->state];
+	new_state =
+		pd->tcp_state_table[state_off+state_idx].next_state[cp->state];
 
   tcp_state_out:
 	if (new_state != cp->state) {
@@ -505,7 +507,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
 
 		IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->"
 			      "%s:%d state: %s->%s conn->refcnt:%d\n",
-			      pp->name,
+			      pd->pp->name,
 			      ((state_off == TCP_DIR_OUTPUT) ?
 			       "output " : "input "),
 			      th->syn ? 'S' : '.',
@@ -535,17 +537,19 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
 		}
 	}
 
-	cp->timeout = pp->timeout_table[cp->state = new_state];
+	if (likely(pd))
+		cp->timeout = pd->timeout_table[cp->state = new_state];
+	else	/* What to do ? */
+		cp->timeout = tcp_timeouts[cp->state = new_state];
 }
 
-
 /*
  *	Handle state transitions
  */
 static int
 tcp_state_transition(struct ip_vs_conn *cp, int direction,
 		     const struct sk_buff *skb,
-		     struct ip_vs_protocol *pp)
+		     struct ip_vs_proto_data *pd)
 {
 	struct tcphdr _tcph, *th;
 
@@ -560,23 +564,12 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
 		return 0;
 
 	spin_lock(&cp->lock);
-	set_tcp_state(pp, cp, direction, th);
+	set_tcp_state(pd, cp, direction, th);
 	spin_unlock(&cp->lock);
 
 	return 1;
 }
 
-
-/*
- *	Hash table for TCP application incarnations
- */
-#define	TCP_APP_TAB_BITS	4
-#define	TCP_APP_TAB_SIZE	(1 << TCP_APP_TAB_BITS)
-#define	TCP_APP_TAB_MASK	(TCP_APP_TAB_SIZE - 1)
-
-static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(tcp_app_lock);
-
 static inline __u16 tcp_app_hashkey(__be16 port)
 {
 	return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port)
@@ -584,44 +577,50 @@ static inline __u16 tcp_app_hashkey(__be16 port)
 }
 
 
-static int tcp_register_app(struct ip_vs_app *inc)
+static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
 {
 	struct ip_vs_app *i;
 	__u16 hash;
 	__be16 port = inc->port;
 	int ret = 0;
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
 
 	hash = tcp_app_hashkey(port);
 
-	spin_lock_bh(&tcp_app_lock);
-	list_for_each_entry(i, &tcp_apps[hash], p_list) {
+	spin_lock_bh(&ipvs->tcp_app_lock);
+	list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) {
 		if (i->port == port) {
 			ret = -EEXIST;
 			goto out;
 		}
 	}
-	list_add(&inc->p_list, &tcp_apps[hash]);
-	atomic_inc(&ip_vs_protocol_tcp.appcnt);
+	list_add(&inc->p_list, &ipvs->tcp_apps[hash]);
+	atomic_inc(&pd->appcnt);
 
   out:
-	spin_unlock_bh(&tcp_app_lock);
+	spin_unlock_bh(&ipvs->tcp_app_lock);
 	return ret;
 }
 
 
 static void
-tcp_unregister_app(struct ip_vs_app *inc)
+tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
 {
-	spin_lock_bh(&tcp_app_lock);
-	atomic_dec(&ip_vs_protocol_tcp.appcnt);
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+
+	spin_lock_bh(&ipvs->tcp_app_lock);
+	atomic_dec(&pd->appcnt);
 	list_del(&inc->p_list);
-	spin_unlock_bh(&tcp_app_lock);
+	spin_unlock_bh(&ipvs->tcp_app_lock);
 }
 
 
 static int
 tcp_app_conn_bind(struct ip_vs_conn *cp)
 {
+	struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
 	int hash;
 	struct ip_vs_app *inc;
 	int result = 0;
@@ -633,12 +632,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
 	/* Lookup application incarnations and bind the right one */
 	hash = tcp_app_hashkey(cp->vport);
 
-	spin_lock(&tcp_app_lock);
-	list_for_each_entry(inc, &tcp_apps[hash], p_list) {
+	spin_lock(&ipvs->tcp_app_lock);
+	list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) {
 		if (inc->port == cp->vport) {
 			if (unlikely(!ip_vs_app_inc_get(inc)))
 				break;
-			spin_unlock(&tcp_app_lock);
+			spin_unlock(&ipvs->tcp_app_lock);
 
 			IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
 				      "%s:%u to app %s on port %u\n",
@@ -655,7 +654,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
 			goto out;
 		}
 	}
-	spin_unlock(&tcp_app_lock);
+	spin_unlock(&ipvs->tcp_app_lock);
 
   out:
 	return result;
@@ -665,24 +664,35 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
 /*
  *	Set LISTEN timeout. (ip_vs_conn_put will setup timer)
  */
-void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
+void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
 {
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+
 	spin_lock(&cp->lock);
 	cp->state = IP_VS_TCP_S_LISTEN;
-	cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN];
+	cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN]
+			   : tcp_timeouts[IP_VS_TCP_S_LISTEN]);
 	spin_unlock(&cp->lock);
 }
 
-
-static void ip_vs_tcp_init(struct ip_vs_protocol *pp)
+/* ---------------------------------------------
+ *   Timeouts are per network namespace (netns) now.
+ * ---------------------------------------------
+ */
+static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
 {
-	IP_VS_INIT_HASH_TABLE(tcp_apps);
-	pp->timeout_table = tcp_timeouts;
-}
+	struct netns_ipvs *ipvs = net_ipvs(net);
 
+	ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE);
+	spin_lock_init(&ipvs->tcp_app_lock);
+	pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,
+							sizeof(tcp_timeouts));
+	pd->tcp_state_table =  tcp_states;
+}
 
-static void ip_vs_tcp_exit(struct ip_vs_protocol *pp)
+static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd)
 {
+	kfree(pd->timeout_table);
 }
 
 
@@ -691,9 +701,10 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
 	.protocol =		IPPROTO_TCP,
 	.num_states =		IP_VS_TCP_S_LAST,
 	.dont_defrag =		0,
-	.appcnt =		ATOMIC_INIT(0),
-	.init =			ip_vs_tcp_init,
-	.exit =			ip_vs_tcp_exit,
+	.init =			NULL,
+	.exit =			NULL,
+	.init_netns =		__ip_vs_tcp_init,
+	.exit_netns =		__ip_vs_tcp_exit,
 	.register_app =		tcp_register_app,
 	.unregister_app =	tcp_unregister_app,
 	.conn_schedule =	tcp_conn_schedule,
@@ -707,5 +718,4 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
 	.app_conn_bind =	tcp_app_conn_bind,
 	.debug_packet =		ip_vs_tcpudp_debug_packet,
 	.timeout_change =	tcp_timeout_change,
-	.set_state_timeout =	tcp_set_state_timeout,
 };
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 9d106a06bb0..f1282cbe6fe 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -9,7 +9,8 @@
  *              as published by the Free Software Foundation; either version
  *              2 of the License, or (at your option) any later version.
  *
- * Changes:
+ * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
+ *              Network name space (netns) aware.
  *
  */
 
@@ -28,9 +29,10 @@
 #include <net/ip6_checksum.h>
 
 static int
-udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
 		  int *verdict, struct ip_vs_conn **cpp)
 {
+	struct net *net;
 	struct ip_vs_service *svc;
 	struct udphdr _udph, *uh;
 	struct ip_vs_iphdr iph;
@@ -42,13 +44,13 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 		*verdict = NF_DROP;
 		return 0;
 	}
-
-	svc = ip_vs_service_get(af, skb->mark, iph.protocol,
+	net = skb_net(skb);
+	svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
 				&iph.daddr, uh->dest);
 	if (svc) {
 		int ignored;
 
-		if (ip_vs_todrop()) {
+		if (ip_vs_todrop(net_ipvs(net))) {
 			/*
 			 * It seems that we are very loaded.
 			 * We have to drop this packet :(
@@ -62,13 +64,19 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 		 * Let the virtual server select a real server for the
 		 * incoming connection, and create a connection entry.
 		 */
-		*cpp = ip_vs_schedule(svc, skb, pp, &ignored);
-		if (!*cpp && !ignored) {
-			*verdict = ip_vs_leave(svc, skb, pp);
+		*cpp = ip_vs_schedule(svc, skb, pd, &ignored);
+		if (!*cpp && ignored <= 0) {
+			if (!ignored)
+				*verdict = ip_vs_leave(svc, skb, pd);
+			else {
+				ip_vs_service_put(svc);
+				*verdict = NF_DROP;
+			}
 			return 0;
 		}
 		ip_vs_service_put(svc);
 	}
+	/* NF_ACCEPT */
 	return 1;
 }
 
@@ -338,19 +346,6 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
 	return 1;
 }
 
-
-/*
- *	Note: the caller guarantees that only one of register_app,
- *	unregister_app or app_conn_bind is called each time.
- */
-
-#define	UDP_APP_TAB_BITS	4
-#define	UDP_APP_TAB_SIZE	(1 << UDP_APP_TAB_BITS)
-#define	UDP_APP_TAB_MASK	(UDP_APP_TAB_SIZE - 1)
-
-static struct list_head udp_apps[UDP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(udp_app_lock);
-
 static inline __u16 udp_app_hashkey(__be16 port)
 {
 	return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
@@ -358,44 +353,50 @@ static inline __u16 udp_app_hashkey(__be16 port)
 }
 
 
-static int udp_register_app(struct ip_vs_app *inc)
+static int udp_register_app(struct net *net, struct ip_vs_app *inc)
 {
 	struct ip_vs_app *i;
 	__u16 hash;
 	__be16 port = inc->port;
 	int ret = 0;
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
 
 	hash = udp_app_hashkey(port);
 
 
-	spin_lock_bh(&udp_app_lock);
-	list_for_each_entry(i, &udp_apps[hash], p_list) {
+	spin_lock_bh(&ipvs->udp_app_lock);
+	list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) {
 		if (i->port == port) {
 			ret = -EEXIST;
 			goto out;
 		}
 	}
-	list_add(&inc->p_list, &udp_apps[hash]);
-	atomic_inc(&ip_vs_protocol_udp.appcnt);
+	list_add(&inc->p_list, &ipvs->udp_apps[hash]);
+	atomic_inc(&pd->appcnt);
 
   out:
-	spin_unlock_bh(&udp_app_lock);
+	spin_unlock_bh(&ipvs->udp_app_lock);
 	return ret;
 }
 
 
 static void
-udp_unregister_app(struct ip_vs_app *inc)
+udp_unregister_app(struct net *net, struct ip_vs_app *inc)
 {
-	spin_lock_bh(&udp_app_lock);
-	atomic_dec(&ip_vs_protocol_udp.appcnt);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	spin_lock_bh(&ipvs->udp_app_lock);
+	atomic_dec(&pd->appcnt);
 	list_del(&inc->p_list);
-	spin_unlock_bh(&udp_app_lock);
+	spin_unlock_bh(&ipvs->udp_app_lock);
 }
 
 
 static int udp_app_conn_bind(struct ip_vs_conn *cp)
 {
+	struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
 	int hash;
 	struct ip_vs_app *inc;
 	int result = 0;
@@ -407,12 +408,12 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
 	/* Lookup application incarnations and bind the right one */
 	hash = udp_app_hashkey(cp->vport);
 
-	spin_lock(&udp_app_lock);
-	list_for_each_entry(inc, &udp_apps[hash], p_list) {
+	spin_lock(&ipvs->udp_app_lock);
+	list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) {
 		if (inc->port == cp->vport) {
 			if (unlikely(!ip_vs_app_inc_get(inc)))
 				break;
-			spin_unlock(&udp_app_lock);
+			spin_unlock(&ipvs->udp_app_lock);
 
 			IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
 				      "%s:%u to app %s on port %u\n",
@@ -429,14 +430,14 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
 			goto out;
 		}
 	}
-	spin_unlock(&udp_app_lock);
+	spin_unlock(&ipvs->udp_app_lock);
 
   out:
 	return result;
 }
 
 
-static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
+static const int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
 	[IP_VS_UDP_S_NORMAL]		=	5*60*HZ,
 	[IP_VS_UDP_S_LAST]		=	2*HZ,
 };
@@ -446,14 +447,6 @@ static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
 	[IP_VS_UDP_S_LAST]		=	"BUG!",
 };
 
-
-static int
-udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
-{
-	return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
-				       udp_state_name_table, sname, to);
-}
-
 static const char * udp_state_name(int state)
 {
 	if (state >= IP_VS_UDP_S_LAST)
@@ -464,20 +457,30 @@ static const char * udp_state_name(int state)
 static int
 udp_state_transition(struct ip_vs_conn *cp, int direction,
 		     const struct sk_buff *skb,
-		     struct ip_vs_protocol *pp)
+		     struct ip_vs_proto_data *pd)
 {
-	cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
+	if (unlikely(!pd)) {
+		pr_err("UDP no ns data\n");
+		return 0;
+	}
+
+	cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
 	return 1;
 }
 
-static void udp_init(struct ip_vs_protocol *pp)
+static void __udp_init(struct net *net, struct ip_vs_proto_data *pd)
 {
-	IP_VS_INIT_HASH_TABLE(udp_apps);
-	pp->timeout_table = udp_timeouts;
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE);
+	spin_lock_init(&ipvs->udp_app_lock);
+	pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts,
+							sizeof(udp_timeouts));
 }
 
-static void udp_exit(struct ip_vs_protocol *pp)
+static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd)
 {
+	kfree(pd->timeout_table);
 }
 
 
@@ -486,8 +489,10 @@ struct ip_vs_protocol ip_vs_protocol_udp = {
 	.protocol =		IPPROTO_UDP,
 	.num_states =		IP_VS_UDP_S_LAST,
 	.dont_defrag =		0,
-	.init =			udp_init,
-	.exit =			udp_exit,
+	.init =			NULL,
+	.exit =			NULL,
+	.init_netns =		__udp_init,
+	.exit_netns =		__udp_exit,
 	.conn_schedule =	udp_conn_schedule,
 	.conn_in_get =		ip_vs_conn_in_get_proto,
 	.conn_out_get =		ip_vs_conn_out_get_proto,
@@ -501,5 +506,4 @@ struct ip_vs_protocol ip_vs_protocol_udp = {
 	.app_conn_bind =	udp_app_conn_bind,
 	.debug_packet =		ip_vs_tcpudp_debug_packet,
 	.timeout_change =	NULL,
-	.set_state_timeout =	udp_set_state_timeout,
 };
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index ab85aedea17..d5a6e640ea4 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -5,6 +5,18 @@
  *              high-performance and highly available server based on a
  *              cluster of servers.
  *
+ * Version 1,   is capable of handling both version 0 and 1 messages.
+ *              Version 0 is the plain old format.
+ *              Note Version 0 receivers will just drop Ver 1 messages.
+ *              Version 1 is capable of handling IPv6, Persistence data,
+ *              time-outs, and firewall marks.
+ *              In ver.1 "ip_vs_sync_conn_options" will be sent in netw. order.
+ *              Ver. 0 can be turned on by sysctl -w net.ipv4.vs.sync_version=0
+ *
+ * Definitions  Message: is a complete datagram
+ *              Sync_conn: is a part of a Message
+ *              Param Data is an option to a Sync_conn.
+ *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *
  * ip_vs_sync:  sync connection info from master load balancer to backups
@@ -15,6 +27,8 @@
  *	Alexandre Cassen	:	Added SyncID support for incoming sync
  *					messages filtering.
  *	Justin Ossevoort	:	Fix endian problem on sync message size.
+ *	Hans Schillstrom	:	Added Version 1: i.e. IPv6,
+ *					Persistence support, fwmark and time-out.
  */
 
 #define KMSG_COMPONENT "IPVS"
@@ -35,6 +49,8 @@
 #include <linux/wait.h>
 #include <linux/kernel.h>
 
+#include <asm/unaligned.h>		/* Used for ntoh_seq and hton_seq */
+
 #include <net/ip.h>
 #include <net/sock.h>
 
@@ -43,11 +59,13 @@
 #define IP_VS_SYNC_GROUP 0xe0000051    /* multicast addr - 224.0.0.81 */
 #define IP_VS_SYNC_PORT  8848          /* multicast port */
 
+#define SYNC_PROTO_VER  1		/* Protocol version in header */
 
 /*
  *	IPVS sync connection entry
+ *	Version 0, i.e. original version.
  */
-struct ip_vs_sync_conn {
+struct ip_vs_sync_conn_v0 {
 	__u8			reserved;
 
 	/* Protocol, addresses and port numbers */
@@ -71,41 +89,159 @@ struct ip_vs_sync_conn_options {
 	struct ip_vs_seq        out_seq;        /* outgoing seq. struct */
 };
 
+/*
+     Sync Connection format (sync_conn)
+
+       0                   1                   2                   3
+       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |    Type       |    Protocol   | Ver.  |        Size           |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                             Flags                             |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |            State              |         cport                 |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |            vport              |         dport                 |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                             fwmark                            |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                             timeout  (in sec.)                |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                              ...                              |
+      |                        IP-Addresses  (v4 or v6)               |
+      |                              ...                              |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+  Optional Parameters.
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      | Param. Type    | Param. Length |   Param. data                |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+                               |
+      |                              ...                              |
+      |                               +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                               | Param Type    | Param. Length |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                           Param  data                         |
+      |         Last Param data should be padded for 32 bit alignment |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*/
+
+/*
+ *  Type 0, IPv4 sync connection format
+ */
+struct ip_vs_sync_v4 {
+	__u8			type;		/* STYPE_F_* bits */
+	__u8			protocol;	/* Which protocol (TCP/UDP) */
+	__be16			ver_size;	/* Version: 4 msb, size: 12 lsb */
+	/* Flags and state transition */
+	__be32			flags;		/* status flags */
+	__be16			state;		/* state info */
+	/* Protocol, addresses and port numbers */
+	__be16			cport;
+	__be16			vport;
+	__be16			dport;
+	__be32			fwmark;		/* Firewall mark from skb */
+	__be32			timeout;	/* cp timeout (in sec.) */
+	__be32			caddr;		/* client address */
+	__be32			vaddr;		/* virtual address */
+	__be32			daddr;		/* destination address */
+	/* The sequence options start here */
+	/* PE data padded to 32bit alignment after seq. options */
+};
+/*
+ * Type 2 messages IPv6
+ */
+struct ip_vs_sync_v6 {
+	__u8			type;		/* STYPE_F_* bits */
+	__u8			protocol;	/* Which protocol (TCP/UDP) */
+	__be16			ver_size;	/* Version: 4 msb, size: 12 lsb */
+	/* Flags and state transition */
+	__be32			flags;		/* status flags */
+	__be16			state;		/* state info */
+	/* Protocol, addresses and port numbers */
+	__be16			cport;
+	__be16			vport;
+	__be16			dport;
+	__be32			fwmark;		/* Firewall mark from skb */
+	__be32			timeout;	/* cp timeout (in sec.) */
+	struct in6_addr		caddr;		/* client address */
+	struct in6_addr		vaddr;		/* virtual address */
+	struct in6_addr		daddr;		/* destination address */
+	/* The sequence options start here */
+	/* PE data padded to 32bit alignment after seq. options */
+};
+
+union ip_vs_sync_conn {
+	struct ip_vs_sync_v4	v4;
+	struct ip_vs_sync_v6	v6;
+};
+
+/* Bits in Type field in above */
+#define STYPE_INET6		0
+#define STYPE_F_INET6		(1 << STYPE_INET6)
+
+#define SVER_SHIFT		12		/* Shift to get version */
+#define SVER_MASK		0x0fff		/* Mask to strip version */
+
+#define IPVS_OPT_SEQ_DATA	1
+#define IPVS_OPT_PE_DATA	2
+#define IPVS_OPT_PE_NAME	3
+#define IPVS_OPT_PARAM		7
+
+#define IPVS_OPT_F_SEQ_DATA	(1 << (IPVS_OPT_SEQ_DATA-1))
+#define IPVS_OPT_F_PE_DATA	(1 << (IPVS_OPT_PE_DATA-1))
+#define IPVS_OPT_F_PE_NAME	(1 << (IPVS_OPT_PE_NAME-1))
+#define IPVS_OPT_F_PARAM	(1 << (IPVS_OPT_PARAM-1))
+
 struct ip_vs_sync_thread_data {
+	struct net *net;
 	struct socket *sock;
 	char *buf;
 };
 
-#define SIMPLE_CONN_SIZE  (sizeof(struct ip_vs_sync_conn))
+/* Version 0 definition of packet sizes */
+#define SIMPLE_CONN_SIZE  (sizeof(struct ip_vs_sync_conn_v0))
 #define FULL_CONN_SIZE  \
-(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options))
+(sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options))
 
 
 /*
-  The master mulitcasts messages to the backup load balancers in the
-  following format.
+  The master multicasts messages (Datagrams) to the backup load balancers
+  in the following format.
+
+ Version 1:
+  Note, first byte should be Zero, so ver 0 receivers will drop the packet.
 
        0                   1                   2                   3
        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-      |  Count Conns  |    SyncID     |            Size               |
+      |      0        |    SyncID     |            Size               |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |  Count Conns  |    Version    |    Reserved, set to Zero      |
       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
       |                                                               |
       |                    IPVS Sync Connection (1)                   |
       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
       |                            .                                  |
-      |                            .                                  |
+      ~                            .                                  ~
       |                            .                                  |
       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
       |                                                               |
       |                    IPVS Sync Connection (n)                   |
       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Version 0 Header
+       0                   1                   2                   3
+       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |  Count Conns  |    SyncID     |            Size               |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                    IPVS Sync Connection (1)                   |
 */
 
 #define SYNC_MESG_HEADER_LEN	4
 #define MAX_CONNS_PER_SYNCBUFF	255 /* nr_conns in ip_vs_sync_mesg is 8 bit */
 
-struct ip_vs_sync_mesg {
+/* Version 0 header */
+struct ip_vs_sync_mesg_v0 {
 	__u8                    nr_conns;
 	__u8                    syncid;
 	__u16                   size;
@@ -113,9 +249,16 @@ struct ip_vs_sync_mesg {
 	/* ip_vs_sync_conn entries start here */
 };
 
-/* the maximum length of sync (sending/receiving) message */
-static int sync_send_mesg_maxlen;
-static int sync_recv_mesg_maxlen;
+/* Version 1 header; byte 0 is zero where the Version 0 header has nr_conns */
+struct ip_vs_sync_mesg {
+	__u8			reserved;	/* must be zero */
+	__u8			syncid;		/* sender's master_syncid */
+	__u16			size;		/* length, header included */
+	__u8			nr_conns;	/* 8 bit, so at most 255 */
+	__s8			version;	/* SYNC_PROTO_VER  */
+	__u16			spare;		/* reserved, set to zero */
+	/* ip_vs_sync_conn entries start here */
+};
 
 struct ip_vs_sync_buff {
 	struct list_head        list;
@@ -127,28 +270,6 @@ struct ip_vs_sync_buff {
 	unsigned char           *end;
 };
 
-
-/* the sync_buff list head and the lock */
-static LIST_HEAD(ip_vs_sync_queue);
-static DEFINE_SPINLOCK(ip_vs_sync_lock);
-
-/* current sync_buff for accepting new conn entries */
-static struct ip_vs_sync_buff   *curr_sb = NULL;
-static DEFINE_SPINLOCK(curr_sb_lock);
-
-/* ipvs sync daemon state */
-volatile int ip_vs_sync_state = IP_VS_STATE_NONE;
-volatile int ip_vs_master_syncid = 0;
-volatile int ip_vs_backup_syncid = 0;
-
-/* multicast interface name */
-char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-
-/* sync daemon tasks */
-static struct task_struct *sync_master_thread;
-static struct task_struct *sync_backup_thread;
-
 /* multicast addr */
 static struct sockaddr_in mcast_addr = {
 	.sin_family		= AF_INET,
@@ -156,41 +277,71 @@ static struct sockaddr_in mcast_addr = {
 	.sin_addr.s_addr	= cpu_to_be32(IP_VS_SYNC_GROUP),
 };
 
+/*
+ * Copy struct ip_vs_seq @no (network byte order, may be unaligned)
+ * into @ho (host byte order, aligned), one 32 bit field at a time.
+ */
+static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
+{
+	ho->init_seq       = get_unaligned_be32(&no->init_seq);
+	ho->delta          = get_unaligned_be32(&no->delta);
+	ho->previous_delta = get_unaligned_be32(&no->previous_delta);
+}
+
+/*
+ * Copy struct ip_vs_seq @ho (host byte order, aligned) into @no
+ * (network byte order, may be unaligned), one 32 bit field at a time.
+ */
+static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
+{
+	put_unaligned_be32(ho->init_seq, &no->init_seq);
+	put_unaligned_be32(ho->delta, &no->delta);
+	put_unaligned_be32(ho->previous_delta, &no->previous_delta);
+}
 
-static inline struct ip_vs_sync_buff *sb_dequeue(void)
+static inline struct ip_vs_sync_buff *sb_dequeue(struct netns_ipvs *ipvs)
 {
 	struct ip_vs_sync_buff *sb;
 
-	spin_lock_bh(&ip_vs_sync_lock);
-	if (list_empty(&ip_vs_sync_queue)) {
+	spin_lock_bh(&ipvs->sync_lock);
+	if (list_empty(&ipvs->sync_queue)) {
 		sb = NULL;
 	} else {
-		sb = list_entry(ip_vs_sync_queue.next,
+		sb = list_entry(ipvs->sync_queue.next,
 				struct ip_vs_sync_buff,
 				list);
 		list_del(&sb->list);
 	}
-	spin_unlock_bh(&ip_vs_sync_lock);
+	spin_unlock_bh(&ipvs->sync_lock);
 
 	return sb;
 }
 
-static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void)
+/*
+ * Create a new sync buffer for Version 1 proto.
+ */
+static inline struct ip_vs_sync_buff *
+ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
 {
 	struct ip_vs_sync_buff *sb;
 
 	if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
 		return NULL;
 
-	if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) {
+	sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
+	if (!sb->mesg) {
 		kfree(sb);
 		return NULL;
 	}
+	sb->mesg->reserved = 0;  /* old nr_conns i.e. must be zeo now */
+	sb->mesg->version = SYNC_PROTO_VER;
+	sb->mesg->syncid = ipvs->master_syncid;
+	sb->mesg->size = sizeof(struct ip_vs_sync_mesg);
 	sb->mesg->nr_conns = 0;
-	sb->mesg->syncid = ip_vs_master_syncid;
-	sb->mesg->size = 4;
-	sb->head = (unsigned char *)sb->mesg + 4;
-	sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen;
+	sb->mesg->spare = 0;
+	sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
+	sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen;
+
 	sb->firstuse = jiffies;
 	return sb;
 }
@@ -201,14 +352,16 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
 	kfree(sb);
 }
 
-static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
+static inline void sb_queue_tail(struct netns_ipvs *ipvs)
 {
-	spin_lock(&ip_vs_sync_lock);
-	if (ip_vs_sync_state & IP_VS_STATE_MASTER)
-		list_add_tail(&sb->list, &ip_vs_sync_queue);
+	struct ip_vs_sync_buff *sb = ipvs->sync_buff;
+
+	spin_lock(&ipvs->sync_lock);
+	if (ipvs->sync_state & IP_VS_STATE_MASTER)
+		list_add_tail(&sb->list, &ipvs->sync_queue);
 	else
 		ip_vs_sync_buff_release(sb);
-	spin_unlock(&ip_vs_sync_lock);
+	spin_unlock(&ipvs->sync_lock);
 }
 
 /*
@@ -216,36 +369,101 @@ static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
  *	than the specified time or the specified time is zero.
  */
 static inline struct ip_vs_sync_buff *
-get_curr_sync_buff(unsigned long time)
+get_curr_sync_buff(struct netns_ipvs *ipvs, unsigned long time)
 {
 	struct ip_vs_sync_buff *sb;
 
-	spin_lock_bh(&curr_sb_lock);
-	if (curr_sb && (time == 0 ||
-			time_before(jiffies - curr_sb->firstuse, time))) {
-		sb = curr_sb;
-		curr_sb = NULL;
+	spin_lock_bh(&ipvs->sync_buff_lock);
+	if (ipvs->sync_buff && (time == 0 ||
+	    time_before(jiffies - ipvs->sync_buff->firstuse, time))) {
+		sb = ipvs->sync_buff;
+		ipvs->sync_buff = NULL;
 	} else
 		sb = NULL;
-	spin_unlock_bh(&curr_sb_lock);
+	spin_unlock_bh(&ipvs->sync_buff_lock);
 	return sb;
 }
 
+/*
+ * Switch mode from sending version 0 or 1: the pending sync_buff is
+ * still in the old format, so hand it off before @mode takes effect.
+ */
+void ip_vs_sync_switch_mode(struct net *net, int mode)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_sync_buff *sb;
+
+	/* '!' binds tighter than '&': the bit test must be parenthesised */
+	if (!(ipvs->sync_state & IP_VS_STATE_MASTER))
+		return;
+	if (mode == ipvs->sysctl_sync_ver)
+		return;
+
+	spin_lock_bh(&ipvs->sync_buff_lock);
+	/* Look at sync_buff only under its lock, it may change before that */
+	sb = ipvs->sync_buff;
+	if (sb) {
+		/* Buffer empty ? then let buf_create do the job */
+		if (sb->mesg->size <= sizeof(struct ip_vs_sync_mesg))
+			ip_vs_sync_buff_release(sb); /* mesg is a separate alloc */
+		else
+			sb_queue_tail(ipvs);	/* queues or releases sync_buff */
+		/* The buffer is no longer ours in either case */
+		ipvs->sync_buff = NULL;
+	}
+	spin_unlock_bh(&ipvs->sync_buff_lock);
+}
 
 /*
+ * Create a new sync buffer for Version 0 proto.
+ */
+static inline struct ip_vs_sync_buff *
+ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
+{
+	struct ip_vs_sync_mesg_v0 *hdr;
+	struct ip_vs_sync_buff *buff;
+
+	buff = kmalloc(sizeof(*buff), GFP_ATOMIC);
+	if (!buff)
+		return NULL;
+	buff->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
+	if (!buff->mesg) {
+		kfree(buff);
+		return NULL;
+	}
+	hdr = (struct ip_vs_sync_mesg_v0 *)buff->mesg;	/* v0 header view */
+	hdr->nr_conns = 0;				/* empty message */
+	hdr->syncid = ipvs->master_syncid;
+	hdr->size = sizeof(*hdr);
+	buff->head = (unsigned char *)hdr + sizeof(*hdr);
+	buff->end = (unsigned char *)hdr + ipvs->send_mesg_maxlen;
+	buff->firstuse = jiffies;
+	return buff;
+}
+
+/*
+ *      Version 0 , could be switched in by sys_ctl.
  *      Add an ip_vs_conn information into the current sync_buff.
- *      Called by ip_vs_in.
  */
-void ip_vs_sync_conn(struct ip_vs_conn *cp)
+void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
 {
-	struct ip_vs_sync_mesg *m;
-	struct ip_vs_sync_conn *s;
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_sync_mesg_v0 *m;
+	struct ip_vs_sync_conn_v0 *s;
 	int len;
 
-	spin_lock(&curr_sb_lock);
-	if (!curr_sb) {
-		if (!(curr_sb=ip_vs_sync_buff_create())) {
-			spin_unlock(&curr_sb_lock);
+	if (unlikely(cp->af != AF_INET))
+		return;
+	/* Do not sync ONE PACKET */
+	if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+		return;
+
+	spin_lock(&ipvs->sync_buff_lock);
+	if (!ipvs->sync_buff) {
+		ipvs->sync_buff =
+			ip_vs_sync_buff_create_v0(ipvs);
+		if (!ipvs->sync_buff) {
+			spin_unlock(&ipvs->sync_buff_lock);
 			pr_err("ip_vs_sync_buff_create failed.\n");
 			return;
 		}
@@ -253,10 +471,11 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
 
 	len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
 		SIMPLE_CONN_SIZE;
-	m = curr_sb->mesg;
-	s = (struct ip_vs_sync_conn *)curr_sb->head;
+	m = (struct ip_vs_sync_mesg_v0 *)ipvs->sync_buff->mesg;
+	s = (struct ip_vs_sync_conn_v0 *)ipvs->sync_buff->head;
 
 	/* copy members */
+	s->reserved = 0;
 	s->protocol = cp->protocol;
 	s->cport = cp->cport;
 	s->vport = cp->vport;
@@ -274,83 +493,366 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
 
 	m->nr_conns++;
 	m->size += len;
-	curr_sb->head += len;
+	ipvs->sync_buff->head += len;
 
 	/* check if there is a space for next one */
-	if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) {
-		sb_queue_tail(curr_sb);
-		curr_sb = NULL;
+	if (ipvs->sync_buff->head + FULL_CONN_SIZE > ipvs->sync_buff->end) {
+		sb_queue_tail(ipvs);
+		ipvs->sync_buff = NULL;
 	}
-	spin_unlock(&curr_sb_lock);
+	spin_unlock(&ipvs->sync_buff_lock);
 
 	/* synchronize its controller if it has */
 	if (cp->control)
-		ip_vs_sync_conn(cp->control);
+		ip_vs_sync_conn(net, cp->control);
+}
+
+/*
+ *      Add an ip_vs_conn information into the current sync_buff.
+ *      Called by ip_vs_in.
+ *      Sending Version 1 messages
+ */
+void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_sync_mesg *m;
+	union ip_vs_sync_conn *s;
+	__u8 *p;
+	unsigned int len, pe_name_len, pad;
+
+	/* Handle old version of the protocol */
+	if (ipvs->sysctl_sync_ver == 0) {
+		ip_vs_sync_conn_v0(net, cp);
+		return;
+	}
+	/* Do not sync ONE PACKET */
+	if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+		goto control;
+sloop:
+	/* Sanity checks */
+	pe_name_len = 0;
+	if (cp->pe_data_len) {
+		if (!cp->pe_data || !cp->dest) {
+			IP_VS_ERR_RL("SYNC, connection pe_data invalid\n");
+			return;
+		}
+		pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
+	}
+
+	spin_lock(&ipvs->sync_buff_lock);
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (cp->af == AF_INET6)
+		len = sizeof(struct ip_vs_sync_v6);
+	else
+#endif
+		len = sizeof(struct ip_vs_sync_v4);
+
+	if (cp->flags & IP_VS_CONN_F_SEQ_MASK)
+		len += sizeof(struct ip_vs_sync_conn_options) + 2;
+
+	if (cp->pe_data_len)
+		len += cp->pe_data_len + 2;	/* + Param hdr field */
+	if (pe_name_len)
+		len += pe_name_len + 2;
+
+	/* check if there is a space for this one  */
+	pad = 0;
+	if (ipvs->sync_buff) {
+		pad = (4 - (size_t)ipvs->sync_buff->head) & 3;
+		if (ipvs->sync_buff->head + len + pad > ipvs->sync_buff->end) {
+			sb_queue_tail(ipvs);
+			ipvs->sync_buff = NULL;
+			pad = 0;
+		}
+	}
+
+	if (!ipvs->sync_buff) {
+		ipvs->sync_buff = ip_vs_sync_buff_create(ipvs);
+		if (!ipvs->sync_buff) {
+			spin_unlock(&ipvs->sync_buff_lock);
+			pr_err("ip_vs_sync_buff_create failed.\n");
+			return;
+		}
+	}
+
+	m = ipvs->sync_buff->mesg;
+	p = ipvs->sync_buff->head;
+	ipvs->sync_buff->head += pad + len;
+	m->size += pad + len;
+	/* Add ev. padding from prev. sync_conn */
+	while (pad--)
+		*(p++) = 0;
+
+	s = (union ip_vs_sync_conn *)p;
+
+	/* Set message type  & copy members */
+	s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0);
+	s->v4.ver_size = htons(len & SVER_MASK);	/* Version 0 */
+	s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED);
+	s->v4.state = htons(cp->state);
+	s->v4.protocol = cp->protocol;
+	s->v4.cport = cp->cport;
+	s->v4.vport = cp->vport;
+	s->v4.dport = cp->dport;
+	s->v4.fwmark = htonl(cp->fwmark);
+	s->v4.timeout = htonl(cp->timeout / HZ);
+	m->nr_conns++;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (cp->af == AF_INET6) {
+		p += sizeof(struct ip_vs_sync_v6);
+		ipv6_addr_copy(&s->v6.caddr, &cp->caddr.in6);
+		ipv6_addr_copy(&s->v6.vaddr, &cp->vaddr.in6);
+		ipv6_addr_copy(&s->v6.daddr, &cp->daddr.in6);
+	} else
+#endif
+	{
+		p += sizeof(struct ip_vs_sync_v4);	/* options ptr */
+		s->v4.caddr = cp->caddr.ip;
+		s->v4.vaddr = cp->vaddr.ip;
+		s->v4.daddr = cp->daddr.ip;
+	}
+	if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
+		*(p++) = IPVS_OPT_SEQ_DATA;
+		*(p++) = sizeof(struct ip_vs_sync_conn_options);
+		hton_seq((struct ip_vs_seq *)p, &cp->in_seq);
+		p += sizeof(struct ip_vs_seq);
+		hton_seq((struct ip_vs_seq *)p, &cp->out_seq);
+		p += sizeof(struct ip_vs_seq);
+	}
+	/* Handle pe data */
+	if (cp->pe_data_len && cp->pe_data) {
+		*(p++) = IPVS_OPT_PE_DATA;
+		*(p++) = cp->pe_data_len;
+		memcpy(p, cp->pe_data, cp->pe_data_len);
+		p += cp->pe_data_len;
+		if (pe_name_len) {
+			/* Add PE_NAME */
+			*(p++) = IPVS_OPT_PE_NAME;
+			*(p++) = pe_name_len;
+			memcpy(p, cp->pe->name, pe_name_len);
+			p += pe_name_len;
+		}
+	}
+
+	spin_unlock(&ipvs->sync_buff_lock);
+
+control:
+	/* synchronize its controller if it has */
+	cp = cp->control;
+	if (!cp)
+		return;
+	/*
+	 * Reduce sync rate for templates
+	 * i.e only increment in_pkts for Templates.
+	 */
+	if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
+		int pkts = atomic_add_return(1, &cp->in_pkts);
+
+		if (pkts % ipvs->sysctl_sync_threshold[1] != 1)
+			return;
+	}
+	goto sloop;
 }
 
+/*
+ *  fill_param used by version 1
+ */
 static inline int
-ip_vs_conn_fill_param_sync(int af, int protocol,
-			   const union nf_inet_addr *caddr, __be16 cport,
-			   const union nf_inet_addr *vaddr, __be16 vport,
-			   struct ip_vs_conn_param *p)
+ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
+			   struct ip_vs_conn_param *p,
+			   __u8 *pe_data, unsigned int pe_data_len,
+			   __u8 *pe_name, unsigned int pe_name_len)
 {
-	/* XXX: Need to take into account persistence engine */
-	ip_vs_conn_fill_param(af, protocol, caddr, cport, vaddr, vport, p);
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		ip_vs_conn_fill_param(net, af, sc->v6.protocol,
+				      (const union nf_inet_addr *)&sc->v6.caddr,
+				      sc->v6.cport,
+				      (const union nf_inet_addr *)&sc->v6.vaddr,
+				      sc->v6.vport, p);
+	else
+#endif
+		ip_vs_conn_fill_param(net, af, sc->v4.protocol,
+				      (const union nf_inet_addr *)&sc->v4.caddr,
+				      sc->v4.cport,
+				      (const union nf_inet_addr *)&sc->v4.vaddr,
+				      sc->v4.vport, p);
+	/* Handle pe data */
+	if (pe_data_len) {
+		if (pe_name_len) {
+			char buff[IP_VS_PENAME_MAXLEN+1];
+
+			memcpy(buff, pe_name, pe_name_len);
+			buff[pe_name_len]=0;
+			p->pe = __ip_vs_pe_getbyname(buff);
+			if (!p->pe) {
+				IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n",
+					     buff);
+				return 1;
+			}
+		} else {
+			IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n");
+			return 1;
+		}
+
+		p->pe_data = kmalloc(pe_data_len, GFP_ATOMIC);
+		if (!p->pe_data) {
+			if (p->pe->module)
+				module_put(p->pe->module);
+			return -ENOMEM;
+		}
+		memcpy(p->pe_data, pe_data, pe_data_len);
+		p->pe_data_len = pe_data_len;
+	}
 	return 0;
 }
 
 /*
- *      Process received multicast message and create the corresponding
- *      ip_vs_conn entries.
+ *  Connection Add / Update.
+ *  Common for version 0 and 1 reception of backup sync_conns.
+ *  Param: ...
+ *         timeout is in sec.
  */
-static void ip_vs_process_message(const char *buffer, const size_t buflen)
+static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
+			    unsigned int flags, unsigned int state,
+			    unsigned int protocol, unsigned int type,
+			    const union nf_inet_addr *daddr, __be16 dport,
+			    unsigned long timeout, __u32 fwmark,
+			    struct ip_vs_sync_conn_options *opt)
 {
-	struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer;
-	struct ip_vs_sync_conn *s;
-	struct ip_vs_sync_conn_options *opt;
-	struct ip_vs_conn *cp;
-	struct ip_vs_protocol *pp;
 	struct ip_vs_dest *dest;
-	struct ip_vs_conn_param param;
-	char *p;
-	int i;
+	struct ip_vs_conn *cp;
+	struct netns_ipvs *ipvs = net_ipvs(net);
 
-	if (buflen < sizeof(struct ip_vs_sync_mesg)) {
-		IP_VS_ERR_RL("sync message header too short\n");
-		return;
-	}
+	if (!(flags & IP_VS_CONN_F_TEMPLATE))
+		cp = ip_vs_conn_in_get(param);
+	else
+		cp = ip_vs_ct_in_get(param);
 
-	/* Convert size back to host byte order */
-	m->size = ntohs(m->size);
+	if (cp && param->pe_data) 	/* Free pe_data */
+		kfree(param->pe_data);
+	if (!cp) {
+		/*
+		 * Find the appropriate destination for the connection.
+		 * If it is not found the connection will remain unbound
+		 * but still handled.
+		 */
+		dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
+				       param->vport, protocol, fwmark);
 
-	if (buflen != m->size) {
-		IP_VS_ERR_RL("bogus sync message size\n");
-		return;
+		/*  Set the appropriate activity flag */
+		if (protocol == IPPROTO_TCP) {
+			if (state != IP_VS_TCP_S_ESTABLISHED)
+				flags |= IP_VS_CONN_F_INACTIVE;
+			else
+				flags &= ~IP_VS_CONN_F_INACTIVE;
+		} else if (protocol == IPPROTO_SCTP) {
+			if (state != IP_VS_SCTP_S_ESTABLISHED)
+				flags |= IP_VS_CONN_F_INACTIVE;
+			else
+				flags &= ~IP_VS_CONN_F_INACTIVE;
+		}
+		cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
+		if (dest)
+			atomic_dec(&dest->refcnt);
+		if (!cp) {
+			if (param->pe_data)
+				kfree(param->pe_data);
+			IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
+			return;
+		}
+	} else if (!cp->dest) {
+		dest = ip_vs_try_bind_dest(cp);
+		if (dest)
+			atomic_dec(&dest->refcnt);
+	} else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
+		(cp->state != state)) {
+		/* update active/inactive flag for the connection */
+		dest = cp->dest;
+		if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+			(state != IP_VS_TCP_S_ESTABLISHED)) {
+			atomic_dec(&dest->activeconns);
+			atomic_inc(&dest->inactconns);
+			cp->flags |= IP_VS_CONN_F_INACTIVE;
+		} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
+			(state == IP_VS_TCP_S_ESTABLISHED)) {
+			atomic_inc(&dest->activeconns);
+			atomic_dec(&dest->inactconns);
+			cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+		}
+	} else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
+		(cp->state != state)) {
+		dest = cp->dest;
+		if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+		(state != IP_VS_SCTP_S_ESTABLISHED)) {
+			atomic_dec(&dest->activeconns);
+			atomic_inc(&dest->inactconns);
+			cp->flags |= IP_VS_CONN_F_INACTIVE;
+		}
 	}
 
-	/* SyncID sanity check */
-	if (ip_vs_backup_syncid != 0 && m->syncid != ip_vs_backup_syncid) {
-		IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n",
-			  m->syncid);
-		return;
+	if (opt)
+		memcpy(&cp->in_seq, opt, sizeof(*opt));
+	atomic_set(&cp->in_pkts, ipvs->sysctl_sync_threshold[0]);
+	cp->state = state;
+	cp->old_state = cp->state;
+	/*
+	 * For Ver 0 messages style
+	 *  - Not possible to recover the right timeout for templates
+	 *  - can not find the right fwmark
+	 *    virtual service. If needed, we can do it for
+	 *    non-fwmark persistent services.
+	 * Ver 1 messages style.
+	 *  - No problem.
+	 */
+	if (timeout) {
+		if (timeout > MAX_SCHEDULE_TIMEOUT / HZ)
+			timeout = MAX_SCHEDULE_TIMEOUT / HZ;
+		cp->timeout = timeout*HZ;
+	} else {
+		struct ip_vs_proto_data *pd;
+
+		pd = ip_vs_proto_data_get(net, protocol);
+		if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table)
+			cp->timeout = pd->timeout_table[state];
+		else
+			cp->timeout = (3*60*HZ);
 	}
+	ip_vs_conn_put(cp);
+}
 
-	p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
+/*
+ *  Process received multicast message for Version 0
+ */
+static void ip_vs_process_message_v0(struct net *net, const char *buffer,
+				     const size_t buflen)
+{
+	struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
+	struct ip_vs_sync_conn_v0 *s;
+	struct ip_vs_sync_conn_options *opt;
+	struct ip_vs_protocol *pp;
+	struct ip_vs_conn_param param;
+	char *p;
+	int i;
+
+	p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0);
 	for (i=0; i<m->nr_conns; i++) {
 		unsigned flags, state;
 
 		if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
-			IP_VS_ERR_RL("bogus conn in sync message\n");
+			IP_VS_ERR_RL("BACKUP v0, bogus conn\n");
 			return;
 		}
-		s = (struct ip_vs_sync_conn *) p;
+		s = (struct ip_vs_sync_conn_v0 *) p;
 		flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
 		flags &= ~IP_VS_CONN_F_HASHED;
 		if (flags & IP_VS_CONN_F_SEQ_MASK) {
 			opt = (struct ip_vs_sync_conn_options *)&s[1];
 			p += FULL_CONN_SIZE;
 			if (p > buffer+buflen) {
-				IP_VS_ERR_RL("bogus conn options in sync message\n");
+				IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n");
 				return;
 			}
 		} else {
@@ -362,118 +864,286 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 		if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
 			pp = ip_vs_proto_get(s->protocol);
 			if (!pp) {
-				IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n",
+				IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n",
 					s->protocol);
 				continue;
 			}
 			if (state >= pp->num_states) {
-				IP_VS_DBG(2, "Invalid %s state %u in sync msg\n",
+				IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n",
 					pp->name, state);
 				continue;
 			}
 		} else {
 			/* protocol in templates is not used for state/timeout */
-			pp = NULL;
 			if (state > 0) {
-				IP_VS_DBG(2, "Invalid template state %u in sync msg\n",
+				IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
 					state);
 				state = 0;
 			}
 		}
 
-		{
-			if (ip_vs_conn_fill_param_sync(AF_INET, s->protocol,
-					      (union nf_inet_addr *)&s->caddr,
-					      s->cport,
-					      (union nf_inet_addr *)&s->vaddr,
-					      s->vport, &param)) {
-				pr_err("ip_vs_conn_fill_param_sync failed");
-				return;
+		ip_vs_conn_fill_param(net, AF_INET, s->protocol,
+				      (const union nf_inet_addr *)&s->caddr,
+				      s->cport,
+				      (const union nf_inet_addr *)&s->vaddr,
+				      s->vport, &param);
+
+		/* Send timeout as Zero */
+		ip_vs_proc_conn(net, &param, flags, state, s->protocol, AF_INET,
+				(union nf_inet_addr *)&s->daddr, s->dport,
+				0, 0, opt);
+	}
+}
+
+/*
+ * Handle options
+ */
+static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen,
+				    __u32 *opt_flags,
+				    struct ip_vs_sync_conn_options *opt)
+{
+	struct ip_vs_sync_conn_options *topt;
+
+	topt = (struct ip_vs_sync_conn_options *)p;
+
+	if (plen != sizeof(struct ip_vs_sync_conn_options)) {
+		IP_VS_DBG(2, "BACKUP, bogus conn options length\n");
+		return -EINVAL;
+	}
+	if (*opt_flags & IPVS_OPT_F_SEQ_DATA) {
+		IP_VS_DBG(2, "BACKUP, conn options found twice\n");
+		return -EINVAL;
+	}
+	ntoh_seq(&topt->in_seq, &opt->in_seq);
+	ntoh_seq(&topt->out_seq, &opt->out_seq);
+	*opt_flags |= IPVS_OPT_F_SEQ_DATA;
+	return 0;
+}
+
+static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
+			  __u8 **data, unsigned int maxlen,
+			  __u32 *opt_flags, __u32 flag)
+{
+	if (plen > maxlen) {
+		IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen);
+		return -EINVAL;
+	}
+	if (*opt_flags & flag) {
+		IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag);
+		return -EINVAL;
+	}
+	*data_len = plen;
+	*data = p;
+	*opt_flags |= flag;
+	return 0;
+}
+/*
+ *   Process a Version 1 sync. connection
+ */
+static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
+{
+	struct ip_vs_sync_conn_options opt;
+	union  ip_vs_sync_conn *s;
+	struct ip_vs_protocol *pp;
+	struct ip_vs_conn_param param;
+	__u32 flags;
+	unsigned int af, state, pe_data_len=0, pe_name_len=0;
+	__u8 *pe_data=NULL, *pe_name=NULL;
+	__u32 opt_flags=0;
+	int retc=0;
+
+	s = (union ip_vs_sync_conn *) p;
+
+	if (s->v6.type & STYPE_F_INET6) {
+#ifdef CONFIG_IP_VS_IPV6
+		af = AF_INET6;
+		p += sizeof(struct ip_vs_sync_v6);
+#else
+		IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n");
+		retc = 10;
+		goto out;
+#endif
+	} else if (!s->v4.type) {
+		af = AF_INET;
+		p += sizeof(struct ip_vs_sync_v4);
+	} else {
+		return -10;
+	}
+	if (p > msg_end)
+		return -20;
+
+	/* Process optional params check Type & Len. */
+	while (p < msg_end) {
+		int ptype;
+		int plen;
+
+		if (p+2 > msg_end)
+			return -30;
+		ptype = *(p++);
+		plen  = *(p++);
+
+		if (!plen || ((p + plen) > msg_end))
+			return -40;
+		/* Handle seq option  p = param data */
+		switch (ptype & ~IPVS_OPT_F_PARAM) {
+		case IPVS_OPT_SEQ_DATA:
+			if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt))
+				return -50;
+			break;
+
+		case IPVS_OPT_PE_DATA:
+			if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data,
+					   IP_VS_PEDATA_MAXLEN, &opt_flags,
+					   IPVS_OPT_F_PE_DATA))
+				return -60;
+			break;
+
+		case IPVS_OPT_PE_NAME:
+			if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name,
+					   IP_VS_PENAME_MAXLEN, &opt_flags,
+					   IPVS_OPT_F_PE_NAME))
+				return -70;
+			break;
+
+		default:
+			/* Param data mandatory ? */
+			if (!(ptype & IPVS_OPT_F_PARAM)) {
+				IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n",
+					  ptype & ~IPVS_OPT_F_PARAM);
+				retc = 20;
+				goto out;
 			}
-			if (!(flags & IP_VS_CONN_F_TEMPLATE))
-				cp = ip_vs_conn_in_get(&param);
-			else
-				cp = ip_vs_ct_in_get(&param);
 		}
-		if (!cp) {
-			/*
-			 * Find the appropriate destination for the connection.
-			 * If it is not found the connection will remain unbound
-			 * but still handled.
-			 */
-			dest = ip_vs_find_dest(AF_INET,
-					       (union nf_inet_addr *)&s->daddr,
-					       s->dport,
-					       (union nf_inet_addr *)&s->vaddr,
-					       s->vport,
-					       s->protocol);
-			/*  Set the approprite ativity flag */
-			if (s->protocol == IPPROTO_TCP) {
-				if (state != IP_VS_TCP_S_ESTABLISHED)
-					flags |= IP_VS_CONN_F_INACTIVE;
-				else
-					flags &= ~IP_VS_CONN_F_INACTIVE;
-			} else if (s->protocol == IPPROTO_SCTP) {
-				if (state != IP_VS_SCTP_S_ESTABLISHED)
-					flags |= IP_VS_CONN_F_INACTIVE;
-				else
-					flags &= ~IP_VS_CONN_F_INACTIVE;
+		p += plen;  /* Next option */
+	}
+
+	/* Get flags and Mask off unsupported */
+	flags  = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK;
+	flags |= IP_VS_CONN_F_SYNC;
+	state = ntohs(s->v4.state);
+
+	if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
+		pp = ip_vs_proto_get(s->v4.protocol);
+		if (!pp) {
+			IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n",
+				s->v4.protocol);
+			retc = 30;
+			goto out;
+		}
+		if (state >= pp->num_states) {
+			IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n",
+				pp->name, state);
+			retc = 40;
+			goto out;
+		}
+	} else {
+		/* protocol in templates is not used for state/timeout */
+		if (state > 0) {
+			IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
+				state);
+			state = 0;
+		}
+	}
+	if (ip_vs_conn_fill_param_sync(net, af, s, &param, pe_data,
+				       pe_data_len, pe_name, pe_name_len)) {
+		retc = 50;
+		goto out;
+	}
+	/* If only IPv4, just silent skip IPv6 */
+	if (af == AF_INET)
+		ip_vs_proc_conn(net, &param, flags, state, s->v4.protocol, af,
+				(union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
+				ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
+				(opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
+				);
+#ifdef CONFIG_IP_VS_IPV6
+	else
+		ip_vs_proc_conn(net, &param, flags, state, s->v6.protocol, af,
+				(union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
+				ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
+				(opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
+				);
+#endif
+	return 0;
+	/* Error exit */
+out:
+	IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc);
+	return retc;
+
+}
+/*
+ *      Process received multicast message and create the corresponding
+ *      ip_vs_conn entries.
+ *      Handles Version 0 & 1
+ */
+static void ip_vs_process_message(struct net *net, __u8 *buffer,
+				  const size_t buflen)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
+	__u8 *p, *msg_end;
+	int i, nr_conns;
+
+	if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) {
+		IP_VS_DBG(2, "BACKUP, message header too short\n");
+		return;
+	}
+	/* Convert size back to host byte order */
+	m2->size = ntohs(m2->size);
+
+	if (buflen != m2->size) {
+		IP_VS_DBG(2, "BACKUP, bogus message size\n");
+		return;
+	}
+	/* SyncID sanity check */
+	if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) {
+		IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
+		return;
+	}
+	/* Handle version 1  message */
+	if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0)
+	    && (m2->spare == 0)) {
+
+		msg_end = buffer + sizeof(struct ip_vs_sync_mesg);
+		nr_conns = m2->nr_conns;
+
+		for (i=0; i<nr_conns; i++) {
+			union ip_vs_sync_conn *s;
+			unsigned size;
+			int retc;
+
+			p = msg_end;
+			if (p + sizeof(s->v4) > buffer+buflen) {
+				IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n");
+				return;
 			}
-			cp = ip_vs_conn_new(&param,
-					    (union nf_inet_addr *)&s->daddr,
-					    s->dport, flags, dest);
-			if (dest)
-				atomic_dec(&dest->refcnt);
-			if (!cp) {
-				pr_err("ip_vs_conn_new failed\n");
+			s = (union ip_vs_sync_conn *)p;
+			size = ntohs(s->v4.ver_size) & SVER_MASK;
+			msg_end = p + size;
+			/* Basic sanity checks */
+			if (msg_end  > buffer+buflen) {
+				IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n");
 				return;
 			}
-		} else if (!cp->dest) {
-			dest = ip_vs_try_bind_dest(cp);
-			if (dest)
-				atomic_dec(&dest->refcnt);
-		} else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
-			   (cp->state != state)) {
-			/* update active/inactive flag for the connection */
-			dest = cp->dest;
-			if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
-				(state != IP_VS_TCP_S_ESTABLISHED)) {
-				atomic_dec(&dest->activeconns);
-				atomic_inc(&dest->inactconns);
-				cp->flags |= IP_VS_CONN_F_INACTIVE;
-			} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
-				(state == IP_VS_TCP_S_ESTABLISHED)) {
-				atomic_inc(&dest->activeconns);
-				atomic_dec(&dest->inactconns);
-				cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+			if (ntohs(s->v4.ver_size) >> SVER_SHIFT) {
+				IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n",
+					      ntohs(s->v4.ver_size) >> SVER_SHIFT);
+				return;
 			}
-		} else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
-			   (cp->state != state)) {
-			dest = cp->dest;
-			if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
-			     (state != IP_VS_SCTP_S_ESTABLISHED)) {
-			    atomic_dec(&dest->activeconns);
-			    atomic_inc(&dest->inactconns);
-			    cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+			/* Process a single sync_conn */
+			retc = ip_vs_proc_sync_conn(net, p, msg_end);
+			if (retc < 0) {
+				IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
+					     retc);
+				return;
 			}
+			/* Make sure we have 32 bit alignment */
+			msg_end = p + ((size + 3) & ~3);
 		}
-
-		if (opt)
-			memcpy(&cp->in_seq, opt, sizeof(*opt));
-		atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
-		cp->state = state;
-		cp->old_state = cp->state;
-		/*
-		 * We can not recover the right timeout for templates
-		 * in all cases, we can not find the right fwmark
-		 * virtual service. If needed, we can do it for
-		 * non-fwmark persistent services.
-		 */
-		if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)
-			cp->timeout = pp->timeout_table[state];
-		else
-			cp->timeout = (3*60*HZ);
-		ip_vs_conn_put(cp);
+	} else {
+		/* Old type of message */
+		ip_vs_process_message_v0(net, buffer, buflen);
+		return;
 	}
 }
 
@@ -511,8 +1181,10 @@ static int set_mcast_if(struct sock *sk, char *ifname)
 {
 	struct net_device *dev;
 	struct inet_sock *inet = inet_sk(sk);
+	struct net *net = sock_net(sk);
 
-	if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+	dev = __dev_get_by_name(net, ifname);
+	if (!dev)
 		return -ENODEV;
 
 	if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
@@ -531,30 +1203,33 @@ static int set_mcast_if(struct sock *sk, char *ifname)
  *	Set the maximum length of sync message according to the
  *	specified interface's MTU.
  */
-static int set_sync_mesg_maxlen(int sync_state)
+static int set_sync_mesg_maxlen(struct net *net, int sync_state)
 {
+	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct net_device *dev;
 	int num;
 
 	if (sync_state == IP_VS_STATE_MASTER) {
-		if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL)
+		dev = __dev_get_by_name(net, ipvs->master_mcast_ifn);
+		if (!dev)
 			return -ENODEV;
 
 		num = (dev->mtu - sizeof(struct iphdr) -
 		       sizeof(struct udphdr) -
 		       SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
-		sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
+		ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
 			SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
 		IP_VS_DBG(7, "setting the maximum length of sync sending "
-			  "message %d.\n", sync_send_mesg_maxlen);
+			  "message %d.\n", ipvs->send_mesg_maxlen);
 	} else if (sync_state == IP_VS_STATE_BACKUP) {
-		if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL)
+		dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn);
+		if (!dev)
 			return -ENODEV;
 
-		sync_recv_mesg_maxlen = dev->mtu -
+		ipvs->recv_mesg_maxlen = dev->mtu -
 			sizeof(struct iphdr) - sizeof(struct udphdr);
 		IP_VS_DBG(7, "setting the maximum length of sync receiving "
-			  "message %d.\n", sync_recv_mesg_maxlen);
+			  "message %d.\n", ipvs->recv_mesg_maxlen);
 	}
 
 	return 0;
@@ -569,6 +1244,7 @@ static int set_sync_mesg_maxlen(int sync_state)
 static int
 join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
 {
+	struct net *net = sock_net(sk);
 	struct ip_mreqn mreq;
 	struct net_device *dev;
 	int ret;
@@ -576,7 +1252,8 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
 	memset(&mreq, 0, sizeof(mreq));
 	memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
 
-	if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+	dev = __dev_get_by_name(net, ifname);
+	if (!dev)
 		return -ENODEV;
 	if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
 		return -EINVAL;
@@ -593,11 +1270,13 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
 
 static int bind_mcastif_addr(struct socket *sock, char *ifname)
 {
+	struct net *net = sock_net(sock->sk);
 	struct net_device *dev;
 	__be32 addr;
 	struct sockaddr_in sin;
 
-	if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+	dev = __dev_get_by_name(net, ifname);
+	if (!dev)
 		return -ENODEV;
 
 	addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
@@ -619,19 +1298,20 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname)
 /*
  *      Set up sending multicast socket over UDP
  */
-static struct socket * make_send_sock(void)
+static struct socket *make_send_sock(struct net *net)
 {
+	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct socket *sock;
 	int result;
 
 	/* First create a socket */
-	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+	result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1);
 	if (result < 0) {
 		pr_err("Error during creation of socket; terminating\n");
 		return ERR_PTR(result);
 	}
 
-	result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn);
+	result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
 	if (result < 0) {
 		pr_err("Error setting outbound mcast interface\n");
 		goto error;
@@ -640,7 +1320,7 @@ static struct socket * make_send_sock(void)
 	set_mcast_loop(sock->sk, 0);
 	set_mcast_ttl(sock->sk, 1);
 
-	result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn);
+	result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn);
 	if (result < 0) {
 		pr_err("Error binding address of the mcast interface\n");
 		goto error;
@@ -664,13 +1344,14 @@ static struct socket * make_send_sock(void)
 /*
  *      Set up receiving multicast socket over UDP
  */
-static struct socket * make_receive_sock(void)
+static struct socket *make_receive_sock(struct net *net)
 {
+	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct socket *sock;
 	int result;
 
 	/* First create a socket */
-	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+	result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1);
 	if (result < 0) {
 		pr_err("Error during creation of socket; terminating\n");
 		return ERR_PTR(result);
@@ -689,7 +1370,7 @@ static struct socket * make_receive_sock(void)
 	/* join the multicast group */
 	result = join_mcast_group(sock->sk,
 			(struct in_addr *) &mcast_addr.sin_addr,
-			ip_vs_backup_mcast_ifn);
+			ipvs->backup_mcast_ifn);
 	if (result < 0) {
 		pr_err("Error joining to the multicast group\n");
 		goto error;
@@ -760,20 +1441,21 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
 static int sync_thread_master(void *data)
 {
 	struct ip_vs_sync_thread_data *tinfo = data;
+	struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
 	struct ip_vs_sync_buff *sb;
 
 	pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
 		"syncid = %d\n",
-		ip_vs_master_mcast_ifn, ip_vs_master_syncid);
+		ipvs->master_mcast_ifn, ipvs->master_syncid);
 
 	while (!kthread_should_stop()) {
-		while ((sb = sb_dequeue())) {
+		while ((sb = sb_dequeue(ipvs))) {
 			ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
 			ip_vs_sync_buff_release(sb);
 		}
 
-		/* check if entries stay in curr_sb for 2 seconds */
-		sb = get_curr_sync_buff(2 * HZ);
+		/* check if entries stay in ipvs->sync_buff for 2 seconds */
+		sb = get_curr_sync_buff(ipvs, 2 * HZ);
 		if (sb) {
 			ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
 			ip_vs_sync_buff_release(sb);
@@ -783,14 +1465,13 @@ static int sync_thread_master(void *data)
 	}
 
 	/* clean up the sync_buff queue */
-	while ((sb=sb_dequeue())) {
+	while ((sb = sb_dequeue(ipvs)))
 		ip_vs_sync_buff_release(sb);
-	}
 
 	/* clean up the current sync_buff */
-	if ((sb = get_curr_sync_buff(0))) {
+	sb = get_curr_sync_buff(ipvs, 0);
+	if (sb)
 		ip_vs_sync_buff_release(sb);
-	}
 
 	/* release the sending multicast socket */
 	sock_release(tinfo->sock);
@@ -803,11 +1484,12 @@ static int sync_thread_master(void *data)
 static int sync_thread_backup(void *data)
 {
 	struct ip_vs_sync_thread_data *tinfo = data;
+	struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
 	int len;
 
 	pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
 		"syncid = %d\n",
-		ip_vs_backup_mcast_ifn, ip_vs_backup_syncid);
+		ipvs->backup_mcast_ifn, ipvs->backup_syncid);
 
 	while (!kthread_should_stop()) {
 		wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
@@ -817,7 +1499,7 @@ static int sync_thread_backup(void *data)
 		/* do we have data now? */
 		while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
 			len = ip_vs_receive(tinfo->sock, tinfo->buf,
-					sync_recv_mesg_maxlen);
+					ipvs->recv_mesg_maxlen);
 			if (len <= 0) {
 				pr_err("receiving message error\n");
 				break;
@@ -826,7 +1508,7 @@ static int sync_thread_backup(void *data)
 			/* disable bottom half, because it accesses the data
 			   shared by softirq while getting/creating conns */
 			local_bh_disable();
-			ip_vs_process_message(tinfo->buf, len);
+			ip_vs_process_message(tinfo->net, tinfo->buf, len);
 			local_bh_enable();
 		}
 	}
@@ -840,41 +1522,42 @@ static int sync_thread_backup(void *data)
 }
 
 
-int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
+int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
 {
 	struct ip_vs_sync_thread_data *tinfo;
 	struct task_struct **realtask, *task;
 	struct socket *sock;
+	struct netns_ipvs *ipvs = net_ipvs(net);
 	char *name, *buf = NULL;
 	int (*threadfn)(void *data);
 	int result = -ENOMEM;
 
 	IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
 	IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
-		  sizeof(struct ip_vs_sync_conn));
+		  sizeof(struct ip_vs_sync_conn_v0));
 
 	if (state == IP_VS_STATE_MASTER) {
-		if (sync_master_thread)
+		if (ipvs->master_thread)
 			return -EEXIST;
 
-		strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
-			sizeof(ip_vs_master_mcast_ifn));
-		ip_vs_master_syncid = syncid;
-		realtask = &sync_master_thread;
-		name = "ipvs_syncmaster";
+		strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
+			sizeof(ipvs->master_mcast_ifn));
+		ipvs->master_syncid = syncid;
+		realtask = &ipvs->master_thread;
+		name = "ipvs_master:%d";
 		threadfn = sync_thread_master;
-		sock = make_send_sock();
+		sock = make_send_sock(net);
 	} else if (state == IP_VS_STATE_BACKUP) {
-		if (sync_backup_thread)
+		if (ipvs->backup_thread)
 			return -EEXIST;
 
-		strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
-			sizeof(ip_vs_backup_mcast_ifn));
-		ip_vs_backup_syncid = syncid;
-		realtask = &sync_backup_thread;
-		name = "ipvs_syncbackup";
+		strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
+			sizeof(ipvs->backup_mcast_ifn));
+		ipvs->backup_syncid = syncid;
+		realtask = &ipvs->backup_thread;
+		name = "ipvs_backup:%d";
 		threadfn = sync_thread_backup;
-		sock = make_receive_sock();
+		sock = make_receive_sock(net);
 	} else {
 		return -EINVAL;
 	}
@@ -884,9 +1567,9 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
 		goto out;
 	}
 
-	set_sync_mesg_maxlen(state);
+	set_sync_mesg_maxlen(net, state);
 	if (state == IP_VS_STATE_BACKUP) {
-		buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL);
+		buf = kmalloc(ipvs->recv_mesg_maxlen, GFP_KERNEL);
 		if (!buf)
 			goto outsocket;
 	}
@@ -895,10 +1578,11 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
 	if (!tinfo)
 		goto outbuf;
 
+	tinfo->net = net;
 	tinfo->sock = sock;
 	tinfo->buf = buf;
 
-	task = kthread_run(threadfn, tinfo, name);
+	task = kthread_run(threadfn, tinfo, name, ipvs->gen);
 	if (IS_ERR(task)) {
 		result = PTR_ERR(task);
 		goto outtinfo;
@@ -906,7 +1590,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
 
 	/* mark as active */
 	*realtask = task;
-	ip_vs_sync_state |= state;
+	ipvs->sync_state |= state;
 
 	/* increase the module use count */
 	ip_vs_use_count_inc();
@@ -924,16 +1608,18 @@ out:
 }
 
 
-int stop_sync_thread(int state)
+int stop_sync_thread(struct net *net, int state)
 {
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
 	IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
 
 	if (state == IP_VS_STATE_MASTER) {
-		if (!sync_master_thread)
+		if (!ipvs->master_thread)
 			return -ESRCH;
 
 		pr_info("stopping master sync thread %d ...\n",
-			task_pid_nr(sync_master_thread));
+			task_pid_nr(ipvs->master_thread));
 
 		/*
 		 * The lock synchronizes with sb_queue_tail(), so that we don't
@@ -941,21 +1627,21 @@ int stop_sync_thread(int state)
 		 * progress of stopping the master sync daemon.
 		 */
 
-		spin_lock_bh(&ip_vs_sync_lock);
-		ip_vs_sync_state &= ~IP_VS_STATE_MASTER;
-		spin_unlock_bh(&ip_vs_sync_lock);
-		kthread_stop(sync_master_thread);
-		sync_master_thread = NULL;
+		spin_lock_bh(&ipvs->sync_lock);
+		ipvs->sync_state &= ~IP_VS_STATE_MASTER;
+		spin_unlock_bh(&ipvs->sync_lock);
+		kthread_stop(ipvs->master_thread);
+		ipvs->master_thread = NULL;
 	} else if (state == IP_VS_STATE_BACKUP) {
-		if (!sync_backup_thread)
+		if (!ipvs->backup_thread)
 			return -ESRCH;
 
 		pr_info("stopping backup sync thread %d ...\n",
-			task_pid_nr(sync_backup_thread));
+			task_pid_nr(ipvs->backup_thread));
 
-		ip_vs_sync_state &= ~IP_VS_STATE_BACKUP;
-		kthread_stop(sync_backup_thread);
-		sync_backup_thread = NULL;
+		ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
+		kthread_stop(ipvs->backup_thread);
+		ipvs->backup_thread = NULL;
 	} else {
 		return -EINVAL;
 	}
@@ -965,3 +1651,42 @@ int stop_sync_thread(int state)
 
 	return 0;
 }
+
+/*
+ * Per-netns init: sync queue, sync locks and the IPv4 sync multicast address
+ */
+static int __net_init __ip_vs_sync_init(struct net *net)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	INIT_LIST_HEAD(&ipvs->sync_queue);
+	spin_lock_init(&ipvs->sync_lock);
+	spin_lock_init(&ipvs->sync_buff_lock);
+
+	ipvs->sync_mcast_addr.sin_family = AF_INET;
+	ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT);
+	ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP);
+	return 0;
+}
+
+static void __ip_vs_sync_cleanup(struct net *net)
+{
+	stop_sync_thread(net, IP_VS_STATE_MASTER);
+	stop_sync_thread(net, IP_VS_STATE_BACKUP);
+}
+
+static struct pernet_operations ipvs_sync_ops = {
+	.init = __ip_vs_sync_init,
+	.exit = __ip_vs_sync_cleanup,
+};
+
+
+int __init ip_vs_sync_init(void)
+{
+	return register_pernet_subsys(&ipvs_sync_ops);
+}
+
+void __exit ip_vs_sync_cleanup(void)
+{
+	unregister_pernet_subsys(&ipvs_sync_ops);
+}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 5325a3fbe4a..1f2a4e35fb1 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -175,7 +175,6 @@ __ip_vs_reroute_locally(struct sk_buff *skb)
 			.fl4_tos = RT_TOS(iph->tos),
 			.mark = skb->mark,
 		};
-		struct rtable *rt;
 
 		if (ip_route_output_key(net, &rt, &fl))
 			return 0;
@@ -390,7 +389,8 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->dst);
-	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
+	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
+	    !skb_is_gso(skb)) {
 		ip_rt_put(rt);
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
@@ -443,7 +443,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->dst);
-	if (skb->len > mtu) {
+	if (skb->len > mtu && !skb_is_gso(skb)) {
 		if (!skb->dev) {
 			struct net *net = dev_net(skb_dst(skb)->dev);
 
@@ -543,7 +543,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->dst);
-	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
+	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
+	    !skb_is_gso(skb)) {
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
 		IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
 				 "ip_vs_nat_xmit(): frag needed for");
@@ -658,7 +659,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->dst);
-	if (skb->len > mtu) {
+	if (skb->len > mtu && !skb_is_gso(skb)) {
 		if (!skb->dev) {
 			struct net *net = dev_net(skb_dst(skb)->dev);
 
@@ -773,8 +774,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	df |= (old_iph->frag_off & htons(IP_DF));
 
-	if ((old_iph->frag_off & htons(IP_DF))
-	    && mtu < ntohs(old_iph->tot_len)) {
+	if ((old_iph->frag_off & htons(IP_DF) &&
+	    mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) {
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error_put;
@@ -886,7 +887,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (skb_dst(skb))
 		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
 
-	if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
+	if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) &&
+	    !skb_is_gso(skb)) {
 		if (!skb->dev) {
 			struct net *net = dev_net(skb_dst(skb)->dev);
 
@@ -991,7 +993,8 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->dst);
-	if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
+	if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu &&
+	    !skb_is_gso(skb)) {
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
 		ip_rt_put(rt);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
@@ -1158,7 +1161,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->dst);
-	if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
+	if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
+	    !skb_is_gso(skb)) {
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error_put;
@@ -1272,7 +1276,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->dst);
-	if (skb->len > mtu) {
+	if (skb->len > mtu && !skb_is_gso(skb)) {
 		if (!skb->dev) {
 			struct net *net = dev_net(skb_dst(skb)->dev);
 
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
new file mode 100644
index 00000000000..4e99cca6161
--- /dev/null
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -0,0 +1,109 @@
+/*
+ *      broadcast connection tracking helper
+ *
+ *      (c) 2005 Patrick McHardy <kaber@trash.net>
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <net/route.h>
+#include <linux/inetdevice.h>
+#include <linux/skbuff.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+
+/**
+ * nf_conntrack_broadcast_help - expect unicast replies to a local broadcast
+ * @skb: packet being processed
+ * @protoff: offset of the transport header (not used here)
+ * @ct: conntrack entry @skb belongs to
+ * @ctinfo: conntrack state of @skb; only the original direction is handled
+ * @timeout: timeout, in seconds, applied to @ct after the expectation is set
+ *
+ * For a locally generated packet routed as a broadcast, look up the netmask
+ * of the primary address whose broadcast address is the packet destination
+ * and register a permanent expectation built from the reply tuple, masked
+ * with that netmask and using the helper's UDP port as source port.
+ *
+ * Always returns NF_ACCEPT.
+ */
+int nf_conntrack_broadcast_help(struct sk_buff *skb,
+				unsigned int protoff,
+				struct nf_conn *ct,
+				enum ip_conntrack_info ctinfo,
+				unsigned int timeout)
+{
+	const struct nf_conntrack_helper *helper;
+	struct nf_conntrack_expect *exp;
+	struct iphdr *iph = ip_hdr(skb);
+	struct rtable *rt = skb_rtable(skb);
+	struct in_device *in_dev;
+	struct nf_conn_help *help = nfct_help(ct);
+	__be16 port = 0;
+	__be32 mask = 0;
+
+	/* we're only interested in locally generated packets */
+	if (skb->sk == NULL)
+		goto out;
+	if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
+		goto out;
+	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+		goto out;
+
+	rcu_read_lock();
+	/*
+	 * help->helper is RCU-managed and is cleared when the helper gets
+	 * unregistered: sample it once under RCU rather than dereferencing
+	 * it unchecked further down.
+	 */
+	helper = rcu_dereference(help->helper);
+	if (helper != NULL)
+		port = helper->tuple.src.u.udp.port;
+
+	in_dev = __in_dev_get_rcu(rt->dst.dev);
+	if (in_dev != NULL) {
+		for_primary_ifa(in_dev) {
+			if (ifa->ifa_broadcast == iph->daddr) {
+				mask = ifa->ifa_mask;
+				break;
+			}
+		} endfor_ifa(in_dev);
+	}
+	rcu_read_unlock();
+
+	/* helper is gone, or daddr is not a broadcast address of the device */
+	if (helper == NULL || mask == 0)
+		goto out;
+
+	exp = nf_ct_expect_alloc(ct);
+	if (exp == NULL)
+		goto out;
+
+	exp->tuple                = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+	exp->tuple.src.u.udp.port = port;
+
+	exp->mask.src.u3.ip       = mask;
+	exp->mask.src.u.udp.port  = htons(0xFFFF);
+
+	exp->expectfn             = NULL;
+	exp->flags                = NF_CT_EXPECT_PERMANENT;
+	exp->class		  = NF_CT_EXPECT_CLASS_DEFAULT;
+	exp->helper               = NULL;
+
+	nf_ct_expect_related(exp);
+	nf_ct_expect_put(exp);
+
+	nf_ct_refresh(ct, skb, timeout * HZ);
+out:
+	return NF_ACCEPT;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_broadcast_help);
+
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index e61511929c6..1909311c392 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -43,6 +43,7 @@
 #include <net/netfilter/nf_conntrack_acct.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_core.h>
 
@@ -282,6 +283,11 @@ EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list);
 static void death_by_timeout(unsigned long ul_conntrack)
 {
 	struct nf_conn *ct = (void *)ul_conntrack;
+	struct nf_conn_tstamp *tstamp;
+
+	tstamp = nf_conn_tstamp_find(ct);
+	if (tstamp && tstamp->stop == 0)
+		tstamp->stop = ktime_to_ns(ktime_get_real());
 
 	if (!test_bit(IPS_DYING_BIT, &ct->status) &&
 	    unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) {
@@ -419,6 +425,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
 	struct nf_conn_help *help;
+	struct nf_conn_tstamp *tstamp;
 	struct hlist_nulls_node *n;
 	enum ip_conntrack_info ctinfo;
 	struct net *net;
@@ -486,8 +493,16 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	ct->timeout.expires += jiffies;
 	add_timer(&ct->timeout);
 	atomic_inc(&ct->ct_general.use);
-	set_bit(IPS_CONFIRMED_BIT, &ct->status);
+	ct->status |= IPS_CONFIRMED;
+
+	/* set conntrack timestamp, if enabled. */
+	tstamp = nf_conn_tstamp_find(ct);
+	if (tstamp) {
+		if (skb->tstamp.tv64 == 0)
+			__net_timestamp((struct sk_buff *)skb);
 
+		tstamp->start = ktime_to_ns(skb->tstamp);
+	}
 	/* Since the lookup is lockless, hash insertion must be done after
 	 * starting the timer and setting the CONFIRMED bit. The RCU barriers
 	 * guarantee that no other CPU can find the conntrack before the above
@@ -655,7 +670,8 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
 	 * and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged.
 	 */
 	memset(&ct->tuplehash[IP_CT_DIR_MAX], 0,
-	       sizeof(*ct) - offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX]));
+	       offsetof(struct nf_conn, proto) -
+	       offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX]));
 	spin_lock_init(&ct->lock);
 	ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
 	ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
@@ -745,6 +761,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 	}
 
 	nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+	nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
 
 	ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;
 	nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
@@ -1185,6 +1202,11 @@ struct __nf_ct_flush_report {
 static int kill_report(struct nf_conn *i, void *data)
 {
 	struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
+	struct nf_conn_tstamp *tstamp;
+
+	tstamp = nf_conn_tstamp_find(i);
+	if (tstamp && tstamp->stop == 0)
+		tstamp->stop = ktime_to_ns(ktime_get_real());
 
 	/* If we fail to deliver the event, death_by_timeout() will retry */
 	if (nf_conntrack_event_report(IPCT_DESTROY, i,
@@ -1201,9 +1223,9 @@ static int kill_all(struct nf_conn *i, void *data)
 	return 1;
 }
 
-void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size)
+void nf_ct_free_hashtable(void *hash, unsigned int size)
 {
-	if (vmalloced)
+	if (is_vmalloc_addr(hash))
 		vfree(hash);
 	else
 		free_pages((unsigned long)hash,
@@ -1270,8 +1292,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
 		goto i_see_dead_people;
 	}
 
-	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
-			     net->ct.htable_size);
+	nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
 	nf_conntrack_ecache_fini(net);
 	nf_conntrack_acct_fini(net);
 	nf_conntrack_expect_fini(net);
@@ -1300,21 +1321,18 @@ void nf_conntrack_cleanup(struct net *net)
 	}
 }
 
-void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls)
+void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
 {
 	struct hlist_nulls_head *hash;
 	unsigned int nr_slots, i;
 	size_t sz;
 
-	*vmalloced = 0;
-
 	BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));
 	nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));
 	sz = nr_slots * sizeof(struct hlist_nulls_head);
 	hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
 					get_order(sz));
 	if (!hash) {
-		*vmalloced = 1;
 		printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
 		hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
 				 PAGE_KERNEL);
@@ -1330,7 +1348,7 @@ EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
 
 int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 {
-	int i, bucket, vmalloced, old_vmalloced;
+	int i, bucket;
 	unsigned int hashsize, old_size;
 	struct hlist_nulls_head *hash, *old_hash;
 	struct nf_conntrack_tuple_hash *h;
@@ -1347,7 +1365,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 	if (!hashsize)
 		return -EINVAL;
 
-	hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced, 1);
+	hash = nf_ct_alloc_hashtable(&hashsize, 1);
 	if (!hash)
 		return -ENOMEM;
 
@@ -1369,15 +1387,13 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 		}
 	}
 	old_size = init_net.ct.htable_size;
-	old_vmalloced = init_net.ct.hash_vmalloc;
 	old_hash = init_net.ct.hash;
 
 	init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
-	init_net.ct.hash_vmalloc = vmalloced;
 	init_net.ct.hash = hash;
 	spin_unlock_bh(&nf_conntrack_lock);
 
-	nf_ct_free_hashtable(old_hash, old_vmalloced, old_size);
+	nf_ct_free_hashtable(old_hash, old_size);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
@@ -1490,8 +1506,7 @@ static int nf_conntrack_init_net(struct net *net)
 	}
 
 	net->ct.htable_size = nf_conntrack_htable_size;
-	net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size,
-					     &net->ct.hash_vmalloc, 1);
+	net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1);
 	if (!net->ct.hash) {
 		ret = -ENOMEM;
 		printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
@@ -1503,6 +1518,9 @@ static int nf_conntrack_init_net(struct net *net)
 	ret = nf_conntrack_acct_init(net);
 	if (ret < 0)
 		goto err_acct;
+	ret = nf_conntrack_tstamp_init(net);
+	if (ret < 0)
+		goto err_tstamp;
 	ret = nf_conntrack_ecache_init(net);
 	if (ret < 0)
 		goto err_ecache;
@@ -1510,12 +1528,13 @@ static int nf_conntrack_init_net(struct net *net)
 	return 0;
 
 err_ecache:
+	nf_conntrack_tstamp_fini(net);
+err_tstamp:
 	nf_conntrack_acct_fini(net);
 err_acct:
 	nf_conntrack_expect_fini(net);
 err_expect:
-	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
-			     net->ct.htable_size);
+	nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
 err_hash:
 	kmem_cache_destroy(net->ct.nf_conntrack_cachep);
 err_cache:
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index a20fb0bd1ef..cd1e8e0970f 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -319,7 +319,8 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
 	const struct nf_conntrack_expect_policy *p;
 	unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
 
-	atomic_inc(&exp->use);
+	/* two references : one for hash insert, one for the timer */
+	atomic_add(2, &exp->use);
 
 	if (master_help) {
 		hlist_add_head(&exp->lnode, &master_help->expectations);
@@ -333,12 +334,14 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
 	setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
 		    (unsigned long)exp);
 	if (master_help) {
-		p = &master_help->helper->expect_policy[exp->class];
+		p = &rcu_dereference_protected(
+				master_help->helper,
+				lockdep_is_held(&nf_conntrack_lock)
+				)->expect_policy[exp->class];
 		exp->timeout.expires = jiffies + p->timeout * HZ;
 	}
 	add_timer(&exp->timeout);
 
-	atomic_inc(&exp->use);
 	NF_CT_STAT_INC(net, expect_create);
 }
 
@@ -369,7 +372,10 @@ static inline int refresh_timer(struct nf_conntrack_expect *i)
 	if (!del_timer(&i->timeout))
 		return 0;
 
-	p = &master_help->helper->expect_policy[i->class];
+	p = &rcu_dereference_protected(
+		master_help->helper,
+		lockdep_is_held(&nf_conntrack_lock)
+		)->expect_policy[i->class];
 	i->timeout.expires = jiffies + p->timeout * HZ;
 	add_timer(&i->timeout);
 	return 1;
@@ -407,7 +413,10 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
 	}
 	/* Will be over limit? */
 	if (master_help) {
-		p = &master_help->helper->expect_policy[expect->class];
+		p = &rcu_dereference_protected(
+			master_help->helper,
+			lockdep_is_held(&nf_conntrack_lock)
+			)->expect_policy[expect->class];
 		if (p->max_expected &&
 		    master_help->expecting[expect->class] >= p->max_expected) {
 			evict_oldest_expect(master, expect);
@@ -478,7 +487,7 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
 	struct hlist_node *n;
 
 	for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
-		n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+		n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
 		if (n)
 			return n;
 	}
@@ -491,11 +500,11 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
 	struct net *net = seq_file_net(seq);
 	struct ct_expect_iter_state *st = seq->private;
 
-	head = rcu_dereference(head->next);
+	head = rcu_dereference(hlist_next_rcu(head));
 	while (head == NULL) {
 		if (++st->bucket >= nf_ct_expect_hsize)
 			return NULL;
-		head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+		head = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
 	}
 	return head;
 }
@@ -630,8 +639,7 @@ int nf_conntrack_expect_init(struct net *net)
 	}
 
 	net->ct.expect_count = 0;
-	net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
-						  &net->ct.expect_vmalloc, 0);
+	net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
 	if (net->ct.expect_hash == NULL)
 		goto err1;
 
@@ -653,8 +661,7 @@ err3:
 	if (net_eq(net, &init_net))
 		kmem_cache_destroy(nf_ct_expect_cachep);
 err2:
-	nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
-			     nf_ct_expect_hsize);
+	nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
 err1:
 	return err;
 }
@@ -666,6 +673,5 @@ void nf_conntrack_expect_fini(struct net *net)
 		rcu_barrier(); /* Wait for call_rcu() before destroy */
 		kmem_cache_destroy(nf_ct_expect_cachep);
 	}
-	nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
-			     nf_ct_expect_hsize);
+	nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
 }
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index bd82450c193..80a23ed62bb 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -140,15 +140,16 @@ static void update_alloc_size(struct nf_ct_ext_type *type)
 	/* This assumes that extended areas in conntrack for the types
 	   whose NF_CT_EXT_F_PREALLOC bit set are allocated in order */
 	for (i = min; i <= max; i++) {
-		t1 = nf_ct_ext_types[i];
+		t1 = rcu_dereference_protected(nf_ct_ext_types[i],
+				lockdep_is_held(&nf_ct_ext_type_mutex));
 		if (!t1)
 			continue;
 
-		t1->alloc_size = sizeof(struct nf_ct_ext)
-				 + ALIGN(sizeof(struct nf_ct_ext), t1->align)
-				 + t1->len;
+		t1->alloc_size = ALIGN(sizeof(struct nf_ct_ext), t1->align) +
+				 t1->len;
 		for (j = 0; j < NF_CT_EXT_NUM; j++) {
-			t2 = nf_ct_ext_types[j];
+			t2 = rcu_dereference_protected(nf_ct_ext_types[j],
+				lockdep_is_held(&nf_ct_ext_type_mutex));
 			if (t2 == NULL || t2 == t1 ||
 			    (t2->flags & NF_CT_EXT_F_PREALLOC) == 0)
 				continue;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 59e1a4cd4e8..1bdfea35795 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -33,7 +33,6 @@ static DEFINE_MUTEX(nf_ct_helper_mutex);
 static struct hlist_head *nf_ct_helper_hash __read_mostly;
 static unsigned int nf_ct_helper_hsize __read_mostly;
 static unsigned int nf_ct_helper_count __read_mostly;
-static int nf_ct_helper_vmalloc;
 
 
 /* Stupid hash, but collision free for the default registrations of the
@@ -158,7 +157,10 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i,
 	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
 	struct nf_conn_help *help = nfct_help(ct);
 
-	if (help && help->helper == me) {
+	if (help && rcu_dereference_protected(
+			help->helper,
+			lockdep_is_held(&nf_conntrack_lock)
+			) == me) {
 		nf_conntrack_event(IPCT_HELPER, ct);
 		rcu_assign_pointer(help->helper, NULL);
 	}
@@ -210,7 +212,10 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
 		hlist_for_each_entry_safe(exp, n, next,
 					  &net->ct.expect_hash[i], hnode) {
 			struct nf_conn_help *help = nfct_help(exp->master);
-			if ((help->helper == me || exp->helper == me) &&
+			if ((rcu_dereference_protected(
+					help->helper,
+					lockdep_is_held(&nf_conntrack_lock)
+					) == me || exp->helper == me) &&
 			    del_timer(&exp->timeout)) {
 				nf_ct_unlink_expect(exp);
 				nf_ct_expect_put(exp);
@@ -261,8 +266,7 @@ int nf_conntrack_helper_init(void)
 	int err;
 
 	nf_ct_helper_hsize = 1; /* gets rounded up to use one page */
-	nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize,
-						  &nf_ct_helper_vmalloc, 0);
+	nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0);
 	if (!nf_ct_helper_hash)
 		return -ENOMEM;
 
@@ -273,14 +277,12 @@ int nf_conntrack_helper_init(void)
 	return 0;
 
 err1:
-	nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc,
-			     nf_ct_helper_hsize);
+	nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
 	return err;
 }
 
 void nf_conntrack_helper_fini(void)
 {
 	nf_ct_extend_unregister(&helper_extend);
-	nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc,
-			     nf_ct_helper_hsize);
+	nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
 }
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index aadde018a07..4c8f30a3d6d 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -18,14 +18,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/if_addr.h>
 #include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <net/route.h>
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_helper.h>
@@ -40,75 +33,26 @@ MODULE_ALIAS("ip_conntrack_netbios_ns");
 MODULE_ALIAS_NFCT_HELPER("netbios_ns");
 
 static unsigned int timeout __read_mostly = 3;
-module_param(timeout, uint, 0400);
+module_param(timeout, uint, S_IRUSR);
 MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
 
-static int help(struct sk_buff *skb, unsigned int protoff,
-		struct nf_conn *ct, enum ip_conntrack_info ctinfo)
-{
-	struct nf_conntrack_expect *exp;
-	struct iphdr *iph = ip_hdr(skb);
-	struct rtable *rt = skb_rtable(skb);
-	struct in_device *in_dev;
-	__be32 mask = 0;
-
-	/* we're only interested in locally generated packets */
-	if (skb->sk == NULL)
-		goto out;
-	if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
-		goto out;
-	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
-		goto out;
-
-	rcu_read_lock();
-	in_dev = __in_dev_get_rcu(rt->dst.dev);
-	if (in_dev != NULL) {
-		for_primary_ifa(in_dev) {
-			if (ifa->ifa_broadcast == iph->daddr) {
-				mask = ifa->ifa_mask;
-				break;
-			}
-		} endfor_ifa(in_dev);
-	}
-	rcu_read_unlock();
-
-	if (mask == 0)
-		goto out;
-
-	exp = nf_ct_expect_alloc(ct);
-	if (exp == NULL)
-		goto out;
-
-	exp->tuple                = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-	exp->tuple.src.u.udp.port = htons(NMBD_PORT);
-
-	exp->mask.src.u3.ip       = mask;
-	exp->mask.src.u.udp.port  = htons(0xFFFF);
-
-	exp->expectfn             = NULL;
-	exp->flags                = NF_CT_EXPECT_PERMANENT;
-	exp->class		  = NF_CT_EXPECT_CLASS_DEFAULT;
-	exp->helper               = NULL;
-
-	nf_ct_expect_related(exp);
-	nf_ct_expect_put(exp);
-
-	nf_ct_refresh(ct, skb, timeout * HZ);
-out:
-	return NF_ACCEPT;
-}
-
 static struct nf_conntrack_expect_policy exp_policy = {
 	.max_expected	= 1,
 };
 
+static int netbios_ns_help(struct sk_buff *skb, unsigned int protoff,
+			   struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+	return nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout);
+}
+
 static struct nf_conntrack_helper helper __read_mostly = {
 	.name			= "netbios-ns",
-	.tuple.src.l3num	= AF_INET,
+	.tuple.src.l3num	= NFPROTO_IPV4,
 	.tuple.src.u.udp.port	= cpu_to_be16(NMBD_PORT),
 	.tuple.dst.protonum	= IPPROTO_UDP,
 	.me			= THIS_MODULE,
-	.help			= help,
+	.help			= netbios_ns_help,
 	.expect_policy		= &exp_policy,
 };
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 93297aaceb2..3fec12c570a 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -42,6 +42,7 @@
 #include <net/netfilter/nf_conntrack_tuple.h>
 #include <net/netfilter/nf_conntrack_acct.h>
 #include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
 #ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_protocol.h>
@@ -230,6 +231,33 @@ nla_put_failure:
 	return -1;
 }
 
+static int
+ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct)
+{
+	const struct nf_conn_tstamp *ts = nf_conn_tstamp_find(ct);
+	struct nlattr *nest;
+
+	/* conntrack carries no timestamp extension: nothing to dump */
+	if (ts == NULL)
+		return 0;
+
+	nest = nla_nest_start(skb, CTA_TIMESTAMP | NLA_F_NESTED);
+	if (nest == NULL)
+		goto nla_put_failure;
+
+	NLA_PUT_BE64(skb, CTA_TIMESTAMP_START, cpu_to_be64(ts->start));
+	/* the stop time is only emitted once it has been recorded */
+	if (ts->stop != 0) {
+		NLA_PUT_BE64(skb, CTA_TIMESTAMP_STOP, cpu_to_be64(ts->stop));
+	}
+	nla_nest_end(skb, nest);
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
 #ifdef CONFIG_NF_CONNTRACK_MARK
 static inline int
 ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct)
@@ -404,6 +432,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 	    ctnetlink_dump_timeout(skb, ct) < 0 ||
 	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
 	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
+	    ctnetlink_dump_timestamp(skb, ct) < 0 ||
 	    ctnetlink_dump_protoinfo(skb, ct) < 0 ||
 	    ctnetlink_dump_helpinfo(skb, ct) < 0 ||
 	    ctnetlink_dump_mark(skb, ct) < 0 ||
@@ -471,6 +500,18 @@ ctnetlink_secctx_size(const struct nf_conn *ct)
 }
 
 static inline size_t
+ctnetlink_timestamp_size(const struct nf_conn *ct)
+{
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+	if (!nf_ct_ext_exist(ct, NF_CT_EXT_TSTAMP))
+		return 0;
+	return nla_total_size(0) + 2 * nla_total_size(sizeof(uint64_t));
+#else
+	return 0;
+#endif
+}
+
+static inline size_t
 ctnetlink_nlmsg_size(const struct nf_conn *ct)
 {
 	return NLMSG_ALIGN(sizeof(struct nfgenmsg))
@@ -481,6 +522,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct)
 	       + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */
 	       + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */
 	       + ctnetlink_counters_size(ct)
+	       + ctnetlink_timestamp_size(ct)
 	       + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */
 	       + nla_total_size(0) /* CTA_PROTOINFO */
 	       + nla_total_size(0) /* CTA_HELP */
@@ -571,7 +613,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 
 	if (events & (1 << IPCT_DESTROY)) {
 		if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
-		    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
+		    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
+		    ctnetlink_dump_timestamp(skb, ct) < 0)
 			goto nla_put_failure;
 	} else {
 		if (ctnetlink_dump_timeout(skb, ct) < 0)
@@ -1357,6 +1400,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
 	}
 
 	nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+	nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
 	nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC);
 	/* we must add conntrack extensions before confirmation. */
 	ct->status |= IPS_CONFIRMED;
@@ -1375,6 +1419,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
 	}
 #endif
 
+	memset(&ct->proto, 0, sizeof(ct->proto));
 	if (cda[CTA_PROTOINFO]) {
 		err = ctnetlink_change_protoinfo(ct, cda);
 		if (err < 0)
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index dc7bb74110d..5701c8dd783 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -166,6 +166,7 @@ static void nf_ct_l3proto_unregister_sysctl(struct nf_conntrack_l3proto *l3proto
 int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
 {
 	int ret = 0;
+	struct nf_conntrack_l3proto *old;
 
 	if (proto->l3proto >= AF_MAX)
 		return -EBUSY;
@@ -174,7 +175,9 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
 		return -EINVAL;
 
 	mutex_lock(&nf_ct_proto_mutex);
-	if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_l3proto_generic) {
+	old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
+					lockdep_is_held(&nf_ct_proto_mutex));
+	if (old != &nf_conntrack_l3proto_generic) {
 		ret = -EBUSY;
 		goto out_unlock;
 	}
@@ -201,7 +204,9 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
 	BUG_ON(proto->l3proto >= AF_MAX);
 
 	mutex_lock(&nf_ct_proto_mutex);
-	BUG_ON(nf_ct_l3protos[proto->l3proto] != proto);
+	BUG_ON(rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
+					 lockdep_is_held(&nf_ct_proto_mutex)
+					 ) != proto);
 	rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
 			   &nf_conntrack_l3proto_generic);
 	nf_ct_l3proto_unregister_sysctl(proto);
@@ -279,7 +284,7 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
 	mutex_lock(&nf_ct_proto_mutex);
 	if (!nf_ct_protos[l4proto->l3proto]) {
 		/* l3proto may be loaded latter. */
-		struct nf_conntrack_l4proto **proto_array;
+		struct nf_conntrack_l4proto __rcu **proto_array;
 		int i;
 
 		proto_array = kmalloc(MAX_NF_CT_PROTO *
@@ -291,7 +296,7 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
 		}
 
 		for (i = 0; i < MAX_NF_CT_PROTO; i++)
-			proto_array[i] = &nf_conntrack_l4proto_generic;
+			RCU_INIT_POINTER(proto_array[i], &nf_conntrack_l4proto_generic);
 
 		/* Before making proto_array visible to lockless readers,
 		 * we must make sure its content is committed to memory.
@@ -299,8 +304,10 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
 		smp_wmb();
 
 		nf_ct_protos[l4proto->l3proto] = proto_array;
-	} else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] !=
-					&nf_conntrack_l4proto_generic) {
+	} else if (rcu_dereference_protected(
+			nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
+			lockdep_is_held(&nf_ct_proto_mutex)
+			) != &nf_conntrack_l4proto_generic) {
 		ret = -EBUSY;
 		goto out_unlock;
 	}
@@ -331,7 +338,10 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
 	BUG_ON(l4proto->l3proto >= PF_MAX);
 
 	mutex_lock(&nf_ct_proto_mutex);
-	BUG_ON(nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != l4proto);
+	BUG_ON(rcu_dereference_protected(
+			nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
+			lockdep_is_held(&nf_ct_proto_mutex)
+			) != l4proto);
 	rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
 			   &nf_conntrack_l4proto_generic);
 	nf_ct_l4proto_unregister_sysctl(l4proto);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 5292560d6d4..9ae57c57c50 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -452,6 +452,9 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
 	ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
 	ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER;
 	ct->proto.dccp.state = CT_DCCP_NONE;
+	ct->proto.dccp.last_pkt = DCCP_PKT_REQUEST;
+	ct->proto.dccp.last_dir = IP_CT_DIR_ORIGINAL;
+	ct->proto.dccp.handshake_seq = 0;
 	return true;
 
 out_invalid:
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index c6049c2d5ea..6f4ee70f460 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -413,6 +413,7 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
 	    test_bit(SCTP_CID_COOKIE_ACK, map))
 		return false;
 
+	memset(&ct->proto.sctp, 0, sizeof(ct->proto.sctp));
 	new_state = SCTP_CONNTRACK_MAX;
 	for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
 		/* Don't need lock here: this conntrack not in circulation yet */
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 3fb2b73b24d..6f38d0e2ea4 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1066,9 +1066,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
 	BUG_ON(th == NULL);
 
 	/* Don't need lock here: this conntrack not in circulation yet */
-	new_state
-		= tcp_conntracks[0][get_conntrack_index(th)]
-		[TCP_CONNTRACK_NONE];
+	new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
 
 	/* Invalid: delete conntrack */
 	if (new_state >= TCP_CONNTRACK_MAX) {
@@ -1077,6 +1075,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
 	}
 
 	if (new_state == TCP_CONNTRACK_SYN_SENT) {
+		memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
 		/* SYN packet */
 		ct->proto.tcp.seen[0].td_end =
 			segment_seq_plus_len(ntohl(th->seq), skb->len,
@@ -1088,11 +1087,11 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
 			ct->proto.tcp.seen[0].td_end;
 
 		tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
-		ct->proto.tcp.seen[1].flags = 0;
 	} else if (nf_ct_tcp_loose == 0) {
 		/* Don't try to pick up connections. */
 		return false;
 	} else {
+		memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
 		/*
 		 * We are in the middle of a connection,
 		 * its history is lost for us.
@@ -1107,7 +1106,6 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
 		ct->proto.tcp.seen[0].td_maxend =
 			ct->proto.tcp.seen[0].td_end +
 			ct->proto.tcp.seen[0].td_maxwin;
-		ct->proto.tcp.seen[0].td_scale = 0;
 
 		/* We assume SACK and liberal window checking to handle
 		 * window scaling */
@@ -1116,13 +1114,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
 					      IP_CT_TCP_FLAG_BE_LIBERAL;
 	}
 
-	ct->proto.tcp.seen[1].td_end = 0;
-	ct->proto.tcp.seen[1].td_maxend = 0;
-	ct->proto.tcp.seen[1].td_maxwin = 0;
-	ct->proto.tcp.seen[1].td_scale = 0;
-
 	/* tcp_packet will set them */
-	ct->proto.tcp.state = TCP_CONNTRACK_NONE;
 	ct->proto.tcp.last_index = TCP_NONE_SET;
 
 	pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
diff --git a/net/netfilter/nf_conntrack_snmp.c b/net/netfilter/nf_conntrack_snmp.c
new file mode 100644
index 00000000000..6e545e26289
--- /dev/null
+++ b/net/netfilter/nf_conntrack_snmp.c
@@ -0,0 +1,77 @@
+/*
+ *      SNMP service broadcast connection tracking helper
+ *
+ *      (c) 2011 Jiri Olsa <jolsa@redhat.com>
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/in.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+
+#define SNMP_PORT	161
+
+MODULE_AUTHOR("Jiri Olsa <jolsa@redhat.com>");
+MODULE_DESCRIPTION("SNMP service broadcast connection tracking helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NFCT_HELPER("snmp");
+
+static unsigned int timeout __read_mostly = 30;
+module_param(timeout, uint, S_IRUSR);
+MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
+
+int (*nf_nat_snmp_hook)(struct sk_buff *skb,
+			unsigned int protoff,
+			struct nf_conn *ct,
+			enum ip_conntrack_info ctinfo);
+EXPORT_SYMBOL_GPL(nf_nat_snmp_hook);
+
+static int snmp_conntrack_help(struct sk_buff *skb, unsigned int protoff,
+		struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+	typeof(nf_nat_snmp_hook) nf_nat_snmp;
+
+	nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout);	/* return value unused */
+
+	nf_nat_snmp = rcu_dereference(nf_nat_snmp_hook);	/* NULL when no hook is set */
+	if (nf_nat_snmp && ct->status & IPS_NAT_MASK)
+		return nf_nat_snmp(skb, protoff, ct, ctinfo);	/* verdict comes from the hook */
+
+	return NF_ACCEPT;	/* no hook, or no IPS_NAT_MASK bit set on ct */
+}
+
+static struct nf_conntrack_expect_policy exp_policy = {
+	.max_expected	= 1,
+};
+
+static struct nf_conntrack_helper helper __read_mostly = {
+	.name			= "snmp",
+	.tuple.src.l3num	= NFPROTO_IPV4,
+	.tuple.src.u.udp.port	= cpu_to_be16(SNMP_PORT),
+	.tuple.dst.protonum	= IPPROTO_UDP,
+	.me			= THIS_MODULE,
+	.help			= snmp_conntrack_help,
+	.expect_policy		= &exp_policy,
+};
+
+static int __init nf_conntrack_snmp_init(void)
+{
+	exp_policy.timeout = timeout;
+	return nf_conntrack_helper_register(&helper);
+}
+
+static void __exit nf_conntrack_snmp_fini(void)
+{
+	nf_conntrack_helper_unregister(&helper);
+}
+
+module_init(nf_conntrack_snmp_init);
+module_exit(nf_conntrack_snmp_fini);
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index b4d7f0f24b2..0ae14282588 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -29,6 +29,8 @@
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_acct.h>
 #include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
+#include <linux/rculist_nulls.h>
 
 MODULE_LICENSE("GPL");
 
@@ -45,6 +47,7 @@ EXPORT_SYMBOL_GPL(print_tuple);
 struct ct_iter_state {
 	struct seq_net_private p;
 	unsigned int bucket;
+	u_int64_t time_now;
 };
 
 static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
@@ -56,7 +59,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
 	for (st->bucket = 0;
 	     st->bucket < net->ct.htable_size;
 	     st->bucket++) {
-		n = rcu_dereference(net->ct.hash[st->bucket].first);
+		n = rcu_dereference(hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
 		if (!is_a_nulls(n))
 			return n;
 	}
@@ -69,13 +72,15 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
 	struct net *net = seq_file_net(seq);
 	struct ct_iter_state *st = seq->private;
 
-	head = rcu_dereference(head->next);
+	head = rcu_dereference(hlist_nulls_next_rcu(head));
 	while (is_a_nulls(head)) {
 		if (likely(get_nulls_value(head) == st->bucket)) {
 			if (++st->bucket >= net->ct.htable_size)
 				return NULL;
 		}
-		head = rcu_dereference(net->ct.hash[st->bucket].first);
+		head = rcu_dereference(
+				hlist_nulls_first_rcu(
+					&net->ct.hash[st->bucket]));
 	}
 	return head;
 }
@@ -93,6 +98,9 @@ static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
 static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
 	__acquires(RCU)
 {
+	struct ct_iter_state *st = seq->private;
+
+	st->time_now = ktime_to_ns(ktime_get_real());
 	rcu_read_lock();
 	return ct_get_idx(seq, *pos);
 }
@@ -132,6 +140,34 @@ static inline int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
 }
 #endif
 
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+static int ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
+{
+	struct ct_iter_state *st = s->private;
+	struct nf_conn_tstamp *tstamp;
+	s64 delta_time;
+
+	tstamp = nf_conn_tstamp_find(ct);
+	if (tstamp) {
+		delta_time = st->time_now - tstamp->start;	/* ns; time_now is sampled in ct_seq_start() */
+		if (delta_time > 0)
+			delta_time = div_s64(delta_time, NSEC_PER_SEC);	/* ns -> whole seconds */
+		else
+			delta_time = 0;	/* clamp: never print a negative age */
+
+		return seq_printf(s, "delta-time=%llu ",
+				  (unsigned long long)delta_time);	/* seq_printf() result is passed through */
+	}
+	return 0;	/* no timestamp found for ct: print nothing */
+}
+#else
+static inline int
+ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
+{
+	return 0;
+}
+#endif
+
 /* return 0 on success, 1 in case of error */
 static int ct_seq_show(struct seq_file *s, void *v)
 {
@@ -200,6 +236,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
 		goto release;
 #endif
 
+	if (ct_show_delta_time(s, ct))
+		goto release;
+
 	if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
 		goto release;
 
diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c
new file mode 100644
index 00000000000..af7dd31af0a
--- /dev/null
+++ b/net/netfilter/nf_conntrack_timestamp.c
@@ -0,0 +1,120 @@
+/*
+ * (C) 2010 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation (or any later at your option).
+ */
+
+#include <linux/netfilter.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
+
+static bool nf_ct_tstamp __read_mostly;	/* copied into each netns' sysctl_tstamp at init */
+
+module_param_named(tstamp, nf_ct_tstamp, bool, 0644);
+MODULE_PARM_DESC(tstamp, "Enable connection tracking flow timestamping.");
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table tstamp_sysctl_table[] = {
+	{
+		.procname	= "nf_conntrack_timestamp",
+		.data		= &init_net.ct.sysctl_tstamp,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{}
+};
+#endif /* CONFIG_SYSCTL */
+
+static struct nf_ct_ext_type tstamp_extend __read_mostly = {
+	.len	= sizeof(struct nf_conn_tstamp),
+	.align	= __alignof__(struct nf_conn_tstamp),
+	.id	= NF_CT_EXT_TSTAMP,
+};
+
+#ifdef CONFIG_SYSCTL
+static int nf_conntrack_tstamp_init_sysctl(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = kmemdup(tstamp_sysctl_table, sizeof(tstamp_sysctl_table),
+			GFP_KERNEL);
+	if (!table)
+		goto out;
+
+	table[0].data = &net->ct.sysctl_tstamp;
+
+	net->ct.tstamp_sysctl_header = register_net_sysctl_table(net,
+			nf_net_netfilter_sysctl_path, table);
+	if (!net->ct.tstamp_sysctl_header) {
+		printk(KERN_ERR "nf_ct_tstamp: can't register to sysctl.\n");
+		goto out_register;
+	}
+	return 0;
+
+out_register:
+	kfree(table);
+out:
+	return -ENOMEM;
+}
+
+static void nf_conntrack_tstamp_fini_sysctl(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = net->ct.tstamp_sysctl_header->ctl_table_arg;
+	unregister_net_sysctl_table(net->ct.tstamp_sysctl_header);
+	kfree(table);
+}
+#else
+static int nf_conntrack_tstamp_init_sysctl(struct net *net)
+{
+	return 0;
+}
+
+static void nf_conntrack_tstamp_fini_sysctl(struct net *net)
+{
+}
+#endif
+
+int nf_conntrack_tstamp_init(struct net *net)
+{
+	int ret;
+
+	net->ct.sysctl_tstamp = nf_ct_tstamp;	/* per-netns value starts from the module parameter */
+
+	if (net_eq(net, &init_net)) {	/* the extension type is only registered for init_net */
+		ret = nf_ct_extend_register(&tstamp_extend);
+		if (ret < 0) {
+			printk(KERN_ERR "nf_ct_tstamp: Unable to register "
+					"extension\n");
+			goto out_extend_register;
+		}
+	}
+
+	ret = nf_conntrack_tstamp_init_sysctl(net);
+	if (ret < 0)
+		goto out_sysctl;
+
+	return 0;	/* 0 on success, negative errno otherwise */
+
+out_sysctl:	/* sysctl setup failed: undo the init_net-only registration */
+	if (net_eq(net, &init_net))
+		nf_ct_extend_unregister(&tstamp_extend);
+out_extend_register:
+	return ret;
+}
+
+void nf_conntrack_tstamp_fini(struct net *net)
+{
+	nf_conntrack_tstamp_fini_sysctl(net);
+	if (net_eq(net, &init_net))
+		nf_ct_extend_unregister(&tstamp_extend);
+}
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index b07393eab88..20c775cff2a 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -161,7 +161,8 @@ static int seq_show(struct seq_file *s, void *v)
 	struct nf_logger *t;
 	int ret;
 
-	logger = nf_loggers[*pos];
+	logger = rcu_dereference_protected(nf_loggers[*pos],
+					   lockdep_is_held(&nf_log_mutex));
 
 	if (!logger)
 		ret = seq_printf(s, "%2lld NONE (", *pos);
@@ -249,7 +250,8 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
 		mutex_unlock(&nf_log_mutex);
 	} else {
 		mutex_lock(&nf_log_mutex);
-		logger = nf_loggers[tindex];
+		logger = rcu_dereference_protected(nf_loggers[tindex],
+						   lockdep_is_held(&nf_log_mutex));
 		if (!logger)
 			table->data = "NONE";
 		else
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 74aebed5bd2..5ab22e2bbd7 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -27,14 +27,17 @@ static DEFINE_MUTEX(queue_handler_mutex);
 int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
 {
 	int ret;
+	const struct nf_queue_handler *old;
 
 	if (pf >= ARRAY_SIZE(queue_handler))
 		return -EINVAL;
 
 	mutex_lock(&queue_handler_mutex);
-	if (queue_handler[pf] == qh)
+	old = rcu_dereference_protected(queue_handler[pf],
+					lockdep_is_held(&queue_handler_mutex));
+	if (old == qh)
 		ret = -EEXIST;
-	else if (queue_handler[pf])
+	else if (old)
 		ret = -EBUSY;
 	else {
 		rcu_assign_pointer(queue_handler[pf], qh);
@@ -49,11 +52,15 @@ EXPORT_SYMBOL(nf_register_queue_handler);
 /* The caller must flush their queue before this */
 int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
 {
+	const struct nf_queue_handler *old;
+
 	if (pf >= ARRAY_SIZE(queue_handler))
 		return -EINVAL;
 
 	mutex_lock(&queue_handler_mutex);
-	if (queue_handler[pf] && queue_handler[pf] != qh) {
+	old = rcu_dereference_protected(queue_handler[pf],
+					lockdep_is_held(&queue_handler_mutex));
+	if (old && old != qh) {
 		mutex_unlock(&queue_handler_mutex);
 		return -EINVAL;
 	}
@@ -73,7 +80,10 @@ void nf_unregister_queue_handlers(const struct nf_queue_handler *qh)
 
 	mutex_lock(&queue_handler_mutex);
 	for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++)  {
-		if (queue_handler[pf] == qh)
+		if (rcu_dereference_protected(
+				queue_handler[pf],
+				lockdep_is_held(&queue_handler_mutex)
+				) == qh)
 			rcu_assign_pointer(queue_handler[pf], NULL);
 	}
 	mutex_unlock(&queue_handler_mutex);
@@ -115,7 +125,7 @@ static int __nf_queue(struct sk_buff *skb,
 		      int (*okfn)(struct sk_buff *),
 		      unsigned int queuenum)
 {
-	int status;
+	int status = -ENOENT;
 	struct nf_queue_entry *entry = NULL;
 #ifdef CONFIG_BRIDGE_NETFILTER
 	struct net_device *physindev;
@@ -128,16 +138,20 @@ static int __nf_queue(struct sk_buff *skb,
 	rcu_read_lock();
 
 	qh = rcu_dereference(queue_handler[pf]);
-	if (!qh)
+	if (!qh) {
+		status = -ESRCH;
 		goto err_unlock;
+	}
 
 	afinfo = nf_get_afinfo(pf);
 	if (!afinfo)
 		goto err_unlock;
 
 	entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
-	if (!entry)
+	if (!entry) {
+		status = -ENOMEM;
 		goto err_unlock;
+	}
 
 	*entry = (struct nf_queue_entry) {
 		.skb	= skb,
@@ -151,11 +165,9 @@ static int __nf_queue(struct sk_buff *skb,
 
 	/* If it's going away, ignore hook. */
 	if (!try_module_get(entry->elem->owner)) {
-		rcu_read_unlock();
-		kfree(entry);
-		return 0;
+		status = -ECANCELED;
+		goto err_unlock;
 	}
-
 	/* Bump dev refs so they don't vanish while packet is out */
 	if (indev)
 		dev_hold(indev);
@@ -182,14 +194,13 @@ static int __nf_queue(struct sk_buff *skb,
 		goto err;
 	}
 
-	return 1;
+	return 0;
 
 err_unlock:
 	rcu_read_unlock();
 err:
-	kfree_skb(skb);
 	kfree(entry);
-	return 1;
+	return status;
 }
 
 int nf_queue(struct sk_buff *skb,
@@ -201,6 +212,8 @@ int nf_queue(struct sk_buff *skb,
 	     unsigned int queuenum)
 {
 	struct sk_buff *segs;
+	int err;
+	unsigned int queued;
 
 	if (!skb_is_gso(skb))
 		return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
@@ -216,20 +229,35 @@ int nf_queue(struct sk_buff *skb,
 	}
 
 	segs = skb_gso_segment(skb, 0);
-	kfree_skb(skb);
+	/* Does not use PTR_ERR to limit the number of error codes that can be
+	 * returned by nf_queue.  For instance, callers rely on -ECANCELED to mean
+	 * 'ignore this hook'.
+	 */
 	if (IS_ERR(segs))
-		return 1;
+		return -EINVAL;
 
+	queued = 0;
+	err = 0;
 	do {
 		struct sk_buff *nskb = segs->next;
 
 		segs->next = NULL;
-		if (!__nf_queue(segs, elem, pf, hook, indev, outdev, okfn,
-				queuenum))
+		if (err == 0)
+			err = __nf_queue(segs, elem, pf, hook, indev,
+					   outdev, okfn, queuenum);
+		if (err == 0)
+			queued++;
+		else
 			kfree_skb(segs);
 		segs = nskb;
 	} while (segs);
-	return 1;
+
+	/* also free orig skb if only some segments were queued */
+	if (unlikely(err && queued))
+		err = 0;
+	if (err == 0)
+		kfree_skb(skb);
+	return err;
 }
 
 void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
@@ -237,6 +265,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 	struct sk_buff *skb = entry->skb;
 	struct list_head *elem = &entry->elem->list;
 	const struct nf_afinfo *afinfo;
+	int err;
 
 	rcu_read_lock();
 
@@ -270,10 +299,17 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 		local_bh_enable();
 		break;
 	case NF_QUEUE:
-		if (!__nf_queue(skb, elem, entry->pf, entry->hook,
-				entry->indev, entry->outdev, entry->okfn,
-				verdict >> NF_VERDICT_BITS))
-			goto next_hook;
+		err = __nf_queue(skb, elem, entry->pf, entry->hook,
+				 entry->indev, entry->outdev, entry->okfn,
+				 verdict >> NF_VERDICT_QBITS);
+		if (err < 0) {
+			if (err == -ECANCELED)
+				goto next_hook;
+			if (err == -ESRCH &&
+			   (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
+				goto next_hook;
+			kfree_skb(skb);
+		}
 		break;
 	case NF_STOLEN:
 	default:
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 6a1572b0ab4..91592da504b 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -874,19 +874,19 @@ static struct hlist_node *get_first(struct iter_state *st)
 
 	for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
 		if (!hlist_empty(&instance_table[st->bucket]))
-			return rcu_dereference_bh(instance_table[st->bucket].first);
+			return rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
 	}
 	return NULL;
 }
 
 static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h)
 {
-	h = rcu_dereference_bh(h->next);
+	h = rcu_dereference_bh(hlist_next_rcu(h));
 	while (!h) {
 		if (++st->bucket >= INSTANCE_BUCKETS)
 			return NULL;
 
-		h = rcu_dereference_bh(instance_table[st->bucket].first);
+		h = rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
 	}
 	return h;
 }
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 68e67d19724..b83123f12b4 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -387,25 +387,31 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 {
 	struct sk_buff *nskb;
 	struct nfqnl_instance *queue;
-	int err;
+	int err = -ENOBUFS;
 
 	/* rcu_read_lock()ed by nf_hook_slow() */
 	queue = instance_lookup(queuenum);
-	if (!queue)
+	if (!queue) {
+		err = -ESRCH;
 		goto err_out;
+	}
 
-	if (queue->copy_mode == NFQNL_COPY_NONE)
+	if (queue->copy_mode == NFQNL_COPY_NONE) {
+		err = -EINVAL;
 		goto err_out;
+	}
 
 	nskb = nfqnl_build_packet_message(queue, entry);
-	if (nskb == NULL)
+	if (nskb == NULL) {
+		err = -ENOMEM;
 		goto err_out;
-
+	}
 	spin_lock_bh(&queue->lock);
 
-	if (!queue->peer_pid)
+	if (!queue->peer_pid) {
+		err = -EINVAL;
 		goto err_out_free_nskb;
-
+	}
 	if (queue->queue_total >= queue->queue_maxlen) {
 		queue->queue_dropped++;
 		if (net_ratelimit())
@@ -432,7 +438,7 @@ err_out_free_nskb:
 err_out_unlock:
 	spin_unlock_bh(&queue->lock);
 err_out:
-	return -1;
+	return err;
 }
 
 static int
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index c9423763107..0a77d2ff215 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -23,6 +23,7 @@
 #include <linux/mutex.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
+#include <linux/audit.h>
 #include <net/net_namespace.h>
 
 #include <linux/netfilter/x_tables.h>
@@ -38,9 +39,8 @@ MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
 #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
 
 struct compat_delta {
-	struct compat_delta *next;
-	unsigned int offset;
-	int delta;
+	unsigned int offset; /* offset in kernel */
+	int delta; /* delta in 32bit user land */
 };
 
 struct xt_af {
@@ -49,7 +49,9 @@ struct xt_af {
 	struct list_head target;
 #ifdef CONFIG_COMPAT
 	struct mutex compat_mutex;
-	struct compat_delta *compat_offsets;
+	struct compat_delta *compat_tab;
+	unsigned int number; /* number of slots in compat_tab[] */
+	unsigned int cur; /* number of used slots in compat_tab[] */
 #endif
 };
 
@@ -414,54 +416,67 @@ int xt_check_match(struct xt_mtchk_param *par,
 EXPORT_SYMBOL_GPL(xt_check_match);
 
 #ifdef CONFIG_COMPAT
-int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta)
+int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta)
 {
-	struct compat_delta *tmp;
+	struct xt_af *xp = &xt[af];
 
-	tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL);
-	if (!tmp)
-		return -ENOMEM;
+	if (!xp->compat_tab) {
+		if (!xp->number)
+			return -EINVAL;
+		xp->compat_tab = vmalloc(sizeof(struct compat_delta) * xp->number);
+		if (!xp->compat_tab)
+			return -ENOMEM;
+		xp->cur = 0;
+	}
 
-	tmp->offset = offset;
-	tmp->delta = delta;
+	if (xp->cur >= xp->number)
+		return -EINVAL;
 
-	if (xt[af].compat_offsets) {
-		tmp->next = xt[af].compat_offsets->next;
-		xt[af].compat_offsets->next = tmp;
-	} else {
-		xt[af].compat_offsets = tmp;
-		tmp->next = NULL;
-	}
+	if (xp->cur)
+		delta += xp->compat_tab[xp->cur - 1].delta;
+	xp->compat_tab[xp->cur].offset = offset;
+	xp->compat_tab[xp->cur].delta = delta;
+	xp->cur++;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(xt_compat_add_offset);
 
 void xt_compat_flush_offsets(u_int8_t af)
 {
-	struct compat_delta *tmp, *next;
-
-	if (xt[af].compat_offsets) {
-		for (tmp = xt[af].compat_offsets; tmp; tmp = next) {
-			next = tmp->next;
-			kfree(tmp);
-		}
-		xt[af].compat_offsets = NULL;
+	if (xt[af].compat_tab) {
+		vfree(xt[af].compat_tab);	/* table was vmalloc()ed in xt_compat_add_offset() */
+		xt[af].compat_tab = NULL;
+		xt[af].number = xt[af].cur = 0;	/* cur too: xt_compat_calc_jump() indexes compat_tab by it */
 	}
 }
 EXPORT_SYMBOL_GPL(xt_compat_flush_offsets);
 
 int xt_compat_calc_jump(u_int8_t af, unsigned int offset)
 {
-	struct compat_delta *tmp;
-	int delta;
-
-	for (tmp = xt[af].compat_offsets, delta = 0; tmp; tmp = tmp->next)
-		if (tmp->offset < offset)
-			delta += tmp->delta;
-	return delta;
+	struct compat_delta *tmp = xt[af].compat_tab;
+	int mid, left = 0, right = xt[af].cur - 1;	/* search only the used slots */
+
+	while (left <= right) {	/* binary search; assumes compat_tab[] is ordered by offset */
+		mid = (left + right) >> 1;
+		if (offset > tmp[mid].offset)
+			left = mid + 1;
+		else if (offset < tmp[mid].offset)
+			right = mid - 1;
+		else
+			return mid ? tmp[mid - 1].delta : 0;	/* running total stored by xt_compat_add_offset() for the preceding entry */
+	}
+	WARN_ON_ONCE(1);	/* offset not found in compat_tab[] */
+	return 0;
 }
 EXPORT_SYMBOL_GPL(xt_compat_calc_jump);
 
+void xt_compat_init_offsets(u_int8_t af, unsigned int number)
+{
+	xt[af].number = number;
+	xt[af].cur = 0;
+}
+EXPORT_SYMBOL(xt_compat_init_offsets);
+
 int xt_compat_match_offset(const struct xt_match *match)
 {
 	u_int16_t csize = match->compatsize ? : match->matchsize;
@@ -820,6 +835,21 @@ xt_replace_table(struct xt_table *table,
 	 */
 	local_bh_enable();
 
+#ifdef CONFIG_AUDIT
+	if (audit_enabled) {
+		struct audit_buffer *ab;
+
+		ab = audit_log_start(current->audit_context, GFP_KERNEL,
+				     AUDIT_NETFILTER_CFG);
+		if (ab) {
+			audit_log_format(ab, "table=%s family=%u entries=%u",
+					 table->name, table->af,
+					 private->number);
+			audit_log_end(ab);
+		}
+	}
+#endif
+
 	return private;
 }
 EXPORT_SYMBOL_GPL(xt_replace_table);
@@ -1338,7 +1368,7 @@ static int __init xt_init(void)
 		mutex_init(&xt[i].mutex);
 #ifdef CONFIG_COMPAT
 		mutex_init(&xt[i].compat_mutex);
-		xt[i].compat_offsets = NULL;
+		xt[i].compat_tab = NULL;
 #endif
 		INIT_LIST_HEAD(&xt[i].target);
 		INIT_LIST_HEAD(&xt[i].match);
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c
new file mode 100644
index 00000000000..81802d27346
--- /dev/null
+++ b/net/netfilter/xt_AUDIT.c
@@ -0,0 +1,204 @@
+/*
+ * Creates audit record for dropped/accepted packets
+ *
+ * (C) 2010-2011 Thomas Graf <tgraf@redhat.com>
+ * (C) 2010-2011 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/audit.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/if_arp.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_AUDIT.h>
+#include <net/ipv6.h>
+#include <net/ip.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Thomas Graf <tgraf@redhat.com>");
+MODULE_DESCRIPTION("Xtables: creates audit records for dropped/accepted packets");
+MODULE_ALIAS("ipt_AUDIT");
+MODULE_ALIAS("ip6t_AUDIT");
+MODULE_ALIAS("ebt_AUDIT");
+MODULE_ALIAS("arpt_AUDIT");
+
+static void audit_proto(struct audit_buffer *ab, struct sk_buff *skb,
+			unsigned int proto, unsigned int offset)
+{
+	switch (proto) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+	case IPPROTO_UDPLITE: {
+		const __be16 *pptr;
+		__be16 _ports[2];
+
+		pptr = skb_header_pointer(skb, offset, sizeof(_ports), _ports);
+		if (pptr == NULL) {
+			audit_log_format(ab, " truncated=1");
+			return;
+		}
+
+		audit_log_format(ab, " sport=%hu dport=%hu",
+				 ntohs(pptr[0]), ntohs(pptr[1]));
+		}
+		break;
+
+	case IPPROTO_ICMP:
+	case IPPROTO_ICMPV6: {
+		const u8 *iptr;
+		u8 _ih[2];
+
+		iptr = skb_header_pointer(skb, offset, sizeof(_ih), &_ih);
+		if (iptr == NULL) {
+			audit_log_format(ab, " truncated=1");
+			return;
+		}
+
+		audit_log_format(ab, " icmptype=%hhu icmpcode=%hhu",
+				 iptr[0], iptr[1]);
+
+		}
+		break;
+	}
+}
+
+static void audit_ip4(struct audit_buffer *ab, struct sk_buff *skb)
+{
+	struct iphdr _iph;
+	const struct iphdr *ih;
+
+	ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
+	if (!ih) {
+		audit_log_format(ab, " truncated=1");
+		return;
+	}
+
+	audit_log_format(ab, " saddr=%pI4 daddr=%pI4 ipid=%hu proto=%hhu",
+		&ih->saddr, &ih->daddr, ntohs(ih->id), ih->protocol);
+
+	if (ntohs(ih->frag_off) & IP_OFFSET) {
+		audit_log_format(ab, " frag=1");
+		return;
+	}
+
+	audit_proto(ab, skb, ih->protocol, ih->ihl * 4);
+}
+
+static void audit_ip6(struct audit_buffer *ab, struct sk_buff *skb)
+{
+	struct ipv6hdr _ip6h;
+	const struct ipv6hdr *ih;
+	u8 nexthdr;
+	int offset;
+
+	ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_ip6h), &_ip6h);
+	if (!ih) {
+		audit_log_format(ab, " truncated=1");
+		return;
+	}
+
+	nexthdr = ih->nexthdr;	/* passed by address below so the skip can update it */
+	offset = ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(_ip6h),
+				  &nexthdr);
+
+	audit_log_format(ab, " saddr=%pI6c daddr=%pI6c proto=%hhu",
+			 &ih->saddr, &ih->daddr, nexthdr);
+
+	if (offset >= 0)	/* ipv6_skip_exthdr() returns -1 on failure; don't use that as an offset */
+		audit_proto(ab, skb, nexthdr, offset);
+}
+
+static unsigned int
+audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct xt_audit_info *info = par->targinfo;
+	struct audit_buffer *ab;
+
+	ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT);
+	if (ab == NULL)
+		goto errout;
+
+	audit_log_format(ab, "action=%hhu hook=%u len=%u inif=%s outif=%s",
+			 info->type, par->hooknum, skb->len,
+			 par->in ? par->in->name : "?",
+			 par->out ? par->out->name : "?");
+
+	if (skb->mark)
+		audit_log_format(ab, " mark=%#x", skb->mark);
+
+	if (skb->dev && skb->dev->type == ARPHRD_ETHER) {
+		audit_log_format(ab, " smac=%pM dmac=%pM macproto=0x%04x",
+				 eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
+				 ntohs(eth_hdr(skb)->h_proto));
+
+		if (par->family == NFPROTO_BRIDGE) {
+			switch (eth_hdr(skb)->h_proto) {
+			case __constant_htons(ETH_P_IP):
+				audit_ip4(ab, skb);
+				break;
+
+			case __constant_htons(ETH_P_IPV6):
+				audit_ip6(ab, skb);
+				break;
+			}
+		}
+	}
+
+	switch (par->family) {
+	case NFPROTO_IPV4:
+		audit_ip4(ab, skb);
+		break;
+
+	case NFPROTO_IPV6:
+		audit_ip6(ab, skb);
+		break;
+	}
+
+	audit_log_end(ab);
+
+errout:
+	return XT_CONTINUE;
+}
+
+static int audit_tg_check(const struct xt_tgchk_param *par)
+{
+	const struct xt_audit_info *info = par->targinfo;
+
+	if (info->type > XT_AUDIT_TYPE_MAX) {
+		pr_info("Audit type out of range (valid range: 0..%hhu)\n",
+			XT_AUDIT_TYPE_MAX);
+		return -ERANGE;
+	}
+
+	return 0;
+}
+
+static struct xt_target audit_tg_reg __read_mostly = {
+	.name		= "AUDIT",
+	.family		= NFPROTO_UNSPEC,
+	.target		= audit_tg,
+	.targetsize	= sizeof(struct xt_audit_info),
+	.checkentry	= audit_tg_check,
+	.me		= THIS_MODULE,
+};
+
+static int __init audit_tg_init(void)
+{
+	return xt_register_target(&audit_tg_reg);
+}
+
+static void __exit audit_tg_exit(void)
+{
+	xt_unregister_target(&audit_tg_reg);
+}
+
+module_init(audit_tg_init);
+module_exit(audit_tg_exit);
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index c2c0e4abeb9..af9c4dadf81 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -19,12 +19,14 @@
 #include <linux/netfilter_ipv6.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_CLASSIFY.h>
+#include <linux/netfilter_arp.h>
 
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Xtables: Qdisc classification");
 MODULE_ALIAS("ipt_CLASSIFY");
 MODULE_ALIAS("ip6t_CLASSIFY");
+MODULE_ALIAS("arpt_CLASSIFY");
 
 static unsigned int
 classify_tg(struct sk_buff *skb, const struct xt_action_param *par)
@@ -35,26 +37,36 @@ classify_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	return XT_CONTINUE;
 }
 
-static struct xt_target classify_tg_reg __read_mostly = {
-	.name       = "CLASSIFY",
-	.revision   = 0,
-	.family     = NFPROTO_UNSPEC,
-	.table      = "mangle",
-	.hooks      = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
-		      (1 << NF_INET_POST_ROUTING),
-	.target     = classify_tg,
-	.targetsize = sizeof(struct xt_classify_target_info),
-	.me         = THIS_MODULE,
+static struct xt_target classify_tg_reg[] __read_mostly = {
+	{	/* NFPROTO_UNSPEC entry, limited to the NF_INET_* hooks below */
+		.name       = "CLASSIFY",
+		.revision   = 0,
+		.family     = NFPROTO_UNSPEC,
+		.hooks      = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
+		              (1 << NF_INET_POST_ROUTING),
+		.target     = classify_tg,
+		.targetsize = sizeof(struct xt_classify_target_info),
+		.me         = THIS_MODULE,
+	},
+	{	/* NFPROTO_ARP entry: same target function, NF_ARP_* hooks */
+		.name       = "CLASSIFY",
+		.revision   = 0,
+		.family     = NFPROTO_ARP,
+		.hooks      = (1 << NF_ARP_OUT) | (1 << NF_ARP_FORWARD),
+		.target     = classify_tg,
+		.targetsize = sizeof(struct xt_classify_target_info),
+		.me         = THIS_MODULE,
+	},
 };
 
 static int __init classify_tg_init(void)
 {
-	return xt_register_target(&classify_tg_reg);
+	return xt_register_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg));
 }
 
 static void __exit classify_tg_exit(void)
 {
-	xt_unregister_target(&classify_tg_reg);
+	xt_unregister_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg));
 }
 
 module_init(classify_tg_init);
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index be1f22e1354..3bdd443aaf1 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -313,3 +313,5 @@ MODULE_AUTHOR("Timo Teras <ext-timo.teras@nokia.com>");
 MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>");
 MODULE_DESCRIPTION("Xtables: idle time monitor");
 MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("ipt_IDLETIMER");
+MODULE_ALIAS("ip6t_IDLETIMER");
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index a4140509eea..993de2ba89d 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -31,6 +31,8 @@
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Adam Nielsen <a.nielsen@shikadi.net>");
 MODULE_DESCRIPTION("Xtables: trigger LED devices on packet match");
+MODULE_ALIAS("ipt_LED");
+MODULE_ALIAS("ip6t_LED");
 
 static LIST_HEAD(xt_led_triggers);
 static DEFINE_MUTEX(xt_led_mutex);
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 039cce1bde3..d4f4b5d66b2 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -72,18 +72,31 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
 
 	if (info->queues_total > 1) {
 		if (par->family == NFPROTO_IPV4)
-			queue = hash_v4(skb) % info->queues_total + queue;
+			queue = (((u64) hash_v4(skb) * info->queues_total) >>
+				 32) + queue;
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 		else if (par->family == NFPROTO_IPV6)
-			queue = hash_v6(skb) % info->queues_total + queue;
+			queue = (((u64) hash_v6(skb) * info->queues_total) >>
+				 32) + queue;
 #endif
 	}
 	return NF_QUEUE_NR(queue);
 }
 
-static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
+static unsigned int
+nfqueue_tg_v2(struct sk_buff *skb, const struct xt_action_param *par)
 {
-	const struct xt_NFQ_info_v1 *info = par->targinfo;
+	const struct xt_NFQ_info_v2 *info = par->targinfo;
+	unsigned int ret = nfqueue_tg_v1(skb, par);
+	/* Start from the v1 verdict; OR in the bypass flag only if requested. */
+	if (info->bypass)
+		ret |= NF_VERDICT_FLAG_QUEUE_BYPASS;
+	return ret;
+}
+
+static int nfqueue_tg_check(const struct xt_tgchk_param *par)
+{
+	const struct xt_NFQ_info_v2 *info = par->targinfo;
 	u32 maxid;
 
 	if (unlikely(!rnd_inited)) {
@@ -100,6 +113,8 @@ static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
 		       info->queues_total, maxid);
 		return -ERANGE;
 	}
+	if (par->target->revision == 2 && info->bypass > 1)
+		return -EINVAL;
 	return 0;
 }
 
@@ -115,11 +130,20 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = {
 		.name		= "NFQUEUE",
 		.revision	= 1,
 		.family		= NFPROTO_UNSPEC,
-		.checkentry	= nfqueue_tg_v1_check,
+		.checkentry	= nfqueue_tg_check,
 		.target		= nfqueue_tg_v1,
 		.targetsize	= sizeof(struct xt_NFQ_info_v1),
 		.me		= THIS_MODULE,
 	},
+	{
+		.name		= "NFQUEUE",
+		.revision	= 2,
+		.family		= NFPROTO_UNSPEC,
+		.checkentry	= nfqueue_tg_check,
+		.target		= nfqueue_tg_v2,
+		.targetsize	= sizeof(struct xt_NFQ_info_v2),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init nfqueue_tg_init(void)
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 5c5b6b921b8..e029c480740 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -185,18 +185,24 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	int connections;
 
 	ct = nf_ct_get(skb, &ctinfo);
-	if (ct != NULL)
-		tuple_ptr = &ct->tuplehash[0].tuple;
-	else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
-				    par->family, &tuple))
+	if (ct != NULL) {
+		if (info->flags & XT_CONNLIMIT_DADDR)
+			tuple_ptr = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+		else
+			tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+	} else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
+				    par->family, &tuple)) {
 		goto hotdrop;
+	}
 
 	if (par->family == NFPROTO_IPV6) {
 		const struct ipv6hdr *iph = ipv6_hdr(skb);
-		memcpy(&addr.ip6, &iph->saddr, sizeof(iph->saddr));
+		memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ?
+		       &iph->daddr : &iph->saddr, sizeof(addr.ip6));
 	} else {
 		const struct iphdr *iph = ip_hdr(skb);
-		addr.ip = iph->saddr;
+		addr.ip = (info->flags & XT_CONNLIMIT_DADDR) ?
+			  iph->daddr : iph->saddr;
 	}
 
 	spin_lock_bh(&info->data->lock);
@@ -204,13 +210,12 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	                         &info->mask, par->family);
 	spin_unlock_bh(&info->data->lock);
 
-	if (connections < 0) {
+	if (connections < 0)
 		/* kmalloc failed, drop it entirely */
-		par->hotdrop = true;
-		return false;
-	}
+		goto hotdrop;
 
-	return (connections > info->limit) ^ info->inverse;
+	return (connections > info->limit) ^
+	       !!(info->flags & XT_CONNLIMIT_INVERT);
 
  hotdrop:
 	par->hotdrop = true;
@@ -268,25 +273,38 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
 	kfree(info->data);
 }
 
-static struct xt_match connlimit_mt_reg __read_mostly = {
-	.name       = "connlimit",
-	.revision   = 0,
-	.family     = NFPROTO_UNSPEC,
-	.checkentry = connlimit_mt_check,
-	.match      = connlimit_mt,
-	.matchsize  = sizeof(struct xt_connlimit_info),
-	.destroy    = connlimit_mt_destroy,
-	.me         = THIS_MODULE,
+static struct xt_match connlimit_mt_reg[] __read_mostly = {
+	{
+		.name       = "connlimit",
+		.revision   = 0,
+		.family     = NFPROTO_UNSPEC,
+		.checkentry = connlimit_mt_check,
+		.match      = connlimit_mt,
+		.matchsize  = sizeof(struct xt_connlimit_info),
+		.destroy    = connlimit_mt_destroy,
+		.me         = THIS_MODULE,
+	},
+	{	/* revision 1: same callbacks and matchsize as revision 0 */
+		.name       = "connlimit",
+		.revision   = 1,
+		.family     = NFPROTO_UNSPEC,
+		.checkentry = connlimit_mt_check,
+		.match      = connlimit_mt,
+		.matchsize  = sizeof(struct xt_connlimit_info),
+		.destroy    = connlimit_mt_destroy,
+		.me         = THIS_MODULE,
+	},
 };
 
 static int __init connlimit_mt_init(void)
 {
-	return xt_register_match(&connlimit_mt_reg);
+	return xt_register_matches(connlimit_mt_reg,
+	       ARRAY_SIZE(connlimit_mt_reg));
 }
 
 static void __exit connlimit_mt_exit(void)
 {
-	xt_unregister_match(&connlimit_mt_reg);
+	xt_unregister_matches(connlimit_mt_reg, ARRAY_SIZE(connlimit_mt_reg));
 }
 
 module_init(connlimit_mt_init);
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index e536710ad91..4ef1b63ad73 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -112,6 +112,54 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo2 *info,
 	return true;
 }
 
+static inline bool
+port_match(u16 min, u16 max, u16 port, bool invert)
+{
+	return invert != (min <= port && port <= max);
+}
+
+static inline bool
+ct_proto_port_check_v3(const struct xt_conntrack_mtinfo3 *info,
+		       const struct nf_conn *ct)
+{
+	const struct nf_conntrack_tuple *tuple;
+
+	tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+	if ((info->match_flags & XT_CONNTRACK_PROTO) &&
+	    (nf_ct_protonum(ct) == info->l4proto) ^
+	    !(info->invert_flags & XT_CONNTRACK_PROTO))
+		return false;
+
+	/* Shortcut to match all recognized protocols by using ->src.all. */
+	if ((info->match_flags & XT_CONNTRACK_ORIGSRC_PORT) &&
+	    !port_match(info->origsrc_port, info->origsrc_port_high,
+			ntohs(tuple->src.u.all),
+			info->invert_flags & XT_CONNTRACK_ORIGSRC_PORT))
+		return false;
+	/* Same range test for the original-direction destination port. */
+	if ((info->match_flags & XT_CONNTRACK_ORIGDST_PORT) &&
+	    !port_match(info->origdst_port, info->origdst_port_high,
+			ntohs(tuple->dst.u.all),
+			info->invert_flags & XT_CONNTRACK_ORIGDST_PORT))
+		return false;
+	/* The REPLSRC/REPLDST port checks below read the reply tuple. */
+	tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+	if ((info->match_flags & XT_CONNTRACK_REPLSRC_PORT) &&
+	    !port_match(info->replsrc_port, info->replsrc_port_high,
+			ntohs(tuple->src.u.all),
+			info->invert_flags & XT_CONNTRACK_REPLSRC_PORT))
+		return false;
+
+	if ((info->match_flags & XT_CONNTRACK_REPLDST_PORT) &&
+	    !port_match(info->repldst_port, info->repldst_port_high,
+			ntohs(tuple->dst.u.all),
+			info->invert_flags & XT_CONNTRACK_REPLDST_PORT))
+		return false;
+	/* No enabled protocol/port check rejected the connection. */
+	return true;
+}
+
 static bool
 conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
              u16 state_mask, u16 status_mask)
@@ -170,8 +218,13 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
 		    !(info->invert_flags & XT_CONNTRACK_REPLDST))
 			return false;
 
-	if (!ct_proto_port_check(info, ct))
-		return false;
+	if (par->match->revision != 3) {
+		if (!ct_proto_port_check(info, ct))
+			return false;
+	} else {
+		if (!ct_proto_port_check_v3(par->matchinfo, ct))
+			return false;
+	}
 
 	if ((info->match_flags & XT_CONNTRACK_STATUS) &&
 	    (!!(status_mask & ct->status) ^
@@ -207,6 +260,14 @@ conntrack_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
 	return conntrack_mt(skb, par, info->state_mask, info->status_mask);
 }
 
+static bool
+conntrack_mt_v3(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_conntrack_mtinfo3 *info = par->matchinfo;
+	/* Hand the v3 state/status masks to the shared conntrack_mt(). */
+	return conntrack_mt(skb, par, info->state_mask, info->status_mask);
+}
+
 static int conntrack_mt_check(const struct xt_mtchk_param *par)
 {
 	int ret;
@@ -244,6 +305,16 @@ static struct xt_match conntrack_mt_reg[] __read_mostly = {
 		.destroy    = conntrack_mt_destroy,
 		.me         = THIS_MODULE,
 	},
+	{
+		.name       = "conntrack",
+		.revision   = 3,
+		.family     = NFPROTO_UNSPEC,
+		.matchsize  = sizeof(struct xt_conntrack_mtinfo3),
+		.match      = conntrack_mt_v3,
+		.checkentry = conntrack_mt_check,
+		.destroy    = conntrack_mt_destroy,
+		.me         = THIS_MODULE,
+	},
 };
 
 static int __init conntrack_mt_init(void)
diff --git a/net/netfilter/xt_cpu.c b/net/netfilter/xt_cpu.c
index b39db8a5cba..c7a2e5466bc 100644
--- a/net/netfilter/xt_cpu.c
+++ b/net/netfilter/xt_cpu.c
@@ -22,6 +22,8 @@
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>");
 MODULE_DESCRIPTION("Xtables: CPU match");
+MODULE_ALIAS("ipt_cpu");
+MODULE_ALIAS("ip6t_cpu");
 
 static int cpu_mt_check(const struct xt_mtchk_param *par)
 {
diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
index 88f7c3511c7..d3eb5ed1892 100644
--- a/net/netfilter/xt_iprange.c
+++ b/net/netfilter/xt_iprange.c
@@ -31,7 +31,7 @@ iprange_mt4(const struct sk_buff *skb, struct xt_action_param *par)
 			pr_debug("src IP %pI4 NOT in range %s%pI4-%pI4\n",
 			         &iph->saddr,
 			         (info->flags & IPRANGE_SRC_INV) ? "(INV) " : "",
-			         &info->src_max.ip,
+			         &info->src_min.ip,
 			         &info->src_max.ip);
 			return false;
 		}
@@ -78,15 +78,27 @@ iprange_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 		m  = iprange_ipv6_sub(&iph->saddr, &info->src_min.in6) < 0;
 		m |= iprange_ipv6_sub(&iph->saddr, &info->src_max.in6) > 0;
 		m ^= !!(info->flags & IPRANGE_SRC_INV);
-		if (m)
+		if (m) {
+			pr_debug("src IP %pI6 NOT in range %s%pI6-%pI6\n",
+				 &iph->saddr,
+				 (info->flags & IPRANGE_SRC_INV) ? "(INV) " : "",
+				 &info->src_min.in6,
+				 &info->src_max.in6);
 			return false;
+		}
 	}
 	if (info->flags & IPRANGE_DST) {
 		m  = iprange_ipv6_sub(&iph->daddr, &info->dst_min.in6) < 0;
 		m |= iprange_ipv6_sub(&iph->daddr, &info->dst_max.in6) > 0;
 		m ^= !!(info->flags & IPRANGE_DST_INV);
-		if (m)
+		if (m) {
+			pr_debug("dst IP %pI6 NOT in range %s%pI6-%pI6\n",
+				 &iph->daddr,
+				 (info->flags & IPRANGE_DST_INV) ? "(INV) " : "",
+				 &info->dst_min.in6,
+				 &info->dst_max.in6);
 			return false;
+		}
 	}
 	return true;
 }
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
index 9127a3d8aa3..bb10b0717f1 100644
--- a/net/netfilter/xt_ipvs.c
+++ b/net/netfilter/xt_ipvs.c
@@ -85,7 +85,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	/*
 	 * Check if the packet belongs to an existing entry
 	 */
-	cp = pp->conn_out_get(family, skb, pp, &iph, iph.len, 1 /* inverse */);
+	cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* inverse */);
 	if (unlikely(cp == NULL)) {
 		match = false;
 		goto out;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 91cb1d71f01..c60649ec119 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -164,7 +164,6 @@ struct packet_mreq_max {
 static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
 		int closing, int tx_ring);
 
-#define PGV_FROM_VMALLOC 1
 struct pgv {
 	char *buffer;
 };
@@ -523,11 +522,11 @@ static inline unsigned int run_filter(const struct sk_buff *skb,
 {
 	struct sk_filter *filter;
 
-	rcu_read_lock_bh();
-	filter = rcu_dereference_bh(sk->sk_filter);
+	rcu_read_lock();
+	filter = rcu_dereference(sk->sk_filter);
 	if (filter != NULL)
 		res = sk_run_filter(skb, filter->insns);
-	rcu_read_unlock_bh();
+	rcu_read_unlock();
 
 	return res;
 }
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 9542449c072..da8adac2bf0 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -50,7 +50,6 @@ rdsdebug(char *fmt, ...)
 #define RDS_FRAG_SIZE	((unsigned int)(1 << RDS_FRAG_SHIFT))
 
 #define RDS_CONG_MAP_BYTES	(65536 / 8)
-#define RDS_CONG_MAP_LONGS	(RDS_CONG_MAP_BYTES / sizeof(unsigned long))
 #define RDS_CONG_MAP_PAGES	(PAGE_ALIGN(RDS_CONG_MAP_BYTES) / PAGE_SIZE)
 #define RDS_CONG_MAP_PAGE_BITS	(PAGE_SIZE * 8)
 
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index f04d4a484d5..e318f458713 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -205,6 +205,18 @@ config NET_SCH_DRR
 
 	  If unsure, say N.
 
+config NET_SCH_MQPRIO
+	tristate "Multi-queue priority scheduler (MQPRIO)"
+	help
+	  Say Y here if you want to use the Multi-queue Priority scheduler.
+	  This scheduler allows QOS to be offloaded on NICs that have support
+	  for offloading QOS schedulers.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called sch_mqprio.
+
+	  If unsure, say N.
+
 config NET_SCH_INGRESS
 	tristate "Ingress Qdisc"
 	depends on NET_CLS_ACT
@@ -243,7 +255,7 @@ config NET_CLS_TCINDEX
 
 config NET_CLS_ROUTE4
 	tristate "Routing decision (ROUTE)"
-	select NET_CLS_ROUTE
+	select IP_ROUTE_CLASSID
 	select NET_CLS
 	---help---
 	  If you say Y here, you will be able to classify packets
@@ -252,9 +264,6 @@ config NET_CLS_ROUTE4
 	  To compile this code as a module, choose M here: the
 	  module will be called cls_route.
 
-config NET_CLS_ROUTE
-	bool
-
 config NET_CLS_FW
 	tristate "Netfilter mark (FW)"
 	select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 960f5dba630..26ce681a2c6 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_NET_SCH_MULTIQ)	+= sch_multiq.o
 obj-$(CONFIG_NET_SCH_ATM)	+= sch_atm.o
 obj-$(CONFIG_NET_SCH_NETEM)	+= sch_netem.o
 obj-$(CONFIG_NET_SCH_DRR)	+= sch_drr.o
+obj-$(CONFIG_NET_SCH_MQPRIO)	+= sch_mqprio.o
 obj-$(CONFIG_NET_CLS_U32)	+= cls_u32.o
 obj-$(CONFIG_NET_CLS_ROUTE4)	+= cls_route.o
 obj-$(CONFIG_NET_CLS_FW)	+= cls_fw.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 23b25f89e7e..15873e14cb5 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -78,7 +78,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
 			   struct tc_action *a, struct tcf_hashinfo *hinfo)
 {
 	struct tcf_common *p;
-	int err = 0, index = -1,i = 0, s_i = 0, n_i = 0;
+	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
 	struct nlattr *nest;
 
 	read_lock_bh(hinfo->lock);
@@ -126,7 +126,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
 {
 	struct tcf_common *p, *s_p;
 	struct nlattr *nest;
-	int i= 0, n_i = 0;
+	int i = 0, n_i = 0;
 
 	nest = nla_nest_start(skb, a->order);
 	if (nest == NULL)
@@ -138,7 +138,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
 		while (p != NULL) {
 			s_p = p->tcfc_next;
 			if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo))
-				 module_put(a->ops->owner);
+				module_put(a->ops->owner);
 			n_i++;
 			p = s_p;
 		}
@@ -447,7 +447,8 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 	nest = nla_nest_start(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
-	if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) {
+	err = tcf_action_dump_old(skb, a, bind, ref);
+	if (err > 0) {
 		nla_nest_end(skb, nest);
 		return err;
 	}
@@ -491,7 +492,7 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est,
 	struct tc_action *a;
 	struct tc_action_ops *a_o;
 	char act_name[IFNAMSIZ];
-	struct nlattr *tb[TCA_ACT_MAX+1];
+	struct nlattr *tb[TCA_ACT_MAX + 1];
 	struct nlattr *kind;
 	int err;
 
@@ -549,9 +550,9 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est,
 		goto err_free;
 
 	/* module count goes up only when brand new policy is created
-	   if it exists and is only bound to in a_o->init() then
-	   ACT_P_CREATED is not returned (a zero is).
-	*/
+	 * If it already exists and a_o->init() only binds to it, then
+	 * ACT_P_CREATED is not returned (zero is returned instead).
+	 */
 	if (err != ACT_P_CREATED)
 		module_put(a_o->owner);
 	a->ops = a_o;
@@ -569,7 +570,7 @@ err_out:
 struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est,
 				  char *name, int ovr, int bind)
 {
-	struct nlattr *tb[TCA_ACT_MAX_PRIO+1];
+	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
 	struct tc_action *head = NULL, *act, *act_prev = NULL;
 	int err;
 	int i;
@@ -697,7 +698,7 @@ act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n,
 static struct tc_action *
 tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
 {
-	struct nlattr *tb[TCA_ACT_MAX+1];
+	struct nlattr *tb[TCA_ACT_MAX + 1];
 	struct tc_action *a;
 	int index;
 	int err;
@@ -770,7 +771,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
 	struct tcamsg *t;
 	struct netlink_callback dcb;
 	struct nlattr *nest;
-	struct nlattr *tb[TCA_ACT_MAX+1];
+	struct nlattr *tb[TCA_ACT_MAX + 1];
 	struct nlattr *kind;
 	struct tc_action *a = create_a(0);
 	int err = -ENOMEM;
@@ -821,7 +822,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
 	nlh->nlmsg_flags |= NLM_F_ROOT;
 	module_put(a->ops->owner);
 	kfree(a);
-	err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+	err = rtnetlink_send(skb, net, pid, RTNLGRP_TC,
+			     n->nlmsg_flags & NLM_F_ECHO);
 	if (err > 0)
 		return 0;
 
@@ -842,14 +844,14 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 	      u32 pid, int event)
 {
 	int i, ret;
-	struct nlattr *tb[TCA_ACT_MAX_PRIO+1];
+	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
 	struct tc_action *head = NULL, *act, *act_prev = NULL;
 
 	ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
 	if (ret < 0)
 		return ret;
 
-	if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) {
+	if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) {
 		if (tb[1] != NULL)
 			return tca_action_flush(net, tb[1], n, pid);
 		else
@@ -892,7 +894,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 		/* now do the delete */
 		tcf_action_destroy(head, 0);
 		ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC,
-				     n->nlmsg_flags&NLM_F_ECHO);
+				     n->nlmsg_flags & NLM_F_ECHO);
 		if (ret > 0)
 			return 0;
 		return ret;
@@ -936,7 +938,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a,
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	NETLINK_CB(skb).dst_group = RTNLGRP_TC;
 
-	err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags&NLM_F_ECHO);
+	err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags & NLM_F_ECHO);
 	if (err > 0)
 		err = 0;
 	return err;
@@ -967,7 +969,7 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 
 	/* dump then free all the actions after update; inserted policy
 	 * stays intact
-	 * */
+	 */
 	ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags);
 	for (a = act; a; a = act) {
 		act = a->next;
@@ -993,8 +995,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 		return -EINVAL;
 	}
 
-	/* n->nlmsg_flags&NLM_F_CREATE
-	 * */
+	/* n->nlmsg_flags & NLM_F_CREATE */
 	switch (n->nlmsg_type) {
 	case RTM_NEWACTION:
 		/* we are going to assume all other flags
@@ -1003,7 +1004,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 		 * but since we want avoid ambiguity (eg when flags
 		 * is zero) then just set this
 		 */
-		if (n->nlmsg_flags&NLM_F_REPLACE)
+		if (n->nlmsg_flags & NLM_F_REPLACE)
 			ovr = 1;
 replay:
 		ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr);
@@ -1028,7 +1029,7 @@ replay:
 static struct nlattr *
 find_dump_kind(const struct nlmsghdr *n)
 {
-	struct nlattr *tb1, *tb2[TCA_ACT_MAX+1];
+	struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
 	struct nlattr *nla[TCAA_MAX + 1];
 	struct nlattr *kind;
@@ -1071,9 +1072,8 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	}
 
 	a_o = tc_lookup_action(kind);
-	if (a_o == NULL) {
+	if (a_o == NULL)
 		return 0;
-	}
 
 	memset(&a, 0, sizeof(struct tc_action));
 	a.ops = a_o;
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 83ddfc07e45..6cdf9abe475 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -63,7 +63,7 @@ static int tcf_csum_init(struct nlattr *nla, struct nlattr *est,
 	if (nla == NULL)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_CSUM_MAX, nla,csum_policy);
+	err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy);
 	if (err < 0)
 		return err;
 
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index c2ed90a4c0b..2b4ab4b05ce 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -50,7 +50,7 @@ static int gact_determ(struct tcf_gact *gact)
 }
 
 typedef int (*g_rand)(struct tcf_gact *gact);
-static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ };
+static g_rand gact_rand[MAX_RAND] = { NULL, gact_net_rand, gact_determ };
 #endif /* CONFIG_GACT_PROB */
 
 static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
@@ -89,7 +89,7 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
 		pc = tcf_hash_create(parm->index, est, a, sizeof(*gact),
 				     bind, &gact_idx_gen, &gact_hash_info);
 		if (IS_ERR(pc))
-		    return PTR_ERR(pc);
+			return PTR_ERR(pc);
 		ret = ACT_P_CREATED;
 	} else {
 		if (!ovr) {
@@ -205,9 +205,9 @@ MODULE_LICENSE("GPL");
 static int __init gact_init_module(void)
 {
 #ifdef CONFIG_GACT_PROB
-	printk(KERN_INFO "GACT probability on\n");
+	pr_info("GACT probability on\n");
 #else
-	printk(KERN_INFO "GACT probability NOT on\n");
+	pr_info("GACT probability NOT on\n");
 #endif
 	return tcf_register_action(&act_gact_ops);
 }
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index c2a7c20e81c..9fc211a1b20 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -138,7 +138,7 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
 		pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind,
 				     &ipt_idx_gen, &ipt_hash_info);
 		if (IS_ERR(pc))
-		    return PTR_ERR(pc);
+			return PTR_ERR(pc);
 		ret = ACT_P_CREATED;
 	} else {
 		if (!ovr) {
@@ -162,7 +162,8 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
 	if (unlikely(!t))
 		goto err2;
 
-	if ((err = ipt_init_target(t, tname, hook)) < 0)
+	err = ipt_init_target(t, tname, hook);
+	if (err < 0)
 		goto err3;
 
 	spin_lock_bh(&ipt->tcf_lock);
@@ -212,8 +213,9 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
 	bstats_update(&ipt->tcf_bstats, skb);
 
 	/* yes, we have to worry about both in and out dev
-	 worry later - danger - this API seems to have changed
-	 from earlier kernels */
+	 * worry later - danger - this API seems to have changed
+	 * from earlier kernels
+	 */
 	par.in       = skb->dev;
 	par.out      = NULL;
 	par.hooknum  = ipt->tcfi_hook;
@@ -253,9 +255,9 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
 	struct tc_cnt c;
 
 	/* for simple targets kernel size == user size
-	** user name = target name
-	** for foolproof you need to not assume this
-	*/
+	 * user name = target name
+	 * for foolproof you need to not assume this
+	 */
 
 	t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC);
 	if (unlikely(!t))
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index d765067e99d..961386e2f2c 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -41,13 +41,13 @@ static struct tcf_hashinfo mirred_hash_info = {
 	.lock	=	&mirred_lock,
 };
 
-static inline int tcf_mirred_release(struct tcf_mirred *m, int bind)
+static int tcf_mirred_release(struct tcf_mirred *m, int bind)
 {
 	if (m) {
 		if (bind)
 			m->tcf_bindcnt--;
 		m->tcf_refcnt--;
-		if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) {
+		if (!m->tcf_bindcnt && m->tcf_refcnt <= 0) {
 			list_del(&m->tcfm_list);
 			if (m->tcfm_dev)
 				dev_put(m->tcfm_dev);
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 178a4bd7b7c..762b027650a 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -69,7 +69,7 @@ static int tcf_nat_init(struct nlattr *nla, struct nlattr *est,
 		pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
 				     &nat_idx_gen, &nat_hash_info);
 		if (IS_ERR(pc))
-		    return PTR_ERR(pc);
+			return PTR_ERR(pc);
 		p = to_tcf_nat(pc);
 		ret = ACT_P_CREATED;
 	} else {
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 445bef716f7..50c7c06c019 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -70,7 +70,7 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est,
 		pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
 				     &pedit_idx_gen, &pedit_hash_info);
 		if (IS_ERR(pc))
-		    return PTR_ERR(pc);
+			return PTR_ERR(pc);
 		p = to_pedit(pc);
 		keys = kmalloc(ksize, GFP_KERNEL);
 		if (keys == NULL) {
@@ -127,11 +127,9 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
 	int i, munged = 0;
 	unsigned int off;
 
-	if (skb_cloned(skb)) {
-		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
-			return p->tcf_action;
-		}
-	}
+	if (skb_cloned(skb) &&
+	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+		return p->tcf_action;
 
 	off = skb_network_offset(skb);
 
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index e2f08b1e2e5..8a1630774fd 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -22,8 +22,8 @@
 #include <net/act_api.h>
 #include <net/netlink.h>
 
-#define L2T(p,L)   qdisc_l2t((p)->tcfp_R_tab, L)
-#define L2T_P(p,L) qdisc_l2t((p)->tcfp_P_tab, L)
+#define L2T(p, L)   qdisc_l2t((p)->tcfp_R_tab, L)
+#define L2T_P(p, L) qdisc_l2t((p)->tcfp_P_tab, L)
 
 #define POL_TAB_MASK     15
 static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1];
@@ -37,8 +37,7 @@ static struct tcf_hashinfo police_hash_info = {
 };
 
 /* old policer structure from before tc actions */
-struct tc_police_compat
-{
+struct tc_police_compat {
 	u32			index;
 	int			action;
 	u32			limit;
@@ -139,7 +138,7 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
 static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est,
 				 struct tc_action *a, int ovr, int bind)
 {
-	unsigned h;
+	unsigned int h;
 	int ret = 0, err;
 	struct nlattr *tb[TCA_POLICE_MAX + 1];
 	struct tc_police *parm;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 7287cff7af3..a34a22de60b 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -47,7 +47,7 @@ static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result
 	/* print policy string followed by _ then packet count
 	 * Example if this was the 3rd packet and the string was "hello"
 	 * then it would look like "hello_3" (without quotes)
-	 **/
+	 */
 	pr_info("simple: %s_%d\n",
 	       (char *)d->tcfd_defdata, d->tcf_bstats.packets);
 	spin_unlock(&d->tcf_lock);
@@ -125,7 +125,7 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
 		pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
 				     &simp_idx_gen, &simp_hash_info);
 		if (IS_ERR(pc))
-		    return PTR_ERR(pc);
+			return PTR_ERR(pc);
 
 		d = to_defact(pc);
 		ret = alloc_defdata(d, defdata);
@@ -149,7 +149,7 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
 	return ret;
 }
 
-static inline int tcf_simp_cleanup(struct tc_action *a, int bind)
+static int tcf_simp_cleanup(struct tc_action *a, int bind)
 {
 	struct tcf_defact *d = a->priv;
 
@@ -158,8 +158,8 @@ static inline int tcf_simp_cleanup(struct tc_action *a, int bind)
 	return 0;
 }
 
-static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
-				int bind, int ref)
+static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
+			 int bind, int ref)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_defact *d = a->priv;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 836f5fee9e5..5f6f0c7c390 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -113,7 +113,7 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
 		pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
 				     &skbedit_idx_gen, &skbedit_hash_info);
 		if (IS_ERR(pc))
-		    return PTR_ERR(pc);
+			return PTR_ERR(pc);
 
 		d = to_skbedit(pc);
 		ret = ACT_P_CREATED;
@@ -144,7 +144,7 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
 	return ret;
 }
 
-static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind)
+static int tcf_skbedit_cleanup(struct tc_action *a, int bind)
 {
 	struct tcf_skbedit *d = a->priv;
 
@@ -153,8 +153,8 @@ static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind)
 	return 0;
 }
 
-static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
-				int bind, int ref)
+static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
+			    int bind, int ref)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_skbedit *d = a->priv;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 5fd0c28ef79..bb2c523f815 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -85,7 +85,7 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
 	int rc = -ENOENT;
 
 	write_lock(&cls_mod_lock);
-	for (tp = &tcf_proto_base; (t=*tp) != NULL; tp = &t->next)
+	for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next)
 		if (t == ops)
 			break;
 
@@ -111,7 +111,7 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
 	u32 first = TC_H_MAKE(0xC0000000U, 0U);
 
 	if (tp)
-		first = tp->prio-1;
+		first = tp->prio - 1;
 
 	return first;
 }
@@ -149,7 +149,8 @@ replay:
 
 	if (prio == 0) {
 		/* If no priority is given, user wants we allocated it. */
-		if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE))
+		if (n->nlmsg_type != RTM_NEWTFILTER ||
+		    !(n->nlmsg_flags & NLM_F_CREATE))
 			return -ENOENT;
 		prio = TC_H_MAKE(0x80000000U, 0U);
 	}
@@ -176,7 +177,8 @@ replay:
 	}
 
 	/* Is it classful? */
-	if ((cops = q->ops->cl_ops) == NULL)
+	cops = q->ops->cl_ops;
+	if (!cops)
 		return -EINVAL;
 
 	if (cops->tcf_chain == NULL)
@@ -196,10 +198,11 @@ replay:
 		goto errout;
 
 	/* Check the chain for existence of proto-tcf with this priority */
-	for (back = chain; (tp=*back) != NULL; back = &tp->next) {
+	for (back = chain; (tp = *back) != NULL; back = &tp->next) {
 		if (tp->prio >= prio) {
 			if (tp->prio == prio) {
-				if (!nprio || (tp->protocol != protocol && protocol))
+				if (!nprio ||
+				    (tp->protocol != protocol && protocol))
 					goto errout;
 			} else
 				tp = NULL;
@@ -216,7 +219,8 @@ replay:
 			goto errout;
 
 		err = -ENOENT;
-		if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE))
+		if (n->nlmsg_type != RTM_NEWTFILTER ||
+		    !(n->nlmsg_flags & NLM_F_CREATE))
 			goto errout;
 
 
@@ -420,7 +424,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 
 	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
 		return skb->len;
-	if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+	if (!dev)
 		return skb->len;
 
 	if (!tcm->tcm_parent)
@@ -429,7 +434,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 		q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
 	if (!q)
 		goto out;
-	if ((cops = q->ops->cl_ops) == NULL)
+	cops = q->ops->cl_ops;
+	if (!cops)
 		goto errout;
 	if (cops->tcf_chain == NULL)
 		goto errout;
@@ -444,8 +450,9 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 
 	s_t = cb->args[0];
 
-	for (tp=*chain, t=0; tp; tp = tp->next, t++) {
-		if (t < s_t) continue;
+	for (tp = *chain, t = 0; tp; tp = tp->next, t++) {
+		if (t < s_t)
+			continue;
 		if (TC_H_MAJ(tcm->tcm_info) &&
 		    TC_H_MAJ(tcm->tcm_info) != tp->prio)
 			continue;
@@ -468,10 +475,10 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 		arg.skb = skb;
 		arg.cb = cb;
 		arg.w.stop = 0;
-		arg.w.skip = cb->args[1]-1;
+		arg.w.skip = cb->args[1] - 1;
 		arg.w.count = 0;
 		tp->ops->walk(tp, &arg.w);
-		cb->args[1] = arg.w.count+1;
+		cb->args[1] = arg.w.count + 1;
 		if (arg.w.stop)
 			break;
 	}
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index f23d9155b1e..8be8872dd57 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -21,14 +21,12 @@
 #include <net/act_api.h>
 #include <net/pkt_cls.h>
 
-struct basic_head
-{
+struct basic_head {
 	u32			hgenerator;
 	struct list_head	flist;
 };
 
-struct basic_filter
-{
+struct basic_filter {
 	u32			handle;
 	struct tcf_exts		exts;
 	struct tcf_ematch_tree	ematches;
@@ -92,8 +90,7 @@ static int basic_init(struct tcf_proto *tp)
 	return 0;
 }
 
-static inline void basic_delete_filter(struct tcf_proto *tp,
-				       struct basic_filter *f)
+static void basic_delete_filter(struct tcf_proto *tp, struct basic_filter *f)
 {
 	tcf_unbind_filter(tp, &f->res);
 	tcf_exts_destroy(tp, &f->exts);
@@ -135,9 +132,9 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = {
 	[TCA_BASIC_EMATCHES]	= { .type = NLA_NESTED },
 };
 
-static inline int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f,
-				  unsigned long base, struct nlattr **tb,
-				  struct nlattr *est)
+static int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f,
+			   unsigned long base, struct nlattr **tb,
+			   struct nlattr *est)
 {
 	int err = -EINVAL;
 	struct tcf_exts e;
@@ -203,7 +200,7 @@ static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 		} while (--i > 0 && basic_get(tp, head->hgenerator));
 
 		if (i <= 0) {
-			printk(KERN_ERR "Insufficient number of handles\n");
+			pr_err("Insufficient number of handles\n");
 			goto errout;
 		}
 
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index d49c40fb7e0..32a335194ca 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -56,7 +56,8 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
 {
 	struct cgroup_cls_state *cs;
 
-	if (!(cs = kzalloc(sizeof(*cs), GFP_KERNEL)))
+	cs = kzalloc(sizeof(*cs), GFP_KERNEL);
+	if (!cs)
 		return ERR_PTR(-ENOMEM);
 
 	if (cgrp->parent)
@@ -94,8 +95,7 @@ static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
 	return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files));
 }
 
-struct cls_cgroup_head
-{
+struct cls_cgroup_head {
 	u32			handle;
 	struct tcf_exts		exts;
 	struct tcf_ematch_tree	ematches;
@@ -166,7 +166,7 @@ static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base,
 			     u32 handle, struct nlattr **tca,
 			     unsigned long *arg)
 {
-	struct nlattr *tb[TCA_CGROUP_MAX+1];
+	struct nlattr *tb[TCA_CGROUP_MAX + 1];
 	struct cls_cgroup_head *head = tp->root;
 	struct tcf_ematch_tree t;
 	struct tcf_exts e;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 5b271a18bc3..8ec01391d98 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -121,7 +121,7 @@ static u32 flow_get_proto_src(struct sk_buff *skb)
 		if (!pskb_network_may_pull(skb, sizeof(*iph)))
 			break;
 		iph = ip_hdr(skb);
-		if (iph->frag_off & htons(IP_MF|IP_OFFSET))
+		if (iph->frag_off & htons(IP_MF | IP_OFFSET))
 			break;
 		poff = proto_ports_offset(iph->protocol);
 		if (poff >= 0 &&
@@ -163,7 +163,7 @@ static u32 flow_get_proto_dst(struct sk_buff *skb)
 		if (!pskb_network_may_pull(skb, sizeof(*iph)))
 			break;
 		iph = ip_hdr(skb);
-		if (iph->frag_off & htons(IP_MF|IP_OFFSET))
+		if (iph->frag_off & htons(IP_MF | IP_OFFSET))
 			break;
 		poff = proto_ports_offset(iph->protocol);
 		if (poff >= 0 &&
@@ -276,7 +276,7 @@ fallback:
 
 static u32 flow_get_rtclassid(const struct sk_buff *skb)
 {
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	if (skb_dst(skb))
 		return skb_dst(skb)->tclassid;
 #endif
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 93b0a7b6f9b..26e7bc4ffb7 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -31,14 +31,12 @@
 
 #define HTSIZE (PAGE_SIZE/sizeof(struct fw_filter *))
 
-struct fw_head
-{
+struct fw_head {
 	struct fw_filter *ht[HTSIZE];
 	u32 mask;
 };
 
-struct fw_filter
-{
+struct fw_filter {
 	struct fw_filter	*next;
 	u32			id;
 	struct tcf_result	res;
@@ -53,7 +51,7 @@ static const struct tcf_ext_map fw_ext_map = {
 	.police = TCA_FW_POLICE
 };
 
-static __inline__ int fw_hash(u32 handle)
+static inline int fw_hash(u32 handle)
 {
 	if (HTSIZE == 4096)
 		return ((handle >> 24) & 0xFFF) ^
@@ -82,14 +80,14 @@ static __inline__ int fw_hash(u32 handle)
 static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
 			  struct tcf_result *res)
 {
-	struct fw_head *head = (struct fw_head*)tp->root;
+	struct fw_head *head = (struct fw_head *)tp->root;
 	struct fw_filter *f;
 	int r;
 	u32 id = skb->mark;
 
 	if (head != NULL) {
 		id &= head->mask;
-		for (f=head->ht[fw_hash(id)]; f; f=f->next) {
+		for (f = head->ht[fw_hash(id)]; f; f = f->next) {
 			if (f->id == id) {
 				*res = f->res;
 #ifdef CONFIG_NET_CLS_IND
@@ -105,7 +103,8 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
 		}
 	} else {
 		/* old method */
-		if (id && (TC_H_MAJ(id) == 0 || !(TC_H_MAJ(id^tp->q->handle)))) {
+		if (id && (TC_H_MAJ(id) == 0 ||
+			   !(TC_H_MAJ(id ^ tp->q->handle)))) {
 			res->classid = id;
 			res->class = 0;
 			return 0;
@@ -117,13 +116,13 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
 
 static unsigned long fw_get(struct tcf_proto *tp, u32 handle)
 {
-	struct fw_head *head = (struct fw_head*)tp->root;
+	struct fw_head *head = (struct fw_head *)tp->root;
 	struct fw_filter *f;
 
 	if (head == NULL)
 		return 0;
 
-	for (f=head->ht[fw_hash(handle)]; f; f=f->next) {
+	for (f = head->ht[fw_hash(handle)]; f; f = f->next) {
 		if (f->id == handle)
 			return (unsigned long)f;
 	}
@@ -139,8 +138,7 @@ static int fw_init(struct tcf_proto *tp)
 	return 0;
 }
 
-static inline void
-fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
+static void fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
 {
 	tcf_unbind_filter(tp, &f->res);
 	tcf_exts_destroy(tp, &f->exts);
@@ -156,8 +154,8 @@ static void fw_destroy(struct tcf_proto *tp)
 	if (head == NULL)
 		return;
 
-	for (h=0; h<HTSIZE; h++) {
-		while ((f=head->ht[h]) != NULL) {
+	for (h = 0; h < HTSIZE; h++) {
+		while ((f = head->ht[h]) != NULL) {
 			head->ht[h] = f->next;
 			fw_delete_filter(tp, f);
 		}
@@ -167,14 +165,14 @@ static void fw_destroy(struct tcf_proto *tp)
 
 static int fw_delete(struct tcf_proto *tp, unsigned long arg)
 {
-	struct fw_head *head = (struct fw_head*)tp->root;
-	struct fw_filter *f = (struct fw_filter*)arg;
+	struct fw_head *head = (struct fw_head *)tp->root;
+	struct fw_filter *f = (struct fw_filter *)arg;
 	struct fw_filter **fp;
 
 	if (head == NULL || f == NULL)
 		goto out;
 
-	for (fp=&head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) {
+	for (fp = &head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) {
 		if (*fp == f) {
 			tcf_tree_lock(tp);
 			*fp = f->next;
@@ -240,7 +238,7 @@ static int fw_change(struct tcf_proto *tp, unsigned long base,
 		     struct nlattr **tca,
 		     unsigned long *arg)
 {
-	struct fw_head *head = (struct fw_head*)tp->root;
+	struct fw_head *head = (struct fw_head *)tp->root;
 	struct fw_filter *f = (struct fw_filter *) *arg;
 	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct nlattr *tb[TCA_FW_MAX + 1];
@@ -302,7 +300,7 @@ errout:
 
 static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 {
-	struct fw_head *head = (struct fw_head*)tp->root;
+	struct fw_head *head = (struct fw_head *)tp->root;
 	int h;
 
 	if (head == NULL)
@@ -332,7 +330,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
 		   struct sk_buff *skb, struct tcmsg *t)
 {
 	struct fw_head *head = (struct fw_head *)tp->root;
-	struct fw_filter *f = (struct fw_filter*)fh;
+	struct fw_filter *f = (struct fw_filter *)fh;
 	unsigned char *b = skb_tail_pointer(skb);
 	struct nlattr *nest;
 
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 694dcd85dec..d580cdfca09 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -23,34 +23,30 @@
 #include <net/pkt_cls.h>
 
 /*
-   1. For now we assume that route tags < 256.
-      It allows to use direct table lookups, instead of hash tables.
-   2. For now we assume that "from TAG" and "fromdev DEV" statements
-      are mutually  exclusive.
-   3. "to TAG from ANY" has higher priority, than "to ANY from XXX"
+ * 1. For now we assume that route tags < 256.
+ *    This allows direct table lookups instead of hash tables.
+ * 2. For now we assume that "from TAG" and "fromdev DEV" statements
+ *    are mutually exclusive.
+ * 3. "to TAG from ANY" has higher priority than "to ANY from XXX".
  */
 
-struct route4_fastmap
-{
+struct route4_fastmap {
 	struct route4_filter	*filter;
 	u32			id;
 	int			iif;
 };
 
-struct route4_head
-{
+struct route4_head {
 	struct route4_fastmap	fastmap[16];
-	struct route4_bucket	*table[256+1];
+	struct route4_bucket	*table[256 + 1];
 };
 
-struct route4_bucket
-{
+struct route4_bucket {
 	/* 16 FROM buckets + 16 IIF buckets + 1 wildcard bucket */
-	struct route4_filter	*ht[16+16+1];
+	struct route4_filter	*ht[16 + 16 + 1];
 };
 
-struct route4_filter
-{
+struct route4_filter {
 	struct route4_filter	*next;
 	u32			id;
 	int			iif;
@@ -61,20 +57,20 @@ struct route4_filter
 	struct route4_bucket	*bkt;
 };
 
-#define ROUTE4_FAILURE ((struct route4_filter*)(-1L))
+#define ROUTE4_FAILURE ((struct route4_filter *)(-1L))
 
 static const struct tcf_ext_map route_ext_map = {
 	.police = TCA_ROUTE4_POLICE,
 	.action = TCA_ROUTE4_ACT
 };
 
-static __inline__ int route4_fastmap_hash(u32 id, int iif)
+static inline int route4_fastmap_hash(u32 id, int iif)
 {
-	return id&0xF;
+	return id & 0xF;
 }
 
-static inline
-void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
+static void
+route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
 {
 	spinlock_t *root_lock = qdisc_root_sleeping_lock(q);
 
@@ -83,32 +79,33 @@ void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
 	spin_unlock_bh(root_lock);
 }
 
-static inline void
+static void
 route4_set_fastmap(struct route4_head *head, u32 id, int iif,
 		   struct route4_filter *f)
 {
 	int h = route4_fastmap_hash(id, iif);
+
 	head->fastmap[h].id = id;
 	head->fastmap[h].iif = iif;
 	head->fastmap[h].filter = f;
 }
 
-static __inline__ int route4_hash_to(u32 id)
+static inline int route4_hash_to(u32 id)
 {
-	return id&0xFF;
+	return id & 0xFF;
 }
 
-static __inline__ int route4_hash_from(u32 id)
+static inline int route4_hash_from(u32 id)
 {
-	return (id>>16)&0xF;
+	return (id >> 16) & 0xF;
 }
 
-static __inline__ int route4_hash_iif(int iif)
+static inline int route4_hash_iif(int iif)
 {
-	return 16 + ((iif>>16)&0xF);
+	return 16 + ((iif >> 16) & 0xF);
 }
 
-static __inline__ int route4_hash_wild(void)
+static inline int route4_hash_wild(void)
 {
 	return 32;
 }
@@ -131,21 +128,22 @@ static __inline__ int route4_hash_wild(void)
 static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp,
 			   struct tcf_result *res)
 {
-	struct route4_head *head = (struct route4_head*)tp->root;
+	struct route4_head *head = (struct route4_head *)tp->root;
 	struct dst_entry *dst;
 	struct route4_bucket *b;
 	struct route4_filter *f;
 	u32 id, h;
 	int iif, dont_cache = 0;
 
-	if ((dst = skb_dst(skb)) == NULL)
+	dst = skb_dst(skb);
+	if (!dst)
 		goto failure;
 
 	id = dst->tclassid;
 	if (head == NULL)
 		goto old_method;
 
-	iif = ((struct rtable*)dst)->fl.iif;
+	iif = ((struct rtable *)dst)->fl.iif;
 
 	h = route4_fastmap_hash(id, iif);
 	if (id == head->fastmap[h].id &&
@@ -161,7 +159,8 @@ static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp,
 	h = route4_hash_to(id);
 
 restart:
-	if ((b = head->table[h]) != NULL) {
+	b = head->table[h];
+	if (b) {
 		for (f = b->ht[route4_hash_from(id)]; f; f = f->next)
 			if (f->id == id)
 				ROUTE4_APPLY_RESULT();
@@ -197,8 +196,9 @@ old_method:
 
 static inline u32 to_hash(u32 id)
 {
-	u32 h = id&0xFF;
-	if (id&0x8000)
+	u32 h = id & 0xFF;
+
+	if (id & 0x8000)
 		h += 256;
 	return h;
 }
@@ -211,17 +211,17 @@ static inline u32 from_hash(u32 id)
 	if (!(id & 0x8000)) {
 		if (id > 255)
 			return 256;
-		return id&0xF;
+		return id & 0xF;
 	}
-	return 16 + (id&0xF);
+	return 16 + (id & 0xF);
 }
 
 static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
 {
-	struct route4_head *head = (struct route4_head*)tp->root;
+	struct route4_head *head = (struct route4_head *)tp->root;
 	struct route4_bucket *b;
 	struct route4_filter *f;
-	unsigned h1, h2;
+	unsigned int h1, h2;
 
 	if (!head)
 		return 0;
@@ -230,11 +230,12 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
 	if (h1 > 256)
 		return 0;
 
-	h2 = from_hash(handle>>16);
+	h2 = from_hash(handle >> 16);
 	if (h2 > 32)
 		return 0;
 
-	if ((b = head->table[h1]) != NULL) {
+	b = head->table[h1];
+	if (b) {
 		for (f = b->ht[h2]; f; f = f->next)
 			if (f->handle == handle)
 				return (unsigned long)f;
@@ -251,7 +252,7 @@ static int route4_init(struct tcf_proto *tp)
 	return 0;
 }
 
-static inline void
+static void
 route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f)
 {
 	tcf_unbind_filter(tp, &f->res);
@@ -267,11 +268,12 @@ static void route4_destroy(struct tcf_proto *tp)
 	if (head == NULL)
 		return;
 
-	for (h1=0; h1<=256; h1++) {
+	for (h1 = 0; h1 <= 256; h1++) {
 		struct route4_bucket *b;
 
-		if ((b = head->table[h1]) != NULL) {
-			for (h2=0; h2<=32; h2++) {
+		b = head->table[h1];
+		if (b) {
+			for (h2 = 0; h2 <= 32; h2++) {
 				struct route4_filter *f;
 
 				while ((f = b->ht[h2]) != NULL) {
@@ -287,9 +289,9 @@ static void route4_destroy(struct tcf_proto *tp)
 
 static int route4_delete(struct tcf_proto *tp, unsigned long arg)
 {
-	struct route4_head *head = (struct route4_head*)tp->root;
-	struct route4_filter **fp, *f = (struct route4_filter*)arg;
-	unsigned h = 0;
+	struct route4_head *head = (struct route4_head *)tp->root;
+	struct route4_filter **fp, *f = (struct route4_filter *)arg;
+	unsigned int h = 0;
 	struct route4_bucket *b;
 	int i;
 
@@ -299,7 +301,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
 	h = f->handle;
 	b = f->bkt;
 
-	for (fp = &b->ht[from_hash(h>>16)]; *fp; fp = &(*fp)->next) {
+	for (fp = &b->ht[from_hash(h >> 16)]; *fp; fp = &(*fp)->next) {
 		if (*fp == f) {
 			tcf_tree_lock(tp);
 			*fp = f->next;
@@ -310,7 +312,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
 
 			/* Strip tree */
 
-			for (i=0; i<=32; i++)
+			for (i = 0; i <= 32; i++)
 				if (b->ht[i])
 					return 0;
 
@@ -380,7 +382,8 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
 	}
 
 	h1 = to_hash(nhandle);
-	if ((b = head->table[h1]) == NULL) {
+	b = head->table[h1];
+	if (!b) {
 		err = -ENOBUFS;
 		b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL);
 		if (b == NULL)
@@ -391,6 +394,7 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
 		tcf_tree_unlock(tp);
 	} else {
 		unsigned int h2 = from_hash(nhandle >> 16);
+
 		err = -EEXIST;
 		for (fp = b->ht[h2]; fp; fp = fp->next)
 			if (fp->handle == f->handle)
@@ -444,7 +448,8 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
 	if (err < 0)
 		return err;
 
-	if ((f = (struct route4_filter*)*arg) != NULL) {
+	f = (struct route4_filter *)*arg;
+	if (f) {
 		if (f->handle != handle && handle)
 			return -EINVAL;
 
@@ -481,7 +486,7 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
 
 reinsert:
 	h = from_hash(f->handle >> 16);
-	for (fp = &f->bkt->ht[h]; (f1=*fp) != NULL; fp = &f1->next)
+	for (fp = &f->bkt->ht[h]; (f1 = *fp) != NULL; fp = &f1->next)
 		if (f->handle < f1->handle)
 			break;
 
@@ -492,7 +497,8 @@ reinsert:
 	if (old_handle && f->handle != old_handle) {
 		th = to_hash(old_handle);
 		h = from_hash(old_handle >> 16);
-		if ((b = head->table[th]) != NULL) {
+		b = head->table[th];
+		if (b) {
 			for (fp = &b->ht[h]; *fp; fp = &(*fp)->next) {
 				if (*fp == f) {
 					*fp = f->next;
@@ -515,7 +521,7 @@ errout:
 static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 {
 	struct route4_head *head = tp->root;
-	unsigned h, h1;
+	unsigned int h, h1;
 
 	if (head == NULL)
 		arg->stop = 1;
@@ -549,7 +555,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 static int route4_dump(struct tcf_proto *tp, unsigned long fh,
 		       struct sk_buff *skb, struct tcmsg *t)
 {
-	struct route4_filter *f = (struct route4_filter*)fh;
+	struct route4_filter *f = (struct route4_filter *)fh;
 	unsigned char *b = skb_tail_pointer(skb);
 	struct nlattr *nest;
 	u32 id;
@@ -563,15 +569,15 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
 	if (nest == NULL)
 		goto nla_put_failure;
 
-	if (!(f->handle&0x8000)) {
-		id = f->id&0xFF;
+	if (!(f->handle & 0x8000)) {
+		id = f->id & 0xFF;
 		NLA_PUT_U32(skb, TCA_ROUTE4_TO, id);
 	}
-	if (f->handle&0x80000000) {
-		if ((f->handle>>16) != 0xFFFF)
+	if (f->handle & 0x80000000) {
+		if ((f->handle >> 16) != 0xFFFF)
 			NLA_PUT_U32(skb, TCA_ROUTE4_IIF, f->iif);
 	} else {
-		id = f->id>>16;
+		id = f->id >> 16;
 		NLA_PUT_U32(skb, TCA_ROUTE4_FROM, id);
 	}
 	if (f->res.classid)
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 425a1790b04..402c44b241a 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -66,28 +66,25 @@
    powerful classification engine.  */
 
 
-struct rsvp_head
-{
+struct rsvp_head {
 	u32			tmap[256/32];
 	u32			hgenerator;
 	u8			tgenerator;
 	struct rsvp_session	*ht[256];
 };
 
-struct rsvp_session
-{
+struct rsvp_session {
 	struct rsvp_session	*next;
 	__be32			dst[RSVP_DST_LEN];
 	struct tc_rsvp_gpi 	dpi;
 	u8			protocol;
 	u8			tunnelid;
 	/* 16 (src,sport) hash slots, and one wildcard source slot */
-	struct rsvp_filter	*ht[16+1];
+	struct rsvp_filter	*ht[16 + 1];
 };
 
 
-struct rsvp_filter
-{
+struct rsvp_filter {
 	struct rsvp_filter	*next;
 	__be32			src[RSVP_DST_LEN];
 	struct tc_rsvp_gpi	spi;
@@ -100,17 +97,19 @@ struct rsvp_filter
 	struct rsvp_session	*sess;
 };
 
-static __inline__ unsigned hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
+static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
 {
-	unsigned h = (__force __u32)dst[RSVP_DST_LEN-1];
+	unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];
+
 	h ^= h>>16;
 	h ^= h>>8;
 	return (h ^ protocol ^ tunnelid) & 0xFF;
 }
 
-static __inline__ unsigned hash_src(__be32 *src)
+static inline unsigned int hash_src(__be32 *src)
 {
-	unsigned h = (__force __u32)src[RSVP_DST_LEN-1];
+	unsigned int h = (__force __u32)src[RSVP_DST_LEN-1];
+
 	h ^= h>>16;
 	h ^= h>>8;
 	h ^= h>>4;
@@ -134,10 +133,10 @@ static struct tcf_ext_map rsvp_ext_map = {
 static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
 			 struct tcf_result *res)
 {
-	struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
+	struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
 	struct rsvp_session *s;
 	struct rsvp_filter *f;
-	unsigned h1, h2;
+	unsigned int h1, h2;
 	__be32 *dst, *src;
 	u8 protocol;
 	u8 tunnelid = 0;
@@ -162,13 +161,13 @@ restart:
 	src = &nhptr->saddr.s6_addr32[0];
 	dst = &nhptr->daddr.s6_addr32[0];
 	protocol = nhptr->nexthdr;
-	xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr);
+	xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
 #else
 	src = &nhptr->saddr;
 	dst = &nhptr->daddr;
 	protocol = nhptr->protocol;
-	xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
-	if (nhptr->frag_off & htons(IP_MF|IP_OFFSET))
+	xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
+	if (nhptr->frag_off & htons(IP_MF | IP_OFFSET))
 		return -1;
 #endif
 
@@ -176,10 +175,10 @@ restart:
 	h2 = hash_src(src);
 
 	for (s = sht[h1]; s; s = s->next) {
-		if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
+		if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
 		    protocol == s->protocol &&
 		    !(s->dpi.mask &
-		      (*(u32*)(xprt+s->dpi.offset)^s->dpi.key)) &&
+		      (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
 #if RSVP_DST_LEN == 4
 		    dst[0] == s->dst[0] &&
 		    dst[1] == s->dst[1] &&
@@ -188,8 +187,8 @@ restart:
 		    tunnelid == s->tunnelid) {
 
 			for (f = s->ht[h2]; f; f = f->next) {
-				if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] &&
-				    !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key))
+				if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
+				    !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
 #if RSVP_DST_LEN == 4
 				    &&
 				    src[0] == f->src[0] &&
@@ -205,7 +204,7 @@ matched:
 						return 0;
 
 					tunnelid = f->res.classid;
-					nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr));
+					nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
 					goto restart;
 				}
 			}
@@ -224,11 +223,11 @@ matched:
 
 static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
 {
-	struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
+	struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
 	struct rsvp_session *s;
 	struct rsvp_filter *f;
-	unsigned h1 = handle&0xFF;
-	unsigned h2 = (handle>>8)&0xFF;
+	unsigned int h1 = handle & 0xFF;
+	unsigned int h2 = (handle >> 8) & 0xFF;
 
 	if (h2 > 16)
 		return 0;
@@ -258,7 +257,7 @@ static int rsvp_init(struct tcf_proto *tp)
 	return -ENOBUFS;
 }
 
-static inline void
+static void
 rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
 {
 	tcf_unbind_filter(tp, &f->res);
@@ -277,13 +276,13 @@ static void rsvp_destroy(struct tcf_proto *tp)
 
 	sht = data->ht;
 
-	for (h1=0; h1<256; h1++) {
+	for (h1 = 0; h1 < 256; h1++) {
 		struct rsvp_session *s;
 
 		while ((s = sht[h1]) != NULL) {
 			sht[h1] = s->next;
 
-			for (h2=0; h2<=16; h2++) {
+			for (h2 = 0; h2 <= 16; h2++) {
 				struct rsvp_filter *f;
 
 				while ((f = s->ht[h2]) != NULL) {
@@ -299,13 +298,13 @@ static void rsvp_destroy(struct tcf_proto *tp)
 
 static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
 {
-	struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg;
-	unsigned h = f->handle;
+	struct rsvp_filter **fp, *f = (struct rsvp_filter *)arg;
+	unsigned int h = f->handle;
 	struct rsvp_session **sp;
 	struct rsvp_session *s = f->sess;
 	int i;
 
-	for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) {
+	for (fp = &s->ht[(h >> 8) & 0xFF]; *fp; fp = &(*fp)->next) {
 		if (*fp == f) {
 			tcf_tree_lock(tp);
 			*fp = f->next;
@@ -314,12 +313,12 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
 
 			/* Strip tree */
 
-			for (i=0; i<=16; i++)
+			for (i = 0; i <= 16; i++)
 				if (s->ht[i])
 					return 0;
 
 			/* OK, session has no flows */
-			for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF];
+			for (sp = &((struct rsvp_head *)tp->root)->ht[h & 0xFF];
 			     *sp; sp = &(*sp)->next) {
 				if (*sp == s) {
 					tcf_tree_lock(tp);
@@ -337,13 +336,14 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
 	return 0;
 }
 
-static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
+static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
 {
 	struct rsvp_head *data = tp->root;
 	int i = 0xFFFF;
 
 	while (i-- > 0) {
 		u32 h;
+
 		if ((data->hgenerator += 0x10000) == 0)
 			data->hgenerator = 0x10000;
 		h = data->hgenerator|salt;
@@ -355,10 +355,10 @@ static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
 
 static int tunnel_bts(struct rsvp_head *data)
 {
-	int n = data->tgenerator>>5;
-	u32 b = 1<<(data->tgenerator&0x1F);
+	int n = data->tgenerator >> 5;
+	u32 b = 1 << (data->tgenerator & 0x1F);
 
-	if (data->tmap[n]&b)
+	if (data->tmap[n] & b)
 		return 0;
 	data->tmap[n] |= b;
 	return 1;
@@ -372,10 +372,10 @@ static void tunnel_recycle(struct rsvp_head *data)
 
 	memset(tmap, 0, sizeof(tmap));
 
-	for (h1=0; h1<256; h1++) {
+	for (h1 = 0; h1 < 256; h1++) {
 		struct rsvp_session *s;
 		for (s = sht[h1]; s; s = s->next) {
-			for (h2=0; h2<=16; h2++) {
+			for (h2 = 0; h2 <= 16; h2++) {
 				struct rsvp_filter *f;
 
 				for (f = s->ht[h2]; f; f = f->next) {
@@ -395,8 +395,8 @@ static u32 gen_tunnel(struct rsvp_head *data)
 {
 	int i, k;
 
-	for (k=0; k<2; k++) {
-		for (i=255; i>0; i--) {
+	for (k = 0; k < 2; k++) {
+		for (i = 255; i > 0; i--) {
 			if (++data->tgenerator == 0)
 				data->tgenerator = 1;
 			if (tunnel_bts(data))
@@ -428,7 +428,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
 	struct nlattr *opt = tca[TCA_OPTIONS-1];
 	struct nlattr *tb[TCA_RSVP_MAX + 1];
 	struct tcf_exts e;
-	unsigned h1, h2;
+	unsigned int h1, h2;
 	__be32 *dst;
 	int err;
 
@@ -443,7 +443,8 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
 	if (err < 0)
 		return err;
 
-	if ((f = (struct rsvp_filter*)*arg) != NULL) {
+	f = (struct rsvp_filter *)*arg;
+	if (f) {
 		/* Node exists: adjust only classid */
 
 		if (f->handle != handle && handle)
@@ -500,7 +501,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
 			goto errout;
 	}
 
-	for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) {
+	for (sp = &data->ht[h1]; (s = *sp) != NULL; sp = &s->next) {
 		if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
 		    pinfo && pinfo->protocol == s->protocol &&
 		    memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
@@ -523,7 +524,7 @@ insert:
 			tcf_exts_change(tp, &f->exts, &e);
 
 			for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
-				if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask)
+				if (((*fp)->spi.mask & f->spi.mask) != f->spi.mask)
 					break;
 			f->next = *fp;
 			wmb();
@@ -567,7 +568,7 @@ errout2:
 static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 {
 	struct rsvp_head *head = tp->root;
-	unsigned h, h1;
+	unsigned int h, h1;
 
 	if (arg->stop)
 		return;
@@ -598,7 +599,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
 		     struct sk_buff *skb, struct tcmsg *t)
 {
-	struct rsvp_filter *f = (struct rsvp_filter*)fh;
+	struct rsvp_filter *f = (struct rsvp_filter *)fh;
 	struct rsvp_session *s;
 	unsigned char *b = skb_tail_pointer(skb);
 	struct nlattr *nest;
@@ -624,7 +625,7 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
 	NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
 	if (f->res.classid)
 		NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid);
-	if (((f->handle>>8)&0xFF) != 16)
+	if (((f->handle >> 8) & 0xFF) != 16)
 		NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
 
 	if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 20ef330bb91..36667fa6423 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -249,7 +249,7 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
 		 * of the hashing index is below the threshold.
 		 */
 		if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD)
-			cp.hash = (cp.mask >> cp.shift)+1;
+			cp.hash = (cp.mask >> cp.shift) + 1;
 		else
 			cp.hash = DEFAULT_HASH_SIZE;
 	}
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index b0c2a82178a..966920c14e7 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -42,8 +42,7 @@
 #include <net/act_api.h>
 #include <net/pkt_cls.h>
 
-struct tc_u_knode
-{
+struct tc_u_knode {
 	struct tc_u_knode	*next;
 	u32			handle;
 	struct tc_u_hnode	*ht_up;
@@ -63,19 +62,17 @@ struct tc_u_knode
 	struct tc_u32_sel	sel;
 };
 
-struct tc_u_hnode
-{
+struct tc_u_hnode {
 	struct tc_u_hnode	*next;
 	u32			handle;
 	u32			prio;
 	struct tc_u_common	*tp_c;
 	int			refcnt;
-	unsigned		divisor;
+	unsigned int		divisor;
 	struct tc_u_knode	*ht[1];
 };
 
-struct tc_u_common
-{
+struct tc_u_common {
 	struct tc_u_hnode	*hlist;
 	struct Qdisc		*q;
 	int			refcnt;
@@ -87,9 +84,11 @@ static const struct tcf_ext_map u32_ext_map = {
 	.police = TCA_U32_POLICE
 };
 
-static __inline__ unsigned u32_hash_fold(__be32 key, struct tc_u32_sel *sel, u8 fshift)
+static inline unsigned int u32_hash_fold(__be32 key,
+					 const struct tc_u32_sel *sel,
+					 u8 fshift)
 {
-	unsigned h = ntohl(key & sel->hmask)>>fshift;
+	unsigned int h = ntohl(key & sel->hmask) >> fshift;
 
 	return h;
 }
@@ -101,7 +100,7 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re
 		unsigned int	  off;
 	} stack[TC_U32_MAXDEPTH];
 
-	struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root;
+	struct tc_u_hnode *ht = (struct tc_u_hnode *)tp->root;
 	unsigned int off = skb_network_offset(skb);
 	struct tc_u_knode *n;
 	int sdepth = 0;
@@ -120,7 +119,7 @@ next_knode:
 		struct tc_u32_key *key = n->sel.keys;
 
 #ifdef CONFIG_CLS_U32_PERF
-		n->pf->rcnt +=1;
+		n->pf->rcnt += 1;
 		j = 0;
 #endif
 
@@ -133,7 +132,7 @@ next_knode:
 		}
 #endif
 
-		for (i = n->sel.nkeys; i>0; i--, key++) {
+		for (i = n->sel.nkeys; i > 0; i--, key++) {
 			int toff = off + key->off + (off2 & key->offmask);
 			__be32 *data, _data;
 
@@ -148,13 +147,13 @@ next_knode:
 				goto next_knode;
 			}
 #ifdef CONFIG_CLS_U32_PERF
-			n->pf->kcnts[j] +=1;
+			n->pf->kcnts[j] += 1;
 			j++;
 #endif
 		}
 		if (n->ht_down == NULL) {
 check_terminal:
-			if (n->sel.flags&TC_U32_TERMINAL) {
+			if (n->sel.flags & TC_U32_TERMINAL) {
 
 				*res = n->res;
 #ifdef CONFIG_NET_CLS_IND
@@ -164,7 +163,7 @@ check_terminal:
 				}
 #endif
 #ifdef CONFIG_CLS_U32_PERF
-				n->pf->rhit +=1;
+				n->pf->rhit += 1;
 #endif
 				r = tcf_exts_exec(skb, &n->exts, res);
 				if (r < 0) {
@@ -197,10 +196,10 @@ check_terminal:
 			sel = ht->divisor & u32_hash_fold(*data, &n->sel,
 							  n->fshift);
 		}
-		if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT)))
+		if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT)))
 			goto next_ht;
 
-		if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) {
+		if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) {
 			off2 = n->sel.off + 3;
 			if (n->sel.flags & TC_U32_VAROFFSET) {
 				__be16 *data, _data;
@@ -215,7 +214,7 @@ check_terminal:
 			}
 			off2 &= ~3;
 		}
-		if (n->sel.flags&TC_U32_EAT) {
+		if (n->sel.flags & TC_U32_EAT) {
 			off += off2;
 			off2 = 0;
 		}
@@ -236,11 +235,11 @@ out:
 
 deadloop:
 	if (net_ratelimit())
-		printk(KERN_WARNING "cls_u32: dead loop\n");
+		pr_warning("cls_u32: dead loop\n");
 	return -1;
 }
 
-static __inline__ struct tc_u_hnode *
+static struct tc_u_hnode *
 u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
 {
 	struct tc_u_hnode *ht;
@@ -252,10 +251,10 @@ u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
 	return ht;
 }
 
-static __inline__ struct tc_u_knode *
+static struct tc_u_knode *
 u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
 {
-	unsigned sel;
+	unsigned int sel;
 	struct tc_u_knode *n = NULL;
 
 	sel = TC_U32_HASH(handle);
@@ -300,7 +299,7 @@ static u32 gen_new_htid(struct tc_u_common *tp_c)
 	do {
 		if (++tp_c->hgenerator == 0x7FF)
 			tp_c->hgenerator = 1;
-	} while (--i>0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20));
+	} while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20));
 
 	return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0;
 }
@@ -378,9 +377,9 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode* key)
 static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
 {
 	struct tc_u_knode *n;
-	unsigned h;
+	unsigned int h;
 
-	for (h=0; h<=ht->divisor; h++) {
+	for (h = 0; h <= ht->divisor; h++) {
 		while ((n = ht->ht[h]) != NULL) {
 			ht->ht[h] = n->next;
 
@@ -446,13 +445,13 @@ static void u32_destroy(struct tcf_proto *tp)
 
 static int u32_delete(struct tcf_proto *tp, unsigned long arg)
 {
-	struct tc_u_hnode *ht = (struct tc_u_hnode*)arg;
+	struct tc_u_hnode *ht = (struct tc_u_hnode *)arg;
 
 	if (ht == NULL)
 		return 0;
 
 	if (TC_U32_KEY(ht->handle))
-		return u32_delete_key(tp, (struct tc_u_knode*)ht);
+		return u32_delete_key(tp, (struct tc_u_knode *)ht);
 
 	if (tp->root == ht)
 		return -EINVAL;
@@ -470,14 +469,14 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg)
 static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
 {
 	struct tc_u_knode *n;
-	unsigned i = 0x7FF;
+	unsigned int i = 0x7FF;
 
-	for (n=ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
+	for (n = ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
 		if (i < TC_U32_NODE(n->handle))
 			i = TC_U32_NODE(n->handle);
 	i++;
 
-	return handle|(i>0xFFF ? 0xFFF : i);
+	return handle | (i > 0xFFF ? 0xFFF : i);
 }
 
 static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
@@ -566,7 +565,8 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 	if (err < 0)
 		return err;
 
-	if ((n = (struct tc_u_knode*)*arg) != NULL) {
+	n = (struct tc_u_knode *)*arg;
+	if (n) {
 		if (TC_U32_KEY(n->handle) == 0)
 			return -EINVAL;
 
@@ -574,7 +574,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 	}
 
 	if (tb[TCA_U32_DIVISOR]) {
-		unsigned divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
+		unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
 
 		if (--divisor > 0x100)
 			return -EINVAL;
@@ -585,7 +585,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 			if (handle == 0)
 				return -ENOMEM;
 		}
-		ht = kzalloc(sizeof(*ht) + divisor*sizeof(void*), GFP_KERNEL);
+		ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
 		if (ht == NULL)
 			return -ENOBUFS;
 		ht->tp_c = tp_c;
@@ -683,7 +683,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 	struct tc_u_common *tp_c = tp->data;
 	struct tc_u_hnode *ht;
 	struct tc_u_knode *n;
-	unsigned h;
+	unsigned int h;
 
 	if (arg->stop)
 		return;
@@ -717,7 +717,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 static int u32_dump(struct tcf_proto *tp, unsigned long fh,
 		     struct sk_buff *skb, struct tcmsg *t)
 {
-	struct tc_u_knode *n = (struct tc_u_knode*)fh;
+	struct tc_u_knode *n = (struct tc_u_knode *)fh;
 	struct nlattr *nest;
 
 	if (n == NULL)
@@ -730,8 +730,9 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
 		goto nla_put_failure;
 
 	if (TC_U32_KEY(n->handle) == 0) {
-		struct tc_u_hnode *ht = (struct tc_u_hnode*)fh;
-		u32 divisor = ht->divisor+1;
+		struct tc_u_hnode *ht = (struct tc_u_hnode *)fh;
+		u32 divisor = ht->divisor + 1;
+
 		NLA_PUT_U32(skb, TCA_U32_DIVISOR, divisor);
 	} else {
 		NLA_PUT(skb, TCA_U32_SEL,
@@ -755,7 +756,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
 			goto nla_put_failure;
 
 #ifdef CONFIG_NET_CLS_IND
-		if(strlen(n->indev))
+		if (strlen(n->indev))
 			NLA_PUT_STRING(skb, TCA_U32_INDEV, n->indev);
 #endif
 #ifdef CONFIG_CLS_U32_PERF
diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c
index bc450397487..1c8360a2752 100644
--- a/net/sched/em_cmp.c
+++ b/net/sched/em_cmp.c
@@ -33,40 +33,41 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em,
 		return 0;
 
 	switch (cmp->align) {
-		case TCF_EM_ALIGN_U8:
-			val = *ptr;
-			break;
+	case TCF_EM_ALIGN_U8:
+		val = *ptr;
+		break;
 
-		case TCF_EM_ALIGN_U16:
-			val = get_unaligned_be16(ptr);
+	case TCF_EM_ALIGN_U16:
+		val = get_unaligned_be16(ptr);
 
-			if (cmp_needs_transformation(cmp))
-				val = be16_to_cpu(val);
-			break;
+		if (cmp_needs_transformation(cmp))
+			val = be16_to_cpu(val);
+		break;
 
-		case TCF_EM_ALIGN_U32:
-			/* Worth checking boundries? The branching seems
-			 * to get worse. Visit again. */
-			val = get_unaligned_be32(ptr);
+	case TCF_EM_ALIGN_U32:
+		/* Worth checking boundaries? The branching seems
+		 * to get worse. Visit again.
+		 */
+		val = get_unaligned_be32(ptr);
 
-			if (cmp_needs_transformation(cmp))
-				val = be32_to_cpu(val);
-			break;
+		if (cmp_needs_transformation(cmp))
+			val = be32_to_cpu(val);
+		break;
 
-		default:
-			return 0;
+	default:
+		return 0;
 	}
 
 	if (cmp->mask)
 		val &= cmp->mask;
 
 	switch (cmp->opnd) {
-		case TCF_EM_OPND_EQ:
-			return val == cmp->val;
-		case TCF_EM_OPND_LT:
-			return val < cmp->val;
-		case TCF_EM_OPND_GT:
-			return val > cmp->val;
+	case TCF_EM_OPND_EQ:
+		return val == cmp->val;
+	case TCF_EM_OPND_LT:
+		return val < cmp->val;
+	case TCF_EM_OPND_GT:
+		return val > cmp->val;
 	}
 
 	return 0;
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 34da5e29ea1..a889d099320 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -73,21 +73,18 @@
 #include <net/pkt_cls.h>
 #include <net/sock.h>
 
-struct meta_obj
-{
+struct meta_obj {
 	unsigned long		value;
 	unsigned int		len;
 };
 
-struct meta_value
-{
+struct meta_value {
 	struct tcf_meta_val	hdr;
 	unsigned long		val;
 	unsigned int		len;
 };
 
-struct meta_match
-{
+struct meta_match {
 	struct meta_value	lvalue;
 	struct meta_value	rvalue;
 };
@@ -255,7 +252,7 @@ META_COLLECTOR(int_rtclassid)
 	if (unlikely(skb_dst(skb) == NULL))
 		*err = -1;
 	else
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 		dst->value = skb_dst(skb)->tclassid;
 #else
 		dst->value = 0;
@@ -483,8 +480,7 @@ META_COLLECTOR(int_sk_write_pend)
  * Meta value collectors assignment table
  **************************************************************************/
 
-struct meta_ops
-{
+struct meta_ops {
 	void		(*get)(struct sk_buff *, struct tcf_pkt_info *,
 			       struct meta_value *, struct meta_obj *, int *);
 };
@@ -494,7 +490,7 @@ struct meta_ops
 
 /* Meta value operations table listing all meta value collectors and
  * assigns them to a type and meta id. */
-static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
+static struct meta_ops __meta_ops[TCF_META_TYPE_MAX + 1][TCF_META_ID_MAX + 1] = {
 	[TCF_META_TYPE_VAR] = {
 		[META_ID(DEV)]			= META_FUNC(var_dev),
 		[META_ID(SK_BOUND_IF)] 		= META_FUNC(var_sk_bound_if),
@@ -550,7 +546,7 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
 	}
 };
 
-static inline struct meta_ops * meta_ops(struct meta_value *val)
+static inline struct meta_ops *meta_ops(struct meta_value *val)
 {
 	return &__meta_ops[meta_type(val)][meta_id(val)];
 }
@@ -649,9 +645,8 @@ static int meta_int_dump(struct sk_buff *skb, struct meta_value *v, int tlv)
 {
 	if (v->len == sizeof(unsigned long))
 		NLA_PUT(skb, tlv, sizeof(unsigned long), &v->val);
-	else if (v->len == sizeof(u32)) {
+	else if (v->len == sizeof(u32))
 		NLA_PUT_U32(skb, tlv, v->val);
-	}
 
 	return 0;
 
@@ -663,8 +658,7 @@ nla_put_failure:
  * Type specific operations table
  **************************************************************************/
 
-struct meta_type_ops
-{
+struct meta_type_ops {
 	void	(*destroy)(struct meta_value *);
 	int	(*compare)(struct meta_obj *, struct meta_obj *);
 	int	(*change)(struct meta_value *, struct nlattr *);
@@ -672,7 +666,7 @@ struct meta_type_ops
 	int	(*dump)(struct sk_buff *, struct meta_value *, int);
 };
 
-static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX+1] = {
+static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX + 1] = {
 	[TCF_META_TYPE_VAR] = {
 		.destroy = meta_var_destroy,
 		.compare = meta_var_compare,
@@ -688,7 +682,7 @@ static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX+1] = {
 	}
 };
 
-static inline struct meta_type_ops * meta_type_ops(struct meta_value *v)
+static inline struct meta_type_ops *meta_type_ops(struct meta_value *v)
 {
 	return &__meta_type_ops[meta_type(v)];
 }
@@ -713,7 +707,7 @@ static int meta_get(struct sk_buff *skb, struct tcf_pkt_info *info,
 		return err;
 
 	if (meta_type_ops(v)->apply_extras)
-	    meta_type_ops(v)->apply_extras(v, dst);
+		meta_type_ops(v)->apply_extras(v, dst);
 
 	return 0;
 }
@@ -732,12 +726,12 @@ static int em_meta_match(struct sk_buff *skb, struct tcf_ematch *m,
 	r = meta_type_ops(&meta->lvalue)->compare(&l_value, &r_value);
 
 	switch (meta->lvalue.hdr.op) {
-		case TCF_EM_OPND_EQ:
-			return !r;
-		case TCF_EM_OPND_LT:
-			return r < 0;
-		case TCF_EM_OPND_GT:
-			return r > 0;
+	case TCF_EM_OPND_EQ:
+		return !r;
+	case TCF_EM_OPND_LT:
+		return r < 0;
+	case TCF_EM_OPND_GT:
+		return r > 0;
 	}
 
 	return 0;
@@ -771,7 +765,7 @@ static inline int meta_change_data(struct meta_value *dst, struct nlattr *nla)
 
 static inline int meta_is_supported(struct meta_value *val)
 {
-	return (!meta_id(val) || meta_ops(val)->get);
+	return !meta_id(val) || meta_ops(val)->get;
 }
 
 static const struct nla_policy meta_policy[TCA_EM_META_MAX + 1] = {
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c
index 1a4176aee6e..a3bed07a008 100644
--- a/net/sched/em_nbyte.c
+++ b/net/sched/em_nbyte.c
@@ -18,8 +18,7 @@
 #include <linux/tc_ematch/tc_em_nbyte.h>
 #include <net/pkt_cls.h>
 
-struct nbyte_data
-{
+struct nbyte_data {
 	struct tcf_em_nbyte	hdr;
 	char			pattern[0];
 };
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index ea8f566e720..15d353d2e4b 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -19,8 +19,7 @@
 #include <linux/tc_ematch/tc_em_text.h>
 #include <net/pkt_cls.h>
 
-struct text_match
-{
+struct text_match {
 	u16			from_offset;
 	u16			to_offset;
 	u8			from_layer;
diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c
index 953f1479f7d..797bdb88c01 100644
--- a/net/sched/em_u32.c
+++ b/net/sched/em_u32.c
@@ -35,7 +35,7 @@ static int em_u32_match(struct sk_buff *skb, struct tcf_ematch *em,
 	if (!tcf_valid_offset(skb, ptr, sizeof(u32)))
 		return 0;
 
-	return !(((*(__be32*) ptr)  ^ key->val) & key->mask);
+	return !(((*(__be32 *) ptr)  ^ key->val) & key->mask);
 }
 
 static struct tcf_ematch_ops em_u32_ops = {
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 5e37da961f8..88d93eb9250 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -93,7 +93,7 @@
 static LIST_HEAD(ematch_ops);
 static DEFINE_RWLOCK(ematch_mod_lock);
 
-static inline struct tcf_ematch_ops * tcf_em_lookup(u16 kind)
+static struct tcf_ematch_ops *tcf_em_lookup(u16 kind)
 {
 	struct tcf_ematch_ops *e = NULL;
 
@@ -163,8 +163,8 @@ void tcf_em_unregister(struct tcf_ematch_ops *ops)
 }
 EXPORT_SYMBOL(tcf_em_unregister);
 
-static inline struct tcf_ematch * tcf_em_get_match(struct tcf_ematch_tree *tree,
-						   int index)
+static inline struct tcf_ematch *tcf_em_get_match(struct tcf_ematch_tree *tree,
+						  int index)
 {
 	return &tree->matches[index];
 }
@@ -184,7 +184,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
 
 	if (em_hdr->kind == TCF_EM_CONTAINER) {
 		/* Special ematch called "container", carries an index
-		 * referencing an external ematch sequence. */
+		 * referencing an external ematch sequence.
+		 */
 		u32 ref;
 
 		if (data_len < sizeof(ref))
@@ -195,7 +196,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
 			goto errout;
 
 		/* We do not allow backward jumps to avoid loops and jumps
-		 * to our own position are of course illegal. */
+		 * to our own position are of course illegal.
+		 */
 		if (ref <= idx)
 			goto errout;
 
@@ -208,7 +210,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
 		 * which automatically releases the reference again, therefore
 		 * the module MUST not be given back under any circumstances
 		 * here. Be aware, the destroy function assumes that the
-		 * module is held if the ops field is non zero. */
+		 * module is held if the ops field is non zero.
+		 */
 		em->ops = tcf_em_lookup(em_hdr->kind);
 
 		if (em->ops == NULL) {
@@ -221,7 +224,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
 			if (em->ops) {
 				/* We dropped the RTNL mutex in order to
 				 * perform the module load. Tell the caller
-				 * to replay the request. */
+				 * to replay the request.
+				 */
 				module_put(em->ops->owner);
 				err = -EAGAIN;
 			}
@@ -230,7 +234,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
 		}
 
 		/* ematch module provides expected length of data, so we
-		 * can do a basic sanity check. */
+		 * can do a basic sanity check.
+		 */
 		if (em->ops->datalen && data_len < em->ops->datalen)
 			goto errout;
 
@@ -246,7 +251,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
 			 * TCF_EM_SIMPLE may be specified stating that the
 			 * data only consists of a u32 integer and the module
 			 * does not expected a memory reference but rather
-			 * the value carried. */
+			 * the value carried.
+			 */
 			if (em_hdr->flags & TCF_EM_SIMPLE) {
 				if (data_len < sizeof(u32))
 					goto errout;
@@ -334,7 +340,8 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,
 	 * The array of rt attributes is parsed in the order as they are
 	 * provided, their type must be incremental from 1 to n. Even
 	 * if it does not serve any real purpose, a failure of sticking
-	 * to this policy will result in parsing failure. */
+	 * to this policy will result in parsing failure.
+	 */
 	for (idx = 0; nla_ok(rt_match, list_len); idx++) {
 		err = -EINVAL;
 
@@ -359,7 +366,8 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,
 	/* Check if the number of matches provided by userspace actually
 	 * complies with the array of matches. The number was used for
 	 * the validation of references and a mismatch could lead to
-	 * undefined references during the matching process. */
+	 * undefined references during the matching process.
+	 */
 	if (idx != tree_hdr->nmatches) {
 		err = -EINVAL;
 		goto errout_abort;
@@ -449,7 +457,7 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
 			.flags = em->flags
 		};
 
-		NLA_PUT(skb, i+1, sizeof(em_hdr), &em_hdr);
+		NLA_PUT(skb, i + 1, sizeof(em_hdr), &em_hdr);
 
 		if (em->ops && em->ops->dump) {
 			if (em->ops->dump(skb, em) < 0)
@@ -478,6 +486,7 @@ static inline int tcf_em_match(struct sk_buff *skb, struct tcf_ematch *em,
 			       struct tcf_pkt_info *info)
 {
 	int r = em->ops->match(skb, em, info);
+
 	return tcf_em_is_inverted(em) ? !r : r;
 }
 
@@ -527,8 +536,8 @@ pop_stack:
 
 stack_overflow:
 	if (net_ratelimit())
-		printk(KERN_WARNING "tc ematch: local stack overflow,"
-			" increase NET_EMATCH_STACK\n");
+		pr_warning("tc ematch: local stack overflow,"
+			   " increase NET_EMATCH_STACK\n");
 	return -1;
 }
 EXPORT_SYMBOL(__tcf_em_tree_match);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index b22ca2d1ceb..15074157940 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -187,7 +187,7 @@ int unregister_qdisc(struct Qdisc_ops *qops)
 	int err = -ENOENT;
 
 	write_lock(&qdisc_mod_lock);
-	for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
+	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
 		if (q == qops)
 			break;
 	if (q) {
@@ -321,7 +321,9 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab)
 	if (!tab || --tab->refcnt)
 		return;
 
-	for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
+	for (rtabp = &qdisc_rtab_list;
+	     (rtab = *rtabp) != NULL;
+	     rtabp = &rtab->next) {
 		if (rtab == tab) {
 			*rtabp = rtab->next;
 			kfree(rtab);
@@ -396,6 +398,11 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
 	return stab;
 }
 
+static void stab_kfree_rcu(struct rcu_head *head)
+{
+	kfree(container_of(head, struct qdisc_size_table, rcu));
+}
+
 void qdisc_put_stab(struct qdisc_size_table *tab)
 {
 	if (!tab)
@@ -405,7 +412,7 @@ void qdisc_put_stab(struct qdisc_size_table *tab)
 
 	if (--tab->refcnt == 0) {
 		list_del(&tab->list);
-		kfree(tab);
+		call_rcu_bh(&tab->rcu, stab_kfree_rcu);
 	}
 
 	spin_unlock(&qdisc_stab_lock);
@@ -428,7 +435,7 @@ nla_put_failure:
 	return -1;
 }
 
-void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab)
+void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab)
 {
 	int pkt_len, slot;
 
@@ -454,14 +461,13 @@ out:
 		pkt_len = 1;
 	qdisc_skb_cb(skb)->pkt_len = pkt_len;
 }
-EXPORT_SYMBOL(qdisc_calculate_pkt_len);
+EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
 
 void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
 {
 	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
-		printk(KERN_WARNING
-		       "%s: %s qdisc %X: is non-work-conserving?\n",
-		       txt, qdisc->ops->id, qdisc->handle >> 16);
+		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
+			txt, qdisc->ops->id, qdisc->handle >> 16);
 		qdisc->flags |= TCQ_F_WARN_NONWC;
 	}
 }
@@ -472,7 +478,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
 	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
 						 timer);
 
-	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
+	qdisc_unthrottled(wd->qdisc);
 	__netif_schedule(qdisc_root(wd->qdisc));
 
 	return HRTIMER_NORESTART;
@@ -494,7 +500,7 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
 		     &qdisc_root_sleeping(wd->qdisc)->state))
 		return;
 
-	wd->qdisc->flags |= TCQ_F_THROTTLED;
+	qdisc_throttled(wd->qdisc);
 	time = ktime_set(0, 0);
 	time = ktime_add_ns(time, PSCHED_TICKS2NS(expires));
 	hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
@@ -504,7 +510,7 @@ EXPORT_SYMBOL(qdisc_watchdog_schedule);
 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
 {
 	hrtimer_cancel(&wd->timer);
-	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
+	qdisc_unthrottled(wd->qdisc);
 }
 EXPORT_SYMBOL(qdisc_watchdog_cancel);
 
@@ -625,7 +631,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
 			autohandle = TC_H_MAKE(0x80000000U, 0);
 	} while	(qdisc_lookup(dev, autohandle) && --i > 0);
 
-	return i>0 ? autohandle : 0;
+	return i > 0 ? autohandle : 0;
 }
 
 void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
@@ -834,7 +840,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 				err = PTR_ERR(stab);
 				goto err_out4;
 			}
-			sch->stab = stab;
+			rcu_assign_pointer(sch->stab, stab);
 		}
 		if (tca[TCA_RATE]) {
 			spinlock_t *root_lock;
@@ -874,7 +880,7 @@ err_out4:
 	 * Any broken qdiscs that would require a ops->reset() here?
 	 * The qdisc was never in action so it shouldn't be necessary.
 	 */
-	qdisc_put_stab(sch->stab);
+	qdisc_put_stab(rtnl_dereference(sch->stab));
 	if (ops->destroy)
 		ops->destroy(sch);
 	goto err_out3;
@@ -882,7 +888,7 @@ err_out4:
 
 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
 {
-	struct qdisc_size_table *stab = NULL;
+	struct qdisc_size_table *ostab, *stab = NULL;
 	int err = 0;
 
 	if (tca[TCA_OPTIONS]) {
@@ -899,8 +905,9 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
 			return PTR_ERR(stab);
 	}
 
-	qdisc_put_stab(sch->stab);
-	sch->stab = stab;
+	ostab = rtnl_dereference(sch->stab);
+	rcu_assign_pointer(sch->stab, stab);
+	qdisc_put_stab(ostab);
 
 	if (tca[TCA_RATE]) {
 		/* NB: ignores errors from replace_estimator
@@ -915,9 +922,8 @@ out:
 	return 0;
 }
 
-struct check_loop_arg
-{
-	struct qdisc_walker 	w;
+struct check_loop_arg {
+	struct qdisc_walker	w;
 	struct Qdisc		*p;
 	int			depth;
 };
@@ -970,7 +976,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	struct Qdisc *p = NULL;
 	int err;
 
-	if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+	if (!dev)
 		return -ENODEV;
 
 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -980,12 +987,12 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	if (clid) {
 		if (clid != TC_H_ROOT) {
 			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
-				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
+				p = qdisc_lookup(dev, TC_H_MAJ(clid));
+				if (!p)
 					return -ENOENT;
 				q = qdisc_leaf(p, clid);
-			} else { /* ingress */
-				if (dev_ingress_queue(dev))
-					q = dev_ingress_queue(dev)->qdisc_sleeping;
+			} else if (dev_ingress_queue(dev)) {
+				q = dev_ingress_queue(dev)->qdisc_sleeping;
 			}
 		} else {
 			q = dev->qdisc;
@@ -996,7 +1003,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
 			return -EINVAL;
 	} else {
-		if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
+		q = qdisc_lookup(dev, tcm->tcm_handle);
+		if (!q)
 			return -ENOENT;
 	}
 
@@ -1008,7 +1016,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 			return -EINVAL;
 		if (q->handle == 0)
 			return -ENOENT;
-		if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
+		err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
+		if (err != 0)
 			return err;
 	} else {
 		qdisc_notify(net, skb, n, clid, NULL, q);
@@ -1017,7 +1026,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 }
 
 /*
-   Create/change qdisc.
+ * Create/change qdisc.
  */
 
 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
@@ -1036,7 +1045,8 @@ replay:
 	clid = tcm->tcm_parent;
 	q = p = NULL;
 
-	if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+	if (!dev)
 		return -ENODEV;
 
 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1046,12 +1056,12 @@ replay:
 	if (clid) {
 		if (clid != TC_H_ROOT) {
 			if (clid != TC_H_INGRESS) {
-				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
+				p = qdisc_lookup(dev, TC_H_MAJ(clid));
+				if (!p)
 					return -ENOENT;
 				q = qdisc_leaf(p, clid);
-			} else { /* ingress */
-				if (dev_ingress_queue_create(dev))
-					q = dev_ingress_queue(dev)->qdisc_sleeping;
+			} else if (dev_ingress_queue_create(dev)) {
+				q = dev_ingress_queue(dev)->qdisc_sleeping;
 			}
 		} else {
 			q = dev->qdisc;
@@ -1063,13 +1073,14 @@ replay:
 
 		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
 			if (tcm->tcm_handle) {
-				if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
+				if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
 					return -EEXIST;
 				if (TC_H_MIN(tcm->tcm_handle))
 					return -EINVAL;
-				if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
+				q = qdisc_lookup(dev, tcm->tcm_handle);
+				if (!q)
 					goto create_n_graft;
-				if (n->nlmsg_flags&NLM_F_EXCL)
+				if (n->nlmsg_flags & NLM_F_EXCL)
 					return -EEXIST;
 				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
 					return -EINVAL;
@@ -1079,7 +1090,7 @@ replay:
 				atomic_inc(&q->refcnt);
 				goto graft;
 			} else {
-				if (q == NULL)
+				if (!q)
 					goto create_n_graft;
 
 				/* This magic test requires explanation.
@@ -1101,9 +1112,9 @@ replay:
 				 *   For now we select create/graft, if
 				 *   user gave KIND, which does not match existing.
 				 */
-				if ((n->nlmsg_flags&NLM_F_CREATE) &&
-				    (n->nlmsg_flags&NLM_F_REPLACE) &&
-				    ((n->nlmsg_flags&NLM_F_EXCL) ||
+				if ((n->nlmsg_flags & NLM_F_CREATE) &&
+				    (n->nlmsg_flags & NLM_F_REPLACE) &&
+				    ((n->nlmsg_flags & NLM_F_EXCL) ||
 				     (tca[TCA_KIND] &&
 				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
 					goto create_n_graft;
@@ -1118,7 +1129,7 @@ replay:
 	/* Change qdisc parameters */
 	if (q == NULL)
 		return -ENOENT;
-	if (n->nlmsg_flags&NLM_F_EXCL)
+	if (n->nlmsg_flags & NLM_F_EXCL)
 		return -EEXIST;
 	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
 		return -EINVAL;
@@ -1128,7 +1139,7 @@ replay:
 	return err;
 
 create_n_graft:
-	if (!(n->nlmsg_flags&NLM_F_CREATE))
+	if (!(n->nlmsg_flags & NLM_F_CREATE))
 		return -ENOENT;
 	if (clid == TC_H_INGRESS) {
 		if (dev_ingress_queue(dev))
@@ -1175,6 +1186,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 	struct nlmsghdr  *nlh;
 	unsigned char *b = skb_tail_pointer(skb);
 	struct gnet_dump d;
+	struct qdisc_size_table *stab;
 
 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
 	tcm = NLMSG_DATA(nlh);
@@ -1190,7 +1202,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 		goto nla_put_failure;
 	q->qstats.qlen = q->q.qlen;
 
-	if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
+	stab = rtnl_dereference(q->stab);
+	if (stab && qdisc_dump_stab(skb, stab) < 0)
 		goto nla_put_failure;
 
 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
@@ -1234,16 +1247,19 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb,
 		return -ENOBUFS;
 
 	if (old && !tc_qdisc_dump_ignore(old)) {
-		if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
+		if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq,
+				  0, RTM_DELQDISC) < 0)
 			goto err_out;
 	}
 	if (new && !tc_qdisc_dump_ignore(new)) {
-		if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
+		if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq,
+				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
 			goto err_out;
 	}
 
 	if (skb->len)
-		return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+		return rtnetlink_send(skb, net, pid, RTNLGRP_TC,
+				      n->nlmsg_flags & NLM_F_ECHO);
 
 err_out:
 	kfree_skb(skb);
@@ -1275,7 +1291,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
 			q_idx++;
 			continue;
 		}
-		if (!tc_qdisc_dump_ignore(q) && 
+		if (!tc_qdisc_dump_ignore(q) &&
 		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
 				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
 			goto done;
@@ -1356,7 +1372,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	u32 qid = TC_H_MAJ(clid);
 	int err;
 
-	if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+	if (!dev)
 		return -ENODEV;
 
 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1391,9 +1408,9 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 			qid = dev->qdisc->handle;
 
 		/* Now qid is genuine qdisc handle consistent
-		   both with parent and child.
-
-		   TC_H_MAJ(pid) still may be unspecified, complete it now.
+		 * both with parent and child.
+		 *
+		 * TC_H_MAJ(pid) still may be unspecified, complete it now.
 		 */
 		if (pid)
 			pid = TC_H_MAKE(qid, pid);
@@ -1403,7 +1420,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	}
 
 	/* OK. Locate qdisc */
-	if ((q = qdisc_lookup(dev, qid)) == NULL)
+	q = qdisc_lookup(dev, qid);
+	if (!q)
 		return -ENOENT;
 
 	/* An check that it supports classes */
@@ -1423,13 +1441,14 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 
 	if (cl == 0) {
 		err = -ENOENT;
-		if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
+		if (n->nlmsg_type != RTM_NEWTCLASS ||
+		    !(n->nlmsg_flags & NLM_F_CREATE))
 			goto out;
 	} else {
 		switch (n->nlmsg_type) {
 		case RTM_NEWTCLASS:
 			err = -EEXIST;
-			if (n->nlmsg_flags&NLM_F_EXCL)
+			if (n->nlmsg_flags & NLM_F_EXCL)
 				goto out;
 			break;
 		case RTM_DELTCLASS:
@@ -1521,14 +1540,14 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
 		return -EINVAL;
 	}
 
-	return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+	return rtnetlink_send(skb, net, pid, RTNLGRP_TC,
+			      n->nlmsg_flags & NLM_F_ECHO);
 }
 
-struct qdisc_dump_args
-{
-	struct qdisc_walker w;
-	struct sk_buff *skb;
-	struct netlink_callback *cb;
+struct qdisc_dump_args {
+	struct qdisc_walker	w;
+	struct sk_buff		*skb;
+	struct netlink_callback	*cb;
 };
 
 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
@@ -1590,7 +1609,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
 
 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
+	struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh);
 	struct net *net = sock_net(skb->sk);
 	struct netdev_queue *dev_queue;
 	struct net_device *dev;
@@ -1598,7 +1617,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 
 	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
 		return 0;
-	if ((dev = dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+	dev = dev_get_by_index(net, tcm->tcm_ifindex);
+	if (!dev)
 		return 0;
 
 	s_t = cb->args[0];
@@ -1621,19 +1641,22 @@ done:
 }
 
 /* Main classifier routine: scans classifier chain attached
-   to this qdisc, (optionally) tests for protocol and asks
-   specific classifiers.
+ * to this qdisc, (optionally) tests for protocol and asks
+ * specific classifiers.
  */
 int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
 		       struct tcf_result *res)
 {
 	__be16 protocol = skb->protocol;
-	int err = 0;
+	int err;
 
 	for (; tp; tp = tp->next) {
-		if ((tp->protocol == protocol ||
-		     tp->protocol == htons(ETH_P_ALL)) &&
-		    (err = tp->classify(skb, tp, res)) >= 0) {
+		if (tp->protocol != protocol &&
+		    tp->protocol != htons(ETH_P_ALL))
+			continue;
+		err = tp->classify(skb, tp, res);
+
+		if (err >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
 			if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
 				skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
@@ -1664,11 +1687,11 @@ reclassify:
 
 		if (verd++ >= MAX_REC_LOOP) {
 			if (net_ratelimit())
-				printk(KERN_NOTICE
-				       "%s: packet reclassify loop"
+				pr_notice("%s: packet reclassify loop"
 					  " rule prio %u protocol %02x\n",
-				       tp->q->ops->id,
-				       tp->prio & 0xffff, ntohs(tp->protocol));
+					  tp->q->ops->id,
+					  tp->prio & 0xffff,
+					  ntohs(tp->protocol));
 			return TC_ACT_SHOT;
 		}
 		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
@@ -1761,7 +1784,7 @@ static int __init pktsched_init(void)
 
 	err = register_pernet_subsys(&psched_net_ops);
 	if (err) {
-		printk(KERN_ERR "pktsched_init: "
+		pr_err("pktsched_init: "
 		       "cannot initialize per netns operations\n");
 		return err;
 	}
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 943d733409d..3f08158b868 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -319,7 +319,7 @@ static int atm_tc_delete(struct Qdisc *sch, unsigned long arg)
 	 * creation), and one for the reference held when calling delete.
 	 */
 	if (flow->ref < 2) {
-		printk(KERN_ERR "atm_tc_delete: flow->ref == %d\n", flow->ref);
+		pr_err("atm_tc_delete: flow->ref == %d\n", flow->ref);
 		return -EINVAL;
 	}
 	if (flow->ref > 2)
@@ -384,12 +384,12 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 			}
 		}
 		flow = NULL;
-	done:
-		;		
+done:
+		;
 	}
-	if (!flow)
+	if (!flow) {
 		flow = &p->link;
-	else {
+	} else {
 		if (flow->vcc)
 			ATM_SKB(skb)->atm_options = flow->vcc->atm_options;
 		/*@@@ looks good ... but it's not supposed to work :-) */
@@ -576,8 +576,7 @@ static void atm_tc_destroy(struct Qdisc *sch)
 
 	list_for_each_entry_safe(flow, tmp, &p->flows, list) {
 		if (flow->ref > 1)
-			printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow,
-			       flow->ref);
+			pr_err("atm_destroy: %p->ref = %d\n", flow, flow->ref);
 		atm_tc_put(sch, (unsigned long)flow);
 	}
 	tasklet_kill(&p->task);
@@ -616,9 +615,8 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
 	}
 	if (flow->excess)
 		NLA_PUT_U32(skb, TCA_ATM_EXCESS, flow->classid);
-	else {
+	else
 		NLA_PUT_U32(skb, TCA_ATM_EXCESS, 0);
-	}
 
 	nla_nest_end(skb, nest);
 	return skb->len;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index c80d1c210c5..24d94c097b3 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -72,8 +72,7 @@
 struct cbq_sched_data;
 
 
-struct cbq_class
-{
+struct cbq_class {
 	struct Qdisc_class_common common;
 	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */
 
@@ -139,19 +138,18 @@ struct cbq_class
 	int			refcnt;
 	int			filters;
 
-	struct cbq_class 	*defaults[TC_PRIO_MAX+1];
+	struct cbq_class	*defaults[TC_PRIO_MAX + 1];
 };
 
-struct cbq_sched_data
-{
+struct cbq_sched_data {
 	struct Qdisc_class_hash	clhash;			/* Hash table of all classes */
-	int			nclasses[TC_CBQ_MAXPRIO+1];
-	unsigned		quanta[TC_CBQ_MAXPRIO+1];
+	int			nclasses[TC_CBQ_MAXPRIO + 1];
+	unsigned int		quanta[TC_CBQ_MAXPRIO + 1];
 
 	struct cbq_class	link;
 
-	unsigned		activemask;
-	struct cbq_class	*active[TC_CBQ_MAXPRIO+1];	/* List of all classes
+	unsigned int		activemask;
+	struct cbq_class	*active[TC_CBQ_MAXPRIO + 1];	/* List of all classes
 								   with backlog */
 
 #ifdef CONFIG_NET_CLS_ACT
@@ -162,7 +160,7 @@ struct cbq_sched_data
 	int			tx_len;
 	psched_time_t		now;		/* Cached timestamp */
 	psched_time_t		now_rt;		/* Cached real time */
-	unsigned		pmask;
+	unsigned int		pmask;
 
 	struct hrtimer		delay_timer;
 	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
@@ -175,9 +173,9 @@ struct cbq_sched_data
 };
 
 
-#define L2T(cl,len)	qdisc_l2t((cl)->R_tab,len)
+#define L2T(cl, len)	qdisc_l2t((cl)->R_tab, len)
 
-static __inline__ struct cbq_class *
+static inline struct cbq_class *
 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
 {
 	struct Qdisc_class_common *clc;
@@ -193,25 +191,27 @@ cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
 static struct cbq_class *
 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
 {
-	struct cbq_class *cl, *new;
+	struct cbq_class *cl;
 
-	for (cl = this->tparent; cl; cl = cl->tparent)
-		if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
-			return new;
+	for (cl = this->tparent; cl; cl = cl->tparent) {
+		struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];
 
+		if (new != NULL && new != this)
+			return new;
+	}
 	return NULL;
 }
 
 #endif
 
 /* Classify packet. The procedure is pretty complicated, but
-   it allows us to combine link sharing and priority scheduling
-   transparently.
-
-   Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
-   so that it resolves to split nodes. Then packets are classified
-   by logical priority, or a more specific classifier may be attached
-   to the split node.
+ * it allows us to combine link sharing and priority scheduling
+ * transparently.
+ *
+ * Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
+ * so that it resolves to split nodes. Then packets are classified
+ * by logical priority, or a more specific classifier may be attached
+ * to the split node.
  */
 
 static struct cbq_class *
@@ -227,7 +227,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 	/*
 	 *  Step 1. If skb->priority points to one of our classes, use it.
 	 */
-	if (TC_H_MAJ(prio^sch->handle) == 0 &&
+	if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
 	    (cl = cbq_class_lookup(q, prio)) != NULL)
 		return cl;
 
@@ -243,10 +243,11 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		    (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
 			goto fallback;
 
-		if ((cl = (void*)res.class) == NULL) {
+		cl = (void *)res.class;
+		if (!cl) {
 			if (TC_H_MAJ(res.classid))
 				cl = cbq_class_lookup(q, res.classid);
-			else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
+			else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
 				cl = defmap[TC_PRIO_BESTEFFORT];
 
 			if (cl == NULL || cl->level >= head->level)
@@ -282,7 +283,7 @@ fallback:
 	 * Step 4. No success...
 	 */
 	if (TC_H_MAJ(prio) == 0 &&
-	    !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
+	    !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
 	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
 		return head;
 
@@ -290,12 +291,12 @@ fallback:
 }
 
 /*
-   A packet has just been enqueued on the empty class.
-   cbq_activate_class adds it to the tail of active class list
-   of its priority band.
+ * A packet has just been enqueued on the empty class.
+ * cbq_activate_class adds it to the tail of active class list
+ * of its priority band.
  */
 
-static __inline__ void cbq_activate_class(struct cbq_class *cl)
+static inline void cbq_activate_class(struct cbq_class *cl)
 {
 	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 	int prio = cl->cpriority;
@@ -314,9 +315,9 @@ static __inline__ void cbq_activate_class(struct cbq_class *cl)
 }
 
 /*
-   Unlink class from active chain.
-   Note that this same procedure is done directly in cbq_dequeue*
-   during round-robin procedure.
+ * Unlink class from active chain.
+ * Note that this same procedure is done directly in cbq_dequeue*
+ * during round-robin procedure.
  */
 
 static void cbq_deactivate_class(struct cbq_class *this)
@@ -350,7 +351,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
 {
 	int toplevel = q->toplevel;
 
-	if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
+	if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
 		psched_time_t now;
 		psched_tdiff_t incr;
 
@@ -363,7 +364,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
 				q->toplevel = cl->level;
 				return;
 			}
-		} while ((cl=cl->borrow) != NULL && toplevel > cl->level);
+		} while ((cl = cl->borrow) != NULL && toplevel > cl->level);
 	}
 }
 
@@ -390,7 +391,6 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	ret = qdisc_enqueue(skb, cl->q);
 	if (ret == NET_XMIT_SUCCESS) {
 		sch->q.qlen++;
-		qdisc_bstats_update(sch, skb);
 		cbq_mark_toplevel(q, cl);
 		if (!cl->next_alive)
 			cbq_activate_class(cl);
@@ -418,11 +418,11 @@ static void cbq_ovl_classic(struct cbq_class *cl)
 		delay += cl->offtime;
 
 		/*
-		   Class goes to sleep, so that it will have no
-		   chance to work avgidle. Let's forgive it 8)
-
-		   BTW cbq-2.0 has a crap in this
-		   place, apparently they forgot to shift it by cl->ewma_log.
+		 * Class goes to sleep, so that it will have no
+		 * chance to work avgidle. Let's forgive it 8)
+		 *
+		 * BTW cbq-2.0 has a bug in this
+		 * place: apparently they forgot to shift it by cl->ewma_log.
 		 */
 		if (cl->avgidle < 0)
 			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
@@ -439,8 +439,8 @@ static void cbq_ovl_classic(struct cbq_class *cl)
 		q->wd_expires = delay;
 
 	/* Dirty work! We must schedule wakeups based on
-	   real available rate, rather than leaf rate,
-	   which may be tiny (even zero).
+	 * real available rate, rather than leaf rate,
+	 * which may be tiny (even zero).
 	 */
 	if (q->toplevel == TC_CBQ_MAXLEVEL) {
 		struct cbq_class *b;
@@ -460,7 +460,7 @@ static void cbq_ovl_classic(struct cbq_class *cl)
 }
 
 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
-   they go overlimit
+ * they go overlimit
  */
 
 static void cbq_ovl_rclassic(struct cbq_class *cl)
@@ -595,7 +595,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 	struct Qdisc *sch = q->watchdog.qdisc;
 	psched_time_t now;
 	psched_tdiff_t delay = 0;
-	unsigned pmask;
+	unsigned int pmask;
 
 	now = psched_get_time();
 
@@ -624,7 +624,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
 	}
 
-	sch->flags &= ~TCQ_F_THROTTLED;
+	qdisc_unthrottled(sch);
 	__netif_schedule(qdisc_root(sch));
 	return HRTIMER_NORESTART;
 }
@@ -649,7 +649,6 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 		ret = qdisc_enqueue(skb, cl->q);
 		if (ret == NET_XMIT_SUCCESS) {
 			sch->q.qlen++;
-			qdisc_bstats_update(sch, skb);
 			if (!cl->next_alive)
 				cbq_activate_class(cl);
 			return 0;
@@ -665,15 +664,15 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 #endif
 
 /*
-   It is mission critical procedure.
-
-   We "regenerate" toplevel cutoff, if transmitting class
-   has backlog and it is not regulated. It is not part of
-   original CBQ description, but looks more reasonable.
-   Probably, it is wrong. This question needs further investigation.
-*/
+ * This is a mission-critical procedure.
+ *
+ * We "regenerate" toplevel cutoff, if transmitting class
+ * has backlog and it is not regulated. It is not part of
+ * original CBQ description, but looks more reasonable.
+ * Probably, it is wrong. This question needs further investigation.
+ */
 
-static __inline__ void
+static inline void
 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
 		    struct cbq_class *borrowed)
 {
@@ -684,7 +683,7 @@ cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
 					q->toplevel = borrowed->level;
 					return;
 				}
-			} while ((borrowed=borrowed->borrow) != NULL);
+			} while ((borrowed = borrowed->borrow) != NULL);
 		}
 #if 0
 	/* It is not necessary now. Uncommenting it
@@ -712,10 +711,10 @@ cbq_update(struct cbq_sched_data *q)
 		cl->bstats.bytes += len;
 
 		/*
-		   (now - last) is total time between packet right edges.
-		   (last_pktlen/rate) is "virtual" busy time, so that
-
-			 idle = (now - last) - last_pktlen/rate
+		 * (now - last) is total time between packet right edges.
+		 * (last_pktlen/rate) is "virtual" busy time, so that
+		 *
+		 *	idle = (now - last) - last_pktlen/rate
 		 */
 
 		idle = q->now - cl->last;
@@ -725,9 +724,9 @@ cbq_update(struct cbq_sched_data *q)
 			idle -= L2T(cl, len);
 
 		/* true_avgidle := (1-W)*true_avgidle + W*idle,
-		   where W=2^{-ewma_log}. But cl->avgidle is scaled:
-		   cl->avgidle == true_avgidle/W,
-		   hence:
+		 * where W=2^{-ewma_log}. But cl->avgidle is scaled:
+		 * cl->avgidle == true_avgidle/W,
+		 * hence:
 		 */
 			avgidle += idle - (avgidle>>cl->ewma_log);
 		}
@@ -741,22 +740,22 @@ cbq_update(struct cbq_sched_data *q)
 			cl->avgidle = avgidle;
 
 			/* Calculate expected time, when this class
-			   will be allowed to send.
-			   It will occur, when:
-			   (1-W)*true_avgidle + W*delay = 0, i.e.
-			   idle = (1/W - 1)*(-true_avgidle)
-			   or
-			   idle = (1 - W)*(-cl->avgidle);
+			 * will be allowed to send.
+			 * It will occur, when:
+			 * (1-W)*true_avgidle + W*delay = 0, i.e.
+			 * idle = (1/W - 1)*(-true_avgidle)
+			 * or
+			 * idle = (1 - W)*(-cl->avgidle);
 			 */
 			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
 
 			/*
-			   That is not all.
-			   To maintain the rate allocated to the class,
-			   we add to undertime virtual clock,
-			   necessary to complete transmitted packet.
-			   (len/phys_bandwidth has been already passed
-			   to the moment of cbq_update)
+			 * That is not all.
+			 * To maintain the rate allocated to the class,
+			 * we add to undertime virtual clock,
+			 * necessary to complete transmitted packet.
+			 * (len/phys_bandwidth has been already passed
+			 * to the moment of cbq_update)
 			 */
 
 			idle -= L2T(&q->link, len);
@@ -778,7 +777,7 @@ cbq_update(struct cbq_sched_data *q)
 	cbq_update_toplevel(q, this, q->tx_borrowed);
 }
 
-static __inline__ struct cbq_class *
+static inline struct cbq_class *
 cbq_under_limit(struct cbq_class *cl)
 {
 	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
@@ -794,16 +793,17 @@ cbq_under_limit(struct cbq_class *cl)
 
 	do {
 		/* It is very suspicious place. Now overlimit
-		   action is generated for not bounded classes
-		   only if link is completely congested.
-		   Though it is in agree with ancestor-only paradigm,
-		   it looks very stupid. Particularly,
-		   it means that this chunk of code will either
-		   never be called or result in strong amplification
-		   of burstiness. Dangerous, silly, and, however,
-		   no another solution exists.
+		 * action is generated for not bounded classes
+		 * only if link is completely congested.
+		 * Though it agrees with the ancestor-only paradigm,
+		 * it looks very stupid. Particularly,
+		 * it means that this chunk of code will either
+		 * never be called or result in strong amplification
+		 * of burstiness. Dangerous, silly, and, however,
+		 * no other solution exists.
 		 */
-		if ((cl = cl->borrow) == NULL) {
+		cl = cl->borrow;
+		if (!cl) {
 			this_cl->qstats.overlimits++;
 			this_cl->overlimit(this_cl);
 			return NULL;
@@ -816,7 +816,7 @@ cbq_under_limit(struct cbq_class *cl)
 	return cl;
 }
 
-static __inline__ struct sk_buff *
+static inline struct sk_buff *
 cbq_dequeue_prio(struct Qdisc *sch, int prio)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
@@ -840,7 +840,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
 
 			if (cl->deficit <= 0) {
 				/* Class exhausted its allotment per
-				   this round. Switch to the next one.
+				 * this round. Switch to the next one.
 				 */
 				deficit = 1;
 				cl->deficit += cl->quantum;
@@ -850,8 +850,8 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
 			skb = cl->q->dequeue(cl->q);
 
 			/* Class did not give us any skb :-(
-			   It could occur even if cl->q->q.qlen != 0
-			   f.e. if cl->q == "tbf"
+			 * It could occur even if cl->q->q.qlen != 0
+			 * f.e. if cl->q == "tbf"
 			 */
 			if (skb == NULL)
 				goto skip_class;
@@ -880,7 +880,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
 skip_class:
 			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
 				/* Class is empty or penalized.
-				   Unlink it from active chain.
+				 * Unlink it from active chain.
 				 */
 				cl_prev->next_alive = cl->next_alive;
 				cl->next_alive = NULL;
@@ -919,14 +919,14 @@ next_class:
 	return NULL;
 }
 
-static __inline__ struct sk_buff *
+static inline struct sk_buff *
 cbq_dequeue_1(struct Qdisc *sch)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
-	unsigned activemask;
+	unsigned int activemask;
 
-	activemask = q->activemask&0xFF;
+	activemask = q->activemask & 0xFF;
 	while (activemask) {
 		int prio = ffz(~activemask);
 		activemask &= ~(1<<prio);
@@ -951,11 +951,11 @@ cbq_dequeue(struct Qdisc *sch)
 	if (q->tx_class) {
 		psched_tdiff_t incr2;
 		/* Time integrator. We calculate EOS time
-		   by adding expected packet transmission time.
-		   If real time is greater, we warp artificial clock,
-		   so that:
-
-		   cbq_time = max(real_time, work);
+		 * by adding expected packet transmission time.
+		 * If real time is greater, we warp artificial clock,
+		 * so that:
+		 *
+		 * cbq_time = max(real_time, work);
 		 */
 		incr2 = L2T(&q->link, q->tx_len);
 		q->now += incr2;
@@ -971,28 +971,29 @@ cbq_dequeue(struct Qdisc *sch)
 
 		skb = cbq_dequeue_1(sch);
 		if (skb) {
+			qdisc_bstats_update(sch, skb);
 			sch->q.qlen--;
-			sch->flags &= ~TCQ_F_THROTTLED;
+			qdisc_unthrottled(sch);
 			return skb;
 		}
 
 		/* All the classes are overlimit.
-
-		   It is possible, if:
-
-		   1. Scheduler is empty.
-		   2. Toplevel cutoff inhibited borrowing.
-		   3. Root class is overlimit.
-
-		   Reset 2d and 3d conditions and retry.
-
-		   Note, that NS and cbq-2.0 are buggy, peeking
-		   an arbitrary class is appropriate for ancestor-only
-		   sharing, but not for toplevel algorithm.
-
-		   Our version is better, but slower, because it requires
-		   two passes, but it is unavoidable with top-level sharing.
-		*/
+		 *
+		 * It is possible, if:
+		 *
+		 * 1. Scheduler is empty.
+		 * 2. Toplevel cutoff inhibited borrowing.
+		 * 3. Root class is overlimit.
+		 *
+		 * Reset the 2nd and 3rd conditions and retry.
+		 *
+		 * Note, that NS and cbq-2.0 are buggy, peeking
+		 * an arbitrary class is appropriate for ancestor-only
+		 * sharing, but not for toplevel algorithm.
+		 *
+		 * Our version is better, but slower, because it requires
+		 * two passes, but it is unavoidable with top-level sharing.
+		 */
 
 		if (q->toplevel == TC_CBQ_MAXLEVEL &&
 		    q->link.undertime == PSCHED_PASTPERFECT)
@@ -1003,7 +1004,8 @@ cbq_dequeue(struct Qdisc *sch)
 	}
 
 	/* No packets in scheduler or nobody wants to give them to us :-(
-	   Sigh... start watchdog timer in the last case. */
+	 * Sigh... start watchdog timer in the last case.
+	 */
 
 	if (sch->q.qlen) {
 		sch->qstats.overlimits++;
@@ -1025,13 +1027,14 @@ static void cbq_adjust_levels(struct cbq_class *this)
 		int level = 0;
 		struct cbq_class *cl;
 
-		if ((cl = this->children) != NULL) {
+		cl = this->children;
+		if (cl) {
 			do {
 				if (cl->level > level)
 					level = cl->level;
 			} while ((cl = cl->sibling) != this->children);
 		}
-		this->level = level+1;
+		this->level = level + 1;
 	} while ((this = this->tparent) != NULL);
 }
 
@@ -1047,14 +1050,15 @@ static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
 	for (h = 0; h < q->clhash.hashsize; h++) {
 		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
 			/* BUGGGG... Beware! This expression suffer of
-			   arithmetic overflows!
+			 * arithmetic overflows!
 			 */
 			if (cl->priority == prio) {
 				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
 					q->quanta[prio];
 			}
 			if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
-				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
+				pr_warning("CBQ: class %08x has bad quantum==%ld, repaired.\n",
+					   cl->common.classid, cl->quantum);
 				cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
 			}
 		}
@@ -1065,18 +1069,18 @@ static void cbq_sync_defmap(struct cbq_class *cl)
 {
 	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 	struct cbq_class *split = cl->split;
-	unsigned h;
+	unsigned int h;
 	int i;
 
 	if (split == NULL)
 		return;
 
-	for (i=0; i<=TC_PRIO_MAX; i++) {
-		if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
+	for (i = 0; i <= TC_PRIO_MAX; i++) {
+		if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
 			split->defaults[i] = NULL;
 	}
 
-	for (i=0; i<=TC_PRIO_MAX; i++) {
+	for (i = 0; i <= TC_PRIO_MAX; i++) {
 		int level = split->level;
 
 		if (split->defaults[i])
@@ -1089,7 +1093,7 @@ static void cbq_sync_defmap(struct cbq_class *cl)
 			hlist_for_each_entry(c, n, &q->clhash.hash[h],
 					     common.hnode) {
 				if (c->split == split && c->level < level &&
-				    c->defmap&(1<<i)) {
+				    c->defmap & (1<<i)) {
 					split->defaults[i] = c;
 					level = c->level;
 				}
@@ -1103,7 +1107,8 @@ static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 ma
 	struct cbq_class *split = NULL;
 
 	if (splitid == 0) {
-		if ((split = cl->split) == NULL)
+		split = cl->split;
+		if (!split)
 			return;
 		splitid = split->common.classid;
 	}
@@ -1121,9 +1126,9 @@ static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 ma
 		cl->defmap = 0;
 		cbq_sync_defmap(cl);
 		cl->split = split;
-		cl->defmap = def&mask;
+		cl->defmap = def & mask;
 	} else
-		cl->defmap = (cl->defmap&~mask)|(def&mask);
+		cl->defmap = (cl->defmap & ~mask) | (def & mask);
 
 	cbq_sync_defmap(cl);
 }
@@ -1136,7 +1141,7 @@ static void cbq_unlink_class(struct cbq_class *this)
 	qdisc_class_hash_remove(&q->clhash, &this->common);
 
 	if (this->tparent) {
-		clp=&this->sibling;
+		clp = &this->sibling;
 		cl = *clp;
 		do {
 			if (cl == this) {
@@ -1175,7 +1180,7 @@ static void cbq_link_class(struct cbq_class *this)
 	}
 }
 
-static unsigned int cbq_drop(struct Qdisc* sch)
+static unsigned int cbq_drop(struct Qdisc *sch)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct cbq_class *cl, *cl_head;
@@ -1183,7 +1188,8 @@ static unsigned int cbq_drop(struct Qdisc* sch)
 	unsigned int len;
 
 	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
-		if ((cl_head = q->active[prio]) == NULL)
+		cl_head = q->active[prio];
+		if (!cl_head)
 			continue;
 
 		cl = cl_head;
@@ -1200,13 +1206,13 @@ static unsigned int cbq_drop(struct Qdisc* sch)
 }
 
 static void
-cbq_reset(struct Qdisc* sch)
+cbq_reset(struct Qdisc *sch)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct cbq_class *cl;
 	struct hlist_node *n;
 	int prio;
-	unsigned h;
+	unsigned int h;
 
 	q->activemask = 0;
 	q->pmask = 0;
@@ -1238,21 +1244,21 @@ cbq_reset(struct Qdisc* sch)
 
 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
 {
-	if (lss->change&TCF_CBQ_LSS_FLAGS) {
-		cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
-		cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
+	if (lss->change & TCF_CBQ_LSS_FLAGS) {
+		cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
+		cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
 	}
-	if (lss->change&TCF_CBQ_LSS_EWMA)
+	if (lss->change & TCF_CBQ_LSS_EWMA)
 		cl->ewma_log = lss->ewma_log;
-	if (lss->change&TCF_CBQ_LSS_AVPKT)
+	if (lss->change & TCF_CBQ_LSS_AVPKT)
 		cl->avpkt = lss->avpkt;
-	if (lss->change&TCF_CBQ_LSS_MINIDLE)
+	if (lss->change & TCF_CBQ_LSS_MINIDLE)
 		cl->minidle = -(long)lss->minidle;
-	if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
+	if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
 		cl->maxidle = lss->maxidle;
 		cl->avgidle = lss->maxidle;
 	}
-	if (lss->change&TCF_CBQ_LSS_OFFTIME)
+	if (lss->change & TCF_CBQ_LSS_OFFTIME)
 		cl->offtime = lss->offtime;
 	return 0;
 }
@@ -1280,10 +1286,10 @@ static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
 	if (wrr->weight)
 		cl->weight = wrr->weight;
 	if (wrr->priority) {
-		cl->priority = wrr->priority-1;
+		cl->priority = wrr->priority - 1;
 		cl->cpriority = cl->priority;
 		if (cl->priority >= cl->priority2)
-			cl->priority2 = TC_CBQ_MAXPRIO-1;
+			cl->priority2 = TC_CBQ_MAXPRIO - 1;
 	}
 
 	cbq_addprio(q, cl);
@@ -1300,10 +1306,10 @@ static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
 		cl->overlimit = cbq_ovl_delay;
 		break;
 	case TC_CBQ_OVL_LOWPRIO:
-		if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
-		    ovl->priority2-1 <= cl->priority)
+		if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
+		    ovl->priority2 - 1 <= cl->priority)
 			return -EINVAL;
-		cl->priority2 = ovl->priority2-1;
+		cl->priority2 = ovl->priority2 - 1;
 		cl->overlimit = cbq_ovl_lowprio;
 		break;
 	case TC_CBQ_OVL_DROP:
@@ -1382,9 +1388,9 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
 	if (!q->link.q)
 		q->link.q = &noop_qdisc;
 
-	q->link.priority = TC_CBQ_MAXPRIO-1;
-	q->link.priority2 = TC_CBQ_MAXPRIO-1;
-	q->link.cpriority = TC_CBQ_MAXPRIO-1;
+	q->link.priority = TC_CBQ_MAXPRIO - 1;
+	q->link.priority2 = TC_CBQ_MAXPRIO - 1;
+	q->link.cpriority = TC_CBQ_MAXPRIO - 1;
 	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
 	q->link.overlimit = cbq_ovl_classic;
 	q->link.allot = psched_mtu(qdisc_dev(sch));
@@ -1415,7 +1421,7 @@ put_rtab:
 	return err;
 }
 
-static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 
@@ -1427,7 +1433,7 @@ nla_put_failure:
 	return -1;
 }
 
-static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_lssopt opt;
@@ -1452,15 +1458,15 @@ nla_put_failure:
 	return -1;
 }
 
-static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_wrropt opt;
 
 	opt.flags = 0;
 	opt.allot = cl->allot;
-	opt.priority = cl->priority+1;
-	opt.cpriority = cl->cpriority+1;
+	opt.priority = cl->priority + 1;
+	opt.cpriority = cl->cpriority + 1;
 	opt.weight = cl->weight;
 	NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
 	return skb->len;
@@ -1470,13 +1476,13 @@ nla_put_failure:
 	return -1;
 }
 
-static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_ovl opt;
 
 	opt.strategy = cl->ovl_strategy;
-	opt.priority2 = cl->priority2+1;
+	opt.priority2 = cl->priority2 + 1;
 	opt.pad = 0;
 	opt.penalty = cl->penalty;
 	NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
@@ -1487,7 +1493,7 @@ nla_put_failure:
 	return -1;
 }
 
-static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_fopt opt;
@@ -1506,7 +1512,7 @@ nla_put_failure:
 }
 
 #ifdef CONFIG_NET_CLS_ACT
-static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_police opt;
@@ -1570,7 +1576,7 @@ static int
 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
 	       struct sk_buff *skb, struct tcmsg *tcm)
 {
-	struct cbq_class *cl = (struct cbq_class*)arg;
+	struct cbq_class *cl = (struct cbq_class *)arg;
 	struct nlattr *nest;
 
 	if (cl->tparent)
@@ -1598,7 +1604,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 	struct gnet_dump *d)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct cbq_class *cl = (struct cbq_class*)arg;
+	struct cbq_class *cl = (struct cbq_class *)arg;
 
 	cl->qstats.qlen = cl->q->q.qlen;
 	cl->xstats.avgidle = cl->avgidle;
@@ -1618,7 +1624,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 		     struct Qdisc **old)
 {
-	struct cbq_class *cl = (struct cbq_class*)arg;
+	struct cbq_class *cl = (struct cbq_class *)arg;
 
 	if (new == NULL) {
 		new = qdisc_create_dflt(sch->dev_queue,
@@ -1641,10 +1647,9 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 	return 0;
 }
 
-static struct Qdisc *
-cbq_leaf(struct Qdisc *sch, unsigned long arg)
+static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
 {
-	struct cbq_class *cl = (struct cbq_class*)arg;
+	struct cbq_class *cl = (struct cbq_class *)arg;
 
 	return cl->q;
 }
@@ -1683,13 +1688,12 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
 		kfree(cl);
 }
 
-static void
-cbq_destroy(struct Qdisc* sch)
+static void cbq_destroy(struct Qdisc *sch)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct hlist_node *n, *next;
 	struct cbq_class *cl;
-	unsigned h;
+	unsigned int h;
 
 #ifdef CONFIG_NET_CLS_ACT
 	q->rx_class = NULL;
@@ -1713,7 +1717,7 @@ cbq_destroy(struct Qdisc* sch)
 
 static void cbq_put(struct Qdisc *sch, unsigned long arg)
 {
-	struct cbq_class *cl = (struct cbq_class*)arg;
+	struct cbq_class *cl = (struct cbq_class *)arg;
 
 	if (--cl->refcnt == 0) {
 #ifdef CONFIG_NET_CLS_ACT
@@ -1736,7 +1740,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 {
 	int err;
 	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct cbq_class *cl = (struct cbq_class*)*arg;
+	struct cbq_class *cl = (struct cbq_class *)*arg;
 	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct nlattr *tb[TCA_CBQ_MAX + 1];
 	struct cbq_class *parent;
@@ -1828,13 +1832,14 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 
 	if (classid) {
 		err = -EINVAL;
-		if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
+		if (TC_H_MAJ(classid ^ sch->handle) ||
+		    cbq_class_lookup(q, classid))
 			goto failure;
 	} else {
 		int i;
-		classid = TC_H_MAKE(sch->handle,0x8000);
+		classid = TC_H_MAKE(sch->handle, 0x8000);
 
-		for (i=0; i<0x8000; i++) {
+		for (i = 0; i < 0x8000; i++) {
 			if (++q->hgenerator >= 0x8000)
 				q->hgenerator = 1;
 			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
@@ -1891,11 +1896,11 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 	cl->minidle = -0x7FFFFFFF;
 	cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
 	cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
-	if (cl->ewma_log==0)
+	if (cl->ewma_log == 0)
 		cl->ewma_log = q->link.ewma_log;
-	if (cl->maxidle==0)
+	if (cl->maxidle == 0)
 		cl->maxidle = q->link.maxidle;
-	if (cl->avpkt==0)
+	if (cl->avpkt == 0)
 		cl->avpkt = q->link.avpkt;
 	cl->overlimit = cbq_ovl_classic;
 	if (tb[TCA_CBQ_OVL_STRATEGY])
@@ -1921,7 +1926,7 @@ failure:
 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct cbq_class *cl = (struct cbq_class*)arg;
+	struct cbq_class *cl = (struct cbq_class *)arg;
 	unsigned int qlen;
 
 	if (cl->filters || cl->children || cl == &q->link)
@@ -1979,7 +1984,7 @@ static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
 				     u32 classid)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct cbq_class *p = (struct cbq_class*)parent;
+	struct cbq_class *p = (struct cbq_class *)parent;
 	struct cbq_class *cl = cbq_class_lookup(q, classid);
 
 	if (cl) {
@@ -1993,7 +1998,7 @@ static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
 
 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
 {
-	struct cbq_class *cl = (struct cbq_class*)arg;
+	struct cbq_class *cl = (struct cbq_class *)arg;
 
 	cl->filters--;
 }
@@ -2003,7 +2008,7 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct cbq_class *cl;
 	struct hlist_node *n;
-	unsigned h;
+	unsigned int h;
 
 	if (arg->stop)
 		return;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index de55e642eaf..6b7fe4a84f1 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -376,7 +376,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	}
 
 	bstats_update(&cl->bstats, skb);
-	qdisc_bstats_update(sch, skb);
 
 	sch->q.qlen++;
 	return err;
@@ -403,6 +402,7 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch)
 			skb = qdisc_dequeue_peeked(cl->qdisc);
 			if (cl->qdisc->q.qlen == 0)
 				list_del(&cl->alist);
+			qdisc_bstats_update(sch, skb);
 			sch->q.qlen--;
 			return skb;
 		}
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 60f4bdd4408..2c790204d04 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -137,10 +137,10 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
 		mask = nla_get_u8(tb[TCA_DSMARK_MASK]);
 
 	if (tb[TCA_DSMARK_VALUE])
-		p->value[*arg-1] = nla_get_u8(tb[TCA_DSMARK_VALUE]);
+		p->value[*arg - 1] = nla_get_u8(tb[TCA_DSMARK_VALUE]);
 
 	if (tb[TCA_DSMARK_MASK])
-		p->mask[*arg-1] = mask;
+		p->mask[*arg - 1] = mask;
 
 	err = 0;
 
@@ -155,8 +155,8 @@ static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
 	if (!dsmark_valid_index(p, arg))
 		return -EINVAL;
 
-	p->mask[arg-1] = 0xff;
-	p->value[arg-1] = 0;
+	p->mask[arg - 1] = 0xff;
+	p->value[arg - 1] = 0;
 
 	return 0;
 }
@@ -175,7 +175,7 @@ static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 		if (p->mask[i] == 0xff && !p->value[i])
 			goto ignore;
 		if (walker->count >= walker->skip) {
-			if (walker->fn(sch, i+1, walker) < 0) {
+			if (walker->fn(sch, i + 1, walker) < 0) {
 				walker->stop = 1;
 				break;
 			}
@@ -260,7 +260,6 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		return err;
 	}
 
-	qdisc_bstats_update(sch, skb);
 	sch->q.qlen++;
 
 	return NET_XMIT_SUCCESS;
@@ -283,6 +282,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
 	if (skb == NULL)
 		return NULL;
 
+	qdisc_bstats_update(sch, skb);
 	sch->q.qlen--;
 
 	index = skb->tc_index & (p->indices - 1);
@@ -304,9 +304,8 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
 		 * and don't need yet another qdisc as a bypass.
 		 */
 		if (p->mask[index] != 0xff || p->value[index])
-			printk(KERN_WARNING
-			       "dsmark_dequeue: unsupported protocol %d\n",
-			       ntohs(skb->protocol));
+			pr_warning("dsmark_dequeue: unsupported protocol %d\n",
+				   ntohs(skb->protocol));
 		break;
 	}
 
@@ -424,14 +423,14 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
 	if (!dsmark_valid_index(p, cl))
 		return -EINVAL;
 
-	tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl-1);
+	tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl - 1);
 	tcm->tcm_info = p->q->handle;
 
 	opts = nla_nest_start(skb, TCA_OPTIONS);
 	if (opts == NULL)
 		goto nla_put_failure;
-	NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl-1]);
-	NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl-1]);
+	NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl - 1]);
+	NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl - 1]);
 
 	return nla_nest_end(skb, opts);
 
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index aa4d6337e43..be33f9ddf9d 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -19,12 +19,11 @@
 
 /* 1 band FIFO pseudo-"scheduler" */
 
-struct fifo_sched_data
-{
+struct fifo_sched_data {
 	u32 limit;
 };
 
-static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct fifo_sched_data *q = qdisc_priv(sch);
 
@@ -34,7 +33,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 	return qdisc_reshape_fail(skb, sch);
 }
 
-static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct fifo_sched_data *q = qdisc_priv(sch);
 
@@ -44,19 +43,16 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 	return qdisc_reshape_fail(skb, sch);
 }
 
-static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
-	struct sk_buff *skb_head;
 	struct fifo_sched_data *q = qdisc_priv(sch);
 
 	if (likely(skb_queue_len(&sch->q) < q->limit))
 		return qdisc_enqueue_tail(skb, sch);
 
 	/* queue full, remove one skb to fulfill the limit */
-	skb_head = qdisc_dequeue_head(sch);
+	__qdisc_queue_drop_head(sch, &sch->q);
 	sch->qstats.drops++;
-	kfree_skb(skb_head);
-
 	qdisc_enqueue_tail(skb, sch);
 
 	return NET_XMIT_CN;
@@ -65,11 +61,13 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 static int fifo_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct fifo_sched_data *q = qdisc_priv(sch);
+	bool bypass;
+	bool is_bfifo = sch->ops == &bfifo_qdisc_ops;
 
 	if (opt == NULL) {
 		u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1;
 
-		if (sch->ops == &bfifo_qdisc_ops)
+		if (is_bfifo)
 			limit *= psched_mtu(qdisc_dev(sch));
 
 		q->limit = limit;
@@ -82,6 +80,15 @@ static int fifo_init(struct Qdisc *sch, struct nlattr *opt)
 		q->limit = ctl->limit;
 	}
 
+	if (is_bfifo)
+		bypass = q->limit >= psched_mtu(qdisc_dev(sch));
+	else
+		bypass = q->limit >= 1;
+
+	if (bypass)
+		sch->flags |= TCQ_F_CAN_BYPASS;
+	else
+		sch->flags &= ~TCQ_F_CAN_BYPASS;
 	return 0;
 }
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 34dc598440a..0da09d50873 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -87,8 +87,8 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
 		 */
 		kfree_skb(skb);
 		if (net_ratelimit())
-			printk(KERN_WARNING "Dead loop on netdevice %s, "
-			       "fix it urgently!\n", dev_queue->dev->name);
+			pr_warning("Dead loop on netdevice %s, fix it urgently!\n",
+				   dev_queue->dev->name);
 		ret = qdisc_qlen(q);
 	} else {
 		/*
@@ -137,8 +137,8 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 	} else {
 		/* Driver returned NETDEV_TX_BUSY - requeue skb */
 		if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
-			printk(KERN_WARNING "BUG %s code %d qlen %d\n",
-			       dev->name, ret, q->q.qlen);
+			pr_warning("BUG %s code %d qlen %d\n",
+				   dev->name, ret, q->q.qlen);
 
 		ret = dev_requeue_skb(skb, q);
 	}
@@ -412,8 +412,9 @@ static struct Qdisc noqueue_qdisc = {
 };
 
 
-static const u8 prio2band[TC_PRIO_MAX+1] =
-	{ 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 };
+static const u8 prio2band[TC_PRIO_MAX + 1] = {
+	1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
+};
 
 /* 3-band FIFO queue: old style, but should be a bit faster than
    generic prio+fifo combination.
@@ -445,7 +446,7 @@ static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv,
 	return priv->q + band;
 }
 
-static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
+static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
 {
 	if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) {
 		int band = prio2band[skb->priority & TC_PRIO_MAX];
@@ -460,7 +461,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
 	return qdisc_drop(skb, qdisc);
 }
 
-static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
+static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
 {
 	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
 	int band = bitmap2band[priv->bitmap];
@@ -479,7 +480,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
 	return NULL;
 }
 
-static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc)
+static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc)
 {
 	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
 	int band = bitmap2band[priv->bitmap];
@@ -493,7 +494,7 @@ static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc)
 	return NULL;
 }
 
-static void pfifo_fast_reset(struct Qdisc* qdisc)
+static void pfifo_fast_reset(struct Qdisc *qdisc)
 {
 	int prio;
 	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
@@ -510,7 +511,7 @@ static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
 {
 	struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
 
-	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
+	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1);
 	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
 	return skb->len;
 
@@ -526,6 +527,8 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
 	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
 		skb_queue_head_init(band2list(priv, prio));
 
+	/* Can by-pass the queue discipline */
+	qdisc->flags |= TCQ_F_CAN_BYPASS;
 	return 0;
 }
 
@@ -540,6 +543,7 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
 	.dump		=	pfifo_fast_dump,
 	.owner		=	THIS_MODULE,
 };
+EXPORT_SYMBOL(pfifo_fast_ops);
 
 struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 			  struct Qdisc_ops *ops)
@@ -630,7 +634,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
 #ifdef CONFIG_NET_SCHED
 	qdisc_list_del(qdisc);
 
-	qdisc_put_stab(qdisc->stab);
+	qdisc_put_stab(rtnl_dereference(qdisc->stab));
 #endif
 	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
 	if (ops->reset)
@@ -674,25 +678,21 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
 
 	return oqdisc;
 }
+EXPORT_SYMBOL(dev_graft_qdisc);
 
 static void attach_one_default_qdisc(struct net_device *dev,
 				     struct netdev_queue *dev_queue,
 				     void *_unused)
 {
-	struct Qdisc *qdisc;
+	struct Qdisc *qdisc = &noqueue_qdisc;
 
 	if (dev->tx_queue_len) {
 		qdisc = qdisc_create_dflt(dev_queue,
 					  &pfifo_fast_ops, TC_H_ROOT);
 		if (!qdisc) {
-			printk(KERN_INFO "%s: activation failed\n", dev->name);
+			netdev_info(dev, "activation failed\n");
 			return;
 		}
-
-		/* Can by-pass the queue discipline for default qdisc */
-		qdisc->flags |= TCQ_F_CAN_BYPASS;
-	} else {
-		qdisc =  &noqueue_qdisc;
 	}
 	dev_queue->qdisc_sleeping = qdisc;
 }
@@ -761,6 +761,7 @@ void dev_activate(struct net_device *dev)
 		dev_watchdog_up(dev);
 	}
 }
+EXPORT_SYMBOL(dev_activate);
 
 static void dev_deactivate_queue(struct net_device *dev,
 				 struct netdev_queue *dev_queue,
@@ -840,6 +841,7 @@ void dev_deactivate(struct net_device *dev)
 	list_add(&dev->unreg_list, &single);
 	dev_deactivate_many(&single);
 }
+EXPORT_SYMBOL(dev_deactivate);
 
 static void dev_init_scheduler_queue(struct net_device *dev,
 				     struct netdev_queue *dev_queue,
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 51dcc2aa5c9..b9493a09a87 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -32,8 +32,7 @@
 struct gred_sched_data;
 struct gred_sched;
 
-struct gred_sched_data
-{
+struct gred_sched_data {
 	u32		limit;		/* HARD maximal queue length	*/
 	u32      	DP;		/* the drop pramaters */
 	u32		bytesin;	/* bytes seen on virtualQ so far*/
@@ -50,8 +49,7 @@ enum {
 	GRED_RIO_MODE,
 };
 
-struct gred_sched
-{
+struct gred_sched {
 	struct gred_sched_data *tab[MAX_DPs];
 	unsigned long	flags;
 	u32		red_flags;
@@ -150,17 +148,18 @@ static inline int gred_use_harddrop(struct gred_sched *t)
 	return t->red_flags & TC_RED_HARDDROP;
 }
 
-static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
-	struct gred_sched_data *q=NULL;
-	struct gred_sched *t= qdisc_priv(sch);
+	struct gred_sched_data *q = NULL;
+	struct gred_sched *t = qdisc_priv(sch);
 	unsigned long qavg = 0;
 	u16 dp = tc_index_to_dp(skb);
 
-	if (dp >= t->DPs  || (q = t->tab[dp]) == NULL) {
+	if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
 		dp = t->def;
 
-		if ((q = t->tab[dp]) == NULL) {
+		q = t->tab[dp];
+		if (!q) {
 			/* Pass through packets not assigned to a DP
 			 * if no default DP has been configured. This
 			 * allows for DP flows to be left untouched.
@@ -183,7 +182,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 		for (i = 0; i < t->DPs; i++) {
 			if (t->tab[i] && t->tab[i]->prio < q->prio &&
 			    !red_is_idling(&t->tab[i]->parms))
-				qavg +=t->tab[i]->parms.qavg;
+				qavg += t->tab[i]->parms.qavg;
 		}
 
 	}
@@ -203,28 +202,28 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 		gred_store_wred_set(t, q);
 
 	switch (red_action(&q->parms, q->parms.qavg + qavg)) {
-		case RED_DONT_MARK:
-			break;
-
-		case RED_PROB_MARK:
-			sch->qstats.overlimits++;
-			if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) {
-				q->stats.prob_drop++;
-				goto congestion_drop;
-			}
-
-			q->stats.prob_mark++;
-			break;
-
-		case RED_HARD_MARK:
-			sch->qstats.overlimits++;
-			if (gred_use_harddrop(t) || !gred_use_ecn(t) ||
-			    !INET_ECN_set_ce(skb)) {
-				q->stats.forced_drop++;
-				goto congestion_drop;
-			}
-			q->stats.forced_mark++;
-			break;
+	case RED_DONT_MARK:
+		break;
+
+	case RED_PROB_MARK:
+		sch->qstats.overlimits++;
+		if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) {
+			q->stats.prob_drop++;
+			goto congestion_drop;
+		}
+
+		q->stats.prob_mark++;
+		break;
+
+	case RED_HARD_MARK:
+		sch->qstats.overlimits++;
+		if (gred_use_harddrop(t) || !gred_use_ecn(t) ||
+		    !INET_ECN_set_ce(skb)) {
+			q->stats.forced_drop++;
+			goto congestion_drop;
+		}
+		q->stats.forced_mark++;
+		break;
 	}
 
 	if (q->backlog + qdisc_pkt_len(skb) <= q->limit) {
@@ -241,7 +240,7 @@ congestion_drop:
 	return NET_XMIT_CN;
 }
 
-static struct sk_buff *gred_dequeue(struct Qdisc* sch)
+static struct sk_buff *gred_dequeue(struct Qdisc *sch)
 {
 	struct sk_buff *skb;
 	struct gred_sched *t = qdisc_priv(sch);
@@ -254,9 +253,9 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch)
 
 		if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
 			if (net_ratelimit())
-				printk(KERN_WARNING "GRED: Unable to relocate "
-				       "VQ 0x%x after dequeue, screwing up "
-				       "backlog.\n", tc_index_to_dp(skb));
+				pr_warning("GRED: Unable to relocate VQ 0x%x "
+					   "after dequeue, screwing up "
+					   "backlog.\n", tc_index_to_dp(skb));
 		} else {
 			q->backlog -= qdisc_pkt_len(skb);
 
@@ -273,7 +272,7 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch)
 	return NULL;
 }
 
-static unsigned int gred_drop(struct Qdisc* sch)
+static unsigned int gred_drop(struct Qdisc *sch)
 {
 	struct sk_buff *skb;
 	struct gred_sched *t = qdisc_priv(sch);
@@ -286,9 +285,9 @@ static unsigned int gred_drop(struct Qdisc* sch)
 
 		if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
 			if (net_ratelimit())
-				printk(KERN_WARNING "GRED: Unable to relocate "
-				       "VQ 0x%x while dropping, screwing up "
-				       "backlog.\n", tc_index_to_dp(skb));
+				pr_warning("GRED: Unable to relocate VQ 0x%x "
+					   "while dropping, screwing up "
+					   "backlog.\n", tc_index_to_dp(skb));
 		} else {
 			q->backlog -= len;
 			q->stats.other++;
@@ -308,7 +307,7 @@ static unsigned int gred_drop(struct Qdisc* sch)
 
 }
 
-static void gred_reset(struct Qdisc* sch)
+static void gred_reset(struct Qdisc *sch)
 {
 	int i;
 	struct gred_sched *t = qdisc_priv(sch);
@@ -369,8 +368,8 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
 
 	for (i = table->DPs; i < MAX_DPs; i++) {
 		if (table->tab[i]) {
-			printk(KERN_WARNING "GRED: Warning: Destroying "
-			       "shadowed VQ 0x%x\n", i);
+			pr_warning("GRED: Warning: Destroying "
+				   "shadowed VQ 0x%x\n", i);
 			gred_destroy_vq(table->tab[i]);
 			table->tab[i] = NULL;
 		}
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 2e45791d4f6..6488e642565 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -81,8 +81,7 @@
  *   that are expensive on 32-bit architectures.
  */
 
-struct internal_sc
-{
+struct internal_sc {
 	u64	sm1;	/* scaled slope of the 1st segment */
 	u64	ism1;	/* scaled inverse-slope of the 1st segment */
 	u64	dx;	/* the x-projection of the 1st segment */
@@ -92,8 +91,7 @@ struct internal_sc
 };
 
 /* runtime service curve */
-struct runtime_sc
-{
+struct runtime_sc {
 	u64	x;	/* current starting position on x-axis */
 	u64	y;	/* current starting position on y-axis */
 	u64	sm1;	/* scaled slope of the 1st segment */
@@ -104,15 +102,13 @@ struct runtime_sc
 	u64	ism2;	/* scaled inverse-slope of the 2nd segment */
 };
 
-enum hfsc_class_flags
-{
+enum hfsc_class_flags {
 	HFSC_RSC = 0x1,
 	HFSC_FSC = 0x2,
 	HFSC_USC = 0x4
 };
 
-struct hfsc_class
-{
+struct hfsc_class {
 	struct Qdisc_class_common cl_common;
 	unsigned int	refcnt;		/* usage count */
 
@@ -140,8 +136,8 @@ struct hfsc_class
 	u64	cl_cumul;		/* cumulative work in bytes done by
 					   real-time criteria */
 
-	u64 	cl_d;			/* deadline*/
-	u64 	cl_e;			/* eligible time */
+	u64	cl_d;			/* deadline*/
+	u64	cl_e;			/* eligible time */
 	u64	cl_vt;			/* virtual time */
 	u64	cl_f;			/* time when this class will fit for
 					   link-sharing, max(myf, cfmin) */
@@ -176,8 +172,7 @@ struct hfsc_class
 	unsigned long	cl_nactive;	/* number of active children */
 };
 
-struct hfsc_sched
-{
+struct hfsc_sched {
 	u16	defcls;				/* default class id */
 	struct hfsc_class root;			/* root class */
 	struct Qdisc_class_hash clhash;		/* class hash */
@@ -693,7 +688,7 @@ init_vf(struct hfsc_class *cl, unsigned int len)
 		if (go_active) {
 			n = rb_last(&cl->cl_parent->vt_tree);
 			if (n != NULL) {
-				max_cl = rb_entry(n, struct hfsc_class,vt_node);
+				max_cl = rb_entry(n, struct hfsc_class, vt_node);
 				/*
 				 * set vt to the average of the min and max
 				 * classes.  if the parent's period didn't
@@ -1177,8 +1172,10 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 			return NULL;
 		}
 #endif
-		if ((cl = (struct hfsc_class *)res.class) == NULL) {
-			if ((cl = hfsc_find_class(res.classid, sch)) == NULL)
+		cl = (struct hfsc_class *)res.class;
+		if (!cl) {
+			cl = hfsc_find_class(res.classid, sch);
+			if (!cl)
 				break; /* filter selected invalid classid */
 			if (cl->level >= head->level)
 				break; /* filter may only point downwards */
@@ -1316,7 +1313,7 @@ hfsc_dump_sc(struct sk_buff *skb, int attr, struct internal_sc *sc)
 	return -1;
 }
 
-static inline int
+static int
 hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl)
 {
 	if ((cl->cl_flags & HFSC_RSC) &&
@@ -1420,7 +1417,8 @@ hfsc_schedule_watchdog(struct Qdisc *sch)
 	struct hfsc_class *cl;
 	u64 next_time = 0;
 
-	if ((cl = eltree_get_minel(q)) != NULL)
+	cl = eltree_get_minel(q);
+	if (cl)
 		next_time = cl->cl_e;
 	if (q->root.cl_cfmin != 0) {
 		if (next_time == 0 || next_time > q->root.cl_cfmin)
@@ -1600,7 +1598,6 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		set_active(cl, qdisc_pkt_len(skb));
 
 	bstats_update(&cl->bstats, skb);
-	qdisc_bstats_update(sch, skb);
 	sch->q.qlen++;
 
 	return NET_XMIT_SUCCESS;
@@ -1626,7 +1623,8 @@ hfsc_dequeue(struct Qdisc *sch)
 	 * find the class with the minimum deadline among
 	 * the eligible classes.
 	 */
-	if ((cl = eltree_get_mindl(q, cur_time)) != NULL) {
+	cl = eltree_get_mindl(q, cur_time);
+	if (cl) {
 		realtime = 1;
 	} else {
 		/*
@@ -1665,7 +1663,8 @@ hfsc_dequeue(struct Qdisc *sch)
 		set_passive(cl);
 	}
 
-	sch->flags &= ~TCQ_F_THROTTLED;
+	qdisc_unthrottled(sch);
+	qdisc_bstats_update(sch, skb);
 	sch->q.qlen--;
 
 	return skb;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 984c1b0c683..e1429a85091 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -99,9 +99,10 @@ struct htb_class {
 			struct rb_root feed[TC_HTB_NUMPRIO];	/* feed trees */
 			struct rb_node *ptr[TC_HTB_NUMPRIO];	/* current class ptr */
 			/* When class changes from state 1->2 and disconnects from
-			   parent's feed then we lost ptr value and start from the
-			   first child again. Here we store classid of the
-			   last valid ptr (used when ptr is NULL). */
+			 * parent's feed then we lost ptr value and start from the
+			 * first child again. Here we store classid of the
+			 * last valid ptr (used when ptr is NULL).
+			 */
 			u32 last_ptr_id[TC_HTB_NUMPRIO];
 		} inner;
 	} un;
@@ -185,7 +186,7 @@ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
  * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessfull
  * then finish and return direct queue.
  */
-#define HTB_DIRECT (struct htb_class*)-1
+#define HTB_DIRECT ((struct htb_class *)-1L)
 
 static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
 				      int *qerr)
@@ -197,11 +198,13 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
 	int result;
 
 	/* allow to select class by setting skb->priority to valid classid;
-	   note that nfmark can be used too by attaching filter fw with no
-	   rules in it */
+	 * note that nfmark can be used too by attaching filter fw with no
+	 * rules in it
+	 */
 	if (skb->priority == sch->handle)
 		return HTB_DIRECT;	/* X:0 (direct flow) selected */
-	if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
+	cl = htb_find(skb->priority, sch);
+	if (cl && cl->level == 0)
 		return cl;
 
 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
@@ -216,10 +219,12 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
 			return NULL;
 		}
 #endif
-		if ((cl = (void *)res.class) == NULL) {
+		cl = (void *)res.class;
+		if (!cl) {
 			if (res.classid == sch->handle)
 				return HTB_DIRECT;	/* X:0 (direct flow) */
-			if ((cl = htb_find(res.classid, sch)) == NULL)
+			cl = htb_find(res.classid, sch);
+			if (!cl)
 				break;	/* filter selected invalid classid */
 		}
 		if (!cl->level)
@@ -378,7 +383,8 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
 
 			if (p->un.inner.feed[prio].rb_node)
 				/* parent already has its feed in use so that
-				   reset bit in mask as parent is already ok */
+				 * reset bit in mask as parent is already ok
+				 */
 				mask &= ~(1 << prio);
 
 			htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
@@ -413,8 +419,9 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
 
 			if (p->un.inner.ptr[prio] == cl->node + prio) {
 				/* we are removing child which is pointed to from
-				   parent feed - forget the pointer but remember
-				   classid */
+				 * parent feed - forget the pointer but remember
+				 * classid
+				 */
 				p->un.inner.last_ptr_id[prio] = cl->common.classid;
 				p->un.inner.ptr[prio] = NULL;
 			}
@@ -574,7 +581,6 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	}
 
 	sch->q.qlen++;
-	qdisc_bstats_update(sch, skb);
 	return NET_XMIT_SUCCESS;
 }
 
@@ -664,8 +670,9 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
 				   unsigned long start)
 {
 	/* don't run for longer than 2 jiffies; 2 is used instead of
-	   1 to simplify things when jiffy is going to be incremented
-	   too soon */
+	 * 1 to simplify things when jiffy is going to be incremented
+	 * too soon
+	 */
 	unsigned long stop_at = start + 2;
 	while (time_before(jiffies, stop_at)) {
 		struct htb_class *cl;
@@ -688,7 +695,7 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
 
 	/* too much load - let's continue after a break for scheduling */
 	if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
-		printk(KERN_WARNING "htb: too many events!\n");
+		pr_warning("htb: too many events!\n");
 		q->warned |= HTB_WARN_TOOMANYEVENTS;
 	}
 
@@ -696,7 +703,8 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
 }
 
 /* Returns class->node+prio from id-tree where classe's id is >= id. NULL
-   is no such one exists. */
+ * is no such one exists.
+ */
 static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
 					      u32 id)
 {
@@ -740,12 +748,14 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
 	for (i = 0; i < 65535; i++) {
 		if (!*sp->pptr && *sp->pid) {
 			/* ptr was invalidated but id is valid - try to recover
-			   the original or next ptr */
+			 * the original or next ptr
+			 */
 			*sp->pptr =
 			    htb_id_find_next_upper(prio, sp->root, *sp->pid);
 		}
 		*sp->pid = 0;	/* ptr is valid now so that remove this hint as it
-				   can become out of date quickly */
+				 * can become out of date quickly
+				 */
 		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
 			*sp->pptr = sp->root;
 			while ((*sp->pptr)->rb_left)
@@ -773,7 +783,8 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
 }
 
 /* dequeues packet at given priority and level; call only if
-   you are sure that there is active class at prio/level */
+ * you are sure that there is active class at prio/level
+ */
 static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
 					int level)
 {
@@ -790,9 +801,10 @@ next:
 			return NULL;
 
 		/* class can be empty - it is unlikely but can be true if leaf
-		   qdisc drops packets in enqueue routine or if someone used
-		   graft operation on the leaf since last dequeue;
-		   simply deactivate and skip such class */
+		 * qdisc drops packets in enqueue routine or if someone used
+		 * graft operation on the leaf since last dequeue;
+		 * simply deactivate and skip such class
+		 */
 		if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
 			struct htb_class *next;
 			htb_deactivate(q, cl);
@@ -832,7 +844,8 @@ next:
 					  ptr[0]) + prio);
 		}
 		/* this used to be after charge_class but this constelation
-		   gives us slightly better performance */
+		 * gives us slightly better performance
+		 */
 		if (!cl->un.leaf.q->q.qlen)
 			htb_deactivate(q, cl);
 		htb_charge_class(q, cl, level, skb);
@@ -842,7 +855,7 @@ next:
 
 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 {
-	struct sk_buff *skb = NULL;
+	struct sk_buff *skb;
 	struct htb_sched *q = qdisc_priv(sch);
 	int level;
 	psched_time_t next_event;
@@ -851,7 +864,9 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
 	skb = __skb_dequeue(&q->direct_queue);
 	if (skb != NULL) {
-		sch->flags &= ~TCQ_F_THROTTLED;
+ok:
+		qdisc_bstats_update(sch, skb);
+		qdisc_unthrottled(sch);
 		sch->q.qlen--;
 		return skb;
 	}
@@ -882,13 +897,11 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 		m = ~q->row_mask[level];
 		while (m != (int)(-1)) {
 			int prio = ffz(m);
+
 			m |= 1 << prio;
 			skb = htb_dequeue_tree(q, prio, level);
-			if (likely(skb != NULL)) {
-				sch->q.qlen--;
-				sch->flags &= ~TCQ_F_THROTTLED;
-				goto fin;
-			}
+			if (likely(skb != NULL))
+				goto ok;
 		}
 	}
 	sch->qstats.overlimits++;
@@ -989,13 +1002,12 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 		return err;
 
 	if (tb[TCA_HTB_INIT] == NULL) {
-		printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
+		pr_err("HTB: hey probably you have bad tc tool ?\n");
 		return -EINVAL;
 	}
 	gopt = nla_data(tb[TCA_HTB_INIT]);
 	if (gopt->version != HTB_VER >> 16) {
-		printk(KERN_ERR
-		       "HTB: need tc/htb version %d (minor is %d), you have %d\n",
+		pr_err("HTB: need tc/htb version %d (minor is %d), you have %d\n",
 		       HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
 		return -EINVAL;
 	}
@@ -1208,9 +1220,10 @@ static void htb_destroy(struct Qdisc *sch)
 	cancel_work_sync(&q->work);
 	qdisc_watchdog_cancel(&q->watchdog);
 	/* This line used to be after htb_destroy_class call below
-	   and surprisingly it worked in 2.4. But it must precede it
-	   because filter need its target class alive to be able to call
-	   unbind_filter on it (without Oops). */
+	 * and surprisingly it worked in 2.4. But it must precede it
+	 * because filter need its target class alive to be able to call
+	 * unbind_filter on it (without Oops).
+	 */
 	tcf_destroy_chain(&q->filter_list);
 
 	for (i = 0; i < q->clhash.hashsize; i++) {
@@ -1344,11 +1357,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 
 		/* check maximal depth */
 		if (parent && parent->parent && parent->parent->level < 2) {
-			printk(KERN_ERR "htb: tree is too deep\n");
+			pr_err("htb: tree is too deep\n");
 			goto failure;
 		}
 		err = -ENOBUFS;
-		if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
+		cl = kzalloc(sizeof(*cl), GFP_KERNEL);
+		if (!cl)
 			goto failure;
 
 		err = gen_new_estimator(&cl->bstats, &cl->rate_est,
@@ -1368,8 +1382,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 			RB_CLEAR_NODE(&cl->node[prio]);
 
 		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
-		   so that can't be used inside of sch_tree_lock
-		   -- thanks to Karlis Peisenieks */
+		 * so that can't be used inside of sch_tree_lock
+		 * -- thanks to Karlis Peisenieks
+		 */
 		new_q = qdisc_create_dflt(sch->dev_queue,
 					  &pfifo_qdisc_ops, classid);
 		sch_tree_lock(sch);
@@ -1421,17 +1436,18 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 	}
 
 	/* it used to be a nasty bug here, we have to check that node
-	   is really leaf before changing cl->un.leaf ! */
+	 * is really leaf before changing cl->un.leaf !
+	 */
 	if (!cl->level) {
 		cl->quantum = rtab->rate.rate / q->rate2quantum;
 		if (!hopt->quantum && cl->quantum < 1000) {
-			printk(KERN_WARNING
+			pr_warning(
 			       "HTB: quantum of class %X is small. Consider r2q change.\n",
 			       cl->common.classid);
 			cl->quantum = 1000;
 		}
 		if (!hopt->quantum && cl->quantum > 200000) {
-			printk(KERN_WARNING
+			pr_warning(
 			       "HTB: quantum of class %X is big. Consider r2q change.\n",
 			       cl->common.classid);
 			cl->quantum = 200000;
@@ -1480,13 +1496,13 @@ static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
 	struct htb_class *cl = htb_find(classid, sch);
 
 	/*if (cl && !cl->level) return 0;
-	   The line above used to be there to prevent attaching filters to
-	   leaves. But at least tc_index filter uses this just to get class
-	   for other reasons so that we have to allow for it.
-	   ----
-	   19.6.2002 As Werner explained it is ok - bind filter is just
-	   another way to "lock" the class - unlike "get" this lock can
-	   be broken by class during destroy IIUC.
+	 * The line above used to be there to prevent attaching filters to
+	 * leaves. But at least tc_index filter uses this just to get class
+	 * for other reasons so that we have to allow for it.
+	 * ----
+	 * 19.6.2002 As Werner explained it is ok - bind filter is just
+	 * another way to "lock" the class - unlike "get" this lock can
+	 * be broken by class during destroy IIUC.
 	 */
 	if (cl)
 		cl->filter_cnt++;
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index ecc302f4d2a..ec5cbc84896 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -61,7 +61,6 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
 						    TC_H_MIN(ntx + 1)));
 		if (qdisc == NULL)
 			goto err;
-		qdisc->flags |= TCQ_F_CAN_BYPASS;
 		priv->qdiscs[ntx] = qdisc;
 	}
 
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
new file mode 100644
index 00000000000..effd4ee0e88
--- /dev/null
+++ b/net/sched/sch_mqprio.c
@@ -0,0 +1,416 @@
+/*
+ * net/sched/sch_mqprio.c
+ *
+ * Copyright (c) 2010 John Fastabend <john.r.fastabend@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/sch_generic.h>
+
+struct mqprio_sched {
+	struct Qdisc		**qdiscs;	/* children built by init; NULL after attach */
+	int hw_owned;		/* set when init used ndo_setup_tc for the tc setup */
+};
+
+/* Free children still held by the root and reset the device tc setup. */
+static void mqprio_destroy(struct Qdisc *sch)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct mqprio_sched *priv = qdisc_priv(sch);
+	struct Qdisc **child = priv->qdiscs;
+	unsigned int ntx;
+
+	/* child is NULL once mqprio_attach() ran; the tc reset must still run */
+	for (ntx = 0; child && ntx < dev->num_tx_queues && child[ntx]; ntx++)
+		qdisc_destroy(child[ntx]);
+	kfree(child);
+
+	/* Undo the traffic class setup through the path that installed it */
+	if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc)
+		dev->netdev_ops->ndo_setup_tc(dev, 0);
+	else
+		netdev_set_num_tc(dev, 0);
+}
+
+/* Validate a user supplied mqprio configuration against @dev.
+ * Returns 0 when acceptable and -EINVAL otherwise.
+ */
+static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
+{
+	int prio, tc, next;
+
+	/* num_tc must not exceed the supported maximum */
+	if (qopt->num_tc > TC_MAX_QUEUE)
+		return -EINVAL;
+
+	/* every priority has to map onto a configured tc */
+	for (prio = 0; prio <= TC_BITMASK; prio++)
+		if (qopt->prio_tc_map[prio] >= qopt->num_tc)
+			return -EINVAL;
+
+	/* hardware offload was requested but the device cannot do it */
+	if (qopt->hw && !dev->netdev_ops->ndo_setup_tc)
+		return -EINVAL;
+
+	/* with hw ownership count/offset come from the LLD: skip the checks */
+	if (qopt->hw)
+		return 0;
+
+	/* software mapping: validate each tc's queue range */
+	for (tc = 0; tc < qopt->num_tc; tc++) {
+		unsigned int first = qopt->offset[tc];
+		unsigned int end = first + qopt->count[tc];
+
+		/* range must be non-empty and lie within the real tx queues;
+		 * end == real_num_tx_queues means the last queue is in use
+		 */
+		if (!qopt->count[tc] || first >= dev->real_num_tx_queues ||
+		    end > dev->real_num_tx_queues)
+			return -EINVAL;
+
+		/* later tcs may not start inside this tc's range */
+		for (next = tc + 1; next < qopt->num_tc; next++)
+			if (end > qopt->offset[next])
+				return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Validate the options (a missing attribute is -EINVAL), pre-allocate one
+ * pfifo_fast child per tx queue and program the tc mapping on the device.
+ * Returns 0 or a negative errno; the err path unwinds via mqprio_destroy().
+ */
+static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct mqprio_sched *priv = qdisc_priv(sch);
+	struct netdev_queue *dev_queue;
+	struct Qdisc *qdisc;
+	int i, err = -ENOMEM;	/* allocation failures goto err as is */
+	struct tc_mqprio_qopt *qopt = NULL;
+
+	BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE);
+	BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK);
+
+	if (sch->parent != TC_H_ROOT)
+		return -EOPNOTSUPP;
+
+	if (!netif_is_multiqueue(dev))
+		return -EOPNOTSUPP;
+
+	if (!opt || nla_len(opt) < sizeof(*qopt))
+		return -EINVAL;
+
+	qopt = nla_data(opt);
+	if (mqprio_parse_opt(dev, qopt))
+		return -EINVAL;
+
+	/* pre-allocate qdisc, attachment can't fail */
+	priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
+			       GFP_KERNEL);
+	if (priv->qdiscs == NULL)
+		goto err;
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		dev_queue = netdev_get_tx_queue(dev, i);
+		qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops,
+					  TC_H_MAKE(TC_H_MAJ(sch->handle),
+						    TC_H_MIN(i + 1)));
+		if (qdisc == NULL)
+			goto err;
+		priv->qdiscs[i] = qdisc;
+	}
+
+	/* If the mqprio options indicate that hardware should own
+	 * the queue mapping then run ndo_setup_tc otherwise use the
+	 * supplied and verified mapping
+	 */
+	if (qopt->hw) {
+		priv->hw_owned = 1;
+		err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc);
+		if (err)
+			goto err;
+	} else {
+		netdev_set_num_tc(dev, qopt->num_tc);
+		for (i = 0; i < qopt->num_tc; i++)
+			netdev_set_tc_queue(dev, i,
+					    qopt->count[i], qopt->offset[i]);
+	}
+
+	/* Always use supplied priority mappings */
+	for (i = 0; i < TC_BITMASK + 1; i++)
+		netdev_set_prio_tc_map(dev, i, qopt->prio_tc_map[i]);
+
+	sch->flags |= TCQ_F_MQROOT;
+	return 0;
+
+err:
+	mqprio_destroy(sch);
+	return err;
+}
+
+/* Hand every pre-allocated child to its tx queue and free the array. */
+static void mqprio_attach(struct Qdisc *sch)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct mqprio_sched *priv = qdisc_priv(sch);
+	struct Qdisc *child, *prev;
+	unsigned int idx;
+
+	for (idx = 0; idx < dev->num_tx_queues; idx++) {
+		child = priv->qdiscs[idx];
+		prev = dev_graft_qdisc(child->dev_queue, child);
+		if (prev)	/* whatever the graft returned is destroyed */
+			qdisc_destroy(prev);
+	}
+	kfree(priv->qdiscs);
+	priv->qdiscs = NULL;	/* array is not needed after grafting */
+}
+
+/* Map class id @cl (queue classes follow the num_tc tc classes) to its txq */
+static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
+					     unsigned long cl)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	unsigned long idx = cl - 1 - netdev_get_num_tc(dev);
+
+	/* idx is unsigned: a tc class id wraps around and fails the bound */
+	return idx < dev->num_tx_queues ? netdev_get_tx_queue(dev, idx) : NULL;
+}
+
+/* Replace the qdisc on the tx queue behind class @cl, returning the previous
+ * one through @old. A device that is up is deactivated around the swap.
+ */
+static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
+		    struct Qdisc **old)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct netdev_queue *txq = mqprio_queue_get(sch, cl);
+
+	if (txq == NULL)
+		return -EINVAL;
+
+	if (dev->flags & IFF_UP)
+		dev_deactivate(dev);
+	*old = dev_graft_qdisc(txq, new);
+	if (dev->flags & IFF_UP)
+		dev_activate(dev);
+	return 0;
+}
+
+/* Fold per-queue stats into @sch and emit the tc layout as TCA_OPTIONS */
+static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct mqprio_sched *priv = qdisc_priv(sch);
+	unsigned char *tail = skb_tail_pointer(skb);
+	struct tc_mqprio_qopt opt = { 0 };
+	struct Qdisc *child;
+	unsigned int n;
+
+	/* restart the aggregate counters from zero on every dump */
+	sch->q.qlen = 0;
+	memset(&sch->bstats, 0, sizeof(sch->bstats));
+	memset(&sch->qstats, 0, sizeof(sch->qstats));
+
+	for (n = 0; n < dev->num_tx_queues; n++) {
+		child = netdev_get_tx_queue(dev, n)->qdisc;
+		spin_lock_bh(qdisc_lock(child));
+		sch->q.qlen += child->q.qlen;
+		sch->bstats.bytes += child->bstats.bytes;
+		sch->bstats.packets += child->bstats.packets;
+		sch->qstats.qlen += child->qstats.qlen;
+		sch->qstats.backlog += child->qstats.backlog;
+		sch->qstats.drops += child->qstats.drops;
+		sch->qstats.requeues += child->qstats.requeues;
+		sch->qstats.overlimits += child->qstats.overlimits;
+		spin_unlock_bh(qdisc_lock(child));
+	}
+
+	opt.hw = priv->hw_owned;
+	opt.num_tc = netdev_get_num_tc(dev);
+	memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
+	for (n = 0; n < netdev_get_num_tc(dev); n++) {
+		opt.count[n] = dev->tc_to_txq[n].count;
+		opt.offset[n] = dev->tc_to_txq[n].offset;
+	}
+
+	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+	return skb->len;
+nla_put_failure:
+	nlmsg_trim(skb, tail);
+	return -1;
+}
+
+/* Return the sleeping qdisc of the queue behind class @cl, NULL if none */
+static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl)
+{
+	struct netdev_queue *txq = mqprio_queue_get(sch, cl);
+
+	if (txq)
+		return txq->qdisc_sleeping;
+	return NULL;
+}
+
+/* Translate @classid to a class handle; 0 when the minor is out of range */
+static unsigned long mqprio_get(struct Qdisc *sch, u32 classid)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	unsigned int minor = TC_H_MIN(classid);
+	unsigned int limit = dev->num_tx_queues + netdev_get_num_tc(dev);
+
+	return minor <= limit ? minor : 0;
+}
+
+static void mqprio_put(struct Qdisc *sch, unsigned long cl)
+{
+}	/* no-op: mqprio_get() returns a plain index, nothing to release */
+
+/* Fill @tcm for class @cl: parent, child qdisc handle and class handle. */
+static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
+			 struct sk_buff *skb, struct tcmsg *tcm)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct netdev_queue *txq;
+	int tc, q_idx;
+
+	tcm->tcm_handle |= TC_H_MIN(cl);
+	if (cl <= netdev_get_num_tc(dev)) {
+		tcm->tcm_parent = TC_H_ROOT;
+		tcm->tcm_info = 0;
+		return 0;
+	}
+
+	/* queue class: parent is the tc whose range holds the queue */
+	txq = mqprio_queue_get(sch, cl);
+	q_idx = cl - netdev_get_num_tc(dev);	/* 1-based queue number */
+	tcm->tcm_parent = 0;
+	for (tc = 0; tc < netdev_get_num_tc(dev); tc++) {
+		const struct netdev_tc_txq *map = &dev->tc_to_txq[tc];
+
+		if (q_idx > map->offset && q_idx <= map->offset + map->count) {
+			tcm->tcm_parent = TC_H_MAKE(TC_H_MAJ(sch->handle),
+						    TC_H_MIN(tc + 1));
+			break;
+		}
+	}
+	tcm->tcm_info = txq->qdisc_sleeping->handle;
+	return 0;
+}
+
+/* Class stats: summed over the tc's queues, or taken from one queue's qdisc */
+static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+			       struct gnet_dump *d)
+{
+	struct net_device *dev = qdisc_dev(sch);
+
+	if (cl <= netdev_get_num_tc(dev)) {
+		int i;
+		struct Qdisc *qdisc;
+		struct gnet_stats_queue qstats = {0};
+		struct gnet_stats_basic_packed bstats = {0};
+		struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1];
+
+		/* Drop d->lock here and retake it before copying the stats:
+		 * it is the lock on dev_queue->qdisc_sleeping, which is also
+		 * acquired in the loop below.
+		 */
+		spin_unlock_bh(d->lock);
+
+		for (i = tc.offset; i < tc.offset + tc.count; i++) {
+			qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+			spin_lock_bh(qdisc_lock(qdisc));
+			bstats.bytes      += qdisc->bstats.bytes;
+			bstats.packets    += qdisc->bstats.packets;
+			qstats.qlen       += qdisc->qstats.qlen;
+			qstats.backlog    += qdisc->qstats.backlog;
+			qstats.drops      += qdisc->qstats.drops;
+			qstats.requeues   += qdisc->qstats.requeues;
+			qstats.overlimits += qdisc->qstats.overlimits;
+			spin_unlock_bh(qdisc_lock(qdisc));
+		}
+		/* Reclaim root sleeping lock before completing stats */
+		spin_lock_bh(d->lock);
+		if (gnet_stats_copy_basic(d, &bstats) < 0 ||
+		    gnet_stats_copy_queue(d, &qstats) < 0)
+			return -1;
+	} else {
+		struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
+
+		sch = dev_queue->qdisc_sleeping;
+		sch->qstats.qlen = sch->q.qlen;
+		if (gnet_stats_copy_basic(d, &sch->bstats) < 0 ||
+		    gnet_stats_copy_queue(d, &sch->qstats) < 0)
+			return -1;
+	}
+	return 0;
+}
+
+/* Visit every class: num_tc tc classes followed by one class per tx queue */
+static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	unsigned long idx;
+
+	if (arg->stop)
+		return;
+
+	arg->count = arg->skip;
+	idx = arg->skip;
+	while (idx < dev->num_tx_queues + netdev_get_num_tc(dev)) {
+		if (arg->fn(sch, idx + 1, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+		arg->count++;
+		idx++;
+	}
+}
+
+static const struct Qdisc_class_ops mqprio_class_ops = {
+	.graft		= mqprio_graft,
+	.leaf		= mqprio_leaf,
+	.get		= mqprio_get,
+	.put		= mqprio_put,		/* empty function */
+	.walk		= mqprio_walk,
+	.dump		= mqprio_dump_class,
+	.dump_stats	= mqprio_dump_class_stats,
+};
+
+struct Qdisc_ops mqprio_qdisc_ops __read_mostly = {
+	.cl_ops		= &mqprio_class_ops,
+	.id		= "mqprio",
+	.priv_size	= sizeof(struct mqprio_sched),
+	.init		= mqprio_init,		/* builds the child array */
+	.destroy	= mqprio_destroy,
+	.attach		= mqprio_attach,	/* grafts and frees that array */
+	.dump		= mqprio_dump,
+	.owner		= THIS_MODULE,
+};
+
+static int __init mqprio_module_init(void)
+{
+	return register_qdisc(&mqprio_qdisc_ops);	/* status passed through */
+}
+
+static void __exit mqprio_module_exit(void)
+{
+	unregister_qdisc(&mqprio_qdisc_ops);	/* pairs with module init */
+}
+
+module_init(mqprio_module_init);
+module_exit(mqprio_module_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 21f13da2476..edc1950e0e7 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -83,7 +83,6 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	ret = qdisc_enqueue(skb, qdisc);
 	if (ret == NET_XMIT_SUCCESS) {
-		qdisc_bstats_update(sch, skb);
 		sch->q.qlen++;
 		return NET_XMIT_SUCCESS;
 	}
@@ -112,6 +111,7 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
 			qdisc = q->queues[q->curband];
 			skb = qdisc->dequeue(qdisc);
 			if (skb) {
+				qdisc_bstats_update(sch, skb);
 				sch->q.qlen--;
 				return skb;
 			}
@@ -156,7 +156,7 @@ static unsigned int multiq_drop(struct Qdisc *sch)
 	unsigned int len;
 	struct Qdisc *qdisc;
 
-	for (band = q->bands-1; band >= 0; band--) {
+	for (band = q->bands - 1; band >= 0; band--) {
 		qdisc = q->queues[band];
 		if (qdisc->ops->drop) {
 			len = qdisc->ops->drop(qdisc);
@@ -265,7 +265,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
 	for (i = 0; i < q->max_bands; i++)
 		q->queues[i] = &noop_qdisc;
 
-	err = multiq_tune(sch,opt);
+	err = multiq_tune(sch, opt);
 
 	if (err)
 		kfree(q->queues);
@@ -346,7 +346,7 @@ static int multiq_dump_class(struct Qdisc *sch, unsigned long cl,
 	struct multiq_sched_data *q = qdisc_priv(sch);
 
 	tcm->tcm_handle |= TC_H_MIN(cl);
-	tcm->tcm_info = q->queues[cl-1]->handle;
+	tcm->tcm_info = q->queues[cl - 1]->handle;
 	return 0;
 }
 
@@ -378,7 +378,7 @@ static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 			arg->count++;
 			continue;
 		}
-		if (arg->fn(sch, band+1, arg) < 0) {
+		if (arg->fn(sch, band + 1, arg) < 0) {
 			arg->stop = 1;
 			break;
 		}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 1c4bce86347..64f0d3293b4 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -211,8 +211,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	}
 
 	cb = netem_skb_cb(skb);
-	if (q->gap == 0 || 		/* not doing reordering */
-	    q->counter < q->gap || 	/* inside last reordering gap */
+	if (q->gap == 0 ||		/* not doing reordering */
+	    q->counter < q->gap ||	/* inside last reordering gap */
 	    q->reorder < get_crandom(&q->reorder_cor)) {
 		psched_time_t now;
 		psched_tdiff_t delay;
@@ -240,7 +240,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	if (likely(ret == NET_XMIT_SUCCESS)) {
 		sch->q.qlen++;
-		qdisc_bstats_update(sch, skb);
 	} else if (net_xmit_drop_count(ret)) {
 		sch->qstats.drops++;
 	}
@@ -249,7 +248,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	return ret;
 }
 
-static unsigned int netem_drop(struct Qdisc* sch)
+static unsigned int netem_drop(struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 	unsigned int len = 0;
@@ -266,7 +265,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 	struct netem_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
 
-	if (sch->flags & TCQ_F_THROTTLED)
+	if (qdisc_is_throttled(sch))
 		return NULL;
 
 	skb = q->qdisc->ops->peek(q->qdisc);
@@ -289,6 +288,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 				skb->tstamp.tv64 = 0;
 #endif
 			pr_debug("netem_dequeue: return skb=%p\n", skb);
+			qdisc_bstats_update(sch, skb);
 			sch->q.qlen--;
 			return skb;
 		}
@@ -476,7 +476,6 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 		__skb_queue_after(list, skb, nskb);
 
 		sch->qstats.backlog += qdisc_pkt_len(nskb);
-		qdisc_bstats_update(sch, nskb);
 
 		return NET_XMIT_SUCCESS;
 	}
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 966158d49dd..2a318f2dc3e 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -22,8 +22,7 @@
 #include <net/pkt_sched.h>
 
 
-struct prio_sched_data
-{
+struct prio_sched_data {
 	int bands;
 	struct tcf_proto *filter_list;
 	u8  prio2band[TC_PRIO_MAX+1];
@@ -54,7 +53,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		if (!q->filter_list || err < 0) {
 			if (TC_H_MAJ(band))
 				band = 0;
-			return q->queues[q->prio2band[band&TC_PRIO_MAX]];
+			return q->queues[q->prio2band[band & TC_PRIO_MAX]];
 		}
 		band = res.classid;
 	}
@@ -84,7 +83,6 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	ret = qdisc_enqueue(skb, qdisc);
 	if (ret == NET_XMIT_SUCCESS) {
-		qdisc_bstats_update(sch, skb);
 		sch->q.qlen++;
 		return NET_XMIT_SUCCESS;
 	}
@@ -107,7 +105,7 @@ static struct sk_buff *prio_peek(struct Qdisc *sch)
 	return NULL;
 }
 
-static struct sk_buff *prio_dequeue(struct Qdisc* sch)
+static struct sk_buff *prio_dequeue(struct Qdisc *sch)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
 	int prio;
@@ -116,6 +114,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc* sch)
 		struct Qdisc *qdisc = q->queues[prio];
 		struct sk_buff *skb = qdisc->dequeue(qdisc);
 		if (skb) {
+			qdisc_bstats_update(sch, skb);
 			sch->q.qlen--;
 			return skb;
 		}
@@ -124,7 +123,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc* sch)
 
 }
 
-static unsigned int prio_drop(struct Qdisc* sch)
+static unsigned int prio_drop(struct Qdisc *sch)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
 	int prio;
@@ -143,24 +142,24 @@ static unsigned int prio_drop(struct Qdisc* sch)
 
 
 static void
-prio_reset(struct Qdisc* sch)
+prio_reset(struct Qdisc *sch)
 {
 	int prio;
 	struct prio_sched_data *q = qdisc_priv(sch);
 
-	for (prio=0; prio<q->bands; prio++)
+	for (prio = 0; prio < q->bands; prio++)
 		qdisc_reset(q->queues[prio]);
 	sch->q.qlen = 0;
 }
 
 static void
-prio_destroy(struct Qdisc* sch)
+prio_destroy(struct Qdisc *sch)
 {
 	int prio;
 	struct prio_sched_data *q = qdisc_priv(sch);
 
 	tcf_destroy_chain(&q->filter_list);
-	for (prio=0; prio<q->bands; prio++)
+	for (prio = 0; prio < q->bands; prio++)
 		qdisc_destroy(q->queues[prio]);
 }
 
@@ -177,7 +176,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
 	if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2)
 		return -EINVAL;
 
-	for (i=0; i<=TC_PRIO_MAX; i++) {
+	for (i = 0; i <= TC_PRIO_MAX; i++) {
 		if (qopt->priomap[i] >= qopt->bands)
 			return -EINVAL;
 	}
@@ -186,7 +185,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
 	q->bands = qopt->bands;
 	memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
 
-	for (i=q->bands; i<TCQ_PRIO_BANDS; i++) {
+	for (i = q->bands; i < TCQ_PRIO_BANDS; i++) {
 		struct Qdisc *child = q->queues[i];
 		q->queues[i] = &noop_qdisc;
 		if (child != &noop_qdisc) {
@@ -196,9 +195,10 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
 	}
 	sch_tree_unlock(sch);
 
-	for (i=0; i<q->bands; i++) {
+	for (i = 0; i < q->bands; i++) {
 		if (q->queues[i] == &noop_qdisc) {
 			struct Qdisc *child, *old;
+
 			child = qdisc_create_dflt(sch->dev_queue,
 						  &pfifo_qdisc_ops,
 						  TC_H_MAKE(sch->handle, i + 1));
@@ -224,7 +224,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt)
 	struct prio_sched_data *q = qdisc_priv(sch);
 	int i;
 
-	for (i=0; i<TCQ_PRIO_BANDS; i++)
+	for (i = 0; i < TCQ_PRIO_BANDS; i++)
 		q->queues[i] = &noop_qdisc;
 
 	if (opt == NULL) {
@@ -232,7 +232,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt)
 	} else {
 		int err;
 
-		if ((err= prio_tune(sch, opt)) != 0)
+		if ((err = prio_tune(sch, opt)) != 0)
 			return err;
 	}
 	return 0;
@@ -245,7 +245,7 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct tc_prio_qopt opt;
 
 	opt.bands = q->bands;
-	memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1);
+	memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX + 1);
 
 	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
 
@@ -342,7 +342,7 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 			arg->count++;
 			continue;
 		}
-		if (arg->fn(sch, prio+1, arg) < 0) {
+		if (arg->fn(sch, prio + 1, arg) < 0) {
 			arg->stop = 1;
 			break;
 		}
@@ -350,7 +350,7 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 	}
 }
 
-static struct tcf_proto ** prio_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto **prio_find_tcf(struct Qdisc *sch, unsigned long cl)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
 
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index a6009c5a2c9..6649463da1b 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -36,8 +36,7 @@
 	if RED works correctly.
  */
 
-struct red_sched_data
-{
+struct red_sched_data {
 	u32			limit;		/* HARD maximal queue length */
 	unsigned char		flags;
 	struct red_parms	parms;
@@ -55,7 +54,7 @@ static inline int red_use_harddrop(struct red_sched_data *q)
 	return q->flags & TC_RED_HARDDROP;
 }
 
-static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
 	struct Qdisc *child = q->qdisc;
@@ -67,34 +66,33 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 		red_end_of_idle_period(&q->parms);
 
 	switch (red_action(&q->parms, q->parms.qavg)) {
-		case RED_DONT_MARK:
-			break;
-
-		case RED_PROB_MARK:
-			sch->qstats.overlimits++;
-			if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
-				q->stats.prob_drop++;
-				goto congestion_drop;
-			}
-
-			q->stats.prob_mark++;
-			break;
-
-		case RED_HARD_MARK:
-			sch->qstats.overlimits++;
-			if (red_use_harddrop(q) || !red_use_ecn(q) ||
-			    !INET_ECN_set_ce(skb)) {
-				q->stats.forced_drop++;
-				goto congestion_drop;
-			}
-
-			q->stats.forced_mark++;
-			break;
+	case RED_DONT_MARK:
+		break;
+
+	case RED_PROB_MARK:
+		sch->qstats.overlimits++;
+		if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
+			q->stats.prob_drop++;
+			goto congestion_drop;
+		}
+
+		q->stats.prob_mark++;
+		break;
+
+	case RED_HARD_MARK:
+		sch->qstats.overlimits++;
+		if (red_use_harddrop(q) || !red_use_ecn(q) ||
+		    !INET_ECN_set_ce(skb)) {
+			q->stats.forced_drop++;
+			goto congestion_drop;
+		}
+
+		q->stats.forced_mark++;
+		break;
 	}
 
 	ret = qdisc_enqueue(skb, child);
 	if (likely(ret == NET_XMIT_SUCCESS)) {
-		qdisc_bstats_update(sch, skb);
 		sch->q.qlen++;
 	} else if (net_xmit_drop_count(ret)) {
 		q->stats.pdrop++;
@@ -107,22 +105,24 @@ congestion_drop:
 	return NET_XMIT_CN;
 }
 
-static struct sk_buff * red_dequeue(struct Qdisc* sch)
+static struct sk_buff *red_dequeue(struct Qdisc *sch)
 {
 	struct sk_buff *skb;
 	struct red_sched_data *q = qdisc_priv(sch);
 	struct Qdisc *child = q->qdisc;
 
 	skb = child->dequeue(child);
-	if (skb)
+	if (skb) {
+		qdisc_bstats_update(sch, skb);
 		sch->q.qlen--;
-	else if (!red_is_idling(&q->parms))
-		red_start_of_idle_period(&q->parms);
-
+	} else {
+		if (!red_is_idling(&q->parms))
+			red_start_of_idle_period(&q->parms);
+	}
 	return skb;
 }
 
-static struct sk_buff * red_peek(struct Qdisc* sch)
+static struct sk_buff *red_peek(struct Qdisc *sch)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
 	struct Qdisc *child = q->qdisc;
@@ -130,7 +130,7 @@ static struct sk_buff * red_peek(struct Qdisc* sch)
 	return child->ops->peek(child);
 }
 
-static unsigned int red_drop(struct Qdisc* sch)
+static unsigned int red_drop(struct Qdisc *sch)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
 	struct Qdisc *child = q->qdisc;
@@ -149,7 +149,7 @@ static unsigned int red_drop(struct Qdisc* sch)
 	return 0;
 }
 
-static void red_reset(struct Qdisc* sch)
+static void red_reset(struct Qdisc *sch)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
 
@@ -216,7 +216,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
 	return 0;
 }
 
-static int red_init(struct Qdisc* sch, struct nlattr *opt)
+static int red_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
 
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 239ec53a634..4cff4423577 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -21,6 +21,7 @@
 #include <linux/skbuff.h>
 #include <linux/jhash.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
 #include <net/ip.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
@@ -76,7 +77,8 @@
 #define SFQ_DEPTH		128 /* max number of packets per flow */
 #define SFQ_SLOTS		128 /* max number of flows */
 #define SFQ_EMPTY_SLOT		255
-#define SFQ_HASH_DIVISOR	1024
+#define SFQ_DEFAULT_HASH_DIVISOR 1024
+
 /* We use 16 bits to store allot, and want to handle packets up to 64K
  * Scale allot by 8 (1<<3) so that no overflow occurs.
  */
@@ -92,8 +94,7 @@ typedef unsigned char sfq_index;
  * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1]
  * are 'pointers' to dep[] array
  */
-struct sfq_head
-{
+struct sfq_head {
 	sfq_index	next;
 	sfq_index	prev;
 };
@@ -108,13 +109,12 @@ struct sfq_slot {
 	short		allot; /* credit for this slot */
 };
 
-struct sfq_sched_data
-{
+struct sfq_sched_data {
 /* Parameters */
 	int		perturb_period;
-	unsigned	quantum;	/* Allotment per round: MUST BE >= MTU */
+	unsigned int	quantum;	/* Allotment per round: MUST BE >= MTU */
 	int		limit;
-
+	unsigned int	divisor;	/* number of slots in hash table */
 /* Variables */
 	struct tcf_proto *filter_list;
 	struct timer_list perturb_timer;
@@ -122,7 +122,7 @@ struct sfq_sched_data
 	sfq_index	cur_depth;	/* depth of longest slot */
 	unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
 	struct sfq_slot *tail;		/* current slot in round */
-	sfq_index	ht[SFQ_HASH_DIVISOR];	/* Hash table */
+	sfq_index	*ht;		/* Hash table (divisor slots) */
 	struct sfq_slot	slots[SFQ_SLOTS];
 	struct sfq_head	dep[SFQ_DEPTH];	/* Linked list of slots, indexed by depth */
 };
@@ -137,12 +137,12 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index
 	return &q->dep[val - SFQ_SLOTS];
 }
 
-static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
+static unsigned int sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
 {
-	return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1);
+	return jhash_2words(h, h1, q->perturbation) & (q->divisor - 1);
 }
 
-static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
+static unsigned int sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 {
 	u32 h, h2;
 
@@ -157,13 +157,13 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 		iph = ip_hdr(skb);
 		h = (__force u32)iph->daddr;
 		h2 = (__force u32)iph->saddr ^ iph->protocol;
-		if (iph->frag_off & htons(IP_MF|IP_OFFSET))
+		if (iph->frag_off & htons(IP_MF | IP_OFFSET))
 			break;
 		poff = proto_ports_offset(iph->protocol);
 		if (poff >= 0 &&
 		    pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
 			iph = ip_hdr(skb);
-			h2 ^= *(u32*)((void *)iph + iph->ihl * 4 + poff);
+			h2 ^= *(u32 *)((void *)iph + iph->ihl * 4 + poff);
 		}
 		break;
 	}
@@ -181,7 +181,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 		if (poff >= 0 &&
 		    pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) {
 			iph = ipv6_hdr(skb);
-			h2 ^= *(u32*)((void *)iph + sizeof(*iph) + poff);
+			h2 ^= *(u32 *)((void *)iph + sizeof(*iph) + poff);
 		}
 		break;
 	}
@@ -203,7 +203,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 
 	if (TC_H_MAJ(skb->priority) == sch->handle &&
 	    TC_H_MIN(skb->priority) > 0 &&
-	    TC_H_MIN(skb->priority) <= SFQ_HASH_DIVISOR)
+	    TC_H_MIN(skb->priority) <= q->divisor)
 		return TC_H_MIN(skb->priority);
 
 	if (!q->filter_list)
@@ -221,7 +221,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 			return 0;
 		}
 #endif
-		if (TC_H_MIN(res.classid) <= SFQ_HASH_DIVISOR)
+		if (TC_H_MIN(res.classid) <= q->divisor)
 			return TC_H_MIN(res.classid);
 	}
 	return 0;
@@ -402,10 +402,8 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		q->tail = slot;
 		slot->allot = q->scaled_quantum;
 	}
-	if (++sch->q.qlen <= q->limit) {
-		qdisc_bstats_update(sch, skb);
+	if (++sch->q.qlen <= q->limit)
 		return NET_XMIT_SUCCESS;
-	}
 
 	sfq_drop(sch);
 	return NET_XMIT_CN;
@@ -445,6 +443,7 @@ next_slot:
 	}
 	skb = slot_dequeue_head(slot);
 	sfq_dec(q, a);
+	qdisc_bstats_update(sch, skb);
 	sch->q.qlen--;
 	sch->qstats.backlog -= qdisc_pkt_len(skb);
 
@@ -498,7 +497,11 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 	q->perturb_period = ctl->perturb_period * HZ;
 	if (ctl->limit)
 		q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
-
+	if (ctl->divisor) {
+		if (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)
+			return -EINVAL;
+		q->divisor = ctl->divisor;
+	}
 	qlen = sch->q.qlen;
 	while (sch->q.qlen > q->limit)
 		sfq_drop(sch);
@@ -516,15 +519,13 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
+	size_t sz;
 	int i;
 
 	q->perturb_timer.function = sfq_perturbation;
 	q->perturb_timer.data = (unsigned long)sch;
 	init_timer_deferrable(&q->perturb_timer);
 
-	for (i = 0; i < SFQ_HASH_DIVISOR; i++)
-		q->ht[i] = SFQ_EMPTY_SLOT;
-
 	for (i = 0; i < SFQ_DEPTH; i++) {
 		q->dep[i].next = i + SFQ_SLOTS;
 		q->dep[i].prev = i + SFQ_SLOTS;
@@ -533,6 +534,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 	q->limit = SFQ_DEPTH - 1;
 	q->cur_depth = 0;
 	q->tail = NULL;
+	q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
 	if (opt == NULL) {
 		q->quantum = psched_mtu(qdisc_dev(sch));
 		q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
@@ -544,10 +546,23 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 			return err;
 	}
 
+	sz = sizeof(q->ht[0]) * q->divisor;
+	q->ht = kmalloc(sz, GFP_KERNEL);
+	if (!q->ht && sz > PAGE_SIZE)
+		q->ht = vmalloc(sz);
+	if (!q->ht)
+		return -ENOMEM;
+	for (i = 0; i < q->divisor; i++)
+		q->ht[i] = SFQ_EMPTY_SLOT;
+
 	for (i = 0; i < SFQ_SLOTS; i++) {
 		slot_queue_init(&q->slots[i]);
 		sfq_link(q, i);
 	}
+	if (q->limit >= 1)
+		sch->flags |= TCQ_F_CAN_BYPASS;
+	else
+		sch->flags &= ~TCQ_F_CAN_BYPASS;
 	return 0;
 }
 
@@ -558,6 +573,10 @@ static void sfq_destroy(struct Qdisc *sch)
 	tcf_destroy_chain(&q->filter_list);
 	q->perturb_period = 0;
 	del_timer_sync(&q->perturb_timer);
+	if (is_vmalloc_addr(q->ht))
+		vfree(q->ht);
+	else
+		kfree(q->ht);
 }
 
 static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -570,7 +589,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 	opt.perturb_period = q->perturb_period / HZ;
 
 	opt.limit = q->limit;
-	opt.divisor = SFQ_HASH_DIVISOR;
+	opt.divisor = q->divisor;
 	opt.flows = q->limit;
 
 	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
@@ -595,6 +614,8 @@ static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
 static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
 			      u32 classid)
 {
+	/* we cannot bypass queue discipline anymore */
+	sch->flags &= ~TCQ_F_CAN_BYPASS;
 	return 0;
 }
 
@@ -648,7 +669,7 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 	if (arg->stop)
 		return;
 
-	for (i = 0; i < SFQ_HASH_DIVISOR; i++) {
+	for (i = 0; i < q->divisor; i++) {
 		if (q->ht[i] == SFQ_EMPTY_SLOT ||
 		    arg->count < arg->skip) {
 			arg->count++;
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 77565e72181..1dcfb5223a8 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -97,8 +97,7 @@
 	changed the limit is not effective anymore.
 */
 
-struct tbf_sched_data
-{
+struct tbf_sched_data {
 /* Parameters */
 	u32		limit;		/* Maximal length of backlog: bytes */
 	u32		buffer;		/* Token bucket depth/rate: MUST BE >= MTU/B */
@@ -115,10 +114,10 @@ struct tbf_sched_data
 	struct qdisc_watchdog watchdog;	/* Watchdog timer */
 };
 
-#define L2T(q,L)   qdisc_l2t((q)->R_tab,L)
-#define L2T_P(q,L) qdisc_l2t((q)->P_tab,L)
+#define L2T(q, L)   qdisc_l2t((q)->R_tab, L)
+#define L2T_P(q, L) qdisc_l2t((q)->P_tab, L)
 
-static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct tbf_sched_data *q = qdisc_priv(sch);
 	int ret;
@@ -134,11 +133,10 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 	}
 
 	sch->q.qlen++;
-	qdisc_bstats_update(sch, skb);
 	return NET_XMIT_SUCCESS;
 }
 
-static unsigned int tbf_drop(struct Qdisc* sch)
+static unsigned int tbf_drop(struct Qdisc *sch)
 {
 	struct tbf_sched_data *q = qdisc_priv(sch);
 	unsigned int len = 0;
@@ -150,7 +148,7 @@ static unsigned int tbf_drop(struct Qdisc* sch)
 	return len;
 }
 
-static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
+static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
 {
 	struct tbf_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
@@ -186,7 +184,8 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
 			q->tokens = toks;
 			q->ptokens = ptoks;
 			sch->q.qlen--;
-			sch->flags &= ~TCQ_F_THROTTLED;
+			qdisc_unthrottled(sch);
+			qdisc_bstats_update(sch, skb);
 			return skb;
 		}
 
@@ -209,7 +208,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
 	return NULL;
 }
 
-static void tbf_reset(struct Qdisc* sch)
+static void tbf_reset(struct Qdisc *sch)
 {
 	struct tbf_sched_data *q = qdisc_priv(sch);
 
@@ -227,7 +226,7 @@ static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
 	[TCA_TBF_PTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
 };
 
-static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
+static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
 {
 	int err;
 	struct tbf_sched_data *q = qdisc_priv(sch);
@@ -236,7 +235,7 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
 	struct qdisc_rate_table *rtab = NULL;
 	struct qdisc_rate_table *ptab = NULL;
 	struct Qdisc *child = NULL;
-	int max_size,n;
+	int max_size, n;
 
 	err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
 	if (err < 0)
@@ -259,15 +258,18 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
 	}
 
 	for (n = 0; n < 256; n++)
-		if (rtab->data[n] > qopt->buffer) break;
-	max_size = (n << qopt->rate.cell_log)-1;
+		if (rtab->data[n] > qopt->buffer)
+			break;
+	max_size = (n << qopt->rate.cell_log) - 1;
 	if (ptab) {
 		int size;
 
 		for (n = 0; n < 256; n++)
-			if (ptab->data[n] > qopt->mtu) break;
-		size = (n << qopt->peakrate.cell_log)-1;
-		if (size < max_size) max_size = size;
+			if (ptab->data[n] > qopt->mtu)
+				break;
+		size = (n << qopt->peakrate.cell_log) - 1;
+		if (size < max_size)
+			max_size = size;
 	}
 	if (max_size < 0)
 		goto done;
@@ -310,7 +312,7 @@ done:
 	return err;
 }
 
-static int tbf_init(struct Qdisc* sch, struct nlattr *opt)
+static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct tbf_sched_data *q = qdisc_priv(sch);
 
@@ -422,8 +424,7 @@ static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 	}
 }
 
-static const struct Qdisc_class_ops tbf_class_ops =
-{
+static const struct Qdisc_class_ops tbf_class_ops = {
 	.graft		=	tbf_graft,
 	.leaf		=	tbf_leaf,
 	.get		=	tbf_get,
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 84ce48eadff..45cd30098e3 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -53,8 +53,7 @@
       which will not break load balancing, though native slave
       traffic will have the highest priority.  */
 
-struct teql_master
-{
+struct teql_master {
 	struct Qdisc_ops qops;
 	struct net_device *dev;
 	struct Qdisc *slaves;
@@ -65,29 +64,27 @@ struct teql_master
 	unsigned long	tx_dropped;
 };
 
-struct teql_sched_data
-{
+struct teql_sched_data {
 	struct Qdisc *next;
 	struct teql_master *m;
 	struct neighbour *ncache;
 	struct sk_buff_head q;
 };
 
-#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next)
+#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)
 
-#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT)
+#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
 
 /* "teql*" qdisc routines */
 
 static int
-teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct net_device *dev = qdisc_dev(sch);
 	struct teql_sched_data *q = qdisc_priv(sch);
 
 	if (q->q.qlen < dev->tx_queue_len) {
 		__skb_queue_tail(&q->q, skb);
-		qdisc_bstats_update(sch, skb);
 		return NET_XMIT_SUCCESS;
 	}
 
@@ -97,7 +94,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 }
 
 static struct sk_buff *
-teql_dequeue(struct Qdisc* sch)
+teql_dequeue(struct Qdisc *sch)
 {
 	struct teql_sched_data *dat = qdisc_priv(sch);
 	struct netdev_queue *dat_queue;
@@ -111,19 +108,21 @@ teql_dequeue(struct Qdisc* sch)
 			dat->m->slaves = sch;
 			netif_wake_queue(m);
 		}
+	} else {
+		qdisc_bstats_update(sch, skb);
 	}
 	sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
 	return skb;
 }
 
 static struct sk_buff *
-teql_peek(struct Qdisc* sch)
+teql_peek(struct Qdisc *sch)
 {
 	/* teql is meant to be used as root qdisc */
 	return NULL;
 }
 
-static __inline__ void
+static inline void
 teql_neigh_release(struct neighbour *n)
 {
 	if (n)
@@ -131,7 +130,7 @@ teql_neigh_release(struct neighbour *n)
 }
 
 static void
-teql_reset(struct Qdisc* sch)
+teql_reset(struct Qdisc *sch)
 {
 	struct teql_sched_data *dat = qdisc_priv(sch);
 
@@ -141,13 +140,14 @@ teql_reset(struct Qdisc* sch)
 }
 
 static void
-teql_destroy(struct Qdisc* sch)
+teql_destroy(struct Qdisc *sch)
 {
 	struct Qdisc *q, *prev;
 	struct teql_sched_data *dat = qdisc_priv(sch);
 	struct teql_master *master = dat->m;
 
-	if ((prev = master->slaves) != NULL) {
+	prev = master->slaves;
+	if (prev) {
 		do {
 			q = NEXT_SLAVE(prev);
 			if (q == sch) {
@@ -179,7 +179,7 @@ teql_destroy(struct Qdisc* sch)
 static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct net_device *dev = qdisc_dev(sch);
-	struct teql_master *m = (struct teql_master*)sch->ops;
+	struct teql_master *m = (struct teql_master *)sch->ops;
 	struct teql_sched_data *q = qdisc_priv(sch);
 
 	if (dev->hard_header_len > m->dev->hard_header_len)
@@ -290,7 +290,8 @@ restart:
 	nores = 0;
 	busy = 0;
 
-	if ((q = start) == NULL)
+	q = start;
+	if (!q)
 		goto drop;
 
 	do {
@@ -355,10 +356,10 @@ drop:
 
 static int teql_master_open(struct net_device *dev)
 {
-	struct Qdisc * q;
+	struct Qdisc *q;
 	struct teql_master *m = netdev_priv(dev);
 	int mtu = 0xFFFE;
-	unsigned flags = IFF_NOARP|IFF_MULTICAST;
+	unsigned int flags = IFF_NOARP | IFF_MULTICAST;
 
 	if (m->slaves == NULL)
 		return -EUNATCH;
@@ -426,7 +427,7 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu)
 		do {
 			if (new_mtu > qdisc_dev(q)->mtu)
 				return -EINVAL;
-		} while ((q=NEXT_SLAVE(q)) != m->slaves);
+		} while ((q = NEXT_SLAVE(q)) != m->slaves);
 	}
 
 	dev->mtu = new_mtu;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index dd419d28620..d8d98d5b508 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1475,6 +1475,12 @@ restart:
 			goto out_free;
 	}
 
+	if (sk_filter(other, skb) < 0) {
+		/* Toss the packet but do not return any error to the sender */
+		err = len;
+		goto out_free;
+	}
+
 	unix_state_lock(other);
 	err = -EPERM;
 	if (!unix_may_send(sk, other))
@@ -1978,36 +1984,38 @@ static int unix_shutdown(struct socket *sock, int mode)
 
 	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
 
-	if (mode) {
-		unix_state_lock(sk);
-		sk->sk_shutdown |= mode;
-		other = unix_peer(sk);
-		if (other)
-			sock_hold(other);
-		unix_state_unlock(sk);
-		sk->sk_state_change(sk);
-
-		if (other &&
-			(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
-
-			int peer_mode = 0;
-
-			if (mode&RCV_SHUTDOWN)
-				peer_mode |= SEND_SHUTDOWN;
-			if (mode&SEND_SHUTDOWN)
-				peer_mode |= RCV_SHUTDOWN;
-			unix_state_lock(other);
-			other->sk_shutdown |= peer_mode;
-			unix_state_unlock(other);
-			other->sk_state_change(other);
-			if (peer_mode == SHUTDOWN_MASK)
-				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
-			else if (peer_mode & RCV_SHUTDOWN)
-				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
-		}
-		if (other)
-			sock_put(other);
+	if (!mode)
+		return 0;
+
+	unix_state_lock(sk);
+	sk->sk_shutdown |= mode;
+	other = unix_peer(sk);
+	if (other)
+		sock_hold(other);
+	unix_state_unlock(sk);
+	sk->sk_state_change(sk);
+
+	if (other &&
+		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
+
+		int peer_mode = 0;
+
+		if (mode&RCV_SHUTDOWN)
+			peer_mode |= SEND_SHUTDOWN;
+		if (mode&SEND_SHUTDOWN)
+			peer_mode |= RCV_SHUTDOWN;
+		unix_state_lock(other);
+		other->sk_shutdown |= peer_mode;
+		unix_state_unlock(other);
+		other->sk_state_change(other);
+		if (peer_mode == SHUTDOWN_MASK)
+			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
+		else if (peer_mode & RCV_SHUTDOWN)
+			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
 	}
+	if (other)
+		sock_put(other);
+
 	return 0;
 }
 
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index 74944a2dd43..788a12c1eb5 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -59,8 +59,6 @@
 #include <asm/uaccess.h>        /* copy_to/from_user */
 #include <linux/init.h>         /* __initfunc et al. */
 
-#define KMEM_SAFETYZONE 8
-
 #define DEV_TO_SLAVE(dev)	(*((struct net_device **)netdev_priv(dev)))
 
 /*
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 37693b6ef23..c565689f0b9 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1801,9 +1801,9 @@ void regulatory_hint_disconnect(void)
 
 static bool freq_is_chan_12_13_14(u16 freq)
 {
-	if (freq == ieee80211_channel_to_frequency(12) ||
-	    freq == ieee80211_channel_to_frequency(13) ||
-	    freq == ieee80211_channel_to_frequency(14))
+	if (freq == ieee80211_channel_to_frequency(12, IEEE80211_BAND_2GHZ) ||
+	    freq == ieee80211_channel_to_frequency(13, IEEE80211_BAND_2GHZ) ||
+	    freq == ieee80211_channel_to_frequency(14, IEEE80211_BAND_2GHZ))
 		return true;
 	return false;
 }
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 7620ae2fcf1..4ed065d8bb5 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -29,29 +29,37 @@ ieee80211_get_response_rate(struct ieee80211_supported_band *sband,
 }
 EXPORT_SYMBOL(ieee80211_get_response_rate);
 
-int ieee80211_channel_to_frequency(int chan)
+int ieee80211_channel_to_frequency(int chan, enum ieee80211_band band)
 {
-	if (chan < 14)
-		return 2407 + chan * 5;
-
-	if (chan == 14)
-		return 2484;
-
-	/* FIXME: 802.11j 17.3.8.3.2 */
-	return (chan + 1000) * 5;
+	/* see 802.11 17.3.8.3.2 and Annex J
+	 * there are overlapping channel numbers in 5GHz and 2GHz bands */
+	if (band == IEEE80211_BAND_5GHZ) {
+		if (chan >= 182 && chan <= 196)
+			return 4000 + chan * 5;
+		else
+			return 5000 + chan * 5;
+	} else { /* IEEE80211_BAND_2GHZ */
+		if (chan == 14)
+			return 2484;
+		else if (chan < 14)
+			return 2407 + chan * 5;
+		else
+			return 0; /* not supported */
+	}
 }
 EXPORT_SYMBOL(ieee80211_channel_to_frequency);
 
 int ieee80211_frequency_to_channel(int freq)
 {
+	/* see 802.11 17.3.8.3.2 and Annex J */
 	if (freq == 2484)
 		return 14;
-
-	if (freq < 2484)
+	else if (freq < 2484)
 		return (freq - 2407) / 5;
-
-	/* FIXME: 802.11j 17.3.8.3.2 */
-	return freq/5 - 1000;
+	else if (freq >= 4910 && freq <= 4980)
+		return (freq - 4000) / 5;
+	else
+		return (freq - 5000) / 5;
 }
 EXPORT_SYMBOL(ieee80211_frequency_to_channel);
 
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 3e5dbd4e4cd..7f1f4ec4904 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -267,9 +267,12 @@ int cfg80211_wext_freq(struct wiphy *wiphy, struct iw_freq *freq)
 	 * -EINVAL for impossible things.
 	 */
 	if (freq->e == 0) {
+		enum ieee80211_band band = IEEE80211_BAND_2GHZ;
 		if (freq->m < 0)
 			return 0;
-		return ieee80211_channel_to_frequency(freq->m);
+		if (freq->m > 14)
+			band = IEEE80211_BAND_5GHZ;
+		return ieee80211_channel_to_frequency(freq->m, band);
 	} else {
 		int i, div = 1000000;
 		for (i = 0; i < freq->e; i++)