445 files changed, 31865 insertions, 8041 deletions
diff --git a/net/802/Makefile b/net/802/Makefile
index 01861929591..977704a54f6 100644
--- a/net/802/Makefile
+++ b/net/802/Makefile
@@ -2,8 +2,6 @@
 # Makefile for the Linux 802.x protocol layers.
 #
 
-obj-y			:= p8023.o
-
 # Check the p8022 selections against net/core/Makefile.
 obj-$(CONFIG_SYSCTL)	+= sysctl_net_802.o
 obj-$(CONFIG_LLC)	+= p8022.o psnap.o
@@ -11,5 +9,5 @@ obj-$(CONFIG_TR)	+= p8022.o psnap.o tr.o sysctl_net_802.o
 obj-$(CONFIG_NET_FC)	+=                 fc.o
 obj-$(CONFIG_FDDI)	+=                 fddi.o
 obj-$(CONFIG_HIPPI)	+=                 hippi.o
-obj-$(CONFIG_IPX)	+= p8022.o psnap.o
+obj-$(CONFIG_IPX)	+= p8022.o psnap.o p8023.o
 obj-$(CONFIG_ATALK)	+= p8022.o psnap.o
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 91e412b0ab0..fa76220708c 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -19,6 +19,7 @@
  */
 
 #include <asm/uaccess.h> /* for copy_from_user */
+#include <linux/capability.h>
 #include <linux/module.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
@@ -753,6 +754,8 @@ static int vlan_ioctl_handler(void __user *arg)
 		break;
 	case GET_VLAN_REALDEV_NAME_CMD:
 		err = vlan_dev_get_realdev_name(args.device1, args.u.device2);
+		if (err)
+			goto out;
 		if (copy_to_user(arg, &args,
 				 sizeof(struct vlan_ioctl_args))) {
 			err = -EFAULT;
@@ -761,6 +764,8 @@ static int vlan_ioctl_handler(void __user *arg)
 
 	case GET_VLAN_VID_CMD:
 		err = vlan_dev_get_vid(args.device1, &vid);
+		if (err)
+			goto out;
 		args.u.VID = vid;
 		if (copy_to_user(arg, &args,
 				 sizeof(struct vlan_ioctl_args))) {
@@ -774,7 +779,7 @@ static int vlan_ioctl_handler(void __user *arg)
 			__FUNCTION__, args.cmd);
 		return -EINVAL;
 	};
-
+out:
 	return err;
 }
 
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index b7486488967..0f604d227da 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -165,6 +165,9 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 
 	skb_pull(skb, VLAN_HLEN); /* take off the VLAN header (4 bytes currently) */
 
+	/* Need to correct hardware checksum */
+	skb_postpull_rcsum(skb, vhdr, VLAN_HLEN);
+
 	/* Ok, lets check to make sure the device (dev) we
 	 * came in on is what this VLAN is attached to.
 	 */
@@ -211,7 +214,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 		 * This allows the VLAN to have a different MAC than the underlying
 		 * device, and still route correctly.
 		 */
-		if (memcmp(eth_hdr(skb)->h_dest, skb->dev->dev_addr, ETH_ALEN) == 0) {
+		if (!compare_ether_addr(eth_hdr(skb)->h_dest, skb->dev->dev_addr)) {
 			/* It is for our (changed) MAC-address! */
 			skb->pkt_type = PACKET_HOST;
 		}
diff --git a/net/Kconfig b/net/Kconfig
index 60f6f321bd7..bc603d9aea5 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -150,6 +150,7 @@ endif
 
 source "net/dccp/Kconfig"
 source "net/sctp/Kconfig"
+source "net/tipc/Kconfig"
 source "net/atm/Kconfig"
 source "net/bridge/Kconfig"
 source "net/8021q/Kconfig"
diff --git a/net/Makefile b/net/Makefile
index f5141b9d4f3..065796f5fb1 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -45,6 +45,7 @@ obj-$(CONFIG_VLAN_8021Q)	+= 8021q/
 obj-$(CONFIG_IP_DCCP)		+= dccp/
 obj-$(CONFIG_IP_SCTP)		+= sctp/
 obj-$(CONFIG_IEEE80211)		+= ieee80211/
+obj-$(CONFIG_TIPC)		+= tipc/
 
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_SYSCTL)		+= sysctl_net.o
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 7982656b9c8..697ac55e29d 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -52,6 +52,7 @@
  */
 
 #include <linux/config.h>
+#include <linux/capability.h>
 #include <linux/module.h>
 #include <linux/if_arp.h>
 #include <linux/termios.h>	/* For TIOCOUTQ/INQ */
@@ -63,7 +64,7 @@
 #include <linux/atalk.h>
 
 struct datalink_proto *ddp_dl, *aarp_dl;
-static struct proto_ops atalk_dgram_ops;
+static const struct proto_ops atalk_dgram_ops;
 
 /**************************************************************************\
 *                                                                          *
@@ -1763,7 +1764,7 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
  */
 static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
-	int rc = -EINVAL;
+	int rc = -ENOIOCTLCMD;
 	struct sock *sk = sock->sk;
 	void __user *argp = (void __user *)arg;
 
@@ -1813,23 +1814,6 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 			rc = atif_ioctl(cmd, argp);
 			rtnl_unlock();
 			break;
-		/* Physical layer ioctl calls */
-		case SIOCSIFLINK:
-		case SIOCGIFHWADDR:
-		case SIOCSIFHWADDR:
-		case SIOCGIFFLAGS:
-		case SIOCSIFFLAGS:
-		case SIOCGIFTXQLEN:
-		case SIOCSIFTXQLEN:
-		case SIOCGIFMTU:
-		case SIOCGIFCONF:
-		case SIOCADDMULTI:
-		case SIOCDELMULTI:
-		case SIOCGIFCOUNT:
-		case SIOCGIFINDEX:
-		case SIOCGIFNAME:
-			rc = dev_ioctl(cmd, argp);
-			break;
 	}
 
 	return rc;
@@ -1841,7 +1825,7 @@ static struct net_proto_family atalk_family_ops = {
 	.owner		= THIS_MODULE,
 };
 
-static struct proto_ops SOCKOPS_WRAPPED(atalk_dgram_ops) = {
+static const struct proto_ops SOCKOPS_WRAPPED(atalk_dgram_ops) = {
 	.family		= PF_APPLETALK,
 	.owner		= THIS_MODULE,
 	.release	= atalk_release,
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 72f3f7b8de8..680ccb12aae 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -18,6 +18,7 @@ Author: Marcell GAL, 2000, XDSL Ltd, Hungary
 #include <net/arp.h>
 #include <linux/atm.h>
 #include <linux/atmdev.h>
+#include <linux/capability.h>
 #include <linux/seq_file.h>
 
 #include <linux/atmbr2684.h>
@@ -295,14 +296,14 @@ static inline __be16 br_type_trans(struct sk_buff *skb, struct net_device *dev)
 	unsigned char *rawp;
 	eth = eth_hdr(skb);
 
-	if (*eth->h_dest & 1) {
-		if (memcmp(eth->h_dest, dev->broadcast, ETH_ALEN) == 0)
+	if (is_multicast_ether_addr(eth->h_dest)) {
+		if (!compare_ether_addr(eth->h_dest, dev->broadcast))
 			skb->pkt_type = PACKET_BROADCAST;
 		else
 			skb->pkt_type = PACKET_MULTICAST;
 	}
 
-	else if (memcmp(eth->h_dest, dev->dev_addr, ETH_ALEN))
+	else if (compare_ether_addr(eth->h_dest, dev->dev_addr))
 		skb->pkt_type = PACKET_OTHERHOST;
 
 	if (ntohs(eth->h_proto) >= 1536)
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 4f54c9a5e84..73370de9753 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -19,6 +19,7 @@
 #include <linux/atmdev.h>
 #include <linux/atmclip.h>
 #include <linux/atmarp.h>
+#include <linux/capability.h>
 #include <linux/ip.h> /* for net/route.h */
 #include <linux/in.h> /* for struct sockaddr_in */
 #include <linux/if.h> /* for IFF_UP */
diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index a150198b05a..eb109af7eb4 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -12,6 +12,7 @@
 #include <linux/atmdev.h>
 #include <linux/atmclip.h>	/* CLIP_*ENCAP */
 #include <linux/atmarp.h>	/* manifest constants */
+#include <linux/capability.h>
 #include <linux/sonet.h>	/* for ioctls */
 #include <linux/atmsvc.h>
 #include <linux/atmmpc.h>
diff --git a/net/atm/lec.c b/net/atm/lec.c
index ad840b9afba..c4fc722fef9 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -7,6 +7,7 @@
 #include <linux/config.h>
 #include <linux/kernel.h>
 #include <linux/bitops.h>
+#include <linux/capability.h>
 
 /* We are ethernet device */
 #include <linux/if_ether.h>
@@ -1321,7 +1322,7 @@ static int lane2_associate_req (struct net_device *dev, u8 *lan_dst,
         struct sk_buff *skb;
         struct lec_priv *priv = (struct lec_priv*)dev->priv;
 
-        if ( memcmp(lan_dst, dev->dev_addr, ETH_ALEN) != 0 )
+        if (compare_ether_addr(lan_dst, dev->dev_addr))
                 return (0);       /* not our mac address */
 
         kfree(priv->tlvs); /* NULL if there was no previous association */
@@ -1798,7 +1799,7 @@ lec_arp_find(struct lec_priv *priv,
   
         to_return = priv->lec_arp_tables[place];
         while(to_return) {
-                if (memcmp(mac_addr, to_return->mac_addr, ETH_ALEN) == 0) {
+                if (!compare_ether_addr(mac_addr, to_return->mac_addr)) {
                         return to_return;
                 }
                 to_return = to_return->next;
@@ -1811,8 +1812,7 @@ make_entry(struct lec_priv *priv, unsigned char *mac_addr)
 {
         struct lec_arp_table *to_return;
 
-        to_return = (struct lec_arp_table *) kmalloc(sizeof(struct lec_arp_table),
-						     GFP_ATOMIC);
+        to_return = kmalloc(sizeof(struct lec_arp_table), GFP_ATOMIC);
         if (!to_return) {
                 printk("LEC: Arp entry kmalloc failed\n");
                 return NULL;
@@ -2002,7 +2002,7 @@ lec_arp_resolve(struct lec_priv *priv, unsigned char *mac_to_find,
                         return priv->mcast_vcc;
                         break;
                 case 2:  /* LANE2 wants arp for multicast addresses */
-                        if ( memcmp(mac_to_find, bus_mac, ETH_ALEN) == 0)
+                        if (!compare_ether_addr(mac_to_find, bus_mac))
                                 return priv->mcast_vcc;
                         break;
                 default:
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 526d9531411..c304ef1513b 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -3,6 +3,7 @@
 #include <linux/timer.h>
 #include <linux/init.h>
 #include <linux/bitops.h>
+#include <linux/capability.h>
 #include <linux/seq_file.h>
 
 /* We are an ethernet device */
@@ -552,7 +553,7 @@ static int mpc_send_packet(struct sk_buff *skb, struct net_device *dev)
 		goto non_ip; /* Multi-Protocol Over ATM :-) */
 
 	while (i < mpc->number_of_mps_macs) {
-		if (memcmp(eth->h_dest, (mpc->mps_macs + i*ETH_ALEN), ETH_ALEN) == 0)
+		if (!compare_ether_addr(eth->h_dest, (mpc->mps_macs + i*ETH_ALEN)))
 			if ( send_via_shortcut(skb, mpc) == 0 )           /* try shortcut */
 				return 0;                                 /* success!     */
 		i++;
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index 58f4a2b5aeb..1489067c1e8 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -39,6 +39,7 @@
 #include <linux/skbuff.h>
 #include <linux/atm.h>
 #include <linux/atmdev.h>
+#include <linux/capability.h>
 #include <linux/ppp_defs.h>
 #include <linux/if_ppp.h>
 #include <linux/ppp_channel.h>
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index 2684a92da22..f2c541774dc 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -102,7 +102,7 @@ static int pvc_getname(struct socket *sock,struct sockaddr *sockaddr,
 }
 
 
-static struct proto_ops pvc_proto_ops = {
+static const struct proto_ops pvc_proto_ops = {
 	.family =	PF_ATMPVC,
 	.owner =	THIS_MODULE,
 
diff --git a/net/atm/raw.c b/net/atm/raw.c
index 4a0466e91aa..3e57b17ca52 100644
--- a/net/atm/raw.c
+++ b/net/atm/raw.c
@@ -6,6 +6,7 @@
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/atmdev.h>
+#include <linux/capability.h>
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
 #include <linux/mm.h>
diff --git a/net/atm/resources.c b/net/atm/resources.c
index c8c459fcb03..224190537c9 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -16,6 +16,7 @@
 #include <linux/kernel.h> /* for barrier */
 #include <linux/module.h>
 #include <linux/bitops.h>
+#include <linux/capability.h>
 #include <linux/delay.h>
 #include <net/sock.h>	 /* for struct sock */
 
diff --git a/net/atm/svc.c b/net/atm/svc.c
index d7b266136bf..3a180cfd7b4 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -613,7 +613,7 @@ static int svc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	return error;
 }
 
-static struct proto_ops svc_proto_ops = {
+static const struct proto_ops svc_proto_ops = {
 	.family =	PF_ATMSVC,
 	.owner =	THIS_MODULE,
 
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 1b683f30265..dbf9b47681f 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -14,6 +14,7 @@
  * Copyright (C) Frederic Rible F1OAT (frible@teaser.fr)
  */
 #include <linux/config.h>
+#include <linux/capability.h>
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/types.h>
@@ -54,7 +55,7 @@
 HLIST_HEAD(ax25_list);
 DEFINE_SPINLOCK(ax25_list_lock);
 
-static struct proto_ops ax25_proto_ops;
+static const struct proto_ops ax25_proto_ops;
 
 static void ax25_free_sock(struct sock *sk)
 {
@@ -1827,7 +1828,7 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		break;
 
 	default:
-		res = dev_ioctl(cmd, argp);
+		res = -ENOIOCTLCMD;
 		break;
 	}
 	release_sock(sk);
@@ -1944,7 +1945,7 @@ static struct net_proto_family ax25_family_ops = {
 	.owner	=	THIS_MODULE,
 };
 
-static struct proto_ops ax25_proto_ops = {
+static const struct proto_ops ax25_proto_ops = {
 	.family		= PF_AX25,
 	.owner		= THIS_MODULE,
 	.release	= ax25_release,
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index b1e945bd6ed..f04f8630fd2 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -11,6 +11,8 @@
  * Copyright (C) Hans-Joachim Hetscher DD8NE (dd8ne@bnv-bamberg.de)
  * Copyright (C) Frederic Rible F1OAT (frible@teaser.fr)
  */
+
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/socket.h>
diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c
index d53cc861586..b8b5854bce9 100644
--- a/net/ax25/ax25_uid.c
+++ b/net/ax25/ax25_uid.c
@@ -6,6 +6,8 @@
  *
  * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
  */
+
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/socket.h>
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index ea616e3fc98..fb031fe9be9 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -287,10 +287,9 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo)
 		timeo = schedule_timeout(timeo);
 		lock_sock(sk);
 
-		if (sk->sk_err) {
-			err = sock_error(sk);
+		err = sock_error(sk);
+		if (err)
 			break;
-		}
 	}
 	set_current_state(TASK_RUNNING);
 	remove_wait_queue(sk->sk_sleep, &wait);
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 682bf20af52..cbb20c32a6c 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -75,7 +75,7 @@ static struct bnep_session *__bnep_get_session(u8 *dst)
 
 	list_for_each(p, &bnep_session_list) {
 		s = list_entry(p, struct bnep_session, list);	
-		if (!memcmp(dst, s->eh.h_source, ETH_ALEN))
+		if (!compare_ether_addr(dst, s->eh.h_source))
 			return s;
 	}
 	return NULL;
@@ -420,10 +420,10 @@ static inline int bnep_tx_frame(struct bnep_session *s, struct sk_buff *skb)
 	iv[il++] = (struct kvec) { &type, 1 };
 	len++;
 
-	if (!memcmp(eh->h_dest, s->eh.h_source, ETH_ALEN))
+	if (!compare_ether_addr(eh->h_dest, s->eh.h_source))
 		type |= 0x01;
 
-	if (!memcmp(eh->h_source, s->eh.h_dest, ETH_ALEN))
+	if (!compare_ether_addr(eh->h_source, s->eh.h_dest))
 		type |= 0x02;
 
 	if (type)
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index 9778c6acd53..2bfe796cf05 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -32,6 +32,7 @@
 #include <linux/module.h>
 
 #include <linux/types.h>
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
@@ -146,7 +147,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
 	return 0;
 }
 
-static struct proto_ops bnep_sock_ops = {
+static const struct proto_ops bnep_sock_ops = {
 	.family     = PF_BLUETOOTH,
 	.owner      = THIS_MODULE,
 	.release    = bnep_sock_release,
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
index beb045bf571..8f8fad23f78 100644
--- a/net/bluetooth/cmtp/sock.c
+++ b/net/bluetooth/cmtp/sock.c
@@ -24,6 +24,7 @@
 #include <linux/module.h>
 
 #include <linux/types.h>
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
@@ -137,7 +138,7 @@ static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
 	return -EINVAL;
 }
 
-static struct proto_ops cmtp_sock_ops = {
+static const struct proto_ops cmtp_sock_ops = {
 	.family		= PF_BLUETOOTH,
 	.owner		= THIS_MODULE,
 	.release	= cmtp_sock_release,
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index a31244e5888..f812ed129e5 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -403,7 +403,7 @@ int hci_get_conn_list(void __user *arg)
 
 	size = sizeof(req) + req.conn_num * sizeof(*ci);
 
-	if (!(cl = (void *) kmalloc(size, GFP_KERNEL)))
+	if (!(cl = kmalloc(size, GFP_KERNEL)))
 		return -ENOMEM;
 
 	if (!(hdev = hci_dev_get(req.dev_id))) {
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 1d6d0a15c09..bdb6458c6bd 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -28,6 +28,7 @@
 #include <linux/module.h>
 
 #include <linux/types.h>
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
@@ -575,7 +576,7 @@ static int hci_sock_getsockopt(struct socket *sock, int level, int optname, char
 	return 0;
 }
 
-static struct proto_ops hci_sock_ops = {
+static const struct proto_ops hci_sock_ops = {
 	.family		= PF_BLUETOOTH,
 	.owner		= THIS_MODULE,
 	.release	= hci_sock_release,
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index bd7568ac87f..0ed38740388 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -78,7 +78,7 @@ static struct class_device_attribute *bt_attrs[] = {
 };
 
 #ifdef CONFIG_HOTPLUG
-static int bt_hotplug(struct class_device *cdev, char **envp, int num_envp, char *buf, int size)
+static int bt_uevent(struct class_device *cdev, char **envp, int num_envp, char *buf, int size)
 {
 	struct hci_dev *hdev = class_get_devdata(cdev);
 	int n, i = 0;
@@ -107,7 +107,7 @@ struct class bt_class = {
 	.name		= "bluetooth",
 	.release	= bt_release,
 #ifdef CONFIG_HOTPLUG
-	.hotplug	= bt_hotplug,
+	.uevent		= bt_uevent,
 #endif
 };
 
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index f8986f88143..b8f67761b88 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -24,6 +24,7 @@
 #include <linux/module.h>
 
 #include <linux/types.h>
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
@@ -143,7 +144,7 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
 	return -EINVAL;
 }
 
-static struct proto_ops hidp_sock_ops = {
+static const struct proto_ops hidp_sock_ops = {
 	.family		= PF_BLUETOOTH,
 	.owner		= THIS_MODULE,
 	.release	= hidp_sock_release,
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index e3bb11ca423..f6b4a808535 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -28,6 +28,7 @@
 #include <linux/module.h>
 
 #include <linux/types.h>
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
@@ -57,7 +58,7 @@
 
 #define VERSION "2.8"
 
-static struct proto_ops l2cap_sock_ops;
+static const struct proto_ops l2cap_sock_ops;
 
 static struct bt_sock_list l2cap_sk_list = {
 	.lock = RW_LOCK_UNLOCKED
@@ -767,8 +768,9 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
 
 	BT_DBG("sock %p, sk %p", sock, sk);
 
-	if (sk->sk_err)
-		return sock_error(sk);
+	err = sock_error(sk);
+	if (err)
+		return err;
 
 	if (msg->msg_flags & MSG_OOB)
 		return -EOPNOTSUPP;
@@ -2160,7 +2162,7 @@ static ssize_t l2cap_sysfs_show(struct class *dev, char *buf)
 
 static CLASS_ATTR(l2cap, S_IRUGO, l2cap_sysfs_show, NULL);
 
-static struct proto_ops l2cap_sock_ops = {
+static const struct proto_ops l2cap_sock_ops = {
 	.family		= PF_BLUETOOTH,
 	.owner		= THIS_MODULE,
 	.release	= l2cap_sock_release,
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 6c34261b232..757d2dd3b02 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -58,7 +58,7 @@
 #define BT_DBG(D...)
 #endif
 
-static struct proto_ops rfcomm_sock_ops;
+static const struct proto_ops rfcomm_sock_ops;
 
 static struct bt_sock_list rfcomm_sk_list = {
 	.lock = RW_LOCK_UNLOCKED
@@ -907,7 +907,7 @@ static ssize_t rfcomm_sock_sysfs_show(struct class *dev, char *buf)
 
 static CLASS_ATTR(rfcomm, S_IRUGO, rfcomm_sock_sysfs_show, NULL);
 
-static struct proto_ops rfcomm_sock_ops = {
+static const struct proto_ops rfcomm_sock_ops = {
 	.family		= PF_BLUETOOTH,
 	.owner		= THIS_MODULE,
 	.release	= rfcomm_sock_release,
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 158a9c46d86..74368f79ee5 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -34,6 +34,7 @@
 #include <linux/tty_driver.h>
 #include <linux/tty_flip.h>
 
+#include <linux/capability.h>
 #include <linux/slab.h>
 #include <linux/skbuff.h>
 
@@ -480,13 +481,8 @@ static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb)
 	BT_DBG("dlc %p tty %p len %d", dlc, tty, skb->len);
 
 	if (test_bit(TTY_DONT_FLIP, &tty->flags)) {
-		register int i;
-		for (i = 0; i < skb->len; i++) {
-			if (tty->flip.count >= TTY_FLIPBUF_SIZE)
-				tty_flip_buffer_push(tty);
-
-			tty_insert_flip_char(tty, skb->data[i], 0);
-		}
+		tty_buffer_request_room(tty, skb->len);
+		tty_insert_flip_string(tty, skb->data, skb->len);
 		tty_flip_buffer_push(tty);
 	} else
 		tty->ldisc.receive_buf(tty, skb->data, NULL, skb->len);
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 9cb00dc6c08..6b61323ce23 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -56,7 +56,7 @@
 
 #define VERSION "0.5"
 
-static struct proto_ops sco_sock_ops;
+static const struct proto_ops sco_sock_ops;
 
 static struct bt_sock_list sco_sk_list = {
 	.lock = RW_LOCK_UNLOCKED
@@ -637,8 +637,9 @@ static int sco_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 
 	BT_DBG("sock %p, sk %p", sock, sk);
 
-	if (sk->sk_err)
-		return sock_error(sk);
+	err = sock_error(sk);
+	if (err)
+		return err;
 
 	if (msg->msg_flags & MSG_OOB)
 		return -EOPNOTSUPP;
@@ -913,7 +914,7 @@ static ssize_t sco_sysfs_show(struct class *dev, char *buf)
 
 static CLASS_ATTR(sco, S_IRUGO, sco_sysfs_show, NULL);
 
-static struct proto_ops sco_sock_ops = {
+static const struct proto_ops sco_sock_ops = {
 	.family		= PF_BLUETOOTH,
 	.owner		= THIS_MODULE,
 	.release	= sco_sock_release,
diff --git a/net/bridge/br.c b/net/bridge/br.c
index f8f184942aa..188cc1ac49e 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -67,3 +67,4 @@ EXPORT_SYMBOL(br_should_route_hook);
 module_init(br_init)
 module_exit(br_deinit)
 MODULE_LICENSE("GPL");
+MODULE_VERSION(BR_VERSION);
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index f564ee99782..0b33a7b3a00 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -15,7 +15,9 @@
 
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
-#include <linux/module.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+
 #include <asm/uaccess.h>
 #include "br_private.h"
 
@@ -82,6 +84,87 @@ static int br_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
+/* Set the bridge MAC address. Only an address already in use by one of
+ * the bridge's ports is accepted; anything else gets -EADDRNOTAVAIL.
+ */
+static int br_set_mac_address(struct net_device *dev, void *p)
+{
+	struct net_bridge *br = netdev_priv(dev);
+	struct sockaddr *addr = p;	/* requested address is in sa_data */
+	struct net_bridge_port *port;
+	int err = -EADDRNOTAVAIL;	/* result if no port address matches */
+
+	spin_lock_bh(&br->lock);
+	list_for_each_entry(port, &br->port_list, list) {
+		if (!compare_ether_addr(port->dev->dev_addr, addr->sa_data)) {
+			br_stp_change_bridge_id(br, addr->sa_data);
+			err = 0;	/* matched a port: done searching */
+			break;
+		}
+	}
+	spin_unlock_bh(&br->lock);
+
+	return err;
+}
+
+static void br_getinfo(struct net_device *dev, struct ethtool_drvinfo *info)
+{
+	strcpy(info->driver, "bridge");		/* .get_drvinfo: fixed name */
+	strcpy(info->version, BR_VERSION);	/* bridge version macro */
+	strcpy(info->fw_version, "N/A");	/* placeholder string */
+	strcpy(info->bus_info, "N/A");		/* placeholder string */
+}
+
+static int br_set_sg(struct net_device *dev, u32 data)	/* .set_sg hook */
+{
+	struct net_bridge *br = netdev_priv(dev);
+
+	if (data)	/* nonzero sets the flag in the mask, zero clears it */
+		br->feature_mask |= NETIF_F_SG;
+	else
+		br->feature_mask &= ~NETIF_F_SG;
+
+	br_features_recompute(br);	/* recompute; it starts from feature_mask */
+	return 0;	/* always succeeds */
+}
+
+static int br_set_tso(struct net_device *dev, u32 data)	/* .set_tso hook */
+{
+	struct net_bridge *br = netdev_priv(dev);
+
+	if (data)	/* nonzero sets the flag in the mask, zero clears it */
+		br->feature_mask |= NETIF_F_TSO;
+	else
+		br->feature_mask &= ~NETIF_F_TSO;
+
+	br_features_recompute(br);	/* recompute; it starts from feature_mask */
+	return 0;	/* always succeeds */
+}
+
+static int br_set_tx_csum(struct net_device *dev, u32 data)	/* .set_tx_csum */
+{
+	struct net_bridge *br = netdev_priv(dev);
+
+	if (data)	/* nonzero sets the flag in the mask, zero clears it */
+		br->feature_mask |= NETIF_F_IP_CSUM;
+	else
+		br->feature_mask &= ~NETIF_F_IP_CSUM;
+
+	br_features_recompute(br);	/* recompute; it starts from feature_mask */
+	return 0;	/* always succeeds */
+}
+
+static struct ethtool_ops br_ethtool_ops = {	/* installed by br_dev_setup() */
+	.get_drvinfo = br_getinfo,
+	.get_link = ethtool_op_get_link,
+	.get_sg = ethtool_op_get_sg,	/* getters: generic ethtool_op_* helpers */
+	.set_sg = br_set_sg,		/* setters: edit br->feature_mask first */
+	.get_tx_csum = ethtool_op_get_tx_csum,
+	.set_tx_csum = br_set_tx_csum,
+	.get_tso = ethtool_op_get_tso,
+	.set_tso = br_set_tso,
+};
+
 void br_dev_setup(struct net_device *dev)
 {
 	memset(dev->dev_addr, 0, ETH_ALEN);
@@ -96,8 +179,12 @@ void br_dev_setup(struct net_device *dev)
 	dev->change_mtu = br_change_mtu;
 	dev->destructor = free_netdev;
 	SET_MODULE_OWNER(dev);
+ 	SET_ETHTOOL_OPS(dev, &br_ethtool_ops);
 	dev->stop = br_dev_stop;
 	dev->tx_queue_len = 0;
-	dev->set_mac_address = NULL;
+	dev->set_mac_address = br_set_mac_address;
 	dev->priv_flags = IFF_EBRIDGE;
+
+ 	dev->features = NETIF_F_SG | NETIF_F_FRAGLIST
+ 		| NETIF_F_HIGHDMA | NETIF_F_TSO | NETIF_F_IP_CSUM;
 }
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 975abe254b7..ba442883e87 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -20,6 +20,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/rtnetlink.h>
+#include <linux/if_ether.h>
 #include <net/sock.h>
 
 #include "br_private.h"
@@ -32,9 +33,8 @@
  * ethtool, use ethtool_ops.  Also, since driver might sleep need to
  * not be holding any locks.
  */
-static int br_initial_port_cost(struct net_device *dev)
+static int port_cost(struct net_device *dev)
 {
-
 	struct ethtool_cmd ecmd = { ETHTOOL_GSET };
 	struct ifreq ifr;
 	mm_segment_t old_fs;
@@ -58,10 +58,6 @@ static int br_initial_port_cost(struct net_device *dev)
 			return 2;
 		case SPEED_10:
 			return 100;
-		default:
-			pr_info("bridge: can't decode speed from %s: %d\n",
-				dev->name, ecmd.speed);
-			return 100;
 		}
 	}
 
@@ -75,6 +71,35 @@ static int br_initial_port_cost(struct net_device *dev)
 	return 100;	/* assume old 10Mbps */
 }
 
+
+/*
+ * Check for port carrier transitions. Runs from a work queue so it may
+ * call functions that sleep (such as the speed check) and to debounce.
+ * NOTE(review): takes RTNL; del_nbp() flushes this work - confirm no deadlock.
+ */
+static void port_carrier_check(void *arg)
+{
+	struct net_bridge_port *p = arg;	/* port given to INIT_WORK() */
+
+	rtnl_lock();
+	if (netif_carrier_ok(p->dev)) {
+		u32 cost = port_cost(p->dev);	/* read before taking br->lock */
+
+		spin_lock_bh(&p->br->lock);
+		if (p->state == BR_STATE_DISABLED) {	/* carrier up: enable */
+			p->path_cost = cost;
+			br_stp_enable_port(p);
+		}
+		spin_unlock_bh(&p->br->lock);
+	} else {
+		spin_lock_bh(&p->br->lock);
+		if (p->state != BR_STATE_DISABLED)	/* carrier down: disable */
+			br_stp_disable_port(p);
+		spin_unlock_bh(&p->br->lock);
+	}
+	rtnl_unlock();
+}
+
 static void destroy_nbp(struct net_bridge_port *p)
 {
 	struct net_device *dev = p->dev;
@@ -102,6 +127,9 @@ static void del_nbp(struct net_bridge_port *p)
 	dev->br_port = NULL;
 	dev_set_promiscuity(dev, -1);
 
+	cancel_delayed_work(&p->carrier_check);
+	flush_scheduled_work();
+
 	spin_lock_bh(&br->lock);
 	br_stp_disable_port(p);
 	spin_unlock_bh(&br->lock);
@@ -155,6 +183,7 @@ static struct net_device *new_bridge_dev(const char *name)
 	br->bridge_id.prio[1] = 0x00;
 	memset(br->bridge_id.addr, 0, ETH_ALEN);
 
+	br->feature_mask = dev->features;
 	br->stp_enabled = 0;
 	br->designated_root = br->bridge_id;
 	br->root_path_cost = 0;
@@ -195,10 +224,9 @@ static int find_portno(struct net_bridge *br)
 	return (index >= BR_MAX_PORTS) ? -EXFULL : index;
 }
 
-/* called with RTNL */
+/* called with RTNL but without bridge lock */
 static struct net_bridge_port *new_nbp(struct net_bridge *br, 
-				       struct net_device *dev,
-				       unsigned long cost)
+				       struct net_device *dev)
 {
 	int index;
 	struct net_bridge_port *p;
@@ -215,12 +243,13 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
 	p->br = br;
 	dev_hold(dev);
 	p->dev = dev;
-	p->path_cost = cost;
+	p->path_cost = port_cost(dev);
  	p->priority = 0x8000 >> BR_PORT_BITS;
 	dev->br_port = p;
 	p->port_no = index;
 	br_init_port(p);
 	p->state = BR_STATE_DISABLED;
+	INIT_WORK(&p->carrier_check, port_carrier_check, p);
 	kobject_init(&p->kobj);
 
 	return p;
@@ -295,7 +324,7 @@ int br_del_bridge(const char *name)
 	return ret;
 }
 
-/* Mtu of the bridge pseudo-device 1500 or the minimum of the ports */
+/* MTU of the bridge pseudo-device: ETH_DATA_LEN or the minimum of the ports */
 int br_min_mtu(const struct net_bridge *br)
 {
 	const struct net_bridge_port *p;
@@ -304,7 +333,7 @@ int br_min_mtu(const struct net_bridge *br)
 	ASSERT_RTNL();
 
 	if (list_empty(&br->port_list))
-		mtu = 1500;
+		mtu = ETH_DATA_LEN;
 	else {
 		list_for_each_entry(p, &br->port_list, list) {
 			if (!mtu  || p->dev->mtu < mtu)
@@ -322,9 +351,8 @@ void br_features_recompute(struct net_bridge *br)
 	struct net_bridge_port *p;
 	unsigned long features, checksum;
 
-	features = NETIF_F_SG | NETIF_F_FRAGLIST 
-		| NETIF_F_HIGHDMA | NETIF_F_TSO;
-	checksum = NETIF_F_IP_CSUM;	/* least commmon subset */
+	features = br->feature_mask &~ NETIF_F_IP_CSUM;
+	checksum = br->feature_mask & NETIF_F_IP_CSUM;
 
 	list_for_each_entry(p, &br->port_list, list) {
 		if (!(p->dev->features 
@@ -351,7 +379,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	if (dev->br_port != NULL)
 		return -EBUSY;
 
-	if (IS_ERR(p = new_nbp(br, dev, br_initial_port_cost(dev))))
+	if (IS_ERR(p = new_nbp(br, dev)))
 		return PTR_ERR(p);
 
  	if ((err = br_fdb_insert(br, p, dev->dev_addr)))
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index b88220a64cd..e3a73cead6b 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -53,6 +53,11 @@ int br_handle_frame_finish(struct sk_buff *skb)
 	/* insert into forwarding database after filtering to avoid spoofing */
 	br_fdb_update(p->br, p, eth_hdr(skb)->h_source);
 
+	if (p->state == BR_STATE_LEARNING) {
+		kfree_skb(skb);
+		goto out;
+	}
+
 	if (br->dev->flags & IFF_PROMISC) {
 		struct sk_buff *skb2;
 
@@ -63,7 +68,7 @@ int br_handle_frame_finish(struct sk_buff *skb)
 		}
 	}
 
-	if (dest[0] & 1) {
+	if (is_multicast_ether_addr(dest)) {
 		br_flood_forward(br, skb, !passedup);
 		if (!passedup)
 			br_pass_frame_up(br, skb);
@@ -107,9 +112,6 @@ int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb)
 	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
 		goto err;
 
-	if (p->state == BR_STATE_LEARNING)
-		br_fdb_update(p->br, p, eth_hdr(skb)->h_source);
-
 	if (p->br->stp_enabled &&
 	    !memcmp(dest, bridge_ula, 5) &&
 	    !(dest[5] & 0xF0)) {
@@ -118,9 +120,10 @@ int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb)
 				NULL, br_stp_handle_bpdu);
 			return 1;
 		}
+		goto err;
 	}
 
-	else if (p->state == BR_STATE_FORWARDING) {
+	if (p->state == BR_STATE_FORWARDING || p->state == BR_STATE_LEARNING) {
 		if (br_should_route_hook) {
 			if (br_should_route_hook(pskb)) 
 				return 0;
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index b8ce14b2218..159fb840982 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -13,6 +13,7 @@
  *	2 of the License, or (at your option) any later version.
  */
 
+#include <linux/capability.h>
 #include <linux/kernel.h>
 #include <linux/if_bridge.h>
 #include <linux/netdevice.h>
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index d8e36b77512..7cac3fb9f80 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -26,6 +26,7 @@
 #include <linux/ip.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
+#include <linux/if_arp.h>
 #include <linux/if_ether.h>
 #include <linux/if_vlan.h>
 #include <linux/netfilter_bridge.h>
@@ -33,8 +34,11 @@
 #include <linux/netfilter_ipv6.h>
 #include <linux/netfilter_arp.h>
 #include <linux/in_route.h>
+
 #include <net/ip.h>
 #include <net/ipv6.h>
+#include <net/route.h>
+
 #include <asm/uaccess.h>
 #include <asm/checksum.h>
 #include "br_private.h"
@@ -295,7 +299,7 @@ static int check_hbh_len(struct sk_buff *skb)
 	len -= 2;
 
 	while (len > 0) {
-		int optlen = raw[off+1]+2;
+		int optlen = skb->nh.raw[off+1]+2;
 
 		switch (skb->nh.raw[off]) {
 		case IPV6_TLV_PAD0:
@@ -308,18 +312,15 @@ static int check_hbh_len(struct sk_buff *skb)
 		case IPV6_TLV_JUMBO:
 			if (skb->nh.raw[off+1] != 4 || (off&3) != 2)
 				goto bad;
-
 			pkt_len = ntohl(*(u32*)(skb->nh.raw+off+2));
-
+			if (pkt_len <= IPV6_MAXPLEN ||
+			    skb->nh.ipv6h->payload_len)
+				goto bad;
 			if (pkt_len > skb->len - sizeof(struct ipv6hdr))
 				goto bad;
-			if (pkt_len + sizeof(struct ipv6hdr) < skb->len) {
-				if (__pskb_trim(skb,
-				    pkt_len + sizeof(struct ipv6hdr)))
-					goto bad;
-				if (skb->ip_summed == CHECKSUM_HW)
-					skb->ip_summed = CHECKSUM_NONE;
-			}
+			if (pskb_trim_rcsum(skb,
+			    pkt_len+sizeof(struct ipv6hdr)))
+				goto bad;
 			break;
 		default:
 			if (optlen > len)
@@ -372,6 +373,7 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
 	if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb))
 			goto inhdr_error;
 
+ 	nf_bridge_put(skb->nf_bridge);
 	if ((nf_bridge = nf_bridge_alloc(skb)) == NULL)
 		return NF_DROP;
 	setup_pre_routing(skb);
@@ -392,8 +394,9 @@ inhdr_error:
  * target in particular.  Save the original destination IP
  * address to be able to detect DNAT afterwards. */
 static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
-   const struct net_device *in, const struct net_device *out,
-   int (*okfn)(struct sk_buff *))
+				      const struct net_device *in,
+				      const struct net_device *out,
+				      int (*okfn)(struct sk_buff *))
 {
 	struct iphdr *iph;
 	__u32 len;
@@ -410,8 +413,10 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
 			goto out;
 
 		if (skb->protocol == __constant_htons(ETH_P_8021Q)) {
+			u8 *vhdr = skb->data;
 			skb_pull(skb, VLAN_HLEN);
-			(skb)->nh.raw += VLAN_HLEN;
+			skb_postpull_rcsum(skb, vhdr, VLAN_HLEN);
+			skb->nh.raw += VLAN_HLEN;
 		}
 		return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn);
 	}
@@ -427,8 +432,10 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
 		goto out;
 
 	if (skb->protocol == __constant_htons(ETH_P_8021Q)) {
+		u8 *vhdr = skb->data;
 		skb_pull(skb, VLAN_HLEN);
-		(skb)->nh.raw += VLAN_HLEN;
+		skb_postpull_rcsum(skb, vhdr, VLAN_HLEN);
+		skb->nh.raw += VLAN_HLEN;
 	}
 
 	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
@@ -455,6 +462,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
 			skb->ip_summed = CHECKSUM_NONE;
 	}
 
+ 	nf_bridge_put(skb->nf_bridge);
 	if ((nf_bridge = nf_bridge_alloc(skb)) == NULL)
 		return NF_DROP;
 	setup_pre_routing(skb);
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 917311c6828..a43a9c1d50d 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -52,17 +52,9 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
 		br_stp_recalculate_bridge_id(br);
 		break;
 
-	case NETDEV_CHANGE:	/* device is up but carrier changed */
-		if (!(br->dev->flags & IFF_UP))
-			break;
-
-		if (netif_carrier_ok(dev)) {
-			if (p->state == BR_STATE_DISABLED)
-				br_stp_enable_port(p);
-		} else {
-			if (p->state != BR_STATE_DISABLED)
-				br_stp_disable_port(p);
-		}
+	case NETDEV_CHANGE:
+		if (br->dev->flags & IFF_UP)
+			schedule_delayed_work(&p->carrier_check, BR_PORT_DEBOUNCE);
 		break;
 
 	case NETDEV_FEAT_CHANGE:
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index bdf95a74d8c..c5bd631ffcd 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -27,6 +27,10 @@
 #define BR_PORT_BITS	10
 #define BR_MAX_PORTS	(1<<BR_PORT_BITS)
 
+#define BR_PORT_DEBOUNCE (HZ/10)
+
+#define BR_VERSION	"2.1"
+
 typedef struct bridge_id bridge_id;
 typedef struct mac_addr mac_addr;
 typedef __u16 port_id;
@@ -78,6 +82,7 @@ struct net_bridge_port
 	struct timer_list		hold_timer;
 	struct timer_list		message_age_timer;
 	struct kobject			kobj;
+	struct work_struct		carrier_check;
 	struct rcu_head			rcu;
 };
 
@@ -90,6 +95,7 @@ struct net_bridge
 	spinlock_t			hash_lock;
 	struct hlist_head		hash[BR_HASH_SIZE];
 	struct list_head		age_list;
+	unsigned long			feature_mask;
 
 	/* STP */
 	bridge_id			designated_root;
@@ -201,6 +207,7 @@ extern void br_stp_disable_bridge(struct net_bridge *br);
 extern void br_stp_enable_port(struct net_bridge_port *p);
 extern void br_stp_disable_port(struct net_bridge_port *p);
 extern void br_stp_recalculate_bridge_id(struct net_bridge *br);
+extern void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *a);
 extern void br_stp_set_bridge_priority(struct net_bridge *br,
 				       u16 newprio);
 extern void br_stp_set_port_priority(struct net_bridge_port *p,
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index ac09b6a2352..cc047f7fb6e 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -120,8 +120,7 @@ void br_stp_disable_port(struct net_bridge_port *p)
 }
 
 /* called under bridge lock */
-static void br_stp_change_bridge_id(struct net_bridge *br, 
-				    const unsigned char *addr)
+void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr)
 {
 	unsigned char oldaddr[6];
 	struct net_bridge_port *p;
@@ -158,7 +157,7 @@ void br_stp_recalculate_bridge_id(struct net_bridge *br)
 
 	list_for_each_entry(p, &br->port_list, list) {
 		if (addr == br_mac_zero ||
-		    compare_ether_addr(p->dev->dev_addr, addr) < 0)
+		    memcmp(p->dev->dev_addr, addr, ETH_ALEN) < 0)
 			addr = p->dev->dev_addr;
 
 	}
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 98cf53c81fa..6f577f16c4c 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -11,6 +11,7 @@
  *	2 of the License, or (at your option) any later version.
  */
 
+#include <linux/capability.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
 #include <linux/if_bridge.h>
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index f6a19d53eae..0ac0355d16d 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -11,6 +11,7 @@
  *	2 of the License, or (at your option) any later version.
  */
 
+#include <linux/capability.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
 #include <linux/if_bridge.h>
@@ -248,7 +249,7 @@ int br_sysfs_addif(struct net_bridge_port *p)
 	if (err)
 		goto out2;
 
-	kobject_hotplug(&p->kobj, KOBJ_ADD);
+	kobject_uevent(&p->kobj, KOBJ_ADD);
 	return 0;
  out2:
 	kobject_del(&p->kobj);
@@ -260,7 +261,7 @@ void br_sysfs_removeif(struct net_bridge_port *p)
 {
 	pr_debug("br_sysfs_removeif\n");
 	sysfs_remove_link(&p->br->ifobj, p->dev->name);
-	kobject_hotplug(&p->kobj, KOBJ_REMOVE);
+	kobject_uevent(&p->kobj, KOBJ_REMOVE);
 	kobject_del(&p->kobj);
 }
 
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index c70b3be2302..b84fc6075fe 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -196,9 +196,13 @@ config BRIDGE_EBT_LOG
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config BRIDGE_EBT_ULOG
-	tristate "ebt: ulog support"
+	tristate "ebt: ulog support (OBSOLETE)"
 	depends on BRIDGE_NF_EBTABLES
 	help
+	  This option enables the old bridge-specific "ebt_ulog" implementation
+	  which has been obsoleted by the new "nfnetlink_log" code (see
+	  CONFIG_NETFILTER_NETLINK_LOG).
+
 	  This option adds the ulog watcher, that you can use in any rule
 	  in any ebtables table. The packet is passed to a userspace
 	  logging daemon using netlink multicast sockets. This differs
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index 7323805b972..dc5d0b2427c 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -15,6 +15,7 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_ip.h>
 #include <linux/ip.h>
+#include <net/ip.h>
 #include <linux/in.h>
 #include <linux/module.h>
 
@@ -51,6 +52,8 @@ static int ebt_filter_ip(const struct sk_buff *skb, const struct net_device *in,
 		if (!(info->bitmask & EBT_IP_DPORT) &&
 		    !(info->bitmask & EBT_IP_SPORT))
 			return EBT_MATCH;
+		if (ntohs(ih->frag_off) & IP_OFFSET)
+			return EBT_NOMATCH;
 		pptr = skb_header_pointer(skb, ih->ihl*4,
 					  sizeof(_ports), &_ports);
 		if (pptr == NULL)
@@ -89,7 +92,9 @@ static int ebt_ip_check(const char *tablename, unsigned int hookmask,
 		if (info->invflags & EBT_IP_PROTO)
 			return -EINVAL;
 		if (info->protocol != IPPROTO_TCP &&
-		    info->protocol != IPPROTO_UDP)
+		    info->protocol != IPPROTO_UDP &&
+		    info->protocol != IPPROTO_SCTP &&
+		    info->protocol != IPPROTO_DCCP)
 			 return -EINVAL;
 	}
 	if (info->bitmask & EBT_IP_DPORT && info->dport[0] > info->dport[1])
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 662975be3d1..0128fbbe232 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -3,15 +3,19 @@
  *
  *	Authors:
  *	Bart De Schuymer <bdschuym@pandora.be>
+ *	Harald Welte <laforge@netfilter.org>
  *
  *  April, 2002
  *
  */
 
+#include <linux/in.h>
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_log.h>
+#include <linux/netfilter.h>
 #include <linux/module.h>
 #include <linux/ip.h>
+#include <linux/in.h>
 #include <linux/if_arp.h>
 #include <linux/spinlock.h>
 
@@ -55,27 +59,30 @@ static void print_MAC(unsigned char *p)
 }
 
 #define myNIPQUAD(a) a[0], a[1], a[2], a[3]
-static void ebt_log(const struct sk_buff *skb, unsigned int hooknr,
-   const struct net_device *in, const struct net_device *out,
-   const void *data, unsigned int datalen)
+static void
+ebt_log_packet(unsigned int pf, unsigned int hooknum,
+   const struct sk_buff *skb, const struct net_device *in,
+   const struct net_device *out, const struct nf_loginfo *loginfo,
+   const char *prefix)
 {
-	struct ebt_log_info *info = (struct ebt_log_info *)data;
-	char level_string[4] = "< >";
+	unsigned int bitmask;
 
-	level_string[1] = '0' + info->loglevel;
 	spin_lock_bh(&ebt_log_lock);
-	printk(level_string);
-	printk("%s IN=%s OUT=%s ", info->prefix, in ? in->name : "",
-	   out ? out->name : "");
+	printk("<%c>%s IN=%s OUT=%s MAC source = ", '0' + loginfo->u.log.level,
+	       prefix, in ? in->name : "", out ? out->name : "");
 
-	printk("MAC source = ");
 	print_MAC(eth_hdr(skb)->h_source);
 	printk("MAC dest = ");
 	print_MAC(eth_hdr(skb)->h_dest);
 
 	printk("proto = 0x%04x", ntohs(eth_hdr(skb)->h_proto));
 
-	if ((info->bitmask & EBT_LOG_IP) && eth_hdr(skb)->h_proto ==
+	if (loginfo->type == NF_LOG_TYPE_LOG)
+		bitmask = loginfo->u.log.logflags;
+	else
+		bitmask = NF_LOG_MASK;
+
+	if ((bitmask & EBT_LOG_IP) && eth_hdr(skb)->h_proto ==
 	   htons(ETH_P_IP)){
 		struct iphdr _iph, *ih;
 
@@ -84,12 +91,13 @@ static void ebt_log(const struct sk_buff *skb, unsigned int hooknr,
 			printk(" INCOMPLETE IP header");
 			goto out;
 		}
-		printk(" IP SRC=%u.%u.%u.%u IP DST=%u.%u.%u.%u,",
-		   NIPQUAD(ih->saddr), NIPQUAD(ih->daddr));
-		printk(" IP tos=0x%02X, IP proto=%d", ih->tos,
-		       ih->protocol);
+		printk(" IP SRC=%u.%u.%u.%u IP DST=%u.%u.%u.%u, IP "
+		       "tos=0x%02X, IP proto=%d", NIPQUAD(ih->saddr),
+		       NIPQUAD(ih->daddr), ih->tos, ih->protocol);
 		if (ih->protocol == IPPROTO_TCP ||
-		    ih->protocol == IPPROTO_UDP) {
+		    ih->protocol == IPPROTO_UDP ||
+		    ih->protocol == IPPROTO_SCTP ||
+		    ih->protocol == IPPROTO_DCCP) {
 			struct tcpudphdr _ports, *pptr;
 
 			pptr = skb_header_pointer(skb, ih->ihl*4,
@@ -104,7 +112,7 @@ static void ebt_log(const struct sk_buff *skb, unsigned int hooknr,
 		goto out;
 	}
 
-	if ((info->bitmask & EBT_LOG_ARP) &&
+	if ((bitmask & EBT_LOG_ARP) &&
 	    ((eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) ||
 	     (eth_hdr(skb)->h_proto == htons(ETH_P_RARP)))) {
 		struct arphdr _arph, *ah;
@@ -144,6 +152,21 @@ static void ebt_log(const struct sk_buff *skb, unsigned int hooknr,
 out:
 	printk("\n");
 	spin_unlock_bh(&ebt_log_lock);
+
+}
+
+static void ebt_log(const struct sk_buff *skb, unsigned int hooknr,
+   const struct net_device *in, const struct net_device *out,
+   const void *data, unsigned int datalen)
+{
+	struct ebt_log_info *info = (struct ebt_log_info *)data;
+	struct nf_loginfo li;
+
+	li.type = NF_LOG_TYPE_LOG;
+	li.u.log.level = info->loglevel;
+	li.u.log.logflags = info->bitmask;
+
+	nf_log_packet(PF_BRIDGE, hooknr, skb, in, out, &li, info->prefix);
 }
 
 static struct ebt_watcher log =
@@ -154,13 +177,32 @@ static struct ebt_watcher log =
 	.me		= THIS_MODULE,
 };
 
+static struct nf_logger ebt_log_logger = {
+	.name 		= "ebt_log",
+	.logfn		= &ebt_log_packet,
+	.me		= THIS_MODULE,
+};
+
 static int __init init(void)
 {
-	return ebt_register_watcher(&log);
+	int ret;
+
+	ret = ebt_register_watcher(&log);
+	if (ret < 0)
+		return ret;
+	if (nf_log_register(PF_BRIDGE, &ebt_log_logger) < 0) {
+		printk(KERN_WARNING "ebt_log: not logging via system console "
+		       "since somebody else already registered for PF_BRIDGE\n");
+		/* we cannot make module load fail here, since otherwise
+		 * ebtables userspace would abort */
+	}
+
+	return 0;
 }
 
 static void __exit fini(void)
 {
+	nf_log_unregister_logger(&ebt_log_logger);
 	ebt_unregister_watcher(&log);
 }
 
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index f8a8cdec16e..0248c67277e 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -10,6 +10,7 @@
 
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_stp.h>
+#include <linux/etherdevice.h>
 #include <linux/module.h>
 
 #define BPDU_TYPE_CONFIG 0
@@ -164,8 +165,8 @@ static int ebt_stp_check(const char *tablename, unsigned int hookmask,
 	if (datalen != len)
 		return -EINVAL;
 	/* Make sure the match only receives stp frames */
-	if (memcmp(e->destmac, bridge_ula, ETH_ALEN) ||
-	    memcmp(e->destmsk, msk, ETH_ALEN) || !(e->bitmask & EBT_DESTMAC))
+	if (compare_ether_addr(e->destmac, bridge_ula) ||
+	    compare_ether_addr(e->destmsk, msk) || !(e->bitmask & EBT_DESTMAC))
 		return -EINVAL;
 
 	return 0;
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index aae26ae2e61..ce617b3dbbb 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -3,6 +3,7 @@
  *
  *	Authors:
  *	Bart De Schuymer <bdschuym@pandora.be>
+ *	Harald Welte <laforge@netfilter.org>
  *
  *  November, 2004
  *
@@ -115,14 +116,13 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size)
 	return skb;
 }
 
-static void ebt_ulog(const struct sk_buff *skb, unsigned int hooknr,
+static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
    const struct net_device *in, const struct net_device *out,
-   const void *data, unsigned int datalen)
+   const struct ebt_ulog_info *uloginfo, const char *prefix)
 {
 	ebt_ulog_packet_msg_t *pm;
 	size_t size, copy_len;
 	struct nlmsghdr *nlh;
-	struct ebt_ulog_info *uloginfo = (struct ebt_ulog_info *)data;
 	unsigned int group = uloginfo->nlgroup;
 	ebt_ulog_buff_t *ub = &ulog_buffers[group];
 	spinlock_t *lock = &ub->lock;
@@ -216,6 +216,39 @@ alloc_failure:
 	goto unlock;
 }
 
+/* this function is registered with the netfilter core */
+static void ebt_log_packet(unsigned int pf, unsigned int hooknum,
+   const struct sk_buff *skb, const struct net_device *in,
+   const struct net_device *out, const struct nf_loginfo *li,
+   const char *prefix)
+{
+	struct ebt_ulog_info loginfo;
+
+	if (!li || li->type != NF_LOG_TYPE_ULOG) {
+		loginfo.nlgroup = EBT_ULOG_DEFAULT_NLGROUP;
+		loginfo.cprange = 0;
+		loginfo.qthreshold = EBT_ULOG_DEFAULT_QTHRESHOLD;
+		loginfo.prefix[0] = '\0';
+	} else {
+		loginfo.nlgroup = li->u.ulog.group;
+		loginfo.cprange = li->u.ulog.copy_len;
+		loginfo.qthreshold = li->u.ulog.qthreshold;
+		strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix));
+	}
+
+	ebt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix);
+}
+
+static void ebt_ulog(const struct sk_buff *skb, unsigned int hooknr,
+   const struct net_device *in, const struct net_device *out,
+   const void *data, unsigned int datalen)
+{
+	struct ebt_ulog_info *uloginfo = (struct ebt_ulog_info *)data;
+
+	ebt_ulog_packet(hooknr, skb, in, out, uloginfo, NULL);
+}
+
+
 static int ebt_ulog_check(const char *tablename, unsigned int hookmask,
    const struct ebt_entry *e, void *data, unsigned int datalen)
 {
@@ -240,6 +273,12 @@ static struct ebt_watcher ulog = {
 	.me		= THIS_MODULE,
 };
 
+static struct nf_logger ebt_ulog_logger = {
+	.name		= EBT_ULOG_WATCHER,
+	.logfn		= &ebt_log_packet,
+	.me		= THIS_MODULE,
+};
+
 static int __init init(void)
 {
 	int i, ret = 0;
@@ -265,6 +304,13 @@ static int __init init(void)
 	else if ((ret = ebt_register_watcher(&ulog)))
 		sock_release(ebtulognl->sk_socket);
 
+	if (ret == 0 && nf_log_register(PF_BRIDGE, &ebt_ulog_logger) < 0) {
+		printk(KERN_WARNING "ebt_ulog: not logging via ulog "
+		       "since somebody else already registered for PF_BRIDGE\n");
+		/* we cannot make module load fail here, since otherwise
+		 * ebtables userspace would abort */
+	}
+
 	return ret;
 }
 
@@ -273,6 +319,7 @@ static void __exit fini(void)
 	ebt_ulog_buff_t *ub;
 	int i;
 
+	nf_log_unregister_logger(&ebt_ulog_logger);
 	ebt_unregister_watcher(&ulog);
 	for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
 		ub = &ulog_buffers[i];
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index f8ffbf6e233..00729b3604f 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -944,7 +944,7 @@ static int do_replace(void __user *user, unsigned int len)
 	if (countersize)
 		memset(newinfo->counters, 0, countersize);
 
-	newinfo->entries = (char *)vmalloc(tmp.entries_size);
+	newinfo->entries = vmalloc(tmp.entries_size);
 	if (!newinfo->entries) {
 		ret = -ENOMEM;
 		goto free_newinfo;
@@ -1146,7 +1146,7 @@ int ebt_register_table(struct ebt_table *table)
 	if (!newinfo)
 		return -ENOMEM;
 
-	newinfo->entries = (char *)vmalloc(table->table->entries_size);
+	newinfo->entries = vmalloc(table->table->entries_size);
 	if (!(newinfo->entries))
 		goto free_newinfo;
 
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 1bcfef51ac5..f8d322e1ea9 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -47,6 +47,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/poll.h>
 #include <linux/highmem.h>
+#include <linux/spinlock.h>
 
 #include <net/protocol.h>
 #include <linux/skbuff.h>
@@ -200,6 +201,41 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
 }
 
 /**
+ *	skb_kill_datagram - Free a datagram skbuff forcibly
+ *	@sk: socket
+ *	@skb: datagram skbuff
+ *	@flags: MSG_ flags
+ *
+ *	This function frees a datagram skbuff that was received by
+ *	skb_recv_datagram.  The flags argument must match the one
+ *	used for skb_recv_datagram.
+ *
+ *	If the MSG_PEEK flag is set, and the packet is still on the
+ *	receive queue of the socket, it will be taken off the queue
+ *	before it is freed.
+ *
+ *	This function currently only disables BH when acquiring the
+ *	sk_receive_queue lock.  Therefore it must not be used in a
+ *	context where that lock is acquired in an IRQ context.
+ */
+
+void skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
+{
+	if (flags & MSG_PEEK) {
+		spin_lock_bh(&sk->sk_receive_queue.lock);
+		if (skb == skb_peek(&sk->sk_receive_queue)) {
+			__skb_unlink(skb, &sk->sk_receive_queue);
+			atomic_dec(&skb->users);
+		}
+		spin_unlock_bh(&sk->sk_receive_queue.lock);
+	}
+
+	kfree_skb(skb);
+}
+
+EXPORT_SYMBOL(skb_kill_datagram);
+
+/**
  *	skb_copy_datagram_iovec - Copy a datagram to an iovec.
  *	@skb: buffer to copy
  *	@offset: offset in the buffer to start copying from
diff --git a/net/core/dev.c b/net/core/dev.c
index 0b48e294aaf..fd070a098f2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -75,6 +75,7 @@
 #include <asm/uaccess.h>
 #include <asm/system.h>
 #include <linux/bitops.h>
+#include <linux/capability.h>
 #include <linux/config.h>
 #include <linux/cpu.h>
 #include <linux/types.h>
@@ -626,7 +627,7 @@ struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mas
  *	Network device names need to be valid file names to
  *	to allow sysfs to work
  */
-static int dev_valid_name(const char *name)
+int dev_valid_name(const char *name)
 {
 	return !(*name == '\0' 
 		 || !strcmp(name, ".")
@@ -1092,15 +1093,12 @@ int skb_checksum_help(struct sk_buff *skb, int inward)
 			goto out;
 	}
 
-	if (offset > (int)skb->len)
-		BUG();
+	BUG_ON(offset > (int)skb->len);
 	csum = skb_checksum(skb, offset, skb->len-offset, 0);
 
 	offset = skb->tail - skb->h.raw;
-	if (offset <= 0)
-		BUG();
-	if (skb->csum + 2 > offset)
-		BUG();
+	BUG_ON(offset <= 0);
+	BUG_ON(skb->csum + 2 > offset);
 
 	*(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
 	skb->ip_summed = CHECKSUM_NONE;
@@ -1113,7 +1111,8 @@ out:
 void netdev_rx_csum_fault(struct net_device *dev)
 {
 	if (net_ratelimit()) {
-		printk(KERN_ERR "%s: hw csum failure.\n", dev->name);
+		printk(KERN_ERR "%s: hw csum failure.\n", 
+			dev ? dev->name : "<unknown>");
 		dump_stack();
 	}
 }
@@ -3269,13 +3268,13 @@ EXPORT_SYMBOL(__dev_get_by_index);
 EXPORT_SYMBOL(__dev_get_by_name);
 EXPORT_SYMBOL(__dev_remove_pack);
 EXPORT_SYMBOL(__skb_linearize);
+EXPORT_SYMBOL(dev_valid_name);
 EXPORT_SYMBOL(dev_add_pack);
 EXPORT_SYMBOL(dev_alloc_name);
 EXPORT_SYMBOL(dev_close);
 EXPORT_SYMBOL(dev_get_by_flags);
 EXPORT_SYMBOL(dev_get_by_index);
 EXPORT_SYMBOL(dev_get_by_name);
-EXPORT_SYMBOL(dev_ioctl);
 EXPORT_SYMBOL(dev_open);
 EXPORT_SYMBOL(dev_queue_xmit);
 EXPORT_SYMBOL(dev_remove_pack);
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index cb530eef0e3..05d60850840 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -158,7 +158,7 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
 	int err = 0;
 	struct dev_mc_list *dmi, *dmi1;
 
-	dmi1 = (struct dev_mc_list *)kmalloc(sizeof(*dmi), GFP_ATOMIC);
+	dmi1 = kmalloc(sizeof(*dmi), GFP_ATOMIC);
 
 	spin_lock_bh(&dev->xmit_lock);
 	for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) {
diff --git a/net/core/dv.c b/net/core/dv.c
index 3f25f4aa4e6..cf581407538 100644
--- a/net/core/dv.c
+++ b/net/core/dv.c
@@ -24,6 +24,7 @@
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/skbuff.h>
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <net/dst.h>
@@ -457,7 +458,7 @@ void divert_frame(struct sk_buff *skb)
 	unsigned char			*skb_data_end = skb->data + skb->len;
 
 	/* Packet is already aimed at us, return */
-	if (!memcmp(eth, skb->dev->dev_addr, ETH_ALEN))
+	if (!compare_ether_addr(eth->h_dest, skb->dev->dev_addr))
 		return;
 	
 	/* proto is not IP, do nothing */
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 0350586e919..e6f76106a99 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -11,6 +11,7 @@
 
 #include <linux/module.h>
 #include <linux/types.h>
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/ethtool.h>
 #include <linux/netdevice.h>
diff --git a/net/core/filter.c b/net/core/filter.c
index 2841bfce29d..9540946a48f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -13,6 +13,7 @@
  * 2 of the License, or (at your option) any later version.
  *
  * Andi Kleen - Fix a few bad bugs and races.
+ * Kris Katterjohn - Added many additional checks in sk_chk_filter()
  */
 
 #include <linux/module.h>
@@ -73,8 +74,7 @@ static inline void *load_pointer(struct sk_buff *skb, int k,
  * filtering, filter is the array of filter instructions, and
  * len is the number of filter blocks in the array.
  */
- 
-int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
+unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
 {
 	struct sock_filter *fentry;	/* We walk down these */
 	void *ptr;
@@ -174,7 +174,7 @@ int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
 			continue;
 		case BPF_LD|BPF_W|BPF_ABS:
 			k = fentry->k;
- load_w:
+load_w:
 			ptr = load_pointer(skb, k, 4, &tmp);
 			if (ptr != NULL) {
 				A = ntohl(*(u32 *)ptr);
@@ -183,7 +183,7 @@ int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
 			break;
 		case BPF_LD|BPF_H|BPF_ABS:
 			k = fentry->k;
- load_h:
+load_h:
 			ptr = load_pointer(skb, k, 2, &tmp);
 			if (ptr != NULL) {
 				A = ntohs(*(u16 *)ptr);
@@ -240,9 +240,9 @@ load_b:
 			A = X;
 			continue;
 		case BPF_RET|BPF_K:
-			return ((unsigned int)fentry->k);
+			return fentry->k;
 		case BPF_RET|BPF_A:
-			return ((unsigned int)A);
+			return A;
 		case BPF_ST:
 			mem[fentry->k] = A;
 			continue;
@@ -250,7 +250,7 @@ load_b:
 			mem[fentry->k] = X;
 			continue;
 		default:
-			/* Invalid instruction counts as RET */
+			WARN_ON(1);
 			return 0;
 		}
 
@@ -283,64 +283,107 @@ load_b:
  *
  * Check the user's filter code. If we let some ugly
  * filter code slip through kaboom! The filter must contain
- * no references or jumps that are out of range, no illegal instructions
- * and no backward jumps. It must end with a RET instruction
+ * no references or jumps that are out of range, no illegal
+ * instructions, and must end with a RET instruction.
  *
- * Returns 0 if the rule set is legal or a negative errno code if not.
+ * All jumps are forward as they are not signed.
+ *
+ * Returns 0 if the rule set is legal or -EINVAL if not.
  */
 int sk_chk_filter(struct sock_filter *filter, int flen)
 {
 	struct sock_filter *ftest;
 	int pc;
 
-	if (((unsigned int)flen >= (~0U / sizeof(struct sock_filter))) || flen == 0)
+	if (flen == 0 || flen > BPF_MAXINSNS)
 		return -EINVAL;
 
 	/* check the filter code now */
 	for (pc = 0; pc < flen; pc++) {
-		/* all jumps are forward as they are not signed */
 		ftest = &filter[pc];
-		if (BPF_CLASS(ftest->code) == BPF_JMP) {
-			/* but they mustn't jump off the end */
-			if (BPF_OP(ftest->code) == BPF_JA) {
-				/*
-				 * Note, the large ftest->k might cause loops.
-				 * Compare this with conditional jumps below,
-				 * where offsets are limited. --ANK (981016)
-				 */
-				if (ftest->k >= (unsigned)(flen-pc-1))
-					return -EINVAL;
-			} else {
-				/* for conditionals both must be safe */
- 				if (pc + ftest->jt +1 >= flen ||
-				    pc + ftest->jf +1 >= flen)
-					return -EINVAL;
-			}
-		}
 
-		/* check for division by zero   -Kris Katterjohn 2005-10-30 */
-		if (ftest->code == (BPF_ALU|BPF_DIV|BPF_K) && ftest->k == 0)
-			return -EINVAL;
+		/* Only allow valid instructions */
+		switch (ftest->code) {
+		case BPF_ALU|BPF_ADD|BPF_K:
+		case BPF_ALU|BPF_ADD|BPF_X:
+		case BPF_ALU|BPF_SUB|BPF_K:
+		case BPF_ALU|BPF_SUB|BPF_X:
+		case BPF_ALU|BPF_MUL|BPF_K:
+		case BPF_ALU|BPF_MUL|BPF_X:
+		case BPF_ALU|BPF_DIV|BPF_X:
+		case BPF_ALU|BPF_AND|BPF_K:
+		case BPF_ALU|BPF_AND|BPF_X:
+		case BPF_ALU|BPF_OR|BPF_K:
+		case BPF_ALU|BPF_OR|BPF_X:
+		case BPF_ALU|BPF_LSH|BPF_K:
+		case BPF_ALU|BPF_LSH|BPF_X:
+		case BPF_ALU|BPF_RSH|BPF_K:
+		case BPF_ALU|BPF_RSH|BPF_X:
+		case BPF_ALU|BPF_NEG:
+		case BPF_LD|BPF_W|BPF_ABS:
+		case BPF_LD|BPF_H|BPF_ABS:
+		case BPF_LD|BPF_B|BPF_ABS:
+		case BPF_LD|BPF_W|BPF_LEN:
+		case BPF_LD|BPF_W|BPF_IND:
+		case BPF_LD|BPF_H|BPF_IND:
+		case BPF_LD|BPF_B|BPF_IND:
+		case BPF_LD|BPF_IMM:
+		case BPF_LDX|BPF_W|BPF_LEN:
+		case BPF_LDX|BPF_B|BPF_MSH:
+		case BPF_LDX|BPF_IMM:
+		case BPF_MISC|BPF_TAX:
+		case BPF_MISC|BPF_TXA:
+		case BPF_RET|BPF_K:
+		case BPF_RET|BPF_A:
+			break;
+
+		/* Some instructions need special checks */
 
-		/* check that memory operations use valid addresses. */
-		if (ftest->k >= BPF_MEMWORDS) {
-			/* but it might not be a memory operation... */
-			switch (ftest->code) {
-			case BPF_ST:	
-			case BPF_STX:	
-			case BPF_LD|BPF_MEM:	
-			case BPF_LDX|BPF_MEM:	
+		case BPF_ALU|BPF_DIV|BPF_K:
+			/* check for division by zero */
+			if (ftest->k == 0)
 				return -EINVAL;
-			}
+			break;
+
+		case BPF_LD|BPF_MEM:
+		case BPF_LDX|BPF_MEM:
+		case BPF_ST:
+		case BPF_STX:
+			/* check for invalid memory addresses */
+			if (ftest->k >= BPF_MEMWORDS)
+				return -EINVAL;
+			break;
+
+		case BPF_JMP|BPF_JA:
+			/*
+			 * Note, the large ftest->k might cause loops.
+			 * Compare this with conditional jumps below,
+			 * where offsets are limited. --ANK (981016)
+			 */
+			if (ftest->k >= (unsigned)(flen-pc-1))
+				return -EINVAL;
+			break;
+
+		case BPF_JMP|BPF_JEQ|BPF_K:
+		case BPF_JMP|BPF_JEQ|BPF_X:
+		case BPF_JMP|BPF_JGE|BPF_K:
+		case BPF_JMP|BPF_JGE|BPF_X:
+		case BPF_JMP|BPF_JGT|BPF_K:
+		case BPF_JMP|BPF_JGT|BPF_X:
+		case BPF_JMP|BPF_JSET|BPF_K:
+		case BPF_JMP|BPF_JSET|BPF_X:
+			/* for conditionals both must be safe */
+			if (pc + ftest->jt + 1 >= flen ||
+			    pc + ftest->jf + 1 >= flen)
+				return -EINVAL;
+			break;
+
+		default:
+			return -EINVAL;
 		}
 	}
 
-	/*
-	 * The program must end with a return. We don't care where they
-	 * jumped within the script (its always forwards) but in the end
-	 * they _will_ hit this.
-	 */
-        return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
+	return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
 }
 
 /**
@@ -360,8 +403,8 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 	int err;
 
 	/* Make sure new filter is there and in the right amounts. */
-        if (fprog->filter == NULL || fprog->len > BPF_MAXINSNS)
-                return -EINVAL;
+	if (fprog->filter == NULL)
+		return -EINVAL;
 
 	fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
 	if (!fp)
diff --git a/net/core/flow.c b/net/core/flow.c
index 7e95b39de9f..c4f25385029 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -23,6 +23,7 @@
 #include <net/flow.h>
 #include <asm/atomic.h>
 #include <asm/semaphore.h>
+#include <linux/security.h>
 
 struct flow_cache_entry {
 	struct flow_cache_entry	*next;
@@ -30,6 +31,7 @@ struct flow_cache_entry {
 	u8			dir;
 	struct flowi		key;
 	u32			genid;
+	u32			sk_sid;
 	void			*object;
 	atomic_t		*object_ref;
 };
@@ -162,7 +164,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2)
 	return 0;
 }
 
-void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
+void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir,
 			flow_resolve_t resolver)
 {
 	struct flow_cache_entry *fle, **head;
@@ -186,6 +188,7 @@ void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
 	for (fle = *head; fle; fle = fle->next) {
 		if (fle->family == family &&
 		    fle->dir == dir &&
+		    fle->sk_sid == sk_sid &&
 		    flow_key_compare(key, &fle->key) == 0) {
 			if (fle->genid == atomic_read(&flow_cache_genid)) {
 				void *ret = fle->object;
@@ -210,6 +213,7 @@ void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
 			*head = fle;
 			fle->family = family;
 			fle->dir = dir;
+			fle->sk_sid = sk_sid;
 			memcpy(&fle->key, key, sizeof(*key));
 			fle->object = NULL;
 			flow_count(cpu)++;
@@ -221,7 +225,7 @@ nocache:
 		void *obj;
 		atomic_t *obj_ref;
 
-		resolver(key, family, dir, &obj, &obj_ref);
+		resolver(key, sk_sid, family, dir, &obj, &obj_ref);
 
 		if (fle) {
 			fle->genid = atomic_read(&flow_cache_genid);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index e2137f3e489..e8b2acbc8ea 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -9,6 +9,7 @@
  *	2 of the License, or (at your option) any later version.
  */
 
+#include <linux/capability.h>
 #include <linux/config.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
@@ -16,6 +17,7 @@
 #include <net/sock.h>
 #include <linux/rtnetlink.h>
 #include <linux/wireless.h>
+#include <net/iw_handler.h>
 
 #define to_class_dev(obj) container_of(obj,struct class_device,kobj)
 #define to_net_dev(class) container_of(class, struct net_device, class_dev)
@@ -84,16 +86,11 @@ static ssize_t netdev_store(struct class_device *dev,
 	return ret;
 }
 
-/* generate a read-only network device class attribute */
-#define NETDEVICE_ATTR(field, format_string)				\
-NETDEVICE_SHOW(field, format_string)					\
-static CLASS_DEVICE_ATTR(field, S_IRUGO, show_##field, NULL)		\
-
-NETDEVICE_ATTR(addr_len, fmt_dec);
-NETDEVICE_ATTR(iflink, fmt_dec);
-NETDEVICE_ATTR(ifindex, fmt_dec);
-NETDEVICE_ATTR(features, fmt_long_hex);
-NETDEVICE_ATTR(type, fmt_dec);
+NETDEVICE_SHOW(addr_len, fmt_dec);
+NETDEVICE_SHOW(iflink, fmt_dec);
+NETDEVICE_SHOW(ifindex, fmt_dec);
+NETDEVICE_SHOW(features, fmt_long_hex);
+NETDEVICE_SHOW(type, fmt_dec);
 
 /* use same locking rules as GIFHWADDR ioctl's */
 static ssize_t format_addr(char *buf, const unsigned char *addr, int len)
@@ -136,10 +133,6 @@ static ssize_t show_carrier(struct class_device *dev, char *buf)
 	return -EINVAL;
 }
 
-static CLASS_DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
-static CLASS_DEVICE_ATTR(broadcast, S_IRUGO, show_broadcast, NULL);
-static CLASS_DEVICE_ATTR(carrier, S_IRUGO, show_carrier, NULL);
-
 /* read-write attributes */
 NETDEVICE_SHOW(mtu, fmt_dec);
 
@@ -153,8 +146,6 @@ static ssize_t store_mtu(struct class_device *dev, const char *buf, size_t len)
 	return netdev_store(dev, buf, len, change_mtu);
 }
 
-static CLASS_DEVICE_ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu);
-
 NETDEVICE_SHOW(flags, fmt_hex);
 
 static int change_flags(struct net_device *net, unsigned long new_flags)
@@ -167,8 +158,6 @@ static ssize_t store_flags(struct class_device *dev, const char *buf, size_t len
 	return netdev_store(dev, buf, len, change_flags);
 }
 
-static CLASS_DEVICE_ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags);
-
 NETDEVICE_SHOW(tx_queue_len, fmt_ulong);
 
 static int change_tx_queue_len(struct net_device *net, unsigned long new_len)
@@ -182,9 +171,6 @@ static ssize_t store_tx_queue_len(struct class_device *dev, const char *buf, siz
 	return netdev_store(dev, buf, len, change_tx_queue_len);
 }
 
-static CLASS_DEVICE_ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, 
-			 store_tx_queue_len);
-
 NETDEVICE_SHOW(weight, fmt_dec);
 
 static int change_weight(struct net_device *net, unsigned long new_weight)
@@ -198,24 +184,21 @@ static ssize_t store_weight(struct class_device *dev, const char *buf, size_t le
 	return netdev_store(dev, buf, len, change_weight);
 }
 
-static CLASS_DEVICE_ATTR(weight, S_IRUGO | S_IWUSR, show_weight, 
-			 store_weight);
-
-
-static struct class_device_attribute *net_class_attributes[] = {
-	&class_device_attr_ifindex,
-	&class_device_attr_iflink,
-	&class_device_attr_addr_len,
-	&class_device_attr_tx_queue_len,
-	&class_device_attr_features,
-	&class_device_attr_mtu,
-	&class_device_attr_flags,
-	&class_device_attr_weight,
-	&class_device_attr_type,
-	&class_device_attr_address,
-	&class_device_attr_broadcast,
-	&class_device_attr_carrier,
-	NULL
+static struct class_device_attribute net_class_attributes[] = {
+	__ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
+	__ATTR(iflink, S_IRUGO, show_iflink, NULL),
+	__ATTR(ifindex, S_IRUGO, show_ifindex, NULL),
+	__ATTR(features, S_IRUGO, show_features, NULL),
+	__ATTR(type, S_IRUGO, show_type, NULL),
+	__ATTR(address, S_IRUGO, show_address, NULL),
+	__ATTR(broadcast, S_IRUGO, show_broadcast, NULL),
+	__ATTR(carrier, S_IRUGO, show_carrier, NULL),
+	__ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu),
+	__ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags),
+	__ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len,
+	       store_tx_queue_len),
+	__ATTR(weight, S_IRUGO | S_IWUSR, show_weight, store_weight),
+	{}
 };
 
 /* Show a given an attribute in the statistics group */
@@ -313,13 +296,19 @@ static ssize_t wireless_show(struct class_device *cd, char *buf,
 					       char *))
 {
 	struct net_device *dev = to_net_dev(cd);
-	const struct iw_statistics *iw;
+	const struct iw_statistics *iw = NULL;
 	ssize_t ret = -EINVAL;
 	
 	read_lock(&dev_base_lock);
-	if (dev_isalive(dev) && dev->get_wireless_stats 
-	    && (iw = dev->get_wireless_stats(dev)) != NULL) 
-		ret = (*format)(iw, buf);
+	if (dev_isalive(dev)) {
+		if (dev->wireless_handlers &&
+		    dev->wireless_handlers->get_wireless_stats)
+			iw = dev->wireless_handlers->get_wireless_stats(dev);
+		else if (dev->get_wireless_stats)
+			iw = dev->get_wireless_stats(dev);
+		if (iw != NULL)
+			ret = (*format)(iw, buf);
+	}
 	read_unlock(&dev_base_lock);
 
 	return ret;
@@ -369,14 +358,14 @@ static struct attribute_group wireless_group = {
 #endif
 
 #ifdef CONFIG_HOTPLUG
-static int netdev_hotplug(struct class_device *cd, char **envp,
-			  int num_envp, char *buf, int size)
+static int netdev_uevent(struct class_device *cd, char **envp,
+			 int num_envp, char *buf, int size)
 {
 	struct net_device *dev = to_net_dev(cd);
 	int i = 0;
 	int n;
 
-	/* pass interface in env to hotplug. */
+	/* pass interface to uevent. */
 	envp[i++] = buf;
 	n = snprintf(buf, size, "INTERFACE=%s", dev->name) + 1;
 	buf += n;
@@ -407,8 +396,9 @@ static void netdev_release(struct class_device *cd)
 static struct class net_class = {
 	.name = "net",
 	.release = netdev_release,
+	.class_dev_attrs = net_class_attributes,
 #ifdef CONFIG_HOTPLUG
-	.hotplug = netdev_hotplug,
+	.uevent = netdev_uevent,
 #endif
 };
 
@@ -420,7 +410,8 @@ void netdev_unregister_sysfs(struct net_device * net)
 		sysfs_remove_group(&class_dev->kobj, &netstat_group);
 
 #ifdef WIRELESS_EXT
-	if (net->get_wireless_stats)
+	if (net->get_wireless_stats || (net->wireless_handlers &&
+			net->wireless_handlers->get_wireless_stats))
 		sysfs_remove_group(&class_dev->kobj, &wireless_group);
 #endif
 	class_device_del(class_dev);
@@ -431,8 +422,6 @@ void netdev_unregister_sysfs(struct net_device * net)
 int netdev_register_sysfs(struct net_device *net)
 {
 	struct class_device *class_dev = &(net->class_dev);
-	int i;
-	struct class_device_attribute *attr;
 	int ret;
 
 	class_dev->class = &net_class;
@@ -442,21 +431,17 @@ int netdev_register_sysfs(struct net_device *net)
 	if ((ret = class_device_register(class_dev)))
 		goto out;
 
-	for (i = 0; (attr = net_class_attributes[i]) != NULL; i++) {
-		if ((ret = class_device_create_file(class_dev, attr)))
-		    goto out_unreg;
-	}
-
-
 	if (net->get_stats &&
 	    (ret = sysfs_create_group(&class_dev->kobj, &netstat_group)))
 		goto out_unreg; 
 
 #ifdef WIRELESS_EXT
-	if (net->get_wireless_stats &&
-	    (ret = sysfs_create_group(&class_dev->kobj, &wireless_group)))
-		goto out_cleanup; 
-
+	if (net->get_wireless_stats || (net->wireless_handlers &&
+			net->wireless_handlers->get_wireless_stats)) {
+		ret = sysfs_create_group(&class_dev->kobj, &wireless_group);
+		if (ret)
+			goto out_cleanup;
+	}
 	return 0;
 out_cleanup:
 	if (net->get_stats)
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 49424a42a2c..ea51f8d02eb 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -13,6 +13,7 @@
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/string.h>
+#include <linux/if_arp.h>
 #include <linux/inetdevice.h>
 #include <linux/inet.h>
 #include <linux/interrupt.h>
@@ -702,7 +703,7 @@ int netpoll_setup(struct netpoll *np)
 		}
 	}
 
-	if (!memcmp(np->local_mac, "\0\0\0\0\0\0", 6) && ndev->dev_addr)
+	if (is_zero_ether_addr(np->local_mac) && ndev->dev_addr)
 		memcpy(np->local_mac, ndev->dev_addr, 6);
 
 	if (!np->local_ip) {
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 7fc3e9e28c3..da16f8fd149 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -116,13 +116,13 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
-#include <linux/sched.h>
 #include <linux/unistd.h>
 #include <linux/string.h>
 #include <linux/ptrace.h>
 #include <linux/errno.h>
 #include <linux/ioport.h>
 #include <linux/interrupt.h>
+#include <linux/capability.h>
 #include <linux/delay.h>
 #include <linux/timer.h>
 #include <linux/init.h>
@@ -139,6 +139,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/wait.h>
+#include <linux/etherdevice.h>
 #include <net/checksum.h>
 #include <net/ipv6.h>
 #include <net/addrconf.h>
@@ -281,8 +282,8 @@ struct pktgen_dev {
         __u32 src_mac_count; /* How many MACs to iterate through */
         __u32 dst_mac_count; /* How many MACs to iterate through */
         
-        unsigned char dst_mac[6];
-        unsigned char src_mac[6];
+        unsigned char dst_mac[ETH_ALEN];
+        unsigned char src_mac[ETH_ALEN];
         
         __u32 cur_dst_mac_offset;
         __u32 cur_src_mac_offset;
@@ -473,7 +474,6 @@ static char version[] __initdata = VERSION;
 
 static int pktgen_remove_device(struct pktgen_thread* t, struct pktgen_dev *i);
 static int pktgen_add_device(struct pktgen_thread* t, const char* ifname);
-static struct pktgen_thread* pktgen_find_thread(const char* name);
 static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread* t, const char* ifname);
 static int pktgen_device_event(struct notifier_block *, unsigned long, void *);
 static void pktgen_run_all_threads(void);
@@ -487,9 +487,9 @@ static unsigned int fmt_ip6(char *s,const char ip[16]);
 
 /* Module parameters, defaults. */
 static int pg_count_d = 1000; /* 1000 pkts by default */
-static int pg_delay_d = 0;
-static int pg_clone_skb_d = 0;
-static int debug = 0;
+static int pg_delay_d;
+static int pg_clone_skb_d;
+static int debug;
 
 static DECLARE_MUTEX(pktgen_sem);
 static struct pktgen_thread *pktgen_threads = NULL;
@@ -595,16 +595,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
 
 	seq_puts(seq, "     src_mac: ");
 
-	if ((pkt_dev->src_mac[0] == 0) && 
-	    (pkt_dev->src_mac[1] == 0) && 
-	    (pkt_dev->src_mac[2] == 0) && 
-	    (pkt_dev->src_mac[3] == 0) && 
-	    (pkt_dev->src_mac[4] == 0) && 
-	    (pkt_dev->src_mac[5] == 0)) 
-
+	if (is_zero_ether_addr(pkt_dev->src_mac))
 		for (i = 0; i < 6; i++) 
 			seq_printf(seq,  "%02X%s", pkt_dev->odev->dev_addr[i], i == 5 ? "  " : ":");
-
 	else 
 		for (i = 0; i < 6; i++) 
 			seq_printf(seq,  "%02X%s", pkt_dev->src_mac[i], i == 5 ? "  " : ":");
@@ -1190,9 +1183,9 @@ static ssize_t pktgen_if_write(struct file *file, const char __user *user_buffer
 	}
 	if (!strcmp(name, "dst_mac")) {
 		char *v = valstr;
-                unsigned char old_dmac[6];
+		unsigned char old_dmac[ETH_ALEN];
 		unsigned char *m = pkt_dev->dst_mac;
-                memcpy(old_dmac, pkt_dev->dst_mac, 6);
+		memcpy(old_dmac, pkt_dev->dst_mac, ETH_ALEN);
                 
 		len = strn_len(&user_buffer[i], sizeof(valstr) - 1);
                 if (len < 0) { return len; }
@@ -1221,8 +1214,8 @@ static ssize_t pktgen_if_write(struct file *file, const char __user *user_buffer
 		}
 
 		/* Set up Dest MAC */
-                if (memcmp(old_dmac, pkt_dev->dst_mac, 6) != 0) 
-                        memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, 6);
+		if (compare_ether_addr(old_dmac, pkt_dev->dst_mac))
+			memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, ETH_ALEN);
                 
 		sprintf(pg_result, "OK: dstmac");
 		return count;
@@ -1561,17 +1554,11 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
         
         /* Default to the interface's mac if not explicitly set. */
 
-	if ((pkt_dev->src_mac[0] == 0) && 
-	    (pkt_dev->src_mac[1] == 0) && 
-	    (pkt_dev->src_mac[2] == 0) && 
-	    (pkt_dev->src_mac[3] == 0) && 
-	    (pkt_dev->src_mac[4] == 0) && 
-	    (pkt_dev->src_mac[5] == 0)) {
+	if (is_zero_ether_addr(pkt_dev->src_mac))
+	       memcpy(&(pkt_dev->hh[6]), pkt_dev->odev->dev_addr, ETH_ALEN);
 
-	       memcpy(&(pkt_dev->hh[6]), pkt_dev->odev->dev_addr, 6);
-       }
         /* Set up Dest MAC */
-        memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, 6);
+	memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, ETH_ALEN);
 
         /* Set up pkt size */
         pkt_dev->cur_pkt_size = pkt_dev->min_pkt_size;
@@ -1873,13 +1860,14 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 	 */
 	mod_cur_headers(pkt_dev);
 
-	skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16, GFP_ATOMIC);
+	datalen = (odev->hard_header_len + 16) & ~0xf;
+	skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + datalen, GFP_ATOMIC);
 	if (!skb) {
 		sprintf(pkt_dev->result, "No memory");
 		return NULL;
 	}
 
-	skb_reserve(skb, 16);
+	skb_reserve(skb, datalen);
 
 	/*  Reserve for ethernet and IP header  */
 	eth = (__u8 *) skb_push(skb, 14);
@@ -2883,7 +2871,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char* ifname)
 	return add_dev_to_thread(t, pkt_dev);
 }
 
-static struct pktgen_thread *pktgen_find_thread(const char* name) 
+static struct pktgen_thread * __init pktgen_find_thread(const char* name)
 {
         struct pktgen_thread *t = NULL;
 
@@ -2900,7 +2888,7 @@ static struct pktgen_thread *pktgen_find_thread(const char* name)
         return t;
 }
 
-static int pktgen_create_thread(const char* name, int cpu) 
+static int __init pktgen_create_thread(const char* name, int cpu)
 {
         struct pktgen_thread *t = NULL;
 	struct proc_dir_entry *pe;
diff --git a/net/core/scm.c b/net/core/scm.c
index e887d19be50..649d01ef35b 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -11,6 +11,7 @@
 
 #include <linux/module.h>
 #include <linux/signal.h>
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b7d13a4fff4..d0732e9c856 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -135,17 +135,13 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 			    int fclone)
 {
+	struct skb_shared_info *shinfo;
 	struct sk_buff *skb;
 	u8 *data;
 
 	/* Get the HEAD */
-	if (fclone)
-		skb = kmem_cache_alloc(skbuff_fclone_cache,
-				       gfp_mask & ~__GFP_DMA);
-	else
-		skb = kmem_cache_alloc(skbuff_head_cache,
-				       gfp_mask & ~__GFP_DMA);
-
+	skb = kmem_cache_alloc(fclone ? skbuff_fclone_cache : skbuff_head_cache,
+				gfp_mask & ~__GFP_DMA);
 	if (!skb)
 		goto out;
 
@@ -162,6 +158,16 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	skb->data = data;
 	skb->tail = data;
 	skb->end  = data + size;
+	/* make sure we initialize shinfo sequentially */
+	shinfo = skb_shinfo(skb);
+	atomic_set(&shinfo->dataref, 1);
+	shinfo->nr_frags  = 0;
+	shinfo->tso_size = 0;
+	shinfo->tso_segs = 0;
+	shinfo->ufo_size = 0;
+	shinfo->ip6_frag_id = 0;
+	shinfo->frag_list = NULL;
+
 	if (fclone) {
 		struct sk_buff *child = skb + 1;
 		atomic_t *fclone_ref = (atomic_t *) (child + 1);
@@ -171,13 +177,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 
 		child->fclone = SKB_FCLONE_UNAVAILABLE;
 	}
-	atomic_set(&(skb_shinfo(skb)->dataref), 1);
-	skb_shinfo(skb)->nr_frags  = 0;
-	skb_shinfo(skb)->tso_size = 0;
-	skb_shinfo(skb)->tso_segs = 0;
-	skb_shinfo(skb)->frag_list = NULL;
-	skb_shinfo(skb)->ufo_size = 0;
-	skb_shinfo(skb)->ip6_frag_id = 0;
 out:
 	return skb;
 nodata:
@@ -792,8 +791,7 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
 		int end = offset + skb_shinfo(skb)->frags[i].size;
 		if (end > len) {
 			if (skb_cloned(skb)) {
-				if (!realloc)
-					BUG();
+				BUG_ON(!realloc);
 				if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
 					return -ENOMEM;
 			}
@@ -895,8 +893,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
 		struct sk_buff *insp = NULL;
 
 		do {
-			if (!list)
-				BUG();
+			BUG_ON(!list);
 
 			if (list->len <= eat) {
 				/* Eaten as whole. */
@@ -1200,8 +1197,7 @@ unsigned int skb_checksum(const struct sk_buff *skb, int offset,
 			start = end;
 		}
 	}
-	if (len)
-		BUG();
+	BUG_ON(len);
 
 	return csum;
 }
@@ -1283,8 +1279,7 @@ unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
 			start = end;
 		}
 	}
-	if (len)
-		BUG();
+	BUG_ON(len);
 	return csum;
 }
 
@@ -1298,8 +1293,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
 	else
 		csstart = skb_headlen(skb);
 
-	if (csstart > skb_headlen(skb))
-		BUG();
+	BUG_ON(csstart > skb_headlen(skb));
 
 	memcpy(to, skb->data, csstart);
 
@@ -1725,7 +1719,7 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
  * of the skb if any page alloc fails user this procedure returns  -ENOMEM
  */
 int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
-			int getfrag(void *from, char *to, int offset,
+			int (*getfrag)(void *from, char *to, int offset,
 					int len, int odd, struct sk_buff *skb),
 			void *from, int length)
 {
diff --git a/net/core/sock.c b/net/core/sock.c
index 13cc3be4f05..6e00811d44b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -91,6 +91,7 @@
  *		2 of the License, or (at your option) any later version.
  */
 
+#include <linux/capability.h>
 #include <linux/config.h>
 #include <linux/errno.h>
 #include <linux/types.h>
@@ -1488,7 +1489,7 @@ int proto_register(struct proto *prot, int alloc_slab)
 			}
 		}
 
-		if (prot->twsk_obj_size) {
+		if (prot->twsk_prot != NULL) {
 			static const char mask[] = "tw_sock_%s";
 
 			timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
@@ -1497,11 +1498,12 @@ int proto_register(struct proto *prot, int alloc_slab)
 				goto out_free_request_sock_slab;
 
 			sprintf(timewait_sock_slab_name, mask, prot->name);
-			prot->twsk_slab = kmem_cache_create(timewait_sock_slab_name,
-							    prot->twsk_obj_size,
-							    0, SLAB_HWCACHE_ALIGN,
-							    NULL, NULL);
-			if (prot->twsk_slab == NULL)
+			prot->twsk_prot->twsk_slab =
+				kmem_cache_create(timewait_sock_slab_name,
+						  prot->twsk_prot->twsk_obj_size,
+						  0, SLAB_HWCACHE_ALIGN,
+						  NULL, NULL);
+			if (prot->twsk_prot->twsk_slab == NULL)
 				goto out_free_timewait_sock_slab_name;
 		}
 	}
@@ -1548,12 +1550,12 @@ void proto_unregister(struct proto *prot)
 		prot->rsk_prot->slab = NULL;
 	}
 
-	if (prot->twsk_slab != NULL) {
-		const char *name = kmem_cache_name(prot->twsk_slab);
+	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
+		const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);
 
-		kmem_cache_destroy(prot->twsk_slab);
+		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
 		kfree(name);
-		prot->twsk_slab = NULL;
+		prot->twsk_prot->twsk_slab = NULL;
 	}
 }
 
diff --git a/net/core/stream.c b/net/core/stream.c
index 15bfd03e802..35e25259fd9 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -55,8 +55,9 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
 	int done;
 
 	do {
-		if (sk->sk_err)
-			return sock_error(sk);
+		int err = sock_error(sk);
+		if (err)
+			return err;
 		if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV))
 			return -EPIPE;
 		if (!*timeo_p)
@@ -67,6 +68,7 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
 		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
 		sk->sk_write_pending++;
 		done = sk_wait_event(sk, timeo_p,
+				     !sk->sk_err &&
 				     !((1 << sk->sk_state) & 
 				       ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)));
 		finish_wait(sk->sk_sleep, &wait);
@@ -137,7 +139,9 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
 
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 		sk->sk_write_pending++;
-		sk_wait_event(sk, &current_timeo, sk_stream_memory_free(sk) &&
+		sk_wait_event(sk, &current_timeo, !sk->sk_err &&
+						  !(sk->sk_shutdown & SEND_SHUTDOWN) &&
+						  sk_stream_memory_free(sk) &&
 						  vm_wait);
 		sk->sk_write_pending--;
 
diff --git a/net/core/utils.c b/net/core/utils.c
index 7b5970fc9e4..ac1d1fcf867 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -162,7 +162,7 @@ EXPORT_SYMBOL(net_srandom);
  * is otherwise not dependent on the TCP/IP stack.
  */
 
-__u32 in_aton(const char *str)
+__be32 in_aton(const char *str)
 {
 	unsigned long l;
 	unsigned int val;
@@ -175,7 +175,7 @@ __u32 in_aton(const char *str)
 		if (*str != '\0')
 		{
 			val = 0;
-			while (*str != '\0' && *str != '.')
+			while (*str != '\0' && *str != '.' && *str != '\n')
 			{
 				val *= 10;
 				val += *str - '0';
diff --git a/net/core/wireless.c b/net/core/wireless.c
index 271ddb35b0b..2add7ed609e 100644
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -78,6 +78,7 @@
 #include <linux/seq_file.h>
 #include <linux/init.h>			/* for __init */
 #include <linux/if_arp.h>		/* ARPHRD_ETHER */
+#include <linux/etherdevice.h>		/* compare_ether_addr */
 
 #include <linux/wireless.h>		/* Pretty obvious */
 #include <net/iw_handler.h>		/* New driver API */
@@ -1506,7 +1507,7 @@ void wireless_spy_update(struct net_device *	dev,
 
 	/* Update all records that match */
 	for(i = 0; i < spydata->spy_number; i++)
-		if(!memcmp(address, spydata->spy_address[i], ETH_ALEN)) {
+		if(!compare_ether_addr(address, spydata->spy_address[i])) {
 			memcpy(&(spydata->spy_stat[i]), wstats,
 			       sizeof(struct iw_quality));
 			match = i;
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index 344a8da153f..87b27fff6e3 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -1,3 +1,7 @@
+# build dccp_ipv6 as module whenever either IPv6 or DCCP is a module
+obj-$(subst y,$(CONFIG_IP_DCCP),$(CONFIG_IPV6)) += dccp_ipv6.o
+dccp_ipv6-y := ipv6.o
+
 obj-$(CONFIG_IP_DCCP) += dccp.o
 
 dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index c9a62cca22f..2c77dafbd09 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -55,8 +55,8 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 	from = av->dccpav_buf + av->dccpav_buf_head;
 
 	/* Check if buf_head wraps */
-	if (av->dccpav_buf_head + len > av->dccpav_vec_len) {
-		const u32 tailsize = (av->dccpav_vec_len - av->dccpav_buf_head);
+	if ((int)av->dccpav_buf_head + len > av->dccpav_buf_len) {
+		const u32 tailsize = av->dccpav_buf_len - av->dccpav_buf_head;
 
 		memcpy(to, from, tailsize);
 		to   += tailsize;
@@ -93,8 +93,14 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 struct dccp_ackvec *dccp_ackvec_alloc(const unsigned int len,
 				      const gfp_t priority)
 {
-	struct dccp_ackvec *av = kmalloc(sizeof(*av) + len, priority);
+	struct dccp_ackvec *av;
 
+	BUG_ON(len == 0);
+
+	if (len > DCCP_MAX_ACKVEC_LEN)
+		return NULL;
+
+	av = kmalloc(sizeof(*av) + len, priority);
 	if (av != NULL) {
 		av->dccpav_buf_len	= len;
 		av->dccpav_buf_head	=
@@ -117,13 +123,13 @@ void dccp_ackvec_free(struct dccp_ackvec *av)
 }
 
 static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av,
-				   const unsigned int index)
+				   const u8 index)
 {
 	return av->dccpav_buf[index] & DCCP_ACKVEC_STATE_MASK;
 }
 
 static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av,
-				 const unsigned int index)
+				 const u8 index)
 {
 	return av->dccpav_buf[index] & DCCP_ACKVEC_LEN_MASK;
 }
@@ -135,10 +141,10 @@ static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av,
  */
 static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
 						 const unsigned int packets,
-						  const unsigned char state)
+						 const unsigned char state)
 {
 	unsigned int gap;
-	signed long new_head;
+	long new_head;
 
 	if (av->dccpav_vec_len + packets > av->dccpav_buf_len)
 		return -ENOBUFS;
@@ -223,7 +229,7 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
 		 *	could reduce the complexity of this scan.)
 		 */
 		u64 delta = dccp_delta_seqno(ackno, av->dccpav_buf_ackno);
-		unsigned int index = av->dccpav_buf_head;
+		u8 index = av->dccpav_buf_head;
 
 		while (1) {
 			const u8 len = dccp_ackvec_len(av, index);
@@ -291,7 +297,7 @@ void dccp_ackvec_print(const struct dccp_ackvec *av)
 }
 #endif
 
-static void dccp_ackvec_trow_away_ack_record(struct dccp_ackvec *av)
+static void dccp_ackvec_throw_away_ack_record(struct dccp_ackvec *av)
 {
 	/*
 	 * As we're keeping track of the ack vector size (dccpav_vec_len) and
@@ -301,9 +307,10 @@ static void dccp_ackvec_trow_away_ack_record(struct dccp_ackvec *av)
 	 * draft-ietf-dccp-spec-11.txt Appendix A. -acme
 	 */
 #if 0
-	av->dccpav_buf_tail = av->dccpav_ack_ptr + 1;
-	if (av->dccpav_buf_tail >= av->dccpav_vec_len)
-		av->dccpav_buf_tail -= av->dccpav_vec_len;
+	u32 new_buf_tail = av->dccpav_ack_ptr + 1;
+	if (new_buf_tail >= av->dccpav_vec_len)
+		new_buf_tail -= av->dccpav_vec_len;
+	av->dccpav_buf_tail = new_buf_tail;
 #endif
 	av->dccpav_vec_len -= av->dccpav_sent_len;
 }
@@ -326,7 +333,7 @@ void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk,
 			      debug_prefix, 1,
 			      (unsigned long long)av->dccpav_ack_seqno,
 			      (unsigned long long)av->dccpav_ack_ackno);
-		dccp_ackvec_trow_away_ack_record(av);
+		dccp_ackvec_throw_away_ack_record(av);
 		av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1;
 	}
 }
@@ -389,7 +396,7 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
 					      av->dccpav_ack_seqno,
 					      (unsigned long long)
 					      av->dccpav_ack_ackno);
-				dccp_ackvec_trow_away_ack_record(av);
+				dccp_ackvec_throw_away_ack_record(av);
 			}
 			/*
 			 * If dccpav_ack_seqno was not received, no problem
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index d0fd6c60c57..f7dfb5f67b8 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -54,16 +54,16 @@
  * @dccpav_buf - circular buffer of acknowledgeable packets
  */
 struct dccp_ackvec {
-	unsigned int	dccpav_buf_head;
-	unsigned int	dccpav_buf_tail;
 	u64		dccpav_buf_ackno;
 	u64		dccpav_ack_seqno;
 	u64		dccpav_ack_ackno;
-	unsigned int	dccpav_ack_ptr;
-	unsigned int	dccpav_sent_len;
-	unsigned int	dccpav_vec_len;
-	unsigned int	dccpav_buf_len;
 	struct timeval	dccpav_time;
+	u8		dccpav_buf_head;
+	u8		dccpav_buf_tail;
+	u8		dccpav_ack_ptr;
+	u8		dccpav_sent_len;
+	u8		dccpav_vec_len;
+	u8		dccpav_buf_len;
 	u8		dccpav_buf_nonce;
 	u8		dccpav_ack_nonce;
 	u8		dccpav_buf[0];
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index c37eeeaf5c6..de681c6ad08 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -21,6 +21,8 @@
 
 #define CCID_MAX 255
 
+struct tcp_info;
+
 struct ccid {
 	unsigned char	ccid_id;
 	const char	*ccid_name;
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index f97b85d55ad..93f26dd6e6c 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -59,7 +59,7 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
 
 #define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
 
-extern struct proto dccp_v4_prot;
+extern struct proto dccp_prot;
 
 /* is seq1 < seq2 ? */
 static inline int before48(const u64 seq1, const u64 seq2)
@@ -228,6 +228,9 @@ extern int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				const struct dccp_hdr *dh, const unsigned len);
 
+extern int dccp_v4_init_sock(struct sock *sk);
+extern int dccp_v4_destroy_sock(struct sock *sk);
+
 extern void		dccp_close(struct sock *sk, long timeout);
 extern struct sk_buff	*dccp_make_response(struct sock *sk,
 					    struct dst_entry *dst,
@@ -238,6 +241,7 @@ extern struct sk_buff	*dccp_make_reset(struct sock *sk,
 
 extern int	   dccp_connect(struct sock *sk);
 extern int	   dccp_disconnect(struct sock *sk, int flags);
+extern void	   dccp_unhash(struct sock *sk);
 extern int	   dccp_getsockopt(struct sock *sk, int level, int optname,
 				   char __user *optval, int __user *optlen);
 extern int	   dccp_setsockopt(struct sock *sk, int level, int optname,
@@ -249,6 +253,13 @@ extern int	   dccp_recvmsg(struct kiocb *iocb, struct sock *sk,
 				struct msghdr *msg, size_t len, int nonblock,
 				int flags, int *addr_len);
 extern void	   dccp_shutdown(struct sock *sk, int how);
+extern int	   inet_dccp_listen(struct socket *sock, int backlog);
+extern unsigned int dccp_poll(struct file *file, struct socket *sock,
+			     poll_table *wait);
+extern void	   dccp_v4_send_check(struct sock *sk, int len,
+				      struct sk_buff *skb);
+extern int	   dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
+				   int addr_len);
 
 extern int	   dccp_v4_checksum(const struct sk_buff *skb,
 				    const u32 saddr, const u32 daddr);
@@ -256,6 +267,17 @@ extern int	   dccp_v4_checksum(const struct sk_buff *skb,
 extern int	   dccp_v4_send_reset(struct sock *sk,
 				      enum dccp_reset_codes code);
 extern void	   dccp_send_close(struct sock *sk, const int active);
+extern int	   dccp_invalid_packet(struct sk_buff *skb);
+
+static inline int dccp_bad_service_code(const struct sock *sk,
+					const __u32 service)
+{
+	const struct dccp_sock *dp = dccp_sk(sk);
+
+	if (dp->dccps_service == service)
+		return 0;
+	return !dccp_list_has_service(dp->dccps_service_list, service);
+}
 
 struct dccp_skb_cb {
 	__u8  dccpd_type:4;
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index f675d8e642d..3f78c00e382 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c
@@ -28,7 +28,7 @@ static void dccp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_retransmits	= icsk->icsk_retransmits;
 	info->tcpi_probes	= icsk->icsk_probes_out;
 	info->tcpi_backoff	= icsk->icsk_backoff;
-	info->tcpi_pmtu		= dp->dccps_pmtu_cookie;
+	info->tcpi_pmtu		= icsk->icsk_pmtu_cookie;
 
 	if (dp->dccps_options.dccpo_send_ack_vector)
 		info->tcpi_options |= TCPI_OPT_SACK;
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 3454d594190..b6cba72b44e 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -151,29 +151,12 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
 	return 0;
 }
 
-int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
-			 const struct dccp_hdr *dh, const unsigned len)
+static inline int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
+					 const struct dccp_hdr *dh,
+					 const unsigned len)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 
-	if (dccp_check_seqno(sk, skb))
-		goto discard;
-
-	if (dccp_parse_options(sk, skb))
-		goto discard;
-
-	if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
-		dccp_event_ack_recv(sk, skb);
-
-	if (dp->dccps_options.dccpo_send_ack_vector &&
-	    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
-			    DCCP_SKB_CB(skb)->dccpd_seq,
-			    DCCP_ACKVEC_STATE_RECEIVED))
-		goto discard;
-
-	ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
-	ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
-
 	switch (dccp_hdr(skb)->dccph_type) {
 	case DCCP_PKT_DATAACK:
 	case DCCP_PKT_DATA:
@@ -250,6 +233,37 @@ discard:
 	return 0;
 }
 
+int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
+			 const struct dccp_hdr *dh, const unsigned len)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+
+	if (dccp_check_seqno(sk, skb))
+		goto discard;
+
+	if (dccp_parse_options(sk, skb))
+		goto discard;
+
+	if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
+		dccp_event_ack_recv(sk, skb);
+
+	if (dp->dccps_options.dccpo_send_ack_vector &&
+	    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
+			    DCCP_SKB_CB(skb)->dccpd_seq,
+			    DCCP_ACKVEC_STATE_RECEIVED))
+		goto discard;
+
+	ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
+	ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
+
+	return __dccp_rcv_established(sk, skb, dh, len);
+discard:
+	__kfree_skb(skb);
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(dccp_rcv_established);
+
 static int dccp_rcv_request_sent_state_process(struct sock *sk,
 					       struct sk_buff *skb,
 					       const struct dccp_hdr *dh,
@@ -286,6 +300,12 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
 			goto out_invalid_packet;
 		}
 
+                if (dp->dccps_options.dccpo_send_ack_vector &&
+                    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
+                                    DCCP_SKB_CB(skb)->dccpd_seq,
+                                    DCCP_ACKVEC_STATE_RECEIVED))
+                        goto out_invalid_packet; /* FIXME: change error code */
+
 		dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
 		dccp_update_gsr(sk, dp->dccps_isr);
 		/*
@@ -309,7 +329,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
 			goto out_invalid_packet;
 		}
 
-		dccp_sync_mss(sk, dp->dccps_pmtu_cookie);
+		dccp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 
 		/*
 		 *    Step 10: Process REQUEST state (second part)
@@ -329,7 +349,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
 		dccp_set_state(sk, DCCP_PARTOPEN);
 
 		/* Make sure socket is routed, for correct metrics. */
-		inet_sk_rebuild_header(sk);
+		icsk->icsk_af_ops->rebuild_header(sk);
 
 		if (!sock_flag(sk, SOCK_DEAD)) {
 			sk->sk_state_change(sk);
@@ -398,9 +418,9 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
 
 		if (dh->dccph_type == DCCP_PKT_DATAACK ||
 		    dh->dccph_type == DCCP_PKT_DATA) {
-			dccp_rcv_established(sk, skb, dh, len);
+			__dccp_rcv_established(sk, skb, dh, len);
 			queued = 1; /* packet was queued
-				       (by dccp_rcv_established) */
+				       (by __dccp_rcv_established) */
 		}
 		break;
 	}
@@ -444,7 +464,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	 */
 	if (sk->sk_state == DCCP_LISTEN) {
 		if (dh->dccph_type == DCCP_PKT_REQUEST) {
-			if (dccp_v4_conn_request(sk, skb) < 0)
+			if (inet_csk(sk)->icsk_af_ops->conn_request(sk,
+								    skb) < 0)
 				return 1;
 
 			/* FIXME: do congestion control initialization */
@@ -471,14 +492,14 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
 			dccp_event_ack_recv(sk, skb);
 
-		ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
-		ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
-
  		if (dp->dccps_options.dccpo_send_ack_vector &&
 		    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
  				    DCCP_SKB_CB(skb)->dccpd_seq,
  				    DCCP_ACKVEC_STATE_RECEIVED))
  			goto discard;
+
+		ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
+		ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
 	}
 
 	/*
@@ -566,3 +587,5 @@ discard:
 	}
 	return 0;
 }
+
+EXPORT_SYMBOL_GPL(dccp_rcv_state_process);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index ca03521112c..00f98322667 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -19,7 +19,9 @@
 
 #include <net/icmp.h>
 #include <net/inet_hashtables.h>
+#include <net/inet_sock.h>
 #include <net/sock.h>
+#include <net/timewait_sock.h>
 #include <net/tcp_states.h>
 #include <net/xfrm.h>
 
@@ -37,7 +39,8 @@ EXPORT_SYMBOL_GPL(dccp_hashinfo);
 
 static int dccp_v4_get_port(struct sock *sk, const unsigned short snum)
 {
-	return inet_csk_get_port(&dccp_hashinfo, sk, snum);
+	return inet_csk_get_port(&dccp_hashinfo, sk, snum,
+				 inet_csk_bind_conflict);
 }
 
 static void dccp_v4_hash(struct sock *sk)
@@ -45,171 +48,14 @@ static void dccp_v4_hash(struct sock *sk)
 	inet_hash(&dccp_hashinfo, sk);
 }
 
-static void dccp_v4_unhash(struct sock *sk)
+void dccp_unhash(struct sock *sk)
 {
 	inet_unhash(&dccp_hashinfo, sk);
 }
 
-/* called with local bh disabled */
-static int __dccp_v4_check_established(struct sock *sk, const __u16 lport,
-				      struct inet_timewait_sock **twp)
-{
-	struct inet_sock *inet = inet_sk(sk);
-	const u32 daddr = inet->rcv_saddr;
-	const u32 saddr = inet->daddr;
-	const int dif = sk->sk_bound_dev_if;
-	INET_ADDR_COOKIE(acookie, saddr, daddr)
-	const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
-	unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
-	struct inet_ehash_bucket *head = inet_ehash_bucket(&dccp_hashinfo, hash);
-	const struct sock *sk2;
-	const struct hlist_node *node;
-	struct inet_timewait_sock *tw;
-
-	prefetch(head->chain.first);
-	write_lock(&head->lock);
-
-	/* Check TIME-WAIT sockets first. */
-	sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) {
-		tw = inet_twsk(sk2);
-
-		if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
-			goto not_unique;
-	}
-	tw = NULL;
-
-	/* And established part... */
-	sk_for_each(sk2, node, &head->chain) {
-		if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
-			goto not_unique;
-	}
+EXPORT_SYMBOL_GPL(dccp_unhash);
 
-	/* Must record num and sport now. Otherwise we will see
-	 * in hash table socket with a funny identity. */
-	inet->num = lport;
-	inet->sport = htons(lport);
-	sk->sk_hash = hash;
-	BUG_TRAP(sk_unhashed(sk));
-	__sk_add_node(sk, &head->chain);
-	sock_prot_inc_use(sk->sk_prot);
-	write_unlock(&head->lock);
-
-	if (twp != NULL) {
-		*twp = tw;
-		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
-	} else if (tw != NULL) {
-		/* Silly. Should hash-dance instead... */
-		inet_twsk_deschedule(tw, &dccp_death_row);
-		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
-
-		inet_twsk_put(tw);
-	}
-
-	return 0;
-
-not_unique:
-	write_unlock(&head->lock);
-	return -EADDRNOTAVAIL;
-}
-
-/*
- * Bind a port for a connect operation and hash it.
- */
-static int dccp_v4_hash_connect(struct sock *sk)
-{
-	const unsigned short snum = inet_sk(sk)->num;
- 	struct inet_bind_hashbucket *head;
- 	struct inet_bind_bucket *tb;
-	int ret;
-
- 	if (snum == 0) {
- 		int low = sysctl_local_port_range[0];
- 		int high = sysctl_local_port_range[1];
- 		int remaining = (high - low) + 1;
- 		int rover = net_random() % (high - low) + low;
-		struct hlist_node *node;
- 		struct inet_timewait_sock *tw = NULL;
-
- 		local_bh_disable();
- 		do {
- 			head = &dccp_hashinfo.bhash[inet_bhashfn(rover,
-						    dccp_hashinfo.bhash_size)];
- 			spin_lock(&head->lock);
-
- 			/* Does not bother with rcv_saddr checks,
- 			 * because the established check is already
- 			 * unique enough.
- 			 */
-			inet_bind_bucket_for_each(tb, node, &head->chain) {
- 				if (tb->port == rover) {
- 					BUG_TRAP(!hlist_empty(&tb->owners));
- 					if (tb->fastreuse >= 0)
- 						goto next_port;
- 					if (!__dccp_v4_check_established(sk,
-									 rover,
-									 &tw))
- 						goto ok;
- 					goto next_port;
- 				}
- 			}
-
- 			tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep,
-						     head, rover);
- 			if (tb == NULL) {
- 				spin_unlock(&head->lock);
- 				break;
- 			}
- 			tb->fastreuse = -1;
- 			goto ok;
-
- 		next_port:
- 			spin_unlock(&head->lock);
- 			if (++rover > high)
- 				rover = low;
- 		} while (--remaining > 0);
-
- 		local_bh_enable();
-
- 		return -EADDRNOTAVAIL;
-
-ok:
- 		/* All locks still held and bhs disabled */
- 		inet_bind_hash(sk, tb, rover);
-		if (sk_unhashed(sk)) {
- 			inet_sk(sk)->sport = htons(rover);
- 			__inet_hash(&dccp_hashinfo, sk, 0);
- 		}
- 		spin_unlock(&head->lock);
-
- 		if (tw != NULL) {
- 			inet_twsk_deschedule(tw, &dccp_death_row);
- 			inet_twsk_put(tw);
- 		}
-
-		ret = 0;
-		goto out;
- 	}
-
- 	head = &dccp_hashinfo.bhash[inet_bhashfn(snum,
-						 dccp_hashinfo.bhash_size)];
- 	tb   = inet_csk(sk)->icsk_bind_hash;
-	spin_lock_bh(&head->lock);
-	if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) {
-		__inet_hash(&dccp_hashinfo, sk, 0);
-		spin_unlock_bh(&head->lock);
-		return 0;
-	} else {
-		spin_unlock(&head->lock);
-		/* No definite answer... Walk to established hash table */
-		ret = __dccp_v4_check_established(sk, snum, NULL);
-out:
-		local_bh_enable();
-		return ret;
-	}
-}
-
-static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
-			   int addr_len)
+int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct dccp_sock *dp = dccp_sk(sk);
@@ -259,9 +105,9 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
 	inet->dport = usin->sin_port;
 	inet->daddr = daddr;
 
-	dp->dccps_ext_header_len = 0;
+	inet_csk(sk)->icsk_ext_hdr_len = 0;
 	if (inet->opt != NULL)
-		dp->dccps_ext_header_len = inet->opt->optlen;
+		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
 	/*
 	 * Socket identity is still unknown (sport may be zero).
 	 * However we set state to DCCP_REQUESTING and not releasing socket
@@ -269,7 +115,7 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
 	 * complete initialization after this.
 	 */
 	dccp_set_state(sk, DCCP_REQUESTING);
-	err = dccp_v4_hash_connect(sk);
+	err = inet_hash_connect(&dccp_death_row, sk);
 	if (err != 0)
 		goto failure;
 
@@ -287,16 +133,6 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
 							    usin->sin_port);
 	dccp_update_gss(sk, dp->dccps_iss);
 
-	/*
-	 * SWL and AWL are initially adjusted so that they are not less than
-	 * the initial Sequence Numbers received and sent, respectively:
-	 *	SWL := max(GSR + 1 - floor(W/4), ISR),
-	 *	AWL := max(GSS - W' + 1, ISS).
-	 * These adjustments MUST be applied only at the beginning of the
-	 * connection.
-	 */
-	dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss));
-
 	inet->id = dp->dccps_iss ^ jiffies;
 
 	err = dccp_connect(sk);
@@ -316,6 +152,8 @@ failure:
 	goto out;
 }
 
+EXPORT_SYMBOL_GPL(dccp_v4_connect);
+
 /*
  * This routine does path mtu discovery as defined in RFC1191.
  */
@@ -354,7 +192,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk,
 	mtu = dst_mtu(dst);
 
 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
-	    dp->dccps_pmtu_cookie > mtu) {
+	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
 		dccp_sync_mss(sk, mtu);
 
 		/*
@@ -606,6 +444,17 @@ out:
 	sock_put(sk);
 }
 
+/* This routine computes an IPv4 DCCP checksum. */
+void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
+{
+	const struct inet_sock *inet = inet_sk(sk);
+	struct dccp_hdr *dh = dccp_hdr(skb);
+
+	dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr, inet->daddr);
+}
+
+EXPORT_SYMBOL_GPL(dccp_v4_send_check);
+
 int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code)
 {
 	struct sk_buff *skb;
@@ -641,16 +490,6 @@ static inline u64 dccp_v4_init_sequence(const struct sock *sk,
 					   dccp_hdr(skb)->dccph_sport);
 }
 
-static inline int dccp_bad_service_code(const struct sock *sk,
-					const __u32 service)
-{
-	const struct dccp_sock *dp = dccp_sk(sk);
-
-	if (dp->dccps_service == service)
-		return 0;
-	return !dccp_list_has_service(dp->dccps_service_list, service);
-}
-
 int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	struct inet_request_sock *ireq;
@@ -662,7 +501,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
  	const __u32 service = dccp_hdr_request(skb)->dccph_req_service;
 	struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
 	__u8 reset_code = DCCP_RESET_CODE_TOO_BUSY;
-	struct dst_entry *dst = NULL;
 
 	/* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */
 	if (((struct rtable *)skb->dst)->rt_flags &
@@ -703,7 +541,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq = inet_rsk(req);
 	ireq->loc_addr = daddr;
 	ireq->rmt_addr = saddr;
-	/* FIXME: Merge Aristeu's option parsing code when ready */
 	req->rcv_wnd	= 100; /* Fake, option parsing will get the
 				  right value */
 	ireq->opt	= NULL;
@@ -721,23 +558,22 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	dreq->dreq_iss	   = dccp_v4_init_sequence(sk, skb);
 	dreq->dreq_service = service;
 
-	if (dccp_v4_send_response(sk, req, dst))
+	if (dccp_v4_send_response(sk, req, NULL))
 		goto drop_and_free;
 
 	inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
 	return 0;
 
 drop_and_free:
-	/*
-	 * FIXME: should be reqsk_free after implementing req->rsk_ops
-	 */
-	__reqsk_free(req);
+	reqsk_free(req);
 drop:
 	DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
 	dcb->dccpd_reset_code = reset_code;
 	return -1;
 }
 
+EXPORT_SYMBOL_GPL(dccp_v4_conn_request);
+
 /*
  * The three way handshake has completed - we got a valid ACK or DATAACK -
  * now create the new socket.
@@ -792,6 +628,8 @@ exit:
 	return NULL;
 }
 
+EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock);
+
 static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 {
 	const struct dccp_hdr *dh = dccp_hdr(skb);
@@ -1011,7 +849,9 @@ discard:
 	return 0;
 }
 
-static inline int dccp_invalid_packet(struct sk_buff *skb)
+EXPORT_SYMBOL_GPL(dccp_v4_do_rcv);
+
+int dccp_invalid_packet(struct sk_buff *skb)
 {
 	const struct dccp_hdr *dh;
 
@@ -1065,29 +905,30 @@ static inline int dccp_invalid_packet(struct sk_buff *skb)
 		return 1;
 	}
 
-	/* If the header checksum is incorrect, drop packet and return */
-	if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr,
-				    skb->nh.iph->daddr) < 0) {
-		LIMIT_NETDEBUG(KERN_WARNING "DCCP: header checksum is "
-					    "incorrect\n");
-		return 1;
-	}
-
 	return 0;
 }
 
+EXPORT_SYMBOL_GPL(dccp_invalid_packet);
+
 /* this is called when real data arrives */
 int dccp_v4_rcv(struct sk_buff *skb)
 {
 	const struct dccp_hdr *dh;
 	struct sock *sk;
-	int rc;
 
 	/* Step 1: Check header basics: */
 
 	if (dccp_invalid_packet(skb))
 		goto discard_it;
 
+	/* If the header checksum is incorrect, drop packet and return */
+	if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr,
+				    skb->nh.iph->daddr) < 0) {
+		LIMIT_NETDEBUG(KERN_WARNING "%s: incorrect header checksum\n",
+			       __FUNCTION__);
+		goto discard_it;
+	}
+
 	dh = dccp_hdr(skb);
 
 	DCCP_SKB_CB(skb)->dccpd_seq  = dccp_hdr_seq(skb);
@@ -1143,28 +984,11 @@ int dccp_v4_rcv(struct sk_buff *skb)
                 goto do_time_wait;
 	}
 
-	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
-		dccp_pr_debug("xfrm4_policy_check failed\n");
+	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
 		goto discard_and_relse;
-	}
-
-        if (sk_filter(sk, skb, 0)) {
-		dccp_pr_debug("sk_filter failed\n");
-                goto discard_and_relse;
-	}
-
-	skb->dev = NULL;
-
-	bh_lock_sock(sk);
-	rc = 0;
-	if (!sock_owned_by_user(sk))
-		rc = dccp_v4_do_rcv(sk, skb);
-	else
-		sk_add_backlog(sk, skb);
-	bh_unlock_sock(sk);
+	nf_reset(skb);
 
-	sock_put(sk);
-	return rc;
+	return sk_receive_skb(sk, skb);
 
 no_dccp_socket:
 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
@@ -1194,9 +1018,23 @@ do_time_wait:
 	goto no_dccp_socket;
 }
 
-static int dccp_v4_init_sock(struct sock *sk)
+struct inet_connection_sock_af_ops dccp_ipv4_af_ops = {
+	.queue_xmit	= ip_queue_xmit,
+	.send_check	= dccp_v4_send_check,
+	.rebuild_header	= inet_sk_rebuild_header,
+	.conn_request	= dccp_v4_conn_request,
+	.syn_recv_sock	= dccp_v4_request_recv_sock,
+	.net_header_len	= sizeof(struct iphdr),
+	.setsockopt	= ip_setsockopt,
+	.getsockopt	= ip_getsockopt,
+	.addr2sockaddr	= inet_csk_addr2sockaddr,
+	.sockaddr_len	= sizeof(struct sockaddr_in),
+};
+
+int dccp_v4_init_sock(struct sock *sk)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	static int dccp_ctl_socket_init = 1;
 
 	dccp_options_init(&dp->dccps_options);
@@ -1236,9 +1074,11 @@ static int dccp_v4_init_sock(struct sock *sk)
 		dccp_ctl_socket_init = 0;
 
 	dccp_init_xmit_timers(sk);
-	inet_csk(sk)->icsk_rto = DCCP_TIMEOUT_INIT;
+	icsk->icsk_rto = DCCP_TIMEOUT_INIT;
 	sk->sk_state = DCCP_CLOSED;
 	sk->sk_write_space = dccp_write_space;
+	icsk->icsk_af_ops = &dccp_ipv4_af_ops;
+	icsk->icsk_sync_mss = dccp_sync_mss;
 	dp->dccps_mss_cache = 536;
 	dp->dccps_role = DCCP_ROLE_UNDEFINED;
 	dp->dccps_service = DCCP_SERVICE_INVALID_VALUE;
@@ -1246,12 +1086,14 @@ static int dccp_v4_init_sock(struct sock *sk)
 	return 0;
 }
 
-static int dccp_v4_destroy_sock(struct sock *sk)
+EXPORT_SYMBOL_GPL(dccp_v4_init_sock);
+
+int dccp_v4_destroy_sock(struct sock *sk)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 
 	/*
-	 * DCCP doesn't use sk_qrite_queue, just sk_send_head
+	 * DCCP doesn't use sk_write_queue, just sk_send_head
 	 * for retransmissions
 	 */
 	if (sk->sk_send_head != NULL) {
@@ -1279,6 +1121,8 @@ static int dccp_v4_destroy_sock(struct sock *sk)
 	return 0;
 }
 
+EXPORT_SYMBOL_GPL(dccp_v4_destroy_sock);
+
 static void dccp_v4_reqsk_destructor(struct request_sock *req)
 {
 	kfree(inet_rsk(req)->opt);
@@ -1293,7 +1137,11 @@ static struct request_sock_ops dccp_request_sock_ops = {
 	.send_reset	= dccp_v4_ctl_send_reset,
 };
 
-struct proto dccp_v4_prot = {
+static struct timewait_sock_ops dccp_timewait_sock_ops = {
+	.twsk_obj_size	= sizeof(struct inet_timewait_sock),
+};
+
+struct proto dccp_prot = {
 	.name			= "DCCP",
 	.owner			= THIS_MODULE,
 	.close			= dccp_close,
@@ -1307,7 +1155,7 @@ struct proto dccp_v4_prot = {
 	.recvmsg		= dccp_recvmsg,
 	.backlog_rcv		= dccp_v4_do_rcv,
 	.hash			= dccp_v4_hash,
-	.unhash			= dccp_v4_unhash,
+	.unhash			= dccp_unhash,
 	.accept			= inet_csk_accept,
 	.get_port		= dccp_v4_get_port,
 	.shutdown		= dccp_shutdown,
@@ -1316,5 +1164,7 @@ struct proto dccp_v4_prot = {
 	.max_header		= MAX_DCCP_HEADER,
 	.obj_size		= sizeof(struct dccp_sock),
 	.rsk_prot		= &dccp_request_sock_ops,
-	.twsk_obj_size		= sizeof(struct inet_timewait_sock),
+	.twsk_prot		= &dccp_timewait_sock_ops,
 };
+
+EXPORT_SYMBOL_GPL(dccp_prot);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
new file mode 100644
index 00000000000..df074259f9c
--- /dev/null
+++ b/net/dccp/ipv6.c
@@ -0,0 +1,1262 @@
+/*
+ *	DCCP over IPv6
+ *	Linux INET6 implementation 
+ *
+ *	Based on net/dccp6/ipv6.c
+ *
+ *	Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/xfrm.h>
+
+#include <net/addrconf.h>
+#include <net/inet_common.h>
+#include <net/inet_hashtables.h>
+#include <net/inet_sock.h>
+#include <net/inet6_connection_sock.h>
+#include <net/inet6_hashtables.h>
+#include <net/ip6_route.h>
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/ip6_checksum.h>
+#include <net/xfrm.h>
+
+#include "dccp.h"
+#include "ipv6.h"
+
+static void dccp_v6_ctl_send_reset(struct sk_buff *skb);
+static void dccp_v6_reqsk_send_ack(struct sk_buff *skb,
+				   struct request_sock *req);
+static void dccp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb);
+
+static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
+
+static struct inet_connection_sock_af_ops dccp_ipv6_mapped;
+static struct inet_connection_sock_af_ops dccp_ipv6_af_ops;
+
+static int dccp_v6_get_port(struct sock *sk, unsigned short snum)
+{
+	return inet_csk_get_port(&dccp_hashinfo, sk, snum,
+				 inet6_csk_bind_conflict);
+}
+
+static void dccp_v6_hash(struct sock *sk)
+{
+	if (sk->sk_state != DCCP_CLOSED) {
+		if (inet_csk(sk)->icsk_af_ops == &dccp_ipv6_mapped) {
+			dccp_prot.hash(sk);
+			return;
+		}
+		local_bh_disable();
+		__inet6_hash(&dccp_hashinfo, sk);
+		local_bh_enable();
+	}
+}
+
+static inline u16 dccp_v6_check(struct dccp_hdr *dh, int len,
+				struct in6_addr *saddr, 
+				struct in6_addr *daddr, 
+				unsigned long base)
+{
+	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_DCCP, base);
+}
+
+static __u32 dccp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
+{
+	const struct dccp_hdr *dh = dccp_hdr(skb);
+
+	if (skb->protocol == htons(ETH_P_IPV6))
+		return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
+						    skb->nh.ipv6h->saddr.s6_addr32,
+						    dh->dccph_dport,
+						    dh->dccph_sport);
+	else
+		return secure_dccp_sequence_number(skb->nh.iph->daddr,
+						   skb->nh.iph->saddr,
+						   dh->dccph_dport,
+						   dh->dccph_sport);
+}
+
+static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 
+			   int addr_len)
+{
+	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct in6_addr *saddr = NULL, *final_p = NULL, final;
+	struct flowi fl;
+	struct dst_entry *dst;
+	int addr_type;
+	int err;
+
+	dp->dccps_role = DCCP_ROLE_CLIENT;
+
+	if (addr_len < SIN6_LEN_RFC2133) 
+		return -EINVAL;
+
+	if (usin->sin6_family != AF_INET6) 
+		return -EAFNOSUPPORT;
+
+	memset(&fl, 0, sizeof(fl));
+
+	if (np->sndflow) {
+		fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
+		IP6_ECN_flow_init(fl.fl6_flowlabel);
+		if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) {
+			struct ip6_flowlabel *flowlabel;
+			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
+			if (flowlabel == NULL)
+				return -EINVAL;
+			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
+			fl6_sock_release(flowlabel);
+		}
+	}
+
+	/*
+  	 *	connect() to INADDR_ANY means loopback (BSD'ism).
+  	 */
+  	
+  	if (ipv6_addr_any(&usin->sin6_addr))
+		usin->sin6_addr.s6_addr[15] = 0x1; 
+
+	addr_type = ipv6_addr_type(&usin->sin6_addr);
+
+	if(addr_type & IPV6_ADDR_MULTICAST)
+		return -ENETUNREACH;
+
+	if (addr_type & IPV6_ADDR_LINKLOCAL) {
+		if (addr_len >= sizeof(struct sockaddr_in6) &&
+		    usin->sin6_scope_id) {
+			/* If interface is set while binding, indices
+			 * must coincide.
+			 */
+			if (sk->sk_bound_dev_if &&
+			    sk->sk_bound_dev_if != usin->sin6_scope_id)
+				return -EINVAL;
+
+			sk->sk_bound_dev_if = usin->sin6_scope_id;
+		}
+
+		/* Connect to link-local address requires an interface */
+		if (!sk->sk_bound_dev_if)
+			return -EINVAL;
+	}
+
+	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
+	np->flow_label = fl.fl6_flowlabel;
+
+	/*
+	 *	DCCP over IPv4
+	 */
+
+	if (addr_type == IPV6_ADDR_MAPPED) {
+		u32 exthdrlen = icsk->icsk_ext_hdr_len;
+		struct sockaddr_in sin;
+
+		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
+
+		if (__ipv6_only_sock(sk))
+			return -ENETUNREACH;
+
+		sin.sin_family = AF_INET;
+		sin.sin_port = usin->sin6_port;
+		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
+
+		icsk->icsk_af_ops = &dccp_ipv6_mapped;
+		sk->sk_backlog_rcv = dccp_v4_do_rcv;
+
+		err = dccp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
+
+		if (err) {
+			icsk->icsk_ext_hdr_len = exthdrlen;
+			icsk->icsk_af_ops = &dccp_ipv6_af_ops;
+			sk->sk_backlog_rcv = dccp_v6_do_rcv;
+			goto failure;
+		} else {
+			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
+				      inet->saddr);
+			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
+				      inet->rcv_saddr);
+		}
+
+		return err;
+	}
+
+	if (!ipv6_addr_any(&np->rcv_saddr))
+		saddr = &np->rcv_saddr;
+
+	fl.proto = IPPROTO_DCCP;
+	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+	ipv6_addr_copy(&fl.fl6_src, saddr ? saddr : &np->saddr);
+	fl.oif = sk->sk_bound_dev_if;
+	fl.fl_ip_dport = usin->sin6_port;
+	fl.fl_ip_sport = inet->sport;
+
+	if (np->opt && np->opt->srcrt) {
+		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
+		ipv6_addr_copy(&final, &fl.fl6_dst);
+		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+		final_p = &final;
+	}
+
+	err = ip6_dst_lookup(sk, &dst, &fl);
+	if (err)
+		goto failure;
+	if (final_p)
+		ipv6_addr_copy(&fl.fl6_dst, final_p);
+
+	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+		goto failure;
+
+	if (saddr == NULL) {
+		saddr = &fl.fl6_src;
+		ipv6_addr_copy(&np->rcv_saddr, saddr);
+	}
+
+	/* set the source address */
+	ipv6_addr_copy(&np->saddr, saddr);
+	inet->rcv_saddr = LOOPBACK4_IPV6;
+
+	ip6_dst_store(sk, dst, NULL);
+
+	icsk->icsk_ext_hdr_len = 0;
+	if (np->opt)
+		icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
+					  np->opt->opt_nflen);
+
+	inet->dport = usin->sin6_port;
+
+	dccp_set_state(sk, DCCP_REQUESTING);
+	err = inet6_hash_connect(&dccp_death_row, sk);
+	if (err)
+		goto late_failure;
+	/* FIXME */
+#if 0
+	dp->dccps_gar = secure_dccp_v6_sequence_number(np->saddr.s6_addr32,
+						       np->daddr.s6_addr32,
+						       inet->sport,
+						       inet->dport);
+#endif
+	err = dccp_connect(sk);
+	if (err)
+		goto late_failure;
+
+	return 0;
+
+late_failure:
+	dccp_set_state(sk, DCCP_CLOSED);
+	__sk_dst_reset(sk);
+failure:
+	inet->dport = 0;
+	sk->sk_route_caps = 0;
+	return err;
+}
+
+static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+			int type, int code, int offset, __u32 info)
+{
+	struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data;
+	const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
+	struct ipv6_pinfo *np;
+	struct sock *sk;
+	int err;
+	__u64 seq;
+
+	sk = inet6_lookup(&dccp_hashinfo, &hdr->daddr, dh->dccph_dport,
+			  &hdr->saddr, dh->dccph_sport, skb->dev->ifindex);
+
+	if (sk == NULL) {
+		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
+		return;
+	}
+
+	if (sk->sk_state == DCCP_TIME_WAIT) {
+		inet_twsk_put((struct inet_timewait_sock *)sk);
+		return;
+	}
+
+	bh_lock_sock(sk);
+	if (sock_owned_by_user(sk))
+		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
+
+	if (sk->sk_state == DCCP_CLOSED)
+		goto out;
+
+	np = inet6_sk(sk);
+
+	if (type == ICMPV6_PKT_TOOBIG) {
+		struct dst_entry *dst = NULL;
+
+		if (sock_owned_by_user(sk))
+			goto out;
+		if ((1 << sk->sk_state) & (DCCPF_LISTEN | DCCPF_CLOSED))
+			goto out;
+
+		/* icmp should have updated the destination cache entry */
+		dst = __sk_dst_check(sk, np->dst_cookie);
+
+		if (dst == NULL) {
+			struct inet_sock *inet = inet_sk(sk);
+			struct flowi fl;
+
+			/* BUGGG_FUTURE: Again, it is not clear how
+			   to handle rthdr case. Ignore this complexity
+			   for now.
+			 */
+			memset(&fl, 0, sizeof(fl));
+			fl.proto = IPPROTO_DCCP;
+			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+			fl.oif = sk->sk_bound_dev_if;
+			fl.fl_ip_dport = inet->dport;
+			fl.fl_ip_sport = inet->sport;
+
+			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
+				sk->sk_err_soft = -err;
+				goto out;
+			}
+
+			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
+				sk->sk_err_soft = -err;
+				goto out;
+			}
+
+		} else
+			dst_hold(dst);
+
+		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
+			dccp_sync_mss(sk, dst_mtu(dst));
+		} /* else let the usual retransmit timer handle it */
+		dst_release(dst);
+		goto out;
+	}
+
+	icmpv6_err_convert(type, code, &err);
+
+	seq = DCCP_SKB_CB(skb)->dccpd_seq;
+	/* Might be for a request_sock */
+	switch (sk->sk_state) {
+		struct request_sock *req, **prev;
+	case DCCP_LISTEN:
+		if (sock_owned_by_user(sk))
+			goto out;
+
+		req = inet6_csk_search_req(sk, &prev, dh->dccph_dport,
+					   &hdr->daddr, &hdr->saddr,
+					   inet6_iif(skb));
+		if (!req)
+			goto out;
+
+		/* ICMPs are not backlogged, hence we cannot get
+		 * an established socket here.
+		 */
+		BUG_TRAP(req->sk == NULL);
+
+		if (seq != dccp_rsk(req)->dreq_iss) {
+			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+			goto out;
+		}
+
+		inet_csk_reqsk_queue_drop(sk, req, prev);
+		goto out;
+
+	case DCCP_REQUESTING:
+	case DCCP_RESPOND:  /* Cannot happen.
+			       It can, if SYNs are crossed. --ANK */
+		if (!sock_owned_by_user(sk)) {
+			DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
+			sk->sk_err = err;
+			/*
+			 * Wake people up to see the error
+			 * (see connect in sock.c)
+			 */
+			sk->sk_error_report(sk);
+
+			dccp_done(sk);
+		} else
+			sk->sk_err_soft = err;
+		goto out;
+	}
+
+	if (!sock_owned_by_user(sk) && np->recverr) {
+		sk->sk_err = err;
+		sk->sk_error_report(sk);
+	} else
+		sk->sk_err_soft = err;
+
+out:
+	bh_unlock_sock(sk);
+	sock_put(sk);
+}
+
+
+static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
+				 struct dst_entry *dst)
+{
+	struct inet6_request_sock *ireq6 = inet6_rsk(req);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct sk_buff *skb;
+	struct ipv6_txoptions *opt = NULL;
+	struct in6_addr *final_p = NULL, final;
+	struct flowi fl;
+	int err = -1;
+
+	memset(&fl, 0, sizeof(fl));
+	fl.proto = IPPROTO_DCCP;
+	ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
+	ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
+	fl.fl6_flowlabel = 0;
+	fl.oif = ireq6->iif;
+	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
+	fl.fl_ip_sport = inet_sk(sk)->sport;
+
+	if (dst == NULL) {
+		opt = np->opt;
+		if (opt == NULL &&
+		    np->rxopt.bits.osrcrt == 2 &&
+		    ireq6->pktopts) {
+			struct sk_buff *pktopts = ireq6->pktopts;
+			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
+			if (rxopt->srcrt)
+				opt = ipv6_invert_rthdr(sk,
+					(struct ipv6_rt_hdr *)(pktopts->nh.raw +
+							       rxopt->srcrt));
+		}
+
+		if (opt && opt->srcrt) {
+			struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt;
+			ipv6_addr_copy(&final, &fl.fl6_dst);
+			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+			final_p = &final;
+		}
+
+		err = ip6_dst_lookup(sk, &dst, &fl);
+		if (err)
+			goto done;
+		if (final_p)
+			ipv6_addr_copy(&fl.fl6_dst, final_p);
+		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+			goto done;
+	}
+
+	skb = dccp_make_response(sk, dst, req);
+	if (skb != NULL) {
+		struct dccp_hdr *dh = dccp_hdr(skb);
+		dh->dccph_checksum = dccp_v6_check(dh, skb->len,
+						   &ireq6->loc_addr,
+						   &ireq6->rmt_addr,
+						   csum_partial((char *)dh,
+								skb->len,
+								skb->csum));
+		ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
+		err = ip6_xmit(sk, skb, &fl, opt, 0);
+		if (err == NET_XMIT_CN)
+			err = 0;
+	}
+
+done:
+        if (opt && opt != np->opt)
+		sock_kfree_s(sk, opt, opt->tot_len);
+	return err;
+}
+
+static void dccp_v6_reqsk_destructor(struct request_sock *req)
+{
+	if (inet6_rsk(req)->pktopts != NULL)
+		kfree_skb(inet6_rsk(req)->pktopts);
+}
+
+static struct request_sock_ops dccp6_request_sock_ops = {
+	.family		= AF_INET6,
+	.obj_size	= sizeof(struct dccp6_request_sock),
+	.rtx_syn_ack	= dccp_v6_send_response,
+	.send_ack	= dccp_v6_reqsk_send_ack,
+	.destructor	= dccp_v6_reqsk_destructor,
+	.send_reset	= dccp_v6_ctl_send_reset,
+};
+
+static struct timewait_sock_ops dccp6_timewait_sock_ops = {
+	.twsk_obj_size	= sizeof(struct dccp6_timewait_sock),
+};
+
+static void dccp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct dccp_hdr *dh = dccp_hdr(skb);
+
+	dh->dccph_checksum = csum_ipv6_magic(&np->saddr, &np->daddr,
+					     len, IPPROTO_DCCP, 
+					     csum_partial((char *)dh,
+							  dh->dccph_doff << 2,
+							  skb->csum));
+}
+
+/*
+ * Reply to @rxskb with a DCCP-Reset whose code is taken from the control
+ * block of @rxskb.  Resets and non-unicast destinations get no reply; the
+ * reply is routed without a socket and freed when it cannot be sent.
+ */
+static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb)
+{
+	struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
+	const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
+				       sizeof(struct dccp_hdr_ext) +
+				       sizeof(struct dccp_hdr_reset);
+	struct sk_buff *skb;
+	struct flowi fl;
+	u64 seqno;
+
+	if (rxdh->dccph_type == DCCP_PKT_RESET ||
+	    !ipv6_unicast_destination(rxskb))
+		return;
+
+	/* Grab memory for the reset, leaving headroom for lower headers. */
+	skb = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) +
+			dccp_hdr_reset_len, GFP_ATOMIC);
+	if (skb == NULL)
+		return;
+
+	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr) +
+		    dccp_hdr_reset_len);
+
+	skb->h.raw = skb_push(skb, dccp_hdr_reset_len);
+	dh = dccp_hdr(skb);
+	memset(dh, 0, dccp_hdr_reset_len);
+
+	/* Swap the send and the receive. */
+	dh->dccph_type	= DCCP_PKT_RESET;
+	dh->dccph_sport	= rxdh->dccph_dport;
+	dh->dccph_dport	= rxdh->dccph_sport;
+	dh->dccph_doff	= dccp_hdr_reset_len / 4;
+	dh->dccph_x	= 1;
+	dccp_hdr_reset(skb)->dccph_reset_code =
+				DCCP_SKB_CB(rxskb)->dccpd_reset_code;
+
+	/* See "8.3.1. Abnormal Termination" in draft-ietf-dccp-spec-11 */
+	seqno = 0;
+	if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
+		dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1);
+	dccp_hdr_set_seq(dh, seqno);
+	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb),
+			 DCCP_SKB_CB(rxskb)->dccpd_seq);
+
+	memset(&fl, 0, sizeof(fl));
+	ipv6_addr_copy(&fl.fl6_dst, &rxskb->nh.ipv6h->saddr);
+	ipv6_addr_copy(&fl.fl6_src, &rxskb->nh.ipv6h->daddr);
+
+	/* Checksum the whole packet; the checksum field is still zero here. */
+	dh->dccph_checksum = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
+					     skb->len, IPPROTO_DCCP,
+					     csum_partial((char *)dh,
+							  skb->len, 0));
+	fl.proto = IPPROTO_DCCP;
+	fl.oif = inet6_iif(rxskb);
+	fl.fl_ip_dport = dh->dccph_dport;
+	fl.fl_ip_sport = dh->dccph_sport;
+
+	/* sk = NULL, but it is safe for now. RST socket required. */
+	if (!ip6_dst_lookup(NULL, &skb->dst, &fl) &&
+	    xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) {
+		ip6_xmit(NULL, skb, &fl, NULL, 0);
+		DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
+		DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
+		return;
+	}
+
+	kfree_skb(skb);
+}
+
+/* Reply to @rxskb with a DCCP-Ack; the skb is freed if routing fails. */
+static void dccp_v6_ctl_send_ack(struct sk_buff *rxskb)
+{
+	struct flowi fl;
+	struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
+	const int dccp_hdr_ack_len = sizeof(struct dccp_hdr) +
+				     sizeof(struct dccp_hdr_ext) +
+				     sizeof(struct dccp_hdr_ack_bits);
+	struct sk_buff *skb;
+
+	skb = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) +
+			dccp_hdr_ack_len, GFP_ATOMIC);
+	if (skb == NULL)
+		return;
+
+	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr) +
+			 dccp_hdr_ack_len);
+	skb->h.raw = skb_push(skb, dccp_hdr_ack_len);
+	dh = dccp_hdr(skb);
+	memset(dh, 0, dccp_hdr_ack_len);
+
+	/* Build DCCP header (ports swapped) and checksum it. */
+	dh->dccph_type	= DCCP_PKT_ACK;
+	dh->dccph_sport = rxdh->dccph_dport;
+	dh->dccph_dport = rxdh->dccph_sport;
+	dh->dccph_doff	= dccp_hdr_ack_len / 4;
+	dh->dccph_x	= 1;
+	dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq);
+	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb),
+			 DCCP_SKB_CB(rxskb)->dccpd_seq);
+
+	memset(&fl, 0, sizeof(fl));
+	ipv6_addr_copy(&fl.fl6_dst, &rxskb->nh.ipv6h->saddr);
+	ipv6_addr_copy(&fl.fl6_src, &rxskb->nh.ipv6h->daddr);
+	/* Checksum the whole packet; the checksum field is still zero here. */
+	dh->dccph_checksum = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
+					     skb->len, IPPROTO_DCCP,
+					     csum_partial((char *)dh,
+							  skb->len, 0));
+	fl.proto = IPPROTO_DCCP;
+	fl.oif = inet6_iif(rxskb);
+	fl.fl_ip_dport = dh->dccph_dport;
+	fl.fl_ip_sport = dh->dccph_sport;
+
+	if (!ip6_dst_lookup(NULL, &skb->dst, &fl) &&
+	    xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) {
+		ip6_xmit(NULL, skb, &fl, NULL, 0);
+		DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
+		return;
+	}
+
+	kfree_skb(skb);
+}
+
+static void dccp_v6_reqsk_send_ack(struct sk_buff *skb,
+				   struct request_sock *req)
+{
+	dccp_v6_ctl_send_ack(skb);	/* @req is not used to build the ACK */
+}
+
+/*
+ * Look for a pending request or established socket matching @skb.  Returns
+ * dccp_check_req()'s result, a bh-locked established socket, NULL after
+ * dropping a timewait reference, or @sk itself when nothing matched.
+ */
+static struct sock *dccp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
+{
+	const struct ipv6hdr *ip6h = skb->nh.ipv6h;
+	const struct dccp_hdr *hdr = dccp_hdr(skb);
+	struct request_sock **prevp;
+	struct request_sock *req;
+	struct sock *est;
+
+	req = inet6_csk_search_req(sk, &prevp, hdr->dccph_sport, &ip6h->saddr,
+				   &ip6h->daddr, inet6_iif(skb));
+	if (req)
+		return dccp_check_req(sk, skb, req, prevp);
+
+	est = __inet6_lookup_established(&dccp_hashinfo,
+					 &ip6h->saddr, hdr->dccph_sport,
+					 &ip6h->daddr, ntohs(hdr->dccph_dport),
+					 inet6_iif(skb));
+	if (est == NULL)
+		return sk;
+	if (est->sk_state == DCCP_TIME_WAIT) {
+		inet_twsk_put((struct inet_timewait_sock *)est);
+		return NULL;
+	}
+	bh_lock_sock(est);
+	return est;
+}
+
+/*
+ * Process a connection request received on listening socket @sk (IPv4
+ * packets go to dccp_v4_conn_request()).  Returns 0 once the request sock
+ * is queued, or -1 with the reset code stored in the skb control block.
+ */
+static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
+{
+	struct dccp_sock dp;	/* NOTE(review): whole sock on the stack */
+	struct request_sock *req;
+	struct dccp_request_sock *dreq;
+	struct inet6_request_sock *ireq6;
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	const __u32 service = dccp_hdr_request(skb)->dccph_req_service;
+	struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
+	__u8 reset_code = DCCP_RESET_CODE_TOO_BUSY;
+
+	if (skb->protocol == htons(ETH_P_IP))
+		return dccp_v4_conn_request(sk, skb);
+
+	if (!ipv6_unicast_destination(skb))
+		goto drop;
+
+	if (dccp_bad_service_code(sk, service)) {
+		reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
+		goto drop;
+	}
+	/* There are no SYN attacks on IPv6, yet... */
+	if (inet_csk_reqsk_queue_is_full(sk))
+		goto drop;
+
+	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
+		goto drop;
+
+	req = inet6_reqsk_alloc(sk->sk_prot->rsk_prot);
+	if (req == NULL)
+		goto drop;
+
+	/* FIXME: process options */
+	dccp_openreq_init(req, &dp, skb);
+
+	ireq6 = inet6_rsk(req);
+	ipv6_addr_copy(&ireq6->rmt_addr, &skb->nh.ipv6h->saddr);
+	ipv6_addr_copy(&ireq6->loc_addr, &skb->nh.ipv6h->daddr);
+	req->rcv_wnd	= 100; /* Fake, option parsing will get the
+				  right value */
+	ireq6->pktopts	= NULL;
+
+	if (ipv6_opt_accepted(sk, skb) ||
+	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
+	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
+		atomic_inc(&skb->users);
+		ireq6->pktopts = skb;
+	}
+	ireq6->iif = sk->sk_bound_dev_if;
+
+	/* So that link locals have meaning */
+	if (!sk->sk_bound_dev_if &&
+	    ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL)
+		ireq6->iif = inet6_iif(skb);
+
+	/*
+	 * Step 3: Process LISTEN state
+	 *
+	 * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
+	 *
+	 * In fact we defer setting S.GSR, S.SWL, S.SWH to
+	 * dccp_create_openreq_child.
+	 */
+	dreq = dccp_rsk(req);
+	dreq->dreq_isr	   = dcb->dccpd_seq;
+	dreq->dreq_iss	   = dccp_v6_init_sequence(sk, skb);
+	dreq->dreq_service = service;
+
+	if (dccp_v6_send_response(sk, req, NULL))
+		goto drop_and_free;
+
+	inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
+	return 0;
+
+drop_and_free:
+	reqsk_free(req);
+drop:
+	DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
+	dcb->dccpd_reset_code = reset_code;
+	return -1;
+}
+
+/*
+ * Create the child socket for request @req received on listener @sk.
+ * IPv4 (v6-mapped) packets are delegated to dccp_v4_request_recv_sock() and
+ * the child is switched to the mapped af_ops; native IPv6 requests get a
+ * route (looked up here when @dst is NULL) and a copy of the listener's
+ * ipv6_pinfo.  Returns the new socket or NULL on failure.
+ */
+static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
+					      struct sk_buff *skb,
+					      struct request_sock *req,
+					      struct dst_entry *dst)
+{
+	struct inet6_request_sock *ireq6 = inet6_rsk(req);
+	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
+	struct inet_sock *newinet;
+	struct dccp_sock *newdp;
+	struct dccp6_sock *newdp6;
+	struct sock *newsk;
+	struct ipv6_txoptions *opt;
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		/* v6 mapped: let the IPv4 code create the child socket */
+		newsk = dccp_v4_request_recv_sock(sk, skb, req, dst);
+		if (newsk == NULL) 
+			return NULL;
+
+		newdp6 = (struct dccp6_sock *)newsk;
+		newdp = dccp_sk(newsk);
+		newinet = inet_sk(newsk);
+		newinet->pinet6 = &newdp6->inet6;
+		newnp = inet6_sk(newsk);
+
+		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
+
+		ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
+			      newinet->daddr);
+
+		ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
+			      newinet->saddr);
+
+		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
+
+		inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped;
+		newsk->sk_backlog_rcv = dccp_v4_do_rcv;
+		newnp->pktoptions  = NULL;
+		newnp->opt	   = NULL;
+		/* NOTE(review): skb is IPv4 on this path; confirm ipv6h reads. */
+		newnp->mcast_oif   = inet6_iif(skb);
+		newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;
+
+		/*
+		 * No need to charge this sock to the relevant IPv6 refcnt
+		 * debug socks count here, dccp_create_openreq_child now does
+		 * this for us, see the comment in that function. -acme
+		 */
+
+		/* Tricky place: until now the IPv4 code worked with the IPv6
+		 * icsk_af_ops.  Sync the MSS now that the mapped ops are set.
+		 */
+		dccp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
+
+		return newsk;
+	}
+
+	opt = np->opt;
+
+	if (sk_acceptq_is_full(sk))
+		goto out_overflow;
+
+	if (np->rxopt.bits.osrcrt == 2 &&
+	    opt == NULL && ireq6->pktopts) {
+		struct inet6_skb_parm *rxopt = IP6CB(ireq6->pktopts);
+		if (rxopt->srcrt)
+			opt = ipv6_invert_rthdr(sk,
+				(struct ipv6_rt_hdr *)(ireq6->pktopts->nh.raw +
+						       rxopt->srcrt));
+	}
+
+	if (dst == NULL) {
+		struct in6_addr *final_p = NULL, final;
+		struct flowi fl;
+
+		memset(&fl, 0, sizeof(fl));
+		fl.proto = IPPROTO_DCCP;
+		ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
+		if (opt && opt->srcrt) {
+			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
+			ipv6_addr_copy(&final, &fl.fl6_dst);
+			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+			final_p = &final;
+		}
+		ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
+		fl.oif = sk->sk_bound_dev_if;
+		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
+		fl.fl_ip_sport = inet_sk(sk)->sport;
+
+		if (ip6_dst_lookup(sk, &dst, &fl))
+			goto out;
+
+		if (final_p)
+			ipv6_addr_copy(&fl.fl6_dst, final_p);
+
+		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+			goto out;
+	} 
+
+	newsk = dccp_create_openreq_child(sk, req, skb);
+	if (newsk == NULL)
+		goto out;
+
+	/*
+	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
+	 * count here, dccp_create_openreq_child now does this for us, see the
+	 * comment in that function for the gory details. -acme
+	 */
+
+	ip6_dst_store(newsk, dst, NULL);
+	newsk->sk_route_caps = dst->dev->features &
+		~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+
+	newdp6 = (struct dccp6_sock *)newsk;
+	newinet = inet_sk(newsk);
+	newinet->pinet6 = &newdp6->inet6;
+	newdp = dccp_sk(newsk);
+	newnp = inet6_sk(newsk);
+
+	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
+
+	ipv6_addr_copy(&newnp->daddr, &ireq6->rmt_addr);
+	ipv6_addr_copy(&newnp->saddr, &ireq6->loc_addr);
+	ipv6_addr_copy(&newnp->rcv_saddr, &ireq6->loc_addr);
+	newsk->sk_bound_dev_if = ireq6->iif;
+
+	/* Now IPv6 options...  First: no IPv4 options. */
+	newinet->opt = NULL;
+
+	/* Clone RX bits */
+	newnp->rxopt.all = np->rxopt.all;
+
+	/* Clone pktoptions latched in the request sock, dropping its skb */
+	newnp->pktoptions = NULL;
+	if (ireq6->pktopts != NULL) {
+		newnp->pktoptions = skb_clone(ireq6->pktopts, GFP_ATOMIC);
+		kfree_skb(ireq6->pktopts);
+		ireq6->pktopts = NULL;
+		if (newnp->pktoptions)
+			skb_set_owner_r(newnp->pktoptions, newsk);
+	}
+	newnp->opt	  = NULL;
+	newnp->mcast_oif  = inet6_iif(skb);
+	newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
+
+	/* Clone native IPv6 options from listening socket (if any)
+
+	   Yes, keeping reference count would be much more clever,
+	   but we make one more one thing there: reattach optmem
+	   to newsk.
+	 */
+	if (opt) {
+		newnp->opt = ipv6_dup_options(newsk, opt);
+		if (opt != np->opt)
+			sock_kfree_s(sk, opt, opt->tot_len);
+	}
+
+	inet_csk(newsk)->icsk_ext_hdr_len = 0;
+	if (newnp->opt)
+		inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
+						     newnp->opt->opt_flen);
+
+	dccp_sync_mss(newsk, dst_mtu(dst));
+
+	newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
+
+	__inet6_hash(&dccp_hashinfo, newsk);
+	inet_inherit_port(&dccp_hashinfo, sk, newsk);
+
+	return newsk;
+
+out_overflow:
+	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
+out:
+	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
+	if (opt && opt != np->opt)
+		sock_kfree_s(sk, opt, opt->tot_len);
+	dst_release(dst);
+	return NULL;
+}
+
+/* The socket must have its spinlock held when we get
+ * here.
+ *
+ * We have a potential double-lock case here, so even when
+ * doing backlog processing we use the BH locking scheme.
+ * This is because we cannot sleep with the original spinlock
+ * held.
+ *
+ * Always returns 0.  On failure @skb is freed here, after a reset has
+ * been sent when the input routine asked for one.
+ */
+static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct sk_buff *opt_skb = NULL;
+
+	/* Imagine: socket is IPv6. IPv4 packet arrives,
+	   goes to IPv4 receive handler and backlogged.
+	   From backlog it always goes here. Kerboom...
+	   Fortunately, dccp_rcv_established and rcv_established
+	   handle them correctly, but it is not case with
+	   dccp_v6_hnd_req and dccp_v6_ctl_send_reset().   --ANK
+	 */
+
+	if (skb->protocol == htons(ETH_P_IP))
+		return dccp_v4_do_rcv(sk, skb);
+
+	if (sk_filter(sk, skb, 0))
+		goto discard;
+
+	/*
+	 *	socket locking is here for SMP purposes as backlog rcv
+	 *	is currently called with bh processing disabled.
+	 */
+
+	/* Do Stevens' IPV6_PKTOPTIONS.
+	 *
+	 * When the socket asked for any receive options (np->rxopt.all) the
+	 * packet is cloned so they could be latched.  Nothing in this
+	 * function consumes the clone yet, so every exit path, successful
+	 * or not, has to free it.
+	 */
+	if (np->rxopt.all)
+		opt_skb = skb_clone(skb, GFP_ATOMIC);
+
+	if (sk->sk_state == DCCP_OPEN) { /* Fast path */
+		if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len))
+			goto reset;
+		goto out_free_opt;
+	}
+
+	if (sk->sk_state == DCCP_LISTEN) {
+		struct sock *nsk = dccp_v6_hnd_req(sk, skb);
+		if (!nsk)
+			goto discard;
+
+		/*
+		 * Queue it on the new socket if the new socket is active,
+		 * otherwise we just shortcircuit this and continue with
+		 * the new socket..
+		 */
+		if (nsk != sk) {
+			if (dccp_child_process(sk, nsk, skb))
+				goto reset;
+			goto out_free_opt;
+		}
+	}
+
+	if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len))
+		goto reset;
+
+out_free_opt:
+	/* Success: only the unused options clone is left to release. */
+	if (opt_skb)
+		__kfree_skb(opt_skb);
+	return 0;
+
+reset:
+	dccp_v6_ctl_send_reset(skb);
+discard:
+	if (opt_skb)
+		__kfree_skb(opt_skb);
+	kfree_skb(skb);
+	return 0;
+}
+
+/*
+ * Protocol handler for DCCP packets arriving over IPv6.  Frees *@pskb and
+ * returns 0 unless the packet was handed to sk_receive_skb(), in which case
+ * the result is -1 when that call returns non-zero and 0 otherwise.
+ */
+static int dccp_v6_rcv(struct sk_buff **pskb)
+{
+	const struct dccp_hdr *dh;
+	struct sk_buff *skb = *pskb;
+	struct sock *sk;
+
+	/* Step 1: Check header basics: */
+
+	if (dccp_invalid_packet(skb))
+		goto discard_it;
+
+	dh = dccp_hdr(skb);
+
+	DCCP_SKB_CB(skb)->dccpd_seq  = dccp_hdr_seq(skb);
+	DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type;
+
+	if (dccp_packet_without_ack(skb))
+		DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ;
+	else
+		DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
+
+	/* Step 2:
+	 *	Look up flow ID in table and get corresponding socket */
+	sk = __inet6_lookup(&dccp_hashinfo, &skb->nh.ipv6h->saddr,
+			    dh->dccph_sport,
+			    &skb->nh.ipv6h->daddr, ntohs(dh->dccph_dport),
+			    inet6_iif(skb));
+	/*
+	 * Step 2:
+	 *	If no socket ...
+	 *		Generate Reset(No Connection) unless P.type == Reset
+	 *		Drop packet and return
+	 */
+	if (sk == NULL)
+		goto no_dccp_socket;
+
+	/*
+	 * Step 2:
+	 *	... or S.state == TIMEWAIT,
+	 *		Generate Reset(No Connection) unless P.type == Reset
+	 *		Drop packet and return
+	 */
+	       
+	if (sk->sk_state == DCCP_TIME_WAIT)
+                goto do_time_wait;
+
+	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
+		goto discard_and_relse;
+
+	return sk_receive_skb(sk, skb) ? -1 : 0;
+
+no_dccp_socket:
+	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
+		goto discard_it;
+	/*
+	 * Step 2: Generate Reset(No Connection) unless P.type == Reset
+	 *	   Drop packet and return
+	 */
+	if (dh->dccph_type != DCCP_PKT_RESET) {
+		DCCP_SKB_CB(skb)->dccpd_reset_code =
+					DCCP_RESET_CODE_NO_CONNECTION;
+		dccp_v6_ctl_send_reset(skb);
+	}
+discard_it:
+	/* Discard frame */
+	kfree_skb(skb);
+	return 0;
+
+discard_and_relse:
+	sock_put(sk);
+	goto discard_it;
+
+do_time_wait:
+	inet_twsk_put((struct inet_timewait_sock *)sk);
+	goto no_dccp_socket;
+}
+
+static struct inet_connection_sock_af_ops dccp_ipv6_af_ops = { /* native v6 */
+	.queue_xmit	=	inet6_csk_xmit,
+	.send_check	=	dccp_v6_send_check,
+	.rebuild_header	=	inet6_sk_rebuild_header,
+	.conn_request	=	dccp_v6_conn_request,
+	.syn_recv_sock	=	dccp_v6_request_recv_sock,
+	.net_header_len	=	sizeof(struct ipv6hdr),	/* MSS calc */
+	.setsockopt	=	ipv6_setsockopt,
+	.getsockopt	=	ipv6_getsockopt,
+	.addr2sockaddr	=	inet6_csk_addr2sockaddr,
+	.sockaddr_len	=	sizeof(struct sockaddr_in6)
+};
+
+/* DCCP over IPv4 via INET6 API: IPv4 xmit, checksum and header rebuild
+ * paired with the IPv6 request handling, socket options and sockaddr format.
+ */
+static struct inet_connection_sock_af_ops dccp_ipv6_mapped = {
+	.queue_xmit	=	ip_queue_xmit,
+	.send_check	=	dccp_v4_send_check,
+	.rebuild_header	=	inet_sk_rebuild_header,
+	.conn_request	=	dccp_v6_conn_request,
+	.syn_recv_sock	=	dccp_v6_request_recv_sock,
+	.net_header_len	=	sizeof(struct iphdr),
+	.setsockopt	=	ipv6_setsockopt,
+	.getsockopt	=	ipv6_getsockopt,
+	.addr2sockaddr	=	inet6_csk_addr2sockaddr,
+	.sockaddr_len	=	sizeof(struct sockaddr_in6)
+};
+
+/*
+ * Run the common DCCP socket init, then install the IPv6 af_ops.  A lot of
+ * things are set to zero by sk_alloc() and so need not be done here.
+ */
+static int dccp_v6_init_sock(struct sock *sk)
+{
+	const int rc = dccp_v4_init_sock(sk);
+
+	if (!rc)
+		inet_csk(sk)->icsk_af_ops = &dccp_ipv6_af_ops;
+	return rc;
+}
+
+static int dccp_v6_destroy_sock(struct sock *sk)
+{
+	dccp_v4_destroy_sock(sk);	/* common DCCP teardown runs first */
+	return inet6_destroy_sock(sk);
+}
+
+static struct proto dccp_v6_prot = { /* registered by dccp_v6_init() */
+	.name			= "DCCPv6",
+	.owner			= THIS_MODULE,
+	.close			= dccp_close,
+	.connect		= dccp_v6_connect,
+	.disconnect		= dccp_disconnect,
+	.ioctl			= dccp_ioctl,
+	.init			= dccp_v6_init_sock,
+	.setsockopt		= dccp_setsockopt,
+	.getsockopt		= dccp_getsockopt,
+	.sendmsg		= dccp_sendmsg,
+	.recvmsg		= dccp_recvmsg,
+	.backlog_rcv		= dccp_v6_do_rcv,
+	.hash			= dccp_v6_hash,
+	.unhash			= dccp_unhash,
+	.accept			= inet_csk_accept,
+	.get_port		= dccp_v6_get_port,
+	.shutdown		= dccp_shutdown,
+	.destroy		= dccp_v6_destroy_sock,
+	.orphan_count		= &dccp_orphan_count,
+	.max_header		= MAX_DCCP_HEADER,
+	.obj_size		= sizeof(struct dccp6_sock),
+	.rsk_prot		= &dccp6_request_sock_ops,
+	.twsk_prot		= &dccp6_timewait_sock_ops,
+};
+
+static struct inet6_protocol dccp_v6_protocol = { /* added in dccp_v6_init() */
+	.handler	=	dccp_v6_rcv,
+	.err_handler	=	dccp_v6_err,
+	.flags		=	INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
+};
+
+static struct proto_ops inet6_dccp_ops = { /* used by dccp_v6_protosw.ops */
+	.family		= PF_INET6,
+	.owner		= THIS_MODULE,
+	.release	= inet6_release,
+	.bind		= inet6_bind,
+	.connect	= inet_stream_connect,
+	.socketpair	= sock_no_socketpair,
+	.accept		= inet_accept,
+	.getname	= inet6_getname,
+	.poll		= dccp_poll,
+	.ioctl		= inet6_ioctl,
+	.listen		= inet_dccp_listen,
+	.shutdown	= inet_shutdown,
+	.setsockopt	= sock_common_setsockopt,
+	.getsockopt	= sock_common_getsockopt,
+	.sendmsg	= inet_sendmsg,
+	.recvmsg	= sock_common_recvmsg,
+	.mmap		= sock_no_mmap,
+	.sendpage	= sock_no_sendpage,
+};
+
+static struct inet_protosw dccp_v6_protosw = { /* see dccp_v6_init()/exit() */
+	.type		= SOCK_DCCP,
+	.protocol	= IPPROTO_DCCP,
+	.prot		= &dccp_v6_prot,
+	.ops		= &inet6_dccp_ops,
+	.capability	= -1,
+	.flags		= INET_PROTOSW_ICSK,
+};
+
+static int __init dccp_v6_init(void)
+{
+	int rc;
+
+	/* Each failure undoes whatever was registered before it. */
+	rc = proto_register(&dccp_v6_prot, 1);
+	if (rc)
+		return rc;
+
+	rc = inet6_add_protocol(&dccp_v6_protocol, IPPROTO_DCCP);
+	if (rc) {
+		proto_unregister(&dccp_v6_prot);
+		return rc;
+	}
+
+	inet6_register_protosw(&dccp_v6_protosw);
+	return rc;
+}
+
+static void __exit dccp_v6_exit(void)
+{
+	inet6_del_protocol(&dccp_v6_protocol, IPPROTO_DCCP);
+	inet6_unregister_protosw(&dccp_v6_protosw);
+	proto_unregister(&dccp_v6_prot);	/* undoes proto_register() */
+}
+
+module_init(dccp_v6_init);
+module_exit(dccp_v6_exit);
+
+/*
+ * __stringify doesn't like enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33)
+ * values directly. Also cover the case where the protocol is not specified,
+ * i.e. net-pf-PF_INET6-proto-0-type-SOCK_DCCP
+ */
+MODULE_ALIAS("net-pf-" __stringify(PF_INET6) "-proto-33-type-6");
+MODULE_ALIAS("net-pf-" __stringify(PF_INET6) "-proto-0-type-6");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>");
+MODULE_DESCRIPTION("DCCPv6 - Datagram Congestion Controlled Protocol");
diff --git a/net/dccp/ipv6.h b/net/dccp/ipv6.h
new file mode 100644
index 00000000000..e4d4e930927
--- /dev/null
+++ b/net/dccp/ipv6.h
@@ -0,0 +1,37 @@
+#ifndef _DCCP_IPV6_H
+#define _DCCP_IPV6_H
+/*
+ *  net/dccp/ipv6.h
+ *
+ *  An implementation of the DCCP protocol
+ *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
+ *
+ *	This program is free software; you can redistribute it and/or modify it
+ *	under the terms of the GNU General Public License version 2 as
+ *	published by the Free Software Foundation.
+ */
+
+#include <linux/config.h>
+#include <linux/dccp.h>
+#include <linux/ipv6.h>
+
+struct dccp6_sock {	/* sizeof() is dccp_v6_prot.obj_size */
+	struct dccp_sock  dccp;
+	/*
+	 * ipv6_pinfo has to be the last member of dccp6_sock,
+	 * see inet6_sk_generic.
+	 */
+	struct ipv6_pinfo inet6;	/* target of inet_sk()->pinet6 */
+};
+
+struct dccp6_request_sock {	/* DCCP request sock plus its IPv6 part */
+	struct dccp_request_sock  dccp;
+	struct inet6_request_sock inet6;
+};
+
+struct dccp6_timewait_sock {	/* sized via dccp6_timewait_sock_ops */
+	struct inet_timewait_sock   inet;
+	struct inet6_timewait_sock  tw6;
+};
+
+#endif /* _DCCP_IPV6_H */
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 1393461898b..29261fc198e 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -40,6 +40,8 @@ struct inet_timewait_death_row dccp_death_row = {
 					    (unsigned long)&dccp_death_row),
 };
 
+EXPORT_SYMBOL_GPL(dccp_death_row);
+
 void dccp_time_wait(struct sock *sk, int state, int timeo)
 {
 	struct inet_timewait_sock *tw = NULL;
@@ -50,7 +52,18 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
 	if (tw != NULL) {
 		const struct inet_connection_sock *icsk = inet_csk(sk);
 		const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
-
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		if (tw->tw_family == PF_INET6) {
+			const struct ipv6_pinfo *np = inet6_sk(sk);
+			struct inet6_timewait_sock *tw6;
+
+			tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
+			tw6 = inet6_twsk((struct sock *)tw);
+			ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr);
+			ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr);
+			tw->tw_ipv6only = np->ipv6only;
+		}
+#endif
 		/* Linkage updates. */
 		__inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
 
@@ -170,6 +183,8 @@ out_free:
 	return newsk;
 }
 
+EXPORT_SYMBOL_GPL(dccp_create_openreq_child);
+
 /* 
  * Process an incoming packet for RESPOND sockets represented
  * as an request_sock.
@@ -214,7 +229,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
 		goto drop;
 	}
 
-	child = dccp_v4_request_recv_sock(sk, skb, req, NULL);
+	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
 	if (child == NULL)
 		goto listen_overflow;
 
@@ -236,6 +251,8 @@ drop:
 	goto out;
 }
 
+EXPORT_SYMBOL_GPL(dccp_check_req);
+
 /*
  *  Queue segment on the new socket if the new socket is active,
  *  otherwise we just shortcircuit this and continue with
@@ -266,3 +283,5 @@ int dccp_child_process(struct sock *parent, struct sock *child,
 	sock_put(child);
 	return ret;
 }
+
+EXPORT_SYMBOL_GPL(dccp_child_process);
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 74ff8702587..efd7ffb903a 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -15,6 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
 
+#include <net/inet_sock.h>
 #include <net/sock.h>
 
 #include "ackvec.h"
@@ -43,6 +44,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 {
 	if (likely(skb != NULL)) {
 		const struct inet_sock *inet = inet_sk(sk);
+		const struct inet_connection_sock *icsk = inet_csk(sk);
 		struct dccp_sock *dp = dccp_sk(sk);
 		struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
 		struct dccp_hdr *dh;
@@ -108,8 +110,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 			break;
 		}
 
-		dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr,
-						      inet->daddr);
+		icsk->icsk_af_ops->send_check(sk, skb->len, skb);
 
 		if (set_ack)
 			dccp_event_ack_sent(sk);
@@ -117,7 +118,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 		DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
 
 		memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
-		err = ip_queue_xmit(skb, 0);
+		err = icsk->icsk_af_ops->queue_xmit(skb, 0);
 		if (err <= 0)
 			return err;
 
@@ -134,20 +135,13 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 
 unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
 {
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct dccp_sock *dp = dccp_sk(sk);
-	int mss_now;
-
-	/*
-	 * FIXME: we really should be using the af_specific thing to support
-	 * 	  IPv6.
-	 * mss_now = pmtu - tp->af_specific->net_header_len -
-	 * 	     sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext);
-	 */
-	mss_now = pmtu - sizeof(struct iphdr) - sizeof(struct dccp_hdr) -
-		  sizeof(struct dccp_hdr_ext);
+	int mss_now = (pmtu - icsk->icsk_af_ops->net_header_len -
+		       sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext));
 
 	/* Now subtract optional transport overhead */
-	mss_now -= dp->dccps_ext_header_len;
+	mss_now -= icsk->icsk_ext_hdr_len;
 
 	/*
 	 * FIXME: this should come from the CCID infrastructure, where, say,
@@ -160,12 +154,14 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
 	mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;
 
 	/* And store cached results */
-	dp->dccps_pmtu_cookie = pmtu;
+	icsk->icsk_pmtu_cookie = pmtu;
 	dp->dccps_mss_cache = mss_now;
 
 	return mss_now;
 }
 
+EXPORT_SYMBOL_GPL(dccp_sync_mss);
+
 void dccp_write_space(struct sock *sk)
 {
 	read_lock(&sk->sk_callback_lock);
@@ -266,7 +262,7 @@ int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo)
 
 int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 {
-	if (inet_sk_rebuild_header(sk) != 0)
+	if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk) != 0)
 		return -EHOSTUNREACH; /* Routing failure or similar. */
 
 	return dccp_transmit_skb(sk, (skb_cloned(skb) ?
@@ -321,6 +317,8 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
 	return skb;
 }
 
+EXPORT_SYMBOL_GPL(dccp_make_response);
+
 struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
 				const enum dccp_reset_codes code)
 				   
@@ -377,6 +375,7 @@ struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
  */
 static inline void dccp_connect_init(struct sock *sk)
 {
+	struct dccp_sock *dp = dccp_sk(sk);
 	struct dst_entry *dst = __sk_dst_get(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
@@ -385,10 +384,16 @@ static inline void dccp_connect_init(struct sock *sk)
 	
 	dccp_sync_mss(sk, dst_mtu(dst));
 
-	/*
-	 * FIXME: set dp->{dccps_swh,dccps_swl}, with
-	 * something like dccp_inc_seq
-	 */
+	dccp_update_gss(sk, dp->dccps_iss);
+ 	/*
+	 * SWL and AWL are initially adjusted so that they are not less than
+	 * the initial Sequence Numbers received and sent, respectively:
+	 *	SWL := max(GSR + 1 - floor(W/4), ISR),
+	 *	AWL := max(GSS - W' + 1, ISS).
+	 * These adjustments MUST be applied only at the beginning of the
+	 * connection.
+ 	 */
+	dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss));
 
 	icsk->icsk_retransmits = 0;
 }
@@ -420,6 +425,8 @@ int dccp_connect(struct sock *sk)
 	return 0;
 }
 
+EXPORT_SYMBOL_GPL(dccp_connect);
+
 void dccp_send_ack(struct sock *sk)
 {
 	/* If we have been reset, we may not send again. */
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 8a6b2a9e458..65b11ea90d8 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -24,7 +24,7 @@
 #include <net/checksum.h>
 
 #include <net/inet_common.h>
-#include <net/ip.h>
+#include <net/inet_sock.h>
 #include <net/protocol.h>
 #include <net/sock.h>
 #include <net/xfrm.h>
@@ -34,15 +34,18 @@
 #include <linux/timer.h>
 #include <linux/delay.h>
 #include <linux/poll.h>
-#include <linux/dccp.h>
 
 #include "ccid.h"
 #include "dccp.h"
 
 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
 
+EXPORT_SYMBOL_GPL(dccp_statistics);
+
 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
 
+EXPORT_SYMBOL_GPL(dccp_orphan_count);
+
 static struct net_protocol dccp_protocol = {
 	.handler	= dccp_v4_rcv,
 	.err_handler	= dccp_v4_err,
@@ -149,6 +152,8 @@ int dccp_disconnect(struct sock *sk, int flags)
 	return err;
 }
 
+EXPORT_SYMBOL_GPL(dccp_disconnect);
+
 /*
  *	Wait for a DCCP event.
  *
@@ -156,8 +161,8 @@ int dccp_disconnect(struct sock *sk, int flags)
  *	take care of normal races (between the test and the event) and we don't
  *	go look at any of the socket buffers directly.
  */
-static unsigned int dccp_poll(struct file *file, struct socket *sock,
-			      poll_table *wait)
+unsigned int dccp_poll(struct file *file, struct socket *sock,
+		       poll_table *wait)
 {
 	unsigned int mask;
 	struct sock *sk = sock->sk;
@@ -205,12 +210,16 @@ static unsigned int dccp_poll(struct file *file, struct socket *sock,
 	return mask;
 }
 
+EXPORT_SYMBOL_GPL(dccp_poll);
+
 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 {
 	dccp_pr_debug("entry\n");
 	return -ENOIOCTLCMD;
 }
 
+EXPORT_SYMBOL_GPL(dccp_ioctl);
+
 static int dccp_setsockopt_service(struct sock *sk, const u32 service,
 				   char __user *optval, int optlen)
 {
@@ -254,7 +263,9 @@ int dccp_setsockopt(struct sock *sk, int level, int optname,
 	int val;
 
 	if (level != SOL_DCCP)
-		return ip_setsockopt(sk, level, optname, optval, optlen);
+		return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
+							     optname, optval,
+							     optlen);
 
 	if (optlen < sizeof(int))
 		return -EINVAL;
@@ -282,6 +293,8 @@ int dccp_setsockopt(struct sock *sk, int level, int optname,
 	return err;
 }
 
+EXPORT_SYMBOL_GPL(dccp_setsockopt);
+
 static int dccp_getsockopt_service(struct sock *sk, int len,
 				   u32 __user *optval,
 				   int __user *optlen)
@@ -320,8 +333,9 @@ int dccp_getsockopt(struct sock *sk, int level, int optname,
 	int val, len;
 
 	if (level != SOL_DCCP)
-		return ip_getsockopt(sk, level, optname, optval, optlen);
-
+		return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
+							     optname, optval,
+							     optlen);
 	if (get_user(len, optlen))
 		return -EFAULT;
 
@@ -354,6 +368,8 @@ int dccp_getsockopt(struct sock *sk, int level, int optname,
 	return 0;
 }
 
+EXPORT_SYMBOL_GPL(dccp_getsockopt);
+
 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		 size_t len)
 {
@@ -410,6 +426,8 @@ out_discard:
 	goto out_release;
 }
 
+EXPORT_SYMBOL_GPL(dccp_sendmsg);
+
 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		 size_t len, int nonblock, int flags, int *addr_len)
 {
@@ -507,7 +525,9 @@ out:
 	return len;
 }
 
-static int inet_dccp_listen(struct socket *sock, int backlog)
+EXPORT_SYMBOL_GPL(dccp_recvmsg);
+
+int inet_dccp_listen(struct socket *sock, int backlog)
 {
 	struct sock *sk = sock->sk;
 	unsigned char old_state;
@@ -543,6 +563,8 @@ out:
 	return err;
 }
 
+EXPORT_SYMBOL_GPL(inet_dccp_listen);
+
 static const unsigned char dccp_new_state[] = {
 	/* current state:   new state:      action:	*/
 	[0]		  = DCCP_CLOSED,
@@ -648,12 +670,16 @@ adjudge_to_death:
 	sock_put(sk);
 }
 
+EXPORT_SYMBOL_GPL(dccp_close);
+
 void dccp_shutdown(struct sock *sk, int how)
 {
 	dccp_pr_debug("entry\n");
 }
 
-static struct proto_ops inet_dccp_ops = {
+EXPORT_SYMBOL_GPL(dccp_shutdown);
+
+static const struct proto_ops inet_dccp_ops = {
 	.family		= PF_INET,
 	.owner		= THIS_MODULE,
 	.release	= inet_release,
@@ -681,11 +707,11 @@ extern struct net_proto_family inet_family_ops;
 static struct inet_protosw dccp_v4_protosw = {
 	.type		= SOCK_DCCP,
 	.protocol	= IPPROTO_DCCP,
-	.prot		= &dccp_v4_prot,
+	.prot		= &dccp_prot,
 	.ops		= &inet_dccp_ops,
 	.capability	= -1,
 	.no_check	= 0,
-	.flags		= 0,
+	.flags		= INET_PROTOSW_ICSK,
 };
 
 /*
@@ -760,13 +786,15 @@ MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
 int dccp_debug;
 module_param(dccp_debug, int, 0444);
 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
+
+EXPORT_SYMBOL_GPL(dccp_debug);
 #endif
 
 static int __init dccp_init(void)
 {
 	unsigned long goal;
 	int ehash_order, bhash_order, i;
-	int rc = proto_register(&dccp_v4_prot, 1);
+	int rc = proto_register(&dccp_prot, 1);
 
 	if (rc)
 		goto out;
@@ -869,7 +897,7 @@ out_free_bind_bucket_cachep:
 	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
 	dccp_hashinfo.bind_bucket_cachep = NULL;
 out_proto_unregister:
-	proto_unregister(&dccp_v4_prot);
+	proto_unregister(&dccp_prot);
 	goto out;
 }
 
@@ -892,7 +920,7 @@ static void __exit dccp_fini(void)
 		   get_order(dccp_hashinfo.ehash_size *
 			     sizeof(struct inet_ehash_bucket)));
 	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
-	proto_unregister(&dccp_v4_prot);
+	proto_unregister(&dccp_prot);
 }
 
 module_init(dccp_init);
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index f89e55f814d..ce4aaf94860 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -122,6 +122,7 @@ Version 0.0.6    2.1.110   07-aug-98   Eduardo Marcelo Serrat
 #include <net/flow.h>
 #include <asm/system.h>
 #include <asm/ioctls.h>
+#include <linux/capability.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
 #include <linux/proc_fs.h>
@@ -149,10 +150,11 @@ static void dn_keepalive(struct sock *sk);
 #define DN_SK_HASH_MASK (DN_SK_HASH_SIZE - 1)
 
 
-static struct proto_ops dn_proto_ops;
+static const struct proto_ops dn_proto_ops;
 static DEFINE_RWLOCK(dn_hash_lock);
 static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE];
 static struct hlist_head dn_wild_sk;
+static atomic_t decnet_memory_allocated;
 
 static int __dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen, int flags);
 static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags);
@@ -446,10 +448,26 @@ static void dn_destruct(struct sock *sk)
 	dst_release(xchg(&sk->sk_dst_cache, NULL));
 }
 
+static int dn_memory_pressure;
+
+static void dn_enter_memory_pressure(void)
+{
+	if (!dn_memory_pressure) {
+		dn_memory_pressure = 1;
+	}
+}
+
 static struct proto dn_proto = {
-	.name	  = "DECNET",
-	.owner	  = THIS_MODULE,
-	.obj_size = sizeof(struct dn_sock),
+	.name			= "NSP",
+	.owner			= THIS_MODULE,
+	.enter_memory_pressure	= dn_enter_memory_pressure,
+	.memory_pressure	= &dn_memory_pressure,
+	.memory_allocated	= &decnet_memory_allocated,
+	.sysctl_mem		= sysctl_decnet_mem,
+	.sysctl_wmem		= sysctl_decnet_wmem,
+	.sysctl_rmem		= sysctl_decnet_rmem,
+	.max_header		= DN_MAX_NSP_DATA_HEADER + 64,
+	.obj_size		= sizeof(struct dn_sock),
 };
 
 static struct sock *dn_alloc_sock(struct socket *sock, gfp_t gfp)
@@ -470,6 +488,8 @@ static struct sock *dn_alloc_sock(struct socket *sock, gfp_t gfp)
 	sk->sk_family      = PF_DECnet;
 	sk->sk_protocol    = 0;
 	sk->sk_allocation  = gfp;
+	sk->sk_sndbuf	   = sysctl_decnet_wmem[1];
+	sk->sk_rcvbuf	   = sysctl_decnet_rmem[1];
 
 	/* Initialization of DECnet Session Control Port		*/
 	scp = DN_SK(sk);
@@ -1233,7 +1253,7 @@ static int dn_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		break;
 
 	default:
-		err = dev_ioctl(cmd, (void __user *)arg);
+		err = -ENOIOCTLCMD;
 		break;
 	}
 
@@ -2323,7 +2343,7 @@ static struct net_proto_family	dn_family_ops = {
 	.owner	=	THIS_MODULE,
 };
 
-static struct proto_ops dn_proto_ops = {
+static const struct proto_ops dn_proto_ops = {
 	.family =	AF_DECnet,
 	.owner =	THIS_MODULE,
 	.release =	dn_release,
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 5610bb16dbf..efbead83ba7 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -25,6 +25,7 @@
  */
 
 #include <linux/config.h>
+#include <linux/capability.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/init.h>
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 8d0cc3cf3e4..33ab256cfd4 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -408,11 +408,14 @@ int dn_neigh_router_hello(struct sk_buff *skb)
 			}
 		}
 
-		if (!dn_db->router) {
-			dn_db->router = neigh_clone(neigh);
-		} else {
-			if (msg->priority > ((struct dn_neigh *)dn_db->router)->priority)
-				neigh_release(xchg(&dn_db->router, neigh_clone(neigh)));
+		/* Only use routers in our area (compare in host byte order) */
+		if ((dn_ntohs(src)>>10) == (dn_ntohs(decnet_address)>>10)) {
+			if (!dn_db->router) {
+				dn_db->router = neigh_clone(neigh);
+			} else {
+				if (msg->priority > ((struct dn_neigh *)dn_db->router)->priority)
+					neigh_release(xchg(&dn_db->router, neigh_clone(neigh)));
+			}
 		}
 		write_unlock(&neigh->lock);
 		neigh_release(neigh);
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index 369f25b60f3..44bda85e678 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -793,7 +793,6 @@ static int dn_nsp_rx_packet(struct sk_buff *skb)
 got_it:
 	if (sk != NULL) {
 		struct dn_scp *scp = DN_SK(sk);
-		int ret;
 
 		/* Reset backoff */
 		scp->nsp_rxtshift = 0;
@@ -807,21 +806,7 @@ got_it:
 				goto free_out;
 		}
 
-		bh_lock_sock(sk);
-		ret = NET_RX_SUCCESS;
-		if (decnet_debug_level & 8)
-			printk(KERN_DEBUG "NSP: 0x%02x 0x%02x 0x%04x 0x%04x %d\n",
-				(int)cb->rt_flags, (int)cb->nsp_flags, 
-				(int)cb->src_port, (int)cb->dst_port, 
-				!!sock_owned_by_user(sk));
-		if (!sock_owned_by_user(sk))
-			ret = dn_nsp_backlog_rcv(sk, skb);
-		else
-			sk_add_backlog(sk, skb);
-		bh_unlock_sock(sk);
-		sock_put(sk);
-
-		return ret;
+		return sk_receive_skb(sk, skb);
 	}
 
 	return dn_nsp_no_socket(skb, reason);
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 1ab94c6e22e..16a5a31e212 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -26,8 +26,6 @@
 #include <net/dn.h>
 #include <net/dn_route.h>
 
-#include <linux/netfilter_decnet.h>
-
 static struct sock *dnrmg = NULL;
 
 
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index 02bca49cb50..0e9d2c57116 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -10,6 +10,7 @@
  *
  * Changes:
  * Steve Whitehouse - C99 changes and default device handling
+ * Steve Whitehouse - Memory buffer settings, like the tcp ones
  *
  */
 #include <linux/config.h>
@@ -37,6 +38,11 @@ int decnet_dr_count = 3;
 int decnet_log_martians = 1;
 int decnet_no_fc_max_cwnd = NSP_MIN_WINDOW;
 
+/* Reasonable defaults, I hope, based on tcp's defaults */
+int sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 };
+int sysctl_decnet_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
+int sysctl_decnet_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
+
 #ifdef CONFIG_SYSCTL
 extern int decnet_dst_gc_interval;
 static int min_decnet_time_wait[] = { 5 };
@@ -428,6 +434,33 @@ static ctl_table dn_table[] = {
 		.extra1 = &min_decnet_no_fc_max_cwnd,
 		.extra2 = &max_decnet_no_fc_max_cwnd
 	},
+	{
+		.ctl_name = NET_DECNET_MEM,
+		.procname = "decnet_mem",
+		.data = &sysctl_decnet_mem,
+		.maxlen = sizeof(sysctl_decnet_mem),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+	{
+		.ctl_name = NET_DECNET_RMEM,
+		.procname = "decnet_rmem",
+		.data = &sysctl_decnet_rmem,
+		.maxlen = sizeof(sysctl_decnet_rmem),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+	{
+		.ctl_name = NET_DECNET_WMEM,
+		.procname = "decnet_wmem",
+		.data = &sysctl_decnet_wmem,
+		.maxlen = sizeof(sysctl_decnet_wmem),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
 	{
 		.ctl_name = NET_DECNET_DEBUG_LEVEL,
 		.procname = "debug",
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 34fdac51df9..c792994d795 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -31,6 +31,7 @@
 #include <linux/if_arp.h>
 #include <linux/wireless.h>
 #include <linux/skbuff.h>
+#include <linux/udp.h>
 #include <net/sock.h>
 #include <net/inet_common.h>
 #include <linux/stat.h>
@@ -45,7 +46,7 @@
 #include <asm/uaccess.h>
 #include <asm/system.h>
 
-static struct proto_ops econet_ops;
+static const struct proto_ops econet_ops;
 static struct hlist_head econet_sklist;
 static DEFINE_RWLOCK(econet_lock);
 
@@ -56,7 +57,7 @@ static struct net_device *net2dev_map[256];
 #define EC_PORT_IP	0xd2
 
 #ifdef CONFIG_ECONET_AUNUDP
-static spinlock_t aun_queue_lock;
+static DEFINE_SPINLOCK(aun_queue_lock);
 static struct socket *udpsock;
 #define AUN_PORT	0x8000
 
@@ -686,7 +687,7 @@ static int econet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg
 			break;
 
 		default:
-			return dev_ioctl(cmd, argp);
+			return -ENOIOCTLCMD;
 	}
 	/*NOTREACHED*/
 	return 0;
@@ -698,7 +699,7 @@ static struct net_proto_family econet_family_ops = {
 	.owner	=	THIS_MODULE,
 };
 
-static struct proto_ops SOCKOPS_WRAPPED(econet_ops) = {
+static const struct proto_ops SOCKOPS_WRAPPED(econet_ops) = {
 	.family =	PF_ECONET,
 	.owner =	THIS_MODULE,
 	.release =	econet_release,
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index e2457736727..9890fd97e53 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -53,6 +53,7 @@
 #include <linux/errno.h>
 #include <linux/config.h>
 #include <linux/init.h>
+#include <linux/if_ether.h>
 #include <net/dst.h>
 #include <net/arp.h>
 #include <net/sock.h>
@@ -162,7 +163,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 	skb_pull(skb,ETH_HLEN);
 	eth = eth_hdr(skb);
 	
-	if (*eth->h_dest&1) {
+	if (is_multicast_ether_addr(eth->h_dest)) {
 		if (!compare_ether_addr(eth->h_dest, dev->broadcast))
 			skb->pkt_type = PACKET_BROADCAST;
 		else
@@ -251,7 +252,7 @@ static int eth_mac_addr(struct net_device *dev, void *p)
 
 static int eth_change_mtu(struct net_device *dev, int new_mtu)
 {
-	if ((new_mtu < 68) || (new_mtu > 1500))
+	if (new_mtu < 68 || new_mtu > ETH_DATA_LEN)
 		return -EINVAL;
 	dev->mtu = new_mtu;
 	return 0;
@@ -272,7 +273,7 @@ void ether_setup(struct net_device *dev)
 
 	dev->type		= ARPHRD_ETHER;
 	dev->hard_header_len 	= ETH_HLEN;
-	dev->mtu		= 1500; /* eth_mtu */
+	dev->mtu		= ETH_DATA_LEN;
 	dev->addr_len		= ETH_ALEN;
 	dev->tx_queue_len	= 1000;	/* Ethernet wants good queues */	
 	dev->flags		= IFF_BROADCAST|IFF_MULTICAST;
diff --git a/net/ieee80211/Kconfig b/net/ieee80211/Kconfig
index 91b16fbf91f..d18ccba3ea9 100644
--- a/net/ieee80211/Kconfig
+++ b/net/ieee80211/Kconfig
@@ -55,7 +55,7 @@ config IEEE80211_CRYPT_CCMP
 
 config IEEE80211_CRYPT_TKIP
 	tristate "IEEE 802.11i TKIP encryption"
-	depends on IEEE80211
+	depends on IEEE80211 && NET_RADIO
 	select CRYPTO
 	select CRYPTO_MICHAEL_MIC
 	---help---
diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c
index 073aebdf0f6..f8dca31be5d 100644
--- a/net/ieee80211/ieee80211_crypt_wep.c
+++ b/net/ieee80211/ieee80211_crypt_wep.c
@@ -75,22 +75,14 @@ static void prism2_wep_deinit(void *priv)
 	kfree(priv);
 }
 
-/* Perform WEP encryption on given skb that has at least 4 bytes of headroom
- * for IV and 4 bytes of tailroom for ICV. Both IV and ICV will be transmitted,
- * so the payload length increases with 8 bytes.
- *
- * WEP frame payload: IV + TX key idx, RC4(data), ICV = RC4(CRC32(data))
- */
-static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
+/* Add WEP IV/key info to a frame that has at least 4 bytes of headroom */
+static int prism2_wep_build_iv(struct sk_buff *skb, int hdr_len, void *priv)
 {
 	struct prism2_wep_data *wep = priv;
-	u32 crc, klen, len;
-	u8 key[WEP_KEY_LEN + 3];
-	u8 *pos, *icv;
-	struct scatterlist sg;
-
-	if (skb_headroom(skb) < 4 || skb_tailroom(skb) < 4 ||
-	    skb->len < hdr_len)
+	u32 klen, len;
+	u8 *pos;
+	
+	if (skb_headroom(skb) < 4 || skb->len < hdr_len)
 		return -1;
 
 	len = skb->len - hdr_len;
@@ -112,15 +104,47 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 	}
 
 	/* Prepend 24-bit IV to RC4 key and TX frame */
-	*pos++ = key[0] = (wep->iv >> 16) & 0xff;
-	*pos++ = key[1] = (wep->iv >> 8) & 0xff;
-	*pos++ = key[2] = wep->iv & 0xff;
+	*pos++ = (wep->iv >> 16) & 0xff;
+	*pos++ = (wep->iv >> 8) & 0xff;
+	*pos++ = wep->iv & 0xff;
 	*pos++ = wep->key_idx << 6;
 
+	return 0;
+}
+
+/* Perform WEP encryption on given skb that has at least 4 bytes of headroom
+ * for IV and 4 bytes of tailroom for ICV. Both IV and ICV will be transmitted,
+ * so the payload length increases by 8 bytes.
+ *
+ * WEP frame payload: IV + TX key idx, RC4(data), ICV = RC4(CRC32(data))
+ */
+static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
+{
+	struct prism2_wep_data *wep = priv;
+	u32 crc, klen, len;
+	u8 *pos, *icv;
+	struct scatterlist sg;
+	u8 key[WEP_KEY_LEN + 3];
+
+	/* other checks are in prism2_wep_build_iv */
+	if (skb_tailroom(skb) < 4)
+		return -1;
+	
+	/* add the IV to the frame */
+	if (prism2_wep_build_iv(skb, hdr_len, priv))
+		return -1;
+	
+	/* Copy the IV into the first 3 bytes of the key */
+	memcpy(key, skb->data + hdr_len, 3);
+
 	/* Copy rest of the WEP key (the secret part) */
 	memcpy(key + 3, wep->key, wep->key_len);
+	
+	len = skb->len - hdr_len - 4;
+	pos = skb->data + hdr_len + 4;
+	klen = 3 + wep->key_len;
 
-	/* Append little-endian CRC32 and encrypt it to produce ICV */
+	/* Append little-endian CRC32 over only the data and encrypt it to produce ICV */
 	crc = ~crc32_le(~0, pos, len);
 	icv = skb_put(skb, 4);
 	icv[0] = crc;
@@ -231,6 +255,7 @@ static struct ieee80211_crypto_ops ieee80211_crypt_wep = {
 	.name = "WEP",
 	.init = prism2_wep_init,
 	.deinit = prism2_wep_deinit,
+	.build_iv = prism2_wep_build_iv,
 	.encrypt_mpdu = prism2_wep_encrypt,
 	.decrypt_mpdu = prism2_wep_decrypt,
 	.encrypt_msdu = NULL,
diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c
index 321287bc887..90d18b72da3 100644
--- a/net/ieee80211/ieee80211_module.c
+++ b/net/ieee80211/ieee80211_module.c
@@ -62,7 +62,7 @@ MODULE_DESCRIPTION(DRV_DESCRIPTION);
 MODULE_AUTHOR(DRV_COPYRIGHT);
 MODULE_LICENSE("GPL");
 
-static inline int ieee80211_networks_allocate(struct ieee80211_device *ieee)
+static int ieee80211_networks_allocate(struct ieee80211_device *ieee)
 {
 	if (ieee->networks)
 		return 0;
@@ -90,7 +90,7 @@ static inline void ieee80211_networks_free(struct ieee80211_device *ieee)
 	ieee->networks = NULL;
 }
 
-static inline void ieee80211_networks_initialize(struct ieee80211_device *ieee)
+static void ieee80211_networks_initialize(struct ieee80211_device *ieee)
 {
 	int i;
 
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index 03efaacbdb7..7a121802faa 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -35,7 +35,7 @@
 
 #include <net/ieee80211.h>
 
-static inline void ieee80211_monitor_rx(struct ieee80211_device *ieee,
+static void ieee80211_monitor_rx(struct ieee80211_device *ieee,
 					struct sk_buff *skb,
 					struct ieee80211_rx_stats *rx_stats)
 {
@@ -76,8 +76,8 @@ static struct ieee80211_frag_entry *ieee80211_frag_cache_find(struct
 
 		if (entry->skb != NULL && entry->seq == seq &&
 		    (entry->last_frag + 1 == frag || frag == -1) &&
-		    memcmp(entry->src_addr, src, ETH_ALEN) == 0 &&
-		    memcmp(entry->dst_addr, dst, ETH_ALEN) == 0)
+		    !compare_ether_addr(entry->src_addr, src) &&
+		    !compare_ether_addr(entry->dst_addr, dst))
 			return entry;
 	}
 
@@ -165,7 +165,7 @@ static int ieee80211_frag_cache_invalidate(struct ieee80211_device *ieee,
  * Responsible for handling management control frames
  *
  * Called by ieee80211_rx */
-static inline int
+static int
 ieee80211_rx_frame_mgmt(struct ieee80211_device *ieee, struct sk_buff *skb,
 			struct ieee80211_rx_stats *rx_stats, u16 type,
 			u16 stype)
@@ -243,12 +243,12 @@ static int ieee80211_is_eapol_frame(struct ieee80211_device *ieee,
 	/* check that the frame is unicast frame to us */
 	if ((fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) ==
 	    IEEE80211_FCTL_TODS &&
-	    memcmp(hdr->addr1, dev->dev_addr, ETH_ALEN) == 0 &&
-	    memcmp(hdr->addr3, dev->dev_addr, ETH_ALEN) == 0) {
+	    !compare_ether_addr(hdr->addr1, dev->dev_addr) &&
+	    !compare_ether_addr(hdr->addr3, dev->dev_addr)) {
 		/* ToDS frame with own addr BSSID and DA */
 	} else if ((fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) ==
 		   IEEE80211_FCTL_FROMDS &&
-		   memcmp(hdr->addr1, dev->dev_addr, ETH_ALEN) == 0) {
+		   !compare_ether_addr(hdr->addr1, dev->dev_addr)) {
 		/* FromDS frame with own addr as DA */
 	} else
 		return 0;
@@ -266,7 +266,7 @@ static int ieee80211_is_eapol_frame(struct ieee80211_device *ieee,
 }
 
 /* Called only as a tasklet (software IRQ), by ieee80211_rx */
-static inline int
+static int
 ieee80211_rx_frame_decrypt(struct ieee80211_device *ieee, struct sk_buff *skb,
 			   struct ieee80211_crypt_data *crypt)
 {
@@ -297,7 +297,7 @@ ieee80211_rx_frame_decrypt(struct ieee80211_device *ieee, struct sk_buff *skb,
 }
 
 /* Called only as a tasklet (software IRQ), by ieee80211_rx */
-static inline int
+static int
 ieee80211_rx_frame_decrypt_msdu(struct ieee80211_device *ieee,
 				struct sk_buff *skb, int keyidx,
 				struct ieee80211_crypt_data *crypt)
@@ -410,9 +410,8 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
 		return 1;
 	}
 
-	if ((is_multicast_ether_addr(hdr->addr1) ||
-	     is_broadcast_ether_addr(hdr->addr2)) ? ieee->host_mc_decrypt :
-	    ieee->host_decrypt) {
+	if (is_multicast_ether_addr(hdr->addr1)
+	    ? ieee->host_mc_decrypt : ieee->host_decrypt) {
 		int idx = 0;
 		if (skb->len >= hdrlen + 3)
 			idx = skb->data[hdrlen + 3] >> 6;
@@ -506,7 +505,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
 	if (ieee->iw_mode == IW_MODE_MASTER && !wds &&
 	    (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) ==
 	    IEEE80211_FCTL_FROMDS && ieee->stadev
-	    && memcmp(hdr->addr2, ieee->assoc_ap_addr, ETH_ALEN) == 0) {
+	    && !compare_ether_addr(hdr->addr2, ieee->assoc_ap_addr)) {
 		/* Frame from BSSID of the AP for which we are a client */
 		skb->dev = dev = ieee->stadev;
 		stats = hostap_get_stats(dev);
@@ -1157,7 +1156,7 @@ static int ieee80211_handle_assoc_resp(struct ieee80211_device *ieee, struct iee
 
 /***************************************************/
 
-static inline int ieee80211_network_init(struct ieee80211_device *ieee, struct ieee80211_probe_response
+static int ieee80211_network_init(struct ieee80211_device *ieee, struct ieee80211_probe_response
 					 *beacon,
 					 struct ieee80211_network *network,
 					 struct ieee80211_rx_stats *stats)
@@ -1232,11 +1231,11 @@ static inline int is_same_network(struct ieee80211_network *src,
 	 * as one network */
 	return ((src->ssid_len == dst->ssid_len) &&
 		(src->channel == dst->channel) &&
-		!memcmp(src->bssid, dst->bssid, ETH_ALEN) &&
+		!compare_ether_addr(src->bssid, dst->bssid) &&
 		!memcmp(src->ssid, dst->ssid, src->ssid_len));
 }
 
-static inline void update_network(struct ieee80211_network *dst,
+static void update_network(struct ieee80211_network *dst,
 				  struct ieee80211_network *src)
 {
 	int qos_active;
@@ -1295,7 +1294,7 @@ static inline int is_beacon(int fc)
 	return (WLAN_FC_GET_STYPE(le16_to_cpu(fc)) == IEEE80211_STYPE_BEACON);
 }
 
-static inline void ieee80211_process_probe_response(struct ieee80211_device
+static void ieee80211_process_probe_response(struct ieee80211_device
 						    *ieee, struct
 						    ieee80211_probe_response
 						    *beacon, struct ieee80211_rx_stats
diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c
index 445f206e65e..8fdd943ebe8 100644
--- a/net/ieee80211/ieee80211_tx.c
+++ b/net/ieee80211/ieee80211_tx.c
@@ -127,7 +127,7 @@ payload of each frame is reduced to 492 bytes.
 static u8 P802_1H_OUI[P80211_OUI_LEN] = { 0x00, 0x00, 0xf8 };
 static u8 RFC1042_OUI[P80211_OUI_LEN] = { 0x00, 0x00, 0x00 };
 
-static inline int ieee80211_copy_snap(u8 * data, u16 h_proto)
+static int ieee80211_copy_snap(u8 * data, u16 h_proto)
 {
 	struct ieee80211_snap_hdr *snap;
 	u8 *oui;
@@ -150,7 +150,7 @@ static inline int ieee80211_copy_snap(u8 * data, u16 h_proto)
 	return SNAP_SIZE + sizeof(u16);
 }
 
-static inline int ieee80211_encrypt_fragment(struct ieee80211_device *ieee,
+static int ieee80211_encrypt_fragment(struct ieee80211_device *ieee,
 					     struct sk_buff *frag, int hdr_len)
 {
 	struct ieee80211_crypt_data *crypt = ieee->crypt[ieee->tx_keyidx];
@@ -288,7 +288,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Determine total amount of storage required for TXB packets */
 	bytes = skb->len + SNAP_SIZE + sizeof(u16);
 
-	if (host_encrypt)
+	if (host_encrypt || host_build_iv)
 		fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA |
 		    IEEE80211_FCTL_PROTECTED;
 	else
diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c
index 181755f2aa8..23e1630f50b 100644
--- a/net/ieee80211/ieee80211_wx.c
+++ b/net/ieee80211/ieee80211_wx.c
@@ -42,7 +42,7 @@ static const char *ieee80211_modes[] = {
 };
 
 #define MAX_CUSTOM_LEN 64
-static inline char *ipw2100_translate_scan(struct ieee80211_device *ieee,
+static char *ipw2100_translate_scan(struct ieee80211_device *ieee,
 					   char *start, char *stop,
 					   struct ieee80211_network *network)
 {
@@ -284,7 +284,7 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee,
 	};
 	int i, key, key_provided, len;
 	struct ieee80211_crypt_data **crypt;
-	int host_crypto = ieee->host_encrypt || ieee->host_decrypt;
+	int host_crypto = ieee->host_encrypt || ieee->host_decrypt || ieee->host_build_iv;
 
 	IEEE80211_DEBUG_WX("SET_ENCODE\n");
 
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index e55136ae09f..011cca7ae02 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -456,6 +456,14 @@ config TCP_CONG_BIC
 	increase provides TCP friendliness.
 	See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/
 
+config TCP_CONG_CUBIC
+	tristate "CUBIC TCP"
+	default m
+	---help---
+	This is version 2.0 of BIC-TCP which uses a cubic growth function
+	among other techniques.
+	See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/cubic-paper.pdf
+
 config TCP_CONG_WESTWOOD
 	tristate "TCP Westwood+"
 	default m
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index f0435d00db6..35e5f599909 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -9,7 +9,7 @@ obj-y     := route.o inetpeer.o protocol.o \
 	     tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \
 	     tcp_minisocks.o tcp_cong.o \
 	     datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \
-	     sysctl_net_ipv4.o fib_frontend.o fib_semantics.o netfilter.o
+	     sysctl_net_ipv4.o fib_frontend.o fib_semantics.o
 
 obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
 obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o
@@ -28,12 +28,13 @@ obj-$(CONFIG_IP_ROUTE_MULTIPATH_RR) += multipath_rr.o
 obj-$(CONFIG_IP_ROUTE_MULTIPATH_RANDOM) += multipath_random.o
 obj-$(CONFIG_IP_ROUTE_MULTIPATH_WRANDOM) += multipath_wrandom.o
 obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o
-obj-$(CONFIG_NETFILTER)	+= netfilter/
+obj-$(CONFIG_NETFILTER)	+= netfilter.o netfilter/
 obj-$(CONFIG_IP_VS) += ipvs/
 obj-$(CONFIG_INET_DIAG) += inet_diag.o 
 obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o
 obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
 obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
+obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o
 obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o
 obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o
 obj-$(CONFIG_TCP_CONG_HYBLA) += tcp_hybla.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index eaa150c33b0..97c276f95b3 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -79,6 +79,7 @@
 #include <linux/string.h>
 #include <linux/sockios.h>
 #include <linux/net.h>
+#include <linux/capability.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
@@ -93,6 +94,7 @@
 #include <linux/smp_lock.h>
 #include <linux/inet.h>
 #include <linux/igmp.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <net/ip.h>
 #include <net/protocol.h>
@@ -228,13 +230,14 @@ static int inet_create(struct socket *sock, int protocol)
 	unsigned char answer_flags;
 	char answer_no_check;
 	int try_loading_module = 0;
-	int err = -ESOCKTNOSUPPORT;
+	int err;
 
 	sock->state = SS_UNCONNECTED;
 
 	/* Look for the requested type/protocol pair. */
 	answer = NULL;
 lookup_protocol:
+	err = -ESOCKTNOSUPPORT;
 	rcu_read_lock();
 	list_for_each_rcu(p, &inetsw[sock->type]) {
 		answer = list_entry(p, struct inet_protosw, list);
@@ -252,6 +255,7 @@ lookup_protocol:
 			if (IPPROTO_IP == answer->protocol)
 				break;
 		}
+		err = -EPROTONOSUPPORT;
 		answer = NULL;
 	}
 
@@ -280,9 +284,6 @@ lookup_protocol:
 	err = -EPERM;
 	if (answer->capability > 0 && !capable(answer->capability))
 		goto out_rcu_unlock;
-	err = -EPROTONOSUPPORT;
-	if (!protocol)
-		goto out_rcu_unlock;
 
 	sock->ops = answer->ops;
 	answer_prot = answer->prot;
@@ -303,6 +304,7 @@ lookup_protocol:
 		sk->sk_reuse = 1;
 
 	inet = inet_sk(sk);
+	inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
 
 	if (SOCK_RAW == sock->type) {
 		inet->num = protocol;
@@ -776,16 +778,16 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 			err = devinet_ioctl(cmd, (void __user *)arg);
 			break;
 		default:
-			if (!sk->sk_prot->ioctl ||
-			    (err = sk->sk_prot->ioctl(sk, cmd, arg)) ==
-			    					-ENOIOCTLCMD)
-				err = dev_ioctl(cmd, (void __user *)arg);
+			if (sk->sk_prot->ioctl)
+				err = sk->sk_prot->ioctl(sk, cmd, arg);
+			else
+				err = -ENOIOCTLCMD;
 			break;
 	}
 	return err;
 }
 
-struct proto_ops inet_stream_ops = {
+const struct proto_ops inet_stream_ops = {
 	.family =	PF_INET,
 	.owner =	THIS_MODULE,
 	.release =	inet_release,
@@ -806,7 +808,7 @@ struct proto_ops inet_stream_ops = {
 	.sendpage =	tcp_sendpage
 };
 
-struct proto_ops inet_dgram_ops = {
+const struct proto_ops inet_dgram_ops = {
 	.family =	PF_INET,
 	.owner =	THIS_MODULE,
 	.release =	inet_release,
@@ -831,7 +833,7 @@ struct proto_ops inet_dgram_ops = {
  * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without
  * udp_poll
  */
-static struct proto_ops inet_sockraw_ops = {
+static const struct proto_ops inet_sockraw_ops = {
 	.family =	PF_INET,
 	.owner =	THIS_MODULE,
 	.release =	inet_release,
@@ -870,7 +872,8 @@ static struct inet_protosw inetsw_array[] =
                 .ops =        &inet_stream_ops,
                 .capability = -1,
                 .no_check =   0,
-                .flags =      INET_PROTOSW_PERMANENT,
+                .flags =      INET_PROTOSW_PERMANENT |
+			      INET_PROTOSW_ICSK,
         },
 
         {
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 035ad2c9e1b..aed537fa2c8 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -6,6 +6,7 @@
 #include <linux/crypto.h>
 #include <linux/pfkeyv2.h>
 #include <net/icmp.h>
+#include <net/protocol.h>
 #include <asm/scatterlist.h>
 
 
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index b425748f02d..accdefedfed 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -79,6 +79,7 @@
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/capability.h>
 #include <linux/config.h>
 #include <linux/socket.h>
 #include <linux/sockios.h>
@@ -86,6 +87,7 @@
 #include <linux/in.h>
 #include <linux/mm.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/fddidevice.h>
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 04a6fe3e95a..95b9d81ac48 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -32,6 +32,7 @@
 #include <asm/uaccess.h>
 #include <asm/system.h>
 #include <linux/bitops.h>
+#include <linux/capability.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
@@ -58,6 +59,7 @@
 #endif
 #include <linux/kmod.h>
 
+#include <net/arp.h>
 #include <net/ip.h>
 #include <net/route.h>
 #include <net/ip_fib.h>
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 1b18ce66e7b..73bfcae8af9 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -9,6 +9,7 @@
 #include <linux/pfkeyv2.h>
 #include <linux/random.h>
 #include <net/icmp.h>
+#include <net/protocol.h>
 #include <net/udp.h>
 
 /* decapsulation data for use when post-processing */
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 882f88f6d13..4e3d3811dea 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -20,6 +20,7 @@
 #include <asm/uaccess.h>
 #include <asm/system.h>
 #include <linux/bitops.h>
+#include <linux/capability.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
@@ -30,6 +31,7 @@
 #include <linux/errno.h>
 #include <linux/in.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/skbuff.h>
@@ -287,13 +289,13 @@ static int inet_check_attr(struct rtmsg *r, struct rtattr **rta)
 {
 	int i;
 
-	for (i=1; i<=RTA_MAX; i++) {
-		struct rtattr *attr = rta[i-1];
+	for (i=1; i<=RTA_MAX; i++, rta++) {
+		struct rtattr *attr = *rta;
 		if (attr) {
 			if (RTA_PAYLOAD(attr) < 4)
 				return -EINVAL;
 			if (i != RTA_MULTIPATH && i != RTA_METRICS)
-				rta[i-1] = (struct rtattr*)RTA_DATA(attr);
+				*rta = (struct rtattr*)RTA_DATA(attr);
 		}
 	}
 	return 0;
@@ -544,12 +546,19 @@ static void nl_fib_input(struct sock *sk, int len)
 	struct sk_buff *skb = NULL;
         struct nlmsghdr *nlh = NULL;
 	struct fib_result_nl *frn;
-	int err;
 	u32 pid;     
 	struct fib_table *tb;
 	
-	skb = skb_recv_datagram(sk, 0, 0, &err);
+	skb = skb_dequeue(&sk->sk_receive_queue);
+	/* skb_dequeue() yields NULL on an empty queue; nothing to process */
+	if (skb == NULL)
+		return;
 	nlh = (struct nlmsghdr *)skb->data;
+	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
+	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
+		kfree_skb(skb);
+		return;
+	}
 	
 	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
 	tb = fib_get_table(frn->tb_id_in);
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 7ea0209cb16..e2890ec8159 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -29,6 +29,7 @@
 #include <linux/errno.h>
 #include <linux/in.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/proc_fs.h>
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 0b298bbc151..0dd4d06e456 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -33,6 +33,7 @@
 #include <linux/errno.h>
 #include <linux/in.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/proc_fs.h>
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 6d2a6ac070e..ef4724de735 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -29,6 +29,7 @@
 #include <linux/errno.h>
 #include <linux/in.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/proc_fs.h>
@@ -36,6 +37,7 @@
 #include <linux/netlink.h>
 #include <linux/init.h>
 
+#include <net/arp.h>
 #include <net/ip.h>
 #include <net/protocol.h>
 #include <net/route.h>
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 705e3ce86df..e320b32373e 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -41,6 +41,13 @@
  *		modify it under the terms of the GNU General Public License
  *		as published by the Free Software Foundation; either version
  *		2 of the License, or (at your option) any later version.
+ *
+ * Substantial contributions to this work come from:
+ *
+ *		David S. Miller, <davem@davemloft.net>
+ *		Stephen Hemminger <shemminger@osdl.org>
+ *		Paul E. McKenney <paulmck@us.ibm.com>
+ *		Patrick McHardy <kaber@trash.net>
  */
 
 #define VERSION "0.404"
@@ -59,6 +66,7 @@
 #include <linux/errno.h>
 #include <linux/in.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/proc_fs.h>
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 92e23b2ad4d..105039eb762 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -73,6 +73,7 @@
 #include <linux/socket.h>
 #include <linux/in.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/string.h>
 #include <linux/netfilter_ipv4.h>
@@ -898,8 +899,7 @@ static void icmp_address_reply(struct sk_buff *skb)
 		u32 _mask, *mp;
 
 		mp = skb_header_pointer(skb, 0, sizeof(_mask), &_mask);
-		if (mp == NULL)
-			BUG();
+		BUG_ON(mp == NULL);
 		for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
 			if (*mp == ifa->ifa_mask &&
 			    inet_ifa_match(rt->rt_src, ifa))
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index c04607b4921..d8ce7133cd8 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -91,6 +91,8 @@
 #include <linux/if_arp.h>
 #include <linux/rtnetlink.h>
 #include <linux/times.h>
+
+#include <net/arp.h>
 #include <net/ip.h>
 #include <net/protocol.h>
 #include <net/route.h>
@@ -231,7 +233,18 @@ static int is_in(struct ip_mc_list *pmc, struct ip_sf_list *psf, int type,
 	case IGMPV3_MODE_IS_EXCLUDE:
 		if (gdeleted || sdeleted)
 			return 0;
-		return !(pmc->gsquery && !psf->sf_gsresp);
+		if (!(pmc->gsquery && !psf->sf_gsresp)) {
+			if (pmc->sfmode == MCAST_INCLUDE)
+				return 1;
+			/* don't include if this source is excluded
+			 * in all filters
+			 */
+			if (psf->sf_count[MCAST_INCLUDE])
+				return type == IGMPV3_MODE_IS_INCLUDE;
+			return pmc->sfcount[MCAST_EXCLUDE] ==
+				psf->sf_count[MCAST_EXCLUDE];
+		}
+		return 0;
 	case IGMPV3_CHANGE_TO_INCLUDE:
 		if (gdeleted || sdeleted)
 			return 0;
@@ -383,7 +396,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
 	struct igmpv3_report *pih;
 	struct igmpv3_grec *pgr = NULL;
 	struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list;
-	int scount, first, isquery, truncate;
+	int scount, stotal, first, isquery, truncate;
 
 	if (pmc->multiaddr == IGMP_ALL_HOSTS)
 		return skb;
@@ -393,25 +406,13 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
 	truncate = type == IGMPV3_MODE_IS_EXCLUDE ||
 		    type == IGMPV3_CHANGE_TO_EXCLUDE;
 
+	stotal = scount = 0;
+
 	psf_list = sdeleted ? &pmc->tomb : &pmc->sources;
 
-	if (!*psf_list) {
-		if (type == IGMPV3_ALLOW_NEW_SOURCES ||
-		    type == IGMPV3_BLOCK_OLD_SOURCES)
-			return skb;
-		if (pmc->crcount || isquery) {
-			/* make sure we have room for group header and at
-			 * least one source.
-			 */
-			if (skb && AVAILABLE(skb) < sizeof(struct igmpv3_grec)+
-			    sizeof(__u32)) {
-				igmpv3_sendpack(skb);
-				skb = NULL; /* add_grhead will get a new one */
-			}
-			skb = add_grhead(skb, pmc, type, &pgr);
-		}
-		return skb;
-	}
+	if (!*psf_list)
+		goto empty_source;
+
 	pih = skb ? (struct igmpv3_report *)skb->h.igmph : NULL;
 
 	/* EX and TO_EX get a fresh packet, if needed */
@@ -424,7 +425,6 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
 		}
 	}
 	first = 1;
-	scount = 0;
 	psf_prev = NULL;
 	for (psf=*psf_list; psf; psf=psf_next) {
 		u32 *psrc;
@@ -458,7 +458,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
 		}
 		psrc = (u32 *)skb_put(skb, sizeof(u32));
 		*psrc = psf->sf_inaddr;
-		scount++;
+		scount++; stotal++;
 		if ((type == IGMPV3_ALLOW_NEW_SOURCES ||
 		     type == IGMPV3_BLOCK_OLD_SOURCES) && psf->sf_crcount) {
 			psf->sf_crcount--;
@@ -473,6 +473,21 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
 		}
 		psf_prev = psf;
 	}
+
+empty_source:
+	if (!stotal) {
+		if (type == IGMPV3_ALLOW_NEW_SOURCES ||
+		    type == IGMPV3_BLOCK_OLD_SOURCES)
+			return skb;
+		if (pmc->crcount || isquery) {
+			/* make sure we have room for group header */
+			if (skb && AVAILABLE(skb)<sizeof(struct igmpv3_grec)) {
+				igmpv3_sendpack(skb);
+				skb = NULL; /* add_grhead will get a new one */
+			}
+			skb = add_grhead(skb, pmc, type, &pgr);
+		}
+	}
 	if (pgr)
 		pgr->grec_nsrcs = htons(scount);
 
@@ -555,11 +570,11 @@ static void igmpv3_send_cr(struct in_device *in_dev)
 			skb = add_grec(skb, pmc, dtype, 1, 1);
 		}
 		if (pmc->crcount) {
-			pmc->crcount--;
 			if (pmc->sfmode == MCAST_EXCLUDE) {
 				type = IGMPV3_CHANGE_TO_INCLUDE;
 				skb = add_grec(skb, pmc, type, 1, 0);
 			}
+			pmc->crcount--;
 			if (pmc->crcount == 0) {
 				igmpv3_clear_zeros(&pmc->tomb);
 				igmpv3_clear_zeros(&pmc->sources);
@@ -592,12 +607,12 @@ static void igmpv3_send_cr(struct in_device *in_dev)
 
 		/* filter mode changes */
 		if (pmc->crcount) {
-			pmc->crcount--;
 			if (pmc->sfmode == MCAST_EXCLUDE)
 				type = IGMPV3_CHANGE_TO_EXCLUDE;
 			else
 				type = IGMPV3_CHANGE_TO_INCLUDE;
 			skb = add_grec(skb, pmc, type, 0, 0);
+			pmc->crcount--;
 		}
 		spin_unlock_bh(&pmc->lock);
 	}
@@ -733,7 +748,8 @@ static void igmp_timer_expire(unsigned long data)
 	ip_ma_put(im);
 }
 
-static void igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs)
+/* mark EXCLUDE-mode sources */
+static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs)
 {
 	struct ip_sf_list *psf;
 	int i, scount;
@@ -742,6 +758,37 @@ static void igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs)
 	for (psf=pmc->sources; psf; psf=psf->sf_next) {
 		if (scount == nsrcs)
 			break;
+		for (i=0; i<nsrcs; i++) {
+			/* skip inactive filters */
+			if (pmc->sfcount[MCAST_INCLUDE] ||
+			    pmc->sfcount[MCAST_EXCLUDE] !=
+			    psf->sf_count[MCAST_EXCLUDE])
+				continue;
+			if (srcs[i] == psf->sf_inaddr) {
+				scount++;
+				break;
+			}
+		}
+	}
+	pmc->gsquery = 0;
+	if (scount == nsrcs)	/* all sources excluded */
+		return 0;
+	return 1;
+}
+
+static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs)
+{
+	struct ip_sf_list *psf;
+	int i, scount;
+
+	if (pmc->sfmode == MCAST_EXCLUDE)
+		return igmp_xmarksources(pmc, nsrcs, srcs);
+
+	/* mark INCLUDE-mode sources */
+	scount = 0;
+	for (psf=pmc->sources; psf; psf=psf->sf_next) {
+		if (scount == nsrcs)
+			break;
 		for (i=0; i<nsrcs; i++)
 			if (srcs[i] == psf->sf_inaddr) {
 				psf->sf_gsresp = 1;
@@ -749,6 +796,12 @@ static void igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs)
 				break;
 			}
 	}
+	if (!scount) {
+		pmc->gsquery = 0;
+		return 0;
+	}
+	pmc->gsquery = 1;
+	return 1;
 }
 
 static void igmp_heard_report(struct in_device *in_dev, u32 group)
@@ -843,6 +896,8 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
 	 */
 	read_lock(&in_dev->mc_list_lock);
 	for (im=in_dev->mc_list; im!=NULL; im=im->next) {
+		int changed;
+
 		if (group && group != im->multiaddr)
 			continue;
 		if (im->multiaddr == IGMP_ALL_HOSTS)
@@ -852,10 +907,11 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
 			im->gsquery = im->gsquery && mark;
 		else
 			im->gsquery = mark;
-		if (im->gsquery)
-			igmp_marksources(im, ntohs(ih3->nsrcs), ih3->srcs);
+		changed = !im->gsquery ||
+		    	igmp_marksources(im, ntohs(ih3->nsrcs), ih3->srcs);
 		spin_unlock_bh(&im->lock);
-		igmp_mod_timer(im, max_delay);
+		if (changed)
+			igmp_mod_timer(im, max_delay);
 	}
 	read_unlock(&in_dev->mc_list_lock);
 }
@@ -897,7 +953,10 @@ int igmp_rcv(struct sk_buff *skb)
 		/* Is it our report looped back? */
 		if (((struct rtable*)skb->dst)->fl.iif == 0)
 			break;
-		igmp_heard_report(in_dev, ih->group);
+		/* don't rely on MC router hearing unicast reports */
+		if (skb->pkt_type == PACKET_MULTICAST ||
+		    skb->pkt_type == PACKET_BROADCAST)
+			igmp_heard_report(in_dev, ih->group);
 		break;
 	case IGMP_PIM:
 #ifdef CONFIG_IP_PIMSM_V1
@@ -970,7 +1029,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
 	 * for deleted items allows change reports to use common code with
 	 * non-deleted or query-response MCA's.
 	 */
-	pmc = (struct ip_mc_list *)kmalloc(sizeof(*pmc), GFP_KERNEL);
+	pmc = kmalloc(sizeof(*pmc), GFP_KERNEL);
 	if (!pmc)
 		return;
 	memset(pmc, 0, sizeof(*pmc));
@@ -1150,7 +1209,7 @@ void ip_mc_inc_group(struct in_device *in_dev, u32 addr)
 		}
 	}
 
-	im = (struct ip_mc_list *)kmalloc(sizeof(*im), GFP_KERNEL);
+	im = kmalloc(sizeof(*im), GFP_KERNEL);
 	if (!im)
 		goto out;
 
@@ -1471,7 +1530,7 @@ static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode,
 		psf_prev = psf;
 	}
 	if (!psf) {
-		psf = (struct ip_sf_list *)kmalloc(sizeof(*psf), GFP_ATOMIC);
+		psf = kmalloc(sizeof(*psf), GFP_ATOMIC);
 		if (!psf)
 			return -ENOBUFS;
 		memset(psf, 0, sizeof(*psf));
@@ -1505,7 +1564,7 @@ static void sf_markstate(struct ip_mc_list *pmc)
 
 static int sf_setstate(struct ip_mc_list *pmc)
 {
-	struct ip_sf_list *psf;
+	struct ip_sf_list *psf, *dpsf;
 	int mca_xcount = pmc->sfcount[MCAST_EXCLUDE];
 	int qrv = pmc->interface->mr_qrv;
 	int new_in, rv;
@@ -1517,8 +1576,46 @@ static int sf_setstate(struct ip_mc_list *pmc)
 				!psf->sf_count[MCAST_INCLUDE];
 		} else
 			new_in = psf->sf_count[MCAST_INCLUDE] != 0;
-		if (new_in != psf->sf_oldin) {
-			psf->sf_crcount = qrv;
+		if (new_in) {
+			if (!psf->sf_oldin) {
+				struct ip_sf_list *prev = 0;
+
+				for (dpsf=pmc->tomb; dpsf; dpsf=dpsf->sf_next) {
+					if (dpsf->sf_inaddr == psf->sf_inaddr)
+						break;
+					prev = dpsf;
+				}
+				if (dpsf) {
+					if (prev)
+						prev->sf_next = dpsf->sf_next;
+					else
+						pmc->tomb = dpsf->sf_next;
+					kfree(dpsf);
+				}
+				psf->sf_crcount = qrv;
+				rv++;
+			}
+		} else if (psf->sf_oldin) {
+
+			psf->sf_crcount = 0;
+			/*
+			 * add or update "delete" records if an active filter
+			 * is now inactive
+			 */
+			for (dpsf=pmc->tomb; dpsf; dpsf=dpsf->sf_next)
+				if (dpsf->sf_inaddr == psf->sf_inaddr)
+					break;
+			if (!dpsf) {
+				dpsf = (struct ip_sf_list *)
+					kmalloc(sizeof(*dpsf), GFP_ATOMIC);
+				if (!dpsf)
+					continue;
+				*dpsf = *psf;
+				/* pmc->lock held by callers */
+				dpsf->sf_next = pmc->tomb;
+				pmc->tomb = dpsf;
+			}
+			dpsf->sf_crcount = qrv;
 			rv++;
 		}
 	}
@@ -1654,7 +1751,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
 	err = -ENOBUFS;
 	if (count >= sysctl_igmp_max_memberships)
 		goto done;
-	iml = (struct ip_mc_socklist *)sock_kmalloc(sk,sizeof(*iml),GFP_KERNEL);
+	iml = sock_kmalloc(sk,sizeof(*iml),GFP_KERNEL);
 	if (iml == NULL)
 		goto done;
 
@@ -1818,8 +1915,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
 
 		if (psl)
 			count += psl->sl_max;
-		newpsl = (struct ip_sf_socklist *)sock_kmalloc(sk,
-			IP_SFLSIZE(count), GFP_KERNEL);
+		newpsl = sock_kmalloc(sk, IP_SFLSIZE(count), GFP_KERNEL);
 		if (!newpsl) {
 			err = -ENOBUFS;
 			goto done;
@@ -1902,8 +1998,8 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
 		goto done;
 	}
 	if (msf->imsf_numsrc) {
-		newpsl = (struct ip_sf_socklist *)sock_kmalloc(sk,
-				IP_SFLSIZE(msf->imsf_numsrc), GFP_KERNEL);
+		newpsl = sock_kmalloc(sk, IP_SFLSIZE(msf->imsf_numsrc),
+							   GFP_KERNEL);
 		if (!newpsl) {
 			err = -ENOBUFS;
 			goto done;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 3fe021f1a56..ae20281d8de 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -37,7 +37,8 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg);
  */
 int sysctl_local_port_range[2] = { 1024, 4999 };
 
-static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb)
+int inet_csk_bind_conflict(const struct sock *sk,
+			   const struct inet_bind_bucket *tb)
 {
 	const u32 sk_rcv_saddr = inet_rcv_saddr(sk);
 	struct sock *sk2;
@@ -62,11 +63,15 @@ static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucke
 	return node != NULL;
 }
 
+EXPORT_SYMBOL_GPL(inet_csk_bind_conflict);
+
 /* Obtain a reference to a local port for the given sock,
  * if snum is zero it means select any available local port.
  */
 int inet_csk_get_port(struct inet_hashinfo *hashinfo,
-		      struct sock *sk, unsigned short snum)
+		      struct sock *sk, unsigned short snum,
+		      int (*bind_conflict)(const struct sock *sk,
+					   const struct inet_bind_bucket *tb))
 {
 	struct inet_bind_hashbucket *head;
 	struct hlist_node *node;
@@ -125,7 +130,7 @@ tb_found:
 			goto success;
 		} else {
 			ret = 1;
-			if (inet_csk_bind_conflict(sk, tb))
+			if (bind_conflict(sk, tb))
 				goto fail_unlock;
 		}
 	}
@@ -380,7 +385,7 @@ struct request_sock *inet_csk_search_req(const struct sock *sk,
 EXPORT_SYMBOL_GPL(inet_csk_search_req);
 
 void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
-				   const unsigned timeout)
+				   unsigned long timeout)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
@@ -631,3 +636,15 @@ void inet_csk_listen_stop(struct sock *sk)
 }
 
 EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
+
+/* Store sk's destination address and port in uaddr as a sockaddr_in. */
+void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
+{
+	struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
+
+	sin->sin_family = AF_INET;
+	sin->sin_port = inet_sk(sk)->dport;
+	sin->sin_addr.s_addr = inet_sk(sk)->daddr;
+}
+
+EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 39061ed53cf..457db99c76d 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -50,9 +50,10 @@ static struct sock *idiagnl;
 #define INET_DIAG_PUT(skb, attrtype, attrlen) \
 	RTA_DATA(__RTA_PUT(skb, attrtype, attrlen))
 
-static int inet_diag_fill(struct sk_buff *skb, struct sock *sk,
-			int ext, u32 pid, u32 seq, u16 nlmsg_flags,
-			const struct nlmsghdr *unlh)
+static int inet_csk_diag_fill(struct sock *sk,
+			      struct sk_buff *skb,
+			      int ext, u32 pid, u32 seq, u16 nlmsg_flags,
+			      const struct nlmsghdr *unlh)
 {
 	const struct inet_sock *inet = inet_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -70,20 +71,22 @@ static int inet_diag_fill(struct sk_buff *skb, struct sock *sk,
 	nlh->nlmsg_flags = nlmsg_flags;
 
 	r = NLMSG_DATA(nlh);
-	if (sk->sk_state != TCP_TIME_WAIT) {
-		if (ext & (1 << (INET_DIAG_MEMINFO - 1)))
-			minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO,
-					      sizeof(*minfo));
-		if (ext & (1 << (INET_DIAG_INFO - 1)))
-			info = INET_DIAG_PUT(skb, INET_DIAG_INFO,
-					   handler->idiag_info_size);
-		
-		if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) {
-			size_t len = strlen(icsk->icsk_ca_ops->name);
-			strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1),
-			       icsk->icsk_ca_ops->name);
-		}
+	BUG_ON(sk->sk_state == TCP_TIME_WAIT);
+
+	if (ext & (1 << (INET_DIAG_MEMINFO - 1)))
+		minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, sizeof(*minfo));
+
+	if (ext & (1 << (INET_DIAG_INFO - 1)))
+		info = INET_DIAG_PUT(skb, INET_DIAG_INFO,
+				     handler->idiag_info_size);
+
+	if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) {
+		const size_t len = strlen(icsk->icsk_ca_ops->name);
+
+		strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1),
+		       icsk->icsk_ca_ops->name);
 	}
+
 	r->idiag_family = sk->sk_family;
 	r->idiag_state = sk->sk_state;
 	r->idiag_timer = 0;
@@ -93,37 +96,6 @@ static int inet_diag_fill(struct sk_buff *skb, struct sock *sk,
 	r->id.idiag_cookie[0] = (u32)(unsigned long)sk;
 	r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
 
-	if (r->idiag_state == TCP_TIME_WAIT) {
-		const struct inet_timewait_sock *tw = inet_twsk(sk);
-		long tmo = tw->tw_ttd - jiffies;
-		if (tmo < 0)
-			tmo = 0;
-
-		r->id.idiag_sport = tw->tw_sport;
-		r->id.idiag_dport = tw->tw_dport;
-		r->id.idiag_src[0] = tw->tw_rcv_saddr;
-		r->id.idiag_dst[0] = tw->tw_daddr;
-		r->idiag_state = tw->tw_substate;
-		r->idiag_timer = 3;
-		r->idiag_expires = (tmo * 1000 + HZ - 1) / HZ;
-		r->idiag_rqueue = 0;
-		r->idiag_wqueue = 0;
-		r->idiag_uid = 0;
-		r->idiag_inode = 0;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
-		if (r->idiag_family == AF_INET6) {
-			const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk);
-
-			ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
-				       &tcp6tw->tw_v6_rcv_saddr);
-			ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
-				       &tcp6tw->tw_v6_daddr);
-		}
-#endif
-		nlh->nlmsg_len = skb->tail - b;
-		return skb->len;
-	}
-
 	r->id.idiag_sport = inet->sport;
 	r->id.idiag_dport = inet->dport;
 	r->id.idiag_src[0] = inet->rcv_saddr;
@@ -185,7 +157,75 @@ nlmsg_failure:
 	return -1;
 }
 
-static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh)
+/* Fill one inet_diag_msg for a TIME_WAIT socket.  Returns skb->len, or -1
+ * from the nlmsg_failure path after trimming skb back to its old tail. */
+static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
+			       struct sk_buff *skb, int ext, u32 pid,
+			       u32 seq, u16 nlmsg_flags,
+			       const struct nlmsghdr *unlh)
+{
+	long tmo;
+	struct inet_diag_msg *r;
+	const unsigned char *previous_tail = skb->tail;
+	struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq,
+					 unlh->nlmsg_type, sizeof(*r));
+
+	r = NLMSG_DATA(nlh);
+	BUG_ON(tw->tw_state != TCP_TIME_WAIT);
+
+	nlh->nlmsg_flags = nlmsg_flags;
+
+	tmo = tw->tw_ttd - jiffies;
+	if (tmo < 0)
+		tmo = 0;
+
+	r->idiag_family	      = tw->tw_family;
+	r->idiag_retrans      = 0;
+	r->id.idiag_if	      = tw->tw_bound_dev_if;
+	r->id.idiag_cookie[0] = (u32)(unsigned long)tw;
+	r->id.idiag_cookie[1] = (u32)(((unsigned long)tw >> 31) >> 1);
+	r->id.idiag_sport     = tw->tw_sport;
+	r->id.idiag_dport     = tw->tw_dport;
+	r->id.idiag_src[0]    = tw->tw_rcv_saddr;
+	r->id.idiag_dst[0]    = tw->tw_daddr;
+	r->idiag_state	      = tw->tw_substate;
+	r->idiag_timer	      = 3;
+	r->idiag_expires      = (tmo * 1000 + HZ - 1) / HZ;
+	r->idiag_rqueue	      = 0;
+	r->idiag_wqueue	      = 0;
+	r->idiag_uid	      = 0;
+	r->idiag_inode	      = 0;
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+	if (tw->tw_family == AF_INET6) {
+		const struct inet6_timewait_sock *tw6 =
+						inet6_twsk((struct sock *)tw);
+
+		ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
+			       &tw6->tw_v6_rcv_saddr);
+		ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
+			       &tw6->tw_v6_daddr);
+	}
+#endif
+	nlh->nlmsg_len = skb->tail - previous_tail;
+	return skb->len;
+nlmsg_failure:
+	skb_trim(skb, previous_tail - skb->data);
+	return -1;
+}
+
+static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, int ext, u32 pid,
+			u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh)
+{
+	/* TIME_WAIT sockets have their own fill routine */
+	if (sk->sk_state != TCP_TIME_WAIT)
+		return inet_csk_diag_fill(sk, skb, ext, pid, seq,
+					  nlmsg_flags, unlh);
+	return inet_twsk_diag_fill((struct inet_timewait_sock *)sk, skb, ext,
+				   pid, seq, nlmsg_flags, unlh);
+}
+
+static int inet_diag_get_exact(struct sk_buff *in_skb,
+			       const struct nlmsghdr *nlh)
 {
 	int err;
 	struct sock *sk;
@@ -235,7 +275,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nl
 	if (!rep)
 		goto out;
 
-	if (inet_diag_fill(rep, sk, req->idiag_ext,
+	if (sk_diag_fill(sk, rep, req->idiag_ext,
 			 NETLINK_CB(in_skb).pid,
 			 nlh->nlmsg_seq, 0, nlh) <= 0)
 		BUG();
@@ -283,7 +323,7 @@ static int bitstring_match(const u32 *a1, const u32 *a2, int bits)
 
 
 static int inet_diag_bc_run(const void *bc, int len,
-			  const struct inet_diag_entry *entry)
+			    const struct inet_diag_entry *entry)
 {
 	while (len > 0) {
 		int yes = 1;
@@ -322,7 +362,7 @@ static int inet_diag_bc_run(const void *bc, int len,
 				yes = 0;
 				break;
 			}
-			
+
 			if (cond->prefix_len == 0)
 				break;
 
@@ -331,7 +371,8 @@ static int inet_diag_bc_run(const void *bc, int len,
 			else
 				addr = entry->daddr;
 
-			if (bitstring_match(addr, cond->addr, cond->prefix_len))
+			if (bitstring_match(addr, cond->addr,
+					    cond->prefix_len))
 				break;
 			if (entry->family == AF_INET6 &&
 			    cond->family == AF_INET) {
@@ -346,7 +387,7 @@ static int inet_diag_bc_run(const void *bc, int len,
 		}
 		}
 
-		if (yes) { 
+		if (yes) {
 			len -= op->yes;
 			bc += op->yes;
 		} else {
@@ -407,14 +448,15 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
 		default:
 			return -EINVAL;
 		}
-		bc += op->yes;
+		bc  += op->yes;
 		len -= op->yes;
 	}
 	return len == 0 ? 0 : -EINVAL;
 }
 
-static int inet_diag_dump_sock(struct sk_buff *skb, struct sock *sk,
-			     struct netlink_callback *cb)
+static int inet_csk_diag_dump(struct sock *sk,
+			      struct sk_buff *skb,
+			      struct netlink_callback *cb)
 {
 	struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
 
@@ -444,14 +486,50 @@ static int inet_diag_dump_sock(struct sk_buff *skb, struct sock *sk,
 			return 0;
 	}
 
-	return inet_diag_fill(skb, sk, r->idiag_ext, NETLINK_CB(cb->skb).pid,
-			    cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
+	return inet_csk_diag_fill(sk, skb, r->idiag_ext,
+				  NETLINK_CB(cb->skb).pid,
+				  cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
+}
+
+/* Dump one TIME_WAIT socket to skb unless the request's filter rejects it. */
+static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
+			       struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
+
+	if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
+		struct inet_diag_entry entry;
+		struct rtattr *bc = (struct rtattr *)(r + 1);
+
+		entry.family = tw->tw_family;
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+		if (tw->tw_family == AF_INET6) {
+			struct inet6_timewait_sock *tw6 =
+						inet6_twsk((struct sock *)tw);
+			entry.saddr = tw6->tw_v6_rcv_saddr.s6_addr32;
+			entry.daddr = tw6->tw_v6_daddr.s6_addr32;
+		} else
+#endif
+		{
+			entry.saddr = &tw->tw_rcv_saddr;
+			entry.daddr = &tw->tw_daddr;
+		}
+		entry.sport = tw->tw_num;
+		entry.dport = ntohs(tw->tw_dport);
+		entry.userlocks = 0;
+
+		if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
+			return 0;
+	}
+
+	return inet_twsk_diag_fill(tw, skb, r->idiag_ext,
+				   NETLINK_CB(cb->skb).pid,
+				   cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
+}
 
 static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
-			    struct request_sock *req,
-			    u32 pid, u32 seq,
-			    const struct nlmsghdr *unlh)
+			      struct request_sock *req, u32 pid, u32 seq,
+			      const struct nlmsghdr *unlh)
 {
 	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct inet_sock *inet = inet_sk(sk);
@@ -489,9 +567,9 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
 	if (r->idiag_family == AF_INET6) {
 		ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
-			       &tcp6_rsk(req)->loc_addr);
+			       &inet6_rsk(req)->loc_addr);
 		ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
-			       &tcp6_rsk(req)->rmt_addr);
+			       &inet6_rsk(req)->rmt_addr);
 	}
 #endif
 	nlh->nlmsg_len = skb->tail - b;
@@ -504,7 +582,7 @@ nlmsg_failure:
 }
 
 static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
-			     struct netlink_callback *cb)
+			       struct netlink_callback *cb)
 {
 	struct inet_diag_entry entry;
 	struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
@@ -553,13 +631,13 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 				entry.saddr =
 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
 					(entry.family == AF_INET6) ?
-					tcp6_rsk(req)->loc_addr.s6_addr32 :
+					inet6_rsk(req)->loc_addr.s6_addr32 :
 #endif
 					&ireq->loc_addr;
-				entry.daddr = 
+				entry.daddr =
 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
 					(entry.family == AF_INET6) ?
-					tcp6_rsk(req)->rmt_addr.s6_addr32 :
+					inet6_rsk(req)->rmt_addr.s6_addr32 :
 #endif
 					&ireq->rmt_addr;
 				entry.dport = ntohs(ireq->rmt_port);
@@ -599,7 +677,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	handler = inet_diag_table[cb->nlh->nlmsg_type];
 	BUG_ON(handler == NULL);
 	hashinfo = handler->idiag_hashinfo;
-		
+
 	s_i = cb->args[1];
 	s_num = num = cb->args[2];
 
@@ -630,7 +708,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 				    cb->args[3] > 0)
 					goto syn_recv;
 
-				if (inet_diag_dump_sock(skb, sk, cb) < 0) {
+				if (inet_csk_diag_dump(sk, skb, cb) < 0) {
 					inet_listen_unlock(hashinfo);
 					goto done;
 				}
@@ -672,7 +750,6 @@ skip_listen_ht:
 			s_num = 0;
 
 		read_lock_bh(&head->lock);
-
 		num = 0;
 		sk_for_each(sk, node, &head->chain) {
 			struct inet_sock *inet = inet_sk(sk);
@@ -684,9 +761,10 @@ skip_listen_ht:
 			if (r->id.idiag_sport != inet->sport &&
 			    r->id.idiag_sport)
 				goto next_normal;
-			if (r->id.idiag_dport != inet->dport && r->id.idiag_dport)
+			if (r->id.idiag_dport != inet->dport &&
+			    r->id.idiag_dport)
 				goto next_normal;
-			if (inet_diag_dump_sock(skb, sk, cb) < 0) {
+			if (inet_csk_diag_dump(sk, skb, cb) < 0) {
 				read_unlock_bh(&head->lock);
 				goto done;
 			}
@@ -695,19 +773,20 @@ next_normal:
 		}
 
 		if (r->idiag_states & TCPF_TIME_WAIT) {
-			sk_for_each(sk, node,
+			struct inet_timewait_sock *tw;
+
+			inet_twsk_for_each(tw, node,
 				    &hashinfo->ehash[i + hashinfo->ehash_size].chain) {
-				struct inet_sock *inet = inet_sk(sk);
 
 				if (num < s_num)
 					goto next_dying;
-				if (r->id.idiag_sport != inet->sport &&
+				if (r->id.idiag_sport != tw->tw_sport &&
 				    r->id.idiag_sport)
 					goto next_dying;
-				if (r->id.idiag_dport != inet->dport &&
+				if (r->id.idiag_dport != tw->tw_dport &&
 				    r->id.idiag_dport)
 					goto next_dying;
-				if (inet_diag_dump_sock(skb, sk, cb) < 0) {
+				if (inet_twsk_diag_dump(tw, skb, cb) < 0) {
 					read_unlock_bh(&head->lock);
 					goto done;
 				}
@@ -724,8 +803,7 @@ done:
 	return skb->len;
 }
 
-static __inline__ int
-inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static inline int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
 		return 0;
@@ -755,9 +833,8 @@ inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		}
 		return netlink_dump_start(idiagnl, skb, nlh,
 					  inet_diag_dump, NULL);
-	} else {
+	} else
 		return inet_diag_get_exact(skb, nlh);
-	}
 
 err_inval:
 	return -EINVAL;
@@ -766,15 +843,15 @@ err_inval:
 
 static inline void inet_diag_rcv_skb(struct sk_buff *skb)
 {
-	int err;
-	struct nlmsghdr * nlh;
-
 	if (skb->len >= NLMSG_SPACE(0)) {
-		nlh = (struct nlmsghdr *)skb->data;
-		if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
+		int err;
+		struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data;
+
+		if (nlh->nlmsg_len < sizeof(*nlh) ||
+		    skb->len < nlh->nlmsg_len)
 			return;
 		err = inet_diag_rcv_msg(skb, nlh);
-		if (err || nlh->nlmsg_flags & NLM_F_ACK) 
+		if (err || nlh->nlmsg_flags & NLM_F_ACK)
 			netlink_ack(skb, nlh, err);
 	}
 }
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index e8d29fe736d..33228115cda 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -15,12 +15,14 @@
 
 #include <linux/config.h>
 #include <linux/module.h>
+#include <linux/random.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/wait.h>
 
 #include <net/inet_connection_sock.h>
 #include <net/inet_hashtables.h>
+#include <net/ip.h>
 
 /*
  * Allocate and initialize a new local port bind bucket.
@@ -163,3 +165,179 @@ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 dad
 }
 
 EXPORT_SYMBOL_GPL(__inet_lookup_listener);
+
+/* Check lport for uniqueness and hash sk; called with local bh disabled */
+static int __inet_check_established(struct inet_timewait_death_row *death_row,
+				    struct sock *sk, __u16 lport,
+				    struct inet_timewait_sock **twp)
+{
+	struct inet_hashinfo *hinfo = death_row->hashinfo;
+	struct inet_sock *inet = inet_sk(sk);
+	u32 daddr = inet->rcv_saddr;
+	u32 saddr = inet->daddr;
+	int dif = sk->sk_bound_dev_if;
+	INET_ADDR_COOKIE(acookie, saddr, daddr)
+	const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
+	unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
+	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
+	struct sock *sk2;
+	const struct hlist_node *node;
+	struct inet_timewait_sock *tw;
+
+	prefetch(head->chain.first);
+	write_lock(&head->lock);
+
+	/* Check TIME-WAIT sockets first; twsk_unique() decides on a match. */
+	sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) {
+		tw = inet_twsk(sk2);
+
+		if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
+			if (twsk_unique(sk, sk2, twp))
+				goto unique;
+			else
+				goto not_unique;
+		}
+	}
+	tw = NULL;
+
+	/* ...then the established chain, where any match is a conflict. */
+	sk_for_each(sk2, node, &head->chain) {
+		if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
+			goto not_unique;
+	}
+
+unique:
+	/* Record num and sport before hashing, otherwise the hash table
+	 * would hold a socket with a bogus identity. */
+	inet->num = lport;
+	inet->sport = htons(lport);
+	sk->sk_hash = hash;
+	BUG_TRAP(sk_unhashed(sk));
+	__sk_add_node(sk, &head->chain);
+	sock_prot_inc_use(sk->sk_prot);
+	write_unlock(&head->lock);
+
+	if (twp) {
+		*twp = tw;
+		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+	} else if (tw) {
+		/* No twp: deschedule tw here (should hash-dance). */
+		inet_twsk_deschedule(tw, death_row);
+		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+
+		inet_twsk_put(tw);
+	}
+
+	return 0;
+
+not_unique:
+	write_unlock(&head->lock);
+	return -EADDRNOTAVAIL;
+}
+
+static inline u32 inet_sk_port_offset(const struct sock *sk)
+{
+	const struct inet_sock *inet = inet_sk(sk);
+	return secure_ipv4_port_ephemeral(inet->rcv_saddr, inet->daddr,
+					  inet->dport);
+}
+
+/* Bind a local port for a connect operation (searching the local port
+ * range when none is set yet) and hash sk; returns 0 or -EADDRNOTAVAIL.
+ */
+int inet_hash_connect(struct inet_timewait_death_row *death_row,
+		      struct sock *sk)
+{
+	struct inet_hashinfo *hinfo = death_row->hashinfo;
+	const unsigned short snum = inet_sk(sk)->num;
+	struct inet_bind_hashbucket *head;
+	struct inet_bind_bucket *tb;
+	int ret;
+
+	if (!snum) {
+		int low = sysctl_local_port_range[0];
+		int high = sysctl_local_port_range[1];
+		int range = high - low;
+		int i;
+		int port;
+		static u32 hint;
+		u32 offset = hint + inet_sk_port_offset(sk);
+		struct hlist_node *node;
+		struct inet_timewait_sock *tw = NULL;
+
+		local_bh_disable();
+		for (i = 1; i <= range; i++) {
+			port = low + (i + offset) % range;
+			head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
+			spin_lock(&head->lock);
+
+			/* Does not bother with rcv_saddr checks,
+			 * because the established check is already
+			 * unique enough.
+			 */
+			inet_bind_bucket_for_each(tb, node, &head->chain) {
+				if (tb->port == port) {
+					BUG_TRAP(!hlist_empty(&tb->owners));
+					if (tb->fastreuse >= 0)
+						goto next_port;
+					if (!__inet_check_established(death_row,
+								      sk, port,
+								      &tw))
+						goto ok;
+					goto next_port;
+				}
+			}
+
+			tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, port);
+			if (!tb) {
+				spin_unlock(&head->lock);
+				break;
+			}
+			tb->fastreuse = -1;
+			goto ok;
+
+		next_port:
+			spin_unlock(&head->lock);
+		}
+		local_bh_enable();
+
+		return -EADDRNOTAVAIL;
+
+ok:
+		hint += i;
+
+		/* Head lock still held and bh's disabled */
+		inet_bind_hash(sk, tb, port);
+		if (sk_unhashed(sk)) {
+			inet_sk(sk)->sport = htons(port);
+			__inet_hash(hinfo, sk, 0);
+		}
+		spin_unlock(&head->lock);
+
+		if (tw) {
+			inet_twsk_deschedule(tw, death_row);
+			inet_twsk_put(tw);
+		}
+
+		ret = 0;
+		goto out;
+	}
+
+	head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
+	tb = inet_csk(sk)->icsk_bind_hash;
+	spin_lock_bh(&head->lock);
+	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
+		__inet_hash(hinfo, sk, 0);
+		spin_unlock_bh(&head->lock);
+		return 0;
+	} else {
+		spin_unlock(&head->lock);
+		/* bh stay off. No definite answer: walk established hash */
+		ret = __inet_check_established(death_row, sk, snum, NULL);
+out:
+		local_bh_enable();
+		return ret;
+	}
+}
+
+EXPORT_SYMBOL_GPL(inet_hash_connect);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index a010e9a6881..417f126c749 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -90,8 +90,9 @@ EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
 
 struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state)
 {
-	struct inet_timewait_sock *tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_slab,
-							 SLAB_ATOMIC);
+	struct inet_timewait_sock *tw =
+		kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
+				 SLAB_ATOMIC);
 	if (tw != NULL) {
 		const struct inet_sock *inet = inet_sk(sk);
 
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 2fc3fd38924..2160874ce7a 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -304,8 +304,7 @@ static void unlink_from_pool(struct inet_peer *p)
 			/* look for a node to insert instead of p */
 			struct inet_peer *t;
 			t = lookup_rightempty(p);
-			if (*stackptr[-1] != t)
-				BUG();
+			BUG_ON(*stackptr[-1] != t);
 			**--stackptr = t->avl_left;
 			/* t is removed, t->v4daddr > x->v4daddr for any
 			 * x in p->avl_left subtree.
@@ -314,8 +313,7 @@ static void unlink_from_pool(struct inet_peer *p)
 			t->avl_left = p->avl_left;
 			t->avl_right = p->avl_right;
 			t->avl_height = p->avl_height;
-			if (delp[1] != &p->avl_left)
-				BUG();
+			BUG_ON(delp[1] != &p->avl_left);
 			delp[1] = &t->avl_left; /* was &p->avl_left */
 		}
 		peer_avl_rebalance(stack, stackptr);
@@ -401,6 +399,7 @@ struct inet_peer *inet_getpeer(__u32 daddr, int create)
 		return NULL;
 	n->v4daddr = daddr;
 	atomic_set(&n->refcnt, 1);
+	atomic_set(&n->rid, 0);
 	n->ip_id_count = secure_ip_id(daddr);
 	n->tcp_ts_stamp = 0;
 
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 8ce0ce2ee48..2a8adda15e1 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -22,6 +22,7 @@
  *		Patrick McHardy :	LRU queue of frag heads for evictor.
  */
 
+#include <linux/compiler.h>
 #include <linux/config.h>
 #include <linux/module.h>
 #include <linux/types.h>
@@ -38,6 +39,7 @@
 #include <net/ip.h>
 #include <net/icmp.h>
 #include <net/checksum.h>
+#include <net/inetpeer.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/inet.h>
@@ -56,6 +58,8 @@
 int sysctl_ipfrag_high_thresh = 256*1024;
 int sysctl_ipfrag_low_thresh = 192*1024;
 
+int sysctl_ipfrag_max_dist = 64;
+
 /* Important NOTE! Fragment queue must be destroyed before MSL expires.
  * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
  */
@@ -89,8 +93,10 @@ struct ipq {
 	spinlock_t	lock;
 	atomic_t	refcnt;
 	struct timer_list timer;	/* when will this queue expire?		*/
-	int		iif;
 	struct timeval	stamp;
+	int             iif;
+	unsigned int    rid;
+	struct inet_peer *peer;
 };
 
 /* Hash table. */
@@ -195,6 +201,9 @@ static void ip_frag_destroy(struct ipq *qp, int *work)
 	BUG_TRAP(qp->last_in&COMPLETE);
 	BUG_TRAP(del_timer(&qp->timer) == 0);
 
+	if (qp->peer)
+		inet_putpeer(qp->peer);
+
 	/* Release all fragment data. */
 	fp = qp->fragments;
 	while (fp) {
@@ -353,6 +362,7 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph, u32 user)
 	qp->meat = 0;
 	qp->fragments = NULL;
 	qp->iif = 0;
+	qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL;
 
 	/* Initialize a timer for this entry. */
 	init_timer(&qp->timer);
@@ -373,7 +383,7 @@ out_nomem:
  */
 static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
 {
-	__u16 id = iph->id;
+	__be16 id = iph->id;
 	__u32 saddr = iph->saddr;
 	__u32 daddr = iph->daddr;
 	__u8 protocol = iph->protocol;
@@ -398,6 +408,56 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
 	return ip_frag_create(hash, iph, user);
 }
 
+/* Is the fragment too far ahead to be part of ipq?  Nonzero: yes.  0: no, or the check is off. */
+static inline int ip_frag_too_far(struct ipq *qp)
+{
+	struct inet_peer *peer = qp->peer;
+	unsigned int max = sysctl_ipfrag_max_dist;	/* 0 turns the check off */
+	unsigned int start, end;
+
+	int rc;
+
+	if (!peer || !max)	/* no peer entry attached, or limit is 0 */
+		return 0;
+
+	start = qp->rid;	/* value stored by the previous call for this queue */
+	end = atomic_inc_return(&peer->rid);	/* advance the counter kept in the peer entry */
+	qp->rid = end;	/* remembered as "start" for the next call */
+
+	rc = qp->fragments && (end - start) > max;	/* empty queue is never too far; unsigned difference */
+
+	if (rc) {
+		IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);	/* accounted as a reassembly failure */
+	}
+
+	return rc;
+}
+
+static int ip_frag_reinit(struct ipq *qp)	/* empty qp for reuse; returns 0 or -ETIMEDOUT */
+{
+	struct sk_buff *fp;
+
+	if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) {	/* presumably 0 == timer was not pending; confirm */
+		atomic_inc(&qp->refcnt);	/* NOTE(review): looks like the ref for the timer just armed; confirm */
+		return -ETIMEDOUT;	/* ip_frag_queue() kills the queue on a nonzero return */
+	}
+
+	fp = qp->fragments;	/* must be non-NULL: fp is dereferenced before the loop test */
+	do {
+		struct sk_buff *xp = fp->next;	/* save the successor before fp is released */
+		frag_kfree_skb(fp, NULL);
+		fp = xp;
+	} while (fp);
+
+	qp->last_in = 0;	/* from here on: put the queue bookkeeping back to empty */
+	qp->len = 0;
+	qp->meat = 0;
+	qp->fragments = NULL;
+	qp->iif = 0;
+
+	return 0;
+}
+
 /* Add new segment to existing queue. */
 static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 {
@@ -408,6 +468,12 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	if (qp->last_in & COMPLETE)
 		goto err;
 
+	if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
+	    unlikely(ip_frag_too_far(qp)) && unlikely(ip_frag_reinit(qp))) {
+		ipq_kill(qp);
+		goto err;
+	}
+
  	offset = ntohs(skb->nh.iph->frag_off);
 	flags = offset & ~IP_OFFSET;
 	offset &= IP_OFFSET;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index a4c347c3b8e..abe23923e4e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -10,6 +10,7 @@
  *
  */
 
+#include <linux/capability.h>
 #include <linux/config.h>
 #include <linux/module.h>
 #include <linux/types.h>
@@ -28,6 +29,7 @@
 #include <linux/inetdevice.h>
 #include <linux/igmp.h>
 #include <linux/netfilter_ipv4.h>
+#include <linux/if_ether.h>
 
 #include <net/sock.h>
 #include <net/ip.h>
@@ -187,7 +189,7 @@ static struct ip_tunnel * ipgre_tunnel_lookup(u32 remote, u32 local, u32 key)
 	}
 
 	if (ipgre_fb_tunnel_dev->flags&IFF_UP)
-		return ipgre_fb_tunnel_dev->priv;
+		return netdev_priv(ipgre_fb_tunnel_dev);
 	return NULL;
 }
 
@@ -277,7 +279,7 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int
 	  return NULL;
 
 	dev->init = ipgre_tunnel_init;
-	nt = dev->priv;
+	nt = netdev_priv(dev);
 	nt->parms = *parms;
 
 	if (register_netdevice(dev) < 0) {
@@ -285,9 +287,6 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int
 		goto failed;
 	}
 
-	nt = dev->priv;
-	nt->parms = *parms;
-
 	dev_hold(dev);
 	ipgre_tunnel_link(nt);
 	return nt;
@@ -298,7 +297,7 @@ failed:
 
 static void ipgre_tunnel_uninit(struct net_device *dev)
 {
-	ipgre_tunnel_unlink((struct ip_tunnel*)dev->priv);
+	ipgre_tunnel_unlink(netdev_priv(dev));
 	dev_put(dev);
 }
 
@@ -517,7 +516,7 @@ out:
 		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
 		rel_info = htonl(rel_info);
 	} else if (type == ICMP_TIME_EXCEEDED) {
-		struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
+		struct ip_tunnel *t = netdev_priv(skb2->dev);
 		if (t->parms.iph.ttl) {
 			rel_type = ICMP_DEST_UNREACH;
 			rel_code = ICMP_HOST_UNREACH;
@@ -618,7 +617,7 @@ static int ipgre_rcv(struct sk_buff *skb)
 
 		skb->mac.raw = skb->nh.raw;
 		skb->nh.raw = __pskb_pull(skb, offset);
-		skb_postpull_rcsum(skb, skb->mac.raw, offset);
+		skb_postpull_rcsum(skb, skb->h.raw, offset);
 		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
 		skb->pkt_type = PACKET_HOST;
 #ifdef CONFIG_NET_IPGRE_BROADCAST
@@ -668,7 +667,7 @@ drop_nolock:
 
 static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct net_device_stats *stats = &tunnel->stat;
 	struct iphdr  *old_iph = skb->nh.iph;
 	struct iphdr  *tiph;
@@ -831,6 +830,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	skb->h.raw = skb->nh.raw;
 	skb->nh.raw = skb_push(skb, gre_hlen);
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED);
 	dst_release(skb->dst);
 	skb->dst = &rt->u.dst;
 
@@ -913,7 +913,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 			t = ipgre_tunnel_locate(&p, 0);
 		}
 		if (t == NULL)
-			t = (struct ip_tunnel*)dev->priv;
+			t = netdev_priv(dev);
 		memcpy(&p, &t->parms, sizeof(p));
 		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 			err = -EFAULT;
@@ -953,7 +953,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 			} else {
 				unsigned nflags=0;
 
-				t = (struct ip_tunnel*)dev->priv;
+				t = netdev_priv(dev);
 
 				if (MULTICAST(p.iph.daddr))
 					nflags = IFF_BROADCAST;
@@ -1002,7 +1002,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 			if ((t = ipgre_tunnel_locate(&p, 0)) == NULL)
 				goto done;
 			err = -EPERM;
-			if (t == ipgre_fb_tunnel_dev->priv)
+			if (t == netdev_priv(ipgre_fb_tunnel_dev))
 				goto done;
 			dev = t->dev;
 		}
@@ -1019,12 +1019,12 @@ done:
 
 static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
 {
-	return &(((struct ip_tunnel*)dev->priv)->stat);
+	return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
 }
 
 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 {
-	struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+	struct ip_tunnel *tunnel = netdev_priv(dev);
 	if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
 		return -EINVAL;
 	dev->mtu = new_mtu;
@@ -1064,7 +1064,7 @@ static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
 			void *daddr, void *saddr, unsigned len)
 {
-	struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
+	struct ip_tunnel *t = netdev_priv(dev);
 	struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
 	u16 *p = (u16*)(iph+1);
 
@@ -1091,7 +1091,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned sh
 
 static int ipgre_open(struct net_device *dev)
 {
-	struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
+	struct ip_tunnel *t = netdev_priv(dev);
 
 	if (MULTICAST(t->parms.iph.daddr)) {
 		struct flowi fl = { .oif = t->parms.link,
@@ -1115,7 +1115,7 @@ static int ipgre_open(struct net_device *dev)
 
 static int ipgre_close(struct net_device *dev)
 {
-	struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
+	struct ip_tunnel *t = netdev_priv(dev);
 	if (MULTICAST(t->parms.iph.daddr) && t->mlink) {
 		struct in_device *in_dev = inetdev_by_index(t->mlink);
 		if (in_dev) {
@@ -1140,7 +1140,7 @@ static void ipgre_tunnel_setup(struct net_device *dev)
 
 	dev->type		= ARPHRD_IPGRE;
 	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
-	dev->mtu		= 1500 - sizeof(struct iphdr) - 4;
+	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
 	dev->flags		= IFF_NOARP;
 	dev->iflink		= 0;
 	dev->addr_len		= 4;
@@ -1152,10 +1152,10 @@ static int ipgre_tunnel_init(struct net_device *dev)
 	struct ip_tunnel *tunnel;
 	struct iphdr *iph;
 	int hlen = LL_MAX_HEADER;
-	int mtu = 1500;
+	int mtu = ETH_DATA_LEN;
 	int addend = sizeof(struct iphdr) + 4;
 
-	tunnel = (struct ip_tunnel*)dev->priv;
+	tunnel = netdev_priv(dev);
 	iph = &tunnel->parms.iph;
 
 	tunnel->dev = dev;
@@ -1219,7 +1219,7 @@ static int ipgre_tunnel_init(struct net_device *dev)
 
 static int __init ipgre_fb_tunnel_init(struct net_device *dev)
 {
-	struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct iphdr *iph = &tunnel->parms.iph;
 
 	tunnel->dev = dev;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 473d0f2b2e0..18d7fad474d 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -128,6 +128,7 @@
 #include <linux/sockios.h>
 #include <linux/in.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 
@@ -184,7 +185,6 @@ int ip_call_ra_chain(struct sk_buff *skb)
 					raw_rcv(last, skb2);
 			}
 			last = sk;
-			nf_reset(skb);
 		}
 	}
 
@@ -203,10 +203,6 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
 
 	__skb_pull(skb, ihl);
 
-	/* Free reference early: we don't need it any more, and it may
-           hold ip_conntrack module loaded indefinitely. */
-	nf_reset(skb);
-
         /* Point into the IP datagram, just past the header. */
         skb->h.raw = skb->data;
 
@@ -231,10 +227,12 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
 		if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) {
 			int ret;
 
-			if (!ipprot->no_policy &&
-			    !xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
-				kfree_skb(skb);
-				goto out;
+			if (!ipprot->no_policy) {
+				if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+					kfree_skb(skb);
+					goto out;
+				}
+				nf_reset(skb);
 			}
 			ret = ipprot->handler(skb);
 			if (ret < 0) {
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index dbe12da8d8b..9bebad07bf2 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -11,6 +11,7 @@
  *		
  */
 
+#include <linux/capability.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <asm/uaccess.h>
@@ -22,6 +23,7 @@
 #include <net/sock.h>
 #include <net/ip.h>
 #include <net/icmp.h>
+#include <net/route.h>
 
 /* 
  * Write options to IP header, record destination address to
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index eba64e2bd39..3324fbfe528 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -69,6 +69,7 @@
 #include <net/ip.h>
 #include <net/protocol.h>
 #include <net/route.h>
+#include <net/xfrm.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
 #include <net/arp.h>
@@ -85,6 +86,8 @@
 
 int sysctl_ip_default_ttl = IPDEFTTL;
 
+static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*));
+
 /* Generate a checksum for an outgoing IP datagram. */
 __inline__ void ip_send_check(struct iphdr *iph)
 {
@@ -202,13 +205,16 @@ static inline int ip_finish_output2(struct sk_buff *skb)
 
 static inline int ip_finish_output(struct sk_buff *skb)
 {
-	struct net_device *dev = skb->dst->dev;
-
-	skb->dev = dev;
-	skb->protocol = htons(ETH_P_IP);
-
-	return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
-		       ip_finish_output2);
+#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
+	/* Policy lookup after SNAT yielded a new policy */
+	if (skb->dst->xfrm != NULL)
+		return xfrm4_output_finish(skb);
+#endif
+	if (skb->len > dst_mtu(skb->dst) &&
+	    !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
+		return ip_fragment(skb, ip_finish_output2);
+	else
+		return ip_finish_output2(skb);
 }
 
 int ip_mc_output(struct sk_buff *skb)
@@ -265,21 +271,21 @@ int ip_mc_output(struct sk_buff *skb)
 				newskb->dev, ip_dev_loopback_xmit);
 	}
 
-	if (skb->len > dst_mtu(&rt->u.dst))
-		return ip_fragment(skb, ip_finish_output);
-	else
-		return ip_finish_output(skb);
+	return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev,
+		       ip_finish_output);
 }
 
 int ip_output(struct sk_buff *skb)
 {
+	struct net_device *dev = skb->dst->dev;
+
 	IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
 
-	if (skb->len > dst_mtu(skb->dst) &&
-		!(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
-		return ip_fragment(skb, ip_finish_output);
-	else
-		return ip_finish_output(skb);
+	skb->dev = dev;
+	skb->protocol = htons(ETH_P_IP);
+
+	return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
+		       ip_finish_output);
 }
 
 int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
@@ -411,7 +417,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
  *	single device frame, and queue such a frame for sending.
  */
 
-int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
+static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 {
 	struct iphdr *iph;
 	int raw = 0;
@@ -420,7 +426,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 	struct sk_buff *skb2;
 	unsigned int mtu, hlen, left, len, ll_rs;
 	int offset;
-	int not_last_frag;
+	__be16 not_last_frag;
 	struct rtable *rt = (struct rtable*)skb->dst;
 	int err = 0;
 
@@ -445,6 +451,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 
 	hlen = iph->ihl * 4;
 	mtu = dst_mtu(&rt->u.dst) - hlen;	/* Size of data space */
+	IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
 
 	/* When frag_list is given, use it. First, check its validity:
 	 * some transformers could create wrong frag_list or break existing
@@ -1181,7 +1188,7 @@ int ip_push_pending_frames(struct sock *sk)
 	struct ip_options *opt = NULL;
 	struct rtable *rt = inet->cork.rt;
 	struct iphdr *iph;
-	int df = 0;
+	__be16 df = 0;
 	__u8 ttl;
 	int err = 0;
 
@@ -1392,7 +1399,6 @@ void __init ip_init(void)
 #endif
 }
 
-EXPORT_SYMBOL(ip_fragment);
 EXPORT_SYMBOL(ip_generic_getfrag);
 EXPORT_SYMBOL(ip_queue_xmit);
 EXPORT_SYMBOL(ip_send_check);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 4f2d8725730..2bf8d782f67 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -25,12 +25,12 @@
 #include <linux/skbuff.h>
 #include <linux/ip.h>
 #include <linux/icmp.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <net/sock.h>
 #include <net/ip.h>
 #include <net/icmp.h>
-#include <net/tcp.h>
-#include <linux/tcp.h>
+#include <net/tcp_states.h>
 #include <linux/udp.h>
 #include <linux/igmp.h>
 #include <linux/netfilter.h>
@@ -427,8 +427,8 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
 			err = ip_options_get_from_user(&opt, optval, optlen);
 			if (err)
 				break;
-			if (sk->sk_type == SOCK_STREAM) {
-				struct tcp_sock *tp = tcp_sk(sk);
+			if (inet->is_icsk) {
+				struct inet_connection_sock *icsk = inet_csk(sk);
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 				if (sk->sk_family == PF_INET ||
 				    (!((1 << sk->sk_state) &
@@ -436,10 +436,10 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
 				     inet->daddr != LOOPBACK4_IPV6)) {
 #endif
 					if (inet->opt)
-						tp->ext_header_len -= inet->opt->optlen;
+						icsk->icsk_ext_hdr_len -= inet->opt->optlen;
 					if (opt)
-						tp->ext_header_len += opt->optlen;
-					tcp_sync_mss(sk, tp->pmtu_cookie);
+						icsk->icsk_ext_hdr_len += opt->optlen;
+					icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 				}
 #endif
@@ -621,7 +621,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
 				err = -ENOBUFS;
 				break;
 			}
-			msf = (struct ip_msfilter *)kmalloc(optlen, GFP_KERNEL);
+			msf = kmalloc(optlen, GFP_KERNEL);
 			if (msf == 0) {
 				err = -ENOBUFS;
 				break;
@@ -778,7 +778,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
 				err = -ENOBUFS;
 				break;
 			}
-			gsf = (struct group_filter *)kmalloc(optlen,GFP_KERNEL);
+			gsf = kmalloc(optlen,GFP_KERNEL);
 			if (gsf == 0) {
 				err = -ENOBUFS;
 				break;
@@ -798,7 +798,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
 				goto mc_msf_out;
 			}
 			msize = IP_MSFILTER_SIZE(gsf->gf_numsrc);
-			msf = (struct ip_msfilter *)kmalloc(msize,GFP_KERNEL);
+			msf = kmalloc(msize,GFP_KERNEL);
 			if (msf == 0) {
 				err = -ENOBUFS;
 				goto mc_msf_out;
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index fc718df17b4..d64e2ec8da7 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -28,6 +28,7 @@
 #include <net/xfrm.h>
 #include <net/icmp.h>
 #include <net/ipcomp.h>
+#include <net/protocol.h>
 
 struct ipcomp_tfms {
 	struct list_head list;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index e8674baaa8d..bb3613ec448 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -42,6 +42,7 @@
 #include <linux/in.h>
 #include <linux/if.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/skbuff.h>
@@ -58,6 +59,7 @@
 #include <net/arp.h>
 #include <net/ip.h>
 #include <net/ipconfig.h>
+#include <net/route.h>
 
 #include <asm/uaccess.h>
 #include <net/checksum.h>
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index c05c1df0bb0..e5cbe72c6b8 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -93,6 +93,7 @@
  */
 
  
+#include <linux/capability.h>
 #include <linux/config.h>
 #include <linux/module.h>
 #include <linux/types.h>
@@ -108,6 +109,7 @@
 #include <linux/mroute.h>
 #include <linux/init.h>
 #include <linux/netfilter_ipv4.h>
+#include <linux/if_ether.h>
 
 #include <net/sock.h>
 #include <net/ip.h>
@@ -243,7 +245,7 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c
 	if (dev == NULL)
 		return NULL;
 
-	nt = dev->priv;
+	nt = netdev_priv(dev);
 	SET_MODULE_OWNER(dev);
 	dev->init = ipip_tunnel_init;
 	nt->parms = *parms;
@@ -268,7 +270,7 @@ static void ipip_tunnel_uninit(struct net_device *dev)
 		tunnels_wc[0] = NULL;
 		write_unlock_bh(&ipip_lock);
 	} else
-		ipip_tunnel_unlink((struct ip_tunnel*)dev->priv);
+		ipip_tunnel_unlink(netdev_priv(dev));
 	dev_put(dev);
 }
 
@@ -442,7 +444,7 @@ out:
 		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
 		rel_info = htonl(rel_info);
 	} else if (type == ICMP_TIME_EXCEEDED) {
-		struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
+		struct ip_tunnel *t = netdev_priv(skb2->dev);
 		if (t->parms.iph.ttl) {
 			rel_type = ICMP_DEST_UNREACH;
 			rel_code = ICMP_HOST_UNREACH;
@@ -513,7 +515,7 @@ out:
 
 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct net_device_stats *stats = &tunnel->stat;
 	struct iphdr  *tiph = &tunnel->parms.iph;
 	u8     tos = tunnel->parms.iph.tos;
@@ -620,6 +622,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	skb->h.raw = skb->nh.raw;
 	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED);
 	dst_release(skb->dst);
 	skb->dst = &rt->u.dst;
 
@@ -672,7 +675,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 			t = ipip_tunnel_locate(&p, 0);
 		}
 		if (t == NULL)
-			t = (struct ip_tunnel*)dev->priv;
+			t = netdev_priv(dev);
 		memcpy(&p, &t->parms, sizeof(p));
 		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 			err = -EFAULT;
@@ -709,7 +712,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 					err = -EINVAL;
 					break;
 				}
-				t = (struct ip_tunnel*)dev->priv;
+				t = netdev_priv(dev);
 				ipip_tunnel_unlink(t);
 				t->parms.iph.saddr = p.iph.saddr;
 				t->parms.iph.daddr = p.iph.daddr;
@@ -763,7 +766,7 @@ done:
 
 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
 {
-	return &(((struct ip_tunnel*)dev->priv)->stat);
+	return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
 }
 
 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
@@ -786,7 +789,7 @@ static void ipip_tunnel_setup(struct net_device *dev)
 
 	dev->type		= ARPHRD_TUNNEL;
 	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr);
-	dev->mtu		= 1500 - sizeof(struct iphdr);
+	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
 	dev->flags		= IFF_NOARP;
 	dev->iflink		= 0;
 	dev->addr_len		= 4;
@@ -798,7 +801,7 @@ static int ipip_tunnel_init(struct net_device *dev)
 	struct ip_tunnel *tunnel;
 	struct iphdr *iph;
 
-	tunnel = (struct ip_tunnel*)dev->priv;
+	tunnel = netdev_priv(dev);
 	iph = &tunnel->parms.iph;
 
 	tunnel->dev = dev;
@@ -836,7 +839,7 @@ static int ipip_tunnel_init(struct net_device *dev)
 
 static int __init ipip_fb_tunnel_init(struct net_device *dev)
 {
-	struct ip_tunnel *tunnel = dev->priv;
+	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct iphdr *iph = &tunnel->parms.iph;
 
 	tunnel->dev = dev;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 302b7eb507c..5c94c222e3f 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -33,6 +33,7 @@
 #include <asm/uaccess.h>
 #include <linux/types.h>
 #include <linux/sched.h>
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/timer.h>
 #include <linux/mm.h>
@@ -49,9 +50,11 @@
 #include <linux/seq_file.h>
 #include <linux/mroute.h>
 #include <linux/init.h>
+#include <linux/if_ether.h>
 #include <net/ip.h>
 #include <net/protocol.h>
 #include <linux/skbuff.h>
+#include <net/route.h>
 #include <net/sock.h>
 #include <net/icmp.h>
 #include <net/udp.h>
@@ -176,8 +179,8 @@ static int reg_vif_num = -1;
 static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	read_lock(&mrt_lock);
-	((struct net_device_stats*)dev->priv)->tx_bytes += skb->len;
-	((struct net_device_stats*)dev->priv)->tx_packets++;
+	((struct net_device_stats*)netdev_priv(dev))->tx_bytes += skb->len;
+	((struct net_device_stats*)netdev_priv(dev))->tx_packets++;
 	ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
 	read_unlock(&mrt_lock);
 	kfree_skb(skb);
@@ -186,13 +189,13 @@ static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
 
 static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
 {
-	return (struct net_device_stats*)dev->priv;
+	return (struct net_device_stats*)netdev_priv(dev);
 }
 
 static void reg_vif_setup(struct net_device *dev)
 {
 	dev->type		= ARPHRD_PIMREG;
-	dev->mtu		= 1500 - sizeof(struct iphdr) - 8;
+	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
 	dev->flags		= IFF_NOARP;
 	dev->hard_start_xmit	= reg_vif_xmit;
 	dev->get_stats		= reg_vif_get_stats;
@@ -1147,8 +1150,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
 	if (vif->flags & VIFF_REGISTER) {
 		vif->pkt_out++;
 		vif->bytes_out+=skb->len;
-		((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len;
-		((struct net_device_stats*)vif->dev->priv)->tx_packets++;
+		((struct net_device_stats*)netdev_priv(vif->dev))->tx_bytes += skb->len;
+		((struct net_device_stats*)netdev_priv(vif->dev))->tx_packets++;
 		ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
 		kfree_skb(skb);
 		return;
@@ -1208,8 +1211,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
 	if (vif->flags & VIFF_TUNNEL) {
 		ip_encap(skb, vif->local, vif->remote);
 		/* FIXME: extra output firewall step used to be here. --RR */
-		((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++;
-		((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb->len;
+		((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_packets++;
+		((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_bytes+=skb->len;
 	}
 
 	IPCB(skb)->flags |= IPSKB_FORWARDED;
@@ -1465,8 +1468,8 @@ int pim_rcv_v1(struct sk_buff * skb)
 	skb->pkt_type = PACKET_HOST;
 	dst_release(skb->dst);
 	skb->dst = NULL;
-	((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
-	((struct net_device_stats*)reg_dev->priv)->rx_packets++;
+	((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
+	((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
 	nf_reset(skb);
 	netif_rx(skb);
 	dev_put(reg_dev);
@@ -1520,8 +1523,8 @@ static int pim_rcv(struct sk_buff * skb)
 	skb->ip_summed = 0;
 	skb->pkt_type = PACKET_HOST;
 	dst_release(skb->dst);
-	((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
-	((struct net_device_stats*)reg_dev->priv)->rx_packets++;
+	((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
+	((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
 	skb->dst = NULL;
 	nf_reset(skb);
 	netif_rx(skb);
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index d7eb680101c..9b176a942ac 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -224,34 +224,6 @@ void unregister_ip_vs_app(struct ip_vs_app *app)
 }
 
 
-#if 0000
-/*
- *	Get reference to app by name (called from user context)
- */
-struct ip_vs_app *ip_vs_app_get_by_name(char *appname)
-{
-	struct ip_vs_app *app, *a = NULL;
-
-	down(&__ip_vs_app_mutex);
-
-	list_for_each_entry(ent, &ip_vs_app_list, a_list) {
-		if (strcmp(app->name, appname))
-			continue;
-
-		/* softirq may call ip_vs_app_get too, so the caller
-		   must disable softirq on the current CPU */
-		if (ip_vs_app_get(app))
-			a = app;
-		break;
-	}
-
-	up(&__ip_vs_app_mutex);
-
-	return a;
-}
-#endif
-
-
 /*
  *	Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
  */
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 2a3a8c59c65..87b83813cf2 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -24,7 +24,11 @@
  *
  */
 
+#include <linux/interrupt.h>
+#include <linux/in.h>
+#include <linux/net.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/proc_fs.h>		/* for proc_net_* */
 #include <linux/seq_file.h>
@@ -219,7 +223,7 @@ struct ip_vs_conn *ip_vs_conn_in_get
 	if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt))
 		cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port);
 
-	IP_VS_DBG(7, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
+	IP_VS_DBG(9, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
 		  ip_vs_proto_name(protocol),
 		  NIPQUAD(s_addr), ntohs(s_port),
 		  NIPQUAD(d_addr), ntohs(d_port),
@@ -254,7 +258,7 @@ struct ip_vs_conn *ip_vs_ct_in_get
   out:
 	ct_read_unlock(hash);
 
-	IP_VS_DBG(7, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
+	IP_VS_DBG(9, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
 		  ip_vs_proto_name(protocol),
 		  NIPQUAD(s_addr), ntohs(s_port),
 		  NIPQUAD(d_addr), ntohs(d_port),
@@ -295,7 +299,7 @@ struct ip_vs_conn *ip_vs_conn_out_get
 
 	ct_read_unlock(hash);
 
-	IP_VS_DBG(7, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
+	IP_VS_DBG(9, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
 		  ip_vs_proto_name(protocol),
 		  NIPQUAD(s_addr), ntohs(s_port),
 		  NIPQUAD(d_addr), ntohs(d_port),
@@ -391,8 +395,9 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
 	cp->flags |= atomic_read(&dest->conn_flags);
 	cp->dest = dest;
 
-	IP_VS_DBG(9, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
-		  "d:%u.%u.%u.%u:%d fwd:%c s:%u flg:%X cnt:%d destcnt:%d\n",
+	IP_VS_DBG(7, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
+		  "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
+		  "dest->refcnt:%d\n",
 		  ip_vs_proto_name(cp->protocol),
 		  NIPQUAD(cp->caddr), ntohs(cp->cport),
 		  NIPQUAD(cp->vaddr), ntohs(cp->vport),
@@ -430,8 +435,9 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
 	if (!dest)
 		return;
 
-	IP_VS_DBG(9, "Unbind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
-		  "d:%u.%u.%u.%u:%d fwd:%c s:%u flg:%X cnt:%d destcnt:%d\n",
+	IP_VS_DBG(7, "Unbind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
+		  "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
+		  "dest->refcnt:%d\n",
 		  ip_vs_proto_name(cp->protocol),
 		  NIPQUAD(cp->caddr), ntohs(cp->cport),
 		  NIPQUAD(cp->vaddr), ntohs(cp->vport),
@@ -571,7 +577,7 @@ static void ip_vs_conn_expire(unsigned long data)
 	ip_vs_conn_hash(cp);
 
   expire_later:
-	IP_VS_DBG(7, "delayed: refcnt-1=%d conn.n_control=%d\n",
+	IP_VS_DBG(7, "delayed: conn->refcnt-1=%d conn->n_control=%d\n",
 		  atomic_read(&cp->refcnt)-1,
 		  atomic_read(&cp->n_control));
 
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 1a0843cd58a..3f47ad8e1ca 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -426,7 +426,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 		return NULL;
 
 	IP_VS_DBG(6, "Schedule fwd:%c c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u "
-		  "d:%u.%u.%u.%u:%u flg:%X cnt:%d\n",
+		  "d:%u.%u.%u.%u:%u conn->flags:%X conn->refcnt:%d\n",
 		  ip_vs_fwd_tag(cp),
 		  NIPQUAD(cp->caddr), ntohs(cp->cport),
 		  NIPQUAD(cp->vaddr), ntohs(cp->vport),
@@ -532,11 +532,8 @@ static unsigned int ip_vs_post_routing(unsigned int hooknum,
 {
 	if (!((*pskb)->ipvs_property))
 		return NF_ACCEPT;
-
 	/* The packet was sent from IPVS, exit this chain */
-	(*okfn)(*pskb);
-
-	return NF_STOLEN;
+	return NF_STOP;
 }
 
 u16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 9bdcf31b760..7f0288b25fa 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -23,6 +23,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/types.h>
+#include <linux/capability.h>
 #include <linux/fs.h>
 #include <linux/sysctl.h>
 #include <linux/proc_fs.h>
@@ -35,6 +36,7 @@
 #include <linux/netfilter_ipv4.h>
 
 #include <net/ip.h>
+#include <net/route.h>
 #include <net/sock.h>
 
 #include <asm/uaccess.h>
@@ -447,7 +449,7 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __u32 vaddr, __u16 vport)
   out:
 	read_unlock(&__ip_vs_svc_lock);
 
-	IP_VS_DBG(6, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
+	IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
 		  fwmark, ip_vs_proto_name(protocol),
 		  NIPQUAD(vaddr), ntohs(vport),
 		  svc?"hit":"not hit");
@@ -597,7 +599,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
 	 */
 	list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
 		IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
-			  "refcnt=%d\n",
+			  "dest->refcnt=%d\n",
 			  dest->vfwmark,
 			  NIPQUAD(dest->addr), ntohs(dest->port),
 			  atomic_read(&dest->refcnt));
@@ -804,7 +806,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
 	dest = ip_vs_trash_get_dest(svc, daddr, dport);
 	if (dest != NULL) {
 		IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
-			  "refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
+			  "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
 			  NIPQUAD(daddr), ntohs(dport),
 			  atomic_read(&dest->refcnt),
 			  dest->vfwmark,
@@ -949,7 +951,8 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
 		atomic_dec(&dest->svc->refcnt);
 		kfree(dest);
 	} else {
-		IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, refcnt=%d\n",
+		IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
+			  "dest->refcnt=%d\n",
 			  NIPQUAD(dest->addr), ntohs(dest->port),
 			  atomic_read(&dest->refcnt));
 		list_add(&dest->n_list, &ip_vs_dest_trash);
diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c
index f3bc320dce9..9fee19c4c61 100644
--- a/net/ipv4/ipvs/ip_vs_dh.c
+++ b/net/ipv4/ipvs/ip_vs_dh.c
@@ -37,8 +37,10 @@
  *
  */
 
+#include <linux/ip.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/skbuff.h>
 
 #include <net/ip_vs.h>
 
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c
index 67b3e2fc1fa..c453e1e57f4 100644
--- a/net/ipv4/ipvs/ip_vs_est.c
+++ b/net/ipv4/ipvs/ip_vs_est.c
@@ -13,8 +13,12 @@
  * Changes:
  *
  */
+#include <linux/config.h>
 #include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/slab.h>
 #include <linux/types.h>
+#include <linux/interrupt.h>
 
 #include <net/ip_vs.h>
 
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index 561cda326fa..6e5cb92a5c8 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -41,8 +41,10 @@
  * me to write this module.
  */
 
+#include <linux/ip.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/skbuff.h>
 
 /* for sysctl */
 #include <linux/fs.h>
@@ -228,33 +230,6 @@ ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
 }
 
 
-#if 0000
-/*
- *	Unhash ip_vs_lblc_entry from ip_vs_lblc_table.
- *	returns bool success.
- */
-static int ip_vs_lblc_unhash(struct ip_vs_lblc_table *tbl,
-			     struct ip_vs_lblc_entry *en)
-{
-	if (list_empty(&en->list)) {
-		IP_VS_ERR("ip_vs_lblc_unhash(): request for not hashed entry, "
-			  "called from %p\n", __builtin_return_address(0));
-		return 0;
-	}
-
-	/*
-	 * Remove it from the table
-	 */
-	write_lock(&tbl->lock);
-	list_del(&en->list);
-	INIT_LIST_HEAD(&en->list);
-	write_unlock(&tbl->lock);
-
-	return 1;
-}
-#endif
-
-
 /*
  *  Get ip_vs_lblc_entry associated with supplied parameters.
  */
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index ce456dbf09a..32ba37ba72d 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -39,8 +39,10 @@
  *
  */
 
+#include <linux/ip.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/skbuff.h>
 
 /* for sysctl */
 #include <linux/fs.h>
@@ -414,33 +416,6 @@ ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
 }
 
 
-#if 0000
-/*
- *	Unhash ip_vs_lblcr_entry from ip_vs_lblcr_table.
- *	returns bool success.
- */
-static int ip_vs_lblcr_unhash(struct ip_vs_lblcr_table *tbl,
-			     struct ip_vs_lblcr_entry *en)
-{
-	if (list_empty(&en->list)) {
-		IP_VS_ERR("ip_vs_lblcr_unhash(): request for not hashed entry, "
-			  "called from %p\n", __builtin_return_address(0));
-		return 0;
-	}
-
-	/*
-	 * Remove it from the table
-	 */
-	write_lock(&tbl->lock);
-	list_del(&en->list);
-	INIT_LIST_HEAD(&en->list);
-	write_unlock(&tbl->lock);
-
-	return 1;
-}
-#endif
-
-
 /*
  *  Get ip_vs_lblcr_entry associated with supplied parameters.
  */
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c
index 453e94a0bbd..8b0505b0931 100644
--- a/net/ipv4/ipvs/ip_vs_proto_ah.c
+++ b/net/ipv4/ipvs/ip_vs_proto_ah.c
@@ -12,6 +12,8 @@
  *
  */
 
+#include <linux/in.h>
+#include <linux/ip.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/netfilter.h>
diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c
index 478e5c7c7e8..c36ccf057a1 100644
--- a/net/ipv4/ipvs/ip_vs_proto_esp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_esp.c
@@ -12,6 +12,8 @@
  *
  */
 
+#include <linux/in.h>
+#include <linux/ip.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/netfilter.h>
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index 0e878fd6215..bc28b1160a3 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -275,28 +275,6 @@ static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
 	[IP_VS_TCP_S_LAST]		=	2*HZ,
 };
 
-
-#if 0
-
-/* FIXME: This is going to die */
-
-static int tcp_timeouts_dos[IP_VS_TCP_S_LAST+1] = {
-	[IP_VS_TCP_S_NONE]		=	2*HZ,
-	[IP_VS_TCP_S_ESTABLISHED]	=	8*60*HZ,
-	[IP_VS_TCP_S_SYN_SENT]		=	60*HZ,
-	[IP_VS_TCP_S_SYN_RECV]		=	10*HZ,
-	[IP_VS_TCP_S_FIN_WAIT]		=	60*HZ,
-	[IP_VS_TCP_S_TIME_WAIT]		=	60*HZ,
-	[IP_VS_TCP_S_CLOSE]		=	10*HZ,
-	[IP_VS_TCP_S_CLOSE_WAIT]	=	60*HZ,
-	[IP_VS_TCP_S_LAST_ACK]		=	30*HZ,
-	[IP_VS_TCP_S_LISTEN]		=	2*60*HZ,
-	[IP_VS_TCP_S_SYNACK]		=	100*HZ,
-	[IP_VS_TCP_S_LAST]		=	2*HZ,
-};
-
-#endif
-
 static char * tcp_state_name_table[IP_VS_TCP_S_LAST+1] = {
 	[IP_VS_TCP_S_NONE]		=	"NONE",
 	[IP_VS_TCP_S_ESTABLISHED]	=	"ESTABLISHED",
@@ -448,7 +426,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
 		struct ip_vs_dest *dest = cp->dest;
 
 		IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->"
-			  "%u.%u.%u.%u:%d state: %s->%s cnt:%d\n",
+			  "%u.%u.%u.%u:%d state: %s->%s conn->refcnt:%d\n",
 			  pp->name,
 			  (state_off==TCP_DIR_OUTPUT)?"output ":"input ",
 			  th->syn? 'S' : '.',
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 8ae5f2e0aef..89d9175d8f2 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -15,8 +15,11 @@
  *
  */
 
+#include <linux/in.h>
+#include <linux/ip.h>
 #include <linux/kernel.h>
 #include <linux/netfilter_ipv4.h>
+#include <linux/udp.h>
 
 #include <net/ip_vs.h>
 
diff --git a/net/ipv4/ipvs/ip_vs_sched.c b/net/ipv4/ipvs/ip_vs_sched.c
index 0f7c56a225b..8bc42b76223 100644
--- a/net/ipv4/ipvs/ip_vs_sched.c
+++ b/net/ipv4/ipvs/ip_vs_sched.c
@@ -22,6 +22,7 @@
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/spinlock.h>
+#include <linux/interrupt.h>
 #include <asm/string.h>
 #include <linux/kmod.h>
 
diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c
index 6f7c50e44a3..7775e6cc68b 100644
--- a/net/ipv4/ipvs/ip_vs_sh.c
+++ b/net/ipv4/ipvs/ip_vs_sh.c
@@ -34,8 +34,10 @@
  *
  */
 
+#include <linux/ip.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/skbuff.h>
 
 #include <net/ip_vs.h>
 
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 2e5ced3d806..1bca714bda3 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -21,12 +21,14 @@
 
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/inetdevice.h>
 #include <linux/net.h>
 #include <linux/completion.h>
 #include <linux/delay.h>
 #include <linux/skbuff.h>
 #include <linux/in.h>
 #include <linux/igmp.h>                 /* for ip_mc_join_group */
+#include <linux/udp.h>
 
 #include <net/ip.h>
 #include <net/sock.h>
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index 3b87482049c..52c12e9edbb 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -322,7 +322,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	struct net_device *tdev;		/* Device to other host */
 	struct iphdr  *old_iph = skb->nh.iph;
 	u8     tos = old_iph->tos;
-	u16    df = old_iph->frag_off;
+	__be16 df = old_iph->frag_off;
 	struct iphdr  *iph;			/* Our new IP header */
 	int    max_headroom;			/* The extra header space needed */
 	int    mtu;
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index ae0779d82c5..52a3d7c5790 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -1,17 +1,11 @@
 /* IPv4 specific functions of netfilter core */
-
-#include <linux/config.h>
-#ifdef CONFIG_NETFILTER
-
 #include <linux/kernel.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
-
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/icmp.h>
-#include <net/route.h>
 #include <linux/ip.h>
+#include <net/route.h>
+#include <net/xfrm.h>
+#include <net/ip.h>
 
 /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
 int ip_route_me_harder(struct sk_buff **pskb)
@@ -33,7 +27,6 @@ int ip_route_me_harder(struct sk_buff **pskb)
 #ifdef CONFIG_IP_ROUTE_FWMARK
 		fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark;
 #endif
-		fl.proto = iph->protocol;
 		if (ip_route_output_key(&rt, &fl) != 0)
 			return -1;
 
@@ -60,6 +53,13 @@ int ip_route_me_harder(struct sk_buff **pskb)
 	if ((*pskb)->dst->error)
 		return -1;
 
+#ifdef CONFIG_XFRM
+	if (!(IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) &&
+	    xfrm_decode_session(*pskb, &fl, AF_INET) == 0)
+		if (xfrm_lookup(&(*pskb)->dst, &fl, (*pskb)->sk, 0))
+			return -1;
+#endif
+
 	/* Change in oif may mean change in hh_len. */
 	hh_len = (*pskb)->dst->dev->hard_header_len;
 	if (skb_headroom(*pskb) < hh_len) {
@@ -78,6 +78,9 @@ int ip_route_me_harder(struct sk_buff **pskb)
 }
 EXPORT_SYMBOL(ip_route_me_harder);
 
+void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *);
+EXPORT_SYMBOL(ip_nat_decode_session);
+
 /*
  * Extra routing may needed on local out, as the QUEUE target never
  * returns control to the table.
@@ -135,5 +138,3 @@ static void fini(void)
 
 module_init(init);
 module_exit(fini);
-
-#endif /* CONFIG_NETFILTER */
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 0bc00528d88..db783036e4d 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -56,8 +56,8 @@ config IP_NF_CONNTRACK_MARK
 	  instead of the individual packets.
 	
 config IP_NF_CONNTRACK_EVENTS
-	bool "Connection tracking events"
-	depends on IP_NF_CONNTRACK
+	bool "Connection tracking events (EXPERIMENTAL)"
+	depends on EXPERIMENTAL && IP_NF_CONNTRACK
 	help
 	  If this option is enabled, the connection tracking code will
 	  provide a notifier chain that can be used by other kernel code
@@ -66,8 +66,8 @@ config IP_NF_CONNTRACK_EVENTS
 	  IF unsure, say `N'.
 
 config IP_NF_CONNTRACK_NETLINK
-	tristate 'Connection tracking netlink interface'
-	depends on IP_NF_CONNTRACK && NETFILTER_NETLINK
+	tristate 'Connection tracking netlink interface (EXPERIMENTAL)'
+	depends on EXPERIMENTAL && IP_NF_CONNTRACK && NETFILTER_NETLINK
 	depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m
 	help
 	  This option enables support for a netlink-based userspace interface
@@ -182,6 +182,7 @@ config IP_NF_QUEUE
 
 config IP_NF_IPTABLES
 	tristate "IP tables support (required for filtering/masq/NAT)"
+	depends on NETFILTER_XTABLES
 	help
 	  iptables is a general, extensible packet identification framework.
 	  The packet filtering and full NAT (masquerading, port forwarding,
@@ -191,16 +192,6 @@ config IP_NF_IPTABLES
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 # The matches.
-config IP_NF_MATCH_LIMIT
-	tristate "limit match support"
-	depends on IP_NF_IPTABLES
-	help
-	  limit matching allows you to control the rate at which a rule can be
-	  matched: mainly useful in combination with the LOG target ("LOG
-	  target support", below) and to avoid some Denial of Service attacks.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP_NF_MATCH_IPRANGE
 	tristate "IP range match support"
 	depends on IP_NF_IPTABLES
@@ -210,37 +201,6 @@ config IP_NF_MATCH_IPRANGE
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_MATCH_MAC
-	tristate "MAC address match support"
-	depends on IP_NF_IPTABLES
-	help
-	  MAC matching allows you to match packets based on the source
-	  Ethernet address of the packet.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
-config IP_NF_MATCH_PKTTYPE
-	tristate "Packet type match support"
-	depends on IP_NF_IPTABLES
-	help
-	  Packet type matching allows you to match a packet by
-	  its "class", eg. BROADCAST, MULTICAST, ...
-
-	  Typical usage:
-	  iptables -A INPUT -m pkttype --pkt-type broadcast -j LOG
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
-config IP_NF_MATCH_MARK
-	tristate "netfilter MARK match support"
-	depends on IP_NF_IPTABLES
-	help
-	  Netfilter mark matching allows you to match packets based on the
-	  `nfmark' value in the packet.  This can be set by the MARK target
-	  (see below).
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP_NF_MATCH_MULTIPORT
 	tristate "Multiple port match support"
 	depends on IP_NF_IPTABLES
@@ -301,15 +261,6 @@ config IP_NF_MATCH_AH_ESP
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_MATCH_LENGTH
-	tristate "LENGTH match support"
-	depends on IP_NF_IPTABLES
-	help
-	  This option allows you to match the length of a packet against a
-	  specific value or range of values.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP_NF_MATCH_TTL
 	tristate "TTL match support"
 	depends on IP_NF_IPTABLES
@@ -319,50 +270,6 @@ config IP_NF_MATCH_TTL
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_MATCH_TCPMSS
-	tristate "tcpmss match support"
-	depends on IP_NF_IPTABLES
-	help
-	  This option adds a `tcpmss' match, which allows you to examine the
-	  MSS value of TCP SYN packets, which control the maximum packet size
-	  for that connection.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
-config IP_NF_MATCH_HELPER
-	tristate "Helper match support"
-	depends on IP_NF_IPTABLES
-	depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
-	help
-	  Helper matching allows you to match packets in dynamic connections
-	  tracked by a conntrack-helper, ie. ip_conntrack_ftp
-
-	  To compile it as a module, choose M here.  If unsure, say Y.
-
-config IP_NF_MATCH_STATE
-	tristate "Connection state match support"
-	depends on IP_NF_IPTABLES
-	depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
-	help
-	  Connection state matching allows you to match packets based on their
-	  relationship to a tracked connection (ie. previous packets).  This
-	  is a powerful tool for packet classification.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
-config IP_NF_MATCH_CONNTRACK
-	tristate "Connection tracking match support"
-	depends on IP_NF_IPTABLES
-	depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
-	help
-	  This is a general conntrack match module, a superset of the state match.
-
-	  It allows matching on additional conntrack information, which is
-	  useful in complex configurations, such as NAT gateways with multiple
-	  internet links or tunnels.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP_NF_MATCH_OWNER
 	tristate "Owner match support"
 	depends on IP_NF_IPTABLES
@@ -372,15 +279,6 @@ config IP_NF_MATCH_OWNER
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_MATCH_PHYSDEV
-	tristate "Physdev match support"
-	depends on IP_NF_IPTABLES && BRIDGE_NETFILTER
-	help
-	  Physdev packet matching matches against the physical bridge ports
-	  the IP packet arrived on or will leave by.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP_NF_MATCH_ADDRTYPE
 	tristate  'address type match support'
 	depends on IP_NF_IPTABLES
@@ -391,75 +289,6 @@ config IP_NF_MATCH_ADDRTYPE
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/modules.txt>.  If unsure, say `N'.
 
-config IP_NF_MATCH_REALM
-	tristate  'realm match support'
-	depends on IP_NF_IPTABLES
-	select NET_CLS_ROUTE
-	help
-	  This option adds a `realm' match, which allows you to use the realm
-	  key from the routing subsystem inside iptables.
-	
-	  This match pretty much resembles the CONFIG_NET_CLS_ROUTE4 option 
-	  in tc world.
-	
-	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/modules.txt>.  If unsure, say `N'.
-
-config IP_NF_MATCH_SCTP
-	tristate  'SCTP protocol match support'
-	depends on IP_NF_IPTABLES
-	help
-	  With this option enabled, you will be able to use the iptables
-	  `sctp' match in order to match on SCTP source/destination ports
-	  and SCTP chunk types.
-
-	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/modules.txt>.  If unsure, say `N'.
-
-config IP_NF_MATCH_DCCP
-	tristate  'DCCP protocol match support'
-	depends on IP_NF_IPTABLES
-	help
-	  With this option enabled, you will be able to use the iptables
-	  `dccp' match in order to match on DCCP source/destination ports
-	  and DCCP flags.
-
-	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/modules.txt>.  If unsure, say `N'.
-
-config IP_NF_MATCH_COMMENT
-	tristate  'comment match support'
-	depends on IP_NF_IPTABLES
-	help
-	  This option adds a `comment' dummy-match, which allows you to put
-	  comments in your iptables ruleset.
-
-	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/modules.txt>.  If unsure, say `N'.
-
-config IP_NF_MATCH_CONNMARK
-	tristate  'Connection mark match support'
-	depends on IP_NF_IPTABLES
-	depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
-	help
-	  This option adds a `connmark' match, which allows you to match the
-	  connection mark value previously set for the session by `CONNMARK'. 
-	
-	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/modules.txt>.  The module will be called
-	  ipt_connmark.o.  If unsure, say `N'.
-
-config IP_NF_MATCH_CONNBYTES
-	tristate  'Connection byte/packet counter match support'
-	depends on IP_NF_IPTABLES
-	depends on (IP_NF_CONNTRACK && IP_NF_CT_ACCT) || (NF_CT_ACCT && NF_CONNTRACK_IPV4)
-	help
-	  This option adds a `connbytes' match, which allows you to match the
-	  number of bytes and/or packets for each direction within a connection.
-
-	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/modules.txt>.  If unsure, say `N'.
-
 config IP_NF_MATCH_HASHLIMIT
 	tristate  'hashlimit match support'
 	depends on IP_NF_IPTABLES
@@ -474,18 +303,15 @@ config IP_NF_MATCH_HASHLIMIT
 	  destination IP' or `500pps from any given source IP'  with a single
 	  IPtables rule.
 
-config IP_NF_MATCH_STRING
-	tristate  'string match support'
-	depends on IP_NF_IPTABLES 
-	select TEXTSEARCH
-	select TEXTSEARCH_KMP
-	select TEXTSEARCH_BM
-	select TEXTSEARCH_FSM
-	help
-	  This option adds a `string' match, which allows you to look for
-	  pattern matchings in packets.
+config IP_NF_MATCH_POLICY
+       tristate "IPsec policy match support"
+       depends on IP_NF_IPTABLES && XFRM
+       help
+         Policy matching allows you to match packets based on the
+         IPsec policy that was used during decapsulation or that
+         will be used during encapsulation.
 
-	  To compile it as a module, choose M here.  If unsure, say N.
+         To compile it as a module, choose M here.  If unsure, say N.
 
 # `filter', generic and specific targets
 config IP_NF_FILTER
@@ -562,17 +388,6 @@ config IP_NF_TARGET_TCPMSS
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_TARGET_NFQUEUE
-	tristate "NFQUEUE Target Support"
-	depends on IP_NF_IPTABLES
-	help
-	  This Target replaced the old obsolete QUEUE target.
-
-	  As opposed to QUEUE, it supports 65535 different queues,
-	  not just one.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 # NAT + specific targets
 config IP_NF_NAT
 	tristate "Full NAT"
@@ -725,31 +540,6 @@ config IP_NF_TARGET_DSCP
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_TARGET_MARK
-	tristate "MARK target support"
-	depends on IP_NF_MANGLE
-	help
-	  This option adds a `MARK' target, which allows you to create rules
-	  in the `mangle' table which alter the netfilter mark (nfmark) field
-	  associated with the packet prior to routing. This can change
-	  the routing method (see `Use netfilter MARK value as routing
-	  key') and can also be used by other subsystems to change their
-	  behavior.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
-config IP_NF_TARGET_CLASSIFY
-	tristate "CLASSIFY target support"
-	depends on IP_NF_MANGLE
-	help
-	  This option adds a `CLASSIFY' target, which enables the user to set
-	  the priority of a packet. Some qdiscs can use this value for
-	  classification, among these are:
-
-  	  atm, cbq, dsmark, pfifo_fast, htb, prio
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP_NF_TARGET_TTL
 	tristate  'TTL target support'
 	depends on IP_NF_MANGLE
@@ -764,19 +554,6 @@ config IP_NF_TARGET_TTL
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_TARGET_CONNMARK
-	tristate  'CONNMARK target support'
-	depends on IP_NF_MANGLE
-	depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
-	help
-	  This option adds a `CONNMARK' target, which allows one to manipulate
-	  the connection mark value.  Similar to the MARK target, but
-	  affects the connection mark value rather than the packet mark value.
-	
-	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/modules.txt>.  The module will be called
-	  ipt_CONNMARK.o.  If unsure, say `N'.
-
 config IP_NF_TARGET_CLUSTERIP
 	tristate "CLUSTERIP target support (EXPERIMENTAL)"
 	depends on IP_NF_MANGLE && EXPERIMENTAL
@@ -800,23 +577,10 @@ config IP_NF_RAW
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/modules.txt>.  If unsure, say `N'.
 
-config IP_NF_TARGET_NOTRACK
-	tristate  'NOTRACK target support'
-	depends on IP_NF_RAW
-	depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
-	help
-	  The NOTRACK target allows a select rule to specify
-	  which packets *not* to enter the conntrack/NAT
-	  subsystem with all the consequences (no ICMP error tracking,
-	  no protocol helpers for the selected packets).
-	
-	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/modules.txt>.  If unsure, say `N'.
-
-
 # ARP tables
 config IP_NF_ARPTABLES
 	tristate "ARP tables support"
+	depends on NETFILTER_XTABLES
 	help
 	  arptables is a general, extensible packet identification framework.
 	  The ARP packet filtering and mangling (manipulation)subsystems
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 058c48e258f..e5c5b3202f0 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -12,6 +12,7 @@ ip_nat_pptp-objs	:= ip_nat_helper_pptp.o ip_nat_proto_gre.o
 
 # connection tracking
 obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
+obj-$(CONFIG_IP_NF_NAT) += ip_nat.o
 
 # conntrack netlink interface
 obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o
@@ -41,19 +42,12 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
 # the three instances of ip_tables
 obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
 obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
-obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o ip_nat.o
+obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o
 obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
 
 # matches
-obj-$(CONFIG_IP_NF_MATCH_HELPER) += ipt_helper.o
-obj-$(CONFIG_IP_NF_MATCH_LIMIT) += ipt_limit.o
 obj-$(CONFIG_IP_NF_MATCH_HASHLIMIT) += ipt_hashlimit.o
-obj-$(CONFIG_IP_NF_MATCH_SCTP) += ipt_sctp.o
-obj-$(CONFIG_IP_NF_MATCH_DCCP) += ipt_dccp.o
-obj-$(CONFIG_IP_NF_MATCH_MARK) += ipt_mark.o
-obj-$(CONFIG_IP_NF_MATCH_MAC) += ipt_mac.o
 obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o
-obj-$(CONFIG_IP_NF_MATCH_PKTTYPE) += ipt_pkttype.o
 obj-$(CONFIG_IP_NF_MATCH_MULTIPORT) += ipt_multiport.o
 obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o
 obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o
@@ -61,39 +55,25 @@ obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o
 obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
 obj-$(CONFIG_IP_NF_MATCH_DSCP) += ipt_dscp.o
 obj-$(CONFIG_IP_NF_MATCH_AH_ESP) += ipt_ah.o ipt_esp.o
-obj-$(CONFIG_IP_NF_MATCH_LENGTH) += ipt_length.o
 obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
-obj-$(CONFIG_IP_NF_MATCH_STATE) += ipt_state.o
-obj-$(CONFIG_IP_NF_MATCH_CONNMARK) += ipt_connmark.o
-obj-$(CONFIG_IP_NF_MATCH_CONNTRACK) += ipt_conntrack.o
-obj-$(CONFIG_IP_NF_MATCH_CONNBYTES) += ipt_connbytes.o
-obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o
-obj-$(CONFIG_IP_NF_MATCH_REALM) += ipt_realm.o
 obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
-obj-$(CONFIG_IP_NF_MATCH_PHYSDEV) += ipt_physdev.o
-obj-$(CONFIG_IP_NF_MATCH_COMMENT) += ipt_comment.o
-obj-$(CONFIG_IP_NF_MATCH_STRING) += ipt_string.o
+obj-$(CONFIG_IP_NF_MATCH_POLICY) += ipt_policy.o
 
 # targets
 obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
 obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o
 obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
 obj-$(CONFIG_IP_NF_TARGET_DSCP) += ipt_DSCP.o
-obj-$(CONFIG_IP_NF_TARGET_MARK) += ipt_MARK.o
 obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
 obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
 obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
 obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o
-obj-$(CONFIG_IP_NF_TARGET_CLASSIFY) += ipt_CLASSIFY.o
 obj-$(CONFIG_IP_NF_NAT_SNMP_BASIC) += ip_nat_snmp_basic.o
 obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o
-obj-$(CONFIG_IP_NF_TARGET_CONNMARK) += ipt_CONNMARK.o
 obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
 obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o
-obj-$(CONFIG_IP_NF_TARGET_NOTRACK) += ipt_NOTRACK.o
 obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
 obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o
-obj-$(CONFIG_IP_NF_TARGET_NFQUEUE) += ipt_NFQUEUE.o
 
 # generic ARP tables
 obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 3c2e9639bba..afe3d8f8177 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -13,6 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
+#include <linux/capability.h>
 #include <linux/if_arp.h>
 #include <linux/kmod.h>
 #include <linux/vmalloc.h>
@@ -23,6 +24,7 @@
 #include <asm/uaccess.h>
 #include <asm/semaphore.h>
 
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_arp/arp_tables.h>
 
 MODULE_LICENSE("GPL");
@@ -54,33 +56,9 @@ do {								\
 #else
 #define ARP_NF_ASSERT(x)
 #endif
-#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
 
-static DECLARE_MUTEX(arpt_mutex);
-
-#define ASSERT_READ_LOCK(x) ARP_NF_ASSERT(down_trylock(&arpt_mutex) != 0)
-#define ASSERT_WRITE_LOCK(x) ARP_NF_ASSERT(down_trylock(&arpt_mutex) != 0)
 #include <linux/netfilter_ipv4/listhelp.h>
 
-struct arpt_table_info {
-	unsigned int size;
-	unsigned int number;
-	unsigned int initial_entries;
-	unsigned int hook_entry[NF_ARP_NUMHOOKS];
-	unsigned int underflow[NF_ARP_NUMHOOKS];
-	char entries[0] __attribute__((aligned(SMP_CACHE_BYTES)));
-};
-
-static LIST_HEAD(arpt_target);
-static LIST_HEAD(arpt_tables);
-#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
-
-#ifdef CONFIG_SMP
-#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
-#else
-#define TABLE_OFFSET(t,p) 0
-#endif
-
 static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
 				      char *hdr_addr, int len)
 {
@@ -227,9 +205,9 @@ static inline int arp_checkentry(const struct arpt_arp *arp)
 }
 
 static unsigned int arpt_error(struct sk_buff **pskb,
-			       unsigned int hooknum,
 			       const struct net_device *in,
 			       const struct net_device *out,
+			       unsigned int hooknum,
 			       const void *targinfo,
 			       void *userinfo)
 {
@@ -258,6 +236,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
 	struct arpt_entry *e, *back;
 	const char *indev, *outdev;
 	void *table_base;
+	struct xt_table_info *private = table->private;
 
 	/* ARP header, plus 2 device addresses, plus 2 IP addresses.  */
 	if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) +
@@ -269,11 +248,9 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
 	outdev = out ? out->name : nulldevname;
 
 	read_lock_bh(&table->lock);
-	table_base = (void *)table->private->entries
-		+ TABLE_OFFSET(table->private,
-			       smp_processor_id());
-	e = get_entry(table_base, table->private->hook_entry[hook]);
-	back = get_entry(table_base, table->private->underflow[hook]);
+	table_base = (void *)private->entries[smp_processor_id()];
+	e = get_entry(table_base, private->hook_entry[hook]);
+	back = get_entry(table_base, private->underflow[hook]);
 
 	arp = (*pskb)->nh.arph;
 	do {
@@ -321,8 +298,8 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
 				 * abs. verdicts
 				 */
 				verdict = t->u.kernel.target->target(pskb,
-								     hook,
 								     in, out,
+								     hook,
 								     t->data,
 								     userdata);
 
@@ -347,106 +324,6 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
 		return verdict;
 }
 
-/*
- * These are weird, but module loading must not be done with mutex
- * held (since they will register), and we have to have a single
- * function to use try_then_request_module().
- */
-
-/* Find table by name, grabs mutex & ref.  Returns ERR_PTR() on error. */
-static inline struct arpt_table *find_table_lock(const char *name)
-{
-	struct arpt_table *t;
-
-	if (down_interruptible(&arpt_mutex) != 0)
-		return ERR_PTR(-EINTR);
-
-	list_for_each_entry(t, &arpt_tables, list)
-		if (strcmp(t->name, name) == 0 && try_module_get(t->me))
-			return t;
-	up(&arpt_mutex);
-	return NULL;
-}
-
-
-/* Find target, grabs ref.  Returns ERR_PTR() on error. */
-static inline struct arpt_target *find_target(const char *name, u8 revision)
-{
-	struct arpt_target *t;
-	int err = 0;
-
-	if (down_interruptible(&arpt_mutex) != 0)
-		return ERR_PTR(-EINTR);
-
-	list_for_each_entry(t, &arpt_target, list) {
-		if (strcmp(t->name, name) == 0) {
-			if (t->revision == revision) {
-				if (try_module_get(t->me)) {
-					up(&arpt_mutex);
-					return t;
-				}
-			} else
-				err = -EPROTOTYPE; /* Found something. */
-		}
-	}
-	up(&arpt_mutex);
-	return ERR_PTR(err);
-}
-
-struct arpt_target *arpt_find_target(const char *name, u8 revision)
-{
-	struct arpt_target *target;
-
-	target = try_then_request_module(find_target(name, revision),
-					 "arpt_%s", name);
-	if (IS_ERR(target) || !target)
-		return NULL;
-	return target;
-}
-
-static int target_revfn(const char *name, u8 revision, int *bestp)
-{
-	struct arpt_target *t;
-	int have_rev = 0;
-
-	list_for_each_entry(t, &arpt_target, list) {
-		if (strcmp(t->name, name) == 0) {
-			if (t->revision > *bestp)
-				*bestp = t->revision;
-			if (t->revision == revision)
-				have_rev =1;
-		}
-	}
-	return have_rev;
-}
-
-/* Returns true or false (if no such extension at all) */
-static inline int find_revision(const char *name, u8 revision,
-				int (*revfn)(const char *, u8, int *),
-				int *err)
-{
-	int have_rev, best = -1;
-
-	if (down_interruptible(&arpt_mutex) != 0) {
-		*err = -EINTR;
-		return 1;
-	}
-	have_rev = revfn(name, revision, &best);
-	up(&arpt_mutex);
-
-	/* Nothing at all?  Return 0 to try loading module. */
-	if (best == -1) {
-		*err = -ENOENT;
-		return 0;
-	}
-
-	*err = best;
-	if (!have_rev)
-		*err = -EPROTONOSUPPORT;
-	return 1;
-}
-
-
 /* All zeroes == unconditional rule. */
 static inline int unconditional(const struct arpt_arp *arp)
 {
@@ -462,7 +339,8 @@ static inline int unconditional(const struct arpt_arp *arp)
 /* Figures out from what hook each rule can be called: returns 0 if
  * there are loops.  Puts hook bitmask in comefrom.
  */
-static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int valid_hooks)
+static int mark_source_chains(struct xt_table_info *newinfo,
+			      unsigned int valid_hooks, void *entry0)
 {
 	unsigned int hook;
 
@@ -472,7 +350,7 @@ static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int vali
 	for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) {
 		unsigned int pos = newinfo->hook_entry[hook];
 		struct arpt_entry *e
-			= (struct arpt_entry *)(newinfo->entries + pos);
+			= (struct arpt_entry *)(entry0 + pos);
 
 		if (!(valid_hooks & (1 << hook)))
 			continue;
@@ -514,13 +392,13 @@ static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int vali
 						goto next;
 
 					e = (struct arpt_entry *)
-						(newinfo->entries + pos);
+						(entry0 + pos);
 				} while (oldpos == pos + e->next_offset);
 
 				/* Move along one */
 				size = e->next_offset;
 				e = (struct arpt_entry *)
-					(newinfo->entries + pos + size);
+					(entry0 + pos + size);
 				e->counters.pcnt = pos;
 				pos += size;
 			} else {
@@ -537,7 +415,7 @@ static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int vali
 					newpos = pos + e->next_offset;
 				}
 				e = (struct arpt_entry *)
-					(newinfo->entries + newpos);
+					(entry0 + newpos);
 				e->counters.pcnt = pos;
 				pos = newpos;
 			}
@@ -592,8 +470,8 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i
 	}
 
 	t = arpt_get_target(e);
-	target = try_then_request_module(find_target(t->u.user.name,
-						     t->u.user.revision),
+	target = try_then_request_module(xt_find_target(NF_ARP, t->u.user.name,
+							t->u.user.revision),
 					 "arpt_%s", t->u.user.name);
 	if (IS_ERR(target) || !target) {
 		duprintf("check_entry: `%s' not found\n", t->u.user.name);
@@ -627,7 +505,7 @@ out:
 }
 
 static inline int check_entry_size_and_hooks(struct arpt_entry *e,
-					     struct arpt_table_info *newinfo,
+					     struct xt_table_info *newinfo,
 					     unsigned char *base,
 					     unsigned char *limit,
 					     const unsigned int *hook_entries,
@@ -661,7 +539,7 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
            < 0 (not ARPT_RETURN). --RR */
 
 	/* Clear counters and comefrom */
-	e->counters = ((struct arpt_counters) { 0, 0 });
+	e->counters = ((struct xt_counters) { 0, 0 });
 	e->comefrom = 0;
 
 	(*i)++;
@@ -688,7 +566,8 @@ static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i)
  */
 static int translate_table(const char *name,
 			   unsigned int valid_hooks,
-			   struct arpt_table_info *newinfo,
+			   struct xt_table_info *newinfo,
+			   void *entry0,
 			   unsigned int size,
 			   unsigned int number,
 			   const unsigned int *hook_entries,
@@ -710,11 +589,11 @@ static int translate_table(const char *name,
 	i = 0;
 
 	/* Walk through entries, checking offsets. */
-	ret = ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+	ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size,
 				 check_entry_size_and_hooks,
 				 newinfo,
-				 newinfo->entries,
-				 newinfo->entries + size,
+				 entry0,
+				 entry0 + size,
 				 hook_entries, underflows, &i);
 	duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
 	if (ret != 0)
@@ -743,79 +622,78 @@ static int translate_table(const char *name,
 		}
 	}
 
-	if (!mark_source_chains(newinfo, valid_hooks)) {
+	if (!mark_source_chains(newinfo, valid_hooks, entry0)) {
 		duprintf("Looping hook\n");
 		return -ELOOP;
 	}
 
 	/* Finally, each sanity check must pass */
 	i = 0;
-	ret = ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+	ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size,
 				 check_entry, name, size, &i);
 
 	if (ret != 0) {
-		ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+		ARPT_ENTRY_ITERATE(entry0, newinfo->size,
 				   cleanup_entry, &i);
 		return ret;
 	}
 
 	/* And one copy for every other CPU */
 	for_each_cpu(i) {
-		if (i == 0)
-			continue;
-		memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
-		       newinfo->entries,
-		       SMP_ALIGN(newinfo->size));
+		if (newinfo->entries[i] && newinfo->entries[i] != entry0)
+			memcpy(newinfo->entries[i], entry0, newinfo->size);
 	}
 
 	return ret;
 }
 
-static struct arpt_table_info *replace_table(struct arpt_table *table,
-					     unsigned int num_counters,
-					     struct arpt_table_info *newinfo,
-					     int *error)
+/* Gets counters. */
+static inline int add_entry_to_counter(const struct arpt_entry *e,
+				       struct xt_counters total[],
+				       unsigned int *i)
 {
-	struct arpt_table_info *oldinfo;
-
-	/* Do the substitution. */
-	write_lock_bh(&table->lock);
-	/* Check inside lock: is the old number correct? */
-	if (num_counters != table->private->number) {
-		duprintf("num_counters != table->private->number (%u/%u)\n",
-			 num_counters, table->private->number);
-		write_unlock_bh(&table->lock);
-		*error = -EAGAIN;
-		return NULL;
-	}
-	oldinfo = table->private;
-	table->private = newinfo;
-	newinfo->initial_entries = oldinfo->initial_entries;
-	write_unlock_bh(&table->lock);
+	ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
 
-	return oldinfo;
+	(*i)++;
+	return 0;
 }
 
-/* Gets counters. */
-static inline int add_entry_to_counter(const struct arpt_entry *e,
-				       struct arpt_counters total[],
+static inline int set_entry_to_counter(const struct arpt_entry *e,
+				       struct xt_counters total[],
 				       unsigned int *i)
 {
-	ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
+	SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
 
 	(*i)++;
 	return 0;
 }
 
-static void get_counters(const struct arpt_table_info *t,
-			 struct arpt_counters counters[])
+static void get_counters(const struct xt_table_info *t,
+			 struct xt_counters counters[])
 {
 	unsigned int cpu;
 	unsigned int i;
+	unsigned int curcpu;
+
+	/* Instead of clearing (by a previous call to memset())
+	 * the counters and using adds, we set the counters
+	 * with data used by 'current' CPU
+	 * We dont care about preemption here.
+	 */
+	curcpu = raw_smp_processor_id();
+
+	i = 0;
+	ARPT_ENTRY_ITERATE(t->entries[curcpu],
+			   t->size,
+			   set_entry_to_counter,
+			   counters,
+			   &i);
 
 	for_each_cpu(cpu) {
+		if (cpu == curcpu)
+			continue;
 		i = 0;
-		ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
+		ARPT_ENTRY_ITERATE(t->entries[cpu],
 				   t->size,
 				   add_entry_to_counter,
 				   counters,
@@ -829,27 +707,29 @@ static int copy_entries_to_user(unsigned int total_size,
 {
 	unsigned int off, num, countersize;
 	struct arpt_entry *e;
-	struct arpt_counters *counters;
+	struct xt_counters *counters;
+	struct xt_table_info *private = table->private;
 	int ret = 0;
+	void *loc_cpu_entry;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	 * (other than comefrom, which userspace doesn't care
 	 * about).
 	 */
-	countersize = sizeof(struct arpt_counters) * table->private->number;
-	counters = vmalloc(countersize);
+	countersize = sizeof(struct xt_counters) * private->number;
+	counters = vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
 		return -ENOMEM;
 
 	/* First, sum counters... */
-	memset(counters, 0, countersize);
 	write_lock_bh(&table->lock);
-	get_counters(table->private, counters);
+	get_counters(private, counters);
 	write_unlock_bh(&table->lock);
 
-	/* ... then copy entire thing from CPU 0... */
-	if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
+	loc_cpu_entry = private->entries[raw_smp_processor_id()];
+	/* ... then copy entire thing ... */
+	if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
 		ret = -EFAULT;
 		goto free_counters;
 	}
@@ -859,7 +739,7 @@ static int copy_entries_to_user(unsigned int total_size,
 	for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
 		struct arpt_entry_target *t;
 
-		e = (struct arpt_entry *)(table->private->entries + off);
+		e = (struct arpt_entry *)(loc_cpu_entry + off);
 		if (copy_to_user(userptr + off
 				 + offsetof(struct arpt_entry, counters),
 				 &counters[num],
@@ -890,21 +770,21 @@ static int get_entries(const struct arpt_get_entries *entries,
 	int ret;
 	struct arpt_table *t;
 
-	t = find_table_lock(entries->name);
-	if (t || !IS_ERR(t)) {
+	t = xt_find_table_lock(NF_ARP, entries->name);
+	if (t && !IS_ERR(t)) {
+		struct xt_table_info *private = t->private;
 		duprintf("t->private->number = %u\n",
-			 t->private->number);
-		if (entries->size == t->private->size)
-			ret = copy_entries_to_user(t->private->size,
+			 private->number);
+		if (entries->size == private->size)
+			ret = copy_entries_to_user(private->size,
 						   t, uptr->entrytable);
 		else {
 			duprintf("get_entries: I've got %u not %u!\n",
-				 t->private->size,
-				 entries->size);
+				 private->size, entries->size);
 			ret = -EINVAL;
 		}
 		module_put(t->me);
-		up(&arpt_mutex);
+		xt_table_unlock(t);
 	} else
 		ret = t ? PTR_ERR(t) : -ENOENT;
 
@@ -916,8 +796,9 @@ static int do_replace(void __user *user, unsigned int len)
 	int ret;
 	struct arpt_replace tmp;
 	struct arpt_table *t;
-	struct arpt_table_info *newinfo, *oldinfo;
-	struct arpt_counters *counters;
+	struct xt_table_info *newinfo, *oldinfo;
+	struct xt_counters *counters;
+	void *loc_cpu_entry, *loc_cpu_old_entry;
 
 	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
 		return -EFAULT;
@@ -926,38 +807,33 @@ static int do_replace(void __user *user, unsigned int len)
 	if (len != sizeof(tmp) + tmp.size)
 		return -ENOPROTOOPT;
 
-	/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
-	if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
-		return -ENOMEM;
-
-	newinfo = vmalloc(sizeof(struct arpt_table_info)
-			  + SMP_ALIGN(tmp.size) *
-			  		(highest_possible_processor_id()+1));
+	newinfo = xt_alloc_table_info(tmp.size);
 	if (!newinfo)
 		return -ENOMEM;
 
-	if (copy_from_user(newinfo->entries, user + sizeof(tmp),
+	/* choose the copy that is on our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
 			   tmp.size) != 0) {
 		ret = -EFAULT;
 		goto free_newinfo;
 	}
 
-	counters = vmalloc(tmp.num_counters * sizeof(struct arpt_counters));
+	counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
 	if (!counters) {
 		ret = -ENOMEM;
 		goto free_newinfo;
 	}
-	memset(counters, 0, tmp.num_counters * sizeof(struct arpt_counters));
 
 	ret = translate_table(tmp.name, tmp.valid_hooks,
-			      newinfo, tmp.size, tmp.num_entries,
+			      newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
 			      tmp.hook_entry, tmp.underflow);
 	if (ret != 0)
 		goto free_newinfo_counters;
 
 	duprintf("arp_tables: Translated table\n");
 
-	t = try_then_request_module(find_table_lock(tmp.name),
+	t = try_then_request_module(xt_find_table_lock(NF_ARP, tmp.name),
 				    "arptable_%s", tmp.name);
 	if (!t || IS_ERR(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
@@ -972,7 +848,7 @@ static int do_replace(void __user *user, unsigned int len)
 		goto put_module;
 	}
 
-	oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
+	oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret);
 	if (!oldinfo)
 		goto put_module;
 
@@ -989,24 +865,26 @@ static int do_replace(void __user *user, unsigned int len)
 	/* Get the old counters. */
 	get_counters(oldinfo, counters);
 	/* Decrease module usage counts and free resource */
-	ARPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
-	vfree(oldinfo);
+	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
+	ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
+
+	xt_free_table_info(oldinfo);
 	if (copy_to_user(tmp.counters, counters,
-			 sizeof(struct arpt_counters) * tmp.num_counters) != 0)
+			 sizeof(struct xt_counters) * tmp.num_counters) != 0)
 		ret = -EFAULT;
 	vfree(counters);
-	up(&arpt_mutex);
+	xt_table_unlock(t);
 	return ret;
 
  put_module:
 	module_put(t->me);
-	up(&arpt_mutex);
+	xt_table_unlock(t);
  free_newinfo_counters_untrans:
-	ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry, NULL);
+	ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
  free_newinfo_counters:
 	vfree(counters);
  free_newinfo:
-	vfree(newinfo);
+	xt_free_table_info(newinfo);
 	return ret;
 }
 
@@ -1014,7 +892,7 @@ static int do_replace(void __user *user, unsigned int len)
  * and everything is OK.
  */
 static inline int add_counter_to_entry(struct arpt_entry *e,
-				       const struct arpt_counters addme[],
+				       const struct xt_counters addme[],
 				       unsigned int *i)
 {
 
@@ -1027,14 +905,16 @@ static inline int add_counter_to_entry(struct arpt_entry *e,
 static int do_add_counters(void __user *user, unsigned int len)
 {
 	unsigned int i;
-	struct arpt_counters_info tmp, *paddc;
+	struct xt_counters_info tmp, *paddc;
 	struct arpt_table *t;
+	struct xt_table_info *private;
 	int ret = 0;
+	void *loc_cpu_entry;
 
 	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
 		return -EFAULT;
 
-	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct arpt_counters))
+	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
 		return -EINVAL;
 
 	paddc = vmalloc(len);
@@ -1046,27 +926,30 @@ static int do_add_counters(void __user *user, unsigned int len)
 		goto free;
 	}
 
-	t = find_table_lock(tmp.name);
+	t = xt_find_table_lock(NF_ARP, tmp.name);
 	if (!t || IS_ERR(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
 		goto free;
 	}
 
 	write_lock_bh(&t->lock);
-	if (t->private->number != paddc->num_counters) {
+	private = t->private;
+	if (private->number != paddc->num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
 
 	i = 0;
-	ARPT_ENTRY_ITERATE(t->private->entries,
-			   t->private->size,
+	/* Choose the copy that is on our node */
+	loc_cpu_entry = private->entries[smp_processor_id()];
+	ARPT_ENTRY_ITERATE(loc_cpu_entry,
+			   private->size,
 			   add_counter_to_entry,
 			   paddc->counters,
 			   &i);
  unlock_up_free:
 	write_unlock_bh(&t->lock);
-	up(&arpt_mutex);
+	xt_table_unlock(t);
 	module_put(t->me);
  free:
 	vfree(paddc);
@@ -1123,25 +1006,26 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
 		}
 		name[ARPT_TABLE_MAXNAMELEN-1] = '\0';
 
-		t = try_then_request_module(find_table_lock(name),
+		t = try_then_request_module(xt_find_table_lock(NF_ARP, name),
 					    "arptable_%s", name);
 		if (t && !IS_ERR(t)) {
 			struct arpt_getinfo info;
+			struct xt_table_info *private = t->private;
 
 			info.valid_hooks = t->valid_hooks;
-			memcpy(info.hook_entry, t->private->hook_entry,
+			memcpy(info.hook_entry, private->hook_entry,
 			       sizeof(info.hook_entry));
-			memcpy(info.underflow, t->private->underflow,
+			memcpy(info.underflow, private->underflow,
 			       sizeof(info.underflow));
-			info.num_entries = t->private->number;
-			info.size = t->private->size;
+			info.num_entries = private->number;
+			info.size = private->size;
 			strcpy(info.name, name);
 
 			if (copy_to_user(user, &info, *len) != 0)
 				ret = -EFAULT;
 			else
 				ret = 0;
-			up(&arpt_mutex);
+			xt_table_unlock(t);
 			module_put(t->me);
 		} else
 			ret = t ? PTR_ERR(t) : -ENOENT;
@@ -1166,7 +1050,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
 	}
 
 	case ARPT_SO_GET_REVISION_TARGET: {
-		struct arpt_get_revision rev;
+		struct xt_get_revision rev;
 
 		if (*len != sizeof(rev)) {
 			ret = -EINVAL;
@@ -1177,8 +1061,8 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
 			break;
 		}
 
-		try_then_request_module(find_revision(rev.name, rev.revision,
-						      target_revfn, &ret),
+		try_then_request_module(xt_find_revision(NF_ARP, rev.name,
+							 rev.revision, 1, &ret),
 					"arpt_%s", rev.name);
 		break;
 	}
@@ -1191,101 +1075,57 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
 	return ret;
 }
 
-/* Registration hooks for targets. */
-int arpt_register_target(struct arpt_target *target)
-{
-	int ret;
-
-	ret = down_interruptible(&arpt_mutex);
-	if (ret != 0)
-		return ret;
-
-	list_add(&target->list, &arpt_target);
-	up(&arpt_mutex);
-
-	return ret;
-}
-
-void arpt_unregister_target(struct arpt_target *target)
-{
-	down(&arpt_mutex);
-	LIST_DELETE(&arpt_target, target);
-	up(&arpt_mutex);
-}
-
 int arpt_register_table(struct arpt_table *table,
 			const struct arpt_replace *repl)
 {
 	int ret;
-	struct arpt_table_info *newinfo;
-	static struct arpt_table_info bootstrap
+	struct xt_table_info *newinfo;
+	static struct xt_table_info bootstrap
 		= { 0, 0, 0, { 0 }, { 0 }, { } };
+	void *loc_cpu_entry;
 
-	newinfo = vmalloc(sizeof(struct arpt_table_info)
-			  + SMP_ALIGN(repl->size) *
-			  		(highest_possible_processor_id()+1));
+	newinfo = xt_alloc_table_info(repl->size);
 	if (!newinfo) {
 		ret = -ENOMEM;
 		return ret;
 	}
-	memcpy(newinfo->entries, repl->entries, repl->size);
+
+	/* choose the copy on our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	memcpy(loc_cpu_entry, repl->entries, repl->size);
 
 	ret = translate_table(table->name, table->valid_hooks,
-			      newinfo, repl->size,
+			      newinfo, loc_cpu_entry, repl->size,
 			      repl->num_entries,
 			      repl->hook_entry,
 			      repl->underflow);
 	duprintf("arpt_register_table: translate table gives %d\n", ret);
 	if (ret != 0) {
-		vfree(newinfo);
+		xt_free_table_info(newinfo);
 		return ret;
 	}
 
-	ret = down_interruptible(&arpt_mutex);
+	ret = xt_register_table(table, &bootstrap, newinfo);
 	if (ret != 0) {
-		vfree(newinfo);
+		xt_free_table_info(newinfo);
 		return ret;
 	}
 
-	/* Don't autoload: we'd eat our tail... */
-	if (list_named_find(&arpt_tables, table->name)) {
-		ret = -EEXIST;
-		goto free_unlock;
-	}
-
-	/* Simplifies replace_table code. */
-	table->private = &bootstrap;
-	if (!replace_table(table, 0, newinfo, &ret))
-		goto free_unlock;
-
-	duprintf("table->private->number = %u\n",
-		 table->private->number);
-	
-	/* save number of initial entries */
-	table->private->initial_entries = table->private->number;
-
-	rwlock_init(&table->lock);
-	list_prepend(&arpt_tables, table);
-
- unlock:
-	up(&arpt_mutex);
-	return ret;
-
- free_unlock:
-	vfree(newinfo);
-	goto unlock;
+	return 0;
 }
 
 void arpt_unregister_table(struct arpt_table *table)
 {
-	down(&arpt_mutex);
-	LIST_DELETE(&arpt_tables, table);
-	up(&arpt_mutex);
+	struct xt_table_info *private;
+	void *loc_cpu_entry;
+
+	private = xt_unregister_table(table);
 
 	/* Decrease module usage counts and free resources */
-	ARPT_ENTRY_ITERATE(table->private->entries, table->private->size,
+	loc_cpu_entry = private->entries[raw_smp_processor_id()];
+	ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size,
 			   cleanup_entry, NULL);
-	vfree(table->private);
+	xt_free_table_info(private);
 }
 
 /* The built-in targets: standard (NULL) and error. */
@@ -1308,52 +1148,15 @@ static struct nf_sockopt_ops arpt_sockopts = {
 	.get		= do_arpt_get_ctl,
 };
 
-#ifdef CONFIG_PROC_FS
-static inline int print_name(const struct arpt_table *t,
-			     off_t start_offset, char *buffer, int length,
-			     off_t *pos, unsigned int *count)
-{
-	if ((*count)++ >= start_offset) {
-		unsigned int namelen;
-
-		namelen = sprintf(buffer + *pos, "%s\n", t->name);
-		if (*pos + namelen > length) {
-			/* Stop iterating */
-			return 1;
-		}
-		*pos += namelen;
-	}
-	return 0;
-}
-
-static int arpt_get_tables(char *buffer, char **start, off_t offset, int length)
-{
-	off_t pos = 0;
-	unsigned int count = 0;
-
-	if (down_interruptible(&arpt_mutex) != 0)
-		return 0;
-
-	LIST_FIND(&arpt_tables, print_name, struct arpt_table *,
-		  offset, buffer, length, &pos, &count);
-
-	up(&arpt_mutex);
-
-	/* `start' hack - see fs/proc/generic.c line ~105 */
-	*start=(char *)((unsigned long)count-offset);
-	return pos;
-}
-#endif /*CONFIG_PROC_FS*/
-
 static int __init init(void)
 {
 	int ret;
 
+	xt_proto_init(NF_ARP);
+
 	/* Noone else will be downing sem now, so we won't sleep */
-	down(&arpt_mutex);
-	list_append(&arpt_target, &arpt_standard_target);
-	list_append(&arpt_target, &arpt_error_target);
-	up(&arpt_mutex);
+	xt_register_target(NF_ARP, &arpt_standard_target);
+	xt_register_target(NF_ARP, &arpt_error_target);
 
 	/* Register setsockopt */
 	ret = nf_register_sockopt(&arpt_sockopts);
@@ -1362,19 +1165,6 @@ static int __init init(void)
 		return ret;
 	}
 
-#ifdef CONFIG_PROC_FS
-	{
-		struct proc_dir_entry *proc;
-
-		proc = proc_net_create("arp_tables_names", 0, arpt_get_tables);
-		if (!proc) {
-			nf_unregister_sockopt(&arpt_sockopts);
-			return -ENOMEM;
-		}
-		proc->owner = THIS_MODULE;
-	}
-#endif
-
 	printk("arp_tables: (C) 2002 David S. Miller\n");
 	return 0;
 }
@@ -1382,16 +1172,12 @@ static int __init init(void)
 static void __exit fini(void)
 {
 	nf_unregister_sockopt(&arpt_sockopts);
-#ifdef CONFIG_PROC_FS
-	proc_net_remove("arp_tables_names");
-#endif
+	xt_proto_fini(NF_ARP);
 }
 
 EXPORT_SYMBOL(arpt_register_table);
 EXPORT_SYMBOL(arpt_unregister_table);
 EXPORT_SYMBOL(arpt_do_table);
-EXPORT_SYMBOL(arpt_register_target);
-EXPORT_SYMBOL(arpt_unregister_target);
 
 module_init(init);
 module_exit(fini);
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index 3e592ec8648..c97650a16a5 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -8,8 +8,9 @@ MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
 MODULE_DESCRIPTION("arptables arp payload mangle target");
 
 static unsigned int
-target(struct sk_buff **pskb, unsigned int hooknum, const struct net_device *in,
-   const struct net_device *out, const void *targinfo, void *userinfo)
+target(struct sk_buff **pskb, const struct net_device *in,
+   const struct net_device *out, unsigned int hooknum, const void *targinfo,
+   void *userinfo)
 {
 	const struct arpt_mangle *mangle = targinfo;
 	struct arphdr *arp;
@@ -64,7 +65,7 @@ target(struct sk_buff **pskb, unsigned int hooknum, const struct net_device *in,
 }
 
 static int
-checkentry(const char *tablename, const struct arpt_entry *e, void *targinfo,
+checkentry(const char *tablename, const void *e, void *targinfo,
    unsigned int targinfosize, unsigned int hook_mask)
 {
 	const struct arpt_mangle *mangle = targinfo;
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 0d759f5a4ef..f6ab45f4868 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -145,6 +145,7 @@ static struct arpt_table packet_filter = {
 	.lock		= RW_LOCK_UNLOCKED,
 	.private	= NULL,
 	.me		= THIS_MODULE,
+	.af		= NF_ARP,
 };
 
 /* The work comes in here from netfilter.c */
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c
index e52847fa10f..84e4f79b7ff 100644
--- a/net/ipv4/netfilter/ip_conntrack_amanda.c
+++ b/net/ipv4/netfilter/ip_conntrack_amanda.c
@@ -18,11 +18,13 @@
  *
  */
 
+#include <linux/in.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/netfilter.h>
 #include <linux/ip.h>
 #include <linux/moduleparam.h>
+#include <linux/udp.h>
 #include <net/checksum.h>
 #include <net/udp.h>
 
@@ -34,7 +36,7 @@ static unsigned int master_timeout = 300;
 MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
 MODULE_DESCRIPTION("Amanda connection tracking module");
 MODULE_LICENSE("GPL");
-module_param(master_timeout, int, 0600);
+module_param(master_timeout, uint, 0600);
 MODULE_PARM_DESC(master_timeout, "timeout for the master connection");
 
 static const char *conns[] = { "DATA ", "MESG ", "INDEX " };
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 7a4ecddd597..84c66dbfeda 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -1345,6 +1345,11 @@ static int kill_all(struct ip_conntrack *i, void *data)
 	return 1;
 }
 
+void ip_conntrack_flush(void)
+{
+	ip_ct_iterate_cleanup(kill_all, NULL);
+}
+
 static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size)
 {
 	if (vmalloced)
@@ -1354,8 +1359,12 @@ static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size)
 			   get_order(sizeof(struct list_head) * size));
 }
 
-void ip_conntrack_flush(void)
+/* Mishearing the voices in his head, our hero wonders how he's
+   supposed to kill the mall. */
+void ip_conntrack_cleanup(void)
 {
+	ip_ct_attach = NULL;
+
 	/* This makes sure all current packets have passed through
            netfilter framework.  Roll on, two-stage module
            delete... */
@@ -1363,7 +1372,7 @@ void ip_conntrack_flush(void)
 
 	ip_ct_event_cache_flush();
  i_see_dead_people:
-	ip_ct_iterate_cleanup(kill_all, NULL);
+	ip_conntrack_flush();
 	if (atomic_read(&ip_conntrack_count) != 0) {
 		schedule();
 		goto i_see_dead_people;
@@ -1371,14 +1380,7 @@ void ip_conntrack_flush(void)
 	/* wait until all references to ip_conntrack_untracked are dropped */
 	while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
 		schedule();
-}
 
-/* Mishearing the voices in his head, our hero wonders how he's
-   supposed to kill the mall. */
-void ip_conntrack_cleanup(void)
-{
-	ip_ct_attach = NULL;
-	ip_conntrack_flush();
 	kmem_cache_destroy(ip_conntrack_cachep);
 	kmem_cache_destroy(ip_conntrack_expect_cachep);
 	free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
index 68b173bcda6..e627e585617 100644
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_ftp.c
@@ -34,7 +34,7 @@ static int ports_c;
 module_param_array(ports, ushort, &ports_c, 0400);
 
 static int loose;
-module_param(loose, int, 0600);
+module_param(loose, bool, 0600);
 
 unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
 				enum ip_conntrack_info ctinfo,
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index 4108a5e12b3..d716bba798f 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -762,7 +762,7 @@ static struct ip_conntrack_helper pptp = {
 	.help = conntrack_pptp_help
 };
 
-extern void __exit ip_ct_proto_gre_fini(void);
+extern void ip_ct_proto_gre_fini(void);
 extern int __init ip_ct_proto_gre_init(void);
 
 /* ip_conntrack_pptp initialization */
diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c
index d7c40421d0d..c51a2cf71b4 100644
--- a/net/ipv4/netfilter/ip_conntrack_irc.c
+++ b/net/ipv4/netfilter/ip_conntrack_irc.c
@@ -36,7 +36,7 @@
 #define MAX_PORTS 8
 static unsigned short ports[MAX_PORTS];
 static int ports_c;
-static int max_dcc_channels = 8;
+static unsigned int max_dcc_channels = 8;
 static unsigned int dcc_timeout = 300;
 /* This is slow, but it's simple. --RR */
 static char *irc_buffer;
@@ -54,9 +54,9 @@ MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
 MODULE_LICENSE("GPL");
 module_param_array(ports, ushort, &ports_c, 0400);
 MODULE_PARM_DESC(ports, "port numbers of IRC servers");
-module_param(max_dcc_channels, int, 0400);
+module_param(max_dcc_channels, uint, 0400);
 MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per IRC session");
-module_param(dcc_timeout, int, 0400);
+module_param(dcc_timeout, uint, 0400);
 MODULE_PARM_DESC(dcc_timeout, "timeout on for unestablished DCC channels");
 
 static const char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " };
@@ -254,10 +254,6 @@ static int __init init(void)
 		printk("ip_conntrack_irc: max_dcc_channels must be a positive integer\n");
 		return -EBUSY;
 	}
-	if (dcc_timeout < 0) {
-		printk("ip_conntrack_irc: dcc_timeout must be a positive integer\n");
-		return -EBUSY;
-	}
 
 	irc_buffer = kmalloc(65536, GFP_KERNEL);
 	if (!irc_buffer)
diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
index 186646eb249..4e68e16a261 100644
--- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
+++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
@@ -37,7 +37,7 @@ MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper");
 MODULE_LICENSE("GPL");
 
 static unsigned int timeout = 3;
-module_param(timeout, int, 0600);
+module_param(timeout, uint, 0400);
 MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
 
 static int help(struct sk_buff **pskb,
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 3fce91bcc0b..c9ebbe0d2d9 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -79,6 +79,7 @@ ctnetlink_dump_tuples(struct sk_buff *skb,
 		      const struct ip_conntrack_tuple *tuple)
 {
 	struct nfattr *nest_parms;
+	int ret;
 	
 	nest_parms = NFA_NEST(skb, CTA_TUPLE_IP);
 	NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), &tuple->src.ip);
@@ -86,10 +87,10 @@ ctnetlink_dump_tuples(struct sk_buff *skb,
 	NFA_NEST_END(skb, nest_parms);
 
 	nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO);
-	ctnetlink_dump_tuples_proto(skb, tuple);
+	ret = ctnetlink_dump_tuples_proto(skb, tuple);
 	NFA_NEST_END(skb, nest_parms);
 
-	return 0;
+	return ret;
 
 nfattr_failure:
 	return -1;
@@ -160,7 +161,7 @@ ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct ip_conntrack *ct)
 		return 0;
 		
 	nest_helper = NFA_NEST(skb, CTA_HELP);
-	NFA_PUT(skb, CTA_HELP_NAME, CTA_HELP_MAXNAMESIZE, &ct->helper->name);
+	NFA_PUT(skb, CTA_HELP_NAME, strlen(ct->helper->name), ct->helper->name);
 
 	if (ct->helper->to_nfattr)
 		ct->helper->to_nfattr(skb, ct);
@@ -229,7 +230,7 @@ nfattr_failure:
 static inline int
 ctnetlink_dump_use(struct sk_buff *skb, const struct ip_conntrack *ct)
 {
-	unsigned int use = htonl(atomic_read(&ct->ct_general.use));
+	u_int32_t use = htonl(atomic_read(&ct->ct_general.use));
 	
 	NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use);
 	return 0;
@@ -311,29 +312,22 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 	if (events & IPCT_DESTROY) {
 		type = IPCTNL_MSG_CT_DELETE;
 		group = NFNLGRP_CONNTRACK_DESTROY;
-		goto alloc_skb;
-	}
-	if (events & (IPCT_NEW | IPCT_RELATED)) {
+	} else if (events & (IPCT_NEW | IPCT_RELATED)) {
 		type = IPCTNL_MSG_CT_NEW;
 		flags = NLM_F_CREATE|NLM_F_EXCL;
 		/* dump everything */
 		events = ~0UL;
 		group = NFNLGRP_CONNTRACK_NEW;
-		goto alloc_skb;
-	}
-	if (events & (IPCT_STATUS |
+	} else if (events & (IPCT_STATUS |
 		      IPCT_PROTOINFO |
 		      IPCT_HELPER |
 		      IPCT_HELPINFO |
 		      IPCT_NATINFO)) {
 		type = IPCTNL_MSG_CT_NEW;
 		group = NFNLGRP_CONNTRACK_UPDATE;
-		goto alloc_skb;
-	} 
+	} else 
+		return NOTIFY_DONE;
 	
-	return NOTIFY_DONE;
-
-alloc_skb:
   /* FIXME: Check if there are any listeners before, don't hurt performance */
 	
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
@@ -503,7 +497,7 @@ ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple)
 }
 
 static const size_t cta_min_proto[CTA_PROTO_MAX] = {
-	[CTA_PROTO_NUM-1]	= sizeof(u_int16_t),
+	[CTA_PROTO_NUM-1]	= sizeof(u_int8_t),
 	[CTA_PROTO_SRC_PORT-1]	= sizeof(u_int16_t),
 	[CTA_PROTO_DST_PORT-1]	= sizeof(u_int16_t),
 	[CTA_PROTO_ICMP_TYPE-1]	= sizeof(u_int8_t),
@@ -528,7 +522,7 @@ ctnetlink_parse_tuple_proto(struct nfattr *attr,
 
 	if (!tb[CTA_PROTO_NUM-1])
 		return -EINVAL;
-	tuple->dst.protonum = *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]);
+	tuple->dst.protonum = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]);
 
 	proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
 
@@ -728,11 +722,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 			return -ENOENT;
 		}
 	}	
-	if (del_timer(&ct->timeout)) {
-		ip_conntrack_put(ct);
+	if (del_timer(&ct->timeout))
 		ct->timeout.function((unsigned long)ct);
-		return 0;
-	}
+
 	ip_conntrack_put(ct);
 	DEBUGP("leaving\n");
 
@@ -877,7 +869,7 @@ ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[])
 		DEBUGP("NAT status: %lu\n", 
 		       status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK));
 		
-		if (ip_nat_initialized(ct, hooknum))
+		if (ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
 			return -EEXIST;
 		ip_nat_setup_info(ct, &range, hooknum);
 
@@ -1039,6 +1031,11 @@ ctnetlink_create_conntrack(struct nfattr *cda[],
 			return err;
 	}
 
+#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
+	if (cda[CTA_MARK-1])
+		ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1]));
+#endif
+
 	ct->helper = ip_conntrack_helper_find_get(rtuple);
 
 	add_timer(&ct->timeout);
@@ -1047,11 +1044,6 @@ ctnetlink_create_conntrack(struct nfattr *cda[],
 	if (ct->helper)
 		ip_conntrack_helper_put(ct->helper);
 
-#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
-	if (cda[CTA_MARK-1])
-		ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1]));
-#endif
-
 	DEBUGP("conntrack with id %u inserted\n", ct->id);
 	return 0;
 
@@ -1211,7 +1203,6 @@ static int ctnetlink_expect_event(struct notifier_block *this,
 	unsigned int type;
 	unsigned char *b;
 	int flags = 0;
-	u16 proto;
 
 	if (events & IPEXP_NEW) {
 		type = IPCTNL_MSG_EXP_NEW;
@@ -1238,7 +1229,6 @@ static int ctnetlink_expect_event(struct notifier_block *this,
 		goto nfattr_failure;
 
 	nlh->nlmsg_len = skb->tail - b;
-	proto = exp->tuple.dst.protonum;
 	nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0);
 	return NOTIFY_DONE;
 
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
index 88c3712bd25..f891308b5e4 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
@@ -12,7 +12,7 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
 
-unsigned long ip_ct_generic_timeout = 600*HZ;
+unsigned int ip_ct_generic_timeout = 600*HZ;
 
 static int generic_pkt_to_tuple(const struct sk_buff *skb,
 				unsigned int dataoff,
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
index 744abb9d377..56794797d55 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
@@ -31,6 +31,8 @@
 #include <linux/ip.h>
 #include <linux/in.h>
 #include <linux/list.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
 
 static DEFINE_RWLOCK(ip_ct_gre_lock);
 #define ASSERT_READ_LOCK(x)
@@ -308,7 +310,10 @@ int __init ip_ct_proto_gre_init(void)
 	return ip_conntrack_protocol_register(&gre);
 }
 
-void __exit ip_ct_proto_gre_fini(void)
+/* This cannot be __exit, as it is invoked from ip_conntrack_helper_pptp.c's
+ * init() code on errors.
+ */
+void ip_ct_proto_gre_fini(void)
 {
 	struct list_head *pos, *n;
 
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index 5f9925db608..3021af0910f 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -16,13 +16,12 @@
 #include <linux/skbuff.h>
 #include <net/ip.h>
 #include <net/checksum.h>
-#include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv4/ip_conntrack.h>
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
 
-unsigned long ip_ct_icmp_timeout = 30*HZ;
+unsigned int ip_ct_icmp_timeout = 30*HZ;
 
 #if 0
 #define DEBUGP printk
@@ -47,20 +46,21 @@ static int icmp_pkt_to_tuple(const struct sk_buff *skb,
 	return 1;
 }
 
+/* Add 1; spaces filled with 0. */
+static const u_int8_t invmap[] = {
+	[ICMP_ECHO] = ICMP_ECHOREPLY + 1,
+	[ICMP_ECHOREPLY] = ICMP_ECHO + 1,
+	[ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
+	[ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
+	[ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
+	[ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
+	[ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
+	[ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
+};
+
 static int icmp_invert_tuple(struct ip_conntrack_tuple *tuple,
 			     const struct ip_conntrack_tuple *orig)
 {
-	/* Add 1; spaces filled with 0. */
-	static const u_int8_t invmap[]
-		= { [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
-		    [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
-		    [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
-		    [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
-		    [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
-		    [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
-		    [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
-		    [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1};
-
 	if (orig->dst.u.icmp.type >= sizeof(invmap)
 	    || !invmap[orig->dst.u.icmp.type])
 		return 0;
@@ -110,17 +110,17 @@ static int icmp_packet(struct ip_conntrack *ct,
 	return NF_ACCEPT;
 }
 
-static const u_int8_t valid_new[] = { 
-	[ICMP_ECHO] = 1,
-	[ICMP_TIMESTAMP] = 1,
-	[ICMP_INFO_REQUEST] = 1,
-	[ICMP_ADDRESS] = 1 
-};
-
 /* Called when a new connection for this protocol found. */
 static int icmp_new(struct ip_conntrack *conntrack,
 		    const struct sk_buff *skb)
 {
+	static const u_int8_t valid_new[] = { 
+		[ICMP_ECHO] = 1,
+		[ICMP_TIMESTAMP] = 1,
+		[ICMP_INFO_REQUEST] = 1,
+		[ICMP_ADDRESS] = 1 
+	};
+
 	if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
 	    || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) {
 		/* Can't create a new ICMP `conn' with this. */
@@ -279,10 +279,6 @@ static int icmp_tuple_to_nfattr(struct sk_buff *skb,
 	NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t),
 		&t->dst.u.icmp.code);
 
-	if (t->dst.u.icmp.type >= sizeof(valid_new) 
-	    || !valid_new[t->dst.u.icmp.type])
-		return -EINVAL;
-
 	return 0;
 
 nfattr_failure:
@@ -295,7 +291,7 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[],
 	if (!tb[CTA_PROTO_ICMP_TYPE-1]
 	    || !tb[CTA_PROTO_ICMP_CODE-1]
 	    || !tb[CTA_PROTO_ICMP_ID-1])
-		return -1;
+		return -EINVAL;
 
 	tuple->dst.u.icmp.type = 
 			*(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]);
@@ -304,6 +300,10 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[],
 	tuple->src.u.icmp.id =
 			*(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
 
+	if (tuple->dst.u.icmp.type >= sizeof(invmap)
+	    || !invmap[tuple->dst.u.icmp.type])
+		return -EINVAL;
+
 	return 0;
 }
 #endif
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
index 977fb59d456..be602e8aeab 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -16,6 +16,7 @@
 #include <linux/types.h>
 #include <linux/sched.h>
 #include <linux/timer.h>
+#include <linux/interrupt.h>
 #include <linux/netfilter.h>
 #include <linux/module.h>
 #include <linux/in.h>
@@ -57,15 +58,15 @@ static const char *sctp_conntrack_names[] = {
 #define HOURS * 60 MINS
 #define DAYS  * 24 HOURS
 
-static unsigned long ip_ct_sctp_timeout_closed            =  10 SECS;
-static unsigned long ip_ct_sctp_timeout_cookie_wait       =   3 SECS;
-static unsigned long ip_ct_sctp_timeout_cookie_echoed     =   3 SECS;
-static unsigned long ip_ct_sctp_timeout_established       =   5 DAYS;
-static unsigned long ip_ct_sctp_timeout_shutdown_sent     = 300 SECS / 1000;
-static unsigned long ip_ct_sctp_timeout_shutdown_recd     = 300 SECS / 1000;
-static unsigned long ip_ct_sctp_timeout_shutdown_ack_sent =   3 SECS;
+static unsigned int ip_ct_sctp_timeout_closed            =  10 SECS;
+static unsigned int ip_ct_sctp_timeout_cookie_wait       =   3 SECS;
+static unsigned int ip_ct_sctp_timeout_cookie_echoed     =   3 SECS;
+static unsigned int ip_ct_sctp_timeout_established       =   5 DAYS;
+static unsigned int ip_ct_sctp_timeout_shutdown_sent     = 300 SECS / 1000;
+static unsigned int ip_ct_sctp_timeout_shutdown_recd     = 300 SECS / 1000;
+static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent =   3 SECS;
 
-static const unsigned long * sctp_timeouts[]
+static const unsigned int * sctp_timeouts[]
 = { NULL,                                  /* SCTP_CONNTRACK_NONE  */
     &ip_ct_sctp_timeout_closed,	           /* SCTP_CONNTRACK_CLOSED */
     &ip_ct_sctp_timeout_cookie_wait,       /* SCTP_CONNTRACK_COOKIE_WAIT */
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index 62598167677..e0dc3706354 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -32,7 +32,6 @@
 
 #include <net/tcp.h>
 
-#include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv4/ip_conntrack.h>
 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
@@ -85,21 +84,21 @@ static const char *tcp_conntrack_names[] = {
 #define HOURS * 60 MINS
 #define DAYS * 24 HOURS
 
-unsigned long ip_ct_tcp_timeout_syn_sent =      2 MINS;
-unsigned long ip_ct_tcp_timeout_syn_recv =     60 SECS;
-unsigned long ip_ct_tcp_timeout_established =   5 DAYS;
-unsigned long ip_ct_tcp_timeout_fin_wait =      2 MINS;
-unsigned long ip_ct_tcp_timeout_close_wait =   60 SECS;
-unsigned long ip_ct_tcp_timeout_last_ack =     30 SECS;
-unsigned long ip_ct_tcp_timeout_time_wait =     2 MINS;
-unsigned long ip_ct_tcp_timeout_close =        10 SECS;
+unsigned int ip_ct_tcp_timeout_syn_sent =      2 MINS;
+unsigned int ip_ct_tcp_timeout_syn_recv =     60 SECS;
+unsigned int ip_ct_tcp_timeout_established =   5 DAYS;
+unsigned int ip_ct_tcp_timeout_fin_wait =      2 MINS;
+unsigned int ip_ct_tcp_timeout_close_wait =   60 SECS;
+unsigned int ip_ct_tcp_timeout_last_ack =     30 SECS;
+unsigned int ip_ct_tcp_timeout_time_wait =     2 MINS;
+unsigned int ip_ct_tcp_timeout_close =        10 SECS;
 
 /* RFC1122 says the R2 limit should be at least 100 seconds.
    Linux uses 15 packets as limit, which corresponds 
    to ~13-30min depending on RTO. */
-unsigned long ip_ct_tcp_timeout_max_retrans =     5 MINS;
+unsigned int ip_ct_tcp_timeout_max_retrans =     5 MINS;
  
-static const unsigned long * tcp_timeouts[]
+static const unsigned int * tcp_timeouts[]
 = { NULL,                              /*      TCP_CONNTRACK_NONE */
     &ip_ct_tcp_timeout_syn_sent,       /*      TCP_CONNTRACK_SYN_SENT, */
     &ip_ct_tcp_timeout_syn_recv,       /*      TCP_CONNTRACK_SYN_RECV, */
@@ -272,9 +271,9 @@ static const enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
  *	sCL -> sCL
  */
 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*ack*/	   { sIV, sIV, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
+/*ack*/	   { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
 /*
- *	sSS -> sIV	Might be a half-open connection.
+ *	sSS -> sIG	Might be a half-open connection.
  *	sSR -> sSR	Might answer late resent SYN.
  *	sES -> sES	:-)
  *	sFW -> sCW	Normal close request answered by ACK.
@@ -341,9 +340,10 @@ static int tcp_print_conntrack(struct seq_file *s,
 static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
 			 const struct ip_conntrack *ct)
 {
-	struct nfattr *nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
+	struct nfattr *nest_parms;
 	
 	read_lock_bh(&tcp_lock);
+	nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
 	NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
 		&ct->proto.tcp.state);
 	read_unlock_bh(&tcp_lock);
@@ -917,8 +917,12 @@ static int tcp_packet(struct ip_conntrack *conntrack,
 
 	switch (new_state) {
 	case TCP_CONNTRACK_IGNORE:
-		/* Either SYN in ORIGINAL
-		 * or SYN/ACK in REPLY. */
+		/* Ignored packets:
+		 * 
+		 * a) SYN in ORIGINAL
+		 * b) SYN/ACK in REPLY
+		 * c) ACK in reply direction after initial SYN in original.
+		 */
 		if (index == TCP_SYNACK_SET
 		    && conntrack->proto.tcp.last_index == TCP_SYN_SET
 		    && conntrack->proto.tcp.last_dir != dir
@@ -985,16 +989,23 @@ static int tcp_packet(struct ip_conntrack *conntrack,
 		}
 	case TCP_CONNTRACK_CLOSE:
 		if (index == TCP_RST_SET
-		    && test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
-		    && conntrack->proto.tcp.last_index == TCP_SYN_SET
+		    && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
+		         && conntrack->proto.tcp.last_index == TCP_SYN_SET)
+		        || (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
+		            && conntrack->proto.tcp.last_index == TCP_ACK_SET))
 		    && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
-			/* RST sent to invalid SYN we had let trough
-			 * SYN was in window then, tear down connection.
+			/* RST sent to invalid SYN or ACK we had let through
+			 * at a) and c) above:
+			 *
+			 * a) SYN was in window then
+			 * c) we hold a half-open connection.
+			 *
+			 * Delete our connection entry.
 			 * We skip window checking, because packet might ACK
-			 * segments we ignored in the SYN. */
+			 * segments we ignored. */
 			goto in_window;
 		}
-		/* Just fall trough */
+		/* Just fall through */
 	default:
 		/* Keep compilers happy. */
 		break;
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
index f2dcac7c766..55b7d3210ad 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -11,15 +11,15 @@
 #include <linux/timer.h>
 #include <linux/netfilter.h>
 #include <linux/in.h>
+#include <linux/ip.h>
 #include <linux/udp.h>
 #include <linux/seq_file.h>
 #include <net/checksum.h>
-#include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
 
-unsigned long ip_ct_udp_timeout = 30*HZ;
-unsigned long ip_ct_udp_timeout_stream = 180*HZ;
+unsigned int ip_ct_udp_timeout = 30*HZ;
+unsigned int ip_ct_udp_timeout_stream = 180*HZ;
 
 static int udp_pkt_to_tuple(const struct sk_buff *skb,
 			     unsigned int dataoff,
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index dd476b191f4..833fcb4be5e 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -27,6 +27,7 @@
 #endif
 #include <net/checksum.h>
 #include <net/ip.h>
+#include <net/route.h>
 
 #define ASSERT_READ_LOCK(x)
 #define ASSERT_WRITE_LOCK(x)
@@ -450,30 +451,6 @@ static unsigned int ip_conntrack_defrag(unsigned int hooknum,
 	return NF_ACCEPT;
 }
 
-static unsigned int ip_refrag(unsigned int hooknum,
-			      struct sk_buff **pskb,
-			      const struct net_device *in,
-			      const struct net_device *out,
-			      int (*okfn)(struct sk_buff *))
-{
-	struct rtable *rt = (struct rtable *)(*pskb)->dst;
-
-	/* We've seen it coming out the other side: confirm */
-	if (ip_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT)
-		return NF_DROP;
-
-	/* Local packets are never produced too large for their
-	   interface.  We degfragment them at LOCAL_OUT, however,
-	   so we have to refragment them here. */
-	if ((*pskb)->len > dst_mtu(&rt->u.dst) &&
-	    !skb_shinfo(*pskb)->tso_size) {
-		/* No hook can be after us, so this should be OK. */
-		ip_fragment(*pskb, okfn);
-		return NF_STOLEN;
-	}
-	return NF_ACCEPT;
-}
-
 static unsigned int ip_conntrack_local(unsigned int hooknum,
 				       struct sk_buff **pskb,
 				       const struct net_device *in,
@@ -543,7 +520,7 @@ static struct nf_hook_ops ip_conntrack_helper_in_ops = {
 
 /* Refragmenter; last chance. */
 static struct nf_hook_ops ip_conntrack_out_ops = {
-	.hook		= ip_refrag,
+	.hook		= ip_confirm,
 	.owner		= THIS_MODULE,
 	.pf		= PF_INET,
 	.hooknum	= NF_IP_POST_ROUTING,
@@ -567,28 +544,28 @@ extern int ip_conntrack_max;
 extern unsigned int ip_conntrack_htable_size;
 
 /* From ip_conntrack_proto_tcp.c */
-extern unsigned long ip_ct_tcp_timeout_syn_sent;
-extern unsigned long ip_ct_tcp_timeout_syn_recv;
-extern unsigned long ip_ct_tcp_timeout_established;
-extern unsigned long ip_ct_tcp_timeout_fin_wait;
-extern unsigned long ip_ct_tcp_timeout_close_wait;
-extern unsigned long ip_ct_tcp_timeout_last_ack;
-extern unsigned long ip_ct_tcp_timeout_time_wait;
-extern unsigned long ip_ct_tcp_timeout_close;
-extern unsigned long ip_ct_tcp_timeout_max_retrans;
+extern unsigned int ip_ct_tcp_timeout_syn_sent;
+extern unsigned int ip_ct_tcp_timeout_syn_recv;
+extern unsigned int ip_ct_tcp_timeout_established;
+extern unsigned int ip_ct_tcp_timeout_fin_wait;
+extern unsigned int ip_ct_tcp_timeout_close_wait;
+extern unsigned int ip_ct_tcp_timeout_last_ack;
+extern unsigned int ip_ct_tcp_timeout_time_wait;
+extern unsigned int ip_ct_tcp_timeout_close;
+extern unsigned int ip_ct_tcp_timeout_max_retrans;
 extern int ip_ct_tcp_loose;
 extern int ip_ct_tcp_be_liberal;
 extern int ip_ct_tcp_max_retrans;
 
 /* From ip_conntrack_proto_udp.c */
-extern unsigned long ip_ct_udp_timeout;
-extern unsigned long ip_ct_udp_timeout_stream;
+extern unsigned int ip_ct_udp_timeout;
+extern unsigned int ip_ct_udp_timeout_stream;
 
 /* From ip_conntrack_proto_icmp.c */
-extern unsigned long ip_ct_icmp_timeout;
+extern unsigned int ip_ct_icmp_timeout;
 
 /* From ip_conntrack_proto_icmp.c */
-extern unsigned long ip_ct_generic_timeout;
+extern unsigned int ip_ct_generic_timeout;
 
 /* Log invalid packets of a given protocol */
 static int log_invalid_proto_min = 0;
@@ -967,7 +944,7 @@ module_exit(fini);
 
 /* Some modules need us, but don't depend directly on any symbol.
    They should call this. */
-void need_ip_conntrack(void)
+void need_conntrack(void)
 {
 }
 
@@ -985,7 +962,7 @@ EXPORT_SYMBOL(ip_ct_get_tuple);
 EXPORT_SYMBOL(invert_tuplepr);
 EXPORT_SYMBOL(ip_conntrack_alter_reply);
 EXPORT_SYMBOL(ip_conntrack_destroyed);
-EXPORT_SYMBOL(need_ip_conntrack);
+EXPORT_SYMBOL(need_conntrack);
 EXPORT_SYMBOL(ip_conntrack_helper_register);
 EXPORT_SYMBOL(ip_conntrack_helper_unregister);
 EXPORT_SYMBOL(ip_ct_iterate_cleanup);
diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c
index d83757a70d9..b8daab3c64a 100644
--- a/net/ipv4/netfilter/ip_nat_ftp.c
+++ b/net/ipv4/netfilter/ip_nat_ftp.c
@@ -171,7 +171,7 @@ static int __init init(void)
 /* Prior to 2.6.11, we had a ports param.  No longer, but don't break users. */
 static int warn_set(const char *val, struct kernel_param *kp)
 {
-	printk(KERN_INFO __stringify(KBUILD_MODNAME)
+	printk(KERN_INFO KBUILD_MODNAME
 	       ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
 	return 0;
 }
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
index e546203f566..ac004895781 100644
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -148,14 +148,14 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 {
 	struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
 	struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
-
-	u_int16_t msg, *cid = NULL, new_callid;
+	u_int16_t msg, new_callid;
+	unsigned int cid_off;
 
 	new_callid = htons(ct_pptp_info->pns_call_id);
 	
 	switch (msg = ntohs(ctlh->messageType)) {
 		case PPTP_OUT_CALL_REQUEST:
-			cid = &pptpReq->ocreq.callID;
+			cid_off = offsetof(union pptp_ctrl_union, ocreq.callID);
 			/* FIXME: ideally we would want to reserve a call ID
 			 * here.  current netfilter NAT core is not able to do
 			 * this :( For now we use TCP source port. This breaks
@@ -172,10 +172,10 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 			ct_pptp_info->pns_call_id = ntohs(new_callid);
 			break;
 		case PPTP_IN_CALL_REPLY:
-			cid = &pptpReq->icreq.callID;
+			cid_off = offsetof(union pptp_ctrl_union, icreq.callID);
 			break;
 		case PPTP_CALL_CLEAR_REQUEST:
-			cid = &pptpReq->clrreq.callID;
+			cid_off = offsetof(union pptp_ctrl_union, clrreq.callID);
 			break;
 		default:
 			DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
@@ -197,18 +197,15 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 
 	/* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass
 	 * down to here */
-
-	IP_NF_ASSERT(cid);
-
 	DEBUGP("altering call id from 0x%04x to 0x%04x\n",
-		ntohs(*cid), ntohs(new_callid));
+		ntohs(*(u_int16_t *)((char *)pptpReq + cid_off)), ntohs(new_callid));
 
 	/* mangle packet */
 	if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
-		(void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)),
-				 	sizeof(new_callid), 
-					(char *)&new_callid,
-				 	sizeof(new_callid)) == 0)
+	                             cid_off + sizeof(struct pptp_pkt_hdr) +
+	                             sizeof(struct PptpControlHeader),
+	                             sizeof(new_callid), (char *)&new_callid,
+	                             sizeof(new_callid)) == 0)
 		return NF_DROP;
 
 	return NF_ACCEPT;
@@ -299,31 +296,30 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 		 union pptp_ctrl_union *pptpReq)
 {
 	struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
-	u_int16_t msg, new_cid = 0, new_pcid, *pcid = NULL, *cid = NULL;
-
-	int ret = NF_ACCEPT, rv;
+	u_int16_t msg, new_cid = 0, new_pcid;
+	unsigned int pcid_off, cid_off = 0;
 
 	new_pcid = htons(nat_pptp_info->pns_call_id);
 
 	switch (msg = ntohs(ctlh->messageType)) {
 	case PPTP_OUT_CALL_REPLY:
-		pcid = &pptpReq->ocack.peersCallID;	
-		cid = &pptpReq->ocack.callID;
+		pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID);
+		cid_off = offsetof(union pptp_ctrl_union, ocack.callID);
 		break;
 	case PPTP_IN_CALL_CONNECT:
-		pcid = &pptpReq->iccon.peersCallID;
+		pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID);
 		break;
 	case PPTP_IN_CALL_REQUEST:
 		/* only need to nat in case PAC is behind NAT box */
-		break;
+		return NF_ACCEPT;
 	case PPTP_WAN_ERROR_NOTIFY:
-		pcid = &pptpReq->wanerr.peersCallID;
+		pcid_off = offsetof(union pptp_ctrl_union, wanerr.peersCallID);
 		break;
 	case PPTP_CALL_DISCONNECT_NOTIFY:
-		pcid = &pptpReq->disc.callID;
+		pcid_off = offsetof(union pptp_ctrl_union, disc.callID);
 		break;
 	case PPTP_SET_LINK_INFO:
-		pcid = &pptpReq->setlink.peersCallID;
+		pcid_off = offsetof(union pptp_ctrl_union, setlink.peersCallID);
 		break;
 
 	default:
@@ -345,35 +341,26 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 	 * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */
 
 	/* mangle packet */
-	IP_NF_ASSERT(pcid);
 	DEBUGP("altering peer call id from 0x%04x to 0x%04x\n",
-		ntohs(*pcid), ntohs(new_pcid));
-	
-	rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, 
-				      (void *)pcid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)),
-				      sizeof(new_pcid), (char *)&new_pcid, 
-				      sizeof(new_pcid));
-	if (rv != NF_ACCEPT) 
-		return rv;
+		ntohs(*(u_int16_t *)((char *)pptpReq + pcid_off)), ntohs(new_pcid));
+
+	if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
+	                             pcid_off + sizeof(struct pptp_pkt_hdr) +
+				     sizeof(struct PptpControlHeader),
+				     sizeof(new_pcid), (char *)&new_pcid,
+				     sizeof(new_pcid)) == 0)
+		return NF_DROP;
 
 	if (new_cid) {
-		IP_NF_ASSERT(cid);
 		DEBUGP("altering call id from 0x%04x to 0x%04x\n",
-			ntohs(*cid), ntohs(new_cid));
-		rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, 
-					      (void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)), 
-					      sizeof(new_cid),
-					      (char *)&new_cid, 
-					      sizeof(new_cid));
-		if (rv != NF_ACCEPT)
-			return rv;
+			ntohs(*(u_int16_t *)((char *)pptpReq + cid_off)), ntohs(new_cid));
+		if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
+		                             cid_off + sizeof(struct pptp_pkt_hdr) +
+					     sizeof(struct PptpControlHeader),
+					     sizeof(new_cid), (char *)&new_cid,
+					     sizeof(new_cid)) == 0)
+			return NF_DROP;
 	}
-
-	/* check for earlier return value of 'switch' above */
-	if (ret != NF_ACCEPT)
-		return ret;
-
-	/* great, at least we don't need to resize packets */
 	return NF_ACCEPT;
 }
 
diff --git a/net/ipv4/netfilter/ip_nat_irc.c b/net/ipv4/netfilter/ip_nat_irc.c
index de31942babe..461c833eaca 100644
--- a/net/ipv4/netfilter/ip_nat_irc.c
+++ b/net/ipv4/netfilter/ip_nat_irc.c
@@ -113,7 +113,7 @@ static int __init init(void)
 /* Prior to 2.6.11, we had a ports param.  No longer, but don't break users. */
 static int warn_set(const char *val, struct kernel_param *kp)
 {
-	printk(KERN_INFO __stringify(KBUILD_MODNAME)
+	printk(KERN_INFO KBUILD_MODNAME
 	       ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
 	return 0;
 }
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
index f7cad7cf1ae..6c4899d8046 100644
--- a/net/ipv4/netfilter/ip_nat_proto_gre.c
+++ b/net/ipv4/netfilter/ip_nat_proto_gre.c
@@ -151,42 +151,6 @@ gre_manip_pkt(struct sk_buff **pskb,
 	return 1;
 }
 
-/* print out a nat tuple */
-static unsigned int 
-gre_print(char *buffer, 
-	  const struct ip_conntrack_tuple *match,
-	  const struct ip_conntrack_tuple *mask)
-{
-	unsigned int len = 0;
-
-	if (mask->src.u.gre.key)
-		len += sprintf(buffer + len, "srckey=0x%x ", 
-				ntohl(match->src.u.gre.key));
-
-	if (mask->dst.u.gre.key)
-		len += sprintf(buffer + len, "dstkey=0x%x ",
-				ntohl(match->src.u.gre.key));
-
-	return len;
-}
-
-/* print a range of keys */
-static unsigned int 
-gre_print_range(char *buffer, const struct ip_nat_range *range)
-{
-	if (range->min.gre.key != 0 
-	    || range->max.gre.key != 0xFFFF) {
-		if (range->min.gre.key == range->max.gre.key)
-			return sprintf(buffer, "key 0x%x ",
-					ntohl(range->min.gre.key));
-		else
-			return sprintf(buffer, "keys 0x%u-0x%u ",
-					ntohl(range->min.gre.key),
-					ntohl(range->max.gre.key));
-	} else
-		return 0;
-}
-
 /* nat helper struct */
 static struct ip_nat_protocol gre = { 
 	.name		= "GRE", 
@@ -194,8 +158,6 @@ static struct ip_nat_protocol gre = {
 	.manip_pkt	= gre_manip_pkt,
 	.in_range	= gre_in_range,
 	.unique_tuple	= gre_unique_tuple,
-	.print		= gre_print,
-	.print_range	= gre_print_range,
 #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
     defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
 	.range_to_nfattr	= ip_nat_port_range_to_nfattr,
diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c
index 93871904399..31a3f4ccb99 100644
--- a/net/ipv4/netfilter/ip_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c
@@ -74,38 +74,6 @@ icmp_manip_pkt(struct sk_buff **pskb,
 	return 1;
 }
 
-static unsigned int
-icmp_print(char *buffer,
-	   const struct ip_conntrack_tuple *match,
-	   const struct ip_conntrack_tuple *mask)
-{
-	unsigned int len = 0;
-
-	if (mask->src.u.icmp.id)
-		len += sprintf(buffer + len, "id=%u ",
-			       ntohs(match->src.u.icmp.id));
-
-	if (mask->dst.u.icmp.type)
-		len += sprintf(buffer + len, "type=%u ",
-			       ntohs(match->dst.u.icmp.type));
-
-	if (mask->dst.u.icmp.code)
-		len += sprintf(buffer + len, "code=%u ",
-			       ntohs(match->dst.u.icmp.code));
-
-	return len;
-}
-
-static unsigned int
-icmp_print_range(char *buffer, const struct ip_nat_range *range)
-{
-	if (range->min.icmp.id != 0 || range->max.icmp.id != 0xFFFF)
-		return sprintf(buffer, "id %u-%u ",
-			       ntohs(range->min.icmp.id),
-			       ntohs(range->max.icmp.id));
-	else return 0;
-}
-
 struct ip_nat_protocol ip_nat_protocol_icmp = {
 	.name			= "ICMP",
 	.protonum		= IPPROTO_ICMP,
@@ -113,8 +81,6 @@ struct ip_nat_protocol ip_nat_protocol_icmp = {
 	.manip_pkt		= icmp_manip_pkt,
 	.in_range		= icmp_in_range,
 	.unique_tuple		= icmp_unique_tuple,
-	.print			= icmp_print,
-	.print_range		= icmp_print_range,
 #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
     defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
 	.range_to_nfattr	= ip_nat_port_range_to_nfattr,
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c
index 1d381bf6857..a3d14079eba 100644
--- a/net/ipv4/netfilter/ip_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c
@@ -136,40 +136,6 @@ tcp_manip_pkt(struct sk_buff **pskb,
 	return 1;
 }
 
-static unsigned int
-tcp_print(char *buffer,
-	  const struct ip_conntrack_tuple *match,
-	  const struct ip_conntrack_tuple *mask)
-{
-	unsigned int len = 0;
-
-	if (mask->src.u.tcp.port)
-		len += sprintf(buffer + len, "srcpt=%u ",
-			       ntohs(match->src.u.tcp.port));
-
-
-	if (mask->dst.u.tcp.port)
-		len += sprintf(buffer + len, "dstpt=%u ",
-			       ntohs(match->dst.u.tcp.port));
-
-	return len;
-}
-
-static unsigned int
-tcp_print_range(char *buffer, const struct ip_nat_range *range)
-{
-	if (range->min.tcp.port != 0 || range->max.tcp.port != 0xFFFF) {
-		if (range->min.tcp.port == range->max.tcp.port)
-			return sprintf(buffer, "port %u ",
-				       ntohs(range->min.tcp.port));
-		else
-			return sprintf(buffer, "ports %u-%u ",
-				       ntohs(range->min.tcp.port),
-				       ntohs(range->max.tcp.port));
-	}
-	else return 0;
-}
-
 struct ip_nat_protocol ip_nat_protocol_tcp = {
 	.name			= "TCP",
 	.protonum		= IPPROTO_TCP,
@@ -177,8 +143,6 @@ struct ip_nat_protocol ip_nat_protocol_tcp = {
 	.manip_pkt		= tcp_manip_pkt,
 	.in_range		= tcp_in_range,
 	.unique_tuple		= tcp_unique_tuple,
-	.print			= tcp_print,
-	.print_range		= tcp_print_range,
 #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
     defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
 	.range_to_nfattr	= ip_nat_port_range_to_nfattr,
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c
index c4906e1aa24..ec6053fdc86 100644
--- a/net/ipv4/netfilter/ip_nat_proto_udp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_udp.c
@@ -122,40 +122,6 @@ udp_manip_pkt(struct sk_buff **pskb,
 	return 1;
 }
 
-static unsigned int
-udp_print(char *buffer,
-	  const struct ip_conntrack_tuple *match,
-	  const struct ip_conntrack_tuple *mask)
-{
-	unsigned int len = 0;
-
-	if (mask->src.u.udp.port)
-		len += sprintf(buffer + len, "srcpt=%u ",
-			       ntohs(match->src.u.udp.port));
-
-
-	if (mask->dst.u.udp.port)
-		len += sprintf(buffer + len, "dstpt=%u ",
-			       ntohs(match->dst.u.udp.port));
-
-	return len;
-}
-
-static unsigned int
-udp_print_range(char *buffer, const struct ip_nat_range *range)
-{
-	if (range->min.udp.port != 0 || range->max.udp.port != 0xFFFF) {
-		if (range->min.udp.port == range->max.udp.port)
-			return sprintf(buffer, "port %u ",
-				       ntohs(range->min.udp.port));
-		else
-			return sprintf(buffer, "ports %u-%u ",
-				       ntohs(range->min.udp.port),
-				       ntohs(range->max.udp.port));
-	}
-	else return 0;
-}
-
 struct ip_nat_protocol ip_nat_protocol_udp = {
 	.name			= "UDP",
 	.protonum		= IPPROTO_UDP,
@@ -163,8 +129,6 @@ struct ip_nat_protocol ip_nat_protocol_udp = {
 	.manip_pkt		= udp_manip_pkt,
 	.in_range		= udp_in_range,
 	.unique_tuple		= udp_unique_tuple,
-	.print			= udp_print,
-	.print_range		= udp_print_range,
 #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
     defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
 	.range_to_nfattr	= ip_nat_port_range_to_nfattr,
diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c
index f0099a646a0..3bf04951724 100644
--- a/net/ipv4/netfilter/ip_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/ip_nat_proto_unknown.c
@@ -46,26 +46,10 @@ unknown_manip_pkt(struct sk_buff **pskb,
 	return 1;
 }
 
-static unsigned int
-unknown_print(char *buffer,
-	      const struct ip_conntrack_tuple *match,
-	      const struct ip_conntrack_tuple *mask)
-{
-	return 0;
-}
-
-static unsigned int
-unknown_print_range(char *buffer, const struct ip_nat_range *range)
-{
-	return 0;
-}
-
 struct ip_nat_protocol ip_nat_unknown_protocol = {
 	.name			= "unknown",
 	/* .me isn't set: getting a ref to this cannot fail. */
 	.manip_pkt		= unknown_manip_pkt,
 	.in_range		= unknown_in_range,
 	.unique_tuple		= unknown_unique_tuple,
-	.print			= unknown_print,
-	.print_range		= unknown_print_range
 };
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
index cb66b8bddeb..1de86282d23 100644
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ b/net/ipv4/netfilter/ip_nat_rule.c
@@ -95,6 +95,7 @@ static struct ipt_table nat_table = {
 	.valid_hooks	= NAT_VALID_HOOKS,
 	.lock		= RW_LOCK_UNLOCKED,
 	.me		= THIS_MODULE,
+	.af		= AF_INET,
 };
 
 /* Source NAT */
@@ -168,7 +169,7 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb,
 }
 
 static int ipt_snat_checkentry(const char *tablename,
-			       const struct ipt_entry *e,
+			       const void *entry,
 			       void *targinfo,
 			       unsigned int targinfosize,
 			       unsigned int hook_mask)
@@ -201,7 +202,7 @@ static int ipt_snat_checkentry(const char *tablename,
 }
 
 static int ipt_dnat_checkentry(const char *tablename,
-			       const struct ipt_entry *e,
+			       const void *entry,
 			       void *targinfo,
 			       unsigned int targinfosize,
 			       unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c
index 8acb7ed40b4..4f95d477805 100644
--- a/net/ipv4/netfilter/ip_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c
@@ -44,6 +44,7 @@
  *
  */
 #include <linux/config.h>
+#include <linux/in.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
@@ -53,6 +54,7 @@
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 #include <linux/netfilter_ipv4/ip_nat_helper.h>
 #include <linux/ip.h>
+#include <linux/udp.h>
 #include <net/checksum.h>
 #include <net/udp.h>
 #include <asm/uaccess.h>
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 30cd4e18c12..ad438fb185b 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -55,6 +55,44 @@
 			         : ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN"  \
 				    : "*ERROR*")))
 
+#ifdef CONFIG_XFRM
+static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
+{
+	struct ip_conntrack *ct;
+	struct ip_conntrack_tuple *t;
+	enum ip_conntrack_info ctinfo;
+	enum ip_conntrack_dir dir;
+	unsigned long statusbit;
+
+	ct = ip_conntrack_get(skb, &ctinfo);
+	if (ct == NULL)
+		return;
+	dir = CTINFO2DIR(ctinfo);
+	t = &ct->tuplehash[dir].tuple;
+
+	if (dir == IP_CT_DIR_ORIGINAL)
+		statusbit = IPS_DST_NAT;
+	else
+		statusbit = IPS_SRC_NAT;
+
+	if (ct->status & statusbit) {
+		fl->fl4_dst = t->dst.ip;
+		if (t->dst.protonum == IPPROTO_TCP ||
+		    t->dst.protonum == IPPROTO_UDP)
+			fl->fl_ip_dport = t->dst.u.tcp.port;
+	}
+
+	statusbit ^= IPS_NAT_MASK;
+
+	if (ct->status & statusbit) {
+		fl->fl4_src = t->src.ip;
+		if (t->dst.protonum == IPPROTO_TCP ||
+		    t->dst.protonum == IPPROTO_UDP)
+			fl->fl_ip_sport = t->src.u.tcp.port;
+	}
+}
+#endif
+		
 static unsigned int
 ip_nat_fn(unsigned int hooknum,
 	  struct sk_buff **pskb,
@@ -162,18 +200,20 @@ ip_nat_in(unsigned int hooknum,
           const struct net_device *out,
           int (*okfn)(struct sk_buff *))
 {
-	u_int32_t saddr, daddr;
+	struct ip_conntrack *ct;
+	enum ip_conntrack_info ctinfo;
 	unsigned int ret;
 
-	saddr = (*pskb)->nh.iph->saddr;
-	daddr = (*pskb)->nh.iph->daddr;
-
 	ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
 	if (ret != NF_DROP && ret != NF_STOLEN
-	    && ((*pskb)->nh.iph->saddr != saddr
-	        || (*pskb)->nh.iph->daddr != daddr)) {
-		dst_release((*pskb)->dst);
-		(*pskb)->dst = NULL;
+	    && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
+		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+		if (ct->tuplehash[dir].tuple.src.ip !=
+		    ct->tuplehash[!dir].tuple.dst.ip) {
+			dst_release((*pskb)->dst);
+			(*pskb)->dst = NULL;
+		}
 	}
 	return ret;
 }
@@ -185,29 +225,30 @@ ip_nat_out(unsigned int hooknum,
 	   const struct net_device *out,
 	   int (*okfn)(struct sk_buff *))
 {
+	struct ip_conntrack *ct;
+	enum ip_conntrack_info ctinfo;
+	unsigned int ret;
+
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr)
 	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
 		return NF_ACCEPT;
 
-	/* We can hit fragment here; forwarded packets get
-	   defragmented by connection tracking coming in, then
-	   fragmented (grr) by the forward code.
-
-	   In future: If we have nfct != NULL, AND we have NAT
-	   initialized, AND there is no helper, then we can do full
-	   NAPT on the head, and IP-address-only NAT on the rest.
-
-	   I'm starting to have nightmares about fragments.  */
-
-	if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
-		*pskb = ip_ct_gather_frags(*pskb, IP_DEFRAG_NAT_OUT);
-
-		if (!*pskb)
-			return NF_STOLEN;
+	ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
+	if (ret != NF_DROP && ret != NF_STOLEN
+	    && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
+		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+		if (ct->tuplehash[dir].tuple.src.ip !=
+		    ct->tuplehash[!dir].tuple.dst.ip
+#ifdef CONFIG_XFRM
+		    || ct->tuplehash[dir].tuple.src.u.all !=
+		       ct->tuplehash[!dir].tuple.dst.u.all
+#endif
+		    )
+			return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
 	}
-
-	return ip_nat_fn(hooknum, pskb, in, out, okfn);
+	return ret;
 }
 
 static unsigned int
@@ -217,7 +258,8 @@ ip_nat_local_fn(unsigned int hooknum,
 		const struct net_device *out,
 		int (*okfn)(struct sk_buff *))
 {
-	u_int32_t saddr, daddr;
+	struct ip_conntrack *ct;
+	enum ip_conntrack_info ctinfo;
 	unsigned int ret;
 
 	/* root is playing with raw sockets. */
@@ -225,14 +267,20 @@ ip_nat_local_fn(unsigned int hooknum,
 	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
 		return NF_ACCEPT;
 
-	saddr = (*pskb)->nh.iph->saddr;
-	daddr = (*pskb)->nh.iph->daddr;
-
 	ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
 	if (ret != NF_DROP && ret != NF_STOLEN
-	    && ((*pskb)->nh.iph->saddr != saddr
-		|| (*pskb)->nh.iph->daddr != daddr))
-		return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
+	    && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
+		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+		if (ct->tuplehash[dir].tuple.dst.ip !=
+		    ct->tuplehash[!dir].tuple.src.ip
+#ifdef CONFIG_XFRM
+		    || ct->tuplehash[dir].tuple.dst.u.all !=
+		       ct->tuplehash[!dir].tuple.src.u.all
+#endif
+		    )
+			return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
+	}
 	return ret;
 }
 
@@ -316,14 +364,18 @@ static int init_or_cleanup(int init)
 {
 	int ret = 0;
 
-	need_ip_conntrack();
+	need_conntrack();
 
 	if (!init) goto cleanup;
 
+#ifdef CONFIG_XFRM
+	BUG_ON(ip_nat_decode_session != NULL);
+	ip_nat_decode_session = nat_decode_session;
+#endif
 	ret = ip_nat_rule_init();
 	if (ret < 0) {
 		printk("ip_nat_init: can't setup rules.\n");
-		goto cleanup_nothing;
+		goto cleanup_decode_session;
 	}
 	ret = nf_register_hook(&ip_nat_in_ops);
 	if (ret < 0) {
@@ -371,7 +423,11 @@ static int init_or_cleanup(int init)
 	nf_unregister_hook(&ip_nat_in_ops);
  cleanup_rule_init:
 	ip_nat_rule_cleanup();
- cleanup_nothing:
+ cleanup_decode_session:
+#ifdef CONFIG_XFRM
+	ip_nat_decode_session = NULL;
+	synchronize_net();
+#endif
 	return ret;
 }
 
diff --git a/net/ipv4/netfilter/ip_nat_tftp.c b/net/ipv4/netfilter/ip_nat_tftp.c
index 2215317c76b..43c3bd7c118 100644
--- a/net/ipv4/netfilter/ip_nat_tftp.c
+++ b/net/ipv4/netfilter/ip_nat_tftp.c
@@ -42,7 +42,10 @@ static unsigned int help(struct sk_buff **pskb,
 			 enum ip_conntrack_info ctinfo,
 			 struct ip_conntrack_expect *exp)
 {
-	exp->saved_proto.udp.port = exp->tuple.dst.u.tcp.port;
+	struct ip_conntrack *ct = exp->master;
+
+	exp->saved_proto.udp.port
+		= ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
 	exp->dir = IP_CT_DIR_REPLY;
 	exp->expectfn = ip_nat_follow_master;
 	if (ip_conntrack_expect_related(exp) != 0)
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 45886c8475e..2371b2062c2 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -2,7 +2,7 @@
  * Packet matching code.
  *
  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
- * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -11,16 +11,17 @@
  * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
  * 	- increase module usage count as soon as we have rules inside
  * 	  a table
+ * 08 Oct 2005 Harald Welte <laforge@netfilter.org>
+ * 	- Generalize into "x_tables" layer and "{ip,ip6,arp}_tables"
  */
 #include <linux/config.h>
 #include <linux/cache.h>
+#include <linux/capability.h>
 #include <linux/skbuff.h>
 #include <linux/kmod.h>
 #include <linux/vmalloc.h>
 #include <linux/netdevice.h>
 #include <linux/module.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
 #include <linux/icmp.h>
 #include <net/ip.h>
 #include <asm/uaccess.h>
@@ -29,6 +30,7 @@
 #include <linux/err.h>
 #include <linux/cpumask.h>
 
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 
 MODULE_LICENSE("GPL");
@@ -61,14 +63,6 @@ do {								\
 #else
 #define IP_NF_ASSERT(x)
 #endif
-#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
-
-static DECLARE_MUTEX(ipt_mutex);
-
-/* Must have mutex */
-#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
-#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
-#include <linux/netfilter_ipv4/listhelp.h>
 
 #if 0
 /* All the better to debug you with... */
@@ -83,48 +77,8 @@ static DECLARE_MUTEX(ipt_mutex);
    context stops packets coming through and allows user context to read
    the counters or update the rules.
 
-   To be cache friendly on SMP, we arrange them like so:
-   [ n-entries ]
-   ... cache-align padding ...
-   [ n-entries ]
-
    Hence the start of any table is given by get_table() below.  */
 
-/* The table itself */
-struct ipt_table_info
-{
-	/* Size per table */
-	unsigned int size;
-	/* Number of entries: FIXME. --RR */
-	unsigned int number;
-	/* Initial number of entries. Needed for module usage count */
-	unsigned int initial_entries;
-
-	/* Entry points and underflows */
-	unsigned int hook_entry[NF_IP_NUMHOOKS];
-	unsigned int underflow[NF_IP_NUMHOOKS];
-
-	/* ipt_entry tables: one per CPU */
-	char entries[0] ____cacheline_aligned;
-};
-
-static LIST_HEAD(ipt_target);
-static LIST_HEAD(ipt_match);
-static LIST_HEAD(ipt_tables);
-#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
-
-#ifdef CONFIG_SMP
-#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
-#else
-#define TABLE_OFFSET(t,p) 0
-#endif
-
-#if 0
-#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
-#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
-#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
-#endif
-
 /* Returns whether matches rule or not. */
 static inline int
 ip_packet_match(const struct iphdr *ip,
@@ -243,7 +197,8 @@ int do_match(struct ipt_entry_match *m,
 	     int *hotdrop)
 {
 	/* Stop iteration if it doesn't match */
-	if (!m->u.kernel.match->match(skb, in, out, m->data, offset, hotdrop))
+	if (!m->u.kernel.match->match(skb, in, out, m->data, offset, 
+	    skb->nh.iph->ihl*4, hotdrop))
 		return 1;
 	else
 		return 0;
@@ -274,6 +229,7 @@ ipt_do_table(struct sk_buff **pskb,
 	const char *indev, *outdev;
 	void *table_base;
 	struct ipt_entry *e, *back;
+	struct xt_table_info *private = table->private;
 
 	/* Initialization */
 	ip = (*pskb)->nh.iph;
@@ -290,25 +246,11 @@ ipt_do_table(struct sk_buff **pskb,
 
 	read_lock_bh(&table->lock);
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
-	table_base = (void *)table->private->entries
-		+ TABLE_OFFSET(table->private, smp_processor_id());
-	e = get_entry(table_base, table->private->hook_entry[hook]);
-
-#ifdef CONFIG_NETFILTER_DEBUG
-	/* Check noone else using our table */
-	if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
-	    && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
-		printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
-		       smp_processor_id(),
-		       table->name,
-		       &((struct ipt_entry *)table_base)->comefrom,
-		       ((struct ipt_entry *)table_base)->comefrom);
-	}
-	((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
-#endif
+	table_base = (void *)private->entries[smp_processor_id()];
+	e = get_entry(table_base, private->hook_entry[hook]);
 
 	/* For return from builtin chain */
-	back = get_entry(table_base, table->private->underflow[hook]);
+	back = get_entry(table_base, private->underflow[hook]);
 
 	do {
 		IP_NF_ASSERT(e);
@@ -394,9 +336,6 @@ ipt_do_table(struct sk_buff **pskb,
 		}
 	} while (!hotdrop);
 
-#ifdef CONFIG_NETFILTER_DEBUG
-	((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
-#endif
 	read_unlock_bh(&table->lock);
 
 #ifdef DEBUG_ALLOW_ALL
@@ -408,145 +347,6 @@ ipt_do_table(struct sk_buff **pskb,
 #endif
 }
 
-/*
- * These are weird, but module loading must not be done with mutex
- * held (since they will register), and we have to have a single
- * function to use try_then_request_module().
- */
-
-/* Find table by name, grabs mutex & ref.  Returns ERR_PTR() on error. */
-static inline struct ipt_table *find_table_lock(const char *name)
-{
-	struct ipt_table *t;
-
-	if (down_interruptible(&ipt_mutex) != 0)
-		return ERR_PTR(-EINTR);
-
-	list_for_each_entry(t, &ipt_tables, list)
-		if (strcmp(t->name, name) == 0 && try_module_get(t->me))
-			return t;
-	up(&ipt_mutex);
-	return NULL;
-}
-
-/* Find match, grabs ref.  Returns ERR_PTR() on error. */
-static inline struct ipt_match *find_match(const char *name, u8 revision)
-{
-	struct ipt_match *m;
-	int err = 0;
-
-	if (down_interruptible(&ipt_mutex) != 0)
-		return ERR_PTR(-EINTR);
-
-	list_for_each_entry(m, &ipt_match, list) {
-		if (strcmp(m->name, name) == 0) {
-			if (m->revision == revision) {
-				if (try_module_get(m->me)) {
-					up(&ipt_mutex);
-					return m;
-				}
-			} else
-				err = -EPROTOTYPE; /* Found something. */
-		}
-	}
-	up(&ipt_mutex);
-	return ERR_PTR(err);
-}
-
-/* Find target, grabs ref.  Returns ERR_PTR() on error. */
-static inline struct ipt_target *find_target(const char *name, u8 revision)
-{
-	struct ipt_target *t;
-	int err = 0;
-
-	if (down_interruptible(&ipt_mutex) != 0)
-		return ERR_PTR(-EINTR);
-
-	list_for_each_entry(t, &ipt_target, list) {
-		if (strcmp(t->name, name) == 0) {
-			if (t->revision == revision) {
-				if (try_module_get(t->me)) {
-					up(&ipt_mutex);
-					return t;
-				}
-			} else
-				err = -EPROTOTYPE; /* Found something. */
-		}
-	}
-	up(&ipt_mutex);
-	return ERR_PTR(err);
-}
-
-struct ipt_target *ipt_find_target(const char *name, u8 revision)
-{
-	struct ipt_target *target;
-
-	target = try_then_request_module(find_target(name, revision),
-					 "ipt_%s", name);
-	if (IS_ERR(target) || !target)
-		return NULL;
-	return target;
-}
-
-static int match_revfn(const char *name, u8 revision, int *bestp)
-{
-	struct ipt_match *m;
-	int have_rev = 0;
-
-	list_for_each_entry(m, &ipt_match, list) {
-		if (strcmp(m->name, name) == 0) {
-			if (m->revision > *bestp)
-				*bestp = m->revision;
-			if (m->revision == revision)
-				have_rev = 1;
-		}
-	}
-	return have_rev;
-}
-
-static int target_revfn(const char *name, u8 revision, int *bestp)
-{
-	struct ipt_target *t;
-	int have_rev = 0;
-
-	list_for_each_entry(t, &ipt_target, list) {
-		if (strcmp(t->name, name) == 0) {
-			if (t->revision > *bestp)
-				*bestp = t->revision;
-			if (t->revision == revision)
-				have_rev = 1;
-		}
-	}
-	return have_rev;
-}
-
-/* Returns true or false (if no such extension at all) */
-static inline int find_revision(const char *name, u8 revision,
-				int (*revfn)(const char *, u8, int *),
-				int *err)
-{
-	int have_rev, best = -1;
-
-	if (down_interruptible(&ipt_mutex) != 0) {
-		*err = -EINTR;
-		return 1;
-	}
-	have_rev = revfn(name, revision, &best);
-	up(&ipt_mutex);
-
-	/* Nothing at all?  Return 0 to try loading module. */
-	if (best == -1) {
-		*err = -ENOENT;
-		return 0;
-	}
-
-	*err = best;
-	if (!have_rev)
-		*err = -EPROTONOSUPPORT;
-	return 1;
-}
-
-
 /* All zeroes == unconditional rule. */
 static inline int
 unconditional(const struct ipt_ip *ip)
@@ -563,7 +363,8 @@ unconditional(const struct ipt_ip *ip)
 /* Figures out from what hook each rule can be called: returns 0 if
    there are loops.  Puts hook bitmask in comefrom. */
 static int
-mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
+mark_source_chains(struct xt_table_info *newinfo,
+		   unsigned int valid_hooks, void *entry0)
 {
 	unsigned int hook;
 
@@ -572,7 +373,7 @@ mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
 	for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
 		unsigned int pos = newinfo->hook_entry[hook];
 		struct ipt_entry *e
-			= (struct ipt_entry *)(newinfo->entries + pos);
+			= (struct ipt_entry *)(entry0 + pos);
 
 		if (!(valid_hooks & (1 << hook)))
 			continue;
@@ -622,13 +423,13 @@ mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
 						goto next;
 
 					e = (struct ipt_entry *)
-						(newinfo->entries + pos);
+						(entry0 + pos);
 				} while (oldpos == pos + e->next_offset);
 
 				/* Move along one */
 				size = e->next_offset;
 				e = (struct ipt_entry *)
-					(newinfo->entries + pos + size);
+					(entry0 + pos + size);
 				e->counters.pcnt = pos;
 				pos += size;
 			} else {
@@ -645,7 +446,7 @@ mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
 					newpos = pos + e->next_offset;
 				}
 				e = (struct ipt_entry *)
-					(newinfo->entries + newpos);
+					(entry0 + newpos);
 				e->counters.pcnt = pos;
 				pos = newpos;
 			}
@@ -708,7 +509,7 @@ check_match(struct ipt_entry_match *m,
 {
 	struct ipt_match *match;
 
-	match = try_then_request_module(find_match(m->u.user.name,
+	match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
 						   m->u.user.revision),
 					"ipt_%s", m->u.user.name);
 	if (IS_ERR(match) || !match) {
@@ -753,7 +554,8 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size,
 		goto cleanup_matches;
 
 	t = ipt_get_target(e);
-	target = try_then_request_module(find_target(t->u.user.name,
+	target = try_then_request_module(xt_find_target(AF_INET,
+						     t->u.user.name,
 						     t->u.user.revision),
 					 "ipt_%s", t->u.user.name);
 	if (IS_ERR(target) || !target) {
@@ -790,7 +592,7 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size,
 
 static inline int
 check_entry_size_and_hooks(struct ipt_entry *e,
-			   struct ipt_table_info *newinfo,
+			   struct xt_table_info *newinfo,
 			   unsigned char *base,
 			   unsigned char *limit,
 			   const unsigned int *hook_entries,
@@ -824,7 +626,7 @@ check_entry_size_and_hooks(struct ipt_entry *e,
            < 0 (not IPT_RETURN). --RR */
 
 	/* Clear counters and comefrom */
-	e->counters = ((struct ipt_counters) { 0, 0 });
+	e->counters = ((struct xt_counters) { 0, 0 });
 	e->comefrom = 0;
 
 	(*i)++;
@@ -854,7 +656,8 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i)
 static int
 translate_table(const char *name,
 		unsigned int valid_hooks,
-		struct ipt_table_info *newinfo,
+		struct xt_table_info *newinfo,
+		void *entry0,
 		unsigned int size,
 		unsigned int number,
 		const unsigned int *hook_entries,
@@ -875,11 +678,11 @@ translate_table(const char *name,
 	duprintf("translate_table: size %u\n", newinfo->size);
 	i = 0;
 	/* Walk through entries, checking offsets. */
-	ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+	ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
 				check_entry_size_and_hooks,
 				newinfo,
-				newinfo->entries,
-				newinfo->entries + size,
+				entry0,
+				entry0 + size,
 				hook_entries, underflows, &i);
 	if (ret != 0)
 		return ret;
@@ -907,95 +710,79 @@ translate_table(const char *name,
 		}
 	}
 
-	if (!mark_source_chains(newinfo, valid_hooks))
+	if (!mark_source_chains(newinfo, valid_hooks, entry0))
 		return -ELOOP;
 
 	/* Finally, each sanity check must pass */
 	i = 0;
-	ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+	ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
 				check_entry, name, size, &i);
 
 	if (ret != 0) {
-		IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+		IPT_ENTRY_ITERATE(entry0, newinfo->size,
 				  cleanup_entry, &i);
 		return ret;
 	}
 
 	/* And one copy for every other CPU */
 	for_each_cpu(i) {
-		if (i == 0)
-			continue;
-		memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
-		       newinfo->entries,
-		       SMP_ALIGN(newinfo->size));
+		if (newinfo->entries[i] && newinfo->entries[i] != entry0)
+			memcpy(newinfo->entries[i], entry0, newinfo->size);
 	}
 
 	return ret;
 }
 
-static struct ipt_table_info *
-replace_table(struct ipt_table *table,
-	      unsigned int num_counters,
-	      struct ipt_table_info *newinfo,
-	      int *error)
+/* Gets counters. */
+static inline int
+add_entry_to_counter(const struct ipt_entry *e,
+		     struct xt_counters total[],
+		     unsigned int *i)
 {
-	struct ipt_table_info *oldinfo;
-
-#ifdef CONFIG_NETFILTER_DEBUG
-	{
-		struct ipt_entry *table_base;
-		unsigned int i;
-
-		for_each_cpu(i) {
-			table_base =
-				(void *)newinfo->entries
-				+ TABLE_OFFSET(newinfo, i);
-
-			table_base->comefrom = 0xdead57ac;
-		}
-	}
-#endif
-
-	/* Do the substitution. */
-	write_lock_bh(&table->lock);
-	/* Check inside lock: is the old number correct? */
-	if (num_counters != table->private->number) {
-		duprintf("num_counters != table->private->number (%u/%u)\n",
-			 num_counters, table->private->number);
-		write_unlock_bh(&table->lock);
-		*error = -EAGAIN;
-		return NULL;
-	}
-	oldinfo = table->private;
-	table->private = newinfo;
-	newinfo->initial_entries = oldinfo->initial_entries;
-	write_unlock_bh(&table->lock);
+	ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
 
-	return oldinfo;
+	(*i)++;
+	return 0;
 }
 
-/* Gets counters. */
 static inline int
-add_entry_to_counter(const struct ipt_entry *e,
+set_entry_to_counter(const struct ipt_entry *e,
 		     struct ipt_counters total[],
 		     unsigned int *i)
 {
-	ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
+	SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
 
 	(*i)++;
 	return 0;
 }
 
 static void
-get_counters(const struct ipt_table_info *t,
-	     struct ipt_counters counters[])
+get_counters(const struct xt_table_info *t,
+	     struct xt_counters counters[])
 {
 	unsigned int cpu;
 	unsigned int i;
+	unsigned int curcpu;
+
+	/* Instead of clearing (by a previous call to memset())
+	 * the counters and using adds, we set the counters
+	 * with data used by 'current' CPU.
+	 * We don't care about preemption here.
+	 */
+	curcpu = raw_smp_processor_id();
+
+	i = 0;
+	IPT_ENTRY_ITERATE(t->entries[curcpu],
+			  t->size,
+			  set_entry_to_counter,
+			  counters,
+			  &i);
 
 	for_each_cpu(cpu) {
+		if (cpu == curcpu)
+			continue;
 		i = 0;
-		IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
+		IPT_ENTRY_ITERATE(t->entries[cpu],
 				  t->size,
 				  add_entry_to_counter,
 				  counters,
@@ -1010,26 +797,32 @@ copy_entries_to_user(unsigned int total_size,
 {
 	unsigned int off, num, countersize;
 	struct ipt_entry *e;
-	struct ipt_counters *counters;
+	struct xt_counters *counters;
+	struct xt_table_info *private = table->private;
 	int ret = 0;
+	void *loc_cpu_entry;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	   (other than comefrom, which userspace doesn't care
 	   about). */
-	countersize = sizeof(struct ipt_counters) * table->private->number;
-	counters = vmalloc(countersize);
+	countersize = sizeof(struct xt_counters) * private->number;
+	counters = vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
 		return -ENOMEM;
 
 	/* First, sum counters... */
-	memset(counters, 0, countersize);
 	write_lock_bh(&table->lock);
-	get_counters(table->private, counters);
+	get_counters(private, counters);
 	write_unlock_bh(&table->lock);
 
-	/* ... then copy entire thing from CPU 0... */
-	if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
+	/* choose the copy that is on our node/cpu, ...
+	 * This choice is lazy (because current thread is
+	 * allowed to migrate to another cpu)
+	 */
+	loc_cpu_entry = private->entries[raw_smp_processor_id()];
+	/* ... then copy entire thing ... */
+	if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
 		ret = -EFAULT;
 		goto free_counters;
 	}
@@ -1041,7 +834,7 @@ copy_entries_to_user(unsigned int total_size,
 		struct ipt_entry_match *m;
 		struct ipt_entry_target *t;
 
-		e = (struct ipt_entry *)(table->private->entries + off);
+		e = (struct ipt_entry *)(loc_cpu_entry + off);
 		if (copy_to_user(userptr + off
 				 + offsetof(struct ipt_entry, counters),
 				 &counters[num],
@@ -1089,21 +882,22 @@ get_entries(const struct ipt_get_entries *entries,
 	int ret;
 	struct ipt_table *t;
 
-	t = find_table_lock(entries->name);
+	t = xt_find_table_lock(AF_INET, entries->name);
 	if (t && !IS_ERR(t)) {
+		struct xt_table_info *private = t->private;
 		duprintf("t->private->number = %u\n",
-			 t->private->number);
-		if (entries->size == t->private->size)
-			ret = copy_entries_to_user(t->private->size,
+			 private->number);
+		if (entries->size == private->size)
+			ret = copy_entries_to_user(private->size,
 						   t, uptr->entrytable);
 		else {
 			duprintf("get_entries: I've got %u not %u!\n",
-				 t->private->size,
+				 private->size,
 				 entries->size);
 			ret = -EINVAL;
 		}
 		module_put(t->me);
-		up(&ipt_mutex);
+		xt_table_unlock(t);
 	} else
 		ret = t ? PTR_ERR(t) : -ENOENT;
 
@@ -1116,8 +910,9 @@ do_replace(void __user *user, unsigned int len)
 	int ret;
 	struct ipt_replace tmp;
 	struct ipt_table *t;
-	struct ipt_table_info *newinfo, *oldinfo;
-	struct ipt_counters *counters;
+	struct xt_table_info *newinfo, *oldinfo;
+	struct xt_counters *counters;
+	void *loc_cpu_entry, *loc_cpu_old_entry;
 
 	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
 		return -EFAULT;
@@ -1126,38 +921,33 @@ do_replace(void __user *user, unsigned int len)
 	if (len != sizeof(tmp) + tmp.size)
 		return -ENOPROTOOPT;
 
-	/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
-	if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
-		return -ENOMEM;
-
-	newinfo = vmalloc(sizeof(struct ipt_table_info)
-			  + SMP_ALIGN(tmp.size) * 
-			  	(highest_possible_processor_id()+1));
+	newinfo = xt_alloc_table_info(tmp.size);
 	if (!newinfo)
 		return -ENOMEM;
 
-	if (copy_from_user(newinfo->entries, user + sizeof(tmp),
+	/* choose the copy that is on our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
 			   tmp.size) != 0) {
 		ret = -EFAULT;
 		goto free_newinfo;
 	}
 
-	counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
+	counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
 	if (!counters) {
 		ret = -ENOMEM;
 		goto free_newinfo;
 	}
-	memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
 
 	ret = translate_table(tmp.name, tmp.valid_hooks,
-			      newinfo, tmp.size, tmp.num_entries,
+			      newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
 			      tmp.hook_entry, tmp.underflow);
 	if (ret != 0)
 		goto free_newinfo_counters;
 
 	duprintf("ip_tables: Translated table\n");
 
-	t = try_then_request_module(find_table_lock(tmp.name),
+	t = try_then_request_module(xt_find_table_lock(AF_INET, tmp.name),
 				    "iptable_%s", tmp.name);
 	if (!t || IS_ERR(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
@@ -1172,7 +962,7 @@ do_replace(void __user *user, unsigned int len)
 		goto put_module;
 	}
 
-	oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
+	oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret);
 	if (!oldinfo)
 		goto put_module;
 
@@ -1189,24 +979,25 @@ do_replace(void __user *user, unsigned int len)
 	/* Get the old counters. */
 	get_counters(oldinfo, counters);
 	/* Decrease module usage counts and free resource */
-	IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
-	vfree(oldinfo);
+	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
+	IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
+	xt_free_table_info(oldinfo);
 	if (copy_to_user(tmp.counters, counters,
-			 sizeof(struct ipt_counters) * tmp.num_counters) != 0)
+			 sizeof(struct xt_counters) * tmp.num_counters) != 0)
 		ret = -EFAULT;
 	vfree(counters);
-	up(&ipt_mutex);
+	xt_table_unlock(t);
 	return ret;
 
  put_module:
 	module_put(t->me);
-	up(&ipt_mutex);
+	xt_table_unlock(t);
  free_newinfo_counters_untrans:
-	IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
+	IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
  free_newinfo_counters:
 	vfree(counters);
  free_newinfo:
-	vfree(newinfo);
+	xt_free_table_info(newinfo);
 	return ret;
 }
 
@@ -1214,7 +1005,7 @@ do_replace(void __user *user, unsigned int len)
  * and everything is OK. */
 static inline int
 add_counter_to_entry(struct ipt_entry *e,
-		     const struct ipt_counters addme[],
+		     const struct xt_counters addme[],
 		     unsigned int *i)
 {
 #if 0
@@ -1236,17 +1027,19 @@ static int
 do_add_counters(void __user *user, unsigned int len)
 {
 	unsigned int i;
-	struct ipt_counters_info tmp, *paddc;
+	struct xt_counters_info tmp, *paddc;
 	struct ipt_table *t;
+	struct xt_table_info *private;
 	int ret = 0;
+	void *loc_cpu_entry;
 
 	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
 		return -EFAULT;
 
-	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
+	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
 		return -EINVAL;
 
-	paddc = vmalloc(len);
+	paddc = vmalloc_node(len, numa_node_id());
 	if (!paddc)
 		return -ENOMEM;
 
@@ -1255,27 +1048,30 @@ do_add_counters(void __user *user, unsigned int len)
 		goto free;
 	}
 
-	t = find_table_lock(tmp.name);
+	t = xt_find_table_lock(AF_INET, tmp.name);
 	if (!t || IS_ERR(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
 		goto free;
 	}
 
 	write_lock_bh(&t->lock);
-	if (t->private->number != paddc->num_counters) {
+	private = t->private;
+	if (private->number != paddc->num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
 
 	i = 0;
-	IPT_ENTRY_ITERATE(t->private->entries,
-			  t->private->size,
+	/* Choose the copy that is on our node */
+	loc_cpu_entry = private->entries[raw_smp_processor_id()];
+	IPT_ENTRY_ITERATE(loc_cpu_entry,
+			  private->size,
 			  add_counter_to_entry,
 			  paddc->counters,
 			  &i);
  unlock_up_free:
 	write_unlock_bh(&t->lock);
-	up(&ipt_mutex);
+	xt_table_unlock(t);
 	module_put(t->me);
  free:
 	vfree(paddc);
@@ -1334,25 +1130,26 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 		}
 		name[IPT_TABLE_MAXNAMELEN-1] = '\0';
 
-		t = try_then_request_module(find_table_lock(name),
+		t = try_then_request_module(xt_find_table_lock(AF_INET, name),
 					    "iptable_%s", name);
 		if (t && !IS_ERR(t)) {
 			struct ipt_getinfo info;
+			struct xt_table_info *private = t->private;
 
 			info.valid_hooks = t->valid_hooks;
-			memcpy(info.hook_entry, t->private->hook_entry,
+			memcpy(info.hook_entry, private->hook_entry,
 			       sizeof(info.hook_entry));
-			memcpy(info.underflow, t->private->underflow,
+			memcpy(info.underflow, private->underflow,
 			       sizeof(info.underflow));
-			info.num_entries = t->private->number;
-			info.size = t->private->size;
+			info.num_entries = private->number;
+			info.size = private->size;
 			memcpy(info.name, name, sizeof(info.name));
 
 			if (copy_to_user(user, &info, *len) != 0)
 				ret = -EFAULT;
 			else
 				ret = 0;
-			up(&ipt_mutex);
+			xt_table_unlock(t);
 			module_put(t->me);
 		} else
 			ret = t ? PTR_ERR(t) : -ENOENT;
@@ -1379,7 +1176,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 	case IPT_SO_GET_REVISION_MATCH:
 	case IPT_SO_GET_REVISION_TARGET: {
 		struct ipt_get_revision rev;
-		int (*revfn)(const char *, u8, int *);
+		int target;
 
 		if (*len != sizeof(rev)) {
 			ret = -EINVAL;
@@ -1391,12 +1188,13 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 		}
 
 		if (cmd == IPT_SO_GET_REVISION_TARGET)
-			revfn = target_revfn;
+			target = 1;
 		else
-			revfn = match_revfn;
+			target = 0;
 
-		try_then_request_module(find_revision(rev.name, rev.revision,
-						      revfn, &ret),
+		try_then_request_module(xt_find_revision(AF_INET, rev.name,
+							 rev.revision,
+							 target, &ret),
 					"ipt_%s", rev.name);
 		break;
 	}
@@ -1409,309 +1207,53 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 	return ret;
 }
 
-/* Registration hooks for targets. */
-int
-ipt_register_target(struct ipt_target *target)
+int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
 {
 	int ret;
-
-	ret = down_interruptible(&ipt_mutex);
-	if (ret != 0)
-		return ret;
-	list_add(&target->list, &ipt_target);
-	up(&ipt_mutex);
-	return ret;
-}
-
-void
-ipt_unregister_target(struct ipt_target *target)
-{
-	down(&ipt_mutex);
-	LIST_DELETE(&ipt_target, target);
-	up(&ipt_mutex);
-}
-
-int
-ipt_register_match(struct ipt_match *match)
-{
-	int ret;
-
-	ret = down_interruptible(&ipt_mutex);
-	if (ret != 0)
-		return ret;
-
-	list_add(&match->list, &ipt_match);
-	up(&ipt_mutex);
-
-	return ret;
-}
-
-void
-ipt_unregister_match(struct ipt_match *match)
-{
-	down(&ipt_mutex);
-	LIST_DELETE(&ipt_match, match);
-	up(&ipt_mutex);
-}
-
-int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
-{
-	int ret;
-	struct ipt_table_info *newinfo;
-	static struct ipt_table_info bootstrap
+	struct xt_table_info *newinfo;
+	static struct xt_table_info bootstrap
 		= { 0, 0, 0, { 0 }, { 0 }, { } };
+	void *loc_cpu_entry;
 
-	newinfo = vmalloc(sizeof(struct ipt_table_info)
-			  + SMP_ALIGN(repl->size) * 
-			  		(highest_possible_processor_id()+1));
+	newinfo = xt_alloc_table_info(repl->size);
 	if (!newinfo)
 		return -ENOMEM;
 
-	memcpy(newinfo->entries, repl->entries, repl->size);
+	/* Choose the copy on our node/cpu;
+	 * we don't care about preemption here.
+	 */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	memcpy(loc_cpu_entry, repl->entries, repl->size);
 
 	ret = translate_table(table->name, table->valid_hooks,
-			      newinfo, repl->size,
+			      newinfo, loc_cpu_entry, repl->size,
 			      repl->num_entries,
 			      repl->hook_entry,
 			      repl->underflow);
 	if (ret != 0) {
-		vfree(newinfo);
+		xt_free_table_info(newinfo);
 		return ret;
 	}
 
-	ret = down_interruptible(&ipt_mutex);
-	if (ret != 0) {
-		vfree(newinfo);
+	if ((ret = xt_register_table(table, &bootstrap, newinfo)) != 0) {
+		xt_free_table_info(newinfo);
 		return ret;
 	}
 
-	/* Don't autoload: we'd eat our tail... */
-	if (list_named_find(&ipt_tables, table->name)) {
-		ret = -EEXIST;
-		goto free_unlock;
-	}
-
-	/* Simplifies replace_table code. */
-	table->private = &bootstrap;
-	if (!replace_table(table, 0, newinfo, &ret))
-		goto free_unlock;
-
-	duprintf("table->private->number = %u\n",
-		 table->private->number);
-	
-	/* save number of initial entries */
-	table->private->initial_entries = table->private->number;
-
-	rwlock_init(&table->lock);
-	list_prepend(&ipt_tables, table);
-
- unlock:
-	up(&ipt_mutex);
-	return ret;
-
- free_unlock:
-	vfree(newinfo);
-	goto unlock;
+	return 0;
 }
 
 void ipt_unregister_table(struct ipt_table *table)
 {
-	down(&ipt_mutex);
-	LIST_DELETE(&ipt_tables, table);
-	up(&ipt_mutex);
+	struct xt_table_info *private;
+	void *loc_cpu_entry;
 
-	/* Decrease module usage counts and free resources */
-	IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
-			  cleanup_entry, NULL);
-	vfree(table->private);
-}
-
-/* Returns 1 if the port is matched by the range, 0 otherwise */
-static inline int
-port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
-{
-	int ret;
-
-	ret = (port >= min && port <= max) ^ invert;
-	return ret;
-}
-
-static int
-tcp_find_option(u_int8_t option,
-		const struct sk_buff *skb,
-		unsigned int optlen,
-		int invert,
-		int *hotdrop)
-{
-	/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
-	u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
-	unsigned int i;
-
-	duprintf("tcp_match: finding option\n");
-
-	if (!optlen)
-		return invert;
-
-	/* If we don't have the whole header, drop packet. */
-	op = skb_header_pointer(skb,
-				skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
-				optlen, _opt);
-	if (op == NULL) {
-		*hotdrop = 1;
-		return 0;
-	}
-
-	for (i = 0; i < optlen; ) {
-		if (op[i] == option) return !invert;
-		if (op[i] < 2) i++;
-		else i += op[i+1]?:1;
-	}
-
-	return invert;
-}
-
-static int
-tcp_match(const struct sk_buff *skb,
-	  const struct net_device *in,
-	  const struct net_device *out,
-	  const void *matchinfo,
-	  int offset,
-	  int *hotdrop)
-{
-	struct tcphdr _tcph, *th;
-	const struct ipt_tcp *tcpinfo = matchinfo;
-
-	if (offset) {
-		/* To quote Alan:
-
-		   Don't allow a fragment of TCP 8 bytes in. Nobody normal
-		   causes this. Its a cracker trying to break in by doing a
-		   flag overwrite to pass the direction checks.
-		*/
-		if (offset == 1) {
-			duprintf("Dropping evil TCP offset=1 frag.\n");
-			*hotdrop = 1;
-		}
-		/* Must not be a fragment. */
-		return 0;
-	}
-
-#define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
-
-	th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
-				sizeof(_tcph), &_tcph);
-	if (th == NULL) {
-		/* We've been asked to examine this packet, and we
-		   can't.  Hence, no choice but to drop. */
-		duprintf("Dropping evil TCP offset=0 tinygram.\n");
-		*hotdrop = 1;
-		return 0;
-	}
-
-	if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
-			ntohs(th->source),
-			!!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)))
-		return 0;
-	if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
-			ntohs(th->dest),
-			!!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)))
-		return 0;
-	if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
-		      == tcpinfo->flg_cmp,
-		      IPT_TCP_INV_FLAGS))
-		return 0;
-	if (tcpinfo->option) {
-		if (th->doff * 4 < sizeof(_tcph)) {
-			*hotdrop = 1;
-			return 0;
-		}
-		if (!tcp_find_option(tcpinfo->option, skb,
-				     th->doff*4 - sizeof(_tcph),
-				     tcpinfo->invflags & IPT_TCP_INV_OPTION,
-				     hotdrop))
-			return 0;
-	}
-	return 1;
-}
-
-/* Called when user tries to insert an entry of this type. */
-static int
-tcp_checkentry(const char *tablename,
-	       const struct ipt_ip *ip,
-	       void *matchinfo,
-	       unsigned int matchsize,
-	       unsigned int hook_mask)
-{
-	const struct ipt_tcp *tcpinfo = matchinfo;
-
-	/* Must specify proto == TCP, and no unknown invflags */
-	return ip->proto == IPPROTO_TCP
-		&& !(ip->invflags & IPT_INV_PROTO)
-		&& matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
-		&& !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
-}
-
-static int
-udp_match(const struct sk_buff *skb,
-	  const struct net_device *in,
-	  const struct net_device *out,
-	  const void *matchinfo,
-	  int offset,
-	  int *hotdrop)
-{
-	struct udphdr _udph, *uh;
-	const struct ipt_udp *udpinfo = matchinfo;
-
-	/* Must not be a fragment. */
-	if (offset)
-		return 0;
-
-	uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
-				sizeof(_udph), &_udph);
-	if (uh == NULL) {
-		/* We've been asked to examine this packet, and we
-		   can't.  Hence, no choice but to drop. */
-		duprintf("Dropping evil UDP tinygram.\n");
-		*hotdrop = 1;
-		return 0;
-	}
+ 	private = xt_unregister_table(table);
 
-	return port_match(udpinfo->spts[0], udpinfo->spts[1],
-			  ntohs(uh->source),
-			  !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
-		&& port_match(udpinfo->dpts[0], udpinfo->dpts[1],
-			      ntohs(uh->dest),
-			      !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
-}
-
-/* Called when user tries to insert an entry of this type. */
-static int
-udp_checkentry(const char *tablename,
-	       const struct ipt_ip *ip,
-	       void *matchinfo,
-	       unsigned int matchinfosize,
-	       unsigned int hook_mask)
-{
-	const struct ipt_udp *udpinfo = matchinfo;
-
-	/* Must specify proto == UDP, and no unknown invflags */
-	if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
-		duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
-			 IPPROTO_UDP);
-		return 0;
-	}
-	if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
-		duprintf("ipt_udp: matchsize %u != %u\n",
-			 matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
-		return 0;
-	}
-	if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
-		duprintf("ipt_udp: unknown flags %X\n",
-			 udpinfo->invflags);
-		return 0;
-	}
-
-	return 1;
+	/* Decrease module usage counts and free resources */
+	loc_cpu_entry = private->entries[raw_smp_processor_id()];
+	IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
+	xt_free_table_info(private);
 }
 
 /* Returns 1 if the type and code is matched by the range, 0 otherwise */
@@ -1730,6 +1272,7 @@ icmp_match(const struct sk_buff *skb,
 	   const struct net_device *out,
 	   const void *matchinfo,
 	   int offset,
+	   unsigned int protoff,
 	   int *hotdrop)
 {
 	struct icmphdr _icmph, *ic;
@@ -1739,8 +1282,7 @@ icmp_match(const struct sk_buff *skb,
 	if (offset)
 		return 0;
 
-	ic = skb_header_pointer(skb, skb->nh.iph->ihl*4,
-				sizeof(_icmph), &_icmph);
+	ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph);
 	if (ic == NULL) {
 		/* We've been asked to examine this packet, and we
 		 * can't.  Hence, no choice but to drop.
@@ -1760,11 +1302,12 @@ icmp_match(const struct sk_buff *skb,
 /* Called when user tries to insert an entry of this type. */
 static int
 icmp_checkentry(const char *tablename,
-	   const struct ipt_ip *ip,
+	   const void *info,
 	   void *matchinfo,
 	   unsigned int matchsize,
 	   unsigned int hook_mask)
 {
+	const struct ipt_ip *ip = info;
 	const struct ipt_icmp *icmpinfo = matchinfo;
 
 	/* Must specify proto == ICMP, and no unknown invflags */
@@ -1794,123 +1337,22 @@ static struct nf_sockopt_ops ipt_sockopts = {
 	.get		= do_ipt_get_ctl,
 };
 
-static struct ipt_match tcp_matchstruct = {
-	.name		= "tcp",
-	.match		= &tcp_match,
-	.checkentry	= &tcp_checkentry,
-};
-
-static struct ipt_match udp_matchstruct = {
-	.name		= "udp",
-	.match		= &udp_match,
-	.checkentry	= &udp_checkentry,
-};
-
 static struct ipt_match icmp_matchstruct = {
 	.name		= "icmp",
 	.match		= &icmp_match,
 	.checkentry	= &icmp_checkentry,
 };
 
-#ifdef CONFIG_PROC_FS
-static inline int print_name(const char *i,
-			     off_t start_offset, char *buffer, int length,
-			     off_t *pos, unsigned int *count)
-{
-	if ((*count)++ >= start_offset) {
-		unsigned int namelen;
-
-		namelen = sprintf(buffer + *pos, "%s\n",
-				  i + sizeof(struct list_head));
-		if (*pos + namelen > length) {
-			/* Stop iterating */
-			return 1;
-		}
-		*pos += namelen;
-	}
-	return 0;
-}
-
-static inline int print_target(const struct ipt_target *t,
-                               off_t start_offset, char *buffer, int length,
-                               off_t *pos, unsigned int *count)
-{
-	if (t == &ipt_standard_target || t == &ipt_error_target)
-		return 0;
-	return print_name((char *)t, start_offset, buffer, length, pos, count);
-}
-
-static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
-{
-	off_t pos = 0;
-	unsigned int count = 0;
-
-	if (down_interruptible(&ipt_mutex) != 0)
-		return 0;
-
-	LIST_FIND(&ipt_tables, print_name, void *,
-		  offset, buffer, length, &pos, &count);
-
-	up(&ipt_mutex);
-
-	/* `start' hack - see fs/proc/generic.c line ~105 */
-	*start=(char *)((unsigned long)count-offset);
-	return pos;
-}
-
-static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
-{
-	off_t pos = 0;
-	unsigned int count = 0;
-
-	if (down_interruptible(&ipt_mutex) != 0)
-		return 0;
-
-	LIST_FIND(&ipt_target, print_target, struct ipt_target *,
-		  offset, buffer, length, &pos, &count);
-	
-	up(&ipt_mutex);
-
-	*start = (char *)((unsigned long)count - offset);
-	return pos;
-}
-
-static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
-{
-	off_t pos = 0;
-	unsigned int count = 0;
-
-	if (down_interruptible(&ipt_mutex) != 0)
-		return 0;
-	
-	LIST_FIND(&ipt_match, print_name, void *,
-		  offset, buffer, length, &pos, &count);
-
-	up(&ipt_mutex);
-
-	*start = (char *)((unsigned long)count - offset);
-	return pos;
-}
-
-static const struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
-{ { "ip_tables_names", ipt_get_tables },
-  { "ip_tables_targets", ipt_get_targets },
-  { "ip_tables_matches", ipt_get_matches },
-  { NULL, NULL} };
-#endif /*CONFIG_PROC_FS*/
-
 static int __init init(void)
 {
 	int ret;
 
+	xt_proto_init(AF_INET);
+
 	/* Noone else will be downing sem now, so we won't sleep */
-	down(&ipt_mutex);
-	list_append(&ipt_target, &ipt_standard_target);
-	list_append(&ipt_target, &ipt_error_target);
-	list_append(&ipt_match, &tcp_matchstruct);
-	list_append(&ipt_match, &udp_matchstruct);
-	list_append(&ipt_match, &icmp_matchstruct);
-	up(&ipt_mutex);
+	xt_register_target(AF_INET, &ipt_standard_target);
+	xt_register_target(AF_INET, &ipt_error_target);
+	xt_register_match(AF_INET, &icmp_matchstruct);
 
 	/* Register setsockopt */
 	ret = nf_register_sockopt(&ipt_sockopts);
@@ -1919,49 +1361,23 @@ static int __init init(void)
 		return ret;
 	}
 
-#ifdef CONFIG_PROC_FS
-	{
-	struct proc_dir_entry *proc;
-	int i;
-
-	for (i = 0; ipt_proc_entry[i].name; i++) {
-		proc = proc_net_create(ipt_proc_entry[i].name, 0,
-				       ipt_proc_entry[i].get_info);
-		if (!proc) {
-			while (--i >= 0)
-				proc_net_remove(ipt_proc_entry[i].name);
-			nf_unregister_sockopt(&ipt_sockopts);
-			return -ENOMEM;
-		}
-		proc->owner = THIS_MODULE;
-	}
-	}
-#endif
-
-	printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
+	printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n");
 	return 0;
 }
 
 static void __exit fini(void)
 {
 	nf_unregister_sockopt(&ipt_sockopts);
-#ifdef CONFIG_PROC_FS
-	{
-	int i;
-	for (i = 0; ipt_proc_entry[i].name; i++)
-		proc_net_remove(ipt_proc_entry[i].name);
-	}
-#endif
+
+	xt_unregister_match(AF_INET, &icmp_matchstruct);
+	xt_unregister_target(AF_INET, &ipt_error_target);
+	xt_unregister_target(AF_INET, &ipt_standard_target);
+
+	xt_proto_fini(AF_INET);
 }
 
 EXPORT_SYMBOL(ipt_register_table);
 EXPORT_SYMBOL(ipt_unregister_table);
-EXPORT_SYMBOL(ipt_register_match);
-EXPORT_SYMBOL(ipt_unregister_match);
 EXPORT_SYMBOL(ipt_do_table);
-EXPORT_SYMBOL(ipt_register_target);
-EXPORT_SYMBOL(ipt_unregister_target);
-EXPORT_SYMBOL(ipt_find_target);
-
 module_init(init);
 module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 45c52d8f4d9..d9bc971f03a 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -379,12 +379,13 @@ target(struct sk_buff **pskb,
 
 static int
 checkentry(const char *tablename,
-	   const struct ipt_entry *e,
+	   const void *e_void,
            void *targinfo,
            unsigned int targinfosize,
            unsigned int hook_mask)
 {
 	struct ipt_clusterip_tgt_info *cipinfo = targinfo;
+	const struct ipt_entry *e = e_void;
 
 	struct clusterip_config *config;
 
diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c
index 6e319570a28..898cdf79ce1 100644
--- a/net/ipv4/netfilter/ipt_DSCP.c
+++ b/net/ipv4/netfilter/ipt_DSCP.c
@@ -57,7 +57,7 @@ target(struct sk_buff **pskb,
 
 static int
 checkentry(const char *tablename,
-	   const struct ipt_entry *e,
+	   const void *e_void,
            void *targinfo,
            unsigned int targinfosize,
            unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index a1319693f64..706445426a6 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -113,12 +113,13 @@ target(struct sk_buff **pskb,
 
 static int
 checkentry(const char *tablename,
-	   const struct ipt_entry *e,
+	   const void *e_void,
            void *targinfo,
            unsigned int targinfosize,
            unsigned int hook_mask)
 {
 	const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo;
+	const struct ipt_entry *e = e_void;
 
 	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_ECN_info))) {
 		printk(KERN_WARNING "ECN: targinfosize %u != %Zu\n",
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 30be0f1dae3..6606ddb66a2 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -431,7 +431,7 @@ ipt_log_target(struct sk_buff **pskb,
 }
 
 static int ipt_log_checkentry(const char *tablename,
-			      const struct ipt_entry *e,
+			      const void *e,
 			      void *targinfo,
 			      unsigned int targinfosize,
 			      unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 275a174c6fe..12c56d3343c 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -11,6 +11,7 @@
 
 #include <linux/config.h>
 #include <linux/types.h>
+#include <linux/inetdevice.h>
 #include <linux/ip.h>
 #include <linux/timer.h>
 #include <linux/module.h>
@@ -18,6 +19,7 @@
 #include <net/protocol.h>
 #include <net/ip.h>
 #include <net/checksum.h>
+#include <net/route.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv4/ip_nat_rule.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
@@ -38,7 +40,7 @@ static DEFINE_RWLOCK(masq_lock);
 /* FIXME: Multiple targets. --RR */
 static int
 masquerade_check(const char *tablename,
-		 const struct ipt_entry *e,
+		 const void *e,
 		 void *targinfo,
 		 unsigned int targinfosize,
 		 unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index e6e7b609536..b074467fe67 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -31,7 +31,7 @@ MODULE_DESCRIPTION("iptables 1:1 NAT mapping of IP networks target");
 
 static int
 check(const char *tablename,
-      const struct ipt_entry *e,
+      const void *e,
       void *targinfo,
       unsigned int targinfosize,
       unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_NFQUEUE.c b/net/ipv4/netfilter/ipt_NFQUEUE.c
deleted file mode 100644
index 3cedc9be880..00000000000
--- a/net/ipv4/netfilter/ipt_NFQUEUE.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/* iptables module for using new netfilter netlink queue
- *
- * (C) 2005 by Harald Welte <laforge@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as 
- * published by the Free Software Foundation.
- * 
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_NFQUEUE.h>
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("iptables NFQUEUE target");
-MODULE_LICENSE("GPL");
-
-static unsigned int
-target(struct sk_buff **pskb,
-       const struct net_device *in,
-       const struct net_device *out,
-       unsigned int hooknum,
-       const void *targinfo,
-       void *userinfo)
-{
-	const struct ipt_NFQ_info *tinfo = targinfo;
-
-	return NF_QUEUE_NR(tinfo->queuenum);
-}
-
-static int
-checkentry(const char *tablename,
-	   const struct ipt_entry *e,
-           void *targinfo,
-           unsigned int targinfosize,
-           unsigned int hook_mask)
-{
-	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_NFQ_info))) {
-		printk(KERN_WARNING "NFQUEUE: targinfosize %u != %Zu\n",
-		       targinfosize,
-		       IPT_ALIGN(sizeof(struct ipt_NFQ_info)));
-		return 0;
-	}
-
-	return 1;
-}
-
-static struct ipt_target ipt_NFQ_reg = {
-	.name		= "NFQUEUE",
-	.target		= target,
-	.checkentry	= checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	return ipt_register_target(&ipt_NFQ_reg);
-}
-
-static void __exit fini(void)
-{
-	ipt_unregister_target(&ipt_NFQ_reg);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 5245bfd33d5..140be51f2f0 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -33,7 +33,7 @@ MODULE_DESCRIPTION("iptables REDIRECT target module");
 /* FIXME: Take multiple ranges --RR */
 static int
 redirect_check(const char *tablename,
-	       const struct ipt_entry *e,
+	       const void *e,
 	       void *targinfo,
 	       unsigned int targinfosize,
 	       unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index f057025a719..3eb47aae78c 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -203,7 +203,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 						sizeof(struct tcphdr), 0));
 
 	/* Adjust IP TTL, DF */
-	nskb->nh.iph->ttl = MAXTTL;
+	nskb->nh.iph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
 	/* Set DF, id = 0 */
 	nskb->nh.iph->frag_off = htons(IP_DF);
 	nskb->nh.iph->id = 0;
@@ -282,12 +282,13 @@ static unsigned int reject(struct sk_buff **pskb,
 }
 
 static int check(const char *tablename,
-		 const struct ipt_entry *e,
+		 const void *e_void,
 		 void *targinfo,
 		 unsigned int targinfosize,
 		 unsigned int hook_mask)
 {
  	const struct ipt_reject_info *rejinfo = targinfo;
+	const struct ipt_entry *e = e_void;
 
  	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_reject_info))) {
   		DEBUGP("REJECT: targinfosize %u != 0\n", targinfosize);
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index 7a0536d864a..a22de59bba0 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -49,7 +49,7 @@ MODULE_DESCRIPTION("iptables special SNAT module for consistent sourceip");
 
 static int
 same_check(const char *tablename,
-	      const struct ipt_entry *e,
+	      const void *e,
 	      void *targinfo,
 	      unsigned int targinfosize,
 	      unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c
index 8db70d6908c..c122841e182 100644
--- a/net/ipv4/netfilter/ipt_TCPMSS.c
+++ b/net/ipv4/netfilter/ipt_TCPMSS.c
@@ -210,12 +210,13 @@ static inline int find_syn_match(const struct ipt_entry_match *m)
 /* Must specify -p tcp --syn/--tcp-flags SYN */
 static int
 ipt_tcpmss_checkentry(const char *tablename,
-		      const struct ipt_entry *e,
+		      const void *e_void,
 		      void *targinfo,
 		      unsigned int targinfosize,
 		      unsigned int hook_mask)
 {
 	const struct ipt_tcpmss_info *tcpmssinfo = targinfo;
+	const struct ipt_entry *e = e_void;
 
 	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_tcpmss_info))) {
 		DEBUGP("ipt_tcpmss_checkentry: targinfosize %u != %u\n",
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index deadb36d442..3a44a56db23 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -52,7 +52,7 @@ target(struct sk_buff **pskb,
 
 static int
 checkentry(const char *tablename,
-	   const struct ipt_entry *e,
+	   const void *e_void,
            void *targinfo,
            unsigned int targinfosize,
            unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index b9ae6a9382f..b769eb23197 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -66,7 +66,7 @@ ipt_ttl_target(struct sk_buff **pskb, const struct net_device *in,
 }
 
 static int ipt_ttl_checkentry(const char *tablename,
-		const struct ipt_entry *e,
+		const void *e,
 		void *targinfo,
 		unsigned int targinfosize,
 		unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 2883ccd8a91..641dbc47765 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -77,15 +77,15 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG);
 #define PRINTR(format, args...) do { if (net_ratelimit()) printk(format , ## args); } while (0)
 
 static unsigned int nlbufsiz = 4096;
-module_param(nlbufsiz, uint, 0600); /* FIXME: Check size < 128k --RR */
+module_param(nlbufsiz, uint, 0400);
 MODULE_PARM_DESC(nlbufsiz, "netlink buffer size");
 
 static unsigned int flushtimeout = 10;
-module_param(flushtimeout, int, 0600);
+module_param(flushtimeout, uint, 0600);
 MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)");
 
-static unsigned int nflog = 1;
-module_param(nflog, int, 0400);
+static int nflog = 1;
+module_param(nflog, bool, 0400);
 MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
 
 /* global data structures */
@@ -330,7 +330,7 @@ static void ipt_logfn(unsigned int pf,
 }
 
 static int ipt_ulog_checkentry(const char *tablename,
-			       const struct ipt_entry *e,
+			       const void *e,
 			       void *targinfo,
 			       unsigned int targinfosize,
 			       unsigned int hookmask)
@@ -376,7 +376,7 @@ static int __init init(void)
 
 	DEBUGP("ipt_ULOG: init module\n");
 
-	if (nlbufsiz >= 128*1024) {
+	if (nlbufsiz > 128*1024) {
 		printk("Netlink buffer has to be <= 128kB\n");
 		return -EINVAL;
 	}
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index e19c2a52d00..d6b83a97651 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -29,7 +29,7 @@ static inline int match_type(u_int32_t addr, u_int16_t mask)
 
 static int match(const struct sk_buff *skb, const struct net_device *in,
 		 const struct net_device *out, const void *matchinfo,
-		 int offset, int *hotdrop)
+		 int offset, unsigned int protoff, int *hotdrop)
 {
 	const struct ipt_addrtype_info *info = matchinfo;
 	const struct iphdr *iph = skb->nh.iph;
@@ -43,7 +43,7 @@ static int match(const struct sk_buff *skb, const struct net_device *in,
 	return ret;
 }
 
-static int checkentry(const char *tablename, const struct ipt_ip *ip,
+static int checkentry(const char *tablename, const void *ip,
 		      void *matchinfo, unsigned int matchsize,
 		      unsigned int hook_mask)
 {
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index a0fea847cb7..144adfec13c 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -41,6 +41,7 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protoff,
       int *hotdrop)
 {
 	struct ip_auth_hdr _ahdr, *ah;
@@ -50,7 +51,7 @@ match(const struct sk_buff *skb,
 	if (offset)
 		return 0;
 
-	ah = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+	ah = skb_header_pointer(skb, protoff,
 				sizeof(_ahdr), &_ahdr);
 	if (ah == NULL) {
 		/* We've been asked to examine this packet, and we
@@ -69,12 +70,13 @@ match(const struct sk_buff *skb,
 /* Called when user tries to insert an entry of this type. */
 static int
 checkentry(const char *tablename,
-	   const struct ipt_ip *ip,
+	   const void *ip_void,
 	   void *matchinfo,
 	   unsigned int matchinfosize,
 	   unsigned int hook_mask)
 {
 	const struct ipt_ah *ahinfo = matchinfo;
+	const struct ipt_ip *ip = ip_void;
 
 	/* Must specify proto == AH, and no unknown invflags */
 	if (ip->proto != IPPROTO_AH || (ip->invflags & IPT_INV_PROTO)) {
diff --git a/net/ipv4/netfilter/ipt_dscp.c b/net/ipv4/netfilter/ipt_dscp.c
index 5df52a64a5d..92063b4f860 100644
--- a/net/ipv4/netfilter/ipt_dscp.c
+++ b/net/ipv4/netfilter/ipt_dscp.c
@@ -21,7 +21,7 @@ MODULE_LICENSE("GPL");
 
 static int match(const struct sk_buff *skb, const struct net_device *in,
 		 const struct net_device *out, const void *matchinfo,
-		 int offset, int *hotdrop)
+		 int offset, unsigned int protoff, int *hotdrop)
 {
 	const struct ipt_dscp_info *info = matchinfo;
 	const struct iphdr *iph = skb->nh.iph;
@@ -31,7 +31,7 @@ static int match(const struct sk_buff *skb, const struct net_device *in,
 	return ((iph->tos&IPT_DSCP_MASK) == sh_dscp) ^ info->invert;
 }
 
-static int checkentry(const char *tablename, const struct ipt_ip *ip,
+static int checkentry(const char *tablename, const void *ip,
 		      void *matchinfo, unsigned int matchsize,
 		      unsigned int hook_mask)
 {
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index b6f7181e89c..e68b0c7981f 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -67,7 +67,7 @@ static inline int match_tcp(const struct sk_buff *skb,
 
 static int match(const struct sk_buff *skb, const struct net_device *in,
 		 const struct net_device *out, const void *matchinfo,
-		 int offset, int *hotdrop)
+		 int offset, unsigned int protoff, int *hotdrop)
 {
 	const struct ipt_ecn_info *info = matchinfo;
 
@@ -85,11 +85,12 @@ static int match(const struct sk_buff *skb, const struct net_device *in,
 	return 1;
 }
 
-static int checkentry(const char *tablename, const struct ipt_ip *ip,
+static int checkentry(const char *tablename, const void *ip_void,
 		      void *matchinfo, unsigned int matchsize,
 		      unsigned int hook_mask)
 {
 	const struct ipt_ecn_info *info = matchinfo;
+	const struct ipt_ip *ip = ip_void;
 
 	if (matchsize != IPT_ALIGN(sizeof(struct ipt_ecn_info)))
 		return 0;
diff --git a/net/ipv4/netfilter/ipt_esp.c b/net/ipv4/netfilter/ipt_esp.c
index e1d0dd31e11..9de191a8162 100644
--- a/net/ipv4/netfilter/ipt_esp.c
+++ b/net/ipv4/netfilter/ipt_esp.c
@@ -42,6 +42,7 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protoff,
       int *hotdrop)
 {
 	struct ip_esp_hdr _esp, *eh;
@@ -51,7 +52,7 @@ match(const struct sk_buff *skb,
 	if (offset)
 		return 0;
 
-	eh = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+	eh = skb_header_pointer(skb, protoff,
 				sizeof(_esp), &_esp);
 	if (eh == NULL) {
 		/* We've been asked to examine this packet, and we
@@ -70,12 +71,13 @@ match(const struct sk_buff *skb,
 /* Called when user tries to insert an entry of this type. */
 static int
 checkentry(const char *tablename,
-	   const struct ipt_ip *ip,
+	   const void *ip_void,
 	   void *matchinfo,
 	   unsigned int matchinfosize,
 	   unsigned int hook_mask)
 {
 	const struct ipt_esp *espinfo = matchinfo;
+	const struct ipt_ip *ip = ip_void;
 
 	/* Must specify proto == ESP, and no unknown invflags */
 	if (ip->proto != IPPROTO_ESP || (ip->invflags & IPT_INV_PROTO)) {
diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c
index 2dd1cccbdab..4fe48c1bd5f 100644
--- a/net/ipv4/netfilter/ipt_hashlimit.c
+++ b/net/ipv4/netfilter/ipt_hashlimit.c
@@ -429,6 +429,7 @@ hashlimit_match(const struct sk_buff *skb,
 		const struct net_device *out,
 		const void *matchinfo,
 		int offset,
+		unsigned int protoff,
 		int *hotdrop)
 {
 	struct ipt_hashlimit_info *r = 
@@ -504,7 +505,7 @@ hashlimit_match(const struct sk_buff *skb,
 
 static int
 hashlimit_checkentry(const char *tablename,
-		     const struct ipt_ip *ip,
+		     const void *inf,
 		     void *matchinfo,
 		     unsigned int matchsize,
 		     unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c
index b835b7b2e56..13fb16fb789 100644
--- a/net/ipv4/netfilter/ipt_iprange.c
+++ b/net/ipv4/netfilter/ipt_iprange.c
@@ -28,7 +28,7 @@ match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
       const void *matchinfo,
-      int offset, int *hotdrop)
+      int offset, unsigned int protoff, int *hotdrop)
 {
 	const struct ipt_iprange_info *info = matchinfo;
 	const struct iphdr *iph = skb->nh.iph;
@@ -63,7 +63,7 @@ match(const struct sk_buff *skb,
 }
 
 static int check(const char *tablename,
-		 const struct ipt_ip *ip,
+		 const void *inf,
 		 void *matchinfo,
 		 unsigned int matchsize,
 		 unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_length.c b/net/ipv4/netfilter/ipt_length.c
deleted file mode 100644
index 4eabcfbda9d..00000000000
--- a/net/ipv4/netfilter/ipt_length.c
+++ /dev/null
@@ -1,64 +0,0 @@
-/* Kernel module to match packet length. */
-/* (C) 1999-2001 James Morris <jmorros@intercode.com.au>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#include <linux/netfilter_ipv4/ipt_length.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
-MODULE_DESCRIPTION("IP tables packet length matching module");
-MODULE_LICENSE("GPL");
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      int *hotdrop)
-{
-	const struct ipt_length_info *info = matchinfo;
-	u_int16_t pktlen = ntohs(skb->nh.iph->tot_len);
-	
-	return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
-}
-
-static int
-checkentry(const char *tablename,
-           const struct ipt_ip *ip,
-           void *matchinfo,
-           unsigned int matchsize,
-           unsigned int hook_mask)
-{
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_length_info)))
-		return 0;
-
-	return 1;
-}
-
-static struct ipt_match length_match = {
-	.name		= "length",
-	.match		= &match,
-	.checkentry	= &checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	return ipt_register_match(&length_match);
-}
-
-static void __exit fini(void)
-{
-	ipt_unregister_match(&length_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_multiport.c b/net/ipv4/netfilter/ipt_multiport.c
index 99e8188162e..2d52326553f 100644
--- a/net/ipv4/netfilter/ipt_multiport.c
+++ b/net/ipv4/netfilter/ipt_multiport.c
@@ -97,6 +97,7 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protoff,
       int *hotdrop)
 {
 	u16 _ports[2], *pptr;
@@ -105,7 +106,7 @@ match(const struct sk_buff *skb,
 	if (offset)
 		return 0;
 
-	pptr = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+	pptr = skb_header_pointer(skb, protoff,
 				  sizeof(_ports), _ports);
 	if (pptr == NULL) {
 		/* We've been asked to examine this packet, and we
@@ -128,6 +129,7 @@ match_v1(const struct sk_buff *skb,
 	 const struct net_device *out,
 	 const void *matchinfo,
 	 int offset,
+	 unsigned int protoff,
 	 int *hotdrop)
 {
 	u16 _ports[2], *pptr;
@@ -136,7 +138,7 @@ match_v1(const struct sk_buff *skb,
 	if (offset)
 		return 0;
 
-	pptr = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+	pptr = skb_header_pointer(skb, protoff,
 				  sizeof(_ports), _ports);
 	if (pptr == NULL) {
 		/* We've been asked to examine this packet, and we
@@ -154,7 +156,7 @@ match_v1(const struct sk_buff *skb,
 /* Called when user tries to insert an entry of this type. */
 static int
 checkentry(const char *tablename,
-	   const struct ipt_ip *ip,
+	   const void *ip,
 	   void *matchinfo,
 	   unsigned int matchsize,
 	   unsigned int hook_mask)
@@ -164,7 +166,7 @@ checkentry(const char *tablename,
 
 static int
 checkentry_v1(const char *tablename,
-	      const struct ipt_ip *ip,
+	      const void *ip,
 	      void *matchinfo,
 	      unsigned int matchsize,
 	      unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c
index 0cee2862ed8..4843d0c9734 100644
--- a/net/ipv4/netfilter/ipt_owner.c
+++ b/net/ipv4/netfilter/ipt_owner.c
@@ -27,6 +27,7 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protoff,
       int *hotdrop)
 {
 	const struct ipt_owner_info *info = matchinfo;
@@ -51,7 +52,7 @@ match(const struct sk_buff *skb,
 
 static int
 checkentry(const char *tablename,
-           const struct ipt_ip *ip,
+           const void *ip,
            void *matchinfo,
            unsigned int matchsize,
            unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_physdev.c b/net/ipv4/netfilter/ipt_physdev.c
deleted file mode 100644
index 1a53924041f..00000000000
--- a/net/ipv4/netfilter/ipt_physdev.c
+++ /dev/null
@@ -1,134 +0,0 @@
-/* Kernel module to match the bridge port in and
- * out device for IP packets coming into contact with a bridge. */
-
-/* (C) 2001-2003 Bart De Schuymer <bdschuym@pandora.be>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/netfilter_ipv4/ipt_physdev.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_bridge.h>
-#define MATCH   1
-#define NOMATCH 0
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
-MODULE_DESCRIPTION("iptables bridge physical device match module");
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      int *hotdrop)
-{
-	int i;
-	static const char nulldevname[IFNAMSIZ];
-	const struct ipt_physdev_info *info = matchinfo;
-	unsigned int ret;
-	const char *indev, *outdev;
-	struct nf_bridge_info *nf_bridge;
-
-	/* Not a bridged IP packet or no info available yet:
-	 * LOCAL_OUT/mangle and LOCAL_OUT/nat don't know if
-	 * the destination device will be a bridge. */
-	if (!(nf_bridge = skb->nf_bridge)) {
-		/* Return MATCH if the invert flags of the used options are on */
-		if ((info->bitmask & IPT_PHYSDEV_OP_BRIDGED) &&
-		    !(info->invert & IPT_PHYSDEV_OP_BRIDGED))
-			return NOMATCH;
-		if ((info->bitmask & IPT_PHYSDEV_OP_ISIN) &&
-		    !(info->invert & IPT_PHYSDEV_OP_ISIN))
-			return NOMATCH;
-		if ((info->bitmask & IPT_PHYSDEV_OP_ISOUT) &&
-		    !(info->invert & IPT_PHYSDEV_OP_ISOUT))
-			return NOMATCH;
-		if ((info->bitmask & IPT_PHYSDEV_OP_IN) &&
-		    !(info->invert & IPT_PHYSDEV_OP_IN))
-			return NOMATCH;
-		if ((info->bitmask & IPT_PHYSDEV_OP_OUT) &&
-		    !(info->invert & IPT_PHYSDEV_OP_OUT))
-			return NOMATCH;
-		return MATCH;
-	}
-
-	/* This only makes sense in the FORWARD and POSTROUTING chains */
-	if ((info->bitmask & IPT_PHYSDEV_OP_BRIDGED) &&
-	    (!!(nf_bridge->mask & BRNF_BRIDGED) ^
-	    !(info->invert & IPT_PHYSDEV_OP_BRIDGED)))
-		return NOMATCH;
-
-	if ((info->bitmask & IPT_PHYSDEV_OP_ISIN &&
-	    (!nf_bridge->physindev ^ !!(info->invert & IPT_PHYSDEV_OP_ISIN))) ||
-	    (info->bitmask & IPT_PHYSDEV_OP_ISOUT &&
-	    (!nf_bridge->physoutdev ^ !!(info->invert & IPT_PHYSDEV_OP_ISOUT))))
-		return NOMATCH;
-
-	if (!(info->bitmask & IPT_PHYSDEV_OP_IN))
-		goto match_outdev;
-	indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname;
-	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned int); i++) {
-		ret |= (((const unsigned int *)indev)[i]
-			^ ((const unsigned int *)info->physindev)[i])
-			& ((const unsigned int *)info->in_mask)[i];
-	}
-
-	if ((ret == 0) ^ !(info->invert & IPT_PHYSDEV_OP_IN))
-		return NOMATCH;
-
-match_outdev:
-	if (!(info->bitmask & IPT_PHYSDEV_OP_OUT))
-		return MATCH;
-	outdev = nf_bridge->physoutdev ?
-		 nf_bridge->physoutdev->name : nulldevname;
-	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned int); i++) {
-		ret |= (((const unsigned int *)outdev)[i]
-			^ ((const unsigned int *)info->physoutdev)[i])
-			& ((const unsigned int *)info->out_mask)[i];
-	}
-
-	return (ret != 0) ^ !(info->invert & IPT_PHYSDEV_OP_OUT);
-}
-
-static int
-checkentry(const char *tablename,
-		       const struct ipt_ip *ip,
-		       void *matchinfo,
-		       unsigned int matchsize,
-		       unsigned int hook_mask)
-{
-	const struct ipt_physdev_info *info = matchinfo;
-
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_physdev_info)))
-		return 0;
-	if (!(info->bitmask & IPT_PHYSDEV_OP_MASK) ||
-	    info->bitmask & ~IPT_PHYSDEV_OP_MASK)
-		return 0;
-	return 1;
-}
-
-static struct ipt_match physdev_match = {
-	.name		= "physdev",
-	.match		= &match,
-	.checkentry	= &checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	return ipt_register_match(&physdev_match);
-}
-
-static void __exit fini(void)
-{
-	ipt_unregister_match(&physdev_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_policy.c b/net/ipv4/netfilter/ipt_policy.c
new file mode 100644
index 00000000000..18ca8258a1c
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_policy.c
@@ -0,0 +1,173 @@
+/* IP tables module for matching IPsec policy
+ *
+ * Copyright (c) 2004,2005 Patrick McHardy, <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <net/xfrm.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_policy.h>
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("IPtables IPsec policy matching module");
+MODULE_LICENSE("GPL");
+
+
+static inline int	/* nonzero iff x passes every field test selected in e */
+match_xfrm_state(struct xfrm_state *x, const struct ipt_policy_elem *e)
+{
+#define MATCH(x,y)	(!e->match.x || ((e->x == (y)) ^ e->invert.x))
+	/* MATCH: a field passes if it is not selected, else (e->field == y) XOR invert. */
+	return MATCH(saddr, x->props.saddr.a4 & e->smask) &&
+	       MATCH(daddr, x->id.daddr.a4 & e->dmask) &&
+	       MATCH(proto, x->id.proto) &&
+	       MATCH(mode, x->props.mode) &&
+	       MATCH(spi, x->id.spi) &&
+	       MATCH(reqid, x->props.reqid);
+}
+
+static int	/* -1: skb has no sec_path; 1: policy matches; 0: it does not */
+match_policy_in(const struct sk_buff *skb, const struct ipt_policy_info *info)
+{
+	const struct ipt_policy_elem *e;
+	struct sec_path *sp = skb->sp;
+	int strict = info->flags & IPT_POLICY_MATCH_STRICT;
+	int i, pos;
+
+	if (sp == NULL)
+		return -1;
+	if (strict && info->len != sp->len)	/* strict needs equal lengths */
+		return 0;
+
+	for (i = sp->len - 1; i >= 0; i--) {	/* walk sec_path last-to-first */
+		pos = strict ? sp->len - 1 - i : 0;	/* 0..len-1; was negative for i < len-1 */
+		if (pos >= info->len)
+			return 0;
+		e = &info->pol[pos];
+
+		if (match_xfrm_state(sp->x[i].xvec, e)) {
+			if (!strict)	/* non-strict: one hit on pol[0] suffices */
+				return 1;
+		} else if (strict)	/* strict: first mismatch fails the rule */
+			return 0;
+	}
+
+	return strict ? 1 : 0;	/* strict: nothing mismatched; else: nothing matched */
+}
+
+static int	/* -1: skb->dst has no xfrm; 1: policy matches; 0: it does not */
+match_policy_out(const struct sk_buff *skb, const struct ipt_policy_info *info)
+{
+	const struct ipt_policy_elem *e;
+	struct dst_entry *dst = skb->dst;
+	int strict = info->flags & IPT_POLICY_MATCH_STRICT;
+	int i, pos;
+
+	if (dst->xfrm == NULL)	/* NOTE(review): dst itself is not NULL-checked here */
+		return -1;
+
+	for (i = 0; dst && dst->xfrm; dst = dst->child, i++) {
+		pos = strict ? i : 0;	/* non-strict always tests pol[0] */
+		if (pos >= info->len)
+			return 0;
+		e = &info->pol[pos];
+
+		if (match_xfrm_state(dst->xfrm, e)) {
+			if (!strict)	/* non-strict: one hit suffices */
+				return 1;
+		} else if (strict)	/* strict: first mismatch fails the rule */
+			return 0;
+	}
+
+	return strict ? 1 : 0;	/* strict: nothing mismatched; else: nothing matched */
+}
+
+static int match(const struct sk_buff *skb,
+                 const struct net_device *in,
+                 const struct net_device *out,
+                 const void *matchinfo,
+                 int offset,
+                 unsigned int protoff,
+                 int *hotdrop)
+{
+	const struct ipt_policy_info *info = matchinfo;
+	int ret;
+
+	if (info->flags & IPT_POLICY_MATCH_IN)	/* IN is tested first, so it wins over OUT */
+		ret = match_policy_in(skb, info);
+	else
+		ret = match_policy_out(skb, info);
+
+	if (ret < 0)	/* helper found no sec_path / no xfrm on the dst */
+		ret = info->flags & IPT_POLICY_MATCH_NONE ? 1 : 0;
+	else if (info->flags & IPT_POLICY_MATCH_NONE)	/* NONE requested but IPsec state exists */
+		ret = 0;
+
+	return ret;	/* 1 = match, 0 = no match */
+}
+
+static int checkentry(const char *tablename, const void *ip_void,
+                      void *matchinfo, unsigned int matchsize,
+                      unsigned int hook_mask)
+{	/* Returns 0 after logging the reason when a check fails, 1 otherwise. */
+	struct ipt_policy_info *info = matchinfo;
+
+	if (matchsize != IPT_ALIGN(sizeof(*info))) {	/* size of the supplied match data */
+		printk(KERN_ERR "ipt_policy: matchsize %u != %zu\n",
+		       matchsize, IPT_ALIGN(sizeof(*info)));
+		return 0;
+	}
+	if (!(info->flags & (IPT_POLICY_MATCH_IN|IPT_POLICY_MATCH_OUT))) {	/* need a direction */
+		printk(KERN_ERR "ipt_policy: neither incoming nor "
+		                "outgoing policy selected\n");
+		return 0;
+	}
+	if (hook_mask & (1 << NF_IP_PRE_ROUTING | 1 << NF_IP_LOCAL_IN)
+	    && info->flags & IPT_POLICY_MATCH_OUT) {	/* OUT rejected on these hooks */
+		printk(KERN_ERR "ipt_policy: output policy not valid in "
+		                "PRE_ROUTING and INPUT\n");
+		return 0;
+	}
+	if (hook_mask & (1 << NF_IP_POST_ROUTING | 1 << NF_IP_LOCAL_OUT)
+	    && info->flags & IPT_POLICY_MATCH_IN) {	/* IN rejected on these hooks */
+		printk(KERN_ERR "ipt_policy: input policy not valid in "
+		                "POST_ROUTING and OUTPUT\n");
+		return 0;
+	}
+	if (info->len > IPT_POLICY_MAX_ELEM) {	/* bounds later info->pol[] indexing */
+		printk(KERN_ERR "ipt_policy: too many policy elements\n");
+		return 0;
+	}
+
+	return 1;
+}
+
+static struct ipt_match policy_match = {	/* registered in init(), removed in fini() */
+	.name		= "policy",
+	.match		= match,
+	.checkentry 	= checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{	/* Module entry: register the "policy" match; returns the registration result. */
+	return ipt_register_match(&policy_match);
+}
+
+static void __exit fini(void)
+{	/* Module exit: unregister the "policy" match. */
+	ipt_unregister_match(&policy_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 2d44b07688a..44611d6d14f 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -24,10 +24,10 @@
 #define HASH_LOG 9
 
 /* Defaults, these can be overridden on the module command-line. */
-static int ip_list_tot = 100;
-static int ip_pkt_list_tot = 20;
-static int ip_list_hash_size = 0;
-static int ip_list_perms = 0644;
+static unsigned int ip_list_tot = 100;
+static unsigned int ip_pkt_list_tot = 20;
+static unsigned int ip_list_hash_size = 0;
+static unsigned int ip_list_perms = 0644;
 #ifdef DEBUG
 static int debug = 1;
 #endif
@@ -38,13 +38,13 @@ KERN_INFO RECENT_NAME " " RECENT_VER ": Stephen Frost <sfrost@snowman.net>.  htt
 MODULE_AUTHOR("Stephen Frost <sfrost@snowman.net>");
 MODULE_DESCRIPTION("IP tables recently seen matching module " RECENT_VER);
 MODULE_LICENSE("GPL");
-module_param(ip_list_tot, int, 0400);
-module_param(ip_pkt_list_tot, int, 0400);
-module_param(ip_list_hash_size, int, 0400);
-module_param(ip_list_perms, int, 0400);
+module_param(ip_list_tot, uint, 0400);
+module_param(ip_pkt_list_tot, uint, 0400);
+module_param(ip_list_hash_size, uint, 0400);
+module_param(ip_list_perms, uint, 0400);
 #ifdef DEBUG
-module_param(debug, int, 0600);
-MODULE_PARM_DESC(debug,"debugging level, defaults to 1");
+module_param(debug, bool, 0600);
+MODULE_PARM_DESC(debug,"enable debugging output");
 #endif
 MODULE_PARM_DESC(ip_list_tot,"number of IPs to remember per list");
 MODULE_PARM_DESC(ip_pkt_list_tot,"number of packets per IP to remember");
@@ -104,6 +104,7 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protoff,
       int *hotdrop);
 
 /* Function to hash a given address into the hash table of table_size size */
@@ -317,7 +318,7 @@ static int ip_recent_ctrl(struct file *file, const char __user *input, unsigned
 	skb->nh.iph->daddr = 0;
 	/* Clear ttl since we have no way of knowing it */
 	skb->nh.iph->ttl = 0;
-	match(skb,NULL,NULL,info,0,NULL);
+	match(skb,NULL,NULL,info,0,0,NULL);
 
 	kfree(skb->nh.iph);
 out_free_skb:
@@ -357,6 +358,7 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protoff,
       int *hotdrop)
 {
 	int pkt_count, hits_found, ans;
@@ -532,6 +534,7 @@ match(const struct sk_buff *skb,
 			}
 			if(info->seconds && info->hit_count) {
 				for(pkt_count = 0, hits_found = 0; pkt_count < ip_pkt_list_tot; pkt_count++) {
+					if(r_list[location].last_pkts[pkt_count] == 0) break;
 					if(time_before_eq(now,r_list[location].last_pkts[pkt_count]+info->seconds*HZ)) hits_found++;
 				}
 				if(hits_found >= info->hit_count) ans = !info->invert; else ans = info->invert;
@@ -653,7 +656,7 @@ match(const struct sk_buff *skb,
  */
 static int
 checkentry(const char *tablename,
-           const struct ipt_ip *ip,
+           const void *ip,
            void *matchinfo,
            unsigned int matchsize,
            unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c
index 086a1bb61e3..9ab765e126f 100644
--- a/net/ipv4/netfilter/ipt_tos.c
+++ b/net/ipv4/netfilter/ipt_tos.c
@@ -23,6 +23,7 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protoff,
       int *hotdrop)
 {
 	const struct ipt_tos_info *info = matchinfo;
@@ -32,7 +33,7 @@ match(const struct sk_buff *skb,
 
 static int
 checkentry(const char *tablename,
-           const struct ipt_ip *ip,
+           const void *ip,
            void *matchinfo,
            unsigned int matchsize,
            unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c
index 219aa9de88c..82da53f430a 100644
--- a/net/ipv4/netfilter/ipt_ttl.c
+++ b/net/ipv4/netfilter/ipt_ttl.c
@@ -21,7 +21,7 @@ MODULE_LICENSE("GPL");
 
 static int match(const struct sk_buff *skb, const struct net_device *in,
 		 const struct net_device *out, const void *matchinfo,
-		 int offset, int *hotdrop)
+		 int offset, unsigned int protoff, int *hotdrop)
 {
 	const struct ipt_ttl_info *info = matchinfo;
 
@@ -47,7 +47,7 @@ static int match(const struct sk_buff *skb, const struct net_device *in,
 	return 0;
 }
 
-static int checkentry(const char *tablename, const struct ipt_ip *ip,
+static int checkentry(const char *tablename, const void  *ip,
 		      void *matchinfo, unsigned int matchsize,
 		      unsigned int hook_mask)
 {
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 260a4f0a2a9..212a3079085 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -78,7 +78,8 @@ static struct ipt_table packet_filter = {
 	.name		= "filter",
 	.valid_hooks	= FILTER_VALID_HOOKS,
 	.lock		= RW_LOCK_UNLOCKED,
-	.me		= THIS_MODULE
+	.me		= THIS_MODULE,
+	.af		= AF_INET,
 };
 
 /* The work comes in here from netfilter.c. */
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 160eb11b6e2..3212a5cc4b6 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -109,6 +109,7 @@ static struct ipt_table packet_mangler = {
 	.valid_hooks	= MANGLE_VALID_HOOKS,
 	.lock		= RW_LOCK_UNLOCKED,
 	.me		= THIS_MODULE,
+	.af		= AF_INET,
 };
 
 /* The work comes in here from netfilter.c. */
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 47449ba83eb..fdb9e9c81e8 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -83,7 +83,8 @@ static struct ipt_table packet_raw = {
 	.name = "raw", 
 	.valid_hooks =  RAW_VALID_HOOKS, 
 	.lock = RW_LOCK_UNLOCKED, 
-	.me = THIS_MODULE
+	.me = THIS_MODULE,
+	.af = AF_INET,
 };
 
 /* The work comes in here from netfilter.c. */
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 8202c1c0afa..167619f638c 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -22,6 +22,7 @@
 #include <linux/skbuff.h>
 #include <linux/icmp.h>
 #include <linux/sysctl.h>
+#include <net/route.h>
 #include <net/ip.h>
 
 #include <linux/netfilter_ipv4.h>
@@ -180,30 +181,6 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
 	return NF_ACCEPT;
 }
 
-static unsigned int ipv4_refrag(unsigned int hooknum,
-				struct sk_buff **pskb,
-				const struct net_device *in,
-				const struct net_device *out,
-				int (*okfn)(struct sk_buff *))
-{
-	struct rtable *rt = (struct rtable *)(*pskb)->dst;
-
-	/* We've seen it coming out the other side: confirm */
-	if (ipv4_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT)
-		return NF_DROP;
-
-	/* Local packets are never produced too large for their
-	   interface.  We degfragment them at LOCAL_OUT, however,
-	   so we have to refragment them here. */
-	if ((*pskb)->len > dst_mtu(&rt->u.dst) &&
-	    !skb_shinfo(*pskb)->tso_size) {
-		/* No hook can be after us, so this should be OK. */
-		ip_fragment(*pskb, okfn);
-		return NF_STOLEN;
-	}
-	return NF_ACCEPT;
-}
-
 static unsigned int ipv4_conntrack_in(unsigned int hooknum,
 				      struct sk_buff **pskb,
 				      const struct net_device *in,
@@ -283,7 +260,7 @@ static struct nf_hook_ops ipv4_conntrack_helper_in_ops = {
 
 /* Refragmenter; last chance. */
 static struct nf_hook_ops ipv4_conntrack_out_ops = {
-	.hook		= ipv4_refrag,
+	.hook		= ipv4_confirm,
 	.owner		= THIS_MODULE,
 	.pf		= PF_INET,
 	.hooknum	= NF_IP_POST_ROUTING,
@@ -300,7 +277,7 @@ static struct nf_hook_ops ipv4_conntrack_local_in_ops = {
 
 #ifdef CONFIG_SYSCTL
 /* From nf_conntrack_proto_icmp.c */
-extern unsigned long nf_ct_icmp_timeout;
+extern unsigned int nf_ct_icmp_timeout;
 static struct ctl_table_header *nf_ct_ipv4_sysctl_header;
 
 static ctl_table nf_ct_sysctl_table[] = {
@@ -392,6 +369,48 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
 	return -ENOENT;
 }
 
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static int ipv4_tuple_to_nfattr(struct sk_buff *skb,
+				const struct nf_conntrack_tuple *tuple)
+{	/* Put the tuple's IPv4 source and destination into skb as CTA_IP_V4_* attributes. */
+	NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t),
+		&tuple->src.u3.ip);
+	NFA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t),
+		&tuple->dst.u3.ip);
+	return 0;
+
+nfattr_failure:	/* presumably the NFA_PUT failure target (macro not shown here) - confirm */
+	return -1;
+}
+
+static const size_t cta_min_ip[CTA_IP_MAX] = {	/* sizes indexed by attribute type - 1 */
+	[CTA_IP_V4_SRC-1]       = sizeof(u_int32_t),
+	[CTA_IP_V4_DST-1]       = sizeof(u_int32_t),
+};
+
+static int ipv4_nfattr_to_tuple(struct nfattr *tb[],
+				struct nf_conntrack_tuple *t)
+{	/* Fill t's IPv4 addresses from tb[]; -EINVAL if an attribute is missing or fails the size check. */
+	if (!tb[CTA_IP_V4_SRC-1] || !tb[CTA_IP_V4_DST-1])	/* both are required */
+		return -EINVAL;
+
+	if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip))	/* checked against the cta_min_ip table */
+		return -EINVAL;
+
+	t->src.u3.ip =
+		*(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_SRC-1]);
+	t->dst.u3.ip =
+		*(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_DST-1]);
+
+	return 0;
+}
+#endif
+
 static struct nf_sockopt_ops so_getorigdst = {
 	.pf		= PF_INET,
 	.get_optmin	= SO_ORIGINAL_DST,
@@ -408,6 +427,11 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
 	.print_conntrack = ipv4_print_conntrack,
 	.prepare	 = ipv4_prepare,
 	.get_features	 = ipv4_get_features,
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.tuple_to_nfattr = ipv4_tuple_to_nfattr,
+	.nfattr_to_tuple = ipv4_nfattr_to_tuple,
+#endif
 	.me		 = THIS_MODULE,
 };
 
@@ -551,7 +575,7 @@ MODULE_LICENSE("GPL");
 
 static int __init init(void)
 {
-	need_nf_conntrack();
+	need_conntrack();
 	return init_or_cleanup(1);
 }
 
@@ -563,9 +587,4 @@ static void __exit fini(void)
 module_init(init);
 module_exit(fini);
 
-void need_ip_conntrack(void)
-{
-}
-
-EXPORT_SYMBOL(need_ip_conntrack);
 EXPORT_SYMBOL(nf_ct_ipv4_gather_frags);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 7ddb5c08f7b..52dc175be39 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -50,20 +50,21 @@ static int icmp_pkt_to_tuple(const struct sk_buff *skb,
 	return 1;
 }
 
+/* ICMP request<->reply type map. Values are type + 1; unlisted slots are 0. */
+static const u_int8_t invmap[] = {
+	[ICMP_ECHO] = ICMP_ECHOREPLY + 1,
+	[ICMP_ECHOREPLY] = ICMP_ECHO + 1,
+	[ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
+	[ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
+	[ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
+	[ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
+	[ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
+	[ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
+};
+
 static int icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
 			     const struct nf_conntrack_tuple *orig)
 {
-	/* Add 1; spaces filled with 0. */
-	static u_int8_t invmap[]
-		= { [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
-		    [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
-		    [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
-		    [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
-		    [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
-		    [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
-		    [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
-		    [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1};
-
 	if (orig->dst.u.icmp.type >= sizeof(invmap)
 	    || !invmap[orig->dst.u.icmp.type])
 		return 0;
@@ -120,11 +121,12 @@ static int icmp_packet(struct nf_conn *ct,
 static int icmp_new(struct nf_conn *conntrack,
 		    const struct sk_buff *skb, unsigned int dataoff)
 {
-	static u_int8_t valid_new[]
-		= { [ICMP_ECHO] = 1,
-		    [ICMP_TIMESTAMP] = 1,
-		    [ICMP_INFO_REQUEST] = 1,
-		    [ICMP_ADDRESS] = 1 };
+	static const u_int8_t valid_new[] = {
+		[ICMP_ECHO] = 1,
+		[ICMP_TIMESTAMP] = 1,
+		[ICMP_INFO_REQUEST] = 1,
+		[ICMP_ADDRESS] = 1
+	};
 
 	if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
 	    || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) {
@@ -168,7 +170,7 @@ icmp_error_message(struct sk_buff *skb,
 		return -NF_ACCEPT;
 	}
 
-	innerproto = nf_ct_find_proto(PF_INET, inside->ip.protocol);
+	innerproto = __nf_ct_proto_find(PF_INET, inside->ip.protocol);
 	dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp);
 	/* Are they talking about one of our connections? */
 	if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET,
@@ -281,6 +283,60 @@ checksum_skipped:
 	return icmp_error_message(skb, ctinfo, hooknum);
 }
 
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static int icmp_tuple_to_nfattr(struct sk_buff *skb,
+				const struct nf_conntrack_tuple *t)
+{	/* Put the tuple's ICMP id, type and code into skb as CTA_PROTO_ICMP_* attributes. */
+	NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t),
+		&t->src.u.icmp.id);
+	NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t),
+		&t->dst.u.icmp.type);
+	NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t),
+		&t->dst.u.icmp.code);
+
+	return 0;
+
+nfattr_failure:	/* presumably the NFA_PUT failure target (macro not shown here) - confirm */
+	return -1;
+}
+
+static const size_t cta_min_proto[CTA_PROTO_MAX] = {	/* sizes indexed by attribute type - 1 */
+	[CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t),
+	[CTA_PROTO_ICMP_CODE-1] = sizeof(u_int8_t),
+	[CTA_PROTO_ICMP_ID-1]   = sizeof(u_int16_t)
+};
+
+static int icmp_nfattr_to_tuple(struct nfattr *tb[],
+				struct nf_conntrack_tuple *tuple)
+{	/* Fill tuple's ICMP type/code/id from tb[]; -EINVAL on missing, mis-sized or unmapped type. */
+	if (!tb[CTA_PROTO_ICMP_TYPE-1]
+	    || !tb[CTA_PROTO_ICMP_CODE-1]
+	    || !tb[CTA_PROTO_ICMP_ID-1])	/* all three are required */
+		return -EINVAL;
+
+	if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))	/* checked against cta_min_proto */
+		return -EINVAL;
+
+	tuple->dst.u.icmp.type = 
+			*(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]);
+	tuple->dst.u.icmp.code =
+			*(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]);
+	tuple->src.u.icmp.id =
+			*(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
+
+	if (tuple->dst.u.icmp.type >= sizeof(invmap)	/* tuple is already written at this point */
+	    || !invmap[tuple->dst.u.icmp.type])	/* 0 in invmap: type has no inverse */
+		return -EINVAL;
+
+	return 0;
+}
+#endif
+
 struct nf_conntrack_protocol nf_conntrack_protocol_icmp =
 {
 	.list			= { NULL, NULL },
@@ -295,7 +351,12 @@ struct nf_conntrack_protocol nf_conntrack_protocol_icmp =
 	.new			= icmp_new,
 	.error			= icmp_error,
 	.destroy		= NULL,
-	.me			= NULL
+	.me			= NULL,
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.tuple_to_nfattr	= icmp_tuple_to_nfattr,
+	.nfattr_to_tuple	= icmp_nfattr_to_tuple,
+#endif
 };
 
 EXPORT_SYMBOL(nf_conntrack_protocol_icmp);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 0d7dc668db4..39d49dc333a 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -38,6 +38,7 @@
 #include <net/protocol.h>
 #include <net/tcp.h>
 #include <net/udp.h>
+#include <linux/inetdevice.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <net/sock.h>
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 4b0d7e4d626..f29a12da510 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -40,12 +40,12 @@
  */
  
 #include <linux/config.h> 
+#include <linux/types.h>
 #include <asm/atomic.h>
 #include <asm/byteorder.h>
 #include <asm/current.h>
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
-#include <linux/types.h>
 #include <linux/stddef.h>
 #include <linux/slab.h>
 #include <linux/errno.h>
@@ -255,6 +255,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
 		kfree_skb(skb);
 		return NET_RX_DROP;
 	}
+	nf_reset(skb);
 
 	skb_push(skb, skb->data - skb->nh.raw);
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f701a136a6a..d82c242ea70 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -240,9 +240,9 @@ static unsigned			rt_hash_mask;
 static int			rt_hash_log;
 static unsigned int		rt_hash_rnd;
 
-static struct rt_cache_stat *rt_cache_stat;
-#define RT_CACHE_STAT_INC(field)					  \
-		(per_cpu_ptr(rt_cache_stat, raw_smp_processor_id())->field++)
+static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
+#define RT_CACHE_STAT_INC(field) \
+	(per_cpu(rt_cache_stat, raw_smp_processor_id()).field++)
 
 static int rt_intern_hash(unsigned hash, struct rtable *rth,
 				struct rtable **res);
@@ -401,7 +401,7 @@ static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
 		if (!cpu_possible(cpu))
 			continue;
 		*pos = cpu+1;
-		return per_cpu_ptr(rt_cache_stat, cpu);
+		return &per_cpu(rt_cache_stat, cpu);
 	}
 	return NULL;
 }
@@ -414,7 +414,7 @@ static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 		if (!cpu_possible(cpu))
 			continue;
 		*pos = cpu+1;
-		return per_cpu_ptr(rt_cache_stat, cpu);
+		return &per_cpu(rt_cache_stat, cpu);
 	}
 	return NULL;
 	
@@ -3160,10 +3160,6 @@ int __init ip_rt_init(void)
 	ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1);
 	ip_rt_max_size = (rt_hash_mask + 1) * 16;
 
-	rt_cache_stat = alloc_percpu(struct rt_cache_stat);
-	if (!rt_cache_stat)
-		return -ENOMEM;
-
 	devinet_init();
 	ip_fib_init();
 
@@ -3191,7 +3187,6 @@ int __init ip_rt_init(void)
 	if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) ||
 	    !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO, 
 			    		     proc_net_stat))) {
-		free_percpu(rt_cache_stat);
 		return -ENOMEM;
 	}
 	rtstat_pde->proc_fops = &rt_cpu_seq_fops;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index a34e60ea48a..e20be3331f6 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -173,10 +173,10 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 					   struct request_sock *req,
 					   struct dst_entry *dst)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct sock *child;
 
-	child = tp->af_specific->syn_recv_sock(sk, skb, req, dst);
+	child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
 	if (child)
 		inet_csk_reqsk_queue_add(sk, req, child);
 	else
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 01444a02b48..16984d4a8a0 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -12,6 +12,7 @@
 #include <linux/sysctl.h>
 #include <linux/config.h>
 #include <linux/igmp.h>
+#include <linux/inetdevice.h>
 #include <net/snmp.h>
 #include <net/icmp.h>
 #include <net/ip.h>
@@ -22,6 +23,7 @@
 extern int sysctl_ip_nonlocal_bind;
 
 #ifdef CONFIG_SYSCTL
+static int zero;
 static int tcp_retr1_max = 255; 
 static int ip_local_port_range_min[] = { 1, 1 };
 static int ip_local_port_range_max[] = { 65535, 65535 };
@@ -614,6 +616,15 @@ ctl_table ipv4_table[] = {
 		.strategy	= &sysctl_jiffies
 	},
 	{
+		.ctl_name	= NET_IPV4_IPFRAG_MAX_DIST,
+		.procname	= "ipfrag_max_dist",
+		.data		= &sysctl_ipfrag_max_dist,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.extra1		= &zero
+	},
+	{
 		.ctl_name	= NET_TCP_NO_METRICS_SAVE,
 		.procname	= "tcp_no_metrics_save",
 		.data		= &sysctl_tcp_nometrics_save,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ef98b14ac56..00aa80e9324 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1696,8 +1696,8 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
 	int err = 0;
 
 	if (level != SOL_TCP)
-		return tp->af_specific->setsockopt(sk, level, optname,
-						   optval, optlen);
+		return icsk->icsk_af_ops->setsockopt(sk, level, optname,
+						     optval, optlen);
 
 	/* This is a string value all the others are int's */
 	if (optname == TCP_CONGESTION) {
@@ -1914,7 +1914,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime);
 	info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp);
 
-	info->tcpi_pmtu = tp->pmtu_cookie;
+	info->tcpi_pmtu = icsk->icsk_pmtu_cookie;
 	info->tcpi_rcv_ssthresh = tp->rcv_ssthresh;
 	info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3;
 	info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2;
@@ -1939,8 +1939,8 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
 	int val, len;
 
 	if (level != SOL_TCP)
-		return tp->af_specific->getsockopt(sk, level, optname,
-						   optval, optlen);
+		return icsk->icsk_af_ops->getsockopt(sk, level, optname,
+						     optval, optlen);
 
 	if (get_user(len, optlen))
 		return -EFAULT;
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 1d0cd86621b..035f2092d73 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -30,8 +30,6 @@ static int fast_convergence = 1;
 static int max_increment = 16;
 static int low_window = 14;
 static int beta = 819;		/* = 819/1024 (BICTCP_BETA_SCALE) */
-static int low_utilization_threshold = 153;
-static int low_utilization_period = 2;
 static int initial_ssthresh = 100;
 static int smooth_part = 20;
 
@@ -43,10 +41,6 @@ module_param(low_window, int, 0644);
 MODULE_PARM_DESC(low_window, "lower bound on congestion window (for TCP friendliness)");
 module_param(beta, int, 0644);
 MODULE_PARM_DESC(beta, "beta for multiplicative increase");
-module_param(low_utilization_threshold, int, 0644);
-MODULE_PARM_DESC(low_utilization_threshold, "percent (scaled by 1024) for low utilization mode");
-module_param(low_utilization_period, int, 0644);
-MODULE_PARM_DESC(low_utilization_period, "if average delay exceeds then goto to low utilization mode (seconds)");
 module_param(initial_ssthresh, int, 0644);
 MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");
 module_param(smooth_part, int, 0644);
@@ -60,11 +54,6 @@ struct bictcp {
 	u32	loss_cwnd;	/* congestion window at last loss */
 	u32	last_cwnd;	/* the last snd_cwnd */
 	u32	last_time;	/* time when updated last_cwnd */
-	u32	delay_min;	/* min delay */
-	u32	delay_max;	/* max delay */
-	u32	last_delay;
-	u8	low_utilization;/* 0: high; 1: low */
-	u32	low_utilization_start;	/* starting time of low utilization detection*/
 	u32	epoch_start;	/* beginning of an epoch */
 #define ACK_RATIO_SHIFT	4
 	u32	delayed_ack;	/* estimate the ratio of Packets/ACKs << 4 */
@@ -77,11 +66,6 @@ static inline void bictcp_reset(struct bictcp *ca)
 	ca->loss_cwnd = 0;
 	ca->last_cwnd = 0;
 	ca->last_time = 0;
-	ca->delay_min = 0;
-	ca->delay_max = 0;
-	ca->last_delay = 0;
-	ca->low_utilization = 0;
-	ca->low_utilization_start = 0;
 	ca->epoch_start = 0;
 	ca->delayed_ack = 2 << ACK_RATIO_SHIFT;
 }
@@ -143,8 +127,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
 	}
 
 	/* if in slow start or link utilization is very low */
-	if ( ca->loss_cwnd == 0 ||
-	     (cwnd > ca->loss_cwnd && ca->low_utilization)) {
+	if (ca->loss_cwnd == 0) {
 		if (ca->cnt > 20) /* increase cwnd 5% per RTT */
 			ca->cnt = 20;
 	}
@@ -154,69 +137,12 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
 		ca->cnt = 1;
 }
 
-
-/* Detect low utilization in congestion avoidance */
-static inline void bictcp_low_utilization(struct sock *sk, int flag)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	struct bictcp *ca = inet_csk_ca(sk);
-	u32 dist, delay;
-
-	/* No time stamp */
-	if (!(tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) ||
-	     /* Discard delay samples right after fast recovery */
-	     tcp_time_stamp < ca->epoch_start + HZ ||
-	     /* this delay samples may not be accurate */
-	     flag == 0) {
-		ca->last_delay = 0;
-		goto notlow;
-	}
-
-	delay = ca->last_delay<<3;	/* use the same scale as tp->srtt*/
-	ca->last_delay = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
-	if (delay == 0) 		/* no previous delay sample */
-		goto notlow;
-
-	/* first time call or link delay decreases */
-	if (ca->delay_min == 0 || ca->delay_min > delay) {
-		ca->delay_min = ca->delay_max = delay;
-		goto notlow;
-	}
-
-	if (ca->delay_max < delay)
-		ca->delay_max = delay;
-
-	/* utilization is low, if avg delay < dist*threshold
-	   for checking_period time */
-	dist = ca->delay_max - ca->delay_min;
-	if (dist <= ca->delay_min>>6 ||
-	    tp->srtt - ca->delay_min >=  (dist*low_utilization_threshold)>>10)
-		goto notlow;
-
-	if (ca->low_utilization_start == 0) {
-		ca->low_utilization = 0;
-		ca->low_utilization_start = tcp_time_stamp;
-	} else if ((s32)(tcp_time_stamp - ca->low_utilization_start)
-			> low_utilization_period*HZ) {
-		ca->low_utilization = 1;
-	}
-
-	return;
-
- notlow:
-	ca->low_utilization = 0;
-	ca->low_utilization_start = 0;
-
-}
-
 static void bictcp_cong_avoid(struct sock *sk, u32 ack,
 			      u32 seq_rtt, u32 in_flight, int data_acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bictcp *ca = inet_csk_ca(sk);
 
-	bictcp_low_utilization(sk, data_acked);
-
 	if (!tcp_is_cwnd_limited(sk, in_flight))
 		return;
 
@@ -249,11 +175,6 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk)
 
 	ca->epoch_start = 0;	/* end of epoch */
 
-	/* in case of wrong delay_max*/
-	if (ca->delay_min > 0 && ca->delay_max > ca->delay_min)
-		ca->delay_max = ca->delay_min
-			+ ((ca->delay_max - ca->delay_min)* 90) / 100;
-
 	/* Wmax and fast convergence */
 	if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
 		ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
@@ -289,14 +210,14 @@ static void bictcp_state(struct sock *sk, u8 new_state)
 		bictcp_reset(inet_csk_ca(sk));
 }
 
-/* Track delayed acknowledgement ratio using sliding window
+/* Track delayed acknowledgment ratio using sliding window
  * ratio = (15*ratio + sample) / 16
  */
 static void bictcp_acked(struct sock *sk, u32 cnt)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 
-	if (cnt > 0 && 	icsk->icsk_ca_state == TCP_CA_Open) {
+	if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) {
 		struct bictcp *ca = inet_csk_ca(sk);
 		cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT;
 		ca->delayed_ack += cnt;
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index c7cc62c8dc1..e688c687d62 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -174,6 +174,34 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
 	return err;
 }
 
+
+/*
+ * Slow start: grow cwnd by one segment per call (by two with ABC, L=2*SMSS)
+ */
+void tcp_slow_start(struct tcp_sock *tp)
+{
+	if (sysctl_tcp_abc) {
+		/* RFC3465: Slow Start
+		 * TCP sender SHOULD increase cwnd by the number of
+		 * previously unacknowledged bytes ACKed by each incoming
+		 * acknowledgment, provided the increase is not more than L
+		 */
+		if (tp->bytes_acked < tp->mss_cache)
+			return;	/* less than one full segment ACKed so far */
+
+		/* We MAY increase by 2 once two full segments were ACKed */
+		if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache) {
+			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+				tp->snd_cwnd++;
+		}
+	}
+	tp->bytes_acked = 0;	/* restart byte counting for the next ACKs */
+
+	if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+		tp->snd_cwnd++;
+}
+EXPORT_SYMBOL_GPL(tcp_slow_start);
+
 /*
  * TCP Reno congestion control
  * This is special case used for fallback as well.
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
new file mode 100644
index 00000000000..31a4986dfbf
--- /dev/null
+++ b/net/ipv4/tcp_cubic.c
@@ -0,0 +1,411 @@
+/*
+ * TCP CUBIC: Binary Increase Congestion control for TCP v2.0
+ *
+ * This is from the implementation of CUBIC TCP in
+ * Injong Rhee, Lisong Xu.
+ *  "CUBIC: A New TCP-Friendly High-Speed TCP Variant"
+ *  in PFLDnet 2005
+ * Available from:
+ *  http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/cubic-paper.pdf
+ *
+ * Unless CUBIC is enabled and congestion window is large
+ * this behaves the same as the original Reno.
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <net/tcp.h>
+#include <asm/div64.h>
+
+#define BICTCP_BETA_SCALE    1024	/* Scale factor beta calculation
+					 * max_cwnd = snd_cwnd * beta
+					 */
+#define BICTCP_B		4	 /*
+					  * In binary search,
+					  * go to point (max+min)/N
+					  */
+#define	BICTCP_HZ		10	/* BIC HZ 2^10 = 1024 */
+
+static int fast_convergence = 1;
+static int max_increment = 16;
+static int beta = 819;		/* = 819/1024 (BICTCP_BETA_SCALE) */
+static int initial_ssthresh = 100;
+static int bic_scale = 41;
+static int tcp_friendliness = 1;
+
+static u32 cube_rtt_scale;
+static u32 beta_scale;
+static u64 cube_factor;
+
+/* Note parameters that are used for precomputing scale factors are read-only */
+module_param(fast_convergence, int, 0644);
+MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence");
+module_param(max_increment, int, 0644);
+MODULE_PARM_DESC(max_increment, "Limit on increment allowed during binary search");
+module_param(beta, int, 0444);
+MODULE_PARM_DESC(beta, "beta for multiplicative increase");
+module_param(initial_ssthresh, int, 0644);
+MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");
+module_param(bic_scale, int, 0444);
+MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)");
+module_param(tcp_friendliness, int, 0644);
+MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
+
+#include <asm/div64.h>
+
+/* CUBIC per-connection parameters, stored in the area returned by inet_csk_ca() */
+struct bictcp {
+	u32	cnt;		/* number of ACKs needed before cwnd is increased by 1 */
+	u32 	last_max_cwnd;	/* last maximum snd_cwnd (lowered by fast convergence) */
+	u32	loss_cwnd;	/* congestion window at last loss; 0 = none since reset */
+	u32	last_cwnd;	/* the snd_cwnd seen by the last recalculation */
+	u32	last_time;	/* tcp_time_stamp when last_cwnd was updated */
+	u32	bic_origin_point;/* origin point of the cubic function (a cwnd value) */
+	u32	bic_K;		/* time to origin point from the beginning of the current epoch, in 2^-BICTCP_HZ units */
+	u32	delay_min;	/* smallest delay sample seen; 0 = no sample yet */
+	u32	epoch_start;	/* tcp_time_stamp at the beginning of an epoch; 0 = not started */
+	u32	ack_cnt;	/* ACK counter feeding the tcp_cwnd estimate */
+	u32	tcp_cwnd;	/* estimated cwnd of standard TCP (for tcp_friendliness) */
+#define ACK_RATIO_SHIFT	4
+	u32	delayed_ack;	/* estimate the ratio of Packets/ACKs << 4 */
+};
+
+static inline void bictcp_reset(struct bictcp *ca)
+{
+	ca->cnt = 0;
+	ca->last_max_cwnd = 0;
+	ca->loss_cwnd = 0;		/* 0: no loss recorded since this reset */
+	ca->last_cwnd = 0;
+	ca->last_time = 0;
+	ca->bic_origin_point = 0;
+	ca->bic_K = 0;
+	ca->delay_min = 0;		/* 0: no delay sample yet */
+	ca->epoch_start = 0;		/* 0: the next bictcp_update() starts an epoch */
+	ca->delayed_ack = 2 << ACK_RATIO_SHIFT;	/* initial estimate: 2 packets per ACK */
+	ca->ack_cnt = 0;
+	ca->tcp_cwnd = 0;
+}
+
+static void bictcp_init(struct sock *sk)
+{
+	bictcp_reset(inet_csk_ca(sk));	/* start from a clean CUBIC state */
+	if (initial_ssthresh)		/* module parameter; 0 leaves snd_ssthresh untouched */
+		tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
+}
+
+/* 64bit divisor, dividend and result. dynamic precision: divisors above 32 bit are approximated; divisor is not checked for 0 */
+static inline u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
+{
+	u_int32_t d = divisor;
+
+	if (divisor > 0xffffffffULL) {
+		unsigned int shift = fls(divisor >> 32);	/* bits in use above bit 31 */
+
+		d = divisor >> shift;	/* shrink both operands so d fits in 32 bit */
+		dividend >>= shift;
+	}
+
+	/* avoid 64 bit division if possible */
+	if (dividend >> 32)
+		do_div(dividend, d);	/* do_div() leaves the quotient in dividend */
+	else
+		dividend = (uint32_t) dividend / d;
+
+	return dividend;
+}
+
+/*
+ * Cube root of a by Newton-Raphson; x is squared in 64 bit so it cannot wrap
+ */
+static u32 cubic_root(u64 a)
+{
+	u32 x, x1;
+
+	/* Initial estimate is based on:
+	 * cbrt(a) = exp(log(a) / 3), here 2^(bit length of a / 3)
+	 */
+	x = 1u << (fls64(a)/3);
+
+	/*
+	 * Iteration based on:
+	 *                         2
+	 * x    = ( 2 * x  +  a / x  ) / 3
+	 *  k+1          k         k
+	 */
+	do {
+		x1 = x;
+		x = (2 * x + (uint32_t) div64_64(a, (u64)x * x)) / 3;
+	} while (abs(x1 - x) > 1);	/* stop once estimates differ by <= 1 */
+
+	return x;
+}
+
+/*
+ * Compute ca->cnt, the number of ACKs required per one-segment cwnd increase.
+ */
+static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
+{
+	u64 offs;
+	u32 delta, t, bic_target, min_cnt, max_cnt;
+
+	ca->ack_cnt++;	/* count the number of ACKs */
+
+	if (ca->last_cwnd == cwnd &&
+	    (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32)
+		return;	/* same cwnd, recomputed at most HZ/32 ticks ago */
+
+	ca->last_cwnd = cwnd;
+	ca->last_time = tcp_time_stamp;
+
+	if (ca->epoch_start == 0) {
+		ca->epoch_start = tcp_time_stamp;	/* record the beginning of an epoch */
+		ca->ack_cnt = 1;			/* start counting */
+		ca->tcp_cwnd = cwnd;			/* sync with cubic */
+
+		if (ca->last_max_cwnd <= cwnd) {
+			ca->bic_K = 0;
+			ca->bic_origin_point = cwnd;
+		} else {
+			/* Compute new K based on
+			 * (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3*bictcp_HZ)
+			 */
+			ca->bic_K = cubic_root(cube_factor
+					       * (ca->last_max_cwnd - cwnd));
+			ca->bic_origin_point = ca->last_max_cwnd;
+		}
+	}
+
+	/* cubic function - calc */
+	/* calculate c * time^3 / rtt,
+	 *  while considering overflow in calculation of time^3
+	 * (so time^3 is done by using 64 bit)
+	 * and without the support of division of 64bit numbers
+	 * (so all divisions are done by using 32 bit)
+	 *  also NOTE the unit of those variables
+	 *	  time  = (t - K) / 2^bictcp_HZ
+	 *	  c = bic_scale >> 10
+	 * rtt  = (srtt >> 3) / HZ
+	 * !!! The following code does not have overflow problems,
+	 * if the cwnd < 1 million packets !!!
+	 */
+
+	/* change the unit from HZ to bictcp_HZ */
+	t = ((tcp_time_stamp + ca->delay_min - ca->epoch_start)
+	     << BICTCP_HZ) / HZ;
+
+	if (t < ca->bic_K)		/* t - K */
+		offs = ca->bic_K - t;
+	else
+		offs = t - ca->bic_K;
+
+	/* c/rtt * (t-K)^3 */
+	delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ);
+	if (t < ca->bic_K)				/* below origin */
+		bic_target = ca->bic_origin_point - delta;
+	else						/* above origin */
+		bic_target = ca->bic_origin_point + delta;
+
+	/* cubic function - calc bictcp_cnt */
+	if (bic_target > cwnd) {
+		ca->cnt = cwnd / (bic_target - cwnd);
+	} else {
+		ca->cnt = 100 * cwnd;		/* very small increment */
+	}
+
+	if (ca->delay_min > 0 && max_increment > 0) {	/* both end up in the divisor */
+		/* max increment = Smax * rtt / 0.1  */
+		min_cnt = (cwnd * HZ * 8)/(10 * max_increment * ca->delay_min);
+		if (ca->cnt < min_cnt)
+			ca->cnt = min_cnt;
+	}
+
+	/* no loss recorded yet */
+	if (ca->loss_cwnd == 0)		/* could be aggressive in slow start */
+		ca->cnt = 50;
+
+	/* TCP Friendly */
+	if (tcp_friendliness) {
+		u32 scale = beta_scale;
+		delta = (cwnd * scale) >> 3;
+		while (delta && ca->ack_cnt > delta) {	/* update tcp cwnd; delta 0 would never end */
+			ca->ack_cnt -= delta;
+			ca->tcp_cwnd++;
+		}
+
+		if (ca->tcp_cwnd > cwnd){	/* if bic is slower than tcp */
+			delta = ca->tcp_cwnd - cwnd;
+			max_cnt = cwnd / delta;
+			if (ca->cnt > max_cnt)
+				ca->cnt = max_cnt;
+		}
+	}
+
+	ca->cnt = (ca->cnt << ACK_RATIO_SHIFT) / ca->delayed_ack;
+	if (ca->cnt == 0)			/* cannot be zero */
+		ca->cnt = 1;
+}
+
+
+/* Keep track of minimum rtt: the smallest timestamp-echo delay goes to ca->delay_min */
+static inline void measure_delay(struct sock *sk)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+	struct bictcp *ca = inet_csk_ca(sk);
+	u32 delay;
+
+	/* No time stamp (or no echoed value) on this ACK */
+	if (!(tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) ||
+	     /* Discard delay samples right after fast recovery (first HZ ticks of the epoch) */
+	    (s32)(tcp_time_stamp - ca->epoch_start) < HZ)
+		return;
+
+	delay = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
+	if (delay == 0)
+		delay = 1;	/* 0 in delay_min means "no sample yet" */
+
+	/* first time call or link delay decreases */
+	if (ca->delay_min == 0 || ca->delay_min > delay)
+		ca->delay_min = delay;
+}
+
+static void bictcp_cong_avoid(struct sock *sk, u32 ack,
+			      u32 seq_rtt, u32 in_flight, int data_acked)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bictcp *ca = inet_csk_ca(sk);
+
+	if (data_acked)		/* only sample the delay when data_acked is set */
+		measure_delay(sk);
+
+	if (!tcp_is_cwnd_limited(sk, in_flight))
+		return;		/* not cwnd limited: leave cwnd alone */
+
+	if (tp->snd_cwnd <= tp->snd_ssthresh)
+		tcp_slow_start(tp);
+	else {
+		bictcp_update(ca, tp->snd_cwnd);	/* refresh ca->cnt */
+
+		/* Congestion avoidance: increase slowly.
+		 * cwnd grows by one once snd_cwnd_cnt has reached ca->cnt
+		 */
+		if (tp->snd_cwnd_cnt >= ca->cnt) {
+			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+				tp->snd_cwnd++;
+			tp->snd_cwnd_cnt = 0;
+		} else
+			tp->snd_cwnd_cnt++;
+	}
+
+}
+
+static u32 bictcp_recalc_ssthresh(struct sock *sk)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+	struct bictcp *ca = inet_csk_ca(sk);
+
+	ca->epoch_start = 0;	/* end of epoch */
+
+	/* Wmax and fast convergence: below the old maximum, remember less than cwnd */
+	if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
+		ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
+			/ (2 * BICTCP_BETA_SCALE);
+	else
+		ca->last_max_cwnd = tp->snd_cwnd;
+
+	ca->loss_cwnd = tp->snd_cwnd;	/* non-zero from now on: a loss was recorded */
+
+	return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);	/* cwnd * beta/1024, at least 2 */
+}
+
+static u32 bictcp_undo_cwnd(struct sock *sk)
+{
+	struct bictcp *ca = inet_csk_ca(sk);
+
+	return max(tcp_sk(sk)->snd_cwnd, ca->last_max_cwnd);	/* larger of current cwnd and last maximum */
+}
+
+static u32 bictcp_min_cwnd(struct sock *sk)
+{
+	return tcp_sk(sk)->snd_ssthresh;	/* the current ssthresh is reported as minimum cwnd */
+}
+
+static void bictcp_state(struct sock *sk, u8 new_state)
+{
+	if (new_state == TCP_CA_Loss)	/* entering Loss: drop all CUBIC history */
+		bictcp_reset(inet_csk_ca(sk));
+}
+
+/* Track delayed acknowledgment ratio using sliding window
+ * ratio = (15*ratio + sample) / 16, kept as ratio << ACK_RATIO_SHIFT in delayed_ack
+ */
+static void bictcp_acked(struct sock *sk, u32 cnt)
+{
+	const struct inet_connection_sock *icsk = inet_csk(sk);
+
+	if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) {
+		struct bictcp *ca = inet_csk_ca(sk);
+		cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT;	/* sample - ratio; u32 may wrap */
+		ca->delayed_ack += cnt;	/* the wrap cancels: delayed_ack += sample - ratio */
+	}
+}
+
+
+static struct tcp_congestion_ops cubictcp = {
+	.init		= bictcp_init,
+	.ssthresh	= bictcp_recalc_ssthresh,	/* also ends the current epoch */
+	.cong_avoid	= bictcp_cong_avoid,
+	.set_state	= bictcp_state,		/* resets the state on TCP_CA_Loss */
+	.undo_cwnd	= bictcp_undo_cwnd,
+	.min_cwnd	= bictcp_min_cwnd,
+	.pkts_acked     = bictcp_acked,		/* feeds the delayed_ack estimate */
+	.owner		= THIS_MODULE,
+	.name		= "cubic",		/* name passed to the registration call */
+};
+
+static int __init cubictcp_register(void)
+{
+	BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
+
+	/* beta and bic_scale feed the divisors below: refuse unusable values */
+	if (bic_scale <= 0 || beta < 0 || beta >= BICTCP_BETA_SCALE)
+		return -EINVAL;
+
+	/* Precompute the per-packet scaling factors, based on SRTT of 100ms */
+	beta_scale = 8*(BICTCP_BETA_SCALE+beta)/ 3 / (BICTCP_BETA_SCALE - beta);
+	cube_rtt_scale = (bic_scale << 3) / 10;	/* 1024*c/rtt */
+
+	/* calculate the "K" for (wmax-cwnd) = c/rtt * K^3
+	 *  so K = cubic_root( (wmax-cwnd)*rtt/c )
+	 * the unit of K is bictcp_HZ=2^10, not HZ
+	 *
+	 *  c = bic_scale >> 10
+	 *  rtt = 100ms
+	 *
+	 * the following code has been designed and tested for
+	 * cwnd < 1 million packets
+	 * RTT < 100 seconds
+	 * HZ < 1,000,00  (corresponding to 10 nano-second)
+	 */
+
+	/* 1/c * 2^(10+3*bictcp_HZ) * srtt: start from the power of two */
+	cube_factor = 1ull << (10+3*BICTCP_HZ); /* 2^40 */
+
+	/* divide by bic_scale and by constant Srtt (100ms) */
+	do_div(cube_factor, bic_scale * 10);
+
+	return tcp_register_congestion_control(&cubictcp);
+}
+
+static void __exit cubictcp_unregister(void)
+{
+	tcp_unregister_congestion_control(&cubictcp);	/* counterpart of cubictcp_register() */
+}
+
+module_init(cubictcp_register);
+module_exit(cubictcp_unregister);
+
+MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("CUBIC TCP");
+MODULE_VERSION("2.0");
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bf2e23086bc..a97ed5416c2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -115,8 +115,8 @@ int sysctl_tcp_abc = 1;
 /* Adapt the MSS value used to make delayed ack decision to the 
  * real world.
  */ 
-static inline void tcp_measure_rcv_mss(struct sock *sk,
-				       const struct sk_buff *skb)
+static void tcp_measure_rcv_mss(struct sock *sk,
+				const struct sk_buff *skb)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	const unsigned int lss = icsk->icsk_ack.last_seg_size; 
@@ -246,8 +246,8 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
 	return 0;
 }
 
-static inline void tcp_grow_window(struct sock *sk, struct tcp_sock *tp,
-				   struct sk_buff *skb)
+static void tcp_grow_window(struct sock *sk, struct tcp_sock *tp,
+			    struct sk_buff *skb)
 {
 	/* Check #1 */
 	if (tp->rcv_ssthresh < tp->window_clamp &&
@@ -341,6 +341,26 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
 		tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss);
 }
 
+
+/* Initialize RCV_MSS value.
+ * RCV_MSS is our guess about the MSS used by the peer.
+ * We do not have any direct information about that MSS.
+ * It's better to underestimate the RCV_MSS rather than overestimate.
+ * Overestimation makes us ACK less frequently than needed.
+ * Underestimation is easier to detect and fix by tcp_measure_rcv_mss().
+ */
+void tcp_initialize_rcv_mss(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
+
+	hint = min(hint, tp->rcv_wnd/2);	/* at most half the receive window */
+	hint = min(hint, TCP_MIN_RCVMSS);	/* capped at TCP_MIN_RCVMSS */
+	hint = max(hint, TCP_MIN_MSS);		/* but never below TCP_MIN_MSS */
+
+	inet_csk(sk)->icsk_ack.rcv_mss = hint;
+}
+
 /* Receiver "autotuning" code.
  *
  * The algorithm for RTT estimation w/o timestamps is based on
@@ -735,6 +755,27 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
 	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
 }
 
+/* Set slow start threshold and reduce cwnd, without falling back to slow start */
+void tcp_enter_cwr(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	tp->prior_ssthresh = 0;
+	tp->bytes_acked = 0;
+	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {	/* only from states below CWR */
+		tp->undo_marker = 0;
+		tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);	/* ask the congestion control ops */
+		tp->snd_cwnd = min(tp->snd_cwnd,
+				   tcp_packets_in_flight(tp) + 1U);	/* no more than in flight + 1 */
+		tp->snd_cwnd_cnt = 0;
+		tp->high_seq = tp->snd_nxt;
+		tp->snd_cwnd_stamp = tcp_time_stamp;
+		TCP_ECN_queue_cwr(tp);
+
+		tcp_set_ca_state(sk, TCP_CA_CWR);
+	}
+}
+
 /* Initialize metrics on socket. */
 
 static void tcp_init_metrics(struct sock *sk)
@@ -2070,8 +2111,8 @@ static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
 		tcp_ack_no_tstamp(sk, seq_rtt, flag);
 }
 
-static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
-				  u32 in_flight, int good)
+static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
+			   u32 in_flight, int good)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, good);
@@ -2082,7 +2123,7 @@ static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
  * RFC2988 recommends to restart timer to now+rto.
  */
 
-static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp)
+static void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp)
 {
 	if (!tp->packets_out) {
 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
@@ -2147,7 +2188,7 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
 	return acked;
 }
 
-static inline u32 tcp_usrtt(const struct sk_buff *skb)
+static u32 tcp_usrtt(const struct sk_buff *skb)
 {
 	struct timeval tv, now;
 
@@ -2342,7 +2383,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
 
 			if (nwin > tp->max_window) {
 				tp->max_window = nwin;
-				tcp_sync_mss(sk, tp->pmtu_cookie);
+				tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
 			}
 		}
 	}
@@ -2583,8 +2624,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 /* Fast parse options. This hopes to only see timestamps.
  * If it is wrong it falls back on tcp_parse_options().
  */
-static inline int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
-					 struct tcp_sock *tp)
+static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
+				  struct tcp_sock *tp)
 {
 	if (th->doff == sizeof(struct tcphdr)>>2) {
 		tp->rx_opt.saw_tstamp = 0;
@@ -2804,8 +2845,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
 	}
 }
 
-static __inline__ int
-tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq)
+static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq)
 {
 	if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
 		if (before(seq, sp->start_seq))
@@ -2817,7 +2857,7 @@ tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq)
 	return 0;
 }
 
-static inline void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq)
+static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq)
 {
 	if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) {
 		if (before(seq, tp->rcv_nxt))
@@ -2832,7 +2872,7 @@ static inline void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq)
 	}
 }
 
-static inline void tcp_dsack_extend(struct tcp_sock *tp, u32 seq, u32 end_seq)
+static void tcp_dsack_extend(struct tcp_sock *tp, u32 seq, u32 end_seq)
 {
 	if (!tp->rx_opt.dsack)
 		tcp_dsack_set(tp, seq, end_seq);
@@ -2890,7 +2930,7 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
 	}
 }
 
-static __inline__ void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2)
+static inline void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2)
 {
 	__u32 tmp;
 
@@ -3307,7 +3347,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 			int offset = start - TCP_SKB_CB(skb)->seq;
 			int size = TCP_SKB_CB(skb)->end_seq - start;
 
-			if (offset < 0) BUG();
+			BUG_ON(offset < 0);
 			if (size > 0) {
 				size = min(copy, size);
 				if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
@@ -3455,7 +3495,7 @@ void tcp_cwnd_application_limited(struct sock *sk)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-static inline int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp)
+static int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp)
 {
 	/* If the user specified a specific send buffer setting, do
 	 * not modify it.
@@ -3502,7 +3542,7 @@ static void tcp_new_space(struct sock *sk)
 	sk->sk_write_space(sk);
 }
 
-static inline void tcp_check_space(struct sock *sk)
+static void tcp_check_space(struct sock *sk)
 {
 	if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
 		sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
@@ -3512,7 +3552,7 @@ static inline void tcp_check_space(struct sock *sk)
 	}
 }
 
-static __inline__ void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp)
+static inline void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp)
 {
 	tcp_push_pending_frames(sk, tp);
 	tcp_check_space(sk);
@@ -3544,7 +3584,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
 	}
 }
 
-static __inline__ void tcp_ack_snd_check(struct sock *sk)
+static inline void tcp_ack_snd_check(struct sock *sk)
 {
 	if (!inet_csk_ack_scheduled(sk)) {
 		/* We sent a data segment already. */
@@ -3692,8 +3732,7 @@ static int __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
 	return result;
 }
 
-static __inline__ int
-tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
+static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
 {
 	return skb->ip_summed != CHECKSUM_UNNECESSARY &&
 		__tcp_checksum_complete_user(sk, skb);
@@ -3967,12 +4006,12 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 					 struct tcphdr *th, unsigned len)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	int saved_clamp = tp->rx_opt.mss_clamp;
 
 	tcp_parse_options(skb, &tp->rx_opt, 0);
 
 	if (th->ack) {
-		struct inet_connection_sock *icsk;
 		/* rfc793:
 		 * "If the state is SYN-SENT then
 		 *    first check the ACK bit
@@ -4061,7 +4100,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		if (tp->rx_opt.sack_ok && sysctl_tcp_fack)
 			tp->rx_opt.sack_ok |= 2;
 
-		tcp_sync_mss(sk, tp->pmtu_cookie);
+		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 		tcp_initialize_rcv_mss(sk);
 
 		/* Remember, tcp_poll() does not lock socket!
@@ -4072,7 +4111,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		tcp_set_state(sk, TCP_ESTABLISHED);
 
 		/* Make sure socket is routed, for correct metrics.  */
-		tp->af_specific->rebuild_header(sk);
+		icsk->icsk_af_ops->rebuild_header(sk);
 
 		tcp_init_metrics(sk);
 
@@ -4098,8 +4137,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 			sk_wake_async(sk, 0, POLL_OUT);
 		}
 
-		icsk = inet_csk(sk);
-
 		if (sk->sk_write_pending ||
 		    icsk->icsk_accept_queue.rskq_defer_accept ||
 		    icsk->icsk_ack.pingpong) {
@@ -4173,7 +4210,7 @@ discard:
 		if (tp->ecn_flags&TCP_ECN_OK)
 			sock_set_flag(sk, SOCK_NO_LARGESEND);
 
-		tcp_sync_mss(sk, tp->pmtu_cookie);
+		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 		tcp_initialize_rcv_mss(sk);
 
 
@@ -4220,6 +4257,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 			  struct tcphdr *th, unsigned len)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	int queued = 0;
 
 	tp->rx_opt.saw_tstamp = 0;
@@ -4236,7 +4274,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 			goto discard;
 
 		if(th->syn) {
-			if(tp->af_specific->conn_request(sk, skb) < 0)
+			if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
 				return 1;
 
 			/* Now we have several options: In theory there is 
@@ -4349,7 +4387,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 				/* Make sure socket is routed, for
 				 * correct metrics.
 				 */
-				tp->af_specific->rebuild_header(sk);
+				icsk->icsk_af_ops->rebuild_header(sk);
 
 				tcp_init_metrics(sk);
 
@@ -4475,3 +4513,4 @@ EXPORT_SYMBOL(sysctl_tcp_abc);
 EXPORT_SYMBOL(tcp_parse_options);
 EXPORT_SYMBOL(tcp_rcv_established);
 EXPORT_SYMBOL(tcp_rcv_state_process);
+EXPORT_SYMBOL(tcp_initialize_rcv_mss);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4d5021e1929..6ea353907af 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -69,6 +69,7 @@
 #include <net/transp_v6.h>
 #include <net/ipv6.h>
 #include <net/inet_common.h>
+#include <net/timewait_sock.h>
 #include <net/xfrm.h>
 
 #include <linux/inet.h>
@@ -86,8 +87,7 @@ int sysctl_tcp_low_latency;
 /* Socket used for sending RSTs */
 static struct socket *tcp_socket;
 
-void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
-		       struct sk_buff *skb);
+void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
 
 struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
 	.lhash_lock	= RW_LOCK_UNLOCKED,
@@ -97,7 +97,8 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
 
 static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
 {
-	return inet_csk_get_port(&tcp_hashinfo, sk, snum);
+	return inet_csk_get_port(&tcp_hashinfo, sk, snum,
+				 inet_csk_bind_conflict);
 }
 
 static void tcp_v4_hash(struct sock *sk)
@@ -118,202 +119,38 @@ static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
 					  skb->h.th->source);
 }
 
-/* called with local bh disabled */
-static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
-				      struct inet_timewait_sock **twp)
+int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 {
-	struct inet_sock *inet = inet_sk(sk);
-	u32 daddr = inet->rcv_saddr;
-	u32 saddr = inet->daddr;
-	int dif = sk->sk_bound_dev_if;
-	INET_ADDR_COOKIE(acookie, saddr, daddr)
-	const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
-	unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
-	struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
-	struct sock *sk2;
-	const struct hlist_node *node;
-	struct inet_timewait_sock *tw;
-
-	prefetch(head->chain.first);
-	write_lock(&head->lock);
-
-	/* Check TIME-WAIT sockets first. */
-	sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
-		tw = inet_twsk(sk2);
-
-		if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
-			const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
-			struct tcp_sock *tp = tcp_sk(sk);
-
-			/* With PAWS, it is safe from the viewpoint
-			   of data integrity. Even without PAWS it
-			   is safe provided sequence spaces do not
-			   overlap i.e. at data rates <= 80Mbit/sec.
-
-			   Actually, the idea is close to VJ's one,
-			   only timestamp cache is held not per host,
-			   but per port pair and TW bucket is used
-			   as state holder.
+	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
+	struct tcp_sock *tp = tcp_sk(sk);
 
-			   If TW bucket has been already destroyed we
-			   fall back to VJ's scheme and use initial
-			   timestamp retrieved from peer table.
-			 */
-			if (tcptw->tw_ts_recent_stamp &&
-			    (!twp || (sysctl_tcp_tw_reuse &&
-				      xtime.tv_sec -
-				      tcptw->tw_ts_recent_stamp > 1))) {
-				tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
-				if (tp->write_seq == 0)
-					tp->write_seq = 1;
-				tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
-				tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
-				sock_hold(sk2);
-				goto unique;
-			} else
-				goto not_unique;
-		}
-	}
-	tw = NULL;
+	/* With PAWS, it is safe from the viewpoint
+	   of data integrity. Even without PAWS it is safe provided sequence
+	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.
 
-	/* And established part... */
-	sk_for_each(sk2, node, &head->chain) {
-		if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
-			goto not_unique;
-	}
+	   Actually, the idea is close to VJ's one, only timestamp cache is
+	   held not per host, but per port pair and TW bucket is used as state
+	   holder.
 
-unique:
-	/* Must record num and sport now. Otherwise we will see
-	 * in hash table socket with a funny identity. */
-	inet->num = lport;
-	inet->sport = htons(lport);
-	sk->sk_hash = hash;
-	BUG_TRAP(sk_unhashed(sk));
-	__sk_add_node(sk, &head->chain);
-	sock_prot_inc_use(sk->sk_prot);
-	write_unlock(&head->lock);
-
-	if (twp) {
-		*twp = tw;
-		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
-	} else if (tw) {
-		/* Silly. Should hash-dance instead... */
-		inet_twsk_deschedule(tw, &tcp_death_row);
-		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
-
-		inet_twsk_put(tw);
+	   If TW bucket has been already destroyed we fall back to VJ's scheme
+	   and use initial timestamp retrieved from peer table.
+	 */
+	if (tcptw->tw_ts_recent_stamp &&
+	    (twp == NULL || (sysctl_tcp_tw_reuse &&
+			     xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
+		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
+		if (tp->write_seq == 0)
+			tp->write_seq = 1;
+		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
+		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
+		sock_hold(sktw);
+		return 1;
 	}
 
 	return 0;
-
-not_unique:
-	write_unlock(&head->lock);
-	return -EADDRNOTAVAIL;
 }
 
-static inline u32 connect_port_offset(const struct sock *sk)
-{
-	const struct inet_sock *inet = inet_sk(sk);
-
-	return secure_tcp_port_ephemeral(inet->rcv_saddr, inet->daddr, 
-					 inet->dport);
-}
-
-/*
- * Bind a port for a connect operation and hash it.
- */
-static inline int tcp_v4_hash_connect(struct sock *sk)
-{
-	const unsigned short snum = inet_sk(sk)->num;
- 	struct inet_bind_hashbucket *head;
- 	struct inet_bind_bucket *tb;
-	int ret;
-
- 	if (!snum) {
- 		int low = sysctl_local_port_range[0];
- 		int high = sysctl_local_port_range[1];
-		int range = high - low;
- 		int i;
-		int port;
-		static u32 hint;
-		u32 offset = hint + connect_port_offset(sk);
-		struct hlist_node *node;
- 		struct inet_timewait_sock *tw = NULL;
-
- 		local_bh_disable();
-		for (i = 1; i <= range; i++) {
-			port = low + (i + offset) % range;
- 			head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
- 			spin_lock(&head->lock);
-
- 			/* Does not bother with rcv_saddr checks,
- 			 * because the established check is already
- 			 * unique enough.
- 			 */
-			inet_bind_bucket_for_each(tb, node, &head->chain) {
- 				if (tb->port == port) {
- 					BUG_TRAP(!hlist_empty(&tb->owners));
- 					if (tb->fastreuse >= 0)
- 						goto next_port;
- 					if (!__tcp_v4_check_established(sk,
-									port,
-									&tw))
- 						goto ok;
- 					goto next_port;
- 				}
- 			}
-
- 			tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
- 			if (!tb) {
- 				spin_unlock(&head->lock);
- 				break;
- 			}
- 			tb->fastreuse = -1;
- 			goto ok;
-
- 		next_port:
- 			spin_unlock(&head->lock);
- 		}
- 		local_bh_enable();
-
- 		return -EADDRNOTAVAIL;
-
-ok:
-		hint += i;
-
- 		/* Head lock still held and bh's disabled */
- 		inet_bind_hash(sk, tb, port);
-		if (sk_unhashed(sk)) {
- 			inet_sk(sk)->sport = htons(port);
- 			__inet_hash(&tcp_hashinfo, sk, 0);
- 		}
- 		spin_unlock(&head->lock);
-
- 		if (tw) {
- 			inet_twsk_deschedule(tw, &tcp_death_row);;
- 			inet_twsk_put(tw);
- 		}
-
-		ret = 0;
-		goto out;
- 	}
-
- 	head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
- 	tb  = inet_csk(sk)->icsk_bind_hash;
-	spin_lock_bh(&head->lock);
-	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
-		__inet_hash(&tcp_hashinfo, sk, 0);
-		spin_unlock_bh(&head->lock);
-		return 0;
-	} else {
-		spin_unlock(&head->lock);
-		/* No definite answer... Walk to established hash table */
-		ret = __tcp_v4_check_established(sk, snum, NULL);
-out:
-		local_bh_enable();
-		return ret;
-	}
-}
+EXPORT_SYMBOL_GPL(tcp_twsk_unique);
 
 /* This will initiate an outgoing connection. */
 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
@@ -383,9 +220,9 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	inet->dport = usin->sin_port;
 	inet->daddr = daddr;
 
-	tp->ext_header_len = 0;
+	inet_csk(sk)->icsk_ext_hdr_len = 0;
 	if (inet->opt)
-		tp->ext_header_len = inet->opt->optlen;
+		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
 
 	tp->rx_opt.mss_clamp = 536;
 
@@ -395,7 +232,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	 * complete initialization after this.
 	 */
 	tcp_set_state(sk, TCP_SYN_SENT);
-	err = tcp_v4_hash_connect(sk);
+	err = inet_hash_connect(&tcp_death_row, sk);
 	if (err)
 		goto failure;
 
@@ -433,12 +270,10 @@ failure:
 /*
  * This routine does path mtu discovery as defined in RFC1191.
  */
-static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph,
-				     u32 mtu)
+static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
 {
 	struct dst_entry *dst;
 	struct inet_sock *inet = inet_sk(sk);
-	struct tcp_sock *tp = tcp_sk(sk);
 
 	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
 	 * send out by Linux are always <576bytes so they should go through
@@ -467,7 +302,7 @@ static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph,
 	mtu = dst_mtu(dst);
 
 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
-	    tp->pmtu_cookie > mtu) {
+	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
 		tcp_sync_mss(sk, mtu);
 
 		/* Resend the TCP packet because it's
@@ -644,10 +479,10 @@ out:
 }
 
 /* This routine computes an IPv4 TCP checksum. */
-void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
-		       struct sk_buff *skb)
+void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
 {
 	struct inet_sock *inet = inet_sk(sk);
+	struct tcphdr *th = skb->h.th;
 
 	if (skb->ip_summed == CHECKSUM_HW) {
 		th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
@@ -826,7 +661,8 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
 	kfree(inet_rsk(req)->opt);
 }
 
-static inline void syn_flood_warning(struct sk_buff *skb)
+#ifdef CONFIG_SYN_COOKIES
+static void syn_flood_warning(struct sk_buff *skb)
 {
 	static unsigned long warntime;
 
@@ -837,12 +673,13 @@ static inline void syn_flood_warning(struct sk_buff *skb)
 		       ntohs(skb->h.th->dest));
 	}
 }
+#endif
 
 /*
  * Save and compile IPv4 options into the request_sock if needed.
  */
-static inline struct ip_options *tcp_v4_save_options(struct sock *sk,
-						     struct sk_buff *skb)
+static struct ip_options *tcp_v4_save_options(struct sock *sk,
+					      struct sk_buff *skb)
 {
 	struct ip_options *opt = &(IPCB(skb)->opt);
 	struct ip_options *dopt = NULL;
@@ -869,6 +706,11 @@ struct request_sock_ops tcp_request_sock_ops = {
 	.send_reset	=	tcp_v4_send_reset,
 };
 
+static struct timewait_sock_ops tcp_timewait_sock_ops = {
+	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
+	.twsk_unique	= tcp_twsk_unique,
+};
+
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	struct inet_request_sock *ireq;
@@ -1053,9 +895,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	ireq->opt	      = NULL;
 	newinet->mc_index     = inet_iif(skb);
 	newinet->mc_ttl	      = skb->nh.iph->ttl;
-	newtp->ext_header_len = 0;
+	inet_csk(newsk)->icsk_ext_hdr_len = 0;
 	if (newinet->opt)
-		newtp->ext_header_len = newinet->opt->optlen;
+		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
 	newinet->id = newtp->write_seq ^ jiffies;
 
 	tcp_sync_mss(newsk, dst_mtu(dst));
@@ -1238,6 +1080,7 @@ process:
 
 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
 		goto discard_and_relse;
+	nf_reset(skb);
 
 	if (sk_filter(sk, skb, 0))
 		goto discard_and_relse;
@@ -1314,16 +1157,6 @@ do_time_wait:
 	goto discard_it;
 }
 
-static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
-{
-	struct sockaddr_in *sin = (struct sockaddr_in *) uaddr;
-	struct inet_sock *inet = inet_sk(sk);
-
-	sin->sin_family		= AF_INET;
-	sin->sin_addr.s_addr	= inet->daddr;
-	sin->sin_port		= inet->dport;
-}
-
 /* VJ's idea. Save last timestamp seen from this destination
  * and hold it at least for normal timewait interval to use for duplicate
  * segment detection in subsequent connections, before they enter synchronized
@@ -1382,7 +1215,7 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
 	return 0;
 }
 
-struct tcp_func ipv4_specific = {
+struct inet_connection_sock_af_ops ipv4_specific = {
 	.queue_xmit	=	ip_queue_xmit,
 	.send_check	=	tcp_v4_send_check,
 	.rebuild_header	=	inet_sk_rebuild_header,
@@ -1392,7 +1225,7 @@ struct tcp_func ipv4_specific = {
 	.net_header_len	=	sizeof(struct iphdr),
 	.setsockopt	=	ip_setsockopt,
 	.getsockopt	=	ip_getsockopt,
-	.addr2sockaddr	=	v4_addr2sockaddr,
+	.addr2sockaddr	=	inet_csk_addr2sockaddr,
 	.sockaddr_len	=	sizeof(struct sockaddr_in),
 };
 
@@ -1433,7 +1266,8 @@ static int tcp_v4_init_sock(struct sock *sk)
 	sk->sk_write_space = sk_stream_write_space;
 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
 
-	tp->af_specific = &ipv4_specific;
+	icsk->icsk_af_ops = &ipv4_specific;
+	icsk->icsk_sync_mss = tcp_sync_mss;
 
 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
@@ -1989,7 +1823,7 @@ struct proto tcp_prot = {
 	.sysctl_rmem		= sysctl_tcp_rmem,
 	.max_header		= MAX_TCP_HEADER,
 	.obj_size		= sizeof(struct tcp_sock),
-	.twsk_obj_size		= sizeof(struct tcp_timewait_sock),
+	.twsk_prot		= &tcp_timewait_sock_ops,
 	.rsk_prot		= &tcp_request_sock_ops,
 };
 
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 1b66a2ac432..2b9b7f6c7f7 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -274,18 +274,18 @@ kill:
 void tcp_time_wait(struct sock *sk, int state, int timeo)
 {
 	struct inet_timewait_sock *tw = NULL;
+	const struct inet_connection_sock *icsk = inet_csk(sk);
 	const struct tcp_sock *tp = tcp_sk(sk);
 	int recycle_ok = 0;
 
 	if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
-		recycle_ok = tp->af_specific->remember_stamp(sk);
+		recycle_ok = icsk->icsk_af_ops->remember_stamp(sk);
 
 	if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets)
 		tw = inet_twsk_alloc(sk, state);
 
 	if (tw != NULL) {
 		struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
-		const struct inet_connection_sock *icsk = inet_csk(sk);
 		const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
 
 		tw->tw_rcv_wscale	= tp->rx_opt.rcv_wscale;
@@ -298,10 +298,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 		if (tw->tw_family == PF_INET6) {
 			struct ipv6_pinfo *np = inet6_sk(sk);
-			struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
+			struct inet6_timewait_sock *tw6;
 
-			ipv6_addr_copy(&tcp6tw->tw_v6_daddr, &np->daddr);
-			ipv6_addr_copy(&tcp6tw->tw_v6_rcv_saddr, &np->rcv_saddr);
+			tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
+			tw6 = inet6_twsk((struct sock *)tw);
+			ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr);
+			ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr);
 			tw->tw_ipv6only = np->ipv6only;
 		}
 #endif
@@ -456,7 +458,6 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 			   struct request_sock **prev)
 {
 	struct tcphdr *th = skb->h.th;
-	struct tcp_sock *tp = tcp_sk(sk);
 	u32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
 	int paws_reject = 0;
 	struct tcp_options_received tmp_opt;
@@ -613,7 +614,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 		 * ESTABLISHED STATE. If it will be dropped after
 		 * socket is created, wait for troubles.
 		 */
-		child = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
+		child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb,
+								 req, NULL);
 		if (child == NULL)
 			goto listen_overflow;
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 029c70dfb58..a7623ead39a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -51,8 +51,8 @@ int sysctl_tcp_retrans_collapse = 1;
  */
 int sysctl_tcp_tso_win_divisor = 3;
 
-static inline void update_send_head(struct sock *sk, struct tcp_sock *tp,
-				    struct sk_buff *skb)
+static void update_send_head(struct sock *sk, struct tcp_sock *tp,
+			     struct sk_buff *skb)
 {
 	sk->sk_send_head = skb->next;
 	if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue)
@@ -124,8 +124,8 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst)
 	tp->snd_cwnd_used = 0;
 }
 
-static inline void tcp_event_data_sent(struct tcp_sock *tp,
-				       struct sk_buff *skb, struct sock *sk)
+static void tcp_event_data_sent(struct tcp_sock *tp,
+				struct sk_buff *skb, struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	const u32 now = tcp_time_stamp;
@@ -142,7 +142,7 @@ static inline void tcp_event_data_sent(struct tcp_sock *tp,
 		icsk->icsk_ack.pingpong = 1;
 }
 
-static __inline__ void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
+static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
 {
 	tcp_dec_quickack_mode(sk, pkts);
 	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
@@ -212,7 +212,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
  * value can be stuffed directly into th->window for an outgoing
  * frame.
  */
-static __inline__ u16 tcp_select_window(struct sock *sk)
+static u16 tcp_select_window(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 cur_win = tcp_receive_window(tp);
@@ -250,6 +250,75 @@ static __inline__ u16 tcp_select_window(struct sock *sk)
 	return new_win;
 }
 
+static void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp,
+					 __u32 tstamp)
+{
+	if (tp->rx_opt.tstamp_ok) {
+		*ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
+					  (TCPOPT_NOP << 16) |
+					  (TCPOPT_TIMESTAMP << 8) |
+					  TCPOLEN_TIMESTAMP);
+		*ptr++ = htonl(tstamp);
+		*ptr++ = htonl(tp->rx_opt.ts_recent);
+	}
+	if (tp->rx_opt.eff_sacks) {
+		struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks;
+		int this_sack;
+
+		*ptr++ = htonl((TCPOPT_NOP  << 24) |
+			       (TCPOPT_NOP  << 16) |
+			       (TCPOPT_SACK <<  8) |
+			       (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks *
+						     TCPOLEN_SACK_PERBLOCK)));
+		for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
+			*ptr++ = htonl(sp[this_sack].start_seq);
+			*ptr++ = htonl(sp[this_sack].end_seq);
+		}
+		if (tp->rx_opt.dsack) {
+			tp->rx_opt.dsack = 0;
+			tp->rx_opt.eff_sacks--;
+		}
+	}
+}
+
+/* Construct a tcp options header for a SYN or SYN_ACK packet.
+ * If this is ever changed make sure to change the definition of
+ * MAX_SYN_SIZE to match the new maximum number of options that you
+ * can generate.
+ */
+static void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack,
+				  int offer_wscale, int wscale, __u32 tstamp,
+				  __u32 ts_recent)
+{
+	/* We always get an MSS option.
+	 * The option bytes which will be seen in normal data
+	 * packets should timestamps be used, must be in the MSS
+	 * advertised.  But we subtract them from tp->mss_cache so
+	 * that calculations in tcp_sendmsg are simpler etc.
+	 * So account for this fact here if necessary.  If we
+	 * don't do this correctly, as a receiver we won't
+	 * recognize data packets as being full sized when we
+	 * should, and thus we won't abide by the delayed ACK
+	 * rules correctly.
+	 * SACKs don't matter, we never delay an ACK when we
+	 * have any of those going out.
+	 */
+	*ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
+	if (ts) {
+		if(sack)
+			*ptr++ = __constant_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) |
+						  (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
+		else
+			*ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+						  (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
+		*ptr++ = htonl(tstamp);		/* TSVAL */
+		*ptr++ = htonl(ts_recent);	/* TSECR */
+	} else if(sack)
+		*ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+					  (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM);
+	if (offer_wscale)
+		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale));
+}
 
 /* This routine actually transmits TCP packets queued in by
  * tcp_do_sendmsg().  This is used by both the initial
@@ -262,122 +331,139 @@ static __inline__ u16 tcp_select_window(struct sock *sk)
  * We are working here with either a clone of the original
  * SKB, or a fresh unique copy made by the retransmit engine.
  */
-static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
+static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, gfp_t gfp_mask)
 {
-	if (skb != NULL) {
-		const struct inet_connection_sock *icsk = inet_csk(sk);
-		struct inet_sock *inet = inet_sk(sk);
-		struct tcp_sock *tp = tcp_sk(sk);
-		struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
-		int tcp_header_size = tp->tcp_header_len;
-		struct tcphdr *th;
-		int sysctl_flags;
-		int err;
+	const struct inet_connection_sock *icsk = inet_csk(sk);
+	struct inet_sock *inet;
+	struct tcp_sock *tp;
+	struct tcp_skb_cb *tcb;
+	int tcp_header_size;
+	struct tcphdr *th;
+	int sysctl_flags;
+	int err;
+
+	BUG_ON(!skb || !tcp_skb_pcount(skb));
+
+	/* If congestion control is doing timestamping, we must
+	 * take such a timestamp before we potentially clone/copy.
+	 */
+	if (icsk->icsk_ca_ops->rtt_sample)
+		__net_timestamp(skb);
+
+	if (likely(clone_it)) {
+		if (unlikely(skb_cloned(skb)))
+			skb = pskb_copy(skb, gfp_mask);
+		else
+			skb = skb_clone(skb, gfp_mask);
+		if (unlikely(!skb))
+			return -ENOBUFS;
+	}
 
-		BUG_ON(!tcp_skb_pcount(skb));
+	inet = inet_sk(sk);
+	tp = tcp_sk(sk);
+	tcb = TCP_SKB_CB(skb);
+	tcp_header_size = tp->tcp_header_len;
 
 #define SYSCTL_FLAG_TSTAMPS	0x1
 #define SYSCTL_FLAG_WSCALE	0x2
 #define SYSCTL_FLAG_SACK	0x4
 
-		/* If congestion control is doing timestamping */
-		if (icsk->icsk_ca_ops->rtt_sample)
-			__net_timestamp(skb);
-
-		sysctl_flags = 0;
-		if (tcb->flags & TCPCB_FLAG_SYN) {
-			tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
-			if(sysctl_tcp_timestamps) {
-				tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
-				sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
-			}
-			if(sysctl_tcp_window_scaling) {
-				tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
-				sysctl_flags |= SYSCTL_FLAG_WSCALE;
-			}
-			if(sysctl_tcp_sack) {
-				sysctl_flags |= SYSCTL_FLAG_SACK;
-				if(!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
-					tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
-			}
-		} else if (tp->rx_opt.eff_sacks) {
-			/* A SACK is 2 pad bytes, a 2 byte header, plus
-			 * 2 32-bit sequence numbers for each SACK block.
-			 */
-			tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
-					    (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
+	sysctl_flags = 0;
+	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
+		tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
+		if(sysctl_tcp_timestamps) {
+			tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
+			sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
 		}
-		
-		if (tcp_packets_in_flight(tp) == 0)
-			tcp_ca_event(sk, CA_EVENT_TX_START);
-
-		th = (struct tcphdr *) skb_push(skb, tcp_header_size);
-		skb->h.th = th;
-		skb_set_owner_w(skb, sk);
-
-		/* Build TCP header and checksum it. */
-		th->source		= inet->sport;
-		th->dest		= inet->dport;
-		th->seq			= htonl(tcb->seq);
-		th->ack_seq		= htonl(tp->rcv_nxt);
-		*(((__u16 *)th) + 6)	= htons(((tcp_header_size >> 2) << 12) | tcb->flags);
-		if (tcb->flags & TCPCB_FLAG_SYN) {
-			/* RFC1323: The window in SYN & SYN/ACK segments
-			 * is never scaled.
-			 */
-			th->window	= htons(tp->rcv_wnd);
-		} else {
-			th->window	= htons(tcp_select_window(sk));
+		if (sysctl_tcp_window_scaling) {
+			tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
+			sysctl_flags |= SYSCTL_FLAG_WSCALE;
 		}
-		th->check		= 0;
-		th->urg_ptr		= 0;
-
-		if (tp->urg_mode &&
-		    between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF)) {
-			th->urg_ptr		= htons(tp->snd_up-tcb->seq);
-			th->urg			= 1;
+		if (sysctl_tcp_sack) {
+			sysctl_flags |= SYSCTL_FLAG_SACK;
+			if (!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
+				tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
 		}
+	} else if (unlikely(tp->rx_opt.eff_sacks)) {
+		/* A SACK is 2 pad bytes, a 2 byte header, plus
+		 * 2 32-bit sequence numbers for each SACK block.
+		 */
+		tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
+				    (tp->rx_opt.eff_sacks *
+				     TCPOLEN_SACK_PERBLOCK));
+	}
+		
+	if (tcp_packets_in_flight(tp) == 0)
+		tcp_ca_event(sk, CA_EVENT_TX_START);
+
+	th = (struct tcphdr *) skb_push(skb, tcp_header_size);
+	skb->h.th = th;
+	skb_set_owner_w(skb, sk);
+
+	/* Build TCP header and checksum it. */
+	th->source		= inet->sport;
+	th->dest		= inet->dport;
+	th->seq			= htonl(tcb->seq);
+	th->ack_seq		= htonl(tp->rcv_nxt);
+	*(((__u16 *)th) + 6)	= htons(((tcp_header_size >> 2) << 12) |
+					tcb->flags);
+
+	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
+		/* RFC1323: The window in SYN & SYN/ACK segments
+		 * is never scaled.
+		 */
+		th->window	= htons(tp->rcv_wnd);
+	} else {
+		th->window	= htons(tcp_select_window(sk));
+	}
+	th->check		= 0;
+	th->urg_ptr		= 0;
 
-		if (tcb->flags & TCPCB_FLAG_SYN) {
-			tcp_syn_build_options((__u32 *)(th + 1),
-					      tcp_advertise_mss(sk),
-					      (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
-					      (sysctl_flags & SYSCTL_FLAG_SACK),
-					      (sysctl_flags & SYSCTL_FLAG_WSCALE),
-					      tp->rx_opt.rcv_wscale,
-					      tcb->when,
-		      			      tp->rx_opt.ts_recent);
-		} else {
-			tcp_build_and_update_options((__u32 *)(th + 1),
-						     tp, tcb->when);
+	if (unlikely(tp->urg_mode &&
+		     between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF))) {
+		th->urg_ptr		= htons(tp->snd_up-tcb->seq);
+		th->urg			= 1;
+	}
 
-			TCP_ECN_send(sk, tp, skb, tcp_header_size);
-		}
-		tp->af_specific->send_check(sk, th, skb->len, skb);
+	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
+		tcp_syn_build_options((__u32 *)(th + 1),
+				      tcp_advertise_mss(sk),
+				      (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
+				      (sysctl_flags & SYSCTL_FLAG_SACK),
+				      (sysctl_flags & SYSCTL_FLAG_WSCALE),
+				      tp->rx_opt.rcv_wscale,
+				      tcb->when,
+				      tp->rx_opt.ts_recent);
+	} else {
+		tcp_build_and_update_options((__u32 *)(th + 1),
+					     tp, tcb->when);
+		TCP_ECN_send(sk, tp, skb, tcp_header_size);
+	}
 
-		if (tcb->flags & TCPCB_FLAG_ACK)
-			tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
+	icsk->icsk_af_ops->send_check(sk, skb->len, skb);
 
-		if (skb->len != tcp_header_size)
-			tcp_event_data_sent(tp, skb, sk);
+	if (likely(tcb->flags & TCPCB_FLAG_ACK))
+		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
 
-		TCP_INC_STATS(TCP_MIB_OUTSEGS);
+	if (skb->len != tcp_header_size)
+		tcp_event_data_sent(tp, skb, sk);
 
-		err = tp->af_specific->queue_xmit(skb, 0);
-		if (err <= 0)
-			return err;
+	TCP_INC_STATS(TCP_MIB_OUTSEGS);
 
-		tcp_enter_cwr(sk);
+	err = icsk->icsk_af_ops->queue_xmit(skb, 0);
+	if (unlikely(err <= 0))
+		return err;
+
+	tcp_enter_cwr(sk);
+
+	/* NET_XMIT_CN is special. It does not guarantee,
+	 * that this packet is lost. It tells that device
+	 * is about to start to drop packets or already
+	 * drops some packets of the same priority and
+	 * invokes us to send less aggressively.
+	 */
+	return err == NET_XMIT_CN ? 0 : err;
 
-		/* NET_XMIT_CN is special. It does not guarantee,
-		 * that this packet is lost. It tells that device
-		 * is about to start to drop packets or already
-		 * drops some packets of the same priority and
-		 * invokes us to send less aggressively.
-		 */
-		return err == NET_XMIT_CN ? 0 : err;
-	}
-	return -ENOBUFS;
 #undef SYSCTL_FLAG_TSTAMPS
 #undef SYSCTL_FLAG_WSCALE
 #undef SYSCTL_FLAG_SACK
@@ -604,7 +690,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
    It is minimum of user_mss and mss received with SYN.
    It also does not include TCP options.
 
-   tp->pmtu_cookie is last pmtu, seen by this function.
+   inet_csk(sk)->icsk_pmtu_cookie is last pmtu, seen by this function.
 
    tp->mss_cache is current effective sending mss, including
    all tcp options except for SACKs. It is evaluated,
@@ -614,26 +700,26 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
    NOTE1. rfc1122 clearly states that advertised MSS
    DOES NOT include either tcp or ip options.
 
-   NOTE2. tp->pmtu_cookie and tp->mss_cache are READ ONLY outside
-   this function.			--ANK (980731)
+   NOTE2. inet_csk(sk)->icsk_pmtu_cookie and tp->mss_cache
+   are READ ONLY outside this function.		--ANK (980731)
  */
 
 unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	int mss_now;
-
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	/* Calculate base mss without TCP options:
 	   It is MMS_S - sizeof(tcphdr) of rfc1122
 	 */
-	mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct tcphdr);
+	int mss_now = (pmtu - icsk->icsk_af_ops->net_header_len -
+		       sizeof(struct tcphdr));
 
 	/* Clamp it (mss_clamp does not include tcp options) */
 	if (mss_now > tp->rx_opt.mss_clamp)
 		mss_now = tp->rx_opt.mss_clamp;
 
 	/* Now subtract optional transport overhead */
-	mss_now -= tp->ext_header_len;
+	mss_now -= icsk->icsk_ext_hdr_len;
 
 	/* Then reserve room for full set of TCP options and 8 bytes of data */
 	if (mss_now < 48)
@@ -647,7 +733,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
 		mss_now = max((tp->max_window>>1), 68U - tp->tcp_header_len);
 
 	/* And store cached results */
-	tp->pmtu_cookie = pmtu;
+	icsk->icsk_pmtu_cookie = pmtu;
 	tp->mss_cache = mss_now;
 
 	return mss_now;
@@ -677,7 +763,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 
 	if (dst) {
 		u32 mtu = dst_mtu(dst);
-		if (mtu != tp->pmtu_cookie)
+		if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
 			mss_now = tcp_sync_mss(sk, mtu);
 	}
 
@@ -688,9 +774,10 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 	xmit_size_goal = mss_now;
 
 	if (doing_tso) {
-		xmit_size_goal = 65535 -
-			tp->af_specific->net_header_len -
-			tp->ext_header_len - tp->tcp_header_len;
+		xmit_size_goal = (65535 -
+				  inet_csk(sk)->icsk_af_ops->net_header_len -
+				  inet_csk(sk)->icsk_ext_hdr_len -
+				  tp->tcp_header_len);
 
 		if (tp->max_window &&
 		    (xmit_size_goal > (tp->max_window >> 1)))
@@ -706,7 +793,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 
 /* Congestion window validation. (RFC2861) */
 
-static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
+static void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
 {
 	__u32 packets_out = tp->packets_out;
 
@@ -755,7 +842,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *sk
 /* This must be invoked the first time we consider transmitting
  * SKB onto the wire.
  */
-static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
+static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
 {
 	int tso_segs = tcp_skb_pcount(skb);
 
@@ -1036,7 +1123,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 
 		TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
-		if (unlikely(tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC))))
+		if (unlikely(tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC)))
 			break;
 
 		/* Advance the send_head.  This one is sent out.
@@ -1109,7 +1196,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
 		/* Send it out now. */
 		TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
-		if (likely(!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation)))) {
+		if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) {
 			update_send_head(sk, tp, skb);
 			tcp_cwnd_validate(sk, tp);
 			return;
@@ -1405,7 +1492,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	   (sysctl_tcp_retrans_collapse != 0))
 		tcp_retrans_try_collapse(sk, skb, cur_mss);
 
-	if(tp->af_specific->rebuild_header(sk))
+	if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
 		return -EHOSTUNREACH; /* Routing failure or similar. */
 
 	/* Some Solaris stacks overoptimize and ignore the FIN on a
@@ -1429,9 +1516,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	 */
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
-	err = tcp_transmit_skb(sk, (skb_cloned(skb) ?
-				    pskb_copy(skb, GFP_ATOMIC):
-				    skb_clone(skb, GFP_ATOMIC)));
+	err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 
 	if (err == 0) {
 		/* Update global TCP statistics. */
@@ -1665,7 +1750,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 	TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
-	if (tcp_transmit_skb(sk, skb))
+	if (tcp_transmit_skb(sk, skb, 0, priority))
 		NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
 }
 
@@ -1700,7 +1785,7 @@ int tcp_send_synack(struct sock *sk)
 		TCP_ECN_send_synack(tcp_sk(sk), skb);
 	}
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
-	return tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
+	return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 }
 
 /*
@@ -1778,7 +1863,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 /* 
  * Do all connect socket setups that can be done AF independent.
  */ 
-static inline void tcp_connect_init(struct sock *sk)
+static void tcp_connect_init(struct sock *sk)
 {
 	struct dst_entry *dst = __sk_dst_get(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -1861,7 +1946,7 @@ int tcp_connect(struct sock *sk)
 	__skb_queue_tail(&sk->sk_write_queue, buff);
 	sk_charge_skb(sk, buff);
 	tp->packets_out += tcp_skb_pcount(buff);
-	tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL));
+	tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
 	TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
 
 	/* Timer for repeating the SYN until an answer. */
@@ -1957,7 +2042,7 @@ void tcp_send_ack(struct sock *sk)
 		/* Send it off, this clears delayed acks for us. */
 		TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
 		TCP_SKB_CB(buff)->when = tcp_time_stamp;
-		tcp_transmit_skb(sk, buff);
+		tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
 	}
 }
 
@@ -1997,7 +2082,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
 	TCP_SKB_CB(skb)->seq = urgent ? tp->snd_una : tp->snd_una - 1;
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
-	return tcp_transmit_skb(sk, skb);
+	return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
 }
 
 int tcp_write_wakeup(struct sock *sk)
@@ -2030,7 +2115,7 @@ int tcp_write_wakeup(struct sock *sk)
 
 			TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
 			TCP_SKB_CB(skb)->when = tcp_time_stamp;
-			err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
+			err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 			if (!err) {
 				update_send_head(sk, tp, skb);
 			}
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index b7d296a8ac6..3b740349505 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -215,14 +215,6 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
 		vegas->beg_snd_nxt  = tp->snd_nxt;
 		vegas->beg_snd_cwnd = tp->snd_cwnd;
 
-		/* Take into account the current RTT sample too, to
-		 * decrease the impact of delayed acks. This double counts
-		 * this sample since we count it for the next window as well,
-		 * but that's not too awful, since we're taking the min,
-		 * rather than averaging.
-		 */
-		tcp_vegas_rtt_calc(sk, seq_rtt * 1000);
-
 		/* We do the Vegas calculations only if we got enough RTT
 		 * samples that we can be reasonably sure that we got
 		 * at least one RTT sample that wasn't from a delayed ACK.
@@ -333,11 +325,15 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
 			else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
 				tp->snd_cwnd = tp->snd_cwnd_clamp;
 		}
-	}
 
-	/* Wipe the slate clean for the next RTT. */
-	vegas->cntRTT = 0;
-	vegas->minRTT = 0x7fffffff;
+		/* Wipe the slate clean for the next RTT. */
+		vegas->cntRTT = 0;
+		vegas->minRTT = 0x7fffffff;
+	}
+	/* Use normal slow start */
+	else if (tp->snd_cwnd <= tp->snd_ssthresh) 
+		tcp_slow_start(tp);
+	
 }
 
 /* Extract info for Tcp socket info provided via netlink. */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 2422a5f7195..00840474a44 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -86,6 +86,7 @@
 #include <linux/module.h>
 #include <linux/socket.h>
 #include <linux/sockios.h>
+#include <linux/igmp.h>
 #include <linux/in.h>
 #include <linux/errno.h>
 #include <linux/timer.h>
@@ -846,20 +847,7 @@ out:
 csum_copy_err:
 	UDP_INC_STATS_BH(UDP_MIB_INERRORS);
 
-	/* Clear queue. */
-	if (flags&MSG_PEEK) {
-		int clear = 0;
-		spin_lock_bh(&sk->sk_receive_queue.lock);
-		if (skb == skb_peek(&sk->sk_receive_queue)) {
-			__skb_unlink(skb, &sk->sk_receive_queue);
-			clear = 1;
-		}
-		spin_unlock_bh(&sk->sk_receive_queue.lock);
-		if (clear)
-			kfree_skb(skb);
-	}
-
-	skb_free_datagram(sk, skb);
+	skb_kill_datagram(sk, skb, flags);
 
 	if (noblock)
 		return -EAGAIN;	
@@ -1001,6 +989,7 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 		kfree_skb(skb);
 		return -1;
 	}
+	nf_reset(skb);
 
 	if (up->encap_type) {
 		/*
@@ -1094,7 +1083,7 @@ static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh,
  * Otherwise, csum completion requires chacksumming packet body,
  * including udp header and folding it to skb->csum.
  */
-static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
+static void udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
 			     unsigned short ulen, u32 saddr, u32 daddr)
 {
 	if (uh->check == 0) {
@@ -1108,7 +1097,6 @@ static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
 	/* Probably, we should checksum udp header (it should be in cache
 	 * in any case) and data in tiny packets (< rx copybreak).
 	 */
-	return 0;
 }
 
 /*
@@ -1141,8 +1129,7 @@ int udp_rcv(struct sk_buff *skb)
 	if (pskb_trim_rcsum(skb, ulen))
 		goto short_packet;
 
-	if (udp_checksum_init(skb, uh, ulen, saddr, daddr) < 0)
-		goto csum_error;
+	udp_checksum_init(skb, uh, ulen, saddr, daddr);
 
 	if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
 		return udp_v4_mcast_deliver(skb, uh, saddr, daddr);
@@ -1163,6 +1150,7 @@ int udp_rcv(struct sk_buff *skb)
 
 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
 		goto drop;
+	nf_reset(skb);
 
 	/* No socket. Drop packet silently, if checksum is wrong */
 	if (udp_checksum_complete(skb))
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 2d3849c38a0..850d919591d 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -11,6 +11,8 @@
 
 #include <linux/module.h>
 #include <linux/string.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
 #include <net/inet_ecn.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
@@ -45,6 +47,23 @@ static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq)
 	return xfrm_parse_spi(skb, nexthdr, spi, seq);
 }
 
+#ifdef CONFIG_NETFILTER
+static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
+{
+	struct iphdr *iph = skb->nh.iph;
+
+	if (skb->dst == NULL) {
+		if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
+		                   skb->dev))
+			goto drop;
+	}
+	return dst_input(skb);
+drop:
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
+#endif
+
 int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
 {
 	int err;
@@ -137,6 +156,8 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
 	memcpy(skb->sp->x+skb->sp->len, xfrm_vec, xfrm_nr*sizeof(struct sec_decap_state));
 	skb->sp->len += xfrm_nr;
 
+	nf_reset(skb);
+
 	if (decaps) {
 		if (!(skb->dev->flags&IFF_LOOPBACK)) {
 			dst_release(skb->dst);
@@ -145,7 +166,17 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
 		netif_rx(skb);
 		return 0;
 	} else {
+#ifdef CONFIG_NETFILTER
+		__skb_push(skb, skb->data - skb->nh.raw);
+		skb->nh.iph->tot_len = htons(skb->len);
+		ip_send_check(skb->nh.iph);
+
+		NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
+		        xfrm4_rcv_encap_finish);
+		return 0;
+#else
 		return -skb->nh.iph->protocol;
+#endif
 	}
 
 drop_unlock:
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 66620a95942..d4df0ddd424 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -8,8 +8,10 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/compiler.h>
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
+#include <linux/netfilter_ipv4.h>
 #include <net/inet_ecn.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
@@ -95,7 +97,7 @@ out:
 	return ret;
 }
 
-int xfrm4_output(struct sk_buff *skb)
+static int xfrm4_output_one(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb->dst;
 	struct xfrm_state *x = dst->xfrm;
@@ -113,27 +115,33 @@ int xfrm4_output(struct sk_buff *skb)
 			goto error_nolock;
 	}
 
-	spin_lock_bh(&x->lock);
-	err = xfrm_state_check(x, skb);
-	if (err)
-		goto error;
+	do {
+		spin_lock_bh(&x->lock);
+		err = xfrm_state_check(x, skb);
+		if (err)
+			goto error;
 
-	xfrm4_encap(skb);
+		xfrm4_encap(skb);
 
-	err = x->type->output(x, skb);
-	if (err)
-		goto error;
+		err = x->type->output(x, skb);
+		if (err)
+			goto error;
 
-	x->curlft.bytes += skb->len;
-	x->curlft.packets++;
+		x->curlft.bytes += skb->len;
+		x->curlft.packets++;
 
-	spin_unlock_bh(&x->lock);
+		spin_unlock_bh(&x->lock);
 	
-	if (!(skb->dst = dst_pop(dst))) {
-		err = -EHOSTUNREACH;
-		goto error_nolock;
-	}
-	err = NET_XMIT_BYPASS;
+		if (!(skb->dst = dst_pop(dst))) {
+			err = -EHOSTUNREACH;
+			goto error_nolock;
+		}
+		dst = skb->dst;
+		x = dst->xfrm;
+	} while (x && !x->props.mode);
+
+	IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
+	err = 0;
 
 out_exit:
 	return err;
@@ -143,3 +151,33 @@ error_nolock:
 	kfree_skb(skb);
 	goto out_exit;
 }
+
+int xfrm4_output_finish(struct sk_buff *skb)
+{
+	int err;
+
+	while (likely((err = xfrm4_output_one(skb)) == 0)) {
+		nf_reset(skb);
+
+		err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, &skb, NULL,
+			      skb->dst->dev, dst_output);
+		if (unlikely(err != 1))
+			break;
+
+		if (!skb->dst->xfrm)
+			return dst_output(skb);
+
+		err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL,
+			      skb->dst->dev, xfrm4_output_finish);
+		if (unlikely(err != 1))
+			break;
+	}
+
+	return err;
+}
+
+int xfrm4_output(struct sk_buff *skb)
+{
+	return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev,
+		       xfrm4_output_finish);
+}
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index b2b60f3e9cd..42196ba3b0b 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -182,6 +182,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl)
 		case IPPROTO_UDP:
 		case IPPROTO_TCP:
 		case IPPROTO_SCTP:
+		case IPPROTO_DCCP:
 			if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
 				u16 *ports = (u16 *)xprth;
 
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index d23e07fc81f..dbabf81a9b7 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -42,6 +42,21 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl,
 	x->props.saddr = tmpl->saddr;
 	if (x->props.saddr.a4 == 0)
 		x->props.saddr.a4 = saddr->a4;
+	if (tmpl->mode && x->props.saddr.a4 == 0) {
+		struct rtable *rt;
+	        struct flowi fl_tunnel = {
+        	        .nl_u = {
+        			.ip4_u = {
+					.daddr = x->id.daddr.a4,
+				}
+			}
+		};
+		if (!xfrm_dst_lookup((struct xfrm_dst **)&rt,
+		                     &fl_tunnel, AF_INET)) {
+			x->props.saddr.a4 = rt->rt_src;
+			dst_release(&rt->u.dst);
+		}
+	}
 	x->props.mode = tmpl->mode;
 	x->props.reqid = tmpl->reqid;
 	x->props.family = AF_INET;
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 6460eec834b..41877abd22e 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -8,10 +8,11 @@ ipv6-objs :=	af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \
 		route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \
 		protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
 		exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \
-		ip6_flowlabel.o ipv6_syms.o netfilter.o
+		ip6_flowlabel.o ipv6_syms.o inet6_connection_sock.o
 
 ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
 	xfrm6_output.o
+ipv6-$(CONFIG_NETFILTER) += netfilter.o
 ipv6-objs += $(ipv6-y)
 
 obj-$(CONFIG_INET6_AH) += ah6.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 76ff9f4fe89..d328d598614 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -58,6 +58,7 @@
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
 #endif
+#include <linux/capability.h>
 #include <linux/delay.h>
 #include <linux/notifier.h>
 #include <linux/string.h>
@@ -137,6 +138,7 @@ static int addrconf_ifdown(struct net_device *dev, int how);
 static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags);
 static void addrconf_dad_timer(unsigned long data);
 static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
+static void addrconf_dad_run(struct inet6_dev *idev);
 static void addrconf_rs_timer(unsigned long data);
 static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
 static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
@@ -379,8 +381,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
 		    dev->type == ARPHRD_NONE ||
 		    dev->type == ARPHRD_SIT) {
 			printk(KERN_INFO
-				"Disabled Privacy Extensions on device %p(%s)\n",
-				dev, dev->name);
+			       "%s: Disabled Privacy Extensions\n",
+			       dev->name);
 			ndev->cnf.use_tempaddr = -1;
 		} else {
 			in6_dev_hold(ndev);
@@ -388,6 +390,9 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
 		}
 #endif
 
+		if (netif_carrier_ok(dev))
+			ndev->if_flags |= IF_READY;
+
 		write_lock_bh(&addrconf_lock);
 		dev->ip6_ptr = ndev;
 		write_unlock_bh(&addrconf_lock);
@@ -415,6 +420,7 @@ static struct inet6_dev * ipv6_find_idev(struct net_device *dev)
 		if ((idev = ipv6_add_dev(dev)) == NULL)
 			return NULL;
 	}
+
 	if (dev->flags&IFF_UP)
 		ipv6_mc_up(idev);
 	return idev;
@@ -634,8 +640,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 	}
 #endif
 
-	for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;
-	     ifap = &ifa->if_next) {
+	for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;) {
 		if (ifa == ifp) {
 			*ifap = ifa->if_next;
 			__in6_ifa_put(ifp);
@@ -643,6 +648,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 			if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0)
 				break;
 			deleted = 1;
+			continue;
 		} else if (ifp->flags & IFA_F_PERMANENT) {
 			if (ipv6_prefix_equal(&ifa->addr, &ifp->addr,
 					      ifp->prefix_len)) {
@@ -666,6 +672,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 				}
 			}
 		}
+		ifap = &ifa->if_next;
 	}
 	write_unlock_bh(&idev->lock);
 
@@ -903,11 +910,18 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
 
 			score.addr_type = __ipv6_addr_type(&ifa->addr);
 
-			/* Rule 0: Candidate Source Address (section 4)
+			/* Rule 0:
+			 * - Tentative Address (RFC2462 section 5.4)
+			 *  - A tentative address is not considered
+			 *    "assigned to an interface" in the traditional
+			 *    sense.
+			 * - Candidate Source Address (section 4)
 			 *  - In any case, anycast addresses, multicast
 			 *    addresses, and the unspecified address MUST
 			 *    NOT be included in a candidate set.
 			 */
+			if (ifa->flags & IFA_F_TENTATIVE)
+				continue;
 			if (unlikely(score.addr_type == IPV6_ADDR_ANY ||
 				     score.addr_type & IPV6_ADDR_MULTICAST)) {
 				LIMIT_NETDEBUG(KERN_DEBUG
@@ -1182,7 +1196,7 @@ struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *
 int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
 {
 	const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
-	const struct in6_addr *sk2_rcv_saddr6 = tcp_v6_rcv_saddr(sk2);
+	const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
 	u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr;
 	u32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
 	int sk_ipv6only = ipv6_only_sock(sk);
@@ -1215,10 +1229,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
 
 /* Gets referenced address, destroys ifaddr */
 
-void addrconf_dad_failure(struct inet6_ifaddr *ifp)
+static void addrconf_dad_stop(struct inet6_ifaddr *ifp)
 {
-	if (net_ratelimit())
-		printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name);
 	if (ifp->flags&IFA_F_PERMANENT) {
 		spin_lock_bh(&ifp->lock);
 		addrconf_del_timer(ifp);
@@ -1244,6 +1256,12 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
 		ipv6_del_addr(ifp);
 }
 
+void addrconf_dad_failure(struct inet6_ifaddr *ifp)
+{
+	if (net_ratelimit())
+		printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name);
+	addrconf_dad_stop(ifp);
+}
 
 /* Join to solicited addr multicast group. */
 
@@ -1596,9 +1614,17 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 	   not good.
 	 */
 	if (valid_lft >= 0x7FFFFFFF/HZ)
-		rt_expires = 0;
+		rt_expires = 0x7FFFFFFF - (0x7FFFFFFF % HZ);
 	else
-		rt_expires = jiffies + valid_lft * HZ;
+		rt_expires = valid_lft * HZ;
+
+	/*
+	 * We convert this (in jiffies) to clock_t later.
+	 * Avoid arithmetic overflow there as well.
+	 * Overflow can happen only if HZ < USER_HZ.
+	 */
+	if (HZ < USER_HZ && rt_expires > 0x7FFFFFFF / USER_HZ)
+		rt_expires = 0x7FFFFFFF / USER_HZ;
 
 	if (pinfo->onlink) {
 		struct rt6_info *rt;
@@ -1610,12 +1636,12 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 					ip6_del_rt(rt, NULL, NULL, NULL);
 					rt = NULL;
 				} else {
-					rt->rt6i_expires = rt_expires;
+					rt->rt6i_expires = jiffies + rt_expires;
 				}
 			}
 		} else if (valid_lft) {
 			addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
-					      dev, rt_expires, RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT);
+					      dev, jiffies_to_clock_t(rt_expires), RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT);
 		}
 		if (rt)
 			dst_release(&rt->u.dst);
@@ -2125,9 +2151,42 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 {
 	struct net_device *dev = (struct net_device *) data;
 	struct inet6_dev *idev = __in6_dev_get(dev);
+	int run_pending = 0;
 
 	switch(event) {
 	case NETDEV_UP:
+	case NETDEV_CHANGE:
+		if (event == NETDEV_UP) {
+			if (!netif_carrier_ok(dev)) {
+				/* device is not ready yet. */
+				printk(KERN_INFO
+					"ADDRCONF(NETDEV_UP): %s: "
+					"link is not ready\n",
+					dev->name);
+				break;
+			}
+		} else {
+			if (!netif_carrier_ok(dev)) {
+				/* device is still not ready. */
+				break;
+			}
+
+			if (idev) {
+				if (idev->if_flags & IF_READY) {
+					/* device is already configured. */
+					break;
+				}
+				idev->if_flags |= IF_READY;
+			}
+
+			printk(KERN_INFO
+					"ADDRCONF(NETDEV_CHANGE): %s: "
+					"link becomes ready\n",
+					dev->name);
+
+			run_pending = 1;
+		}
+
 		switch(dev->type) {
 		case ARPHRD_SIT:
 			addrconf_sit_config(dev);
@@ -2144,6 +2203,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 			break;
 		};
 		if (idev) {
+			if (run_pending)
+				addrconf_dad_run(idev);
+
 			/* If the MTU changed during the interface down, when the
 			   interface up, the changed MTU must be reflected in the
 			   idev as well as routers.
@@ -2178,8 +2240,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 		 */
 		addrconf_ifdown(dev, event != NETDEV_DOWN);
 		break;
-	case NETDEV_CHANGE:
-		break;
+
 	case NETDEV_CHANGENAME:
 #ifdef CONFIG_SYSCTL
 		if (idev) {
@@ -2260,7 +2321,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 
 	/* Step 3: clear flags for stateless addrconf */
 	if (how != 1)
-		idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD);
+		idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
 
 	/* Step 4: clear address list */
 #ifdef CONFIG_IPV6_PRIVACY
@@ -2369,11 +2430,20 @@ out:
 /*
  *	Duplicate Address Detection
  */
+static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
+{
+	unsigned long rand_num;
+	struct inet6_dev *idev = ifp->idev;
+
+	rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
+	ifp->probes = idev->cnf.dad_transmits;
+	addrconf_mod_timer(ifp, AC_DAD, rand_num);
+}
+
 static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
 {
 	struct inet6_dev *idev = ifp->idev;
 	struct net_device *dev = idev->dev;
-	unsigned long rand_num;
 
 	addrconf_join_solict(dev, &ifp->addr);
 
@@ -2382,7 +2452,6 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
 					flags);
 
 	net_srandom(ifp->addr.s6_addr32[3]);
-	rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
 
 	read_lock_bh(&idev->lock);
 	if (ifp->dead)
@@ -2399,9 +2468,19 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
 		return;
 	}
 
-	ifp->probes = idev->cnf.dad_transmits;
-	addrconf_mod_timer(ifp, AC_DAD, rand_num);
-
+	if (!(idev->if_flags & IF_READY)) {
+		spin_unlock_bh(&ifp->lock);
+		read_unlock_bh(&idev->lock);
+		/*
+		 * If the device is not ready:
+		 * - keep it tentative if it is a permanent address.
+		 * - otherwise, kill it.
+		 */
+		in6_ifa_hold(ifp);
+		addrconf_dad_stop(ifp);
+		return;
+	}
+	addrconf_dad_kick(ifp);
 	spin_unlock_bh(&ifp->lock);
 out:
 	read_unlock_bh(&idev->lock);
@@ -2484,6 +2563,22 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
 	}
 }
 
+static void addrconf_dad_run(struct inet6_dev *idev) {
+	struct inet6_ifaddr *ifp;
+
+	read_lock_bh(&idev->lock);
+	for (ifp = idev->addr_list; ifp; ifp = ifp->if_next) {
+		spin_lock_bh(&ifp->lock);
+		if (!(ifp->flags & IFA_F_TENTATIVE)) {
+			spin_unlock_bh(&ifp->lock);
+			continue;
+		}
+		spin_unlock_bh(&ifp->lock);
+		addrconf_dad_kick(ifp);
+	}
+	read_unlock_bh(&idev->lock);
+}
+
 #ifdef CONFIG_PROC_FS
 struct if6_iter_state {
 	int bucket;
@@ -2549,7 +2644,7 @@ static int if6_seq_show(struct seq_file *seq, void *v)
 {
 	struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v;
 	seq_printf(seq,
-		   "%04x%04x%04x%04x%04x%04x%04x%04x %02x %02x %02x %02x %8s\n",
+		   NIP6_SEQFMT " %02x %02x %02x %02x %8s\n",
 		   NIP6(ifp->addr),
 		   ifp->idev->dev->ifindex,
 		   ifp->prefix_len,
@@ -2689,6 +2784,9 @@ restart:
 						in6_ifa_hold(ifpub);
 						spin_unlock(&ifp->lock);
 						read_unlock(&addrconf_hash_lock);
+						spin_lock(&ifpub->lock);
+						ifpub->regen_count = 0;
+						spin_unlock(&ifpub->lock);
 						ipv6_create_tempaddr(ifpub, ifp);
 						in6_ifa_put(ifpub);
 						in6_ifa_put(ifp);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c63b8ce0e1b..064ffab82a9 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -22,6 +22,7 @@
 
 
 #include <linux/module.h>
+#include <linux/capability.h>
 #include <linux/config.h>
 #include <linux/errno.h>
 #include <linux/types.h>
@@ -92,10 +93,13 @@ static int inet6_create(struct socket *sock, int protocol)
 	struct proto *answer_prot;
 	unsigned char answer_flags;
 	char answer_no_check;
-	int rc;
+	int try_loading_module = 0;
+	int err;
 
 	/* Look for the requested type/protocol pair. */
 	answer = NULL;
+lookup_protocol:
+	err = -ESOCKTNOSUPPORT;
 	rcu_read_lock();
 	list_for_each_rcu(p, &inetsw6[sock->type]) {
 		answer = list_entry(p, struct inet_protosw, list);
@@ -113,21 +117,37 @@ static int inet6_create(struct socket *sock, int protocol)
 			if (IPPROTO_IP == answer->protocol)
 				break;
 		}
+		err = -EPROTONOSUPPORT;
 		answer = NULL;
 	}
 
-	rc = -ESOCKTNOSUPPORT;
-	if (!answer)
-		goto out_rcu_unlock;
-	rc = -EPERM;
+	if (!answer) {
+		if (try_loading_module < 2) {
+			rcu_read_unlock();
+			/*
+			 * Be more specific, e.g. net-pf-10-proto-132-type-1
+			 * (net-pf-PF_INET6-proto-IPPROTO_SCTP-type-SOCK_STREAM)
+			 */
+			if (++try_loading_module == 1)
+				request_module("net-pf-%d-proto-%d-type-%d",
+						PF_INET6, protocol, sock->type);
+			/*
+			 * Fall back to generic, e.g. net-pf-10-proto-132
+			 * (net-pf-PF_INET6-proto-IPPROTO_SCTP)
+			 */
+			else
+				request_module("net-pf-%d-proto-%d",
+						PF_INET6, protocol);
+			goto lookup_protocol;
+		} else
+			goto out_rcu_unlock;
+	}
+
+	err = -EPERM;
 	if (answer->capability > 0 && !capable(answer->capability))
 		goto out_rcu_unlock;
-	rc = -EPROTONOSUPPORT;
-	if (!protocol)
-		goto out_rcu_unlock;
 
 	sock->ops = answer->ops;
-
 	answer_prot = answer->prot;
 	answer_no_check = answer->no_check;
 	answer_flags = answer->flags;
@@ -135,19 +155,20 @@ static int inet6_create(struct socket *sock, int protocol)
 
 	BUG_TRAP(answer_prot->slab != NULL);
 
-	rc = -ENOBUFS;
+	err = -ENOBUFS;
 	sk = sk_alloc(PF_INET6, GFP_KERNEL, answer_prot, 1);
 	if (sk == NULL)
 		goto out;
 
 	sock_init_data(sock, sk);
 
-	rc = 0;
+	err = 0;
 	sk->sk_no_check = answer_no_check;
 	if (INET_PROTOSW_REUSE & answer_flags)
 		sk->sk_reuse = 1;
 
 	inet = inet_sk(sk);
+	inet->is_icsk = INET_PROTOSW_ICSK & answer_flags;
 
 	if (SOCK_RAW == sock->type) {
 		inet->num = protocol;
@@ -202,14 +223,14 @@ static int inet6_create(struct socket *sock, int protocol)
 		sk->sk_prot->hash(sk);
 	}
 	if (sk->sk_prot->init) {
-		rc = sk->sk_prot->init(sk);
-		if (rc) {
+		err = sk->sk_prot->init(sk);
+		if (err) {
 			sk_common_release(sk);
 			goto out;
 		}
 	}
 out:
-	return rc;
+	return err;
 out_rcu_unlock:
 	rcu_read_unlock();
 	goto out;
@@ -370,6 +391,8 @@ int inet6_destroy_sock(struct sock *sk)
 	return 0;
 }
 
+EXPORT_SYMBOL_GPL(inet6_destroy_sock);
+
 /*
  *	This does both peername and sockname.
  */
@@ -412,7 +435,6 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
 int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
 	struct sock *sk = sock->sk;
-	int err = -EINVAL;
 
 	switch(cmd) 
 	{
@@ -431,16 +453,15 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	case SIOCSIFDSTADDR:
 		return addrconf_set_dstaddr((void __user *) arg);
 	default:
-		if (!sk->sk_prot->ioctl ||
-		    (err = sk->sk_prot->ioctl(sk, cmd, arg)) == -ENOIOCTLCMD)
-			return(dev_ioctl(cmd,(void __user *) arg));		
-		return err;
+		if (!sk->sk_prot->ioctl)
+			return -ENOIOCTLCMD;
+		return sk->sk_prot->ioctl(sk, cmd, arg);
 	}
 	/*NOTREACHED*/
 	return(0);
 }
 
-struct proto_ops inet6_stream_ops = {
+const struct proto_ops inet6_stream_ops = {
 	.family =	PF_INET6,
 	.owner =	THIS_MODULE,
 	.release =	inet6_release,
@@ -461,7 +482,7 @@ struct proto_ops inet6_stream_ops = {
 	.sendpage =	tcp_sendpage
 };
 
-struct proto_ops inet6_dgram_ops = {
+const struct proto_ops inet6_dgram_ops = {
 	.family =	PF_INET6,
 	.owner =	THIS_MODULE,
 	.release =	inet6_release,
@@ -489,7 +510,7 @@ static struct net_proto_family inet6_family_ops = {
 };
 
 /* Same as inet6_dgram_ops, sans udp_poll.  */
-static struct proto_ops inet6_sockraw_ops = {
+static const struct proto_ops inet6_sockraw_ops = {
 	.family =	PF_INET6,
 	.owner =	THIS_MODULE,
 	.release =	inet6_release,
@@ -590,17 +611,90 @@ inet6_unregister_protosw(struct inet_protosw *p)
 	}
 }
 
+int inet6_sk_rebuild_header(struct sock *sk)
+{
+	int err;
+	struct dst_entry *dst;
+	struct ipv6_pinfo *np = inet6_sk(sk);
+
+	dst = __sk_dst_check(sk, np->dst_cookie);
+
+	if (dst == NULL) {
+		struct inet_sock *inet = inet_sk(sk);
+		struct in6_addr *final_p = NULL, final;
+		struct flowi fl;
+
+		memset(&fl, 0, sizeof(fl));
+		fl.proto = sk->sk_protocol;
+		ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+		fl.fl6_flowlabel = np->flow_label;
+		fl.oif = sk->sk_bound_dev_if;
+		fl.fl_ip_dport = inet->dport;
+		fl.fl_ip_sport = inet->sport;
+
+		if (np->opt && np->opt->srcrt) {
+			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
+			ipv6_addr_copy(&final, &fl.fl6_dst);
+			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+			final_p = &final;
+		}
+
+		err = ip6_dst_lookup(sk, &dst, &fl);
+		if (err) {
+			sk->sk_route_caps = 0;
+			return err;
+		}
+		if (final_p)
+			ipv6_addr_copy(&fl.fl6_dst, final_p);
+
+		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
+			sk->sk_err_soft = -err;
+			return err;
+		}
+
+		ip6_dst_store(sk, dst, NULL);
+		sk->sk_route_caps = dst->dev->features &
+			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+	}
+
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header);
+
+int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct inet6_skb_parm *opt = IP6CB(skb);
+
+	if (np->rxopt.all) {
+		if ((opt->hop && (np->rxopt.bits.hopopts ||
+				  np->rxopt.bits.ohopopts)) ||
+		    ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) &&
+		     np->rxopt.bits.rxflow) ||
+		    (opt->srcrt && (np->rxopt.bits.srcrt ||
+		     np->rxopt.bits.osrcrt)) ||
+		    ((opt->dst1 || opt->dst0) &&
+		     (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
+			return 1;
+	}
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(ipv6_opt_accepted);
+
 int
 snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
 {
 	if (ptr == NULL)
 		return -EINVAL;
 
-	ptr[0] = __alloc_percpu(mibsize, mibalign);
+	ptr[0] = __alloc_percpu(mibsize);
 	if (!ptr[0])
 		goto err0;
 
-	ptr[1] = __alloc_percpu(mibsize, mibalign);
+	ptr[1] = __alloc_percpu(mibsize);
 	if (!ptr[1])
 		goto err1;
 
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index f3629730eb1..c7932cb420a 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -33,6 +33,7 @@
 #include <linux/string.h>
 #include <net/icmp.h>
 #include <net/ipv6.h>
+#include <net/protocol.h>
 #include <net/xfrm.h>
 #include <asm/scatterlist.h>
 
@@ -331,8 +332,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (!x)
 		return;
 
-	NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/"
-		 "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+	NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/" NIP6_FMT "\n",
 		 ntohl(ah->spi), NIP6(iph->daddr));
 
 	xfrm_state_put(x);
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 6b729404723..840a33d3329 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -13,6 +13,7 @@
  *      2 of the License, or (at your option) any later version.
  */
 
+#include <linux/capability.h>
 #include <linux/config.h>
 #include <linux/module.h>
 #include <linux/errno.h>
@@ -531,9 +532,7 @@ static int ac6_seq_show(struct seq_file *seq, void *v)
 	struct ac6_iter_state *state = ac6_seq_private(seq);
 
 	seq_printf(seq,
-		   "%-4d %-15s "
-		   "%04x%04x%04x%04x%04x%04x%04x%04x "
-		   "%5d\n",
+		   "%-4d %-15s " NIP6_SEQFMT " %5d\n",
 		   state->dev->ifindex, state->dev->name,
 		   NIP6(im->aca_addr),
 		   im->aca_users);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index c4a3a993acb..99a6eb23378 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -13,6 +13,7 @@
  *      2 of the License, or (at your option) any later version.
  */
 
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 40d9a1935ab..7b5b94f1390 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -36,6 +36,7 @@
 #include <linux/random.h>
 #include <net/icmp.h>
 #include <net/ipv6.h>
+#include <net/protocol.h>
 #include <linux/icmpv6.h>
 
 static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
@@ -248,7 +249,7 @@ static u32 esp6_get_max_size(struct xfrm_state *x, int mtu)
 	if (esp->conf.padlen)
 		mtu = ALIGN(mtu, esp->conf.padlen);
 
-	return mtu + x->props.header_len + esp->auth.icv_full_len;
+	return mtu + x->props.header_len + esp->auth.icv_trunc_len;
 }
 
 static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
@@ -265,8 +266,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6);
 	if (!x)
 		return;
-	printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/"
-			"%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", 
+	printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/" NIP6_FMT "\n", 
 			ntohl(esph->spi), NIP6(iph->daddr));
 	xfrm_state_put(x);
 }
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index be6faf31138..2a1e7e45b89 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -152,7 +152,7 @@ static struct tlvtype_proc tlvprocdestopt_lst[] = {
 	{-1,			NULL}
 };
 
-static int ipv6_destopt_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
+static int ipv6_destopt_rcv(struct sk_buff **skbp)
 {
 	struct sk_buff *skb = *skbp;
 	struct inet6_skb_parm *opt = IP6CB(skb);
@@ -169,7 +169,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
 
 	if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
 		skb->h.raw += ((skb->h.raw[1]+1)<<3);
-		*nhoffp = opt->dst1;
+		opt->nhoff = opt->dst1;
 		return 1;
 	}
 
@@ -192,7 +192,7 @@ void __init ipv6_destopt_init(void)
   NONE header. No data in packet.
  ********************************/
 
-static int ipv6_nodata_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
+static int ipv6_nodata_rcv(struct sk_buff **skbp)
 {
 	struct sk_buff *skb = *skbp;
 
@@ -215,7 +215,7 @@ void __init ipv6_nodata_init(void)
   Routing header.
  ********************************/
 
-static int ipv6_rthdr_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
+static int ipv6_rthdr_rcv(struct sk_buff **skbp)
 {
 	struct sk_buff *skb = *skbp;
 	struct inet6_skb_parm *opt = IP6CB(skb);
@@ -249,7 +249,7 @@ looped_back:
 		skb->h.raw += (hdr->hdrlen + 1) << 3;
 		opt->dst0 = opt->dst1;
 		opt->dst1 = 0;
-		*nhoffp = (&hdr->nexthdr) - skb->nh.raw;
+		opt->nhoff = (&hdr->nexthdr) - skb->nh.raw;
 		return 1;
 	}
 
@@ -413,6 +413,8 @@ ipv6_invert_rthdr(struct sock *sk, struct ipv6_rt_hdr *hdr)
 	return opt;
 }
 
+EXPORT_SYMBOL_GPL(ipv6_invert_rthdr);
+
 /**********************************
   Hop-by-hop options.
  **********************************/
@@ -485,9 +487,14 @@ static struct tlvtype_proc tlvprochopopt_lst[] = {
 
 int ipv6_parse_hopopts(struct sk_buff *skb, int nhoff)
 {
-	IP6CB(skb)->hop = sizeof(struct ipv6hdr);
-	if (ip6_parse_tlv(tlvprochopopt_lst, skb))
+	struct inet6_skb_parm *opt = IP6CB(skb);
+
+	opt->hop = sizeof(struct ipv6hdr);
+	if (ip6_parse_tlv(tlvprochopopt_lst, skb)) {
+		skb->h.raw += (skb->h.raw[1]+1)<<3;
+		opt->nhoff = sizeof(struct ipv6hdr);
 		return sizeof(struct ipv6hdr);
+	}
 	return -1;
 }
 
@@ -579,6 +586,8 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
 	return opt2;
 }
 
+EXPORT_SYMBOL_GPL(ipv6_dup_options);
+
 static int ipv6_renew_option(void *ohdr,
 			     struct ipv6_opt_hdr __user *newopt, int newoptlen,
 			     int inherit,
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 34a332225c1..fcf883183ce 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -79,7 +79,7 @@ DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly;
 static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL;
 #define icmpv6_socket	__get_cpu_var(__icmpv6_socket)
 
-static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp);
+static int icmpv6_rcv(struct sk_buff **pskb);
 
 static struct inet6_protocol icmpv6_protocol = {
 	.handler	=	icmpv6_rcv,
@@ -328,8 +328,10 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 		iif = skb->dev->ifindex;
 
 	/*
-	 *	Must not send if we know that source is Anycast also.
-	 *	for now we don't know that.
+	 *	Must not send error if the source does not uniquely
+	 *	identify a single node (RFC2463 Section 2.4).
+	 *	We check unspecified / multicast addresses here,
+	 *	and anycast addresses will be checked later.
 	 */
 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
@@ -373,6 +375,16 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 	err = ip6_dst_lookup(sk, &dst, &fl);
 	if (err)
 		goto out;
+
+	/*
+	 * We won't send icmp if the destination is known
+	 * anycast.
+	 */
+	if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
+		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
+		goto out_dst_release;
+	}
+
 	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
 		goto out;
 
@@ -569,7 +581,7 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info)
  *	Handle icmp messages
  */
 
-static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+static int icmpv6_rcv(struct sk_buff **pskb)
 {
 	struct sk_buff *skb = *pskb;
 	struct net_device *dev = skb->dev;
@@ -595,7 +607,7 @@ static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
 		skb->csum = ~csum_ipv6_magic(saddr, daddr, skb->len,
 					     IPPROTO_ICMPV6, 0);
 		if (__skb_checksum_complete(skb)) {
-			LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n",
+			LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [" NIP6_FMT " > " NIP6_FMT "]\n",
 				       NIP6(*saddr), NIP6(*daddr));
 			goto discard_it;
 		}
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
new file mode 100644
index 00000000000..f8f3a37a149
--- /dev/null
+++ b/net/ipv6/inet6_connection_sock.c
@@ -0,0 +1,200 @@
+/*
+ * INET        An implementation of the TCP/IP protocol suite for the LINUX
+ *             operating system.  INET is implemented using the  BSD Socket
+ *             interface as the means of communication with the user level.
+ *
+ *             Support for INET6 connection oriented protocols.
+ *
+ * Authors:    See the TCPv6 sources
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/jhash.h>
+
+#include <net/addrconf.h>
+#include <net/inet_connection_sock.h>
+#include <net/inet_ecn.h>
+#include <net/inet_hashtables.h>
+#include <net/ip6_route.h>
+#include <net/sock.h>
+#include <net/inet6_connection_sock.h>
+
+/* Report whether any other owner of @tb's port conflicts with @sk (1/0). */
+int inet6_csk_bind_conflict(const struct sock *sk,
+			    const struct inet_bind_bucket *tb)
+{
+	const struct hlist_node *pos;
+	const struct sock *other;
+
+	/* We must walk the whole port owner list in this case. -DaveM */
+	sk_for_each_bound(other, pos, &tb->owners) {
+		if (sk->sk_bound_dev_if && other->sk_bound_dev_if &&
+		    sk->sk_bound_dev_if != other->sk_bound_dev_if)
+			continue;	/* bound to different devices */
+		if (sk->sk_reuse && other->sk_reuse &&
+		    other->sk_state != TCP_LISTEN)
+			continue;	/* both permit reuse, other not listening */
+		if (other != sk && ipv6_rcv_saddr_equal(sk, other))
+			return 1;
+	}
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict);
+
+/*
+ * request_sock (formerly open request) hash tables.
+ */
+static u32 inet6_synq_hash(const struct in6_addr *raddr, const u16 rport,
+			   const u32 rnd, const u16 synq_hsize)
+{
+	u32 x = raddr->s6_addr32[0] + JHASH_GOLDEN_RATIO;
+	u32 y = raddr->s6_addr32[1] + JHASH_GOLDEN_RATIO;
+	u32 z = raddr->s6_addr32[2] + rnd;
+
+	/* round one: first three address words, offset by the seeds above */
+	__jhash_mix(x, y, z);
+
+	/* round two: fold in the last address word and the remote port */
+	x += raddr->s6_addr32[3];
+	y += (u32)rport;
+	__jhash_mix(x, y, z);
+
+	/* reduce to a table slot by masking with synq_hsize - 1 */
+	return z & (synq_hsize - 1);
+}
+
+struct request_sock *inet6_csk_search_req(const struct sock *sk,
+					  struct request_sock ***prevp,
+					  const __u16 rport,
+					  const struct in6_addr *raddr,
+					  const struct in6_addr *laddr,
+					  const int iif)
+{
+	struct listen_sock *lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
+	const u32 slot = inet6_synq_hash(raddr, rport, lopt->hash_rnd,
+					 lopt->nr_table_entries);
+	struct request_sock **prev = &lopt->syn_table[slot];
+	struct request_sock *req;
+
+	/* walk the bucket chain, keeping the link that points at each entry */
+	for (; (req = *prev) != NULL; prev = &req->dl_next) {
+		const struct inet6_request_sock *treq = inet6_rsk(req);
+
+		if (inet_rsk(req)->rmt_port != rport ||
+		    req->rsk_ops->family != AF_INET6 ||
+		    !ipv6_addr_equal(&treq->rmt_addr, raddr) ||
+		    !ipv6_addr_equal(&treq->loc_addr, laddr) ||
+		    (treq->iif && treq->iif != iif))
+			continue;
+
+		BUG_TRAP(req->sk == NULL);
+		*prevp = prev;
+		return req;
+	}
+
+	return NULL;
+}
+
+EXPORT_SYMBOL_GPL(inet6_csk_search_req);
+
+void inet6_csk_reqsk_queue_hash_add(struct sock *sk,
+				    struct request_sock *req,
+				    const unsigned long timeout)
+{
+	struct listen_sock *lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
+	const u32 idx = inet6_synq_hash(&inet6_rsk(req)->rmt_addr,
+					inet_rsk(req)->rmt_port, lopt->hash_rnd,
+					lopt->nr_table_entries);
+
+	reqsk_queue_hash_req(&inet_csk(sk)->icsk_accept_queue, idx, req,
+			     timeout);
+	inet_csk_reqsk_queue_added(sk, timeout);
+}
+
+EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add);
+
+void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
+{
+	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
+	const struct in6_addr *peer = &inet6_sk(sk)->daddr;
+	const int dev = sk->sk_bound_dev_if;
+
+	sin6->sin6_family = AF_INET6;
+	sin6->sin6_port	= inet_sk(sk)->dport;
+	ipv6_addr_copy(&sin6->sin6_addr, peer);
+	/* We do not store received flowlabel for TCP */
+	sin6->sin6_flowinfo = 0;
+	/* scope id is the bound device, and only for a link-local peer */
+	sin6->sin6_scope_id =
+		(dev && (ipv6_addr_type(peer) & IPV6_ADDR_LINKLOCAL)) ? dev : 0;
+}
+
+EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr);
+
+int inet6_csk_xmit(struct sk_buff *skb, int ipfragok)
+{
+	struct sock *sk = skb->sk;
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct flowi fl;
+	struct dst_entry *dst;
+	struct in6_addr *final_p = NULL, final;
+	/* Build the flow for skb->sk, get a route, then call ip6_xmit(). */
+	memset(&fl, 0, sizeof(fl));
+	fl.proto = sk->sk_protocol;
+	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+	fl.fl6_flowlabel = np->flow_label;
+	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
+	fl.oif = sk->sk_bound_dev_if;
+	fl.fl_ip_sport = inet->sport;
+	fl.fl_ip_dport = inet->dport;
+	/* With a source route, look up rt0->addr; final keeps the old fl6_dst. */
+	if (np->opt && np->opt->srcrt) {
+		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
+		ipv6_addr_copy(&final, &fl.fl6_dst);
+		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+		final_p = &final;
+	}
+
+	dst = __sk_dst_check(sk, np->dst_cookie);
+	/* Fresh lookup only when __sk_dst_check() gave us no dst. */
+	if (dst == NULL) {
+		int err = ip6_dst_lookup(sk, &dst, &fl);
+		/* NOTE(review): error paths return without freeing skb */
+		if (err) {
+			sk->sk_err_soft = -err;
+			return err;
+		}
+		/* put the saved destination back before xfrm_lookup() */
+		if (final_p)
+			ipv6_addr_copy(&fl.fl6_dst, final_p);
+
+		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
+			sk->sk_route_caps = 0;
+			return err;
+		}
+
+		ip6_dst_store(sk, dst, NULL);
+		sk->sk_route_caps = dst->dev->features &
+			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+	}
+
+	skb->dst = dst_clone(dst);
+
+	/* Restore final destination back after routing done */
+	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+	/* NOTE(review): ipfragok is unused; ip6_xmit() is always passed 0 */
+	return ip6_xmit(sk, skb, &fl, np->opt, 0);
+}
+
+EXPORT_SYMBOL_GPL(inet6_csk_xmit);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 01d5f46d4e4..4154f3a8b6c 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -5,7 +5,8 @@
  *
  *		Generic INET6 transport hashtables
  *
- * Authors:	Lotsa people, from code originally in tcp
+ * Authors:	Lotsa people, from code originally in tcp, generalised here
+ * 		by Arnaldo Carvalho de Melo <acme@mandriva.com>
  *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
@@ -14,12 +15,13 @@
  */
 
 #include <linux/config.h>
-
 #include <linux/module.h>
+#include <linux/random.h>
 
 #include <net/inet_connection_sock.h>
 #include <net/inet_hashtables.h>
 #include <net/inet6_hashtables.h>
+#include <net/ip.h>
 
 struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo,
 				   const struct in6_addr *daddr,
@@ -79,3 +81,180 @@ struct sock *inet6_lookup(struct inet_hashinfo *hashinfo,
 }
 
 EXPORT_SYMBOL_GPL(inet6_lookup);
+
+/*
+ * Hash @sk as established unless its 4-tuple (local port @lport) is in use.
+ * A TIME-WAIT match accepted by twsk_unique() is stored in *@twp, or
+ * descheduled if @twp is NULL.  Returns 0 or -EADDRNOTAVAIL.
+ */
+static int __inet6_check_established(struct inet_timewait_death_row *death_row,
+				     struct sock *sk, const __u16 lport,
+				     struct inet_timewait_sock **twp)
+{
+	struct inet_hashinfo *hinfo = death_row->hashinfo;
+	const struct inet_sock *inet = inet_sk(sk);
+	const struct ipv6_pinfo *np = inet6_sk(sk);
+	const struct in6_addr *daddr = &np->rcv_saddr;
+	const struct in6_addr *saddr = &np->daddr;
+	const int dif = sk->sk_bound_dev_if;
+	const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
+	/* keyed on lport: inet->num is still 0 during ephemeral port search */
+	const unsigned int hash = inet6_ehashfn(daddr, lport, saddr,
+						inet->dport);
+	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
+	struct sock *sk2;
+	const struct hlist_node *node;
+	struct inet_timewait_sock *tw;
+
+	prefetch(head->chain.first);
+	write_lock(&head->lock);
+	/* Check TIME-WAIT sockets first. */
+	sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) {
+		const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2);
+
+		tw = inet_twsk(sk2);
+		if(*((__u32 *)&(tw->tw_dport)) == ports		 &&
+		   sk2->sk_family	       == PF_INET6	 &&
+		   ipv6_addr_equal(&tw6->tw_v6_daddr, saddr)	 &&
+		   ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) &&
+		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
+			if (twsk_unique(sk, sk2, twp))
+				goto unique;
+			goto not_unique;
+		}
+	}
+	tw = NULL;
+	/* And established part... */
+	sk_for_each(sk2, node, &head->chain) {
+		if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
+			goto not_unique;
+	}
+
+unique:
+	BUG_TRAP(sk_unhashed(sk));
+	__sk_add_node(sk, &head->chain);
+	sk->sk_hash = hash;
+	sock_prot_inc_use(sk->sk_prot);
+	write_unlock(&head->lock);
+	if (twp != NULL) {
+		*twp = tw;
+		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+	} else if (tw != NULL) {
+		/* Silly. Should hash-dance instead... */
+		inet_twsk_deschedule(tw, death_row);
+		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+		inet_twsk_put(tw);
+	}
+	return 0;
+
+not_unique:
+	write_unlock(&head->lock);
+	return -EADDRNOTAVAIL;
+}
+
+static inline u32 inet6_sk_port_offset(const struct sock *sk)
+{
+	const struct ipv6_pinfo *np = inet6_sk(sk);
+
+	/* keyed on the socket's rcv_saddr, daddr and dport */
+	return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32,
+					  np->daddr.s6_addr32, inet_sk(sk)->dport);
+}
+
+int inet6_hash_connect(struct inet_timewait_death_row *death_row,
+		       struct sock *sk)
+{
+	struct inet_hashinfo *hinfo = death_row->hashinfo;
+	const unsigned short snum = inet_sk(sk)->num;
+ 	struct inet_bind_hashbucket *head;
+ 	struct inet_bind_bucket *tb;
+	int ret;
+	/* snum == 0: no local port yet, so probe the local port range below */
+ 	if (snum == 0) {
+ 		const int low = sysctl_local_port_range[0];
+ 		const int high = sysctl_local_port_range[1];
+		const int range = high - low;
+ 		int i, port;
+		static u32 hint;	/* kept across calls; advanced at ok: */
+		const u32 offset = hint + inet6_sk_port_offset(sk);
+		struct hlist_node *node;
+ 		struct inet_timewait_sock *tw = NULL;
+
+ 		local_bh_disable();
+		for (i = 1; i <= range; i++) {
+			port = low + (i + offset) % range;
+ 			head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
+ 			spin_lock(&head->lock);
+
+ 			/* Does not bother with rcv_saddr checks,
+ 			 * because the established check is already
+ 			 * unique enough.
+ 			 */
+			inet_bind_bucket_for_each(tb, node, &head->chain) {
+ 				if (tb->port == port) {
+ 					BUG_TRAP(!hlist_empty(&tb->owners));
+ 					if (tb->fastreuse >= 0)
+ 						goto next_port;
+ 					if (!__inet6_check_established(death_row,
+								       sk, port,
+								       &tw))
+ 						goto ok;
+ 					goto next_port;
+ 				}
+ 			}
+			/* no bucket exists for this port yet: create one */
+ 			tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
+						     head, port);
+ 			if (!tb) {
+ 				spin_unlock(&head->lock);
+ 				break;
+ 			}
+ 			tb->fastreuse = -1;
+ 			goto ok;
+
+ 		next_port:
+ 			spin_unlock(&head->lock);
+ 		}
+ 		local_bh_enable();
+		/* range exhausted, or bucket allocation failed */
+ 		return -EADDRNOTAVAIL;
+
+ok:
+		hint += i;
+
+ 		/* Head lock still held and bh's disabled */
+ 		inet_bind_hash(sk, tb, port);
+		if (sk_unhashed(sk)) {
+ 			inet_sk(sk)->sport = htons(port);
+ 			__inet6_hash(hinfo, sk);
+ 		}
+ 		spin_unlock(&head->lock);
+		/* release the TIME-WAIT socket handed back through &tw, if any */
+ 		if (tw) {
+ 			inet_twsk_deschedule(tw, death_row);
+ 			inet_twsk_put(tw);
+ 		}
+
+		ret = 0;
+		goto out;
+ 	}
+	/* snum != 0: the socket already has a local port */
+ 	head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
+ 	tb   = inet_csk(sk)->icsk_bind_hash;
+	spin_lock_bh(&head->lock);
+	/* sk is the only entry on tb->owners: hash it without further checks */
+	if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) {
+		__inet6_hash(hinfo, sk);
+		spin_unlock_bh(&head->lock);
+		return 0;
+	} else {
+		spin_unlock(&head->lock);	/* bh stays off until out: */
+		/* No definite answer... Walk to established hash table */
+		ret = __inet6_check_established(death_row, sk, snum, NULL);
+out:
+		local_bh_enable();
+		return ret;
+	}
+}
+
+EXPORT_SYMBOL_GPL(inet6_hash_connect);
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 1cf02765fb5..69cbe8a66d0 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -9,6 +9,7 @@
  *	Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  */
 
+#include <linux/capability.h>
 #include <linux/config.h>
 #include <linux/errno.h>
 #include <linux/types.h>
@@ -200,6 +201,8 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, u32 label)
 	return NULL;
 }
 
+EXPORT_SYMBOL_GPL(fl6_sock_lookup);
+
 void fl6_free_socklist(struct sock *sk)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -626,9 +629,7 @@ static void ip6fl_fl_seq_show(struct seq_file *seq, struct ip6_flowlabel *fl)
 {
 	while(fl) {
 		seq_printf(seq,
-			   "%05X %-1d %-6d %-6d %-6ld %-8ld "
-			   "%02x%02x%02x%02x%02x%02x%02x%02x "
-			   "%-4d\n",
+			   "%05X %-1d %-6d %-6d %-6ld %-8ld " NIP6_SEQFMT " %-4d\n",
 			   (unsigned)ntohl(fl->label),
 			   fl->share,
 			   (unsigned)fl->owner,
@@ -644,8 +645,8 @@ static void ip6fl_fl_seq_show(struct seq_file *seq, struct ip6_flowlabel *fl)
 static int ip6fl_seq_show(struct seq_file *seq, void *v)
 {
 	if (v == SEQ_START_TOKEN)
-		seq_puts(seq, "Label S Owner  Users  Linger Expires  "
-			      "Dst                              Opt\n");
+		seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n",
+			   "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt");
 	else
 		ip6fl_fl_seq_show(seq, v);
 	return 0;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index a6026d2787d..29f73592e68 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -48,7 +48,7 @@
 
 
 
-static inline int ip6_rcv_finish( struct sk_buff *skb) 
+inline int ip6_rcv_finish( struct sk_buff *skb) 
 {
 	if (skb->dst == NULL)
 		ip6_route_input(skb);
@@ -97,6 +97,9 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 	if (hdr->version != 6)
 		goto err;
 
+	skb->h.raw = (u8 *)(hdr + 1);
+	IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
+
 	pkt_len = ntohs(hdr->payload_len);
 
 	/* pkt_len may be zero if Jumbo payload option is present */
@@ -111,8 +114,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 	}
 
 	if (hdr->nexthdr == NEXTHDR_HOP) {
-		skb->h.raw = (u8*)(hdr+1);
-		if (ipv6_parse_hopopts(skb, offsetof(struct ipv6hdr, nexthdr)) < 0) {
+		if (ipv6_parse_hopopts(skb, IP6CB(skb)->nhoff) < 0) {
 			IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
 			return 0;
 		}
@@ -143,26 +145,15 @@ static inline int ip6_input_finish(struct sk_buff *skb)
 	int nexthdr;
 	u8 hash;
 
-	skb->h.raw = skb->nh.raw + sizeof(struct ipv6hdr);
-
 	/*
 	 *	Parse extension headers
 	 */
 
-	nexthdr = skb->nh.ipv6h->nexthdr;
-	nhoff = offsetof(struct ipv6hdr, nexthdr);
-
-	/* Skip hop-by-hop options, they are already parsed. */
-	if (nexthdr == NEXTHDR_HOP) {
-		nhoff = sizeof(struct ipv6hdr);
-		nexthdr = skb->h.raw[0];
-		skb->h.raw += (skb->h.raw[1]+1)<<3;
-	}
-
 	rcu_read_lock();
 resubmit:
 	if (!pskb_pull(skb, skb->h.raw - skb->data))
 		goto discard;
+	nhoff = IP6CB(skb)->nhoff;
 	nexthdr = skb->nh.raw[nhoff];
 
 	raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]);
@@ -194,7 +185,7 @@ resubmit:
 		    !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) 
 			goto discard;
 		
-		ret = ipprot->handler(&skb, &nhoff);
+		ret = ipprot->handler(&skb);
 		if (ret > 0)
 			goto resubmit;
 		else if (ret == 0)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 8523c76ebf7..efa3e72cfcf 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -226,6 +226,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
 	ipv6_addr_copy(&hdr->daddr, first_hop);
 
+	skb->priority = sk->sk_priority;
+
 	mtu = dst_mtu(dst);
 	if ((skb->len <= mtu) || ipfragok) {
 		IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
@@ -775,6 +777,8 @@ out_err_release:
 	return err;
 }
 
+EXPORT_SYMBOL_GPL(ip6_dst_lookup);
+
 static inline int ip6_ufo_append_data(struct sock *sk,
 			int getfrag(void *from, char *to, int offset, int len,
 			int odd, struct sk_buff *skb),
@@ -1180,6 +1184,8 @@ int ip6_push_pending_frames(struct sock *sk)
 	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
 	ipv6_addr_copy(&hdr->daddr, final_dst);
 
+	skb->priority = sk->sk_priority;
+
 	skb->dst = dst_clone(&rt->u.dst);
 	IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);	
 	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index e315d0f80af..92ead3cf956 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -21,6 +21,7 @@
 
 #include <linux/config.h>
 #include <linux/module.h>
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/sockios.h>
@@ -243,7 +244,7 @@ ip6_tnl_create(struct ip6_tnl_parm *p, struct ip6_tnl **pt)
 	if (dev == NULL)
 		return -ENOMEM;
 
-	t = dev->priv;
+	t = netdev_priv(dev);
 	dev->init = ip6ip6_tnl_dev_init;
 	t->parms = *p;
 
@@ -308,7 +309,7 @@ ip6ip6_tnl_locate(struct ip6_tnl_parm *p, struct ip6_tnl **pt, int create)
 static void
 ip6ip6_tnl_dev_uninit(struct net_device *dev)
 {
-	struct ip6_tnl *t = dev->priv;
+	struct ip6_tnl *t = netdev_priv(dev);
 
 	if (dev == ip6ip6_fb_tnl_dev) {
 		write_lock_bh(&ip6ip6_lock);
@@ -510,7 +511,7 @@ static inline void ip6ip6_ecn_decapsulate(struct ipv6hdr *outer_iph,
  **/
 
 static int 
-ip6ip6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+ip6ip6_rcv(struct sk_buff **pskb)
 {
 	struct sk_buff *skb = *pskb;
 	struct ipv6hdr *ipv6h;
@@ -623,7 +624,7 @@ ip6ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr)
 static int 
 ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct ip6_tnl *t = (struct ip6_tnl *) dev->priv;
+	struct ip6_tnl *t = netdev_priv(dev);
 	struct net_device_stats *stats = &t->stat;
 	struct ipv6hdr *ipv6h = skb->nh.ipv6h;
 	struct ipv6_txoptions *opt = NULL;
@@ -933,11 +934,11 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 				break;
 			}
 			if ((err = ip6ip6_tnl_locate(&p, &t, 0)) == -ENODEV)
-				t = (struct ip6_tnl *) dev->priv;
+				t = netdev_priv(dev);
 			else if (err)
 				break;
 		} else
-			t = (struct ip6_tnl *) dev->priv;
+			t = netdev_priv(dev);
 
 		memcpy(&p, &t->parms, sizeof (p));
 		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
@@ -955,7 +956,7 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 			break;
 		}
 		if (!create && dev != ip6ip6_fb_tnl_dev) {
-			t = (struct ip6_tnl *) dev->priv;
+			t = netdev_priv(dev);
 		}
 		if (!t && (err = ip6ip6_tnl_locate(&p, &t, create))) {
 			break;
@@ -991,12 +992,12 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 			err = ip6ip6_tnl_locate(&p, &t, 0);
 			if (err)
 				break;
-			if (t == ip6ip6_fb_tnl_dev->priv) {
+			if (t == netdev_priv(ip6ip6_fb_tnl_dev)) {
 				err = -EPERM;
 				break;
 			}
 		} else {
-			t = (struct ip6_tnl *) dev->priv;
+			t = netdev_priv(dev);
 		}
 		err = unregister_netdevice(t->dev);
 		break;
@@ -1016,7 +1017,7 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 static struct net_device_stats *
 ip6ip6_tnl_get_stats(struct net_device *dev)
 {
-	return &(((struct ip6_tnl *) dev->priv)->stat);
+	return &(((struct ip6_tnl *)netdev_priv(dev))->stat);
 }
 
 /**
@@ -1073,7 +1074,7 @@ static void ip6ip6_tnl_dev_setup(struct net_device *dev)
 static inline void
 ip6ip6_tnl_dev_init_gen(struct net_device *dev)
 {
-	struct ip6_tnl *t = (struct ip6_tnl *) dev->priv;
+	struct ip6_tnl *t = netdev_priv(dev);
 	t->fl.proto = IPPROTO_IPV6;
 	t->dev = dev;
 	strcpy(t->parms.name, dev->name);
@@ -1087,7 +1088,7 @@ ip6ip6_tnl_dev_init_gen(struct net_device *dev)
 static int
 ip6ip6_tnl_dev_init(struct net_device *dev)
 {
-	struct ip6_tnl *t = (struct ip6_tnl *) dev->priv;
+	struct ip6_tnl *t = netdev_priv(dev);
 	ip6ip6_tnl_dev_init_gen(dev);
 	ip6ip6_tnl_link_config(t);
 	return 0;
@@ -1103,7 +1104,7 @@ ip6ip6_tnl_dev_init(struct net_device *dev)
 static int 
 ip6ip6_fb_tnl_dev_init(struct net_device *dev)
 {
-	struct ip6_tnl *t = dev->priv;
+	struct ip6_tnl *t = netdev_priv(dev);
 	ip6ip6_tnl_dev_init_gen(dev);
 	dev_hold(dev);
 	tnls_wc[0] = t;
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 55917fb1709..d511a884dad 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -47,6 +47,7 @@
 #include <linux/rtnetlink.h>
 #include <net/icmp.h>
 #include <net/ipv6.h>
+#include <net/protocol.h>
 #include <linux/ipv6.h>
 #include <linux/icmpv6.h>
 
@@ -211,8 +212,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (!x)
 		return;
 
-	printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/"
-			"%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+	printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/" NIP6_FMT "\n",
 			spi, NIP6(iph->daddr));
 	xfrm_state_put(x);
 }
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 3620718defe..f7142ba519a 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -26,6 +26,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/capability.h>
 #include <linux/config.h>
 #include <linux/errno.h>
 #include <linux/types.h>
@@ -163,17 +164,17 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
 			sk_refcnt_debug_dec(sk);
 
 			if (sk->sk_protocol == IPPROTO_TCP) {
-				struct tcp_sock *tp = tcp_sk(sk);
+				struct inet_connection_sock *icsk = inet_csk(sk);
 
 				local_bh_disable();
 				sock_prot_dec_use(sk->sk_prot);
 				sock_prot_inc_use(&tcp_prot);
 				local_bh_enable();
 				sk->sk_prot = &tcp_prot;
-				tp->af_specific = &ipv4_specific;
+				icsk->icsk_af_ops = &ipv4_specific;
 				sk->sk_socket->ops = &inet_stream_ops;
 				sk->sk_family = PF_INET;
-				tcp_sync_mss(sk, tp->pmtu_cookie);
+				tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 			} else {
 				local_bh_disable();
 				sock_prot_dec_use(sk->sk_prot);
@@ -317,14 +318,15 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
 		}
 
 		retv = 0;
-		if (sk->sk_type == SOCK_STREAM) {
+		if (inet_sk(sk)->is_icsk) {
 			if (opt) {
-				struct tcp_sock *tp = tcp_sk(sk);
+				struct inet_connection_sock *icsk = inet_csk(sk);
 				if (!((1 << sk->sk_state) &
 				      (TCPF_LISTEN | TCPF_CLOSE))
 				    && inet_sk(sk)->daddr != LOOPBACK4_IPV6) {
-					tp->ext_header_len = opt->opt_flen + opt->opt_nflen;
-					tcp_sync_mss(sk, tp->pmtu_cookie);
+					icsk->icsk_ext_hdr_len =
+						opt->opt_flen + opt->opt_nflen;
+					icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
 				}
 			}
 			opt = xchg(&np->opt, opt);
@@ -380,14 +382,15 @@ sticky_done:
 			goto done;
 update:
 		retv = 0;
-		if (sk->sk_type == SOCK_STREAM) {
+		if (inet_sk(sk)->is_icsk) {
 			if (opt) {
-				struct tcp_sock *tp = tcp_sk(sk);
+				struct inet_connection_sock *icsk = inet_csk(sk);
 				if (!((1 << sk->sk_state) &
 				      (TCPF_LISTEN | TCPF_CLOSE))
 				    && inet_sk(sk)->daddr != LOOPBACK4_IPV6) {
-					tp->ext_header_len = opt->opt_flen + opt->opt_nflen;
-					tcp_sync_mss(sk, tp->pmtu_cookie);
+					icsk->icsk_ext_hdr_len =
+						opt->opt_flen + opt->opt_nflen;
+					icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
 				}
 			}
 			opt = xchg(&np->opt, opt);
@@ -547,7 +550,7 @@ done:
 			retv = -ENOBUFS;
 			break;
 		}
-		gsf = (struct group_filter *)kmalloc(optlen,GFP_KERNEL);
+		gsf = kmalloc(optlen,GFP_KERNEL);
 		if (gsf == 0) {
 			retv = -ENOBUFS;
 			break;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index f15e04ad026..6c05c7978be 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -170,7 +170,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
 #define MLDV2_QQIC(value) MLDV2_EXP(0x80, 4, 3, value)
 #define MLDV2_MRC(value) MLDV2_EXP(0x8000, 12, 3, value)
 
-#define IPV6_MLD_MAX_MSF	10
+#define IPV6_MLD_MAX_MSF	64
 
 int sysctl_mld_max_msf = IPV6_MLD_MAX_MSF;
 
@@ -224,6 +224,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr)
 
 	mc_lst->ifindex = dev->ifindex;
 	mc_lst->sfmode = MCAST_EXCLUDE;
+	rwlock_init(&mc_lst->sflock);
 	mc_lst->sflist = NULL;
 
 	/*
@@ -360,6 +361,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 	struct ip6_sf_socklist *psl;
 	int i, j, rv;
 	int leavegroup = 0;
+	int pmclocked = 0;
 	int err;
 
 	if (pgsr->gsr_group.ss_family != AF_INET6 ||
@@ -403,6 +405,9 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 		pmc->sfmode = omode;
 	}
 
+	write_lock_bh(&pmc->sflock);
+	pmclocked = 1;
+
 	psl = pmc->sflist;
 	if (!add) {
 		if (!psl)
@@ -444,8 +449,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 
 		if (psl)
 			count += psl->sl_max;
-		newpsl = (struct ip6_sf_socklist *)sock_kmalloc(sk,
-			IP6_SFLSIZE(count), GFP_ATOMIC);
+		newpsl = sock_kmalloc(sk, IP6_SFLSIZE(count), GFP_ATOMIC);
 		if (!newpsl) {
 			err = -ENOBUFS;
 			goto done;
@@ -475,6 +479,8 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 	/* update the interface list */
 	ip6_mc_add_src(idev, group, omode, 1, source, 1);
 done:
+	if (pmclocked)
+		write_unlock_bh(&pmc->sflock);
 	read_unlock_bh(&ipv6_sk_mc_lock);
 	read_unlock_bh(&idev->lock);
 	in6_dev_put(idev);
@@ -510,6 +516,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
 	dev = idev->dev;
 
 	err = 0;
+	read_lock_bh(&ipv6_sk_mc_lock);
+
 	if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) {
 		leavegroup = 1;
 		goto done;
@@ -526,8 +534,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
 		goto done;
 	}
 	if (gsf->gf_numsrc) {
-		newpsl = (struct ip6_sf_socklist *)sock_kmalloc(sk,
-				IP6_SFLSIZE(gsf->gf_numsrc), GFP_ATOMIC);
+		newpsl = sock_kmalloc(sk, IP6_SFLSIZE(gsf->gf_numsrc),
+							  GFP_ATOMIC);
 		if (!newpsl) {
 			err = -ENOBUFS;
 			goto done;
@@ -549,6 +557,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
 		newpsl = NULL;
 		(void) ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0);
 	}
+
+	write_lock_bh(&pmc->sflock);
 	psl = pmc->sflist;
 	if (psl) {
 		(void) ip6_mc_del_src(idev, group, pmc->sfmode,
@@ -558,8 +568,10 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
 		(void) ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
 	pmc->sflist = newpsl;
 	pmc->sfmode = gsf->gf_fmode;
+	write_unlock_bh(&pmc->sflock);
 	err = 0;
 done:
+	read_unlock_bh(&ipv6_sk_mc_lock);
 	read_unlock_bh(&idev->lock);
 	in6_dev_put(idev);
 	dev_put(dev);
@@ -592,6 +604,11 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
 	dev = idev->dev;
 
 	err = -EADDRNOTAVAIL;
+	/*
+	 * changes to the ipv6_mc_list require the socket lock and
+	 * a read lock on ipv6_sk_mc_lock. We have the socket lock,
+	 * so reading the list is safe.
+	 */
 
 	for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
 		if (pmc->ifindex != gsf->gf_interface)
@@ -614,6 +631,10 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
 	    copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) {
 		return -EFAULT;
 	}
+	/* changes to psl require the socket lock, a read lock on
+	 * ipv6_sk_mc_lock and a write lock on pmc->sflock. We
+	 * have the socket lock, so reading here is safe.
+	 */
 	for (i=0; i<copycount; i++) {
 		struct sockaddr_in6 *psin6;
 		struct sockaddr_storage ss;
@@ -650,6 +671,7 @@ int inet6_mc_check(struct sock *sk, struct in6_addr *mc_addr,
 		read_unlock(&ipv6_sk_mc_lock);
 		return 1;
 	}
+	read_lock(&mc->sflock);
 	psl = mc->sflist;
 	if (!psl) {
 		rv = mc->sfmode == MCAST_EXCLUDE;
@@ -665,6 +687,7 @@ int inet6_mc_check(struct sock *sk, struct in6_addr *mc_addr,
 		if (mc->sfmode == MCAST_EXCLUDE && i < psl->sl_count)
 			rv = 0;
 	}
+	read_unlock(&mc->sflock);
 	read_unlock(&ipv6_sk_mc_lock);
 
 	return rv;
@@ -744,7 +767,7 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 	 * for deleted items allows change reports to use common code with
 	 * non-deleted or query-response MCA's.
 	 */
-	pmc = (struct ifmcaddr6 *)kmalloc(sizeof(*pmc), GFP_ATOMIC);
+	pmc = kmalloc(sizeof(*pmc), GFP_ATOMIC);
 	if (!pmc)
 		return;
 	memset(pmc, 0, sizeof(*pmc));
@@ -1068,23 +1091,64 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)
 	ma->mca_flags |= MAF_TIMER_RUNNING;
 }
 
-static void mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
+/* mark EXCLUDE-mode sources: 0 if all nsrcs are excluded, 1 otherwise */
+static int mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
+	struct in6_addr *srcs)
+{
+	struct ip6_sf_list *psf;
+	int i, scount;
+
+	scount = 0;
+	for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
+		if (scount == nsrcs)
+			break;
+		for (i=0; i<nsrcs; i++) {
+			/* skip inactive filters (test is independent of i) */
+			if (pmc->mca_sfcount[MCAST_INCLUDE] ||
+			    pmc->mca_sfcount[MCAST_EXCLUDE] !=
+			    psf->sf_count[MCAST_EXCLUDE])
+				continue;
+			if (ipv6_addr_equal(&srcs[i], &psf->sf_addr)) {
+				scount++;
+				break;
+			}
+		}
+	}
+	pmc->mca_flags &= ~MAF_GSQUERY;
+	if (scount == nsrcs)	/* all sources excluded */
+		return 0;
+	return 1;
+}
+
+static int mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
 	struct in6_addr *srcs)
 {
 	struct ip6_sf_list *psf;
 	int i, scount;
 
+	if (pmc->mca_sfmode == MCAST_EXCLUDE)
+		return mld_xmarksources(pmc, nsrcs, srcs);
+
+	/* mark INCLUDE-mode sources */
+
 	scount = 0;
 	for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
 		if (scount == nsrcs)
 			break;
-		for (i=0; i<nsrcs; i++)
+		for (i=0; i<nsrcs; i++) {
 			if (ipv6_addr_equal(&srcs[i], &psf->sf_addr)) {
 				psf->sf_gsresp = 1;
 				scount++;
 				break;
 			}
+		}
+	}
+	if (!scount) {
+		pmc->mca_flags &= ~MAF_GSQUERY;
+		return 0;
 	}
+	pmc->mca_flags |= MAF_GSQUERY;
+	return 1;
 }
 
 int igmp6_event_query(struct sk_buff *skb)
@@ -1167,7 +1231,7 @@ int igmp6_event_query(struct sk_buff *skb)
 		/* mark sources to include, if group & source-specific */
 		if (mlh2->nsrcs != 0) {
 			if (!pskb_may_pull(skb, srcs_offset + 
-				mlh2->nsrcs * sizeof(struct in6_addr))) {
+			    ntohs(mlh2->nsrcs) * sizeof(struct in6_addr))) {
 				in6_dev_put(idev);
 				return -EINVAL;
 			}
@@ -1203,10 +1267,9 @@ int igmp6_event_query(struct sk_buff *skb)
 				else
 					ma->mca_flags &= ~MAF_GSQUERY;
 			}
-			if (ma->mca_flags & MAF_GSQUERY)
-				mld_marksources(ma, ntohs(mlh2->nsrcs),
-					mlh2->srcs);
-			igmp6_group_queried(ma, max_delay);
+			if (!(ma->mca_flags & MAF_GSQUERY) ||
+			   mld_marksources(ma, ntohs(mlh2->nsrcs), mlh2->srcs))
+				igmp6_group_queried(ma, max_delay);
 			spin_unlock_bh(&ma->mca_lock);
 			if (group_type != IPV6_ADDR_ANY)
 				break;
@@ -1231,6 +1294,11 @@ int igmp6_event_report(struct sk_buff *skb)
 	if (skb->pkt_type == PACKET_LOOPBACK)
 		return 0;
 
+	/* send our report if the MC router may not have heard this report */
+	if (skb->pkt_type != PACKET_MULTICAST &&
+	    skb->pkt_type != PACKET_BROADCAST)
+		return 0;
+
 	if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
 		return -EINVAL;
 
@@ -1276,7 +1344,18 @@ static int is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type,
 	case MLD2_MODE_IS_EXCLUDE:
 		if (gdeleted || sdeleted)
 			return 0;
-		return !((pmc->mca_flags & MAF_GSQUERY) && !psf->sf_gsresp);
+		if (!((pmc->mca_flags & MAF_GSQUERY) && !psf->sf_gsresp)) {
+			if (pmc->mca_sfmode == MCAST_INCLUDE)
+				return 1;
+			/* don't include if this source is excluded
+			 * in all filters
+			 */
+			if (psf->sf_count[MCAST_INCLUDE])
+				return 0;
+			return pmc->mca_sfcount[MCAST_EXCLUDE] ==
+				psf->sf_count[MCAST_EXCLUDE];
+		}
+		return 0;
 	case MLD2_CHANGE_TO_INCLUDE:
 		if (gdeleted || sdeleted)
 			return 0;
@@ -1445,7 +1524,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 	struct mld2_report *pmr;
 	struct mld2_grec *pgr = NULL;
 	struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list;
-	int scount, first, isquery, truncate;
+	int scount, stotal, first, isquery, truncate;
 
 	if (pmc->mca_flags & MAF_NOREPORT)
 		return skb;
@@ -1455,25 +1534,13 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 	truncate = type == MLD2_MODE_IS_EXCLUDE ||
 		    type == MLD2_CHANGE_TO_EXCLUDE;
 
+	stotal = scount = 0;
+
 	psf_list = sdeleted ? &pmc->mca_tomb : &pmc->mca_sources;
 
-	if (!*psf_list) {
-		if (type == MLD2_ALLOW_NEW_SOURCES ||
-		    type == MLD2_BLOCK_OLD_SOURCES)
-			return skb;
-		if (pmc->mca_crcount || isquery) {
-			/* make sure we have room for group header and at
-			 * least one source.
-			 */
-			if (skb && AVAILABLE(skb) < sizeof(struct mld2_grec)+
-			    sizeof(struct in6_addr)) {
-				mld_sendpack(skb);
-				skb = NULL; /* add_grhead will get a new one */
-			}
-			skb = add_grhead(skb, pmc, type, &pgr);
-		}
-		return skb;
-	}
+	if (!*psf_list)
+		goto empty_source;
+
 	pmr = skb ? (struct mld2_report *)skb->h.raw : NULL;
 
 	/* EX and TO_EX get a fresh packet, if needed */
@@ -1486,7 +1553,6 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 		}
 	}
 	first = 1;
-	scount = 0;
 	psf_prev = NULL;
 	for (psf=*psf_list; psf; psf=psf_next) {
 		struct in6_addr *psrc;
@@ -1520,7 +1586,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 		}
 		psrc = (struct in6_addr *)skb_put(skb, sizeof(*psrc));
 		*psrc = psf->sf_addr;
-		scount++;
+		scount++; stotal++;
 		if ((type == MLD2_ALLOW_NEW_SOURCES ||
 		     type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount) {
 			psf->sf_crcount--;
@@ -1535,6 +1601,21 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 		}
 		psf_prev = psf;
 	}
+
+empty_source:
+	if (!stotal) {
+		if (type == MLD2_ALLOW_NEW_SOURCES ||
+		    type == MLD2_BLOCK_OLD_SOURCES)
+			return skb;
+		if (pmc->mca_crcount || isquery) {
+			/* make sure we have room for group header */
+			if (skb && AVAILABLE(skb) < sizeof(struct mld2_grec)) {
+				mld_sendpack(skb);
+				skb = NULL; /* add_grhead will get a new one */
+			}
+			skb = add_grhead(skb, pmc, type, &pgr);
+		}
+	}
 	if (pgr)
 		pgr->grec_nsrcs = htons(scount);
 
@@ -1616,11 +1697,11 @@ static void mld_send_cr(struct inet6_dev *idev)
 			skb = add_grec(skb, pmc, dtype, 1, 1);
 		}
 		if (pmc->mca_crcount) {
-			pmc->mca_crcount--;
 			if (pmc->mca_sfmode == MCAST_EXCLUDE) {
 				type = MLD2_CHANGE_TO_INCLUDE;
 				skb = add_grec(skb, pmc, type, 1, 0);
 			}
+			pmc->mca_crcount--;
 			if (pmc->mca_crcount == 0) {
 				mld_clear_zeros(&pmc->mca_tomb);
 				mld_clear_zeros(&pmc->mca_sources);
@@ -1654,12 +1735,12 @@ static void mld_send_cr(struct inet6_dev *idev)
 
 		/* filter mode changes */
 		if (pmc->mca_crcount) {
-			pmc->mca_crcount--;
 			if (pmc->mca_sfmode == MCAST_EXCLUDE)
 				type = MLD2_CHANGE_TO_EXCLUDE;
 			else
 				type = MLD2_CHANGE_TO_INCLUDE;
 			skb = add_grec(skb, pmc, type, 0, 0);
+			pmc->mca_crcount--;
 		}
 		spin_unlock_bh(&pmc->mca_lock);
 	}
@@ -1855,7 +1936,7 @@ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode,
 		psf_prev = psf;
 	}
 	if (!psf) {
-		psf = (struct ip6_sf_list *)kmalloc(sizeof(*psf), GFP_ATOMIC);
+		psf = kmalloc(sizeof(*psf), GFP_ATOMIC);
 		if (!psf)
 			return -ENOBUFS;
 		memset(psf, 0, sizeof(*psf));
@@ -2018,6 +2099,9 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
 {
 	int err;
 
+	/* callers have the socket lock and a write lock on ipv6_sk_mc_lock,
+	 * so there are no other readers or writers of iml or its sflist
+	 */
 	if (iml->sflist == 0) {
 		/* any-source empty exclude case */
 		return ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0);
@@ -2289,7 +2373,7 @@ static int igmp6_mc_seq_show(struct seq_file *seq, void *v)
 	struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
 
 	seq_printf(seq,
-		   "%-4d %-15s %04x%04x%04x%04x%04x%04x%04x%04x %5d %08X %ld\n", 
+		   "%-4d %-15s " NIP6_SEQFMT " %5d %08X %ld\n", 
 		   state->dev->ifindex, state->dev->name,
 		   NIP6(im->mca_addr),
 		   im->mca_users, im->mca_flags,
@@ -2463,10 +2547,7 @@ static int igmp6_mcf_seq_show(struct seq_file *seq, void *v)
 			   "Source Address", "INC", "EXC");
 	} else {
 		seq_printf(seq,
-			   "%3d %6.6s "
-			   "%04x%04x%04x%04x%04x%04x%04x%04x "
-			   "%04x%04x%04x%04x%04x%04x%04x%04x "
-			   "%6lu %6lu\n",
+			   "%3d %6.6s " NIP6_SEQFMT " " NIP6_SEQFMT " %6lu %6lu\n",
 			   state->dev->ifindex, state->dev->name,
 			   NIP6(state->im->mca_addr),
 			   NIP6(psf->sf_addr),
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 305d9ee6d7d..cb8856b1d95 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -692,7 +692,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
 		if (!(neigh->nud_state & NUD_VALID)) {
 			ND_PRINTK1(KERN_DEBUG
 				   "%s(): trying to ucast probe in NUD_INVALID: "
-				   "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+				   NIP6_FMT "\n",
 				   __FUNCTION__,
 				   NIP6(*target));
 		}
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index f8626ebf90f..d750cfc019d 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -1,15 +1,12 @@
-#include <linux/config.h>
-#include <linux/init.h>
-
-#ifdef CONFIG_NETFILTER
-
 #include <linux/kernel.h>
+#include <linux/init.h>
 #include <linux/ipv6.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv6.h>
 #include <net/dst.h>
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
+#include <net/xfrm.h>
 
 int ip6_route_me_harder(struct sk_buff *skb)
 {
@@ -21,11 +18,17 @@ int ip6_route_me_harder(struct sk_buff *skb)
 		{ .ip6_u =
 		  { .daddr = iph->daddr,
 		    .saddr = iph->saddr, } },
-		.proto = iph->nexthdr,
 	};
 
 	dst = ip6_route_output(skb->sk, &fl);
 
+#ifdef CONFIG_XFRM
+	if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+	    xfrm_decode_session(skb, &fl, AF_INET6) == 0)
+		if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0))
+			return -1;
+#endif
+
 	if (dst->error) {
 		IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
 		LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
@@ -87,18 +90,10 @@ int __init ipv6_netfilter_init(void)
 	return nf_register_queue_rerouter(PF_INET6, &ip6_reroute);
 }
 
+/* This can be called from inet6_init() on errors, so it cannot
+ * be marked __exit. -DaveM
+ */
 void ipv6_netfilter_fini(void)
 {
 	nf_unregister_queue_rerouter(PF_INET6);
 }
-
-#else /* CONFIG_NETFILTER */
-int __init ipv6_netfilter_init(void)
-{
-	return 0;
-}
-
-void ipv6_netfilter_fini(void)
-{
-}
-#endif /* CONFIG_NETFILTER */
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 060d6120241..2d6f8ecbc27 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -41,6 +41,7 @@ config IP6_NF_QUEUE
 
 config IP6_NF_IPTABLES
 	tristate "IP6 tables support (required for filtering/masq/NAT)"
+	depends on NETFILTER_XTABLES
 	help
 	  ip6tables is a general, extensible packet identification framework.
 	  Currently only the packet filtering and packet mangling subsystem
@@ -50,25 +51,6 @@ config IP6_NF_IPTABLES
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 # The simple matches.
-config IP6_NF_MATCH_LIMIT
-	tristate "limit match support"
-	depends on IP6_NF_IPTABLES
-	help
-	  limit matching allows you to control the rate at which a rule can be
-	  matched: mainly useful in combination with the LOG target ("LOG
-	  target support", below) and to avoid some Denial of Service attacks.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
-config IP6_NF_MATCH_MAC
-	tristate "MAC address match support"
-	depends on IP6_NF_IPTABLES
-	help
-	  mac matching allows you to match packets based on the source
-	  Ethernet address of the packet.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP6_NF_MATCH_RT
 	tristate "Routing header match support"
 	depends on IP6_NF_IPTABLES
@@ -124,16 +106,6 @@ config IP6_NF_MATCH_OWNER
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP6_NF_MATCH_MARK
-	tristate "netfilter MARK match support"
-	depends on IP6_NF_IPTABLES
-	help
-	  Netfilter mark matching allows you to match packets based on the
-	  `nfmark' value in the packet.  This can be set by the MARK target
-	  (see below).
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP6_NF_MATCH_IPV6HEADER
 	tristate "IPv6 Extension Headers Match"
 	depends on IP6_NF_IPTABLES
@@ -151,15 +123,6 @@ config IP6_NF_MATCH_AHESP
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP6_NF_MATCH_LENGTH
-	tristate "Packet Length match support"
-	depends on IP6_NF_IPTABLES
-	help
-	  This option allows you to match the length of a packet against a
-	  specific value or range of values.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP6_NF_MATCH_EUI64
 	tristate "EUI64 address check"
 	depends on IP6_NF_IPTABLES
@@ -170,12 +133,13 @@ config IP6_NF_MATCH_EUI64
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP6_NF_MATCH_PHYSDEV
-	tristate "Physdev match support"
-	depends on IP6_NF_IPTABLES && BRIDGE_NETFILTER
+config IP6_NF_MATCH_POLICY
+	tristate "IPsec policy match support"
+	depends on IP6_NF_IPTABLES && XFRM
 	help
-	  Physdev packet matching matches against the physical bridge ports
-	  the IP packet arrived on or will leave by.
+	  Policy matching allows you to match packets based on the
+	  IPsec policy that was used during decapsulation/will
+	  be used during encapsulation.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
@@ -209,17 +173,6 @@ config IP6_NF_TARGET_REJECT
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP6_NF_TARGET_NFQUEUE
-	tristate "NFQUEUE Target Support"
-	depends on IP_NF_IPTABLES
-	help
-	  This Target replaced the old obsolete QUEUE target.
-
-	  As opposed to QUEUE, it supports 65535 different queues,
-	  not just one.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP6_NF_MANGLE
 	tristate "Packet mangling"
 	depends on IP6_NF_IPTABLES
@@ -230,19 +183,6 @@ config IP6_NF_MANGLE
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP6_NF_TARGET_MARK
-	tristate "MARK target support"
-	depends on IP6_NF_MANGLE
-	help
-	  This option adds a `MARK' target, which allows you to create rules
-	  in the `mangle' table which alter the netfilter mark (nfmark) field
-	  associated with the packet packet prior to routing. This can change
-	  the routing method (see `Use netfilter MARK value as routing
-	  key') and can also be used by other subsystems to change their
-	  behavior.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP6_NF_TARGET_HL
 	tristate  'HL (hoplimit) target support'
 	depends on IP6_NF_MANGLE
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 9ab5b2ca1f5..db6073c9416 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -4,24 +4,18 @@
 
 # Link order matters here.
 obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o
-obj-$(CONFIG_IP6_NF_MATCH_LIMIT) += ip6t_limit.o
-obj-$(CONFIG_IP6_NF_MATCH_MARK) += ip6t_mark.o
-obj-$(CONFIG_IP6_NF_MATCH_LENGTH) += ip6t_length.o
-obj-$(CONFIG_IP6_NF_MATCH_MAC) += ip6t_mac.o
 obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
 obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o ip6t_dst.o
 obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o
 obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o
 obj-$(CONFIG_IP6_NF_MATCH_AHESP) += ip6t_esp.o ip6t_ah.o
+obj-$(CONFIG_IP6_NF_MATCH_POLICY) += ip6t_policy.o
 obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
 obj-$(CONFIG_IP6_NF_MATCH_MULTIPORT) += ip6t_multiport.o
 obj-$(CONFIG_IP6_NF_MATCH_OWNER) += ip6t_owner.o
-obj-$(CONFIG_IP6_NF_MATCH_PHYSDEV) += ip6t_physdev.o
 obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
 obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
-obj-$(CONFIG_IP6_NF_TARGET_MARK) += ip6t_MARK.o
 obj-$(CONFIG_IP6_NF_TARGET_HL) += ip6t_HL.o
-obj-$(CONFIG_IP6_NF_TARGET_NFQUEUE) += ip6t_NFQUEUE.o
 obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o
 obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o
 obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 95d469271c4..847068fd336 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -13,15 +13,19 @@
  * 	  a table
  * 06 Jun 2002 Andras Kis-Szabo <kisza@sch.bme.hu>
  *      - new extension header parser code
+ * 15 Oct 2005 Harald Welte <laforge@netfilter.org>
+ * 	- Unification of {ip,ip6}_tables into x_tables
+ * 	- Removed tcp and udp code, since it's not ipv6 specific
  */
+
+#include <linux/capability.h>
 #include <linux/config.h>
+#include <linux/in.h>
 #include <linux/skbuff.h>
 #include <linux/kmod.h>
 #include <linux/vmalloc.h>
 #include <linux/netdevice.h>
 #include <linux/module.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
 #include <linux/icmpv6.h>
 #include <net/ipv6.h>
 #include <asm/uaccess.h>
@@ -30,6 +34,7 @@
 #include <linux/cpumask.h>
 
 #include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter/x_tables.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -64,13 +69,8 @@ do {								\
 #else
 #define IP_NF_ASSERT(x)
 #endif
-#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
 
-static DECLARE_MUTEX(ip6t_mutex);
 
-/* Must have mutex */
-#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ip6t_mutex) != 0)
-#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ip6t_mutex) != 0)
 #include <linux/netfilter_ipv4/listhelp.h>
 
 #if 0
@@ -86,55 +86,22 @@ static DECLARE_MUTEX(ip6t_mutex);
    context stops packets coming through and allows user context to read
    the counters or update the rules.
 
-   To be cache friendly on SMP, we arrange them like so:
-   [ n-entries ]
-   ... cache-align padding ...
-   [ n-entries ]
-
    Hence the start of any table is given by get_table() below.  */
 
-/* The table itself */
-struct ip6t_table_info
-{
-	/* Size per table */
-	unsigned int size;
-	/* Number of entries: FIXME. --RR */
-	unsigned int number;
-	/* Initial number of entries. Needed for module usage count */
-	unsigned int initial_entries;
-
-	/* Entry points and underflows */
-	unsigned int hook_entry[NF_IP6_NUMHOOKS];
-	unsigned int underflow[NF_IP6_NUMHOOKS];
-
-	/* ip6t_entry tables: one per CPU */
-	char entries[0] ____cacheline_aligned;
-};
-
-static LIST_HEAD(ip6t_target);
-static LIST_HEAD(ip6t_match);
-static LIST_HEAD(ip6t_tables);
-#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
-
-#ifdef CONFIG_SMP
-#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
-#else
-#define TABLE_OFFSET(t,p) 0
-#endif
-
 #if 0
 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
 #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
 #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
 #endif
 
-static int ip6_masked_addrcmp(struct in6_addr addr1, struct in6_addr mask,
-			      struct in6_addr addr2)
+int
+ip6_masked_addrcmp(const struct in6_addr *addr1, const struct in6_addr *mask,
+                   const struct in6_addr *addr2)
 {
 	int i;
 	for( i = 0; i < 16; i++){
-		if((addr1.s6_addr[i] & mask.s6_addr[i]) != 
-		   (addr2.s6_addr[i] & mask.s6_addr[i]))
+		if((addr1->s6_addr[i] & mask->s6_addr[i]) != 
+		   (addr2->s6_addr[i] & mask->s6_addr[i]))
 			return 1;
 	}
 	return 0;
@@ -168,10 +135,10 @@ ip6_packet_match(const struct sk_buff *skb,
 
 #define FWINV(bool,invflg) ((bool) ^ !!(ip6info->invflags & invflg))
 
-	if (FWINV(ip6_masked_addrcmp(ipv6->saddr,ip6info->smsk,ip6info->src),
-		  IP6T_INV_SRCIP)
-	    || FWINV(ip6_masked_addrcmp(ipv6->daddr,ip6info->dmsk,ip6info->dst),
-		     IP6T_INV_DSTIP)) {
+	if (FWINV(ip6_masked_addrcmp(&ipv6->saddr, &ip6info->smsk,
+	                             &ip6info->src), IP6T_INV_SRCIP)
+	    || FWINV(ip6_masked_addrcmp(&ipv6->daddr, &ip6info->dmsk,
+	                                &ip6info->dst), IP6T_INV_DSTIP)) {
 		dprintf("Source or dest mismatch.\n");
 /*
 		dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr,
@@ -214,69 +181,21 @@ ip6_packet_match(const struct sk_buff *skb,
 
 	/* look for the desired protocol header */
 	if((ip6info->flags & IP6T_F_PROTO)) {
-		u_int8_t currenthdr = ipv6->nexthdr;
-		struct ipv6_opt_hdr _hdr, *hp;
-		u_int16_t ptr;		/* Header offset in skb */
-		u_int16_t hdrlen;	/* Header */
-		u_int16_t _fragoff = 0, *fp = NULL;
-
-		ptr = IPV6_HDR_LEN;
-
-		while (ip6t_ext_hdr(currenthdr)) {
-	                /* Is there enough space for the next ext header? */
-	                if (skb->len - ptr < IPV6_OPTHDR_LEN)
-	                        return 0;
-
-			/* NONE or ESP: there isn't protocol part */
-			/* If we want to count these packets in '-p all',
-			 * we will change the return 0 to 1*/
-			if ((currenthdr == IPPROTO_NONE) || 
-				(currenthdr == IPPROTO_ESP))
-				break;
-
-			hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
-			BUG_ON(hp == NULL);
-
-			/* Size calculation */
-	                if (currenthdr == IPPROTO_FRAGMENT) {
-				fp = skb_header_pointer(skb,
-						   ptr+offsetof(struct frag_hdr,
-								frag_off),
-						   sizeof(_fragoff),
-						   &_fragoff);
-				if (fp == NULL)
-					return 0;
-
-				_fragoff = ntohs(*fp) & ~0x7;
-	                        hdrlen = 8;
-	                } else if (currenthdr == IPPROTO_AH)
-	                        hdrlen = (hp->hdrlen+2)<<2;
-	                else
-	                        hdrlen = ipv6_optlen(hp);
-
-			currenthdr = hp->nexthdr;
-	                ptr += hdrlen;
-			/* ptr is too large */
-	                if ( ptr > skb->len ) 
-				return 0;
-			if (_fragoff) {
-				if (ip6t_ext_hdr(currenthdr))
-					return 0;
-				break;
-			}
-		}
+		int protohdr;
+		unsigned short _frag_off;
 
-		*protoff = ptr;
-		*fragoff = _fragoff;
+		protohdr = ipv6_find_hdr(skb, protoff, -1, &_frag_off);
+		if (protohdr < 0)
+			return 0;
 
-		/* currenthdr contains the protocol header */
+		*fragoff = _frag_off;
 
 		dprintf("Packet protocol %hi ?= %s%hi.\n",
-				currenthdr, 
+				protohdr, 
 				ip6info->invflags & IP6T_INV_PROTO ? "!":"",
 				ip6info->proto);
 
-		if (ip6info->proto == currenthdr) {
+		if (ip6info->proto == protohdr) {
 			if(ip6info->invflags & IP6T_INV_PROTO) {
 				return 0;
 			}
@@ -351,7 +270,7 @@ ip6t_do_table(struct sk_buff **pskb,
 	      unsigned int hook,
 	      const struct net_device *in,
 	      const struct net_device *out,
-	      struct ip6t_table *table,
+	      struct xt_table *table,
 	      void *userdata)
 {
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
@@ -363,6 +282,7 @@ ip6t_do_table(struct sk_buff **pskb,
 	const char *indev, *outdev;
 	void *table_base;
 	struct ip6t_entry *e, *back;
+	struct xt_table_info *private;
 
 	/* Initialization */
 	indev = in ? in->name : nulldevname;
@@ -375,10 +295,10 @@ ip6t_do_table(struct sk_buff **pskb,
 	 * match it. */
 
 	read_lock_bh(&table->lock);
+	private = table->private;
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
-	table_base = (void *)table->private->entries
-		+ TABLE_OFFSET(table->private, smp_processor_id());
-	e = get_entry(table_base, table->private->hook_entry[hook]);
+	table_base = (void *)private->entries[smp_processor_id()];
+	e = get_entry(table_base, private->hook_entry[hook]);
 
 #ifdef CONFIG_NETFILTER_DEBUG
 	/* Check noone else using our table */
@@ -394,7 +314,7 @@ ip6t_do_table(struct sk_buff **pskb,
 #endif
 
 	/* For return from builtin chain */
-	back = get_entry(table_base, table->private->underflow[hook]);
+	back = get_entry(table_base, private->underflow[hook]);
 
 	do {
 		IP_NF_ASSERT(e);
@@ -494,145 +414,6 @@ ip6t_do_table(struct sk_buff **pskb,
 #endif
 }
 
-/*
- * These are weird, but module loading must not be done with mutex
- * held (since they will register), and we have to have a single
- * function to use try_then_request_module().
- */
-
-/* Find table by name, grabs mutex & ref.  Returns ERR_PTR() on error. */
-static inline struct ip6t_table *find_table_lock(const char *name)
-{
-	struct ip6t_table *t;
-
-	if (down_interruptible(&ip6t_mutex) != 0)
-		return ERR_PTR(-EINTR);
-
-	list_for_each_entry(t, &ip6t_tables, list)
-		if (strcmp(t->name, name) == 0 && try_module_get(t->me))
-			return t;
-	up(&ip6t_mutex);
-	return NULL;
-}
-
-/* Find match, grabs ref.  Returns ERR_PTR() on error. */
-static inline struct ip6t_match *find_match(const char *name, u8 revision)
-{
-	struct ip6t_match *m;
-	int err = 0;
-
-	if (down_interruptible(&ip6t_mutex) != 0)
-		return ERR_PTR(-EINTR);
-
-	list_for_each_entry(m, &ip6t_match, list) {
-		if (strcmp(m->name, name) == 0) {
-			if (m->revision == revision) {
-				if (try_module_get(m->me)) {
-					up(&ip6t_mutex);
-					return m;
-				}
-			} else
-				err = -EPROTOTYPE; /* Found something. */
-		}
-	}
-	up(&ip6t_mutex);
-	return ERR_PTR(err);
-}
-
-/* Find target, grabs ref.  Returns ERR_PTR() on error. */
-static inline struct ip6t_target *find_target(const char *name, u8 revision)
-{
-	struct ip6t_target *t;
-	int err = 0;
-
-	if (down_interruptible(&ip6t_mutex) != 0)
-		return ERR_PTR(-EINTR);
-
-	list_for_each_entry(t, &ip6t_target, list) {
-		if (strcmp(t->name, name) == 0) {
-			if (t->revision == revision) {
-				if (try_module_get(t->me)) {
-					up(&ip6t_mutex);
-					return t;
-				}
-			} else
-				err = -EPROTOTYPE; /* Found something. */
-		}
-	}
-	up(&ip6t_mutex);
-	return ERR_PTR(err);
-}
-
-struct ip6t_target *ip6t_find_target(const char *name, u8 revision)
-{
-	struct ip6t_target *target;
-
-	target = try_then_request_module(find_target(name, revision),
-					 "ip6t_%s", name);
-	if (IS_ERR(target) || !target)
-		return NULL;
-	return target;
-}
-
-static int match_revfn(const char *name, u8 revision, int *bestp)
-{
-	struct ip6t_match *m;
-	int have_rev = 0;
-
-	list_for_each_entry(m, &ip6t_match, list) {
-		if (strcmp(m->name, name) == 0) {
-			if (m->revision > *bestp)
-				*bestp = m->revision;
-			if (m->revision == revision)
-				have_rev = 1;
-		}
-	}
-	return have_rev;
-}
-
-static int target_revfn(const char *name, u8 revision, int *bestp)
-{
-	struct ip6t_target *t;
-	int have_rev = 0;
-
-	list_for_each_entry(t, &ip6t_target, list) {
-		if (strcmp(t->name, name) == 0) {
-			if (t->revision > *bestp)
-				*bestp = t->revision;
-			if (t->revision == revision)
-				have_rev = 1;
-		}
-	}
-	return have_rev;
-}
-
-/* Returns true or fals (if no such extension at all) */
-static inline int find_revision(const char *name, u8 revision,
-				int (*revfn)(const char *, u8, int *),
-				int *err)
-{
-	int have_rev, best = -1;
-
-	if (down_interruptible(&ip6t_mutex) != 0) {
-		*err = -EINTR;
-		return 1;
-	}
-	have_rev = revfn(name, revision, &best);
-	up(&ip6t_mutex);
-
-	/* Nothing at all?  Return 0 to try loading module. */
-	if (best == -1) {
-		*err = -ENOENT;
-		return 0;
-	}
-
-	*err = best;
-	if (!have_rev)
-		*err = -EPROTONOSUPPORT;
-	return 1;
-}
-
-
 /* All zeroes == unconditional rule. */
 static inline int
 unconditional(const struct ip6t_ip6 *ipv6)
@@ -649,7 +430,8 @@ unconditional(const struct ip6t_ip6 *ipv6)
 /* Figures out from what hook each rule can be called: returns 0 if
    there are loops.  Puts hook bitmask in comefrom. */
 static int
-mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
+mark_source_chains(struct xt_table_info *newinfo,
+		   unsigned int valid_hooks, void *entry0)
 {
 	unsigned int hook;
 
@@ -658,7 +440,7 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
 	for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) {
 		unsigned int pos = newinfo->hook_entry[hook];
 		struct ip6t_entry *e
-			= (struct ip6t_entry *)(newinfo->entries + pos);
+			= (struct ip6t_entry *)(entry0 + pos);
 
 		if (!(valid_hooks & (1 << hook)))
 			continue;
@@ -708,13 +490,13 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
 						goto next;
 
 					e = (struct ip6t_entry *)
-						(newinfo->entries + pos);
+						(entry0 + pos);
 				} while (oldpos == pos + e->next_offset);
 
 				/* Move along one */
 				size = e->next_offset;
 				e = (struct ip6t_entry *)
-					(newinfo->entries + pos + size);
+					(entry0 + pos + size);
 				e->counters.pcnt = pos;
 				pos += size;
 			} else {
@@ -731,7 +513,7 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
 					newpos = pos + e->next_offset;
 				}
 				e = (struct ip6t_entry *)
-					(newinfo->entries + newpos);
+					(entry0 + newpos);
 				e->counters.pcnt = pos;
 				pos = newpos;
 			}
@@ -794,11 +576,11 @@ check_match(struct ip6t_entry_match *m,
 {
 	struct ip6t_match *match;
 
-	match = try_then_request_module(find_match(m->u.user.name,
-						   m->u.user.revision),
+	match = try_then_request_module(xt_find_match(AF_INET6, m->u.user.name,
+			      		m->u.user.revision),
 					"ip6t_%s", m->u.user.name);
 	if (IS_ERR(match) || !match) {
-		duprintf("check_match: `%s' not found\n", m->u.user.name);
+	  	duprintf("check_match: `%s' not found\n", m->u.user.name);
 		return match ? PTR_ERR(match) : -ENOENT;
 	}
 	m->u.kernel.match = match;
@@ -839,8 +621,9 @@ check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
 		goto cleanup_matches;
 
 	t = ip6t_get_target(e);
-	target = try_then_request_module(find_target(t->u.user.name,
-						     t->u.user.revision),
+	target = try_then_request_module(xt_find_target(AF_INET6,
+							t->u.user.name,
+							t->u.user.revision),
 					 "ip6t_%s", t->u.user.name);
 	if (IS_ERR(target) || !target) {
 		duprintf("check_entry: `%s' not found\n", t->u.user.name);
@@ -876,7 +659,7 @@ check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
 
 static inline int
 check_entry_size_and_hooks(struct ip6t_entry *e,
-			   struct ip6t_table_info *newinfo,
+			   struct xt_table_info *newinfo,
 			   unsigned char *base,
 			   unsigned char *limit,
 			   const unsigned int *hook_entries,
@@ -910,7 +693,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
            < 0 (not IP6T_RETURN). --RR */
 
 	/* Clear counters and comefrom */
-	e->counters = ((struct ip6t_counters) { 0, 0 });
+	e->counters = ((struct xt_counters) { 0, 0 });
 	e->comefrom = 0;
 
 	(*i)++;
@@ -940,7 +723,8 @@ cleanup_entry(struct ip6t_entry *e, unsigned int *i)
 static int
 translate_table(const char *name,
 		unsigned int valid_hooks,
-		struct ip6t_table_info *newinfo,
+		struct xt_table_info *newinfo,
+		void *entry0,
 		unsigned int size,
 		unsigned int number,
 		const unsigned int *hook_entries,
@@ -961,11 +745,11 @@ translate_table(const char *name,
 	duprintf("translate_table: size %u\n", newinfo->size);
 	i = 0;
 	/* Walk through entries, checking offsets. */
-	ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+	ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size,
 				check_entry_size_and_hooks,
 				newinfo,
-				newinfo->entries,
-				newinfo->entries + size,
+				entry0,
+				entry0 + size,
 				hook_entries, underflows, &i);
 	if (ret != 0)
 		return ret;
@@ -993,95 +777,79 @@ translate_table(const char *name,
 		}
 	}
 
-	if (!mark_source_chains(newinfo, valid_hooks))
+	if (!mark_source_chains(newinfo, valid_hooks, entry0))
 		return -ELOOP;
 
 	/* Finally, each sanity check must pass */
 	i = 0;
-	ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+	ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size,
 				check_entry, name, size, &i);
 
 	if (ret != 0) {
-		IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+		IP6T_ENTRY_ITERATE(entry0, newinfo->size,
 				  cleanup_entry, &i);
 		return ret;
 	}
 
 	/* And one copy for every other CPU */
 	for_each_cpu(i) {
-		if (i == 0)
-			continue;
-		memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
-		       newinfo->entries,
-		       SMP_ALIGN(newinfo->size));
+		if (newinfo->entries[i] && newinfo->entries[i] != entry0)
+			memcpy(newinfo->entries[i], entry0, newinfo->size);
 	}
 
 	return ret;
 }
 
-static struct ip6t_table_info *
-replace_table(struct ip6t_table *table,
-	      unsigned int num_counters,
-	      struct ip6t_table_info *newinfo,
-	      int *error)
+/* Gets counters. */
+static inline int
+add_entry_to_counter(const struct ip6t_entry *e,
+		     struct xt_counters total[],
+		     unsigned int *i)
 {
-	struct ip6t_table_info *oldinfo;
-
-#ifdef CONFIG_NETFILTER_DEBUG
-	{
-		struct ip6t_entry *table_base;
-		unsigned int i;
-
-		for_each_cpu(i) {
-			table_base =
-				(void *)newinfo->entries
-				+ TABLE_OFFSET(newinfo, i);
-
-			table_base->comefrom = 0xdead57ac;
-		}
-	}
-#endif
-
-	/* Do the substitution. */
-	write_lock_bh(&table->lock);
-	/* Check inside lock: is the old number correct? */
-	if (num_counters != table->private->number) {
-		duprintf("num_counters != table->private->number (%u/%u)\n",
-			 num_counters, table->private->number);
-		write_unlock_bh(&table->lock);
-		*error = -EAGAIN;
-		return NULL;
-	}
-	oldinfo = table->private;
-	table->private = newinfo;
-	newinfo->initial_entries = oldinfo->initial_entries;
-	write_unlock_bh(&table->lock);
+	ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
 
-	return oldinfo;
+	(*i)++;
+	return 0;
 }
 
-/* Gets counters. */
 static inline int
-add_entry_to_counter(const struct ip6t_entry *e,
+set_entry_to_counter(const struct ip6t_entry *e,
 		     struct ip6t_counters total[],
 		     unsigned int *i)
 {
-	ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
+	SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
 
 	(*i)++;
 	return 0;
 }
 
 static void
-get_counters(const struct ip6t_table_info *t,
-	     struct ip6t_counters counters[])
+get_counters(const struct xt_table_info *t,
+	     struct xt_counters counters[])
 {
 	unsigned int cpu;
 	unsigned int i;
+	unsigned int curcpu;
+
+	/* Instead of clearing (by a previous call to memset())
+	 * the counters and using adds, we set the counters
+	 * with data used by 'current' CPU
+	 * We don't care about preemption here.
+	 */
+	curcpu = raw_smp_processor_id();
+
+	i = 0;
+	IP6T_ENTRY_ITERATE(t->entries[curcpu],
+			   t->size,
+			   set_entry_to_counter,
+			   counters,
+			   &i);
 
 	for_each_cpu(cpu) {
+		if (cpu == curcpu)
+			continue;
 		i = 0;
-		IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
+		IP6T_ENTRY_ITERATE(t->entries[cpu],
 				  t->size,
 				  add_entry_to_counter,
 				  counters,
@@ -1091,31 +859,33 @@ get_counters(const struct ip6t_table_info *t,
 
 static int
 copy_entries_to_user(unsigned int total_size,
-		     struct ip6t_table *table,
+		     struct xt_table *table,
 		     void __user *userptr)
 {
 	unsigned int off, num, countersize;
 	struct ip6t_entry *e;
-	struct ip6t_counters *counters;
+	struct xt_counters *counters;
+	struct xt_table_info *private = table->private;
 	int ret = 0;
+	void *loc_cpu_entry;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	   (other than comefrom, which userspace doesn't care
 	   about). */
-	countersize = sizeof(struct ip6t_counters) * table->private->number;
+	countersize = sizeof(struct xt_counters) * private->number;
 	counters = vmalloc(countersize);
 
 	if (counters == NULL)
 		return -ENOMEM;
 
 	/* First, sum counters... */
-	memset(counters, 0, countersize);
 	write_lock_bh(&table->lock);
-	get_counters(table->private, counters);
+	get_counters(private, counters);
 	write_unlock_bh(&table->lock);
 
-	/* ... then copy entire thing from CPU 0... */
-	if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
+	/* choose the copy that is on our node/cpu */
+	loc_cpu_entry = private->entries[raw_smp_processor_id()];
+	if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
 		ret = -EFAULT;
 		goto free_counters;
 	}
@@ -1127,7 +897,7 @@ copy_entries_to_user(unsigned int total_size,
 		struct ip6t_entry_match *m;
 		struct ip6t_entry_target *t;
 
-		e = (struct ip6t_entry *)(table->private->entries + off);
+		e = (struct ip6t_entry *)(loc_cpu_entry + off);
 		if (copy_to_user(userptr + off
 				 + offsetof(struct ip6t_entry, counters),
 				 &counters[num],
@@ -1173,23 +943,22 @@ get_entries(const struct ip6t_get_entries *entries,
 	    struct ip6t_get_entries __user *uptr)
 {
 	int ret;
-	struct ip6t_table *t;
+	struct xt_table *t;
 
-	t = find_table_lock(entries->name);
+	t = xt_find_table_lock(AF_INET6, entries->name);
 	if (t && !IS_ERR(t)) {
-		duprintf("t->private->number = %u\n",
-			 t->private->number);
-		if (entries->size == t->private->size)
-			ret = copy_entries_to_user(t->private->size,
+		struct xt_table_info *private = t->private;
+		duprintf("t->private->number = %u\n", private->number);
+		if (entries->size == private->size)
+			ret = copy_entries_to_user(private->size,
 						   t, uptr->entrytable);
 		else {
 			duprintf("get_entries: I've got %u not %u!\n",
-				 t->private->size,
-				 entries->size);
+				 private->size, entries->size);
 			ret = -EINVAL;
 		}
 		module_put(t->me);
-		up(&ip6t_mutex);
+		xt_table_unlock(t);
 	} else
 		ret = t ? PTR_ERR(t) : -ENOENT;
 
@@ -1201,45 +970,41 @@ do_replace(void __user *user, unsigned int len)
 {
 	int ret;
 	struct ip6t_replace tmp;
-	struct ip6t_table *t;
-	struct ip6t_table_info *newinfo, *oldinfo;
-	struct ip6t_counters *counters;
+	struct xt_table *t;
+	struct xt_table_info *newinfo, *oldinfo;
+	struct xt_counters *counters;
+	void *loc_cpu_entry, *loc_cpu_old_entry;
 
 	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
 		return -EFAULT;
 
-	/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
-	if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
-		return -ENOMEM;
-
-	newinfo = vmalloc(sizeof(struct ip6t_table_info)
-			  + SMP_ALIGN(tmp.size) *
-			  		(highest_possible_processor_id()+1));
+	newinfo = xt_alloc_table_info(tmp.size);
 	if (!newinfo)
 		return -ENOMEM;
 
-	if (copy_from_user(newinfo->entries, user + sizeof(tmp),
+	/* choose the copy that is on our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
 			   tmp.size) != 0) {
 		ret = -EFAULT;
 		goto free_newinfo;
 	}
 
-	counters = vmalloc(tmp.num_counters * sizeof(struct ip6t_counters));
+	counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
 	if (!counters) {
 		ret = -ENOMEM;
 		goto free_newinfo;
 	}
-	memset(counters, 0, tmp.num_counters * sizeof(struct ip6t_counters));
 
 	ret = translate_table(tmp.name, tmp.valid_hooks,
-			      newinfo, tmp.size, tmp.num_entries,
+			      newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
 			      tmp.hook_entry, tmp.underflow);
 	if (ret != 0)
 		goto free_newinfo_counters;
 
 	duprintf("ip_tables: Translated table\n");
 
-	t = try_then_request_module(find_table_lock(tmp.name),
+	t = try_then_request_module(xt_find_table_lock(AF_INET6, tmp.name),
 				    "ip6table_%s", tmp.name);
 	if (!t || IS_ERR(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
@@ -1254,7 +1019,7 @@ do_replace(void __user *user, unsigned int len)
 		goto put_module;
 	}
 
-	oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
+	oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret);
 	if (!oldinfo)
 		goto put_module;
 
@@ -1271,24 +1036,25 @@ do_replace(void __user *user, unsigned int len)
 	/* Get the old counters. */
 	get_counters(oldinfo, counters);
 	/* Decrease module usage counts and free resource */
-	IP6T_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
-	vfree(oldinfo);
+	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
+	IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
+	xt_free_table_info(oldinfo);
 	if (copy_to_user(tmp.counters, counters,
-			 sizeof(struct ip6t_counters) * tmp.num_counters) != 0)
+			 sizeof(struct xt_counters) * tmp.num_counters) != 0)
 		ret = -EFAULT;
 	vfree(counters);
-	up(&ip6t_mutex);
+	xt_table_unlock(t);
 	return ret;
 
  put_module:
 	module_put(t->me);
-	up(&ip6t_mutex);
+	xt_table_unlock(t);
  free_newinfo_counters_untrans:
-	IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
+	IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
  free_newinfo_counters:
 	vfree(counters);
  free_newinfo:
-	vfree(newinfo);
+	xt_free_table_info(newinfo);
 	return ret;
 }
 
@@ -1296,7 +1062,7 @@ do_replace(void __user *user, unsigned int len)
  * and everything is OK. */
 static inline int
 add_counter_to_entry(struct ip6t_entry *e,
-		     const struct ip6t_counters addme[],
+		     const struct xt_counters addme[],
 		     unsigned int *i)
 {
 #if 0
@@ -1318,14 +1084,16 @@ static int
 do_add_counters(void __user *user, unsigned int len)
 {
 	unsigned int i;
-	struct ip6t_counters_info tmp, *paddc;
-	struct ip6t_table *t;
+	struct xt_counters_info tmp, *paddc;
+	struct xt_table_info *private;
+	struct xt_table *t;
 	int ret = 0;
+	void *loc_cpu_entry;
 
 	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
 		return -EFAULT;
 
-	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ip6t_counters))
+	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
 		return -EINVAL;
 
 	paddc = vmalloc(len);
@@ -1337,27 +1105,30 @@ do_add_counters(void __user *user, unsigned int len)
 		goto free;
 	}
 
-	t = find_table_lock(tmp.name);
+	t = xt_find_table_lock(AF_INET6, tmp.name);
 	if (!t || IS_ERR(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
 		goto free;
 	}
 
 	write_lock_bh(&t->lock);
-	if (t->private->number != paddc->num_counters) {
+	private = t->private;
+	if (private->number != paddc->num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
 
 	i = 0;
-	IP6T_ENTRY_ITERATE(t->private->entries,
-			  t->private->size,
+	/* Choose the copy that is on our node */
+	loc_cpu_entry = private->entries[smp_processor_id()];
+	IP6T_ENTRY_ITERATE(loc_cpu_entry,
+			  private->size,
 			  add_counter_to_entry,
 			  paddc->counters,
 			  &i);
  unlock_up_free:
 	write_unlock_bh(&t->lock);
-	up(&ip6t_mutex);
+	xt_table_unlock(t);
 	module_put(t->me);
  free:
 	vfree(paddc);
@@ -1401,7 +1172,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 	switch (cmd) {
 	case IP6T_SO_GET_INFO: {
 		char name[IP6T_TABLE_MAXNAMELEN];
-		struct ip6t_table *t;
+		struct xt_table *t;
 
 		if (*len != sizeof(struct ip6t_getinfo)) {
 			duprintf("length %u != %u\n", *len,
@@ -1416,25 +1187,26 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 		}
 		name[IP6T_TABLE_MAXNAMELEN-1] = '\0';
 
-		t = try_then_request_module(find_table_lock(name),
+		t = try_then_request_module(xt_find_table_lock(AF_INET6, name),
 					    "ip6table_%s", name);
 		if (t && !IS_ERR(t)) {
 			struct ip6t_getinfo info;
+			struct xt_table_info *private = t->private;
 
 			info.valid_hooks = t->valid_hooks;
-			memcpy(info.hook_entry, t->private->hook_entry,
+			memcpy(info.hook_entry, private->hook_entry,
 			       sizeof(info.hook_entry));
-			memcpy(info.underflow, t->private->underflow,
+			memcpy(info.underflow, private->underflow,
 			       sizeof(info.underflow));
-			info.num_entries = t->private->number;
-			info.size = t->private->size;
+			info.num_entries = private->number;
+			info.size = private->size;
 			memcpy(info.name, name, sizeof(info.name));
 
 			if (copy_to_user(user, &info, *len) != 0)
 				ret = -EFAULT;
 			else
 				ret = 0;
-			up(&ip6t_mutex);
+			xt_table_unlock(t);
 			module_put(t->me);
 		} else
 			ret = t ? PTR_ERR(t) : -ENOENT;
@@ -1461,7 +1233,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 	case IP6T_SO_GET_REVISION_MATCH:
 	case IP6T_SO_GET_REVISION_TARGET: {
 		struct ip6t_get_revision rev;
-		int (*revfn)(const char *, u8, int *);
+		int target;
 
 		if (*len != sizeof(rev)) {
 			ret = -EINVAL;
@@ -1473,12 +1245,13 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 		}
 
 		if (cmd == IP6T_SO_GET_REVISION_TARGET)
-			revfn = target_revfn;
+			target = 1;
 		else
-			revfn = match_revfn;
+			target = 0;
 
-		try_then_request_module(find_revision(rev.name, rev.revision,
-						      revfn, &ret),
+		try_then_request_module(xt_find_revision(AF_INET6, rev.name,
+							 rev.revision,
+							 target, &ret),
 					"ip6t_%s", rev.name);
 		break;
 	}
@@ -1491,308 +1264,52 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 	return ret;
 }
 
-/* Registration hooks for targets. */
-int
-ip6t_register_target(struct ip6t_target *target)
-{
-	int ret;
-
-	ret = down_interruptible(&ip6t_mutex);
-	if (ret != 0)
-		return ret;
-	list_add(&target->list, &ip6t_target);
-	up(&ip6t_mutex);
-	return ret;
-}
-
-void
-ip6t_unregister_target(struct ip6t_target *target)
-{
-	down(&ip6t_mutex);
-	LIST_DELETE(&ip6t_target, target);
-	up(&ip6t_mutex);
-}
-
-int
-ip6t_register_match(struct ip6t_match *match)
-{
-	int ret;
-
-	ret = down_interruptible(&ip6t_mutex);
-	if (ret != 0)
-		return ret;
-
-	list_add(&match->list, &ip6t_match);
-	up(&ip6t_mutex);
-
-	return ret;
-}
-
-void
-ip6t_unregister_match(struct ip6t_match *match)
-{
-	down(&ip6t_mutex);
-	LIST_DELETE(&ip6t_match, match);
-	up(&ip6t_mutex);
-}
-
-int ip6t_register_table(struct ip6t_table *table,
+int ip6t_register_table(struct xt_table *table,
 			const struct ip6t_replace *repl)
 {
 	int ret;
-	struct ip6t_table_info *newinfo;
-	static struct ip6t_table_info bootstrap
+	struct xt_table_info *newinfo;
+	static struct xt_table_info bootstrap
 		= { 0, 0, 0, { 0 }, { 0 }, { } };
+	void *loc_cpu_entry;
 
-	newinfo = vmalloc(sizeof(struct ip6t_table_info)
-			  + SMP_ALIGN(repl->size) *
-			  		(highest_possible_processor_id()+1));
+	newinfo = xt_alloc_table_info(repl->size);
 	if (!newinfo)
 		return -ENOMEM;
 
-	memcpy(newinfo->entries, repl->entries, repl->size);
+	/* choose the copy on our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	memcpy(loc_cpu_entry, repl->entries, repl->size);
 
 	ret = translate_table(table->name, table->valid_hooks,
-			      newinfo, repl->size,
+			      newinfo, loc_cpu_entry, repl->size,
 			      repl->num_entries,
 			      repl->hook_entry,
 			      repl->underflow);
 	if (ret != 0) {
-		vfree(newinfo);
+		xt_free_table_info(newinfo);
 		return ret;
 	}
 
-	ret = down_interruptible(&ip6t_mutex);
-	if (ret != 0) {
-		vfree(newinfo);
+	if ((ret = xt_register_table(table, &bootstrap, newinfo)) != 0) {
+		xt_free_table_info(newinfo);	/* ret holds the registration errno */
 		return ret;
 	}
 
-	/* Don't autoload: we'd eat our tail... */
-	if (list_named_find(&ip6t_tables, table->name)) {
-		ret = -EEXIST;
-		goto free_unlock;
-	}
-
-	/* Simplifies replace_table code. */
-	table->private = &bootstrap;
-	if (!replace_table(table, 0, newinfo, &ret))
-		goto free_unlock;
-
-	duprintf("table->private->number = %u\n",
-		 table->private->number);
-
-	/* save number of initial entries */
-	table->private->initial_entries = table->private->number;
-
-	rwlock_init(&table->lock);
-	list_prepend(&ip6t_tables, table);
-
- unlock:
-	up(&ip6t_mutex);
-	return ret;
-
- free_unlock:
-	vfree(newinfo);
-	goto unlock;
-}
-
-void ip6t_unregister_table(struct ip6t_table *table)
-{
-	down(&ip6t_mutex);
-	LIST_DELETE(&ip6t_tables, table);
-	up(&ip6t_mutex);
-
-	/* Decrease module usage counts and free resources */
-	IP6T_ENTRY_ITERATE(table->private->entries, table->private->size,
-			  cleanup_entry, NULL);
-	vfree(table->private);
-}
-
-/* Returns 1 if the port is matched by the range, 0 otherwise */
-static inline int
-port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
-{
-	int ret;
-
-	ret = (port >= min && port <= max) ^ invert;
-	return ret;
-}
-
-static int
-tcp_find_option(u_int8_t option,
-		const struct sk_buff *skb,
-		unsigned int tcpoff,
-		unsigned int optlen,
-		int invert,
-		int *hotdrop)
-{
-	/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
-	u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
-	unsigned int i;
-
-	duprintf("tcp_match: finding option\n");
-	if (!optlen)
-		return invert;
-	/* If we don't have the whole header, drop packet. */
-	op = skb_header_pointer(skb, tcpoff + sizeof(struct tcphdr), optlen,
-				_opt);
-	if (op == NULL) {
-		*hotdrop = 1;
-		return 0;
-	}
-
-	for (i = 0; i < optlen; ) {
-		if (op[i] == option) return !invert;
-		if (op[i] < 2) i++;
-		else i += op[i+1]?:1;
-	}
-
-	return invert;
-}
-
-static int
-tcp_match(const struct sk_buff *skb,
-	  const struct net_device *in,
-	  const struct net_device *out,
-	  const void *matchinfo,
-	  int offset,
-	  unsigned int protoff,
-	  int *hotdrop)
-{
-	struct tcphdr _tcph, *th;
-	const struct ip6t_tcp *tcpinfo = matchinfo;
-
-	if (offset) {
-		/* To quote Alan:
-
-		   Don't allow a fragment of TCP 8 bytes in. Nobody normal
-		   causes this. Its a cracker trying to break in by doing a
-		   flag overwrite to pass the direction checks.
-		*/
-		if (offset == 1) {
-			duprintf("Dropping evil TCP offset=1 frag.\n");
-			*hotdrop = 1;
-		}
-		/* Must not be a fragment. */
-		return 0;
-	}
-
-#define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
-
-	th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
-	if (th == NULL) {
-		/* We've been asked to examine this packet, and we
-		   can't.  Hence, no choice but to drop. */
-		duprintf("Dropping evil TCP offset=0 tinygram.\n");
-		*hotdrop = 1;
-		return 0;
-	}
-
-	if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
-			ntohs(th->source),
-			!!(tcpinfo->invflags & IP6T_TCP_INV_SRCPT)))
-		return 0;
-	if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
-			ntohs(th->dest),
-			!!(tcpinfo->invflags & IP6T_TCP_INV_DSTPT)))
-		return 0;
-	if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
-		      == tcpinfo->flg_cmp,
-		      IP6T_TCP_INV_FLAGS))
-		return 0;
-	if (tcpinfo->option) {
-		if (th->doff * 4 < sizeof(_tcph)) {
-			*hotdrop = 1;
-			return 0;
-		}
-		if (!tcp_find_option(tcpinfo->option, skb, protoff,
-				     th->doff*4 - sizeof(*th),
-				     tcpinfo->invflags & IP6T_TCP_INV_OPTION,
-				     hotdrop))
-			return 0;
-	}
-	return 1;
-}
-
-/* Called when user tries to insert an entry of this type. */
-static int
-tcp_checkentry(const char *tablename,
-	       const struct ip6t_ip6 *ipv6,
-	       void *matchinfo,
-	       unsigned int matchsize,
-	       unsigned int hook_mask)
-{
-	const struct ip6t_tcp *tcpinfo = matchinfo;
-
-	/* Must specify proto == TCP, and no unknown invflags */
-	return ipv6->proto == IPPROTO_TCP
-		&& !(ipv6->invflags & IP6T_INV_PROTO)
-		&& matchsize == IP6T_ALIGN(sizeof(struct ip6t_tcp))
-		&& !(tcpinfo->invflags & ~IP6T_TCP_INV_MASK);
+	return 0;
 }
 
-static int
-udp_match(const struct sk_buff *skb,
-	  const struct net_device *in,
-	  const struct net_device *out,
-	  const void *matchinfo,
-	  int offset,
-	  unsigned int protoff,
-	  int *hotdrop)
+void ip6t_unregister_table(struct xt_table *table)
 {
-	struct udphdr _udph, *uh;
-	const struct ip6t_udp *udpinfo = matchinfo;
+	struct xt_table_info *private;
+	void *loc_cpu_entry;
 
-	/* Must not be a fragment. */
-	if (offset)
-		return 0;
-
-	uh = skb_header_pointer(skb, protoff, sizeof(_udph), &_udph);
-	if (uh == NULL) {
-		/* We've been asked to examine this packet, and we
-		   can't.  Hence, no choice but to drop. */
-		duprintf("Dropping evil UDP tinygram.\n");
-		*hotdrop = 1;
-		return 0;
-	}
-
-	return port_match(udpinfo->spts[0], udpinfo->spts[1],
-			  ntohs(uh->source),
-			  !!(udpinfo->invflags & IP6T_UDP_INV_SRCPT))
-		&& port_match(udpinfo->dpts[0], udpinfo->dpts[1],
-			      ntohs(uh->dest),
-			      !!(udpinfo->invflags & IP6T_UDP_INV_DSTPT));
-}
-
-/* Called when user tries to insert an entry of this type. */
-static int
-udp_checkentry(const char *tablename,
-	       const struct ip6t_ip6 *ipv6,
-	       void *matchinfo,
-	       unsigned int matchinfosize,
-	       unsigned int hook_mask)
-{
-	const struct ip6t_udp *udpinfo = matchinfo;
+	private = xt_unregister_table(table);
 
-	/* Must specify proto == UDP, and no unknown invflags */
-	if (ipv6->proto != IPPROTO_UDP || (ipv6->invflags & IP6T_INV_PROTO)) {
-		duprintf("ip6t_udp: Protocol %u != %u\n", ipv6->proto,
-			 IPPROTO_UDP);
-		return 0;
-	}
-	if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_udp))) {
-		duprintf("ip6t_udp: matchsize %u != %u\n",
-			 matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_udp)));
-		return 0;
-	}
-	if (udpinfo->invflags & ~IP6T_UDP_INV_MASK) {
-		duprintf("ip6t_udp: unknown flags %X\n",
-			 udpinfo->invflags);
-		return 0;
-	}
-
-	return 1;
+	/* Decrease module usage counts and free resources */
+	loc_cpu_entry = private->entries[raw_smp_processor_id()];
+	IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
+	xt_free_table_info(private);
 }
 
 /* Returns 1 if the type and code is matched by the range, 0 otherwise */
@@ -1840,11 +1357,12 @@ icmp6_match(const struct sk_buff *skb,
 /* Called when user tries to insert an entry of this type. */
 static int
 icmp6_checkentry(const char *tablename,
-	   const struct ip6t_ip6 *ipv6,
+	   const void *entry,
 	   void *matchinfo,
 	   unsigned int matchsize,
 	   unsigned int hook_mask)
 {
+	const struct ip6t_ip6 *ipv6 = entry;
 	const struct ip6t_icmp *icmpinfo = matchinfo;
 
 	/* Must specify proto == ICMP, and no unknown invflags */
@@ -1874,187 +1392,78 @@ static struct nf_sockopt_ops ip6t_sockopts = {
 	.get		= do_ip6t_get_ctl,
 };
 
-static struct ip6t_match tcp_matchstruct = {
-	.name		= "tcp",
-	.match		= &tcp_match,
-	.checkentry	= &tcp_checkentry,
-};
-
-static struct ip6t_match udp_matchstruct = {
-	.name		= "udp",
-	.match		= &udp_match,
-	.checkentry	= &udp_checkentry,
-};
-
 static struct ip6t_match icmp6_matchstruct = {
 	.name		= "icmp6",
 	.match		= &icmp6_match,
 	.checkentry	= &icmp6_checkentry,
 };
 
-#ifdef CONFIG_PROC_FS
-static inline int print_name(const char *i,
-			     off_t start_offset, char *buffer, int length,
-			     off_t *pos, unsigned int *count)
-{
-	if ((*count)++ >= start_offset) {
-		unsigned int namelen;
-
-		namelen = sprintf(buffer + *pos, "%s\n",
-				  i + sizeof(struct list_head));
-		if (*pos + namelen > length) {
-			/* Stop iterating */
-			return 1;
-		}
-		*pos += namelen;
-	}
-	return 0;
-}
-
-static inline int print_target(const struct ip6t_target *t,
-                               off_t start_offset, char *buffer, int length,
-                               off_t *pos, unsigned int *count)
-{
-	if (t == &ip6t_standard_target || t == &ip6t_error_target)
-		return 0;
-	return print_name((char *)t, start_offset, buffer, length, pos, count);
-}
-
-static int ip6t_get_tables(char *buffer, char **start, off_t offset, int length)
-{
-	off_t pos = 0;
-	unsigned int count = 0;
-
-	if (down_interruptible(&ip6t_mutex) != 0)
-		return 0;
-
-	LIST_FIND(&ip6t_tables, print_name, char *,
-		  offset, buffer, length, &pos, &count);
-
-	up(&ip6t_mutex);
-
-	/* `start' hack - see fs/proc/generic.c line ~105 */
-	*start=(char *)((unsigned long)count-offset);
-	return pos;
-}
-
-static int ip6t_get_targets(char *buffer, char **start, off_t offset, int length)
-{
-	off_t pos = 0;
-	unsigned int count = 0;
-
-	if (down_interruptible(&ip6t_mutex) != 0)
-		return 0;
-
-	LIST_FIND(&ip6t_target, print_target, struct ip6t_target *,
-		  offset, buffer, length, &pos, &count);
-
-	up(&ip6t_mutex);
-
-	*start = (char *)((unsigned long)count - offset);
-	return pos;
-}
-
-static int ip6t_get_matches(char *buffer, char **start, off_t offset, int length)
-{
-	off_t pos = 0;
-	unsigned int count = 0;
-
-	if (down_interruptible(&ip6t_mutex) != 0)
-		return 0;
-
-	LIST_FIND(&ip6t_match, print_name, char *,
-		  offset, buffer, length, &pos, &count);
-
-	up(&ip6t_mutex);
-
-	*start = (char *)((unsigned long)count - offset);
-	return pos;
-}
-
-static const struct { char *name; get_info_t *get_info; } ip6t_proc_entry[] =
-{ { "ip6_tables_names", ip6t_get_tables },
-  { "ip6_tables_targets", ip6t_get_targets },
-  { "ip6_tables_matches", ip6t_get_matches },
-  { NULL, NULL} };
-#endif /*CONFIG_PROC_FS*/
-
 static int __init init(void)
 {
 	int ret;
 
+	xt_proto_init(AF_INET6);
+
 	/* Noone else will be downing sem now, so we won't sleep */
-	down(&ip6t_mutex);
-	list_append(&ip6t_target, &ip6t_standard_target);
-	list_append(&ip6t_target, &ip6t_error_target);
-	list_append(&ip6t_match, &tcp_matchstruct);
-	list_append(&ip6t_match, &udp_matchstruct);
-	list_append(&ip6t_match, &icmp6_matchstruct);
-	up(&ip6t_mutex);
+	xt_register_target(AF_INET6, &ip6t_standard_target);
+	xt_register_target(AF_INET6, &ip6t_error_target);
+	xt_register_match(AF_INET6, &icmp6_matchstruct);
 
 	/* Register setsockopt */
 	ret = nf_register_sockopt(&ip6t_sockopts);
 	if (ret < 0) {
 		duprintf("Unable to register sockopts.\n");
+		xt_proto_fini(AF_INET6);
 		return ret;
 	}
 
-#ifdef CONFIG_PROC_FS
-	{
-		struct proc_dir_entry *proc;
-		int i;
-
-		for (i = 0; ip6t_proc_entry[i].name; i++) {
-			proc = proc_net_create(ip6t_proc_entry[i].name, 0,
-					       ip6t_proc_entry[i].get_info);
-			if (!proc) {
-				while (--i >= 0)
-				       proc_net_remove(ip6t_proc_entry[i].name);
-				nf_unregister_sockopt(&ip6t_sockopts);
-				return -ENOMEM;
-			}
-			proc->owner = THIS_MODULE;
-		}
-	}
-#endif
-
-	printk("ip6_tables: (C) 2000-2002 Netfilter core team\n");
+	printk("ip6_tables: (C) 2000-2006 Netfilter Core Team\n");
 	return 0;
 }
 
 static void __exit fini(void)
 {
 	nf_unregister_sockopt(&ip6t_sockopts);
-#ifdef CONFIG_PROC_FS
-	{
-		int i;
-		for (i = 0; ip6t_proc_entry[i].name; i++)
-			proc_net_remove(ip6t_proc_entry[i].name);
-	}
-#endif
+	xt_unregister_match(AF_INET6, &icmp6_matchstruct);
+	xt_unregister_target(AF_INET6, &ip6t_error_target);
+	xt_unregister_target(AF_INET6, &ip6t_standard_target);
+	xt_proto_fini(AF_INET6);
 }
 
 /*
- * find specified header up to transport protocol header.
- * If found target header, the offset to the header is set to *offset
- * and return 0. otherwise, return -1.
+ * find the offset to specified header or the protocol number of last header
+ * if target < 0. "last header" is transport protocol header, ESP, or
+ * "No next header".
+ *
+ * If the target header is found, its offset is stored in *offset and its
+ * protocol number is returned. Otherwise, -1 is returned.
+ *
+ * Note that non-1st fragment is special case that "the protocol number
+ * of last header" is "next header" field in Fragment header. In this case,
+ * *offset is meaningless and fragment offset is stored in *fragoff if fragoff
+ * isn't NULL.
  *
- * Notes: - non-1st Fragment Header isn't skipped.
- *	  - ESP header isn't skipped.
- *	  - The target header may be trancated.
  */
-int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target)
+int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
+		  int target, unsigned short *fragoff)
 {
 	unsigned int start = (u8*)(skb->nh.ipv6h + 1) - skb->data;
 	u8 nexthdr = skb->nh.ipv6h->nexthdr;
 	unsigned int len = skb->len - start;
 
+	if (fragoff)
+		*fragoff = 0;
+
 	while (nexthdr != target) {
 		struct ipv6_opt_hdr _hdr, *hp;
 		unsigned int hdrlen;
 
-		if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE)
+		if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) {
+			if (target < 0)
+				break;
 			return -1;
+		}
+
 		hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
 		if (hp == NULL)
 			return -1;
@@ -2068,8 +1477,17 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target)
 			if (fp == NULL)
 				return -1;
 
-			if (ntohs(*fp) & ~0x7)
+			_frag_off = ntohs(*fp) & ~0x7;
+			if (_frag_off) {
+				if (target < 0 &&
+				    ((!ipv6_ext_hdr(hp->nexthdr)) ||
+				     nexthdr == NEXTHDR_NONE)) {
+					if (fragoff)
+						*fragoff = _frag_off;
+					return hp->nexthdr;
+				}
 				return -1;
+			}
 			hdrlen = 8;
 		} else if (nexthdr == NEXTHDR_AUTH)
 			hdrlen = (hp->hdrlen + 2) << 2; 
@@ -2082,18 +1500,15 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target)
 	}
 
 	*offset = start;
-	return 0;
+	return nexthdr;
 }
 
 EXPORT_SYMBOL(ip6t_register_table);
 EXPORT_SYMBOL(ip6t_unregister_table);
 EXPORT_SYMBOL(ip6t_do_table);
-EXPORT_SYMBOL(ip6t_register_match);
-EXPORT_SYMBOL(ip6t_unregister_match);
-EXPORT_SYMBOL(ip6t_register_target);
-EXPORT_SYMBOL(ip6t_unregister_target);
 EXPORT_SYMBOL(ip6t_ext_hdr);
 EXPORT_SYMBOL(ipv6_find_hdr);
+EXPORT_SYMBOL(ip6_masked_addrcmp);
 
 module_init(init);
 module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
index 8f5549b7272..306200c3505 100644
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -62,7 +62,7 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb,
 }
 
 static int ip6t_hl_checkentry(const char *tablename,
-		const struct ip6t_entry *e,
+		const void *entry,
 		void *targinfo,
 		unsigned int targinfosize,
 		unsigned int hook_mask)
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 0cd1d1bd903..77c725832de 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/skbuff.h>
+#include <linux/if_arp.h>
 #include <linux/ip.h>
 #include <linux/spinlock.h>
 #include <linux/icmpv6.h>
@@ -62,9 +63,8 @@ static void dump_packet(const struct nf_loginfo *info,
 		return;
 	}
 
-	/* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000" */
-	printk("SRC=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ", NIP6(ih->saddr));
-	printk("DST=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ", NIP6(ih->daddr));
+	/* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */
+	printk("SRC=" NIP6_FMT " DST=" NIP6_FMT " ", NIP6(ih->saddr), NIP6(ih->daddr));
 
 	/* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */
 	printk("LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ",
@@ -443,7 +443,7 @@ ip6t_log_target(struct sk_buff **pskb,
 
 
 static int ip6t_log_checkentry(const char *tablename,
-			       const struct ip6t_entry *e,
+			       const void *entry,
 			       void *targinfo,
 			       unsigned int targinfosize,
 			       unsigned int hook_mask)
diff --git a/net/ipv6/netfilter/ip6t_MARK.c b/net/ipv6/netfilter/ip6t_MARK.c
deleted file mode 100644
index eab8fb864ee..00000000000
--- a/net/ipv6/netfilter/ip6t_MARK.c
+++ /dev/null
@@ -1,81 +0,0 @@
-/* This is a module which is used for setting the NFMARK field of an skb. */
-
-/* (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-
-#include <linux/netfilter_ipv6/ip6_tables.h>
-#include <linux/netfilter_ipv6/ip6t_MARK.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-
-static unsigned int
-target(struct sk_buff **pskb,
-       const struct net_device *in,
-       const struct net_device *out,
-       unsigned int hooknum,
-       const void *targinfo,
-       void *userinfo)
-{
-	const struct ip6t_mark_target_info *markinfo = targinfo;
-
-	if((*pskb)->nfmark != markinfo->mark)
-		(*pskb)->nfmark = markinfo->mark;
-
-	return IP6T_CONTINUE;
-}
-
-static int
-checkentry(const char *tablename,
-	   const struct ip6t_entry *e,
-           void *targinfo,
-           unsigned int targinfosize,
-           unsigned int hook_mask)
-{
-	if (targinfosize != IP6T_ALIGN(sizeof(struct ip6t_mark_target_info))) {
-		printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n",
-		       targinfosize,
-		       IP6T_ALIGN(sizeof(struct ip6t_mark_target_info)));
-		return 0;
-	}
-
-	if (strcmp(tablename, "mangle") != 0) {
-		printk(KERN_WARNING "MARK: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
-		return 0;
-	}
-
-	return 1;
-}
-
-static struct ip6t_target ip6t_mark_reg = { 
-	.name		= "MARK",
-	.target		= target,
-	.checkentry	= checkentry,
-	.me		= THIS_MODULE
-};
-
-static int __init init(void)
-{
-	printk(KERN_DEBUG "registering ipv6 mark target\n");
-	if (ip6t_register_target(&ip6t_mark_reg))
-		return -EINVAL;
-
-	return 0;
-}
-
-static void __exit fini(void)
-{
-	ip6t_unregister_target(&ip6t_mark_reg);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_NFQUEUE.c b/net/ipv6/netfilter/ip6t_NFQUEUE.c
deleted file mode 100644
index c6e3730e740..00000000000
--- a/net/ipv6/netfilter/ip6t_NFQUEUE.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/* ip6tables module for using new netfilter netlink queue
- *
- * (C) 2005 by Harald Welte <laforge@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as 
- * published by the Free Software Foundation.
- * 
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
-#include <linux/netfilter_ipv4/ipt_NFQUEUE.h>
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("ip6tables NFQUEUE target");
-MODULE_LICENSE("GPL");
-
-static unsigned int
-target(struct sk_buff **pskb,
-       const struct net_device *in,
-       const struct net_device *out,
-       unsigned int hooknum,
-       const void *targinfo,
-       void *userinfo)
-{
-	const struct ipt_NFQ_info *tinfo = targinfo;
-
-	return NF_QUEUE_NR(tinfo->queuenum);
-}
-
-static int
-checkentry(const char *tablename,
-	   const struct ip6t_entry *e,
-           void *targinfo,
-           unsigned int targinfosize,
-           unsigned int hook_mask)
-{
-	if (targinfosize != IP6T_ALIGN(sizeof(struct ipt_NFQ_info))) {
-		printk(KERN_WARNING "NFQUEUE: targinfosize %u != %Zu\n",
-		       targinfosize,
-		       IP6T_ALIGN(sizeof(struct ipt_NFQ_info)));
-		return 0;
-	}
-
-	return 1;
-}
-
-static struct ip6t_target ipt_NFQ_reg = {
-	.name		= "NFQUEUE",
-	.target		= target,
-	.checkentry	= checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	return ip6t_register_target(&ipt_NFQ_reg);
-}
-
-static void __exit fini(void)
-{
-	ip6t_unregister_target(&ipt_NFQ_reg);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index b03e87adca9..c745717b4ce 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -218,12 +218,13 @@ static unsigned int reject6_target(struct sk_buff **pskb,
 }
 
 static int check(const char *tablename,
-		 const struct ip6t_entry *e,
+		 const void *entry,
 		 void *targinfo,
 		 unsigned int targinfosize,
 		 unsigned int hook_mask)
 {
  	const struct ip6t_reject_info *rejinfo = targinfo;
+	const struct ip6t_entry *e = entry;
 
  	if (targinfosize != IP6T_ALIGN(sizeof(struct ip6t_reject_info))) {
   		DEBUGP("ip6t_REJECT: targinfosize %u != 0\n", targinfosize);
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index dde37793d20..219a30365df 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -9,6 +9,7 @@
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
+#include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/types.h>
 #include <net/checksum.h>
@@ -53,7 +54,7 @@ match(const struct sk_buff *skb,
 	unsigned int ptr;
 	unsigned int hdrlen = 0;
 
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH) < 0)
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL) < 0)
 		return 0;
 
 	ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah);
@@ -97,7 +98,7 @@ match(const struct sk_buff *skb,
 /* Called when user tries to insert an entry of this type. */
 static int
 checkentry(const char *tablename,
-          const struct ip6t_ip6 *ip,
+          const void *entry,
           void *matchinfo,
           unsigned int matchinfosize,
           unsigned int hook_mask)
diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c
index c450a635e54..b4c153a5350 100644
--- a/net/ipv6/netfilter/ip6t_dst.c
+++ b/net/ipv6/netfilter/ip6t_dst.c
@@ -36,19 +36,19 @@ MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 #endif
 
 /*
- * (Type & 0xC0) >> 6
- * 	0	-> ignorable
- * 	1	-> must drop the packet
- * 	2	-> send ICMP PARM PROB regardless and drop packet
- * 	3	-> Send ICMP if not a multicast address and drop packet
+ *  (Type & 0xC0) >> 6
+ *	0	-> ignorable
+ *	1	-> must drop the packet
+ *	2	-> send ICMP PARM PROB regardless and drop packet
+ *	3	-> Send ICMP if not a multicast address and drop packet
  *  (Type & 0x20) >> 5
- *  	0	-> invariant
- *  	1	-> can change the routing
+ *	0	-> invariant
+ *	1	-> can change the routing
  *  (Type & 0x1F) Type
- *      0	-> Pad1 (only 1 byte!)
- *      1	-> PadN LENGTH info (total length = length + 2)
- *      C0 | 2	-> JUMBO 4 x x x x ( xxxx > 64k )
- *      5	-> RTALERT 2 x x
+ *	0	-> Pad1 (only 1 byte!)
+ *	1	-> PadN LENGTH info (total length = length + 2)
+ *	C0 | 2	-> JUMBO 4 x x x x ( xxxx > 64k )
+ *	5	-> RTALERT 2 x x
  */
 
 static int
@@ -60,59 +60,58 @@ match(const struct sk_buff *skb,
       unsigned int protoff,
       int *hotdrop)
 {
-       struct ipv6_opt_hdr _optsh, *oh;
-       const struct ip6t_opts *optinfo = matchinfo;
-       unsigned int temp;
-       unsigned int ptr;
-       unsigned int hdrlen = 0;
-       unsigned int ret = 0;
-       u8 _opttype, *tp = NULL;
-       u8 _optlen, *lp = NULL;
-       unsigned int optlen;
-       
+	struct ipv6_opt_hdr _optsh, *oh;
+	const struct ip6t_opts *optinfo = matchinfo;
+	unsigned int temp;
+	unsigned int ptr;
+	unsigned int hdrlen = 0;
+	unsigned int ret = 0;
+	u8 _opttype, *tp = NULL;
+	u8 _optlen, *lp = NULL;
+	unsigned int optlen;
+
 #if HOPBYHOP
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0)
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0)
 #else
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0)
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0)
 #endif
 		return 0;
 
-       oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
-       if (oh == NULL){
-	       *hotdrop = 1;
-       		return 0;
-       }
-
-       hdrlen = ipv6_optlen(oh);
-       if (skb->len - ptr < hdrlen){
-	       /* Packet smaller than it's length field */
-       		return 0;
-       }
-
-       DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen);
-
-       DEBUGP("len %02X %04X %02X ",
-       		optinfo->hdrlen, hdrlen,
-       		(!(optinfo->flags & IP6T_OPTS_LEN) ||
-                           ((optinfo->hdrlen == hdrlen) ^
-                           !!(optinfo->invflags & IP6T_OPTS_INV_LEN))));
-
-       ret = (oh != NULL)
-       		&&
-	      	(!(optinfo->flags & IP6T_OPTS_LEN) ||
-                           ((optinfo->hdrlen == hdrlen) ^
-                           !!(optinfo->invflags & IP6T_OPTS_INV_LEN)));
-
-       ptr += 2;
-       hdrlen -= 2;
-       if ( !(optinfo->flags & IP6T_OPTS_OPTS) ){
-	       return ret;
+	oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
+	if (oh == NULL) {
+		*hotdrop = 1;
+		return 0;
+	}
+
+	hdrlen = ipv6_optlen(oh);
+	if (skb->len - ptr < hdrlen) {
+		/* Packet smaller than its length field */
+		return 0;
+	}
+
+	DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen);
+
+	DEBUGP("len %02X %04X %02X ",
+	       optinfo->hdrlen, hdrlen,
+	       (!(optinfo->flags & IP6T_OPTS_LEN) ||
+		((optinfo->hdrlen == hdrlen) ^
+		 !!(optinfo->invflags & IP6T_OPTS_INV_LEN))));
+
+	ret = (oh != NULL) &&
+	      (!(optinfo->flags & IP6T_OPTS_LEN) ||
+	       ((optinfo->hdrlen == hdrlen) ^
+		!!(optinfo->invflags & IP6T_OPTS_INV_LEN)));
+
+	ptr += 2;
+	hdrlen -= 2;
+	if (!(optinfo->flags & IP6T_OPTS_OPTS)) {
+		return ret;
 	} else if (optinfo->flags & IP6T_OPTS_NSTRICT) {
 		DEBUGP("Not strict - not implemented");
 	} else {
 		DEBUGP("Strict ");
-		DEBUGP("#%d ",optinfo->optsnr);
-		for(temp=0; temp<optinfo->optsnr; temp++){
+		DEBUGP("#%d ", optinfo->optsnr);
+		for (temp = 0; temp < optinfo->optsnr; temp++) {
 			/* type field exists ? */
 			if (hdrlen < 1)
 				break;
@@ -122,10 +121,10 @@ match(const struct sk_buff *skb,
 				break;
 
 			/* Type check */
-			if (*tp != (optinfo->opts[temp] & 0xFF00)>>8){
+			if (*tp != (optinfo->opts[temp] & 0xFF00) >> 8) {
 				DEBUGP("Tbad %02X %02X\n",
 				       *tp,
-				       (optinfo->opts[temp] & 0xFF00)>>8);
+				       (optinfo->opts[temp] & 0xFF00) >> 8);
 				return 0;
 			} else {
 				DEBUGP("Tok ");
@@ -169,7 +168,8 @@ match(const struct sk_buff *skb,
 		}
 		if (temp == optinfo->optsnr)
 			return ret;
-		else return 0;
+		else
+			return 0;
 	}
 
 	return 0;
@@ -178,25 +178,24 @@ match(const struct sk_buff *skb,
 /* Called when user tries to insert an entry of this type. */
 static int
 checkentry(const char *tablename,
-          const struct ip6t_ip6 *ip,
-          void *matchinfo,
-          unsigned int matchinfosize,
-          unsigned int hook_mask)
+	   const void *info,
+	   void *matchinfo,
+	   unsigned int matchinfosize,
+	   unsigned int hook_mask)
 {
-       const struct ip6t_opts *optsinfo = matchinfo;
-
-       if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_opts))) {
-              DEBUGP("ip6t_opts: matchsize %u != %u\n",
-                      matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_opts)));
-              return 0;
-       }
-       if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) {
-              DEBUGP("ip6t_opts: unknown flags %X\n",
-                      optsinfo->invflags);
-              return 0;
-       }
-
-       return 1;
+	const struct ip6t_opts *optsinfo = matchinfo;
+
+	if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_opts))) {
+		DEBUGP("ip6t_opts: matchsize %u != %u\n",
+		       matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_opts)));
+		return 0;
+	}
+	if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) {
+		DEBUGP("ip6t_opts: unknown flags %X\n", optsinfo->invflags);
+		return 0;
+	}
+
+	return 1;
 }
 
 static struct ip6t_match opts_match = {
@@ -212,12 +211,12 @@ static struct ip6t_match opts_match = {
 
 static int __init init(void)
 {
-       return ip6t_register_match(&opts_match);
+	return ip6t_register_match(&opts_match);
 }
 
 static void __exit cleanup(void)
 {
-       ip6t_unregister_match(&opts_match);
+	ip6t_unregister_match(&opts_match);
 }
 
 module_init(init);
diff --git a/net/ipv6/netfilter/ip6t_esp.c b/net/ipv6/netfilter/ip6t_esp.c
index 24bc0cde43a..724285df871 100644
--- a/net/ipv6/netfilter/ip6t_esp.c
+++ b/net/ipv6/netfilter/ip6t_esp.c
@@ -9,6 +9,7 @@
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
+#include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/types.h>
 #include <net/checksum.h>
@@ -55,7 +56,7 @@ match(const struct sk_buff *skb,
 	/* Make sure this isn't an evil packet */
 	/*DEBUGP("ipv6_esp entered \n");*/
 
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ESP) < 0)
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ESP, NULL) < 0)
 		return 0;
 
 	eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp);
@@ -75,7 +76,7 @@ match(const struct sk_buff *skb,
 /* Called when user tries to insert an entry of this type. */
 static int
 checkentry(const char *tablename,
-	   const struct ip6t_ip6 *ip,
+	   const void *ip,
 	   void *matchinfo,
 	   unsigned int matchinfosize,
 	   unsigned int hook_mask)
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index 616c2cbcd54..27396ac0b9e 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -27,45 +27,45 @@ match(const struct sk_buff *skb,
       unsigned int protoff,
       int *hotdrop)
 {
+	unsigned char eui64[8];
+	int i = 0;
 
-    unsigned char eui64[8];
-    int i=0;
-
-     if ( !(skb->mac.raw >= skb->head
-                && (skb->mac.raw + ETH_HLEN) <= skb->data)
-                && offset != 0) {
-                        *hotdrop = 1;
-                        return 0;
-                }
-    
-    memset(eui64, 0, sizeof(eui64));
-
-    if (eth_hdr(skb)->h_proto == ntohs(ETH_P_IPV6)) {
-      if (skb->nh.ipv6h->version == 0x6) { 
-         memcpy(eui64, eth_hdr(skb)->h_source, 3);
-         memcpy(eui64 + 5, eth_hdr(skb)->h_source + 3, 3);
-	 eui64[3]=0xff;
-	 eui64[4]=0xfe;
-	 eui64[0] |= 0x02;
-
-	 i=0;
-	 while ((skb->nh.ipv6h->saddr.s6_addr[8+i] ==
-			 eui64[i]) && (i<8)) i++;
-
-	 if ( i == 8 )
-	 	return 1;
-      }
-    }
-
-    return 0;
+	if (!(skb->mac.raw >= skb->head &&
+	      (skb->mac.raw + ETH_HLEN) <= skb->data) &&
+	    offset != 0) {
+		*hotdrop = 1;
+		return 0;
+	}
+
+	memset(eui64, 0, sizeof(eui64));
+
+	if (eth_hdr(skb)->h_proto == ntohs(ETH_P_IPV6)) {
+		if (skb->nh.ipv6h->version == 0x6) {
+			memcpy(eui64, eth_hdr(skb)->h_source, 3);
+			memcpy(eui64 + 5, eth_hdr(skb)->h_source + 3, 3);
+			eui64[3] = 0xff;
+			eui64[4] = 0xfe;
+			eui64[0] |= 0x02;
+
+			/* Test the bound first so eui64[8] is never read. */
+			for (i = 0; i < 8; i++)
+				if (skb->nh.ipv6h->saddr.s6_addr[8+i] != eui64[i])
+					break;
+
+			if (i == 8)
+				return 1;
+		}
+	}
+
+	return 0;
 }
 
 static int
 ip6t_eui64_checkentry(const char *tablename,
-		   const struct ip6t_ip6 *ip,
-		   void *matchinfo,
-		   unsigned int matchsize,
-		   unsigned int hook_mask)
+		      const void *ip,
+		      void *matchinfo,
+		      unsigned int matchsize,
+		      unsigned int hook_mask)
 {
 	if (hook_mask
 	    & ~((1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_IN) |
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 085d5f8eea2..4c14125a0e2 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -31,12 +31,12 @@ MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 static inline int
 id_match(u_int32_t min, u_int32_t max, u_int32_t id, int invert)
 {
-       int r=0;
-       DEBUGP("frag id_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ',
-              min,id,max);
-       r=(id >= min && id <= max) ^ invert;
-       DEBUGP(" result %s\n",r? "PASS" : "FAILED");
-       return r;
+	int r = 0;
+	DEBUGP("frag id_match:%c 0x%x <= 0x%x <= 0x%x", invert ? '!' : ' ',
+	       min, id, max);
+	r = (id >= min && id <= max) ^ invert;
+	DEBUGP(" result %s\n", r ? "PASS" : "FAILED");
+	return r;
 }
 
 static int
@@ -48,92 +48,91 @@ match(const struct sk_buff *skb,
       unsigned int protoff,
       int *hotdrop)
 {
-       struct frag_hdr _frag, *fh;
-       const struct ip6t_frag *fraginfo = matchinfo;
-       unsigned int ptr;
+	struct frag_hdr _frag, *fh;
+	const struct ip6t_frag *fraginfo = matchinfo;
+	unsigned int ptr;
 
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT) < 0)
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL) < 0)
 		return 0;
 
 	fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag);
-	if (fh == NULL){
+	if (fh == NULL) {
 		*hotdrop = 1;
 		return 0;
 	}
 
-       DEBUGP("INFO %04X ", fh->frag_off);
-       DEBUGP("OFFSET %04X ", ntohs(fh->frag_off) & ~0x7);
-       DEBUGP("RES %02X %04X", fh->reserved, ntohs(fh->frag_off) & 0x6);
-       DEBUGP("MF %04X ", fh->frag_off & htons(IP6_MF));
-       DEBUGP("ID %u %08X\n", ntohl(fh->identification),
-	      ntohl(fh->identification));
-
-       DEBUGP("IPv6 FRAG id %02X ",
-       		(id_match(fraginfo->ids[0], fraginfo->ids[1],
-                           ntohl(fh->identification),
-                           !!(fraginfo->invflags & IP6T_FRAG_INV_IDS))));
-       DEBUGP("res %02X %02X%04X %02X ", 
-       		(fraginfo->flags & IP6T_FRAG_RES), fh->reserved,
-		ntohs(fh->frag_off) & 0x6,
-       		!((fraginfo->flags & IP6T_FRAG_RES)
-			&& (fh->reserved || (ntohs(fh->frag_off) & 0x06))));
-       DEBUGP("first %02X %02X %02X ", 
-       		(fraginfo->flags & IP6T_FRAG_FST),
-		ntohs(fh->frag_off) & ~0x7,
-       		!((fraginfo->flags & IP6T_FRAG_FST)
-			&& (ntohs(fh->frag_off) & ~0x7)));
-       DEBUGP("mf %02X %02X %02X ", 
-       		(fraginfo->flags & IP6T_FRAG_MF),
-		ntohs(fh->frag_off) & IP6_MF,
-       		!((fraginfo->flags & IP6T_FRAG_MF)
-			&& !((ntohs(fh->frag_off) & IP6_MF))));
-       DEBUGP("last %02X %02X %02X\n", 
-       		(fraginfo->flags & IP6T_FRAG_NMF),
-		ntohs(fh->frag_off) & IP6_MF,
-       		!((fraginfo->flags & IP6T_FRAG_NMF)
-			&& (ntohs(fh->frag_off) & IP6_MF)));
-
-       return (fh != NULL)
-       		&&
-       		(id_match(fraginfo->ids[0], fraginfo->ids[1],
-			  ntohl(fh->identification),
-                           !!(fraginfo->invflags & IP6T_FRAG_INV_IDS)))
-		&&
-		!((fraginfo->flags & IP6T_FRAG_RES)
-			&& (fh->reserved || (ntohs(fh->frag_off) & 0x6)))
-		&&
-		!((fraginfo->flags & IP6T_FRAG_FST)
-			&& (ntohs(fh->frag_off) & ~0x7))
-		&&
-		!((fraginfo->flags & IP6T_FRAG_MF)
-			&& !(ntohs(fh->frag_off) & IP6_MF))
-		&&
-		!((fraginfo->flags & IP6T_FRAG_NMF)
-			&& (ntohs(fh->frag_off) & IP6_MF));
+	DEBUGP("INFO %04X ", fh->frag_off);
+	DEBUGP("OFFSET %04X ", ntohs(fh->frag_off) & ~0x7);
+	DEBUGP("RES %02X %04X", fh->reserved, ntohs(fh->frag_off) & 0x6);
+	DEBUGP("MF %04X ", fh->frag_off & htons(IP6_MF));
+	DEBUGP("ID %u %08X\n", ntohl(fh->identification),
+	       ntohl(fh->identification));
+
+	DEBUGP("IPv6 FRAG id %02X ",
+	       (id_match(fraginfo->ids[0], fraginfo->ids[1],
+			 ntohl(fh->identification),
+			 !!(fraginfo->invflags & IP6T_FRAG_INV_IDS))));
+	DEBUGP("res %02X %02X%04X %02X ",
+	       (fraginfo->flags & IP6T_FRAG_RES), fh->reserved,
+	       ntohs(fh->frag_off) & 0x6,
+	       !((fraginfo->flags & IP6T_FRAG_RES)
+		 && (fh->reserved || (ntohs(fh->frag_off) & 0x06))));
+	DEBUGP("first %02X %02X %02X ",
+	       (fraginfo->flags & IP6T_FRAG_FST),
+	       ntohs(fh->frag_off) & ~0x7,
+	       !((fraginfo->flags & IP6T_FRAG_FST)
+		 && (ntohs(fh->frag_off) & ~0x7)));
+	DEBUGP("mf %02X %02X %02X ",
+	       (fraginfo->flags & IP6T_FRAG_MF),
+	       ntohs(fh->frag_off) & IP6_MF,
+	       !((fraginfo->flags & IP6T_FRAG_MF)
+		 && !((ntohs(fh->frag_off) & IP6_MF))));
+	DEBUGP("last %02X %02X %02X\n",
+	       (fraginfo->flags & IP6T_FRAG_NMF),
+	       ntohs(fh->frag_off) & IP6_MF,
+	       !((fraginfo->flags & IP6T_FRAG_NMF)
+		 && (ntohs(fh->frag_off) & IP6_MF)));
+
+	return (fh != NULL)
+	       &&
+	       (id_match(fraginfo->ids[0], fraginfo->ids[1],
+			 ntohl(fh->identification),
+			 !!(fraginfo->invflags & IP6T_FRAG_INV_IDS)))
+	       &&
+	       !((fraginfo->flags & IP6T_FRAG_RES)
+		 && (fh->reserved || (ntohs(fh->frag_off) & 0x6)))
+	       &&
+	       !((fraginfo->flags & IP6T_FRAG_FST)
+		 && (ntohs(fh->frag_off) & ~0x7))
+	       &&
+	       !((fraginfo->flags & IP6T_FRAG_MF)
+		 && !(ntohs(fh->frag_off) & IP6_MF))
+	       &&
+	       !((fraginfo->flags & IP6T_FRAG_NMF)
+		 && (ntohs(fh->frag_off) & IP6_MF));
 }
 
 /* Called when user tries to insert an entry of this type. */
 static int
 checkentry(const char *tablename,
-          const struct ip6t_ip6 *ip,
-          void *matchinfo,
-          unsigned int matchinfosize,
-          unsigned int hook_mask)
+	   const void *ip,
+	   void *matchinfo,
+	   unsigned int matchinfosize,
+	   unsigned int hook_mask)
 {
-       const struct ip6t_frag *fraginfo = matchinfo;
-
-       if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_frag))) {
-              DEBUGP("ip6t_frag: matchsize %u != %u\n",
-                      matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_frag)));
-              return 0;
-       }
-       if (fraginfo->invflags & ~IP6T_FRAG_INV_MASK) {
-              DEBUGP("ip6t_frag: unknown flags %X\n",
-                      fraginfo->invflags);
-              return 0;
-       }
-
-       return 1;
+	const struct ip6t_frag *fraginfo = matchinfo;
+
+	if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_frag))) {
+		DEBUGP("ip6t_frag: matchsize %u != %u\n",
+		       matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_frag)));
+		return 0;
+	}
+	if (fraginfo->invflags & ~IP6T_FRAG_INV_MASK) {
+		DEBUGP("ip6t_frag: unknown flags %X\n", fraginfo->invflags);
+		return 0;
+	}
+
+	return 1;
 }
 
 static struct ip6t_match frag_match = {
@@ -145,12 +144,12 @@ static struct ip6t_match frag_match = {
 
 static int __init init(void)
 {
-       return ip6t_register_match(&frag_match);
+	return ip6t_register_match(&frag_match);
 }
 
 static void __exit cleanup(void)
 {
-       ip6t_unregister_match(&frag_match);
+	ip6t_unregister_match(&frag_match);
 }
 
 module_init(init);
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 1d09485111d..37a8474a7e0 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -36,19 +36,19 @@ MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 #endif
 
 /*
- * (Type & 0xC0) >> 6
- * 	0	-> ignorable
- * 	1	-> must drop the packet
- * 	2	-> send ICMP PARM PROB regardless and drop packet
- * 	3	-> Send ICMP if not a multicast address and drop packet
+ *  (Type & 0xC0) >> 6
+ *	0	-> ignorable
+ *	1	-> must drop the packet
+ *	2	-> send ICMP PARM PROB regardless and drop packet
+ *	3	-> Send ICMP if not a multicast address and drop packet
  *  (Type & 0x20) >> 5
- *  	0	-> invariant
- *  	1	-> can change the routing
+ *	0	-> invariant
+ *	1	-> can change the routing
  *  (Type & 0x1F) Type
- *      0	-> Pad1 (only 1 byte!)
- *      1	-> PadN LENGTH info (total length = length + 2)
- *      C0 | 2	-> JUMBO 4 x x x x ( xxxx > 64k )
- *      5	-> RTALERT 2 x x
+ *	0	-> Pad1 (only 1 byte!)
+ *	1	-> PadN LENGTH info (total length = length + 2)
+ *	C0 | 2	-> JUMBO 4 x x x x ( xxxx > 64k )
+ *	5	-> RTALERT 2 x x
  */
 
 static int
@@ -60,59 +60,58 @@ match(const struct sk_buff *skb,
       unsigned int protoff,
       int *hotdrop)
 {
-       struct ipv6_opt_hdr _optsh, *oh;
-       const struct ip6t_opts *optinfo = matchinfo;
-       unsigned int temp;
-       unsigned int ptr;
-       unsigned int hdrlen = 0;
-       unsigned int ret = 0;
-       u8 _opttype, *tp = NULL;
-       u8 _optlen, *lp = NULL;
-       unsigned int optlen;
-       
+	struct ipv6_opt_hdr _optsh, *oh;
+	const struct ip6t_opts *optinfo = matchinfo;
+	unsigned int temp;
+	unsigned int ptr;
+	unsigned int hdrlen = 0;
+	unsigned int ret = 0;
+	u8 _opttype, *tp = NULL;
+	u8 _optlen, *lp = NULL;
+	unsigned int optlen;
+
 #if HOPBYHOP
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0)
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0)
 #else
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0)
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0)
 #endif
 		return 0;
 
-       oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
-       if (oh == NULL){
-	       *hotdrop = 1;
-       		return 0;
-       }
-
-       hdrlen = ipv6_optlen(oh);
-       if (skb->len - ptr < hdrlen){
-	       /* Packet smaller than it's length field */
-       		return 0;
-       }
-
-       DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen);
-
-       DEBUGP("len %02X %04X %02X ",
-       		optinfo->hdrlen, hdrlen,
-       		(!(optinfo->flags & IP6T_OPTS_LEN) ||
-                           ((optinfo->hdrlen == hdrlen) ^
-                           !!(optinfo->invflags & IP6T_OPTS_INV_LEN))));
-
-       ret = (oh != NULL)
-       		&&
-	      	(!(optinfo->flags & IP6T_OPTS_LEN) ||
-                           ((optinfo->hdrlen == hdrlen) ^
-                           !!(optinfo->invflags & IP6T_OPTS_INV_LEN)));
-
-       ptr += 2;
-       hdrlen -= 2;
-       if ( !(optinfo->flags & IP6T_OPTS_OPTS) ){
-	       return ret;
+	oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
+	if (oh == NULL) {
+		*hotdrop = 1;
+		return 0;
+	}
+
+	hdrlen = ipv6_optlen(oh);
+	if (skb->len - ptr < hdrlen) {
+		/* Packet smaller than its length field */
+		return 0;
+	}
+
+	DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen);
+
+	DEBUGP("len %02X %04X %02X ",
+	       optinfo->hdrlen, hdrlen,
+	       (!(optinfo->flags & IP6T_OPTS_LEN) ||
+		((optinfo->hdrlen == hdrlen) ^
+		 !!(optinfo->invflags & IP6T_OPTS_INV_LEN))));
+
+	ret = (oh != NULL) &&
+	      (!(optinfo->flags & IP6T_OPTS_LEN) ||
+	       ((optinfo->hdrlen == hdrlen) ^
+		!!(optinfo->invflags & IP6T_OPTS_INV_LEN)));
+
+	ptr += 2;
+	hdrlen -= 2;
+	if (!(optinfo->flags & IP6T_OPTS_OPTS)) {
+		return ret;
 	} else if (optinfo->flags & IP6T_OPTS_NSTRICT) {
 		DEBUGP("Not strict - not implemented");
 	} else {
 		DEBUGP("Strict ");
-		DEBUGP("#%d ",optinfo->optsnr);
-		for(temp=0; temp<optinfo->optsnr; temp++){
+		DEBUGP("#%d ", optinfo->optsnr);
+		for (temp = 0; temp < optinfo->optsnr; temp++) {
 			/* type field exists ? */
 			if (hdrlen < 1)
 				break;
@@ -122,10 +121,10 @@ match(const struct sk_buff *skb,
 				break;
 
 			/* Type check */
-			if (*tp != (optinfo->opts[temp] & 0xFF00)>>8){
+			if (*tp != (optinfo->opts[temp] & 0xFF00) >> 8) {
 				DEBUGP("Tbad %02X %02X\n",
 				       *tp,
-				       (optinfo->opts[temp] & 0xFF00)>>8);
+				       (optinfo->opts[temp] & 0xFF00) >> 8);
 				return 0;
 			} else {
 				DEBUGP("Tok ");
@@ -169,7 +168,8 @@ match(const struct sk_buff *skb,
 		}
 		if (temp == optinfo->optsnr)
 			return ret;
-		else return 0;
+		else
+			return 0;
 	}
 
 	return 0;
@@ -178,25 +178,24 @@ match(const struct sk_buff *skb,
 /* Called when user tries to insert an entry of this type. */
 static int
 checkentry(const char *tablename,
-          const struct ip6t_ip6 *ip,
-          void *matchinfo,
-          unsigned int matchinfosize,
-          unsigned int hook_mask)
+	   const void *entry,
+	   void *matchinfo,
+	   unsigned int matchinfosize,
+	   unsigned int hook_mask)
 {
-       const struct ip6t_opts *optsinfo = matchinfo;
-
-       if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_opts))) {
-              DEBUGP("ip6t_opts: matchsize %u != %u\n",
-                      matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_opts)));
-              return 0;
-       }
-       if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) {
-              DEBUGP("ip6t_opts: unknown flags %X\n",
-                      optsinfo->invflags);
-              return 0;
-       }
-
-       return 1;
+	const struct ip6t_opts *optsinfo = matchinfo;
+
+	if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_opts))) {
+		DEBUGP("ip6t_opts: matchsize %u != %u\n",
+		       matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_opts)));
+		return 0;
+	}
+	if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) {
+		DEBUGP("ip6t_opts: unknown flags %X\n", optsinfo->invflags);
+		return 0;
+	}
+
+	return 1;
 }
 
 static struct ip6t_match opts_match = {
@@ -212,12 +211,12 @@ static struct ip6t_match opts_match = {
 
 static int __init init(void)
 {
-       return ip6t_register_match(&opts_match);
+	return ip6t_register_match(&opts_match);
 }
 
 static void __exit cleanup(void)
 {
-       ip6t_unregister_match(&opts_match);
+	ip6t_unregister_match(&opts_match);
 }
 
 module_init(init);
diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c
index 0beaff5471d..c5d9079f2d9 100644
--- a/net/ipv6/netfilter/ip6t_hl.c
+++ b/net/ipv6/netfilter/ip6t_hl.c
@@ -48,7 +48,7 @@ static int match(const struct sk_buff *skb, const struct net_device *in,
 	return 0;
 }
 
-static int checkentry(const char *tablename, const struct ip6t_ip6 *ip,
+static int checkentry(const char *tablename, const void *entry,
 		      void *matchinfo, unsigned int matchsize,
 		      unsigned int hook_mask)
 {
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 32e67f05845..83ad6b272f7 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -50,20 +50,20 @@ ipv6header_match(const struct sk_buff *skb,
 	len = skb->len - ptr;
 	temp = 0;
 
-        while (ip6t_ext_hdr(nexthdr)) {
+	while (ip6t_ext_hdr(nexthdr)) {
 		struct ipv6_opt_hdr _hdr, *hp;
-        	int hdrlen;
+		int hdrlen;
 
 		/* Is there enough space for the next ext header? */
-                if (len < (int)sizeof(struct ipv6_opt_hdr))
-                        return 0;
+		if (len < (int)sizeof(struct ipv6_opt_hdr))
+			return 0;
 		/* No more exthdr -> evaluate */
-                if (nexthdr == NEXTHDR_NONE) {
+		if (nexthdr == NEXTHDR_NONE) {
 			temp |= MASK_NONE;
 			break;
 		}
 		/* ESP -> evaluate */
-                if (nexthdr == NEXTHDR_ESP) {
+		if (nexthdr == NEXTHDR_ESP) {
 			temp |= MASK_ESP;
 			break;
 		}
@@ -72,43 +72,43 @@ ipv6header_match(const struct sk_buff *skb,
 		BUG_ON(hp == NULL);
 
 		/* Calculate the header length */
-                if (nexthdr == NEXTHDR_FRAGMENT) {
-                        hdrlen = 8;
-                } else if (nexthdr == NEXTHDR_AUTH)
-                        hdrlen = (hp->hdrlen+2)<<2;
-                else
-                        hdrlen = ipv6_optlen(hp);
+		if (nexthdr == NEXTHDR_FRAGMENT) {
+			hdrlen = 8;
+		} else if (nexthdr == NEXTHDR_AUTH)
+			hdrlen = (hp->hdrlen + 2) << 2;
+		else
+			hdrlen = ipv6_optlen(hp);
 
 		/* set the flag */
-		switch (nexthdr){
-			case NEXTHDR_HOP:
-				temp |= MASK_HOPOPTS;
-				break;
-			case NEXTHDR_ROUTING:
-				temp |= MASK_ROUTING;
-				break;
-			case NEXTHDR_FRAGMENT:
-				temp |= MASK_FRAGMENT;
-				break;
-			case NEXTHDR_AUTH:
-				temp |= MASK_AH;
-				break;
-			case NEXTHDR_DEST:
-				temp |= MASK_DSTOPTS;
-				break;
-			default:
-				return 0;
-				break;
+		switch (nexthdr) {
+		case NEXTHDR_HOP:
+			temp |= MASK_HOPOPTS;
+			break;
+		case NEXTHDR_ROUTING:
+			temp |= MASK_ROUTING;
+			break;
+		case NEXTHDR_FRAGMENT:
+			temp |= MASK_FRAGMENT;
+			break;
+		case NEXTHDR_AUTH:
+			temp |= MASK_AH;
+			break;
+		case NEXTHDR_DEST:
+			temp |= MASK_DSTOPTS;
+			break;
+		default:
+			return 0;
+			break;
 		}
 
-                nexthdr = hp->nexthdr;
-                len -= hdrlen;
-                ptr += hdrlen;
+		nexthdr = hp->nexthdr;
+		len -= hdrlen;
+		ptr += hdrlen;
 		if (ptr > skb->len)
 			break;
-        }
+	}
 
-	if ( (nexthdr != NEXTHDR_NONE ) && (nexthdr != NEXTHDR_ESP) )
+	if ((nexthdr != NEXTHDR_NONE) && (nexthdr != NEXTHDR_ESP))
 		temp |= MASK_PROTO;
 
 	if (info->modeflag)
@@ -124,7 +124,7 @@ ipv6header_match(const struct sk_buff *skb,
 
 static int
 ipv6header_checkentry(const char *tablename,
-		      const struct ip6t_ip6 *ip,
+		      const void *ip,
 		      void *matchinfo,
 		      unsigned int matchsize,
 		      unsigned int hook_mask)
@@ -137,8 +137,8 @@ ipv6header_checkentry(const char *tablename,
 		return 0;
 
 	/* invflags is 0 or 0xff in hard mode */
-	if ((!info->modeflag) && info->invflags != 0x00
-			      && info->invflags != 0xFF)
+	if ((!info->modeflag) && info->invflags != 0x00 &&
+	    info->invflags != 0xFF)
 		return 0;
 
 	return 1;
@@ -152,7 +152,7 @@ static struct ip6t_match ip6t_ipv6header_match = {
 	.me		= THIS_MODULE,
 };
 
-static int  __init ipv6header_init(void)
+static int __init ipv6header_init(void)
 {
 	return ip6t_register_match(&ip6t_ipv6header_match);
 }
@@ -164,4 +164,3 @@ static void __exit ipv6header_exit(void)
 
 module_init(ipv6header_init);
 module_exit(ipv6header_exit);
-
diff --git a/net/ipv6/netfilter/ip6t_length.c b/net/ipv6/netfilter/ip6t_length.c
deleted file mode 100644
index e0537d3811d..00000000000
--- a/net/ipv6/netfilter/ip6t_length.c
+++ /dev/null
@@ -1,66 +0,0 @@
-/* Length Match - IPv6 Port */
-
-/* (C) 1999-2001 James Morris <jmorros@intercode.com.au>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/netfilter_ipv6/ip6t_length.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
-MODULE_DESCRIPTION("IPv6 packet length match");
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      int *hotdrop)
-{
-	const struct ip6t_length_info *info = matchinfo;
-	u_int16_t pktlen = ntohs(skb->nh.ipv6h->payload_len) + sizeof(struct ipv6hdr);
-	
-	return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
-}
-
-static int
-checkentry(const char *tablename,
-           const struct ip6t_ip6 *ip,
-           void *matchinfo,
-           unsigned int matchsize,
-           unsigned int hook_mask)
-{
-	if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_length_info)))
-		return 0;
-
-	return 1;
-}
-
-static struct ip6t_match length_match = {
-	.name		= "length",
-	.match		= &match,
-	.checkentry	= &checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	return ip6t_register_match(&length_match);
-}
-
-static void __exit fini(void)
-{
-	ip6t_unregister_match(&length_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_limit.c b/net/ipv6/netfilter/ip6t_limit.c
deleted file mode 100644
index fb782f610be..00000000000
--- a/net/ipv6/netfilter/ip6t_limit.c
+++ /dev/null
@@ -1,147 +0,0 @@
-/* Kernel module to control the rate
- *
- * 2 September 1999: Changed from the target RATE to the match
- *                   `limit', removed logging.  Did I mention that
- *                   Alexey is a fucking genius?
- *                   Rusty Russell (rusty@rustcorp.com.au).  */
-
-/* (C) 1999 J�r�me de Vivie <devivie@info.enserb.u-bordeaux.fr>
- * (C) 1999 Herv� Eychenne <eychenne@info.enserb.u-bordeaux.fr>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/spinlock.h>
-#include <linux/interrupt.h>
-
-#include <linux/netfilter_ipv6/ip6_tables.h>
-#include <linux/netfilter_ipv6/ip6t_limit.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>");
-MODULE_DESCRIPTION("rate limiting within ip6tables");
-
-/* The algorithm used is the Simple Token Bucket Filter (TBF)
- * see net/sched/sch_tbf.c in the linux source tree
- */
-
-static DEFINE_SPINLOCK(limit_lock);
-
-/* Rusty: This is my (non-mathematically-inclined) understanding of
-   this algorithm.  The `average rate' in jiffies becomes your initial
-   amount of credit `credit' and the most credit you can ever have
-   `credit_cap'.  The `peak rate' becomes the cost of passing the
-   test, `cost'.
-
-   `prev' tracks the last packet hit: you gain one credit per jiffy.
-   If you get credit balance more than this, the extra credit is
-   discarded.  Every time the match passes, you lose `cost' credits;
-   if you don't have that many, the test fails.
-
-   See Alexey's formal explanation in net/sched/sch_tbf.c.
-
-   To avoid underflow, we multiply by 128 (ie. you get 128 credits per
-   jiffy).  Hence a cost of 2^32-1, means one pass per 32768 seconds
-   at 1024HZ (or one every 9 hours).  A cost of 1 means 12800 passes
-   per second at 100HZ.  */
-
-#define CREDITS_PER_JIFFY 128
-
-static int
-ip6t_limit_match(const struct sk_buff *skb,
-		const struct net_device *in,
-		const struct net_device *out,
-		const void *matchinfo,
-		int offset,
-		unsigned int protoff,
-		int *hotdrop)
-{
-	struct ip6t_rateinfo *r = ((struct ip6t_rateinfo *)matchinfo)->master;
-	unsigned long now = jiffies;
-
-	spin_lock_bh(&limit_lock);
-	r->credit += (now - xchg(&r->prev, now)) * CREDITS_PER_JIFFY;
-	if (r->credit > r->credit_cap)
-		r->credit = r->credit_cap;
-
-	if (r->credit >= r->cost) {
-		/* We're not limited. */
-		r->credit -= r->cost;
-		spin_unlock_bh(&limit_lock);
-		return 1;
-	}
-
-       	spin_unlock_bh(&limit_lock);
-	return 0;
-}
-
-/* Precision saver. */
-static u_int32_t
-user2credits(u_int32_t user)
-{
-	/* If multiplying would overflow... */
-	if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
-		/* Divide first. */
-		return (user / IP6T_LIMIT_SCALE) * HZ * CREDITS_PER_JIFFY;
-
-	return (user * HZ * CREDITS_PER_JIFFY) / IP6T_LIMIT_SCALE;
-}
-
-static int
-ip6t_limit_checkentry(const char *tablename,
-		     const struct ip6t_ip6 *ip,
-		     void *matchinfo,
-		     unsigned int matchsize,
-		     unsigned int hook_mask)
-{
-	struct ip6t_rateinfo *r = matchinfo;
-
-	if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_rateinfo)))
-		return 0;
-
-	/* Check for overflow. */
-	if (r->burst == 0
-	    || user2credits(r->avg * r->burst) < user2credits(r->avg)) {
-		printk("Call rusty: overflow in ip6t_limit: %u/%u\n",
-		       r->avg, r->burst);
-		return 0;
-	}
-
-	/* User avg in seconds * IP6T_LIMIT_SCALE: convert to jiffies *
-	   128. */
-	r->prev = jiffies;
-	r->credit = user2credits(r->avg * r->burst);	 /* Credits full. */
-	r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */
-	r->cost = user2credits(r->avg);
-
-	/* For SMP, we only want to use one set of counters. */
-	r->master = r;
-
-	return 1;
-}
-
-static struct ip6t_match ip6t_limit_reg = {
-	.name		= "limit",
-	.match		= ip6t_limit_match,
-	.checkentry	= ip6t_limit_checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	if (ip6t_register_match(&ip6t_limit_reg))
-		return -EINVAL;
-	return 0;
-}
-
-static void __exit fini(void)
-{
-	ip6t_unregister_match(&ip6t_limit_reg);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_mac.c b/net/ipv6/netfilter/ip6t_mac.c
deleted file mode 100644
index 526d43e3723..00000000000
--- a/net/ipv6/netfilter/ip6t_mac.c
+++ /dev/null
@@ -1,80 +0,0 @@
-/* Kernel module to match MAC address parameters. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/if_ether.h>
-
-#include <linux/netfilter_ipv6/ip6t_mac.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("MAC address matching module for IPv6");
-MODULE_AUTHOR("Netfilter Core Teaam <coreteam@netfilter.org>");
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      int *hotdrop)
-{
-    const struct ip6t_mac_info *info = matchinfo;
-
-    /* Is mac pointer valid? */
-    return (skb->mac.raw >= skb->head
-	    && (skb->mac.raw + ETH_HLEN) <= skb->data
-	    /* If so, compare... */
-	    && ((memcmp(eth_hdr(skb)->h_source, info->srcaddr, ETH_ALEN)
-		== 0) ^ info->invert));
-}
-
-static int
-ip6t_mac_checkentry(const char *tablename,
-		   const struct ip6t_ip6 *ip,
-		   void *matchinfo,
-		   unsigned int matchsize,
-		   unsigned int hook_mask)
-{
-	if (hook_mask
-	    & ~((1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_IN)
-		| (1 << NF_IP6_FORWARD))) {
-		printk("ip6t_mac: only valid for PRE_ROUTING, LOCAL_IN or"
-		       " FORWARD\n");
-		return 0;
-	}
-
-	if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_mac_info)))
-		return 0;
-
-	return 1;
-}
-
-static struct ip6t_match mac_match = {
-	.name		= "mac",
-	.match		= &match,
-	.checkentry	= &ip6t_mac_checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	return ip6t_register_match(&mac_match);
-}
-
-static void __exit fini(void)
-{
-	ip6t_unregister_match(&mac_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_mark.c b/net/ipv6/netfilter/ip6t_mark.c
deleted file mode 100644
index affc3de364f..00000000000
--- a/net/ipv6/netfilter/ip6t_mark.c
+++ /dev/null
@@ -1,66 +0,0 @@
-/* Kernel module to match NFMARK values. */
-
-/* (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#include <linux/netfilter_ipv6/ip6t_mark.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-MODULE_DESCRIPTION("ip6tables mark match");
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      int *hotdrop)
-{
-	const struct ip6t_mark_info *info = matchinfo;
-
-	return ((skb->nfmark & info->mask) == info->mark) ^ info->invert;
-}
-
-static int
-checkentry(const char *tablename,
-           const struct ip6t_ip6 *ip,
-           void *matchinfo,
-           unsigned int matchsize,
-           unsigned int hook_mask)
-{
-	if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_mark_info)))
-		return 0;
-
-	return 1;
-}
-
-static struct ip6t_match mark_match = {
-	.name		= "mark",
-	.match		= &match,
-	.checkentry	= &checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	return ip6t_register_match(&mark_match);
-}
-
-static void __exit fini(void)
-{
-	ip6t_unregister_match(&mark_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_multiport.c b/net/ipv6/netfilter/ip6t_multiport.c
index 6e3246153fa..49f7829dfbc 100644
--- a/net/ipv6/netfilter/ip6t_multiport.c
+++ b/net/ipv6/netfilter/ip6t_multiport.c
@@ -84,11 +84,12 @@ match(const struct sk_buff *skb,
 /* Called when user tries to insert an entry of this type. */
 static int
 checkentry(const char *tablename,
-	   const struct ip6t_ip6 *ip,
+	   const void *info,
 	   void *matchinfo,
 	   unsigned int matchsize,
 	   unsigned int hook_mask)
 {
+	const struct ip6t_ip6 *ip = info;
 	const struct ip6t_multiport *multiinfo = matchinfo;
 
 	if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_multiport)))
diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c
index 4de4cdad4b7..8c8a4c7ec93 100644
--- a/net/ipv6/netfilter/ip6t_owner.c
+++ b/net/ipv6/netfilter/ip6t_owner.c
@@ -36,14 +36,14 @@ match(const struct sk_buff *skb,
 	if (!skb->sk || !skb->sk->sk_socket || !skb->sk->sk_socket->file)
 		return 0;
 
-	if(info->match & IP6T_OWNER_UID) {
-		if((skb->sk->sk_socket->file->f_uid != info->uid) ^
+	if (info->match & IP6T_OWNER_UID) {
+		if ((skb->sk->sk_socket->file->f_uid != info->uid) ^
 		    !!(info->invert & IP6T_OWNER_UID))
 			return 0;
 	}
 
-	if(info->match & IP6T_OWNER_GID) {
-		if((skb->sk->sk_socket->file->f_gid != info->gid) ^
+	if (info->match & IP6T_OWNER_GID) {
+		if ((skb->sk->sk_socket->file->f_gid != info->gid) ^
 		    !!(info->invert & IP6T_OWNER_GID))
 			return 0;
 	}
@@ -53,23 +53,23 @@ match(const struct sk_buff *skb,
 
 static int
 checkentry(const char *tablename,
-           const struct ip6t_ip6 *ip,
-           void *matchinfo,
-           unsigned int matchsize,
-           unsigned int hook_mask)
+	   const void *ip,
+	   void *matchinfo,
+	   unsigned int matchsize,
+	   unsigned int hook_mask)
 {
 	const struct ip6t_owner_info *info = matchinfo;
 
-        if (hook_mask
-            & ~((1 << NF_IP6_LOCAL_OUT) | (1 << NF_IP6_POST_ROUTING))) {
-                printk("ip6t_owner: only valid for LOCAL_OUT or POST_ROUTING.\n");
-                return 0;
-        }
+	if (hook_mask
+	    & ~((1 << NF_IP6_LOCAL_OUT) | (1 << NF_IP6_POST_ROUTING))) {
+		printk("ip6t_owner: only valid for LOCAL_OUT or POST_ROUTING.\n");
+		return 0;
+	}
 
 	if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_owner_info)))
 		return 0;
 
-	if (info->match & (IP6T_OWNER_PID|IP6T_OWNER_SID)) {
+	if (info->match & (IP6T_OWNER_PID | IP6T_OWNER_SID)) {
 		printk("ipt_owner: pid and sid matching "
 		       "not supported anymore\n");
 		return 0;
diff --git a/net/ipv6/netfilter/ip6t_policy.c b/net/ipv6/netfilter/ip6t_policy.c
new file mode 100644
index 00000000000..afe1cc4c18a
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_policy.c
@@ -0,0 +1,175 @@
+/* IP tables module for matching IPsec policy
+ *
+ * Copyright (c) 2004,2005 Patrick McHardy, <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <net/xfrm.h>
+
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6/ip6t_policy.h>
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("IPtables IPsec policy matching module");
+MODULE_LICENSE("GPL");
+
+
+static inline int
+match_xfrm_state(struct xfrm_state *x, const struct ip6t_policy_elem *e)
+{
+#define MATCH_ADDR(x,y,z)	(!e->match.x || \
+				 ((ip6_masked_addrcmp((z), &e->x, &e->y)) == 0) ^ e->invert.x)
+#define MATCH(x,y)		(!e->match.x || ((e->x == (y)) ^ e->invert.x))
+	/* Unset e->match fields always pass; e->invert flips the result. */
+	return MATCH_ADDR(saddr, smask, (struct in6_addr *)&x->props.saddr.a6) &&
+	       MATCH_ADDR(daddr, dmask, (struct in6_addr *)&x->id.daddr.a6) &&
+	       MATCH(proto, x->id.proto) &&
+	       MATCH(mode, x->props.mode) &&
+	       MATCH(spi, x->id.spi) &&
+	       MATCH(reqid, x->props.reqid);
+}
+
+static int
+match_policy_in(const struct sk_buff *skb, const struct ip6t_policy_info *info)
+{
+	const struct ip6t_policy_elem *e;
+	struct sec_path *sp = skb->sp;
+	int strict = info->flags & IP6T_POLICY_MATCH_STRICT;
+	int i, pos;
+
+	if (sp == NULL)
+		return -1;
+	if (strict && info->len != sp->len)
+		return 0;
+	/* NOTE(review): strict pos = i - sp->len + 1 can be < 0; confirm. */
+	for (i = sp->len - 1; i >= 0; i--) {
+		pos = strict ? i - sp->len + 1 : 0;
+		if (pos >= info->len)
+			return 0;
+		e = &info->pol[pos];
+
+		if (match_xfrm_state(sp->x[i].xvec, e)) {
+			if (!strict)
+				return 1;
+		} else if (strict)
+			return 0;
+	}
+
+	return strict ? 1 : 0;
+}
+
+static int
+match_policy_out(const struct sk_buff *skb, const struct ip6t_policy_info *info)
+{
+	const struct ip6t_policy_elem *e;
+	struct dst_entry *dst = skb->dst;
+	int strict = info->flags & IP6T_POLICY_MATCH_STRICT;
+	int i, pos;
+
+	if (dst->xfrm == NULL)
+		return -1;
+	/* Walk dst->child chain while dst->xfrm is set; strict: pos = i. */
+	for (i = 0; dst && dst->xfrm; dst = dst->child, i++) {
+		pos = strict ? i : 0;
+		if (pos >= info->len)
+			return 0;
+		e = &info->pol[pos];
+
+		if (match_xfrm_state(dst->xfrm, e)) {
+			if (!strict)
+				return 1;
+		} else if (strict)
+			return 0;
+	}
+
+	return strict ? 1 : 0;
+}
+
+static int match(const struct sk_buff *skb,
+                 const struct net_device *in,
+                 const struct net_device *out,
+                 const void *matchinfo,
+		 int offset,
+		 unsigned int protoff,
+		 int *hotdrop)
+{
+	const struct ip6t_policy_info *info = matchinfo;
+	int ret;
+
+	if (info->flags & IP6T_POLICY_MATCH_IN)
+		ret = match_policy_in(skb, info);
+	else
+		ret = match_policy_out(skb, info);
+	/* ret < 0: no sec_path (in) or no dst->xfrm (out) was present. */
+	if (ret < 0)
+		ret = info->flags & IP6T_POLICY_MATCH_NONE ? 1 : 0;
+	else if (info->flags & IP6T_POLICY_MATCH_NONE)
+		ret = 0;
+
+	return ret;
+}
+
+static int checkentry(const char *tablename, const void *ip_void,
+                      void *matchinfo, unsigned int matchsize,
+                      unsigned int hook_mask)
+{
+	struct ip6t_policy_info *info = matchinfo;
+
+	if (matchsize != IP6T_ALIGN(sizeof(*info))) {
+		printk(KERN_ERR "ip6t_policy: matchsize %u != %zu\n",
+		       matchsize, IP6T_ALIGN(sizeof(*info)));
+		return 0;
+	}
+	if (!(info->flags & (IP6T_POLICY_MATCH_IN|IP6T_POLICY_MATCH_OUT))) {
+		printk(KERN_ERR "ip6t_policy: neither incoming nor "
+		                "outgoing policy selected\n");
+		return 0;
+	}
+	if (hook_mask & (1 << NF_IP6_PRE_ROUTING | 1 << NF_IP6_LOCAL_IN)
+	    && info->flags & IP6T_POLICY_MATCH_OUT) {
+		printk(KERN_ERR "ip6t_policy: output policy not valid in "
+		                "PRE_ROUTING and INPUT\n");
+		return 0;
+	}
+	if (hook_mask & (1 << NF_IP6_POST_ROUTING | 1 << NF_IP6_LOCAL_OUT)
+	    && info->flags & IP6T_POLICY_MATCH_IN) {
+		printk(KERN_ERR "ip6t_policy: input policy not valid in "
+		                "POST_ROUTING and OUTPUT\n");
+		return 0;
+	}
+	if (info->len > IP6T_POLICY_MAX_ELEM) {
+		printk(KERN_ERR "ip6t_policy: too many policy elements\n");
+		return 0;
+	}
+
+	return 1;
+}
+
+static struct ip6t_match policy_match = {
+	.name		= "policy",
+	.match		= match,
+	.checkentry 	= checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ip6t_register_match(&policy_match);
+}
+
+static void __exit fini(void)
+{
+	ip6t_unregister_match(&policy_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index beb2fd5cebb..8f82476dc89 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -33,12 +33,12 @@ MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 static inline int
 segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, int invert)
 {
-       int r=0;
-       DEBUGP("rt segsleft_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ',
-              min,id,max);
-       r=(id >= min && id <= max) ^ invert;
-       DEBUGP(" result %s\n",r? "PASS" : "FAILED");
-       return r;
+	int r = 0;
+	DEBUGP("rt segsleft_match:%c 0x%x <= 0x%x <= 0x%x",
+	       invert ? '!' : ' ', min, id, max);
+	r = (id >= min && id <= max) ^ invert;
+	DEBUGP(" result %s\n", r ? "PASS" : "FAILED");
+	return r;
 }
 
 static int
@@ -50,87 +50,93 @@ match(const struct sk_buff *skb,
       unsigned int protoff,
       int *hotdrop)
 {
-       struct ipv6_rt_hdr _route, *rh;
-       const struct ip6t_rt *rtinfo = matchinfo;
-       unsigned int temp;
-       unsigned int ptr;
-       unsigned int hdrlen = 0;
-       unsigned int ret = 0;
-       struct in6_addr *ap, _addr;
-
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING) < 0)
+	struct ipv6_rt_hdr _route, *rh;
+	const struct ip6t_rt *rtinfo = matchinfo;
+	unsigned int temp;
+	unsigned int ptr;
+	unsigned int hdrlen = 0;
+	unsigned int ret = 0;
+	struct in6_addr *ap, _addr;
+
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL) < 0)
 		return 0;
 
-       rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route);
-       if (rh == NULL){
-	       *hotdrop = 1;
-       		return 0;
-       }
-
-       hdrlen = ipv6_optlen(rh);
-       if (skb->len - ptr < hdrlen){
-	       /* Pcket smaller than its length field */
-       		return 0;
-       }
-
-       DEBUGP("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen);
-       DEBUGP("TYPE %04X ", rh->type);
-       DEBUGP("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left);
-
-       DEBUGP("IPv6 RT segsleft %02X ",
-       		(segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1],
-                           rh->segments_left,
-                           !!(rtinfo->invflags & IP6T_RT_INV_SGS))));
-       DEBUGP("type %02X %02X %02X ",
-       		rtinfo->rt_type, rh->type, 
-       		(!(rtinfo->flags & IP6T_RT_TYP) ||
-                           ((rtinfo->rt_type == rh->type) ^
-                           !!(rtinfo->invflags & IP6T_RT_INV_TYP))));
-       DEBUGP("len %02X %04X %02X ",
-       		rtinfo->hdrlen, hdrlen,
-       		(!(rtinfo->flags & IP6T_RT_LEN) ||
-                           ((rtinfo->hdrlen == hdrlen) ^
-                           !!(rtinfo->invflags & IP6T_RT_INV_LEN))));
-       DEBUGP("res %02X %02X %02X ", 
-       		(rtinfo->flags & IP6T_RT_RES), ((struct rt0_hdr *)rh)->reserved,
-       		!((rtinfo->flags & IP6T_RT_RES) && (((struct rt0_hdr *)rh)->reserved)));
-
-       ret = (rh != NULL)
-       		&&
-       		(segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1],
-                           rh->segments_left,
-                           !!(rtinfo->invflags & IP6T_RT_INV_SGS)))
-		&&
-	      	(!(rtinfo->flags & IP6T_RT_LEN) ||
-                           ((rtinfo->hdrlen == hdrlen) ^
-                           !!(rtinfo->invflags & IP6T_RT_INV_LEN)))
-		&&
-       		(!(rtinfo->flags & IP6T_RT_TYP) ||
-                           ((rtinfo->rt_type == rh->type) ^
-                           !!(rtinfo->invflags & IP6T_RT_INV_TYP)));
+	rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route);
+	if (rh == NULL) {
+		*hotdrop = 1;
+		return 0;
+	}
+
+	hdrlen = ipv6_optlen(rh);
+	if (skb->len - ptr < hdrlen) {
+		/* Packet smaller than its length field */
+		return 0;
+	}
+
+	DEBUGP("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen);
+	DEBUGP("TYPE %04X ", rh->type);
+	DEBUGP("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left);
+
+	DEBUGP("IPv6 RT segsleft %02X ",
+	       (segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1],
+			       rh->segments_left,
+			       !!(rtinfo->invflags & IP6T_RT_INV_SGS))));
+	DEBUGP("type %02X %02X %02X ",
+	       rtinfo->rt_type, rh->type,
+	       (!(rtinfo->flags & IP6T_RT_TYP) ||
+		((rtinfo->rt_type == rh->type) ^
+		 !!(rtinfo->invflags & IP6T_RT_INV_TYP))));
+	DEBUGP("len %02X %04X %02X ",
+	       rtinfo->hdrlen, hdrlen,
+	       (!(rtinfo->flags & IP6T_RT_LEN) ||
+		((rtinfo->hdrlen == hdrlen) ^
+		 !!(rtinfo->invflags & IP6T_RT_INV_LEN))));
+	DEBUGP("res %02X %02X %02X ",
+	       (rtinfo->flags & IP6T_RT_RES),
+	       ((struct rt0_hdr *)rh)->reserved,
+	       !((rtinfo->flags & IP6T_RT_RES) &&
+		 (((struct rt0_hdr *)rh)->reserved)));
+
+	ret = (rh != NULL)
+	      &&
+	      (segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1],
+			      rh->segments_left,
+			      !!(rtinfo->invflags & IP6T_RT_INV_SGS)))
+	      &&
+	      (!(rtinfo->flags & IP6T_RT_LEN) ||
+	       ((rtinfo->hdrlen == hdrlen) ^
+		!!(rtinfo->invflags & IP6T_RT_INV_LEN)))
+	      &&
+	      (!(rtinfo->flags & IP6T_RT_TYP) ||
+	       ((rtinfo->rt_type == rh->type) ^
+		!!(rtinfo->invflags & IP6T_RT_INV_TYP)));
 
 	if (ret && (rtinfo->flags & IP6T_RT_RES)) {
 		u_int32_t *rp, _reserved;
 		rp = skb_header_pointer(skb,
-					ptr + offsetof(struct rt0_hdr, reserved),
-					sizeof(_reserved), &_reserved);
+					ptr + offsetof(struct rt0_hdr,
+						       reserved),
+					sizeof(_reserved),
+					&_reserved);
 
 		ret = (*rp == 0);
 	}
 
-	DEBUGP("#%d ",rtinfo->addrnr);
-       if ( !(rtinfo->flags & IP6T_RT_FST) ){
-	       return ret;
+	DEBUGP("#%d ", rtinfo->addrnr);
+	if (!(rtinfo->flags & IP6T_RT_FST)) {
+		return ret;
 	} else if (rtinfo->flags & IP6T_RT_FST_NSTRICT) {
 		DEBUGP("Not strict ");
-		if ( rtinfo->addrnr > (unsigned int)((hdrlen-8)/16) ){
+		if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) {
 			DEBUGP("There isn't enough space\n");
 			return 0;
 		} else {
 			unsigned int i = 0;
 
-			DEBUGP("#%d ",rtinfo->addrnr);
-			for(temp=0; temp<(unsigned int)((hdrlen-8)/16); temp++){
+			DEBUGP("#%d ", rtinfo->addrnr);
+			for (temp = 0;
+			     temp < (unsigned int)((hdrlen - 8) / 16);
+			     temp++) {
 				ap = skb_header_pointer(skb,
 							ptr
 							+ sizeof(struct rt0_hdr)
@@ -141,24 +147,26 @@ match(const struct sk_buff *skb,
 				BUG_ON(ap == NULL);
 
 				if (ipv6_addr_equal(ap, &rtinfo->addrs[i])) {
-					DEBUGP("i=%d temp=%d;\n",i,temp);
+					DEBUGP("i=%d temp=%d;\n", i, temp);
 					i++;
 				}
-				if (i==rtinfo->addrnr) break;
+				if (i == rtinfo->addrnr)
+					break;
 			}
 			DEBUGP("i=%d #%d\n", i, rtinfo->addrnr);
 			if (i == rtinfo->addrnr)
 				return ret;
-			else return 0;
+			else
+				return 0;
 		}
 	} else {
 		DEBUGP("Strict ");
-		if ( rtinfo->addrnr > (unsigned int)((hdrlen-8)/16) ){
+		if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) {
 			DEBUGP("There isn't enough space\n");
 			return 0;
 		} else {
-			DEBUGP("#%d ",rtinfo->addrnr);
-			for(temp=0; temp<rtinfo->addrnr; temp++){
+			DEBUGP("#%d ", rtinfo->addrnr);
+			for (temp = 0; temp < rtinfo->addrnr; temp++) {
 				ap = skb_header_pointer(skb,
 							ptr
 							+ sizeof(struct rt0_hdr)
@@ -171,9 +179,11 @@ match(const struct sk_buff *skb,
 					break;
 			}
 			DEBUGP("temp=%d #%d\n", temp, rtinfo->addrnr);
-			if ((temp == rtinfo->addrnr) && (temp == (unsigned int)((hdrlen-8)/16)))
+			if ((temp == rtinfo->addrnr) &&
+			    (temp == (unsigned int)((hdrlen - 8) / 16)))
 				return ret;
-			else return 0;
+			else
+				return 0;
 		}
 	}
 
@@ -183,32 +193,31 @@ match(const struct sk_buff *skb,
 /* Called when user tries to insert an entry of this type. */
 static int
 checkentry(const char *tablename,
-          const struct ip6t_ip6 *ip,
-          void *matchinfo,
-          unsigned int matchinfosize,
-          unsigned int hook_mask)
+	   const void *entry,
+	   void *matchinfo,
+	   unsigned int matchinfosize,
+	   unsigned int hook_mask)
 {
-       const struct ip6t_rt *rtinfo = matchinfo;
-
-       if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_rt))) {
-              DEBUGP("ip6t_rt: matchsize %u != %u\n",
-                      matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_rt)));
-              return 0;
-       }
-       if (rtinfo->invflags & ~IP6T_RT_INV_MASK) {
-              DEBUGP("ip6t_rt: unknown flags %X\n",
-                      rtinfo->invflags);
-              return 0;
-       }
-       if ( (rtinfo->flags & (IP6T_RT_RES|IP6T_RT_FST_MASK)) && 
-		       (!(rtinfo->flags & IP6T_RT_TYP) || 
-		       (rtinfo->rt_type != 0) || 
-		       (rtinfo->invflags & IP6T_RT_INV_TYP)) ) {
-	      DEBUGP("`--rt-type 0' required before `--rt-0-*'");
-              return 0;
-       }
-
-       return 1;
+	const struct ip6t_rt *rtinfo = matchinfo;
+
+	if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_rt))) {
+		DEBUGP("ip6t_rt: matchsize %u != %u\n",
+		       matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_rt)));
+		return 0;
+	}
+	if (rtinfo->invflags & ~IP6T_RT_INV_MASK) {
+		DEBUGP("ip6t_rt: unknown flags %X\n", rtinfo->invflags);
+		return 0;
+	}
+	if ((rtinfo->flags & (IP6T_RT_RES | IP6T_RT_FST_MASK)) &&
+	    (!(rtinfo->flags & IP6T_RT_TYP) ||
+	     (rtinfo->rt_type != 0) ||
+	     (rtinfo->invflags & IP6T_RT_INV_TYP))) {
+		DEBUGP("`--rt-type 0' required before `--rt-0-*'");
+		return 0;
+	}
+
+	return 1;
 }
 
 static struct ip6t_match rt_match = {
@@ -220,12 +229,12 @@ static struct ip6t_match rt_match = {
 
 static int __init init(void)
 {
-       return ip6t_register_match(&rt_match);
+	return ip6t_register_match(&rt_match);
 }
 
 static void __exit cleanup(void)
 {
-       ip6t_unregister_match(&rt_match);
+	ip6t_unregister_match(&rt_match);
 }
 
 module_init(init);
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 4c0028671c2..ce4a968e1f7 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -97,6 +97,7 @@ static struct ip6t_table packet_filter = {
 	.valid_hooks	= FILTER_VALID_HOOKS,
 	.lock		= RW_LOCK_UNLOCKED,
 	.me		= THIS_MODULE,
+	.af		= AF_INET6,
 };
 
 /* The work comes in here from netfilter.c. */
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 85c1e6eada1..30a4627e000 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -127,6 +127,7 @@ static struct ip6t_table packet_mangler = {
 	.valid_hooks	= MANGLE_VALID_HOOKS,
 	.lock		= RW_LOCK_UNLOCKED,
 	.me		= THIS_MODULE,
+	.af		= AF_INET6,
 };
 
 /* The work comes in here from netfilter.c. */
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index c2982efd14a..db28ba3855e 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -106,11 +106,12 @@ static struct
 	}
 };
 
-static struct ip6t_table packet_raw = { 
+static struct xt_table packet_raw = { 
 	.name = "raw", 
 	.valid_hooks = RAW_VALID_HOOKS, 
 	.lock = RW_LOCK_UNLOCKED, 
-	.me = THIS_MODULE
+	.me = THIS_MODULE,
+	.af = AF_INET6,
 };
 
 /* The work comes in here from netfilter.c. */
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 753a3ae8502..ac702a29dd1 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -74,7 +74,7 @@ static int ipv6_invert_tuple(struct nf_conntrack_tuple *tuple,
 static int ipv6_print_tuple(struct seq_file *s,
 			    const struct nf_conntrack_tuple *tuple)
 {
-	return seq_printf(s, "src=%x:%x:%x:%x:%x:%x:%x:%x dst=%x:%x:%x:%x:%x:%x:%x:%x ",
+	return seq_printf(s, "src=" NIP6_FMT " dst=" NIP6_FMT " ",
 			  NIP6(*((struct in6_addr *)tuple->src.u3.ip6)),
 			  NIP6(*((struct in6_addr *)tuple->dst.u3.ip6)));
 }
@@ -335,10 +335,10 @@ static struct nf_hook_ops ipv6_conntrack_local_in_ops = {
 #ifdef CONFIG_SYSCTL
 
 /* From nf_conntrack_proto_icmpv6.c */
-extern unsigned long nf_ct_icmpv6_timeout;
+extern unsigned int nf_ct_icmpv6_timeout;
 
 /* From nf_conntrack_frag6.c */
-extern unsigned long nf_ct_frag6_timeout;
+extern unsigned int nf_ct_frag6_timeout;
 extern unsigned int nf_ct_frag6_low_thresh;
 extern unsigned int nf_ct_frag6_high_thresh;
 
@@ -401,6 +401,48 @@ static ctl_table nf_ct_net_table[] = {
 };
 #endif
 
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static int ipv6_tuple_to_nfattr(struct sk_buff *skb,
+				const struct nf_conntrack_tuple *tuple)
+{
+	NFA_PUT(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4,
+		&tuple->src.u3.ip6);
+	NFA_PUT(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4,
+		&tuple->dst.u3.ip6);
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+static const size_t cta_min_ip[CTA_IP_MAX] = {
+	[CTA_IP_V6_SRC-1]       = sizeof(u_int32_t)*4,
+	[CTA_IP_V6_DST-1]       = sizeof(u_int32_t)*4,
+};
+
+static int ipv6_nfattr_to_tuple(struct nfattr *tb[],
+				struct nf_conntrack_tuple *t)
+{
+	if (!tb[CTA_IP_V6_SRC-1] || !tb[CTA_IP_V6_DST-1])
+		return -EINVAL;
+
+	if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip))
+		return -EINVAL;
+
+	memcpy(&t->src.u3.ip6, NFA_DATA(tb[CTA_IP_V6_SRC-1]), 
+	       sizeof(u_int32_t) * 4);
+	memcpy(&t->dst.u3.ip6, NFA_DATA(tb[CTA_IP_V6_DST-1]),
+	       sizeof(u_int32_t) * 4);
+
+	return 0;
+}
+#endif
+
 struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
 	.l3proto		= PF_INET6,
 	.name			= "ipv6",
@@ -409,6 +451,11 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
 	.print_tuple		= ipv6_print_tuple,
 	.print_conntrack	= ipv6_print_conntrack,
 	.prepare		= ipv6_prepare,
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.tuple_to_nfattr	= ipv6_tuple_to_nfattr,
+	.nfattr_to_tuple	= ipv6_nfattr_to_tuple,
+#endif
 	.get_features		= ipv6_get_features,
 	.me			= THIS_MODULE,
 };
@@ -537,7 +584,7 @@ MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>");
 
 static int __init init(void)
 {
-	need_nf_conntrack();
+	need_conntrack();
 	return init_or_cleanup(1);
 }
 
@@ -548,9 +595,3 @@ static void __exit fini(void)
 
 module_init(init);
 module_exit(fini);
-
-void need_ip6_conntrack(void)
-{
-}
-
-EXPORT_SYMBOL(need_ip6_conntrack);
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index c0f1da5497a..09945c33305 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -57,19 +57,19 @@ static int icmpv6_pkt_to_tuple(const struct sk_buff *skb,
 	return 1;
 }
 
+/* Inverse type per ICMPv6 type (index: type - 128), stored +1; gaps are 0. */
+static u_int8_t invmap[] = {
+	[ICMPV6_ECHO_REQUEST - 128]	= ICMPV6_ECHO_REPLY + 1,
+	[ICMPV6_ECHO_REPLY - 128]	= ICMPV6_ECHO_REQUEST + 1,
+	[ICMPV6_NI_QUERY - 128]		= ICMPV6_NI_REPLY + 1,
+	[ICMPV6_NI_REPLY - 128]		= ICMPV6_NI_QUERY + 1
+};
+
 static int icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
 			       const struct nf_conntrack_tuple *orig)
 {
-	/* Add 1; spaces filled with 0. */
-	static u_int8_t invmap[] = {
-		[ICMPV6_ECHO_REQUEST - 128]	= ICMPV6_ECHO_REPLY + 1,
-		[ICMPV6_ECHO_REPLY - 128]	= ICMPV6_ECHO_REQUEST + 1,
-		[ICMPV6_NI_QUERY - 128]		= ICMPV6_NI_QUERY + 1,
-		[ICMPV6_NI_REPLY - 128]		= ICMPV6_NI_REPLY +1
-	};
-
-	__u8 type = orig->dst.u.icmp.type - 128;
-	if (type >= sizeof(invmap) || !invmap[type])
+	int type = orig->dst.u.icmp.type - 128;
+	if (type < 0 || type >= sizeof(invmap) || !invmap[type])
 		return 0;
 
 	tuple->src.u.icmp.id   = orig->src.u.icmp.id;
@@ -129,12 +129,12 @@ static int icmpv6_new(struct nf_conn *conntrack,
 		[ICMPV6_ECHO_REQUEST - 128] = 1,
 		[ICMPV6_NI_QUERY - 128] = 1
 	};
+	int type = conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128;
 
-	if (conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128 >= sizeof(valid_new)
-	    || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128]) {
+	if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) {
 		/* Can't create a new ICMPv6 `conn' with this. */
-		DEBUGP("icmp: can't create new conn with type %u\n",
-		       conntrack->tuplehash[0].tuple.dst.u.icmp.type);
+		DEBUGP("icmpv6: can't create new conn with type %u\n",
+		       type + 128);
 		NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple);
 		return 0;
 	}
@@ -185,7 +185,7 @@ icmpv6_error_message(struct sk_buff *skb,
 		return -NF_ACCEPT;
 	}
 
-	inproto = nf_ct_find_proto(PF_INET6, inprotonum);
+	inproto = __nf_ct_proto_find(PF_INET6, inprotonum);
 
 	/* Are they talking about one of our connections? */
 	if (!nf_ct_get_tuple(skb, inip6off, inprotoff, PF_INET6, inprotonum,
@@ -255,6 +255,60 @@ skipped:
 	return icmpv6_error_message(skb, dataoff, ctinfo, hooknum);
 }
 
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+static int icmpv6_tuple_to_nfattr(struct sk_buff *skb,
+				  const struct nf_conntrack_tuple *t)
+{
+	NFA_PUT(skb, CTA_PROTO_ICMPV6_ID, sizeof(u_int16_t),
+		&t->src.u.icmp.id);
+	NFA_PUT(skb, CTA_PROTO_ICMPV6_TYPE, sizeof(u_int8_t),
+		&t->dst.u.icmp.type);
+	NFA_PUT(skb, CTA_PROTO_ICMPV6_CODE, sizeof(u_int8_t),
+		&t->dst.u.icmp.code);
+
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+static const size_t cta_min_proto[CTA_PROTO_MAX] = {
+	[CTA_PROTO_ICMPV6_TYPE-1] = sizeof(u_int8_t),
+	[CTA_PROTO_ICMPV6_CODE-1] = sizeof(u_int8_t),
+	[CTA_PROTO_ICMPV6_ID-1]   = sizeof(u_int16_t)
+};
+
+static int icmpv6_nfattr_to_tuple(struct nfattr *tb[],
+				struct nf_conntrack_tuple *tuple)
+{
+	if (!tb[CTA_PROTO_ICMPV6_TYPE-1]
+	    || !tb[CTA_PROTO_ICMPV6_CODE-1]
+	    || !tb[CTA_PROTO_ICMPV6_ID-1])
+		return -EINVAL;
+
+	if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
+		return -EINVAL;
+
+	tuple->dst.u.icmp.type =
+			*(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_TYPE-1]);
+	tuple->dst.u.icmp.code =
+			*(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_CODE-1]);
+	tuple->src.u.icmp.id =
+			*(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_ID-1]);
+
+	if (tuple->dst.u.icmp.type < 128
+	    || tuple->dst.u.icmp.type - 128 >= sizeof(invmap)
+	    || !invmap[tuple->dst.u.icmp.type - 128])
+		return -EINVAL;
+
+	return 0;
+}
+#endif
+
 struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 =
 {
 	.l3proto		= PF_INET6,
@@ -267,6 +321,11 @@ struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 =
 	.packet			= icmpv6_packet,
 	.new			= icmpv6_new,
 	.error			= icmpv6_error,
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.tuple_to_nfattr	= icmpv6_tuple_to_nfattr,
+	.nfattr_to_tuple	= icmpv6_nfattr_to_tuple,
+#endif
 };
 
 EXPORT_SYMBOL(nf_conntrack_protocol_icmpv6);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index c2c52af9e56..84ef9a13108 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -70,8 +70,8 @@ struct nf_ct_frag6_skb_cb
 
 struct nf_ct_frag6_queue
 {
-	struct nf_ct_frag6_queue	*next;
-	struct list_head lru_list;		/* lru list member	*/
+	struct hlist_node	list;
+	struct list_head	lru_list;	/* lru list member	*/
 
 	__u32			id;		/* fragment id		*/
 	struct in6_addr		saddr;
@@ -90,24 +90,21 @@ struct nf_ct_frag6_queue
 #define FIRST_IN		2
 #define LAST_IN			1
 	__u16			nhoffset;
-	struct nf_ct_frag6_queue	**pprev;
 };
 
 /* Hash table. */
 
 #define FRAG6Q_HASHSZ	64
 
-static struct nf_ct_frag6_queue *nf_ct_frag6_hash[FRAG6Q_HASHSZ];
-static rwlock_t nf_ct_frag6_lock = RW_LOCK_UNLOCKED;
+static struct hlist_head nf_ct_frag6_hash[FRAG6Q_HASHSZ];
+static DEFINE_RWLOCK(nf_ct_frag6_lock);
 static u32 nf_ct_frag6_hash_rnd;
 static LIST_HEAD(nf_ct_frag6_lru_list);
 int nf_ct_frag6_nqueues = 0;
 
 static __inline__ void __fq_unlink(struct nf_ct_frag6_queue *fq)
 {
-	if (fq->next)
-		fq->next->pprev = fq->pprev;
-	*fq->pprev = fq->next;
+	hlist_del(&fq->list);
 	list_del(&fq->lru_list);
 	nf_ct_frag6_nqueues--;
 }
@@ -158,28 +155,18 @@ static void nf_ct_frag6_secret_rebuild(unsigned long dummy)
 	get_random_bytes(&nf_ct_frag6_hash_rnd, sizeof(u32));
 	for (i = 0; i < FRAG6Q_HASHSZ; i++) {
 		struct nf_ct_frag6_queue *q;
+		struct hlist_node *p, *n;
 
-		q = nf_ct_frag6_hash[i];
-		while (q) {
-			struct nf_ct_frag6_queue *next = q->next;
+		hlist_for_each_entry_safe(q, p, n, &nf_ct_frag6_hash[i], list) {
 			unsigned int hval = ip6qhashfn(q->id,
 						       &q->saddr,
 						       &q->daddr);
-
 			if (hval != i) {
-				/* Unlink. */
-				if (q->next)
-					q->next->pprev = q->pprev;
-				*q->pprev = q->next;
-
+				hlist_del(&q->list);
 				/* Relink to new hash chain. */
-				if ((q->next = nf_ct_frag6_hash[hval]) != NULL)
-					q->next->pprev = &q->next;
-				nf_ct_frag6_hash[hval] = q;
-				q->pprev = &nf_ct_frag6_hash[hval];
+				hlist_add_head(&q->list,
+					       &nf_ct_frag6_hash[hval]);
 			}
-
-			q = next;
 		}
 	}
 	write_unlock(&nf_ct_frag6_lock);
@@ -314,15 +301,17 @@ out:
 
 /* Creation primitives. */
 
-
 static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash,
 					  struct nf_ct_frag6_queue *fq_in)
 {
 	struct nf_ct_frag6_queue *fq;
+#ifdef CONFIG_SMP
+	struct hlist_node *n;
+#endif
 
 	write_lock(&nf_ct_frag6_lock);
 #ifdef CONFIG_SMP
-	for (fq = nf_ct_frag6_hash[hash]; fq; fq = fq->next) {
+	hlist_for_each_entry(fq, n, &nf_ct_frag6_hash[hash], list) {
 		if (fq->id == fq_in->id && 
 		    !ipv6_addr_cmp(&fq_in->saddr, &fq->saddr) &&
 		    !ipv6_addr_cmp(&fq_in->daddr, &fq->daddr)) {
@@ -340,10 +329,7 @@ static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash,
 		atomic_inc(&fq->refcnt);
 
 	atomic_inc(&fq->refcnt);
-	if ((fq->next = nf_ct_frag6_hash[hash]) != NULL)
-		fq->next->pprev = &fq->next;
-	nf_ct_frag6_hash[hash] = fq;
-	fq->pprev = &nf_ct_frag6_hash[hash];
+	hlist_add_head(&fq->list, &nf_ct_frag6_hash[hash]);
 	INIT_LIST_HEAD(&fq->lru_list);
 	list_add_tail(&fq->lru_list, &nf_ct_frag6_lru_list);
 	nf_ct_frag6_nqueues++;
@@ -371,7 +357,7 @@ nf_ct_frag6_create(unsigned int hash, u32 id, struct in6_addr *src,				   struct
 	init_timer(&fq->timer);
 	fq->timer.function = nf_ct_frag6_expire;
 	fq->timer.data = (long) fq;
-	fq->lock = SPIN_LOCK_UNLOCKED;
+	spin_lock_init(&fq->lock);
 	atomic_set(&fq->refcnt, 1);
 
 	return nf_ct_frag6_intern(hash, fq);
@@ -384,10 +370,11 @@ static __inline__ struct nf_ct_frag6_queue *
 fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst)
 {
 	struct nf_ct_frag6_queue *fq;
+	struct hlist_node *n;
 	unsigned int hash = ip6qhashfn(id, src, dst);
 
 	read_lock(&nf_ct_frag6_lock);
-	for (fq = nf_ct_frag6_hash[hash]; fq; fq = fq->next) {
+	hlist_for_each_entry(fq, n, &nf_ct_frag6_hash[hash], list) {
 		if (fq->id == id && 
 		    !ipv6_addr_cmp(src, &fq->saddr) &&
 		    !ipv6_addr_cmp(dst, &fq->daddr)) {
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index a66900cda2a..66f1d12ea57 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -32,6 +32,7 @@
 #include <linux/icmpv6.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv6.h>
+#include <linux/skbuff.h>
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
 #include <asm/bug.h>
@@ -433,25 +434,14 @@ out:
 	return err;
 
 csum_copy_err:
-	/* Clear queue. */
-	if (flags&MSG_PEEK) {
-		int clear = 0;
-		spin_lock_bh(&sk->sk_receive_queue.lock);
-		if (skb == skb_peek(&sk->sk_receive_queue)) {
-			__skb_unlink(skb, &sk->sk_receive_queue);
-			clear = 1;
-		}
-		spin_unlock_bh(&sk->sk_receive_queue.lock);
-		if (clear)
-			kfree_skb(skb);
-	}
+	skb_kill_datagram(sk, skb, flags);
 
 	/* Error for blocking case is chosen to masquerade
 	   as some normal condition.
 	 */
 	err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH;
 	/* FIXME: increment a raw6 drops counter here */
-	goto out_free;
+	goto out;
 }
 
 static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 5d316cb72ec..15e1456b3f1 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -581,7 +581,6 @@ err:
  *	the last and the first frames arrived and all the bits are here.
  */
 static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
-			  unsigned int *nhoffp,
 			  struct net_device *dev)
 {
 	struct sk_buff *fp, *head = fq->fragments;
@@ -654,6 +653,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
 	head->dev = dev;
 	skb_set_timestamp(head, &fq->stamp);
 	head->nh.ipv6h->payload_len = htons(payload_len);
+	IP6CB(head)->nhoff = nhoff;
 
 	*skb_in = head;
 
@@ -663,7 +663,6 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
 
 	IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
 	fq->fragments = NULL;
-	*nhoffp = nhoff;
 	return 1;
 
 out_oversize:
@@ -678,7 +677,7 @@ out_fail:
 	return -1;
 }
 
-static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
+static int ipv6_frag_rcv(struct sk_buff **skbp)
 {
 	struct sk_buff *skb = *skbp; 
 	struct net_device *dev = skb->dev;
@@ -710,7 +709,7 @@ static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
 		skb->h.raw += sizeof(struct frag_hdr);
 		IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
 
-		*nhoffp = (u8*)fhdr - skb->nh.raw;
+		IP6CB(skb)->nhoff = (u8*)fhdr - skb->nh.raw;
 		return 1;
 	}
 
@@ -722,11 +721,11 @@ static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
 
 		spin_lock(&fq->lock);
 
-		ip6_frag_queue(fq, skb, fhdr, *nhoffp);
+		ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
 
 		if (fq->last_in == (FIRST_IN|LAST_IN) &&
 		    fq->meat == fq->len)
-			ret = ip6_frag_reasm(fq, skbp, nhoffp, dev);
+			ret = ip6_frag_reasm(fq, skbp, dev);
 
 		spin_unlock(&fq->lock);
 		fq_put(fq, NULL);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a7a537b5059..e0d3ad02ffb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -24,6 +24,7 @@
  *		reachable.  otherwise, round-robin the list.
  */
 
+#include <linux/capability.h>
 #include <linux/config.h>
 #include <linux/errno.h>
 #include <linux/types.h>
@@ -413,11 +414,14 @@ static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
 	rt = ip6_rt_copy(ort);
 
 	if (rt) {
-		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
-
-		if (!(rt->rt6i_flags&RTF_GATEWAY))
+		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
+			if (rt->rt6i_dst.plen != 128 &&
+			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
+				rt->rt6i_flags |= RTF_ANYCAST;
 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
+		}
 
+		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
 		rt->rt6i_dst.plen = 128;
 		rt->rt6i_flags |= RTF_CACHE;
 		rt->u.dst.flags |= DST_HOST;
@@ -829,7 +833,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 	}
 
 	rt->u.dst.obsolete = -1;
-	rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
+	rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
 	if (nlh && (r = NLMSG_DATA(nlh))) {
 		rt->rt6i_protocol = r->rtm_protocol;
 	} else {
@@ -1413,7 +1417,9 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 	rt->u.dst.obsolete = -1;
 
 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
-	if (!anycast)
+	if (anycast)
+		rt->rt6i_flags |= RTF_ANYCAST;
+	else
 		rt->rt6i_flags |= RTF_LOCAL;
 	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
 	if (rt->rt6i_nexthop == NULL) {
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index c3123c9e1a8..c2d3e17beae 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -20,6 +20,7 @@
 
 #include <linux/config.h>
 #include <linux/module.h>
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/socket.h>
@@ -33,6 +34,7 @@
 #include <asm/uaccess.h>
 #include <linux/init.h>
 #include <linux/netfilter_ipv4.h>
+#include <linux/if_ether.h>
 
 #include <net/sock.h>
 #include <net/snmp.h>
@@ -183,7 +185,7 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int
 	if (dev == NULL)
 		return NULL;
 
-	nt = dev->priv;
+	nt = netdev_priv(dev);
 	dev->init = ipip6_tunnel_init;
 	nt->parms = *parms;
 
@@ -209,7 +211,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev)
 		write_unlock_bh(&ipip6_lock);
 		dev_put(dev);
 	} else {
-		ipip6_tunnel_unlink((struct ip_tunnel*)dev->priv);
+		ipip6_tunnel_unlink(netdev_priv(dev));
 		dev_put(dev);
 	}
 }
@@ -345,7 +347,7 @@ out:
 		rt6i = rt6_lookup(&iph6->daddr, &iph6->saddr, NULL, 0);
 
 		if (rt6i && rt6i->rt6i_dev && rt6i->rt6i_dev->type == ARPHRD_SIT) {
-			struct ip_tunnel * t = (struct ip_tunnel*)rt6i->rt6i_dev->priv;
+			struct ip_tunnel *t = netdev_priv(rt6i->rt6i_dev);
 			if (rel_type == ICMPV6_TIME_EXCEED && t->parms.iph.ttl) {
 				rel_type = ICMPV6_DEST_UNREACH;
 				rel_code = ICMPV6_ADDR_UNREACH;
@@ -380,6 +382,7 @@ static int ipip6_rcv(struct sk_buff *skb)
 		skb->mac.raw = skb->nh.raw;
 		skb->nh.raw = skb->data;
 		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
+		IPCB(skb)->flags = 0;
 		skb->protocol = htons(ETH_P_IPV6);
 		skb->pkt_type = PACKET_HOST;
 		tunnel->stat.rx_packets++;
@@ -422,7 +425,7 @@ static inline u32 try_6to4(struct in6_addr *v6dst)
 
 static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct net_device_stats *stats = &tunnel->stat;
 	struct iphdr  *tiph = &tunnel->parms.iph;
 	struct ipv6hdr *iph6 = skb->nh.ipv6h;
@@ -551,6 +554,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	skb->h.raw = skb->nh.raw;
 	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+	IPCB(skb)->flags = 0;
 	dst_release(skb->dst);
 	skb->dst = &rt->u.dst;
 
@@ -607,7 +611,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 			t = ipip6_tunnel_locate(&p, 0);
 		}
 		if (t == NULL)
-			t = (struct ip_tunnel*)dev->priv;
+			t = netdev_priv(dev);
 		memcpy(&p, &t->parms, sizeof(p));
 		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 			err = -EFAULT;
@@ -644,7 +648,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 					err = -EINVAL;
 					break;
 				}
-				t = (struct ip_tunnel*)dev->priv;
+				t = netdev_priv(dev);
 				ipip6_tunnel_unlink(t);
 				t->parms.iph.saddr = p.iph.saddr;
 				t->parms.iph.daddr = p.iph.daddr;
@@ -680,7 +684,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 			if ((t = ipip6_tunnel_locate(&p, 0)) == NULL)
 				goto done;
 			err = -EPERM;
-			if (t == ipip6_fb_tunnel_dev->priv)
+			if (t == netdev_priv(ipip6_fb_tunnel_dev))
 				goto done;
 			dev = t->dev;
 		}
@@ -697,7 +701,7 @@ done:
 
 static struct net_device_stats *ipip6_tunnel_get_stats(struct net_device *dev)
 {
-	return &(((struct ip_tunnel*)dev->priv)->stat);
+	return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
 }
 
 static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu)
@@ -720,7 +724,7 @@ static void ipip6_tunnel_setup(struct net_device *dev)
 
 	dev->type		= ARPHRD_SIT;
 	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr);
-	dev->mtu		= 1500 - sizeof(struct iphdr);
+	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
 	dev->flags		= IFF_NOARP;
 	dev->iflink		= 0;
 	dev->addr_len		= 4;
@@ -732,7 +736,7 @@ static int ipip6_tunnel_init(struct net_device *dev)
 	struct ip_tunnel *tunnel;
 	struct iphdr *iph;
 
-	tunnel = (struct ip_tunnel*)dev->priv;
+	tunnel = netdev_priv(dev);
 	iph = &tunnel->parms.iph;
 
 	tunnel->dev = dev;
@@ -772,7 +776,7 @@ static int ipip6_tunnel_init(struct net_device *dev)
 
 static int __init ipip6_fb_tunnel_init(struct net_device *dev)
 {
-	struct ip_tunnel *tunnel = dev->priv;
+	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct iphdr *iph = &tunnel->parms.iph;
 
 	tunnel->dev = dev;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 62c0e5bd931..66d04004afd 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -48,6 +48,7 @@
 #include <net/tcp.h>
 #include <net/ndisc.h>
 #include <net/inet6_hashtables.h>
+#include <net/inet6_connection_sock.h>
 #include <net/ipv6.h>
 #include <net/transp_v6.h>
 #include <net/addrconf.h>
@@ -59,232 +60,45 @@
 #include <net/addrconf.h>
 #include <net/snmp.h>
 #include <net/dsfield.h>
+#include <net/timewait_sock.h>
 
 #include <asm/uaccess.h>
 
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 
+/* Socket used for sending RSTs and ACKs */
+static struct socket *tcp6_socket;
+
 static void	tcp_v6_send_reset(struct sk_buff *skb);
 static void	tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
-static void	tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, 
+static void	tcp_v6_send_check(struct sock *sk, int len,
 				  struct sk_buff *skb);
 
 static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
-static int	tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
-
-static struct tcp_func ipv6_mapped;
-static struct tcp_func ipv6_specific;
-
-static inline int tcp_v6_bind_conflict(const struct sock *sk,
-				       const struct inet_bind_bucket *tb)
-{
-	const struct sock *sk2;
-	const struct hlist_node *node;
-
-	/* We must walk the whole port owner list in this case. -DaveM */
-	sk_for_each_bound(sk2, node, &tb->owners) {
-		if (sk != sk2 &&
-		    (!sk->sk_bound_dev_if ||
-		     !sk2->sk_bound_dev_if ||
-		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
-		    (!sk->sk_reuse || !sk2->sk_reuse ||
-		     sk2->sk_state == TCP_LISTEN) &&
-		     ipv6_rcv_saddr_equal(sk, sk2))
-			break;
-	}
 
-	return node != NULL;
-}
+static struct inet_connection_sock_af_ops ipv6_mapped;
+static struct inet_connection_sock_af_ops ipv6_specific;
 
-/* Grrr, addr_type already calculated by caller, but I don't want
- * to add some silly "cookie" argument to this method just for that.
- * But it doesn't matter, the recalculation is in the rarest path
- * this function ever takes.
- */
 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
 {
-	struct inet_bind_hashbucket *head;
-	struct inet_bind_bucket *tb;
-	struct hlist_node *node;
-	int ret;
-
-	local_bh_disable();
-	if (snum == 0) {
-		int low = sysctl_local_port_range[0];
-		int high = sysctl_local_port_range[1];
-		int remaining = (high - low) + 1;
-		int rover = net_random() % (high - low) + low;
-
-		do {
-			head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
-			spin_lock(&head->lock);
-			inet_bind_bucket_for_each(tb, node, &head->chain)
-				if (tb->port == rover)
-					goto next;
-			break;
-		next:
-			spin_unlock(&head->lock);
-			if (++rover > high)
-				rover = low;
-		} while (--remaining > 0);
-
-		/* Exhausted local port range during search?  It is not
-		 * possible for us to be holding one of the bind hash
-		 * locks if this test triggers, because if 'remaining'
-		 * drops to zero, we broke out of the do/while loop at
-		 * the top level, not from the 'break;' statement.
-		 */
-		ret = 1;
-		if (unlikely(remaining <= 0))
-			goto fail;
-
-		/* OK, here is the one we will use. */
-		snum = rover;
-	} else {
-		head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
-		spin_lock(&head->lock);
-		inet_bind_bucket_for_each(tb, node, &head->chain)
-			if (tb->port == snum)
-				goto tb_found;
-	}
-	tb = NULL;
-	goto tb_not_found;
-tb_found:
-	if (tb && !hlist_empty(&tb->owners)) {
-		if (tb->fastreuse > 0 && sk->sk_reuse &&
-		    sk->sk_state != TCP_LISTEN) {
-			goto success;
-		} else {
-			ret = 1;
-			if (tcp_v6_bind_conflict(sk, tb))
-				goto fail_unlock;
-		}
-	}
-tb_not_found:
-	ret = 1;
-	if (tb == NULL) {
-	       	tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum);
-		if (tb == NULL)
-			goto fail_unlock;
-	}
-	if (hlist_empty(&tb->owners)) {
-		if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
-			tb->fastreuse = 1;
-		else
-			tb->fastreuse = 0;
-	} else if (tb->fastreuse &&
-		   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
-		tb->fastreuse = 0;
-
-success:
-	if (!inet_csk(sk)->icsk_bind_hash)
-		inet_bind_hash(sk, tb, snum);
-	BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb);
-	ret = 0;
-
-fail_unlock:
-	spin_unlock(&head->lock);
-fail:
-	local_bh_enable();
-	return ret;
+	return inet_csk_get_port(&tcp_hashinfo, sk, snum,
+				 inet6_csk_bind_conflict);
 }
 
-static __inline__ void __tcp_v6_hash(struct sock *sk)
-{
-	struct hlist_head *list;
-	rwlock_t *lock;
-
-	BUG_TRAP(sk_unhashed(sk));
-
-	if (sk->sk_state == TCP_LISTEN) {
-		list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
-		lock = &tcp_hashinfo.lhash_lock;
-		inet_listen_wlock(&tcp_hashinfo);
-	} else {
-		unsigned int hash;
-		sk->sk_hash = hash = inet6_sk_ehashfn(sk);
-		hash &= (tcp_hashinfo.ehash_size - 1);
-		list = &tcp_hashinfo.ehash[hash].chain;
-		lock = &tcp_hashinfo.ehash[hash].lock;
-		write_lock(lock);
-	}
-
-	__sk_add_node(sk, list);
-	sock_prot_inc_use(sk->sk_prot);
-	write_unlock(lock);
-}
-
-
 static void tcp_v6_hash(struct sock *sk)
 {
 	if (sk->sk_state != TCP_CLOSE) {
-		struct tcp_sock *tp = tcp_sk(sk);
-
-		if (tp->af_specific == &ipv6_mapped) {
+		if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
 			tcp_prot.hash(sk);
 			return;
 		}
 		local_bh_disable();
-		__tcp_v6_hash(sk);
+		__inet6_hash(&tcp_hashinfo, sk);
 		local_bh_enable();
 	}
 }
 
-/*
- * Open request hash tables.
- */
-
-static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd)
-{
-	u32 a, b, c;
-
-	a = raddr->s6_addr32[0];
-	b = raddr->s6_addr32[1];
-	c = raddr->s6_addr32[2];
-
-	a += JHASH_GOLDEN_RATIO;
-	b += JHASH_GOLDEN_RATIO;
-	c += rnd;
-	__jhash_mix(a, b, c);
-
-	a += raddr->s6_addr32[3];
-	b += (u32) rport;
-	__jhash_mix(a, b, c);
-
-	return c & (TCP_SYNQ_HSIZE - 1);
-}
-
-static struct request_sock *tcp_v6_search_req(const struct sock *sk,
-					      struct request_sock ***prevp,
-					      __u16 rport,
-					      struct in6_addr *raddr,
-					      struct in6_addr *laddr,
-					      int iif)
-{
-	const struct inet_connection_sock *icsk = inet_csk(sk);
-	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
-	struct request_sock *req, **prev;  
-
-	for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
-	     (req = *prev) != NULL;
-	     prev = &req->dl_next) {
-		const struct tcp6_request_sock *treq = tcp6_rsk(req);
-
-		if (inet_rsk(req)->rmt_port == rport &&
-		    req->rsk_ops->family == AF_INET6 &&
-		    ipv6_addr_equal(&treq->rmt_addr, raddr) &&
-		    ipv6_addr_equal(&treq->loc_addr, laddr) &&
-		    (!treq->iif || treq->iif == iif)) {
-			BUG_TRAP(req->sk == NULL);
-			*prevp = prev;
-			return req;
-		}
-	}
-
-	return NULL;
-}
-
 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
 				   struct in6_addr *saddr, 
 				   struct in6_addr *daddr, 
@@ -308,195 +122,12 @@ static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
 	}
 }
 
-static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
-				      struct inet_timewait_sock **twp)
-{
-	struct inet_sock *inet = inet_sk(sk);
-	const struct ipv6_pinfo *np = inet6_sk(sk);
-	const struct in6_addr *daddr = &np->rcv_saddr;
-	const struct in6_addr *saddr = &np->daddr;
-	const int dif = sk->sk_bound_dev_if;
-	const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
-	unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport);
-	struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
-	struct sock *sk2;
-	const struct hlist_node *node;
-	struct inet_timewait_sock *tw;
-
-	prefetch(head->chain.first);
-	write_lock(&head->lock);
-
-	/* Check TIME-WAIT sockets first. */
-	sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
-		const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);
-
-		tw = inet_twsk(sk2);
-
-		if(*((__u32 *)&(tw->tw_dport))	== ports	&&
-		   sk2->sk_family		== PF_INET6	&&
-		   ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr)	&&
-		   ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr)	&&
-		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
-			const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
-			struct tcp_sock *tp = tcp_sk(sk);
-
-			if (tcptw->tw_ts_recent_stamp &&
-			    (!twp ||
-			     (sysctl_tcp_tw_reuse &&
-			      xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
-				/* See comment in tcp_ipv4.c */
-				tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
-				if (!tp->write_seq)
-					tp->write_seq = 1;
-				tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
-				tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
-				sock_hold(sk2);
-				goto unique;
-			} else
-				goto not_unique;
-		}
-	}
-	tw = NULL;
-
-	/* And established part... */
-	sk_for_each(sk2, node, &head->chain) {
-		if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
-			goto not_unique;
-	}
-
-unique:
-	BUG_TRAP(sk_unhashed(sk));
-	__sk_add_node(sk, &head->chain);
-	sk->sk_hash = hash;
-	sock_prot_inc_use(sk->sk_prot);
-	write_unlock(&head->lock);
-
-	if (twp) {
-		*twp = tw;
-		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
-	} else if (tw) {
-		/* Silly. Should hash-dance instead... */
-		inet_twsk_deschedule(tw, &tcp_death_row);
-		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
-
-		inet_twsk_put(tw);
-	}
-	return 0;
-
-not_unique:
-	write_unlock(&head->lock);
-	return -EADDRNOTAVAIL;
-}
-
-static inline u32 tcpv6_port_offset(const struct sock *sk)
-{
-	const struct inet_sock *inet = inet_sk(sk);
-	const struct ipv6_pinfo *np = inet6_sk(sk);
-
-	return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
-					   np->daddr.s6_addr32,
-					   inet->dport);
-}
-
-static int tcp_v6_hash_connect(struct sock *sk)
-{
-	unsigned short snum = inet_sk(sk)->num;
- 	struct inet_bind_hashbucket *head;
- 	struct inet_bind_bucket *tb;
-	int ret;
-
- 	if (!snum) {
- 		int low = sysctl_local_port_range[0];
- 		int high = sysctl_local_port_range[1];
-		int range = high - low;
- 		int i;
-		int port;
-		static u32 hint;
-		u32 offset = hint + tcpv6_port_offset(sk);
-		struct hlist_node *node;
- 		struct inet_timewait_sock *tw = NULL;
-
- 		local_bh_disable();
-		for (i = 1; i <= range; i++) {
-			port = low + (i + offset) % range;
- 			head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
- 			spin_lock(&head->lock);
-
- 			/* Does not bother with rcv_saddr checks,
- 			 * because the established check is already
- 			 * unique enough.
- 			 */
-			inet_bind_bucket_for_each(tb, node, &head->chain) {
- 				if (tb->port == port) {
- 					BUG_TRAP(!hlist_empty(&tb->owners));
- 					if (tb->fastreuse >= 0)
- 						goto next_port;
- 					if (!__tcp_v6_check_established(sk,
-									port,
-									&tw))
- 						goto ok;
- 					goto next_port;
- 				}
- 			}
-
- 			tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
- 			if (!tb) {
- 				spin_unlock(&head->lock);
- 				break;
- 			}
- 			tb->fastreuse = -1;
- 			goto ok;
-
- 		next_port:
- 			spin_unlock(&head->lock);
- 		}
- 		local_bh_enable();
-
- 		return -EADDRNOTAVAIL;
-
-ok:
-		hint += i;
-
- 		/* Head lock still held and bh's disabled */
- 		inet_bind_hash(sk, tb, port);
-		if (sk_unhashed(sk)) {
- 			inet_sk(sk)->sport = htons(port);
- 			__tcp_v6_hash(sk);
- 		}
- 		spin_unlock(&head->lock);
-
- 		if (tw) {
- 			inet_twsk_deschedule(tw, &tcp_death_row);
- 			inet_twsk_put(tw);
- 		}
-
-		ret = 0;
-		goto out;
- 	}
-
- 	head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
- 	tb   = inet_csk(sk)->icsk_bind_hash;
-	spin_lock_bh(&head->lock);
-
-	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
-		__tcp_v6_hash(sk);
-		spin_unlock_bh(&head->lock);
-		return 0;
-	} else {
-		spin_unlock(&head->lock);
-		/* No definite answer... Walk to established hash table */
-		ret = __tcp_v6_check_established(sk, snum, NULL);
-out:
-		local_bh_enable();
-		return ret;
-	}
-}
-
 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 
 			  int addr_len)
 {
 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
-	struct inet_sock *inet = inet_sk(sk);
+	struct inet_sock *inet = inet_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct in6_addr *saddr = NULL, *final_p = NULL, final;
@@ -571,7 +202,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	 */
 
 	if (addr_type == IPV6_ADDR_MAPPED) {
-		u32 exthdrlen = tp->ext_header_len;
+		u32 exthdrlen = icsk->icsk_ext_hdr_len;
 		struct sockaddr_in sin;
 
 		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
@@ -583,14 +214,14 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 		sin.sin_port = usin->sin6_port;
 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
 
-		tp->af_specific = &ipv6_mapped;
+		icsk->icsk_af_ops = &ipv6_mapped;
 		sk->sk_backlog_rcv = tcp_v4_do_rcv;
 
 		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
 
 		if (err) {
-			tp->ext_header_len = exthdrlen;
-			tp->af_specific = &ipv6_specific;
+			icsk->icsk_ext_hdr_len = exthdrlen;
+			icsk->icsk_af_ops = &ipv6_specific;
 			sk->sk_backlog_rcv = tcp_v6_do_rcv;
 			goto failure;
 		} else {
@@ -643,16 +274,17 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	sk->sk_route_caps = dst->dev->features &
 		~(NETIF_F_IP_CSUM | NETIF_F_TSO);
 
-	tp->ext_header_len = 0;
+	icsk->icsk_ext_hdr_len = 0;
 	if (np->opt)
-		tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
+		icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
+					  np->opt->opt_nflen);
 
 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 
 	inet->dport = usin->sin6_port;
 
 	tcp_set_state(sk, TCP_SYN_SENT);
-	err = tcp_v6_hash_connect(sk);
+	err = inet6_hash_connect(&tcp_death_row, sk);
 	if (err)
 		goto late_failure;
 
@@ -758,7 +390,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		} else
 			dst_hold(dst);
 
-		if (tp->pmtu_cookie > dst_mtu(dst)) {
+		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
 			tcp_sync_mss(sk, dst_mtu(dst));
 			tcp_simple_retransmit(sk);
 		} /* else let the usual retransmit timer handle it */
@@ -775,8 +407,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		if (sock_owned_by_user(sk))
 			goto out;
 
-		req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr,
-					&hdr->saddr, inet6_iif(skb));
+		req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
+					   &hdr->saddr, inet6_iif(skb));
 		if (!req)
 			goto out;
 
@@ -822,7 +454,7 @@ out:
 static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 			      struct dst_entry *dst)
 {
-	struct tcp6_request_sock *treq = tcp6_rsk(req);
+	struct inet6_request_sock *treq = inet6_rsk(req);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sk_buff * skb;
 	struct ipv6_txoptions *opt = NULL;
@@ -888,8 +520,8 @@ done:
 
 static void tcp_v6_reqsk_destructor(struct request_sock *req)
 {
-	if (tcp6_rsk(req)->pktopts)
-		kfree_skb(tcp6_rsk(req)->pktopts);
+	if (inet6_rsk(req)->pktopts)
+		kfree_skb(inet6_rsk(req)->pktopts);
 }
 
 static struct request_sock_ops tcp6_request_sock_ops = {
@@ -901,26 +533,15 @@ static struct request_sock_ops tcp6_request_sock_ops = {
 	.send_reset	=	tcp_v6_send_reset
 };
 
-static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
-{
-	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct inet6_skb_parm *opt = IP6CB(skb);
-
-	if (np->rxopt.all) {
-		if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) ||
-		    ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && np->rxopt.bits.rxflow) ||
-		    (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) ||
-		    ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
-			return 1;
-	}
-	return 0;
-}
-
+static struct timewait_sock_ops tcp6_timewait_sock_ops = {
+	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
+	.twsk_unique	= tcp_twsk_unique,
+};
 
-static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, 
-			      struct sk_buff *skb)
+static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct tcphdr *th = skb->h.th;
 
 	if (skb->ip_summed == CHECKSUM_HW) {
 		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,  0);
@@ -992,13 +613,12 @@ static void tcp_v6_send_reset(struct sk_buff *skb)
 	/* sk = NULL, but it is safe for now. RST socket required. */
 	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
 
-		if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0)
+		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
+			ip6_xmit(tcp6_socket->sk, buff, &fl, NULL, 0);
+			TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
+			TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
 			return;
-
-		ip6_xmit(NULL, buff, &fl, NULL, 0);
-		TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
-		TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
-		return;
+		}
 	}
 
 	kfree_skb(buff);
@@ -1057,11 +677,11 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32
 	fl.fl_ip_sport = t1->source;
 
 	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
-		if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0)
+		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
+			ip6_xmit(tcp6_socket->sk, buff, &fl, NULL, 0);
+			TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
 			return;
-		ip6_xmit(NULL, buff, &fl, NULL, 0);
-		TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
-		return;
+		}
 	}
 
 	kfree_skb(buff);
@@ -1092,8 +712,9 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
 	struct sock *nsk;
 
 	/* Find possible connection requests. */
-	req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr,
-				&skb->nh.ipv6h->daddr, inet6_iif(skb));
+	req = inet6_csk_search_req(sk, &prev, th->source,
+				   &skb->nh.ipv6h->saddr,
+				   &skb->nh.ipv6h->daddr, inet6_iif(skb));
 	if (req)
 		return tcp_check_req(sk, skb, req, prev);
 
@@ -1117,23 +738,12 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
 	return sk;
 }
 
-static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
-{
-	struct inet_connection_sock *icsk = inet_csk(sk);
-	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
-	const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
-
-	reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT);
-	inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT);
-}
-
-
 /* FIXME: this is substantially similar to the ipv4 code.
  * Can some kind of merge be done? -- erics
  */
 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcp6_request_sock *treq;
+	struct inet6_request_sock *treq;
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_options_received tmp_opt;
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -1158,7 +768,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
 		goto drop;
 
-	req = reqsk_alloc(&tcp6_request_sock_ops);
+	req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
 	if (req == NULL)
 		goto drop;
 
@@ -1171,7 +781,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
 	tcp_openreq_init(req, &tmp_opt, skb);
 
-	treq = tcp6_rsk(req);
+	treq = inet6_rsk(req);
 	ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
 	ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
 	TCP_ECN_create_request(req, skb->h.th);
@@ -1197,8 +807,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (tcp_v6_send_synack(sk, req, NULL))
 		goto drop;
 
-	tcp_v6_synq_add(sk, req);
-
+	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
 	return 0;
 
 drop:
@@ -1213,7 +822,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 					  struct request_sock *req,
 					  struct dst_entry *dst)
 {
-	struct tcp6_request_sock *treq = tcp6_rsk(req);
+	struct inet6_request_sock *treq = inet6_rsk(req);
 	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
 	struct tcp6_sock *newtcp6sk;
 	struct inet_sock *newinet;
@@ -1248,7 +857,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
 
-		newtp->af_specific = &ipv6_mapped;
+		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
 		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
 		newnp->pktoptions  = NULL;
 		newnp->opt	   = NULL;
@@ -1262,10 +871,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		 */
 
 		/* It is tricky place. Until this moment IPv4 tcp
-		   worked with IPv6 af_tcp.af_specific.
+		   worked with IPv6 icsk.icsk_af_ops.
 		   Sync it now.
 		 */
-		tcp_sync_mss(newsk, newtp->pmtu_cookie);
+		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
 
 		return newsk;
 	}
@@ -1372,10 +981,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 			sock_kfree_s(sk, opt, opt->tot_len);
 	}
 
-	newtp->ext_header_len = 0;
+	inet_csk(newsk)->icsk_ext_hdr_len = 0;
 	if (newnp->opt)
-		newtp->ext_header_len = newnp->opt->opt_nflen +
-					newnp->opt->opt_flen;
+		inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
+						     newnp->opt->opt_flen);
 
 	tcp_sync_mss(newsk, dst_mtu(dst));
 	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
@@ -1383,7 +992,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
 
-	__tcp_v6_hash(newsk);
+	__inet6_hash(&tcp_hashinfo, newsk);
 	inet_inherit_port(&tcp_hashinfo, sk, newsk);
 
 	return newsk;
@@ -1547,7 +1156,7 @@ ipv6_pktoptions:
 	return 0;
 }
 
-static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+static int tcp_v6_rcv(struct sk_buff **pskb)
 {
 	struct sk_buff *skb = *pskb;
 	struct tcphdr *th;	
@@ -1680,139 +1289,16 @@ do_time_wait:
 	goto discard_it;
 }
 
-static int tcp_v6_rebuild_header(struct sock *sk)
-{
-	int err;
-	struct dst_entry *dst;
-	struct ipv6_pinfo *np = inet6_sk(sk);
-
-	dst = __sk_dst_check(sk, np->dst_cookie);
-
-	if (dst == NULL) {
-		struct inet_sock *inet = inet_sk(sk);
-		struct in6_addr *final_p = NULL, final;
-		struct flowi fl;
-
-		memset(&fl, 0, sizeof(fl));
-		fl.proto = IPPROTO_TCP;
-		ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
-		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
-		fl.fl6_flowlabel = np->flow_label;
-		fl.oif = sk->sk_bound_dev_if;
-		fl.fl_ip_dport = inet->dport;
-		fl.fl_ip_sport = inet->sport;
-
-		if (np->opt && np->opt->srcrt) {
-			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
-			ipv6_addr_copy(&final, &fl.fl6_dst);
-			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-			final_p = &final;
-		}
-
-		err = ip6_dst_lookup(sk, &dst, &fl);
-		if (err) {
-			sk->sk_route_caps = 0;
-			return err;
-		}
-		if (final_p)
-			ipv6_addr_copy(&fl.fl6_dst, final_p);
-
-		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
-			sk->sk_err_soft = -err;
-			return err;
-		}
-
-		ip6_dst_store(sk, dst, NULL);
-		sk->sk_route_caps = dst->dev->features &
-			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
-	}
-
-	return 0;
-}
-
-static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
-{
-	struct sock *sk = skb->sk;
-	struct inet_sock *inet = inet_sk(sk);
-	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct flowi fl;
-	struct dst_entry *dst;
-	struct in6_addr *final_p = NULL, final;
-
-	memset(&fl, 0, sizeof(fl));
-	fl.proto = IPPROTO_TCP;
-	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
-	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
-	fl.fl6_flowlabel = np->flow_label;
-	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
-	fl.oif = sk->sk_bound_dev_if;
-	fl.fl_ip_sport = inet->sport;
-	fl.fl_ip_dport = inet->dport;
-
-	if (np->opt && np->opt->srcrt) {
-		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
-	}
-
-	dst = __sk_dst_check(sk, np->dst_cookie);
-
-	if (dst == NULL) {
-		int err = ip6_dst_lookup(sk, &dst, &fl);
-
-		if (err) {
-			sk->sk_err_soft = -err;
-			return err;
-		}
-
-		if (final_p)
-			ipv6_addr_copy(&fl.fl6_dst, final_p);
-
-		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
-			sk->sk_route_caps = 0;
-			return err;
-		}
-
-		ip6_dst_store(sk, dst, NULL);
-		sk->sk_route_caps = dst->dev->features &
-			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
-	}
-
-	skb->dst = dst_clone(dst);
-
-	/* Restore final destination back after routing done */
-	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
-
-	return ip6_xmit(sk, skb, &fl, np->opt, 0);
-}
-
-static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
-{
-	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
-
-	sin6->sin6_family = AF_INET6;
-	ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
-	sin6->sin6_port	= inet_sk(sk)->dport;
-	/* We do not store received flowlabel for TCP */
-	sin6->sin6_flowinfo = 0;
-	sin6->sin6_scope_id = 0;
-	if (sk->sk_bound_dev_if &&
-	    ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
-		sin6->sin6_scope_id = sk->sk_bound_dev_if;
-}
-
 static int tcp_v6_remember_stamp(struct sock *sk)
 {
 	/* Alas, not yet... */
 	return 0;
 }
 
-static struct tcp_func ipv6_specific = {
-	.queue_xmit	=	tcp_v6_xmit,
+static struct inet_connection_sock_af_ops ipv6_specific = {
+	.queue_xmit	=	inet6_csk_xmit,
 	.send_check	=	tcp_v6_send_check,
-	.rebuild_header	=	tcp_v6_rebuild_header,
+	.rebuild_header	=	inet6_sk_rebuild_header,
 	.conn_request	=	tcp_v6_conn_request,
 	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
 	.remember_stamp	=	tcp_v6_remember_stamp,
@@ -1820,7 +1306,7 @@ static struct tcp_func ipv6_specific = {
 
 	.setsockopt	=	ipv6_setsockopt,
 	.getsockopt	=	ipv6_getsockopt,
-	.addr2sockaddr	=	v6_addr2sockaddr,
+	.addr2sockaddr	=	inet6_csk_addr2sockaddr,
 	.sockaddr_len	=	sizeof(struct sockaddr_in6)
 };
 
@@ -1828,7 +1314,7 @@ static struct tcp_func ipv6_specific = {
  *	TCP over IPv4 via INET6 API
  */
 
-static struct tcp_func ipv6_mapped = {
+static struct inet_connection_sock_af_ops ipv6_mapped = {
 	.queue_xmit	=	ip_queue_xmit,
 	.send_check	=	tcp_v4_send_check,
 	.rebuild_header	=	inet_sk_rebuild_header,
@@ -1839,7 +1325,7 @@ static struct tcp_func ipv6_mapped = {
 
 	.setsockopt	=	ipv6_setsockopt,
 	.getsockopt	=	ipv6_getsockopt,
-	.addr2sockaddr	=	v6_addr2sockaddr,
+	.addr2sockaddr	=	inet6_csk_addr2sockaddr,
 	.sockaddr_len	=	sizeof(struct sockaddr_in6)
 };
 
@@ -1878,8 +1364,9 @@ static int tcp_v6_init_sock(struct sock *sk)
 
 	sk->sk_state = TCP_CLOSE;
 
-	tp->af_specific = &ipv6_specific;
+	icsk->icsk_af_ops = &ipv6_specific;
 	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
+	icsk->icsk_sync_mss = tcp_sync_mss;
 	sk->sk_write_space = sk_stream_write_space;
 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
 
@@ -1901,14 +1388,13 @@ static int tcp_v6_destroy_sock(struct sock *sk)
 static void get_openreq6(struct seq_file *seq, 
 			 struct sock *sk, struct request_sock *req, int i, int uid)
 {
-	struct in6_addr *dest, *src;
 	int ttd = req->expires - jiffies;
+	struct in6_addr *src = &inet6_rsk(req)->loc_addr;
+	struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;
 
 	if (ttd < 0)
 		ttd = 0;
 
-	src = &tcp6_rsk(req)->loc_addr;
-	dest = &tcp6_rsk(req)->rmt_addr;
 	seq_printf(seq,
 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
@@ -1989,14 +1475,14 @@ static void get_timewait6_sock(struct seq_file *seq,
 {
 	struct in6_addr *dest, *src;
 	__u16 destp, srcp;
-	struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
+	struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
 	int ttd = tw->tw_ttd - jiffies;
 
 	if (ttd < 0)
 		ttd = 0;
 
-	dest = &tcp6tw->tw_v6_daddr;
-	src  = &tcp6tw->tw_v6_rcv_saddr;
+	dest = &tw6->tw_v6_daddr;
+	src  = &tw6->tw_v6_rcv_saddr;
 	destp = ntohs(tw->tw_dport);
 	srcp  = ntohs(tw->tw_sport);
 
@@ -2094,7 +1580,7 @@ struct proto tcpv6_prot = {
 	.sysctl_rmem		= sysctl_tcp_rmem,
 	.max_header		= MAX_TCP_HEADER,
 	.obj_size		= sizeof(struct tcp6_sock),
-	.twsk_obj_size		= sizeof(struct tcp6_timewait_sock),
+	.twsk_prot		= &tcp6_timewait_sock_ops,
 	.rsk_prot		= &tcp6_request_sock_ops,
 };
 
@@ -2111,13 +1597,27 @@ static struct inet_protosw tcpv6_protosw = {
 	.ops		=	&inet6_stream_ops,
 	.capability	=	-1,
 	.no_check	=	0,
-	.flags		=	INET_PROTOSW_PERMANENT,
+	.flags		=	INET_PROTOSW_PERMANENT |
+				INET_PROTOSW_ICSK,
 };
 
 void __init tcpv6_init(void)
 {
+	int err;
+
 	/* register inet6 protocol */
 	if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
 		printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
 	inet6_register_protosw(&tcpv6_protosw);
+
+	err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_TCP, &tcp6_socket);
+	if (err < 0)
+		panic("Failed to create the TCPv6 control socket.\n");
+	tcp6_socket->sk->sk_allocation = GFP_ATOMIC;
+
+	/* Unhash it so that IP input processing does not even
+	 * see it, we do not wish this socket to see incoming
+	 * packets.
+	 */
+	tcp6_socket->sk->sk_prot->unhash(tcp6_socket->sk);
 }
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 5cc8731eb55..c47648892c0 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -36,6 +36,7 @@
 #include <linux/ipv6.h>
 #include <linux/icmpv6.h>
 #include <linux/init.h>
+#include <linux/skbuff.h>
 #include <asm/uaccess.h>
 
 #include <net/sock.h>
@@ -300,20 +301,7 @@ out:
 	return err;
 
 csum_copy_err:
-	/* Clear queue. */
-	if (flags&MSG_PEEK) {
-		int clear = 0;
-		spin_lock_bh(&sk->sk_receive_queue.lock);
-		if (skb == skb_peek(&sk->sk_receive_queue)) {
-			__skb_unlink(skb, &sk->sk_receive_queue);
-			clear = 1;
-		}
-		spin_unlock_bh(&sk->sk_receive_queue.lock);
-		if (clear)
-			kfree_skb(skb);
-	}
-
-	skb_free_datagram(sk, skb);
+	skb_kill_datagram(sk, skb, flags);
 
 	if (flags & MSG_DONTWAIT) {
 		UDP6_INC_STATS_USER(UDP_MIB_INERRORS);
@@ -447,7 +435,7 @@ out:
 	read_unlock(&udp_hash_lock);
 }
 
-static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+static int udpv6_rcv(struct sk_buff **pskb)
 {
 	struct sk_buff *skb = *pskb;
 	struct sock *sk;
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 28c29d78338..1ca2da68ef6 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -11,6 +11,8 @@
 
 #include <linux/module.h>
 #include <linux/string.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
 #include <net/dsfield.h>
 #include <net/inet_ecn.h>
 #include <net/ip.h>
@@ -26,7 +28,7 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
 		IP6_ECN_set_ce(inner_iph);
 }
 
-int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi)
+int xfrm6_rcv_spi(struct sk_buff **pskb, u32 spi)
 {
 	struct sk_buff *skb = *pskb;
 	int err;
@@ -38,7 +40,7 @@ int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi)
 	int nexthdr;
 	unsigned int nhoff;
 
-	nhoff = *nhoffp;
+	nhoff = IP6CB(skb)->nhoff;
 	nexthdr = skb->nh.raw[nhoff];
 
 	seq = 0;
@@ -121,6 +123,8 @@ int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi)
 	skb->sp->len += xfrm_nr;
 	skb->ip_summed = CHECKSUM_NONE;
 
+	nf_reset(skb);
+
 	if (decaps) {
 		if (!(skb->dev->flags&IFF_LOOPBACK)) {
 			dst_release(skb->dst);
@@ -129,7 +133,16 @@ int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi)
 		netif_rx(skb);
 		return -1;
 	} else {
+#ifdef CONFIG_NETFILTER
+		skb->nh.ipv6h->payload_len = htons(skb->len);
+		__skb_push(skb, skb->data - skb->nh.raw);
+
+		NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL,
+		        ip6_rcv_finish);
+		return -1;
+#else
 		return 1;
+#endif
 	}
 
 drop_unlock:
@@ -144,7 +157,7 @@ drop:
 
 EXPORT_SYMBOL(xfrm6_rcv_spi);
 
-int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+int xfrm6_rcv(struct sk_buff **pskb)
 {
-	return xfrm6_rcv_spi(pskb, nhoffp, 0);
+	return xfrm6_rcv_spi(pskb, 0);
 }
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 6b9867717d1..80242172a5d 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -9,9 +9,11 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/compiler.h>
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
 #include <linux/icmpv6.h>
+#include <linux/netfilter_ipv6.h>
 #include <net/dsfield.h>
 #include <net/inet_ecn.h>
 #include <net/ipv6.h>
@@ -92,7 +94,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
 	return ret;
 }
 
-int xfrm6_output(struct sk_buff *skb)
+static int xfrm6_output_one(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb->dst;
 	struct xfrm_state *x = dst->xfrm;
@@ -110,29 +112,35 @@ int xfrm6_output(struct sk_buff *skb)
 			goto error_nolock;
 	}
 
-	spin_lock_bh(&x->lock);
-	err = xfrm_state_check(x, skb);
-	if (err)
-		goto error;
+	do {
+		spin_lock_bh(&x->lock);
+		err = xfrm_state_check(x, skb);
+		if (err)
+			goto error;
 
-	xfrm6_encap(skb);
+		xfrm6_encap(skb);
 
-	err = x->type->output(x, skb);
-	if (err)
-		goto error;
+		err = x->type->output(x, skb);
+		if (err)
+			goto error;
 
-	x->curlft.bytes += skb->len;
-	x->curlft.packets++;
+		x->curlft.bytes += skb->len;
+		x->curlft.packets++;
 
-	spin_unlock_bh(&x->lock);
+		spin_unlock_bh(&x->lock);
 
-	skb->nh.raw = skb->data;
-	
-	if (!(skb->dst = dst_pop(dst))) {
-		err = -EHOSTUNREACH;
-		goto error_nolock;
-	}
-	err = NET_XMIT_BYPASS;
+		skb->nh.raw = skb->data;
+		
+		if (!(skb->dst = dst_pop(dst))) {
+			err = -EHOSTUNREACH;
+			goto error_nolock;
+		}
+		dst = skb->dst;
+		x = dst->xfrm;
+	} while (x && !x->props.mode);
+
+	IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
+	err = 0;
 
 out_exit:
 	return err;
@@ -142,3 +150,33 @@ error_nolock:
 	kfree_skb(skb);
 	goto out_exit;
 }
+
+/* Apply xfrm6_output_one() repeatedly, with netfilter hooks run in between. */
+static int xfrm6_output_finish(struct sk_buff *skb)
+{
+	int rc;
+
+	for (;;) {
+		rc = xfrm6_output_one(skb);
+		if (unlikely(rc != 0))
+			return rc;
+		nf_reset(skb);
+		rc = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, &skb, NULL,
+			     skb->dst->dev, dst_output);
+		if (unlikely(rc != 1))
+			return rc;
+		if (!skb->dst->xfrm)
+			return dst_output(skb);
+		rc = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, &skb, NULL,
+			     skb->dst->dev, xfrm6_output_finish);
+		if (unlikely(rc != 1))
+			return rc;
+	}
+}
+
+/* Entry point: give skb and xfrm6_output_finish to the POST_ROUTING hook. */
+int xfrm6_output(struct sk_buff *skb)
+{
+	return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev,
+		       xfrm6_output_finish);
+}
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index cf1d91e74c8..69bd957380e 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -214,6 +214,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
 		case IPPROTO_UDP:
 		case IPPROTO_TCP:
 		case IPPROTO_SCTP:
+		case IPPROTO_DCCP:
 			if (pskb_may_pull(skb, skb->nh.raw + offset + 4 - skb->data)) {
 				u16 *ports = (u16 *)exthdr;
 
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index bf0d0abc387..a5723024d3b 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -15,6 +15,7 @@
 #include <linux/pfkeyv2.h>
 #include <linux/ipsec.h>
 #include <net/ipv6.h>
+#include <net/addrconf.h>
 
 static struct xfrm_state_afinfo xfrm6_state_afinfo;
 
@@ -41,6 +42,22 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl,
 	memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr));
 	if (ipv6_addr_any((struct in6_addr*)&x->props.saddr))
 		memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr));
+	if (tmpl->mode && ipv6_addr_any((struct in6_addr*)&x->props.saddr)) {
+		struct rt6_info *rt;
+		struct flowi fl_tunnel = {
+			.nl_u = {
+				.ip6_u = {
+					.daddr = *(struct in6_addr *)daddr,
+				}
+			}
+		};
+		if (!xfrm_dst_lookup((struct xfrm_dst **)&rt,
+		                     &fl_tunnel, AF_INET6)) {
+			ipv6_get_saddr(&rt->u.dst, (struct in6_addr *)daddr,
+			               (struct in6_addr *)&x->props.saddr);
+			dst_release(&rt->u.dst);
+		}
+	}
 	x->props.mode = tmpl->mode;
 	x->props.reqid = tmpl->reqid;
 	x->props.family = AF_INET6;
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index fbef7826a74..8cfc58b96fc 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -259,8 +259,7 @@ try_next_2:;
 	spi = 0;
 	goto out;
 alloc_spi:
-	X6TPRINTK3(KERN_DEBUG "%s(): allocate new spi for "
-			      "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", 
+	X6TPRINTK3(KERN_DEBUG "%s(): allocate new spi for " NIP6_FMT "\n",
 			      __FUNCTION__, 
 			      NIP6(*(struct in6_addr *)saddr));
 	x6spi = kmem_cache_alloc(xfrm6_tunnel_spi_kmem, SLAB_ATOMIC);
@@ -323,9 +322,8 @@ void xfrm6_tunnel_free_spi(xfrm_address_t *saddr)
 				  list_byaddr)
 	{
 		if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) {
-			X6TPRINTK3(KERN_DEBUG "%s(): x6spi object "
-					      "for %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x "
-					      "found at %p\n",
+			X6TPRINTK3(KERN_DEBUG "%s(): x6spi object for " NIP6_FMT 
+					      " found at %p\n",
 				   __FUNCTION__, 
 				   NIP6(*(struct in6_addr *)saddr),
 				   x6spi);
@@ -397,7 +395,7 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler)
 
 EXPORT_SYMBOL(xfrm6_tunnel_deregister);
 
-static int xfrm6_tunnel_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+static int xfrm6_tunnel_rcv(struct sk_buff **pskb)
 {
 	struct sk_buff *skb = *pskb;
 	struct xfrm6_tunnel *handler = xfrm6_tunnel_handler;
@@ -405,11 +403,11 @@ static int xfrm6_tunnel_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
 	u32 spi;
 
 	/* device-like_ip6ip6_handler() */
-	if (handler && handler->handler(pskb, nhoffp) == 0)
+	if (handler && handler->handler(pskb) == 0)
 		return 0;
 
 	spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr);
-	return xfrm6_rcv_spi(pskb, nhoffp, spi);
+	return xfrm6_rcv_spi(pskb, spi);
 }
 
 static void xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 34b3bb86840..0fb513a34d1 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -29,6 +29,7 @@
  */
 
 #include <linux/config.h>
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/if_arp.h>
 #include <linux/if_ether.h>
@@ -75,7 +76,7 @@ static struct datalink_proto *pEII_datalink;
 static struct datalink_proto *p8023_datalink;
 static struct datalink_proto *pSNAP_datalink;
 
-static struct proto_ops ipx_dgram_ops;
+static const struct proto_ops ipx_dgram_ops;
 
 LIST_HEAD(ipx_interfaces);
 DEFINE_SPINLOCK(ipx_interfaces_lock);
@@ -1884,7 +1885,7 @@ static int ipx_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		rc = -EINVAL;
 		break;
 	default:
-		rc = dev_ioctl(cmd, argp);
+		rc = -ENOIOCTLCMD;
 		break;
 	}
 
@@ -1901,7 +1902,7 @@ static struct net_proto_family ipx_family_ops = {
 	.owner		= THIS_MODULE,
 };
 
-static struct proto_ops SOCKOPS_WRAPPED(ipx_dgram_ops) = {
+static const struct proto_ops SOCKOPS_WRAPPED(ipx_dgram_ops) = {
 	.family		= PF_IPX,
 	.owner		= THIS_MODULE,
 	.release	= ipx_release,
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 6f92f9c6299..75944564866 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -43,6 +43,7 @@
  ********************************************************************/
 
 #include <linux/config.h>
+#include <linux/capability.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/socket.h>
@@ -62,12 +63,12 @@
 
 static int irda_create(struct socket *sock, int protocol);
 
-static struct proto_ops irda_stream_ops;
-static struct proto_ops irda_seqpacket_ops;
-static struct proto_ops irda_dgram_ops;
+static const struct proto_ops irda_stream_ops;
+static const struct proto_ops irda_seqpacket_ops;
+static const struct proto_ops irda_dgram_ops;
 
 #ifdef CONFIG_IRDA_ULTRA
-static struct proto_ops irda_ultra_ops;
+static const struct proto_ops irda_ultra_ops;
 #define ULTRA_MAX_DATA 382
 #endif /* CONFIG_IRDA_ULTRA */
 
@@ -1438,8 +1439,9 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
 			/*
 			 *	POSIX 1003.1g mandates this order.
 			 */
-			if (sk->sk_err)
-				ret = sock_error(sk);
+			ret = sock_error(sk);
+			if (ret)
+				break;
 			else if (sk->sk_shutdown & RCV_SHUTDOWN)
 				;
 			else if (noblock)
@@ -1821,7 +1823,7 @@ static int irda_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		return -EINVAL;
 	default:
 		IRDA_DEBUG(1, "%s(), doing device ioctl!\n", __FUNCTION__);
-		return dev_ioctl(cmd, (void __user *) arg);
+		return -ENOIOCTLCMD;
 	}
 
 	/*NOTREACHED*/
@@ -2463,7 +2465,7 @@ static struct net_proto_family irda_family_ops = {
 	.owner	= THIS_MODULE,
 };
 
-static struct proto_ops SOCKOPS_WRAPPED(irda_stream_ops) = {
+static const struct proto_ops SOCKOPS_WRAPPED(irda_stream_ops) = {
 	.family =	PF_IRDA,
 	.owner =	THIS_MODULE,
 	.release =	irda_release,
@@ -2484,7 +2486,7 @@ static struct proto_ops SOCKOPS_WRAPPED(irda_stream_ops) = {
 	.sendpage =	sock_no_sendpage,
 };
 
-static struct proto_ops SOCKOPS_WRAPPED(irda_seqpacket_ops) = {
+static const struct proto_ops SOCKOPS_WRAPPED(irda_seqpacket_ops) = {
 	.family =	PF_IRDA,
 	.owner =	THIS_MODULE,
 	.release =	irda_release,
@@ -2505,7 +2507,7 @@ static struct proto_ops SOCKOPS_WRAPPED(irda_seqpacket_ops) = {
 	.sendpage =	sock_no_sendpage,
 };
 
-static struct proto_ops SOCKOPS_WRAPPED(irda_dgram_ops) = {
+static const struct proto_ops SOCKOPS_WRAPPED(irda_dgram_ops) = {
 	.family =	PF_IRDA,
 	.owner =	THIS_MODULE,
 	.release =	irda_release,
@@ -2527,7 +2529,7 @@ static struct proto_ops SOCKOPS_WRAPPED(irda_dgram_ops) = {
 };
 
 #ifdef CONFIG_IRDA_ULTRA
-static struct proto_ops SOCKOPS_WRAPPED(irda_ultra_ops) = {
+static const struct proto_ops SOCKOPS_WRAPPED(irda_ultra_ops) = {
 	.family =	PF_IRDA,
 	.owner =	THIS_MODULE,
 	.release =	irda_release,
diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c
index 70543d89438..890bac0d4a5 100644
--- a/net/irda/irda_device.c
+++ b/net/irda/irda_device.c
@@ -33,6 +33,7 @@
 #include <linux/string.h>
 #include <linux/proc_fs.h>
 #include <linux/skbuff.h>
+#include <linux/capability.h>
 #include <linux/if.h>
 #include <linux/if_ether.h>
 #include <linux/if_arp.h>
diff --git a/net/irda/iriap.c b/net/irda/iriap.c
index b8bb78af8b8..254f9074690 100644
--- a/net/irda/iriap.c
+++ b/net/irda/iriap.c
@@ -364,7 +364,7 @@ static void iriap_disconnect_request(struct iriap_cb *self)
 /*
  * Function iriap_getvaluebyclass (addr, name, attr)
  *
- *    Retreive all values from attribute in all objects with given class
+ *    Retrieve all values from attribute in all objects with given class
  *    name
  */
 int iriap_getvaluebyclass_request(struct iriap_cb *self,
diff --git a/net/irda/irias_object.c b/net/irda/irias_object.c
index 75f2666e863..c6d169fbdce 100644
--- a/net/irda/irias_object.c
+++ b/net/irda/irias_object.c
@@ -82,8 +82,7 @@ struct ias_object *irias_new_object( char *name, int id)
 
 	IRDA_DEBUG( 4, "%s()\n", __FUNCTION__);
 
-	obj = (struct ias_object *) kmalloc(sizeof(struct ias_object),
-					    GFP_ATOMIC);
+	obj = kmalloc(sizeof(struct ias_object), GFP_ATOMIC);
 	if (obj == NULL) {
 		IRDA_WARNING("%s(), Unable to allocate object!\n",
 			     __FUNCTION__);
@@ -348,8 +347,7 @@ void irias_add_integer_attrib(struct ias_object *obj, char *name, int value,
 	IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;);
 	IRDA_ASSERT(name != NULL, return;);
 
-	attrib = (struct ias_attrib *) kmalloc(sizeof(struct ias_attrib),
-					       GFP_ATOMIC);
+	attrib = kmalloc(sizeof(struct ias_attrib), GFP_ATOMIC);
 	if (attrib == NULL) {
 		IRDA_WARNING("%s: Unable to allocate attribute!\n",
 			     __FUNCTION__);
@@ -385,8 +383,7 @@ void irias_add_octseq_attrib(struct ias_object *obj, char *name, __u8 *octets,
 	IRDA_ASSERT(name != NULL, return;);
 	IRDA_ASSERT(octets != NULL, return;);
 
-	attrib = (struct ias_attrib *) kmalloc(sizeof(struct ias_attrib),
-					       GFP_ATOMIC);
+	attrib = kmalloc(sizeof(struct ias_attrib), GFP_ATOMIC);
 	if (attrib == NULL) {
 		IRDA_WARNING("%s: Unable to allocate attribute!\n",
 			     __FUNCTION__);
@@ -420,8 +417,7 @@ void irias_add_string_attrib(struct ias_object *obj, char *name, char *value,
 	IRDA_ASSERT(name != NULL, return;);
 	IRDA_ASSERT(value != NULL, return;);
 
-	attrib = (struct ias_attrib *) kmalloc(sizeof( struct ias_attrib),
-					       GFP_ATOMIC);
+	attrib = kmalloc(sizeof( struct ias_attrib), GFP_ATOMIC);
 	if (attrib == NULL) {
 		IRDA_WARNING("%s: Unable to allocate attribute!\n",
 			     __FUNCTION__);
diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h
index b391cb3893d..e4fe1e80029 100644
--- a/net/irda/irnet/irnet.h
+++ b/net/irda/irnet/irnet.h
@@ -248,6 +248,7 @@
 #include <linux/netdevice.h>
 #include <linux/miscdevice.h>
 #include <linux/poll.h>
+#include <linux/capability.h>
 #include <linux/config.h>
 #include <linux/ctype.h>	/* isspace() */
 #include <asm/uaccess.h>
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 39031684b65..43f1ce74187 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -15,6 +15,7 @@
  */
 
 #include <linux/config.h>
+#include <linux/capability.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/socket.h>
@@ -113,7 +114,7 @@ static __inline__ void pfkey_unlock_table(void)
 }
 
 
-static struct proto_ops pfkey_ops;
+static const struct proto_ops pfkey_ops;
 
 static void pfkey_insert(struct sock *sk)
 {
@@ -297,8 +298,7 @@ static int pfkey_error(struct sadb_msg *orig, int err, struct sock *sk)
 		err = EINTR;
 	if (err >= 512)
 		err = EINVAL;
-	if (err <= 0 || err >= 256)
-		BUG();
+	BUG_ON(err <= 0 || err >= 256);
 
 	hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
 	pfkey_hdr_dup(hdr, orig);
@@ -336,6 +336,7 @@ static u8 sadb_ext_min_len[] = {
 	[SADB_X_EXT_NAT_T_SPORT]	= (u8) sizeof(struct sadb_x_nat_t_port),
 	[SADB_X_EXT_NAT_T_DPORT]	= (u8) sizeof(struct sadb_x_nat_t_port),
 	[SADB_X_EXT_NAT_T_OA]		= (u8) sizeof(struct sadb_address),
+	[SADB_X_EXT_SEC_CTX]		= (u8) sizeof(struct sadb_x_sec_ctx),
 };
 
 /* Verify sadb_address_{len,prefixlen} against sa_family.  */
@@ -383,6 +384,55 @@ static int verify_address_len(void *p)
 	return 0;
 }
 
+/*
+ * Size of a security-context extension -- fixed header plus context
+ * string -- rounded up to, and counted in, 64-bit words.
+ */
+static inline int pfkey_sec_ctx_len(struct sadb_x_sec_ctx *sec_ctx)
+{
+	int bytes = sizeof(struct sadb_x_sec_ctx) + sec_ctx->sadb_x_ctx_len;
+
+	bytes += sizeof(uint64_t) - 1;
+	return bytes / sizeof(uint64_t);
+}
+
+/*
+ * Sanity-check a security-context extension: the context string may be
+ * at most PAGE_SIZE bytes and the advertised extension length must equal
+ * the length computed by pfkey_sec_ctx_len().
+ * Returns 0 if both checks pass, -EINVAL otherwise.
+ */
+static inline int verify_sec_ctx_len(void *p)
+{
+	struct sadb_x_sec_ctx *ctx = p;
+
+	if (ctx->sadb_x_ctx_len > PAGE_SIZE ||
+	    ctx->sadb_x_sec_len != pfkey_sec_ctx_len(ctx))
+		return -EINVAL;
+	return 0;
+}
+
+/*
+ * Build a kmalloc()ed xfrm_user_sec_ctx from a PF_KEY security context
+ * extension, copying the context string in right behind the header.
+ * Returns NULL if the allocation fails; callers kfree() the result.
+ */
+static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(struct sadb_x_sec_ctx *sec_ctx)
+{
+	struct xfrm_user_sec_ctx *uctx;
+
+	uctx = kmalloc(sizeof(*uctx) + sec_ctx->sadb_x_ctx_len, GFP_KERNEL);
+	if (uctx) {
+		uctx->len = pfkey_sec_ctx_len(sec_ctx);
+		uctx->exttype = sec_ctx->sadb_x_sec_exttype;
+		uctx->ctx_doi = sec_ctx->sadb_x_ctx_doi;
+		uctx->ctx_alg = sec_ctx->sadb_x_ctx_alg;
+		uctx->ctx_len = sec_ctx->sadb_x_ctx_len;
+		memcpy(uctx + 1, sec_ctx + 1, uctx->ctx_len);
+	}
+	return uctx;
+}
+
 static int present_and_same_family(struct sadb_address *src,
 				   struct sadb_address *dst)
 {
@@ -438,6 +488,10 @@ static int parse_exthdrs(struct sk_buff *skb, struct sadb_msg *hdr, void **ext_h
 				if (verify_address_len(p))
 					return -EINVAL;
 			}				
+			if (ext_type == SADB_X_EXT_SEC_CTX) {
+				if (verify_sec_ctx_len(p))
+					return -EINVAL;
+			}
 			ext_hdrs[ext_type-1] = p;
 		}
 		p   += ext_len;
@@ -586,6 +640,9 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys,
 	struct sadb_key *key;
 	struct sadb_x_sa2 *sa2;
 	struct sockaddr_in *sin;
+	struct sadb_x_sec_ctx *sec_ctx;
+	struct xfrm_sec_ctx *xfrm_ctx;
+	int ctx_size = 0;
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	struct sockaddr_in6 *sin6;
 #endif
@@ -609,6 +666,12 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys,
 			sizeof(struct sadb_address)*2 + 
 				sockaddr_size*2 +
 					sizeof(struct sadb_x_sa2);
+
+	if ((xfrm_ctx = x->security)) {
+		ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len);
+		size += sizeof(struct sadb_x_sec_ctx) + ctx_size;
+	}
+
 	/* identity & sensitivity */
 
 	if ((x->props.family == AF_INET &&
@@ -899,6 +962,20 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys,
 		n_port->sadb_x_nat_t_port_reserved = 0;
 	}
 
+	/* security context */
+	if (xfrm_ctx) {
+		sec_ctx = (struct sadb_x_sec_ctx *) skb_put(skb,
+				sizeof(struct sadb_x_sec_ctx) + ctx_size);
+		sec_ctx->sadb_x_sec_len =
+		  (sizeof(struct sadb_x_sec_ctx) + ctx_size) / sizeof(uint64_t);
+		sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX;
+		sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi;
+		sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg;
+		sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len;
+		memcpy(sec_ctx + 1, xfrm_ctx->ctx_str,
+		       xfrm_ctx->ctx_len);
+	}
+
 	return skb;
 }
 
@@ -909,6 +986,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr,
 	struct sadb_lifetime *lifetime;
 	struct sadb_sa *sa;
 	struct sadb_key *key;
+	struct sadb_x_sec_ctx *sec_ctx;
 	uint16_t proto;
 	int err;
 	
@@ -993,6 +1071,21 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr,
 		x->lft.soft_add_expires_seconds = lifetime->sadb_lifetime_addtime;
 		x->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime;
 	}
+
+	sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1];
+	if (sec_ctx != NULL) {
+		struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx);
+
+		if (!uctx)
+			goto out;
+
+		err = security_xfrm_state_alloc(x, uctx);
+		kfree(uctx);
+
+		if (err)
+			goto out;
+	}
+
 	key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1];
 	if (sa->sadb_sa_auth) {
 		int keysize = 0;
@@ -1720,6 +1813,18 @@ parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol)
 	return 0;
 }
 
+/* PFKEY_ALIGN8()ed size of xp's security context extension, 0 if none. */
+static inline int pfkey_xfrm_policy2sec_ctx_size(struct xfrm_policy *xp)
+{
+	struct xfrm_sec_ctx *ctx = xp->security;
+	int bytes;
+
+	if (!ctx)
+		return 0;
+	bytes = sizeof(struct sadb_x_sec_ctx) + ctx->ctx_len;
+	return PFKEY_ALIGN8(bytes);
+}
+
 static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp)
 {
 	int sockaddr_size = pfkey_sockaddr_size(xp->family);
@@ -1733,7 +1838,8 @@ static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp)
 		(sockaddr_size * 2) +
 		sizeof(struct sadb_x_policy) +
 		(xp->xfrm_nr * (sizeof(struct sadb_x_ipsecrequest) +
-				(socklen * 2)));
+				(socklen * 2))) +
+		pfkey_xfrm_policy2sec_ctx_size(xp);
 }
 
 static struct sk_buff * pfkey_xfrm_policy2msg_prep(struct xfrm_policy *xp)
@@ -1757,6 +1863,8 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i
 	struct sadb_lifetime *lifetime;
 	struct sadb_x_policy *pol;
 	struct sockaddr_in   *sin;
+	struct sadb_x_sec_ctx *sec_ctx;
+	struct xfrm_sec_ctx *xfrm_ctx;
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	struct sockaddr_in6  *sin6;
 #endif
@@ -1941,6 +2049,21 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i
 			}
 		}
 	}
+
+	/* security context */
+	if ((xfrm_ctx = xp->security)) {
+		int ctx_size = pfkey_xfrm_policy2sec_ctx_size(xp);
+
+		sec_ctx = (struct sadb_x_sec_ctx *) skb_put(skb, ctx_size);
+		sec_ctx->sadb_x_sec_len = ctx_size / sizeof(uint64_t);
+		sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX;
+		sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi;
+		sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg;
+		sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len;
+		memcpy(sec_ctx + 1, xfrm_ctx->ctx_str,
+		       xfrm_ctx->ctx_len);
+	}
+
 	hdr->sadb_msg_len = size / sizeof(uint64_t);
 	hdr->sadb_msg_reserved = atomic_read(&xp->refcnt);
 }
@@ -1976,12 +2099,13 @@ out:
 
 static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
 {
-	int err;
+	int err = 0;
 	struct sadb_lifetime *lifetime;
 	struct sadb_address *sa;
 	struct sadb_x_policy *pol;
 	struct xfrm_policy *xp;
 	struct km_event c;
+	struct sadb_x_sec_ctx *sec_ctx;
 
 	if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
 				     ext_hdrs[SADB_EXT_ADDRESS_DST-1]) ||
@@ -2028,6 +2152,22 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
 	if (xp->selector.dport)
 		xp->selector.dport_mask = ~0;
 
+	sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1];
+	if (sec_ctx != NULL) {
+		struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx);
+
+		if (!uctx) {
+			err = -ENOBUFS;
+			goto out;
+		}
+
+		err = security_xfrm_policy_alloc(xp, uctx);
+		kfree(uctx);
+
+		if (err)
+			goto out;
+	}
+
 	xp->lft.soft_byte_limit = XFRM_INF;
 	xp->lft.hard_byte_limit = XFRM_INF;
 	xp->lft.soft_packet_limit = XFRM_INF;
@@ -2051,10 +2191,9 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
 
 	err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp,
 				 hdr->sadb_msg_type != SADB_X_SPDUPDATE);
-	if (err) {
-		kfree(xp);
-		return err;
-	}
+
+	if (err)
+		goto out;
 
 	if (hdr->sadb_msg_type == SADB_X_SPDUPDATE)
 		c.event = XFRM_MSG_UPDPOLICY;
@@ -2069,6 +2208,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
 	return 0;
 
 out:
+	security_xfrm_policy_free(xp);
 	kfree(xp);
 	return err;
 }
@@ -2078,9 +2218,10 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg
 	int err;
 	struct sadb_address *sa;
 	struct sadb_x_policy *pol;
-	struct xfrm_policy *xp;
+	struct xfrm_policy *xp, tmp;
 	struct xfrm_selector sel;
 	struct km_event c;
+	struct sadb_x_sec_ctx *sec_ctx;
 
 	if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
 				     ext_hdrs[SADB_EXT_ADDRESS_DST-1]) ||
@@ -2109,7 +2250,24 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg
 	if (sel.dport)
 		sel.dport_mask = ~0;
 
-	xp = xfrm_policy_bysel(pol->sadb_x_policy_dir-1, &sel, 1);
+	sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1];
+	memset(&tmp, 0, sizeof(struct xfrm_policy));
+
+	if (sec_ctx != NULL) {
+		struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx);
+
+		if (!uctx)
+			return -ENOMEM;
+
+		err = security_xfrm_policy_alloc(&tmp, uctx);
+		kfree(uctx);
+
+		if (err)
+			return err;
+	}
+
+	xp = xfrm_policy_bysel_ctx(pol->sadb_x_policy_dir-1, &sel, tmp.security, 1);
+	security_xfrm_policy_free(&tmp);
 	if (xp == NULL)
 		return -ENOENT;
 
@@ -2660,6 +2818,7 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt,
 {
 	struct xfrm_policy *xp;
 	struct sadb_x_policy *pol = (struct sadb_x_policy*)data;
+	struct sadb_x_sec_ctx *sec_ctx;
 
 	switch (family) {
 	case AF_INET:
@@ -2709,10 +2868,32 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt,
 	    (*dir = parse_ipsecrequests(xp, pol)) < 0)
 		goto out;
 
+	/* security context too */
+	if (len >= (pol->sadb_x_policy_len*8 +
+	    sizeof(struct sadb_x_sec_ctx))) {
+		char *p = (char *)pol;
+		struct xfrm_user_sec_ctx *uctx;
+
+		p += pol->sadb_x_policy_len*8;
+		sec_ctx = (struct sadb_x_sec_ctx *)p;
+		if (len < pol->sadb_x_policy_len*8 +
+		    sec_ctx->sadb_x_sec_len)
+			goto out;
+		if ((*dir = verify_sec_ctx_len(p)))
+			goto out;
+		uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx);
+		*dir = security_xfrm_policy_alloc(xp, uctx);
+		kfree(uctx);
+
+		if (*dir)
+			goto out;
+	}
+
 	*dir = pol->sadb_x_policy_dir-1;
 	return xp;
 
 out:
+	security_xfrm_policy_free(xp);
 	kfree(xp);
 	return NULL;
 }
@@ -2946,7 +3127,7 @@ out:
 	return err;
 }
 
-static struct proto_ops pfkey_ops = {
+static const struct proto_ops pfkey_ops = {
 	.family		=	PF_KEY,
 	.owner		=	THIS_MODULE,
 	/* Operations that make no sense on pfkey sockets. */
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index c3f0b078345..8171c53bc0e 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -36,7 +36,7 @@
 static u16 llc_ui_sap_last_autoport = LLC_SAP_DYN_START;
 static u16 llc_ui_sap_link_no_max[256];
 static struct sockaddr_llc llc_ui_addrnull;
-static struct proto_ops llc_ui_ops;
+static const struct proto_ops llc_ui_ops;
 
 static int llc_ui_wait_for_conn(struct sock *sk, long timeout);
 static int llc_ui_wait_for_disc(struct sock *sk, long timeout);
@@ -566,10 +566,9 @@ static int llc_wait_data(struct sock *sk, long timeo)
 		/*
 		 * POSIX 1003.1g mandates this order.
 		 */
-		if (sk->sk_err) {
-			rc = sock_error(sk);
+		rc = sock_error(sk);
+		if (rc)
 			break;
-		}
 		rc = 0;
 		if (sk->sk_shutdown & RCV_SHUTDOWN)
 			break;
@@ -960,7 +959,7 @@ out:
 static int llc_ui_ioctl(struct socket *sock, unsigned int cmd,
 			unsigned long arg)
 {
-	return dev_ioctl(cmd, (void __user *)arg);
+	return -ENOIOCTLCMD;
 }
 
 /**
@@ -1099,7 +1098,7 @@ static struct net_proto_family llc_ui_family_ops = {
 	.owner	= THIS_MODULE,
 };
 
-static struct proto_ops llc_ui_ops = {
+static const struct proto_ops llc_ui_ops = {
 	.family	     = PF_LLC,
 	.owner       = THIS_MODULE,
 	.release     = llc_ui_release,
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index a84f9221e5f..99c0a0fa4a9 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -61,8 +61,8 @@ config NF_CONNTRACK_MARK
 	  instead of the individual packets.
 
 config NF_CONNTRACK_EVENTS
-	bool "Connection tracking events"
-	depends on NF_CONNTRACK
+	bool "Connection tracking events (EXPERIMENTAL)"
+	depends on EXPERIMENTAL && NF_CONNTRACK
 	help
 	  If this option is enabled, the connection tracking code will
 	  provide a notifier chain that can be used by other kernel code
@@ -95,4 +95,269 @@ config NF_CONNTRACK_FTP
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NF_CT_NETLINK
+	tristate 'Connection tracking netlink interface (EXPERIMENTAL)'
+	depends on EXPERIMENTAL && NF_CONNTRACK && NETFILTER_NETLINK
+	depends on NF_CONNTRACK!=y || NETFILTER_NETLINK!=m
+	help
+	  This option enables support for a netlink-based userspace interface.
+
 endmenu
+
+config NETFILTER_XTABLES
+	tristate "Netfilter Xtables support (required for ip_tables)"
+	help
+	  This is required if you intend to use any of ip_tables,
+	  ip6_tables or arp_tables.
+
+# alphabetically ordered list of targets
+
+config NETFILTER_XT_TARGET_CLASSIFY
+	tristate '"CLASSIFY" target support'
+	depends on NETFILTER_XTABLES
+	help
+	  This option adds a `CLASSIFY' target, which enables the user to set
+	  the priority of a packet. Some qdiscs can use this value for
+	  classification, among these are:
+
+  	  atm, cbq, dsmark, pfifo_fast, htb, prio
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config NETFILTER_XT_TARGET_CONNMARK
+	tristate  '"CONNMARK" target support'
+	depends on NETFILTER_XTABLES
+	depends on IP_NF_MANGLE || IP6_NF_MANGLE
+	depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
+	help
+	  This option adds a `CONNMARK' target, which allows one to manipulate
+	  the connection mark value.  Similar to the MARK target, but
+	  affects the connection mark value rather than the packet mark value.
+	
+	  If you want to compile it as a module, say M here and read
+	  <file:Documentation/modules.txt>.  The module will be called
+	  xt_CONNMARK.o.  If unsure, say `N'.
+
+config NETFILTER_XT_TARGET_MARK
+	tristate '"MARK" target support'
+	depends on NETFILTER_XTABLES
+	help
+	  This option adds a `MARK' target, which allows you to create rules
+	  in the `mangle' table which alter the netfilter mark (nfmark) field
+	  associated with the packet prior to routing. This can change
+	  the routing method (see `Use netfilter MARK value as routing
+	  key') and can also be used by other subsystems to change their
+	  behavior.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config NETFILTER_XT_TARGET_NFQUEUE
+	tristate '"NFQUEUE" target Support'
+	depends on NETFILTER_XTABLES
+	help
+	  This target replaces the old, obsolete QUEUE target.
+
+	  As opposed to QUEUE, it supports 65535 different queues,
+	  not just one.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config NETFILTER_XT_TARGET_NOTRACK
+	tristate  '"NOTRACK" target support'
+	depends on NETFILTER_XTABLES
+	depends on IP_NF_RAW || IP6_NF_RAW
+	depends on IP_NF_CONNTRACK || NF_CONNTRACK
+	help
+	  The NOTRACK target allows a rule to select packets that
+	  should *not* enter the conntrack/NAT subsystem, with all
+	  the consequences this entails (no ICMP error tracking,
+	  no protocol helpers for the selected packets).
+	
+	  If you want to compile it as a module, say M here and read
+	  <file:Documentation/modules.txt>.  If unsure, say `N'.
+
+config NETFILTER_XT_MATCH_COMMENT
+	tristate  '"comment" match support'
+	depends on NETFILTER_XTABLES
+	help
+	  This option adds a `comment' dummy-match, which allows you to put
+	  comments in your iptables ruleset.
+
+	  If you want to compile it as a module, say M here and read
+	  <file:Documentation/modules.txt>.  If unsure, say `N'.
+
+config NETFILTER_XT_MATCH_CONNBYTES
+	tristate  '"connbytes" per-connection counter match support'
+	depends on NETFILTER_XTABLES
+	depends on (IP_NF_CONNTRACK && IP_NF_CT_ACCT) || NF_CT_ACCT
+	help
+	  This option adds a `connbytes' match, which allows you to match the
+	  number of bytes and/or packets for each direction within a connection.
+
+	  If you want to compile it as a module, say M here and read
+	  <file:Documentation/modules.txt>.  If unsure, say `N'.
+
+config NETFILTER_XT_MATCH_CONNMARK
+	tristate  '"connmark" connection mark match support'
+	depends on NETFILTER_XTABLES
+	depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || NF_CONNTRACK_MARK
+	help
+	  This option adds a `connmark' match, which allows you to match the
+	  connection mark value previously set for the session by `CONNMARK'. 
+	
+	  If you want to compile it as a module, say M here and read
+	  <file:Documentation/modules.txt>.  The module will be called
+	  xt_connmark.o.  If unsure, say `N'.
+
+config NETFILTER_XT_MATCH_CONNTRACK
+	tristate '"conntrack" connection tracking match support'
+	depends on NETFILTER_XTABLES
+	depends on IP_NF_CONNTRACK || NF_CONNTRACK
+	help
+	  This is a general conntrack match module, a superset of the state match.
+
+	  It allows matching on additional conntrack information, which is
+	  useful in complex configurations, such as NAT gateways with multiple
+	  internet links or tunnels.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config NETFILTER_XT_MATCH_DCCP
+	tristate  '"DCCP" protocol match support'
+	depends on NETFILTER_XTABLES
+	help
+	  With this option enabled, you will be able to use the iptables
+	  `dccp' match in order to match on DCCP source/destination ports
+	  and DCCP flags.
+
+	  If you want to compile it as a module, say M here and read
+	  <file:Documentation/modules.txt>.  If unsure, say `N'.
+
+config NETFILTER_XT_MATCH_HELPER
+	tristate '"helper" match support'
+	depends on NETFILTER_XTABLES
+	depends on IP_NF_CONNTRACK || NF_CONNTRACK
+	help
+	  Helper matching allows you to match packets in dynamic connections
+	  tracked by a conntrack-helper, e.g. ip_conntrack_ftp.
+
+	  To compile it as a module, choose M here.  If unsure, say Y.
+
+config NETFILTER_XT_MATCH_LENGTH
+	tristate '"length" match support'
+	depends on NETFILTER_XTABLES
+	help
+	  This option allows you to match the length of a packet against a
+	  specific value or range of values.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config NETFILTER_XT_MATCH_LIMIT
+	tristate '"limit" match support'
+	depends on NETFILTER_XTABLES
+	help
+	  limit matching allows you to control the rate at which a rule can be
+	  matched: mainly useful in combination with the LOG target ("LOG
+	  target support", below) and to avoid some Denial of Service attacks.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config NETFILTER_XT_MATCH_MAC
+	tristate '"mac" address match support'
+	depends on NETFILTER_XTABLES
+	help
+	  MAC matching allows you to match packets based on the source
+	  Ethernet address of the packet.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config NETFILTER_XT_MATCH_MARK
+	tristate '"mark" match support'
+	depends on NETFILTER_XTABLES
+	help
+	  Netfilter mark matching allows you to match packets based on the
+	  `nfmark' value in the packet.  This can be set by the MARK target
+	  (see below).
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config NETFILTER_XT_MATCH_PHYSDEV
+	tristate '"physdev" match support'
+	depends on NETFILTER_XTABLES && BRIDGE_NETFILTER
+	help
+	  Physdev packet matching matches against the physical bridge ports
+	  the IP packet arrived on or will leave by.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config NETFILTER_XT_MATCH_PKTTYPE
+	tristate '"pkttype" packet type match support'
+	depends on NETFILTER_XTABLES
+	help
+	  Packet type matching allows you to match a packet by
+	  its "class", eg. BROADCAST, MULTICAST, ...
+
+	  Typical usage:
+	  iptables -A INPUT -m pkttype --pkt-type broadcast -j LOG
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config NETFILTER_XT_MATCH_REALM
+	tristate  '"realm" match support'
+	depends on NETFILTER_XTABLES
+	select NET_CLS_ROUTE
+	help
+	  This option adds a `realm' match, which allows you to use the realm
+	  key from the routing subsystem inside iptables.
+	
+	  This match pretty much resembles the CONFIG_NET_CLS_ROUTE4 option 
+	  in tc world.
+	
+	  If you want to compile it as a module, say M here and read
+	  <file:Documentation/modules.txt>.  If unsure, say `N'.
+
+config NETFILTER_XT_MATCH_SCTP
+	tristate  '"sctp" protocol match support'
+	depends on NETFILTER_XTABLES
+	help
+	  With this option enabled, you will be able to use the 
+	  `sctp' match in order to match on SCTP source/destination ports
+	  and SCTP chunk types.
+
+	  If you want to compile it as a module, say M here and read
+	  <file:Documentation/modules.txt>.  If unsure, say `N'.
+
+config NETFILTER_XT_MATCH_STATE
+	tristate '"state" match support'
+	depends on NETFILTER_XTABLES
+	depends on IP_NF_CONNTRACK || NF_CONNTRACK
+	help
+	  Connection state matching allows you to match packets based on their
+	  relationship to a tracked connection (ie. previous packets).  This
+	  is a powerful tool for packet classification.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config NETFILTER_XT_MATCH_STRING
+	tristate  '"string" match support'
+	depends on NETFILTER_XTABLES
+	select TEXTSEARCH
+	select TEXTSEARCH_KMP
+	select TEXTSEARCH_BM
+	select TEXTSEARCH_FSM
+	help
+	  This option adds a `string' match, which allows you to look for
+	  pattern matchings in packets.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config NETFILTER_XT_MATCH_TCPMSS
+	tristate '"tcpmss" match support'
+	depends on NETFILTER_XTABLES
+	help
+	  This option adds a `tcpmss' match, which allows you to examine the
+	  MSS value of TCP SYN packets, which control the maximum packet size
+	  for that connection.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 55f019ad2c0..746172ebc91 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,4 +1,5 @@
 netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
+nf_conntrack-objs	:= nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o
 
 obj-$(CONFIG_NETFILTER) = netfilter.o
 
@@ -6,10 +7,43 @@ obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o
 obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o
 obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o
 
-nf_conntrack-objs	:= nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o
-
+# connection tracking
 obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o
-obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o
 
 # SCTP protocol connection tracking
 obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
+
+# netlink interface for nf_conntrack
+obj-$(CONFIG_NF_CT_NETLINK) += nf_conntrack_netlink.o
+
+# connection tracking helpers
+obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o
+
+# generic X tables 
+obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
+
+# targets
+obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o
+
+# matches
+obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_CONNMARK) += xt_connmark.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_MARK) += xt_mark.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_REALM) += xt_realm.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 1da678303d7..62bb509f05d 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -82,6 +82,8 @@ unsigned int nf_ct_log_invalid;
 static LIST_HEAD(unconfirmed);
 static int nf_conntrack_vmalloc;
 
+static unsigned int nf_conntrack_next_id = 1;
+static unsigned int nf_conntrack_expect_next_id = 1;
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
 struct notifier_block *nf_conntrack_chain;
 struct notifier_block *nf_conntrack_expect_chain;
@@ -184,7 +186,7 @@ DECLARE_MUTEX(nf_ct_cache_mutex);
 
 extern struct nf_conntrack_protocol nf_conntrack_generic_protocol;
 struct nf_conntrack_protocol *
-nf_ct_find_proto(u_int16_t l3proto, u_int8_t protocol)
+__nf_ct_proto_find(u_int16_t l3proto, u_int8_t protocol)
 {
 	if (unlikely(nf_ct_protos[l3proto] == NULL))
 		return &nf_conntrack_generic_protocol;
@@ -192,6 +194,50 @@ nf_ct_find_proto(u_int16_t l3proto, u_int8_t protocol)
 	return nf_ct_protos[l3proto][protocol];
 }
 
+/*
+ * Look up the protocol helper for (l3proto, protocol) and pin its module.
+ * If the module reference cannot be taken, the generic protocol is
+ * returned instead.  Preemption is disabled around the lookup.
+ */
+struct nf_conntrack_protocol *
+nf_ct_proto_find_get(u_int16_t l3proto, u_int8_t protocol)
+{
+	struct nf_conntrack_protocol *proto;
+
+	preempt_disable();
+	proto = __nf_ct_proto_find(l3proto, protocol);
+	if (proto && !try_module_get(proto->me))
+		proto = &nf_conntrack_generic_protocol;
+	preempt_enable();
+	return proto;
+}
+
+void nf_ct_proto_put(struct nf_conntrack_protocol *p)
+{
+	module_put(p->me);	/* pairs with nf_ct_proto_find_get() */
+}
+
+/* Look up the layer 3 protocol helper and pin its module; the generic
+ * l3proto is returned when the module reference cannot be taken. */
+struct nf_conntrack_l3proto *
+nf_ct_l3proto_find_get(u_int16_t l3proto)
+{
+	struct nf_conntrack_l3proto *l3;
+
+	preempt_disable();
+	l3 = __nf_ct_l3proto_find(l3proto);
+	if (l3 && !try_module_get(l3->me))
+		l3 = &nf_conntrack_generic_l3proto;
+	preempt_enable();
+
+	return l3;
+}
+
+void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p)
+{
+	module_put(p->me);	/* pairs with nf_ct_l3proto_find_get() */
+}
+
 static int nf_conntrack_hash_rnd_initted;
 static unsigned int nf_conntrack_hash_rnd;
 
@@ -384,7 +430,7 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
 }
 
 /* nf_conntrack_expect helper functions */
-static void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
+void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
 {
 	ASSERT_WRITE_LOCK(&nf_conntrack_lock);
 	NF_CT_ASSERT(!timer_pending(&exp->timeout));
@@ -404,6 +450,33 @@ static void expectation_timed_out(unsigned long ul_expect)
 	nf_conntrack_expect_put(exp);
 }
 
+/* First expectation matching @tuple, use count bumped; NULL if none. */
+struct nf_conntrack_expect *
+__nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
+{
+	struct nf_conntrack_expect *exp;
+
+	list_for_each_entry(exp, &nf_conntrack_expect_list, list)
+		if (nf_ct_tuple_mask_cmp(tuple, &exp->tuple, &exp->mask)) {
+			atomic_inc(&exp->use);
+			return exp;
+		}
+	return NULL;
+}
+
+/* Find an expectation matching @tuple; the lookup runs under the
+ * nf_conntrack_lock read lock.  Returns NULL if nothing matches. */
+struct nf_conntrack_expect *
+nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
+{
+	struct nf_conntrack_expect *exp;
+
+	read_lock_bh(&nf_conntrack_lock);
+	exp = __nf_conntrack_expect_find(tuple);
+	read_unlock_bh(&nf_conntrack_lock);
+	return exp;
+}
+
 /* If an expectation for this connection is found, it gets delete from
  * global list then returned. */
 static struct nf_conntrack_expect *
@@ -432,7 +505,7 @@ find_expectation(const struct nf_conntrack_tuple *tuple)
 }
 
 /* delete all expectations for this conntrack */
-static void remove_expectations(struct nf_conn *ct)
+void nf_ct_remove_expectations(struct nf_conn *ct)
 {
 	struct nf_conntrack_expect *i, *tmp;
 
@@ -462,7 +535,7 @@ clean_from_lists(struct nf_conn *ct)
 	LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
 
 	/* Destroy all pending expectations */
-	remove_expectations(ct);
+	nf_ct_remove_expectations(ct);
 }
 
 static void
@@ -482,12 +555,11 @@ destroy_conntrack(struct nf_conntrack *nfct)
 	/* To make sure we don't get any weird locking issues here:
 	 * destroy_conntrack() MUST NOT be called with a write lock
 	 * to nf_conntrack_lock!!! -HW */
-	l3proto = nf_ct_find_l3proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num);
+	l3proto = __nf_ct_l3proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num);
 	if (l3proto && l3proto->destroy)
 		l3proto->destroy(ct);
 
-	proto = nf_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num,
-				 ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
+	proto = __nf_ct_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
 	if (proto && proto->destroy)
 		proto->destroy(ct);
 
@@ -499,7 +571,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
 	 * except TFTP can create an expectation on the first packet,
 	 * before connection is in the list, so we need to clean here,
 	 * too. */
-	remove_expectations(ct);
+	nf_ct_remove_expectations(ct);
 
 	/* We overload first tuple to link into unconfirmed list. */
 	if (!nf_ct_is_confirmed(ct)) {
@@ -540,7 +612,7 @@ conntrack_tuple_cmp(const struct nf_conntrack_tuple_hash *i,
 		&& nf_ct_tuple_equal(tuple, &i->tuple);
 }
 
-static struct nf_conntrack_tuple_hash *
+struct nf_conntrack_tuple_hash *
 __nf_conntrack_find(const struct nf_conntrack_tuple *tuple,
 		    const struct nf_conn *ignored_conntrack)
 {
@@ -575,6 +647,29 @@ nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple,
 	return h;
 }
 
+static void __nf_conntrack_hash_insert(struct nf_conn *ct,
+				       unsigned int hash,
+				       unsigned int repl_hash) 
+{
+	ct->id = ++nf_conntrack_next_id;	/* counter starts at 1: first id is 2 */
+	list_prepend(&nf_conntrack_hash[hash],
+		     &ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+	list_prepend(&nf_conntrack_hash[repl_hash],
+		     &ct->tuplehash[IP_CT_DIR_REPLY].list);
+}
+
+/* Hash both tuples of @ct, then insert it under the write lock. */
+void nf_conntrack_hash_insert(struct nf_conn *ct)
+{
+	unsigned int orig, reply;
+
+	orig = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+	reply = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+	write_lock_bh(&nf_conntrack_lock);
+	__nf_conntrack_hash_insert(ct, orig, reply);
+	write_unlock_bh(&nf_conntrack_lock);
+}
+
 /* Confirm a connection given skb; places it in hash table */
 int
 __nf_conntrack_confirm(struct sk_buff **pskb)
@@ -621,10 +716,7 @@ __nf_conntrack_confirm(struct sk_buff **pskb)
 		/* Remove from unconfirmed list */
 		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
 
-		list_prepend(&nf_conntrack_hash[hash],
-			     &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
-		list_prepend(&nf_conntrack_hash[repl_hash],
-			     &ct->tuplehash[IP_CT_DIR_REPLY]);
+		__nf_conntrack_hash_insert(ct, hash, repl_hash);
 		/* Timer relative to confirmation time, not original
 		   setting time, otherwise we'd get timer wrap in
 		   weird delay cases. */
@@ -708,13 +800,41 @@ static inline int helper_cmp(const struct nf_conntrack_helper *i,
 }
 
 static struct nf_conntrack_helper *
-nf_ct_find_helper(const struct nf_conntrack_tuple *tuple)
+__nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
 {
 	return LIST_FIND(&helpers, helper_cmp,
 			 struct nf_conntrack_helper *,
 			 tuple);
 }
 
+/*
+ * Find the helper registered for @tuple and take a reference on its
+ * module.  Returns NULL when no helper matches or the module reference
+ * cannot be obtained.
+ */
+struct nf_conntrack_helper *
+nf_ct_helper_find_get(const struct nf_conntrack_tuple *tuple)
+{
+	struct nf_conntrack_helper *found;
+
+	/* nf_conntrack_lock keeps the helper around until
+	 * try_module_get() has been called */
+	read_lock_bh(&nf_conntrack_lock);
+	found = __nf_ct_helper_find(tuple);
+
+	/* pin the module so the helper cannot go away while the caller
+	 * is e.g. busy hashing a conntrack that uses it */
+	if (found && !try_module_get(found->me))
+		found = NULL;
+	read_unlock_bh(&nf_conntrack_lock);
+	return found;
+}
+
+void nf_ct_helper_put(struct nf_conntrack_helper *helper)
+{
+	module_put(helper->me);	/* pairs with nf_ct_helper_find_get() */
+}
+
 static struct nf_conn *
 __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
 		     const struct nf_conntrack_tuple *repl,
@@ -744,7 +864,7 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
 	/*  find features needed by this conntrack. */
 	features = l3proto->get_features(orig);
 	read_lock_bh(&nf_conntrack_lock);
-	if (nf_ct_find_helper(repl) != NULL)
+	if (__nf_ct_helper_find(repl) != NULL)
 		features |= NF_CT_F_HELP;
 	read_unlock_bh(&nf_conntrack_lock);
 
@@ -794,7 +914,7 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
 {
 	struct nf_conntrack_l3proto *l3proto;
 
-	l3proto = nf_ct_find_l3proto(orig->src.l3num);
+	l3proto = __nf_ct_l3proto_find(orig->src.l3num);
 	return __nf_conntrack_alloc(orig, repl, l3proto);
 }
 
@@ -853,7 +973,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
 		nf_conntrack_get(&conntrack->master->ct_general);
 		NF_CT_STAT_INC(expect_new);
 	} else {
-		conntrack->helper = nf_ct_find_helper(&repl_tuple);
+		conntrack->helper = __nf_ct_helper_find(&repl_tuple);
 
 		NF_CT_STAT_INC(new);
         }
@@ -947,13 +1067,13 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
 		return NF_ACCEPT;
 	}
 
-	l3proto = nf_ct_find_l3proto((u_int16_t)pf);
+	l3proto = __nf_ct_l3proto_find((u_int16_t)pf);
 	if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) {
 		DEBUGP("not prepared to track yet or error occured\n");
 		return -ret;
 	}
 
-	proto = nf_ct_find_proto((u_int16_t)pf, protonum);
+	proto = __nf_ct_proto_find((u_int16_t)pf, protonum);
 
 	/* It may be an special packet, error, unclean...
 	 * inverse of the return code tells to the netfilter
@@ -1002,9 +1122,9 @@ int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
 			 const struct nf_conntrack_tuple *orig)
 {
 	return nf_ct_invert_tuple(inverse, orig,
-				  nf_ct_find_l3proto(orig->src.l3num),
-				  nf_ct_find_proto(orig->src.l3num,
-						   orig->dst.protonum));
+				  __nf_ct_l3proto_find(orig->src.l3num),
+				  __nf_ct_proto_find(orig->src.l3num,
+						     orig->dst.protonum));
 }
 
 /* Would two expected things clash? */
@@ -1096,6 +1216,7 @@ static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
 	exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ;
 	add_timer(&exp->timeout);
 
+	exp->id = ++nf_conntrack_expect_next_id;
 	atomic_inc(&exp->use);
 	NF_CT_STAT_INC(expect_create);
 }
@@ -1129,6 +1250,7 @@ static inline int refresh_timer(struct nf_conntrack_expect *i)
 int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
 {
 	struct nf_conntrack_expect *i;
+	struct nf_conn *master = expect->master;
 	int ret;
 
 	DEBUGP("nf_conntrack_expect_related %p\n", related_to);
@@ -1149,9 +1271,9 @@ int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
 		}
 	}
 	/* Will be over limit? */
-	if (expect->master->helper->max_expected && 
-	    expect->master->expecting >= expect->master->helper->max_expected)
-		evict_oldest_expect(expect->master);
+	if (master->helper->max_expected && 
+	    master->expecting >= master->helper->max_expected)
+		evict_oldest_expect(master);
 
 	nf_conntrack_expect_insert(expect);
 	nf_conntrack_expect_event(IPEXP_NEW, expect);
@@ -1175,7 +1297,7 @@ void nf_conntrack_alter_reply(struct nf_conn *conntrack,
 
 	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
 	if (!conntrack->master && conntrack->expecting == 0)
-		conntrack->helper = nf_ct_find_helper(newreply);
+		conntrack->helper = __nf_ct_helper_find(newreply);
 	write_unlock_bh(&nf_conntrack_lock);
 }
 
@@ -1200,6 +1322,19 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
 	return 0;
 }
 
+/* Walk the helpers list and return the first entry whose name equals
+ * @name, or NULL when no entry matches. */
+struct nf_conntrack_helper *
+__nf_conntrack_helper_find_byname(const char *name)
+{
+	struct nf_conntrack_helper *cur;
+
+	list_for_each_entry(cur, &helpers, list)
+		if (strcmp(cur->name, name) == 0)
+			return cur;
+	return NULL;
+}
+
 static inline int unhelp(struct nf_conntrack_tuple_hash *i,
 			 const struct nf_conntrack_helper *me)
 {
@@ -1283,6 +1418,51 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
 		nf_conntrack_event_cache(event, skb);
 }
 
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
+ * in ip_conntrack_core, since we don't want the protocols to autoload
+ * or depend on ctnetlink */
+int nf_ct_port_tuple_to_nfattr(struct sk_buff *skb,
+			       const struct nf_conntrack_tuple *tuple)
+{
+	NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t),
+		&tuple->src.u.tcp.port);
+	NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t),
+		&tuple->dst.u.tcp.port);
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+static const size_t cta_min_proto[CTA_PROTO_MAX] = {
+	[CTA_PROTO_SRC_PORT-1]  = sizeof(u_int16_t),
+	[CTA_PROTO_DST_PORT-1]  = sizeof(u_int16_t)
+};
+
+/* Copy the source/destination port attributes in @tb into @t.  Returns 0,
+ * or -EINVAL if either port is missing or the cta_min_proto check fails. */
+int nf_ct_port_nfattr_to_tuple(struct nfattr *tb[],
+			       struct nf_conntrack_tuple *t)
+{
+	struct nfattr *src = tb[CTA_PROTO_SRC_PORT-1];
+	struct nfattr *dst = tb[CTA_PROTO_DST_PORT-1];
+
+	if (src == NULL || dst == NULL ||
+	    nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
+		return -EINVAL;
+
+	t->src.u.tcp.port = *(u_int16_t *)NFA_DATA(src);
+	t->dst.u.tcp.port = *(u_int16_t *)NFA_DATA(dst);
+	return 0;
+}
+#endif
+
 /* Used by ipt_REJECT and ip6t_REJECT. */
 void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
 {
@@ -1365,6 +1545,11 @@ static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size)
 			   get_order(sizeof(struct list_head) * size));
 }
 
+void nf_conntrack_flush(void)
+{
+	nf_ct_iterate_cleanup(kill_all, NULL);	/* flush the table via kill_all */
+}
+
 /* Mishearing the voices in his head, our hero wonders how he's
    supposed to kill the mall. */
 void nf_conntrack_cleanup(void)
@@ -1378,11 +1563,14 @@ void nf_conntrack_cleanup(void)
 
 	nf_ct_event_cache_flush();
  i_see_dead_people:
-	nf_ct_iterate_cleanup(kill_all, NULL);
+	nf_conntrack_flush();
 	if (atomic_read(&nf_conntrack_count) != 0) {
 		schedule();
 		goto i_see_dead_people;
 	}
+	/* wait until all references to nf_conntrack_untracked are dropped */
+	while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
+		schedule();
 
 	for (i = 0; i < NF_CT_F_NUM; i++) {
 		if (nf_ct_cache[i].use == 0)
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 65080e269f2..ab0c920f0d3 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -44,7 +44,7 @@ static unsigned int ports_c;
 module_param_array(ports, ushort, &ports_c, 0400);
 
 static int loose;
-module_param(loose, int, 0600);
+module_param(loose, bool, 0600);
 
 unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb,
 				enum ip_conntrack_info ctinfo,
@@ -545,11 +545,11 @@ static int help(struct sk_buff **pskb,
                    different IP address.  Simply don't record it for
                    NAT. */
 		if (cmd.l3num == PF_INET) {
-                	DEBUGP("conntrack_ftp: NOT RECORDING: %u,%u,%u,%u != %u.%u.%u.%u\n",
+                	DEBUGP("conntrack_ftp: NOT RECORDING: " NIPQUAD_FMT " != " NIPQUAD_FMT "\n",
 			       NIPQUAD(cmd.u3.ip),
 			       NIPQUAD(ct->tuplehash[dir].tuple.src.u3.ip));
 		} else {
-			DEBUGP("conntrack_ftp: NOT RECORDING: %x:%x:%x:%x:%x:%x:%x:%x != %x:%x:%x:%x:%x:%x:%x:%x\n",
+			DEBUGP("conntrack_ftp: NOT RECORDING: " NIP6_FMT " != " NIP6_FMT "\n",
 			       NIP6(*((struct in6_addr *)cmd.u3.ip6)),
 			       NIP6(*((struct in6_addr *)ct->tuplehash[dir]
 							.tuple.src.u3.ip6)));
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
new file mode 100644
index 00000000000..73ab16bc7d4
--- /dev/null
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -0,0 +1,1653 @@
+/* Connection tracking via netlink socket. Allows for user space
+ * protocol helpers and general trouble making from userspace.
+ *
+ * (C) 2001 by Jay Schulist <jschlst@samba.org>
+ * (C) 2002-2005 by Harald Welte <laforge@gnumonks.org>
+ * (C) 2003 by Patrick Mchardy <kaber@trash.net>
+ * (C) 2005 by Pablo Neira Ayuso <pablo@eurodev.net>
+ *
+ * I've reworked this stuff to use attributes instead of conntrack 
+ * structures. 5.44 am. I need more tea. --pablo 05/07/11.
+ *
+ * Initial connection tracking via netlink development funded and 
+ * generally made possible by Network Robots, Inc. (www.networkrobots.com)
+ *
+ * Further development of this code funded by Astaro AG (http://www.astaro.com)
+ *
+ * This software may be used and distributed according to the terms
+ * of the GNU General Public License, incorporated herein by reference.
+ *
+ * Derived from ip_conntrack_netlink.c: Port by Pablo Neira Ayuso (05/11/14)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/netlink.h>
+#include <linux/spinlock.h>
+#include <linux/notifier.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/ip_nat_protocol.h>
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+MODULE_LICENSE("GPL");
+
+static char __initdata version[] = "0.92";
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+
+static inline int
+ctnetlink_dump_tuples_proto(struct sk_buff *skb, 
+			    const struct nf_conntrack_tuple *tuple)
+{
+	struct nf_conntrack_protocol *proto;
+	int ret = 0;
+
+	NFA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum);
+
+	/* If no protocol helper is found, this function will return the
+	 * generic protocol helper, so proto won't *ever* be NULL */
+	proto = nf_ct_proto_find_get(tuple->src.l3num, tuple->dst.protonum);
+	if (likely(proto->tuple_to_nfattr))
+		ret = proto->tuple_to_nfattr(skb, tuple);
+	
+	nf_ct_proto_put(proto);
+
+	return ret;
+
+nfattr_failure:
+	return -1;
+}
+
+/* Emit @tuple as nested CTA_TUPLE_IP and CTA_TUPLE_PROTO attributes.
+ * Returns the hook results, or -1 when NFA_NEST bails to nfattr_failure. */
+static inline int
+ctnetlink_dump_tuples(struct sk_buff *skb,
+		      const struct nf_conntrack_tuple *tuple)
+{
+	struct nfattr *nest_parms;
+	struct nf_conntrack_l3proto *l3proto;
+	int ret = 0;
+
+	/* Open the nest before taking the l3proto reference: the
+	 * nfattr_failure exit below does not drop any reference. */
+	nest_parms = NFA_NEST(skb, CTA_TUPLE_IP);
+	l3proto = nf_ct_l3proto_find_get(tuple->src.l3num);
+	if (likely(l3proto->tuple_to_nfattr))
+		ret = l3proto->tuple_to_nfattr(skb, tuple);
+	NFA_NEST_END(skb, nest_parms);
+	nf_ct_l3proto_put(l3proto);
+	if (unlikely(ret < 0))
+		return ret;
+
+	nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO);
+	ret = ctnetlink_dump_tuples_proto(skb, tuple);
+	NFA_NEST_END(skb, nest_parms);
+	return ret;
+
+nfattr_failure:
+	return -1;
+}
+
+static inline int
+ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct)
+{
+	u_int32_t status = htonl((u_int32_t) ct->status);
+	NFA_PUT(skb, CTA_STATUS, sizeof(status), &status);
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+/* Put @ct's remaining timeout into @skb as CTA_TIMEOUT: whole seconds in
+ * network byte order, or 0 once the expiry time lies in the past.
+ * Returns 0, or -1 when NFA_PUT bails out to nfattr_failure. */
+static inline int
+ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct)
+{
+	/* jiffies left until expiry; negative when already overdue */
+	long remaining = ct->timeout.expires - jiffies;
+	/* clamp overdue timers to zero, otherwise convert to seconds */
+	u_int32_t timeout = remaining < 0 ? 0 : htonl(remaining / HZ);
+
+	NFA_PUT(skb, CTA_TIMEOUT, sizeof(timeout), &timeout);
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+/* Dump the protocol-private state of @ct as a nested CTA_PROTOINFO
+ * attribute.  Returns 0 when the protocol has no to_nfattr hook, the
+ * hook's result otherwise, or -1 when NFA_NEST bails to nfattr_failure. */
+static inline int
+ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct nf_conn *ct)
+{
+	struct nf_conntrack_protocol *proto = nf_ct_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
+	struct nfattr *nest_proto;
+	int ret;
+
+	if (!proto->to_nfattr) {
+		nf_ct_proto_put(proto);
+		return 0;
+	}
+
+	nest_proto = NFA_NEST(skb, CTA_PROTOINFO);
+	ret = proto->to_nfattr(skb, nest_proto, ct);
+	nf_ct_proto_put(proto);
+	NFA_NEST_END(skb, nest_proto);
+	return ret;
+
+nfattr_failure:
+	/* reached from NFA_NEST with the protocol reference still held */
+	nf_ct_proto_put(proto);
+	return -1;
+}
+
+static inline int
+ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct nf_conn *ct)
+{
+	struct nfattr *nest_helper;
+
+	if (!ct->helper)
+		return 0;
+		
+	nest_helper = NFA_NEST(skb, CTA_HELP);
+	NFA_PUT(skb, CTA_HELP_NAME, strlen(ct->helper->name), ct->helper->name);
+
+	if (ct->helper->to_nfattr)
+		ct->helper->to_nfattr(skb, ct);
+
+	NFA_NEST_END(skb, nest_helper);
+
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+#ifdef CONFIG_NF_CT_ACCT
+static inline int
+ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct,
+			enum ip_conntrack_dir dir)
+{
+	enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
+	struct nfattr *nest_count = NFA_NEST(skb, type);
+	u_int32_t tmp;
+
+	tmp = htonl(ct->counters[dir].packets);
+	NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp);
+
+	tmp = htonl(ct->counters[dir].bytes);
+	NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp);
+
+	NFA_NEST_END(skb, nest_count);
+
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+#else
+#define ctnetlink_dump_counters(a, b, c) (0)
+#endif
+
+#ifdef CONFIG_NF_CONNTRACK_MARK
+static inline int
+ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct)
+{
+	u_int32_t mark = htonl(ct->mark);
+
+	NFA_PUT(skb, CTA_MARK, sizeof(u_int32_t), &mark);
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+#else
+#define ctnetlink_dump_mark(a, b) (0)
+#endif
+
+static inline int
+ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
+{
+	u_int32_t id = htonl(ct->id);
+	NFA_PUT(skb, CTA_ID, sizeof(u_int32_t), &id);
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+static inline int
+ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct)
+{
+	u_int32_t use = htonl(atomic_read(&ct->ct_general.use));
+	
+	NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use);
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+#define tuple(ct, dir) (&(ct)->tuplehash[dir].tuple)
+
+static int
+ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
+		    int event, int nowait, 
+		    const struct nf_conn *ct)
+{
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfmsg;
+	struct nfattr *nest_parms;
+	unsigned char *b;
+
+	b = skb->tail;
+
+	event |= NFNL_SUBSYS_CTNETLINK << 8;
+	nlh    = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
+	nfmsg  = NLMSG_DATA(nlh);
+
+	nlh->nlmsg_flags    = (nowait && pid) ? NLM_F_MULTI : 0;
+	nfmsg->nfgen_family = 
+		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
+	nfmsg->version      = NFNETLINK_V0;
+	nfmsg->res_id	    = 0;
+
+	nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG);
+	if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
+		goto nfattr_failure;
+	NFA_NEST_END(skb, nest_parms);
+	
+	nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY);
+	if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
+		goto nfattr_failure;
+	NFA_NEST_END(skb, nest_parms);
+
+	if (ctnetlink_dump_status(skb, ct) < 0 ||
+	    ctnetlink_dump_timeout(skb, ct) < 0 ||
+	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
+	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
+	    ctnetlink_dump_protoinfo(skb, ct) < 0 ||
+	    ctnetlink_dump_helpinfo(skb, ct) < 0 ||
+	    ctnetlink_dump_mark(skb, ct) < 0 ||
+	    ctnetlink_dump_id(skb, ct) < 0 ||
+	    ctnetlink_dump_use(skb, ct) < 0)
+		goto nfattr_failure;
+
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+nfattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+static int ctnetlink_conntrack_event(struct notifier_block *this,
+                                     unsigned long events, void *ptr)
+{
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfmsg;
+	struct nfattr *nest_parms;
+	struct nf_conn *ct = (struct nf_conn *)ptr;
+	struct sk_buff *skb;
+	unsigned int type;
+	unsigned char *b;
+	unsigned int flags = 0, group;
+
+	/* ignore our fake conntrack entry */
+	if (ct == &nf_conntrack_untracked)
+		return NOTIFY_DONE;
+
+	if (events & IPCT_DESTROY) {
+		type = IPCTNL_MSG_CT_DELETE;
+		group = NFNLGRP_CONNTRACK_DESTROY;
+	} else  if (events & (IPCT_NEW | IPCT_RELATED)) {
+		type = IPCTNL_MSG_CT_NEW;
+		flags = NLM_F_CREATE|NLM_F_EXCL;
+		/* dump everything */
+		events = ~0UL;
+		group = NFNLGRP_CONNTRACK_NEW;
+	} else  if (events & (IPCT_STATUS |
+		      IPCT_PROTOINFO |
+		      IPCT_HELPER |
+		      IPCT_HELPINFO |
+		      IPCT_NATINFO)) {
+		type = IPCTNL_MSG_CT_NEW;
+		group = NFNLGRP_CONNTRACK_UPDATE;
+	} else
+		return NOTIFY_DONE;
+	
+  /* FIXME: Check if there are any listeners before, don't hurt performance */
+	
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+	if (!skb)
+		return NOTIFY_DONE;
+
+	b = skb->tail;
+
+	type |= NFNL_SUBSYS_CTNETLINK << 8;
+	nlh   = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
+	nfmsg = NLMSG_DATA(nlh);
+
+	nlh->nlmsg_flags    = flags;
+	nfmsg->nfgen_family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
+	nfmsg->version	= NFNETLINK_V0;
+	nfmsg->res_id	= 0;
+
+	nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG);
+	if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
+		goto nfattr_failure;
+	NFA_NEST_END(skb, nest_parms);
+	
+	nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY);
+	if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
+		goto nfattr_failure;
+	NFA_NEST_END(skb, nest_parms);
+	
+	/* NAT stuff is now a status flag */
+	if ((events & IPCT_STATUS || events & IPCT_NATINFO)
+	    && ctnetlink_dump_status(skb, ct) < 0)
+		goto nfattr_failure;
+	if (events & IPCT_REFRESH
+	    && ctnetlink_dump_timeout(skb, ct) < 0)
+		goto nfattr_failure;
+	if (events & IPCT_PROTOINFO
+	    && ctnetlink_dump_protoinfo(skb, ct) < 0)
+		goto nfattr_failure;
+	if (events & IPCT_HELPINFO
+	    && ctnetlink_dump_helpinfo(skb, ct) < 0)
+		goto nfattr_failure;
+
+	if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
+	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
+		goto nfattr_failure;
+
+	nlh->nlmsg_len = skb->tail - b;
+	nfnetlink_send(skb, 0, group, 0);
+	return NOTIFY_DONE;
+
+nlmsg_failure:
+nfattr_failure:
+	kfree_skb(skb);
+	return NOTIFY_DONE;
+}
+#endif /* CONFIG_NF_CONNTRACK_EVENTS */
+
+static int ctnetlink_done(struct netlink_callback *cb)
+{
+	DEBUGP("entered %s\n", __FUNCTION__);
+	return 0;
+}
+
+#define L3PROTO(ct) ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num
+
+static int
+ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct nf_conn *ct = NULL;
+	struct nf_conntrack_tuple_hash *h;
+	struct list_head *i;
+	u_int32_t *id = (u_int32_t *) &cb->args[1];
+	struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh);
+	u_int8_t l3proto = nfmsg->nfgen_family;
+
+	DEBUGP("entered %s, last bucket=%lu id=%u\n", __FUNCTION__, 
+			cb->args[0], *id);
+
+	read_lock_bh(&nf_conntrack_lock);
+	for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++, *id = 0) {
+		list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) {
+			h = (struct nf_conntrack_tuple_hash *) i;
+			if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
+				continue;
+			ct = nf_ct_tuplehash_to_ctrack(h);
+			/* Dump entries of a given L3 protocol number.
+			 * If it is not specified, ie. l3proto == 0,
+			 * then dump everything. */
+			if (l3proto && L3PROTO(ct) != l3proto)
+				continue;
+			if (ct->id <= *id)
+				continue;
+			if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
+		                        	cb->nlh->nlmsg_seq,
+						IPCTNL_MSG_CT_NEW,
+						1, ct) < 0)
+				goto out;
+			*id = ct->id;
+		}
+	}
+out:	
+	read_unlock_bh(&nf_conntrack_lock);
+
+	DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
+
+	return skb->len;
+}
+
+#ifdef CONFIG_NF_CT_ACCT
+static int
+ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct nf_conn *ct = NULL;
+	struct nf_conntrack_tuple_hash *h;
+	struct list_head *i;
+	u_int32_t *id = (u_int32_t *) &cb->args[1];
+	struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh);
+	u_int8_t l3proto = nfmsg->nfgen_family;	
+
+	DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__, 
+			cb->args[0], *id);
+
+	write_lock_bh(&nf_conntrack_lock);
+	for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++, *id = 0) {
+		list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) {
+			h = (struct nf_conntrack_tuple_hash *) i;
+			if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
+				continue;
+			ct = nf_ct_tuplehash_to_ctrack(h);
+			if (l3proto && L3PROTO(ct) != l3proto)
+				continue;
+			if (ct->id <= *id)
+				continue;
+			if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
+		                        	cb->nlh->nlmsg_seq,
+						IPCTNL_MSG_CT_NEW,
+						1, ct) < 0)
+				goto out;
+			*id = ct->id;
+
+			memset(&ct->counters, 0, sizeof(ct->counters));
+		}
+	}
+out:	
+	write_unlock_bh(&nf_conntrack_lock);
+
+	DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
+
+	return skb->len;
+}
+#endif
+
+static inline int
+ctnetlink_parse_tuple_ip(struct nfattr *attr, struct nf_conntrack_tuple *tuple)
+{
+	struct nfattr *tb[CTA_IP_MAX];
+	struct nf_conntrack_l3proto *l3proto;
+	int ret = 0;
+
+	DEBUGP("entered %s\n", __FUNCTION__);
+
+	nfattr_parse_nested(tb, CTA_IP_MAX, attr);
+
+	l3proto = nf_ct_l3proto_find_get(tuple->src.l3num);
+
+	if (likely(l3proto->nfattr_to_tuple))
+		ret = l3proto->nfattr_to_tuple(tb, tuple);
+
+	nf_ct_l3proto_put(l3proto);
+
+	DEBUGP("leaving\n");
+
+	return ret;
+}
+
+static const size_t cta_min_proto[CTA_PROTO_MAX] = {
+	[CTA_PROTO_NUM-1]	= sizeof(u_int8_t),
+};
+
+static inline int
+ctnetlink_parse_tuple_proto(struct nfattr *attr, 
+			    struct nf_conntrack_tuple *tuple)
+{
+	struct nfattr *tb[CTA_PROTO_MAX];
+	struct nf_conntrack_protocol *proto;
+	int ret = 0;
+
+	DEBUGP("entered %s\n", __FUNCTION__);
+
+	nfattr_parse_nested(tb, CTA_PROTO_MAX, attr);
+
+	if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
+		return -EINVAL;
+
+	if (!tb[CTA_PROTO_NUM-1])
+		return -EINVAL;
+	tuple->dst.protonum = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]);
+
+	proto = nf_ct_proto_find_get(tuple->src.l3num, tuple->dst.protonum);
+
+	if (likely(proto->nfattr_to_tuple))
+		ret = proto->nfattr_to_tuple(tb, tuple);
+
+	nf_ct_proto_put(proto);
+	
+	return ret;
+}
+
+static inline int
+ctnetlink_parse_tuple(struct nfattr *cda[], struct nf_conntrack_tuple *tuple,
+		      enum ctattr_tuple type, u_int8_t l3num)
+{
+	struct nfattr *tb[CTA_TUPLE_MAX];
+	int err;
+
+	DEBUGP("entered %s\n", __FUNCTION__);
+
+	memset(tuple, 0, sizeof(*tuple));
+
+	nfattr_parse_nested(tb, CTA_TUPLE_MAX, cda[type-1]);
+
+	if (!tb[CTA_TUPLE_IP-1])
+		return -EINVAL;
+
+	tuple->src.l3num = l3num;
+
+	err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP-1], tuple);
+	if (err < 0)
+		return err;
+
+	if (!tb[CTA_TUPLE_PROTO-1])
+		return -EINVAL;
+
+	err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO-1], tuple);
+	if (err < 0)
+		return err;
+
+	/* orig and expect tuples get DIR_ORIGINAL */
+	if (type == CTA_TUPLE_REPLY)
+		tuple->dst.dir = IP_CT_DIR_REPLY;
+	else
+		tuple->dst.dir = IP_CT_DIR_ORIGINAL;
+
+	NF_CT_DUMP_TUPLE(tuple);
+
+	DEBUGP("leaving\n");
+
+	return 0;
+}
+
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+static const size_t cta_min_protonat[CTA_PROTONAT_MAX] = {
+	[CTA_PROTONAT_PORT_MIN-1]       = sizeof(u_int16_t),
+	[CTA_PROTONAT_PORT_MAX-1]       = sizeof(u_int16_t),
+};
+
+static int ctnetlink_parse_nat_proto(struct nfattr *attr,
+				     const struct nf_conn *ct,
+				     struct ip_nat_range *range)
+{
+	struct nfattr *tb[CTA_PROTONAT_MAX];
+	struct ip_nat_protocol *npt;
+
+	DEBUGP("entered %s\n", __FUNCTION__);
+
+	nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr);
+
+	if (nfattr_bad_size(tb, CTA_PROTONAT_MAX, cta_min_protonat))
+		return -EINVAL;
+
+	npt = ip_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
+
+	if (!npt->nfattr_to_range) {
+		ip_nat_proto_put(npt);
+		return 0;
+	}
+
+	/* nfattr_to_range returns 1 if it parsed, 0 if not, neg. on error */
+	if (npt->nfattr_to_range(tb, range) > 0)
+		range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+
+	ip_nat_proto_put(npt);
+
+	DEBUGP("leaving\n");
+	return 0;
+}
+
+static const size_t cta_min_nat[CTA_NAT_MAX] = {
+	[CTA_NAT_MINIP-1]       = sizeof(u_int32_t),
+	[CTA_NAT_MAXIP-1]       = sizeof(u_int32_t),
+};
+
+static inline int
+ctnetlink_parse_nat(struct nfattr *cda[],
+		    const struct nf_conn *ct, struct ip_nat_range *range)
+{
+	struct nfattr *tb[CTA_NAT_MAX];
+	int err;
+
+	DEBUGP("entered %s\n", __FUNCTION__);
+
+	memset(range, 0, sizeof(*range));
+	
+	nfattr_parse_nested(tb, CTA_NAT_MAX, cda[CTA_NAT-1]);
+
+	if (nfattr_bad_size(tb, CTA_NAT_MAX, cta_min_nat))
+		return -EINVAL;
+
+	if (tb[CTA_NAT_MINIP-1])
+		range->min_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MINIP-1]);
+
+	if (!tb[CTA_NAT_MAXIP-1])
+		range->max_ip = range->min_ip;
+	else
+		range->max_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MAXIP-1]);
+
+	if (range->min_ip)
+		range->flags |= IP_NAT_RANGE_MAP_IPS;
+
+	if (!tb[CTA_NAT_PROTO-1])
+		return 0;
+
+	err = ctnetlink_parse_nat_proto(tb[CTA_NAT_PROTO-1], ct, range);
+	if (err < 0)
+		return err;
+
+	DEBUGP("leaving\n");
+	return 0;
+}
+#endif
+
+static inline int
+ctnetlink_parse_help(struct nfattr *attr, char **helper_name)
+{
+	struct nfattr *tb[CTA_HELP_MAX];
+
+	DEBUGP("entered %s\n", __FUNCTION__);
+
+	nfattr_parse_nested(tb, CTA_HELP_MAX, attr);
+
+	if (!tb[CTA_HELP_NAME-1])
+		return -EINVAL;
+
+	*helper_name = NFA_DATA(tb[CTA_HELP_NAME-1]);
+
+	return 0;
+}
+
+static const size_t cta_min[CTA_MAX] = {
+	[CTA_STATUS-1] 		= sizeof(u_int32_t),
+	[CTA_TIMEOUT-1] 	= sizeof(u_int32_t),
+	[CTA_MARK-1]		= sizeof(u_int32_t),
+	[CTA_USE-1]		= sizeof(u_int32_t),
+	[CTA_ID-1]		= sizeof(u_int32_t)
+};
+
+static int
+ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, 
+			struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+{
+	struct nf_conntrack_tuple_hash *h;
+	struct nf_conntrack_tuple tuple;
+	struct nf_conn *ct;
+	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+	u_int8_t u3 = nfmsg->nfgen_family;
+	int err = 0;
+
+	DEBUGP("entered %s\n", __FUNCTION__);
+
+	if (nfattr_bad_size(cda, CTA_MAX, cta_min))
+		return -EINVAL;
+
+	if (cda[CTA_TUPLE_ORIG-1])
+		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3);
+	else if (cda[CTA_TUPLE_REPLY-1])
+		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3);
+	else {
+		/* Flush the whole table */
+		nf_conntrack_flush();
+		return 0;
+	}
+
+	if (err < 0)
+		return err;
+
+	h = nf_conntrack_find_get(&tuple, NULL);
+	if (!h) {
+		DEBUGP("tuple not found in conntrack hash\n");
+		return -ENOENT;
+	}
+
+	ct = nf_ct_tuplehash_to_ctrack(h);
+	
+	if (cda[CTA_ID-1]) {
+		u_int32_t id = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_ID-1]));
+		if (ct->id != id) {
+			nf_ct_put(ct);
+			return -ENOENT;
+		}
+	}	
+	if (del_timer(&ct->timeout))
+		ct->timeout.function((unsigned long)ct);
+
+	nf_ct_put(ct);
+	DEBUGP("leaving\n");
+
+	return 0;
+}
+
+static int
+ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, 
+			struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+{
+	struct nf_conntrack_tuple_hash *h;
+	struct nf_conntrack_tuple tuple;
+	struct nf_conn *ct;
+	struct sk_buff *skb2 = NULL;
+	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+	u_int8_t u3 = nfmsg->nfgen_family;
+	int err = 0;
+
+	DEBUGP("entered %s\n", __FUNCTION__);
+
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+		u32 rlen;
+
+		if (NFNL_MSG_TYPE(nlh->nlmsg_type) ==
+					IPCTNL_MSG_CT_GET_CTRZERO) {
+#ifdef CONFIG_NF_CT_ACCT
+			if ((*errp = netlink_dump_start(ctnl, skb, nlh,
+						ctnetlink_dump_table_w,
+						ctnetlink_done)) != 0)
+				return -EINVAL;
+#else
+			return -ENOTSUPP;
+#endif
+		} else {
+			if ((*errp = netlink_dump_start(ctnl, skb, nlh,
+		      		                        ctnetlink_dump_table,
+		                                	ctnetlink_done)) != 0)
+			return -EINVAL;
+		}
+
+		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+		if (rlen > skb->len)
+			rlen = skb->len;
+		skb_pull(skb, rlen);
+		return 0;
+	}
+
+	if (nfattr_bad_size(cda, CTA_MAX, cta_min))
+		return -EINVAL;
+
+	if (cda[CTA_TUPLE_ORIG-1])
+		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3);
+	else if (cda[CTA_TUPLE_REPLY-1])
+		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3);
+	else
+		return -EINVAL;
+
+	if (err < 0)
+		return err;
+
+	h = nf_conntrack_find_get(&tuple, NULL);
+	if (!h) {
+		DEBUGP("tuple not found in conntrack hash");
+		return -ENOENT;
+	}
+	DEBUGP("tuple found\n");
+	ct = nf_ct_tuplehash_to_ctrack(h);
+
+	err = -ENOMEM;
+	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb2) {
+		nf_ct_put(ct);
+		return -ENOMEM;
+	}
+	NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid;
+
+	err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 
+				  IPCTNL_MSG_CT_NEW, 1, ct);
+	nf_ct_put(ct);
+	if (err <= 0)
+		goto free;
+
+	err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+	if (err < 0)
+		goto out;
+
+	DEBUGP("leaving\n");
+	return 0;
+
+free:
+	kfree_skb(skb2);
+out:
+	return err;
+}
+
+static inline int
+ctnetlink_change_status(struct nf_conn *ct, struct nfattr *cda[])
+{
+	unsigned long d;
+	unsigned status = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1]));
+	d = ct->status ^ status;
+
+	if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
+		/* unchangeable */
+		return -EINVAL;
+	
+	if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY))
+		/* SEEN_REPLY bit can only be set */
+		return -EINVAL;
+
+	
+	if (d & IPS_ASSURED && !(status & IPS_ASSURED))
+		/* ASSURED bit can only be set */
+		return -EINVAL;
+
+	if (cda[CTA_NAT-1]) {
+#ifndef CONFIG_IP_NF_NAT_NEEDED
+		return -EINVAL;
+#else
+		unsigned int hooknum;
+		struct ip_nat_range range;
+
+		if (ctnetlink_parse_nat(cda, ct, &range) < 0)
+			return -EINVAL;
+
+		DEBUGP("NAT: %u.%u.%u.%u-%u.%u.%u.%u:%u-%u\n", 
+		       NIPQUAD(range.min_ip), NIPQUAD(range.max_ip),
+		       htons(range.min.all), htons(range.max.all));
+		
+		/* This is tricky but it works. ip_nat_setup_info needs the
+		 * hook number as parameter, so let's do the correct 
+		 * conversion and run away */
+		if (status & IPS_SRC_NAT_DONE)
+			hooknum = NF_IP_POST_ROUTING; /* IP_NAT_MANIP_SRC */
+		else if (status & IPS_DST_NAT_DONE)
+			hooknum = NF_IP_PRE_ROUTING;  /* IP_NAT_MANIP_DST */
+		else 
+			return -EINVAL; /* Missing NAT flags */
+
+		DEBUGP("NAT status: %lu\n", 
+		       status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK));
+		
+		if (ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
+			return -EEXIST;
+		ip_nat_setup_info(ct, &range, hooknum);
+
+                DEBUGP("NAT status after setup_info: %lu\n",
+                       ct->status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK));
+#endif
+	}
+
+	/* Be careful here, modifying NAT bits can screw up things,
+	 * so don't let users modify them directly if they don't pass
+	 * ip_nat_range. */
+	ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK);
+	return 0;
+}
+
+
+/* Replace @ct's helper with the one named by the CTA_HELP attribute.
+ * An empty name detaches the current helper (removing its expectations);
+ * naming another helper zeroes the old helper's private data first.
+ * Returns 0 on success, -EINVAL for a conntrack with a master or an
+ * unknown non-empty name, or the error from ctnetlink_parse_help(). */
+static inline int
+ctnetlink_change_helper(struct nf_conn *ct, struct nfattr *cda[])
+{
+	struct nf_conntrack_helper *new_helper;
+	char *name;
+	int rc;
+
+	DEBUGP("entered %s\n", __FUNCTION__);
+
+	/* sibling connections keep whatever helper they have */
+	if (ct->master)
+		return -EINVAL;
+
+	rc = ctnetlink_parse_help(cda[CTA_HELP-1], &name);
+	if (rc < 0)
+		return rc;
+
+	/* A failed lookup is only acceptable for the empty name, which
+	 * stands for "no helper". */
+	new_helper = __nf_conntrack_helper_find_byname(name);
+	if (new_helper == NULL && strcmp(name, "") != 0)
+		return -EINVAL;
+
+	if (ct->helper != NULL && new_helper == NULL) {
+		/* we had a helper before and are losing it */
+		nf_ct_remove_expectations(ct);
+	} else if (ct->helper != NULL) {
+		/* helper swapped: zero the data of the old helper */
+		memset(&ct->help, 0, sizeof(ct->help));
+	}
+
+	ct->helper = new_helper;
+
+	return 0;
+}
+
+/* Re-arm @ct's timer to now + CTA_TIMEOUT seconds (attribute is in network
+ * byte order).  Returns -ETIME if del_timer() removed nothing, else 0. */
+static inline int
+ctnetlink_change_timeout(struct nf_conn *ct, struct nfattr *cda[])
+{
+	u_int32_t secs = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1]));
+
+	if (del_timer(&ct->timeout) == 0)
+		return -ETIME;
+	ct->timeout.expires = jiffies + secs * HZ;
+	add_timer(&ct->timeout);
+	return 0;
+}
+
+/* Hand the nested CTA_PROTOINFO attributes to the from_nfattr hook of @ct's
+ * protocol.  Returns the hook's result, or 0 if the protocol has no hook. */
+static inline int
+ctnetlink_change_protoinfo(struct nf_conn *ct, struct nfattr *cda[])
+{
+	const struct nf_conntrack_tuple *orig =
+		&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+	struct nfattr *tb[CTA_PROTOINFO_MAX];
+	struct nf_conntrack_protocol *proto;
+	int err;
+
+	nfattr_parse_nested(tb, CTA_PROTOINFO_MAX, cda[CTA_PROTOINFO-1]);
+
+	proto = nf_ct_proto_find_get(orig->src.l3num, orig->dst.protonum);
+	err = proto->from_nfattr ? proto->from_nfattr(tb, ct) : 0;
+	nf_ct_proto_put(proto);
+
+	return err;
+}
+
+/* Apply each attribute present in @cda (helper, timeout, status, protocol
+ * info and, when compiled in, mark) to the existing conntrack @ct.  Stops at
+ * the first handler that fails and returns its error; earlier changes are
+ * not rolled back.  Returns 0 when everything was applied. */
+static int
+ctnetlink_change_conntrack(struct nf_conn *ct, struct nfattr *cda[])
+{
+	int ret = 0;
+
+	DEBUGP("entered %s\n", __FUNCTION__);
+
+	if (cda[CTA_HELP-1])
+		ret = ctnetlink_change_helper(ct, cda);
+	if (ret < 0)
+		return ret;
+
+	if (cda[CTA_TIMEOUT-1])
+		ret = ctnetlink_change_timeout(ct, cda);
+	if (ret < 0)
+		return ret;
+
+	if (cda[CTA_STATUS-1])
+		ret = ctnetlink_change_status(ct, cda);
+	if (ret < 0)
+		return ret;
+
+	if (cda[CTA_PROTOINFO-1])
+		ret = ctnetlink_change_protoinfo(ct, cda);
+	if (ret < 0)
+		return ret;
+
+#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
+	if (cda[CTA_MARK-1])
+		ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1]));
+#endif
+
+	DEBUGP("all done\n");
+	return 0;
+}
+
+/* Allocate a conntrack for @otuple/@rtuple, set it up from @cda and add it
+ * with nf_conntrack_hash_insert().  CTA_TIMEOUT (seconds) is mandatory.
+ * Returns 0 or a negative errno; the new entry is freed on every failure. */
+static int
+ctnetlink_create_conntrack(struct nfattr *cda[],
+			   struct nf_conntrack_tuple *otuple,
+			   struct nf_conntrack_tuple *rtuple)
+{
+	struct nf_conn *ct;
+	int err = -EINVAL;
+
+	DEBUGP("entered %s\n", __FUNCTION__);
+
+	ct = nf_conntrack_alloc(otuple, rtuple);
+	if (ct == NULL || IS_ERR(ct))
+		return -ENOMEM;
+
+	if (!cda[CTA_TIMEOUT-1])
+		goto err;
+	ct->timeout.expires = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1]));
+	ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
+	ct->status |= IPS_CONFIRMED;
+
+	if (cda[CTA_STATUS-1]) {	/* optional, as on the update path */
+		err = ctnetlink_change_status(ct, cda);
+		if (err < 0)
+			goto err;
+	}
+	if (cda[CTA_PROTOINFO-1]) {
+		err = ctnetlink_change_protoinfo(ct, cda);
+		if (err < 0)
+			goto err;	/* a bare return would leak ct */
+	}
+
+#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
+	if (cda[CTA_MARK-1])
+		ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1]));
+#endif
+
+	ct->helper = nf_ct_helper_find_get(rtuple);
+	add_timer(&ct->timeout);
+	nf_conntrack_hash_insert(ct);
+	if (ct->helper)
+		nf_ct_helper_put(ct->helper);
+	DEBUGP("conntrack with id %u inserted\n", ct->id);
+	return 0;
+
+err:
+	nf_conntrack_free(ct);
+	return err;
+}
+
+/* IPCTNL_MSG_CT_NEW handler: update the conntrack matching the given tuple,
+ * or, with NLM_F_CREATE, create one (which needs both tuples, since an
+ * unparsed tuple is uninitialised stack).  Returns 0 or a negative errno. */
+static int
+ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
+			struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+{
+	struct nf_conntrack_tuple otuple, rtuple;
+	struct nf_conntrack_tuple_hash *h = NULL;
+	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+	u_int8_t u3 = nfmsg->nfgen_family;
+	int err = 0;
+
+	DEBUGP("entered %s\n", __FUNCTION__);
+
+	if (nfattr_bad_size(cda, CTA_MAX, cta_min))
+		return -EINVAL;
+
+	if (cda[CTA_TUPLE_ORIG-1]) {
+		err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG, u3);
+		if (err < 0)
+			return err;
+	}
+	if (cda[CTA_TUPLE_REPLY-1]) {
+		err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY, u3);
+		if (err < 0)
+			return err;
+	}
+
+	write_lock_bh(&nf_conntrack_lock);
+	if (cda[CTA_TUPLE_ORIG-1])
+		h = __nf_conntrack_find(&otuple, NULL);
+	else if (cda[CTA_TUPLE_REPLY-1])
+		h = __nf_conntrack_find(&rtuple, NULL);
+
+	if (h == NULL) {
+		write_unlock_bh(&nf_conntrack_lock);
+		DEBUGP("no such conntrack, create new\n");
+		if (!(nlh->nlmsg_flags & NLM_F_CREATE))
+			return -ENOENT;
+		if (!cda[CTA_TUPLE_ORIG-1] || !cda[CTA_TUPLE_REPLY-1])
+			return -EINVAL;
+		return ctnetlink_create_conntrack(cda, &otuple, &rtuple);
+	}
+
+	err = -EINVAL;	/* nat config is only allowed for new conntracks */
+	if (cda[CTA_NAT-1])
+		goto out_unlock;
+
+	/* We manipulate the conntrack inside the global conntrack table lock,
+	 * so there's no need to increase the refcount */
+	DEBUGP("conntrack found\n");
+	err = -EEXIST;
+	if (!(nlh->nlmsg_flags & NLM_F_EXCL))
+		err = ctnetlink_change_conntrack(nf_ct_tuplehash_to_ctrack(h), cda);
+
+out_unlock:
+	write_unlock_bh(&nf_conntrack_lock);
+	return err;
+}
+
+/*********************************************************************** 
+ * EXPECT 
+ ***********************************************************************/ 
+
+/* Emit @tuple to @skb wrapped in a nested attribute of the given @type.
+ * Returns 0 on success and -1 on any failure. */
+static inline int
+ctnetlink_exp_dump_tuple(struct sk_buff *skb,
+			 const struct nf_conntrack_tuple *tuple,
+			 enum ctattr_expect type)
+{
+	struct nfattr *nest = NFA_NEST(skb, type);
+
+	if (ctnetlink_dump_tuples(skb, tuple) >= 0) {
+		NFA_NEST_END(skb, nest);
+		return 0;
+	}
+
+nfattr_failure:
+	return -1;
+}
+
+/* Append the tuple, mask, master tuple, remaining timeout (seconds) and id
+ * of @exp to @skb as CTA_EXPECT_* attributes; 0 on success, -1 on failure. */
+static inline int
+ctnetlink_exp_dump_expect(struct sk_buff *skb,
+			  const struct nf_conntrack_expect *exp)
+{
+	const struct nf_conntrack_tuple *master_tuple =
+		&exp->master->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+	u_int32_t secs_left = htonl((exp->timeout.expires - jiffies) / HZ);
+	u_int32_t exp_id = htonl(exp->id);
+
+	if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0
+	    || ctnetlink_exp_dump_tuple(skb, &exp->mask, CTA_EXPECT_MASK) < 0
+	    || ctnetlink_exp_dump_tuple(skb, master_tuple,
+					CTA_EXPECT_MASTER) < 0)
+		goto nfattr_failure;
+
+	NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(secs_left), &secs_left);
+	NFA_PUT(skb, CTA_EXPECT_ID, sizeof(exp_id), &exp_id);
+
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+/* Build one expectation netlink message of type @event for @exp at the tail
+ * of @skb.  Returns skb->len on success; on failure trims @skb back to where
+ * it started and returns -1. */
+static int
+ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
+		    int event, int nowait,
+		    const struct nf_conntrack_expect *exp)
+{
+	unsigned char *start = skb->tail;
+	struct nfgenmsg *nfmsg;
+	struct nlmsghdr *nlh;
+
+	event |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
+	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
+	nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
+
+	nfmsg = NLMSG_DATA(nlh);
+	nfmsg->nfgen_family = exp->tuple.src.l3num;
+	nfmsg->version = NFNETLINK_V0;
+	nfmsg->res_id = 0;
+
+	if (ctnetlink_exp_dump_expect(skb, exp) < 0)
+		goto nfattr_failure;
+
+	nlh->nlmsg_len = skb->tail - start;
+	return skb->len;
+
+nlmsg_failure:
+nfattr_failure:
+	skb_trim(skb, start - skb->data);
+	return -1;
+}
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+/* Expectation event notifier: for IPEXP_NEW, send the expectation @ptr to
+ * the NFNLGRP_CONNTRACK_EXP_NEW group.  Other events, and allocation or
+ * message-build failures, are dropped.  Always returns NOTIFY_DONE. */
+static int ctnetlink_expect_event(struct notifier_block *this,
+				  unsigned long events, void *ptr)
+{
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfmsg;
+	struct nf_conntrack_expect *exp = (struct nf_conntrack_expect *)ptr;
+	struct sk_buff *skb;
+	unsigned int type = IPCTNL_MSG_EXP_NEW;
+	unsigned char *b;
+	int flags = NLM_F_CREATE|NLM_F_EXCL;
+
+	if (!(events & IPEXP_NEW))
+		return NOTIFY_DONE;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+	if (!skb)
+		return NOTIFY_DONE;
+
+	b = skb->tail;
+	/* expectation messages carry the _EXP subsystem id, as in fill_info */
+	type |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
+	nlh   = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
+	nfmsg = NLMSG_DATA(nlh);
+
+	nlh->nlmsg_flags    = flags;
+	nfmsg->nfgen_family = exp->tuple.src.l3num;
+	nfmsg->version	    = NFNETLINK_V0;
+	nfmsg->res_id	    = 0;
+
+	if (ctnetlink_exp_dump_expect(skb, exp) < 0)
+		goto nfattr_failure;
+
+	nlh->nlmsg_len = skb->tail - b;
+	nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0);
+	return NOTIFY_DONE;
+
+nlmsg_failure:
+nfattr_failure:
+	kfree_skb(skb);
+	return NOTIFY_DONE;
+}
+#endif
+
+/* Netlink dump callback: append each expectation whose id is above the last
+ * one dumped (kept in cb->args[0]) and whose l3 family matches the request
+ * (0 matches all), stopping when one cannot be added.  Returns skb->len. */
+static int
+ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct nf_conntrack_expect *exp = NULL;
+	struct list_head *i;
+	u_int32_t *id = (u_int32_t *) &cb->args[0];
+	struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh);
+	u_int8_t l3proto = nfmsg->nfgen_family;
+
+	DEBUGP("entered %s, last id=%u\n", __FUNCTION__, *id);
+
+	read_lock_bh(&nf_conntrack_lock);
+	list_for_each_prev(i, &nf_conntrack_expect_list) {
+		exp = list_entry(i, struct nf_conntrack_expect, list);
+		if (l3proto && exp->tuple.src.l3num != l3proto)
+			continue;
+		if (exp->id <= *id)
+			continue;
+		if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid,
+					    cb->nlh->nlmsg_seq,
+					    IPCTNL_MSG_EXP_NEW, 1, exp) < 0)
+			goto out;
+		*id = exp->id;
+	}
+out:
+	read_unlock_bh(&nf_conntrack_lock);
+	DEBUGP("leaving, last id=%u\n", *id);
+	return skb->len;
+}
+
+static const size_t cta_min_exp[CTA_EXPECT_MAX] = {
+	[CTA_EXPECT_TIMEOUT-1]          = sizeof(u_int32_t),
+	[CTA_EXPECT_ID-1]               = sizeof(u_int32_t)
+};
+
+/* IPCTNL_MSG_EXP_GET handler.  With NLM_F_DUMP, start a dump of the whole
+ * expectation list; otherwise look up the expectation by CTA_EXPECT_MASTER
+ * (optionally checked against CTA_EXPECT_ID) and unicast it to the sender.
+ * Returns 0 or netlink_unicast()'s result, or a negative error. */
+static int
+ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
+		     struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+{
+	struct nf_conntrack_tuple tuple;
+	struct nf_conntrack_expect *exp;
+	struct sk_buff *reply;
+	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+	u_int8_t u3 = nfmsg->nfgen_family;
+	int err = 0;
+
+	DEBUGP("entered %s\n", __FUNCTION__);
+
+	if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
+		return -EINVAL;
+
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+		u32 rlen;
+
+		*errp = netlink_dump_start(ctnl, skb, nlh,
+					   ctnetlink_exp_dump_table,
+					   ctnetlink_done);
+		if (*errp != 0)
+			return -EINVAL;
+		/* consume this message, but never more than the skb holds */
+		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+		skb_pull(skb, rlen > skb->len ? skb->len : rlen);
+		return 0;
+	}
+
+	if (!cda[CTA_EXPECT_MASTER-1])
+		return -EINVAL;
+
+	err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3);
+	if (err < 0)
+		return err;
+
+	exp = nf_conntrack_expect_find(&tuple);
+	if (!exp)
+		return -ENOENT;
+
+	err = -ENOENT;
+	if (cda[CTA_EXPECT_ID-1]
+	    && exp->id != ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1])))
+		goto out;
+
+	err = -ENOMEM;
+	reply = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!reply)
+		goto out;
+	NETLINK_CB(reply).dst_pid = NETLINK_CB(skb).pid;
+
+	err = ctnetlink_exp_fill_info(reply, NETLINK_CB(skb).pid,
+				      nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
+				      1, exp);
+	if (err <= 0)
+		goto free;
+
+	nf_conntrack_expect_put(exp);
+
+	return netlink_unicast(ctnl, reply, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+
+free:
+	kfree_skb(reply);
+out:
+	nf_conntrack_expect_put(exp);
+	return err;
+}
+
+/*
+ * IPCTNL_MSG_EXP_DELETE handler.  Three modes, chosen by the attributes:
+ *  - CTA_EXPECT_TUPLE: remove that one expectation (CTA_EXPECT_ID, when
+ *    given, must match its id);
+ *  - CTA_EXPECT_HELP_NAME: remove all expectations of the named helper;
+ *  - neither: remove every expectation.
+ * Returns 0, -EINVAL (bad attribute size or unknown helper), -ENOENT (no
+ * such expectation) or the tuple parse error.
+ */
+static int
+ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
+		     struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+{
+	struct nf_conntrack_expect *exp, *next;
+	struct nf_conntrack_tuple tuple;
+	struct nf_conntrack_helper *h = NULL;
+	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+	u_int8_t u3 = nfmsg->nfgen_family;
+	int err;
+
+	if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
+		return -EINVAL;
+
+	if (cda[CTA_EXPECT_TUPLE-1]) {
+		/* delete a single expect by tuple */
+		err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
+		if (err < 0)
+			return err;
+
+		/* bump usage count to 2 */
+		exp = nf_conntrack_expect_find(&tuple);
+		if (!exp)
+			return -ENOENT;
+
+		if (cda[CTA_EXPECT_ID-1] && exp->id !=
+		    ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]))) {
+			nf_conntrack_expect_put(exp);
+			return -ENOENT;
+		}
+
+		/* after list removal, usage count == 1 */
+		nf_conntrack_unexpect_related(exp);
+		/* have to put what we 'get' above.
+		 * after this line usage count == 0 */
+		nf_conntrack_expect_put(exp);
+		return 0;
+	}
+
+	/* Bulk removal: h stays NULL for "flush everything", otherwise only
+	 * expectations whose master conntrack uses helper h are dropped. */
+	write_lock_bh(&nf_conntrack_lock);
+	if (cda[CTA_EXPECT_HELP_NAME-1]) {
+		char *name = NFA_DATA(cda[CTA_EXPECT_HELP_NAME-1]);
+
+		h = __nf_conntrack_helper_find_byname(name);
+		if (!h) {
+			write_unlock_bh(&nf_conntrack_lock);
+			return -EINVAL;
+		}
+	}
+	list_for_each_entry_safe(exp, next, &nf_conntrack_expect_list, list) {
+		if (h && exp->master->helper != h)
+			continue;
+		/* unlink only when del_timer() returned nonzero */
+		if (del_timer(&exp->timeout)) {
+			nf_ct_unlink_expect(exp);
+			nf_conntrack_expect_put(exp);
+		}
+	}
+	write_unlock_bh(&nf_conntrack_lock);
+
+	return 0;
+}
+static int
+ctnetlink_change_expect(struct nf_conntrack_expect *x, struct nfattr *cda[])
+{
+	return -EOPNOTSUPP;	/* stub: both arguments are ignored */
+}
+
+/* Create an expectation from CTA_EXPECT_TUPLE/CTA_EXPECT_MASK and attach it
+ * to the conntrack found via CTA_EXPECT_MASTER, which must have a helper.
+ * Returns nf_conntrack_expect_related()'s result or a negative errno. */
+static int
+ctnetlink_create_expect(struct nfattr *cda[], u_int8_t u3)
+{
+	struct nf_conntrack_tuple tuple, mask, master_tuple;
+	struct nf_conntrack_tuple_hash *master_hash;
+	struct nf_conntrack_expect *exp;
+	struct nf_conn *master;
+	int ret;
+
+	DEBUGP("entered %s\n", __FUNCTION__);
+
+	/* caller guarantees that those three CTA_EXPECT_* exist */
+	ret = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
+	if (ret < 0)
+		return ret;
+	ret = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3);
+	if (ret < 0)
+		return ret;
+	ret = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3);
+	if (ret < 0)
+		return ret;
+
+	/* Look for master conntrack of this expectation */
+	master_hash = nf_conntrack_find_get(&master_tuple, NULL);
+	if (!master_hash)
+		return -ENOENT;
+	master = nf_ct_tuplehash_to_ctrack(master_hash);
+
+	ret = -EINVAL;	/* a master without a helper gets no expectation */
+	if (!master->helper)
+		goto out;
+
+	ret = -ENOMEM;
+	exp = nf_conntrack_expect_alloc(master);
+	if (!exp)
+		goto out;
+
+	exp->expectfn = NULL;
+	exp->flags = 0;
+	exp->master = master;
+	memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple));
+	memcpy(&exp->mask, &mask, sizeof(struct nf_conntrack_tuple));
+
+	ret = nf_conntrack_expect_related(exp);
+	nf_conntrack_expect_put(exp);
+
+out:
+	nf_ct_put(master);
+	return ret;
+}
+
+/* IPCTNL_MSG_EXP_NEW handler.  CTA_EXPECT_TUPLE, _MASK and _MASTER are all
+ * required.  If no expectation matches the tuple, one is created when
+ * NLM_F_CREATE is set (-ENOENT otherwise); if one exists, -EEXIST is
+ * returned under NLM_F_EXCL, else ctnetlink_change_expect()'s result. */
+static int
+ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
+		     struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+{
+	struct nf_conntrack_tuple tuple;
+	struct nf_conntrack_expect *exp;
+	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+	u_int8_t u3 = nfmsg->nfgen_family;
+	int err = 0;
+
+	DEBUGP("entered %s\n", __FUNCTION__);	
+
+	if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
+		return -EINVAL;
+
+	if (!(cda[CTA_EXPECT_TUPLE-1]
+	      && cda[CTA_EXPECT_MASK-1]
+	      && cda[CTA_EXPECT_MASTER-1]))
+		return -EINVAL;
+
+	err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
+	if (err < 0)
+		return err;
+
+	write_lock_bh(&nf_conntrack_lock);
+	exp = __nf_conntrack_expect_find(&tuple);
+	if (exp) {
+		err = (nlh->nlmsg_flags & NLM_F_EXCL)
+			? -EEXIST : ctnetlink_change_expect(exp, cda);
+		write_unlock_bh(&nf_conntrack_lock);
+		DEBUGP("leaving\n");
+		return err;
+	}
+	write_unlock_bh(&nf_conntrack_lock);
+
+	/* nothing matched: creating a new expectation is the only option */
+	if (!(nlh->nlmsg_flags & NLM_F_CREATE))
+		return -ENOENT;
+	return ctnetlink_create_expect(cda, u3);
+}
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+static struct notifier_block ctnl_notifier = {
+	.notifier_call	= ctnetlink_conntrack_event,
+};
+
+static struct notifier_block ctnl_notifier_exp = {
+	.notifier_call	= ctnetlink_expect_event,
+};
+#endif
+
+static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = {
+	[IPCTNL_MSG_CT_NEW]		= { .call = ctnetlink_new_conntrack,
+					    .attr_count = CTA_MAX, },
+	[IPCTNL_MSG_CT_GET] 		= { .call = ctnetlink_get_conntrack,
+					    .attr_count = CTA_MAX, },
+	[IPCTNL_MSG_CT_DELETE]  	= { .call = ctnetlink_del_conntrack,
+					    .attr_count = CTA_MAX, },
+	[IPCTNL_MSG_CT_GET_CTRZERO] 	= { .call = ctnetlink_get_conntrack,
+					    .attr_count = CTA_MAX, },
+};
+
+static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = {
+	[IPCTNL_MSG_EXP_GET]		= { .call = ctnetlink_get_expect,
+					    .attr_count = CTA_EXPECT_MAX, },
+	[IPCTNL_MSG_EXP_NEW]		= { .call = ctnetlink_new_expect,
+					    .attr_count = CTA_EXPECT_MAX, },
+	[IPCTNL_MSG_EXP_DELETE]		= { .call = ctnetlink_del_expect,
+					    .attr_count = CTA_EXPECT_MAX, },
+};
+
+static struct nfnetlink_subsystem ctnl_subsys = {
+	.name				= "conntrack",
+	.subsys_id			= NFNL_SUBSYS_CTNETLINK,
+	.cb_count			= IPCTNL_MSG_MAX,
+	.cb				= ctnl_cb,
+};
+
+static struct nfnetlink_subsystem ctnl_exp_subsys = {
+	.name				= "conntrack_expect",
+	.subsys_id			= NFNL_SUBSYS_CTNETLINK_EXP,
+	.cb_count			= IPCTNL_MSG_EXP_MAX,
+	.cb				= ctnl_exp_cb,
+};
+
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK);
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_EXP);
+static int __init ctnetlink_init(void)
+{
+	int ret;
+	/* Registers two nfnetlink subsystems, then (with events) two notifiers */
+	printk("ctnetlink v%s: registering with nfnetlink.\n", version);
+	ret = nfnetlink_subsys_register(&ctnl_subsys);
+	if (ret < 0) {
+		printk("ctnetlink_init: cannot register with nfnetlink.\n");
+		goto err_out;
+	}
+	/* from here on, a failure unwinds whatever was registered before it */
+	ret = nfnetlink_subsys_register(&ctnl_exp_subsys);
+	if (ret < 0) {
+		printk("ctnetlink_init: cannot register exp with nfnetlink.\n");
+		goto err_unreg_subsys;
+	}
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+	ret = nf_conntrack_register_notifier(&ctnl_notifier);
+	if (ret < 0) {
+		printk("ctnetlink_init: cannot register notifier.\n");
+		goto err_unreg_exp_subsys;
+	}
+
+	ret = nf_conntrack_expect_register_notifier(&ctnl_notifier_exp);
+	if (ret < 0) {
+		printk("ctnetlink_init: cannot expect register notifier.\n");
+		goto err_unreg_notifier;
+	}
+#endif
+
+	return 0;
+	/* error labels fall through, undoing registrations in reverse order */
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+err_unreg_notifier:
+	nf_conntrack_unregister_notifier(&ctnl_notifier);
+err_unreg_exp_subsys:
+	nfnetlink_subsys_unregister(&ctnl_exp_subsys);
+#endif
+err_unreg_subsys:
+	nfnetlink_subsys_unregister(&ctnl_subsys);
+err_out:
+	return ret;	/* negative result of the registration that failed */
+}
+
+/* Module exit: undo ctnetlink_init() in reverse order of registration. */
+static void __exit ctnetlink_exit(void)
+{
+	printk("ctnetlink: unregistering from nfnetlink.\n");
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+	/* registered via nf_conntrack_expect_register_notifier() in init */
+	nf_conntrack_expect_unregister_notifier(&ctnl_notifier_exp);
+	nf_conntrack_unregister_notifier(&ctnl_notifier);
+#endif
+
+	nfnetlink_subsys_unregister(&ctnl_exp_subsys);
+	nfnetlink_subsys_unregister(&ctnl_subsys);
+}
+
+module_init(ctnetlink_init);
+module_exit(ctnetlink_exit);
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 36425f6c833..46bc27e2756 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -17,7 +17,7 @@
 #include <linux/netfilter.h>
 #include <net/netfilter/nf_conntrack_protocol.h>
 
-unsigned long nf_ct_generic_timeout = 600*HZ;
+unsigned int nf_ct_generic_timeout = 600*HZ;
 
 static int generic_pkt_to_tuple(const struct sk_buff *skb,
 				unsigned int dataoff,
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 3a600f77b4e..cf798e61e37 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -62,15 +62,15 @@ static const char *sctp_conntrack_names[] = {
 #define HOURS * 60 MINS
 #define DAYS  * 24 HOURS
 
-static unsigned long nf_ct_sctp_timeout_closed            =  10 SECS;
-static unsigned long nf_ct_sctp_timeout_cookie_wait       =   3 SECS;
-static unsigned long nf_ct_sctp_timeout_cookie_echoed     =   3 SECS;
-static unsigned long nf_ct_sctp_timeout_established       =   5 DAYS;
-static unsigned long nf_ct_sctp_timeout_shutdown_sent     = 300 SECS / 1000;
-static unsigned long nf_ct_sctp_timeout_shutdown_recd     = 300 SECS / 1000;
-static unsigned long nf_ct_sctp_timeout_shutdown_ack_sent =   3 SECS;
-
-static unsigned long * sctp_timeouts[]
+static unsigned int nf_ct_sctp_timeout_closed            =  10 SECS;
+static unsigned int nf_ct_sctp_timeout_cookie_wait       =   3 SECS;
+static unsigned int nf_ct_sctp_timeout_cookie_echoed     =   3 SECS;
+static unsigned int nf_ct_sctp_timeout_established       =   5 DAYS;
+static unsigned int nf_ct_sctp_timeout_shutdown_sent     = 300 SECS / 1000;
+static unsigned int nf_ct_sctp_timeout_shutdown_recd     = 300 SECS / 1000;
+static unsigned int nf_ct_sctp_timeout_shutdown_ack_sent =   3 SECS;
+
+static unsigned int * sctp_timeouts[]
 = { NULL,                                  /* SCTP_CONNTRACK_NONE  */
     &nf_ct_sctp_timeout_closed,	           /* SCTP_CONNTRACK_CLOSED */
     &nf_ct_sctp_timeout_cookie_wait,       /* SCTP_CONNTRACK_COOKIE_WAIT */
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 5a6fcf349bd..df99138c3b3 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -93,21 +93,21 @@ static const char *tcp_conntrack_names[] = {
 #define HOURS * 60 MINS
 #define DAYS * 24 HOURS
 
-unsigned long nf_ct_tcp_timeout_syn_sent =      2 MINS;
-unsigned long nf_ct_tcp_timeout_syn_recv =     60 SECS;
-unsigned long nf_ct_tcp_timeout_established =   5 DAYS;
-unsigned long nf_ct_tcp_timeout_fin_wait =      2 MINS;
-unsigned long nf_ct_tcp_timeout_close_wait =   60 SECS;
-unsigned long nf_ct_tcp_timeout_last_ack =     30 SECS;
-unsigned long nf_ct_tcp_timeout_time_wait =     2 MINS;
-unsigned long nf_ct_tcp_timeout_close =        10 SECS;
+unsigned int nf_ct_tcp_timeout_syn_sent =      2 MINS;
+unsigned int nf_ct_tcp_timeout_syn_recv =     60 SECS;
+unsigned int nf_ct_tcp_timeout_established =   5 DAYS;
+unsigned int nf_ct_tcp_timeout_fin_wait =      2 MINS;
+unsigned int nf_ct_tcp_timeout_close_wait =   60 SECS;
+unsigned int nf_ct_tcp_timeout_last_ack =     30 SECS;
+unsigned int nf_ct_tcp_timeout_time_wait =     2 MINS;
+unsigned int nf_ct_tcp_timeout_close =        10 SECS;
 
 /* RFC1122 says the R2 limit should be at least 100 seconds.
    Linux uses 15 packets as limit, which corresponds 
    to ~13-30min depending on RTO. */
-unsigned long nf_ct_tcp_timeout_max_retrans =     5 MINS;
+unsigned int nf_ct_tcp_timeout_max_retrans =     5 MINS;
  
-static unsigned long * tcp_timeouts[]
+static unsigned int * tcp_timeouts[]
 = { NULL,                              /* TCP_CONNTRACK_NONE */
     &nf_ct_tcp_timeout_syn_sent,       /* TCP_CONNTRACK_SYN_SENT, */
     &nf_ct_tcp_timeout_syn_recv,       /* TCP_CONNTRACK_SYN_RECV, */
@@ -280,9 +280,9 @@ static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
  *	sCL -> sCL
  */
 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*ack*/	   { sIV, sIV, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
+/*ack*/	   { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
 /*
- *	sSS -> sIV	Might be a half-open connection.
+ *	sSS -> sIG	Might be a half-open connection.
  *	sSR -> sSR	Might answer late resent SYN.
  *	sES -> sES	:-)
  *	sFW -> sCW	Normal close request answered by ACK.
@@ -912,8 +912,12 @@ static int tcp_packet(struct nf_conn *conntrack,
 
 	switch (new_state) {
 	case TCP_CONNTRACK_IGNORE:
-		/* Either SYN in ORIGINAL
-		 * or SYN/ACK in REPLY. */
+		/* Ignored packets:
+		 *
+		 * a) SYN in ORIGINAL
+		 * b) SYN/ACK in REPLY
+		 * c) ACK in reply direction after initial SYN in original. 
+		 */
 		if (index == TCP_SYNACK_SET
 		    && conntrack->proto.tcp.last_index == TCP_SYN_SET
 		    && conntrack->proto.tcp.last_dir != dir
@@ -979,16 +983,23 @@ static int tcp_packet(struct nf_conn *conntrack,
 		}
 	case TCP_CONNTRACK_CLOSE:
 		if (index == TCP_RST_SET
-		    && test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
-		    && conntrack->proto.tcp.last_index == TCP_SYN_SET
+		    && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
+		         && conntrack->proto.tcp.last_index == TCP_SYN_SET)
+		        || (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
+		            && conntrack->proto.tcp.last_index == TCP_ACK_SET))
 		    && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
-			/* RST sent to invalid SYN we had let trough
-			 * SYN was in window then, tear down connection.
+			/* RST sent to invalid SYN or ACK we had let through
+			 * at a) and c) above:
+			 *
+			 * a) SYN was in window then
+			 * c) we hold a half-open connection.
+			 *
+			 * Delete our connection entry.
 			 * We skip window checking, because packet might ACK
-			 * segments we ignored in the SYN. */
+			 * segments we ignored. */
 			goto in_window;
 		}
-		/* Just fall trough */
+		/* Just fall through */
 	default:
 		/* Keep compilers happy. */
 		break;
@@ -1136,6 +1147,63 @@ static int tcp_new(struct nf_conn *conntrack,
 		receiver->td_scale);
 	return 1;
 }
+
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+/* Dump the TCP conntrack state of @ct as a nested CTA_PROTOINFO_TCP
+ * attribute; 0 on success, -1 when the attribute macros bail out. */
+static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
+			 const struct nf_conn *ct)
+{
+	struct nfattr *nest;
+
+	read_lock_bh(&tcp_lock);
+	nest = NFA_NEST(skb, CTA_PROTOINFO_TCP);
+	NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
+		&ct->proto.tcp.state);
+	read_unlock_bh(&tcp_lock);
+	NFA_NEST_END(skb, nest);
+	return 0;
+
+nfattr_failure:
+	read_unlock_bh(&tcp_lock);
+	return -1;
+}
+
+static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = {
+	[CTA_PROTOINFO_TCP_STATE-1]	= sizeof(u_int8_t),
+};
+
+/* Set @ct's TCP state from nested CTA_PROTOINFO_TCP, if present.  Returns 0,
+ * or -EINVAL if the state is malformed, missing or >= TCP_CONNTRACK_MAX. */
+static int nfattr_to_tcp(struct nfattr *cda[], struct nf_conn *ct)
+{
+	struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1];
+	struct nfattr *tb[CTA_PROTOINFO_TCP_MAX];
+	u_int8_t state;
+
+	/* an update need not carry protocol info; nothing to do then */
+	if (!attr)
+		return 0;
+
+	nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr);
+	if (nfattr_bad_size(tb, CTA_PROTOINFO_TCP_MAX, cta_min_tcp)
+	    || !tb[CTA_PROTOINFO_TCP_STATE-1])
+		return -EINVAL;
+
+	state = *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]);
+	if (state >= TCP_CONNTRACK_MAX)	/* state tables are sized by this */
+		return -EINVAL;
+	write_lock_bh(&tcp_lock);
+	ct->proto.tcp.state = state;
+	write_unlock_bh(&tcp_lock);
+	return 0;
+}
+#endif
   
 struct nf_conntrack_protocol nf_conntrack_protocol_tcp4 =
 {
@@ -1149,6 +1217,13 @@ struct nf_conntrack_protocol nf_conntrack_protocol_tcp4 =
 	.packet 		= tcp_packet,
 	.new 			= tcp_new,
 	.error			= tcp_error4,
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.to_nfattr		= tcp_to_nfattr,
+	.from_nfattr		= nfattr_to_tcp,
+	.tuple_to_nfattr	= nf_ct_port_tuple_to_nfattr,
+	.nfattr_to_tuple	= nf_ct_port_nfattr_to_tuple,
+#endif
 };
 
 struct nf_conntrack_protocol nf_conntrack_protocol_tcp6 =
@@ -1163,6 +1238,13 @@ struct nf_conntrack_protocol nf_conntrack_protocol_tcp6 =
 	.packet 		= tcp_packet,
 	.new 			= tcp_new,
 	.error			= tcp_error6,
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.to_nfattr		= tcp_to_nfattr,
+	.from_nfattr		= nfattr_to_tcp,
+	.tuple_to_nfattr	= nf_ct_port_tuple_to_nfattr,
+	.nfattr_to_tuple	= nf_ct_port_nfattr_to_tuple,
+#endif
 };
 
 EXPORT_SYMBOL(nf_conntrack_protocol_tcp4);
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 3cae7ce420d..4264dd079a1 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -27,8 +27,8 @@
 #include <linux/netfilter_ipv6.h>
 #include <net/netfilter/nf_conntrack_protocol.h>
 
-unsigned long nf_ct_udp_timeout = 30*HZ;
-unsigned long nf_ct_udp_timeout_stream = 180*HZ;
+unsigned int nf_ct_udp_timeout = 30*HZ;
+unsigned int nf_ct_udp_timeout_stream = 180*HZ;
 
 static int udp_pkt_to_tuple(const struct sk_buff *skb,
 			     unsigned int dataoff,
@@ -196,6 +196,11 @@ struct nf_conntrack_protocol nf_conntrack_protocol_udp4 =
 	.packet			= udp_packet,
 	.new			= udp_new,
 	.error			= udp_error4,
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.tuple_to_nfattr	= nf_ct_port_tuple_to_nfattr,
+	.nfattr_to_tuple	= nf_ct_port_nfattr_to_tuple,
+#endif
 };
 
 struct nf_conntrack_protocol nf_conntrack_protocol_udp6 =
@@ -210,6 +215,11 @@ struct nf_conntrack_protocol nf_conntrack_protocol_udp6 =
 	.packet			= udp_packet,
 	.new			= udp_new,
 	.error			= udp_error6,
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.tuple_to_nfattr	= nf_ct_port_tuple_to_nfattr,
+	.nfattr_to_tuple	= nf_ct_port_nfattr_to_tuple,
+#endif
 };
 
 EXPORT_SYMBOL(nf_conntrack_protocol_udp4);
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 5af381f9fe3..617599aeeea 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -161,14 +161,14 @@ static int ct_seq_show(struct seq_file *s, void *v)
 	if (NF_CT_DIRECTION(hash))
 		return 0;
 
-	l3proto = nf_ct_find_l3proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
-				     .tuple.src.l3num);
+	l3proto = __nf_ct_l3proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
+				       .tuple.src.l3num);
 
 	NF_CT_ASSERT(l3proto);
-	proto = nf_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
-				 .tuple.src.l3num,
-				 conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
-				 .tuple.dst.protonum);
+	proto = __nf_ct_proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
+				   .tuple.src.l3num,
+				   conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
+				   .tuple.dst.protonum);
 	NF_CT_ASSERT(proto);
 
 	if (seq_printf(s, "%-8s %u %-8s %u %ld ",
@@ -307,9 +307,9 @@ static int exp_seq_show(struct seq_file *s, void *v)
 		   expect->tuple.src.l3num,
 		   expect->tuple.dst.protonum);
 	print_tuple(s, &expect->tuple,
-		    nf_ct_find_l3proto(expect->tuple.src.l3num),
-		    nf_ct_find_proto(expect->tuple.src.l3num,
-				     expect->tuple.dst.protonum));
+		    __nf_ct_l3proto_find(expect->tuple.src.l3num),
+		    __nf_ct_proto_find(expect->tuple.src.l3num,
+				       expect->tuple.dst.protonum));
 	return seq_putc(s, '\n');
 }
 
@@ -431,25 +431,25 @@ extern int nf_conntrack_max;
 extern unsigned int nf_conntrack_htable_size;
 
 /* From nf_conntrack_proto_tcp.c */
-extern unsigned long nf_ct_tcp_timeout_syn_sent;
-extern unsigned long nf_ct_tcp_timeout_syn_recv;
-extern unsigned long nf_ct_tcp_timeout_established;
-extern unsigned long nf_ct_tcp_timeout_fin_wait;
-extern unsigned long nf_ct_tcp_timeout_close_wait;
-extern unsigned long nf_ct_tcp_timeout_last_ack;
-extern unsigned long nf_ct_tcp_timeout_time_wait;
-extern unsigned long nf_ct_tcp_timeout_close;
-extern unsigned long nf_ct_tcp_timeout_max_retrans;
+extern unsigned int nf_ct_tcp_timeout_syn_sent;
+extern unsigned int nf_ct_tcp_timeout_syn_recv;
+extern unsigned int nf_ct_tcp_timeout_established;
+extern unsigned int nf_ct_tcp_timeout_fin_wait;
+extern unsigned int nf_ct_tcp_timeout_close_wait;
+extern unsigned int nf_ct_tcp_timeout_last_ack;
+extern unsigned int nf_ct_tcp_timeout_time_wait;
+extern unsigned int nf_ct_tcp_timeout_close;
+extern unsigned int nf_ct_tcp_timeout_max_retrans;
 extern int nf_ct_tcp_loose;
 extern int nf_ct_tcp_be_liberal;
 extern int nf_ct_tcp_max_retrans;
 
 /* From nf_conntrack_proto_udp.c */
-extern unsigned long nf_ct_udp_timeout;
-extern unsigned long nf_ct_udp_timeout_stream;
+extern unsigned int nf_ct_udp_timeout;
+extern unsigned int nf_ct_udp_timeout_stream;
 
 /* From nf_conntrack_proto_generic.c */
-extern unsigned long nf_ct_generic_timeout;
+extern unsigned int nf_ct_generic_timeout;
 
 /* Log invalid packets of a given protocol */
 static int log_invalid_proto_min = 0;
@@ -821,7 +821,7 @@ module_exit(fini);
 
 /* Some modules need us, but don't depend directly on any symbol.
    They should call this. */
-void need_nf_conntrack(void)
+void need_conntrack(void)
 {
 }
 
@@ -841,13 +841,17 @@ EXPORT_SYMBOL(nf_conntrack_protocol_unregister);
 EXPORT_SYMBOL(nf_ct_invert_tuplepr);
 EXPORT_SYMBOL(nf_conntrack_alter_reply);
 EXPORT_SYMBOL(nf_conntrack_destroyed);
-EXPORT_SYMBOL(need_nf_conntrack);
+EXPORT_SYMBOL(need_conntrack);
 EXPORT_SYMBOL(nf_conntrack_helper_register);
 EXPORT_SYMBOL(nf_conntrack_helper_unregister);
 EXPORT_SYMBOL(nf_ct_iterate_cleanup);
 EXPORT_SYMBOL(__nf_ct_refresh_acct);
 EXPORT_SYMBOL(nf_ct_protos);
-EXPORT_SYMBOL(nf_ct_find_proto);
+EXPORT_SYMBOL(__nf_ct_proto_find);
+EXPORT_SYMBOL(nf_ct_proto_find_get);
+EXPORT_SYMBOL(nf_ct_proto_put);
+EXPORT_SYMBOL(nf_ct_l3proto_find_get);
+EXPORT_SYMBOL(nf_ct_l3proto_put);
 EXPORT_SYMBOL(nf_ct_l3protos);
 EXPORT_SYMBOL(nf_conntrack_expect_alloc);
 EXPORT_SYMBOL(nf_conntrack_expect_put);
@@ -867,3 +871,21 @@ EXPORT_SYMBOL(nf_ct_get_tuple);
 EXPORT_SYMBOL(nf_ct_invert_tuple);
 EXPORT_SYMBOL(nf_conntrack_in);
 EXPORT_SYMBOL(__nf_conntrack_attach);
+EXPORT_SYMBOL(nf_conntrack_alloc);
+EXPORT_SYMBOL(nf_conntrack_free);
+EXPORT_SYMBOL(nf_conntrack_flush);
+EXPORT_SYMBOL(nf_ct_remove_expectations);
+EXPORT_SYMBOL(nf_ct_helper_find_get);
+EXPORT_SYMBOL(nf_ct_helper_put);
+EXPORT_SYMBOL(__nf_conntrack_helper_find_byname);
+EXPORT_SYMBOL(__nf_conntrack_find);
+EXPORT_SYMBOL(nf_ct_unlink_expect);
+EXPORT_SYMBOL(nf_conntrack_hash_insert);
+EXPORT_SYMBOL(__nf_conntrack_expect_find);
+EXPORT_SYMBOL(nf_conntrack_expect_find);
+EXPORT_SYMBOL(nf_conntrack_expect_list);
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+EXPORT_SYMBOL(nf_ct_port_tuple_to_nfattr);
+EXPORT_SYMBOL(nf_ct_port_nfattr_to_tuple);
+#endif
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index a60c59b9763..f6063e8f005 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -162,7 +162,7 @@ nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys,
 		return -EINVAL;
 	}
 
-	min_len = NLMSG_ALIGN(sizeof(struct nfgenmsg));
+	min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
 	if (unlikely(nlh->nlmsg_len < min_len))
 		return -EINVAL;
 
@@ -212,7 +212,7 @@ int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags)
 }
 
 /* Process one complete nfnetlink message. */
-static inline int nfnetlink_rcv_msg(struct sk_buff *skb,
+static int nfnetlink_rcv_msg(struct sk_buff *skb,
 				    struct nlmsghdr *nlh, int *errp)
 {
 	struct nfnl_callback *nc;
@@ -236,8 +236,7 @@ static inline int nfnetlink_rcv_msg(struct sk_buff *skb,
 	}
 
 	/* All the messages must at least contain nfgenmsg */
-	if (nlh->nlmsg_len < 
-			NLMSG_LENGTH(NLMSG_ALIGN(sizeof(struct nfgenmsg)))) {
+	if (nlh->nlmsg_len < NLMSG_SPACE(sizeof(struct nfgenmsg))) {
 		DEBUGP("received message was too short\n");
 		return 0;
 	}
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index cba63729313..e10512e229b 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -151,7 +151,7 @@ instance_create(u_int16_t group_num, int pid)
 		goto out_unlock;
 
 	INIT_HLIST_NODE(&inst->hlist);
-	inst->lock = SPIN_LOCK_UNLOCKED;
+	spin_lock_init(&inst->lock);
 	/* needs to be two, since we _put() after creation */
 	atomic_set(&inst->use, 2);
 
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index f28460b61e4..18ed9c5d209 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -148,7 +148,7 @@ instance_create(u_int16_t queue_num, int pid)
 	atomic_set(&inst->id_sequence, 0);
 	/* needs to be two, since we _put() after creation */
 	atomic_set(&inst->use, 2);
-	inst->lock = SPIN_LOCK_UNLOCKED;
+	spin_lock_init(&inst->lock);
 	INIT_LIST_HEAD(&inst->queue_list);
 
 	if (!try_module_get(THIS_MODULE))
@@ -345,6 +345,10 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 	struct nfqnl_msg_packet_hdr pmsg;
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
+	struct nf_info *entinf = entry->info;
+	struct sk_buff *entskb = entry->skb;
+	struct net_device *indev;
+	struct net_device *outdev;
 	unsigned int tmp_uint;
 
 	QDEBUG("entered\n");
@@ -361,6 +365,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 		+ NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_hw))
 		+ NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_timestamp));
 
+	outdev = entinf->outdev;
+
 	spin_lock_bh(&queue->lock);
 	
 	switch (queue->copy_mode) {
@@ -370,15 +376,15 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 		break;
 	
 	case NFQNL_COPY_PACKET:
-		if (entry->skb->ip_summed == CHECKSUM_HW &&
-		    (*errp = skb_checksum_help(entry->skb,
-		                               entry->info->outdev == NULL))) {
+		if (entskb->ip_summed == CHECKSUM_HW &&
+		    (*errp = skb_checksum_help(entskb,
+		                               outdev == NULL))) {
 			spin_unlock_bh(&queue->lock);
 			return NULL;
 		}
 		if (queue->copy_range == 0 
-		    || queue->copy_range > entry->skb->len)
-			data_len = entry->skb->len;
+		    || queue->copy_range > entskb->len)
+			data_len = entskb->len;
 		else
 			data_len = queue->copy_range;
 		
@@ -402,29 +408,30 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 			NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET,
 			sizeof(struct nfgenmsg));
 	nfmsg = NLMSG_DATA(nlh);
-	nfmsg->nfgen_family = entry->info->pf;
+	nfmsg->nfgen_family = entinf->pf;
 	nfmsg->version = NFNETLINK_V0;
 	nfmsg->res_id = htons(queue->queue_num);
 
 	pmsg.packet_id 		= htonl(entry->id);
-	pmsg.hw_protocol	= htons(entry->skb->protocol);
-	pmsg.hook		= entry->info->hook;
+	pmsg.hw_protocol	= htons(entskb->protocol);
+	pmsg.hook		= entinf->hook;
 
 	NFA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg);
 
-	if (entry->info->indev) {
-		tmp_uint = htonl(entry->info->indev->ifindex);
+	indev = entinf->indev;
+	if (indev) {
+		tmp_uint = htonl(indev->ifindex);
 #ifndef CONFIG_BRIDGE_NETFILTER
 		NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint);
 #else
-		if (entry->info->pf == PF_BRIDGE) {
+		if (entinf->pf == PF_BRIDGE) {
 			/* Case 1: indev is physical input device, we need to
 			 * look for bridge group (when called from 
 			 * netfilter_bridge) */
 			NFA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, sizeof(tmp_uint), 
 				&tmp_uint);
 			/* this is the bridge group "brX" */
-			tmp_uint = htonl(entry->info->indev->br_port->br->dev->ifindex);
+			tmp_uint = htonl(indev->br_port->br->dev->ifindex);
 			NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint),
 				&tmp_uint);
 		} else {
@@ -432,9 +439,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 			 * physical device (when called from ipv4) */
 			NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint),
 				&tmp_uint);
-			if (entry->skb->nf_bridge
-			    && entry->skb->nf_bridge->physindev) {
-				tmp_uint = htonl(entry->skb->nf_bridge->physindev->ifindex);
+			if (entskb->nf_bridge
+			    && entskb->nf_bridge->physindev) {
+				tmp_uint = htonl(entskb->nf_bridge->physindev->ifindex);
 				NFA_PUT(skb, NFQA_IFINDEX_PHYSINDEV,
 					sizeof(tmp_uint), &tmp_uint);
 			}
@@ -442,19 +449,19 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 #endif
 	}
 
-	if (entry->info->outdev) {
-		tmp_uint = htonl(entry->info->outdev->ifindex);
+	if (outdev) {
+		tmp_uint = htonl(outdev->ifindex);
 #ifndef CONFIG_BRIDGE_NETFILTER
 		NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint);
 #else
-		if (entry->info->pf == PF_BRIDGE) {
+		if (entinf->pf == PF_BRIDGE) {
 			/* Case 1: outdev is physical output device, we need to
 			 * look for bridge group (when called from 
 			 * netfilter_bridge) */
 			NFA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, sizeof(tmp_uint),
 				&tmp_uint);
 			/* this is the bridge group "brX" */
-			tmp_uint = htonl(entry->info->outdev->br_port->br->dev->ifindex);
+			tmp_uint = htonl(outdev->br_port->br->dev->ifindex);
 			NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint),
 				&tmp_uint);
 		} else {
@@ -462,9 +469,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 			 * physical output device (when called from ipv4) */
 			NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint),
 				&tmp_uint);
-			if (entry->skb->nf_bridge
-			    && entry->skb->nf_bridge->physoutdev) {
-				tmp_uint = htonl(entry->skb->nf_bridge->physoutdev->ifindex);
+			if (entskb->nf_bridge
+			    && entskb->nf_bridge->physoutdev) {
+				tmp_uint = htonl(entskb->nf_bridge->physoutdev->ifindex);
 				NFA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV,
 					sizeof(tmp_uint), &tmp_uint);
 			}
@@ -472,27 +479,27 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 #endif
 	}
 
-	if (entry->skb->nfmark) {
-		tmp_uint = htonl(entry->skb->nfmark);
+	if (entskb->nfmark) {
+		tmp_uint = htonl(entskb->nfmark);
 		NFA_PUT(skb, NFQA_MARK, sizeof(u_int32_t), &tmp_uint);
 	}
 
-	if (entry->info->indev && entry->skb->dev
-	    && entry->skb->dev->hard_header_parse) {
+	if (indev && entskb->dev
+	    && entskb->dev->hard_header_parse) {
 		struct nfqnl_msg_packet_hw phw;
 
 		phw.hw_addrlen =
-			entry->skb->dev->hard_header_parse(entry->skb,
+			entskb->dev->hard_header_parse(entskb,
 			                                   phw.hw_addr);
 		phw.hw_addrlen = htons(phw.hw_addrlen);
 		NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw);
 	}
 
-	if (entry->skb->tstamp.off_sec) {
+	if (entskb->tstamp.off_sec) {
 		struct nfqnl_msg_packet_timestamp ts;
 
-		ts.sec = cpu_to_be64(entry->skb->tstamp.off_sec);
-		ts.usec = cpu_to_be64(entry->skb->tstamp.off_usec);
+		ts.sec = cpu_to_be64(entskb->tstamp.off_sec);
+		ts.usec = cpu_to_be64(entskb->tstamp.off_usec);
 
 		NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts);
 	}
@@ -510,7 +517,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 		nfa->nfa_type = NFQA_PAYLOAD;
 		nfa->nfa_len = size;
 
-		if (skb_copy_bits(entry->skb, 0, NFA_DATA(nfa), data_len))
+		if (skb_copy_bits(entskb, 0, NFA_DATA(nfa), data_len))
 			BUG();
 	}
 		
@@ -667,12 +674,14 @@ nfqnl_set_mode(struct nfqnl_instance *queue,
 static int
 dev_cmp(struct nfqnl_queue_entry *entry, unsigned long ifindex)
 {
-	if (entry->info->indev)
-		if (entry->info->indev->ifindex == ifindex)
+	struct nf_info *entinf = entry->info;
+	
+	if (entinf->indev)
+		if (entinf->indev->ifindex == ifindex)
 			return 1;
 			
-	if (entry->info->outdev)
-		if (entry->info->outdev->ifindex == ifindex)
+	if (entinf->outdev)
+		if (entinf->outdev->ifindex == ifindex)
 			return 1;
 
 	return 0;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
new file mode 100644
index 00000000000..d7817afc6b9
--- /dev/null
+++ b/net/netfilter/x_tables.c
@@ -0,0 +1,624 @@
+/*
+ * x_tables core - Backend for {ip,ip6,arp}_tables
+ *
+ * Copyright (C) 2006-2006 Harald Welte <laforge@netfilter.org>
+ *
+ * Based on existing ip_tables code which is
+ *   Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ *   Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_arp.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("[ip,ip6,arp]_tables backend module");
+
+#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
+
+struct xt_af {
+	struct semaphore mutex;
+	struct list_head match;
+	struct list_head target;
+	struct list_head tables;
+};
+
+static struct xt_af *xt;
+
+#ifdef DEBUG_IP_FIREWALL_USER
+#define duprintf(format, args...) printk(format , ## args)
+#else
+#define duprintf(format, args...)
+#endif
+
+enum {
+	TABLE,
+	TARGET,
+	MATCH,
+};
+
+/* Registration hooks for targets: list_add() under xt[af].mutex. */
+int
+xt_register_target(int af, struct xt_target *target)
+{
+	struct xt_af *fam = &xt[af];
+	int err = down_interruptible(&fam->mutex);
+
+	if (err == 0) {
+		list_add(&target->list, &fam->target);
+		up(&fam->mutex);
+	}
+	return err;
+}
+EXPORT_SYMBOL(xt_register_target);
+
+void
+xt_unregister_target(int af, struct xt_target *target)
+{
+	down(&xt[af].mutex);
+	LIST_DELETE(&xt[af].target, target);
+	up(&xt[af].mutex);
+}
+EXPORT_SYMBOL(xt_unregister_target);
+
+/* Add match to the xt[af] match list under xt[af].mutex.
+ * Returns 0, or the nonzero result of an unsuccessful down_interruptible(). */
+int
+xt_register_match(int af, struct xt_match *match)
+{
+	struct xt_af *fam = &xt[af];
+	int err = down_interruptible(&fam->mutex);
+
+	if (err == 0) {
+		list_add(&match->list, &fam->match);
+		up(&fam->mutex);
+	}
+	return err;
+}
+EXPORT_SYMBOL(xt_register_match);
+
+void
+xt_unregister_match(int af, struct xt_match *match)
+{
+	down(&xt[af].mutex);
+	LIST_DELETE(&xt[af].match, match);
+	up(&xt[af].mutex);
+}
+EXPORT_SYMBOL(xt_unregister_match);
+
+
+/*
+ * These are weird, but module loading must not be done with mutex
+ * held (since they will register), and we have to have a single
+ * function to use try_then_request_module().
+ */
+
+/* Find match, grabs ref.  Returns ERR_PTR() on error. */
+struct xt_match *xt_find_match(int af, const char *name, u8 revision)
+{
+	struct xt_match *m;
+	int err = 0;
+
+	if (down_interruptible(&xt[af].mutex) != 0)
+		return ERR_PTR(-EINTR);
+
+	list_for_each_entry(m, &xt[af].match, list) {
+		if (strcmp(m->name, name) == 0) {
+			if (m->revision == revision) {
+				if (try_module_get(m->me)) {
+					up(&xt[af].mutex);
+					return m;
+				}
+			} else
+				err = -EPROTOTYPE; /* Found something. */
+		}
+	}
+	up(&xt[af].mutex);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL(xt_find_match);
+
+/* Find target, grabs ref.  Returns ERR_PTR() on error. */
+struct xt_target *xt_find_target(int af, const char *name, u8 revision)
+{
+	struct xt_target *t;
+	int err = 0;
+
+	if (down_interruptible(&xt[af].mutex) != 0)
+		return ERR_PTR(-EINTR);
+
+	list_for_each_entry(t, &xt[af].target, list) {
+		if (strcmp(t->name, name) == 0) {
+			if (t->revision == revision) {
+				if (try_module_get(t->me)) {
+					up(&xt[af].mutex);
+					return t;
+				}
+			} else
+				err = -EPROTOTYPE; /* Found something. */
+		}
+	}
+	up(&xt[af].mutex);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL(xt_find_target);
+
+static const char *xt_prefix[NPROTO] = {
+	[AF_INET] 	= "ipt_%s",
+	[AF_INET6] 	= "ip6t_%s",
+	[NF_ARP]	= "arpt_%s",
+};
+
+struct xt_target *xt_request_find_target(int af, const char *name, u8 revision)
+{
+	struct xt_target *target;
+
+	target = try_then_request_module(xt_find_target(af, name, revision),
+					 xt_prefix[af], name);
+	if (IS_ERR(target) || !target)
+		return NULL;
+	return target;
+}
+EXPORT_SYMBOL_GPL(xt_request_find_target);
+
+/* 1 if name has this revision; *bestp is raised to the highest one seen. */
+static int match_revfn(int af, const char *name, u8 revision, int *bestp)
+{
+	struct xt_match *cur;
+	int found = 0;
+
+	list_for_each_entry(cur, &xt[af].match, list) {
+		if (strcmp(cur->name, name) != 0)
+			continue;
+		if (cur->revision > *bestp)
+			*bestp = cur->revision;
+		found |= (cur->revision == revision);
+	}
+	return found;
+}
+
+/* 1 if name has this revision; *bestp is raised to the highest one seen. */
+static int target_revfn(int af, const char *name, u8 revision, int *bestp)
+{
+	struct xt_target *cur;
+	int found = 0;
+
+	list_for_each_entry(cur, &xt[af].target, list) {
+		if (strcmp(cur->name, name) != 0)
+			continue;
+		if (cur->revision > *bestp)
+			*bestp = cur->revision;
+		found |= (cur->revision == revision);
+	}
+	return found;
+}
+
+/* Returns true or false (if no such extension at all) */
+int xt_find_revision(int af, const char *name, u8 revision, int target,
+		     int *err)
+{
+	int have_rev, best = -1;
+
+	if (down_interruptible(&xt[af].mutex) != 0) {
+		*err = -EINTR;
+		return 1;
+	}
+	if (target == 1)
+		have_rev = target_revfn(af, name, revision, &best);
+	else
+		have_rev = match_revfn(af, name, revision, &best);
+	up(&xt[af].mutex);
+
+	/* Nothing at all?  Return 0 to try loading module. */
+	if (best == -1) {
+		*err = -ENOENT;
+		return 0;
+	}
+
+	*err = best;
+	if (!have_rev)
+		*err = -EPROTONOSUPPORT;
+	return 1;
+}
+EXPORT_SYMBOL_GPL(xt_find_revision);
+
+struct xt_table_info *xt_alloc_table_info(unsigned int size)
+{
+	struct xt_table_info *newinfo;
+	int cpu;
+
+	/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
+	if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > num_physpages)
+		return NULL;
+
+	newinfo = kzalloc(sizeof(struct xt_table_info), GFP_KERNEL);
+	if (!newinfo)
+		return NULL;
+
+	newinfo->size = size;
+
+	for_each_cpu(cpu) {
+		if (size <= PAGE_SIZE)
+			newinfo->entries[cpu] = kmalloc_node(size,
+							GFP_KERNEL,
+							cpu_to_node(cpu));
+		else
+			newinfo->entries[cpu] = vmalloc_node(size,
+							cpu_to_node(cpu));
+
+		if (newinfo->entries[cpu] == NULL) {
+			xt_free_table_info(newinfo);
+			return NULL;
+		}
+	}
+
+	return newinfo;
+}
+EXPORT_SYMBOL(xt_alloc_table_info);
+
+void xt_free_table_info(struct xt_table_info *info)
+{
+	int cpu;
+
+	for_each_cpu(cpu) {
+		if (info->size <= PAGE_SIZE)
+			kfree(info->entries[cpu]);
+		else
+			vfree(info->entries[cpu]);
+	}
+	kfree(info);
+}
+EXPORT_SYMBOL(xt_free_table_info);
+
+/* Find table by name, grabs mutex & ref.  ERR_PTR() on error, NULL if none. */
+struct xt_table *xt_find_table_lock(int af, const char *name)
+{
+	struct xt_table *t;
+
+	if (down_interruptible(&xt[af].mutex) != 0)
+		return ERR_PTR(-EINTR);
+
+	list_for_each_entry(t, &xt[af].tables, list)
+		if (strcmp(t->name, name) == 0 && try_module_get(t->me))
+			return t;
+	up(&xt[af].mutex);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(xt_find_table_lock);
+
+void xt_table_unlock(struct xt_table *table)
+{
+	up(&xt[table->af].mutex);
+}
+EXPORT_SYMBOL_GPL(xt_table_unlock);
+
+
+struct xt_table_info *
+xt_replace_table(struct xt_table *table,
+	      unsigned int num_counters,
+	      struct xt_table_info *newinfo,
+	      int *error)
+{
+	struct xt_table_info *oldinfo, *private;
+
+	/* Do the substitution. */
+	write_lock_bh(&table->lock);
+	private = table->private;
+	/* Check inside lock: is the old number correct? */
+	if (num_counters != private->number) {
+		duprintf("num_counters != table->private->number (%u/%u)\n",
+			 num_counters, private->number);
+		write_unlock_bh(&table->lock);
+		*error = -EAGAIN;
+		return NULL;
+	}
+	oldinfo = private;
+	table->private = newinfo;
+	newinfo->initial_entries = oldinfo->initial_entries;
+	write_unlock_bh(&table->lock);
+
+	return oldinfo;
+}
+EXPORT_SYMBOL_GPL(xt_replace_table);
+
+/* Register table under xt[table->af]; returns 0 or a nonzero error code. */
+int xt_register_table(struct xt_table *table,
+		      struct xt_table_info *bootstrap,
+		      struct xt_table_info *newinfo)
+{
+	int ret;
+	struct xt_table_info *private;
+
+	ret = down_interruptible(&xt[table->af].mutex);
+	if (ret != 0)
+		return ret;
+
+	/* Don't autoload: we'd eat our tail... */
+	if (list_named_find(&xt[table->af].tables, table->name)) {
+		ret = -EEXIST;
+		goto unlock;
+	}
+
+	/* xt_replace_table() takes table->lock, so it must be initialised first. */
+	rwlock_init(&table->lock);
+	/* Simplifies replace_table code. */
+	table->private = bootstrap;
+	if (!xt_replace_table(table, 0, newinfo, &ret))
+		goto unlock;
+
+	private = table->private;
+	duprintf("table->private->number = %u\n", private->number);
+	/* save number of initial entries */
+	private->initial_entries = private->number;
+
+	list_prepend(&xt[table->af].tables, table);
+	ret = 0;
+ unlock:
+	up(&xt[table->af].mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(xt_register_table);
+
+void *xt_unregister_table(struct xt_table *table)
+{
+	struct xt_table_info *private;
+
+	down(&xt[table->af].mutex);
+	private = table->private;
+	LIST_DELETE(&xt[table->af].tables, table);
+	up(&xt[table->af].mutex);
+
+	return private;
+}
+EXPORT_SYMBOL_GPL(xt_unregister_table);
+
+#ifdef CONFIG_PROC_FS
+static char *xt_proto_prefix[NPROTO] = {
+	[AF_INET]	= "ip",
+	[AF_INET6]	= "ip6",
+	[NF_ARP]	= "arp",
+};
+
+/* Return the list node at index pos (0 = first entry), or NULL if none. */
+static struct list_head *xt_get_idx(struct list_head *list, struct seq_file *seq, loff_t pos)
+{
+	struct list_head *cur = list->next;
+
+	if (cur == NULL || list_empty(list))
+		return NULL;
+	for (; pos != 0; pos--) {
+		cur = cur->next;
+		if (cur == NULL || cur == list)
+			return NULL;
+	}
+	return cur;
+}
+
+/*
+ * Map a list type (TABLE, TARGET or MATCH) to the corresponding
+ * list head in xt[af].  Any other type value yields NULL.
+ */
+static struct list_head *type2list(u_int16_t af, u_int16_t type)
+{
+	struct xt_af *fam = &xt[af];
+
+	switch (type) {
+	case TARGET:
+		return &fam->target;
+	case MATCH:
+		return &fam->match;
+	case TABLE:
+		return &fam->tables;
+	default:
+		break;
+	}
+
+	return NULL;
+}
+
+/* seq_file ->start: lock xt[af] and return the entry at *pos.  seq_file calls
+ * ->stop (which ups the mutex) even when we return NULL, so the lock must be
+ * taken unconditionally; xt_proto_init() never encodes a bad af or type. */
+static void *xt_tgt_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct proc_dir_entry *pde = (struct proc_dir_entry *) seq->private;
+	u_int16_t af = (unsigned long)pde->data & 0xffff;
+	u_int16_t type = (unsigned long)pde->data >> 16;
+	struct list_head *list;
+
+	if (af >= NPROTO)
+		return NULL;
+	list = type2list(af, type);
+	if (!list)
+		return NULL;
+
+	down(&xt[af].mutex);
+	return xt_get_idx(list, seq, *pos);
+}
+
+static void *xt_tgt_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct proc_dir_entry *pde = seq->private;
+	u_int16_t af = (unsigned long)pde->data & 0xffff;
+	u_int16_t type = (unsigned long)pde->data >> 16;
+	struct list_head *list;
+
+	if (af >= NPROTO)
+		return NULL;
+	
+	list = type2list(af, type);
+	if (!list)
+		return NULL;
+
+	(*pos)++;
+	return xt_get_idx(list, seq, *pos);
+}
+
+static void xt_tgt_seq_stop(struct seq_file *seq, void *v)
+{
+	struct proc_dir_entry *pde = seq->private;
+	u_int16_t af = (unsigned long)pde->data & 0xffff;
+
+	up(&xt[af].mutex);
+}
+
+static int xt_name_seq_show(struct seq_file *seq, void *v)
+{
+	char *name = (char *)v + sizeof(struct list_head);
+
+	if (strlen(name))
+		return seq_printf(seq, "%s\n", name);
+	else
+		return 0;
+}
+
+static struct seq_operations xt_tgt_seq_ops = {
+	.start	= xt_tgt_seq_start,
+	.next	= xt_tgt_seq_next,
+	.stop	= xt_tgt_seq_stop,
+	.show	= xt_name_seq_show,
+};
+
+static int xt_tgt_open(struct inode *inode, struct file *file)
+{
+	int ret;
+
+	ret = seq_open(file, &xt_tgt_seq_ops);
+	if (!ret) {
+		struct seq_file *seq = file->private_data;
+		struct proc_dir_entry *pde = PDE(inode);
+
+		seq->private = pde;
+	}
+
+	return ret;
+}
+
+static struct file_operations xt_file_ops = {
+	.owner	 = THIS_MODULE,
+	.open	 = xt_tgt_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = seq_release,
+};
+
+#define FORMAT_TABLES	"_tables_names"
+#define	FORMAT_MATCHES	"_tables_matches"
+#define FORMAT_TARGETS 	"_tables_targets"
+
+#endif /* CONFIG_PROC_FS */
+
+int xt_proto_init(int af)
+{
+#ifdef CONFIG_PROC_FS
+	char buf[XT_FUNCTION_MAXNAMELEN];
+	struct proc_dir_entry *proc;
+#endif
+
+	if (af >= NPROTO)
+		return -EINVAL;
+
+
+#ifdef CONFIG_PROC_FS
+	strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+	strlcat(buf, FORMAT_TABLES, sizeof(buf));
+	proc = proc_net_fops_create(buf, 0440, &xt_file_ops);
+	if (!proc)
+		goto out;
+	proc->data = (void *) ((unsigned long) af | (TABLE << 16));
+
+
+	strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+	strlcat(buf, FORMAT_MATCHES, sizeof(buf));
+	proc = proc_net_fops_create(buf, 0440, &xt_file_ops);
+	if (!proc)
+		goto out_remove_tables;
+	proc->data = (void *) ((unsigned long) af | (MATCH << 16));
+
+	strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+	strlcat(buf, FORMAT_TARGETS, sizeof(buf));
+	proc = proc_net_fops_create(buf, 0440, &xt_file_ops);
+	if (!proc)
+		goto out_remove_matches;
+	proc->data = (void *) ((unsigned long) af | (TARGET << 16));
+#endif
+
+	return 0;
+
+#ifdef CONFIG_PROC_FS
+out_remove_matches:
+	strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+	strlcat(buf, FORMAT_MATCHES, sizeof(buf));
+	proc_net_remove(buf);
+
+out_remove_tables:
+	strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+	strlcat(buf, FORMAT_TABLES, sizeof(buf));
+	proc_net_remove(buf);
+out:
+	return -1;
+#endif
+}
+EXPORT_SYMBOL_GPL(xt_proto_init);
+
+/* Remove the three proc_net entries named <prefix> + FORMAT_TABLES,
+ * FORMAT_TARGETS and FORMAT_MATCHES for af (no-op without CONFIG_PROC_FS). */
+void xt_proto_fini(int af)
+{
+#ifdef CONFIG_PROC_FS
+	static const char *const suffix[] = {
+		FORMAT_TABLES, FORMAT_TARGETS, FORMAT_MATCHES,
+	};
+	char buf[XT_FUNCTION_MAXNAMELEN];
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(suffix); i++) {
+		strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+		strlcat(buf, suffix[i], sizeof(buf));
+		proc_net_remove(buf);
+	}
+#endif /*CONFIG_PROC_FS*/
+}
+EXPORT_SYMBOL_GPL(xt_proto_fini);
+
+
+/* Allocate the NPROTO xt_af slots and initialise each mutex and list head. */
+static int __init xt_init(void)
+{
+	struct xt_af *fam;
+
+	xt = kmalloc(NPROTO * sizeof(*xt), GFP_KERNEL);
+	if (xt == NULL)
+		return -ENOMEM;
+	for (fam = xt; fam < xt + NPROTO; fam++) {
+		init_MUTEX(&fam->mutex);
+		INIT_LIST_HEAD(&fam->target);
+		INIT_LIST_HEAD(&fam->match);
+		INIT_LIST_HEAD(&fam->tables);
+	}
+	return 0;
+}
+
+static void __exit xt_fini(void)
+{
+	kfree(xt);
+}
+
+module_init(xt_init);
+module_exit(xt_fini);
+
diff --git a/net/ipv4/netfilter/ipt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index dab78d8bd49..78ee266a12e 100644
--- a/net/ipv4/netfilter/ipt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -15,12 +15,13 @@
 #include <linux/ip.h>
 #include <net/checksum.h>
 
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_CLASSIFY.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_CLASSIFY.h>
 
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("iptables qdisc classification target module");
+MODULE_ALIAS("ipt_CLASSIFY");
 
 static unsigned int
 target(struct sk_buff **pskb,
@@ -30,25 +31,25 @@ target(struct sk_buff **pskb,
        const void *targinfo,
        void *userinfo)
 {
-	const struct ipt_classify_target_info *clinfo = targinfo;
+	const struct xt_classify_target_info *clinfo = targinfo;
 
-	if((*pskb)->priority != clinfo->priority) 
+	if ((*pskb)->priority != clinfo->priority)
 		(*pskb)->priority = clinfo->priority;
 
-	return IPT_CONTINUE;
+	return XT_CONTINUE;
 }
 
 static int
 checkentry(const char *tablename,
-           const struct ipt_entry *e,
+           const void *e,
            void *targinfo,
            unsigned int targinfosize,
            unsigned int hook_mask)
 {
-	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_classify_target_info))){
+	if (targinfosize != XT_ALIGN(sizeof(struct xt_classify_target_info))){
 		printk(KERN_ERR "CLASSIFY: invalid size (%u != %Zu).\n",
 		       targinfosize,
-		       IPT_ALIGN(sizeof(struct ipt_classify_target_info)));
+		       XT_ALIGN(sizeof(struct xt_classify_target_info)));
 		return 0;
 	}
 	
@@ -69,21 +70,39 @@ checkentry(const char *tablename,
 	return 1;
 }
 
-static struct ipt_target ipt_classify_reg = { 
+static struct xt_target classify_reg = { 
+	.name 		= "CLASSIFY", 
+	.target 	= target,
+	.checkentry	= checkentry,
+	.me 		= THIS_MODULE,
+};
+static struct xt_target classify6_reg = { 
 	.name 		= "CLASSIFY", 
 	.target 	= target,
 	.checkentry	= checkentry,
 	.me 		= THIS_MODULE,
 };
 
+
 static int __init init(void)
 {
-	return ipt_register_target(&ipt_classify_reg);
+	int ret;
+
+	ret = xt_register_target(AF_INET, &classify_reg);
+	if (ret)
+		return ret;
+
+	ret = xt_register_target(AF_INET6, &classify6_reg);
+	if (ret)
+		xt_unregister_target(AF_INET, &classify_reg);
+
+	return ret;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_target(&ipt_classify_reg);
+	xt_unregister_target(AF_INET, &classify_reg);
+	xt_unregister_target(AF_INET6, &classify6_reg);
 }
 
 module_init(init);
diff --git a/net/ipv4/netfilter/ipt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index 8acac5a40a9..22506e376be 100644
--- a/net/ipv4/netfilter/ipt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -26,9 +26,10 @@
 MODULE_AUTHOR("Henrik Nordstrom <hno@marasytems.com>");
 MODULE_DESCRIPTION("IP tables CONNMARK matching module");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_CONNMARK");
 
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_CONNMARK.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_CONNMARK.h>
 #include <net/netfilter/nf_conntrack_compat.h>
 
 static unsigned int
@@ -39,7 +40,7 @@ target(struct sk_buff **pskb,
        const void *targinfo,
        void *userinfo)
 {
-	const struct ipt_connmark_target_info *markinfo = targinfo;
+	const struct xt_connmark_target_info *markinfo = targinfo;
 	u_int32_t diff;
 	u_int32_t nfmark;
 	u_int32_t newmark;
@@ -48,17 +49,17 @@ target(struct sk_buff **pskb,
 
 	if (ctmark) {
 	    switch(markinfo->mode) {
-	    case IPT_CONNMARK_SET:
+	    case XT_CONNMARK_SET:
 		newmark = (*ctmark & ~markinfo->mask) | markinfo->mark;
 		if (newmark != *ctmark)
 		    *ctmark = newmark;
 		break;
-	    case IPT_CONNMARK_SAVE:
+	    case XT_CONNMARK_SAVE:
 		newmark = (*ctmark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask);
 		if (*ctmark != newmark)
 		    *ctmark = newmark;
 		break;
-	    case IPT_CONNMARK_RESTORE:
+	    case XT_CONNMARK_RESTORE:
 		nfmark = (*pskb)->nfmark;
 		diff = (*ctmark ^ nfmark) & markinfo->mask;
 		if (diff != 0)
@@ -67,25 +68,25 @@ target(struct sk_buff **pskb,
 	    }
 	}
 
-	return IPT_CONTINUE;
+	return XT_CONTINUE;
 }
 
 static int
 checkentry(const char *tablename,
-	   const struct ipt_entry *e,
+	   const void *entry,
 	   void *targinfo,
 	   unsigned int targinfosize,
 	   unsigned int hook_mask)
 {
-	struct ipt_connmark_target_info *matchinfo = targinfo;
-	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_connmark_target_info))) {
+	struct xt_connmark_target_info *matchinfo = targinfo;
+	if (targinfosize != XT_ALIGN(sizeof(struct xt_connmark_target_info))) {
 		printk(KERN_WARNING "CONNMARK: targinfosize %u != %Zu\n",
 		       targinfosize,
-		       IPT_ALIGN(sizeof(struct ipt_connmark_target_info)));
+		       XT_ALIGN(sizeof(struct xt_connmark_target_info)));
 		return 0;
 	}
 
-	if (matchinfo->mode == IPT_CONNMARK_RESTORE) {
+	if (matchinfo->mode == XT_CONNMARK_RESTORE) {
 	    if (strcmp(tablename, "mangle") != 0) {
 		    printk(KERN_WARNING "CONNMARK: restore can only be called from \"mangle\" table, not \"%s\"\n", tablename);
 		    return 0;
@@ -100,7 +101,13 @@ checkentry(const char *tablename,
 	return 1;
 }
 
-static struct ipt_target ipt_connmark_reg = {
+static struct xt_target connmark_reg = {
+	.name = "CONNMARK",
+	.target = &target,
+	.checkentry = &checkentry,
+	.me = THIS_MODULE
+};
+static struct xt_target connmark6_reg = {
 	.name = "CONNMARK",
 	.target = &target,
 	.checkentry = &checkentry,
@@ -109,13 +116,25 @@ static struct ipt_target ipt_connmark_reg = {
 
 static int __init init(void)
 {
-	need_ip_conntrack();
-	return ipt_register_target(&ipt_connmark_reg);
+	int ret;
+
+	need_conntrack();
+
+	ret = xt_register_target(AF_INET, &connmark_reg);
+	if (ret)
+		return ret;
+
+	ret = xt_register_target(AF_INET6, &connmark6_reg);
+	if (ret)
+		xt_unregister_target(AF_INET, &connmark_reg);
+
+	return ret;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_target(&ipt_connmark_reg);
+	xt_unregister_target(AF_INET, &connmark_reg);
+	xt_unregister_target(AF_INET6, &connmark6_reg);
 }
 
 module_init(init);
diff --git a/net/ipv4/netfilter/ipt_MARK.c b/net/netfilter/xt_MARK.c
index 52b4f2c296b..0c11ee9550f 100644
--- a/net/ipv4/netfilter/ipt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -12,12 +12,14 @@
 #include <linux/ip.h>
 #include <net/checksum.h>
 
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_MARK.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_MARK.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
-MODULE_DESCRIPTION("iptables MARK modification module");
+MODULE_DESCRIPTION("ip[6]tables MARK modification module");
+MODULE_ALIAS("ipt_MARK");
+MODULE_ALIAS("ip6t_MARK");
 
 static unsigned int
 target_v0(struct sk_buff **pskb,
@@ -27,12 +29,12 @@ target_v0(struct sk_buff **pskb,
 	  const void *targinfo,
 	  void *userinfo)
 {
-	const struct ipt_mark_target_info *markinfo = targinfo;
+	const struct xt_mark_target_info *markinfo = targinfo;
 
 	if((*pskb)->nfmark != markinfo->mark)
 		(*pskb)->nfmark = markinfo->mark;
 
-	return IPT_CONTINUE;
+	return XT_CONTINUE;
 }
 
 static unsigned int
@@ -43,19 +45,19 @@ target_v1(struct sk_buff **pskb,
 	  const void *targinfo,
 	  void *userinfo)
 {
-	const struct ipt_mark_target_info_v1 *markinfo = targinfo;
+	const struct xt_mark_target_info_v1 *markinfo = targinfo;
 	int mark = 0;
 
 	switch (markinfo->mode) {
-	case IPT_MARK_SET:
+	case XT_MARK_SET:
 		mark = markinfo->mark;
 		break;
 		
-	case IPT_MARK_AND:
+	case XT_MARK_AND:
 		mark = (*pskb)->nfmark & markinfo->mark;
 		break;
 		
-	case IPT_MARK_OR:
+	case XT_MARK_OR:
 		mark = (*pskb)->nfmark | markinfo->mark;
 		break;
 	}
@@ -63,23 +65,23 @@ target_v1(struct sk_buff **pskb,
 	if((*pskb)->nfmark != mark)
 		(*pskb)->nfmark = mark;
 
-	return IPT_CONTINUE;
+	return XT_CONTINUE;
 }
 
 
 static int
 checkentry_v0(const char *tablename,
-	      const struct ipt_entry *e,
+	      const void *entry,
 	      void *targinfo,
 	      unsigned int targinfosize,
 	      unsigned int hook_mask)
 {
-	struct ipt_mark_target_info *markinfo = targinfo;
+	struct xt_mark_target_info *markinfo = targinfo;
 
-	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_mark_target_info))) {
+	if (targinfosize != XT_ALIGN(sizeof(struct xt_mark_target_info))) {
 		printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n",
 		       targinfosize,
-		       IPT_ALIGN(sizeof(struct ipt_mark_target_info)));
+		       XT_ALIGN(sizeof(struct xt_mark_target_info)));
 		return 0;
 	}
 
@@ -98,17 +100,17 @@ checkentry_v0(const char *tablename,
 
 static int
 checkentry_v1(const char *tablename,
-	      const struct ipt_entry *e,
+	      const void *entry,
 	      void *targinfo,
 	      unsigned int targinfosize,
 	      unsigned int hook_mask)
 {
-	struct ipt_mark_target_info_v1 *markinfo = targinfo;
+	struct xt_mark_target_info_v1 *markinfo = targinfo;
 
-	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_mark_target_info_v1))){
+	if (targinfosize != XT_ALIGN(sizeof(struct xt_mark_target_info_v1))){
 		printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n",
 		       targinfosize,
-		       IPT_ALIGN(sizeof(struct ipt_mark_target_info_v1)));
+		       XT_ALIGN(sizeof(struct xt_mark_target_info_v1)));
 		return 0;
 	}
 
@@ -117,9 +119,9 @@ checkentry_v1(const char *tablename,
 		return 0;
 	}
 
-	if (markinfo->mode != IPT_MARK_SET
-	    && markinfo->mode != IPT_MARK_AND
-	    && markinfo->mode != IPT_MARK_OR) {
+	if (markinfo->mode != XT_MARK_SET
+	    && markinfo->mode != XT_MARK_AND
+	    && markinfo->mode != XT_MARK_OR) {
 		printk(KERN_WARNING "MARK: unknown mode %u\n",
 		       markinfo->mode);
 		return 0;
@@ -133,7 +135,7 @@ checkentry_v1(const char *tablename,
 	return 1;
 }
 
-static struct ipt_target ipt_mark_reg_v0 = {
+static struct xt_target ipt_mark_reg_v0 = {
 	.name		= "MARK",
 	.target		= target_v0,
 	.checkentry	= checkentry_v0,
@@ -141,7 +143,7 @@ static struct ipt_target ipt_mark_reg_v0 = {
 	.revision	= 0,
 };
 
-static struct ipt_target ipt_mark_reg_v1 = {
+static struct xt_target ipt_mark_reg_v1 = {
 	.name		= "MARK",
 	.target		= target_v1,
 	.checkentry	= checkentry_v1,
@@ -149,23 +151,40 @@ static struct ipt_target ipt_mark_reg_v1 = {
 	.revision	= 1,
 };
 
+static struct xt_target ip6t_mark_reg_v0 = {	/* AF_INET6 MARK, rev 0 */
+	.name		= "MARK",
+	.target		= target_v0,
+	.checkentry	= checkentry_v0,
+	.me		= THIS_MODULE,
+	.revision	= 0,
+};
+
 static int __init init(void)
 {
 	int err;
 
-	err = ipt_register_target(&ipt_mark_reg_v0);
-	if (!err) {
-		err = ipt_register_target(&ipt_mark_reg_v1);
-		if (err)
-			ipt_unregister_target(&ipt_mark_reg_v0);
+	/* Register every MARK target; undo the earlier ones on failure. */
+	err = xt_register_target(AF_INET, &ipt_mark_reg_v0);
+	if (err)
+		return err;
+	err = xt_register_target(AF_INET, &ipt_mark_reg_v1);
+	if (err) {	/* stop here so v0 is not unregistered twice below */
+		xt_unregister_target(AF_INET, &ipt_mark_reg_v0);
+		return err;
+	}
+	err = xt_register_target(AF_INET6, &ip6t_mark_reg_v0);
+	if (err) {
+		xt_unregister_target(AF_INET, &ipt_mark_reg_v1);
+		xt_unregister_target(AF_INET, &ipt_mark_reg_v0);
 	}
 	return err;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_target(&ipt_mark_reg_v0);
-	ipt_unregister_target(&ipt_mark_reg_v1);
+	xt_unregister_target(AF_INET, &ipt_mark_reg_v0);	/* IPv4 rev 0 */
+	xt_unregister_target(AF_INET, &ipt_mark_reg_v1);	/* IPv4 rev 1 */
+	xt_unregister_target(AF_INET6, &ip6t_mark_reg_v0);	/* IPv6 rev 0 */
 }
 
 module_init(init);
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
new file mode 100644
index 00000000000..8b76b6f8d1e
--- /dev/null
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -0,0 +1,107 @@
+/* iptables module for using new netfilter netlink queue
+ *
+ * (C) 2005 by Harald Welte <laforge@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as 
+ * published by the Free Software Foundation.
+ * 
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_arp.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_NFQUEUE.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("[ip,ip6,arp]_tables NFQUEUE target");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_NFQUEUE");
+MODULE_ALIAS("ip6t_NFQUEUE");
+MODULE_ALIAS("arpt_NFQUEUE");
+
+/* Target hook: the verdict is NF_QUEUE_NR() of the rule's queuenum. */
+static unsigned int
+target(struct sk_buff **pskb,
+       const struct net_device *in,
+       const struct net_device *out,
+       unsigned int hooknum,
+       const void *targinfo,
+       void *userinfo)
+{
+	/* targinfo is read as the rule's struct xt_NFQ_info */
+	return NF_QUEUE_NR(((const struct xt_NFQ_info *)targinfo)->queuenum);
+}
+
+/* Accept a rule only if its target data has the aligned xt_NFQ_info size. */
+static int
+checkentry(const char *tablename,
+	   const void *entry,
+           void *targinfo,
+           unsigned int targinfosize,
+           unsigned int hook_mask)
+{
+	if (targinfosize == XT_ALIGN(sizeof(struct xt_NFQ_info)))
+		return 1;
+
+	printk(KERN_WARNING "NFQUEUE: targinfosize %u != %Zu\n",
+	       targinfosize,
+	       XT_ALIGN(sizeof(struct xt_NFQ_info)));
+	return 0;
+}
+
+static struct xt_target ipt_NFQ_reg = {		/* registered for AF_INET */
+	.name		= "NFQUEUE",
+	.target		= target,
+	.checkentry	= checkentry,
+	.me		= THIS_MODULE,
+};
+
+static struct xt_target ip6t_NFQ_reg = {	/* registered for AF_INET6 */
+	.name		= "NFQUEUE",
+	.target		= target,
+	.checkentry	= checkentry,
+	.me		= THIS_MODULE,
+};
+
+static struct xt_target arpt_NFQ_reg = {	/* registered for NF_ARP */
+	.name		= "NFQUEUE",
+	.target		= target,
+	.checkentry	= checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	int err;
+
+	/* Register for IPv4, IPv6 and ARP; undo earlier steps on failure. */
+	err = xt_register_target(AF_INET, &ipt_NFQ_reg);
+	if (err)
+		goto fail;
+	err = xt_register_target(AF_INET6, &ip6t_NFQ_reg);
+	if (err)
+		goto undo_ipv4;
+	err = xt_register_target(NF_ARP, &arpt_NFQ_reg);
+	if (!err)
+		return 0;
+
+	xt_unregister_target(AF_INET6, &ip6t_NFQ_reg);
+undo_ipv4:
+	xt_unregister_target(AF_INET, &ipt_NFQ_reg);
+fail:
+	return err;
+}
+
+static void __exit fini(void)
+{
+	xt_unregister_target(NF_ARP, &arpt_NFQ_reg);	/* reverse of init() order */
+	xt_unregister_target(AF_INET6, &ip6t_NFQ_reg);
+	xt_unregister_target(AF_INET, &ipt_NFQ_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index e3c69d072c6..24d477afa93 100644
--- a/net/ipv4/netfilter/ipt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -4,9 +4,12 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 
-#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/x_tables.h>
 #include <net/netfilter/nf_conntrack_compat.h>
 
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_NOTRACK");
+
 static unsigned int
 target(struct sk_buff **pskb,
        const struct net_device *in,
@@ -17,7 +20,7 @@ target(struct sk_buff **pskb,
 {
 	/* Previously seen (loopback)? Ignore. */
 	if ((*pskb)->nfct != NULL)
-		return IPT_CONTINUE;
+		return XT_CONTINUE;
 
 	/* Attach fake conntrack entry. 
 	   If there is a real ct entry correspondig to this packet, 
@@ -27,12 +30,12 @@ target(struct sk_buff **pskb,
 	(*pskb)->nfctinfo = IP_CT_NEW;
 	nf_conntrack_get((*pskb)->nfct);
 
-	return IPT_CONTINUE;
+	return XT_CONTINUE;
 }
 
 static int
 checkentry(const char *tablename,
-	   const struct ipt_entry *e,
+	   const void *entry,
            void *targinfo,
            unsigned int targinfosize,
            unsigned int hook_mask)
@@ -51,26 +54,39 @@ checkentry(const char *tablename,
 	return 1;
 }
 
-static struct ipt_target ipt_notrack_reg = { 
+static struct xt_target notrack_reg = { 
 	.name = "NOTRACK", 
 	.target = target, 
 	.checkentry = checkentry,
-	.me = THIS_MODULE 
+	.me = THIS_MODULE,
+};
+static struct xt_target notrack6_reg = { 
+	.name = "NOTRACK", 
+	.target = target, 
+	.checkentry = checkentry,
+	.me = THIS_MODULE,
 };
 
 static int __init init(void)
 {
-	if (ipt_register_target(&ipt_notrack_reg))
-		return -EINVAL;
+	int err = xt_register_target(AF_INET, &notrack_reg);
+
+	/* IPv4 failed: nothing registered yet, so nothing to undo. */
+	if (err)
+		return err;
 
-	return 0;
+	/* Then IPv6; if that fails, roll the IPv4 registration back. */
+	err = xt_register_target(AF_INET6, &notrack6_reg);
+	if (err)
+		xt_unregister_target(AF_INET, &notrack_reg);
+	return err;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_target(&ipt_notrack_reg);
+	xt_unregister_target(AF_INET6, &notrack6_reg);	/* reverse of init() order */
+	xt_unregister_target(AF_INET, &notrack_reg);
 }
 
 module_init(init);
 module_exit(fini);
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ipt_comment.c b/net/netfilter/xt_comment.c
index 6b76a1ea524..4ba6fd65c6e 100644
--- a/net/ipv4/netfilter/ipt_comment.c
+++ b/net/netfilter/xt_comment.c
@@ -6,12 +6,14 @@
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_comment.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_comment.h>
 
 MODULE_AUTHOR("Brad Fisher <brad@info-link.net>");
 MODULE_DESCRIPTION("iptables comment match module");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_comment");
+MODULE_ALIAS("ip6t_comment");
 
 static int
 match(const struct sk_buff *skb,
@@ -19,6 +21,7 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protooff,
       int *hotdrop)
 {
 	/* We always match */
@@ -27,18 +30,25 @@ match(const struct sk_buff *skb,
 
 static int
 checkentry(const char *tablename,
-           const struct ipt_ip *ip,
+           const void *ip,
            void *matchinfo,
            unsigned int matchsize,
            unsigned int hook_mask)
 {
 	/* Check the size */
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_comment_info)))
+	if (matchsize != XT_ALIGN(sizeof(struct xt_comment_info)))
 		return 0;
 	return 1;
 }
 
-static struct ipt_match comment_match = {
+static struct xt_match comment_match = {
+	.name		= "comment",
+	.match		= match,
+	.checkentry	= checkentry,
+	.me		= THIS_MODULE
+};
+
+static struct xt_match comment6_match = {
 	.name		= "comment",
 	.match		= match,
 	.checkentry	= checkentry,
@@ -47,12 +57,23 @@ static struct ipt_match comment_match = {
 
 static int __init init(void)
 {
-	return ipt_register_match(&comment_match);
+	int err = xt_register_match(AF_INET, &comment_match);
+
+	/* IPv4 failed: nothing registered yet, nothing to undo. */
+	if (err)
+		return err;
+
+	/* Then IPv6; on failure take the IPv4 match back out. */
+	err = xt_register_match(AF_INET6, &comment6_match);
+	if (err)
+		xt_unregister_match(AF_INET, &comment_match);
+	return err;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_match(&comment_match);
+	xt_unregister_match(AF_INET, &comment_match);	/* IPv4 instance */
+	xt_unregister_match(AF_INET6, &comment6_match);	/* IPv6 instance */
 }
 
 module_init(init);
diff --git a/net/ipv4/netfilter/ipt_connbytes.c b/net/netfilter/xt_connbytes.c
index d68a048b717..150d2a4b0f7 100644
--- a/net/ipv4/netfilter/ipt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -6,13 +6,15 @@
  * 	- add functionality to match number of packets
  * 	- add functionality to match average packet size
  * 	- add support to match directions seperately
+ * 2005-10-16 Harald Welte <laforge@netfilter.org>
+ * 	- Port to x_tables
  *
  */
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <net/netfilter/nf_conntrack_compat.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_connbytes.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_connbytes.h>
 
 #include <asm/div64.h>
 #include <asm/bitops.h>
@@ -20,6 +22,7 @@
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
 MODULE_DESCRIPTION("iptables match for matching number of pkts/bytes per connection");
+MODULE_ALIAS("ipt_connbytes");
 
 /* 64bit divisor, dividend and result. dynamic precision */
 static u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
@@ -43,9 +46,10 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protoff,
       int *hotdrop)
 {
-	const struct ipt_connbytes_info *sinfo = matchinfo;
+	const struct xt_connbytes_info *sinfo = matchinfo;
 	u_int64_t what = 0;	/* initialize to make gcc happy */
 	const struct ip_conntrack_counter *counters;
 
@@ -53,45 +57,45 @@ match(const struct sk_buff *skb,
 		return 0; /* no match */
 
 	switch (sinfo->what) {
-	case IPT_CONNBYTES_PKTS:
+	case XT_CONNBYTES_PKTS:
 		switch (sinfo->direction) {
-		case IPT_CONNBYTES_DIR_ORIGINAL:
+		case XT_CONNBYTES_DIR_ORIGINAL:
 			what = counters[IP_CT_DIR_ORIGINAL].packets;
 			break;
-		case IPT_CONNBYTES_DIR_REPLY:
+		case XT_CONNBYTES_DIR_REPLY:
 			what = counters[IP_CT_DIR_REPLY].packets;
 			break;
-		case IPT_CONNBYTES_DIR_BOTH:
+		case XT_CONNBYTES_DIR_BOTH:
 			what = counters[IP_CT_DIR_ORIGINAL].packets;
 			what += counters[IP_CT_DIR_REPLY].packets;
 			break;
 		}
 		break;
-	case IPT_CONNBYTES_BYTES:
+	case XT_CONNBYTES_BYTES:
 		switch (sinfo->direction) {
-		case IPT_CONNBYTES_DIR_ORIGINAL:
+		case XT_CONNBYTES_DIR_ORIGINAL:
 			what = counters[IP_CT_DIR_ORIGINAL].bytes;
 			break;
-		case IPT_CONNBYTES_DIR_REPLY:
+		case XT_CONNBYTES_DIR_REPLY:
 			what = counters[IP_CT_DIR_REPLY].bytes;
 			break;
-		case IPT_CONNBYTES_DIR_BOTH:
+		case XT_CONNBYTES_DIR_BOTH:
 			what = counters[IP_CT_DIR_ORIGINAL].bytes;
 			what += counters[IP_CT_DIR_REPLY].bytes;
 			break;
 		}
 		break;
-	case IPT_CONNBYTES_AVGPKT:
+	case XT_CONNBYTES_AVGPKT:
 		switch (sinfo->direction) {
-		case IPT_CONNBYTES_DIR_ORIGINAL:
+		case XT_CONNBYTES_DIR_ORIGINAL:
 			what = div64_64(counters[IP_CT_DIR_ORIGINAL].bytes,
 					counters[IP_CT_DIR_ORIGINAL].packets);
 			break;
-		case IPT_CONNBYTES_DIR_REPLY:
+		case XT_CONNBYTES_DIR_REPLY:
 			what = div64_64(counters[IP_CT_DIR_REPLY].bytes,
 					counters[IP_CT_DIR_REPLY].packets);
 			break;
-		case IPT_CONNBYTES_DIR_BOTH:
+		case XT_CONNBYTES_DIR_BOTH:
 			{
 				u_int64_t bytes;
 				u_int64_t pkts;
@@ -117,30 +121,36 @@ match(const struct sk_buff *skb,
 }
 
 static int check(const char *tablename,
-		 const struct ipt_ip *ip,
+		 const void *ip,
 		 void *matchinfo,
 		 unsigned int matchsize,
 		 unsigned int hook_mask)
 {
-	const struct ipt_connbytes_info *sinfo = matchinfo;
+	const struct xt_connbytes_info *sinfo = matchinfo;
 
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_connbytes_info)))
+	if (matchsize != XT_ALIGN(sizeof(struct xt_connbytes_info)))
 		return 0;
 
-	if (sinfo->what != IPT_CONNBYTES_PKTS &&
-	    sinfo->what != IPT_CONNBYTES_BYTES &&
-	    sinfo->what != IPT_CONNBYTES_AVGPKT)
+	if (sinfo->what != XT_CONNBYTES_PKTS &&
+	    sinfo->what != XT_CONNBYTES_BYTES &&
+	    sinfo->what != XT_CONNBYTES_AVGPKT)
 		return 0;
 
-	if (sinfo->direction != IPT_CONNBYTES_DIR_ORIGINAL &&
-	    sinfo->direction != IPT_CONNBYTES_DIR_REPLY &&
-	    sinfo->direction != IPT_CONNBYTES_DIR_BOTH)
+	if (sinfo->direction != XT_CONNBYTES_DIR_ORIGINAL &&
+	    sinfo->direction != XT_CONNBYTES_DIR_REPLY &&
+	    sinfo->direction != XT_CONNBYTES_DIR_BOTH)
 		return 0;
 
 	return 1;
 }
 
-static struct ipt_match state_match = {
+static struct xt_match connbytes_match = {
+	.name		= "connbytes",
+	.match		= &match,
+	.checkentry	= &check,
+	.me		= THIS_MODULE
+};
+static struct xt_match connbytes6_match = {
 	.name		= "connbytes",
 	.match		= &match,
 	.checkentry	= &check,
@@ -149,12 +159,21 @@ static struct ipt_match state_match = {
 
 static int __init init(void)
 {
-	return ipt_register_match(&state_match);
+	int err = xt_register_match(AF_INET, &connbytes_match);
+
+	if (!err) {
+		/* IPv4 is in: add IPv6, or roll IPv4 back if that fails. */
+		err = xt_register_match(AF_INET6, &connbytes6_match);
+		if (err)
+			xt_unregister_match(AF_INET, &connbytes_match);
+	}
+	return err;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_match(&state_match);
+	xt_unregister_match(AF_INET, &connbytes_match);	/* IPv4 instance */
+	xt_unregister_match(AF_INET6, &connbytes6_match);	/* IPv6 instance */
 }
 
 module_init(init);
diff --git a/net/ipv4/netfilter/ipt_connmark.c b/net/netfilter/xt_connmark.c
index 5306ef293b9..d06e925032d 100644
--- a/net/ipv4/netfilter/ipt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -25,9 +25,10 @@
 MODULE_AUTHOR("Henrik Nordstrom <hno@marasytems.com>");
 MODULE_DESCRIPTION("IP tables connmark match module");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_connmark");
 
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_connmark.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_connmark.h>
 #include <net/netfilter/nf_conntrack_compat.h>
 
 static int
@@ -36,9 +37,10 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protoff,
       int *hotdrop)
 {
-	const struct ipt_connmark_info *info = matchinfo;
+	const struct xt_connmark_info *info = matchinfo;
 	u_int32_t ctinfo;
 	const u_int32_t *ctmark = nf_ct_get_mark(skb, &ctinfo);
 	if (!ctmark)
@@ -49,14 +51,14 @@ match(const struct sk_buff *skb,
 
 static int
 checkentry(const char *tablename,
-	   const struct ipt_ip *ip,
+	   const void *ip,
 	   void *matchinfo,
 	   unsigned int matchsize,
 	   unsigned int hook_mask)
 {
-	struct ipt_connmark_info *cm = 
-				(struct ipt_connmark_info *)matchinfo;
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_connmark_info)))
+	struct xt_connmark_info *cm = 
+				(struct xt_connmark_info *)matchinfo;
+	if (matchsize != XT_ALIGN(sizeof(struct xt_connmark_info)))
 		return 0;
 
 	if (cm->mark > 0xffffffff || cm->mask > 0xffffffff) {
@@ -67,21 +69,40 @@ checkentry(const char *tablename,
 	return 1;
 }
 
-static struct ipt_match connmark_match = {
+static struct xt_match connmark_match = {
+	.name = "connmark",
+	.match = &match,
+	.checkentry = &checkentry,
+	.me = THIS_MODULE
+};
+static struct xt_match connmark6_match = {
 	.name = "connmark",
 	.match = &match,
 	.checkentry = &checkentry,
 	.me = THIS_MODULE
 };
 
+
 static int __init init(void)
 {
-	return ipt_register_match(&connmark_match);
+	int err;
+
+	need_conntrack();
+
+	err = xt_register_match(AF_INET, &connmark_match);
+	if (err)
+		goto out;
+	err = xt_register_match(AF_INET6, &connmark6_match);
+	if (err)	/* IPv6 failed: withdraw the IPv4 match again */
+		xt_unregister_match(AF_INET, &connmark_match);
+out:
+	return err;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_match(&connmark_match);
+	xt_unregister_match(AF_INET6, &connmark6_match);	/* reverse of init() order */
+	xt_unregister_match(AF_INET, &connmark_match);
 }
 
 module_init(init);
diff --git a/net/ipv4/netfilter/ipt_conntrack.c b/net/netfilter/xt_conntrack.c
index c8d18705469..ffdebc95eb9 100644
--- a/net/ipv4/netfilter/ipt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -18,12 +18,13 @@
 #include <net/netfilter/nf_conntrack.h>
 #endif
 
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_conntrack.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_conntrack.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
 MODULE_DESCRIPTION("iptables connection tracking match module");
+MODULE_ALIAS("ipt_conntrack");
 
 #if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
 
@@ -33,9 +34,10 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protoff,
       int *hotdrop)
 {
-	const struct ipt_conntrack_info *sinfo = matchinfo;
+	const struct xt_conntrack_info *sinfo = matchinfo;
 	struct ip_conntrack *ct;
 	enum ip_conntrack_info ctinfo;
 	unsigned int statebit;
@@ -45,58 +47,58 @@ match(const struct sk_buff *skb,
 #define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg))
 
 	if (ct == &ip_conntrack_untracked)
-		statebit = IPT_CONNTRACK_STATE_UNTRACKED;
+		statebit = XT_CONNTRACK_STATE_UNTRACKED;
 	else if (ct)
- 		statebit = IPT_CONNTRACK_STATE_BIT(ctinfo);
+ 		statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
  	else
- 		statebit = IPT_CONNTRACK_STATE_INVALID;
+ 		statebit = XT_CONNTRACK_STATE_INVALID;
  
-	if(sinfo->flags & IPT_CONNTRACK_STATE) {
+	if(sinfo->flags & XT_CONNTRACK_STATE) {
 		if (ct) {
 			if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip !=
 			    ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip)
-				statebit |= IPT_CONNTRACK_STATE_SNAT;
+				statebit |= XT_CONNTRACK_STATE_SNAT;
 
 			if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip !=
 			    ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip)
-				statebit |= IPT_CONNTRACK_STATE_DNAT;
+				statebit |= XT_CONNTRACK_STATE_DNAT;
 		}
 
-		if (FWINV((statebit & sinfo->statemask) == 0, IPT_CONNTRACK_STATE))
+		if (FWINV((statebit & sinfo->statemask) == 0, XT_CONNTRACK_STATE))
 			return 0;
 	}
 
-	if(sinfo->flags & IPT_CONNTRACK_PROTO) {
-		if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, IPT_CONNTRACK_PROTO))
+	if(sinfo->flags & XT_CONNTRACK_PROTO) {
+		if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, XT_CONNTRACK_PROTO))
                 	return 0;
 	}
 
-	if(sinfo->flags & IPT_CONNTRACK_ORIGSRC) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, IPT_CONNTRACK_ORIGSRC))
+	if(sinfo->flags & XT_CONNTRACK_ORIGSRC) {
+		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, XT_CONNTRACK_ORIGSRC))
 			return 0;
 	}
 
-	if(sinfo->flags & IPT_CONNTRACK_ORIGDST) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, IPT_CONNTRACK_ORIGDST))
+	if(sinfo->flags & XT_CONNTRACK_ORIGDST) {
+		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, XT_CONNTRACK_ORIGDST))
 			return 0;
 	}
 
-	if(sinfo->flags & IPT_CONNTRACK_REPLSRC) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, IPT_CONNTRACK_REPLSRC))
+	if(sinfo->flags & XT_CONNTRACK_REPLSRC) {
+		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, XT_CONNTRACK_REPLSRC))
 			return 0;
 	}
 
-	if(sinfo->flags & IPT_CONNTRACK_REPLDST) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, IPT_CONNTRACK_REPLDST))
+	if(sinfo->flags & XT_CONNTRACK_REPLDST) {
+		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, XT_CONNTRACK_REPLDST))
 			return 0;
 	}
 
-	if(sinfo->flags & IPT_CONNTRACK_STATUS) {
-		if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, IPT_CONNTRACK_STATUS))
+	if(sinfo->flags & XT_CONNTRACK_STATUS) {
+		if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, XT_CONNTRACK_STATUS))
 			return 0;
 	}
 
-	if(sinfo->flags & IPT_CONNTRACK_EXPIRES) {
+	if(sinfo->flags & XT_CONNTRACK_EXPIRES) {
 		unsigned long expires;
 
 		if(!ct)
@@ -104,7 +106,7 @@ match(const struct sk_buff *skb,
 
 		expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0;
 
-		if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), IPT_CONNTRACK_EXPIRES))
+		if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), XT_CONNTRACK_EXPIRES))
 			return 0;
 	}
 
@@ -118,9 +120,10 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protoff,
       int *hotdrop)
 {
-	const struct ipt_conntrack_info *sinfo = matchinfo;
+	const struct xt_conntrack_info *sinfo = matchinfo;
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	unsigned int statebit;
@@ -130,58 +133,58 @@ match(const struct sk_buff *skb,
 #define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg))
 
 	if (ct == &nf_conntrack_untracked)
-		statebit = IPT_CONNTRACK_STATE_UNTRACKED;
+		statebit = XT_CONNTRACK_STATE_UNTRACKED;
 	else if (ct)
- 		statebit = IPT_CONNTRACK_STATE_BIT(ctinfo);
+ 		statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
  	else
- 		statebit = IPT_CONNTRACK_STATE_INVALID;
+ 		statebit = XT_CONNTRACK_STATE_INVALID;
  
-	if(sinfo->flags & IPT_CONNTRACK_STATE) {
+	if(sinfo->flags & XT_CONNTRACK_STATE) {
 		if (ct) {
 			if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip !=
 			    ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip)
-				statebit |= IPT_CONNTRACK_STATE_SNAT;
+				statebit |= XT_CONNTRACK_STATE_SNAT;
 
 			if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip !=
 			    ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip)
-				statebit |= IPT_CONNTRACK_STATE_DNAT;
+				statebit |= XT_CONNTRACK_STATE_DNAT;
 		}
 
-		if (FWINV((statebit & sinfo->statemask) == 0, IPT_CONNTRACK_STATE))
+		if (FWINV((statebit & sinfo->statemask) == 0, XT_CONNTRACK_STATE))
 			return 0;
 	}
 
-	if(sinfo->flags & IPT_CONNTRACK_PROTO) {
-		if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, IPT_CONNTRACK_PROTO))
+	if(sinfo->flags & XT_CONNTRACK_PROTO) {
+		if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, XT_CONNTRACK_PROTO))
                 	return 0;
 	}
 
-	if(sinfo->flags & IPT_CONNTRACK_ORIGSRC) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, IPT_CONNTRACK_ORIGSRC))
+	if(sinfo->flags & XT_CONNTRACK_ORIGSRC) {
+		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, XT_CONNTRACK_ORIGSRC))
 			return 0;
 	}
 
-	if(sinfo->flags & IPT_CONNTRACK_ORIGDST) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, IPT_CONNTRACK_ORIGDST))
+	if(sinfo->flags & XT_CONNTRACK_ORIGDST) {
+		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, XT_CONNTRACK_ORIGDST))
 			return 0;
 	}
 
-	if(sinfo->flags & IPT_CONNTRACK_REPLSRC) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, IPT_CONNTRACK_REPLSRC))
+	if(sinfo->flags & XT_CONNTRACK_REPLSRC) {
+		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, XT_CONNTRACK_REPLSRC))
 			return 0;
 	}
 
-	if(sinfo->flags & IPT_CONNTRACK_REPLDST) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, IPT_CONNTRACK_REPLDST))
+	if(sinfo->flags & XT_CONNTRACK_REPLDST) {
+		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, XT_CONNTRACK_REPLDST))
 			return 0;
 	}
 
-	if(sinfo->flags & IPT_CONNTRACK_STATUS) {
-		if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, IPT_CONNTRACK_STATUS))
+	if(sinfo->flags & XT_CONNTRACK_STATUS) {
+		if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, XT_CONNTRACK_STATUS))
 			return 0;
 	}
 
-	if(sinfo->flags & IPT_CONNTRACK_EXPIRES) {
+	if(sinfo->flags & XT_CONNTRACK_EXPIRES) {
 		unsigned long expires;
 
 		if(!ct)
@@ -189,7 +192,7 @@ match(const struct sk_buff *skb,
 
 		expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0;
 
-		if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), IPT_CONNTRACK_EXPIRES))
+		if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), XT_CONNTRACK_EXPIRES))
 			return 0;
 	}
 
@@ -199,18 +202,18 @@ match(const struct sk_buff *skb,
 #endif /* CONFIG_NF_IP_CONNTRACK */
 
 static int check(const char *tablename,
-		 const struct ipt_ip *ip,
+		 const void *ip,
 		 void *matchinfo,
 		 unsigned int matchsize,
 		 unsigned int hook_mask)
 {
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_conntrack_info)))
+	if (matchsize != XT_ALIGN(sizeof(struct xt_conntrack_info)))
 		return 0;
 
 	return 1;
 }
 
-static struct ipt_match conntrack_match = {
+static struct xt_match conntrack_match = {
 	.name		= "conntrack",
 	.match		= &match,
 	.checkentry	= &check,
@@ -219,13 +222,16 @@ static struct ipt_match conntrack_match = {
 
 static int __init init(void)
 {
-	need_ip_conntrack();
-	return ipt_register_match(&conntrack_match);
+	need_conntrack();
+
+	/* Only an AF_INET registration is made here; the result of
+	 * xt_register_match() is handed straight back to the caller. */
+	return xt_register_match(AF_INET, &conntrack_match);
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_match(&conntrack_match);
+	xt_unregister_match(AF_INET, &conntrack_match);	/* undoes init()'s AF_INET registration */
 }
 
 module_init(init);
diff --git a/net/ipv4/netfilter/ipt_dccp.c b/net/netfilter/xt_dccp.c
index ad3278bba6c..779f42fc952 100644
--- a/net/ipv4/netfilter/ipt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -14,8 +14,16 @@
 #include <net/ip.h>
 #include <linux/dccp.h>
 
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_dccp.h>
+
 #include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_dccp.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("Match for DCCP protocol packets");
+MODULE_ALIAS("ipt_dccp");
 
 #define DCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \
 		                  || (!!((invflag) & (option)) ^ (cond)))
@@ -26,6 +34,7 @@ static DEFINE_SPINLOCK(dccp_buflock);
 static inline int
 dccp_find_option(u_int8_t option,
 		 const struct sk_buff *skb,
+		 unsigned int protoff,
 		 const struct dccp_hdr *dh,
 		 int *hotdrop)
 {
@@ -44,9 +53,7 @@ dccp_find_option(u_int8_t option,
 		return 0;
 
 	spin_lock_bh(&dccp_buflock);
-	op = skb_header_pointer(skb,
-				skb->nh.iph->ihl*4 + optoff,
-				optlen, dccp_optbuf);
+	op = skb_header_pointer(skb, protoff + optoff, optlen, dccp_optbuf);
 	if (op == NULL) {
 		/* If we don't have the whole header, drop packet. */
 		spin_unlock_bh(&dccp_buflock);
@@ -78,10 +85,10 @@ match_types(const struct dccp_hdr *dh, u_int16_t typemask)
 }
 
 static inline int
-match_option(u_int8_t option, const struct sk_buff *skb,
+match_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff,
 	     const struct dccp_hdr *dh, int *hotdrop)
 {
-	return dccp_find_option(option, skb, dh, hotdrop);
+	return dccp_find_option(option, skb, protoff, dh, hotdrop);
 }
 
 static int
@@ -90,16 +97,17 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protoff,
       int *hotdrop)
 {
-	const struct ipt_dccp_info *info = 
-				(const struct ipt_dccp_info *)matchinfo;
+	const struct xt_dccp_info *info = 
+				(const struct xt_dccp_info *)matchinfo;
 	struct dccp_hdr _dh, *dh;
 
 	if (offset)
 		return 0;
 	
-	dh = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_dh), &_dh);
+	dh = skb_header_pointer(skb, protoff, sizeof(_dh), &_dh);
 	if (dh == NULL) {
 		*hotdrop = 1;
 		return 0;
@@ -107,42 +115,73 @@ match(const struct sk_buff *skb,
 
 	return  DCCHECK(((ntohs(dh->dccph_sport) >= info->spts[0]) 
 			&& (ntohs(dh->dccph_sport) <= info->spts[1])), 
-		   	IPT_DCCP_SRC_PORTS, info->flags, info->invflags)
+		   	XT_DCCP_SRC_PORTS, info->flags, info->invflags)
 		&& DCCHECK(((ntohs(dh->dccph_dport) >= info->dpts[0]) 
 			&& (ntohs(dh->dccph_dport) <= info->dpts[1])), 
-			IPT_DCCP_DEST_PORTS, info->flags, info->invflags)
+			XT_DCCP_DEST_PORTS, info->flags, info->invflags)
 		&& DCCHECK(match_types(dh, info->typemask),
-			   IPT_DCCP_TYPE, info->flags, info->invflags)
-		&& DCCHECK(match_option(info->option, skb, dh, hotdrop),
-			   IPT_DCCP_OPTION, info->flags, info->invflags);
+			   XT_DCCP_TYPE, info->flags, info->invflags)
+		&& DCCHECK(match_option(info->option, skb, protoff, dh,
+					hotdrop),
+			   XT_DCCP_OPTION, info->flags, info->invflags);
 }
 
 static int
 checkentry(const char *tablename,
-	   const struct ipt_ip *ip,
+	   const void *inf,
 	   void *matchinfo,
 	   unsigned int matchsize,
 	   unsigned int hook_mask)
 {
-	const struct ipt_dccp_info *info;
+	const struct ipt_ip *ip = inf;
+	const struct xt_dccp_info *info;
 
-	info = (const struct ipt_dccp_info *)matchinfo;
+	info = (const struct xt_dccp_info *)matchinfo;
 
 	return ip->proto == IPPROTO_DCCP
-		&& !(ip->invflags & IPT_INV_PROTO)
-		&& matchsize == IPT_ALIGN(sizeof(struct ipt_dccp_info))
-		&& !(info->flags & ~IPT_DCCP_VALID_FLAGS)
-		&& !(info->invflags & ~IPT_DCCP_VALID_FLAGS)
+		&& !(ip->invflags & XT_INV_PROTO)
+		&& matchsize == XT_ALIGN(sizeof(struct xt_dccp_info))
+		&& !(info->flags & ~XT_DCCP_VALID_FLAGS)
+		&& !(info->invflags & ~XT_DCCP_VALID_FLAGS)
 		&& !(info->invflags & ~info->flags);
 }
 
-static struct ipt_match dccp_match = 
+static int
+checkentry6(const char *tablename,
+	   const void *inf,
+	   void *matchinfo,
+	   unsigned int matchsize,
+	   unsigned int hook_mask)
+{
+	const struct ip6t_ip6 *ip = inf;
+	const struct xt_dccp_info *info;
+
+	info = (const struct xt_dccp_info *)matchinfo;
+
+	return ip->proto == IPPROTO_DCCP
+		&& !(ip->invflags & XT_INV_PROTO)
+		&& matchsize == XT_ALIGN(sizeof(struct xt_dccp_info))
+		&& !(info->flags & ~XT_DCCP_VALID_FLAGS)
+		&& !(info->invflags & ~XT_DCCP_VALID_FLAGS)
+		&& !(info->invflags & ~info->flags);
+}
+
+
+static struct xt_match dccp_match = 
 { 
 	.name 		= "dccp",
 	.match		= &match,
 	.checkentry	= &checkentry,
 	.me 		= THIS_MODULE,
 };
+static struct xt_match dccp6_match = 
+{ 
+	.name 		= "dccp",
+	.match		= &match,
+	.checkentry	= &checkentry6,
+	.me 		= THIS_MODULE,
+};
+
 
 static int __init init(void)
 {
@@ -154,23 +193,29 @@ static int __init init(void)
 	dccp_optbuf = kmalloc(256 * 4, GFP_KERNEL);
 	if (!dccp_optbuf)
 		return -ENOMEM;
-	ret = ipt_register_match(&dccp_match);
+	ret = xt_register_match(AF_INET, &dccp_match);
 	if (ret)
-		kfree(dccp_optbuf);
+		goto out_kfree;
+	ret = xt_register_match(AF_INET6, &dccp6_match);
+	if (ret)
+		goto out_unreg;
+
+	return ret;
+
+out_unreg:
+	xt_unregister_match(AF_INET, &dccp_match);
+out_kfree:
+	kfree(dccp_optbuf);
 
 	return ret;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_match(&dccp_match);
+	xt_unregister_match(AF_INET6, &dccp6_match);
+	xt_unregister_match(AF_INET, &dccp_match);
 	kfree(dccp_optbuf);
 }
 
 module_init(init);
 module_exit(fini);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("Match for DCCP protocol packets");
-
diff --git a/net/ipv4/netfilter/ipt_helper.c b/net/netfilter/xt_helper.c
index bf14e1c7798..38b6715e1db 100644
--- a/net/ipv4/netfilter/ipt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -22,12 +22,14 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #endif
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_helper.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_helper.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>");
 MODULE_DESCRIPTION("iptables helper match module");
+MODULE_ALIAS("ipt_helper");
+MODULE_ALIAS("ip6t_helper");
 
 #if 0
 #define DEBUGP printk
@@ -42,27 +44,28 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protoff,
       int *hotdrop)
 {
-	const struct ipt_helper_info *info = matchinfo;
+	const struct xt_helper_info *info = matchinfo;
 	struct ip_conntrack *ct;
 	enum ip_conntrack_info ctinfo;
 	int ret = info->invert;
 	
 	ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
 	if (!ct) {
-		DEBUGP("ipt_helper: Eek! invalid conntrack?\n");
+		DEBUGP("xt_helper: Eek! invalid conntrack?\n");
 		return ret;
 	}
 
 	if (!ct->master) {
-		DEBUGP("ipt_helper: conntrack %p has no master\n", ct);
+		DEBUGP("xt_helper: conntrack %p has no master\n", ct);
 		return ret;
 	}
 
 	read_lock_bh(&ip_conntrack_lock);
 	if (!ct->master->helper) {
-		DEBUGP("ipt_helper: master ct %p has no helper\n", 
+		DEBUGP("xt_helper: master ct %p has no helper\n", 
 			exp->expectant);
 		goto out_unlock;
 	}
@@ -88,27 +91,28 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protoff,
       int *hotdrop)
 {
-	const struct ipt_helper_info *info = matchinfo;
+	const struct xt_helper_info *info = matchinfo;
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	int ret = info->invert;
 	
 	ct = nf_ct_get((struct sk_buff *)skb, &ctinfo);
 	if (!ct) {
-		DEBUGP("ipt_helper: Eek! invalid conntrack?\n");
+		DEBUGP("xt_helper: Eek! invalid conntrack?\n");
 		return ret;
 	}
 
 	if (!ct->master) {
-		DEBUGP("ipt_helper: conntrack %p has no master\n", ct);
+		DEBUGP("xt_helper: conntrack %p has no master\n", ct);
 		return ret;
 	}
 
 	read_lock_bh(&nf_conntrack_lock);
 	if (!ct->master->helper) {
-		DEBUGP("ipt_helper: master ct %p has no helper\n", 
+		DEBUGP("xt_helper: master ct %p has no helper\n", 
 			exp->expectant);
 		goto out_unlock;
 	}
@@ -128,23 +132,29 @@ out_unlock:
 #endif
 
 static int check(const char *tablename,
-		 const struct ipt_ip *ip,
+		 const void *inf,
 		 void *matchinfo,
 		 unsigned int matchsize,
 		 unsigned int hook_mask)
 {
-	struct ipt_helper_info *info = matchinfo;
+	struct xt_helper_info *info = matchinfo;
 
 	info->name[29] = '\0';
 
 	/* verify size */
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_helper_info)))
+	if (matchsize != XT_ALIGN(sizeof(struct xt_helper_info)))
 		return 0;
 
 	return 1;
 }
 
-static struct ipt_match helper_match = {
+static struct xt_match helper_match = {
+	.name		= "helper",
+	.match		= &match,
+	.checkentry	= &check,
+	.me		= THIS_MODULE,
+};
+static struct xt_match helper6_match = {
 	.name		= "helper",
 	.match		= &match,
 	.checkentry	= &check,
@@ -153,13 +163,24 @@ static struct ipt_match helper_match = {
 
 static int __init init(void)
 {
-	need_ip_conntrack();
-	return ipt_register_match(&helper_match);
+	int ret;
+	need_conntrack();
+
+	ret = xt_register_match(AF_INET, &helper_match);
+	if (ret < 0)
+		return ret;
+
+	ret = xt_register_match(AF_INET6, &helper6_match);
+	if (ret < 0)
+		xt_unregister_match(AF_INET, &helper_match);
+
+	return ret;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_match(&helper_match);
+	xt_unregister_match(AF_INET, &helper_match);
+	xt_unregister_match(AF_INET6, &helper6_match);
 }
 
 module_init(init);
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
new file mode 100644
index 00000000000..39c8faea63d
--- /dev/null
+++ b/net/netfilter/xt_length.c
@@ -0,0 +1,99 @@
+/* Kernel module to match packet length. */
+/* (C) 1999-2001 James Morris <jmorris@intercode.com.au>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <net/ip.h>
+
+#include <linux/netfilter/xt_length.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
+MODULE_DESCRIPTION("IP tables packet length matching module");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_length");
+MODULE_ALIAS("ip6t_length");
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      unsigned int protoff,
+      int *hotdrop)
+{
+	const struct xt_length_info *info = matchinfo;
+	u_int16_t pktlen = ntohs(skb->nh.iph->tot_len);
+	
+	return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
+}
+
+static int
+match6(const struct sk_buff *skb,
+       const struct net_device *in,
+       const struct net_device *out,
+       const void *matchinfo,
+       int offset,
+       unsigned int protoff,
+       int *hotdrop)
+{
+	const struct xt_length_info *info = matchinfo;
+	u_int16_t pktlen = ntohs(skb->nh.ipv6h->payload_len) + sizeof(struct ipv6hdr);
+	
+	return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
+}
+
+static int
+checkentry(const char *tablename,
+           const void *ip,
+           void *matchinfo,
+           unsigned int matchsize,
+           unsigned int hook_mask)
+{
+	if (matchsize != XT_ALIGN(sizeof(struct xt_length_info)))
+		return 0;
+
+	return 1;
+}
+
+static struct xt_match length_match = {
+	.name		= "length",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+static struct xt_match length6_match = {
+	.name		= "length",
+	.match		= &match6,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	int ret;
+	ret = xt_register_match(AF_INET, &length_match);
+	if (ret)
+		return ret;
+	ret = xt_register_match(AF_INET6, &length6_match);
+	if (ret)
+		xt_unregister_match(AF_INET, &length_match);
+
+	return ret;
+}
+
+static void __exit fini(void)
+{
+	xt_unregister_match(AF_INET, &length_match);
+	xt_unregister_match(AF_INET6, &length6_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_limit.c b/net/netfilter/xt_limit.c
index 0c24dcc703a..15e40506bc3 100644
--- a/net/ipv4/netfilter/ipt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -18,12 +18,14 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_limit.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_limit.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>");
 MODULE_DESCRIPTION("iptables rate limit match");
+MODULE_ALIAS("ipt_limit");
+MODULE_ALIAS("ip6t_limit");
 
 /* The algorithm used is the Simple Token Bucket Filter (TBF)
  * see net/sched/sch_tbf.c in the linux source tree
@@ -68,9 +70,10 @@ ipt_limit_match(const struct sk_buff *skb,
 		const struct net_device *out,
 		const void *matchinfo,
 		int offset,
+		unsigned int protoff,
 		int *hotdrop)
 {
-	struct ipt_rateinfo *r = ((struct ipt_rateinfo *)matchinfo)->master;
+	struct xt_rateinfo *r = ((struct xt_rateinfo *)matchinfo)->master;
 	unsigned long now = jiffies;
 
 	spin_lock_bh(&limit_lock);
@@ -96,32 +99,32 @@ user2credits(u_int32_t user)
 	/* If multiplying would overflow... */
 	if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
 		/* Divide first. */
-		return (user / IPT_LIMIT_SCALE) * HZ * CREDITS_PER_JIFFY;
+		return (user / XT_LIMIT_SCALE) * HZ * CREDITS_PER_JIFFY;
 
-	return (user * HZ * CREDITS_PER_JIFFY) / IPT_LIMIT_SCALE;
+	return (user * HZ * CREDITS_PER_JIFFY) / XT_LIMIT_SCALE;
 }
 
 static int
 ipt_limit_checkentry(const char *tablename,
-		     const struct ipt_ip *ip,
+		     const void *inf,
 		     void *matchinfo,
 		     unsigned int matchsize,
 		     unsigned int hook_mask)
 {
-	struct ipt_rateinfo *r = matchinfo;
+	struct xt_rateinfo *r = matchinfo;
 
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_rateinfo)))
+	if (matchsize != XT_ALIGN(sizeof(struct xt_rateinfo)))
 		return 0;
 
 	/* Check for overflow. */
 	if (r->burst == 0
 	    || user2credits(r->avg * r->burst) < user2credits(r->avg)) {
-		printk("Overflow in ipt_limit, try lower: %u/%u\n",
+		printk("Overflow in xt_limit, try lower: %u/%u\n",
 		       r->avg, r->burst);
 		return 0;
 	}
 
-	/* User avg in seconds * IPT_LIMIT_SCALE: convert to jiffies *
+	/* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies *
 	   128. */
 	r->prev = jiffies;
 	r->credit = user2credits(r->avg * r->burst);	 /* Credits full. */
@@ -134,7 +137,13 @@ ipt_limit_checkentry(const char *tablename,
 	return 1;
 }
 
-static struct ipt_match ipt_limit_reg = {
+static struct xt_match ipt_limit_reg = {
+	.name		= "limit",
+	.match		= ipt_limit_match,
+	.checkentry	= ipt_limit_checkentry,
+	.me		= THIS_MODULE,
+};
+static struct xt_match limit6_reg = {
 	.name		= "limit",
 	.match		= ipt_limit_match,
 	.checkentry	= ipt_limit_checkentry,
@@ -143,14 +152,23 @@ static struct ipt_match ipt_limit_reg = {
 
 static int __init init(void)
 {
-	if (ipt_register_match(&ipt_limit_reg))
-		return -EINVAL;
-	return 0;
+	int ret;
+	
+	ret = xt_register_match(AF_INET, &ipt_limit_reg);
+	if (ret)
+		return ret;
+	
+	ret = xt_register_match(AF_INET6, &limit6_reg);
+	if (ret)
+		xt_unregister_match(AF_INET, &ipt_limit_reg);
+
+	return ret;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_match(&ipt_limit_reg);
+	xt_unregister_match(AF_INET, &ipt_limit_reg);
+	xt_unregister_match(AF_INET6, &limit6_reg);
 }
 
 module_init(init);
diff --git a/net/ipv4/netfilter/ipt_mac.c b/net/netfilter/xt_mac.c
index 11a459e33f2..0461dcb5fc7 100644
--- a/net/ipv4/netfilter/ipt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -11,13 +11,17 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/if_ether.h>
+#include <linux/etherdevice.h>
 
-#include <linux/netfilter_ipv4/ipt_mac.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/xt_mac.h>
+#include <linux/netfilter/x_tables.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("iptables mac matching module");
+MODULE_ALIAS("ipt_mac");
+MODULE_ALIAS("ip6t_mac");
 
 static int
 match(const struct sk_buff *skb,
@@ -25,21 +29,22 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protoff,
       int *hotdrop)
 {
-    const struct ipt_mac_info *info = matchinfo;
+    const struct xt_mac_info *info = matchinfo;
 
     /* Is mac pointer valid? */
     return (skb->mac.raw >= skb->head
 	    && (skb->mac.raw + ETH_HLEN) <= skb->data
 	    /* If so, compare... */
-	    && ((memcmp(eth_hdr(skb)->h_source, info->srcaddr, ETH_ALEN)
-		== 0) ^ info->invert));
+	    && ((!compare_ether_addr(eth_hdr(skb)->h_source, info->srcaddr))
+		^ info->invert));
 }
 
 static int
 ipt_mac_checkentry(const char *tablename,
-		   const struct ipt_ip *ip,
+		   const void *inf,
 		   void *matchinfo,
 		   unsigned int matchsize,
 		   unsigned int hook_mask)
@@ -48,17 +53,23 @@ ipt_mac_checkentry(const char *tablename,
 	if (hook_mask
 	    & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN)
 		| (1 << NF_IP_FORWARD))) {
-		printk("ipt_mac: only valid for PRE_ROUTING, LOCAL_IN or FORWARD.\n");
+		printk("xt_mac: only valid for PRE_ROUTING, LOCAL_IN or FORWARD.\n");
 		return 0;
 	}
 
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_mac_info)))
+	if (matchsize != XT_ALIGN(sizeof(struct xt_mac_info)))
 		return 0;
 
 	return 1;
 }
 
-static struct ipt_match mac_match = {
+static struct xt_match mac_match = {
+	.name		= "mac",
+	.match		= &match,
+	.checkentry	= &ipt_mac_checkentry,
+	.me		= THIS_MODULE,
+};
+static struct xt_match mac6_match = {
 	.name		= "mac",
 	.match		= &match,
 	.checkentry	= &ipt_mac_checkentry,
@@ -67,12 +78,22 @@ static struct ipt_match mac_match = {
 
 static int __init init(void)
 {
-	return ipt_register_match(&mac_match);
+	int ret;
+	ret = xt_register_match(AF_INET, &mac_match);
+	if (ret)
+		return ret;
+
+	ret = xt_register_match(AF_INET6, &mac6_match);
+	if (ret)
+		xt_unregister_match(AF_INET, &mac_match);
+
+	return ret;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_match(&mac_match);
+	xt_unregister_match(AF_INET, &mac_match);
+	xt_unregister_match(AF_INET6, &mac6_match);
 }
 
 module_init(init);
diff --git a/net/ipv4/netfilter/ipt_mark.c b/net/netfilter/xt_mark.c
index 00bef6cdd3f..2a0ac62b72c 100644
--- a/net/ipv4/netfilter/ipt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -10,12 +10,14 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 
-#include <linux/netfilter_ipv4/ipt_mark.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/xt_mark.h>
+#include <linux/netfilter/x_tables.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
 MODULE_DESCRIPTION("iptables mark matching module");
+MODULE_ALIAS("ipt_mark");
+MODULE_ALIAS("ip6t_mark");
 
 static int
 match(const struct sk_buff *skb,
@@ -23,23 +25,24 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protoff,
       int *hotdrop)
 {
-	const struct ipt_mark_info *info = matchinfo;
+	const struct xt_mark_info *info = matchinfo;
 
 	return ((skb->nfmark & info->mask) == info->mark) ^ info->invert;
 }
 
 static int
 checkentry(const char *tablename,
-           const struct ipt_ip *ip,
+           const void *entry,
            void *matchinfo,
            unsigned int matchsize,
            unsigned int hook_mask)
 {
-	struct ipt_mark_info *minfo = (struct ipt_mark_info *) matchinfo;
+	struct xt_mark_info *minfo = (struct xt_mark_info *) matchinfo;
 
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_mark_info)))
+	if (matchsize != XT_ALIGN(sizeof(struct xt_mark_info)))
 		return 0;
 
 	if (minfo->mark > 0xffffffff || minfo->mask > 0xffffffff) {
@@ -50,7 +53,14 @@ checkentry(const char *tablename,
 	return 1;
 }
 
-static struct ipt_match mark_match = {
+static struct xt_match mark_match = {
+	.name		= "mark",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+static struct xt_match mark6_match = {
 	.name		= "mark",
 	.match		= &match,
 	.checkentry	= &checkentry,
@@ -59,12 +69,22 @@ static struct ipt_match mark_match = {
 
 static int __init init(void)
 {
-	return ipt_register_match(&mark_match);
+	int ret;
+	ret = xt_register_match(AF_INET, &mark_match);
+	if (ret)
+		return ret;
+
+	ret = xt_register_match(AF_INET6, &mark6_match);
+	if (ret)
+		xt_unregister_match(AF_INET, &mark_match);
+
+	return ret;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_match(&mark_match);
+	xt_unregister_match(AF_INET, &mark_match);
+	xt_unregister_match(AF_INET6, &mark6_match);
 }
 
 module_init(init);
diff --git a/net/ipv6/netfilter/ip6t_physdev.c b/net/netfilter/xt_physdev.c
index 71515c86ece..19bb57c14df 100644
--- a/net/ipv6/netfilter/ip6t_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -10,8 +10,8 @@
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
-#include <linux/netfilter_ipv6/ip6t_physdev.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter/xt_physdev.h>
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_bridge.h>
 #define MATCH   1
 #define NOMATCH 0
@@ -19,6 +19,8 @@
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
 MODULE_DESCRIPTION("iptables bridge physical device match module");
+MODULE_ALIAS("ipt_physdev");
+MODULE_ALIAS("ip6t_physdev");
 
 static int
 match(const struct sk_buff *skb,
@@ -31,7 +33,7 @@ match(const struct sk_buff *skb,
 {
 	int i;
 	static const char nulldevname[IFNAMSIZ];
-	const struct ip6t_physdev_info *info = matchinfo;
+	const struct xt_physdev_info *info = matchinfo;
 	unsigned int ret;
 	const char *indev, *outdev;
 	struct nf_bridge_info *nf_bridge;
@@ -41,37 +43,37 @@ match(const struct sk_buff *skb,
 	 * the destination device will be a bridge. */
 	if (!(nf_bridge = skb->nf_bridge)) {
 		/* Return MATCH if the invert flags of the used options are on */
-		if ((info->bitmask & IP6T_PHYSDEV_OP_BRIDGED) &&
-		    !(info->invert & IP6T_PHYSDEV_OP_BRIDGED))
+		if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) &&
+		    !(info->invert & XT_PHYSDEV_OP_BRIDGED))
 			return NOMATCH;
-		if ((info->bitmask & IP6T_PHYSDEV_OP_ISIN) &&
-		    !(info->invert & IP6T_PHYSDEV_OP_ISIN))
+		if ((info->bitmask & XT_PHYSDEV_OP_ISIN) &&
+		    !(info->invert & XT_PHYSDEV_OP_ISIN))
 			return NOMATCH;
-		if ((info->bitmask & IP6T_PHYSDEV_OP_ISOUT) &&
-		    !(info->invert & IP6T_PHYSDEV_OP_ISOUT))
+		if ((info->bitmask & XT_PHYSDEV_OP_ISOUT) &&
+		    !(info->invert & XT_PHYSDEV_OP_ISOUT))
 			return NOMATCH;
-		if ((info->bitmask & IP6T_PHYSDEV_OP_IN) &&
-		    !(info->invert & IP6T_PHYSDEV_OP_IN))
+		if ((info->bitmask & XT_PHYSDEV_OP_IN) &&
+		    !(info->invert & XT_PHYSDEV_OP_IN))
 			return NOMATCH;
-		if ((info->bitmask & IP6T_PHYSDEV_OP_OUT) &&
-		    !(info->invert & IP6T_PHYSDEV_OP_OUT))
+		if ((info->bitmask & XT_PHYSDEV_OP_OUT) &&
+		    !(info->invert & XT_PHYSDEV_OP_OUT))
 			return NOMATCH;
 		return MATCH;
 	}
 
 	/* This only makes sense in the FORWARD and POSTROUTING chains */
-	if ((info->bitmask & IP6T_PHYSDEV_OP_BRIDGED) &&
+	if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) &&
 	    (!!(nf_bridge->mask & BRNF_BRIDGED) ^
-	    !(info->invert & IP6T_PHYSDEV_OP_BRIDGED)))
+	    !(info->invert & XT_PHYSDEV_OP_BRIDGED)))
 		return NOMATCH;
 
-	if ((info->bitmask & IP6T_PHYSDEV_OP_ISIN &&
-	    (!nf_bridge->physindev ^ !!(info->invert & IP6T_PHYSDEV_OP_ISIN))) ||
-	    (info->bitmask & IP6T_PHYSDEV_OP_ISOUT &&
-	    (!nf_bridge->physoutdev ^ !!(info->invert & IP6T_PHYSDEV_OP_ISOUT))))
+	if ((info->bitmask & XT_PHYSDEV_OP_ISIN &&
+	    (!nf_bridge->physindev ^ !!(info->invert & XT_PHYSDEV_OP_ISIN))) ||
+	    (info->bitmask & XT_PHYSDEV_OP_ISOUT &&
+	    (!nf_bridge->physoutdev ^ !!(info->invert & XT_PHYSDEV_OP_ISOUT))))
 		return NOMATCH;
 
-	if (!(info->bitmask & IP6T_PHYSDEV_OP_IN))
+	if (!(info->bitmask & XT_PHYSDEV_OP_IN))
 		goto match_outdev;
 	indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname;
 	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned int); i++) {
@@ -80,11 +82,11 @@ match(const struct sk_buff *skb,
 			& ((const unsigned int *)info->in_mask)[i];
 	}
 
-	if ((ret == 0) ^ !(info->invert & IP6T_PHYSDEV_OP_IN))
+	if ((ret == 0) ^ !(info->invert & XT_PHYSDEV_OP_IN))
 		return NOMATCH;
 
 match_outdev:
-	if (!(info->bitmask & IP6T_PHYSDEV_OP_OUT))
+	if (!(info->bitmask & XT_PHYSDEV_OP_OUT))
 		return MATCH;
 	outdev = nf_bridge->physoutdev ?
 		 nf_bridge->physoutdev->name : nulldevname;
@@ -94,27 +96,34 @@ match_outdev:
 			& ((const unsigned int *)info->out_mask)[i];
 	}
 
-	return (ret != 0) ^ !(info->invert & IP6T_PHYSDEV_OP_OUT);
+	return (ret != 0) ^ !(info->invert & XT_PHYSDEV_OP_OUT);
 }
 
 static int
 checkentry(const char *tablename,
-		       const struct ip6t_ip6 *ip,
+		       const void *ip,
 		       void *matchinfo,
 		       unsigned int matchsize,
 		       unsigned int hook_mask)
 {
-	const struct ip6t_physdev_info *info = matchinfo;
+	const struct xt_physdev_info *info = matchinfo;
 
-	if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_physdev_info)))
+	if (matchsize != XT_ALIGN(sizeof(struct xt_physdev_info)))
 		return 0;
-	if (!(info->bitmask & IP6T_PHYSDEV_OP_MASK) ||
-	    info->bitmask & ~IP6T_PHYSDEV_OP_MASK)
+	if (!(info->bitmask & XT_PHYSDEV_OP_MASK) ||
+	    info->bitmask & ~XT_PHYSDEV_OP_MASK)
 		return 0;
 	return 1;
 }
 
-static struct ip6t_match physdev_match = {
+static struct xt_match physdev_match = {
+	.name		= "physdev",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+static struct xt_match physdev6_match = {
 	.name		= "physdev",
 	.match		= &match,
 	.checkentry	= &checkentry,
@@ -123,12 +132,23 @@ static struct ip6t_match physdev_match = {
 
 static int __init init(void)
 {
-	return ip6t_register_match(&physdev_match);
+	int ret;
+
+	ret = xt_register_match(AF_INET, &physdev_match);
+	if (ret < 0)
+		return ret;
+
+	ret = xt_register_match(AF_INET6, &physdev6_match);
+	if (ret < 0)
+		xt_unregister_match(AF_INET, &physdev_match);
+
+	return ret;
 }
 
 static void __exit fini(void)
 {
-	ip6t_unregister_match(&physdev_match);
+	xt_unregister_match(AF_INET, &physdev_match);
+	xt_unregister_match(AF_INET6, &physdev6_match);
 }
 
 module_init(init);
diff --git a/net/ipv4/netfilter/ipt_pkttype.c b/net/netfilter/xt_pkttype.c
index 8ddb1dc5e5a..ab1b2630f97 100644
--- a/net/ipv4/netfilter/ipt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -10,60 +10,72 @@
 #include <linux/if_ether.h>
 #include <linux/if_packet.h>
 
-#include <linux/netfilter_ipv4/ipt_pkttype.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/xt_pkttype.h>
+#include <linux/netfilter/x_tables.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Michal Ludvig <michal@logix.cz>");
 MODULE_DESCRIPTION("IP tables match to match on linklayer packet type");
+MODULE_ALIAS("ipt_pkttype");
+MODULE_ALIAS("ip6t_pkttype");
 
 static int match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protoff,
       int *hotdrop)
 {
-    const struct ipt_pkttype_info *info = matchinfo;
+	const struct xt_pkttype_info *info = matchinfo;
 
-    return (skb->pkt_type == info->pkttype) ^ info->invert;
+	return (skb->pkt_type == info->pkttype) ^ info->invert;
 }
 
 static int checkentry(const char *tablename,
-		   const struct ipt_ip *ip,
+		   const void *ip,
 		   void *matchinfo,
 		   unsigned int matchsize,
 		   unsigned int hook_mask)
 {
-/*
-	if (hook_mask
-	    & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN)
-		| (1 << NF_IP_FORWARD))) {
-		printk("ipt_pkttype: only valid for PRE_ROUTING, LOCAL_IN or FORWARD.\n");
-		return 0;
-	}
-*/
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_pkttype_info)))
+	if (matchsize != XT_ALIGN(sizeof(struct xt_pkttype_info)))
 		return 0;
 
 	return 1;
 }
 
-static struct ipt_match pkttype_match = {
+static struct xt_match pkttype_match = {
 	.name		= "pkttype",
 	.match		= &match,
 	.checkentry	= &checkentry,
 	.me		= THIS_MODULE,
 };
+static struct xt_match pkttype6_match = {
+	.name		= "pkttype",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
 
 static int __init init(void)
 {
-	return ipt_register_match(&pkttype_match);
+	int ret;
+	ret = xt_register_match(AF_INET, &pkttype_match);
+	if (ret)
+		return ret;
+
+	ret = xt_register_match(AF_INET6, &pkttype6_match);
+	if (ret)
+		xt_unregister_match(AF_INET, &pkttype_match);
+
+	return ret;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_match(&pkttype_match);
+	xt_unregister_match(AF_INET, &pkttype_match);
+	xt_unregister_match(AF_INET6, &pkttype6_match);
 }
 
 module_init(init);
diff --git a/net/ipv4/netfilter/ipt_realm.c b/net/netfilter/xt_realm.c
index 54a6897ebaa..2b7e1781d34 100644
--- a/net/ipv4/netfilter/ipt_realm.c
+++ b/net/netfilter/xt_realm.c
@@ -14,12 +14,14 @@
 #include <linux/netdevice.h>
 #include <net/route.h>
 
-#include <linux/netfilter_ipv4/ipt_realm.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/xt_realm.h>
+#include <linux/netfilter/x_tables.h>
 
 MODULE_AUTHOR("Sampsa Ranta <sampsa@netsonic.fi>");
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("iptables realm match");
+MODULE_DESCRIPTION("X_tables realm match");
+MODULE_ALIAS("ipt_realm");
 
 static int
 match(const struct sk_buff *skb,
@@ -27,16 +29,17 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protoff,
       int *hotdrop)
 {
-	const struct ipt_realm_info *info = matchinfo;
+	const struct xt_realm_info *info = matchinfo;
 	struct dst_entry *dst = skb->dst;
     
 	return (info->id == (dst->tclassid & info->mask)) ^ info->invert;
 }
 
 static int check(const char *tablename,
-                 const struct ipt_ip *ip,
+                 const void *ip,
                  void *matchinfo,
                  unsigned int matchsize,
                  unsigned int hook_mask)
@@ -44,18 +47,18 @@ static int check(const char *tablename,
 	if (hook_mask
 	    & ~((1 << NF_IP_POST_ROUTING) | (1 << NF_IP_FORWARD) |
 	        (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_LOCAL_IN))) {
-		printk("ipt_realm: only valid for POST_ROUTING, LOCAL_OUT, "
+		printk("xt_realm: only valid for POST_ROUTING, LOCAL_OUT, "
 		       "LOCAL_IN or FORWARD.\n");
 		return 0;
 	}
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_realm_info))) {
-		printk("ipt_realm: invalid matchsize.\n");
+	if (matchsize != XT_ALIGN(sizeof(struct xt_realm_info))) {
+		printk("xt_realm: invalid matchsize.\n");
 		return 0;
 	}
 	return 1;
 }
 
-static struct ipt_match realm_match = {
+static struct xt_match realm_match = {
 	.name		= "realm",
 	.match		= match, 
 	.checkentry	= check,
@@ -64,12 +67,12 @@ static struct ipt_match realm_match = {
 
 static int __init init(void)
 {
-	return ipt_register_match(&realm_match);
+	return xt_register_match(AF_INET, &realm_match);
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_match(&realm_match);
+	xt_unregister_match(AF_INET, &realm_match);
 }
 
 module_init(init);
diff --git a/net/ipv4/netfilter/ipt_sctp.c b/net/netfilter/xt_sctp.c
index fe2b327bcaa..10fbfc5ba75 100644
--- a/net/ipv4/netfilter/ipt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -1,10 +1,18 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <net/ip.h>
+#include <net/ipv6.h>
 #include <linux/sctp.h>
 
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_sctp.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_sctp.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Kiran Kumar Immidi");
+MODULE_DESCRIPTION("Match for SCTP protocol packets");
+MODULE_ALIAS("ipt_sctp");
 
 #ifdef DEBUG_SCTP
 #define duprintf(format, args...) printk(format , ## args)
@@ -16,7 +24,7 @@
 					      || (!!((invflag) & (option)) ^ (cond)))
 
 static int
-match_flags(const struct ipt_sctp_flag_info *flag_info,
+match_flags(const struct xt_sctp_flag_info *flag_info,
 	    const int flag_count,
 	    u_int8_t chunktype,
 	    u_int8_t chunkflags)
@@ -32,15 +40,15 @@ match_flags(const struct ipt_sctp_flag_info *flag_info,
 	return 1;
 }
 
-static int
+static inline int
 match_packet(const struct sk_buff *skb,
+	     unsigned int offset,
 	     const u_int32_t *chunkmap,
 	     int chunk_match_type,
-	     const struct ipt_sctp_flag_info *flag_info,
+	     const struct xt_sctp_flag_info *flag_info,
 	     const int flag_count,
 	     int *hotdrop)
 {
-	int offset;
 	u_int32_t chunkmapcopy[256 / sizeof (u_int32_t)];
 	sctp_chunkhdr_t _sch, *sch;
 
@@ -52,7 +60,6 @@ match_packet(const struct sk_buff *skb,
 		SCTP_CHUNKMAP_COPY(chunkmapcopy, chunkmap);
 	}
 
-	offset = skb->nh.iph->ihl * 4 + sizeof (sctp_sctphdr_t);
 	do {
 		sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch);
 		if (sch == NULL) {
@@ -118,19 +125,20 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protoff,
       int *hotdrop)
 {
-	const struct ipt_sctp_info *info;
+	const struct xt_sctp_info *info;
 	sctp_sctphdr_t _sh, *sh;
 
-	info = (const struct ipt_sctp_info *)matchinfo;
+	info = (const struct xt_sctp_info *)matchinfo;
 
 	if (offset) {
 		duprintf("Dropping non-first fragment.. FIXME\n");
 		return 0;
 	}
 	
-	sh = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_sh), &_sh);
+	sh = skb_header_pointer(skb, protoff, sizeof(_sh), &_sh);
 	if (sh == NULL) {
 		duprintf("Dropping evil TCP offset=0 tinygram.\n");
 		*hotdrop = 1;
@@ -140,64 +148,103 @@ match(const struct sk_buff *skb,
 
 	return  SCCHECK(((ntohs(sh->source) >= info->spts[0]) 
 			&& (ntohs(sh->source) <= info->spts[1])), 
-		   	IPT_SCTP_SRC_PORTS, info->flags, info->invflags)
+		   	XT_SCTP_SRC_PORTS, info->flags, info->invflags)
 		&& SCCHECK(((ntohs(sh->dest) >= info->dpts[0]) 
 			&& (ntohs(sh->dest) <= info->dpts[1])), 
-			IPT_SCTP_DEST_PORTS, info->flags, info->invflags)
-		&& SCCHECK(match_packet(skb, info->chunkmap, info->chunk_match_type,
+			XT_SCTP_DEST_PORTS, info->flags, info->invflags)
+		&& SCCHECK(match_packet(skb, protoff,
+					info->chunkmap, info->chunk_match_type,
  					info->flag_info, info->flag_count, 
 					hotdrop),
-			   IPT_SCTP_CHUNK_TYPES, info->flags, info->invflags);
+			   XT_SCTP_CHUNK_TYPES, info->flags, info->invflags);
 }
 
 static int
 checkentry(const char *tablename,
-	   const struct ipt_ip *ip,
+	   const void *inf,
+	   void *matchinfo,
+	   unsigned int matchsize,
+	   unsigned int hook_mask)
+{
+	const struct xt_sctp_info *info;
+	const struct ipt_ip *ip = inf;
+
+	info = (const struct xt_sctp_info *)matchinfo;
+
+	return ip->proto == IPPROTO_SCTP
+		&& !(ip->invflags & XT_INV_PROTO)
+		&& matchsize == XT_ALIGN(sizeof(struct xt_sctp_info))
+		&& !(info->flags & ~XT_SCTP_VALID_FLAGS)
+		&& !(info->invflags & ~XT_SCTP_VALID_FLAGS)
+		&& !(info->invflags & ~info->flags)
+		&& ((!(info->flags & XT_SCTP_CHUNK_TYPES)) || 
+			(info->chunk_match_type &
+				(SCTP_CHUNK_MATCH_ALL 
+				| SCTP_CHUNK_MATCH_ANY
+				| SCTP_CHUNK_MATCH_ONLY)));
+}
+
+static int
+checkentry6(const char *tablename,
+	   const void *inf,
 	   void *matchinfo,
 	   unsigned int matchsize,
 	   unsigned int hook_mask)
 {
-	const struct ipt_sctp_info *info;
+	const struct xt_sctp_info *info;
+	const struct ip6t_ip6 *ip = inf;
 
-	info = (const struct ipt_sctp_info *)matchinfo;
+	info = (const struct xt_sctp_info *)matchinfo;
 
 	return ip->proto == IPPROTO_SCTP
-		&& !(ip->invflags & IPT_INV_PROTO)
-		&& matchsize == IPT_ALIGN(sizeof(struct ipt_sctp_info))
-		&& !(info->flags & ~IPT_SCTP_VALID_FLAGS)
-		&& !(info->invflags & ~IPT_SCTP_VALID_FLAGS)
+		&& !(ip->invflags & XT_INV_PROTO)
+		&& matchsize == XT_ALIGN(sizeof(struct xt_sctp_info))
+		&& !(info->flags & ~XT_SCTP_VALID_FLAGS)
+		&& !(info->invflags & ~XT_SCTP_VALID_FLAGS)
 		&& !(info->invflags & ~info->flags)
-		&& ((!(info->flags & IPT_SCTP_CHUNK_TYPES)) || 
+		&& ((!(info->flags & XT_SCTP_CHUNK_TYPES)) || 
 			(info->chunk_match_type &
 				(SCTP_CHUNK_MATCH_ALL 
 				| SCTP_CHUNK_MATCH_ANY
 				| SCTP_CHUNK_MATCH_ONLY)));
 }
 
-static struct ipt_match sctp_match = 
+
+static struct xt_match sctp_match = 
 { 
-	.list = { NULL, NULL},
 	.name = "sctp",
 	.match = &match,
 	.checkentry = &checkentry,
-	.destroy = NULL,
+	.me = THIS_MODULE
+};
+static struct xt_match sctp6_match = 
+{ 
+	.name = "sctp",
+	.match = &match,
+	.checkentry = &checkentry6,
 	.me = THIS_MODULE
 };
 
+
 static int __init init(void)
 {
-	return ipt_register_match(&sctp_match);
+	int ret;
+	ret = xt_register_match(AF_INET, &sctp_match);
+	if (ret)
+		return ret;
+
+	ret = xt_register_match(AF_INET6, &sctp6_match);
+	if (ret)
+		xt_unregister_match(AF_INET, &sctp_match);
+
+	return ret;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_match(&sctp_match);
+	xt_unregister_match(AF_INET6, &sctp6_match);
+	xt_unregister_match(AF_INET, &sctp_match);
 }
 
 module_init(init);
 module_exit(fini);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Kiran Kumar Immidi");
-MODULE_DESCRIPTION("Match for SCTP protocol packets");
-
diff --git a/net/ipv4/netfilter/ipt_state.c b/net/netfilter/xt_state.c
index 4d7f16b70ce..39ce808d40e 100644
--- a/net/ipv4/netfilter/ipt_state.c
+++ b/net/netfilter/xt_state.c
@@ -1,7 +1,7 @@
 /* Kernel module to match connection tracking information. */
 
 /* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2002-2005 Netfilter Core Team <coreteam@netfilter.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -11,12 +11,14 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <net/netfilter/nf_conntrack_compat.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_state.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_state.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
-MODULE_DESCRIPTION("iptables connection tracking state match module");
+MODULE_DESCRIPTION("ip[6]_tables connection tracking state match module");
+MODULE_ALIAS("ipt_state");
+MODULE_ALIAS("ip6t_state");
 
 static int
 match(const struct sk_buff *skb,
@@ -24,35 +26,43 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protoff,
       int *hotdrop)
 {
-	const struct ipt_state_info *sinfo = matchinfo;
+	const struct xt_state_info *sinfo = matchinfo;
 	enum ip_conntrack_info ctinfo;
 	unsigned int statebit;
 
 	if (nf_ct_is_untracked(skb))
-		statebit = IPT_STATE_UNTRACKED;
+		statebit = XT_STATE_UNTRACKED;
 	else if (!nf_ct_get_ctinfo(skb, &ctinfo))
-		statebit = IPT_STATE_INVALID;
+		statebit = XT_STATE_INVALID;
 	else
-		statebit = IPT_STATE_BIT(ctinfo);
+		statebit = XT_STATE_BIT(ctinfo);
 
 	return (sinfo->statemask & statebit);
 }
 
 static int check(const char *tablename,
-		 const struct ipt_ip *ip,
+		 const void *ip,
 		 void *matchinfo,
 		 unsigned int matchsize,
 		 unsigned int hook_mask)
 {
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_state_info)))
+	if (matchsize != XT_ALIGN(sizeof(struct xt_state_info)))
 		return 0;
 
 	return 1;
 }
 
-static struct ipt_match state_match = {
+static struct xt_match state_match = {
+	.name		= "state",
+	.match		= &match,
+	.checkentry	= &check,
+	.me		= THIS_MODULE,
+};
+
+static struct xt_match state6_match = {
 	.name		= "state",
 	.match		= &match,
 	.checkentry	= &check,
@@ -61,13 +71,25 @@ static struct ipt_match state_match = {
 
 static int __init init(void)
 {
-	need_ip_conntrack();
-	return ipt_register_match(&state_match);
+	int ret;
+
+	need_conntrack();
+
+	ret = xt_register_match(AF_INET, &state_match);
+	if (ret < 0)
+		return ret;
+
+	ret = xt_register_match(AF_INET6, &state6_match);
+	if (ret < 0)
+		xt_unregister_match(AF_INET,&state_match);
+
+	return ret;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_match(&state_match);
+	xt_unregister_match(AF_INET, &state_match);
+	xt_unregister_match(AF_INET6, &state6_match);
 }
 
 module_init(init);
diff --git a/net/ipv4/netfilter/ipt_string.c b/net/netfilter/xt_string.c
index b5def204d79..7c7d5c8807d 100644
--- a/net/ipv4/netfilter/ipt_string.c
+++ b/net/netfilter/xt_string.c
@@ -11,23 +11,26 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_string.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_string.h>
 #include <linux/textsearch.h>
 
 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@eurodev.net>");
 MODULE_DESCRIPTION("IP tables string match module");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_string");
+MODULE_ALIAS("ip6t_string");
 
 static int match(const struct sk_buff *skb,
 		 const struct net_device *in,
 		 const struct net_device *out,
 		 const void *matchinfo,
 		 int offset,
+		 unsigned int protoff,
 		 int *hotdrop)
 {
 	struct ts_state state;
-	struct ipt_string_info *conf = (struct ipt_string_info *) matchinfo;
+	struct xt_string_info *conf = (struct xt_string_info *) matchinfo;
 
 	memset(&state, 0, sizeof(struct ts_state));
 
@@ -36,18 +39,18 @@ static int match(const struct sk_buff *skb,
 			     != UINT_MAX) && !conf->invert;
 }
 
-#define STRING_TEXT_PRIV(m) ((struct ipt_string_info *) m)
+#define STRING_TEXT_PRIV(m) ((struct xt_string_info *) m)
 
 static int checkentry(const char *tablename,
-		      const struct ipt_ip *ip,
+		      const void *ip,
 		      void *matchinfo,
 		      unsigned int matchsize,
 		      unsigned int hook_mask)
 {
-	struct ipt_string_info *conf = matchinfo;
+	struct xt_string_info *conf = matchinfo;
 	struct ts_config *ts_conf;
 
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_string_info)))
+	if (matchsize != XT_ALIGN(sizeof(struct xt_string_info)))
 		return 0;
 
 	/* Damn, can't handle this case properly with iptables... */
@@ -69,7 +72,14 @@ static void destroy(void *matchinfo, unsigned int matchsize)
 	textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config);
 }
 
-static struct ipt_match string_match = {
+static struct xt_match string_match = {
+	.name 		= "string",
+	.match 		= match,
+	.checkentry	= checkentry,
+	.destroy 	= destroy,
+	.me 		= THIS_MODULE
+};
+static struct xt_match string6_match = {
 	.name 		= "string",
 	.match 		= match,
 	.checkentry	= checkentry,
@@ -79,12 +89,22 @@ static struct ipt_match string_match = {
 
 static int __init init(void)
 {
-	return ipt_register_match(&string_match);
+	int ret;
+
+	ret = xt_register_match(AF_INET, &string_match);
+	if (ret)
+		return ret;
+	ret = xt_register_match(AF_INET6, &string6_match);
+	if (ret)
+		xt_unregister_match(AF_INET, &string_match);
+
+	return ret;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_match(&string_match);
+	xt_unregister_match(AF_INET, &string_match);
+	xt_unregister_match(AF_INET6, &string6_match);
 }
 
 module_init(init);
diff --git a/net/ipv4/netfilter/ipt_tcpmss.c b/net/netfilter/xt_tcpmss.c
index 4dc9b16ab4a..acf7f533e9f 100644
--- a/net/ipv4/netfilter/ipt_tcpmss.c
+++ b/net/netfilter/xt_tcpmss.c
@@ -1,6 +1,7 @@
 /* Kernel module to match TCP MSS values. */
 
 /* Copyright (C) 2000 Marc Boucher <marc@mbsi.ca>
+ * Portions (C) 2005 by Harald Welte <laforge@netfilter.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -11,19 +12,24 @@
 #include <linux/skbuff.h>
 #include <net/tcp.h>
 
-#include <linux/netfilter_ipv4/ipt_tcpmss.h>
+#include <linux/netfilter/xt_tcpmss.h>
+#include <linux/netfilter/x_tables.h>
+
 #include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
 
 #define TH_SYN 0x02
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
 MODULE_DESCRIPTION("iptables TCP MSS match module");
+MODULE_ALIAS("ipt_tcpmss");
 
 /* Returns 1 if the mss option is set and matched by the range, 0 otherwise */
 static inline int
 mssoption_match(u_int16_t min, u_int16_t max,
 		const struct sk_buff *skb,
+		unsigned int protoff,
 		int invert,
 		int *hotdrop)
 {
@@ -33,8 +39,7 @@ mssoption_match(u_int16_t min, u_int16_t max,
 	unsigned int i, optlen;
 
 	/* If we don't have the whole header, drop packet. */
-	th = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
-				sizeof(_tcph), &_tcph);
+	th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
 	if (th == NULL)
 		goto dropit;
 
@@ -47,8 +52,7 @@ mssoption_match(u_int16_t min, u_int16_t max,
 		goto out;
 
 	/* Truncated options. */
-	op = skb_header_pointer(skb, skb->nh.iph->ihl * 4 + sizeof(*th),
-				optlen, _opt);
+	op = skb_header_pointer(skb, protoff + sizeof(*th), optlen, _opt);
 	if (op == NULL)
 		goto dropit;
 
@@ -79,22 +83,24 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protoff,
       int *hotdrop)
 {
-	const struct ipt_tcpmss_match_info *info = matchinfo;
+	const struct xt_tcpmss_match_info *info = matchinfo;
 
-	return mssoption_match(info->mss_min, info->mss_max, skb,
+	return mssoption_match(info->mss_min, info->mss_max, skb, protoff,
 			       info->invert, hotdrop);
 }
 
 static int
 checkentry(const char *tablename,
-           const struct ipt_ip *ip,
+           const void *ipinfo,
            void *matchinfo,
            unsigned int matchsize,
            unsigned int hook_mask)
 {
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_tcpmss_match_info)))
+	const struct ipt_ip *ip = ipinfo;
+	if (matchsize != XT_ALIGN(sizeof(struct xt_tcpmss_match_info)))
 		return 0;
 
 	/* Must specify -p tcp */
@@ -106,21 +112,60 @@ checkentry(const char *tablename,
 	return 1;
 }
 
-static struct ipt_match tcpmss_match = {
+/* IPv6 rule check: matchinfo must be one aligned xt_tcpmss_match_info
+ * and the rule must select TCP without inverting the protocol test. */
+static int
+checkentry6(const char *tablename, const void *ipinfo, void *matchinfo,
+	    unsigned int matchsize, unsigned int hook_mask)
+{
+	const struct ip6t_ip6 *ip = ipinfo;
+	const int is_tcp = ip->proto == IPPROTO_TCP &&
+			   !(ip->invflags & XT_INV_PROTO);
+
+	if (matchsize != XT_ALIGN(sizeof(struct xt_tcpmss_match_info)))
+		return 0;
+
+	/* Must specify -p tcp */
+	if (!is_tcp) {
+		printk("tcpmss: Only works on TCP packets\n");
+		return 0;
+	}
+	return 1;
+}
+
+static struct xt_match tcpmss_match = {
 	.name		= "tcpmss",
 	.match		= &match,
 	.checkentry	= &checkentry,
 	.me		= THIS_MODULE,
 };
 
+static struct xt_match tcpmss6_match = {
+	.name		= "tcpmss",
+	.match		= &match,
+	.checkentry	= &checkentry6,
+	.me		= THIS_MODULE,
+};
+
+
 static int __init init(void)
 {
-	return ipt_register_match(&tcpmss_match);
+	int ret;
+	ret = xt_register_match(AF_INET, &tcpmss_match);
+	if (ret)
+		return ret;
+
+	ret = xt_register_match(AF_INET6, &tcpmss6_match);
+	if (ret)
+		xt_unregister_match(AF_INET, &tcpmss_match);
+
+	return ret;
 }
 
 static void __exit fini(void)
 {
-	ipt_unregister_match(&tcpmss_match);
+	xt_unregister_match(AF_INET6, &tcpmss6_match);
+	xt_unregister_match(AF_INET, &tcpmss_match);
 }
 
 module_init(init);
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
new file mode 100644
index 00000000000..669c8113cc6
--- /dev/null
+++ b/net/netfilter/xt_tcpudp.c
@@ -0,0 +1,334 @@
+#include <linux/types.h>
+#include <linux/module.h>
+#include <net/ip.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_tcpudp.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_DESCRIPTION("x_tables match for TCP and UDP, supports IPv4 and IPv6");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("xt_tcp");
+MODULE_ALIAS("xt_udp");
+MODULE_ALIAS("ipt_udp");
+MODULE_ALIAS("ipt_tcp");
+MODULE_ALIAS("ip6t_udp");
+MODULE_ALIAS("ip6t_tcp");
+
+#ifdef DEBUG_IP_FIREWALL_USER
+#define duprintf(format, args...) printk(format , ## args)
+#else
+#define duprintf(format, args...)
+#endif
+
+
+/* Returns 1 if the port is matched by the range, 0 otherwise */
+static inline int
+port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
+{
+	/* 1 when min <= port <= max, else 0; XOR with invert flips the verdict. */
+	const int in_range = (port >= min) && (port <= max);
+
+	return in_range ^ invert;
+}
+
+/* Scan the optlen bytes of TCP options after the fixed header for `option`.
+ * Returns !invert if found, else invert; 0 and *hotdrop = 1 if unreadable. */
+static int
+tcp_find_option(u_int8_t option, const struct sk_buff *skb,
+		unsigned int protoff, unsigned int optlen,
+		int invert, int *hotdrop)
+{
+	/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
+	u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
+	unsigned int i;
+
+	duprintf("tcp_match: finding option\n");
+
+	if (!optlen)
+		return invert;
+
+	/* If we don't have the whole header, drop packet. */
+	op = skb_header_pointer(skb, protoff + sizeof(struct tcphdr),
+				optlen, _opt);
+	if (op == NULL) {
+		*hotdrop = 1;
+		return 0;
+	}
+
+	for (i = 0; i < optlen; ) {
+		if (op[i] == option) return !invert;
+		/* Kinds 0 and 1 are one byte; never read a length past optlen. */
+		if (op[i] < 2 || i + 1 >= optlen) i++;
+		else i += op[i+1]?:1;
+	}
+
+	return invert;
+}
+
+static int	/* 1 if the packet matches the rule, otherwise 0 */
+tcp_match(const struct sk_buff *skb,
+	  const struct net_device *in,
+	  const struct net_device *out,
+	  const void *matchinfo,	/* the rule's struct xt_tcp */
+	  int offset,			/* non-zero for a non-first fragment */
+	  unsigned int protoff,		/* where the TCP header is read in skb */
+	  int *hotdrop)			/* set to 1 when the packet must be dropped */
+{
+	struct tcphdr _tcph, *th;
+	const struct xt_tcp *tcpinfo = matchinfo;
+
+	if (offset) {
+		/* To quote Alan:
+
+		   Don't allow a fragment of TCP 8 bytes in. Nobody normal
+		   causes this. It's a cracker trying to break in by doing a
+		   flag overwrite to pass the direction checks.
+		*/
+		if (offset == 1) {
+			duprintf("Dropping evil TCP offset=1 frag.\n");
+			*hotdrop = 1;
+		}
+		/* Must not be a fragment. */
+		return 0;
+	}
+	/* FWINVTCP(): a test result XORed with the rule's inversion bit invflg. */
+#define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
+
+	th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
+	if (th == NULL) {
+		/* We've been asked to examine this packet, and we
+		   can't.  Hence, no choice but to drop. */
+		duprintf("Dropping evil TCP offset=0 tinygram.\n");
+		*hotdrop = 1;
+		return 0;
+	}
+
+	if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
+			ntohs(th->source),
+			!!(tcpinfo->invflags & XT_TCP_INV_SRCPT)))
+		return 0;
+	if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
+			ntohs(th->dest),
+			!!(tcpinfo->invflags & XT_TCP_INV_DSTPT)))
+		return 0;
+	if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
+		      == tcpinfo->flg_cmp,	/* byte 13 of the header: TCP flags */
+		      XT_TCP_INV_FLAGS))
+		return 0;
+	if (tcpinfo->option) {
+		if (th->doff * 4 < sizeof(_tcph)) {	/* doff below the fixed header size */
+			*hotdrop = 1;
+			return 0;
+		}
+		if (!tcp_find_option(tcpinfo->option, skb, protoff,
+				     th->doff*4 - sizeof(_tcph),	/* option bytes */
+				     tcpinfo->invflags & XT_TCP_INV_OPTION,
+				     hotdrop))
+			return 0;
+	}
+	return 1;
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int
+tcp_checkentry(const char *tablename, const void *info, void *matchinfo,
+	       unsigned int matchsize, unsigned int hook_mask)
+{
+	const struct xt_tcp *tcpinfo = matchinfo;
+	const struct ipt_ip *ip = info;
+
+	/* The rule must select exactly TCP, without protocol inversion. */
+	if (ip->proto != IPPROTO_TCP || (ip->invflags & XT_INV_PROTO))
+		return 0;
+	/* matchinfo must be exactly one aligned struct xt_tcp. */
+	if (matchsize != XT_ALIGN(sizeof(struct xt_tcp)))
+		return 0;
+	/* Only inversion bits covered by XT_TCP_INV_MASK are accepted. */
+	return !(tcpinfo->invflags & ~XT_TCP_INV_MASK);
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int
+tcp6_checkentry(const char *tablename, const void *entry, void *matchinfo,
+		unsigned int matchsize, unsigned int hook_mask)
+{
+	const struct xt_tcp *tcpinfo = matchinfo;
+	const struct ip6t_ip6 *ipv6 = entry;
+
+	/* The rule must select exactly TCP, without protocol inversion. */
+	if (ipv6->proto != IPPROTO_TCP || (ipv6->invflags & XT_INV_PROTO))
+		return 0;
+	/* matchinfo must be exactly one aligned struct xt_tcp. */
+	if (matchsize != XT_ALIGN(sizeof(struct xt_tcp)))
+		return 0;
+	/* Only inversion bits covered by XT_TCP_INV_MASK are accepted. */
+	return !(tcpinfo->invflags & ~XT_TCP_INV_MASK);
+}
+
+
+static int
+udp_match(const struct sk_buff *skb,
+	  const struct net_device *in,
+	  const struct net_device *out,
+	  const void *matchinfo,
+	  int offset,
+	  unsigned int protoff,
+	  int *hotdrop)
+{
+	const struct xt_udp *udpinfo = matchinfo;
+	struct udphdr _udph, *uh;
+
+	/* Non-first fragments never match. */
+	if (offset)
+		return 0;
+
+	uh = skb_header_pointer(skb, protoff, sizeof(_udph), &_udph);
+	if (uh == NULL) {
+		/* The UDP header cannot be read, so the packet is dropped. */
+		duprintf("Dropping evil UDP tinygram.\n");
+		*hotdrop = 1;
+		return 0;
+	}
+
+	/* Source port first; the destination test is skipped if it fails. */
+	if (!port_match(udpinfo->spts[0], udpinfo->spts[1], ntohs(uh->source),
+			!!(udpinfo->invflags & XT_UDP_INV_SRCPT)))
+		return 0;
+
+	return port_match(udpinfo->dpts[0], udpinfo->dpts[1], ntohs(uh->dest),
+			  !!(udpinfo->invflags & XT_UDP_INV_DSTPT));
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int
+udp_checkentry(const char *tablename, const void *info, void *matchinfo,
+	       unsigned int matchinfosize, unsigned int hook_mask)
+{
+	const struct xt_udp *udpinfo = matchinfo;
+	const struct ipt_ip *ip = info;
+
+	/* The rule must select exactly UDP, without protocol inversion. */
+	if (ip->proto != IPPROTO_UDP || (ip->invflags & XT_INV_PROTO)) {
+		duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
+			 IPPROTO_UDP);
+		return 0;
+	}
+
+	/* matchinfo must be exactly one aligned struct xt_udp. */
+	if (matchinfosize != XT_ALIGN(sizeof(struct xt_udp))) {
+		duprintf("ipt_udp: matchsize %u != %u\n",
+			 matchinfosize, XT_ALIGN(sizeof(struct xt_udp)));
+		return 0;
+	}
+
+	/* Only inversion bits covered by XT_UDP_INV_MASK are accepted. */
+	if (udpinfo->invflags & ~XT_UDP_INV_MASK) {
+		duprintf("ipt_udp: unknown flags %X\n",
+			 udpinfo->invflags);
+		return 0;
+	}
+	return 1;
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int
+udp6_checkentry(const char *tablename, const void *entry, void *matchinfo,
+		unsigned int matchinfosize, unsigned int hook_mask)
+{
+	const struct xt_udp *udpinfo = matchinfo;
+	const struct ip6t_ip6 *ipv6 = entry;
+
+	/* The rule must select exactly UDP, without protocol inversion. */
+	if (ipv6->proto != IPPROTO_UDP || (ipv6->invflags & XT_INV_PROTO)) {
+		duprintf("ip6t_udp: Protocol %u != %u\n", ipv6->proto,
+			 IPPROTO_UDP);
+		return 0;
+	}
+
+	/* matchinfo must be exactly one aligned struct xt_udp. */
+	if (matchinfosize != XT_ALIGN(sizeof(struct xt_udp))) {
+		duprintf("ip6t_udp: matchsize %u != %u\n",
+			 matchinfosize, XT_ALIGN(sizeof(struct xt_udp)));
+		return 0;
+	}
+
+	/* Only inversion bits covered by XT_UDP_INV_MASK are accepted. */
+	if (udpinfo->invflags & ~XT_UDP_INV_MASK) {
+		duprintf("ip6t_udp: unknown flags %X\n",
+			 udpinfo->invflags);
+		return 0;
+	}
+	return 1;
+}
+
+static struct xt_match tcp_matchstruct = {	/* "tcp", IPv4 rule check */
+	.name		= "tcp",
+	.match		= tcp_match,
+	.checkentry	= tcp_checkentry,
+	.me		= THIS_MODULE,
+};
+static struct xt_match tcp6_matchstruct = {	/* "tcp", IPv6 rule check */
+	.name		= "tcp",
+	.match		= tcp_match,
+	.checkentry	= tcp6_checkentry,
+	.me		= THIS_MODULE,
+};
+
+static struct xt_match udp_matchstruct = {	/* "udp", IPv4 rule check */
+	.name		= "udp",
+	.match		= udp_match,
+	.checkentry	= udp_checkentry,
+	.me		= THIS_MODULE,
+};
+static struct xt_match udp6_matchstruct = {	/* "udp", IPv6 rule check */
+	.name		= "udp",
+	.match		= udp_match,
+	.checkentry	= udp6_checkentry,
+	.me		= THIS_MODULE,
+};
+
+/* Register tcp/udp for IPv4 and IPv6; on failure undo what was registered. */
+static int __init init(void)
+{
+	int ret;
+	ret = xt_register_match(AF_INET, &tcp_matchstruct);
+	if (ret)
+		return ret;
+
+	ret = xt_register_match(AF_INET6, &tcp6_matchstruct);
+	if (ret)
+		goto out_unreg_tcp;
+
+	ret = xt_register_match(AF_INET, &udp_matchstruct);
+	if (ret)
+		goto out_unreg_tcp6;
+
+	ret = xt_register_match(AF_INET6, &udp6_matchstruct);
+	if (ret)
+		goto out_unreg_udp;
+	return ret;
+
+out_unreg_udp:
+	xt_unregister_match(AF_INET, &udp_matchstruct);
+out_unreg_tcp6:
+	xt_unregister_match(AF_INET6, &tcp6_matchstruct);
+out_unreg_tcp:
+	xt_unregister_match(AF_INET, &tcp_matchstruct);
+	return ret;
+}
+
+static void __exit fini(void)	/* undo init()'s registrations, last first */
+{
+	xt_unregister_match(AF_INET6, &udp6_matchstruct);
+	xt_unregister_match(AF_INET, &udp_matchstruct);
+	xt_unregister_match(AF_INET6, &tcp6_matchstruct);
+	xt_unregister_match(AF_INET, &tcp_matchstruct);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 96020d7087e..2101b45d2ec 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -24,6 +24,7 @@
 #include <linux/config.h>
 #include <linux/module.h>
 
+#include <linux/capability.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/signal.h>
@@ -293,7 +294,7 @@ static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len)
 	return 0;
 }
 
-static struct proto_ops netlink_ops;
+static const struct proto_ops netlink_ops;
 
 static int netlink_insert(struct sock *sk, u32 pid)
 {
@@ -402,7 +403,7 @@ static int netlink_create(struct socket *sock, int protocol)
 	groups = nl_table[protocol].groups;
 	netlink_unlock_table();
 
-	if ((err = __netlink_create(sock, protocol) < 0))
+	if ((err = __netlink_create(sock, protocol)) < 0)
 		goto out_module;
 
 	nlk = nlk_sk(sock->sk);
@@ -1422,7 +1423,7 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
 	while (skb->len >= nlmsg_total_size(0)) {
 		nlh = (struct nlmsghdr *) skb->data;
 
-		if (skb->len < nlh->nlmsg_len)
+		if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
 			return 0;
 
 		total_len = min(NLMSG_ALIGN(nlh->nlmsg_len), skb->len);
@@ -1656,7 +1657,7 @@ int netlink_unregister_notifier(struct notifier_block *nb)
 	return notifier_chain_unregister(&netlink_chain, nb);
 }
                 
-static struct proto_ops netlink_ops = {
+static const struct proto_ops netlink_ops = {
 	.family =	PF_NETLINK,
 	.owner =	THIS_MODULE,
 	.release =	netlink_release,
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 287cfcc5695..4ae1538c54a 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -222,11 +222,6 @@ int genl_register_family(struct genl_family *family)
 		goto errout_locked;
 	}
 
-	if (!try_module_get(family->owner)) {
-		err = -EBUSY;
-		goto errout_locked;
-	}
-
 	if (family->id == GENL_ID_GENERATE) {
 		u16 newid = genl_generate_id();
 
@@ -283,7 +278,6 @@ int genl_unregister_family(struct genl_family *family)
 		INIT_LIST_HEAD(&family->ops_list);
 		genl_unlock();
 
-		module_put(family->owner);
 		kfree(family->attrbuf);
 		genl_ctrl_event(CTRL_CMD_DELFAMILY, family);
 		return 0;
@@ -441,7 +435,7 @@ errout:
 }
 
 static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid,
-				      int seq, int cmd)
+				      int seq, u8 cmd)
 {
 	struct sk_buff *skb;
 	int err;
@@ -535,7 +529,6 @@ static struct genl_family genl_ctrl = {
 	.name = "nlctrl",
 	.version = 0x1,
 	.maxattr = CTRL_ATTR_MAX,
-	.owner = THIS_MODULE,
 };
 
 static int __init genl_init(void)
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index e5d82d711ca..d44981f5a61 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -11,6 +11,7 @@
 #include <linux/config.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/socket.h>
@@ -63,7 +64,7 @@ static unsigned short circuit = 0x101;
 static HLIST_HEAD(nr_list);
 static DEFINE_SPINLOCK(nr_list_lock);
 
-static struct proto_ops nr_proto_ops;
+static const struct proto_ops nr_proto_ops;
 
 /*
  *	Socket removal during an interrupt is now safe.
@@ -1166,10 +1167,11 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	void __user *argp = (void __user *)arg;
 	int ret;
 
-	lock_sock(sk);
 	switch (cmd) {
 	case TIOCOUTQ: {
 		long amount;
+
+		lock_sock(sk);
 		amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
 		if (amount < 0)
 			amount = 0;
@@ -1180,6 +1182,8 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	case TIOCINQ: {
 		struct sk_buff *skb;
 		long amount = 0L;
+
+		lock_sock(sk);
 		/* These two are safe on a single CPU system as only user tasks fiddle here */
 		if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL)
 			amount = skb->len;
@@ -1188,6 +1192,7 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	}
 
 	case SIOCGSTAMP:
+		lock_sock(sk);
 		ret = sock_get_timestamp(sk, argp);
 		release_sock(sk);
 		return ret;
@@ -1202,21 +1207,17 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	case SIOCSIFNETMASK:
 	case SIOCGIFMETRIC:
 	case SIOCSIFMETRIC:
-		release_sock(sk);
 		return -EINVAL;
 
 	case SIOCADDRT:
 	case SIOCDELRT:
 	case SIOCNRDECOBS:
-		release_sock(sk);
 		if (!capable(CAP_NET_ADMIN)) return -EPERM;
 		return nr_rt_ioctl(cmd, argp);
 
 	default:
-		release_sock(sk);
-		return dev_ioctl(cmd, argp);
+		return -ENOIOCTLCMD;
 	}
-	release_sock(sk);
 
 	return 0;
 }
@@ -1337,7 +1338,7 @@ static struct net_proto_family nr_family_ops = {
 	.owner		=	THIS_MODULE,
 };
 
-static struct proto_ops nr_proto_ops = {
+static const struct proto_ops nr_proto_ops = {
 	.family		=	PF_NETROM,
 	.owner		=	THIS_MODULE,
 	.release	=	nr_release,
diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
index 004e8599b8f..a7d88b5ad75 100644
--- a/net/netrom/nr_in.c
+++ b/net/netrom/nr_in.c
@@ -99,7 +99,7 @@ static int nr_state1_machine(struct sock *sk, struct sk_buff *skb,
 		break;
 
 	case NR_RESET:
-		if (sysctl_netrom_reset_circuit);
+		if (sysctl_netrom_reset_circuit)
 			nr_disconnect(sk, ECONNRESET);
 		break;
 
@@ -130,7 +130,7 @@ static int nr_state2_machine(struct sock *sk, struct sk_buff *skb,
 		break;
 
 	case NR_RESET:
-		if (sysctl_netrom_reset_circuit);
+		if (sysctl_netrom_reset_circuit)
 			nr_disconnect(sk, ECONNRESET);
 		break;
 
@@ -265,7 +265,7 @@ static int nr_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype
 		break;
 
 	case NR_RESET:
-		if (sysctl_netrom_reset_circuit);
+		if (sysctl_netrom_reset_circuit)
 			nr_disconnect(sk, ECONNRESET);
 		break;
 
diff --git a/net/nonet.c b/net/nonet.c
index e5241dceaa5..1230f0ae832 100644
--- a/net/nonet.c
+++ b/net/nonet.c
@@ -14,11 +14,6 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 
-void __init sock_init(void)
-{
-	printk(KERN_INFO "Linux NoNET1.0 for Linux 2.6\n");
-}
-
 static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
 {
 	return -ENXIO;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 499ae3df4a4..ee93abc71cb 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -53,6 +53,7 @@
 #include <linux/types.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/capability.h>
 #include <linux/fcntl.h>
 #include <linux/socket.h>
 #include <linux/in.h>
@@ -251,10 +252,10 @@ static void packet_sock_destruct(struct sock *sk)
 }
 
 
-static struct proto_ops packet_ops;
+static const struct proto_ops packet_ops;
 
 #ifdef CONFIG_SOCK_PACKET
-static struct proto_ops packet_ops_spkt;
+static const struct proto_ops packet_ops_spkt;
 
 static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt, struct net_device *orig_dev)
 {
@@ -1237,7 +1238,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
 		goto done;
 
 	err = -ENOBUFS;
-	i = (struct packet_mclist *)kmalloc(sizeof(*i), GFP_KERNEL);
+	i = kmalloc(sizeof(*i), GFP_KERNEL);
 	if (i == NULL)
 		goto done;
 
@@ -1521,7 +1522,7 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd,
 #endif
 
 		default:
-			return dev_ioctl(cmd, (void __user *)arg);
+			return -ENOIOCTLCMD;
 	}
 	return 0;
 }
@@ -1587,23 +1588,47 @@ static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order)
 	return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1);
 }
 
-static void free_pg_vec(char **pg_vec, unsigned order, unsigned len)
+static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
 {
 	int i;
 
-	for (i=0; i<len; i++) {
-		if (pg_vec[i]) {
-			struct page *page, *pend;
-
-			pend = pg_vec_endpage(pg_vec[i], order);
-			for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
-				ClearPageReserved(page);
-			free_pages((unsigned long)pg_vec[i], order);
-		}
+	for (i = 0; i < len; i++) {
+		if (likely(pg_vec[i]))
+			free_pages((unsigned long) pg_vec[i], order);
 	}
 	kfree(pg_vec);
 }
 
+static inline char *alloc_one_pg_vec_page(unsigned long order)
+{	/* one pg_vec entry: zeroed (__GFP_ZERO), compound (__GFP_COMP) pages */
+	return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
+					 order);
+}
+
+/* Allocate req->tp_block_nr blocks of 2^order pages plus the vector that
+ * holds them.  Returns NULL if any allocation fails; nothing is leaked. */
+static char **alloc_pg_vec(struct tpacket_req *req, int order)
+{
+	unsigned int block_nr = req->tp_block_nr;
+	char **pg_vec;
+	unsigned int i;
+
+	/* kcalloc() fails cleanly if block_nr * sizeof(char *) overflows. */
+	pg_vec = kcalloc(block_nr, sizeof(char *), GFP_KERNEL);
+	if (unlikely(!pg_vec))
+		return NULL;
+
+	for (i = 0; i < block_nr; i++) {
+		pg_vec[i] = alloc_one_pg_vec_page(order);
+		if (unlikely(!pg_vec[i]))
+			goto out_free_pgvec;
+	}
+	return pg_vec;
+
+out_free_pgvec:
+	free_pg_vec(pg_vec, order, block_nr);
+	return NULL;
+}
 
 static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
 {
@@ -1617,64 +1642,46 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
 
 		/* Sanity tests and some calculations */
 
-		if (po->pg_vec)
+		if (unlikely(po->pg_vec))
 			return -EBUSY;
 
-		if ((int)req->tp_block_size <= 0)
+		if (unlikely((int)req->tp_block_size <= 0))
 			return -EINVAL;
-		if (req->tp_block_size&(PAGE_SIZE-1))
+		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
 			return -EINVAL;
-		if (req->tp_frame_size < TPACKET_HDRLEN)
+		if (unlikely(req->tp_frame_size < TPACKET_HDRLEN))
 			return -EINVAL;
-		if (req->tp_frame_size&(TPACKET_ALIGNMENT-1))
+		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
 			return -EINVAL;
 
 		po->frames_per_block = req->tp_block_size/req->tp_frame_size;
-		if (po->frames_per_block <= 0)
+		if (unlikely(po->frames_per_block <= 0))
 			return -EINVAL;
-		if (po->frames_per_block*req->tp_block_nr != req->tp_frame_nr)
+		if (unlikely((po->frames_per_block * req->tp_block_nr) !=
+			     req->tp_frame_nr))
 			return -EINVAL;
-		/* OK! */
-
-		/* Allocate page vector */
-		while ((PAGE_SIZE<<order) < req->tp_block_size)
-			order++;
 
 		err = -ENOMEM;
-
-		pg_vec = kmalloc(req->tp_block_nr*sizeof(char *), GFP_KERNEL);
-		if (pg_vec == NULL)
+		order = get_order(req->tp_block_size);
+		pg_vec = alloc_pg_vec(req, order);
+		if (unlikely(!pg_vec))
 			goto out;
-		memset(pg_vec, 0, req->tp_block_nr*sizeof(char **));
-
-		for (i=0; i<req->tp_block_nr; i++) {
-			struct page *page, *pend;
-			pg_vec[i] = (char *)__get_free_pages(GFP_KERNEL, order);
-			if (!pg_vec[i])
-				goto out_free_pgvec;
-
-			pend = pg_vec_endpage(pg_vec[i], order);
-			for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
-				SetPageReserved(page);
-		}
-		/* Page vector is allocated */
 
 		l = 0;
-		for (i=0; i<req->tp_block_nr; i++) {
+		for (i = 0; i < req->tp_block_nr; i++) {
 			char *ptr = pg_vec[i];
 			struct tpacket_hdr *header;
 			int k;
 
-			for (k=0; k<po->frames_per_block; k++) {
-				
-				header = (struct tpacket_hdr*)ptr;
+			for (k = 0; k < po->frames_per_block; k++) {
+				header = (struct tpacket_hdr *) ptr;
 				header->tp_status = TP_STATUS_KERNEL;
 				ptr += req->tp_frame_size;
 			}
 		}
 		/* Done */
 	} else {
-		if (req->tp_frame_nr)
+		if (unlikely(req->tp_frame_nr))
 			return -EINVAL;
 	}
 
@@ -1701,7 +1708,7 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
 
 		spin_lock_bh(&sk->sk_receive_queue.lock);
 		pg_vec = XC(po->pg_vec, pg_vec);
-		po->frame_max = req->tp_frame_nr-1;
+		po->frame_max = (req->tp_frame_nr - 1);
 		po->head = 0;
 		po->frame_size = req->tp_frame_size;
 		spin_unlock_bh(&sk->sk_receive_queue.lock);
@@ -1728,7 +1735,6 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
 
 	release_sock(sk);
 
-out_free_pgvec:
 	if (pg_vec)
 		free_pg_vec(pg_vec, order, req->tp_block_nr);
 out:
@@ -1755,17 +1761,19 @@ static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_st
 	if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
 		goto out;
 
-	atomic_inc(&po->mapped);
 	start = vma->vm_start;
-	err = -EAGAIN;
-	for (i=0; i<po->pg_vec_len; i++) {
-		if (remap_pfn_range(vma, start,
-				     __pa(po->pg_vec[i]) >> PAGE_SHIFT,
-				     po->pg_vec_pages*PAGE_SIZE,
-				     vma->vm_page_prot))
-			goto out;
-		start += po->pg_vec_pages*PAGE_SIZE;
+	for (i = 0; i < po->pg_vec_len; i++) {
+		struct page *page = virt_to_page(po->pg_vec[i]);
+		int pg_num;
+
+		for (pg_num = 0; pg_num < po->pg_vec_pages; pg_num++, page++) {
+			err = vm_insert_page(vma, start, page);
+			if (unlikely(err))
+				goto out;
+			start += PAGE_SIZE;
+		}
 	}
+	atomic_inc(&po->mapped);
 	vma->vm_ops = &packet_mmap_ops;
 	err = 0;
 
@@ -1777,7 +1785,7 @@ out:
 
 
 #ifdef CONFIG_SOCK_PACKET
-static struct proto_ops packet_ops_spkt = {
+static const struct proto_ops packet_ops_spkt = {
 	.family =	PF_PACKET,
 	.owner =	THIS_MODULE,
 	.release =	packet_release,
@@ -1799,7 +1807,7 @@ static struct proto_ops packet_ops_spkt = {
 };
 #endif
 
-static struct proto_ops packet_ops = {
+static const struct proto_ops packet_ops = {
 	.family =	PF_PACKET,
 	.owner =	THIS_MODULE,
 	.release =	packet_release,
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 829fdbc4400..ea65396d161 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -9,7 +9,9 @@
  * Copyright (C) Terry Dawson VK2KTJ (terry@animats.net)
  * Copyright (C) Tomi Manninen OH2BNS (oh2bns@sral.fi)
  */
+
 #include <linux/config.h>
+#include <linux/capability.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/init.h>
@@ -1320,7 +1322,7 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		return 0;
 
 	default:
-		return dev_ioctl(cmd, argp);
+		return -ENOIOCTLCMD;
 	}
 
 	return 0;
diff --git a/net/rxrpc/connection.c b/net/rxrpc/connection.c
index 2ba14a75dbb..0e0a4553499 100644
--- a/net/rxrpc/connection.c
+++ b/net/rxrpc/connection.c
@@ -220,6 +220,7 @@ int rxrpc_connection_lookup(struct rxrpc_peer *peer,
 {
 	struct rxrpc_connection *conn, *candidate = NULL;
 	struct list_head *_p;
+	struct sk_buff *pkt = msg->pkt;
 	int ret, fresh = 0;
 	__be32 x_epoch, x_connid;
 	__be16 x_port, x_servid;
@@ -229,10 +230,10 @@ int rxrpc_connection_lookup(struct rxrpc_peer *peer,
 	_enter("%p{{%hu}},%u,%hu",
 	       peer,
 	       peer->trans->port,
-	       ntohs(msg->pkt->h.uh->source),
+	       ntohs(pkt->h.uh->source),
 	       ntohs(msg->hdr.serviceId));
 
-	x_port		= msg->pkt->h.uh->source;
+	x_port		= pkt->h.uh->source;
 	x_epoch		= msg->hdr.epoch;
 	x_clflag	= msg->hdr.flags & RXRPC_CLIENT_INITIATED;
 	x_connid	= htonl(ntohl(msg->hdr.cid) & RXRPC_CIDMASK);
@@ -267,7 +268,7 @@ int rxrpc_connection_lookup(struct rxrpc_peer *peer,
 		/* fill in the specifics */
 		candidate->addr.sin_family	= AF_INET;
 		candidate->addr.sin_port	= x_port;
-		candidate->addr.sin_addr.s_addr = msg->pkt->nh.iph->saddr;
+		candidate->addr.sin_addr.s_addr = pkt->nh.iph->saddr;
 		candidate->in_epoch		= x_epoch;
 		candidate->out_epoch		= x_epoch;
 		candidate->in_clientflag	= RXRPC_CLIENT_INITIATED;
@@ -675,6 +676,7 @@ int rxrpc_conn_receive_call_packet(struct rxrpc_connection *conn,
 				   struct rxrpc_message *msg)
 {
 	struct rxrpc_message *pmsg;
+	struct dst_entry *dst;
 	struct list_head *_p;
 	unsigned cix, seq;
 	int ret = 0;
@@ -710,10 +712,10 @@ int rxrpc_conn_receive_call_packet(struct rxrpc_connection *conn,
 
 	call->pkt_rcv_count++;
 
-	if (msg->pkt->dst && msg->pkt->dst->dev)
+	dst = msg->pkt->dst;
+	if (dst && dst->dev)
 		conn->peer->if_mtu =
-			msg->pkt->dst->dev->mtu -
-			msg->pkt->dst->dev->hard_header_len;
+			dst->dev->mtu - dst->dev->hard_header_len;
 
 	/* queue on the call in seq order */
 	rxrpc_get_message(msg);
diff --git a/net/rxrpc/krxtimod.c b/net/rxrpc/krxtimod.c
index 3ac81cdd121..3e7466900bd 100644
--- a/net/rxrpc/krxtimod.c
+++ b/net/rxrpc/krxtimod.c
@@ -81,7 +81,7 @@ static int krxtimod(void *arg)
 
 	for (;;) {
 		unsigned long jif;
-		signed long timeout;
+		long timeout;
 
 		/* deal with the server being asked to die */
 		if (krxtimod_die) {
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 3b5ecd8e240..29975d99d86 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -361,7 +361,7 @@ static void rxrpc_proc_peers_stop(struct seq_file *p, void *v)
 static int rxrpc_proc_peers_show(struct seq_file *m, void *v)
 {
 	struct rxrpc_peer *peer = list_entry(v, struct rxrpc_peer, proc_link);
-	signed long timeout;
+	long timeout;
 
 	/* display header on line 1 */
 	if (v == SEQ_START_TOKEN) {
@@ -373,8 +373,8 @@ static int rxrpc_proc_peers_show(struct seq_file *m, void *v)
 	/* display one peer per line on subsequent lines */
 	timeout = 0;
 	if (!list_empty(&peer->timeout.link))
-		timeout = (signed long) peer->timeout.timo_jif -
-			(signed long) jiffies;
+		timeout = (long) peer->timeout.timo_jif -
+			(long) jiffies;
 
 	seq_printf(m, "%5hu %08x %5d %5d %8ld %5Zu %7lu\n",
 		   peer->trans->port,
@@ -468,7 +468,7 @@ static void rxrpc_proc_conns_stop(struct seq_file *p, void *v)
 static int rxrpc_proc_conns_show(struct seq_file *m, void *v)
 {
 	struct rxrpc_connection *conn;
-	signed long timeout;
+	long timeout;
 
 	conn = list_entry(v, struct rxrpc_connection, proc_link);
 
@@ -484,8 +484,8 @@ static int rxrpc_proc_conns_show(struct seq_file *m, void *v)
 	/* display one conn per line on subsequent lines */
 	timeout = 0;
 	if (!list_empty(&conn->timeout.link))
-		timeout = (signed long) conn->timeout.timo_jif -
-			(signed long) jiffies;
+		timeout = (long) conn->timeout.timo_jif -
+			(long) jiffies;
 
 	seq_printf(m,
 		   "%5hu %08x %5hu %04hx %08x %-3.3s %08x %08x %5Zu %8ld\n",
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 55cd5327fbd..778b1e5a4b5 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -44,7 +44,7 @@ if NET_SCHED
 
 choice
 	prompt "Packet scheduler clock source"
-	default NET_SCH_CLK_JIFFIES
+	default NET_SCH_CLK_GETTIMEOFDAY
 	---help---
 	  Packet schedulers need a monotonic clock that increments at a static
 	  rate. The kernel provides several suitable interfaces, each with
@@ -411,7 +411,7 @@ config NET_EMATCH_META
 	tristate "Metadata"
 	depends on NET_EMATCH
 	---help---
-	  Say Y here if you want to be ablt to classify packets based on
+	  Say Y here if you want to be able to classify packets based on
 	  metadata such as load average, netfilter attributes, socket
 	  attributes and routing decisions.
 
diff --git a/net/sched/Makefile b/net/sched/Makefile
index e48d0d456b3..0f06aec6609 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -7,13 +7,13 @@ obj-y	:= sch_generic.o
 obj-$(CONFIG_NET_SCHED)		+= sch_api.o sch_fifo.o sch_blackhole.o
 obj-$(CONFIG_NET_CLS)		+= cls_api.o
 obj-$(CONFIG_NET_CLS_ACT)	+= act_api.o
-obj-$(CONFIG_NET_ACT_POLICE)	+= police.o
-obj-$(CONFIG_NET_CLS_POLICE)	+= police.o
-obj-$(CONFIG_NET_ACT_GACT)	+= gact.o
-obj-$(CONFIG_NET_ACT_MIRRED)	+= mirred.o
-obj-$(CONFIG_NET_ACT_IPT)	+= ipt.o
-obj-$(CONFIG_NET_ACT_PEDIT)	+= pedit.o
-obj-$(CONFIG_NET_ACT_SIMP)	+= simple.o
+obj-$(CONFIG_NET_ACT_POLICE)	+= act_police.o
+obj-$(CONFIG_NET_CLS_POLICE)	+= act_police.o
+obj-$(CONFIG_NET_ACT_GACT)	+= act_gact.o
+obj-$(CONFIG_NET_ACT_MIRRED)	+= act_mirred.o
+obj-$(CONFIG_NET_ACT_IPT)	+= act_ipt.o
+obj-$(CONFIG_NET_ACT_PEDIT)	+= act_pedit.o
+obj-$(CONFIG_NET_ACT_SIMP)	+= act_simple.o
 obj-$(CONFIG_NET_SCH_CBQ)	+= sch_cbq.o
 obj-$(CONFIG_NET_SCH_HTB)	+= sch_htb.o
 obj-$(CONFIG_NET_SCH_HPFQ)	+= sch_hpfq.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 8aebe8f6d27..792ce59940e 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -34,7 +34,7 @@
 #include <net/sch_generic.h>
 #include <net/act_api.h>
 
-#if 1 /* control */
+#if 0 /* control */
 #define DPRINTK(format, args...) printk(KERN_DEBUG format, ##args)
 #else
 #define DPRINTK(format, args...)
@@ -165,7 +165,7 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action *act,
 	while ((a = act) != NULL) {
 repeat:
 		if (a->ops && a->ops->act) {
-			ret = a->ops->act(&skb, a, res);
+			ret = a->ops->act(skb, a, res);
 			if (TC_MUNGED & skb->tc_verd) {
 				/* copied already, allow trampling */
 				skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
@@ -290,7 +290,7 @@ struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est,
 	if (a_o == NULL) {
 #ifdef CONFIG_KMOD
 		rtnl_unlock();
-		request_module(act_name);
+		request_module("act_%s", act_name);
 		rtnl_lock();
 
 		a_o = tc_lookup_action_n(act_name);
diff --git a/net/sched/gact.c b/net/sched/act_gact.c
index d1c6d542912..a1e68f78dcc 100644
--- a/net/sched/gact.c
+++ b/net/sched/act_gact.c
@@ -135,10 +135,9 @@ tcf_gact_cleanup(struct tc_action *a, int bind)
 }
 
 static int
-tcf_gact(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res)
+tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
 {
 	struct tcf_gact *p = PRIV(a, gact);
-	struct sk_buff *skb = *pskb;
 	int action = TC_ACT_SHOT;
 
 	spin_lock(&p->lock);
diff --git a/net/sched/ipt.c b/net/sched/act_ipt.c
index f50136eed21..39a22a3ffe7 100644
--- a/net/sched/ipt.c
+++ b/net/sched/act_ipt.c
@@ -62,7 +62,7 @@ ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
 	struct ipt_target *target;
 	int ret = 0;
 
-	target = ipt_find_target(t->u.user.name, t->u.user.revision);
+	target = xt_find_target(AF_INET, t->u.user.name, t->u.user.revision);
 	if (!target)
 		return -ENOENT;
 
@@ -201,11 +201,10 @@ tcf_ipt_cleanup(struct tc_action *a, int bind)
 }
 
 static int
-tcf_ipt(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res)
+tcf_ipt(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
 {
 	int ret = 0, result = 0;
 	struct tcf_ipt *p = PRIV(a, ipt);
-	struct sk_buff *skb = *pskb;
 
 	if (skb_cloned(skb)) {
 		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
@@ -222,6 +221,9 @@ tcf_ipt(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res)
 	 worry later - danger - this API seems to have changed
 	 from earlier kernels */
 
+	/* iptables targets take a double skb pointer in case the skb
+	 * needs to be replaced. We don't own the skb, so this must not
+	 * happen. The pskb_expand_head above should make sure of this */
 	ret = p->t->u.kernel.target->target(&skb, skb->dev, NULL,
 					    p->hook, p->t->data, NULL);
 	switch (ret) {
diff --git a/net/sched/mirred.c b/net/sched/act_mirred.c
index 20d06916dc0..4fcccbd5088 100644
--- a/net/sched/mirred.c
+++ b/net/sched/act_mirred.c
@@ -158,12 +158,11 @@ tcf_mirred_cleanup(struct tc_action *a, int bind)
 }
 
 static int
-tcf_mirred(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res)
+tcf_mirred(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
 {
 	struct tcf_mirred *p = PRIV(a, mirred);
 	struct net_device *dev;
 	struct sk_buff *skb2 = NULL;
-	struct sk_buff *skb = *pskb;
 	u32 at = G_TC_AT(skb->tc_verd);
 
 	spin_lock(&p->lock);
diff --git a/net/sched/pedit.c b/net/sched/act_pedit.c
index 767d24f4610..1742a68e012 100644
--- a/net/sched/pedit.c
+++ b/net/sched/act_pedit.c
@@ -130,10 +130,9 @@ tcf_pedit_cleanup(struct tc_action *a, int bind)
 }
 
 static int
-tcf_pedit(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res)
+tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
 {
 	struct tcf_pedit *p = PRIV(a, pedit);
-	struct sk_buff *skb = *pskb;
 	int i, munged = 0;
 	u8 *pptr;
 
@@ -246,10 +245,12 @@ tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,int bind, int ref)
 	t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse);
 	t.expires = jiffies_to_clock_t(p->tm.expires);
 	RTA_PUT(skb, TCA_PEDIT_TM, sizeof(t), &t);
+	kfree(opt);
 	return skb->len;
 
 rtattr_failure:
 	skb_trim(skb, b - skb->data);
+	kfree(opt);
 	return -1;
 }
 
diff --git a/net/sched/police.c b/net/sched/act_police.c
index eb39fb2f39b..fa877f8f652 100644
--- a/net/sched/police.c
+++ b/net/sched/act_police.c
@@ -284,11 +284,10 @@ static int tcf_act_police_cleanup(struct tc_action *a, int bind)
 	return 0;
 }
 
-static int tcf_act_police(struct sk_buff **pskb, struct tc_action *a,
+static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
                           struct tcf_result *res)
 {
 	psched_time_t now;
-	struct sk_buff *skb = *pskb;
 	struct tcf_police *p = PRIV(a);
 	long toks;
 	long ptoks = 0;
@@ -408,7 +407,7 @@ police_cleanup_module(void)
 module_init(police_init_module);
 module_exit(police_cleanup_module);
 
-#endif
+#else /* CONFIG_NET_CLS_ACT */
 
 struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est)
 {
@@ -545,6 +544,7 @@ int tcf_police(struct sk_buff *skb, struct tcf_police *p)
 	spin_unlock(&p->lock);
 	return p->action;
 }
+EXPORT_SYMBOL(tcf_police);
 
 int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p)
 {
@@ -601,13 +601,4 @@ errout:
 	return -1;
 }
 
-
-EXPORT_SYMBOL(tcf_police);
-EXPORT_SYMBOL(tcf_police_destroy);
-EXPORT_SYMBOL(tcf_police_dump);
-EXPORT_SYMBOL(tcf_police_dump_stats);
-EXPORT_SYMBOL(tcf_police_hash);
-EXPORT_SYMBOL(tcf_police_ht);
-EXPORT_SYMBOL(tcf_police_locate);
-EXPORT_SYMBOL(tcf_police_lookup);
-EXPORT_SYMBOL(tcf_police_new_index);
+#endif /* CONFIG_NET_CLS_ACT */
diff --git a/net/sched/simple.c b/net/sched/act_simple.c
index 8a6ae4f491e..e5f2e1f431e 100644
--- a/net/sched/simple.c
+++ b/net/sched/act_simple.c
@@ -44,9 +44,8 @@ static DEFINE_RWLOCK(simp_lock);
 #include <net/pkt_act.h>
 #include <net/act_generic.h>
 
-static int tcf_simp(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res)
+static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
 {
-	struct sk_buff *skb = *pskb;
 	struct tcf_defact *p = PRIV(a, defact);
 
 	spin_lock(&p->lock);
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 64b047c6556..5cb956b721e 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -92,7 +92,6 @@
 #include <linux/rtnetlink.h>
 #include <linux/skbuff.h>
 #include <net/pkt_cls.h>
-#include <config/net/ematch/stack.h>
 
 static LIST_HEAD(ematch_ops);
 static DEFINE_RWLOCK(ematch_mod_lock);
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 09453f997d8..6cd81708bf7 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -257,7 +257,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 	    (cl = cbq_class_lookup(q, prio)) != NULL)
 		return cl;
 
-	*qerr = NET_XMIT_DROP;
+	*qerr = NET_XMIT_BYPASS;
 	for (;;) {
 		int result = 0;
 		defmap = head->defaults;
@@ -413,7 +413,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	q->rx_class = cl;
 #endif
 	if (cl == NULL) {
-		if (ret == NET_XMIT_DROP)
+		if (ret == NET_XMIT_BYPASS)
 			sch->qstats.drops++;
 		kfree_skb(skb);
 		return ret;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index c26764bc410..91132f6871d 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -208,7 +208,7 @@ struct hfsc_sched
 do {									\
 	struct timeval tv;						\
 	do_gettimeofday(&tv);						\
-	(stamp) = 1000000ULL * tv.tv_sec + tv.tv_usec;			\
+	(stamp) = 1ULL * USEC_PER_SEC * tv.tv_sec + tv.tv_usec;		\
 } while (0)
 #endif
 
@@ -502,8 +502,8 @@ d2dx(u32 d)
 	u64 dx;
 
 	dx = ((u64)d * PSCHED_JIFFIE2US(HZ));
-	dx += 1000000 - 1;
-	do_div(dx, 1000000);
+	dx += USEC_PER_SEC - 1;
+	do_div(dx, USEC_PER_SEC);
 	return dx;
 }
 
@@ -523,7 +523,7 @@ dx2d(u64 dx)
 {
 	u64 d;
 
-	d = dx * 1000000;
+	d = dx * USEC_PER_SEC;
 	do_div(d, PSCHED_JIFFIE2US(HZ));
 	return (u32)d;
 }
@@ -1227,7 +1227,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		if (cl->level == 0)
 			return cl;
 
-	*qerr = NET_XMIT_DROP;
+	*qerr = NET_XMIT_BYPASS;
 	tcf = q->root.filter_list;
 	while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
@@ -1643,7 +1643,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	cl = hfsc_classify(skb, sch, &err);
 	if (cl == NULL) {
-		if (err == NET_XMIT_DROP)
+		if (err == NET_XMIT_BYPASS)
 			sch->qstats.drops++;
 		kfree_skb(skb);
 		return err;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 558cc087e60..3ec95df4a85 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -321,7 +321,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, in
 	if ((cl = htb_find(skb->priority,sch)) != NULL && cl->level == 0) 
 		return cl;
 
-	*qerr = NET_XMIT_DROP;
+	*qerr = NET_XMIT_BYPASS;
 	tcf = q->filter_list;
 	while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
@@ -724,7 +724,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	}
 #ifdef CONFIG_NET_CLS_ACT
     } else if (!cl) {
-	if (ret == NET_XMIT_DROP)
+	if (ret == NET_XMIT_BYPASS)
 		sch->qstats.drops++;
 	kfree_skb (skb);
 	return ret;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 82fb07aa06a..ba528320483 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -25,7 +25,7 @@
 
 #include <net/pkt_sched.h>
 
-#define VERSION "1.1"
+#define VERSION "1.2"
 
 /*	Network Emulation Queuing algorithm.
 	====================================
@@ -65,11 +65,12 @@ struct netem_sched_data {
 	u32 jitter;
 	u32 duplicate;
 	u32 reorder;
+	u32 corrupt;
 
 	struct crndstate {
 		unsigned long last;
 		unsigned long rho;
-	} delay_cor, loss_cor, dup_cor, reorder_cor;
+	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
 
 	struct disttable {
 		u32  size;
@@ -183,6 +184,23 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		q->duplicate = dupsave;
 	}
 
+	/*
+	 * Randomized packet corruption.
+	 * Make copy if needed since we are modifying
+	 * If packet is going to be hardware checksummed, then
+	 * do it now in software before we mangle it.
+	 */
+	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
+		if (!(skb = skb_unshare(skb, GFP_ATOMIC))
+		    || (skb->ip_summed == CHECKSUM_HW
+			&& skb_checksum_help(skb, 0))) {
+			sch->qstats.drops++;
+			return NET_XMIT_DROP;
+		}
+
+		skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
+	}
+
 	if (q->gap == 0 		/* not doing reordering */
 	    || q->counter < q->gap 	/* inside last reordering gap */
 	    || q->reorder < get_crandom(&q->reorder_cor)) {
@@ -382,6 +400,20 @@ static int get_reorder(struct Qdisc *sch, const struct rtattr *attr)
 	return 0;
 }
 
+static int get_corrupt(struct Qdisc *sch, const struct rtattr *attr)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	const struct tc_netem_corrupt *r = RTA_DATA(attr);
+
+	if (RTA_PAYLOAD(attr) != sizeof(*r))
+		return -EINVAL;
+
+	q->corrupt = r->probability;
+	init_crandom(&q->corrupt_cor, r->correlation);
+	return 0;
+}
+
+/* Parse netlink message to set options */
 static int netem_change(struct Qdisc *sch, struct rtattr *opt)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
@@ -432,13 +464,19 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt)
 			if (ret)
 				return ret;
 		}
+
 		if (tb[TCA_NETEM_REORDER-1]) {
 			ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]);
 			if (ret)
 				return ret;
 		}
-	}
 
+		if (tb[TCA_NETEM_CORRUPT-1]) {
+			ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT-1]);
+			if (ret)
+				return ret;
+		}
+	}
 
 	return 0;
 }
@@ -564,6 +602,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct tc_netem_qopt qopt;
 	struct tc_netem_corr cor;
 	struct tc_netem_reorder reorder;
+	struct tc_netem_corrupt corrupt;
 
 	qopt.latency = q->latency;
 	qopt.jitter = q->jitter;
@@ -582,6 +621,10 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	reorder.correlation = q->reorder_cor.rho;
 	RTA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);
 
+	corrupt.probability = q->corrupt;
+	corrupt.correlation = q->corrupt_cor.rho;
+	RTA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
+
 	rta->rta_len = skb->tail - b;
 
 	return skb->len;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 3ac0f495bad..1641db33a99 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -54,7 +54,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 	u32 band = skb->priority;
 	struct tcf_result res;
 
-	*qerr = NET_XMIT_DROP;
+	*qerr = NET_XMIT_BYPASS;
 	if (TC_H_MAJ(skb->priority) != sch->handle) {
 #ifdef CONFIG_NET_CLS_ACT
 		switch (tc_classify(skb, q->filter_list, &res)) {
@@ -91,7 +91,8 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	qdisc = prio_classify(skb, sch, &ret);
 #ifdef CONFIG_NET_CLS_ACT
 	if (qdisc == NULL) {
-		if (ret == NET_XMIT_DROP)
+
+		if (ret == NET_XMIT_BYPASS)
 			sch->qstats.drops++;
 		kfree_skb(skb);
 		return ret;
@@ -118,7 +119,7 @@ prio_requeue(struct sk_buff *skb, struct Qdisc* sch)
 	qdisc = prio_classify(skb, sch, &ret);
 #ifdef CONFIG_NET_CLS_ACT
 	if (qdisc == NULL) {
-		if (ret == NET_XMIT_DROP)
+		if (ret == NET_XMIT_BYPASS)
 			sch->qstats.drops++;
 		kfree_skb(skb);
 		return ret;
@@ -227,14 +228,13 @@ static int prio_tune(struct Qdisc *sch, struct rtattr *opt)
 	}
 	sch_tree_unlock(sch);
 
-	for (i=0; i<=TC_PRIO_MAX; i++) {
-		int band = q->prio2band[i];
-		if (q->queues[band] == &noop_qdisc) {
+	for (i=0; i<q->bands; i++) {
+		if (q->queues[i] == &noop_qdisc) {
 			struct Qdisc *child;
 			child = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
 			if (child) {
 				sch_tree_lock(sch);
-				child = xchg(&q->queues[band], child);
+				child = xchg(&q->queues[i], child);
 
 				if (child != &noop_qdisc)
 					qdisc_destroy(child);
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 8734bb7280e..86d8da0cbd0 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -144,6 +144,8 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 		if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
 		    (iph->protocol == IPPROTO_TCP ||
 		     iph->protocol == IPPROTO_UDP ||
+		     iph->protocol == IPPROTO_SCTP ||
+		     iph->protocol == IPPROTO_DCCP ||
 		     iph->protocol == IPPROTO_ESP))
 			h2 ^= *(((u32*)iph) + iph->ihl);
 		break;
@@ -155,6 +157,8 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 		h2 = iph->saddr.s6_addr32[3]^iph->nexthdr;
 		if (iph->nexthdr == IPPROTO_TCP ||
 		    iph->nexthdr == IPPROTO_UDP ||
+		    iph->nexthdr == IPPROTO_SCTP ||
+		    iph->nexthdr == IPPROTO_DCCP ||
 		    iph->nexthdr == IPPROTO_ESP)
 			h2 ^= *(u32*)&iph[1];
 		break;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 6cf0342706b..79b8ef34c6e 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -22,6 +22,7 @@
 #include <linux/in.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
+#include <linux/if_arp.h>
 #include <linux/if_ether.h>
 #include <linux/inet.h>
 #include <linux/netdevice.h>
@@ -273,7 +274,7 @@ teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *de
 
 static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct teql_master *master = (void*)dev->priv;
+	struct teql_master *master = netdev_priv(dev);
 	struct Qdisc *start, *q;
 	int busy;
 	int nores;
@@ -349,7 +350,7 @@ drop:
 static int teql_master_open(struct net_device *dev)
 {
 	struct Qdisc * q;
-	struct teql_master *m = (void*)dev->priv;
+	struct teql_master *m = netdev_priv(dev);
 	int mtu = 0xFFFE;
 	unsigned flags = IFF_NOARP|IFF_MULTICAST;
 
@@ -396,13 +397,13 @@ static int teql_master_close(struct net_device *dev)
 
 static struct net_device_stats *teql_master_stats(struct net_device *dev)
 {
-	struct teql_master *m = (void*)dev->priv;
+	struct teql_master *m = netdev_priv(dev);
 	return &m->stats;
 }
 
 static int teql_master_mtu(struct net_device *dev, int new_mtu)
 {
-	struct teql_master *m = (void*)dev->priv;
+	struct teql_master *m = netdev_priv(dev);
 	struct Qdisc *q;
 
 	if (new_mtu < 68)
@@ -422,7 +423,7 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu)
 
 static __init void teql_master_setup(struct net_device *dev)
 {
-	struct teql_master *master = dev->priv;
+	struct teql_master *master = netdev_priv(dev);
 	struct Qdisc_ops *ops = &master->qops;
 
 	master->dev	= dev;
@@ -475,7 +476,7 @@ static int __init teql_init(void)
 			break;
 		}
 
-		master = dev->priv;
+		master = netdev_priv(dev);
 
 		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
 		err = register_qdisc(&master->qops);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index dec68a60477..9d05e13e92f 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -110,7 +110,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 	asoc->cookie_life.tv_sec = sp->assocparams.sasoc_cookie_life / 1000;
 	asoc->cookie_life.tv_usec = (sp->assocparams.sasoc_cookie_life % 1000)
 					* 1000;
-	asoc->pmtu = 0;
 	asoc->frag_point = 0;
 
 	/* Set the association max_retrans and RTO values from the
@@ -123,6 +122,25 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 
 	asoc->overall_error_count = 0;
 
+	/* Initialize the association's heartbeat interval based on the
+	 * sock configured value.
+	 */
+	asoc->hbinterval = msecs_to_jiffies(sp->hbinterval);
+
+	/* Initialize path max retrans value. */
+	asoc->pathmaxrxt = sp->pathmaxrxt;
+
+	/* Initialize default path MTU. */
+	asoc->pathmtu = sp->pathmtu;
+
+	/* Set association default SACK delay */
+	asoc->sackdelay = msecs_to_jiffies(sp->sackdelay);
+
+	/* Set the association default flags controlling
+	 * Heartbeat, SACK delay, and Path MTU Discovery.
+	 */
+	asoc->param_flags = sp->param_flags;
+
 	/* Initialize the maximum mumber of new data packets that can be sent
 	 * in a burst.
 	 */
@@ -144,8 +162,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 		= 5 * asoc->rto_max;
 
 	asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0;
-	asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] =
-		SCTP_DEFAULT_TIMEOUT_SACK;
+	asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay;
 	asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] =
 		sp->autoclose * HZ;
 	
@@ -540,23 +557,46 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
 
 	sctp_transport_set_owner(peer, asoc);
 
+	/* Initialize the peer's heartbeat interval based on the
+	 * association configured value.
+	 */
+	peer->hbinterval = asoc->hbinterval;
+
+	/* Set the path max_retrans.  */
+	peer->pathmaxrxt = asoc->pathmaxrxt;
+
+	/* Initialize the peer's SACK delay timeout based on the
+	 * association configured value.
+	 */
+	peer->sackdelay = asoc->sackdelay;
+
+	/* Enable/disable heartbeat, SACK delay, and path MTU discovery
+	 * based on association setting.
+	 */
+	peer->param_flags = asoc->param_flags;
+
 	/* Initialize the pmtu of the transport. */
-	sctp_transport_pmtu(peer);
+	if (peer->param_flags & SPP_PMTUD_ENABLE)
+		sctp_transport_pmtu(peer);
+	else if (asoc->pathmtu)
+		peer->pathmtu = asoc->pathmtu;
+	else
+		peer->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
 
 	/* If this is the first transport addr on this association,
 	 * initialize the association PMTU to the peer's PMTU.
 	 * If not and the current association PMTU is higher than the new
 	 * peer's PMTU, reset the association PMTU to the new peer's PMTU.
 	 */
-	if (asoc->pmtu)
-		asoc->pmtu = min_t(int, peer->pmtu, asoc->pmtu);
+	if (asoc->pathmtu)
+		asoc->pathmtu = min_t(int, peer->pathmtu, asoc->pathmtu);
 	else
-		asoc->pmtu = peer->pmtu;
+		asoc->pathmtu = peer->pathmtu;
 
 	SCTP_DEBUG_PRINTK("sctp_assoc_add_peer:association %p PMTU set to "
-			  "%d\n", asoc, asoc->pmtu);
+			  "%d\n", asoc, asoc->pathmtu);
 
-	asoc->frag_point = sctp_frag_point(sp, asoc->pmtu);
+	asoc->frag_point = sctp_frag_point(sp, asoc->pathmtu);
 
 	/* The asoc->peer.port might not be meaningful yet, but
 	 * initialize the packet structure anyway.
@@ -574,7 +614,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
 	 *   (for example, implementations MAY use the size of the
 	 *   receiver advertised window).
 	 */
-	peer->cwnd = min(4*asoc->pmtu, max_t(__u32, 2*asoc->pmtu, 4380));
+	peer->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380));
 
 	/* At this point, we may not have the receiver's advertised window,
 	 * so initialize ssthresh to the default value and it will be set
@@ -585,17 +625,6 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
 	peer->partial_bytes_acked = 0;
 	peer->flight_size = 0;
 
-	/* By default, enable heartbeat for peer address. */
-	peer->hb_allowed = 1;
-
-	/* Initialize the peer's heartbeat interval based on the
-	 * sock configured value.
-	 */
-	peer->hb_interval = msecs_to_jiffies(sp->paddrparam.spp_hbinterval);
-
-	/* Set the path max_retrans.  */
-	peer->max_retrans = sp->paddrparam.spp_pathmaxrxt;
-
 	/* Set the transport's RTO.initial value */
 	peer->rto = asoc->rto_initial;
 
@@ -1155,18 +1184,18 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc)
 	/* Get the lowest pmtu of all the transports. */
 	list_for_each(pos, &asoc->peer.transport_addr_list) {
 		t = list_entry(pos, struct sctp_transport, transports);
-		if (!pmtu || (t->pmtu < pmtu))
-			pmtu = t->pmtu;
+		if (!pmtu || (t->pathmtu < pmtu))
+			pmtu = t->pathmtu;
 	}
 
 	if (pmtu) {
 		struct sctp_sock *sp = sctp_sk(asoc->base.sk);
-		asoc->pmtu = pmtu;
+		asoc->pathmtu = pmtu;
 		asoc->frag_point = sctp_frag_point(sp, pmtu);
 	}
 
 	SCTP_DEBUG_PRINTK("%s: asoc:%p, pmtu:%d, frag_point:%d\n",
-			  __FUNCTION__, asoc, asoc->pmtu, asoc->frag_point);
+			  __FUNCTION__, asoc, asoc->pathmtu, asoc->frag_point);
 }
 
 /* Should we send a SACK to update our peer? */
@@ -1179,7 +1208,7 @@ static inline int sctp_peer_needs_update(struct sctp_association *asoc)
 	case SCTP_STATE_SHUTDOWN_SENT:
 		if ((asoc->rwnd > asoc->a_rwnd) &&
 		    ((asoc->rwnd - asoc->a_rwnd) >=
-		     min_t(__u32, (asoc->base.sk->sk_rcvbuf >> 1), asoc->pmtu)))
+		     min_t(__u32, (asoc->base.sk->sk_rcvbuf >> 1), asoc->pathmtu)))
 			return 1;
 		break;
 	default:
diff --git a/net/sctp/input.c b/net/sctp/input.c
index b24ff2c1aef..cb78b50868e 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -225,6 +225,7 @@ int sctp_rcv(struct sk_buff *skb)
 
 	if (!xfrm_policy_check(sk, XFRM_POLICY_IN, skb, family))
 		goto discard_release;
+	nf_reset(skb);
 
 	ret = sk_filter(sk, skb, 1);
 	if (ret)
@@ -256,20 +257,26 @@ int sctp_rcv(struct sk_buff *skb)
 	 */
 	sctp_bh_lock_sock(sk);
 
+	/* It is possible that the association could have moved to a different
+	 * socket if it is peeled off. If so, update the sk.
+	 */ 
+	if (sk != rcvr->sk) {
+		sctp_bh_lock_sock(rcvr->sk);
+		sctp_bh_unlock_sock(sk);
+		sk = rcvr->sk;
+	}
+
 	if (sock_owned_by_user(sk))
 		sk_add_backlog(sk, skb);
 	else
 		sctp_backlog_rcv(sk, skb);
 
-	/* Release the sock and any reference counts we took in the
-	 * lookup calls.
+	/* Release the sock and the sock ref we took in the lookup calls.
+	 * The asoc/ep ref will be released in sctp_backlog_rcv.
 	 */
 	sctp_bh_unlock_sock(sk);
-	if (asoc)
-		sctp_association_put(asoc);
-	else
-		sctp_endpoint_put(ep);
 	sock_put(sk);
+
 	return ret;
 
 discard_it:
@@ -295,28 +302,84 @@ discard_release:
 int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 {
 	struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk;
-	struct sctp_inq *inqueue = &chunk->rcvr->inqueue;
-
-	sctp_inq_push(inqueue, chunk);
+	struct sctp_ep_common *rcvr = chunk->rcvr;
+	struct sctp_inq *inqueue = &rcvr->inqueue;
+
+	/* The chunk's receiver must belong to the socket being serviced. */
+	BUG_TRAP(rcvr->sk == sk);
+
+	/* Chunks for a dead receiver are freed rather than queued. */
+	if (rcvr->dead)
+		sctp_chunk_free(chunk);
+	else
+		sctp_inq_push(inqueue, chunk);
+
+	/* Drop the asoc/ep reference taken by the lookup in sctp_rcv;
+	 * which one depends on the receiver's type.
+	 */
+	if (rcvr->type == SCTP_EP_TYPE_ASSOCIATION)
+		sctp_association_put(sctp_assoc(rcvr));
+	else
+		sctp_endpoint_put(sctp_ep(rcvr));
+
         return 0;
 }
 
+/* Hand the backlogged skbs whose chunk is received by assoc over to
+ * newsk; every other skb taken off oldsk's backlog is put back on it.
+ */
+void sctp_backlog_migrate(struct sctp_association *assoc,
+			  struct sock *oldsk, struct sock *newsk)
+{
+	struct sk_buff *cur = oldsk->sk_backlog.head;
+	struct sk_buff *following;
+
+	oldsk->sk_backlog.head = oldsk->sk_backlog.tail = NULL;
+	for (; cur != NULL; cur = following) {
+		struct sock *dest = oldsk;
+
+		following = cur->next;
+		cur->next = NULL;
+		if (&assoc->base == SCTP_INPUT_CB(cur)->chunk->rcvr)
+			dest = newsk;
+		sk_add_backlog(dest, cur);
+	}
+}
+
 /* Handle icmp frag needed error. */
 void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
 			   struct sctp_transport *t, __u32 pmtu)
 {
-	if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
-		printk(KERN_WARNING "%s: Reported pmtu %d too low, "
-		       "using default minimum of %d\n", __FUNCTION__, pmtu,
-		       SCTP_DEFAULT_MINSEGMENT);
-		pmtu = SCTP_DEFAULT_MINSEGMENT;
-	}
+	if (sock_owned_by_user(sk) || !t || (t->pathmtu == pmtu))
+		return;
 
-	if (!sock_owned_by_user(sk) && t && (t->pmtu != pmtu)) {
-		t->pmtu = pmtu;
+	if (t->param_flags & SPP_PMTUD_ENABLE) {
+		if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
+			printk(KERN_WARNING "%s: Reported pmtu %d too low, "
+			       "using default minimum of %d\n",
+			       __FUNCTION__, pmtu,
+			       SCTP_DEFAULT_MINSEGMENT);
+			/* Use the default minimum and switch PMTUD off:
+			 * clear ENABLE, set DISABLE, leave HB flags alone.
+			 */
+			t->pathmtu = SCTP_DEFAULT_MINSEGMENT;
+			t->param_flags = (t->param_flags & ~SPP_PMTUD_ENABLE) |
+				SPP_PMTUD_DISABLE;
+		} else {
+			t->pathmtu = pmtu;
+		}
+
+		/* Update association pmtu. */
 		sctp_assoc_sync_pmtu(asoc);
-		sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD);
 	}
+
+	/* Retransmit with the new pmtu setting.
+	 * Normally, if PMTU discovery is disabled, an ICMP Fragmentation
+	 * Needed will never be sent, but if a message was sent before
+	 * PMTU discovery was disabled that was larger than the PMTU, it
+	 * would not be fragmented, so it must be re-transmitted fragmented.
+	 */
+	sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD);
 }
 
 /*
@@ -525,10 +588,16 @@ int sctp_rcv_ootb(struct sk_buff *skb)
 	sctp_errhdr_t *err;
 
 	ch = (sctp_chunkhdr_t *) skb->data;
-	ch_end = ((__u8 *) ch) + WORD_ROUND(ntohs(ch->length));
 
 	/* Scan through all the chunks in the packet.  */
-	while (ch_end > (__u8 *)ch && ch_end < skb->tail) {
+	do {
+		/* Break out if chunk length is less than the minimum. */
+		if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
+			break;
+
+		ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
+		if (ch_end > skb->tail)
+			break;
 
 		/* RFC 8.4, 2) If the OOTB packet contains an ABORT chunk, the
 		 * receiver MUST silently discard the OOTB packet and take no
@@ -559,8 +628,7 @@ int sctp_rcv_ootb(struct sk_buff *skb)
 		}
 
 		ch = (sctp_chunkhdr_t *) ch_end;
-	        ch_end = ((__u8 *) ch) + WORD_ROUND(ntohs(ch->length));
-	}
+	} while (ch_end < skb->tail);
 
 	return 0;
 
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index 2d33922c044..297b8951463 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -73,8 +73,10 @@ void sctp_inq_free(struct sctp_inq *queue)
 	/* If there is a packet which is currently being worked on,
 	 * free it as well.
 	 */
-	if (queue->in_progress)
+	if (queue->in_progress) {
 		sctp_chunk_free(queue->in_progress);
+		queue->in_progress = NULL;
+	}
 
 	if (queue->malloced) {
 		/* Dump the master memory segment.  */
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index fa3be2b8fb5..2e266129a76 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -180,8 +180,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport,
 	}
 
 	SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, "
-			  "src:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x "
-			  "dst:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+			  "src:" NIP6_FMT " dst:" NIP6_FMT "\n",
 			  __FUNCTION__, skb, skb->len,
 			  NIP6(fl.fl6_src), NIP6(fl.fl6_dst));
 
@@ -206,13 +205,13 @@ static struct dst_entry *sctp_v6_get_dst(struct sctp_association *asoc,
 		fl.oif = daddr->v6.sin6_scope_id;
 	
 
-	SCTP_DEBUG_PRINTK("%s: DST=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ",
+	SCTP_DEBUG_PRINTK("%s: DST=" NIP6_FMT " ",
 			  __FUNCTION__, NIP6(fl.fl6_dst));
 
 	if (saddr) {
 		ipv6_addr_copy(&fl.fl6_src, &saddr->v6.sin6_addr);
 		SCTP_DEBUG_PRINTK(
-			"SRC=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x - ",
+			"SRC=" NIP6_FMT " - ",
 			NIP6(fl.fl6_src));
 	}
 
@@ -221,8 +220,7 @@ static struct dst_entry *sctp_v6_get_dst(struct sctp_association *asoc,
 		struct rt6_info *rt;
 		rt = (struct rt6_info *)dst;
 		SCTP_DEBUG_PRINTK(
-			"rt6_dst:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x "
-			"rt6_src:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+			"rt6_dst:" NIP6_FMT " rt6_src:" NIP6_FMT "\n",
 			NIP6(rt->rt6i_dst.addr), NIP6(rt->rt6i_src.addr));
 	} else {
 		SCTP_DEBUG_PRINTK("NO ROUTE\n");
@@ -271,13 +269,12 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc,
 	__u8 bmatchlen;
 
 	SCTP_DEBUG_PRINTK("%s: asoc:%p dst:%p "
-			  "daddr:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ",
+			  "daddr:" NIP6_FMT " ",
 			  __FUNCTION__, asoc, dst, NIP6(daddr->v6.sin6_addr));
 
 	if (!asoc) {
 		ipv6_get_saddr(dst, &daddr->v6.sin6_addr,&saddr->v6.sin6_addr);
-		SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: "
-				  "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+		SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: " NIP6_FMT "\n",
 				  NIP6(saddr->v6.sin6_addr));
 		return;
 	}
@@ -305,13 +302,11 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc,
 
 	if (baddr) {
 		memcpy(saddr, baddr, sizeof(union sctp_addr));
-		SCTP_DEBUG_PRINTK("saddr: "
-				  "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+		SCTP_DEBUG_PRINTK("saddr: " NIP6_FMT "\n",
 				  NIP6(saddr->v6.sin6_addr));
 	} else {
 		printk(KERN_ERR "%s: asoc:%p Could not find a valid source "
-		       "address for the "
-		       "dest:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+		       "address for the dest:" NIP6_FMT "\n",
 		       __FUNCTION__, asoc, NIP6(daddr->v6.sin6_addr));
 	}
 
@@ -675,8 +670,7 @@ static int sctp_v6_is_ce(const struct sk_buff *skb)
 /* Dump the v6 addr to the seq file. */
 static void sctp_v6_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr)
 {
-	seq_printf(seq, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ",
-		   NIP6(addr->v6.sin6_addr));
+	seq_printf(seq, NIP6_FMT " ", NIP6(addr->v6.sin6_addr));
 }
 
 /* Initialize a PF_INET6 socket msg_name. */
@@ -866,7 +860,7 @@ static int sctp_inet6_supported_addrs(const struct sctp_sock *opt,
 	return 2;
 }
 
-static struct proto_ops inet6_seqpacket_ops = {
+static const struct proto_ops inet6_seqpacket_ops = {
 	.family     = PF_INET6,
 	.owner      = THIS_MODULE,
 	.release    = inet6_release,
@@ -905,7 +899,7 @@ static struct inet_protosw sctpv6_stream_protosw = {
 	.flags         = SCTP_PROTOSW_FLAG,
 };
 
-static int sctp6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+static int sctp6_rcv(struct sk_buff **pskb)
 {
 	return sctp_rcv(*pskb) ? -1 : 0;
 }
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 93137163346..a40991ef72c 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -234,8 +234,8 @@ sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet,
 		goto finish;
 
 	pmtu  = ((packet->transport->asoc) ?
-		 (packet->transport->asoc->pmtu) :
-		 (packet->transport->pmtu));
+		 (packet->transport->asoc->pathmtu) :
+		 (packet->transport->pathmtu));
 
 	too_big = (psize + chunk_len > pmtu);
 
@@ -482,7 +482,9 @@ int sctp_packet_transmit(struct sctp_packet *packet)
 	if (!dst || (dst->obsolete > 1)) {
 		dst_release(dst);
 		sctp_transport_route(tp, NULL, sctp_sk(sk));
-		sctp_assoc_sync_pmtu(asoc);
+		if (asoc->param_flags & SPP_PMTUD_ENABLE) {
+			sctp_assoc_sync_pmtu(asoc);
+		}
 	}
 
 	nskb->dst = dst_clone(tp->dst);
@@ -492,7 +494,10 @@ int sctp_packet_transmit(struct sctp_packet *packet)
 	SCTP_DEBUG_PRINTK("***sctp_transmit_packet*** skb len %d\n",
 			  nskb->len);
 
-	(*tp->af_specific->sctp_xmit)(nskb, tp, packet->ipfragok);
+	if (tp->param_flags & SPP_PMTUD_ENABLE)
+		(*tp->af_specific->sctp_xmit)(nskb, tp, packet->ipfragok);
+	else
+		(*tp->af_specific->sctp_xmit)(nskb, tp, 1);
 
 out:
 	packet->size = packet->overhead;
@@ -577,7 +582,7 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet,
 	 * 	if ((flightsize + Max.Burst * MTU) < cwnd)
 	 *		cwnd = flightsize + Max.Burst * MTU
 	 */
-	max_burst_bytes = asoc->max_burst * asoc->pmtu;
+	max_burst_bytes = asoc->max_burst * asoc->pathmtu;
 	if ((transport->flight_size + max_burst_bytes) < transport->cwnd) {
 		transport->cwnd = transport->flight_size + max_burst_bytes;
 		SCTP_DEBUG_PRINTK("%s: cwnd limited by max_burst: "
@@ -622,7 +627,7 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet,
 		 * data will fit or delay in hopes of bundling a full
 		 * sized packet.
 		 */
-		if (len < asoc->pmtu - packet->overhead) {
+		if (len < asoc->pathmtu - packet->overhead) {
 			retval = SCTP_XMIT_NAGLE_DELAY;
 			goto finish;
 		}
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 6e4dc28874d..d47a52c303a 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -176,7 +176,7 @@ static void sctp_seq_dump_remote_addrs(struct seq_file *seq, struct sctp_associa
 
 static void * sctp_eps_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	if (*pos > sctp_ep_hashsize)
+	if (*pos >= sctp_ep_hashsize)
 		return NULL;
 
 	if (*pos < 0)
@@ -185,8 +185,6 @@ static void * sctp_eps_seq_start(struct seq_file *seq, loff_t *pos)
 	if (*pos == 0)
 		seq_printf(seq, " ENDPT     SOCK   STY SST HBKT LPORT   UID INODE LADDRS\n");
 
-	++*pos;
-
 	return (void *)pos;
 }
 
@@ -198,11 +196,9 @@ static void sctp_eps_seq_stop(struct seq_file *seq, void *v)
 
 static void * sctp_eps_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	if (*pos > sctp_ep_hashsize)
+	if (++*pos >= sctp_ep_hashsize)
 		return NULL;
 
-	++*pos;
-
 	return pos;
 }
 
@@ -214,19 +210,19 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v)
 	struct sctp_ep_common *epb;
 	struct sctp_endpoint *ep;
 	struct sock *sk;
-	int    hash = *(int *)v;
+	int    hash = *(loff_t *)v;
 
-	if (hash > sctp_ep_hashsize)
+	if (hash >= sctp_ep_hashsize)
 		return -ENOMEM;
 
-	head = &sctp_ep_hashtable[hash-1];
+	head = &sctp_ep_hashtable[hash];
 	sctp_local_bh_disable();
 	read_lock(&head->lock);
 	for (epb = head->chain; epb; epb = epb->next) {
 		ep = sctp_ep(epb);
 		sk = epb->sk;
 		seq_printf(seq, "%8p %8p %-3d %-3d %-4d %-5d %5d %5lu ", ep, sk,
-			   sctp_sk(sk)->type, sk->sk_state, hash-1,
+			   sctp_sk(sk)->type, sk->sk_state, hash,
 			   epb->bind_addr.port,
 			   sock_i_uid(sk), sock_i_ino(sk));
 
@@ -283,7 +279,7 @@ void sctp_eps_proc_exit(void)
 
 static void * sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	if (*pos > sctp_assoc_hashsize)
+	if (*pos >= sctp_assoc_hashsize)
 		return NULL;
 
 	if (*pos < 0)
@@ -293,8 +289,6 @@ static void * sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos)
 		seq_printf(seq, " ASSOC     SOCK   STY SST ST HBKT ASSOC-ID TX_QUEUE RX_QUEUE UID INODE LPORT "
 				"RPORT LADDRS <-> RADDRS\n");
 
-	++*pos;
-
 	return (void *)pos;
 }
 
@@ -306,11 +300,9 @@ static void sctp_assocs_seq_stop(struct seq_file *seq, void *v)
 
 static void * sctp_assocs_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	if (*pos > sctp_assoc_hashsize)
+	if (++*pos >= sctp_assoc_hashsize)
 		return NULL;
 
-	++*pos;
-
 	return pos;
 }
 
@@ -321,12 +313,12 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
 	struct sctp_ep_common *epb;
 	struct sctp_association *assoc;
 	struct sock *sk;
-	int    hash = *(int *)v;
+	int    hash = *(loff_t *)v;
 
-	if (hash > sctp_assoc_hashsize)
+	if (hash >= sctp_assoc_hashsize)
 		return -ENOMEM;
 
-	head = &sctp_assoc_hashtable[hash-1];
+	head = &sctp_assoc_hashtable[hash];
 	sctp_local_bh_disable();
 	read_lock(&head->lock);
 	for (epb = head->chain; epb; epb = epb->next) {
@@ -335,7 +327,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
 		seq_printf(seq,
 			   "%8p %8p %-3d %-3d %-2d %-4d %4d %8d %8d %7d %5lu %-5d %5d ",
 			   assoc, sk, sctp_sk(sk)->type, sk->sk_state,
-			   assoc->state, hash-1, assoc->assoc_id,
+			   assoc->state, hash, assoc->assoc_id,
 			   (sk->sk_rcvbuf - assoc->rwnd),
 			   assoc->sndbuf_used,
 			   sock_i_uid(sk), sock_i_ino(sk),
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index f775d78aa59..de693b43c8e 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -54,6 +54,7 @@
 #include <net/protocol.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
+#include <net/route.h>
 #include <net/sctp/sctp.h>
 #include <net/addrconf.h>
 #include <net/inet_common.h>
@@ -829,7 +830,7 @@ static struct notifier_block sctp_inetaddr_notifier = {
 };
 
 /* Socket operations.  */
-static struct proto_ops inet_seqpacket_ops = {
+static const struct proto_ops inet_seqpacket_ops = {
 	.family      = PF_INET,
 	.owner       = THIS_MODULE,
 	.release     = inet_release,       /* Needs to be wrapped... */
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index f9573eba5c7..5e0de3c0eea 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1275,7 +1275,12 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
 	unsigned int keylen;
 	char *key;
 
-	headersize = sizeof(sctp_paramhdr_t) + SCTP_SECRET_SIZE;
+	/* Header size is static data prior to the actual cookie, including
+	 * any padding.
+	 */
+	headersize = sizeof(sctp_paramhdr_t) + 
+		     (sizeof(struct sctp_signed_cookie) - 
+		      sizeof(struct sctp_cookie));
 	bodysize = sizeof(struct sctp_cookie)
 		+ ntohs(init_chunk->chunk_hdr->length) + addrs_len;
 
@@ -1287,7 +1292,7 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
 			- (bodysize % SCTP_COOKIE_MULTIPLE);
 	*cookie_len = headersize + bodysize;
 
-	retval = (sctp_cookie_param_t *)kmalloc(*cookie_len, GFP_ATOMIC);
+	retval = kmalloc(*cookie_len, GFP_ATOMIC);
 
 	if (!retval) {
 		*cookie_len = 0;
@@ -1354,7 +1359,7 @@ struct sctp_association *sctp_unpack_cookie(
 	struct sctp_signed_cookie *cookie;
 	struct sctp_cookie *bear_cookie;
 	int headersize, bodysize, fixed_size;
-	__u8 digest[SCTP_SIGNATURE_SIZE];
+	__u8 *digest = ep->digest;
 	struct scatterlist sg;
 	unsigned int keylen, len;
 	char *key;
@@ -1362,7 +1367,12 @@ struct sctp_association *sctp_unpack_cookie(
 	struct sk_buff *skb = chunk->skb;
 	struct timeval tv;
 
-	headersize = sizeof(sctp_chunkhdr_t) + SCTP_SECRET_SIZE;
+	/* Header size is static data prior to the actual cookie, including
+	 * any padding.
+	 */
+	headersize = sizeof(sctp_chunkhdr_t) +
+		     (sizeof(struct sctp_signed_cookie) - 
+		      sizeof(struct sctp_cookie));
 	bodysize = ntohs(chunk->chunk_hdr->length) - headersize;
 	fixed_size = headersize + sizeof(struct sctp_cookie);
 
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 823947170a3..8d1dc24bab4 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -157,9 +157,12 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force,
 {
 	__u32 ctsn, max_tsn_seen;
 	struct sctp_chunk *sack;
+	struct sctp_transport *trans = asoc->peer.last_data_from;
 	int error = 0;
 
-	if (force)
+	if (force || 
+	    (!trans && (asoc->param_flags & SPP_SACKDELAY_DISABLE)) ||
+	    (trans && (trans->param_flags & SPP_SACKDELAY_DISABLE)))
 		asoc->peer.sack_needed = 1;
 
 	ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map);
@@ -189,7 +192,22 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force,
 	if (!asoc->peer.sack_needed) {
 		/* We will need a SACK for the next packet.  */
 		asoc->peer.sack_needed = 1;
-		goto out;
+
+		/* Set the SACK delay timeout based on the
+		 * SACK delay for the last transport
+		 * data was received from, or the default
+		 * for the association.
+		 */
+		if (trans)
+			asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = 
+				trans->sackdelay;
+		else
+			asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = 
+				asoc->sackdelay;
+
+		/* Restart the SACK timer. */
+		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
+				SCTP_TO(SCTP_EVENT_TIMEOUT_SACK));
 	} else {
 		if (asoc->a_rwnd > asoc->rwnd)
 			asoc->a_rwnd = asoc->rwnd;
@@ -205,7 +223,7 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force,
 		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
 				SCTP_TO(SCTP_EVENT_TIMEOUT_SACK));
 	}
-out:
+
 	return error;
 nomem:
 	error = -ENOMEM;
@@ -415,7 +433,7 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc,
 	asoc->overall_error_count++;
 
 	if (transport->state != SCTP_INACTIVE &&
-	    (transport->error_count++ >= transport->max_retrans)) {
+	    (transport->error_count++ >= transport->pathmaxrxt)) {
 		SCTP_DEBUG_PRINTK_IPADDR("transport_strike:association %p",
 					 " transport IP: port:%d failed.\n",
 					 asoc,
@@ -1232,8 +1250,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
 		case SCTP_CMD_TIMER_START:
 			timer = &asoc->timers[cmd->obj.to];
 			timeout = asoc->timeouts[cmd->obj.to];
-			if (!timeout)
-				BUG();
+			BUG_ON(!timeout);
 
 			timer->expires = jiffies + timeout;
 			sctp_association_hold(asoc);
@@ -1283,7 +1300,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
 					"T1 INIT Timeout adjustment"
 					" init_err_counter: %d"
 					" cycle: %d"
-					" timeout: %d\n",
+					" timeout: %ld\n",
 					asoc->init_err_counter,
 					asoc->init_cycle,
 					asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT]);
@@ -1311,7 +1328,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
 			SCTP_DEBUG_PRINTK(
 				"T1 COOKIE Timeout adjustment"
 				" init_err_counter: %d"
-				" timeout: %d\n",
+				" timeout: %ld\n",
 				asoc->init_err_counter,
 				asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE]);
 
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 475bfb4972d..71c9a961c32 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -900,7 +900,7 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep,
 	 * HEARTBEAT is sent (see Section 8.3).
 	 */
 
-	if (transport->hb_allowed) {
+	if (transport->param_flags & SPP_HB_ENABLE) {
 		if (SCTP_DISPOSITION_NOMEM ==
 				sctp_sf_heartbeat(ep, asoc, type, arg,
 						  commands))
@@ -1036,14 +1036,14 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep,
 		if (from_addr.sa.sa_family == AF_INET6) {
 			printk(KERN_WARNING
 			       "%s association %p could not find address "
-			       "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+			       NIP6_FMT "\n",
 			       __FUNCTION__,
 			       asoc,
 			       NIP6(from_addr.v6.sin6_addr));
 		} else {
 			printk(KERN_WARNING
 			       "%s association %p could not find address "
-			       "%u.%u.%u.%u\n",
+			       NIPQUAD_FMT "\n",
 			       __FUNCTION__,
 			       asoc,
 			       NIPQUAD(from_addr.v4.sin_addr.s_addr));
@@ -1051,7 +1051,7 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep,
 		return SCTP_DISPOSITION_DISCARD;
 	}
 
-	max_interval = link->hb_interval + link->rto;
+	max_interval = link->hbinterval + link->rto;
 
 	/* Check if the timestamp looks valid.  */
 	if (time_after(hbinfo->sent_at, jiffies) ||
@@ -2691,14 +2691,9 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep,
 	 * document allow. However, an SCTP transmitter MUST NOT be
 	 * more aggressive than the following algorithms allow.
 	 */
-	if (chunk->end_of_packet) {
+	if (chunk->end_of_packet)
 		sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE());
 
-		/* Start the SACK timer.  */
-		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
-				SCTP_TO(SCTP_EVENT_TIMEOUT_SACK));
-	}
-
 	return SCTP_DISPOSITION_CONSUME;
 
 discard_force:
@@ -2721,13 +2716,9 @@ discard_force:
 	return SCTP_DISPOSITION_DISCARD;
 
 discard_noforce:
-	if (chunk->end_of_packet) {
+	if (chunk->end_of_packet)
 		sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE());
 
-		/* Start the SACK timer.  */
-		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
-				SCTP_TO(SCTP_EVENT_TIMEOUT_SACK));
-	}
 	return SCTP_DISPOSITION_DISCARD;
 consume:
 	return SCTP_DISPOSITION_CONSUME;
@@ -3099,6 +3090,8 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
 			break;
 
 		ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
+		if (ch_end > skb->tail)
+			break;
 
 		if (SCTP_CID_SHUTDOWN_ACK == ch->type)
 			ootb_shut_ack = 1;
@@ -3442,9 +3435,6 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(const struct sctp_endpoint *ep,
 	 * send another. 
 	 */
 	sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE());
-	/* Start the SACK timer.  */
-	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
-			SCTP_TO(SCTP_EVENT_TIMEOUT_SACK));
 
 	return SCTP_DISPOSITION_CONSUME;
 
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index abab81f3818..fb1821d9f33 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -63,6 +63,7 @@
 #include <linux/wait.h>
 #include <linux/time.h>
 #include <linux/ip.h>
+#include <linux/capability.h>
 #include <linux/fcntl.h>
 #include <linux/poll.h>
 #include <linux/init.h>
@@ -156,10 +157,6 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
 				sizeof(struct sk_buff) +
 				sizeof(struct sctp_chunk);
 
-	sk->sk_wmem_queued += SCTP_DATA_SNDSIZE(chunk) +
-				sizeof(struct sk_buff) +
-				sizeof(struct sctp_chunk);
-
 	atomic_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
 }
 
@@ -864,7 +861,7 @@ SCTP_STATIC int sctp_setsockopt_bindx(struct sock* sk,
 		return -EFAULT;
 
 	/* Alloc space for the address array in kernel memory.  */
-	kaddrs = (struct sockaddr *)kmalloc(addrs_size, GFP_KERNEL);
+	kaddrs = kmalloc(addrs_size, GFP_KERNEL);
 	if (unlikely(!kaddrs))
 		return -ENOMEM;
 
@@ -1154,7 +1151,7 @@ SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk,
 		return -EFAULT;
 
 	/* Alloc space for the address array in kernel memory.  */
-	kaddrs = (struct sockaddr *)kmalloc(addrs_size, GFP_KERNEL);
+	kaddrs = kmalloc(addrs_size, GFP_KERNEL);
 	if (unlikely(!kaddrs))
 		return -ENOMEM;
 
@@ -1945,107 +1942,379 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval,
  * address's parameters:
  *
  *  struct sctp_paddrparams {
- *      sctp_assoc_t            spp_assoc_id;
- *      struct sockaddr_storage spp_address;
- *      uint32_t                spp_hbinterval;
- *      uint16_t                spp_pathmaxrxt;
- *  };
- *
- *   spp_assoc_id    - (UDP style socket) This is filled in the application,
- *                     and identifies the association for this query.
+ *     sctp_assoc_t            spp_assoc_id;
+ *     struct sockaddr_storage spp_address;
+ *     uint32_t                spp_hbinterval;
+ *     uint16_t                spp_pathmaxrxt;
+ *     uint32_t                spp_pathmtu;
+ *     uint32_t                spp_sackdelay;
+ *     uint32_t                spp_flags;
+ * };
+ *
+ *   spp_assoc_id    - (one-to-many style socket) This is filled in the
+ *                     application, and identifies the association for
+ *                     this query.
  *   spp_address     - This specifies which address is of interest.
  *   spp_hbinterval  - This contains the value of the heartbeat interval,
- *                     in milliseconds.  A value of 0, when modifying the
- *                     parameter, specifies that the heartbeat on this
- *                     address should be disabled. A value of UINT32_MAX
- *                     (4294967295), when modifying the parameter,
- *                     specifies that a heartbeat should be sent
- *                     immediately to the peer address, and the current
- *                     interval should remain unchanged.
+ *                     in milliseconds.  If a  value of zero
+ *                     is present in this field then no changes are to
+ *                     be made to this parameter.
  *   spp_pathmaxrxt  - This contains the maximum number of
  *                     retransmissions before this address shall be
- *                     considered unreachable.
+ *                     considered unreachable. If a  value of zero
+ *                     is present in this field then no changes are to
+ *                     be made to this parameter.
+ *   spp_pathmtu     - When Path MTU discovery is disabled the value
+ *                     specified here will be the "fixed" path mtu.
+ *                     Note that if the spp_address field is empty
+ *                     then all associations on this address will
+ *                     have this fixed path mtu set upon them.
+ *
+ *   spp_sackdelay   - When delayed sack is enabled, this value specifies
+ *                     the number of milliseconds that sacks will be delayed
+ *                     for. This value will apply to all addresses of an
+ *                     association if the spp_address field is empty. Note
+ *                     also, that if delayed sack is enabled and this
+ *                     value is set to 0, no change is made to the last
+ *                     recorded delayed sack timer value.
+ *
+ *   spp_flags       - These flags are used to control various features
+ *                     on an association. The flag field may contain
+ *                     zero or more of the following options.
+ *
+ *                     SPP_HB_ENABLE  - Enable heartbeats on the
+ *                     specified address. Note that if the address
+ *                     field is empty all addresses for the association
+ *                     have heartbeats enabled upon them.
+ *
+ *                     SPP_HB_DISABLE - Disable heartbeats on the
+ *                     specified address. Note that if the address
+ *                     field is empty all addresses for the association
+ *                     will have their heartbeats disabled. Note also
+ *                     that SPP_HB_ENABLE and SPP_HB_DISABLE are
+ *                     mutually exclusive, only one of these two should
+ *                     be specified. Enabling both fields will have
+ *                     undetermined results.
+ *
+ *                     SPP_HB_DEMAND - Request a user initiated heartbeat
+ *                     to be made immediately.
+ *
+ *                     SPP_PMTUD_ENABLE - This field will enable PMTU
+ *                     discovery upon the specified address. Note that
+ *                     if the address field is empty then all addresses
+ *                     on the association are affected.
+ *
+ *                     SPP_PMTUD_DISABLE - This field will disable PMTU
+ *                     discovery upon the specified address. Note that
+ *                     if the address field is empty then all addresses
+ *                     on the association are affected. Note also that
+ *                     SPP_PMTUD_ENABLE and SPP_PMTUD_DISABLE are mutually
+ *                     exclusive. Enabling both will have undetermined
+ *                     results.
+ *
+ *                     SPP_SACKDELAY_ENABLE - Setting this flag turns
+ *                     on delayed sack. The time specified in spp_sackdelay
+ *                     is used to specify the sack delay for this address. Note
+ *                     that if spp_address is empty then all addresses will
+ *                     enable delayed sack and take on the sack delay
+ *                     value specified in spp_sackdelay.
+ *                     SPP_SACKDELAY_DISABLE - Setting this flag turns
+ *                     off delayed sack. If the spp_address field is blank then
+ *                     delayed sack is disabled for the entire association. Note
+ *                     also that this field is mutually exclusive to
+ *                     SPP_SACKDELAY_ENABLE, setting both will have undefined
+ *                     results.
  */
+int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
+				struct sctp_transport   *trans,
+				struct sctp_association *asoc,
+				struct sctp_sock        *sp,
+				int                      hb_change,
+				int                      pmtud_change,
+				int                      sackdelay_change)
+{
+	int error;
+
+	if (params->spp_flags & SPP_HB_DEMAND && trans) {
+		error = sctp_primitive_REQUESTHEARTBEAT (trans->asoc, trans);
+		if (error)
+			return error;
+	}
+
+	if (params->spp_hbinterval) {
+		if (trans) {
+			trans->hbinterval = msecs_to_jiffies(params->spp_hbinterval);
+		} else if (asoc) {
+			asoc->hbinterval = msecs_to_jiffies(params->spp_hbinterval);
+		} else {
+			sp->hbinterval = params->spp_hbinterval;
+		}
+	}
+
+	if (hb_change) {
+		if (trans) {
+			trans->param_flags =
+				(trans->param_flags & ~SPP_HB) | hb_change;
+		} else if (asoc) {
+			asoc->param_flags =
+				(asoc->param_flags & ~SPP_HB) | hb_change;
+		} else {
+			sp->param_flags =
+				(sp->param_flags & ~SPP_HB) | hb_change;
+		}
+	}
+
+	if (params->spp_pathmtu) {
+		if (trans) {
+			trans->pathmtu = params->spp_pathmtu;
+			sctp_assoc_sync_pmtu(asoc);
+		} else if (asoc) {
+			asoc->pathmtu = params->spp_pathmtu;
+			asoc->frag_point = sctp_frag_point(sp, params->spp_pathmtu);
+		} else {
+			sp->pathmtu = params->spp_pathmtu;
+		}
+	}
+
+	if (pmtud_change) {
+		if (trans) {
+			int update = (trans->param_flags & SPP_PMTUD_DISABLE) &&
+				(params->spp_flags & SPP_PMTUD_ENABLE);
+			trans->param_flags =
+				(trans->param_flags & ~SPP_PMTUD) | pmtud_change;
+			if (update) {
+				sctp_transport_pmtu(trans);
+				sctp_assoc_sync_pmtu(asoc);
+			}
+		} else if (asoc) {
+			asoc->param_flags =
+				(asoc->param_flags & ~SPP_PMTUD) | pmtud_change;
+		} else {
+			sp->param_flags =
+				(sp->param_flags & ~SPP_PMTUD) | pmtud_change;
+		}
+	}
+
+	if (params->spp_sackdelay) {
+		if (trans) {
+			trans->sackdelay =
+				msecs_to_jiffies(params->spp_sackdelay);
+		} else if (asoc) {
+			asoc->sackdelay =
+				msecs_to_jiffies(params->spp_sackdelay);
+		} else {
+			sp->sackdelay = params->spp_sackdelay;
+		}
+	}
+
+	if (sackdelay_change) {
+		if (trans) {
+			trans->param_flags =
+				(trans->param_flags & ~SPP_SACKDELAY) |
+				sackdelay_change;
+		} else if (asoc) {
+			asoc->param_flags =
+				(asoc->param_flags & ~SPP_SACKDELAY) |
+				sackdelay_change;
+		} else {
+			sp->param_flags =
+				(sp->param_flags & ~SPP_SACKDELAY) |
+				sackdelay_change;
+		}
+	}
+
+	if (params->spp_pathmaxrxt) {
+		if (trans) {
+			trans->pathmaxrxt = params->spp_pathmaxrxt;
+		} else if (asoc) {
+			asoc->pathmaxrxt = params->spp_pathmaxrxt;
+		} else {
+			sp->pathmaxrxt = params->spp_pathmaxrxt;
+		}
+	}
+
+	return 0;
+}
+
 static int sctp_setsockopt_peer_addr_params(struct sock *sk,
 					    char __user *optval, int optlen)
 {
-	struct sctp_paddrparams params;
-	struct sctp_transport *trans;
+	struct sctp_paddrparams  params;
+	struct sctp_transport   *trans = NULL;
+	struct sctp_association *asoc = NULL;
+	struct sctp_sock        *sp = sctp_sk(sk);
 	int error;
+	int hb_change, pmtud_change, sackdelay_change;
 
 	if (optlen != sizeof(struct sctp_paddrparams))
-		return -EINVAL;
+		return - EINVAL;
+
 	if (copy_from_user(&params, optval, optlen))
 		return -EFAULT;
 
-	/*
-	 * API 7. Socket Options (setting the default value for the endpoint)
-	 * All options that support specific settings on an association by
-	 * filling in either an association id variable or a sockaddr_storage
-	 * SHOULD also support setting of the same value for the entire endpoint
-	 * (i.e. future associations). To accomplish this the following logic is
-	 * used when setting one of these options:
-
-	 * c) If neither the sockaddr_storage or association identification is
-	 *    set i.e. the sockaddr_storage is set to all 0's (INADDR_ANY) and
-	 *    the association identification is 0, the settings are a default
-	 *    and to be applied to the endpoint (all future associations).
-	 */
+	/* Validate flags and value parameters. */
+	hb_change        = params.spp_flags & SPP_HB;
+	pmtud_change     = params.spp_flags & SPP_PMTUD;
+	sackdelay_change = params.spp_flags & SPP_SACKDELAY;
+
+	if (hb_change        == SPP_HB ||
+	    pmtud_change     == SPP_PMTUD ||
+	    sackdelay_change == SPP_SACKDELAY ||
+	    params.spp_sackdelay > 500 ||
+	    (params.spp_pathmtu
+	    && params.spp_pathmtu < SCTP_DEFAULT_MINSEGMENT))
+		return -EINVAL;
 
-	/* update default value for endpoint (all future associations) */
-	if (!params.spp_assoc_id && 
-	    sctp_is_any(( union sctp_addr *)&params.spp_address)) {
-		/* Manual heartbeat on an endpoint is invalid. */
-		if (0xffffffff == params.spp_hbinterval)
+	/* If an address other than INADDR_ANY is specified, and
+	 * no transport is found, then the request is invalid.
+	 */
+	if (!sctp_is_any(( union sctp_addr *)&params.spp_address)) {
+		trans = sctp_addr_id2transport(sk, &params.spp_address,
+					       params.spp_assoc_id);
+		if (!trans)
 			return -EINVAL;
-		else if (params.spp_hbinterval)
-			sctp_sk(sk)->paddrparam.spp_hbinterval =
-						params.spp_hbinterval;
-		if (params.spp_pathmaxrxt)
-			sctp_sk(sk)->paddrparam.spp_pathmaxrxt =
-						params.spp_pathmaxrxt;
-		return 0;
 	}
 
-	trans = sctp_addr_id2transport(sk, &params.spp_address,
-				       params.spp_assoc_id);
-	if (!trans)
+	/* Get association, if assoc_id != 0 and the socket is a one
+	 * to many style socket, and an association was not found, then
+	 * the id was invalid.
+	 */
+	asoc = sctp_id2assoc(sk, params.spp_assoc_id);
+	if (!asoc && params.spp_assoc_id && sctp_style(sk, UDP))
 		return -EINVAL;
 
-	/* Applications can enable or disable heartbeats for any peer address
-	 * of an association, modify an address's heartbeat interval, force a
-	 * heartbeat to be sent immediately, and adjust the address's maximum
-	 * number of retransmissions sent before an address is considered
-	 * unreachable.
-	 *
-	 * The value of the heartbeat interval, in milliseconds. A value of
-	 * UINT32_MAX (4294967295), when modifying the parameter, specifies
-	 * that a heartbeat should be sent immediately to the peer address,
-	 * and the current interval should remain unchanged.
+	/* Heartbeat demand can only be sent on a transport or
+	 * association, but not a socket.
 	 */
-	if (0xffffffff == params.spp_hbinterval) {
-		error = sctp_primitive_REQUESTHEARTBEAT (trans->asoc, trans);
-		if (error)
-			return error;
-	} else {
-	/* The value of the heartbeat interval, in milliseconds. A value of 0,
-	 * when modifying the parameter, specifies that the heartbeat on this
-	 * address should be disabled.
+	if (params.spp_flags & SPP_HB_DEMAND && !trans && !asoc)
+		return -EINVAL;
+
+	/* Process parameters. */
+	error = sctp_apply_peer_addr_params(&params, trans, asoc, sp,
+					    hb_change, pmtud_change,
+					    sackdelay_change);
+
+	if (error)
+		return error;
+
+	/* If changes are for association, also apply parameters to each
+	 * transport.
 	 */
-		if (params.spp_hbinterval) {
-			trans->hb_allowed = 1;
-			trans->hb_interval = 
-				msecs_to_jiffies(params.spp_hbinterval);
-		} else
-			trans->hb_allowed = 0;
+	if (!trans && asoc) {
+		struct list_head *pos;
+
+		list_for_each(pos, &asoc->peer.transport_addr_list) {
+			trans = list_entry(pos, struct sctp_transport,
+					   transports);
+			sctp_apply_peer_addr_params(&params, trans, asoc, sp,
+						    hb_change, pmtud_change,
+						    sackdelay_change);
+		}
 	}
 
-	/* spp_pathmaxrxt contains the maximum number of retransmissions
-	 * before this address shall be considered unreachable.
-	 */
-	if (params.spp_pathmaxrxt)
-		trans->max_retrans = params.spp_pathmaxrxt;
+	return 0;
+}
+
+/* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
+ *
+ *   This option will get or set the delayed ack timer.  The time is set
+ *   in milliseconds.  If the assoc_id is 0, then this sets or gets the
+ *   endpoint's default delayed ack timer value.  If the assoc_id field is
+ *   non-zero, then the set or get affects the specified association.
+ *
+ *   struct sctp_assoc_value {
+ *       sctp_assoc_t            assoc_id;
+ *       uint32_t                assoc_value;
+ *   };
+ *
+ *     assoc_id    - This parameter indicates which association the
+ *                   user is performing an action upon. Note that if
+ *                   this field's value is zero then the endpoint's
+ *                   default value is changed (affecting future
+ *                   associations only).
+ *
+ *     assoc_value - This parameter contains the number of milliseconds
+ *                   that the user is requesting the delayed ACK timer
+ *                   be set to. Note that this value is defined in
+ *                   the standard to be between 200 and 500 milliseconds.
+ *
+ *                   Note: a value of zero will leave the value alone,
+ *                   but disable SACK delay. A non-zero value will also
+ *                   enable SACK delay.
+ */
+
+static int sctp_setsockopt_delayed_ack_time(struct sock *sk,
+					    char __user *optval, int optlen)
+{
+	struct sctp_assoc_value  params;
+	struct sctp_transport   *trans = NULL;
+	struct sctp_association *asoc = NULL;
+	struct sctp_sock        *sp = sctp_sk(sk);
+
+	if (optlen != sizeof(struct sctp_assoc_value))
+		return - EINVAL;
+
+	if (copy_from_user(&params, optval, optlen))
+		return -EFAULT;
 
+	/* Validate value parameter. */
+	if (params.assoc_value > 500)
+		return -EINVAL;
+
+	/* Get association, if assoc_id != 0 and the socket is a one
+	 * to many style socket, and an association was not found, then
+	 * the id was invalid.
+ 	 */
+	asoc = sctp_id2assoc(sk, params.assoc_id);
+	if (!asoc && params.assoc_id && sctp_style(sk, UDP))
+		return -EINVAL;
+
+	if (params.assoc_value) {
+		if (asoc) {
+			asoc->sackdelay =
+				msecs_to_jiffies(params.assoc_value);
+			asoc->param_flags = 
+				(asoc->param_flags & ~SPP_SACKDELAY) |
+				SPP_SACKDELAY_ENABLE;
+		} else {
+			sp->sackdelay = params.assoc_value;
+			sp->param_flags = 
+				(sp->param_flags & ~SPP_SACKDELAY) |
+				SPP_SACKDELAY_ENABLE;
+		}
+	} else {
+		if (asoc) {
+			asoc->param_flags = 
+				(asoc->param_flags & ~SPP_SACKDELAY) |
+				SPP_SACKDELAY_DISABLE;
+		} else {
+			sp->param_flags = 
+				(sp->param_flags & ~SPP_SACKDELAY) |
+				SPP_SACKDELAY_DISABLE;
+		}
+	}
+
+	/* If change is for association, also apply to each transport. */
+	if (asoc) {
+		struct list_head *pos;
+
+		list_for_each(pos, &asoc->peer.transport_addr_list) {
+			trans = list_entry(pos, struct sctp_transport,
+					   transports);
+			if (params.assoc_value) {
+				trans->sackdelay =
+					msecs_to_jiffies(params.assoc_value);
+				trans->param_flags = 
+					(trans->param_flags & ~SPP_SACKDELAY) |
+					SPP_SACKDELAY_ENABLE;
+			} else {
+				trans->param_flags = 
+					(trans->param_flags & ~SPP_SACKDELAY) |
+					SPP_SACKDELAY_DISABLE;
+			}
+		}
+	}
+ 
 	return 0;
 }
 
@@ -2338,7 +2607,7 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, int optl
 	/* Update the frag_point of the existing associations. */
 	list_for_each(pos, &(sp->ep->asocs)) {
 		asoc = list_entry(pos, struct sctp_association, asocs);
-		asoc->frag_point = sctp_frag_point(sp, asoc->pmtu); 
+		asoc->frag_point = sctp_frag_point(sp, asoc->pathmtu); 
 	}
 
 	return 0;
@@ -2495,6 +2764,10 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname,
 		retval = sctp_setsockopt_peer_addr_params(sk, optval, optlen);
 		break;
 
+	case SCTP_DELAYED_ACK_TIME:
+		retval = sctp_setsockopt_delayed_ack_time(sk, optval, optlen);
+		break;
+
 	case SCTP_INITMSG:
 		retval = sctp_setsockopt_initmsg(sk, optval, optlen);
 		break;
@@ -2719,8 +2992,13 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
 	/* Default Peer Address Parameters.  These defaults can
 	 * be modified via SCTP_PEER_ADDR_PARAMS
 	 */
-	sp->paddrparam.spp_hbinterval = jiffies_to_msecs(sctp_hb_interval);
-	sp->paddrparam.spp_pathmaxrxt = sctp_max_retrans_path;
+	sp->hbinterval  = jiffies_to_msecs(sctp_hb_interval);
+	sp->pathmaxrxt  = sctp_max_retrans_path;
+	sp->pathmtu     = 0; // allow default discovery
+	sp->sackdelay   = jiffies_to_msecs(sctp_sack_timeout);
+	sp->param_flags = SPP_HB_ENABLE |
+	                  SPP_PMTUD_ENABLE |
+	                  SPP_SACKDELAY_ENABLE;
 
 	/* If enabled no SCTP message fragmentation will be performed.
 	 * Configure through SCTP_DISABLE_FRAGMENTS socket option.
@@ -2869,7 +3147,7 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len,
 	status.sstat_primary.spinfo_cwnd = transport->cwnd;
 	status.sstat_primary.spinfo_srtt = transport->srtt;
 	status.sstat_primary.spinfo_rto = jiffies_to_msecs(transport->rto);
-	status.sstat_primary.spinfo_mtu = transport->pmtu;
+	status.sstat_primary.spinfo_mtu = transport->pathmtu;
 
 	if (status.sstat_primary.spinfo_state == SCTP_UNKNOWN)
 		status.sstat_primary.spinfo_state = SCTP_ACTIVE;
@@ -2928,7 +3206,7 @@ static int sctp_getsockopt_peer_addr_info(struct sock *sk, int len,
 	pinfo.spinfo_cwnd = transport->cwnd;
 	pinfo.spinfo_srtt = transport->srtt;
 	pinfo.spinfo_rto = jiffies_to_msecs(transport->rto);
-	pinfo.spinfo_mtu = transport->pmtu;
+	pinfo.spinfo_mtu = transport->pathmtu;
 
 	if (pinfo.spinfo_state == SCTP_UNKNOWN)
 		pinfo.spinfo_state = SCTP_ACTIVE;
@@ -3090,69 +3368,227 @@ out:
  * address's parameters:
  *
  *  struct sctp_paddrparams {
- *      sctp_assoc_t            spp_assoc_id;
- *      struct sockaddr_storage spp_address;
- *      uint32_t                spp_hbinterval;
- *      uint16_t                spp_pathmaxrxt;
- *  };
- *
- *   spp_assoc_id    - (UDP style socket) This is filled in the application,
- *                     and identifies the association for this query.
+ *     sctp_assoc_t            spp_assoc_id;
+ *     struct sockaddr_storage spp_address;
+ *     uint32_t                spp_hbinterval;
+ *     uint16_t                spp_pathmaxrxt;
+ *     uint32_t                spp_pathmtu;
+ *     uint32_t                spp_sackdelay;
+ *     uint32_t                spp_flags;
+ * };
+ *
+ *   spp_assoc_id    - (one-to-many style socket) This is filled in the
+ *                     application, and identifies the association for
+ *                     this query.
  *   spp_address     - This specifies which address is of interest.
  *   spp_hbinterval  - This contains the value of the heartbeat interval,
- *                     in milliseconds.  A value of 0, when modifying the
- *                     parameter, specifies that the heartbeat on this
- *                     address should be disabled. A value of UINT32_MAX
- *                     (4294967295), when modifying the parameter,
- *                     specifies that a heartbeat should be sent
- *                     immediately to the peer address, and the current
- *                     interval should remain unchanged.
+ *                     in milliseconds.  If a  value of zero
+ *                     is present in this field then no changes are to
+ *                     be made to this parameter.
  *   spp_pathmaxrxt  - This contains the maximum number of
  *                     retransmissions before this address shall be
- *                     considered unreachable.
+ *                     considered unreachable. If a  value of zero
+ *                     is present in this field then no changes are to
+ *                     be made to this parameter.
+ *   spp_pathmtu     - When Path MTU discovery is disabled the value
+ *                     specified here will be the "fixed" path mtu.
+ *                     Note that if the spp_address field is empty
+ *                     then all associations on this address will
+ *                     have this fixed path mtu set upon them.
+ *
+ *   spp_sackdelay   - When delayed sack is enabled, this value specifies
+ *                     the number of milliseconds that sacks will be delayed
+ *                     for. This value will apply to all addresses of an
+ *                     association if the spp_address field is empty. Note
+ *                     also, that if delayed sack is enabled and this
+ *                     value is set to 0, no change is made to the last
+ *                     recorded delayed sack timer value.
+ *
+ *   spp_flags       - These flags are used to control various features
+ *                     on an association. The flag field may contain
+ *                     zero or more of the following options.
+ *
+ *                     SPP_HB_ENABLE  - Enable heartbeats on the
+ *                     specified address. Note that if the address
+ *                     field is empty all addresses for the association
+ *                     have heartbeats enabled upon them.
+ *
+ *                     SPP_HB_DISABLE - Disable heartbeats on the
+ *                     specified address. Note that if the address
+ *                     field is empty all addresses for the association
+ *                     will have their heartbeats disabled. Note also
+ *                     that SPP_HB_ENABLE and SPP_HB_DISABLE are
+ *                     mutually exclusive, only one of these two should
+ *                     be specified. Enabling both fields will have
+ *                     undetermined results.
+ *
+ *                     SPP_HB_DEMAND - Request a user initiated heartbeat
+ *                     to be made immediately.
+ *
+ *                     SPP_PMTUD_ENABLE - This field will enable PMTU
+ *                     discovery upon the specified address. Note that
+ *                     if the address field is empty then all addresses
+ *                     on the association are affected.
+ *
+ *                     SPP_PMTUD_DISABLE - This field will disable PMTU
+ *                     discovery upon the specified address. Note that
+ *                     if the address field is empty then all addresses
+ *                     on the association are affected. Note also that
+ *                     SPP_PMTUD_ENABLE and SPP_PMTUD_DISABLE are mutually
+ *                     exclusive. Enabling both will have undetermined
+ *                     results.
+ *
+ *                     SPP_SACKDELAY_ENABLE - Setting this flag turns
+ *                     on delayed sack. The time specified in spp_sackdelay
+ *                     is used to specify the sack delay for this address. Note
+ *                     that if spp_address is empty then all addresses will
+ *                     enable delayed sack and take on the sack delay
+ *                     value specified in spp_sackdelay.
+ *                     SPP_SACKDELAY_DISABLE - Setting this flag turns
+ *                     off delayed sack. If the spp_address field is blank then
+ *                     delayed sack is disabled for the entire association. Note
+ *                     also that this field is mutually exclusive to
+ *                     SPP_SACKDELAY_ENABLE, setting both will have undefined
+ *                     results.
  */
 static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
-						char __user *optval, int __user *optlen)
+					    char __user *optval, int __user *optlen)
 {
-	struct sctp_paddrparams params;
-	struct sctp_transport *trans;
+	struct sctp_paddrparams  params;
+	struct sctp_transport   *trans = NULL;
+	struct sctp_association *asoc = NULL;
+	struct sctp_sock        *sp = sctp_sk(sk);
 
 	if (len != sizeof(struct sctp_paddrparams))
 		return -EINVAL;
+
 	if (copy_from_user(&params, optval, len))
 		return -EFAULT;
 
-	/* If no association id is specified retrieve the default value
-	 * for the endpoint that will be used for all future associations
+	/* If an address other than INADDR_ANY is specified, and
+	 * no transport is found, then the request is invalid.
 	 */
-	if (!params.spp_assoc_id &&
-	    sctp_is_any(( union sctp_addr *)&params.spp_address)) {
-		params.spp_hbinterval = sctp_sk(sk)->paddrparam.spp_hbinterval;
-		params.spp_pathmaxrxt = sctp_sk(sk)->paddrparam.spp_pathmaxrxt;
-
-		goto done;
+	if (!sctp_is_any(( union sctp_addr *)&params.spp_address)) {
+		trans = sctp_addr_id2transport(sk, &params.spp_address,
+					       params.spp_assoc_id);
+		if (!trans) {
+			SCTP_DEBUG_PRINTK("Failed no transport\n");
+			return -EINVAL;
+		}
 	}
 
-	trans = sctp_addr_id2transport(sk, &params.spp_address,
-				       params.spp_assoc_id);
-	if (!trans)
+	/* Get association, if assoc_id != 0 and the socket is a one
+	 * to many style socket, and an association was not found, then
+	 * the id was invalid.
+	 */
+	asoc = sctp_id2assoc(sk, params.spp_assoc_id);
+	if (!asoc && params.spp_assoc_id && sctp_style(sk, UDP)) {
+		SCTP_DEBUG_PRINTK("Failed no association\n");
 		return -EINVAL;
+	}
 
-	/* The value of the heartbeat interval, in milliseconds. A value of 0,
-	 * when modifying the parameter, specifies that the heartbeat on this
-	 * address should be disabled.
-	 */
-	if (!trans->hb_allowed)
-		params.spp_hbinterval = 0;
-	else
-		params.spp_hbinterval = jiffies_to_msecs(trans->hb_interval);
+	if (trans) {
+		/* Fetch transport values. */
+		params.spp_hbinterval = jiffies_to_msecs(trans->hbinterval);
+		params.spp_pathmtu    = trans->pathmtu;
+		params.spp_pathmaxrxt = trans->pathmaxrxt;
+		params.spp_sackdelay  = jiffies_to_msecs(trans->sackdelay);
+
+		/*draft-11 doesn't say what to return in spp_flags*/
+		params.spp_flags      = trans->param_flags;
+	} else if (asoc) {
+		/* Fetch association values. */
+		params.spp_hbinterval = jiffies_to_msecs(asoc->hbinterval);
+		params.spp_pathmtu    = asoc->pathmtu;
+		params.spp_pathmaxrxt = asoc->pathmaxrxt;
+		params.spp_sackdelay  = jiffies_to_msecs(asoc->sackdelay);
+
+		/*draft-11 doesn't say what to return in spp_flags*/
+		params.spp_flags      = asoc->param_flags;
+	} else {
+		/* Fetch socket values. */
+		params.spp_hbinterval = sp->hbinterval;
+		params.spp_pathmtu    = sp->pathmtu;
+		params.spp_sackdelay  = sp->sackdelay;
+		params.spp_pathmaxrxt = sp->pathmaxrxt;
+
+		/*draft-11 doesn't say what to return in spp_flags*/
+		params.spp_flags      = sp->param_flags;
+	}
 
-	/* spp_pathmaxrxt contains the maximum number of retransmissions
-	 * before this address shall be considered unreachable.
-	 */
-	params.spp_pathmaxrxt = trans->max_retrans;
+	if (copy_to_user(optval, &params, len))
+		return -EFAULT;
+
+	if (put_user(len, optlen))
+		return -EFAULT;
+
+	return 0;
+}
+
+/* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
+ *
+ *   This option will get or set the delayed ack timer.  The time is set
+ *   in milliseconds.  If the assoc_id is 0, then this sets or gets the
+ *   endpoint's default delayed ack timer value.  If the assoc_id field is
+ *   non-zero, then the set or get affects the specified association.
+ *
+ *   struct sctp_assoc_value {
+ *       sctp_assoc_t            assoc_id;
+ *       uint32_t                assoc_value;
+ *   };
+ *
+ *     assoc_id    - This parameter indicates which association the
+ *                   user is performing an action upon. Note that if
+ *                   this field's value is zero then the endpoint's
+ *                   default value is changed (affecting future
+ *                   associations only).
+ *
+ *     assoc_value - This parameter contains the number of milliseconds
+ *                   that the user is requesting the delayed ACK timer
+ *                   be set to. Note that this value is defined in
+ *                   the standard to be between 200 and 500 milliseconds.
+ *
+ *                   Note: a value of zero will leave the value alone,
+ *                   but disable SACK delay. A non-zero value will also
+ *                   enable SACK delay.
+ */
+static int sctp_getsockopt_delayed_ack_time(struct sock *sk, int len,
+					    char __user *optval,
+					    int __user *optlen)
+{
+	struct sctp_assoc_value  params;
+	struct sctp_association *asoc = NULL;
+	struct sctp_sock        *sp = sctp_sk(sk);
+
+	if (len != sizeof(struct sctp_assoc_value))
+		return - EINVAL;
+
+	if (copy_from_user(&params, optval, len))
+		return -EFAULT;
+
+	/* Get association, if assoc_id != 0 and the socket is a one
+	 * to many style socket, and an association was not found, then
+	 * the id was invalid.
+ 	 */
+	asoc = sctp_id2assoc(sk, params.assoc_id);
+	if (!asoc && params.assoc_id && sctp_style(sk, UDP))
+		return -EINVAL;
+
+	if (asoc) {
+		/* Fetch association values. */
+		if (asoc->param_flags & SPP_SACKDELAY_ENABLE)
+			params.assoc_value = jiffies_to_msecs(
+				asoc->sackdelay);
+		else
+			params.assoc_value = 0;
+	} else {
+		/* Fetch socket values. */
+		if (sp->param_flags & SPP_SACKDELAY_ENABLE)
+			params.assoc_value  = sp->sackdelay;
+		else
+			params.assoc_value  = 0;
+	}
 
-done:
 	if (copy_to_user(optval, &params, len))
 		return -EFAULT;
 
@@ -3425,7 +3861,7 @@ static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_add
 }
 
 static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port,
-				    void * __user *to, size_t space_left)
+				    void __user **to, size_t space_left)
 {
 	struct list_head *pos;
 	struct sctp_sockaddr_entry *addr;
@@ -4019,6 +4455,10 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
 		retval = sctp_getsockopt_peer_addr_params(sk, len, optval,
 							  optlen);
 		break;
+	case SCTP_DELAYED_ACK_TIME:
+		retval = sctp_getsockopt_delayed_ack_time(sk, len, optval,
+							  optlen);
+		break;
 	case SCTP_INITMSG:
 		retval = sctp_getsockopt_initmsg(sk, len, optval, optlen);
 		break;
@@ -4426,7 +4866,7 @@ cleanup:
  * tcp_poll().  Note that, based on these implementations, we don't
  * lock the socket in this function, even though it seems that,
  * ideally, locking or some other mechanisms can be used to ensure
- * the integrity of the counters (sndbuf and wmem_queued) used
+ * the integrity of the counters (sndbuf and wmem_alloc) used
  * in this place.  We assume that we don't need locks either until proven
  * otherwise.
  *
@@ -4743,11 +5183,6 @@ static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
 	struct sk_buff *skb;
 	long timeo;
 
-	/* Caller is allowed not to check sk->sk_err before calling.  */
-	error = sock_error(sk);
-	if (error)
-		goto no_packet;
-
 	timeo = sock_rcvtimeo(sk, noblock);
 
 	SCTP_DEBUG_PRINTK("Timeout: timeo: %ld, MAX: %ld.\n",
@@ -4774,6 +5209,11 @@ static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
 		if (skb)
 			return skb;
 
+		/* Caller is allowed not to check sk->sk_err before calling. */
+		error = sock_error(sk);
+		if (error)
+			goto no_packet;
+
 		if (sk->sk_shutdown & RCV_SHUTDOWN)
 			break;
 
@@ -4833,10 +5273,6 @@ static void sctp_wfree(struct sk_buff *skb)
 				sizeof(struct sk_buff) +
 				sizeof(struct sctp_chunk);
 
-	sk->sk_wmem_queued -= SCTP_DATA_SNDSIZE(chunk) +
-				sizeof(struct sk_buff) +
-				sizeof(struct sctp_chunk);
-
 	atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
 
 	sock_wfree(skb);
@@ -4920,7 +5356,7 @@ void sctp_write_space(struct sock *sk)
 
 /* Is there any sndbuf space available on the socket?
  *
- * Note that wmem_queued is the sum of the send buffers on all of the
+ * Note that sk_wmem_alloc is the sum of the send buffers on all of the
  * associations on the same socket.  For a UDP-style socket with
  * multiple associations, it is possible for it to be "unwriteable"
  * prematurely.  I assume that this is acceptable because
@@ -4933,7 +5369,7 @@ static int sctp_writeable(struct sock *sk)
 {
 	int amt = 0;
 
-	amt = sk->sk_sndbuf - sk->sk_wmem_queued;
+	amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
 	if (amt < 0)
 		amt = 0;
 	return amt;
@@ -5166,8 +5602,12 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 	 */
 	newsp->type = type;
 
+	spin_lock_bh(&oldsk->sk_lock.slock);
+	/* Migrate the backlog from oldsk to newsk. */
+	sctp_backlog_migrate(assoc, oldsk, newsk);
 	/* Migrate the association to the new socket. */
 	sctp_assoc_migrate(assoc, newsk);
+	spin_unlock_bh(&oldsk->sk_lock.slock);
 
 	/* If the association on the newsk is already closed before accept()
 	 * is called, set RCV_SHUTDOWN flag.
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index fcd7096c953..dc6f3ff3235 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -159,12 +159,9 @@ static ctl_table sctp_table[] = {
 		.ctl_name	= NET_SCTP_PRESERVE_ENABLE,
 		.procname	= "cookie_preserve_enable",
 		.data		= &sctp_cookie_preserve_enable,
-		.maxlen		= sizeof(long),
+		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_doulongvec_ms_jiffies_minmax,
-		.strategy	= &sctp_sysctl_jiffies_ms,
-		.extra1         = &rto_timer_min,
-		.extra2         = &rto_timer_max
+		.proc_handler	= &proc_dointvec
 	},
 	{
 		.ctl_name	= NET_SCTP_RTO_ALPHA,
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 6bc27200e6c..160f62ad1cc 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -86,10 +86,13 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
 	peer->init_sent_count = 0;
 
 	peer->state = SCTP_ACTIVE;
-	peer->hb_allowed = 0;
+	peer->param_flags = SPP_HB_DISABLE |
+			    SPP_PMTUD_ENABLE |
+			    SPP_SACKDELAY_ENABLE;
+	peer->hbinterval  = 0;
 
 	/* Initialize the default path max_retrans.  */
-	peer->max_retrans = sctp_max_retrans_path;
+	peer->pathmaxrxt  = sctp_max_retrans_path;
 	peer->error_count = 0;
 
 	INIT_LIST_HEAD(&peer->transmitted);
@@ -229,10 +232,10 @@ void sctp_transport_pmtu(struct sctp_transport *transport)
 	dst = transport->af_specific->get_dst(NULL, &transport->ipaddr, NULL);
 
 	if (dst) {
-		transport->pmtu = dst_mtu(dst);
+		transport->pathmtu = dst_mtu(dst);
 		dst_release(dst);
 	} else
-		transport->pmtu = SCTP_DEFAULT_MAXSEGMENT;
+		transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
 }
 
 /* Caches the dst entry and source address for a transport's destination
@@ -254,16 +257,20 @@ void sctp_transport_route(struct sctp_transport *transport,
 		af->get_saddr(asoc, dst, daddr, &transport->saddr);
 
 	transport->dst = dst;
+	if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) {
+		return;
+	}
 	if (dst) {
-		transport->pmtu = dst_mtu(dst);
+		transport->pathmtu = dst_mtu(dst);
 
 		/* Initialize sk->sk_rcv_saddr, if the transport is the
 		 * association's active path for getsockname().
 		 */ 
 		if (asoc && (transport == asoc->peer.active_path))
-			af->to_sk_saddr(&transport->saddr, asoc->base.sk);
+			opt->pf->af->to_sk_saddr(&transport->saddr,
+						 asoc->base.sk);
 	} else
-		transport->pmtu = SCTP_DEFAULT_MAXSEGMENT;
+		transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
 }
 
 /* Hold a reference to a transport.  */
@@ -343,7 +350,7 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt)
 	tp->rto_pending = 0;
 
 	SCTP_DEBUG_PRINTK("%s: transport: %p, rtt: %d, srtt: %d "
-			  "rttvar: %d, rto: %d\n", __FUNCTION__,
+			  "rttvar: %d, rto: %ld\n", __FUNCTION__,
 			  tp, rtt, tp->srtt, tp->rttvar, tp->rto);
 }
 
@@ -368,7 +375,7 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
 
 	ssthresh = transport->ssthresh;
 	pba = transport->partial_bytes_acked;
-	pmtu = transport->asoc->pmtu;
+	pmtu = transport->asoc->pathmtu;
 
 	if (cwnd <= ssthresh) {
 		/* RFC 2960 7.2.1, sctpimpguide-05 2.14.2 When cwnd is less
@@ -440,8 +447,8 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
 		 *      partial_bytes_acked = 0
 		 */
 		transport->ssthresh = max(transport->cwnd/2,
-					  4*transport->asoc->pmtu);
-		transport->cwnd = transport->asoc->pmtu;
+					  4*transport->asoc->pathmtu);
+		transport->cwnd = transport->asoc->pathmtu;
 		break;
 
 	case SCTP_LOWER_CWND_FAST_RTX:
@@ -458,7 +465,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
 		 *      partial_bytes_acked = 0
 		 */
 		transport->ssthresh = max(transport->cwnd/2,
-					  4*transport->asoc->pmtu);
+					  4*transport->asoc->pathmtu);
 		transport->cwnd = transport->ssthresh;
 		break;
 
@@ -478,7 +485,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
 		if ((jiffies - transport->last_time_ecne_reduced) >
 		    transport->rtt) {
 			transport->ssthresh = max(transport->cwnd/2,
-					  	  4*transport->asoc->pmtu);
+					  	  4*transport->asoc->pathmtu);
 			transport->cwnd = transport->ssthresh;
 			transport->last_time_ecne_reduced = jiffies;
 		}
@@ -495,7 +502,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
 		 */
 		if ((jiffies - transport->last_time_used) > transport->rto)
 			transport->cwnd = max(transport->cwnd/2,
-						 4*transport->asoc->pmtu);
+						 4*transport->asoc->pathmtu);
 		break;
 	};
 
@@ -510,7 +517,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
 unsigned long sctp_transport_timeout(struct sctp_transport *t)
 {
 	unsigned long timeout;
-	timeout = t->hb_interval + t->rto + sctp_jitter(t->rto);
+	timeout = t->hbinterval + t->rto + sctp_jitter(t->rto);
 	timeout += jiffies;
 	return timeout;
 }
diff --git a/net/socket.c b/net/socket.c
index 3145103cdf5..b38a263853c 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -640,154 +640,150 @@ static void sock_aio_dtor(struct kiocb *iocb)
 	kfree(iocb->private);
 }
 
-/*
- *	Read data from a socket. ubuf is a user mode pointer. We make sure the user
- *	area ubuf...ubuf+size-1 is writable before asking the protocol.
- */
-
-static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf,
-			 size_t size, loff_t pos)
+static ssize_t sock_sendpage(struct file *file, struct page *page,
+			     int offset, size_t size, loff_t *ppos, int more)
 {
-	struct sock_iocb *x, siocb;
 	struct socket *sock;
 	int flags;
 
-	if (pos != 0)
-		return -ESPIPE;
-	if (size==0)		/* Match SYS5 behaviour */
-		return 0;
+	sock = file->private_data;
 
-	if (is_sync_kiocb(iocb))
-		x = &siocb;
-	else {
-		x = kmalloc(sizeof(struct sock_iocb), GFP_KERNEL);
-		if (!x)
-			return -ENOMEM;
+	flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
+	if (more)
+		flags |= MSG_MORE;
+
+	return sock->ops->sendpage(sock, page, offset, size, flags);
+}
+
+static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
+		char __user *ubuf, size_t size, struct sock_iocb *siocb)
+{
+	if (!is_sync_kiocb(iocb)) {
+		siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
+		if (!siocb)
+			return NULL;
 		iocb->ki_dtor = sock_aio_dtor;
 	}
-	iocb->private = x;
-	x->kiocb = iocb;
-	sock = iocb->ki_filp->private_data; 
 
-	x->async_msg.msg_name = NULL;
-	x->async_msg.msg_namelen = 0;
-	x->async_msg.msg_iov = &x->async_iov;
-	x->async_msg.msg_iovlen = 1;
-	x->async_msg.msg_control = NULL;
-	x->async_msg.msg_controllen = 0;
-	x->async_iov.iov_base = ubuf;
-	x->async_iov.iov_len = size;
-	flags = !(iocb->ki_filp->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
+	siocb->kiocb = iocb;
+	siocb->async_iov.iov_base = ubuf;
+	siocb->async_iov.iov_len = size;
 
-	return __sock_recvmsg(iocb, sock, &x->async_msg, size, flags);
+	iocb->private = siocb;
+	return siocb;
 }
 
+static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
+		struct file *file, struct iovec *iov, unsigned long nr_segs)
+{
+	struct socket *sock = file->private_data;
+	size_t size = 0;
+	int i;
 
-/*
- *	Write data to a socket. We verify that the user area ubuf..ubuf+size-1
- *	is readable by the user process.
- */
+        for (i = 0 ; i < nr_segs ; i++)
+                size += iov[i].iov_len;
 
-static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf,
-			  size_t size, loff_t pos)
+	msg->msg_name = NULL;
+	msg->msg_namelen = 0;
+	msg->msg_control = NULL;
+	msg->msg_controllen = 0;
+	msg->msg_iov = (struct iovec *) iov;
+	msg->msg_iovlen = nr_segs;
+	msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
+
+	return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
+}
+
+static ssize_t sock_readv(struct file *file, const struct iovec *iov,
+			  unsigned long nr_segs, loff_t *ppos)
 {
-	struct sock_iocb *x, siocb;
-	struct socket *sock;
-	
+	struct kiocb iocb;
+	struct sock_iocb siocb;
+	struct msghdr msg;
+	int ret;
+
+        init_sync_kiocb(&iocb, NULL);
+	iocb.private = &siocb;
+
+	ret = do_sock_read(&msg, &iocb, file, (struct iovec *)iov, nr_segs);
+	if (-EIOCBQUEUED == ret)
+		ret = wait_on_sync_kiocb(&iocb);
+	return ret;
+}
+
+static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf,
+			 size_t count, loff_t pos)
+{
+	struct sock_iocb siocb, *x;
+
 	if (pos != 0)
 		return -ESPIPE;
-	if(size==0)		/* Match SYS5 behaviour */
+	if (count == 0)		/* Match SYS5 behaviour */
 		return 0;
 
-	if (is_sync_kiocb(iocb))
-		x = &siocb;
-	else {
-		x = kmalloc(sizeof(struct sock_iocb), GFP_KERNEL);
-		if (!x)
-			return -ENOMEM;
-		iocb->ki_dtor = sock_aio_dtor;
-	}
-	iocb->private = x;
-	x->kiocb = iocb;
-	sock = iocb->ki_filp->private_data; 
-
-	x->async_msg.msg_name = NULL;
-	x->async_msg.msg_namelen = 0;
-	x->async_msg.msg_iov = &x->async_iov;
-	x->async_msg.msg_iovlen = 1;
-	x->async_msg.msg_control = NULL;
-	x->async_msg.msg_controllen = 0;
-	x->async_msg.msg_flags = !(iocb->ki_filp->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
-	if (sock->type == SOCK_SEQPACKET)
-		x->async_msg.msg_flags |= MSG_EOR;
-	x->async_iov.iov_base = (void __user *)ubuf;
-	x->async_iov.iov_len = size;
-	
-	return __sock_sendmsg(iocb, sock, &x->async_msg, size);
+	x = alloc_sock_iocb(iocb, ubuf, count, &siocb);
+	if (!x)
+		return -ENOMEM;
+	return do_sock_read(&x->async_msg, iocb, iocb->ki_filp,
+			&x->async_iov, 1);
 }
 
-static ssize_t sock_sendpage(struct file *file, struct page *page,
-			     int offset, size_t size, loff_t *ppos, int more)
+static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
+		struct file *file, struct iovec *iov, unsigned long nr_segs)
 {
-	struct socket *sock;
-	int flags;
+	struct socket *sock = file->private_data;
+	size_t size = 0;
+	int i;
 
-	sock = file->private_data;
+        for (i = 0 ; i < nr_segs ; i++)
+                size += iov[i].iov_len;
 
-	flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
-	if (more)
-		flags |= MSG_MORE;
+	msg->msg_name = NULL;
+	msg->msg_namelen = 0;
+	msg->msg_control = NULL;
+	msg->msg_controllen = 0;
+	msg->msg_iov = (struct iovec *) iov;
+	msg->msg_iovlen = nr_segs;
+	msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
+	if (sock->type == SOCK_SEQPACKET)
+		msg->msg_flags |= MSG_EOR;
 
-	return sock->ops->sendpage(sock, page, offset, size, flags);
+	return __sock_sendmsg(iocb, sock, msg, size);
 }
 
-static int sock_readv_writev(int type,
-			     struct file * file, const struct iovec * iov,
-			     long count, size_t size)
+static ssize_t sock_writev(struct file *file, const struct iovec *iov,
+			   unsigned long nr_segs, loff_t *ppos)
 {
 	struct msghdr msg;
-	struct socket *sock;
+	struct kiocb iocb;
+	struct sock_iocb siocb;
+	int ret;
 
-	sock = file->private_data;
+	init_sync_kiocb(&iocb, NULL);
+	iocb.private = &siocb;
 
-	msg.msg_name = NULL;
-	msg.msg_namelen = 0;
-	msg.msg_control = NULL;
-	msg.msg_controllen = 0;
-	msg.msg_iov = (struct iovec *) iov;
-	msg.msg_iovlen = count;
-	msg.msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
+	ret = do_sock_write(&msg, &iocb, file, (struct iovec *)iov, nr_segs);
+	if (-EIOCBQUEUED == ret)
+		ret = wait_on_sync_kiocb(&iocb);
+	return ret;
+}
 
-	/* read() does a VERIFY_WRITE */
-	if (type == VERIFY_WRITE)
-		return sock_recvmsg(sock, &msg, size, msg.msg_flags);
+static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf,
+			  size_t count, loff_t pos)
+{
+	struct sock_iocb siocb, *x;
 
-	if (sock->type == SOCK_SEQPACKET)
-		msg.msg_flags |= MSG_EOR;
+	if (pos != 0)
+		return -ESPIPE;
+	if (count == 0)		/* Match SYS5 behaviour */
+		return 0;
 
-	return sock_sendmsg(sock, &msg, size);
-}
+	x = alloc_sock_iocb(iocb, (void __user *)ubuf, count, &siocb);
+	if (!x)
+		return -ENOMEM;
 
-static ssize_t sock_readv(struct file *file, const struct iovec *vector,
-			  unsigned long count, loff_t *ppos)
-{
-	size_t tot_len = 0;
-	int i;
-        for (i = 0 ; i < count ; i++)
-                tot_len += vector[i].iov_len;
-	return sock_readv_writev(VERIFY_WRITE,
-				 file, vector, count, tot_len);
-}
-	
-static ssize_t sock_writev(struct file *file, const struct iovec *vector,
-			   unsigned long count, loff_t *ppos)
-{
-	size_t tot_len = 0;
-	int i;
-        for (i = 0 ; i < count ; i++)
-                tot_len += vector[i].iov_len;
-	return sock_readv_writev(VERIFY_READ,
-				 file, vector, count, tot_len);
+	return do_sock_write(&x->async_msg, iocb, iocb->ki_filp,
+			&x->async_iov, 1);
 }
 
 
@@ -904,6 +900,13 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 			break;
 		default:
 			err = sock->ops->ioctl(sock, cmd, arg);
+
+			/*
+			 * If this ioctl is unknown try to hand it down
+			 * to the NIC driver.
+			 */
+			if (err == -ENOIOCTLCMD)
+				err = dev_ioctl(cmd, argp);
 			break;
 	}
 	return err;
@@ -990,7 +993,7 @@ static int sock_fasync(int fd, struct file *filp, int on)
 
 	if (on)
 	{
-		fna=(struct fasync_struct *)kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
+		fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
 		if(fna==NULL)
 			return -ENOMEM;
 	}
@@ -2036,7 +2039,7 @@ int sock_unregister(int family)
 	return 0;
 }
 
-void __init sock_init(void)
+static int __init sock_init(void)
 {
 	/*
 	 *	Initialize sock SLAB cache.
@@ -2044,12 +2047,10 @@ void __init sock_init(void)
 	 
 	sk_init();
 
-#ifdef SLAB_SKB
 	/*
 	 *	Initialize skbuff SLAB cache 
 	 */
 	skb_init();
-#endif
 
 	/*
 	 *	Initialize the protocols module. 
@@ -2058,15 +2059,19 @@ void __init sock_init(void)
 	init_inodecache();
 	register_filesystem(&sock_fs_type);
 	sock_mnt = kern_mount(&sock_fs_type);
-	/* The real protocol initialization is performed when
-	 *  do_initcalls is run.  
+
+	/* The real protocol initialization is performed in later initcalls.
 	 */
 
 #ifdef CONFIG_NETFILTER
 	netfilter_init();
 #endif
+
+	return 0;
 }
 
+core_initcall(sock_init);	/* early initcall */
+
 #ifdef CONFIG_PROC_FS
 void socket_seq_show(struct seq_file *seq)
 {
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 8c7756036e9..9ac1b8c26c0 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -94,7 +94,7 @@ rpcauth_init_credcache(struct rpc_auth *auth, unsigned long expire)
 	struct rpc_cred_cache *new;
 	int i;
 
-	new = (struct rpc_cred_cache *)kmalloc(sizeof(*new), GFP_KERNEL);
+	new = kmalloc(sizeof(*new), GFP_KERNEL);
 	if (!new)
 		return -ENOMEM;
 	for (i = 0; i < RPC_CREDCACHE_NR; i++)
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index f44f46f1d8e..8d782282ec1 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -638,7 +638,7 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg)
 				gss_msg);
 		atomic_inc(&gss_msg->count);
 		gss_unhash_msg(gss_msg);
-		if (msg->errno == -ETIMEDOUT || msg->errno == -EPIPE) {
+		if (msg->errno == -ETIMEDOUT) {
 			unsigned long now = jiffies;
 			if (time_after(now, ratelimit)) {
 				printk(KERN_WARNING "RPC: AUTH_GSS upcall timed out.\n"
@@ -786,7 +786,9 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int taskflags)
 	cred->gc_flags = 0;
 	cred->gc_base.cr_ops = &gss_credops;
 	cred->gc_service = gss_auth->service;
-	err = gss_create_upcall(gss_auth, cred);
+	do {
+		err = gss_create_upcall(gss_auth, cred);
+	} while (err == -EAGAIN);
 	if (err < 0)
 		goto out_err;
 
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 5f1f806a0b1..129e2bd36af 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -97,13 +97,17 @@ get_key(const void *p, const void *end, struct crypto_tfm **res)
 			alg_mode = CRYPTO_TFM_MODE_CBC;
 			break;
 		default:
-			dprintk("RPC:      get_key: unsupported algorithm %d\n", alg);
+			printk("gss_kerberos_mech: unsupported algorithm %d\n", alg);
 			goto out_err_free_key;
 	}
-	if (!(*res = crypto_alloc_tfm(alg_name, alg_mode)))
+	if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) {
+		printk("gss_kerberos_mech: unable to initialize crypto algorithm %s\n", alg_name);
 		goto out_err_free_key;
-	if (crypto_cipher_setkey(*res, key.data, key.len))
+	}
+	if (crypto_cipher_setkey(*res, key.data, key.len)) {
+		printk("gss_kerberos_mech: error setting key for crypto algorithm %s\n", alg_name);
 		goto out_err_free_tfm;
+	}
 
 	kfree(key.data);
 	return p;
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
index 39b3edc1469..58400807d4d 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_mech.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c
@@ -111,14 +111,18 @@ get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg)
 			setkey = 0;
 			break;
 		default:
-			dprintk("RPC: SPKM3 get_key: unsupported algorithm %d", *resalg);
+			dprintk("gss_spkm3_mech: unsupported algorithm %d\n", *resalg);
 			goto out_err_free_key;
 	}
-	if (!(*res = crypto_alloc_tfm(alg_name, alg_mode)))
+	if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) {
+		printk("gss_spkm3_mech: unable to initialize crypto algorithm %s\n", alg_name);
 		goto out_err_free_key;
+	}
 	if (setkey) {
-		if (crypto_cipher_setkey(*res, key.data, key.len))
+		if (crypto_cipher_setkey(*res, key.data, key.len)) {
+			printk("gss_spkm3_mech: error setting key for crypto algorithm %s\n", alg_name);
 			goto out_err_free_tfm;
+		}
 	}
 
 	if(key.len > 0)
diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c
index d1e12b25d6e..86fbf7c3e39 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_seal.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c
@@ -59,7 +59,7 @@ spkm3_make_token(struct spkm3_ctx *ctx,
 	char			tokhdrbuf[25];
 	struct xdr_netobj	md5cksum = {.len = 0, .data = NULL};
 	struct xdr_netobj	mic_hdr = {.len = 0, .data = tokhdrbuf};
-	int			tmsglen, tokenlen = 0;
+	int			tokenlen = 0;
 	unsigned char		*ptr;
 	s32			now;
 	int			ctxelen = 0, ctxzbit = 0;
@@ -92,24 +92,23 @@ spkm3_make_token(struct spkm3_ctx *ctx,
 	}
 
 	if (toktype == SPKM_MIC_TOK) {
-		tmsglen = 0;
 		/* Calculate checksum over the mic-header */
 		asn1_bitstring_len(&ctx->ctx_id, &ctxelen, &ctxzbit);
 		spkm3_mic_header(&mic_hdr.data, &mic_hdr.len, ctx->ctx_id.data,
 		                         ctxelen, ctxzbit);
 
 		if (make_checksum(checksum_type, mic_hdr.data, mic_hdr.len, 
-		                             text, &md5cksum))
+		                             text, 0, &md5cksum))
 			goto out_err;
 
 		asn1_bitstring_len(&md5cksum, &md5elen, &md5zbit);
-		tokenlen = 10 + ctxelen + 1 + 2 + md5elen + 1;
+		tokenlen = 10 + ctxelen + 1 + md5elen + 1;
 
 		/* Create token header using generic routines */
-		token->len = g_token_size(&ctx->mech_used, tokenlen + tmsglen);
+		token->len = g_token_size(&ctx->mech_used, tokenlen);
 
 		ptr = token->data;
-		g_make_token_header(&ctx->mech_used, tokenlen + tmsglen, &ptr);
+		g_make_token_header(&ctx->mech_used, tokenlen, &ptr);
 
 		spkm3_make_mic_token(&ptr, tokenlen, &mic_hdr, &md5cksum, md5elen, md5zbit);
 	} else if (toktype == SPKM_WRAP_TOK) { /* Not Supported */
diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c
index 1f824578d77..af0d7ce7468 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_token.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_token.c
@@ -182,6 +182,7 @@ spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, unsigned char *ct
  * *tokp points to the beginning of the SPKM_MIC token  described 
  * in rfc 2025, section 3.2.1: 
  *
+ * toklen is the inner token length
  */
 void
 spkm3_make_mic_token(unsigned char **tokp, int toklen, struct xdr_netobj *mic_hdr, struct xdr_netobj *md5cksum, int md5elen, int md5zbit)
@@ -189,7 +190,7 @@ spkm3_make_mic_token(unsigned char **tokp, int toklen, struct xdr_netobj *mic_hd
 	unsigned char *ict = *tokp;
 
 	*(u8 *)ict++ = 0xa4;
-	*(u8 *)ict++ = toklen - 2; 
+	*(u8 *)ict++ = toklen;
 	memcpy(ict, mic_hdr->data, mic_hdr->len);
 	ict += mic_hdr->len;
 
diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c
index 241d5b30dfc..96851b0ba1b 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_unseal.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_unseal.c
@@ -95,7 +95,7 @@ spkm3_read_token(struct spkm3_ctx *ctx,
 		ret = GSS_S_DEFECTIVE_TOKEN;
 		code = make_checksum(CKSUMTYPE_RSA_MD5, ptr + 2, 
 					mic_hdrlen + 2, 
-		                        message_buffer, &md5cksum);
+		                        message_buffer, 0, &md5cksum);
 
 		if (code)
 			goto out;
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index e4ada15ed85..23632d84d8d 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -420,7 +420,8 @@ static int rsc_parse(struct cache_detail *cd,
 			gss_mech_put(gm);
 			goto out;
 		}
-		if (gss_import_sec_context(buf, len, gm, &rsci.mechctx)) {
+		status = gss_import_sec_context(buf, len, gm, &rsci.mechctx);
+		if (status) {
 			gss_mech_put(gm);
 			goto out;
 		}
@@ -586,6 +587,20 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
 }
 
 static int
+gss_write_null_verf(struct svc_rqst *rqstp)
+{
+	u32     *p;
+
+	svc_putu32(rqstp->rq_res.head, htonl(RPC_AUTH_NULL));
+	p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len;
+	/* don't really need to check if head->iov_len > PAGE_SIZE ... */
+	*p++ = 0;
+	if (!xdr_ressize_check(rqstp, p))
+		return -1;
+	return 0;
+}
+
+static int
 gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq)
 {
 	u32			xdr_seq;
@@ -741,6 +756,21 @@ svcauth_gss_set_client(struct svc_rqst *rqstp)
 	return SVC_OK;
 }
 
+static inline int
+gss_write_init_verf(struct svc_rqst *rqstp, struct rsi *rsip)
+{
+	struct rsc *rsci;
+
+	if (rsip->major_status != GSS_S_COMPLETE)
+		return gss_write_null_verf(rqstp);
+	rsci = gss_svc_searchbyctx(&rsip->out_handle);
+	if (rsci == NULL) {
+		rsip->major_status = GSS_S_NO_CONTEXT;
+		return gss_write_null_verf(rqstp);
+	}
+	return gss_write_verf(rqstp, rsci->mechctx, GSS_SEQ_WIN);
+}
+
 /*
  * Accept an rpcsec packet.
  * If context establishment, punt to user space
@@ -876,11 +906,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp)
 		case -ENOENT:
 			goto drop;
 		case 0:
-			rsci = gss_svc_searchbyctx(&rsip->out_handle);
-			if (!rsci) {
-				goto drop;
-			}
-			if (gss_write_verf(rqstp, rsci->mechctx, GSS_SEQ_WIN))
+			if (gss_write_init_verf(rqstp, rsip))
 				goto drop;
 			if (resv->iov_len + 4 > PAGE_SIZE)
 				goto drop;
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 890fb5ea0dc..1b3ed4fd198 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -70,7 +70,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
 	dprintk("RPC:      allocating UNIX cred for uid %d gid %d\n",
 				acred->uid, acred->gid);
 
-	if (!(cred = (struct unx_cred *) kmalloc(sizeof(*cred), GFP_KERNEL)))
+	if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL)))
 		return ERR_PTR(-ENOMEM);
 
 	atomic_set(&cred->uc_count, 1);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index f509e999276..dcaa0c4453f 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -575,12 +575,11 @@ cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
 	if (rp->q.list.next == &cd->queue) {
 		spin_unlock(&queue_lock);
 		up(&queue_io_sem);
-		if (rp->offset)
-			BUG();
+		BUG_ON(rp->offset);
 		return 0;
 	}
 	rq = container_of(rp->q.list.next, struct cache_request, q.list);
-	if (rq->q.reader) BUG();
+	BUG_ON(rq->q.reader);
 	if (rp->offset == 0)
 		rq->readers++;
 	spin_unlock(&queue_lock);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 61c3abeacca..d2f0550c4ba 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -118,7 +118,7 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname,
 		goto out_err;
 
 	err = -ENOMEM;
-	clnt = (struct rpc_clnt *) kmalloc(sizeof(*clnt), GFP_KERNEL);
+	clnt = kmalloc(sizeof(*clnt), GFP_KERNEL);
 	if (!clnt)
 		goto out_err;
 	memset(clnt, 0, sizeof(*clnt));
@@ -225,7 +225,7 @@ rpc_clone_client(struct rpc_clnt *clnt)
 {
 	struct rpc_clnt *new;
 
-	new = (struct rpc_clnt *)kmalloc(sizeof(*new), GFP_KERNEL);
+	new = kmalloc(sizeof(*new), GFP_KERNEL);
 	if (!new)
 		goto out_no_clnt;
 	memcpy(new, clnt, sizeof(*new));
@@ -268,7 +268,8 @@ rpc_shutdown_client(struct rpc_clnt *clnt)
 		clnt->cl_oneshot = 0;
 		clnt->cl_dead = 0;
 		rpc_killall_tasks(clnt);
-		sleep_on_timeout(&destroy_wait, 1*HZ);
+		wait_event_timeout(destroy_wait,
+			!atomic_read(&clnt->cl_users), 1*HZ);
 	}
 
 	if (atomic_read(&clnt->cl_users) < 0) {
@@ -374,19 +375,23 @@ out:
  * Default callback for async RPC calls
  */
 static void
-rpc_default_callback(struct rpc_task *task)
+rpc_default_callback(struct rpc_task *task, void *data)
 {
 }
 
+static const struct rpc_call_ops rpc_default_ops = {
+	.rpc_call_done = rpc_default_callback,
+};
+
 /*
  *	Export the signal mask handling for synchronous code that
  *	sleeps on RPC calls
  */
-#define RPC_INTR_SIGNALS (sigmask(SIGINT) | sigmask(SIGQUIT) | sigmask(SIGKILL))
+#define RPC_INTR_SIGNALS (sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT) | sigmask(SIGTERM))
  
 static void rpc_save_sigmask(sigset_t *oldset, int intr)
 {
-	unsigned long	sigallow = 0;
+	unsigned long	sigallow = sigmask(SIGKILL);
 	sigset_t sigmask;
 
 	/* Block all signals except those listed in sigallow */
@@ -432,7 +437,7 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
 	BUG_ON(flags & RPC_TASK_ASYNC);
 
 	status = -ENOMEM;
-	task = rpc_new_task(clnt, NULL, flags);
+	task = rpc_new_task(clnt, flags, &rpc_default_ops, NULL);
 	if (task == NULL)
 		goto out;
 
@@ -442,14 +447,15 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
 	rpc_call_setup(task, msg, 0);
 
 	/* Set up the call info struct and execute the task */
-	if (task->tk_status == 0) {
+	status = task->tk_status;
+	if (status == 0) {
+		atomic_inc(&task->tk_count);
 		status = rpc_execute(task);
-	} else {
-		status = task->tk_status;
-		rpc_release_task(task);
+		if (status == 0)
+			status = task->tk_status;
 	}
-
 	rpc_restore_sigmask(&oldset);
+	rpc_release_task(task);
 out:
 	return status;
 }
@@ -459,7 +465,7 @@ out:
  */
 int
 rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
-	       rpc_action callback, void *data)
+	       const struct rpc_call_ops *tk_ops, void *data)
 {
 	struct rpc_task	*task;
 	sigset_t	oldset;
@@ -472,12 +478,9 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
 	flags |= RPC_TASK_ASYNC;
 
 	/* Create/initialize a new RPC task */
-	if (!callback)
-		callback = rpc_default_callback;
 	status = -ENOMEM;
-	if (!(task = rpc_new_task(clnt, callback, flags)))
+	if (!(task = rpc_new_task(clnt, flags, tk_ops, data)))
 		goto out;
-	task->tk_calldata = data;
 
 	/* Mask signals on GSS_AUTH upcalls */
 	rpc_task_sigmask(task, &oldset);		
@@ -511,7 +514,7 @@ rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags)
 	if (task->tk_status == 0)
 		task->tk_action = call_start;
 	else
-		task->tk_action = NULL;
+		task->tk_action = rpc_exit_task;
 }
 
 void
@@ -536,6 +539,18 @@ size_t rpc_max_payload(struct rpc_clnt *clnt)
 }
 EXPORT_SYMBOL(rpc_max_payload);
 
+/**
+ * rpc_force_rebind - force transport to check that remote port is unchanged
+ * @clnt: client to rebind (cl_port is cleared only when cl_autobind is set)
+ *
+ */
+void rpc_force_rebind(struct rpc_clnt *clnt)
+{
+	if (clnt->cl_autobind)
+		clnt->cl_port = 0;
+}
+EXPORT_SYMBOL(rpc_force_rebind);
+
 /*
  * Restart an (async) RPC call. Usually called from within the
  * exit handler.
@@ -642,24 +657,26 @@ call_reserveresult(struct rpc_task *task)
 
 /*
  * 2.	Allocate the buffer. For details, see sched.c:rpc_malloc.
- *	(Note: buffer memory is freed in rpc_task_release).
+ *	(Note: buffer memory is freed in xprt_release).
  */
 static void
 call_allocate(struct rpc_task *task)
 {
+	struct rpc_rqst *req = task->tk_rqstp;
+	struct rpc_xprt *xprt = task->tk_xprt;
 	unsigned int	bufsiz;
 
 	dprintk("RPC: %4d call_allocate (status %d)\n", 
 				task->tk_pid, task->tk_status);
 	task->tk_action = call_bind;
-	if (task->tk_buffer)
+	if (req->rq_buffer)
 		return;
 
 	/* FIXME: compute buffer requirements more exactly using
 	 * auth->au_wslack */
 	bufsiz = task->tk_msg.rpc_proc->p_bufsiz + RPC_SLACK_SPACE;
 
-	if (rpc_malloc(task, bufsiz << 1) != NULL)
+	if (xprt->ops->buf_alloc(task, bufsiz << 1) != NULL)
 		return;
 	printk(KERN_INFO "RPC: buffer allocation failed for task %p\n", task); 
 
@@ -702,14 +719,14 @@ call_encode(struct rpc_task *task)
 				task->tk_pid, task->tk_status);
 
 	/* Default buffer setup */
-	bufsiz = task->tk_bufsize >> 1;
-	sndbuf->head[0].iov_base = (void *)task->tk_buffer;
+	bufsiz = req->rq_bufsize >> 1;
+	sndbuf->head[0].iov_base = (void *)req->rq_buffer;
 	sndbuf->head[0].iov_len  = bufsiz;
 	sndbuf->tail[0].iov_len  = 0;
 	sndbuf->page_len	 = 0;
 	sndbuf->len		 = 0;
 	sndbuf->buflen		 = bufsiz;
-	rcvbuf->head[0].iov_base = (void *)((char *)task->tk_buffer + bufsiz);
+	rcvbuf->head[0].iov_base = (void *)((char *)req->rq_buffer + bufsiz);
 	rcvbuf->head[0].iov_len  = bufsiz;
 	rcvbuf->tail[0].iov_len  = 0;
 	rcvbuf->page_len	 = 0;
@@ -849,8 +866,7 @@ call_connect_status(struct rpc_task *task)
 	}
 
 	/* Something failed: remote service port may have changed */
-	if (clnt->cl_autobind)
-		clnt->cl_port = 0;
+	rpc_force_rebind(clnt);
 
 	switch (status) {
 	case -ENOTCONN:
@@ -892,7 +908,7 @@ call_transmit(struct rpc_task *task)
 	if (task->tk_status < 0)
 		return;
 	if (!task->tk_msg.rpc_proc->p_decode) {
-		task->tk_action = NULL;
+		task->tk_action = rpc_exit_task;
 		rpc_wake_up_task(task);
 	}
 	return;
@@ -931,8 +947,7 @@ call_status(struct rpc_task *task)
 		break;
 	case -ECONNREFUSED:
 	case -ENOTCONN:
-		if (clnt->cl_autobind)
-			clnt->cl_port = 0;
+		rpc_force_rebind(clnt);
 		task->tk_action = call_bind;
 		break;
 	case -EAGAIN:
@@ -943,8 +958,7 @@ call_status(struct rpc_task *task)
 		rpc_exit(task, status);
 		break;
 	default:
-		if (clnt->cl_chatty)
-			printk("%s: RPC call returned error %d\n",
+		printk("%s: RPC call returned error %d\n",
 			       clnt->cl_protname, -status);
 		rpc_exit(task, status);
 		break;
@@ -979,20 +993,18 @@ call_timeout(struct rpc_task *task)
 
 	dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid);
 	if (RPC_IS_SOFT(task)) {
-		if (clnt->cl_chatty)
-			printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
+		printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
 				clnt->cl_protname, clnt->cl_server);
 		rpc_exit(task, -EIO);
 		return;
 	}
 
-	if (clnt->cl_chatty && !(task->tk_flags & RPC_CALL_MAJORSEEN)) {
+	if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) {
 		task->tk_flags |= RPC_CALL_MAJORSEEN;
 		printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
 			clnt->cl_protname, clnt->cl_server);
 	}
-	if (clnt->cl_autobind)
-		clnt->cl_port = 0;
+	rpc_force_rebind(clnt);
 
 retry:
 	clnt->cl_stats->rpcretrans++;
@@ -1014,7 +1026,7 @@ call_decode(struct rpc_task *task)
 	dprintk("RPC: %4d call_decode (status %d)\n", 
 				task->tk_pid, task->tk_status);
 
-	if (clnt->cl_chatty && (task->tk_flags & RPC_CALL_MAJORSEEN)) {
+	if (task->tk_flags & RPC_CALL_MAJORSEEN) {
 		printk(KERN_NOTICE "%s: server %s OK\n",
 			clnt->cl_protname, clnt->cl_server);
 		task->tk_flags &= ~RPC_CALL_MAJORSEEN;
@@ -1039,13 +1051,14 @@ call_decode(struct rpc_task *task)
 				sizeof(req->rq_rcv_buf)) != 0);
 
 	/* Verify the RPC header */
-	if (!(p = call_verify(task))) {
-		if (task->tk_action == NULL)
-			return;
-		goto out_retry;
+	p = call_verify(task);
+	if (IS_ERR(p)) {
+		if (p == ERR_PTR(-EAGAIN))
+			goto out_retry;
+		return;
 	}
 
-	task->tk_action = NULL;
+	task->tk_action = rpc_exit_task;
 
 	if (decode)
 		task->tk_status = rpcauth_unwrap_resp(task, decode, req, p,
@@ -1138,7 +1151,7 @@ call_verify(struct rpc_task *task)
 
 	if ((n = ntohl(*p++)) != RPC_REPLY) {
 		printk(KERN_WARNING "call_verify: not an RPC reply: %x\n", n);
-		goto out_retry;
+		goto out_garbage;
 	}
 	if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) {
 		if (--len < 0)
@@ -1168,7 +1181,7 @@ call_verify(struct rpc_task *task)
 							task->tk_pid);
 			rpcauth_invalcred(task);
 			task->tk_action = call_refresh;
-			return NULL;
+			goto out_retry;
 		case RPC_AUTH_BADCRED:
 		case RPC_AUTH_BADVERF:
 			/* possibly garbled cred/verf? */
@@ -1178,7 +1191,7 @@ call_verify(struct rpc_task *task)
 			dprintk("RPC: %4d call_verify: retry garbled creds\n",
 							task->tk_pid);
 			task->tk_action = call_bind;
-			return NULL;
+			goto out_retry;
 		case RPC_AUTH_TOOWEAK:
 			printk(KERN_NOTICE "call_verify: server requires stronger "
 			       "authentication.\n");
@@ -1193,7 +1206,7 @@ call_verify(struct rpc_task *task)
 	}
 	if (!(p = rpcauth_checkverf(task, p))) {
 		printk(KERN_WARNING "call_verify: auth check failed\n");
-		goto out_retry;		/* bad verifier, retry */
+		goto out_garbage;		/* bad verifier, retry */
 	}
 	len = p - (u32 *)iov->iov_base - 1;
 	if (len < 0)
@@ -1230,23 +1243,24 @@ call_verify(struct rpc_task *task)
 		/* Also retry */
 	}
 
-out_retry:
+out_garbage:
 	task->tk_client->cl_stats->rpcgarbage++;
 	if (task->tk_garb_retry) {
 		task->tk_garb_retry--;
 		dprintk("RPC %s: retrying %4d\n", __FUNCTION__, task->tk_pid);
 		task->tk_action = call_bind;
-		return NULL;
+out_retry:
+		return ERR_PTR(-EAGAIN);
 	}
 	printk(KERN_WARNING "RPC %s: retry failed, exit EIO\n", __FUNCTION__);
 out_eio:
 	error = -EIO;
 out_err:
 	rpc_exit(task, error);
-	return NULL;
+	return ERR_PTR(error);
 out_overflow:
 	printk(KERN_WARNING "RPC %s: server reply was truncated.\n", __FUNCTION__);
-	goto out_retry;
+	goto out_garbage;
 }
 
 static int rpcproc_encode_null(void *rqstp, u32 *data, void *obj)
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
index a398575f94b..8139ce68e91 100644
--- a/net/sunrpc/pmap_clnt.c
+++ b/net/sunrpc/pmap_clnt.c
@@ -90,8 +90,7 @@ bailout:
 	map->pm_binding = 0;
 	rpc_wake_up(&map->pm_bindwait);
 	spin_unlock(&pmap_lock);
-	task->tk_status = -EIO;
-	task->tk_action = NULL;
+	rpc_exit(task, -EIO);
 }
 
 #ifdef CONFIG_ROOT_NFS
@@ -132,21 +131,22 @@ static void
 pmap_getport_done(struct rpc_task *task)
 {
 	struct rpc_clnt	*clnt = task->tk_client;
+	struct rpc_xprt *xprt = task->tk_xprt;
 	struct rpc_portmap *map = clnt->cl_pmap;
 
 	dprintk("RPC: %4d pmap_getport_done(status %d, port %d)\n",
 			task->tk_pid, task->tk_status, clnt->cl_port);
+
+	xprt->ops->set_port(xprt, 0);
 	if (task->tk_status < 0) {
 		/* Make the calling task exit with an error */
-		task->tk_action = NULL;
+		task->tk_action = rpc_exit_task;
 	} else if (clnt->cl_port == 0) {
 		/* Program not registered */
-		task->tk_status = -EACCES;
-		task->tk_action = NULL;
+		rpc_exit(task, -EACCES);
 	} else {
-		/* byte-swap port number first */
+		xprt->ops->set_port(xprt, clnt->cl_port);
 		clnt->cl_port = htons(clnt->cl_port);
-		clnt->cl_xprt->addr.sin_port = clnt->cl_port;
 	}
 	spin_lock(&pmap_lock);
 	map->pm_binding = 0;
@@ -207,7 +207,7 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileg
 	xprt = xprt_create_proto(proto, srvaddr, NULL);
 	if (IS_ERR(xprt))
 		return (struct rpc_clnt *)xprt;
-	xprt->addr.sin_port = htons(RPC_PMAP_PORT);
+	xprt->ops->set_port(xprt, RPC_PMAP_PORT);
 	if (!privileged)
 		xprt->resvport = 0;
 
@@ -217,7 +217,6 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileg
 				RPC_AUTH_UNIX);
 	if (!IS_ERR(clnt)) {
 		clnt->cl_softrtry = 1;
-		clnt->cl_chatty   = 1;
 		clnt->cl_oneshot  = 1;
 	}
 	return clnt;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index e3b242daf53..9764c80ab0b 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -59,7 +59,6 @@ __rpc_purge_upcall(struct inode *inode, int err)
 	struct rpc_inode *rpci = RPC_I(inode);
 
 	__rpc_purge_list(rpci, &rpci->pipe, err);
-	__rpc_purge_list(rpci, &rpci->in_upcall, err);
 	rpci->pipelen = 0;
 	wake_up(&rpci->waitq);
 }
@@ -70,10 +69,13 @@ rpc_timeout_upcall_queue(void *data)
 	struct rpc_inode *rpci = (struct rpc_inode *)data;
 	struct inode *inode = &rpci->vfs_inode;
 
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
+	if (rpci->ops == NULL)
+		goto out;
 	if (rpci->nreaders == 0 && !list_empty(&rpci->pipe))
 		__rpc_purge_upcall(inode, -ETIMEDOUT);
-	up(&inode->i_sem);
+out:
+	mutex_unlock(&inode->i_mutex);
 }
 
 int
@@ -82,7 +84,7 @@ rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg)
 	struct rpc_inode *rpci = RPC_I(inode);
 	int res = -EPIPE;
 
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 	if (rpci->ops == NULL)
 		goto out;
 	if (rpci->nreaders) {
@@ -98,7 +100,7 @@ rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg)
 		res = 0;
 	}
 out:
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 	wake_up(&rpci->waitq);
 	return res;
 }
@@ -114,11 +116,10 @@ rpc_close_pipes(struct inode *inode)
 {
 	struct rpc_inode *rpci = RPC_I(inode);
 
-	cancel_delayed_work(&rpci->queue_timeout);
-	flush_scheduled_work();
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 	if (rpci->ops != NULL) {
 		rpci->nreaders = 0;
+		__rpc_purge_list(rpci, &rpci->in_upcall, -EPIPE);
 		__rpc_purge_upcall(inode, -EPIPE);
 		rpci->nwriters = 0;
 		if (rpci->ops->release_pipe)
@@ -126,7 +127,9 @@ rpc_close_pipes(struct inode *inode)
 		rpci->ops = NULL;
 	}
 	rpc_inode_setowner(inode, NULL);
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
+	cancel_delayed_work(&rpci->queue_timeout);
+	flush_scheduled_work();
 }
 
 static struct inode *
@@ -151,7 +154,7 @@ rpc_pipe_open(struct inode *inode, struct file *filp)
 	struct rpc_inode *rpci = RPC_I(inode);
 	int res = -ENXIO;
 
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 	if (rpci->ops != NULL) {
 		if (filp->f_mode & FMODE_READ)
 			rpci->nreaders ++;
@@ -159,22 +162,22 @@ rpc_pipe_open(struct inode *inode, struct file *filp)
 			rpci->nwriters ++;
 		res = 0;
 	}
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 	return res;
 }
 
 static int
 rpc_pipe_release(struct inode *inode, struct file *filp)
 {
-	struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode);
+	struct rpc_inode *rpci = RPC_I(inode);
 	struct rpc_pipe_msg *msg;
 
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 	if (rpci->ops == NULL)
 		goto out;
 	msg = (struct rpc_pipe_msg *)filp->private_data;
 	if (msg != NULL) {
-		msg->errno = -EPIPE;
+		msg->errno = -EAGAIN;
 		list_del_init(&msg->list);
 		rpci->ops->destroy_msg(msg);
 	}
@@ -183,11 +186,11 @@ rpc_pipe_release(struct inode *inode, struct file *filp)
 	if (filp->f_mode & FMODE_READ)
 		rpci->nreaders --;
 	if (!rpci->nreaders)
-		__rpc_purge_upcall(inode, -EPIPE);
+		__rpc_purge_upcall(inode, -EAGAIN);
 	if (rpci->ops->release_pipe)
 		rpci->ops->release_pipe(inode);
 out:
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 	return 0;
 }
 
@@ -199,7 +202,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset)
 	struct rpc_pipe_msg *msg;
 	int res = 0;
 
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 	if (rpci->ops == NULL) {
 		res = -EPIPE;
 		goto out_unlock;
@@ -226,7 +229,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset)
 		rpci->ops->destroy_msg(msg);
 	}
 out_unlock:
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 	return res;
 }
 
@@ -237,11 +240,11 @@ rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *of
 	struct rpc_inode *rpci = RPC_I(inode);
 	int res;
 
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 	res = -EPIPE;
 	if (rpci->ops != NULL)
 		res = rpci->ops->downcall(filp, buf, len);
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 	return res;
 }
 
@@ -319,7 +322,7 @@ rpc_info_open(struct inode *inode, struct file *file)
 
 	if (!ret) {
 		struct seq_file *m = file->private_data;
-		down(&inode->i_sem);
+		mutex_lock(&inode->i_mutex);
 		clnt = RPC_I(inode)->private;
 		if (clnt) {
 			atomic_inc(&clnt->cl_users);
@@ -328,7 +331,7 @@ rpc_info_open(struct inode *inode, struct file *file)
 			single_release(inode, file);
 			ret = -EINVAL;
 		}
-		up(&inode->i_sem);
+		mutex_unlock(&inode->i_mutex);
 	}
 	return ret;
 }
@@ -488,11 +491,11 @@ rpc_depopulate(struct dentry *parent)
 	struct dentry *dentry, *dvec[10];
 	int n = 0;
 
-	down(&dir->i_sem);
+	mutex_lock(&dir->i_mutex);
 repeat:
 	spin_lock(&dcache_lock);
 	list_for_each_safe(pos, next, &parent->d_subdirs) {
-		dentry = list_entry(pos, struct dentry, d_child);
+		dentry = list_entry(pos, struct dentry, d_u.d_child);
 		spin_lock(&dentry->d_lock);
 		if (!d_unhashed(dentry)) {
 			dget_locked(dentry);
@@ -516,7 +519,7 @@ repeat:
 		} while (n);
 		goto repeat;
 	}
-	up(&dir->i_sem);
+	mutex_unlock(&dir->i_mutex);
 }
 
 static int
@@ -529,7 +532,7 @@ rpc_populate(struct dentry *parent,
 	struct dentry *dentry;
 	int mode, i;
 
-	down(&dir->i_sem);
+	mutex_lock(&dir->i_mutex);
 	for (i = start; i < eof; i++) {
 		dentry = d_alloc_name(parent, files[i].name);
 		if (!dentry)
@@ -549,10 +552,10 @@ rpc_populate(struct dentry *parent,
 			dir->i_nlink++;
 		d_add(dentry, inode);
 	}
-	up(&dir->i_sem);
+	mutex_unlock(&dir->i_mutex);
 	return 0;
 out_bad:
-	up(&dir->i_sem);
+	mutex_unlock(&dir->i_mutex);
 	printk(KERN_WARNING "%s: %s failed to populate directory %s\n",
 			__FILE__, __FUNCTION__, parent->d_name.name);
 	return -ENOMEM;
@@ -606,7 +609,7 @@ rpc_lookup_negative(char *path, struct nameidata *nd)
 	if ((error = rpc_lookup_parent(path, nd)) != 0)
 		return ERR_PTR(error);
 	dir = nd->dentry->d_inode;
-	down(&dir->i_sem);
+	mutex_lock(&dir->i_mutex);
 	dentry = lookup_hash(nd);
 	if (IS_ERR(dentry))
 		goto out_err;
@@ -617,7 +620,7 @@ rpc_lookup_negative(char *path, struct nameidata *nd)
 	}
 	return dentry;
 out_err:
-	up(&dir->i_sem);
+	mutex_unlock(&dir->i_mutex);
 	rpc_release_path(nd);
 	return dentry;
 }
@@ -643,7 +646,7 @@ rpc_mkdir(char *path, struct rpc_clnt *rpc_client)
 	if (error)
 		goto err_depopulate;
 out:
-	up(&dir->i_sem);
+	mutex_unlock(&dir->i_mutex);
 	rpc_release_path(&nd);
 	return dentry;
 err_depopulate:
@@ -668,7 +671,7 @@ rpc_rmdir(char *path)
 	if ((error = rpc_lookup_parent(path, &nd)) != 0)
 		return error;
 	dir = nd.dentry->d_inode;
-	down(&dir->i_sem);
+	mutex_lock(&dir->i_mutex);
 	dentry = lookup_hash(&nd);
 	if (IS_ERR(dentry)) {
 		error = PTR_ERR(dentry);
@@ -678,7 +681,7 @@ rpc_rmdir(char *path)
 	error = __rpc_rmdir(dir, dentry);
 	dput(dentry);
 out_release:
-	up(&dir->i_sem);
+	mutex_unlock(&dir->i_mutex);
 	rpc_release_path(&nd);
 	return error;
 }
@@ -707,7 +710,7 @@ rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags)
 	rpci->ops = ops;
 	inode_dir_notify(dir, DN_CREATE);
 out:
-	up(&dir->i_sem);
+	mutex_unlock(&dir->i_mutex);
 	rpc_release_path(&nd);
 	return dentry;
 err_dput:
@@ -729,7 +732,7 @@ rpc_unlink(char *path)
 	if ((error = rpc_lookup_parent(path, &nd)) != 0)
 		return error;
 	dir = nd.dentry->d_inode;
-	down(&dir->i_sem);
+	mutex_lock(&dir->i_mutex);
 	dentry = lookup_hash(&nd);
 	if (IS_ERR(dentry)) {
 		error = PTR_ERR(dentry);
@@ -743,7 +746,7 @@ rpc_unlink(char *path)
 	dput(dentry);
 	inode_dir_notify(dir, DN_DELETE);
 out_release:
-	up(&dir->i_sem);
+	mutex_unlock(&dir->i_mutex);
 	rpc_release_path(&nd);
 	return error;
 }
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 54e60a65750..7415406aa1a 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -41,8 +41,6 @@ static mempool_t	*rpc_buffer_mempool __read_mostly;
 
 static void			__rpc_default_timer(struct rpc_task *task);
 static void			rpciod_killall(void);
-static void			rpc_free(struct rpc_task *task);
-
 static void			rpc_async_schedule(void *);
 
 /*
@@ -264,6 +262,35 @@ void rpc_init_wait_queue(struct rpc_wait_queue *queue, const char *qname)
 }
 EXPORT_SYMBOL(rpc_init_wait_queue);
 
+static int rpc_wait_bit_interruptible(void *word)
+{
+	if (signal_pending(current))
+		return -ERESTARTSYS;
+	schedule();
+	return 0;
+}
+
+/*
+ * Mark an RPC call as having completed by clearing the 'active' bit
+ */
+static inline void rpc_mark_complete_task(struct rpc_task *task)
+{
+	rpc_clear_active(task);
+	wake_up_bit(&task->tk_runstate, RPC_TASK_ACTIVE);
+}
+
+/*
+ * Allow callers to wait for completion of an RPC call
+ */
+int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *))
+{
+	if (action == NULL)
+		action = rpc_wait_bit_interruptible;
+	return wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
+			action, TASK_INTERRUPTIBLE);
+}
+EXPORT_SYMBOL(__rpc_wait_for_completion_task);
+
 /*
  * Make an RPC task runnable.
  *
@@ -299,10 +326,7 @@ static void rpc_make_runnable(struct rpc_task *task)
 static inline void
 rpc_schedule_run(struct rpc_task *task)
 {
-	/* Don't run a child twice! */
-	if (RPC_IS_ACTIVATED(task))
-		return;
-	task->tk_active = 1;
+	rpc_set_active(task);
 	rpc_make_runnable(task);
 }
 
@@ -324,8 +348,7 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
 	}
 
 	/* Mark the task as being activated if so needed */
-	if (!RPC_IS_ACTIVATED(task))
-		task->tk_active = 1;
+	rpc_set_active(task);
 
 	__rpc_add_wait_queue(q, task);
 
@@ -555,36 +578,29 @@ __rpc_atrun(struct rpc_task *task)
 }
 
 /*
- * Helper that calls task->tk_exit if it exists and then returns
- * true if we should exit __rpc_execute.
+ * Helper to call task->tk_ops->rpc_call_prepare
  */
-static inline int __rpc_do_exit(struct rpc_task *task)
+static void rpc_prepare_task(struct rpc_task *task)
 {
-	if (task->tk_exit != NULL) {
-		lock_kernel();
-		task->tk_exit(task);
-		unlock_kernel();
-		/* If tk_action is non-null, we should restart the call */
-		if (task->tk_action != NULL) {
-			if (!RPC_ASSASSINATED(task)) {
-				/* Release RPC slot and buffer memory */
-				xprt_release(task);
-				rpc_free(task);
-				return 0;
-			}
-			printk(KERN_ERR "RPC: dead task tried to walk away.\n");
-		}
-	}
-	return 1;
+	task->tk_ops->rpc_call_prepare(task, task->tk_calldata);
 }
 
-static int rpc_wait_bit_interruptible(void *word)
+/*
+ * Helper that calls task->tk_ops->rpc_call_done if it exists
+ */
+void rpc_exit_task(struct rpc_task *task)
 {
-	if (signal_pending(current))
-		return -ERESTARTSYS;
-	schedule();
-	return 0;
+	task->tk_action = NULL;
+	if (task->tk_ops->rpc_call_done != NULL) {
+		task->tk_ops->rpc_call_done(task, task->tk_calldata);
+		if (task->tk_action != NULL) {
+			WARN_ON(RPC_ASSASSINATED(task));
+			/* Always release the RPC slot and buffer memory */
+			xprt_release(task);
+		}
+	}
 }
+EXPORT_SYMBOL(rpc_exit_task);
 
 /*
  * This is the RPC `scheduler' (or rather, the finite state machine).
@@ -631,12 +647,11 @@ static int __rpc_execute(struct rpc_task *task)
 		 * by someone else.
 		 */
 		if (!RPC_IS_QUEUED(task)) {
-			if (task->tk_action != NULL) {
-				lock_kernel();
-				task->tk_action(task);
-				unlock_kernel();
-			} else if (__rpc_do_exit(task))
+			if (task->tk_action == NULL)
 				break;
+			lock_kernel();
+			task->tk_action(task);
+			unlock_kernel();
 		}
 
 		/*
@@ -676,9 +691,9 @@ static int __rpc_execute(struct rpc_task *task)
 		dprintk("RPC: %4d sync task resuming\n", task->tk_pid);
 	}
 
-	dprintk("RPC: %4d exit() = %d\n", task->tk_pid, task->tk_status);
-	status = task->tk_status;
-
+	dprintk("RPC: %4d, return %d, status %d\n", task->tk_pid, status, task->tk_status);
+	/* Wake up anyone who is waiting for task completion */
+	rpc_mark_complete_task(task);
 	/* Release all resources associated with the task */
 	rpc_release_task(task);
 	return status;
@@ -696,9 +711,7 @@ static int __rpc_execute(struct rpc_task *task)
 int
 rpc_execute(struct rpc_task *task)
 {
-	BUG_ON(task->tk_active);
-
-	task->tk_active = 1;
+	rpc_set_active(task);
 	rpc_set_running(task);
 	return __rpc_execute(task);
 }
@@ -708,17 +721,19 @@ static void rpc_async_schedule(void *arg)
 	__rpc_execute((struct rpc_task *)arg);
 }
 
-/*
- * Allocate memory for RPC purposes.
+/**
+ * rpc_malloc - allocate an RPC buffer
+ * @task: RPC task that will use this buffer
+ * @size: requested byte size
  *
  * We try to ensure that some NFS reads and writes can always proceed
  * by using a mempool when allocating 'small' buffers.
  * In order to avoid memory starvation triggering more writebacks of
  * NFS requests, we use GFP_NOFS rather than GFP_KERNEL.
  */
-void *
-rpc_malloc(struct rpc_task *task, size_t size)
+void * rpc_malloc(struct rpc_task *task, size_t size)
 {
+	struct rpc_rqst *req = task->tk_rqstp;
 	gfp_t	gfp;
 
 	if (task->tk_flags & RPC_TASK_SWAPPER)
@@ -727,42 +742,52 @@ rpc_malloc(struct rpc_task *task, size_t size)
 		gfp = GFP_NOFS;
 
 	if (size > RPC_BUFFER_MAXSIZE) {
-		task->tk_buffer =  kmalloc(size, gfp);
-		if (task->tk_buffer)
-			task->tk_bufsize = size;
+		req->rq_buffer = kmalloc(size, gfp);
+		if (req->rq_buffer)
+			req->rq_bufsize = size;
 	} else {
-		task->tk_buffer =  mempool_alloc(rpc_buffer_mempool, gfp);
-		if (task->tk_buffer)
-			task->tk_bufsize = RPC_BUFFER_MAXSIZE;
+		req->rq_buffer = mempool_alloc(rpc_buffer_mempool, gfp);
+		if (req->rq_buffer)
+			req->rq_bufsize = RPC_BUFFER_MAXSIZE;
 	}
-	return task->tk_buffer;
+	return req->rq_buffer;
 }
 
-static void
-rpc_free(struct rpc_task *task)
+/**
+ * rpc_free - free buffer allocated via rpc_malloc
+ * @task: RPC task with a buffer to be freed
+ *
+ */
+void rpc_free(struct rpc_task *task)
 {
-	if (task->tk_buffer) {
-		if (task->tk_bufsize == RPC_BUFFER_MAXSIZE)
-			mempool_free(task->tk_buffer, rpc_buffer_mempool);
+	struct rpc_rqst *req = task->tk_rqstp;
+
+	if (req->rq_buffer) {
+		if (req->rq_bufsize == RPC_BUFFER_MAXSIZE)
+			mempool_free(req->rq_buffer, rpc_buffer_mempool);
 		else
-			kfree(task->tk_buffer);
-		task->tk_buffer = NULL;
-		task->tk_bufsize = 0;
+			kfree(req->rq_buffer);
+		req->rq_buffer = NULL;
+		req->rq_bufsize = 0;
 	}
 }
 
 /*
  * Creation and deletion of RPC task structures
  */
-void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action callback, int flags)
+void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata)
 {
 	memset(task, 0, sizeof(*task));
 	init_timer(&task->tk_timer);
 	task->tk_timer.data     = (unsigned long) task;
 	task->tk_timer.function = (void (*)(unsigned long)) rpc_run_timer;
+	atomic_set(&task->tk_count, 1);
 	task->tk_client = clnt;
 	task->tk_flags  = flags;
-	task->tk_exit   = callback;
+	task->tk_ops = tk_ops;
+	if (tk_ops->rpc_call_prepare != NULL)
+		task->tk_action = rpc_prepare_task;
+	task->tk_calldata = calldata;
 
 	/* Initialize retry counters */
 	task->tk_garb_retry = 2;
@@ -791,6 +816,8 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action call
 	list_add_tail(&task->tk_task, &all_tasks);
 	spin_unlock(&rpc_sched_lock);
 
+	BUG_ON(task->tk_ops == NULL);
+
 	dprintk("RPC: %4d new task procpid %d\n", task->tk_pid,
 				current->pid);
 }
@@ -801,8 +828,7 @@ rpc_alloc_task(void)
 	return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
 }
 
-static void
-rpc_default_free_task(struct rpc_task *task)
+static void rpc_free_task(struct rpc_task *task)
 {
 	dprintk("RPC: %4d freeing task\n", task->tk_pid);
 	mempool_free(task, rpc_task_mempool);
@@ -813,8 +839,7 @@ rpc_default_free_task(struct rpc_task *task)
  * clean up after an allocation failure, as the client may
  * have specified "oneshot".
  */
-struct rpc_task *
-rpc_new_task(struct rpc_clnt *clnt, rpc_action callback, int flags)
+struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata)
 {
 	struct rpc_task	*task;
 
@@ -822,10 +847,7 @@ rpc_new_task(struct rpc_clnt *clnt, rpc_action callback, int flags)
 	if (!task)
 		goto cleanup;
 
-	rpc_init_task(task, clnt, callback, flags);
-
-	/* Replace tk_release */
-	task->tk_release = rpc_default_free_task;
+	rpc_init_task(task, clnt, flags, tk_ops, calldata);
 
 	dprintk("RPC: %4d allocated task\n", task->tk_pid);
 	task->tk_flags |= RPC_TASK_DYNAMIC;
@@ -845,11 +867,15 @@ cleanup:
 
 void rpc_release_task(struct rpc_task *task)
 {
-	dprintk("RPC: %4d release task\n", task->tk_pid);
+	const struct rpc_call_ops *tk_ops = task->tk_ops;
+	void *calldata = task->tk_calldata;
 
 #ifdef RPC_DEBUG
 	BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
 #endif
+	if (!atomic_dec_and_test(&task->tk_count))
+		return;
+	dprintk("RPC: %4d release task\n", task->tk_pid);
 
 	/* Remove from global task list */
 	spin_lock(&rpc_sched_lock);
@@ -857,7 +883,6 @@ void rpc_release_task(struct rpc_task *task)
 	spin_unlock(&rpc_sched_lock);
 
 	BUG_ON (RPC_IS_QUEUED(task));
-	task->tk_active = 0;
 
 	/* Synchronously delete any running timer */
 	rpc_delete_timer(task);
@@ -867,7 +892,6 @@ void rpc_release_task(struct rpc_task *task)
 		xprt_release(task);
 	if (task->tk_msg.rpc_cred)
 		rpcauth_unbindcred(task);
-	rpc_free(task);
 	if (task->tk_client) {
 		rpc_release_client(task->tk_client);
 		task->tk_client = NULL;
@@ -876,11 +900,34 @@ void rpc_release_task(struct rpc_task *task)
 #ifdef RPC_DEBUG
 	task->tk_magic = 0;
 #endif
-	if (task->tk_release)
-		task->tk_release(task);
+	if (task->tk_flags & RPC_TASK_DYNAMIC)
+		rpc_free_task(task);
+	if (tk_ops->rpc_release)
+		tk_ops->rpc_release(calldata);
 }
 
 /**
+ * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it
+ * @clnt: pointer to RPC client
+ * @flags: RPC flags
+ * @ops: RPC call ops
+ * @data: user call data
+ */
+struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
+					const struct rpc_call_ops *ops,
+					void *data)
+{
+	struct rpc_task *task;
+	task = rpc_new_task(clnt, flags, ops, data);
+	if (task == NULL)
+		return ERR_PTR(-ENOMEM);
+	atomic_inc(&task->tk_count);
+	rpc_execute(task);
+	return task;
+}
+EXPORT_SYMBOL(rpc_run_task);
+
+/**
  * rpc_find_parent - find the parent of a child task.
  * @child: child task
  *
@@ -890,12 +937,11 @@ void rpc_release_task(struct rpc_task *task)
  *
  * Caller must hold childq.lock
  */
-static inline struct rpc_task *rpc_find_parent(struct rpc_task *child)
+static inline struct rpc_task *rpc_find_parent(struct rpc_task *child, struct rpc_task *parent)
 {
-	struct rpc_task	*task, *parent;
+	struct rpc_task	*task;
 	struct list_head *le;
 
-	parent = (struct rpc_task *) child->tk_calldata;
 	task_for_each(task, le, &childq.tasks[0])
 		if (task == parent)
 			return parent;
@@ -903,18 +949,22 @@ static inline struct rpc_task *rpc_find_parent(struct rpc_task *child)
 	return NULL;
 }
 
-static void rpc_child_exit(struct rpc_task *child)
+static void rpc_child_exit(struct rpc_task *child, void *calldata)
 {
 	struct rpc_task	*parent;
 
 	spin_lock_bh(&childq.lock);
-	if ((parent = rpc_find_parent(child)) != NULL) {
+	if ((parent = rpc_find_parent(child, calldata)) != NULL) {
 		parent->tk_status = child->tk_status;
 		__rpc_wake_up_task(parent);
 	}
 	spin_unlock_bh(&childq.lock);
 }
 
+static const struct rpc_call_ops rpc_child_ops = {
+	.rpc_call_done = rpc_child_exit,
+};
+
 /*
  * Note: rpc_new_task releases the client after a failure.
  */
@@ -923,11 +973,9 @@ rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent)
 {
 	struct rpc_task	*task;
 
-	task = rpc_new_task(clnt, NULL, RPC_TASK_ASYNC | RPC_TASK_CHILD);
+	task = rpc_new_task(clnt, RPC_TASK_ASYNC | RPC_TASK_CHILD, &rpc_child_ops, parent);
 	if (!task)
 		goto fail;
-	task->tk_exit = rpc_child_exit;
-	task->tk_calldata = parent;
 	return task;
 
 fail:
@@ -1063,7 +1111,7 @@ void rpc_show_tasks(void)
 		return;
 	}
 	printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout "
-		"-rpcwait -action- --exit--\n");
+		"-rpcwait -action- ---ops--\n");
 	alltask_for_each(t, le, &all_tasks) {
 		const char *rpc_waitq = "none";
 
@@ -1078,7 +1126,7 @@ void rpc_show_tasks(void)
 			(t->tk_client ? t->tk_client->cl_prog : 0),
 			t->tk_rqstp, t->tk_timeout,
 			rpc_waitq,
-			t->tk_action, t->tk_exit);
+			t->tk_action, t->tk_ops);
 	}
 	spin_unlock(&rpc_sched_lock);
 }
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index a03d4b600c9..9f737320359 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -30,8 +30,6 @@ EXPORT_SYMBOL(rpc_init_task);
 EXPORT_SYMBOL(rpc_sleep_on);
 EXPORT_SYMBOL(rpc_wake_up_next);
 EXPORT_SYMBOL(rpc_wake_up_task);
-EXPORT_SYMBOL(rpc_new_child);
-EXPORT_SYMBOL(rpc_run_child);
 EXPORT_SYMBOL(rpciod_down);
 EXPORT_SYMBOL(rpciod_up);
 EXPORT_SYMBOL(rpc_new_task);
@@ -45,7 +43,6 @@ EXPORT_SYMBOL(rpc_clone_client);
 EXPORT_SYMBOL(rpc_bind_new_program);
 EXPORT_SYMBOL(rpc_destroy_client);
 EXPORT_SYMBOL(rpc_shutdown_client);
-EXPORT_SYMBOL(rpc_release_client);
 EXPORT_SYMBOL(rpc_killall_tasks);
 EXPORT_SYMBOL(rpc_call_sync);
 EXPORT_SYMBOL(rpc_call_async);
@@ -120,7 +117,6 @@ EXPORT_SYMBOL(unix_domain_find);
 
 /* Generic XDR */
 EXPORT_SYMBOL(xdr_encode_string);
-EXPORT_SYMBOL(xdr_decode_string);
 EXPORT_SYMBOL(xdr_decode_string_inplace);
 EXPORT_SYMBOL(xdr_decode_netobj);
 EXPORT_SYMBOL(xdr_encode_netobj);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index e4296c8b861..b08419e1fc6 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -32,7 +32,7 @@ svc_create(struct svc_program *prog, unsigned int bufsize)
 	int vers;
 	unsigned int xdrsize;
 
-	if (!(serv = (struct svc_serv *) kmalloc(sizeof(*serv), GFP_KERNEL)))
+	if (!(serv = kmalloc(sizeof(*serv), GFP_KERNEL)))
 		return NULL;
 	memset(serv, 0, sizeof(*serv));
 	serv->sv_name      = prog->pg_name;
@@ -122,8 +122,7 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size)
 	rqstp->rq_argused = 0;
 	rqstp->rq_resused = 0;
 	arghi = 0;
-	if (pages > RPCSVC_MAXPAGES)
-		BUG();
+	BUG_ON(pages > RPCSVC_MAXPAGES);
 	while (pages) {
 		struct page *p = alloc_page(GFP_KERNEL);
 		if (!p)
@@ -167,8 +166,8 @@ svc_create_thread(svc_thread_fn func, struct svc_serv *serv)
 	memset(rqstp, 0, sizeof(*rqstp));
 	init_waitqueue_head(&rqstp->rq_wait);
 
-	if (!(rqstp->rq_argp = (u32 *) kmalloc(serv->sv_xdrsize, GFP_KERNEL))
-	 || !(rqstp->rq_resp = (u32 *) kmalloc(serv->sv_xdrsize, GFP_KERNEL))
+	if (!(rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL))
+	 || !(rqstp->rq_resp = kmalloc(serv->sv_xdrsize, GFP_KERNEL))
 	 || !svc_init_buffer(rqstp, serv->sv_bufsz))
 		goto out_thread;
 
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index cac2e774dd8..3e6c694bbad 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -101,10 +101,22 @@ static void ip_map_put(struct cache_head *item, struct cache_detail *cd)
 	}
 }
 
+#if IP_HASHBITS == 8
+/* hash_long on a 64 bit machine is currently REALLY BAD for
+ * IP addresses in reverse-endian (i.e. on a little-endian machine).
+ * So use a trivial but reliable hash instead
+ */
+static inline int hash_ip(unsigned long ip)
+{
+	int hash = ip ^ (ip>>16);
+	return (hash ^ (hash>>8)) & 0xff;
+}
+#endif
+
 static inline int ip_map_hash(struct ip_map *item)
 {
 	return hash_str(item->m_class, IP_HASHBITS) ^ 
-		hash_long((unsigned long)item->m_addr.s_addr, IP_HASHBITS);
+		hash_ip((unsigned long)item->m_addr.s_addr);
 }
 static inline int ip_map_match(struct ip_map *item, struct ip_map *tmp)
 {
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index c6a51911e71..50580620e89 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -758,7 +758,7 @@ svc_tcp_accept(struct svc_sock *svsk)
 	struct svc_serv	*serv = svsk->sk_server;
 	struct socket	*sock = svsk->sk_sock;
 	struct socket	*newsock;
-	struct proto_ops *ops;
+	const struct proto_ops *ops;
 	struct svc_sock	*newsvsk;
 	int		err, slen;
 
@@ -1026,7 +1026,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
 	} else {
 		printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
 					svsk->sk_server->sv_name, -len);
-		svc_sock_received(svsk);
+		goto err_delete;
 	}
 
 	return len;
@@ -1527,6 +1527,7 @@ svc_defer(struct cache_req *req)
 		dr->handle.owner = rqstp->rq_server;
 		dr->prot = rqstp->rq_prot;
 		dr->addr = rqstp->rq_addr;
+		dr->daddr = rqstp->rq_daddr;
 		dr->argslen = rqstp->rq_arg.len >> 2;
 		memcpy(dr->args, rqstp->rq_arg.head[0].iov_base-skip, dr->argslen<<2);
 	}
@@ -1552,6 +1553,7 @@ static int svc_deferred_recv(struct svc_rqst *rqstp)
 	rqstp->rq_arg.len = dr->argslen<<2;
 	rqstp->rq_prot        = dr->prot;
 	rqstp->rq_addr        = dr->addr;
+	rqstp->rq_daddr       = dr->daddr;
 	return dr->argslen<<2;
 }
 
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index aaf08cdd19f..ca4bfa57e11 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -93,27 +93,6 @@ xdr_encode_string(u32 *p, const char *string)
 }
 
 u32 *
-xdr_decode_string(u32 *p, char **sp, int *lenp, int maxlen)
-{
-	unsigned int	len;
-	char		*string;
-
-	if ((len = ntohl(*p++)) > maxlen)
-		return NULL;
-	if (lenp)
-		*lenp = len;
-	if ((len % 4) != 0) {
-		string = (char *) p;
-	} else {
-		string = (char *) (p - 1);
-		memmove(string, p, len);
-	}
-	string[len] = '\0';
-	*sp = string;
-	return p + XDR_QUADLEN(len);
-}
-
-u32 *
 xdr_decode_string_inplace(u32 *p, char **sp, int *lenp, int maxlen)
 {
 	unsigned int	len;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 6dda3860351..8ff2c8acb22 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -119,6 +119,17 @@ out_sleep:
 	return 0;
 }
 
+static void xprt_clear_locked(struct rpc_xprt *xprt)
+{
+	xprt->snd_task = NULL;
+	if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state) || xprt->shutdown) {
+		smp_mb__before_clear_bit();
+		clear_bit(XPRT_LOCKED, &xprt->state);
+		smp_mb__after_clear_bit();
+	} else
+		schedule_work(&xprt->task_cleanup);
+}
+
 /*
  * xprt_reserve_xprt_cong - serialize write access to transports
  * @task: task that is requesting access to the transport
@@ -145,9 +156,7 @@ int xprt_reserve_xprt_cong(struct rpc_task *task)
 		}
 		return 1;
 	}
-	smp_mb__before_clear_bit();
-	clear_bit(XPRT_LOCKED, &xprt->state);
-	smp_mb__after_clear_bit();
+	xprt_clear_locked(xprt);
 out_sleep:
 	dprintk("RPC: %4d failed to lock transport %p\n", task->tk_pid, xprt);
 	task->tk_timeout = 0;
@@ -193,9 +202,7 @@ static void __xprt_lock_write_next(struct rpc_xprt *xprt)
 	return;
 
 out_unlock:
-	smp_mb__before_clear_bit();
-	clear_bit(XPRT_LOCKED, &xprt->state);
-	smp_mb__after_clear_bit();
+	xprt_clear_locked(xprt);
 }
 
 static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt)
@@ -222,9 +229,7 @@ static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt)
 		return;
 	}
 out_unlock:
-	smp_mb__before_clear_bit();
-	clear_bit(XPRT_LOCKED, &xprt->state);
-	smp_mb__after_clear_bit();
+	xprt_clear_locked(xprt);
 }
 
 /**
@@ -237,10 +242,7 @@ out_unlock:
 void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
 {
 	if (xprt->snd_task == task) {
-		xprt->snd_task = NULL;
-		smp_mb__before_clear_bit();
-		clear_bit(XPRT_LOCKED, &xprt->state);
-		smp_mb__after_clear_bit();
+		xprt_clear_locked(xprt);
 		__xprt_lock_write_next(xprt);
 	}
 }
@@ -256,10 +258,7 @@ void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
 void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
 {
 	if (xprt->snd_task == task) {
-		xprt->snd_task = NULL;
-		smp_mb__before_clear_bit();
-		clear_bit(XPRT_LOCKED, &xprt->state);
-		smp_mb__after_clear_bit();
+		xprt_clear_locked(xprt);
 		__xprt_lock_write_next_cong(xprt);
 	}
 }
@@ -535,10 +534,6 @@ void xprt_connect(struct rpc_task *task)
 	dprintk("RPC: %4d xprt_connect xprt %p %s connected\n", task->tk_pid,
 			xprt, (xprt_connected(xprt) ? "is" : "is not"));
 
-	if (xprt->shutdown) {
-		task->tk_status = -EIO;
-		return;
-	}
 	if (!xprt->addr.sin_port) {
 		task->tk_status = -EIO;
 		return;
@@ -687,9 +682,6 @@ int xprt_prepare_transmit(struct rpc_task *task)
 
 	dprintk("RPC: %4d xprt_prepare_transmit\n", task->tk_pid);
 
-	if (xprt->shutdown)
-		return -EIO;
-
 	spin_lock_bh(&xprt->transport_lock);
 	if (req->rq_received && !req->rq_bytes_sent) {
 		err = req->rq_received;
@@ -814,11 +806,9 @@ void xprt_reserve(struct rpc_task *task)
 	struct rpc_xprt	*xprt = task->tk_xprt;
 
 	task->tk_status = -EIO;
-	if (!xprt->shutdown) {
-		spin_lock(&xprt->reserve_lock);
-		do_xprt_reserve(task);
-		spin_unlock(&xprt->reserve_lock);
-	}
+	spin_lock(&xprt->reserve_lock);
+	do_xprt_reserve(task);
+	spin_unlock(&xprt->reserve_lock);
 }
 
 static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt)
@@ -838,6 +828,8 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
 	req->rq_timeout = xprt->timeout.to_initval;
 	req->rq_task	= task;
 	req->rq_xprt    = xprt;
+	req->rq_buffer  = NULL;
+	req->rq_bufsize = 0;
 	req->rq_xid     = xprt_alloc_xid(xprt);
 	req->rq_release_snd_buf = NULL;
 	dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid,
@@ -863,10 +855,11 @@ void xprt_release(struct rpc_task *task)
 	if (!list_empty(&req->rq_list))
 		list_del(&req->rq_list);
 	xprt->last_used = jiffies;
-	if (list_empty(&xprt->recv) && !xprt->shutdown)
+	if (list_empty(&xprt->recv))
 		mod_timer(&xprt->timer,
 				xprt->last_used + xprt->idle_timeout);
 	spin_unlock_bh(&xprt->transport_lock);
+	xprt->ops->buf_free(task);
 	task->tk_rqstp = NULL;
 	if (req->rq_release_snd_buf)
 		req->rq_release_snd_buf(req);
@@ -974,16 +967,6 @@ struct rpc_xprt *xprt_create_proto(int proto, struct sockaddr_in *sap, struct rp
 	return xprt;
 }
 
-static void xprt_shutdown(struct rpc_xprt *xprt)
-{
-	xprt->shutdown = 1;
-	rpc_wake_up(&xprt->sending);
-	rpc_wake_up(&xprt->resend);
-	xprt_wake_pending_tasks(xprt, -EIO);
-	rpc_wake_up(&xprt->backlog);
-	del_timer_sync(&xprt->timer);
-}
-
 /**
  * xprt_destroy - destroy an RPC transport, killing off all requests.
  * @xprt: transport to destroy
@@ -992,7 +975,8 @@ static void xprt_shutdown(struct rpc_xprt *xprt)
 int xprt_destroy(struct rpc_xprt *xprt)
 {
 	dprintk("RPC:      destroying transport %p\n", xprt);
-	xprt_shutdown(xprt);
+	xprt->shutdown = 1;
+	del_timer_sync(&xprt->timer);
 	xprt->ops->destroy(xprt);
 	kfree(xprt);
 
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 0a51fd46a84..c458f8d1d6d 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -28,6 +28,7 @@
 #include <linux/udp.h>
 #include <linux/tcp.h>
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/sched.h>
 #include <linux/file.h>
 
 #include <net/sock.h>
@@ -424,7 +425,7 @@ static void xs_close(struct rpc_xprt *xprt)
 	struct sock *sk = xprt->inet;
 
 	if (!sk)
-		return;
+		goto clear_close_wait;
 
 	dprintk("RPC:      xs_close xprt %p\n", xprt);
 
@@ -441,6 +442,10 @@ static void xs_close(struct rpc_xprt *xprt)
 	sk->sk_no_check = 0;
 
 	sock_release(sock);
+clear_close_wait:
+	smp_mb__before_clear_bit();
+	clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
+	smp_mb__after_clear_bit();
 }
 
 /**
@@ -800,9 +805,13 @@ static void xs_tcp_state_change(struct sock *sk)
 	case TCP_SYN_SENT:
 	case TCP_SYN_RECV:
 		break;
+	case TCP_CLOSE_WAIT:
+		/* Try to schedule an autoclose RPC call, then fall through */
+		set_bit(XPRT_CLOSE_WAIT, &xprt->state);
+		if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
+			schedule_work(&xprt->task_cleanup);
 	default:
 		xprt_disconnect(xprt);
-		break;
 	}
  out:
 	read_unlock(&sk->sk_callback_lock);
@@ -920,6 +929,18 @@ static void xs_udp_timer(struct rpc_task *task)
 	xprt_adjust_cwnd(task, -ETIMEDOUT);
 }
 
+/**
+ * xs_set_port - reset the port number in the remote endpoint address
+ * @xprt: generic transport
+ * @port: new port number
+ * Stores htons(@port) in xprt->addr.sin_port and logs the change via dprintk.
+ */
+static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
+{
+	dprintk("RPC:      setting port for xprt %p to %u\n", xprt, port);
+	xprt->addr.sin_port = htons(port);
+}
+
 static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock)
 {
 	struct sockaddr_in myaddr = {
@@ -990,6 +1011,7 @@ static void xs_udp_connect_worker(void *args)
 		sk->sk_data_ready = xs_udp_data_ready;
 		sk->sk_write_space = xs_udp_write_space;
 		sk->sk_no_check = UDP_CSUM_NORCV;
+		sk->sk_allocation = GFP_ATOMIC;
 
 		xprt_set_connected(xprt);
 
@@ -1074,6 +1096,7 @@ static void xs_tcp_connect_worker(void *args)
 		sk->sk_data_ready = xs_tcp_data_ready;
 		sk->sk_state_change = xs_tcp_state_change;
 		sk->sk_write_space = xs_tcp_write_space;
+		sk->sk_allocation = GFP_ATOMIC;
 
 		/* socket options */
 		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
@@ -1158,7 +1181,10 @@ static struct rpc_xprt_ops xs_udp_ops = {
 	.set_buffer_size	= xs_udp_set_buffer_size,
 	.reserve_xprt		= xprt_reserve_xprt_cong,
 	.release_xprt		= xprt_release_xprt_cong,
+	.set_port		= xs_set_port,
 	.connect		= xs_connect,
+	.buf_alloc		= rpc_malloc,
+	.buf_free		= rpc_free,
 	.send_request		= xs_udp_send_request,
 	.set_retrans_timeout	= xprt_set_retrans_timeout_rtt,
 	.timer			= xs_udp_timer,
@@ -1170,7 +1196,10 @@ static struct rpc_xprt_ops xs_udp_ops = {
 static struct rpc_xprt_ops xs_tcp_ops = {
 	.reserve_xprt		= xprt_reserve_xprt,
 	.release_xprt		= xprt_release_xprt,
+	.set_port		= xs_set_port,
 	.connect		= xs_connect,
+	.buf_alloc		= rpc_malloc,
+	.buf_free		= rpc_free,
 	.send_request		= xs_tcp_send_request,
 	.set_retrans_timeout	= xprt_set_retrans_timeout_def,
 	.close			= xs_close,
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
new file mode 100644
index 00000000000..3891cc00087
--- /dev/null
+++ b/net/tipc/Kconfig
@@ -0,0 +1,117 @@
+#
+# TIPC configuration
+#
+
+menu "TIPC Configuration (EXPERIMENTAL)"
+	depends on INET && EXPERIMENTAL
+
+config TIPC
+	tristate "The TIPC Protocol (EXPERIMENTAL)"
+	---help---
+	  The Transparent Inter Process Communication (TIPC) protocol is
+	  specially designed for intra cluster communication. This protocol
+	  originates from Ericsson where it has been used in carrier grade
+	  cluster applications for many years.
+	
+	  For more information about TIPC, see http://tipc.sourceforge.net.
+
+	  This protocol support is also available as a module ( = code which
+	  can be inserted in and removed from the running kernel whenever you
+	  want). The module will be called tipc. If you want to compile it
+	  as a module, say M here and read <file:Documentation/modules.txt>.
+
+	  If in doubt, say N.
+
+config TIPC_ADVANCED
+	bool "TIPC: Advanced configuration"
+	depends on TIPC
+	default n
+	help
+	  Saying Y here will open some advanced configuration
+          for TIPC. Most users do not need to bother, so if
+          unsure, just say N.
+
+config TIPC_ZONES
+	int "Maximum number of zones in network"
+	depends on TIPC && TIPC_ADVANCED
+	default "3"
+	help
+	 Max number of zones inside TIPC network. Max supported value 
+         is 255 zones, minimum is 1
+
+	 Default is 3 zones in a network; setting this to a higher value
+	 allows more zones but might use more memory.
+
+config TIPC_CLUSTERS
+	int "Maximum number of clusters in a zone"
+	depends on TIPC && TIPC_ADVANCED
+	default "1"
+	help
+          ***Only 1 (one cluster in a zone) is supported by current code.
+          Any value set here will be overridden.***
+
+          (Max number of clusters inside TIPC zone. Max supported 
+          value is 4095 clusters, minimum is 1.
+
+	  Default is 1; setting this to smaller value might save 
+          some memory, setting it to higher
+	  allows more clusters and might consume more memory.)
+
+config TIPC_NODES
+	int "Maximum number of nodes in cluster"
+	depends on TIPC && TIPC_ADVANCED
+	default "255"
+	help
+	  Maximum number of nodes inside a TIPC cluster. Maximum 
+          supported value is 2047 nodes, minimum is 8. 
+
+	  Setting this to a smaller value saves some memory, 
+	  setting it to higher allows more nodes.
+
+config TIPC_SLAVE_NODES
+	int "Maximum number of slave nodes in cluster"
+	depends on TIPC && TIPC_ADVANCED
+	default "0"
+	help
+          ***This capability is not supported by current code.***
+	  
+	  Maximum number of slave nodes inside a TIPC cluster. Maximum 
+          supported value is 2047 nodes, minimum is 0. 
+
+	  Setting this to a smaller value saves some memory, 
+	  setting it to higher allows more nodes.
+
+config TIPC_PORTS
+	int "Maximum number of ports in a node"
+	depends on TIPC && TIPC_ADVANCED
+	default "8191"
+	help
+	  Maximum number of ports within a node. Maximum 
+          supported value is 64535 ports, minimum is 127.
+
+	  Setting this to a smaller value saves some memory, 
+	  setting it to higher allows more ports.
+
+config TIPC_LOG
+	int "Size of log buffer"
+	depends on TIPC && TIPC_ADVANCED
+	default 0
+	help
+ 	  Size (in bytes) of TIPC's internal log buffer, which records the
+	  occurrence of significant events.  Maximum supported value
+	  is 32768 bytes, minimum is 0.
+
+	  There is no need to enable the log buffer unless the node will be
+	  managed remotely via TIPC.
+
+config TIPC_DEBUG
+	bool "Enable debugging support"
+	depends on TIPC
+	default n
+	help
+ 	  This will enable debugging of TIPC.
+
+	  Only say Y here if you are having trouble with TIPC.  It will
+	  enable the display of detailed information about what is going on.
+
+endmenu
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
new file mode 100644
index 00000000000..dceb7027946
--- /dev/null
+++ b/net/tipc/Makefile
@@ -0,0 +1,13 @@
+#
+# Makefile for the Linux TIPC layer
+#
+
+obj-$(CONFIG_TIPC) := tipc.o
+
+tipc-y	+= addr.o bcast.o bearer.o config.o cluster.o \
+	   core.o handler.o link.o discover.o msg.o  \
+	   name_distr.o  subscr.o name_table.o net.o  \
+	   netlink.o node.o node_subscr.o port.o ref.o  \
+	   socket.o user_reg.o zone.o dbg.o eth_media.o
+
+# End of file
diff --git a/net/tipc/addr.c b/net/tipc/addr.c
new file mode 100644
index 00000000000..0be25e175b9
--- /dev/null
+++ b/net/tipc/addr.c
@@ -0,0 +1,94 @@
+/*
+ * net/tipc/addr.c: TIPC address utility routines
+ *     
+ * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2004-2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "dbg.h"
+#include "addr.h"
+#include "zone.h"
+#include "cluster.h"
+#include "net.h"
+
+u32 tipc_get_addr(void)
+{
+	return tipc_own_addr;	/* plain accessor for the global tipc_own_addr */
+}
+
+/**
+ * tipc_addr_domain_valid - validates a network domain address
+ *
+ * Accepts <Z.C.N>, <Z.C.0>, <Z.0.0>, and <0.0.0>,
+ * where Z, C, and N are non-zero and do not exceed the configured limits.
+ *
+ * Returns 1 if domain address is valid, otherwise 0
+ */
+
+int tipc_addr_domain_valid(u32 addr)
+{
+	u32 n = tipc_node(addr);
+	u32 c = tipc_cluster(addr);
+	u32 z = tipc_zone(addr);
+	u32 max_nodes = tipc_max_nodes;
+
+	if (is_slave(addr))	/* slave addresses are checked against a different bound */
+		max_nodes = LOWEST_SLAVE + tipc_max_slaves;
+	if (n > max_nodes)
+		return 0;
+	if (c > tipc_max_clusters)
+		return 0;
+	if (z > tipc_max_zones)
+		return 0;
+
+	if (n && (!z || !c))	/* a node number needs both a zone and a cluster */
+		return 0;
+	if (c && !z)		/* a cluster number needs a zone */
+		return 0;
+	return 1;
+}
+
+/**
+ * tipc_addr_node_valid - validates a proposed network address for this node
+ *
+ * Accepts <Z.C.N>, where Z, C, and N are non-zero and do not exceed
+ * the configured limits.
+ *
+ * Returns 1 if addr is a valid domain with a non-zero node part, otherwise 0
+ */
+
+int tipc_addr_node_valid(u32 addr)
+{
+	return (tipc_addr_domain_valid(addr) && tipc_node(addr));
+}
+
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
new file mode 100644
index 00000000000..bcfebb3cbbf
--- /dev/null
+++ b/net/tipc/addr.h
@@ -0,0 +1,128 @@
+/*
+ * net/tipc/addr.h: Include file for TIPC address utility routines
+ * 
+ * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2004-2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_ADDR_H
+#define _TIPC_ADDR_H
+
+static inline u32 own_node(void)
+{
+	return tipc_node(tipc_own_addr);	/* tipc_node() applied to our own address */
+}
+
+static inline u32 own_cluster(void)
+{
+	return tipc_cluster(tipc_own_addr);	/* tipc_cluster() applied to our own address */
+}
+
+static inline u32 own_zone(void)
+{
+	return tipc_zone(tipc_own_addr);	/* tipc_zone() applied to our own address */
+}
+
+static inline int in_own_cluster(u32 addr)
+{
+	return !((addr ^ tipc_own_addr) >> 12);	/* 1 if all bits above the low 12 match ours */
+}
+
+static inline int in_own_zone(u32 addr)
+{
+	return !((addr ^ tipc_own_addr) >> 24);	/* 1 if all bits above the low 24 match ours */
+}
+
+static inline int is_slave(u32 addr)
+{
+	return addr & 0x800;	/* non-zero (0x800, not 1) when bit 11 of addr is set */
+}
+
+static inline int may_route(u32 addr)
+{
+	return(addr ^ tipc_own_addr) >> 11;	/* non-zero if addr differs from ours in bit 11 or above */
+}
+
+static inline int in_scope(u32 domain, u32 addr)
+{
+	if (!domain || (domain == addr))	/* zero domain or exact match */
+		return 1;
+	if (domain == (addr & 0xfffff000u)) /* domain <Z.C.0> */
+		return 1;
+	if (domain == (addr & 0xff000000u)) /* domain <Z.0.0> */
+		return 1;
+	return 0;	/* addr lies outside the given domain */
+}
+
+/**
+ * addr_scope - convert message lookup domain to equivalent 2-bit scope value
+ */
+
+static inline int addr_scope(u32 domain)
+{
+	if (likely(!domain))		/* zero domain maps to zone scope */
+		return TIPC_ZONE_SCOPE;
+	if (tipc_node(domain))		/* node part set */
+		return TIPC_NODE_SCOPE;
+	if (tipc_cluster(domain))	/* cluster part set, node part zero */
+		return TIPC_CLUSTER_SCOPE;
+	return TIPC_ZONE_SCOPE;
+}
+
+/**
+ * addr_domain - convert 2-bit scope value to equivalent message lookup domain
+ *
+ * Needed when address of a named message must be looked up a second time
+ * after a network hop.
+ */
+
+static inline int addr_domain(int sc)
+{
+	if (likely(sc == TIPC_NODE_SCOPE))
+		return tipc_own_addr;	/* node scope: our full address */
+	if (sc == TIPC_CLUSTER_SCOPE)	/* cluster scope: own zone and cluster, node 0 */
+		return tipc_addr(tipc_zone(tipc_own_addr),
+				 tipc_cluster(tipc_own_addr), 0);
+	return tipc_addr(tipc_zone(tipc_own_addr), 0, 0);	/* any other value: own zone only */
+}
+
+static inline char *addr_string_fill(char *string, u32 addr)
+{
+	snprintf(string, 16, "<%u.%u.%u>",	/* writes at most 16 bytes: caller's buffer must be >= 16 */
+		 tipc_zone(addr), tipc_cluster(addr), tipc_node(addr));
+	return string;	/* same pointer that was passed in */
+}
+
+int tipc_addr_domain_valid(u32);
+int tipc_addr_node_valid(u32 addr);
+
+#endif
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
new file mode 100644
index 00000000000..a7b04f397c1
--- /dev/null
+++ b/net/tipc/bcast.c
@@ -0,0 +1,806 @@
+/*
+ * net/tipc/bcast.c: TIPC broadcast code
+ *     
+ * Copyright (c) 2004-2006, Ericsson AB
+ * Copyright (c) 2004, Intel Corporation.
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "msg.h"
+#include "dbg.h"
+#include "link.h"
+#include "net.h"
+#include "node.h"
+#include "port.h"
+#include "addr.h"
+#include "node_subscr.h"
+#include "name_distr.h"
+#include "bearer.h"
+#include "name_table.h"
+#include "bcast.h"
+
+
+#define MAX_PKT_DEFAULT_MCAST 1500	/* bcast link max packet size (fixed) */
+
+#define BCLINK_WIN_DEFAULT 20		/* bcast link window size (default) */
+
+#define BCLINK_LOG_BUF_SIZE 0
+
+/**
+ * struct bcbearer_pair - a pair of bearers used by broadcast link
+ * @primary: pointer to primary bearer
+ * @secondary: pointer to secondary bearer
+ *
+ * Two bearers can be paired only if they have the same priority and the
+ * same set of reachable destinations.
+ */
+
+struct bcbearer_pair {
+	struct bearer *primary;
+	struct bearer *secondary;
+};
+
+/**
+ * struct bcbearer - bearer used by broadcast link
+ * @bearer: (non-standard) broadcast bearer structure
+ * @media: (non-standard) broadcast media structure
+ * @bpairs: array of bearer pairs
+ * @bpairs_temp: array of bearer pairs used during creation of "bpairs"
+ */
+
+struct bcbearer {
+	struct bearer bearer;
+	struct media media;
+	struct bcbearer_pair bpairs[MAX_BEARERS];
+	struct bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1];	/* presumably indexed by link priority - confirm */
+};
+
+/**
+ * struct bclink - link used for broadcast messages
+ * @link: (non-standard) broadcast link structure
+ * @node: (non-standard) node structure representing broadcast link's peer node
+ *
+ * Handles sequence numbering, fragmentation, bundling, etc.
+ */
+
+struct bclink {
+	struct link link;
+	struct node node;
+};
+
+
+static struct bcbearer *bcbearer = NULL;	/* assigned outside the part of the file shown here */
+static struct bclink *bclink = NULL;
+static struct link *bcl = NULL;	/* link whose out-queue and stats this file updates */
+static spinlock_t bc_lock = SPIN_LOCK_UNLOCKED;	/* taken with spin_lock_bh() around bcl queue updates */
+
+char tipc_bclink_name[] = "multicast-link";
+
+
+static inline u32 buf_seqno(struct sk_buff *buf)
+{
+	return msg_seqno(buf_msg(buf));	/* sequence number of the message held in buf */
+} 
+
+static inline u32 bcbuf_acks(struct sk_buff *buf)
+{
+	return (u32)(unsigned long)TIPC_SKB_CB(buf)->handle;	/* ack count is kept in the 'handle' pointer */
+}
+
+static inline void bcbuf_set_acks(struct sk_buff *buf, u32 acks)
+{
+	TIPC_SKB_CB(buf)->handle = (void *)(unsigned long)acks;	/* count stored as a pointer-sized integer */
+}
+
+static inline void bcbuf_decr_acks(struct sk_buff *buf)
+{
+	bcbuf_set_acks(buf, bcbuf_acks(buf) - 1);	/* one fewer ack outstanding; no check for zero */
+}
+
+
+/**
+ * bclink_set_gap - set gap according to contents of current deferred pkt queue
+ * Both ends start at last_in; a queued deferred pkt moves gap_to to seqno - 1
+ * Called with 'node' locked, bc_lock unlocked
+ */
+
+static inline void bclink_set_gap(struct node *n_ptr)
+{
+	struct sk_buff *buf = n_ptr->bclink.deferred_head;
+
+	n_ptr->bclink.gap_after = n_ptr->bclink.gap_to =
+		mod(n_ptr->bclink.last_in);
+	if (unlikely(buf != NULL))
+		n_ptr->bclink.gap_to = mod(buf_seqno(buf) - 1);
+}
+
+/**
+ * bclink_ack_allowed - test if ACK or NACK message can be sent at this moment
+ *
+ * This mechanism endeavours to prevent all nodes in network from trying
+ * to ACK or NACK at the same time.
+ *
+ * Note: TIPC uses a different trigger to distribute ACKs than it does to
+ *       distribute NACKs, but tries to use the same spacing (divide by 16).
+ */
+
+static inline int bclink_ack_allowed(u32 n)
+{
+	return((n % TIPC_MIN_LINK_WIN) == tipc_own_tag);	/* 1 only when n mod TIPC_MIN_LINK_WIN equals tipc_own_tag */
+}
+
+
+/**
+ * bclink_retransmit_pkt - retransmit broadcast packets
+ * @after: sequence number of last packet to *not* retransmit
+ * @to: sequence number of last packet to retransmit
+ *
+ * Takes bc_lock itself; tipc_bclink_recv_pkt() calls it with the node unlocked
+ */
+
+static void bclink_retransmit_pkt(u32 after, u32 to)
+{
+	struct sk_buff *buf;
+
+	spin_lock_bh(&bc_lock);
+	buf = bcl->first_out;
+	while (buf && less_eq(buf_seqno(buf), after)) {	/* skip packets up to and including 'after' */
+		buf = buf->next;                
+	}
+	if (buf != NULL)	/* third argument is presumably a packet count - confirm */
+		tipc_link_retransmit(bcl, buf, mod(to - after));
+	spin_unlock_bh(&bc_lock);              
+}
+
+/**
+ * tipc_bclink_acknowledge - handle acknowledgement of broadcast packets
+ * @n_ptr: node that sent acknowledgement info
+ * @acked: broadcast sequence # that has been acknowledged
+ *
+ * Node is locked, bc_lock unlocked.
+ */
+
+void tipc_bclink_acknowledge(struct node *n_ptr, u32 acked)
+{
+	struct sk_buff *crs;
+	struct sk_buff *next;
+	unsigned int released = 0;
+
+	if (less_eq(acked, n_ptr->bclink.acked))	/* nothing newly acknowledged */
+		return;
+
+	spin_lock_bh(&bc_lock);
+
+	/* Skip over packets that node has previously acknowledged */
+
+	crs = bcl->first_out;
+	while (crs && less_eq(buf_seqno(crs), n_ptr->bclink.acked)) {
+		crs = crs->next;
+	}
+
+	/* Update packets that node is now acknowledging */
+
+	while (crs && less_eq(buf_seqno(crs), acked)) {
+		next = crs->next;
+		bcbuf_decr_acks(crs);
+		if (bcbuf_acks(crs) == 0) {	/* no acks outstanding: release the packet */
+			bcl->first_out = next;
+			bcl->out_queue_size--;
+			buf_discard(crs);
+			released = 1;
+		}
+		crs = next;
+	}
+	n_ptr->bclink.acked = acked;
+
+	/* Try resolving broadcast link congestion, if necessary */
+
+	if (unlikely(bcl->next_out))
+		tipc_link_push_queue(bcl);
+	if (unlikely(released && !list_empty(&bcl->waiting_ports)))
+		tipc_link_wakeup_ports(bcl, 0);
+	spin_unlock_bh(&bc_lock);
+}
+
+/**
+ * bclink_send_ack - unicast an ACK msg
+ *
+ * tipc_net_lock and node lock set
+ */
+
+static void bclink_send_ack(struct node *n_ptr)
+{
+	struct link *l_ptr = n_ptr->active_links[n_ptr->addr & 1];	/* slot picked by low bit of node address */
+
+	if (l_ptr != NULL)	/* silently does nothing when that slot is empty */
+		tipc_link_send_proto_msg(l_ptr, STATE_MSG, 0, 0, 0, 0, 0);
+}
+
+/**
+ * bclink_send_nack - broadcast a NACK msg
+ *
+ * tipc_net_lock and node lock set
+ */
+
+static void bclink_send_nack(struct node *n_ptr)
+{
+	struct sk_buff *buf;
+	struct tipc_msg *msg;
+
+	if (!less(n_ptr->bclink.gap_after, n_ptr->bclink.gap_to))	/* no gap to report */
+		return;
+
+	buf = buf_acquire(INT_H_SIZE);
+	if (buf) {
+		msg = buf_msg(buf);
+		msg_init(msg, BCAST_PROTOCOL, STATE_MSG,
+			 TIPC_OK, INT_H_SIZE, n_ptr->addr);
+		msg_set_mc_netid(msg, tipc_net_id);
+		msg_set_bcast_ack(msg, mod(n_ptr->bclink.last_in)); 
+		msg_set_bcgap_after(msg, n_ptr->bclink.gap_after);
+		msg_set_bcgap_to(msg, n_ptr->bclink.gap_to);
+		msg_set_bcast_tag(msg, tipc_own_tag);
+
+		if (tipc_bearer_send(&bcbearer->bearer, buf, 0)) {	/* non-zero: counted as sent, buffer discarded */
+			bcl->stats.sent_nacks++;
+			buf_discard(buf);
+		} else {	/* zero: counted as bearer congestion, buffer parked on the link */
+			tipc_bearer_schedule(bcl->b_ptr, bcl);
+			bcl->proto_msg_queue = buf;
+			bcl->stats.bearer_congs++;
+		}
+
+		/* 
+		 * Ensure we don't send another NACK msg to the node
+		 * until 16 more deferred messages arrive from it
+		 * (i.e. helps prevent all nodes from NACK'ing at same time)
+		 */
+		
+		n_ptr->bclink.nack_sync = tipc_own_tag;
+	}
+}
+
+/**
+ * tipc_bclink_check_gap - send a NACK if a sequence gap exists
+ *
+ * tipc_net_lock and node lock set
+ */
+
+void tipc_bclink_check_gap(struct node *n_ptr, u32 last_sent)
+{
+	if (!n_ptr->bclink.supported ||
+	    less_eq(last_sent, mod(n_ptr->bclink.last_in)))	/* nothing sent beyond last_in */
+		return;
+
+	bclink_set_gap(n_ptr);
+	if (n_ptr->bclink.gap_after == n_ptr->bclink.gap_to)	/* gap came out empty: extend it to last_sent */
+		n_ptr->bclink.gap_to = last_sent;
+	bclink_send_nack(n_ptr);
+}
+
+/**
+ * tipc_bclink_peek_nack - process a NACK msg meant for another node
+ * @dest: address of the node the NACK was sent to (looked up here)
+ * @sender_tag: tag of the NACK's sender, used to pace a complementary NACK
+ * @gap_after: start of the gap reported in the NACK
+ * @gap_to: end of the gap reported in the NACK
+ *
+ * Only tipc_net_lock set.
+ */
+
+void tipc_bclink_peek_nack(u32 dest, u32 sender_tag, u32 gap_after, u32 gap_to)
+{
+	struct node *n_ptr = tipc_node_find(dest);
+	u32 my_after, my_to;
+
+	if (unlikely(!n_ptr || !tipc_node_is_up(n_ptr)))
+		return;
+	tipc_node_lock(n_ptr);
+	/* Modify gap to suppress unnecessary NACKs from this node */
+	my_after = n_ptr->bclink.gap_after;
+	my_to = n_ptr->bclink.gap_to;
+
+	if (less_eq(gap_after, my_after)) {
+		if (less(my_after, gap_to) && less(gap_to, my_to))
+			n_ptr->bclink.gap_after = gap_to;
+		else if (less_eq(my_to, gap_to))
+			n_ptr->bclink.gap_to = n_ptr->bclink.gap_after;
+	} else if (less_eq(gap_after, my_to)) {
+		if (less_eq(my_to, gap_to))
+			n_ptr->bclink.gap_to = gap_after;
+	} else {
+		/* Expand gap if missing bufs not in deferred queue: */
+		struct sk_buff *buf = n_ptr->bclink.deferred_head;
+		u32 prev = n_ptr->bclink.gap_to;
+
+		for (; buf; buf = buf->next) {
+			u32 seqno = buf_seqno(buf);
+
+			if (mod(seqno - prev) != 1) {
+				buf = NULL;
+				break;	/* never reach "buf = buf->next" with buf NULL */
+			}
+			if (seqno == gap_after)
+				break;
+			prev = seqno;
+		}
+		if (buf == NULL)
+			n_ptr->bclink.gap_to = gap_after;
+	}
+	/* Some nodes may send a complementary NACK now: */
+	if (bclink_ack_allowed(sender_tag + 1)) {
+		if (n_ptr->bclink.gap_to != n_ptr->bclink.gap_after) {
+			bclink_send_nack(n_ptr);
+			bclink_set_gap(n_ptr);
+		}
+	}
+	tipc_node_unlock(n_ptr);
+}
+
+/**
+ * tipc_bclink_send_msg - broadcast a packet to all nodes in cluster
+ */
+
+int tipc_bclink_send_msg(struct sk_buff *buf)
+{
+	int res;
+
+	spin_lock_bh(&bc_lock);
+
+	res = tipc_link_send_buf(bcl, buf);
+	if (unlikely(res == -ELINKCONG))	/* link congested: the buffer is dropped here */
+		buf_discard(buf);
+	else	/* every other result is counted as a sent message */
+		bcl->stats.sent_info++;
+
+	if (bcl->out_queue_size > bcl->stats.max_queue_sz)
+		bcl->stats.max_queue_sz = bcl->out_queue_size;
+	bcl->stats.queue_sz_counts++;
+	bcl->stats.accu_queue_sz += bcl->out_queue_size;
+
+	spin_unlock_bh(&bc_lock);
+	return res;	/* result of tipc_link_send_buf(), passed through unchanged */
+}
+
+/**
+ * tipc_bclink_recv_pkt - receive a broadcast packet, and deliver upwards
+ *
+ * tipc_net_lock is read_locked, no other locks set
+ */
+
+void tipc_bclink_recv_pkt(struct sk_buff *buf)
+{        
+	struct tipc_msg *msg = buf_msg(buf);
+	struct node* node = tipc_node_find(msg_prevnode(msg));
+	u32 next_in;
+	u32 seqno;
+	struct sk_buff *deferred;
+
+	msg_dbg(msg, "<BC<<<");
+
+	if (unlikely(!node || !tipc_node_is_up(node) || !node->bclink.supported || 
+		     (msg_mc_netid(msg) != tipc_net_id))) {
+		buf_discard(buf);	/* unknown/down sender, no bclink support, or other net id */
+		return;
+	}
+
+	if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) {
+		msg_dbg(msg, "<BCNACK<<<");
+		if (msg_destnode(msg) == tipc_own_addr) {	/* NACK addressed to this node */
+			tipc_node_lock(node);
+			tipc_bclink_acknowledge(node, msg_bcast_ack(msg));
+			tipc_node_unlock(node);
+			bcl->stats.recv_nacks++;
+			bclink_retransmit_pkt(msg_bcgap_after(msg),
+					      msg_bcgap_to(msg));
+		} else {	/* NACK addressed to another node: only peek at it */
+			tipc_bclink_peek_nack(msg_destnode(msg),
+					 msg_bcast_tag(msg),
+					 msg_bcgap_after(msg),
+					 msg_bcgap_to(msg));
+		}
+		buf_discard(buf);
+		return;
+	}
+
+	tipc_node_lock(node);
+receive:	/* re-entered (node locked) for a deferred packet that became deliverable */
+	deferred = node->bclink.deferred_head;
+	next_in = mod(node->bclink.last_in + 1);
+	seqno = msg_seqno(msg);
+
+	if (likely(seqno == next_in)) {
+		bcl->stats.recv_info++;
+		node->bclink.last_in++;
+		bclink_set_gap(node);
+		if (unlikely(bclink_ack_allowed(seqno))) {
+			bclink_send_ack(node);
+			bcl->stats.sent_acks++;
+		}
+		if (likely(msg_isdata(msg))) {	/* each branch drops the node lock before delivery */
+			tipc_node_unlock(node);
+			tipc_port_recv_mcast(buf, NULL);
+		} else if (msg_user(msg) == MSG_BUNDLER) {
+			bcl->stats.recv_bundles++;
+			bcl->stats.recv_bundled += msg_msgcnt(msg);
+			tipc_node_unlock(node);
+			tipc_link_recv_bundle(buf);
+		} else if (msg_user(msg) == MSG_FRAGMENTER) {
+			bcl->stats.recv_fragments++;
+			if (tipc_link_recv_fragment(&node->bclink.defragm,
+						    &buf, &msg))
+				bcl->stats.recv_fragmented++;
+			tipc_node_unlock(node);
+			tipc_net_route_msg(buf);
+		} else {
+			tipc_node_unlock(node);
+			tipc_net_route_msg(buf);
+		}
+		if (deferred && (buf_seqno(deferred) == mod(next_in + 1))) {
+			tipc_node_lock(node);
+			buf = deferred;
+			msg = buf_msg(buf);
+			node->bclink.deferred_head = deferred->next;
+			goto receive;
+		}
+		return;
+	} else if (less(next_in, seqno)) {	/* ahead of sequence: try to defer the packet */
+		u32 gap_after = node->bclink.gap_after;
+		u32 gap_to = node->bclink.gap_to;
+
+		if (tipc_link_defer_pkt(&node->bclink.deferred_head,
+					&node->bclink.deferred_tail,
+					buf)) {
+			node->bclink.nack_sync++;
+			bcl->stats.deferred_recv++;
+			if (seqno == mod(gap_after + 1))
+				node->bclink.gap_after = seqno;
+			else if (less(gap_after, seqno) && less(seqno, gap_to))
+				node->bclink.gap_to = seqno;
+		}
+		if (bclink_ack_allowed(node->bclink.nack_sync)) {
+			if (gap_to != gap_after)	/* compares the values sampled before deferring */
+				bclink_send_nack(node);
+			bclink_set_gap(node);
+		}
+	} else {	/* seqno is behind next_in: counted as a duplicate */
+		bcl->stats.duplicates++;
+		buf_discard(buf);
+	}
+	tipc_node_unlock(node);
+}
+
+u32 tipc_bclink_get_last_sent(void)
+{
+	/* If packets are still queued unsent, report the one just before them */
+	if (bcl->next_out)
+		return mod(buf_seqno(bcl->next_out) - 1);
+
+	return mod(bcl->next_out_no - 1);
+}
+
+/* tipc_bclink_acks_missing - nonzero if node has yet to ack the last bcast */
+u32 tipc_bclink_acks_missing(struct node *n_ptr)
+{
+	return (n_ptr->bclink.supported &&
+		(tipc_bclink_get_last_sent() != n_ptr->bclink.acked));
+}
+
+/**
+ * tipc_bcbearer_send - send a packet through the broadcast pseudo-bearer
+ * @buf: packet to send (@unused1 and @unused2 are never referenced)
+ * 
+ * Sends over as many bearer pairs as needed to cover tipc_cltr_bcast_nodes.
+ * On failure the pseudo-bearer is marked blocked and bearer_congs is bumped.
+ * Returns TIPC_OK if every target was reached, ~TIPC_OK if not
+ */
+
+int tipc_bcbearer_send(struct sk_buff *buf,
+		       struct tipc_bearer *unused1,
+		       struct tipc_media_addr *unused2)
+{
+	static int send_count = 0;
+
+	struct node_map remains;
+	struct node_map remains_new;
+	int bp_index;
+	int swap_time;
+
+	/* Prepare buffer for broadcasting (if first time trying to send it) */
+
+	if (likely(!msg_non_seq(buf_msg(buf)))) {
+		struct tipc_msg *msg;
+
+		assert(tipc_cltr_bcast_nodes.count != 0);
+		bcbuf_set_acks(buf, tipc_cltr_bcast_nodes.count);
+		msg = buf_msg(buf);
+		msg_set_non_seq(msg);
+		msg_set_mc_netid(msg, tipc_net_id);
+	}
+
+	/* Every 10th call, swap the pair even if the primary send succeeds */
+
+	if ((swap_time = (++send_count >= 10)))
+		send_count = 0;
+
+	/* Send buffer over bearers until all targets reached */
+	
+	remains = tipc_cltr_bcast_nodes;
+
+	for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) {
+		struct bearer *p = bcbearer->bpairs[bp_index].primary;
+		struct bearer *s = bcbearer->bpairs[bp_index].secondary;
+
+		if (!p)
+			break;	/* no more bearers to try */
+
+		tipc_nmap_diff(&remains, &p->nodes, &remains_new);
+		if (remains_new.count == remains.count)
+			continue;	/* bearer pair doesn't add anything */
+
+		if (!p->publ.blocked &&
+		    !p->media->send_msg(buf, &p->publ, &p->media->bcast_addr)) {
+			if (swap_time && s && !s->publ.blocked)
+				goto swap;
+			else
+				goto update;
+		}
+
+		if (!s || s->publ.blocked ||
+		    s->media->send_msg(buf, &s->publ, &s->media->bcast_addr))
+			continue;	/* unable to send using bearer pair */
+swap:
+		bcbearer->bpairs[bp_index].primary = s;
+		bcbearer->bpairs[bp_index].secondary = p;
+update:
+		if (remains_new.count == 0)
+			return TIPC_OK;
+
+		remains = remains_new;
+	}
+	
+	/* Unable to reach all targets */
+
+	bcbearer->bearer.publ.blocked = 1;
+	bcl->stats.bearer_congs++;
+	return ~TIPC_OK;
+}
+
+/**
+ * tipc_bcbearer_sort - rebuild the broadcast bearer pair table under bc_lock
+ */
+
+void tipc_bcbearer_sort(void)
+{
+	struct bcbearer_pair *bp_temp = bcbearer->bpairs_temp;
+	struct bcbearer_pair *bp_curr;
+	int b_index;
+	int pri;
+
+	spin_lock_bh(&bc_lock);
+
+	/* Group bearers by priority (can assume max of two per priority) */
+
+	memset(bp_temp, 0, sizeof(bcbearer->bpairs_temp));
+
+	for (b_index = 0; b_index < MAX_BEARERS; b_index++) {
+		struct bearer *b = &tipc_bearers[b_index];
+
+		if (!b->active || !b->nodes.count)
+			continue;	/* unused, or has no nodes in its set */
+
+		if (!bp_temp[b->priority].primary)
+			bp_temp[b->priority].primary = b;
+		else
+			bp_temp[b->priority].secondary = b;
+	}
+
+	/* Build bearer pair array for broadcasting, highest priority first */
+
+	bp_curr = bcbearer->bpairs;
+	memset(bcbearer->bpairs, 0, sizeof(bcbearer->bpairs));
+
+	for (pri = TIPC_MAX_LINK_PRI; pri >= 0; pri--) {
+
+		if (!bp_temp[pri].primary)
+			continue;
+
+		bp_curr->primary = bp_temp[pri].primary;
+
+		if (bp_temp[pri].secondary) {
+			if (tipc_nmap_equal(&bp_temp[pri].primary->nodes,
+					    &bp_temp[pri].secondary->nodes)) {
+				bp_curr->secondary = bp_temp[pri].secondary;
+			} else {	/* node sets differ: separate entries */
+				bp_curr++;
+				bp_curr->primary = bp_temp[pri].secondary;
+			}
+		}
+
+		bp_curr++;
+	}
+
+	spin_unlock_bh(&bc_lock);
+}
+
+/**
+ * tipc_bcbearer_push - unblock the broadcast pseudo-bearer and push packets
+ * 
+ * Caller must hold no locks; bc_lock is taken here.
+ * NOTE(review): tipc_bearer_lock_push() calls back into this function when
+ * no congested links remain, with bc_lock still held - confirm no deadlock.
+ */
+
+void tipc_bcbearer_push(void)
+{
+	struct bearer *b_ptr;
+
+	spin_lock_bh(&bc_lock);
+	b_ptr = &bcbearer->bearer;
+	if (b_ptr->publ.blocked) {
+		b_ptr->publ.blocked = 0;
+		tipc_bearer_lock_push(b_ptr);
+	}
+	spin_unlock_bh(&bc_lock);
+}
+
+/* tipc_bclink_stats - print broadcast link counters into buf; 0 if no link */
+int tipc_bclink_stats(char *buf, const u32 buf_size)
+{
+	struct print_buf pb;
+
+	if (!bcl)
+		return 0;
+
+	tipc_printbuf_init(&pb, buf, buf_size);
+
+	spin_lock_bh(&bc_lock);
+
+	tipc_printf(&pb, "Link <%s>\n"
+		         "  Window:%u packets\n", 
+		    bcl->name, bcl->queue_limit[0]);
+	tipc_printf(&pb, "  RX packets:%u fragments:%u/%u bundles:%u/%u\n", 
+		    bcl->stats.recv_info,
+		    bcl->stats.recv_fragments,
+		    bcl->stats.recv_fragmented,
+		    bcl->stats.recv_bundles,
+		    bcl->stats.recv_bundled);
+	tipc_printf(&pb, "  TX packets:%u fragments:%u/%u bundles:%u/%u\n", 
+		    bcl->stats.sent_info,
+		    bcl->stats.sent_fragments,
+		    bcl->stats.sent_fragmented, 
+		    bcl->stats.sent_bundles,
+		    bcl->stats.sent_bundled);
+	tipc_printf(&pb, "  RX naks:%u defs:%u dups:%u\n", 
+		    bcl->stats.recv_nacks,
+		    bcl->stats.deferred_recv, 
+		    bcl->stats.duplicates);
+	tipc_printf(&pb, "  TX naks:%u acks:%u dups:%u\n", 
+		    bcl->stats.sent_nacks, 
+		    bcl->stats.sent_acks, 
+		    bcl->stats.retransmitted);
+	tipc_printf(&pb, "  Congestion bearer:%u link:%u  Send queue max:%u avg:%u\n",
+		    bcl->stats.bearer_congs,
+		    bcl->stats.link_congs,
+		    bcl->stats.max_queue_sz,
+		    bcl->stats.queue_sz_counts
+		    ? (bcl->stats.accu_queue_sz / bcl->stats.queue_sz_counts)
+		    : 0);
+
+	spin_unlock_bh(&bc_lock);
+	return tipc_printbuf_validate(&pb);
+}
+
+int tipc_bclink_reset_stats(void)
+{
+	if (!bcl)	/* broadcast link has not been created */
+		return -ENOPROTOOPT;
+
+	spin_lock_bh(&bc_lock);
+	memset(&bcl->stats, 0, sizeof(bcl->stats));	/* zero all counters */
+	spin_unlock_bh(&bc_lock);
+	return TIPC_OK;
+}
+
+int tipc_bclink_set_queue_limits(u32 limit)
+{
+	if (!bcl)	/* broadcast link has not been created */
+		return -ENOPROTOOPT;
+	if ((limit < TIPC_MIN_LINK_WIN) || (limit > TIPC_MAX_LINK_WIN))
+		return -EINVAL;	/* window outside the permitted range */
+
+	spin_lock_bh(&bc_lock);
+	tipc_link_set_queue_limits(bcl, limit);
+	spin_unlock_bh(&bc_lock);
+	return TIPC_OK;
+}
+
+/* tipc_bclink_init - create bcast link and pseudo-bearer; TIPC_OK or -ENOMEM */
+int tipc_bclink_init(void)
+{
+	bcbearer = kmalloc(sizeof(*bcbearer), GFP_ATOMIC);
+	bclink = kmalloc(sizeof(*bclink), GFP_ATOMIC);
+	if (!bcbearer || !bclink) {
+ nomem:
+		warn("Memory squeeze; Failed to create multicast link\n");
+		bcl = NULL;	/* may already point into bclink, freed below */
+		kfree(bcbearer);
+		bcbearer = NULL;
+		kfree(bclink);
+		bclink = NULL;
+		return -ENOMEM;
+	}
+	memset(bcbearer, 0, sizeof(struct bcbearer));
+	INIT_LIST_HEAD(&bcbearer->bearer.cong_links);
+	bcbearer->bearer.media = &bcbearer->media;
+	bcbearer->media.send_msg = tipc_bcbearer_send;
+	sprintf(bcbearer->media.name, "tipc-multicast");
+
+	bcl = &bclink->link;
+	memset(bclink, 0, sizeof(struct bclink));
+	INIT_LIST_HEAD(&bcl->waiting_ports);
+	bcl->next_out_no = 1;
+	bclink->node.lock = SPIN_LOCK_UNLOCKED;
+	bcl->owner = &bclink->node;
+	bcl->max_pkt = MAX_PKT_DEFAULT_MCAST;
+	tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT);
+	bcl->b_ptr = &bcbearer->bearer;
+	bcl->state = WORKING_WORKING;
+	sprintf(bcl->name, "%s", tipc_bclink_name);	/* name is data, not fmt */
+
+	if (BCLINK_LOG_BUF_SIZE) {
+		char *pb = kmalloc(BCLINK_LOG_BUF_SIZE, GFP_ATOMIC);
+
+		if (!pb)
+			goto nomem;
+		tipc_printbuf_init(&bcl->print_buf, pb, BCLINK_LOG_BUF_SIZE);
+	}
+	return TIPC_OK;
+}
+
+void tipc_bclink_stop(void)
+{
+	spin_lock_bh(&bc_lock);
+	if (bcbearer) {	/* NULL after a failed init or an earlier stop */
+		tipc_link_stop(bcl);
+		if (BCLINK_LOG_BUF_SIZE)
+			kfree(bcl->print_buf.buf);
+		bcl = NULL;	/* points into bclink, which is freed next */
+		kfree(bclink);
+		bclink = NULL;
+		kfree(bcbearer);
+		bcbearer = NULL;
+	}
+	spin_unlock_bh(&bc_lock);
+}
+
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
new file mode 100644
index 00000000000..0e3be2ab330
--- /dev/null
+++ b/net/tipc/bcast.h
@@ -0,0 +1,223 @@
+/*
+ * net/tipc/bcast.h: Include file for TIPC broadcast code
+ * 
+ * Copyright (c) 2003-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_BCAST_H
+#define _TIPC_BCAST_H
+
+#define MAX_NODES 4096
+#define WSIZE 32
+
+/**
+ * struct node_map - set of node identifiers
+ * @count: # of nodes in set (kept equal to the number of bits set in @map)
+ * @map: bitmap with one bit per node identifier, WSIZE bits per word
+ */
+
+struct node_map {
+	u32 count;
+	u32 map[MAX_NODES / WSIZE];
+};
+
+
+#define PLSIZE 32
+
+/**
+ * struct port_list - set of node local destination ports
+ * @count: # of ports in set (only valid for first entry in list)
+ * @next: pointer to next entry in list (NULL in the last entry)
+ * @ports: port references; filled in order, PLSIZE per list entry
+ */
+
+struct port_list {
+	int count;
+	struct port_list *next;
+	u32 ports[PLSIZE];
+};
+
+
+struct node;
+
+extern char tipc_bclink_name[];
+
+
+/**
+ * tipc_nmap_get - test node membership; returns 1 if in the map, else 0
+ */
+
+static inline int tipc_nmap_get(struct node_map *nm_ptr, u32 node)
+{
+	int n = tipc_node(node);
+	int w = n / WSIZE;
+	int b = n % WSIZE;
+
+	return (nm_ptr->map[w] & (1u << b)) != 0;	/* 1u: b may be 31 */
+}
+
+/**
+ * tipc_nmap_add - add a node to a node map (no-op if already present)
+ */
+
+static inline void tipc_nmap_add(struct node_map *nm_ptr, u32 node)
+{
+	int n = tipc_node(node);
+	int w = n / WSIZE;
+	u32 mask = (1u << (n % WSIZE));	/* unsigned: shift may reach bit 31 */
+
+	if ((nm_ptr->map[w] & mask) == 0) {
+		nm_ptr->count++;
+		nm_ptr->map[w] |= mask;
+	}
+}
+
+/**
+ * tipc_nmap_remove - remove a node from a node map (no-op if not present)
+ */
+
+static inline void tipc_nmap_remove(struct node_map *nm_ptr, u32 node)
+{
+	int n = tipc_node(node);
+	int w = n / WSIZE;
+	u32 mask = (1u << (n % WSIZE));	/* unsigned: shift may reach bit 31 */
+
+	if ((nm_ptr->map[w] & mask) != 0) {
+		nm_ptr->map[w] &= ~mask;
+		nm_ptr->count--;
+	}
+}
+
+/**
+ * tipc_nmap_equal - test for equality of node maps (count and bitmap)
+ */
+
+static inline int tipc_nmap_equal(struct node_map *nm_a, struct node_map *nm_b)
+{
+	return !memcmp(nm_a, nm_b, sizeof(*nm_a));
+}
+
+/**
+ * tipc_nmap_diff - find differences between node maps
+ * @nm_a: input node map A
+ * @nm_b: input node map B
+ * @nm_diff: output map A-B (nodes of A not in B); must not alias A or B
+ */
+
+static inline void tipc_nmap_diff(struct node_map *nm_a, struct node_map *nm_b,
+				  struct node_map *nm_diff)
+{
+	int stop = sizeof(nm_a->map) / sizeof(u32);
+	int w;
+	int b;
+	u32 map;
+
+	memset(nm_diff, 0, sizeof(*nm_diff));
+	for (w = 0; w < stop; w++) {
+		map = nm_a->map[w] & ~nm_b->map[w];	/* bits in A, not in B */
+		nm_diff->map[w] = map;
+		if (map != 0) {
+			for (b = 0 ; b < WSIZE; b++) {
+				if (map & (1u << b))	/* 1u: b reaches 31 */
+					nm_diff->count++;
+			}
+		}
+	}
+}
+
+/**
+ * tipc_port_list_add - add a port to a port list, ensuring no duplicates
+ */
+
+static inline void tipc_port_list_add(struct port_list *pl_ptr, u32 port)
+{
+	struct port_list *item = pl_ptr;
+	int i;
+	int item_sz = PLSIZE;
+	int cnt = pl_ptr->count;	/* ports not yet scanned */
+
+	for (; ; cnt -= item_sz, item = item->next) {
+		if (cnt < PLSIZE)
+			item_sz = cnt;	/* last entry is only partly filled */
+		for (i = 0; i < item_sz; i++)
+			if (item->ports[i] == port)
+				return;	/* already present */
+		if (i < PLSIZE) {	/* free slot in this entry */
+			item->ports[i] = port;
+			pl_ptr->count++;
+			return;
+		}
+		if (!item->next) {	/* chain is full: grow it by one entry */
+			item->next = kmalloc(sizeof(*item), GFP_ATOMIC);
+			if (!item->next) {
+				warn("Memory squeeze: multicast destination port list is incomplete\n");
+				return;
+			}
+			item->next->next = NULL;
+		}
+	}
+}
+
+/**
+ * tipc_port_list_free - free dynamically created entries in port_list chain
+ * 
+ * Note: First item is on stack, so it doesn't need to be released
+ */
+
+static inline void tipc_port_list_free(struct port_list *pl_ptr)
+{
+	struct port_list *item;
+	struct port_list *next;
+
+	for (item = pl_ptr->next; item; item = next) {
+		next = item->next;	/* read before item is freed */
+		kfree(item);
+	}
+}
+
+
+int  tipc_bclink_init(void);
+void tipc_bclink_stop(void);
+void tipc_bclink_acknowledge(struct node *n_ptr, u32 acked);
+int  tipc_bclink_send_msg(struct sk_buff *buf);
+void tipc_bclink_recv_pkt(struct sk_buff *buf);
+u32  tipc_bclink_get_last_sent(void);
+u32  tipc_bclink_acks_missing(struct node *n_ptr);
+void tipc_bclink_check_gap(struct node *n_ptr, u32 seqno);
+int  tipc_bclink_stats(char *stats_buf, const u32 buf_size);
+int  tipc_bclink_reset_stats(void);
+int  tipc_bclink_set_queue_limits(u32 limit);
+void tipc_bcbearer_sort(void);
+void tipc_bcbearer_push(void);
+
+#endif
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
new file mode 100644
index 00000000000..64dcb0f3a8b
--- /dev/null
+++ b/net/tipc/bearer.c
@@ -0,0 +1,699 @@
+/*
+ * net/tipc/bearer.c: TIPC bearer code
+ * 
+ * Copyright (c) 1996-2006, Ericsson AB
+ * Copyright (c) 2004-2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "config.h"
+#include "dbg.h"
+#include "bearer.h"
+#include "link.h"
+#include "port.h"
+#include "discover.h"
+#include "bcast.h"
+
+#define MAX_ADDR_STR 32
+
+static struct media *media_list = NULL;	/* MAX_MEDIA slots once allocated */
+static u32 media_count = 0;	/* # of registered entries in media_list */
+
+struct bearer *tipc_bearers = NULL;	/* MAX_BEARERS slots once allocated */
+
+/**
+ * media_name_valid - check length and character set of a media name
+ * 
+ * Returns 1 if the name fits and uses only tipc_alphabet, otherwise 0.
+ */
+
+static int media_name_valid(const char *name)
+{
+	u32 name_len = strlen(name);
+
+	/* the name and its terminating NUL must fit TIPC_MAX_MEDIA_NAME bytes */
+	if ((name_len + 1) > TIPC_MAX_MEDIA_NAME)
+		return 0;
+	return (strspn(name, tipc_alphabet) == name_len);
+}
+
+/**
+ * media_find - look up a registered media object by name; NULL if none
+ */
+
+static struct media *media_find(const char *name)
+{
+	u32 idx;
+
+	/* only the first media_count slots of media_list are registered */
+	for (idx = 0; idx < media_count; idx++) {
+		if (strcmp(media_list[idx].name, name) == 0)
+			return &media_list[idx];
+	}
+	return NULL;
+}
+
+/**
+ * tipc_register_media - register a media type with a unique name and type id
+ * Bearers for this media type must be activated separately at a later stage.
+ * Returns 0 on success, -EINVAL if an argument is rejected or no slot is free.
+ */
+
+int  tipc_register_media(u32 media_type,
+			 char *name,
+			 int (*enable)(struct tipc_bearer *),
+			 void (*disable)(struct tipc_bearer *),
+			 int (*send_msg)(struct sk_buff *,
+					 struct tipc_bearer *,
+					 struct tipc_media_addr *),
+			 char *(*addr2str)(struct tipc_media_addr *a,
+					   char *str_buf, int str_size),
+			 struct tipc_media_addr *bcast_addr,
+			 const u32 bearer_priority,
+			 const u32 link_tolerance,  /* [ms] */
+			 const u32 send_window_limit)
+{
+	struct media *m_ptr;
+	u32 media_id;
+	u32 i;
+	int res = -EINVAL;
+
+	write_lock_bh(&tipc_net_lock);
+	if (!media_list)	/* media table has not been allocated */
+		goto exit;
+
+	if (!media_name_valid(name)) {
+		warn("Media registration error: illegal name <%s>\n", name);
+		goto exit;
+	}
+	if (!bcast_addr) {
+		warn("Media registration error: no broadcast address supplied\n");
+		goto exit;
+	}
+	if ((bearer_priority < TIPC_MIN_LINK_PRI) ||
+	    (bearer_priority > TIPC_MAX_LINK_PRI)) {	/* outside the range */
+		warn("Media registration error: priority %u\n", bearer_priority);
+		goto exit;
+	}
+	if ((link_tolerance < TIPC_MIN_LINK_TOL) ||
+	    (link_tolerance > TIPC_MAX_LINK_TOL)) {
+		warn("Media registration error: tolerance %u\n", link_tolerance);
+		goto exit;
+	}
+
+	media_id = media_count++;	/* claim a slot; undone on failure */
+	if (media_id >= MAX_MEDIA) {
+		warn("Attempt to register more than %u media\n", MAX_MEDIA);
+		media_count--;
+		goto exit;
+	}
+	for (i = 0; i < media_id; i++) {
+		if (media_list[i].type_id == media_type) {
+			warn("Attempt to register second media with type %u\n",
+			     media_type);
+			media_count--;
+			goto exit;
+		}
+		if (!strcmp(name, media_list[i].name)) {
+			warn("Attempt to re-register media name <%s>\n", name);
+			media_count--;
+			goto exit;
+		}
+	}
+
+	m_ptr = &media_list[media_id];
+	m_ptr->type_id = media_type;
+	m_ptr->send_msg = send_msg;
+	m_ptr->enable_bearer = enable;
+	m_ptr->disable_bearer = disable;
+	m_ptr->addr2str = addr2str;
+	memcpy(&m_ptr->bcast_addr, bcast_addr, sizeof(*bcast_addr));
+	m_ptr->bcast = 1;
+	strcpy(m_ptr->name, name);	/* length checked by media_name_valid() */
+	m_ptr->priority = bearer_priority;
+	m_ptr->tolerance = link_tolerance;
+	m_ptr->window = send_window_limit;
+	dbg("Media <%s> registered\n", name);
+	res = 0;
+exit:
+	write_unlock_bh(&tipc_net_lock);
+	return res;
+}
+
+/**
+ * tipc_media_addr_printf - record media address in print buffer
+ */
+
+void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a)
+{
+	struct media *m_ptr;
+	u32 media_type;
+	u32 i;
+
+	media_type = ntohl(a->type);	/* a->type is in network byte order */
+	for (i = 0, m_ptr = media_list; i < media_count; i++, m_ptr++) {
+		if (m_ptr->type_id == media_type)
+			break;
+	}
+
+	if ((i < media_count) && (m_ptr->addr2str != NULL)) {
+		char addr_str[MAX_ADDR_STR];
+
+		tipc_printf(pb, "%s(%s) ", m_ptr->name, 
+			    m_ptr->addr2str(a, addr_str, sizeof(addr_str)));
+	} else {	/* unknown media, or no formatter: dump raw bytes */
+		unchar *addr = (unchar *)&a->dev_addr;
+
+		tipc_printf(pb, "UNKNOWN(%u):", media_type);
+		for (i = 0; i < (sizeof(*a) - sizeof(a->type)); i++) {
+			tipc_printf(pb, "%02x ", addr[i]);
+		}
+	}
+}
+
+/**
+ * tipc_media_get_names - build reply buffer of registered media names
+ */
+
+struct sk_buff *tipc_media_get_names(void)
+{
+	struct sk_buff *buf;
+	struct media *m_ptr;
+	int i;
+
+	buf = tipc_cfg_reply_alloc(MAX_MEDIA * TLV_SPACE(TIPC_MAX_MEDIA_NAME));
+	if (!buf)
+		return NULL;	/* allocation failed; nothing to report */
+
+	read_lock_bh(&tipc_net_lock);
+	for (i = 0, m_ptr = media_list; i < media_count; i++, m_ptr++) {
+		tipc_cfg_append_tlv(buf, TIPC_TLV_MEDIA_NAME, m_ptr->name, 
+				    strlen(m_ptr->name) + 1);
+	}
+	read_unlock_bh(&tipc_net_lock);
+	return buf;
+}
+
+/**
+ * bearer_name_validate - validate & (optionally) deconstruct bearer name
+ * @name: ptr to bearer name string, expected form "media:interface"
+ * @name_parts: ptr to area for bearer name components (or NULL if not needed)
+ * 
+ * Returns 1 if bearer name is valid, otherwise 0.
+ */
+
+static int bearer_name_validate(const char *name, 
+				struct bearer_name *name_parts)
+{
+	char name_copy[TIPC_MAX_BEARER_NAME];
+	char *media_name;
+	char *if_name;
+	u32 media_len;
+	u32 if_len;
+
+	/* copy bearer name & ensure length is OK */
+
+	name_copy[TIPC_MAX_BEARER_NAME - 1] = 0;
+	/* need above in case non-Posix strncpy() doesn't pad with nulls */
+	strncpy(name_copy, name, TIPC_MAX_BEARER_NAME);
+	if (name_copy[TIPC_MAX_BEARER_NAME - 1] != 0)
+		return 0;
+
+	/* ensure all component parts of bearer name are present */
+
+	media_name = name_copy;
+	if ((if_name = strchr(media_name, ':')) == NULL)
+		return 0;
+	*(if_name++) = 0;	/* split the copy at the first ':' */
+	media_len = if_name - media_name;	/* counts the terminator */
+	if_len = strlen(if_name) + 1;
+
+	/* validate component parts of bearer name */
+
+	if ((media_len <= 1) || (media_len > TIPC_MAX_MEDIA_NAME) || 
+	    (if_len <= 1) || (if_len > TIPC_MAX_IF_NAME) || 
+	    (strspn(media_name, tipc_alphabet) != (media_len - 1)) ||
+	    (strspn(if_name, tipc_alphabet) != (if_len - 1)))
+		return 0;
+
+	/* return bearer name components, if necessary */
+
+	if (name_parts) {
+		strcpy(name_parts->media_name, media_name);
+		strcpy(name_parts->if_name, if_name);
+	}
+	return 1;
+}
+
+/**
+ * bearer_find - locate the active bearer with the given name; NULL if none
+ */
+
+static struct bearer *bearer_find(const char *name)
+{
+	struct bearer *cand = tipc_bearers;
+	struct bearer *end = tipc_bearers + MAX_BEARERS;
+
+	for (; cand != end; cand++) {
+		if (cand->active && (strcmp(cand->publ.name, name) == 0))
+			return cand;
+	}
+	return NULL;
+}
+
+/**
+ * tipc_bearer_find_interface - locates active bearer by its interface name
+ */
+
+struct bearer *tipc_bearer_find_interface(const char *if_name)
+{
+	struct bearer *b_ptr;
+	char *b_if_name;
+	u32 i;
+
+	for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) {
+		if (!b_ptr->active)
+			continue;
+		b_if_name = strchr(b_ptr->publ.name, ':') + 1;	/* after ':' */
+		if (!strcmp(b_if_name, if_name))
+			return b_ptr;
+	}
+	return 0;
+}
+
+/**
+ * tipc_bearer_get_names - build reply buffer of active bearer names
+ */
+
+struct sk_buff *tipc_bearer_get_names(void)
+{
+	struct sk_buff *buf;
+	struct media *m_ptr;
+	struct bearer *b_ptr;
+	int i, j;
+
+	buf = tipc_cfg_reply_alloc(MAX_BEARERS * TLV_SPACE(TIPC_MAX_BEARER_NAME));
+	if (!buf)
+		return NULL;
+
+	read_lock_bh(&tipc_net_lock);
+	for (i = 0, m_ptr = media_list; i < media_count; i++, m_ptr++) {
+		for (j = 0; j < MAX_BEARERS; j++) {	/* grouped by media */
+			b_ptr = &tipc_bearers[j];
+			if (b_ptr->active && (b_ptr->media == m_ptr)) {
+				tipc_cfg_append_tlv(buf, TIPC_TLV_BEARER_NAME, 
+						    b_ptr->publ.name, 
+						    strlen(b_ptr->publ.name) + 1);
+			}
+		}
+	}
+	read_unlock_bh(&tipc_net_lock);
+	return buf;
+}
+
+void tipc_bearer_add_dest(struct bearer *b_ptr, u32 dest)
+{
+	tipc_nmap_add(&b_ptr->nodes, dest);	/* record dest in node set */
+	tipc_disc_update_link_req(b_ptr->link_req);
+	tipc_bcbearer_sort();	/* node set changed: rebuild bearer pairs */
+}
+
+void tipc_bearer_remove_dest(struct bearer *b_ptr, u32 dest)
+{
+	tipc_nmap_remove(&b_ptr->nodes, dest);	/* drop dest from node set */
+	tipc_disc_update_link_req(b_ptr->link_req);
+	tipc_bcbearer_sort();	/* node set changed: rebuild bearer pairs */
+}
+
+/*
+ * bearer_push(): Resolve bearer congestion. Force the waiting
+ * links to push out their unsent packets, one packet per link
+ * per iteration, until all packets are gone or congestion recurs.
+ * 'tipc_net_lock' is read_locked when this function is called
+ * bearer.lock must be taken before calling
+ * Returns 1 if no congested links remain, 0 if blocked or still congested
+ */
+static int bearer_push(struct bearer *b_ptr)
+{
+	u32 res = TIPC_OK;
+	struct link *ln, *tln;
+
+	if (b_ptr->publ.blocked)
+		return 0;
+
+	while (!list_empty(&b_ptr->cong_links) && (res != PUSH_FAILED)) {
+		list_for_each_entry_safe(ln, tln, &b_ptr->cong_links, link_list) {
+			res = tipc_link_push_packet(ln);
+			if (res == PUSH_FAILED)
+				break;
+			if (res == PUSH_FINISHED)
+				list_move_tail(&ln->link_list, &b_ptr->links);
+		}
+	}
+	return list_empty(&b_ptr->cong_links);
+}
+
+/* tipc_bearer_lock_push - run bearer_push() with the bearer lock held */
+void tipc_bearer_lock_push(struct bearer *b_ptr)
+{
+	int res;
+
+	spin_lock_bh(&b_ptr->publ.lock);
+	res = bearer_push(b_ptr);
+	spin_unlock_bh(&b_ptr->publ.lock);
+	if (res)
+		tipc_bcbearer_push();
+}
+
+/*
+ * tipc_continue - re-enable a bearer after congestion or blocking:
+ * clears 'blocked' and schedules a push if links are waiting on the bearer.
+ */
+void tipc_continue(struct tipc_bearer *tb_ptr)
+{
+	struct bearer *b_ptr = (struct bearer *)tb_ptr;
+
+	spin_lock_bh(&b_ptr->publ.lock);
+	b_ptr->continue_count++;
+	if (!list_empty(&b_ptr->cong_links))
+		tipc_k_signal((Handler)tipc_bearer_lock_push, (unsigned long)b_ptr);
+	b_ptr->publ.blocked = 0;
+	spin_unlock_bh(&b_ptr->publ.lock);
+}
+
+/*
+ * tipc_bearer_schedule_unlocked - queue a link on the bearer's
+ * congested-link list. This is called when somebody tries to send
+ * a message via this link while the bearer is congested.
+ * 'tipc_net_lock' is read-locked by the caller, and
+ * bearer.lock is already held by the caller (it is not taken here).
+ */
+
+static void tipc_bearer_schedule_unlocked(struct bearer *b_ptr, struct link *l_ptr)
+{
+	list_move_tail(&l_ptr->link_list, &b_ptr->cong_links);
+}
+
+/*
+ * tipc_bearer_schedule - queue a link on the bearer's congested-link
+ * list, taking bearer.lock around the move. This is called when
+ * somebody tries to send a message via this link while the bearer
+ * is congested. 'tipc_net_lock' is read-locked by the caller;
+ * bearer.lock must be free, since it is taken here.
+ */
+
+void tipc_bearer_schedule(struct bearer *b_ptr, struct link *l_ptr)
+{
+	spin_lock_bh(&b_ptr->publ.lock);
+	tipc_bearer_schedule_unlocked(b_ptr, l_ptr);
+	spin_unlock_bh(&b_ptr->publ.lock);
+}
+
+
+/*
+ * tipc_bearer_resolve_congestion(): Check if there is bearer congestion,
+ * and if there is, try to resolve it. Returns 1 if the bearer is clear, or
+ * 0 after queuing l_ptr as congested. Caller has read-locked 'tipc_net_lock'.
+ */
+int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr)
+{
+	int res = 1;
+
+	if (list_empty(&b_ptr->cong_links))
+		return 1;
+	spin_lock_bh(&b_ptr->publ.lock);
+	if (!bearer_push(b_ptr)) {
+		tipc_bearer_schedule_unlocked(b_ptr, l_ptr);
+		res = 0;
+	}
+	spin_unlock_bh(&b_ptr->publ.lock);
+	return res;
+}
+
+
+/**
+ * tipc_enable_bearer - enable named bearer; returns 0, else an error code
+ */
+
+int tipc_enable_bearer(const char *name, u32 bcast_scope, u32 priority)
+{
+	struct bearer *b_ptr;
+	struct media *m_ptr;
+	struct bearer_name b_name;
+	char addr_string[16];
+	u32 bearer_id;
+	u32 with_this_prio;
+	u32 i;
+	int res = -EINVAL;
+
+	if (tipc_mode != TIPC_NET_MODE)
+		return -ENOPROTOOPT;
+
+	if (!bearer_name_validate(name, &b_name) ||
+	    !tipc_addr_domain_valid(bcast_scope) ||
+	    !in_scope(bcast_scope, tipc_own_addr))
+		return -EINVAL;
+
+	if ((priority < TIPC_MIN_LINK_PRI ||
+	     priority > TIPC_MAX_LINK_PRI) &&
+	    (priority != TIPC_MEDIA_LINK_PRI))
+		return -EINVAL;
+
+	write_lock_bh(&tipc_net_lock);
+	if (!tipc_bearers)
+		goto failed;
+
+	m_ptr = media_find(b_name.media_name);
+	if (!m_ptr) {
+		warn("No media <%s>\n", b_name.media_name);
+		goto failed;
+	}
+
+	if (priority == TIPC_MEDIA_LINK_PRI)
+		priority = m_ptr->priority;	/* inherit the media default */
+	/* Find a free slot; at most two bearers may share one priority */
+restart:
+	bearer_id = MAX_BEARERS;
+	with_this_prio = 1;	/* counts the bearer being enabled */
+	for (i = MAX_BEARERS; i-- != 0; ) {	/* ends on lowest free slot */
+		if (!tipc_bearers[i].active) {
+			bearer_id = i;
+			continue;
+		}
+		if (!strcmp(name, tipc_bearers[i].publ.name)) {
+			warn("Bearer <%s> already enabled\n", name);
+			goto failed;
+		}
+		if ((tipc_bearers[i].priority == priority) &&
+		    (++with_this_prio > 2)) {
+			if (priority-- == 0) {
+				warn("Third bearer <%s> with priority %u, unable to lower to %u\n",
+				     name, priority + 1, priority);
+				goto failed;
+			}
+			warn("Third bearer <%s> with priority %u, lowering to %u\n",
+			     name, priority + 1, priority);
+			goto restart;
+		}
+	}
+	if (bearer_id >= MAX_BEARERS) {
+		warn("Attempt to enable more than %d bearers\n", MAX_BEARERS);
+		goto failed;
+	}
+
+	b_ptr = &tipc_bearers[bearer_id];
+	memset(b_ptr, 0, sizeof(struct bearer));
+
+	strcpy(b_ptr->publ.name, name);
+	res = m_ptr->enable_bearer(&b_ptr->publ);
+	if (res) {
+		warn("Failed to enable bearer <%s>\n", name);
+		goto failed;
+	}
+
+	b_ptr->identity = bearer_id;
+	b_ptr->media = m_ptr;
+	b_ptr->net_plane = bearer_id + 'A';	/* 'A' for slot 0, 'B' ... */
+	b_ptr->active = 1;
+	b_ptr->detect_scope = bcast_scope;
+	b_ptr->priority = priority;
+	INIT_LIST_HEAD(&b_ptr->cong_links);
+	INIT_LIST_HEAD(&b_ptr->links);
+	if (m_ptr->bcast) {
+		b_ptr->link_req = tipc_disc_init_link_req(b_ptr, &m_ptr->bcast_addr,
+							  bcast_scope, 2);
+	}
+	b_ptr->publ.lock = SPIN_LOCK_UNLOCKED;
+	write_unlock_bh(&tipc_net_lock);
+	info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
+	     name, addr_string_fill(addr_string, bcast_scope), priority);
+	return 0;
+failed:
+	write_unlock_bh(&tipc_net_lock);
+	return res;
+}
+
+/**
+ * tipc_block_bearer - block the named bearer and reset all of its links
+ * Returns TIPC_OK, -ENOPROTOOPT (not in net mode) or -EINVAL (unknown name)
+ */
+
+int tipc_block_bearer(const char *name)
+{
+	struct bearer *b_ptr = 0;
+	struct link *l_ptr;
+	struct link *temp_l_ptr;
+
+	if (tipc_mode != TIPC_NET_MODE)
+		return -ENOPROTOOPT;
+
+	read_lock_bh(&tipc_net_lock);
+	b_ptr = bearer_find(name);
+	if (!b_ptr) {
+		warn("Attempt to block unknown bearer <%s>\n", name);
+		read_unlock_bh(&tipc_net_lock);
+		return -EINVAL;
+	}
+
+	spin_lock_bh(&b_ptr->publ.lock);
+	b_ptr->publ.blocked = 1;
+	list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
+		struct node *n_ptr = l_ptr->owner;
+
+		spin_lock_bh(&n_ptr->lock);	/* owner node lock for reset */
+		tipc_link_reset(l_ptr);
+		spin_unlock_bh(&n_ptr->lock);
+	}
+	spin_unlock_bh(&b_ptr->publ.lock);
+	read_unlock_bh(&tipc_net_lock);
+	info("Blocked bearer <%s>\n", name);
+	return TIPC_OK;
+}
+
+/**
+ * bearer_disable - shut down the named bearer and delete all of its links
+ * Note: Caller must hold tipc_net_lock for writing; it is dropped and
+ * retaken around the media's disable_bearer() callback.
+ */
+
+static int bearer_disable(const char *name)
+{
+	struct bearer *b_ptr;
+	struct link *l_ptr;
+	struct link *temp_l_ptr;
+
+	if (tipc_mode != TIPC_NET_MODE)
+		return -ENOPROTOOPT;
+
+	b_ptr = bearer_find(name);
+	if (!b_ptr) {
+		warn("Attempt to disable unknown bearer <%s>\n", name);
+		return -EINVAL;
+	}
+
+	tipc_disc_stop_link_req(b_ptr->link_req);
+	spin_lock_bh(&b_ptr->publ.lock);
+	b_ptr->link_req = NULL;
+	b_ptr->publ.blocked = 1;
+	if (b_ptr->media->disable_bearer) {
+		spin_unlock_bh(&b_ptr->publ.lock);
+		write_unlock_bh(&tipc_net_lock);
+		b_ptr->media->disable_bearer(&b_ptr->publ);
+		write_lock_bh(&tipc_net_lock);
+		spin_lock_bh(&b_ptr->publ.lock);
+	}
+	list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
+		tipc_link_delete(l_ptr);
+	}
+	spin_unlock_bh(&b_ptr->publ.lock);
+	info("Disabled bearer <%s>\n", name);
+	memset(b_ptr, 0, sizeof(struct bearer));	/* slot is free again */
+	return TIPC_OK;
+}
+
+/* tipc_disable_bearer - bearer_disable() wrapper that takes tipc_net_lock */
+int tipc_disable_bearer(const char *name)
+{
+	int res;
+
+	write_lock_bh(&tipc_net_lock);
+	res = bearer_disable(name);
+	write_unlock_bh(&tipc_net_lock);
+	return res;
+}
+
+/* tipc_bearer_init - allocate zeroed bearer and media tables, or -ENOMEM */
+int tipc_bearer_init(void)
+{
+	int res;
+
+	write_lock_bh(&tipc_net_lock);
+	tipc_bearers = kmalloc(MAX_BEARERS * sizeof(struct bearer), GFP_ATOMIC);
+	media_list = kmalloc(MAX_MEDIA * sizeof(struct media), GFP_ATOMIC);
+	if (tipc_bearers && media_list) {
+		memset(tipc_bearers, 0, MAX_BEARERS * sizeof(struct bearer));
+		memset(media_list, 0, MAX_MEDIA * sizeof(struct media));
+		res = TIPC_OK;
+	} else {
+		kfree(tipc_bearers);
+		kfree(media_list);
+		tipc_bearers = 0;
+		media_list = 0;
+		res = -ENOMEM;
+	}
+	write_unlock_bh(&tipc_net_lock);
+	return res;
+}
+
+/* tipc_bearer_stop - block, then disable, all bearers; free both tables */
+void tipc_bearer_stop(void)
+{
+	u32 i;
+
+	if (!tipc_bearers)
+		return;
+	/* NOTE(review): bearer_disable() expects tipc_net_lock - held by caller? */
+	for (i = 0; i < MAX_BEARERS; i++) {
+		if (tipc_bearers[i].active)
+			tipc_bearers[i].publ.blocked = 1;
+	}
+	for (i = 0; i < MAX_BEARERS; i++) {
+		if (tipc_bearers[i].active)
+			bearer_disable(tipc_bearers[i].publ.name);
+	}
+	kfree(tipc_bearers);
+	kfree(media_list);
+	tipc_bearers = 0;
+	media_list = 0;
+	media_count = 0;
+}
+
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
new file mode 100644
index 00000000000..c4e7c1c3655
--- /dev/null
+++ b/net/tipc/bearer.h
@@ -0,0 +1,170 @@
+/*
+ * net/tipc/bearer.h: Include file for TIPC bearer code
+ * 
+ * Copyright (c) 1996-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_BEARER_H
+#define _TIPC_BEARER_H
+
+#include "core.h"
+#include "bcast.h"
+
+#define MAX_BEARERS 8
+#define MAX_MEDIA 4
+
+
+/**
+ * struct media - TIPC media information available to internal users
+ * @send_msg: routine which handles buffer transmission (returns 0 if sent)
+ * @enable_bearer: routine which enables a bearer
+ * @disable_bearer: routine which disables a bearer (optional, may be NULL)
+ * @addr2str: routine which converts bearer's address to string form
+ * @bcast_addr: media address used in broadcasting
+ * @bcast: non-zero if media supports broadcasting [currently mandatory]
+ * @priority: default link (and bearer) priority
+ * @tolerance: default time (in ms) before declaring link failure
+ * @window: default window (in packets) before declaring link congestion
+ * @type_id: TIPC media identifier [defined in tipc_bearer.h]
+ * @name: media name (at most TIPC_MAX_MEDIA_NAME bytes)
+ */
+ 
+struct media {
+	int (*send_msg)(struct sk_buff *buf, 
+			struct tipc_bearer *b_ptr,
+			struct tipc_media_addr *dest);
+	int (*enable_bearer)(struct tipc_bearer *b_ptr);
+	void (*disable_bearer)(struct tipc_bearer *b_ptr);
+	char *(*addr2str)(struct tipc_media_addr *a, 
+			  char *str_buf, int str_size);
+	struct tipc_media_addr bcast_addr;
+	int bcast;
+	u32 priority;
+	u32 tolerance;
+	u32 window;
+	u32 type_id;
+	char name[TIPC_MAX_MEDIA_NAME];
+};
+
+/**
+ * struct bearer - TIPC bearer information available to internal users
+ * @publ: bearer information available to privileged users
+ * @media: ptr to media structure associated with bearer
+ * @priority: default link priority for bearer
+ * @detect_scope: network address mask used during automatic link creation
+ * @identity: array index of this bearer within TIPC bearer array
+ * @link_req: ptr to (optional) structure making periodic link setup requests
+ * @links: list of non-congested links associated with bearer
+ * @cong_links: list of congested links associated with bearer
+ * @continue_count: # of times bearer has resumed after congestion or blocking
+ * @active: non-zero if bearer structure represents a bearer
+ * @net_plane: network plane ('A' through 'H') currently associated with bearer
+ * @nodes: indicates which nodes in cluster can be reached through bearer
+ */
+ 
+struct bearer {
+	struct tipc_bearer publ;
+	struct media *media;
+	u32 priority;
+	u32 detect_scope;
+	u32 identity;
+	struct link_req *link_req;
+	struct list_head links;
+	struct list_head cong_links;
+	u32 continue_count;
+	int active;
+	char net_plane;
+	struct node_map nodes;
+};
+
+struct bearer_name {	/* presumably the two parts of a bearer's name - confirm */
+	char media_name[TIPC_MAX_MEDIA_NAME];	/* same bound as struct media's name[] */
+	char if_name[TIPC_MAX_IF_NAME];		/* interface name, TIPC_MAX_IF_NAME bytes max */
+};
+
+struct link;
+
+extern struct bearer *tipc_bearers;
+
+void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a);
+struct sk_buff *tipc_media_get_names(void);
+
+struct sk_buff *tipc_bearer_get_names(void);
+void tipc_bearer_add_dest(struct bearer *b_ptr, u32 dest);
+void tipc_bearer_remove_dest(struct bearer *b_ptr, u32 dest);
+void tipc_bearer_schedule(struct bearer *b_ptr, struct link *l_ptr);
+struct bearer *tipc_bearer_find_interface(const char *if_name);
+int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr);
+int tipc_bearer_init(void);
+void tipc_bearer_stop(void);
+void tipc_bearer_lock_push(struct bearer *b_ptr);
+
+
+/**
+ * tipc_bearer_send - sends buffer to destination over bearer
+ * 
+ * Returns true (1) if successful, or false (0) if unable to send
+ * 
+ * IMPORTANT:
+ * The media send routine must not alter the buffer being passed in
+ * as it may be needed for later retransmission!
+ * 
+ * If the media send routine returns a non-zero value (indicating that 
+ * it was unable to send the buffer), it must:
+ *   1) mark the bearer as blocked,
+ *   2) call tipc_continue() once the bearer is able to send again.
+ * Media types that are unable to meet these two criteria must ensure their
+ * send routine always returns success -- even if the buffer was not sent --
+ * and let TIPC's link code deal with the undelivered message. 
+ */
+
+static inline int tipc_bearer_send(struct bearer *b_ptr, struct sk_buff *buf,
+				   struct tipc_media_addr *dest)
+{
+	return b_ptr->media->send_msg(buf, &b_ptr->publ, dest) == 0;
+}
+
+/**
+ * tipc_bearer_congested - determines if bearer is currently congested
+ */
+
+static inline int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr)
+{
+	if (unlikely(b_ptr->publ.blocked))
+		return 1;	/* a blocked bearer always counts as congested */
+	/* No congested links => free; else congested unless resolution succeeds */
+	return !likely(list_empty(&b_ptr->cong_links)) &&
+	       !tipc_bearer_resolve_congestion(b_ptr, l_ptr);
+}
+
+#endif
diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c
new file mode 100644
index 00000000000..ab974ca1937
--- /dev/null
+++ b/net/tipc/cluster.c
@@ -0,0 +1,576 @@
+/*
+ * net/tipc/cluster.c: TIPC cluster management routines
+ * 
+ * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "cluster.h"
+#include "addr.h"
+#include "node_subscr.h"
+#include "link.h"
+#include "node.h"
+#include "net.h"
+#include "msg.h"
+#include "bearer.h"
+
+void tipc_cltr_multicast(struct cluster *c_ptr, struct sk_buff *buf, 
+			 u32 lower, u32 upper);
+struct sk_buff *tipc_cltr_prepare_routing_msg(u32 data_size, u32 dest);
+
+struct node **tipc_local_nodes = 0;
+struct node_map tipc_cltr_bcast_nodes = {0,{0,}};
+u32 tipc_highest_allowed_slave = 0;
+
+/* Allocate a cluster for @addr and attach it to its zone; NULL on failure */
+struct cluster *tipc_cltr_create(u32 addr)
+{
+	struct _zone *z_ptr;
+	struct cluster *c_ptr;
+	int max_nodes;
+	int alloc;
+
+	c_ptr = kmalloc(sizeof(*c_ptr), GFP_ATOMIC);
+	if (c_ptr == NULL)
+		return NULL;
+	memset(c_ptr, 0, sizeof(*c_ptr));
+	c_ptr->addr = tipc_addr(tipc_zone(addr), tipc_cluster(addr), 0);
+	if (in_own_cluster(addr))
+		max_nodes = LOWEST_SLAVE + tipc_max_slaves;
+	else
+		max_nodes = tipc_max_nodes + 1;
+	alloc = sizeof(void *) * (max_nodes + 1);
+	c_ptr->nodes = kmalloc(alloc, GFP_ATOMIC);
+	if (c_ptr->nodes == NULL)
+		goto free_cluster;
+	memset(c_ptr->nodes, 0, alloc);
+	if (in_own_cluster(addr))
+		tipc_local_nodes = c_ptr->nodes;
+	c_ptr->highest_slave = LOWEST_SLAVE - 1;
+	c_ptr->highest_node = 0;
+
+	z_ptr = tipc_zone_find(tipc_zone(addr));
+	if (z_ptr == NULL)
+		z_ptr = tipc_zone_create(addr);
+	if (z_ptr == NULL)
+		goto free_nodes;
+	tipc_zone_attach_cluster(z_ptr, c_ptr);
+	c_ptr->owner = z_ptr;
+	return c_ptr;
+free_nodes:
+	/* Without a zone the node table must go too, along with its alias */
+	if (in_own_cluster(addr))
+		tipc_local_nodes = NULL;
+	kfree(c_ptr->nodes);
+free_cluster:
+	kfree(c_ptr);
+	return NULL;
+}
+
+void tipc_cltr_delete(struct cluster *c_ptr)
+{
+	u32 idx;
+
+	if (c_ptr == NULL)
+		return;
+	/* Regular node entries: slots 1..highest_node */
+	for (idx = 1; idx <= c_ptr->highest_node; idx++)
+		tipc_node_delete(c_ptr->nodes[idx]);
+	/* Slave entries: slots LOWEST_SLAVE..highest_slave */
+	for (idx = LOWEST_SLAVE; idx <= c_ptr->highest_slave; idx++)
+		tipc_node_delete(c_ptr->nodes[idx]);
+	kfree(c_ptr->nodes);
+	kfree(c_ptr);
+}
+
+u32 tipc_cltr_next_node(struct cluster *c_ptr, u32 addr)
+{
+	struct node *cand;
+	u32 idx;
+
+	if (c_ptr == NULL)
+		return addr;	/* no cluster: hand the caller's address back */
+	for (idx = tipc_node(addr) + 1; idx <= c_ptr->highest_node; idx++) {
+		cand = c_ptr->nodes[idx];	/* slots above @addr first ... */
+		if (cand != NULL && tipc_node_has_active_links(cand))
+			return cand->addr;
+	}
+	for (idx = 1; idx < tipc_node(addr); idx++) {
+		cand = c_ptr->nodes[idx];	/* ... then wrap to those below */
+		if (cand != NULL && tipc_node_has_active_links(cand))
+			return cand->addr;
+	}
+	return 0;
+}
+
+void tipc_cltr_attach_node(struct cluster *c_ptr, struct node *n_ptr)
+{
+	const u32 slot = tipc_node(n_ptr->addr);
+	const u32 limit = in_own_cluster(n_ptr->addr) ?
+			  tipc_highest_allowed_slave : tipc_max_nodes;
+
+	/* The slot must be in range and not already taken */
+	assert(slot > 0);
+	assert(slot <= limit);
+	assert(c_ptr->nodes[slot] == 0);
+	c_ptr->nodes[slot] = n_ptr;
+	if (c_ptr->highest_node < slot)
+		c_ptr->highest_node = slot;
+}
+
+/**
+ * tipc_cltr_select_router - select router to a cluster
+ * 
+ * Uses deterministic and fair algorithm.
+ */
+
+u32 tipc_cltr_select_router(struct cluster *c_ptr, u32 ref)
+{
+	u32 n_num;
+	u32 ulim = c_ptr->highest_node;
+	u32 mask;
+	u32 tstart;
+
+	assert(!in_own_cluster(c_ptr->addr));
+	if (!ulim)
+		return 0;
+
+	/* Start entry must be random; shrinking mask keeps tstart <= ulim */
+	mask = tipc_max_nodes;
+	while (mask > ulim)
+		mask >>= 1;
+	tstart = ref & mask;
+
+	/* Lookup upwards from the start entry ... */
+	for (n_num = tstart; n_num <= ulim; n_num++) {
+		if (tipc_node_is_up(c_ptr->nodes[n_num]))
+			goto found;
+	}
+	/*
+	 * ... then wrap around to the entries below it.  Plain for loops are
+	 * used so that a tstart of 0 or 1 (an empty lower range) cannot run
+	 * past the range and select an entry that was never found to be up.
+	 */
+	for (n_num = 1; n_num < tstart; n_num++) {
+		if (tipc_node_is_up(c_ptr->nodes[n_num]))
+			goto found;
+	}
+	return 0;
+found:
+	return tipc_node_select_router(c_ptr->nodes[n_num], ref);
+}
+
+/**
+ * tipc_cltr_select_node - select destination node within a remote cluster
+ * 
+ * Uses deterministic and fair algorithm.
+ */
+
+struct node *tipc_cltr_select_node(struct cluster *c_ptr, u32 selector)
+{
+	u32 idx;
+	u32 first;
+	u32 bits;
+
+	assert(!in_own_cluster(c_ptr->addr));
+	if (c_ptr->highest_node == 0)
+		return NULL;
+
+	/* Derive a start slot from the selector, no larger than highest_node */
+	for (bits = tipc_max_nodes; bits > c_ptr->highest_node; bits >>= 1)
+		;
+	first = selector & bits;
+	if (first == 0)
+		first = 1u;
+	assert(first <= c_ptr->highest_node);
+
+	/* Scan [first, highest_node], then wrap around to [1, first) */
+	for (idx = first; idx <= c_ptr->highest_node; idx++)
+		if (tipc_node_has_active_links(c_ptr->nodes[idx]))
+			return c_ptr->nodes[idx];
+	for (idx = 1; idx < first; idx++)
+		if (tipc_node_has_active_links(c_ptr->nodes[idx]))
+			return c_ptr->nodes[idx];
+	/* No entry in the cluster has an active link */
+	return NULL;
+}
+
+/*
+ *    Routing table management: See description in node.c
+ */
+
+struct sk_buff *tipc_cltr_prepare_routing_msg(u32 data_size, u32 dest)
+{
+	const u32 total = INT_H_SIZE + data_size;	/* header plus payload */
+	struct sk_buff *skb = buf_acquire(total);
+	struct tipc_msg *hdr;
+
+	if (skb == NULL)
+		return NULL;	/* caller must check for a missing buffer */
+	hdr = buf_msg(skb);
+	memset((char *)hdr, 0, total);	/* payload octets start out cleared */
+	msg_init(hdr, ROUTE_DISTRIBUTOR, 0, TIPC_OK, INT_H_SIZE, dest);
+	return skb;
+}
+
+void tipc_cltr_bcast_new_route(struct cluster *c_ptr, u32 dest,
+			     u32 lower, u32 upper)
+{
+	struct sk_buff *skb = tipc_cltr_prepare_routing_msg(0, c_ptr->addr);
+	struct tipc_msg *hdr;
+
+	if (skb == NULL) {
+		warn("Memory squeeze: broadcast of new route failed\n");
+		return;
+	}
+	hdr = buf_msg(skb);
+	msg_set_remote_node(hdr, dest);	/* route target travels in the header */
+	msg_set_type(hdr, ROUTE_ADDITION);
+	tipc_cltr_multicast(c_ptr, skb, lower, upper);	/* discards skb when done */
+}
+
+void tipc_cltr_bcast_lost_route(struct cluster *c_ptr, u32 dest,
+				u32 lower, u32 upper)
+{
+	struct sk_buff *skb = tipc_cltr_prepare_routing_msg(0, c_ptr->addr);
+	struct tipc_msg *hdr;
+
+	if (skb == NULL) {
+		warn("Memory squeeze: broadcast of lost route failed\n");
+		return;
+	}
+	hdr = buf_msg(skb);
+	msg_set_remote_node(hdr, dest);	/* route target travels in the header */
+	msg_set_type(hdr, ROUTE_REMOVAL);
+	tipc_cltr_multicast(c_ptr, skb, lower, upper);	/* discards skb when done */
+}
+
+void tipc_cltr_send_slave_routes(struct cluster *c_ptr, u32 dest)
+{
+	struct sk_buff *buf;
+	struct tipc_msg *msg;
+	u32 highest = c_ptr->highest_slave;
+	u32 n_num;
+	int send = 0;
+
+	assert(!is_slave(dest));
+	assert(in_own_cluster(dest));
+	assert(in_own_cluster(c_ptr->addr));
+	if (highest <= LOWEST_SLAVE)
+		return;
+	buf = tipc_cltr_prepare_routing_msg(highest - LOWEST_SLAVE + 1,
+					    c_ptr->addr);
+	if (!buf) {
+		warn("Memory squeeze: broadcast of slave route failed\n");
+		return;
+	}
+	msg = buf_msg(buf);
+	msg_set_remote_node(msg, c_ptr->addr);
+	msg_set_type(msg, SLAVE_ROUTING_TABLE);
+	/* NOTE(review): index passed is the absolute slot, yet the receiver adds LOWEST_SLAVE - confirm */
+	for (n_num = LOWEST_SLAVE; n_num <= highest; n_num++)
+		if (c_ptr->nodes[n_num] &&
+		    tipc_node_has_active_links(c_ptr->nodes[n_num])) {
+			send = 1;
+			msg_set_dataoctet(msg, n_num);
+		}
+	if (send)
+		tipc_link_send(buf, dest, dest);
+	else
+		buf_discard(buf);	/* nothing flagged: table not worth sending */
+}
+
+void tipc_cltr_send_ext_routes(struct cluster *c_ptr, u32 dest)
+{
+	const u32 top = c_ptr->highest_node;
+	struct sk_buff *skb;
+	struct tipc_msg *hdr;
+	int any_active = 0;
+	u32 idx;
+
+	/* Only tables of clusters other than our own are sent by this routine */
+	if (in_own_cluster(c_ptr->addr))
+		return;
+	assert(!is_slave(dest));
+	assert(in_own_cluster(dest));
+	skb = tipc_cltr_prepare_routing_msg(top + 1, c_ptr->addr);
+	if (skb == NULL) {
+		warn("Memory squeeze: broadcast of external route failed\n");
+		return;
+	}
+	hdr = buf_msg(skb);
+	msg_set_remote_node(hdr, c_ptr->addr);
+	msg_set_type(hdr, EXT_ROUTING_TABLE);
+	for (idx = 1; idx <= top; idx++) {
+		if (!c_ptr->nodes[idx] ||
+		    !tipc_node_has_active_links(c_ptr->nodes[idx]))
+			continue;
+		any_active = 1;
+		msg_set_dataoctet(hdr, idx);
+	}
+	if (any_active)
+		tipc_link_send(skb, dest, dest);
+	else
+		buf_discard(skb);	/* no node flagged: drop the empty table */
+}
+
+void tipc_cltr_send_local_routes(struct cluster *c_ptr, u32 dest)
+{
+	struct sk_buff *buf;
+	struct tipc_msg *msg;
+	u32 highest = c_ptr->highest_node;
+	u32 n_num;
+	int send = 0;
+
+	assert(is_slave(dest));
+	assert(in_own_cluster(c_ptr->addr));
+	/* Table is indexed by node number, so entry 'highest' needs +1 octets */
+	buf = tipc_cltr_prepare_routing_msg(highest + 1, c_ptr->addr);
+	if (!buf) {
+		warn("Memory squeeze: broadcast of local route failed\n");
+		return;
+	}
+	msg = buf_msg(buf);
+	msg_set_remote_node(msg, c_ptr->addr);
+	msg_set_type(msg, LOCAL_ROUTING_TABLE);
+	for (n_num = 1; n_num <= highest; n_num++)
+		if (c_ptr->nodes[n_num] &&
+		    tipc_node_has_active_links(c_ptr->nodes[n_num])) {
+			send = 1;
+			msg_set_dataoctet(msg, n_num);
+		}
+	if (send)
+		tipc_link_send(buf, dest, dest);
+	else
+		buf_discard(buf);	/* nothing flagged: table not worth sending */
+}
+
+void tipc_cltr_recv_routing_table(struct sk_buff *buf)
+{
+	struct tipc_msg *msg = buf_msg(buf);
+	struct cluster *c_ptr;
+	struct node *n_ptr;
+	unchar *node_table;	/* octets that follow the message header */
+	u32 table_size;		/* message size minus header size */
+	u32 router;		/* msg_prevnode() of the message, recorded as router */
+	u32 rem_node = msg_remote_node(msg);
+	u32 z_num;
+	u32 c_num;
+	u32 n_num;
+
+	c_ptr = tipc_cltr_find(rem_node);
+	if (!c_ptr) {
+		c_ptr = tipc_cltr_create(rem_node);	/* first message about this cluster */
+		if (!c_ptr) {
+			buf_discard(buf);
+			return;
+		}
+	}
+
+	node_table = buf->data + msg_hdr_sz(msg);
+	table_size = msg_size(msg) - msg_hdr_sz(msg);
+	router = msg_prevnode(msg);
+	z_num = tipc_zone(rem_node);
+	c_num = tipc_cluster(rem_node);
+
+	switch (msg_type(msg)) {
+	case LOCAL_ROUTING_TABLE:
+		assert(is_slave(tipc_own_addr));	/* fall through: handled like an external table */
+	case EXT_ROUTING_TABLE:
+		for (n_num = 1; n_num < table_size; n_num++) {
+			if (node_table[n_num]) {	/* non-zero octet: node n_num is flagged */
+				u32 addr = tipc_addr(z_num, c_num, n_num);
+				n_ptr = c_ptr->nodes[n_num];	/* NOTE(review): n_num is bounded only by the message size - confirm */
+				if (!n_ptr) {
+					n_ptr = tipc_node_create(addr);
+				}
+				if (n_ptr)
+					tipc_node_add_router(n_ptr, router);
+			}
+		}
+		break;
+	case SLAVE_ROUTING_TABLE:
+		assert(!is_slave(tipc_own_addr));
+		assert(in_own_cluster(c_ptr->addr));
+		for (n_num = 1; n_num < table_size; n_num++) {
+			if (node_table[n_num]) {
+				u32 slave_num = n_num + LOWEST_SLAVE;	/* table index is relative to LOWEST_SLAVE */
+				u32 addr = tipc_addr(z_num, c_num, slave_num);
+				n_ptr = c_ptr->nodes[slave_num];	/* NOTE(review): unchecked index, as above - confirm */
+				if (!n_ptr) {
+					n_ptr = tipc_node_create(addr);
+				}
+				if (n_ptr)
+					tipc_node_add_router(n_ptr, router);
+			}
+		}
+		break;
+	case ROUTE_ADDITION:
+		if (!is_slave(tipc_own_addr)) {
+			assert(!in_own_cluster(c_ptr->addr)
+			       || is_slave(rem_node));
+		} else {
+			assert(in_own_cluster(c_ptr->addr)
+			       && !is_slave(rem_node));
+		}
+		n_ptr = c_ptr->nodes[tipc_node(rem_node)];
+		if (!n_ptr)
+			n_ptr = tipc_node_create(rem_node);
+		if (n_ptr)
+			tipc_node_add_router(n_ptr, router);
+		break;
+	case ROUTE_REMOVAL:
+		if (!is_slave(tipc_own_addr)) {
+			assert(!in_own_cluster(c_ptr->addr)
+			       || is_slave(rem_node));
+		} else {
+			assert(in_own_cluster(c_ptr->addr)
+			       && !is_slave(rem_node));
+		}
+		n_ptr = c_ptr->nodes[tipc_node(rem_node)];
+		if (n_ptr)	/* an unknown node has no router to remove */
+			tipc_node_remove_router(n_ptr, router);
+		break;
+	default:
+		assert(!"Illegal routing manager message received\n");
+	}
+	buf_discard(buf);	/* the message buffer is consumed on every path */
+}
+
+void tipc_cltr_remove_as_router(struct cluster *c_ptr, u32 router)
+{
+	struct node *entry;
+	u32 first;
+	u32 last;
+	u32 idx;
+	int own;
+
+	if (is_slave(router))
+		return;	/* Slave nodes can not be routers */
+	/*
+	 * In the own cluster only the slave slots are visited; in any other
+	 * cluster the regular node slots are.
+	 */
+	own = in_own_cluster(c_ptr->addr);
+	first = own ? LOWEST_SLAVE : 1;
+	last = own ? c_ptr->highest_slave : c_ptr->highest_node;
+	for (idx = first; idx <= last; idx++) {
+		entry = c_ptr->nodes[idx];
+		if (entry != NULL)
+			tipc_node_remove_router(entry, router);
+	}
+}
+
+/**
+ * tipc_cltr_multicast - multicast message to local nodes 
+ */
+
+void tipc_cltr_multicast(struct cluster *c_ptr, struct sk_buff *buf,
+			 u32 lower, u32 upper)
+{
+	struct sk_buff *clone;
+	struct node *peer;
+	u32 last;
+	u32 idx;
+
+	assert(lower <= upper);
+	assert(((lower >= 1) && (lower <= tipc_max_nodes)) ||
+	       ((lower >= LOWEST_SLAVE) && (lower <= tipc_highest_allowed_slave)));
+	assert(((upper >= 1) && (upper <= tipc_max_nodes)) ||
+	       ((upper >= LOWEST_SLAVE) && (upper <= tipc_highest_allowed_slave)));
+	assert(in_own_cluster(c_ptr->addr));
+
+	/* Never walk past the highest slot recorded for this kind of node */
+	last = is_slave(upper) ? c_ptr->highest_slave : c_ptr->highest_node;
+	last = (upper < last) ? upper : last;
+	for (idx = lower; idx <= last; idx++) {
+		peer = c_ptr->nodes[idx];
+		if (peer == NULL || !tipc_node_has_active_links(peer))
+			continue;
+		clone = skb_copy(buf, GFP_ATOMIC);
+		if (clone == NULL)
+			break;	/* no copy available: give up on remaining nodes */
+		msg_set_destnode(buf_msg(clone), peer->addr);
+		tipc_link_send(clone, peer->addr, peer->addr);
+	}
+	buf_discard(buf);	/* the original only served as a template */
+}
+
+/**
+ * tipc_cltr_broadcast - broadcast message to all nodes within cluster
+ */
+
+void tipc_cltr_broadcast(struct sk_buff *buf)
+{
+	struct sk_buff *clone;
+	struct cluster *own;
+	struct node *peer;
+	u32 first;
+	u32 last;
+	u32 idx;
+	int pass;
+
+	if (tipc_mode != TIPC_NET_MODE)
+		goto exit;
+
+	own = tipc_cltr_find(tipc_own_addr);
+	assert(in_own_cluster(own->addr));	/* For now */
+
+	/* Pass 0 covers the standard nodes, pass 1 the slaves */
+	first = 1;
+	last = own->highest_node;
+	for (pass = 0; pass < 2; pass++) {
+		for (idx = first; idx <= last; idx++) {
+			peer = own->nodes[idx];
+			if (peer == NULL || !tipc_node_has_active_links(peer))
+				continue;
+			clone = skb_copy(buf, GFP_ATOMIC);
+			if (clone == NULL)
+				goto exit;
+			msg_set_destnode(buf_msg(clone), peer->addr);
+			tipc_link_send(clone, peer->addr, peer->addr);
+		}
+		first = LOWEST_SLAVE;
+		last = own->highest_slave;
+	}
+exit:
+	/* The caller's buffer is consumed on every path */
+	buf_discard(buf);
+}
+
+int tipc_cltr_init(void)
+{
+	tipc_highest_allowed_slave = tipc_max_slaves + LOWEST_SLAVE;	/* highest slot a slave may use */
+	return (tipc_cltr_create(tipc_own_addr) == NULL) ? -ENOMEM : TIPC_OK;
+}
+
diff --git a/net/tipc/cluster.h b/net/tipc/cluster.h
new file mode 100644
index 00000000000..9963642e105
--- /dev/null
+++ b/net/tipc/cluster.h
@@ -0,0 +1,92 @@
+/*
+ * net/tipc/cluster.h: Include file for TIPC cluster management routines
+ * 
+ * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_CLUSTER_H
+#define _TIPC_CLUSTER_H
+
+#include "addr.h"
+#include "zone.h"
+
+#define LOWEST_SLAVE  2048u
+
+/**
+ * struct cluster - TIPC cluster structure
+ * @addr: network address of cluster
+ * @owner: pointer to zone that cluster belongs to
+ * @nodes: array of pointers to all nodes within cluster, indexed by node id
+ * @highest_node: id of highest numbered node within cluster
+ * @highest_slave: upper bound for scans of @nodes' slave entries (from LOWEST_SLAVE)
+ */
+ 
+struct cluster {
+	u32 addr;
+	struct _zone *owner;
+	struct node **nodes;
+	u32 highest_node;
+	u32 highest_slave;
+};
+
+
+extern struct node **tipc_local_nodes;
+extern u32 tipc_highest_allowed_slave;
+extern struct node_map tipc_cltr_bcast_nodes;
+
+void tipc_cltr_remove_as_router(struct cluster *c_ptr, u32 router);
+void tipc_cltr_send_ext_routes(struct cluster *c_ptr, u32 dest);
+struct node *tipc_cltr_select_node(struct cluster *c_ptr, u32 selector);
+u32 tipc_cltr_select_router(struct cluster *c_ptr, u32 ref);
+void tipc_cltr_recv_routing_table(struct sk_buff *buf);
+struct cluster *tipc_cltr_create(u32 addr);
+void tipc_cltr_delete(struct cluster *c_ptr);
+void tipc_cltr_attach_node(struct cluster *c_ptr, struct node *n_ptr);
+void tipc_cltr_send_slave_routes(struct cluster *c_ptr, u32 dest);
+void tipc_cltr_broadcast(struct sk_buff *buf);
+int tipc_cltr_init(void);
+u32 tipc_cltr_next_node(struct cluster *c_ptr, u32 addr);
+void tipc_cltr_bcast_new_route(struct cluster *c_ptr, u32 dest, u32 lo, u32 hi);
+void tipc_cltr_send_local_routes(struct cluster *c_ptr, u32 dest);
+void tipc_cltr_bcast_lost_route(struct cluster *c_ptr, u32 dest, u32 lo, u32 hi);
+
+static inline struct cluster *tipc_cltr_find(u32 addr)
+{
+	struct _zone *zone = tipc_zone_find(addr);
+
+	if (zone == NULL)
+		return NULL;	/* no zone is known for @addr */
+	return zone->clusters[1];	/* only slot 1 is ever looked up here */
+}
+
+#endif
diff --git a/net/tipc/config.c b/net/tipc/config.c
new file mode 100644
index 00000000000..3c8e6740e5a
--- /dev/null
+++ b/net/tipc/config.c
@@ -0,0 +1,718 @@
+/*
+ * net/tipc/config.c: TIPC configuration management code
+ * 
+ * Copyright (c) 2002-2006, Ericsson AB
+ * Copyright (c) 2004-2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "dbg.h"
+#include "bearer.h"
+#include "port.h"
+#include "link.h"
+#include "zone.h"
+#include "addr.h"
+#include "name_table.h"
+#include "node.h"
+#include "config.h"
+#include "discover.h"
+
+struct subscr_data {
+	char usr_handle[8];	/* copy of a request's usr_handle (disabled code) */
+	u32 domain;		/* from msg->argv.domain (disabled code) */
+	u32 port_ref;		/* port created for the subscriber (disabled code) */
+	struct list_head subd_list;	/* entry in mng.link_subscribers */
+};
+
+struct manager {
+	u32 user_ref;		/* filled in by tipc_attach() in tipc_cfg_init() */
+	u32 port_ref;		/* filled in by tipc_createport() in tipc_cfg_init() */
+	u32 subscr_ref;		/* not referenced elsewhere in this file */
+	u32 link_subscriptions;	/* only touched by the "#if 0" code */
+	struct list_head link_subscribers;	/* initialised in tipc_cfg_init() */
+};
+
+static struct manager mng = { 0};
+
+static spinlock_t config_lock = SPIN_LOCK_UNLOCKED;
+
+static const void *req_tlv_area;	/* request message TLV area */
+static int req_tlv_space;		/* request message TLV area size */
+static int rep_headroom;		/* reply message headroom to use */
+
+
+void tipc_cfg_link_event(u32 addr, char *name, int up)
+{
+	/* Intentionally empty: TIPC doesn't handle link event subscriptions yet */
+}
+
+
+/* Allocate a reply buffer for payload_size bytes behind rep_headroom bytes */
+struct sk_buff *tipc_cfg_reply_alloc(int payload_size)
+{
+	struct sk_buff *skb = alloc_skb(rep_headroom + payload_size, GFP_ATOMIC);
+
+	if (skb)	/* on allocation failure the null result is returned as-is */
+		skb_reserve(skb, rep_headroom);
+	return skb;
+}
+
+int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type,
+			void *tlv_data, int tlv_data_size)
+{
+	struct tlv_desc *desc = (struct tlv_desc *)buf->tail;
+	int needed = TLV_SPACE(tlv_data_size);
+	/* Appends one TLV at buf's tail: returns 1 if added, 0 if it won't fit */
+	if (skb_tailroom(buf) >= needed) {
+		skb_put(buf, needed);
+		desc->tlv_type = htons(tlv_type);
+		desc->tlv_len = htons(TLV_LENGTH(tlv_data_size));
+		if (tlv_data && tlv_data_size)
+			memcpy(TLV_DATA(desc), tlv_data, tlv_data_size);
+		return 1;
+	}
+	dbg("tipc_cfg_append_tlv unable to append TLV\n");
+	return 0;
+}
+
+/* Build a reply holding one tlv_type TLV whose data is value in network order */
+struct sk_buff *tipc_cfg_reply_unsigned_type(u16 tlv_type, u32 value)
+{
+	u32 net_value = htonl(value);
+	struct sk_buff *reply;
+
+	reply = tipc_cfg_reply_alloc(TLV_SPACE(sizeof(value)));
+	if (!reply)
+		return reply;	/* allocation failed; nothing to fill in */
+
+	tipc_cfg_append_tlv(reply, tlv_type, &net_value, sizeof(net_value));
+	return reply;
+}
+
+/* Build a reply holding one tlv_type TLV carrying string plus its terminator */
+struct sk_buff *tipc_cfg_reply_string_type(u16 tlv_type, char *string)
+{
+	int nbytes = strlen(string) + 1;	/* +1 keeps the trailing NUL */
+	struct sk_buff *reply = tipc_cfg_reply_alloc(TLV_SPACE(nbytes));
+
+	if (reply)
+		tipc_cfg_append_tlv(reply, tlv_type, string, nbytes);
+	return reply;
+}
+
+
+
+
+#if 0
+
+/* Now obsolete code for handling commands not yet implemented the new way */
+
+int tipc_cfg_cmd(const struct tipc_cmd_msg * msg,
+		 char *data,
+		 u32 sz,
+		 u32 *ret_size,
+		 struct tipc_portid *orig)
+{
+	int rv = -EINVAL;
+	u32 cmd = msg->cmd;
+
+	*ret_size = 0;
+	switch (cmd) {
+	case TIPC_REMOVE_LINK:
+	case TIPC_CMD_BLOCK_LINK:
+	case TIPC_CMD_UNBLOCK_LINK:
+		if (!cfg_check_connection(orig))
+			rv = link_control(msg->argv.link_name, msg->cmd, 0);
+		break;
+	case TIPC_ESTABLISH:
+		{
+			int connected;
+
+			tipc_isconnected(mng.conn_port_ref, &connected);
+			if (connected || !orig) {
+				rv = TIPC_FAILURE;
+				break;
+			}
+			rv = tipc_connect2port(mng.conn_port_ref, orig);
+			if (rv == TIPC_OK)
+				orig = 0;
+			break;
+		}
+	case TIPC_GET_PEER_ADDRESS:
+		*ret_size = link_peer_addr(msg->argv.link_name, data, sz);
+		break;
+	case TIPC_GET_ROUTES:
+		rv = TIPC_OK;
+		break;
+	default: {}
+	}
+	if (*ret_size)
+		rv = TIPC_OK;
+	return rv;
+}
+
+static void cfg_cmd_event(struct tipc_cmd_msg *msg,
+			  char *data,
+			  u32 sz,        
+			  struct tipc_portid const *orig)
+{
+	int rv = -EINVAL;
+	struct tipc_cmd_result_msg rmsg;
+	struct iovec msg_sect[2];
+	int *arg;
+
+	msg->cmd = ntohl(msg->cmd);
+
+	cfg_prepare_res_msg(msg->cmd, msg->usr_handle, rv, &rmsg, msg_sect, 
+			    data, 0);
+	if (ntohl(msg->magic) != TIPC_MAGIC)
+		goto exit;
+
+	switch (msg->cmd) {
+	case TIPC_CREATE_LINK:
+		if (!cfg_check_connection(orig))
+			rv = disc_create_link(&msg->argv.create_link);
+		break;
+	case TIPC_LINK_SUBSCRIBE:
+		{
+			struct subscr_data *sub;
+
+			if (mng.link_subscriptions > 64)
+				break;
+			sub = (struct subscr_data *)kmalloc(sizeof(*sub),
+							    GFP_ATOMIC);
+			if (sub == NULL) {
+				warn("Memory squeeze; dropped remote link subscription\n");
+				break;
+			}
+			INIT_LIST_HEAD(&sub->subd_list);
+			tipc_createport(mng.user_ref,
+					(void *)sub,
+					TIPC_HIGH_IMPORTANCE,
+					0,
+					0,
+					(tipc_conn_shutdown_event)cfg_linksubscr_cancel,
+					0,
+					0,
+					(tipc_conn_msg_event)cfg_linksubscr_cancel,
+					0,
+					&sub->port_ref);
+			if (!sub->port_ref) {
+				kfree(sub);
+				break;
+			}
+			memcpy(sub->usr_handle,msg->usr_handle,
+			       sizeof(sub->usr_handle));
+			sub->domain = msg->argv.domain;
+			list_add_tail(&sub->subd_list, &mng.link_subscribers);
+			tipc_connect2port(sub->port_ref, orig);
+			rmsg.retval = TIPC_OK;
+			tipc_send(sub->port_ref, 2u, msg_sect);
+			mng.link_subscriptions++;
+			return;
+		}
+	default:
+		rv = tipc_cfg_cmd(msg, data, sz, (u32 *)&msg_sect[1].iov_len, orig);
+	}
+	exit:
+	rmsg.result_len = htonl(msg_sect[1].iov_len);
+	rmsg.retval = htonl(rv);
+	tipc_cfg_respond(msg_sect, 2u, orig);
+}
+#endif
+
+/* TIPC_CMD_ENABLE_BEARER: enable the bearer described by the request TLV */
+static struct sk_buff *cfg_enable_bearer(void)
+{
+	struct tipc_bearer_config *cfg;
+	u32 scope, prio;
+
+	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_BEARER_CONFIG))
+		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+	cfg = (struct tipc_bearer_config *)TLV_DATA(req_tlv_area);
+	scope = ntohl(cfg->detect_scope);	/* converted from network order */
+	prio = ntohl(cfg->priority);
+	if (tipc_enable_bearer(cfg->name, scope, prio))
+		return tipc_cfg_reply_error_string("unable to enable bearer");
+	return tipc_cfg_reply_none();
+}
+
+/* TIPC_CMD_DISABLE_BEARER: disable the bearer named in the request TLV */
+static struct sk_buff *cfg_disable_bearer(void)
+{
+	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_BEARER_NAME))
+		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+	/* a non-zero result from tipc_disable_bearer() is reported as failure */
+	return tipc_disable_bearer((char *)TLV_DATA(req_tlv_area)) ?
+		tipc_cfg_reply_error_string("unable to disable bearer") :
+		tipc_cfg_reply_none();
+}
+
+/* TIPC_CMD_SET_NODE_ADDR: assign this node's address (one-time operation) */
+static struct sk_buff *cfg_set_own_addr(void)
+{
+	u32 new_addr;
+
+	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR))
+		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+
+	new_addr = ntohl(*(u32 *)TLV_DATA(req_tlv_area));
+	if (new_addr == tipc_own_addr)
+		return tipc_cfg_reply_none();
+	if (!tipc_addr_node_valid(new_addr))
+		return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
+						   " (node address)");
+	if (tipc_own_addr)
+		return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
+						   " (cannot change node address once assigned)");
+
+	spin_unlock_bh(&config_lock);	/* restart runs with config_lock dropped */
+	tipc_core_stop_net();
+	tipc_own_addr = new_addr;
+	tipc_core_start_net();
+	spin_lock_bh(&config_lock);
+	return tipc_cfg_reply_none();
+}
+
+/* TIPC_CMD_SET_REMOTE_MNG: any non-zero request value turns the flag on */
+static struct sk_buff *cfg_set_remote_mng(void)
+{
+	u32 requested;
+
+	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
+		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+
+	requested = ntohl(*(u32 *)TLV_DATA(req_tlv_area));
+	tipc_remote_management = requested ? 1 : 0;
+	return tipc_cfg_reply_none();
+}
+
+/* TIPC_CMD_SET_MAX_PUBL: update tipc_max_publications (accepted: 1-65535) */
+static struct sk_buff *cfg_set_max_publications(void)
+{
+	u32 limit;
+
+	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
+		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+
+	limit = ntohl(*(u32 *)TLV_DATA(req_tlv_area));
+	if (limit != delimit(limit, 1, 65535))
+		return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
+						   " (max publications must be 1-65535)");
+	tipc_max_publications = limit;
+	return tipc_cfg_reply_none();
+}
+
+/* TIPC_CMD_SET_MAX_SUBSCR: update tipc_max_subscriptions (accepted: 1-65535) */
+static struct sk_buff *cfg_set_max_subscriptions(void)
+{
+	u32 value;
+
+	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
+		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+
+	value = ntohl(*(u32 *)TLV_DATA(req_tlv_area));
+	if (value != delimit(value, 1, 65535))
+		return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
+						   " (max subscriptions must be 1-65535)");
+	tipc_max_subscriptions = value;
+	return tipc_cfg_reply_none();
+}
+
+/* TIPC_CMD_SET_MAX_PORTS: change tipc_max_ports by restarting the TIPC core */
+static struct sk_buff *cfg_set_max_ports(void)
+{
+	int was_net_mode;
+	u32 ports;
+
+	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
+		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+	ports = ntohl(*(u32 *)TLV_DATA(req_tlv_area));
+	if (ports != delimit(ports, 127, 65535))
+		return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
+						   " (max ports must be 127-65535)");
+
+	if (ports == tipc_max_ports)
+		return tipc_cfg_reply_none();
+
+	if (atomic_read(&tipc_user_count) > 2)
+		return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
+						   " (cannot change max ports while TIPC users exist)");
+
+	spin_unlock_bh(&config_lock);	/* restart runs with config_lock dropped */
+	was_net_mode = (tipc_get_mode() == TIPC_NET_MODE);
+	if (was_net_mode)
+		tipc_core_stop_net();
+	tipc_core_stop();
+	tipc_max_ports = ports;
+	tipc_core_start();
+	if (was_net_mode)
+		tipc_core_start_net();
+	spin_lock_bh(&config_lock);
+	return tipc_cfg_reply_none();
+}
+
+/* Store value in *parameter, bouncing the network layer if it is running */
+static struct sk_buff *set_net_max(int value, int *parameter)
+{
+	int net_running;
+
+	if (value == *parameter)
+		return tipc_cfg_reply_none();	/* unchanged: nothing to restart */
+	net_running = (tipc_get_mode() == TIPC_NET_MODE);
+	if (net_running)
+		tipc_core_stop_net();
+	*parameter = value;
+	if (net_running)
+		tipc_core_start_net();
+	return tipc_cfg_reply_none();
+}
+
+/* TIPC_CMD_SET_MAX_ZONES: set tipc_max_zones (1-255) through set_net_max() */
+static struct sk_buff *cfg_set_max_zones(void)
+{
+	u32 zones;
+
+	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
+		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+	zones = ntohl(*(u32 *)TLV_DATA(req_tlv_area));
+	if (zones == delimit(zones, 1, 255))
+		return set_net_max(zones, &tipc_max_zones);
+	return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
+					   " (max zones must be 1-255)");
+}
+
+/* TIPC_CMD_SET_MAX_CLUSTERS: the limit is fixed, only the value 1 is accepted */
+static struct sk_buff *cfg_set_max_clusters(void)
+{
+	u32 wanted;
+
+	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
+		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+	wanted = ntohl(*(u32 *)TLV_DATA(req_tlv_area));
+	if (wanted == 1)
+		return tipc_cfg_reply_none();
+	return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
+					   " (max clusters fixed at 1)");
+}
+
+/* TIPC_CMD_SET_MAX_NODES: set tipc_max_nodes (8-2047) through set_net_max() */
+static struct sk_buff *cfg_set_max_nodes(void)
+{
+	u32 nodes;
+
+	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
+		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+	nodes = ntohl(*(u32 *)TLV_DATA(req_tlv_area));
+	if (nodes == delimit(nodes, 8, 2047))
+		return set_net_max(nodes, &tipc_max_nodes);
+	return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
+					   " (max nodes must be 8-2047)");
+}
+
+/* TIPC_CMD_SET_MAX_SLAVES: the limit is fixed, only the value 0 is accepted */
+static struct sk_buff *cfg_set_max_slaves(void)
+{
+	u32 wanted;
+
+	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
+		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+	wanted = ntohl(*(u32 *)TLV_DATA(req_tlv_area));
+	if (wanted == 0)
+		return tipc_cfg_reply_none();
+	return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
+					   " (max secondary nodes fixed at 0)");
+}
+
+/* TIPC_CMD_SET_NETID: set tipc_net_id (1-9999) while no node address is set */
+static struct sk_buff *cfg_set_netid(void)
+{
+	u32 netid;
+
+	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
+		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+	netid = ntohl(*(u32 *)TLV_DATA(req_tlv_area));
+	if (netid != delimit(netid, 1, 9999))
+		return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
+						   " (network id must be 1-9999)");
+
+	if (tipc_own_addr)
+		return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
+						   " (cannot change network id once part of network)");
+
+	return set_net_max(netid, &tipc_net_id);
+}
+
+struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area,
+				int request_space, int reply_headroom)
+{
+	struct sk_buff *rep_tlv_buf;
+	/* Runs one configuration command under config_lock; returns its reply */
+	spin_lock_bh(&config_lock);
+
+	/* Save request and reply details in file-scope variables for the handlers */
+
+	req_tlv_area = request_area;
+	req_tlv_space = request_space;
+	rep_headroom = reply_headroom;
+
+	/* Check command authorization (requests from this node always pass) */
+
+	if (likely(orig_node == tipc_own_addr)) {
+		/* command is permitted */
+	} else if (cmd >= 0x8000) {	/* never allowed from another node */
+		rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
+							  " (cannot be done remotely)");
+		goto exit;
+	} else if (!tipc_remote_management) {	/* remote management is off */
+		rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NO_REMOTE);
+		goto exit;
+	}
+	else if (cmd >= 0x4000) {	/* orig_node must match the TIPC_ZM_SRV lookup */
+		u32 domain = 0;
+
+		if ((tipc_nametbl_translate(TIPC_ZM_SRV, 0, &domain) == 0) ||
+		    (domain != orig_node)) {
+			rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_ZONE_MSTR);
+			goto exit;
+		}
+	}
+
+	/* Call appropriate processing routine */
+
+	switch (cmd) {
+	case TIPC_CMD_NOOP:
+		rep_tlv_buf = tipc_cfg_reply_none();
+		break;
+	case TIPC_CMD_GET_NODES:
+		rep_tlv_buf = tipc_node_get_nodes(req_tlv_area, req_tlv_space);
+		break;
+	case TIPC_CMD_GET_LINKS:
+		rep_tlv_buf = tipc_node_get_links(req_tlv_area, req_tlv_space);
+		break;
+	case TIPC_CMD_SHOW_LINK_STATS:
+		rep_tlv_buf = tipc_link_cmd_show_stats(req_tlv_area, req_tlv_space);
+		break;
+	case TIPC_CMD_RESET_LINK_STATS:
+		rep_tlv_buf = tipc_link_cmd_reset_stats(req_tlv_area, req_tlv_space);
+		break;
+	case TIPC_CMD_SHOW_NAME_TABLE:
+		rep_tlv_buf = tipc_nametbl_get(req_tlv_area, req_tlv_space);
+		break;
+	case TIPC_CMD_GET_BEARER_NAMES:
+		rep_tlv_buf = tipc_bearer_get_names();
+		break;
+	case TIPC_CMD_GET_MEDIA_NAMES:
+		rep_tlv_buf = tipc_media_get_names();
+		break;
+	case TIPC_CMD_SHOW_PORTS:
+		rep_tlv_buf = tipc_port_get_ports();
+		break;
+#if 0
+	case TIPC_CMD_SHOW_PORT_STATS:
+		rep_tlv_buf = port_show_stats(req_tlv_area, req_tlv_space);
+		break;
+	case TIPC_CMD_RESET_PORT_STATS:
+		rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED);
+		break;
+#endif
+	case TIPC_CMD_SET_LOG_SIZE:
+		rep_tlv_buf = tipc_log_resize(req_tlv_area, req_tlv_space);
+		break;
+	case TIPC_CMD_DUMP_LOG:
+		rep_tlv_buf = tipc_log_dump();
+		break;
+	case TIPC_CMD_SET_LINK_TOL:
+	case TIPC_CMD_SET_LINK_PRI:
+	case TIPC_CMD_SET_LINK_WINDOW:
+		rep_tlv_buf = tipc_link_cmd_config(req_tlv_area, req_tlv_space, cmd);
+		break;
+	case TIPC_CMD_ENABLE_BEARER:
+		rep_tlv_buf = cfg_enable_bearer();
+		break;
+	case TIPC_CMD_DISABLE_BEARER:
+		rep_tlv_buf = cfg_disable_bearer();
+		break;
+	case TIPC_CMD_SET_NODE_ADDR:
+		rep_tlv_buf = cfg_set_own_addr();	/* drops config_lock briefly */
+		break;
+	case TIPC_CMD_SET_REMOTE_MNG:
+		rep_tlv_buf = cfg_set_remote_mng();
+		break;
+	case TIPC_CMD_SET_MAX_PORTS:
+		rep_tlv_buf = cfg_set_max_ports();	/* drops config_lock briefly */
+		break;
+	case TIPC_CMD_SET_MAX_PUBL:
+		rep_tlv_buf = cfg_set_max_publications();
+		break;
+	case TIPC_CMD_SET_MAX_SUBSCR:
+		rep_tlv_buf = cfg_set_max_subscriptions();
+		break;
+	case TIPC_CMD_SET_MAX_ZONES:
+		rep_tlv_buf = cfg_set_max_zones();
+		break;
+	case TIPC_CMD_SET_MAX_CLUSTERS:
+		rep_tlv_buf = cfg_set_max_clusters();
+		break;
+	case TIPC_CMD_SET_MAX_NODES:
+		rep_tlv_buf = cfg_set_max_nodes();
+		break;
+	case TIPC_CMD_SET_MAX_SLAVES:
+		rep_tlv_buf = cfg_set_max_slaves();
+		break;
+	case TIPC_CMD_SET_NETID:
+		rep_tlv_buf = cfg_set_netid();
+		break;
+	case TIPC_CMD_GET_REMOTE_MNG:
+		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_remote_management);
+		break;
+	case TIPC_CMD_GET_MAX_PORTS:
+		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_ports);
+		break;
+	case TIPC_CMD_GET_MAX_PUBL:
+		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_publications);
+		break;
+	case TIPC_CMD_GET_MAX_SUBSCR:
+		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_subscriptions);
+		break;
+	case TIPC_CMD_GET_MAX_ZONES:
+		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_zones);
+		break;
+	case TIPC_CMD_GET_MAX_CLUSTERS:
+		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_clusters);
+		break;
+	case TIPC_CMD_GET_MAX_NODES:
+		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_nodes);
+		break;
+	case TIPC_CMD_GET_MAX_SLAVES:
+		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_slaves);
+		break;
+	case TIPC_CMD_GET_NETID:
+		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id);
+		break;
+	default:
+		rep_tlv_buf = NULL;	/* unrecognised command: no reply buffer */
+		break;
+	}
+
+	/* Return reply buffer (may be NULL) */
+exit:
+	spin_unlock_bh(&config_lock);
+	return rep_tlv_buf;
+}
+
+/* Callback registered on the config port by tipc_cfg_init(): validates one
+ * request, executes it and sends the outcome back to its originator. */
+static void cfg_named_msg_event(void *userdata,
+				u32 port_ref,
+				struct sk_buff **buf,
+				const unchar *msg,
+				u32 size,
+				u32 importance,
+				struct tipc_portid const *orig,
+				struct tipc_name_seq const *dest)
+{
+	struct tipc_cfg_msg_hdr *req_hdr = (struct tipc_cfg_msg_hdr *)msg;
+	struct tipc_cfg_msg_hdr *rep_hdr;
+	struct sk_buff *rep_buf;
+
+	/* Discard (with a warning) anything that is not a well-formed request */
+	if ((size < sizeof(*req_hdr)) ||
+	    (size != TCM_ALIGN(ntohl(req_hdr->tcm_len))) ||
+	    (ntohs(req_hdr->tcm_flags) != TCM_F_REQUEST)) {
+		warn("discarded invalid configuration message\n");
+		return;
+	}
+
+	/* Run the command on the data that follows the request header */
+	rep_buf = tipc_cfg_do_cmd(orig->node, ntohs(req_hdr->tcm_type),
+				  msg + sizeof(*req_hdr),
+				  size - sizeof(*req_hdr),
+				  BUF_HEADROOM + MAX_H_SIZE + sizeof(*rep_hdr));
+	if (!rep_buf) {
+		/* no reply available: take over the request buffer and return it */
+		rep_buf = *buf;
+		*buf = NULL;
+	} else {
+		/* prepend a copy of the request header, then patch length/flags */
+		skb_push(rep_buf, sizeof(*rep_hdr));
+		rep_hdr = (struct tipc_cfg_msg_hdr *)rep_buf->data;
+		memcpy(rep_hdr, req_hdr, sizeof(*rep_hdr));
+		rep_hdr->tcm_len = htonl(rep_buf->len);
+		rep_hdr->tcm_flags &= htons(~TCM_F_REQUEST);
+	}
+
+	/* NEED TO ADD CODE TO HANDLE FAILED SEND (SUCH AS CONGESTION) */
+	tipc_send_buf2port(port_ref, orig, rep_buf, rep_buf->len);
+}
+
+/* Set up the configuration service: user, port and TIPC_CFG_SRV publication */
+int tipc_cfg_init(void)
+{
+	struct tipc_name_seq srv;
+	int rc;
+
+	memset(&mng, 0, sizeof(mng));
+	INIT_LIST_HEAD(&mng.link_subscribers);
+
+	rc = tipc_attach(&mng.user_ref, 0, 0);
+	if (rc)
+		goto failed;
+
+	rc = tipc_createport(mng.user_ref, 0, TIPC_CRITICAL_IMPORTANCE,
+			     NULL, NULL, NULL,
+			     NULL, cfg_named_msg_event, NULL,
+			     NULL, &mng.port_ref);
+	if (rc)
+		goto failed;
+
+	/* the published name range is exactly this node's own address */
+	srv.type = TIPC_CFG_SRV;
+	srv.lower = srv.upper = tipc_own_addr;
+	rc = tipc_nametbl_publish_rsv(mng.port_ref, TIPC_ZONE_SCOPE, &srv);
+	if (!rc)
+		return 0;
+
+failed:	/* shared error exit; rc holds the failing call's result */
+	err("Unable to create configuration service\n");
+	tipc_detach(mng.user_ref);
+	mng.user_ref = 0;
+	return rc;
+}
+
+void tipc_cfg_stop(void)
+{
+	if (!mng.user_ref)
+		return;	/* no attached user: nothing to tear down */
+	tipc_detach(mng.user_ref);	/* drop the user set up by tipc_cfg_init() */
+	mng.user_ref = 0;
+}
diff --git a/net/tipc/config.h b/net/tipc/config.h
new file mode 100644
index 00000000000..7a728f954d8
--- /dev/null
+++ b/net/tipc/config.h
@@ -0,0 +1,79 @@
+/*
+ * net/tipc/config.h: Include file for TIPC configuration service code
+ * 
+ * Copyright (c) 2003-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_CONFIG_H
+#define _TIPC_CONFIG_H
+
+/* ---------------------------------------------------------------------- */
+
+#include "core.h"
+#include "link.h"
+
+struct sk_buff *tipc_cfg_reply_alloc(int payload_size);
+int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type, 
+			void *tlv_data, int tlv_data_size);
+struct sk_buff *tipc_cfg_reply_unsigned_type(u16 tlv_type, u32 value);
+struct sk_buff *tipc_cfg_reply_string_type(u16 tlv_type, char *string);
+
+static inline struct sk_buff *tipc_cfg_reply_none(void)
+{
+	return tipc_cfg_reply_alloc(0);	/* reply buffer with a zero-size payload */
+}
+
+static inline struct sk_buff *tipc_cfg_reply_unsigned(u32 value)
+{
+	return tipc_cfg_reply_unsigned_type(TIPC_TLV_UNSIGNED, value);	/* fixed TLV type */
+}
+
+static inline struct sk_buff *tipc_cfg_reply_error_string(char *string)
+{
+	return tipc_cfg_reply_string_type(TIPC_TLV_ERROR_STRING, string);	/* fixed TLV type */
+}
+
+static inline struct sk_buff *tipc_cfg_reply_ultra_string(char *string)
+{
+	return tipc_cfg_reply_string_type(TIPC_TLV_ULTRA_STRING, string);	/* fixed TLV type */
+}
+
+struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, 
+				const void *req_tlv_area, int req_tlv_space, 
+				int headroom);
+
+void tipc_cfg_link_event(u32 addr, char *name, int up);
+int  tipc_cfg_init(void);
+void tipc_cfg_stop(void);
+
+#endif
diff --git a/net/tipc/core.c b/net/tipc/core.c
new file mode 100644
index 00000000000..3d0a8ee4e1d
--- /dev/null
+++ b/net/tipc/core.c
@@ -0,0 +1,284 @@
+/*
+ * net/tipc/core.c: TIPC module code
+ *
+ * Copyright (c) 2003-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/random.h>
+
+#include "core.h"
+#include "dbg.h"
+#include "ref.h"
+#include "net.h"
+#include "user_reg.h"
+#include "name_table.h"
+#include "subscr.h"
+#include "config.h"
+
+int  tipc_eth_media_start(void);
+void tipc_eth_media_stop(void);
+int  tipc_handler_start(void);
+void tipc_handler_stop(void);
+int  tipc_socket_init(void);
+void tipc_socket_stop(void);
+int  tipc_netlink_start(void);
+void tipc_netlink_stop(void);
+
+#define MOD_NAME "tipc_start: "
+
+#ifndef CONFIG_TIPC_ZONES
+#define CONFIG_TIPC_ZONES 3
+#endif
+
+#ifndef CONFIG_TIPC_CLUSTERS
+#define CONFIG_TIPC_CLUSTERS 1
+#endif
+
+#ifndef CONFIG_TIPC_NODES
+#define CONFIG_TIPC_NODES 255
+#endif
+
+#ifndef CONFIG_TIPC_SLAVE_NODES
+#define CONFIG_TIPC_SLAVE_NODES 0
+#endif
+
+#ifndef CONFIG_TIPC_PORTS
+#define CONFIG_TIPC_PORTS 8191
+#endif
+
+#ifndef CONFIG_TIPC_LOG
+#define CONFIG_TIPC_LOG 0
+#endif
+
+/* global variables used by multiple sub-systems within TIPC */
+
+int tipc_mode = TIPC_NOT_RUNNING;
+int tipc_random;
+atomic_t tipc_user_count = ATOMIC_INIT(0);
+
+const char tipc_alphabet[] = 
+	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_";
+
+/* configurable TIPC parameters */
+
+u32 tipc_own_addr;
+int tipc_max_zones;
+int tipc_max_clusters;
+int tipc_max_nodes;
+int tipc_max_slaves;
+int tipc_max_ports;
+int tipc_max_subscriptions;
+int tipc_max_publications;
+int tipc_net_id;
+int tipc_remote_management;
+
+
+int tipc_get_mode(void)
+{
+	return tipc_mode;	/* current value of the global tipc_mode */
+}
+
+/**
+ * tipc_core_stop_net - shut down TIPC networking sub-systems
+ */
+
+void tipc_core_stop_net(void)
+{
+	tipc_eth_media_stop();	/* Ethernet media is stopped before the net layer */
+	tipc_net_stop();
+}
+
+/**
+ * tipc_core_start_net - start TIPC networking sub-systems
+ */
+
+int tipc_core_start_net(void)
+{
+	int res = tipc_net_start();
+
+	if (!res)
+		res = tipc_eth_media_start();
+	if (res)
+		tipc_core_stop_net();	/* undo a partial start */
+	return res;
+}
+
+/**
+ * tipc_core_stop - switch TIPC from SINGLE NODE to NOT RUNNING mode
+ */
+
+void tipc_core_stop(void)
+{
+	if (tipc_mode != TIPC_NODE_MODE)
+		return;		/* only acts when in TIPC_NODE_MODE */
+
+	tipc_mode = TIPC_NOT_RUNNING;
+	/* mode is switched before the sub-systems below are stopped */
+	tipc_netlink_stop();
+	tipc_handler_stop();
+	tipc_cfg_stop();
+	tipc_subscr_stop();
+	tipc_reg_stop();
+	tipc_nametbl_stop();
+	tipc_ref_table_stop();
+	tipc_socket_stop();
+}
+
+/**
+ * tipc_core_start - switch TIPC from NOT RUNNING to SINGLE NODE mode
+ */
+
+int tipc_core_start(void)
+{
+	int res;
+
+	if (tipc_mode != TIPC_NOT_RUNNING)
+		return -ENOPROTOOPT;	/* refuse unless currently TIPC_NOT_RUNNING */
+
+	get_random_bytes(&tipc_random, sizeof(tipc_random));
+	tipc_mode = TIPC_NODE_MODE;
+	/* start each sub-system in turn; the first non-zero result ends the chain */
+	if ((res = tipc_handler_start()) || 
+	    (res = tipc_ref_table_init(tipc_max_ports + tipc_max_subscriptions,
+				       tipc_random)) ||
+	    (res = tipc_reg_start()) ||
+	    (res = tipc_nametbl_init()) ||
+            (res = tipc_k_signal((Handler)tipc_subscr_start, 0)) ||
+	    (res = tipc_k_signal((Handler)tipc_cfg_init, 0)) || 
+	    (res = tipc_netlink_start()) ||
+	    (res = tipc_socket_init())) {
+		tipc_core_stop();	/* roll back after a failed start */
+	}
+	return res;
+}
+
+
+static int __init tipc_init(void)
+{
+	int res;
+
+	tipc_log_reinit(CONFIG_TIPC_LOG);
+	info("Activated (compiled " __DATE__ " " __TIME__ ")\n");
+	/* Defaults: zone cap is 255, the same limit cfg_set_max_zones() enforces */
+	tipc_own_addr = 0;
+	tipc_remote_management = 1;
+	tipc_max_publications = 10000;
+	tipc_max_subscriptions = 2000;
+	tipc_max_ports = delimit(CONFIG_TIPC_PORTS, 127, 65536);
+	tipc_max_zones = delimit(CONFIG_TIPC_ZONES, 1, 255);
+	tipc_max_clusters = delimit(CONFIG_TIPC_CLUSTERS, 1, 1);
+	tipc_max_nodes = delimit(CONFIG_TIPC_NODES, 8, 2047);
+	tipc_max_slaves = delimit(CONFIG_TIPC_SLAVE_NODES, 0, 2047);
+	tipc_net_id = 4711;
+
+	if ((res = tipc_core_start()))
+		err("Unable to start in single node mode\n");
+	else
+		info("Started in single node mode\n");
+	return res;
+}
+
+static void __exit tipc_exit(void)
+{
+	tipc_core_stop_net();	/* networking sub-systems go down first */
+	tipc_core_stop();
+	info("Deactivated\n");
+	tipc_log_stop();	/* log is stopped last, after the final message */
+}
+
+module_init(tipc_init);
+module_exit(tipc_exit);
+
+MODULE_DESCRIPTION("TIPC: Transparent Inter Process Communication");
+MODULE_LICENSE("Dual BSD/GPL");
+
+/* Native TIPC API for kernel-space applications (see tipc.h) */
+
+EXPORT_SYMBOL(tipc_attach);
+EXPORT_SYMBOL(tipc_detach);
+EXPORT_SYMBOL(tipc_get_addr);
+EXPORT_SYMBOL(tipc_get_mode);
+EXPORT_SYMBOL(tipc_createport);
+EXPORT_SYMBOL(tipc_deleteport);
+EXPORT_SYMBOL(tipc_ownidentity);
+EXPORT_SYMBOL(tipc_portimportance);
+EXPORT_SYMBOL(tipc_set_portimportance);
+EXPORT_SYMBOL(tipc_portunreliable);
+EXPORT_SYMBOL(tipc_set_portunreliable);
+EXPORT_SYMBOL(tipc_portunreturnable);
+EXPORT_SYMBOL(tipc_set_portunreturnable);
+EXPORT_SYMBOL(tipc_publish);
+EXPORT_SYMBOL(tipc_withdraw);
+EXPORT_SYMBOL(tipc_connect2port);
+EXPORT_SYMBOL(tipc_disconnect);
+EXPORT_SYMBOL(tipc_shutdown);
+EXPORT_SYMBOL(tipc_isconnected);
+EXPORT_SYMBOL(tipc_peer);
+EXPORT_SYMBOL(tipc_ref_valid);
+EXPORT_SYMBOL(tipc_send);
+EXPORT_SYMBOL(tipc_send_buf);
+EXPORT_SYMBOL(tipc_send2name);
+EXPORT_SYMBOL(tipc_forward2name);
+EXPORT_SYMBOL(tipc_send_buf2name);
+EXPORT_SYMBOL(tipc_forward_buf2name);
+EXPORT_SYMBOL(tipc_send2port);
+EXPORT_SYMBOL(tipc_forward2port);
+EXPORT_SYMBOL(tipc_send_buf2port);
+EXPORT_SYMBOL(tipc_forward_buf2port);
+EXPORT_SYMBOL(tipc_multicast);
+/* EXPORT_SYMBOL(tipc_multicast_buf); not available yet */
+EXPORT_SYMBOL(tipc_ispublished);
+EXPORT_SYMBOL(tipc_available_nodes);
+
+/* TIPC API for external bearers (see tipc_bearer.h) */
+
+EXPORT_SYMBOL(tipc_block_bearer);
+EXPORT_SYMBOL(tipc_continue); 
+EXPORT_SYMBOL(tipc_disable_bearer);
+EXPORT_SYMBOL(tipc_enable_bearer);
+EXPORT_SYMBOL(tipc_recv_msg);
+EXPORT_SYMBOL(tipc_register_media); 
+
+/* TIPC API for external APIs (see tipc_port.h) */
+
+EXPORT_SYMBOL(tipc_createport_raw);
+EXPORT_SYMBOL(tipc_set_msg_option);
+EXPORT_SYMBOL(tipc_reject_msg);
+EXPORT_SYMBOL(tipc_send_buf_fast);
+EXPORT_SYMBOL(tipc_acknowledge);
+EXPORT_SYMBOL(tipc_get_port);
+EXPORT_SYMBOL(tipc_get_handle);
+
diff --git a/net/tipc/core.h b/net/tipc/core.h
new file mode 100644
index 00000000000..1f2e8b27a13
--- /dev/null
+++ b/net/tipc/core.h
@@ -0,0 +1,321 @@
+/*
+ * net/tipc/core.h: Include file for TIPC global declarations
+ * 
+ * Copyright (c) 2005-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_CORE_H
+#define _TIPC_CORE_H
+
+#include <linux/tipc.h>
+#include <linux/tipc_config.h>
+#include <net/tipc/tipc_msg.h>
+#include <net/tipc/tipc_port.h>
+#include <net/tipc/tipc_bearer.h>
+#include <net/tipc/tipc.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <asm/uaccess.h>
+#include <linux/interrupt.h>
+#include <asm/atomic.h>
+#include <asm/hardirq.h>
+#include <linux/netdevice.h>
+#include <linux/in.h>	
+#include <linux/list.h>
+#include <linux/vmalloc.h>
+
+/*
+ * TIPC debugging code
+ */
+
+#define assert(i)  BUG_ON(!(i))
+
+struct tipc_msg;
+extern struct print_buf *TIPC_CONS, *TIPC_LOG;
+extern struct print_buf *TIPC_TEE(struct print_buf *, struct print_buf *);
+void tipc_msg_print(struct print_buf*,struct tipc_msg *,const char*);
+void tipc_printf(struct print_buf *, const char *fmt, ...);
+void tipc_dump(struct print_buf*,const char *fmt, ...);
+
+#ifdef CONFIG_TIPC_DEBUG
+
+/*
+ * TIPC debug support included:
+ * - system messages are printed to TIPC_OUTPUT print buffer
+ * - debug messages are printed to DBG_OUTPUT print buffer
+ */
+
+#define err(fmt, arg...)  tipc_printf(TIPC_OUTPUT, KERN_ERR "TIPC: " fmt, ## arg)
+#define warn(fmt, arg...) tipc_printf(TIPC_OUTPUT, KERN_WARNING "TIPC: " fmt, ## arg)
+#define info(fmt, arg...) tipc_printf(TIPC_OUTPUT, KERN_NOTICE "TIPC: " fmt, ## arg)
+
+#define dbg(fmt, arg...)  do {if (DBG_OUTPUT) tipc_printf(DBG_OUTPUT, fmt, ## arg);} while(0)
+#define msg_dbg(msg, txt) do {if (DBG_OUTPUT) tipc_msg_print(DBG_OUTPUT, msg, txt);} while(0)
+#define dump(fmt, arg...) do {if (DBG_OUTPUT) tipc_dump(DBG_OUTPUT, fmt, ##arg);} while(0)
+
+
+/*	
+ * By default, TIPC_OUTPUT is defined to be system console and TIPC log buffer,
+ * while DBG_OUTPUT is the null print buffer.  These defaults can be changed
+ * here, or on a per .c file basis, by redefining these symbols.  The following
+ * print buffer options are available:
+ *
+ * NULL				: Output to null print buffer (i.e. print nowhere)
+ * TIPC_CONS			: Output to system console
+ * TIPC_LOG			: Output to TIPC log buffer 
+ * &buf				: Output to user-defined buffer (struct print_buf *)
+ * TIPC_TEE(&buf_a,&buf_b)	: Output to two print buffers (eg. TIPC_TEE(TIPC_CONS,TIPC_LOG) )
+ */
+
+#ifndef TIPC_OUTPUT
+#define TIPC_OUTPUT TIPC_TEE(TIPC_CONS,TIPC_LOG)
+#endif
+
+#ifndef DBG_OUTPUT
+#define DBG_OUTPUT NULL
+#endif
+
+#else
+
+#ifndef DBG_OUTPUT
+#define DBG_OUTPUT NULL
+#endif
+
+/*
+ * TIPC debug support not included:
+ * - system messages are printed to system console
+ * - debug messages are not printed
+ */
+
+#define err(fmt, arg...)  printk(KERN_ERR "TIPC: " fmt , ## arg)
+#define info(fmt, arg...) printk(KERN_INFO "TIPC: " fmt , ## arg)
+#define warn(fmt, arg...) printk(KERN_WARNING "TIPC: " fmt , ## arg)
+
+#define dbg(fmt, arg...) do {} while (0)
+#define msg_dbg(msg,txt) do {} while (0)
+#define dump(fmt,arg...) do {} while (0)
+
+#endif			  
+
+
+/* 
+ * TIPC-specific error codes
+ */
+
+#define ELINKCONG EAGAIN	/* link congestion <=> resource unavailable */
+
+/*
+ * Global configuration variables
+ */
+
+extern u32 tipc_own_addr;
+extern int tipc_max_zones;
+extern int tipc_max_clusters;
+extern int tipc_max_nodes;
+extern int tipc_max_slaves;
+extern int tipc_max_ports;
+extern int tipc_max_subscriptions;
+extern int tipc_max_publications;
+extern int tipc_net_id;
+extern int tipc_remote_management;
+
+/*
+ * Other global variables
+ */
+
+extern int tipc_mode;
+extern int tipc_random;
+extern const char tipc_alphabet[];
+extern atomic_t tipc_user_count;
+
+
+/*
+ * Routines available to privileged subsystems
+ */
+
+extern int  tipc_core_start(void);
+extern void tipc_core_stop(void);
+extern int  tipc_core_start_net(void);
+extern void tipc_core_stop_net(void);
+
+/* delimit - clamp val to the range [min, max]; the upper bound is tested first */
+static inline int delimit(int val, int min, int max)
+{
+	if (val <= max)
+		return (val < min) ? min : val;
+
+	return max;
+}
+
+
+/*
+ * TIPC timer and signal code
+ */
+
+typedef void (*Handler) (unsigned long);
+
+u32 tipc_k_signal(Handler routine, unsigned long argument);
+
+/**
+ * k_init_timer - prepare a timer for use
+ * @timer: timer structure to set up
+ * @routine: handler to invoke when the timer expires
+ * @argument: value handed to @routine when the timer expires
+ *
+ * Runs init_timer() on @timer and records the handler and its argument.
+ * Must be done before the timer is started for the first time.
+ */
+static inline void k_init_timer(struct timer_list *timer,
+				Handler routine, unsigned long argument)
+{
+	dbg("initializing timer %p\n", timer);
+	init_timer(timer);
+	timer->data = argument;
+	timer->function = routine;
+}
+
+/**
+ * k_start_timer - start a timer
+ * @timer: pointer to timer structure
+ * @msec: time to delay (in ms)
+ *
+ * Schedules a previously initialized timer for later execution.
+ * If timer is already running, the new timeout overrides the previous request.
+ *
+ * To ensure the timer doesn't expire before the specified delay elapses,
+ * the delay is converted to jiffies with msecs_to_jiffies() and then
+ * one additional jiffy is added to account for the fact that
+ * the starting time may be in the middle of the current jiffy.
+ */
+
+static inline void k_start_timer(struct timer_list *timer, unsigned long msec)
+{
+	dbg("starting timer %p for %lu\n", timer, msec);
+	mod_timer(timer, jiffies + msecs_to_jiffies(msec) + 1);
+}
+
+/**
+ * k_cancel_timer - cancel a timer
+ * @timer: pointer to timer structure
+ *
+ * Cancels a previously initialized timer by calling del_timer_sync().
+ * Can be called safely even if the timer is already inactive.
+ *
+ * WARNING: Must not be called when holding locks required by the timer's
+ *          timeout routine, otherwise deadlock can occur on SMP systems!
+ */
+
+static inline void k_cancel_timer(struct timer_list *timer)
+{
+	dbg("cancelling timer %p\n", timer);
+	del_timer_sync(timer);
+}
+
+/**
+ * k_term_timer - terminate a timer
+ * @timer: pointer to timer structure
+ *
+ * Marks the end of use of a previously initialized timer.  This version
+ * only emits a debug message and leaves @timer itself untouched.
+ *
+ * WARNING: Caller must ensure timer isn't currently running.
+ * (Do not "enhance" this routine to automatically cancel an active timer,
+ * otherwise deadlock can arise when a timeout routine calls k_term_timer.)
+ */
+
+static inline void k_term_timer(struct timer_list *timer)
+{
+	dbg("terminating timer %p\n", timer);
+}
+
+
+/*
+ * TIPC message buffer code
+ *
+ * TIPC message buffer headroom leaves room for 14 byte Ethernet header, 
+ * while ensuring TIPC header is word aligned for quicker access
+ */
+
+#define BUF_HEADROOM 16u 
+
+/* TIPC private data overlaid on the start of an sk_buff's cb[] area */
+struct tipc_skb_cb {
+	void *handle;	/* tipc_disc_recv_msg() casts this to struct bearer * */
+};
+/* TIPC_SKB_CB - view the start of __skb's cb[] as a struct tipc_skb_cb */
+#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0]))
+
+static inline struct tipc_msg *buf_msg(struct sk_buff *skb)
+{
+	return (struct tipc_msg *)skb->data;	/* message is viewed at skb->data */
+}
+
+/**
+ * buf_acquire - allocate a TIPC message buffer
+ * @size: message size (including TIPC header)
+ *
+ * Returns the new buffer with BUF_HEADROOM bytes reserved, or NULL on failure.
+ */
+static inline struct sk_buff *buf_acquire(u32 size)
+{
+	/* headroom plus message, rounded up to a multiple of 4 bytes */
+	unsigned int alloc_len = (BUF_HEADROOM + size + 3) & ~3u;
+	struct sk_buff *buf = alloc_skb(alloc_len, GFP_ATOMIC);
+
+	if (!buf)
+		return NULL;
+
+	skb_reserve(buf, BUF_HEADROOM);
+	skb_put(buf, size);
+	buf->next = NULL;
+	return buf;
+}
+
+/**
+ * buf_discard - frees a TIPC message buffer
+ * @skb: message buffer
+ *
+ * Releases the buffer with kfree_skb().  If passed NULL, just returns.
+ */
+
+static inline void buf_discard(struct sk_buff *skb)
+{
+	if (likely(skb != NULL))
+		kfree_skb(skb);
+}
+
+#endif			
diff --git a/net/tipc/dbg.c b/net/tipc/dbg.c
new file mode 100644
index 00000000000..4f4beefa783
--- /dev/null
+++ b/net/tipc/dbg.c
@@ -0,0 +1,395 @@
+/*
+ * net/tipc/dbg.c: TIPC print buffer routines for debugging
+ * 
+ * Copyright (c) 1996-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "config.h"
+#include "dbg.h"
+
+#define MAX_STRING 512
+
+static char print_string[MAX_STRING];
+static spinlock_t print_lock = SPIN_LOCK_UNLOCKED;
+
+static struct print_buf cons_buf = { NULL, 0, NULL, NULL };
+struct print_buf *TIPC_CONS = &cons_buf;
+
+static struct print_buf log_buf = { NULL, 0, NULL, NULL };
+struct print_buf *TIPC_LOG = &log_buf;
+
+
+#define FORMAT(PTR,LEN,FMT) \
+{\
+       va_list args;\
+       va_start(args, FMT);\
+       LEN = vsprintf(PTR, FMT, args);\
+       va_end(args);\
+       *(PTR + LEN) = '\0';\
+}
+
+/*
+ * Locking policy when using print buffers.
+ *
+ * 1) Routines of the form printbuf_XXX() rely on the caller to prevent
+ *    simultaneous use of the print buffer(s) being manipulated.
+ * 2) tipc_printf() uses 'print_lock' to prevent simultaneous use of
+ *    'print_string' and to protect its print buffer(s).
+ * 3) TIPC_TEE() uses 'print_lock' to protect its print buffer(s).
+ * 4) Routines of the form log_XXX() uses 'print_lock' to protect TIPC_LOG.
+ */
+
+/**
+ * tipc_printbuf_init - initialize print buffer to empty; ignored unless @pb
+ * and @raw are non-NULL and @sz > MAX_STRING.  Last byte is the wrap marker.
+ */
+void tipc_printbuf_init(struct print_buf *pb, char *raw, u32 sz)
+{
+	if (!pb || !raw || (sz <= MAX_STRING))
+		return;
+	pb->buf = raw;
+	pb->crs = raw;
+	pb->size = sz;
+	pb->next = NULL;
+	raw[0] = 0;
+	raw[sz - 1] = ~0;
+}
+
+/**
+ * tipc_printbuf_reset - re-run initialization on @pb's existing storage
+ */
+void tipc_printbuf_reset(struct print_buf *pb)
+{
+	if (!pb || !pb->buf)
+		return;
+	tipc_printbuf_init(pb, pb->buf, pb->size);
+}
+
+/**
+ * tipc_printbuf_empty - non-zero if @pb is NULL, unbacked, or holds no text
+ */
+int tipc_printbuf_empty(struct print_buf *pb)
+{
+	int unusable = !pb || !pb->buf;
+	return unusable || (pb->crs == pb->buf);
+}
+
+/**
+ * tipc_printbuf_validate - check for print buffer overflow
+ *
+ * A zero in the last byte of the buffer is taken to mean output has wrapped.
+ * In that case the contents are linearized through a temporary copy and the
+ * start of the text is overwritten with an error message; if the temporary
+ * buffer can't be allocated the buffer is reset to hold only the message.
+ * Returns length of print buffer data string (including trailing NUL), or 0.
+ */
+int tipc_printbuf_validate(struct print_buf *pb)
+{
+        char *err = "             *** PRINT BUFFER WRAPPED AROUND ***\n";
+        char *cp_buf;
+        struct print_buf cb;
+
+	if (!pb || !pb->buf)
+		return 0;
+
+	if (pb->buf[pb->size - 1] == '\0') {
+                cp_buf = kmalloc(pb->size, GFP_ATOMIC);
+                if (cp_buf != NULL){
+                        tipc_printbuf_init(&cb, cp_buf, pb->size);
+                        tipc_printbuf_move(&cb, pb);
+                        tipc_printbuf_move(pb, &cb);
+                        kfree(cp_buf);
+                        memcpy(pb->buf, err, strlen(err));
+                } else {
+                        tipc_printbuf_reset(pb);
+                        tipc_printf(pb, err);
+                }
+	}
+	return (pb->crs - pb->buf + 1);
+}
+
+/**
+ * tipc_printbuf_move - move print buffer contents to another print buffer
+ * Current contents of destination print buffer (if any) are discarded.
+ * Source print buffer becomes empty if a successful move occurs.
+ * NOTE(review): the "destination too small" path calls tipc_printf(), which
+ * takes print_lock -- confirm no caller reaches it with print_lock held.
+ */
+void tipc_printbuf_move(struct print_buf *pb_to, struct print_buf *pb_from)
+{
+	int len;
+
+	/* Handle the cases where contents can't be moved */
+
+	if (!pb_to || !pb_to->buf)
+		return;
+
+	if (!pb_from || !pb_from->buf) {
+		tipc_printbuf_reset(pb_to);
+		return;
+	}
+
+	if (pb_to->size < pb_from->size) {
+		tipc_printbuf_reset(pb_to);
+		tipc_printf(pb_to, "*** PRINT BUFFER OVERFLOW ***");
+		return;
+	}
+
+	/* Copy data from char after cursor to end (if used) */
+	len = pb_from->buf + pb_from->size - pb_from->crs - 2;
+	if ((pb_from->buf[pb_from->size-1] == 0) && (len > 0)) {
+		strcpy(pb_to->buf, pb_from->crs + 1);
+		pb_to->crs = pb_to->buf + len;
+	} else
+		pb_to->crs = pb_to->buf;
+
+	/* Copy data from start to cursor (always) */
+	len = pb_from->crs - pb_from->buf;
+	strcpy(pb_to->crs, pb_from->buf);
+	pb_to->crs += len;
+
+	tipc_printbuf_reset(pb_from);
+}
+
+/**
+ * tipc_printf - append formatted output to print buffer chain; output of
+ * MAX_STRING chars or more is replaced by a "too long" notice
+ */
+void tipc_printf(struct print_buf *pb, const char *fmt, ...)
+{
+	int chars_to_add, chars_left;
+	struct print_buf *pb_next;
+	va_list args;
+
+	spin_lock_bh(&print_lock);
+	va_start(args, fmt);
+	chars_to_add = vsnprintf(print_string, MAX_STRING, fmt, args);
+	va_end(args);
+	if (chars_to_add >= MAX_STRING) {
+		strcpy(print_string, "*** STRING TOO LONG ***");
+		chars_to_add = strlen(print_string);
+	}
+	while (pb) {
+		if (pb == TIPC_CONS)
+			printk("%s", print_string);	/* text is not a format */
+		else if (pb->buf) {
+			chars_left = pb->buf + pb->size - pb->crs - 1;
+			if (chars_to_add <= chars_left) {
+				strcpy(pb->crs, print_string);
+				pb->crs += chars_to_add;
+			} else {	/* wrap: fill to end, put rest at start */
+				memcpy(pb->crs, print_string, chars_left);
+				pb->crs[chars_left] = 0;
+				strcpy(pb->buf, print_string + chars_left);
+				pb->crs = pb->buf + chars_to_add - chars_left;
+			}
+		}
+		pb_next = pb->next;
+		pb->next = NULL;	/* chaining lasts for one output only */
+		pb = pb_next;
+	}
+	spin_unlock_bh(&print_lock);
+}
+
+/**
+ * TIPC_TEE - perform next output operation on both print buffers
+ * Normally returns b0 with b1 linked as the last buffer of b0's chain.
+ */
+struct print_buf *TIPC_TEE(struct print_buf *b0, struct print_buf *b1)
+{
+	struct print_buf *pb = b0;
+
+	if (!b0 || (b0 == b1))
+		return b1;	/* no distinct b0 to put in front of b1 */
+	if (!b1)
+		return b0;	/* nothing to append */
+
+	spin_lock_bh(&print_lock);
+	while (pb->next) {
+		if ((pb->next == b1) || (pb->next == b0))	/* unlink it */
+			pb->next = pb->next->next;
+		else
+			pb = pb->next;
+	}
+	pb->next = b1;	/* pb is now the last buffer in b0's chain */
+	spin_unlock_bh(&print_lock);
+	return b0;
+}
+
+/**
+ * print_to_console - write @len bytes at @crs to console, MAX_STRING at a time
+ * (each chunk is NUL-terminated in place temporarily, so crs[len] is written)
+ */
+static void print_to_console(char *crs, int len)
+{
+	int rest = len;
+
+	while (rest > 0) {
+		int sz = rest < MAX_STRING ? rest : MAX_STRING;
+		char c = crs[sz];
+
+		crs[sz] = 0;
+		printk("%s", crs);	/* buffer text must not act as a format */
+		crs[sz] = c;
+		rest -= sz;
+		crs += sz;
+	}
+}
+
+/**
+ * printbuf_dump - write print buffer contents to console, wrapped-around
+ * (older) text first, followed by the text from buffer start to cursor
+ */
+static void printbuf_dump(struct print_buf *pb)
+{
+	char *end = pb->buf + pb->size;
+	int tail_len = end - pb->crs - 2;
+	int head_len = pb->crs - pb->buf;
+
+	/* Text after the cursor is shown only if the last byte reads zero */
+	if ((end[-1] == 0) && (tail_len > 0))
+		print_to_console(pb->crs + 1, tail_len);
+
+	/* Text from the start of the buffer up to the cursor is always shown */
+	print_to_console(pb->buf, head_len);
+}
+
+/**
+ * tipc_dump - print a heading, then dump and empty each non-console print
+ * buffer in the chain starting at @pb
+ */
+void tipc_dump(struct print_buf *pb, const char *fmt, ...)
+{
+	struct print_buf *pb_next;
+	int len;
+	spin_lock_bh(&print_lock);
+	FORMAT(print_string, len, fmt);	/* TIPC_CONS->buf is NULL: can't use it */
+	printk("%s", print_string);
+	for (; pb; pb = pb_next) {
+		pb_next = pb->next;	/* saved: resetting pb clears pb->next */
+		if (pb == TIPC_CONS)
+			continue;
+		printk("\n---- Start of dump,%s log ----\n\n",
+		       (pb == TIPC_LOG) ? "global" : "local");
+		printbuf_dump(pb);
+		tipc_printbuf_reset(pb);
+		printk("\n-------- End of dump --------\n");
+	}
+	spin_unlock_bh(&print_lock);
+}
+
+/**
+ * tipc_log_stop - free up TIPC log print buffer
+ *
+ * Releases TIPC_LOG's storage (if any) and marks the log as having none,
+ * all under print_lock.
+ */
+void tipc_log_stop(void)
+{
+	spin_lock_bh(&print_lock);
+	kfree(TIPC_LOG->buf);		/* kfree(NULL) is a no-op */
+	TIPC_LOG->buf = NULL;
+	spin_unlock_bh(&print_lock);
+}
+
+/**
+ * tipc_log_reinit - set TIPC log print buffer to specified size (0 = none)
+ */
+void tipc_log_reinit(int log_size)
+{
+	char *raw;
+	tipc_log_stop();
+	if (!log_size)
+		return;
+	if (log_size <= MAX_STRING)
+		log_size = MAX_STRING + 1;
+	spin_lock_bh(&print_lock);
+	raw = kmalloc(log_size, GFP_ATOMIC);
+	tipc_printbuf_init(TIPC_LOG, raw, log_size);
+	spin_unlock_bh(&print_lock);
+}
+
+/**
+ * tipc_log_resize - reconfigure size of TIPC log buffer
+ * @req_tlv_area: request TLV holding the new size as a network-order u32
+ * @req_tlv_space: length of the request TLV area
+ */
+struct sk_buff *tipc_log_resize(const void *req_tlv_area, int req_tlv_space)
+{
+	u32 log_size;
+
+	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
+		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+
+	log_size = ntohl(*(u32 *)TLV_DATA(req_tlv_area));
+	if (log_size > 32768)
+		return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
+						   " (log size must be 0-32768)");
+	tipc_log_reinit(log_size);
+	return tipc_cfg_reply_none();
+}
+
+/**
+ * tipc_log_dump - capture TIPC log buffer contents in configuration message
+ * Returns the reply message; NULL if the reply for log text can't be allocated.
+ */
+struct sk_buff *tipc_log_dump(void)
+{
+	struct sk_buff *reply;
+
+	spin_lock_bh(&print_lock);
+	if (!TIPC_LOG->buf)
+		reply = tipc_cfg_reply_ultra_string("log not activated\n");
+	else if (tipc_printbuf_empty(TIPC_LOG))
+		reply = tipc_cfg_reply_ultra_string("log is empty\n");
+	else {
+		struct tlv_desc *rep_tlv;
+		struct print_buf pb;
+		int str_len;
+
+		str_len = min(TIPC_LOG->size, 32768u);
+		reply = tipc_cfg_reply_alloc(TLV_SPACE(str_len));
+		if (reply) {
+			rep_tlv = (struct tlv_desc *)reply->data;
+			tipc_printbuf_init(&pb, TLV_DATA(rep_tlv), str_len);
+			tipc_printbuf_move(&pb, TIPC_LOG);	/* empties TIPC_LOG */
+			str_len = strlen(TLV_DATA(rep_tlv)) + 1;
+			skb_put(reply, TLV_SPACE(str_len));
+			TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len);
+		}
+	}
+	spin_unlock_bh(&print_lock);
+	return reply;
+}
+
diff --git a/net/tipc/dbg.h b/net/tipc/dbg.h
new file mode 100644
index 00000000000..227f050d2a5
--- /dev/null
+++ b/net/tipc/dbg.h
@@ -0,0 +1,59 @@
+/*
+ * net/tipc/dbg.h: Include file for TIPC print buffer routines
+ * 
+ * Copyright (c) 1997-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_DBG_H
+#define _TIPC_DBG_H
+
+struct print_buf {
+	char *buf;		/* start of storage; NULL if buffer has none */
+	u32 size;		/* size of storage, in bytes */
+	char *crs;		/* cursor: where the next output is written */
+	struct print_buf *next;	/* next buffer in output chain (see TIPC_TEE) */
+};
+
+void tipc_printbuf_init(struct print_buf *pb, char *buf, u32 sz);
+void tipc_printbuf_reset(struct print_buf *pb);
+int  tipc_printbuf_empty(struct print_buf *pb);
+int  tipc_printbuf_validate(struct print_buf *pb);
+void tipc_printbuf_move(struct print_buf *pb_to, struct print_buf *pb_from);
+
+void tipc_log_reinit(int log_size);
+void tipc_log_stop(void);
+
+struct sk_buff *tipc_log_resize(const void *req_tlv_area, int req_tlv_space);
+struct sk_buff *tipc_log_dump(void);
+
+#endif
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
new file mode 100644
index 00000000000..53ba4630c10
--- /dev/null
+++ b/net/tipc/discover.c
@@ -0,0 +1,318 @@
+/*
+ * net/tipc/discover.c
+ * 
+ * Copyright (c) 2003-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "dbg.h"
+#include "link.h"
+#include "zone.h"
+#include "discover.h"
+#include "port.h"
+#include "name_table.h"
+
+#define TIPC_LINK_REQ_INIT	125	/* min delay during bearer start up */
+#define TIPC_LINK_REQ_FAST	2000	/* normal delay if bearer has no links */
+#define TIPC_LINK_REQ_SLOW	600000	/* normal delay if bearer has links */
+
+#if 0
+#define  GET_NODE_INFO         300
+#define  GET_NODE_INFO_RESULT  301
+#define  FORWARD_LINK_PROBE    302
+#define  LINK_REQUEST_REJECTED 303
+#define  LINK_REQUEST_ACCEPTED 304
+#define  DROP_LINK_REQUEST     305
+#define  CHECK_LINK_COUNT      306
+#endif
+
+/* 
+ * TODO: Most of the inter-cluster setup stuff should be
+ * rewritten, and be made conformant with specification.
+ */ 
+
+
+/**
+ * struct link_req - information about an ongoing link setup request
+ * @bearer: bearer issuing requests
+ * @dest: destination address for request messages
+ * @buf: request message to be (repeatedly) sent; discarded with the request
+ * @timer: timer governing period between requests
+ * @timer_intv: current interval between requests (in ms)
+ */
+struct link_req {
+	struct bearer *bearer;
+	struct tipc_media_addr dest;
+	struct sk_buff *buf;
+	struct timer_list timer;
+	unsigned int timer_intv;
+};
+
+
+#if 0
+int disc_create_link(const struct tipc_link_create *argv) 
+{
+	/* 
+	 * Code for inter cluster link setup here 
+	 */
+	return TIPC_OK;
+}
+#endif
+
+/*
+ * tipc_disc_link_event(): link event hook; currently a stub that does nothing
+ */
+
+void tipc_disc_link_event(u32 addr, char *name, int up) 
+{
+	if (in_own_cluster(addr))
+		return;
+	/*
+	 * Code for inter cluster link setup here
+	 */
+}
+
+/**
+ * tipc_disc_init_msg - build a link setup message
+ * @type: message type (request or response)
+ * @req_links: number of links associated with message
+ * @dest_domain: network domain of node(s) which should respond to message
+ * @b_ptr: ptr to bearer issuing message
+ *
+ * Returns the new message buffer, or NULL if none could be acquired.
+ */
+struct sk_buff *tipc_disc_init_msg(u32 type, u32 req_links,
+				   u32 dest_domain, struct bearer *b_ptr)
+{
+	struct tipc_msg *hdr;
+	struct sk_buff *skb;
+
+	skb = buf_acquire(DSC_H_SIZE);
+	if (!skb)
+		return NULL;
+
+	hdr = buf_msg(skb);
+	msg_init(hdr, LINK_CONFIG, type, TIPC_OK, DSC_H_SIZE, dest_domain);
+	msg_set_non_seq(hdr);
+	msg_set_req_links(hdr, req_links);
+	msg_set_dest_domain(hdr, dest_domain);
+	msg_set_bc_netid(hdr, tipc_net_id);
+	msg_set_media_addr(hdr, &b_ptr->publ.addr);
+	return skb;
+}
+
+/**
+ * tipc_disc_recv_msg - handle incoming link setup message (request or response)
+ * @buf: buffer containing message; always consumed (discarded) by this routine
+ * Only messages from nodes in this node's own cluster lead to any action.
+ */
+void tipc_disc_recv_msg(struct sk_buff *buf)
+{
+	struct bearer *b_ptr = (struct bearer *)TIPC_SKB_CB(buf)->handle;
+	struct link *link;
+	struct tipc_media_addr media_addr;
+	struct tipc_msg *msg = buf_msg(buf);
+	u32 dest = msg_dest_domain(msg);
+	u32 orig = msg_prevnode(msg);
+	u32 net_id = msg_bc_netid(msg);
+	u32 type = msg_type(msg);
+
+	msg_get_media_addr(msg,&media_addr);
+	msg_dbg(msg, "RECV:");
+	buf_discard(buf);	/* fields needed below were copied out above */
+
+	if (net_id != tipc_net_id)
+		return;
+	if (!tipc_addr_domain_valid(dest))
+		return;
+	if (!tipc_addr_node_valid(orig))
+		return;
+	if (orig == tipc_own_addr)
+		return;
+	if (!in_scope(dest, tipc_own_addr))
+		return;
+	if (is_slave(tipc_own_addr) && is_slave(orig))
+		return;
+	if (is_slave(orig) && !in_own_cluster(orig))
+		return;
+	if (in_own_cluster(orig)) {
+		/* Always accept link here */
+		struct sk_buff *rbuf;
+		struct tipc_media_addr *addr;
+		struct node *n_ptr = tipc_node_find(orig);
+		int link_up;
+		dbg(" in own cluster\n");
+		if (n_ptr == NULL) {
+			n_ptr = tipc_node_create(orig);
+		}
+		if (n_ptr == NULL) {
+			warn("Memory squeeze; Failed to create node\n");
+			return;
+		}
+		spin_lock_bh(&n_ptr->lock);
+		link = n_ptr->links[b_ptr->identity];
+		if (!link) {
+			dbg("creating link\n");
+			link = tipc_link_create(b_ptr, orig, &media_addr);
+			if (!link) {
+				spin_unlock_bh(&n_ptr->lock);                
+				return;
+			}
+		}
+		addr = &link->media_addr;
+		if (memcmp(addr, &media_addr, sizeof(*addr))) {
+			char addr_string[16];
+
+			warn("New bearer address for %s\n", 
+			     addr_string_fill(addr_string, orig));
+			memcpy(addr, &media_addr, sizeof(*addr));
+			tipc_link_reset(link);     
+		}
+		link_up = tipc_link_is_up(link);
+		spin_unlock_bh(&n_ptr->lock);                
+		if ((type == DSC_RESP_MSG) || link_up)	/* no reply needed */
+			return;
+		rbuf = tipc_disc_init_msg(DSC_RESP_MSG, 1, orig, b_ptr);
+		if (rbuf != NULL) {
+			msg_dbg(buf_msg(rbuf),"SEND:");
+			b_ptr->media->send_msg(rbuf, &b_ptr->publ, &media_addr);
+			buf_discard(rbuf);
+		}
+	}
+}
+
+/**
+ * tipc_disc_stop_link_req - stop sending periodic link setup requests
+ * @req: ptr to link request structure (NULL is ignored)
+ *
+ * Stops the request timer, then frees the request message and @req itself.
+ */
+void tipc_disc_stop_link_req(struct link_req *req)
+{
+	if (req) {
+		k_cancel_timer(&req->timer);
+		k_term_timer(&req->timer);
+		buf_discard(req->buf);
+		kfree(req);
+	}
+}
+
+/**
+ * tipc_disc_update_link_req - update frequency of periodic link setup requests
+ * @req: ptr to link request structure (NULL is ignored)
+ *
+ * At the slow rate with the bearer's nodes.count zero, switch to the fast
+ * rate; at the fast rate with a non-zero count, switch to the slow rate.
+ * The timer is restarted only when the rate changes.  Any other interval
+ * (not yet at a "normal" rate) is left untouched.
+ */
+void tipc_disc_update_link_req(struct link_req *req)
+{
+	unsigned int next_intv;
+
+	if (!req)
+		return;
+	if ((req->timer_intv == TIPC_LINK_REQ_SLOW) && !req->bearer->nodes.count)
+		next_intv = TIPC_LINK_REQ_FAST;
+	else if ((req->timer_intv == TIPC_LINK_REQ_FAST) && req->bearer->nodes.count)
+		next_intv = TIPC_LINK_REQ_SLOW;
+	else
+		return;	/* rate already suitable, or not yet "normal" */
+	req->timer_intv = next_intv;
+	k_start_timer(&req->timer, next_intv);
+}
+
+/**
+ * disc_timeout - send a periodic link setup request
+ * @data: the struct link_req pointer given to k_init_timer(), as an integer
+ *
+ * Called whenever a link setup request timer associated with a bearer expires.
+ * Takes the Handler signature so the timer core calls it with the type it
+ * was declared with, rather than through a mismatched function pointer cast.
+ */
+static void disc_timeout(unsigned long data)
+{
+	struct link_req *req = (struct link_req *)data;
+
+	spin_lock_bh(&req->bearer->publ.lock);
+	req->bearer->media->send_msg(req->buf, &req->bearer->publ, &req->dest);
+
+	/* back off (doubling) until a "normal" rate is reached, then hold it */
+	if ((req->timer_intv != TIPC_LINK_REQ_SLOW) &&
+	    (req->timer_intv != TIPC_LINK_REQ_FAST)) {
+		req->timer_intv *= 2;
+		if (req->timer_intv > TIPC_LINK_REQ_SLOW)
+			req->timer_intv = TIPC_LINK_REQ_SLOW;
+		if ((req->timer_intv == TIPC_LINK_REQ_FAST) &&
+		    (req->bearer->nodes.count))
+			req->timer_intv = TIPC_LINK_REQ_SLOW;
+	}
+	k_start_timer(&req->timer, req->timer_intv);
+	spin_unlock_bh(&req->bearer->publ.lock);
+}
+
+/**
+ * tipc_disc_init_link_req - start sending periodic link setup requests
+ * @b_ptr: ptr to bearer issuing requests
+ * @dest: destination address for request messages
+ * @dest_domain: network domain of node(s) which should respond to message
+ * @req_links: max number of desired links
+ * Returns pointer to link request structure, or NULL if unable to create.
+ */
+struct link_req *tipc_disc_init_link_req(struct bearer *b_ptr,
+					 const struct tipc_media_addr *dest,
+					 u32 dest_domain,
+					 u32 req_links)
+{
+	struct link_req *lreq = kmalloc(sizeof(*lreq), GFP_ATOMIC);
+
+	if (!lreq)
+		return NULL;
+
+	lreq->buf = tipc_disc_init_msg(DSC_REQ_MSG, req_links, dest_domain,
+				       b_ptr);
+	if (!lreq->buf)
+		goto free_req;
+
+	lreq->bearer = b_ptr;
+	lreq->timer_intv = TIPC_LINK_REQ_INIT;
+	memcpy(&lreq->dest, dest, sizeof(*dest));
+	k_init_timer(&lreq->timer, (Handler)disc_timeout, (unsigned long)lreq);
+	k_start_timer(&lreq->timer, lreq->timer_intv);
+	return lreq;
+
+free_req:
+	kfree(lreq);
+	return NULL;
+}
+
diff --git a/net/tipc/discover.h b/net/tipc/discover.h
new file mode 100644
index 00000000000..0454fd1ae7f
--- /dev/null
+++ b/net/tipc/discover.h
@@ -0,0 +1,58 @@
+/*
+ * net/tipc/discover.h
+ *
+ * Copyright (c) 2003-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_DISCOVER_H
+#define _TIPC_DISCOVER_H
+
+#include "core.h"
+
+struct link_req;
+
+struct link_req *tipc_disc_init_link_req(struct bearer *b_ptr, 
+					 const struct tipc_media_addr *dest,
+					 u32 dest_domain,
+					 u32 req_links);
+void tipc_disc_update_link_req(struct link_req *req);
+void tipc_disc_stop_link_req(struct link_req *req);
+
+void tipc_disc_recv_msg(struct sk_buff *buf);
+
+void tipc_disc_link_event(u32 addr, char *name, int up);
+#if 0
+int  disc_create_link(const struct tipc_link_create *argv);
+#endif
+
+#endif
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
new file mode 100644
index 00000000000..1f8d83b9c8b
--- /dev/null
+++ b/net/tipc/eth_media.c
@@ -0,0 +1,297 @@
+/*
+ * net/tipc/eth_media.c: Ethernet bearer support for TIPC
+ * 
+ * Copyright (c) 2001-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <net/tipc/tipc.h>
+#include <net/tipc/tipc_bearer.h>
+#include <net/tipc/tipc_msg.h>
+#include <linux/netdevice.h>
+
+#define MAX_ETH_BEARERS		2
+#define ETH_LINK_PRIORITY	TIPC_DEF_LINK_PRI
+#define ETH_LINK_TOLERANCE	TIPC_DEF_LINK_TOL
+#define ETH_LINK_WINDOW		TIPC_DEF_LINK_WIN
+
+/**
+ * struct eth_bearer - Ethernet bearer data structure
+ * @bearer: ptr to associated "generic" bearer structure
+ * @dev: ptr to associated Ethernet network device
+ * @tipc_packet_type: used in binding TIPC to Ethernet driver
+ */
+ 
+struct eth_bearer {
+	struct tipc_bearer *bearer;
+	struct net_device *dev;
+	struct packet_type tipc_packet_type;
+};
+
+static struct eth_bearer eth_bearers[MAX_ETH_BEARERS];
+static int eth_started = 0;
+static struct notifier_block notifier;
+
+/**
+ * send_msg - send a TIPC message out over an Ethernet interface 
+ */
+
+static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
+		    struct tipc_media_addr *dest)
+{
+	struct sk_buff *clone = skb_clone(buf, GFP_ATOMIC);
+	struct net_device *dev;
+
+	/* a failed clone is not reported; only the clone is ever queued */
+	if (!clone)
+		return TIPC_OK;
+
+	dev = ((struct eth_bearer *)tb_ptr->usr_handle)->dev;
+	clone->nh.raw = clone->data;
+	clone->dev = dev;
+	dev->hard_header(clone, dev, ETH_P_TIPC, &dest->dev_addr.eth_addr,
+			 dev->dev_addr, clone->len);
+	dev_queue_xmit(clone);
+	return TIPC_OK;
+}
+
+/**
+ * recv_msg - handle incoming TIPC message from an Ethernet interface
+ * 
+ * Routine truncates any Ethernet padding/CRC appended to the message,
+ * and ensures message size matches actual length
+ */
+
+static int recv_msg(struct sk_buff *buf, struct net_device *dev,
+		    struct packet_type *pt, struct net_device *orig_dev)
+{
+	struct eth_bearer *eb_ptr = (struct eth_bearer *)pt->af_packet_priv;
+	u32 size;
+
+	if (unlikely(!eb_ptr->bearer))
+		goto discard;		/* bearer has been disabled */
+
+	size = msg_size((struct tipc_msg *)buf->data);
+	skb_trim(buf, size);
+	if (unlikely(buf->len != size))
+		goto discard;		/* length disagrees with TIPC header */
+	buf->next = NULL;
+	tipc_recv_msg(buf, eb_ptr->bearer);
+	return TIPC_OK;
+discard:
+	kfree_skb(buf);
+	return TIPC_OK;
+}
+
+/**
+ * enable_bearer - attach TIPC bearer to an Ethernet interface 
+ */
+
+static int enable_bearer(struct tipc_bearer *tb_ptr)
+{
+	struct net_device *dev = dev_base;
+	struct eth_bearer *eb_ptr = &eth_bearers[0];
+	struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS];
+	char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1;
+
+	/* Find device with specified name.  The complete name must match;
+	 * comparing only strlen(dev->name) bytes would let device "eth1"
+	 * be selected for bearer "eth:eth10". */
+	while (dev && strcmp(dev->name, driver_name)) {
+		dev = dev->next;
+	}
+	if (!dev)
+		return -ENODEV;
+
+	/* Find Ethernet bearer for device (or create one);
+	 * -EDQUOT means every slot already belongs to another device */
+	for (;(eb_ptr != stop) && eb_ptr->dev && (eb_ptr->dev != dev); eb_ptr++);
+	if (eb_ptr == stop)
+		return -EDQUOT;
+	if (!eb_ptr->dev) {
+		eb_ptr->dev = dev;
+		eb_ptr->tipc_packet_type.type = __constant_htons(ETH_P_TIPC);
+		eb_ptr->tipc_packet_type.dev = dev;
+		eb_ptr->tipc_packet_type.func = recv_msg;
+		eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr;
+		INIT_LIST_HEAD(&(eb_ptr->tipc_packet_type.list));
+		dev_hold(dev);
+		dev_add_pack(&eb_ptr->tipc_packet_type);
+	}
+
+	/* Associate TIPC bearer with Ethernet bearer,
+	 * and publish the device's MTU and hardware address through it */
+	eb_ptr->bearer = tb_ptr;
+	tb_ptr->usr_handle = (void *)eb_ptr;
+	tb_ptr->mtu = dev->mtu;
+	tb_ptr->blocked = 0;
+	tb_ptr->addr.type = htonl(TIPC_MEDIA_TYPE_ETH);
+	memcpy(&tb_ptr->addr.dev_addr, &dev->dev_addr, ETH_ALEN);
+	return 0;
+}
+
+/**
+ * disable_bearer - detach TIPC bearer from an Ethernet interface 
+ *
+ * We really should do dev_remove_pack() here, but this function can not be
+ * called at tasklet level. => Use eth_bearer->bearer as a flag to throw away
+ * incoming buffers, & postpone dev_remove_pack() to eth_media_stop() on exit.
+ */
+
+static void disable_bearer(struct tipc_bearer *tb_ptr)
+{
+	((struct eth_bearer *)tb_ptr->usr_handle)->bearer = 0;
+}
+
+/**
+ * recv_notification - handle device updates from OS
+ *
+ * Change the state of the Ethernet bearer (if any) associated with the 
+ * specified device.
+ */
+
+static int recv_notification(struct notifier_block *nb, unsigned long evt,
+			     void *dv)
+{
+	struct net_device *dev = (struct net_device *)dv;
+	struct eth_bearer *eb_ptr = &eth_bearers[0];
+	struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS];
+
+	while (eb_ptr->dev != dev) {
+		if (++eb_ptr == stop)
+			return NOTIFY_DONE;	/* couldn't find device */
+	}
+	if (!eb_ptr->bearer)
+		return NOTIFY_DONE;		/* bearer had been disabled */
+
+	eb_ptr->bearer->mtu = dev->mtu;	/* refreshed on every event */
+
+	switch (evt) {
+	case NETDEV_CHANGE:
+		if (netif_carrier_ok(dev))
+			tipc_continue(eb_ptr->bearer);
+		else
+			tipc_block_bearer(eb_ptr->bearer->name);
+		break;
+	case NETDEV_UP:
+		tipc_continue(eb_ptr->bearer);
+		break;
+	case NETDEV_DOWN:
+		tipc_block_bearer(eb_ptr->bearer->name);
+		break;
+	case NETDEV_CHANGEMTU:
+	case NETDEV_CHANGEADDR:
+		tipc_block_bearer(eb_ptr->bearer->name);
+		tipc_continue(eb_ptr->bearer);
+		break;
+	case NETDEV_UNREGISTER:
+	case NETDEV_CHANGENAME:
+		tipc_disable_bearer(eb_ptr->bearer->name);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+/**
+ * eth_addr2str - convert Ethernet address to string
+ */
+
+static char *eth_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size)
+{
+	unchar *addr = (unchar *)&a->dev_addr;
+
+	if (str_size >= 18)	/* "xx:xx:xx:xx:xx:xx" plus terminating NUL */
+		sprintf(str_buf, "%02x:%02x:%02x:%02x:%02x:%02x",
+			addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
+	else
+		*str_buf = '\0';
+	return str_buf;
+}
+
+/**
+ * tipc_eth_media_start - activate Ethernet bearer support
+ *
+ * Register Ethernet media type with TIPC bearer code.  Also register
+ * with OS for notifications about device state changes.
+ */
+
+int tipc_eth_media_start(void)
+{                       
+	struct tipc_media_addr bcast_addr;
+	int res;
+
+	if (eth_started)
+		return -EINVAL;
+
+	memset(&bcast_addr, 0xff, sizeof(bcast_addr));
+	memset(eth_bearers, 0, sizeof(eth_bearers));
+
+	res = tipc_register_media(TIPC_MEDIA_TYPE_ETH, "eth",
+				  enable_bearer, disable_bearer, send_msg, 
+				  eth_addr2str, &bcast_addr, ETH_LINK_PRIORITY, 
+				  ETH_LINK_TOLERANCE, ETH_LINK_WINDOW);
+	if (res)
+		return res;
+
+	notifier.notifier_call = &recv_notification;
+	notifier.priority = 0;
+	res = register_netdevice_notifier(&notifier);
+	if (!res)
+		eth_started = 1;
+	return res;
+}
+
+/**
+ * tipc_eth_media_stop - deactivate Ethernet bearer support
+ */
+
+void tipc_eth_media_stop(void)
+{
+	int i;
+
+	if (!eth_started)
+		return;
+
+	unregister_netdevice_notifier(&notifier);
+	for (i = 0; i < MAX_ETH_BEARERS ; i++) {
+		if (eth_bearers[i].bearer) {
+			eth_bearers[i].bearer->blocked = 1;
+			eth_bearers[i].bearer = 0;
+		}
+		if (eth_bearers[i].dev) {
+			dev_remove_pack(&eth_bearers[i].tipc_packet_type);
+			dev_put(eth_bearers[i].dev);
+		}
+	}
+	memset(&eth_bearers, 0, sizeof(eth_bearers));
+	eth_started = 0;
+}
diff --git a/net/tipc/handler.c b/net/tipc/handler.c
new file mode 100644
index 00000000000..966f70a1b60
--- /dev/null
+++ b/net/tipc/handler.c
@@ -0,0 +1,132 @@
+/*
+ * net/tipc/handler.c: TIPC signal handling
+ * 
+ * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+
+struct queue_item {
+	struct list_head next_signal;
+	void (*handler) (unsigned long);
+	unsigned long data;
+};
+
+static kmem_cache_t *tipc_queue_item_cache;
+static struct list_head signal_queue_head;
+static spinlock_t qitem_lock = SPIN_LOCK_UNLOCKED;
+static int handler_enabled = 0;
+
+static void process_signal_queue(unsigned long dummy);
+
+static DECLARE_TASKLET_DISABLED(tipc_tasklet, process_signal_queue, 0);
+
+
+unsigned int tipc_k_signal(Handler routine, unsigned long argument)
+{
+	struct queue_item *item;
+
+	if (!handler_enabled) {
+		err("Signal request ignored by handler\n");
+		return -ENOPROTOOPT;
+	}
+
+	spin_lock_bh(&qitem_lock);
+	item = kmem_cache_alloc(tipc_queue_item_cache, GFP_ATOMIC);
+	if (!item) {
+		err("Signal queue out of memory\n");
+		spin_unlock_bh(&qitem_lock);
+		return -ENOMEM;
+	}
+	item->handler = routine;
+	item->data = argument;
+	list_add_tail(&item->next_signal, &signal_queue_head);
+	spin_unlock_bh(&qitem_lock);
+	tasklet_schedule(&tipc_tasklet);
+	return 0;
+}
+
+static void process_signal_queue(unsigned long dummy)
+{
+	struct queue_item *__volatile__ item;
+	struct list_head *l, *n;
+
+	spin_lock_bh(&qitem_lock);
+	list_for_each_safe(l, n, &signal_queue_head) {
+		item = list_entry(l, struct queue_item, next_signal);
+		list_del(&item->next_signal);
+		spin_unlock_bh(&qitem_lock);
+		item->handler(item->data);
+		spin_lock_bh(&qitem_lock);
+		kmem_cache_free(tipc_queue_item_cache, item);
+	}
+	spin_unlock_bh(&qitem_lock);
+}
+
+int tipc_handler_start(void)
+{
+	tipc_queue_item_cache = 
+		kmem_cache_create("tipc_queue_items", sizeof(struct queue_item),
+				  0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (!tipc_queue_item_cache)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&signal_queue_head);
+	tasklet_enable(&tipc_tasklet);
+	handler_enabled = 1;
+	return 0;
+}
+
+void tipc_handler_stop(void)
+{
+	struct list_head *l, *n;
+	struct queue_item *item; 
+
+	if (!handler_enabled)
+		return;
+
+	handler_enabled = 0;
+	tasklet_disable(&tipc_tasklet);
+	tasklet_kill(&tipc_tasklet);
+
+	spin_lock_bh(&qitem_lock);
+	list_for_each_safe(l, n, &signal_queue_head) {
+		item = list_entry(l, struct queue_item, next_signal);
+		list_del(&item->next_signal);
+		kmem_cache_free(tipc_queue_item_cache, item);
+	}
+	spin_unlock_bh(&qitem_lock);
+
+	kmem_cache_destroy(tipc_queue_item_cache);
+}
+
diff --git a/net/tipc/link.c b/net/tipc/link.c
new file mode 100644
index 00000000000..511872afa45
--- /dev/null
+++ b/net/tipc/link.c
@@ -0,0 +1,3166 @@
+/*
+ * net/tipc/link.c: TIPC link code
+ * 
+ * Copyright (c) 1996-2006, Ericsson AB
+ * Copyright (c) 2004-2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "dbg.h"
+#include "link.h"
+#include "net.h"
+#include "node.h"
+#include "port.h"
+#include "addr.h"
+#include "node_subscr.h"
+#include "name_distr.h"
+#include "bearer.h"
+#include "name_table.h"
+#include "discover.h"
+#include "config.h"
+#include "bcast.h"
+
+
+/* 
+ * Limit for deferred reception queue: 
+ */
+
+#define DEF_QUEUE_LIMIT 256u
+
+/* 
+ * Link state events: 
+ */
+
+#define  STARTING_EVT    856384768	/* link processing trigger */
+#define  TRAFFIC_MSG_EVT 560815u	/* rx'd ??? */
+#define  TIMEOUT_EVT     560817u	/* link timer expired */
+
+/*   
+ * The following two 'message types' are really just implementation
+ * data conveniently stored in the message header.
+ * They must not be considered part of the protocol.
+ */
+#define OPEN_MSG   0
+#define CLOSED_MSG 1
+
+/* 
+ * State value stored in 'exp_msg_count'
+ */
+
+#define START_CHANGEOVER 100000u
+
+/**
+ * struct link_name - deconstructed link name
+ * @addr_local: network address of node at this end
+ * @if_local: name of interface at this end
+ * @addr_peer: network address of node at far end
+ * @if_peer: name of interface at far end
+ */
+
+struct link_name {
+	u32 addr_local;
+	char if_local[TIPC_MAX_IF_NAME];
+	u32 addr_peer;
+	char if_peer[TIPC_MAX_IF_NAME];
+};
+
+#if 0
+
+/* LINK EVENT CODE IS NOT SUPPORTED AT PRESENT */
+
+/** 
+ * struct link_event - link up/down event notification
+ */
+
+struct link_event {
+	u32 addr;
+	int up;
+	void (*fcn)(u32, char *, int);
+	char name[TIPC_MAX_LINK_NAME];
+};
+
+#endif
+
+static void link_handle_out_of_seq_msg(struct link *l_ptr,
+				       struct sk_buff *buf);
+static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf);
+static int  link_recv_changeover_msg(struct link **l_ptr, struct sk_buff **buf);
+static void link_set_supervision_props(struct link *l_ptr, u32 tolerance);
+static int  link_send_sections_long(struct port *sender,
+				    struct iovec const *msg_sect,
+				    u32 num_sect, u32 destnode);
+static void link_check_defragm_bufs(struct link *l_ptr);
+static void link_state_event(struct link *l_ptr, u32 event);
+static void link_reset_statistics(struct link *l_ptr);
+static void link_print(struct link *l_ptr, struct print_buf *buf, 
+		       const char *str);
+
+/*
+ * Debugging code used by link routines only
+ *
+ * When debugging link problems on a system that has multiple links,
+ * the standard TIPC debugging routines may not be useful since they
+ * allow the output from multiple links to be intermixed.  For this reason
+ * routines of the form "dbg_link_XXX()" have been created that will capture
+ * debug info into a link's personal print buffer, which can then be dumped
+ * into the TIPC system log (LOG) upon request.
+ *
+ * To enable per-link debugging, use LINK_LOG_BUF_SIZE to specify the size
+ * of the print buffer used by each link.  If LINK_LOG_BUF_SIZE is set to 0,
+ * the dbg_link_XXX() routines simply send their output to the standard 
+ * debug print buffer (DBG_OUTPUT), if it has been defined; this can be useful
+ * when there is only a single link in the system being debugged.
+ *
+ * Notes:
+ * - When enabled, LINK_LOG_BUF_SIZE should be set to at least 1000 (bytes)
+ * - "l_ptr" must be valid when using dbg_link_XXX() macros  
+ */
+
+#define LINK_LOG_BUF_SIZE 0
+
+#define dbg_link(fmt, arg...)  do {if (LINK_LOG_BUF_SIZE) tipc_printf(&l_ptr->print_buf, fmt, ## arg); } while(0)
+#define dbg_link_msg(msg, txt) do {if (LINK_LOG_BUF_SIZE) tipc_msg_print(&l_ptr->print_buf, msg, txt); } while(0)
+#define dbg_link_state(txt) do {if (LINK_LOG_BUF_SIZE) link_print(l_ptr, &l_ptr->print_buf, txt); } while(0)
+#define dbg_link_dump() do { \
+	if (LINK_LOG_BUF_SIZE) { \
+		tipc_printf(LOG, "\n\nDumping link <%s>:\n", l_ptr->name); \
+		tipc_printbuf_move(LOG, &l_ptr->print_buf); \
+	} \
+} while (0)
+
+static inline void dbg_print_link(struct link *l_ptr, const char *str)
+{
+	if (DBG_OUTPUT)
+		link_print(l_ptr, DBG_OUTPUT, str);
+}
+
+static inline void dbg_print_buf_chain(struct sk_buff *root_buf)
+{
+	if (DBG_OUTPUT) {
+		struct sk_buff *buf = root_buf;
+
+		while (buf) {
+			msg_dbg(buf_msg(buf), "In chain: ");
+			buf = buf->next;
+		}
+	}
+}
+
+/*
+ *  Simple inlined link routines
+ */
+
+static inline unsigned int align(unsigned int i)
+{
+	return (i + 3) & ~3u;
+}
+
+static inline int link_working_working(struct link *l_ptr)
+{
+	return (l_ptr->state == WORKING_WORKING);
+}
+
+static inline int link_working_unknown(struct link *l_ptr)
+{
+	return (l_ptr->state == WORKING_UNKNOWN);
+}
+
+static inline int link_reset_unknown(struct link *l_ptr)
+{
+	return (l_ptr->state == RESET_UNKNOWN);
+}
+
+static inline int link_reset_reset(struct link *l_ptr)
+{
+	return (l_ptr->state == RESET_RESET);
+}
+
+static inline int link_blocked(struct link *l_ptr)
+{
+	return (l_ptr->exp_msg_count || l_ptr->blocked);
+}
+
+static inline int link_congested(struct link *l_ptr)
+{
+	return (l_ptr->out_queue_size >= l_ptr->queue_limit[0]);
+}
+
+static inline u32 link_max_pkt(struct link *l_ptr)
+{
+	return l_ptr->max_pkt;
+}
+
+static inline void link_init_max_pkt(struct link *l_ptr)
+{
+	/* bearer MTU rounded down to a multiple of 4, capped at MAX_MSG_SIZE */
+	u32 limit = (l_ptr->b_ptr->publ.mtu & ~3);
+
+	if (limit > MAX_MSG_SIZE)
+		limit = MAX_MSG_SIZE;
+	l_ptr->max_pkt_target = limit;
+
+	/* initial packet size never exceeds MAX_PKT_DEFAULT */
+	if (l_ptr->max_pkt_target >= MAX_PKT_DEFAULT)
+		l_ptr->max_pkt = MAX_PKT_DEFAULT;
+	else
+		l_ptr->max_pkt = l_ptr->max_pkt_target;
+	l_ptr->max_pkt_probes = 0;
+}
+
+static inline u32 link_next_sent(struct link *l_ptr)
+{
+	if (l_ptr->next_out)
+		return msg_seqno(buf_msg(l_ptr->next_out));
+	return mod(l_ptr->next_out_no);
+}
+
+static inline u32 link_last_sent(struct link *l_ptr)
+{
+	return mod(link_next_sent(l_ptr) - 1);
+}
+
+/*
+ *  Simple non-inlined link routines (i.e. referenced outside this file)
+ */
+
+int tipc_link_is_up(struct link *l_ptr)
+{
+	if (!l_ptr)
+		return 0;
+	return (link_working_working(l_ptr) || link_working_unknown(l_ptr));
+}
+
+int tipc_link_is_active(struct link *l_ptr)
+{
+	return ((l_ptr->owner->active_links[0] == l_ptr) ||
+		(l_ptr->owner->active_links[1] == l_ptr));
+}
+
+/**
+ * link_name_validate - validate & (optionally) deconstruct link name
+ * @name: ptr to link name string
+ * @name_parts: ptr to area for link name components (or NULL if not needed)
+ * 
+ * Returns 1 if link name is valid, otherwise 0.
+ */
+
+static int link_name_validate(const char *name, struct link_name *name_parts)
+{
+	char name_copy[TIPC_MAX_LINK_NAME];
+	char *addr_local;
+	char *if_local;
+	char *addr_peer;
+	char *if_peer;
+	char dummy;
+	u32 z_local, c_local, n_local;
+	u32 z_peer, c_peer, n_peer;
+	u32 if_local_len;
+	u32 if_peer_len;
+
+	/* copy link name & ensure length is OK */
+
+	name_copy[TIPC_MAX_LINK_NAME - 1] = 0;
+	/* need above in case non-Posix strncpy() doesn't pad with nulls */
+	strncpy(name_copy, name, TIPC_MAX_LINK_NAME);
+	if (name_copy[TIPC_MAX_LINK_NAME - 1] != 0)
+		return 0;
+
+	/* ensure all component parts of link name are present */
+
+	addr_local = name_copy;
+	if ((if_local = strchr(addr_local, ':')) == NULL)
+		return 0;
+	*(if_local++) = 0;
+	if ((addr_peer = strchr(if_local, '-')) == NULL)
+		return 0;
+	*(addr_peer++) = 0;
+	if_local_len = addr_peer - if_local;
+	if ((if_peer = strchr(addr_peer, ':')) == NULL)
+		return 0;
+	*(if_peer++) = 0;
+	if_peer_len = strlen(if_peer) + 1;
+
+	/* validate component parts of link name */
+
+	if ((sscanf(addr_local, "%u.%u.%u%c",
+		    &z_local, &c_local, &n_local, &dummy) != 3) ||
+	    (sscanf(addr_peer, "%u.%u.%u%c",
+		    &z_peer, &c_peer, &n_peer, &dummy) != 3) ||
+	    (z_local > 255) || (c_local > 4095) || (n_local > 4095) ||
+	    (z_peer  > 255) || (c_peer  > 4095) || (n_peer  > 4095) ||
+	    (if_local_len <= 1) || (if_local_len > TIPC_MAX_IF_NAME) || 
+	    (if_peer_len  <= 1) || (if_peer_len  > TIPC_MAX_IF_NAME) || 
+	    (strspn(if_local, tipc_alphabet) != (if_local_len - 1)) ||
+	    (strspn(if_peer, tipc_alphabet) != (if_peer_len - 1)))
+		return 0;
+
+	/* return link name components, if necessary */
+
+	if (name_parts) {
+		name_parts->addr_local = tipc_addr(z_local, c_local, n_local);
+		strcpy(name_parts->if_local, if_local);
+		name_parts->addr_peer = tipc_addr(z_peer, c_peer, n_peer);
+		strcpy(name_parts->if_peer, if_peer);
+	}
+	return 1;
+}
+
+/**
+ * link_timeout - handle expiration of link timer
+ * @l_ptr: pointer to link
+ * 
+ * This routine must not grab "tipc_net_lock" to avoid a potential deadlock conflict
+ * with tipc_link_delete().  (There is no risk that the node will be deleted by
+ * another thread because tipc_link_delete() always cancels the link timer before
+ * tipc_node_delete() is called.)
+ */
+
+static void link_timeout(struct link *l_ptr)
+{
+	tipc_node_lock(l_ptr->owner);
+
+	/* update counters used in statistical profiling of send traffic */
+
+	l_ptr->stats.accu_queue_sz += l_ptr->out_queue_size;
+	l_ptr->stats.queue_sz_counts++;
+
+	if (l_ptr->out_queue_size > l_ptr->stats.max_queue_sz)
+		l_ptr->stats.max_queue_sz = l_ptr->out_queue_size;
+
+	if (l_ptr->first_out) {
+		struct tipc_msg *msg = buf_msg(l_ptr->first_out);
+		u32 length = msg_size(msg);
+
+		if ((msg_user(msg) == MSG_FRAGMENTER)
+		    && (msg_type(msg) == FIRST_FRAGMENT)) {
+			length = msg_size(msg_get_wrapped(msg));
+		}
+		if (length) {
+			l_ptr->stats.msg_lengths_total += length;
+			l_ptr->stats.msg_length_counts++;
+			if (length <= 64)
+				l_ptr->stats.msg_length_profile[0]++;
+			else if (length <= 256)
+				l_ptr->stats.msg_length_profile[1]++;
+			else if (length <= 1024)
+				l_ptr->stats.msg_length_profile[2]++;
+			else if (length <= 4096)
+				l_ptr->stats.msg_length_profile[3]++;
+			else if (length <= 16384)
+				l_ptr->stats.msg_length_profile[4]++;
+			else if (length <= 32768)
+				l_ptr->stats.msg_length_profile[5]++;
+			else
+				l_ptr->stats.msg_length_profile[6]++;
+		}
+	}
+
+	/* do all other link processing performed on a periodic basis */
+
+	link_check_defragm_bufs(l_ptr);
+
+	link_state_event(l_ptr, TIMEOUT_EVT);
+
+	if (l_ptr->next_out)
+		tipc_link_push_queue(l_ptr);
+
+	tipc_node_unlock(l_ptr->owner);
+}
+
+static inline void link_set_timer(struct link *l_ptr, u32 time)
+{
+	k_start_timer(&l_ptr->timer, time);
+}
+
+/**
+ * tipc_link_create - create a new link
+ * @b_ptr: pointer to associated bearer
+ * @peer: network address of node at other end of link
+ * @media_addr: media address to use when sending messages over link
+ *
+ * Returns pointer to link, or NULL if the link could not be created.
+ */
+
+struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
+			      const struct tipc_media_addr *media_addr)
+{
+	struct link *l_ptr;
+	struct tipc_msg *msg;
+	char *if_name;
+
+	l_ptr = (struct link *)kmalloc(sizeof(*l_ptr), GFP_ATOMIC);
+	if (!l_ptr) {
+		warn("Memory squeeze; Failed to create link\n");
+		return NULL;
+	}
+	memset(l_ptr, 0, sizeof(*l_ptr));
+
+	l_ptr->addr = peer;
+	if_name = strchr(b_ptr->publ.name, ':') + 1;
+	sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:",
+		tipc_zone(tipc_own_addr), tipc_cluster(tipc_own_addr),
+		tipc_node(tipc_own_addr),
+		if_name,
+		tipc_zone(peer), tipc_cluster(peer), tipc_node(peer));
+		/* note: peer i/f is appended to link name by reset/activate */
+	memcpy(&l_ptr->media_addr, media_addr, sizeof(*media_addr));
+	k_init_timer(&l_ptr->timer, (Handler)link_timeout, (unsigned long)l_ptr);
+	l_ptr->checkpoint = 1;
+	l_ptr->b_ptr = b_ptr;
+	link_set_supervision_props(l_ptr, b_ptr->media->tolerance);
+	l_ptr->state = RESET_UNKNOWN;
+
+	l_ptr->pmsg = (struct tipc_msg *)&l_ptr->proto_msg;
+	msg = l_ptr->pmsg;
+	msg_init(msg, LINK_PROTOCOL, RESET_MSG, TIPC_OK, INT_H_SIZE, l_ptr->addr);
+	msg_set_size(msg, sizeof(l_ptr->proto_msg));
+	msg_set_session(msg, tipc_random);
+	msg_set_bearer_id(msg, b_ptr->identity);
+	strcpy((char *)msg_data(msg), if_name);
+
+	l_ptr->priority = b_ptr->priority;
+	tipc_link_set_queue_limits(l_ptr, b_ptr->media->window);
+
+	link_init_max_pkt(l_ptr);
+
+	l_ptr->next_out_no = 1;
+	INIT_LIST_HEAD(&l_ptr->waiting_ports);
+
+	link_reset_statistics(l_ptr);
+
+	l_ptr->owner = tipc_node_attach_link(l_ptr);
+	if (!l_ptr->owner) {
+		kfree(l_ptr);
+		return NULL;
+	}
+
+	if (LINK_LOG_BUF_SIZE) {
+		char *pb = kmalloc(LINK_LOG_BUF_SIZE, GFP_ATOMIC);
+
+		if (!pb) {
+			kfree(l_ptr);
+			warn("Memory squeeze; Failed to create link\n");
+			return NULL;
+		}
+		tipc_printbuf_init(&l_ptr->print_buf, pb, LINK_LOG_BUF_SIZE);
+	}
+
+	/* put link on bearer's list only after all failure exits are past */
+	list_add_tail(&l_ptr->link_list, &b_ptr->links);
+	tipc_k_signal((Handler)tipc_link_start, (unsigned long)l_ptr);
+
+	dbg("tipc_link_create(): tolerance = %u,cont intv = %u, abort_limit = %u\n",
+	    l_ptr->tolerance, l_ptr->continuity_interval, l_ptr->abort_limit);
+	return l_ptr;
+}
+
+/** 
+ * tipc_link_delete - delete a link
+ * @l_ptr: pointer to link
+ * 
+ * Note: 'tipc_net_lock' is write_locked, bearer is locked.
+ * This routine must not grab the node lock until after link timer cancellation
+ * to avoid a potential deadlock situation.  
+ */
+
+void tipc_link_delete(struct link *l_ptr)
+{
+	if (!l_ptr) {
+		err("Attempt to delete non-existent link\n");
+		return;
+	}
+
+	dbg("tipc_link_delete()\n");
+
+	k_cancel_timer(&l_ptr->timer);
+	
+	tipc_node_lock(l_ptr->owner);
+	tipc_link_reset(l_ptr);
+	tipc_node_detach_link(l_ptr->owner, l_ptr);
+	tipc_link_stop(l_ptr);
+	list_del_init(&l_ptr->link_list);
+	if (LINK_LOG_BUF_SIZE)
+		kfree(l_ptr->print_buf.buf);
+	tipc_node_unlock(l_ptr->owner);
+	k_term_timer(&l_ptr->timer);
+	kfree(l_ptr);
+}
+
+void tipc_link_start(struct link *l_ptr)
+{
+	dbg("tipc_link_start %p\n", l_ptr);	/* %p: argument is a pointer */
+	link_state_event(l_ptr, STARTING_EVT);	/* feed start event to link */
+}
+
+/**
+ * link_schedule_port - schedule port for deferred sending
+ * @l_ptr: pointer to link
+ * @origport: reference to sending port
+ * @sz: amount of data to be sent
+ *
+ * Schedules port for renewed sending of messages after link congestion
+ * has abated.  A port without a wakeup routine, or one already on a wait
+ * list, is left untouched.  Always returns -ELINKCONG.
+ */
+static int link_schedule_port(struct link *l_ptr, u32 origport, u32 sz)
+{
+	struct port *p_ptr;
+
+	spin_lock_bh(&tipc_port_list_lock);
+	p_ptr = tipc_port_lock(origport);
+	if (p_ptr) {
+		if (!p_ptr->wakeup)
+			goto exit;
+		if (!list_empty(&p_ptr->wait_list))
+			goto exit;
+		p_ptr->congested_link = l_ptr;
+		p_ptr->publ.congested = 1;
+		p_ptr->waiting_pkts = 1 + ((sz - 1) / link_max_pkt(l_ptr));
+		list_add_tail(&p_ptr->wait_list, &l_ptr->waiting_ports);
+		l_ptr->stats.link_congs++;
+exit:
+		tipc_port_unlock(p_ptr);
+	}
+	spin_unlock_bh(&tipc_port_list_lock);
+	return -ELINKCONG;
+}
+/* tipc_link_wakeup_ports - wake ports waiting on link, as send window allows */
+void tipc_link_wakeup_ports(struct link *l_ptr, int all)
+{
+	struct port *p_ptr;
+	struct port *temp_p_ptr;
+	int win = l_ptr->queue_limit[0] - l_ptr->out_queue_size;
+
+	if (all)
+		win = 100000;	/* large window: don't limit by queue space */
+	if (win <= 0)
+		return;
+	if (!spin_trylock_bh(&tipc_port_list_lock))
+		return;		/* lock is busy: skip this wakeup attempt */
+	if (link_congested(l_ptr))
+		goto exit;
+	list_for_each_entry_safe(p_ptr, temp_p_ptr, &l_ptr->waiting_ports, 
+				 wait_list) {
+		if (win <= 0)
+			break;
+		list_del_init(&p_ptr->wait_list);
+		p_ptr->congested_link = 0;
+		assert(p_ptr->wakeup);
+		spin_lock_bh(p_ptr->publ.lock);
+		p_ptr->publ.congested = 0;
+		p_ptr->wakeup(&p_ptr->publ);
+		win -= p_ptr->waiting_pkts;	/* charge port's pending packets */
+		spin_unlock_bh(p_ptr->publ.lock);
+	}
+
+exit:
+	spin_unlock_bh(&tipc_port_list_lock);
+}
+
+/**
+ * link_release_outqueue - free all buffers on link's outbound queue
+ * @l_ptr: pointer to link
+ * Resets 'first_out' and 'out_queue_size' only.
+ */
+static void link_release_outqueue(struct link *l_ptr)
+{
+	struct sk_buff *cur;
+	struct sk_buff *following;
+
+	for (cur = l_ptr->first_out; cur; cur = following) {
+		following = cur->next;	/* read link before freeing */
+		buf_discard(cur);
+	}
+
+	l_ptr->first_out = NULL;
+	l_ptr->out_queue_size = 0;
+}
+
+/**
+ * tipc_link_reset_fragments - free link's chain of inbound message fragments
+ * @l_ptr: pointer to link
+ * On return 'defragm_buf' is NULL.
+ */
+void tipc_link_reset_fragments(struct link *l_ptr)
+{
+	struct sk_buff *cur;
+	struct sk_buff *following;
+
+	for (cur = l_ptr->defragm_buf; cur; cur = following) {
+		following = cur->next;	/* read link before freeing */
+		buf_discard(cur);
+	}
+
+	l_ptr->defragm_buf = NULL;
+}
+
+/**
+ * tipc_link_stop - purge all inbound and outbound messages associated with link
+ * @l_ptr: pointer to link
+ * Queue pointers are cleared as their buffers are freed (no stale references).
+ */
+void tipc_link_stop(struct link *l_ptr)
+{
+	struct sk_buff *buf;
+	struct sk_buff *next;
+
+	buf = l_ptr->oldest_deferred_in;
+	while (buf) {
+		next = buf->next;
+		buf_discard(buf);
+		buf = next;
+	}
+	l_ptr->oldest_deferred_in = NULL;
+	l_ptr->newest_deferred_in = NULL;
+	l_ptr->deferred_inqueue_sz = 0;
+
+	link_release_outqueue(l_ptr);	/* frees chain, clears first_out */
+	l_ptr->last_out = NULL;
+	l_ptr->next_out = NULL;
+
+	tipc_link_reset_fragments(l_ptr);
+
+	buf_discard(l_ptr->proto_msg_queue);
+	l_ptr->proto_msg_queue = NULL;
+}
+
+#if 0
+/* LINK EVENT CODE IS NOT SUPPORTED AT PRESENT: the two handlers below are
+ * compiled out, and link_send_event() expands to an empty statement instead.
+ */
+static void link_recv_event(struct link_event *ev)
+{
+	ev->fcn(ev->addr, ev->name, ev->up);
+	kfree(ev);
+}
+
+static void link_send_event(void (*fcn)(u32 a, char *n, int up),
+			    struct link *l_ptr, int up)
+{
+	struct link_event *ev;
+	
+	ev = kmalloc(sizeof(*ev), GFP_ATOMIC);
+	if (!ev) {
+		warn("Link event allocation failure\n");
+		return;
+	}
+	ev->addr = l_ptr->addr;
+	ev->up = up;
+	ev->fcn = fcn;
+	memcpy(ev->name, l_ptr->name, TIPC_MAX_LINK_NAME);
+	tipc_k_signal((Handler)link_recv_event, (unsigned long)ev);
+}
+
+#else
+
+#define link_send_event(fcn, l_ptr, up) do { } while (0)
+
+#endif
+/* tipc_link_reset - put link in RESET_UNKNOWN; flush it if it was working */
+void tipc_link_reset(struct link *l_ptr)
+{
+	struct sk_buff *buf;
+	u32 prev_state = l_ptr->state;
+	u32 checkpoint = l_ptr->next_in_no;
+	
+	msg_set_session(l_ptr->pmsg, msg_session(l_ptr->pmsg) + 1);
+
+	/* Link is down, accept any session: */
+	l_ptr->peer_session = 0;
+
+	/* Prepare for max packet size negotiation */
+	link_init_max_pkt(l_ptr);
+	
+	l_ptr->state = RESET_UNKNOWN;
+	dbg_link_state("Resetting Link\n");
+
+	if ((prev_state == RESET_UNKNOWN) || (prev_state == RESET_RESET))
+		return;		/* link was already in a reset state */
+
+	tipc_node_link_down(l_ptr->owner, l_ptr);
+	tipc_bearer_remove_dest(l_ptr->b_ptr, l_ptr->addr);
+#if 0
+	tipc_printf(TIPC_CONS, "\nReset link <%s>\n", l_ptr->name);
+	dbg_link_dump();
+#endif
+	if (tipc_node_has_active_links(l_ptr->owner) &&
+	    l_ptr->owner->permit_changeover) {
+		l_ptr->reset_checkpoint = checkpoint;
+		l_ptr->exp_msg_count = START_CHANGEOVER;
+	}
+
+	/* Clean up all queues: */
+
+	link_release_outqueue(l_ptr);
+	buf_discard(l_ptr->proto_msg_queue);
+	l_ptr->proto_msg_queue = NULL;
+	buf = l_ptr->oldest_deferred_in;
+	while (buf) {
+		struct sk_buff *next = buf->next;
+		buf_discard(buf);
+		buf = next;
+	}
+	if (!list_empty(&l_ptr->waiting_ports))
+		tipc_link_wakeup_ports(l_ptr, 1);
+
+	l_ptr->retransm_queue_head = 0;
+	l_ptr->retransm_queue_size = 0;
+	l_ptr->last_out = NULL;
+	l_ptr->first_out = NULL;
+	l_ptr->next_out = NULL;
+	l_ptr->unacked_window = 0;
+	l_ptr->checkpoint = 1;
+	l_ptr->next_out_no = 1;
+	l_ptr->deferred_inqueue_sz = 0;
+	l_ptr->oldest_deferred_in = NULL;
+	l_ptr->newest_deferred_in = NULL;
+	l_ptr->fsm_msg_cnt = 0;
+	l_ptr->stale_count = 0;
+	link_reset_statistics(l_ptr);
+
+	link_send_event(tipc_cfg_link_event, l_ptr, 0);
+	if (!in_own_cluster(l_ptr->addr))
+		link_send_event(tipc_disc_link_event, l_ptr, 0);
+}
+
+/* link_activate - restart inbound numbering; register link with node/bearer */
+static void link_activate(struct link *l_ptr)
+{
+	l_ptr->next_in_no = 1;
+	tipc_node_link_up(l_ptr->owner, l_ptr);
+	tipc_bearer_add_dest(l_ptr->b_ptr, l_ptr->addr);
+	link_send_event(tipc_cfg_link_event, l_ptr, 1);
+	if (!in_own_cluster(l_ptr->addr))
+		link_send_event(tipc_disc_link_event, l_ptr, 1);
+}
+
+/**
+ * link_state_event - link finite state machine
+ * @l_ptr: pointer to link
+ * @event: state machine event to process
+ * Debug tags: WW/WU = WORKING_WORKING/_UNKNOWN, RU/RR = RESET_UNKNOWN/_RESET.
+ */
+static void link_state_event(struct link *l_ptr, unsigned event)
+{
+	struct link *other; 
+	u32 cont_intv = l_ptr->continuity_interval;
+
+	if (!l_ptr->started && (event != STARTING_EVT))
+		return;		/* Not yet. */
+
+	if (link_blocked(l_ptr)) {
+		if (event == TIMEOUT_EVT) {
+			link_set_timer(l_ptr, cont_intv);
+		}
+		return;	  /* Changeover going on */
+	}
+	dbg_link("STATE_EV: <%s> ", l_ptr->name);
+
+	switch (l_ptr->state) {
+	case WORKING_WORKING:
+		dbg_link("WW/");
+		switch (event) {
+		case TRAFFIC_MSG_EVT:
+			dbg_link("TRF-");
+			/* fall through */
+		case ACTIVATE_MSG:
+			dbg_link("ACT\n");
+			break;
+		case TIMEOUT_EVT:
+			dbg_link("TIM ");
+			if (l_ptr->next_in_no != l_ptr->checkpoint) {
+				l_ptr->checkpoint = l_ptr->next_in_no;
+				if (tipc_bclink_acks_missing(l_ptr->owner)) {
+					tipc_link_send_proto_msg(l_ptr, STATE_MSG, 
+								 0, 0, 0, 0, 0);
+					l_ptr->fsm_msg_cnt++;
+				} else if (l_ptr->max_pkt < l_ptr->max_pkt_target) {
+					tipc_link_send_proto_msg(l_ptr, STATE_MSG, 
+								 1, 0, 0, 0, 0);
+					l_ptr->fsm_msg_cnt++;
+				}
+				link_set_timer(l_ptr, cont_intv);
+				break;
+			}
+			dbg_link(" -> WU\n");
+			l_ptr->state = WORKING_UNKNOWN;
+			l_ptr->fsm_msg_cnt = 0;
+			tipc_link_send_proto_msg(l_ptr, STATE_MSG, 1, 0, 0, 0, 0);
+			l_ptr->fsm_msg_cnt++;
+			link_set_timer(l_ptr, cont_intv / 4);
+			break;
+		case RESET_MSG:
+			dbg_link("RES -> RR\n");
+			tipc_link_reset(l_ptr);
+			l_ptr->state = RESET_RESET;
+			l_ptr->fsm_msg_cnt = 0;
+			tipc_link_send_proto_msg(l_ptr, ACTIVATE_MSG, 0, 0, 0, 0, 0);
+			l_ptr->fsm_msg_cnt++;
+			link_set_timer(l_ptr, cont_intv);
+			break;
+		default:
+			err("Unknown link event %u in WW state\n", event);
+		}
+		break;
+	case WORKING_UNKNOWN:
+		dbg_link("WU/");
+		switch (event) {
+		case TRAFFIC_MSG_EVT:
+			dbg_link("TRF-");	/* fall through */
+		case ACTIVATE_MSG:
+			dbg_link("ACT -> WW\n");
+			l_ptr->state = WORKING_WORKING;
+			l_ptr->fsm_msg_cnt = 0;
+			link_set_timer(l_ptr, cont_intv);
+			break;
+		case RESET_MSG:
+			dbg_link("RES -> RR\n");
+			tipc_link_reset(l_ptr);
+			l_ptr->state = RESET_RESET;
+			l_ptr->fsm_msg_cnt = 0;
+			tipc_link_send_proto_msg(l_ptr, ACTIVATE_MSG, 0, 0, 0, 0, 0);
+			l_ptr->fsm_msg_cnt++;
+			link_set_timer(l_ptr, cont_intv);
+			break;
+		case TIMEOUT_EVT:
+			dbg_link("TIM ");
+			if (l_ptr->next_in_no != l_ptr->checkpoint) {
+				dbg_link("-> WW \n");
+				l_ptr->state = WORKING_WORKING;
+				l_ptr->fsm_msg_cnt = 0;
+				l_ptr->checkpoint = l_ptr->next_in_no;
+				if (tipc_bclink_acks_missing(l_ptr->owner)) {
+					tipc_link_send_proto_msg(l_ptr, STATE_MSG,
+								 0, 0, 0, 0, 0);
+					l_ptr->fsm_msg_cnt++;
+				}
+				link_set_timer(l_ptr, cont_intv);
+			} else if (l_ptr->fsm_msg_cnt < l_ptr->abort_limit) {
+				dbg_link("Probing %u/%u,timer = %u ms)\n",
+					 l_ptr->fsm_msg_cnt, l_ptr->abort_limit,
+					 cont_intv / 4);
+				tipc_link_send_proto_msg(l_ptr, STATE_MSG, 
+							 1, 0, 0, 0, 0);
+				l_ptr->fsm_msg_cnt++;
+				link_set_timer(l_ptr, cont_intv / 4);
+			} else {	/* Link has failed */
+				dbg_link("-> RU (%u probes unanswered)\n",
+					 l_ptr->fsm_msg_cnt);
+				tipc_link_reset(l_ptr);
+				l_ptr->state = RESET_UNKNOWN;
+				l_ptr->fsm_msg_cnt = 0;
+				tipc_link_send_proto_msg(l_ptr, RESET_MSG,
+							 0, 0, 0, 0, 0);
+				l_ptr->fsm_msg_cnt++;
+				link_set_timer(l_ptr, cont_intv);
+			}
+			break;
+		default:
+			err("Unknown link event %u in WU state\n", event);
+		}
+		break;
+	case RESET_UNKNOWN:
+		dbg_link("RU/");
+		switch (event) {
+		case TRAFFIC_MSG_EVT:
+			dbg_link("TRF-\n");
+			break;
+		case ACTIVATE_MSG:
+			other = l_ptr->owner->active_links[0];
+			if (other && link_working_unknown(other)) {
+				dbg_link("ACT\n");
+				break;
+			}
+			dbg_link("ACT -> WW\n");
+			l_ptr->state = WORKING_WORKING;
+			l_ptr->fsm_msg_cnt = 0;
+			link_activate(l_ptr);
+			tipc_link_send_proto_msg(l_ptr, STATE_MSG, 1, 0, 0, 0, 0);
+			l_ptr->fsm_msg_cnt++;
+			link_set_timer(l_ptr, cont_intv);
+			break;
+		case RESET_MSG:
+			dbg_link("RES \n");
+			dbg_link(" -> RR\n");
+			l_ptr->state = RESET_RESET;
+			l_ptr->fsm_msg_cnt = 0;
+			tipc_link_send_proto_msg(l_ptr, ACTIVATE_MSG, 1, 0, 0, 0, 0);
+			l_ptr->fsm_msg_cnt++;
+			link_set_timer(l_ptr, cont_intv);
+			break;
+		case STARTING_EVT:
+			dbg_link("START-");
+			l_ptr->started = 1;
+			/* fall through */
+		case TIMEOUT_EVT:
+			dbg_link("TIM \n");
+			tipc_link_send_proto_msg(l_ptr, RESET_MSG, 0, 0, 0, 0, 0);
+			l_ptr->fsm_msg_cnt++;
+			link_set_timer(l_ptr, cont_intv);
+			break;
+		default:
+			err("Unknown link event %u in RU state\n", event);
+		}
+		break;
+	case RESET_RESET:
+		dbg_link("RR/ ");
+		switch (event) {
+		case TRAFFIC_MSG_EVT:
+			dbg_link("TRF-");
+			/* fall through */
+		case ACTIVATE_MSG:
+			other = l_ptr->owner->active_links[0];
+			if (other && link_working_unknown(other)) {
+				dbg_link("ACT\n");
+				break;
+			}
+			dbg_link("ACT -> WW\n");
+			l_ptr->state = WORKING_WORKING;
+			l_ptr->fsm_msg_cnt = 0;
+			link_activate(l_ptr);
+			tipc_link_send_proto_msg(l_ptr, STATE_MSG, 1, 0, 0, 0, 0);
+			l_ptr->fsm_msg_cnt++;
+			link_set_timer(l_ptr, cont_intv);
+			break;
+		case RESET_MSG:
+			dbg_link("RES\n");
+			break;
+		case TIMEOUT_EVT:
+			dbg_link("TIM\n");
+			tipc_link_send_proto_msg(l_ptr, ACTIVATE_MSG, 0, 0, 0, 0, 0);
+			l_ptr->fsm_msg_cnt++;
+			link_set_timer(l_ptr, cont_intv);
+			dbg_link("fsm_msg_cnt %u\n", l_ptr->fsm_msg_cnt);
+			break;
+		default:
+			err("Unknown link event %u in RR state\n", event);
+		}
+		break;
+	default:
+		err("Unknown link state %u/%u\n", l_ptr->state, event);
+	}
+}
+
+/*
+ * link_bundle_buf(): Append 'buf' to an open bundle; returns 1 (consuming
+ * 'buf') or 0 if 'bundler' is not an open bundle or has no room for it.
+ */
+static int link_bundle_buf(struct link *l_ptr,
+			   struct sk_buff *bundler,
+			   struct sk_buff *buf)
+{
+	struct tipc_msg *bundler_msg = buf_msg(bundler);
+	struct tipc_msg *msg = buf_msg(buf);
+	u32 size = msg_size(msg);
+	u32 to_pos = align(msg_size(bundler_msg));
+	u32 pad = to_pos - msg_size(bundler_msg);
+
+	if ((msg_user(bundler_msg) != MSG_BUNDLER) ||
+	    (msg_type(bundler_msg) != OPEN_MSG))
+		return 0;
+	/* Add, don't subtract: "max_pkt - to_pos" wraps once to_pos > max_pkt */
+	if (((to_pos + align(size)) > link_max_pkt(l_ptr)) ||
+	    (skb_tailroom(bundler) < (pad + size)))
+		return 0;
+
+	skb_put(bundler, pad + size);
+	memcpy(bundler->data + to_pos, buf->data, size);
+	msg_set_size(bundler_msg, to_pos + size);
+	msg_set_msgcnt(bundler_msg, msg_msgcnt(bundler_msg) + 1);
+	dbg("Packed msg # %u(%u octets) into pos %u in buf(#%u)\n",
+	    msg_msgcnt(bundler_msg), size, to_pos, msg_seqno(bundler_msg));
+	msg_dbg(msg, "PACKD:");
+	buf_discard(buf);
+	l_ptr->stats.sent_bundled++;
+	return 1;
+}
+
+static inline void link_add_to_outqueue(struct link *l_ptr,
+					struct sk_buff *buf,
+					struct tipc_msg *msg)
+{
+	u32 acked = mod(l_ptr->next_in_no - 1);
+	u32 seq = mod(l_ptr->next_out_no++);
+	/* Stamp ack/seqno into header word 2, then append 'buf' to send queue */
+	msg_set_word(msg, 2, ((acked << 16) | seq));
+	msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
+	buf->next = NULL;
+	if (l_ptr->first_out)
+		l_ptr->last_out->next = buf;
+	else
+		l_ptr->first_out = buf;
+	l_ptr->last_out = buf;
+	l_ptr->out_queue_size++;
+}
+
+/*
+ * tipc_link_send_buf() is the 'full path' for messages, called from
+ * inside TIPC when the 'fast path' in tipc_send_buf has failed, and from
+ * link_send().  Returns the message's data size, or the result of
+ * link_schedule_port() / tipc_link_send_long_buf() when those take over.
+ */
+int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf)
+{
+	struct tipc_msg *msg = buf_msg(buf);
+	u32 size = msg_size(msg);
+	u32 dsz = msg_data_sz(msg);
+	u32 queue_size = l_ptr->out_queue_size;
+	u32 imp = msg_tot_importance(msg);
+	u32 queue_limit = l_ptr->queue_limit[imp];
+	u32 max_packet = link_max_pkt(l_ptr);
+
+	msg_set_prevnode(msg, tipc_own_addr);	/* If routed message */
+
+	/* Match msg importance against queue limits: */
+
+	if (unlikely(queue_size >= queue_limit)) {
+		if (imp <= TIPC_CRITICAL_IMPORTANCE) {
+			return link_schedule_port(l_ptr, msg_origport(msg),
+						  size);
+		}
+		msg_dbg(msg, "TIPC: Congestion, throwing away\n");
+		buf_discard(buf);
+		if (imp > CONN_MANAGER) {
+			warn("Resetting <%s>, send queue full\n", l_ptr->name);
+			tipc_link_reset(l_ptr);
+		}
+		return dsz;
+	}
+
+	/* Fragmentation needed ? */
+
+	if (size > max_packet)
+		return tipc_link_send_long_buf(l_ptr, buf);
+
+	/* Packet can be queued or sent: */
+
+	if (queue_size > l_ptr->stats.max_queue_sz)
+		l_ptr->stats.max_queue_sz = queue_size;
+
+	if (likely(!tipc_bearer_congested(l_ptr->b_ptr, l_ptr) &&
+		   !link_congested(l_ptr))) {
+		link_add_to_outqueue(l_ptr, buf, msg);
+
+		if (likely(tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr))) {
+			l_ptr->unacked_window = 0;
+		} else {
+			tipc_bearer_schedule(l_ptr->b_ptr, l_ptr);
+			l_ptr->stats.bearer_congs++;
+			l_ptr->next_out = buf;
+		}
+		return dsz;
+	}
+	/* Congestion: can message be bundled ?: */
+
+	if ((msg_user(msg) != CHANGEOVER_PROTOCOL) &&
+	    (msg_user(msg) != MSG_FRAGMENTER)) {
+
+		/* Try adding message to an existing bundle */
+
+		if (l_ptr->next_out &&
+		    link_bundle_buf(l_ptr, l_ptr->last_out, buf)) {
+			tipc_bearer_resolve_congestion(l_ptr->b_ptr, l_ptr);
+			return dsz;
+		}
+
+		/* Try creating a new bundle */
+
+		if (size <= max_packet * 2 / 3) {
+			struct sk_buff *bundler = buf_acquire(max_packet);
+			struct tipc_msg bundler_hdr;
+
+			if (bundler) {
+				msg_init(&bundler_hdr, MSG_BUNDLER, OPEN_MSG,
+					 TIPC_OK, INT_H_SIZE, l_ptr->addr);
+				memcpy(bundler->data, (unchar *)&bundler_hdr,
+				       INT_H_SIZE);
+				skb_trim(bundler, INT_H_SIZE);
+				link_bundle_buf(l_ptr, bundler, buf);
+				buf = bundler;
+				msg = buf_msg(buf);
+				l_ptr->stats.sent_bundles++;
+			}
+		}
+	}
+	if (!l_ptr->next_out)
+		l_ptr->next_out = buf;
+	link_add_to_outqueue(l_ptr, buf, msg);
+	tipc_bearer_resolve_congestion(l_ptr->b_ptr, l_ptr);
+	return dsz;
+}
+
+/*
+ * tipc_link_send(): same as tipc_link_send_buf(), but the link is not yet
+ * selected and the owner node is not locked.  For TIPC internal users, e.g.
+ * name distributor.  No node or no active link: drop 'buf', give -ELINKCONG.
+ */
+int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector)
+{
+	struct link *l_ptr;
+	struct node *n_ptr;
+	int res = -ELINKCONG;
+
+	read_lock_bh(&tipc_net_lock);
+	n_ptr = tipc_node_select(dest, selector);
+	if (n_ptr) {
+		tipc_node_lock(n_ptr);
+		l_ptr = n_ptr->active_links[selector & 1];
+		dbg("tipc_link_send: found link %x for dest %x\n", l_ptr, dest);
+		if (l_ptr)
+			res = tipc_link_send_buf(l_ptr, buf);
+		else
+			buf_discard(buf);	/* no active link: don't leak buf */
+		tipc_node_unlock(n_ptr);
+	} else {
+		dbg("Attempt to send msg to unknown node:\n");
+		msg_dbg(buf_msg(buf),">>>");
+		buf_discard(buf);
+	}
+	read_unlock_bh(&tipc_net_lock);
+	return res;
+}
+
+/*
+ * link_send_buf_fast: Entry for data messages where the destination link is
+ * known and the header is complete, inclusive total message length.
+ * Very time critical.  Link is locked.  Returns user data length, or the
+ * result of tipc_link_send_buf() if the fast path cannot be used.  Sets
+ * *used_max_pkt when an uncongested link's max packet size is exceeded.
+ */
+static inline int link_send_buf_fast(struct link *l_ptr, struct sk_buff *buf,
+				     u32 *used_max_pkt)
+{
+	struct tipc_msg *msg = buf_msg(buf);
+	int res = msg_data_sz(msg);
+
+	if (likely(!link_congested(l_ptr))) {
+		if (likely(msg_size(msg) <= link_max_pkt(l_ptr))) {
+			if (likely(list_empty(&l_ptr->b_ptr->cong_links))) {
+				link_add_to_outqueue(l_ptr, buf, msg);
+				if (likely(tipc_bearer_send(l_ptr->b_ptr, buf,
+							    &l_ptr->media_addr))) {
+					l_ptr->unacked_window = 0;
+					msg_dbg(msg,"SENT_FAST:");
+					return res;
+				}
+				dbg("failed sent fast...\n");
+				tipc_bearer_schedule(l_ptr->b_ptr, l_ptr);
+				l_ptr->stats.bearer_congs++;
+				l_ptr->next_out = buf;
+				return res;
+			}
+		}
+		else
+			*used_max_pkt = link_max_pkt(l_ptr);
+	}
+	return tipc_link_send_buf(l_ptr, buf);  /* All other cases */
+}
+
+/*
+ * tipc_send_buf_fast: Entry for data messages where the destination node is
+ * known and the header is complete, inclusive total message length.
+ * Local destinations are passed to tipc_port_recv_msg(); with no link to the
+ * node the message is rejected (TIPC_ERR_NO_NODE) and its data length returned.
+ */
+int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode)
+{
+	struct link *l_ptr;
+	struct node *n_ptr;
+	int res;
+	u32 selector = msg_origport(buf_msg(buf)) & 1;
+	u32 dummy;	/* receives the max_pkt hint, which is unused here */
+
+	if (destnode == tipc_own_addr)
+		return tipc_port_recv_msg(buf);
+
+	read_lock_bh(&tipc_net_lock);
+	n_ptr = tipc_node_select(destnode, selector);
+	if (likely(n_ptr)) {
+		tipc_node_lock(n_ptr);
+		l_ptr = n_ptr->active_links[selector];
+		dbg("send_fast: buf %x selected %x, destnode = %x\n",
+		    buf, l_ptr, destnode);
+		if (likely(l_ptr)) {
+			res = link_send_buf_fast(l_ptr, buf, &dummy);
+			tipc_node_unlock(n_ptr);
+			read_unlock_bh(&tipc_net_lock);
+			return res;
+		}
+		tipc_node_unlock(n_ptr);
+	}
+	read_unlock_bh(&tipc_net_lock);
+	res = msg_data_sz(buf_msg(buf));
+	tipc_reject_msg(buf, TIPC_ERR_NO_NODE);
+	return res;
+}
+
+
+/*
+ * tipc_link_send_sections_fast: Entry for messages where the destination
+ * processor is known and the header is complete, except for total message
+ * length.  Messages too big for one packet go to link_send_sections_long().
+ * Returns user data length or errno.
+ */
+int tipc_link_send_sections_fast(struct port *sender, 
+				 struct iovec const *msg_sect,
+				 const u32 num_sect, 
+				 u32 destaddr)
+{
+	struct tipc_msg *hdr = &sender->publ.phdr;
+	struct link *l_ptr;
+	struct sk_buff *buf;
+	struct node *node;
+	int res;
+	u32 selector = msg_origport(hdr) & 1;
+
+	assert(destaddr != tipc_own_addr);
+
+again:
+	/*
+	 * Try building message using port's max_pkt hint.
+	 * (Must not hold any locks while building message.)
+	 */
+
+	res = msg_build(hdr, msg_sect, num_sect, sender->max_pkt,
+			!sender->user_port, &buf);
+
+	read_lock_bh(&tipc_net_lock);
+	node = tipc_node_select(destaddr, selector);
+	if (likely(node)) {
+		tipc_node_lock(node);
+		l_ptr = node->active_links[selector];
+		if (likely(l_ptr)) {
+			if (likely(buf)) {
+				res = link_send_buf_fast(l_ptr, buf,
+							 &sender->max_pkt);
+				if (unlikely(res < 0))
+					buf_discard(buf);
+exit:
+				tipc_node_unlock(node);
+				read_unlock_bh(&tipc_net_lock);
+				return res;
+			}
+
+			/* Exit if build request was invalid */
+
+			if (unlikely(res < 0))
+				goto exit;
+
+			/* Exit if link (or bearer) is congested */
+
+			if (link_congested(l_ptr) || 
+			    !list_empty(&l_ptr->b_ptr->cong_links)) {
+				res = link_schedule_port(l_ptr,
+							 sender->publ.ref, res);
+				goto exit;
+			}
+
+			/* 
+			 * Message size exceeds max_pkt hint; update hint,
+			 * then re-try fast path or fragment the message
+			 */
+
+			sender->max_pkt = link_max_pkt(l_ptr);
+			tipc_node_unlock(node);
+			read_unlock_bh(&tipc_net_lock);
+
+			/* Locks dropped; rebuild if header + data now fit */
+			if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt)
+				goto again;
+
+			return link_send_sections_long(sender, msg_sect,
+						       num_sect, destaddr);
+		}
+		tipc_node_unlock(node);
+	}
+	read_unlock_bh(&tipc_net_lock);
+
+	/* Couldn't find a link to the destination node */
+
+	if (buf)
+		return tipc_reject_msg(buf, TIPC_ERR_NO_NODE);
+	if (res >= 0)
+		return tipc_port_reject_sections(sender, hdr, msg_sect, num_sect,
+						 TIPC_ERR_NO_NODE);
+	return res;
+}
+
+/*
+ * link_send_sections_long(): Entry for long messages where the
+ * destination node is known and the header is complete,
+ * inclusive total message length.
+ * Link and bearer congestion status have been checked to be ok,
+ * and are ignored if they change.
+ *
+ * Note that fragments do not use the full link MTU so that they won't have
+ * to undergo refragmentation if link changeover causes them to be sent
+ * over another link with an additional tunnel header added as prefix.
+ * (Refragmentation will still occur if the other link has a smaller MTU.)
+ *
+ * Returns user data length, -ENOMEM, -EFAULT, or the reject routine's result.
+ */
+static int link_send_sections_long(struct port *sender,
+				   struct iovec const *msg_sect,
+				   u32 num_sect,
+				   u32 destaddr)
+{
+	struct link *l_ptr;
+	struct node *node;
+	struct tipc_msg *hdr = &sender->publ.phdr;
+	u32 dsz = msg_data_sz(hdr);
+	u32 max_pkt, fragm_sz, rest, fragm_no;
+	struct tipc_msg fragm_hdr;
+	struct sk_buff *buf,*buf_chain,*prev;
+	u32 fragm_crs,fragm_rest,hsz,sect_rest;
+	const unchar *sect_crs;
+	int curr_sect;
+	int err = -EFAULT;	/* reported by the shared 'error' cleanup path */
+
+again:
+	fragm_no = 1;
+	max_pkt = sender->max_pkt - INT_H_SIZE;
+		/* leave room for tunnel header in case of link changeover */
+	fragm_sz = max_pkt - INT_H_SIZE;
+		/* leave room for fragmentation header in each fragment */
+	rest = dsz;
+	fragm_crs = 0;
+	fragm_rest = 0;
+	sect_rest = 0;
+	sect_crs = NULL;
+	curr_sect = -1;
+
+	/* Prepare reusable fragment header: */
+
+	msg_dbg(hdr, ">FRAGMENTING>");
+	msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT,
+		 TIPC_OK, INT_H_SIZE, msg_destnode(hdr));
+	msg_set_link_selector(&fragm_hdr, sender->publ.ref);
+	msg_set_size(&fragm_hdr, max_pkt);
+	msg_set_fragm_no(&fragm_hdr, 1);
+
+	/* Prepare header of first fragment: */
+
+	buf_chain = buf = buf_acquire(max_pkt);
+	if (!buf)
+		return -ENOMEM;
+	buf->next = NULL;
+	memcpy(buf->data, (unchar *)&fragm_hdr, INT_H_SIZE);
+	hsz = msg_hdr_sz(hdr);
+	memcpy(buf->data + INT_H_SIZE, (unchar *)hdr, hsz);
+	msg_dbg(buf_msg(buf), ">BUILD>");
+
+	/* Chop up message: */
+
+	fragm_crs = INT_H_SIZE + hsz;
+	fragm_rest = fragm_sz - hsz;
+
+	do {		/* For all sections */
+		u32 sz;
+
+		if (!sect_rest) {
+			sect_rest = msg_sect[++curr_sect].iov_len;
+			sect_crs = (const unchar *)msg_sect[curr_sect].iov_base;
+		}
+
+		if (sect_rest < fragm_rest)
+			sz = sect_rest;
+		else
+			sz = fragm_rest;
+
+		if (likely(!sender->user_port)) {
+			if (copy_from_user(buf->data + fragm_crs, sect_crs, sz)) {
+error:
+				for (; buf_chain; buf_chain = buf) {
+					buf = buf_chain->next;
+					buf_discard(buf_chain);
+				}
+				return err;
+			}
+		} else
+			memcpy(buf->data + fragm_crs, sect_crs, sz);
+
+		sect_crs += sz;
+		sect_rest -= sz;
+		fragm_crs += sz;
+		fragm_rest -= sz;
+		rest -= sz;
+
+		if (!fragm_rest && rest) {
+
+			/* Initiate new fragment: */
+			if (rest <= fragm_sz) {
+				fragm_sz = rest;
+				msg_set_type(&fragm_hdr,LAST_FRAGMENT);
+			} else {
+				msg_set_type(&fragm_hdr, FRAGMENT);
+			}
+			msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE);
+			msg_set_fragm_no(&fragm_hdr, ++fragm_no);
+			prev = buf;
+			buf = buf_acquire(fragm_sz + INT_H_SIZE);
+			if (!buf) {
+				err = -ENOMEM;	/* allocation, not copy, failed */
+				goto error;
+			}
+			buf->next = NULL;
+			prev->next = buf;
+			memcpy(buf->data, (unchar *)&fragm_hdr, INT_H_SIZE);
+			fragm_crs = INT_H_SIZE;
+			fragm_rest = fragm_sz;
+			msg_dbg(buf_msg(buf),"  >BUILD>");
+		}
+	} while (rest > 0);
+
+	/*
+	 * Now we have a buffer chain. Select a link and check
+	 * that packet size is still OK
+	 */
+	node = tipc_node_select(destaddr, sender->publ.ref & 1);
+	if (likely(node)) {
+		tipc_node_lock(node);
+		l_ptr = node->active_links[sender->publ.ref & 1];
+		if (!l_ptr) {
+			tipc_node_unlock(node);
+			goto reject;
+		}
+		if (link_max_pkt(l_ptr) < max_pkt) {
+			sender->max_pkt = link_max_pkt(l_ptr);
+			tipc_node_unlock(node);
+			for (; buf_chain; buf_chain = buf) {
+				buf = buf_chain->next;
+				buf_discard(buf_chain);
+			}
+			goto again;
+		}
+	} else {
+reject:
+		for (; buf_chain; buf_chain = buf) {
+			buf = buf_chain->next;
+			buf_discard(buf_chain);
+		}
+		return tipc_port_reject_sections(sender, hdr, msg_sect, num_sect,
+						 TIPC_ERR_NO_NODE);
+	}
+
+	/* Append whole chain to send queue: */
+
+	buf = buf_chain;
+	l_ptr->long_msg_seq_no = mod(l_ptr->long_msg_seq_no + 1);
+	if (!l_ptr->next_out)
+		l_ptr->next_out = buf_chain;
+	l_ptr->stats.sent_fragmented++;
+	while (buf) {
+		struct sk_buff *next = buf->next;
+		struct tipc_msg *msg = buf_msg(buf);
+
+		l_ptr->stats.sent_fragments++;
+		msg_set_long_msgno(msg, l_ptr->long_msg_seq_no);
+		link_add_to_outqueue(l_ptr, buf, msg);
+		msg_dbg(msg, ">ADD>");
+		buf = next;
+	}
+
+	/* Send it, if possible: */
+
+	tipc_link_push_queue(l_ptr);
+	tipc_node_unlock(node);
+	return dsz;
+}
+
+/* tipc_link_push_packet: Push one unsent packet to the media.  Returns
+ * TIPC_OK if a packet was sent, PUSH_FAILED if tipc_bearer_send() failed,
+ * or PUSH_FINISHED if nothing could be pushed. */
+u32 tipc_link_push_packet(struct link *l_ptr)
+{
+	struct sk_buff *buf = l_ptr->first_out;
+	u32 r_q_size = l_ptr->retransm_queue_size;
+	u32 r_q_head = l_ptr->retransm_queue_head;
+
+	/* Step to position where retransmission failed, if any,    */
+	/* consider that buffers may have been released in meantime */
+
+	if (r_q_size && buf) {
+		u32 last = lesser(mod(r_q_head + r_q_size), 
+				  link_last_sent(l_ptr));
+		u32 first = msg_seqno(buf_msg(buf));
+
+		while (buf && less(first, r_q_head)) {
+			first = mod(first + 1);
+			buf = buf->next;
+		}
+		l_ptr->retransm_queue_head = r_q_head = first;
+		l_ptr->retransm_queue_size = r_q_size = mod(last - first);
+	}
+
+	/* Continue retransmission now, if there is anything: */
+
+	if (r_q_size && buf && !skb_cloned(buf)) {
+		msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1));
+		msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in); 
+		if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) {
+			msg_dbg(buf_msg(buf), ">DEF-RETR>");
+			l_ptr->retransm_queue_head = mod(++r_q_head);
+			l_ptr->retransm_queue_size = --r_q_size;
+			l_ptr->stats.retransmitted++;
+			return TIPC_OK;
+		} else {
+			l_ptr->stats.bearer_congs++;
+			msg_dbg(buf_msg(buf), "|>DEF-RETR>");
+			return PUSH_FAILED;
+		}
+	}
+
+	/* Send deferred protocol message, if any: */
+
+	buf = l_ptr->proto_msg_queue;
+	if (buf) {
+		msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1));
+		msg_set_bcast_ack(buf_msg(buf),l_ptr->owner->bclink.last_in); 
+		if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) {
+			msg_dbg(buf_msg(buf), ">DEF-PROT>");
+			l_ptr->unacked_window = 0;
+			buf_discard(buf);
+			l_ptr->proto_msg_queue = 0;
+			return TIPC_OK;
+		} else {
+			msg_dbg(buf_msg(buf), "|>DEF-PROT>");
+			l_ptr->stats.bearer_congs++;
+			return PUSH_FAILED;
+		}
+	}
+
+	/* Send one deferred data message, if send window not full: */
+
+	buf = l_ptr->next_out;
+	if (buf) {
+		struct tipc_msg *msg = buf_msg(buf);
+		u32 next = msg_seqno(msg);
+		u32 first = msg_seqno(buf_msg(l_ptr->first_out));
+
+		if (mod(next - first) < l_ptr->queue_limit[0]) {
+			msg_set_ack(msg, mod(l_ptr->next_in_no - 1));
+			msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); 
+			if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) {
+				if (msg_user(msg) == MSG_BUNDLER)
+					msg_set_type(msg, CLOSED_MSG);
+				msg_dbg(msg, ">PUSH-DATA>");
+				l_ptr->next_out = buf->next;
+				return TIPC_OK;
+			} else {
+				msg_dbg(msg, "|PUSH-DATA|");
+				l_ptr->stats.bearer_congs++;
+				return PUSH_FAILED;
+			}
+		}
+	}
+	return PUSH_FINISHED;
+}
+
+/*
+ * tipc_link_push_queue(): push out the unsent messages of a link where
+ *                         congestion has abated. Node is locked
+ */
+void tipc_link_push_queue(struct link *l_ptr)
+{
+	u32 status;
+
+	if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr))
+		return;
+
+	/* Keep pushing until a push fails or nothing is left to send */
+	while ((status = tipc_link_push_packet(l_ptr)) == TIPC_OK)
+		;
+
+	if (status == PUSH_FAILED)
+		tipc_bearer_schedule(l_ptr->b_ptr, l_ptr);
+}
+/* tipc_link_retransmit - resend up to 'retransmits' packets, from 'buf' on */
+void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf, 
+			  u32 retransmits)
+{
+	struct tipc_msg *msg;
+
+	dbg("Retransmitting %u in link %x\n", retransmits, l_ptr);
+	/* Bearer congested: only record what is to be resent, then return */
+	if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr) && buf && !skb_cloned(buf)) {
+		msg_dbg(buf_msg(buf), ">NO_RETR->BCONG>");
+		dbg_print_link(l_ptr, "   ");
+		l_ptr->retransm_queue_head = msg_seqno(buf_msg(buf));
+		l_ptr->retransm_queue_size = retransmits;
+		return;
+	}
+	while (retransmits && (buf != l_ptr->next_out) && buf && !skb_cloned(buf)) {
+		msg = buf_msg(buf);
+		msg_set_ack(msg, mod(l_ptr->next_in_no - 1));
+		msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); 
+		if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) {
+                        /* Catch if retransmissions fail repeatedly: */
+                        if (l_ptr->last_retransmitted == msg_seqno(msg)) {
+                                if (++l_ptr->stale_count > 100) {
+                                        tipc_msg_print(TIPC_CONS, buf_msg(buf), ">RETR>");
+                                        info("...Retransmitted %u times\n",
+					     l_ptr->stale_count);
+                                        link_print(l_ptr, TIPC_CONS, "Resetting Link\n");;
+                                        tipc_link_reset(l_ptr);
+                                        break;
+                                }
+                        } else {
+                                l_ptr->stale_count = 0;
+                        }
+                        l_ptr->last_retransmitted = msg_seqno(msg);
+
+			msg_dbg(buf_msg(buf), ">RETR>");
+			buf = buf->next;
+			retransmits--;
+			l_ptr->stats.retransmitted++;
+		} else {
+			tipc_bearer_schedule(l_ptr->b_ptr, l_ptr);
+			l_ptr->stats.bearer_congs++;
+			l_ptr->retransm_queue_head = msg_seqno(buf_msg(buf));
+			l_ptr->retransm_queue_size = retransmits;
+			return;
+		}
+	}
+	l_ptr->retransm_queue_head = l_ptr->retransm_queue_size = 0;
+}
+
+/*
+ * link_recv_non_seq: Receive packets which are outside the link sequence
+ * flow.  LINK_CONFIG messages are passed to tipc_disc_recv_msg(), every
+ * other message to tipc_bclink_recv_pkt().
+ */
+
+static void link_recv_non_seq(struct sk_buff *buf)
+{
+	if (msg_user(buf_msg(buf)) != LINK_CONFIG) {
+		tipc_bclink_recv_pkt(buf);
+		return;
+	}
+	tipc_disc_recv_msg(buf);
+}
+
+/**
+ * link_insert_deferred_queue - insert deferred messages back into receive chain
+ * @l_ptr: pointer to link
+ * @buf: current head of receive chain; the resulting head is returned
+ */
+static struct sk_buff *link_insert_deferred_queue(struct link *l_ptr,
+						  struct sk_buff *buf)
+{
+	struct sk_buff *deferred = l_ptr->oldest_deferred_in;
+
+	if (!deferred)
+		return buf;
+	if (msg_seqno(buf_msg(deferred)) != mod(l_ptr->next_in_no))
+		return buf;	/* oldest deferred packet is not next in line */
+
+	/* Splice: deferred chain first, then the caller's chain */
+	l_ptr->newest_deferred_in->next = buf;
+	l_ptr->oldest_deferred_in = NULL;
+	l_ptr->deferred_inqueue_sz = 0;
+	return deferred;
+}
+/**
+ * tipc_recv_msg - process a chain of buffers received on one bearer
+ * @head: first buffer of the chain; buffers are linked through ->next
+ * @tb_ptr: bearer the chain arrived on (cast to struct bearer below)
+ *
+ * tipc_net_lock is read-held for the whole call.  Each buffer is taken off
+ * the chain and then either handed to a handler, put back at the front of
+ * the chain, or discarded at the "cont" label.  The sending node is looked
+ * up with msg_prevnode(); its lock is taken with tipc_node_lock() and
+ * released again on every path before the next buffer is examined.
+ */
+void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
+{
+	read_lock_bh(&tipc_net_lock);
+	while (head) {
+		struct bearer *b_ptr;
+		struct node *n_ptr;
+		struct link *l_ptr;
+		struct sk_buff *crs;
+		struct sk_buff *buf = head;
+		struct tipc_msg *msg = buf_msg(buf);
+		u32 seq_no = msg_seqno(msg);
+		u32 ackd = msg_ack(msg);
+		u32 released = 0;
+		int type;
+
+		b_ptr = (struct bearer *)tb_ptr;
+		TIPC_SKB_CB(buf)->handle = b_ptr;
+
+		head = head->next;	/* take buf off the chain */
+		if (unlikely(msg_version(msg) != TIPC_VERSION))
+			goto cont;
+#if 0	/* user filter compiled out: the msg_dbg() below is unconditional */
+		if (msg_user(msg) != LINK_PROTOCOL)
+#endif
+			msg_dbg(msg,"<REC<");
+
+		if (unlikely(msg_non_seq(msg))) {
+			link_recv_non_seq(buf);	/* no node lock is taken on this path */
+			continue;
+		}
+		n_ptr = tipc_node_find(msg_prevnode(msg));
+		if (unlikely(!n_ptr))
+			goto cont;
+
+		tipc_node_lock(n_ptr);
+		l_ptr = n_ptr->links[b_ptr->identity];
+		if (unlikely(!l_ptr)) {
+			tipc_node_unlock(n_ptr);
+			goto cont;
+		}
+		/* 
+		 * Release acked messages: first pass the broadcast ack on to
+		 * the broadcast link, then discard every buffer between
+		 * first_out and next_out whose sequence number is covered
+		 * by ackd.
+		 */
+		if (less(n_ptr->bclink.acked, msg_bcast_ack(msg))) {
+			if (tipc_node_is_up(n_ptr) && n_ptr->bclink.supported)
+				tipc_bclink_acknowledge(n_ptr, msg_bcast_ack(msg));
+		}
+
+		crs = l_ptr->first_out;
+		while ((crs != l_ptr->next_out) && 
+		       less_eq(msg_seqno(buf_msg(crs)), ackd)) {
+			struct sk_buff *next = crs->next;
+
+			buf_discard(crs);
+			crs = next;
+			released++;
+		}
+		if (released) {
+			l_ptr->first_out = crs;
+			l_ptr->out_queue_size -= released;
+		}
+		if (unlikely(l_ptr->next_out))
+			tipc_link_push_queue(l_ptr);
+		if (unlikely(!list_empty(&l_ptr->waiting_ports)))
+			tipc_link_wakeup_ports(l_ptr, 0);
+		if (unlikely(++l_ptr->unacked_window >= TIPC_MIN_LINK_WIN)) {
+			l_ptr->stats.sent_acks++;	/* acknowledge explicitly with a STATE_MSG */
+			tipc_link_send_proto_msg(l_ptr, STATE_MSG, 0, 0, 0, 0, 0);
+		}
+
+protocol_check:	/* also entered from CHANGEOVER_PROTOCOL with l_ptr, buf, msg and seq_no replaced */
+		if (likely(link_working_working(l_ptr))) {
+			if (likely(seq_no == mod(l_ptr->next_in_no))) {
+				l_ptr->next_in_no++;
+				if (unlikely(l_ptr->oldest_deferred_in))
+					head = link_insert_deferred_queue(l_ptr,
+									  head);
+				if (likely(msg_is_dest(msg, tipc_own_addr))) {
+deliver:	/* also entered once a fragmented or tunnelled message has been unwrapped */
+					if (likely(msg_isdata(msg))) {
+						tipc_node_unlock(n_ptr);
+						tipc_port_recv_msg(buf);
+						continue;
+					}
+					switch (msg_user(msg)) {
+					case MSG_BUNDLER:
+						l_ptr->stats.recv_bundles++;
+						l_ptr->stats.recv_bundled += 
+							msg_msgcnt(msg);
+						tipc_node_unlock(n_ptr);
+						tipc_link_recv_bundle(buf);
+						continue;
+					case ROUTE_DISTRIBUTOR:
+						tipc_node_unlock(n_ptr);
+						tipc_cltr_recv_routing_table(buf);
+						continue;
+					case NAME_DISTRIBUTOR:
+						tipc_node_unlock(n_ptr);
+						tipc_named_recv(buf);
+						continue;
+					case CONN_MANAGER:
+						tipc_node_unlock(n_ptr);
+						tipc_port_recv_proto_msg(buf);
+						continue;
+					case MSG_FRAGMENTER:
+						l_ptr->stats.recv_fragments++;
+						if (tipc_link_recv_fragment(&l_ptr->defragm_buf, 
+									    &buf, &msg)) {
+							l_ptr->stats.recv_fragmented++;
+							goto deliver;
+						}
+						break;	/* message incomplete: buf was set to 0 by tipc_link_recv_fragment() */
+					case CHANGEOVER_PROTOCOL:
+						type = msg_type(msg);	/* read before buf is replaced */
+						if (link_recv_changeover_msg(&l_ptr, &buf)) {
+							msg = buf_msg(buf);
+							seq_no = msg_seqno(msg);
+							TIPC_SKB_CB(buf)->handle 
+								= b_ptr;
+							if (type == ORIGINAL_MSG)
+								goto deliver;
+							goto protocol_check;
+						}
+						break;	/* nothing extracted: buf was set to 0 by link_recv_changeover_msg() */
+					}
+				}
+				tipc_node_unlock(n_ptr);
+				tipc_net_route_msg(buf);	/* NOTE(review): buf is 0 after either "break" above - confirm the callee accepts that */
+				continue;
+			}
+			link_handle_out_of_seq_msg(l_ptr, buf);
+			head = link_insert_deferred_queue(l_ptr, head);
+			tipc_node_unlock(n_ptr);
+			continue;
+		}
+
+		if (msg_user(msg) == LINK_PROTOCOL) {
+			link_recv_proto_msg(l_ptr, buf);	/* discards buf itself */
+			head = link_insert_deferred_queue(l_ptr, head);
+			tipc_node_unlock(n_ptr);
+			continue;
+		}
+		msg_dbg(msg,"NSEQ<REC<");
+		link_state_event(l_ptr, TRAFFIC_MSG_EVT);
+
+		if (link_working_working(l_ptr)) {
+			/*
+			 * Re-insert in front of queue: the state event above
+			 * left the link working, so buf goes through the loop
+			 * once more.
+			 */
+			msg_dbg(msg,"RECV-REINS:");
+			buf->next = head;
+			head = buf;
+			tipc_node_unlock(n_ptr);
+			continue;
+		}
+		tipc_node_unlock(n_ptr);
+cont:
+		buf_discard(buf);
+	}
+	read_unlock_bh(&tipc_net_lock);
+}
+
+/*
+ * tipc_link_defer_pkt(): Sort a received out-of-sequence packet
+ *                        into the deferred reception queue.
+ * @head: address of the pointer to the oldest queued buffer
+ * @tail: address of the pointer to the newest queued buffer
+ * @buf: buffer to insert; discarded here if its sequence number is
+ *       already present in the queue
+ *
+ * Returns the increase of the queue length, i.e. 0 or 1
+ */
+
+u32 tipc_link_defer_pkt(struct sk_buff **head,
+			struct sk_buff **tail,
+			struct sk_buff *buf)
+{
+	u32 seq_no = msg_seqno(buf_msg(buf));
+	struct sk_buff **slot;
+
+	buf->next = NULL;
+
+	/* Nothing queued yet: buf becomes the only element */
+	if (*head == NULL) {
+		*tail = buf;
+		*head = buf;
+		return 1;
+	}
+
+	/* Newer than everything queued: append behind the tail */
+	if (less(msg_seqno(buf_msg(*tail)), seq_no)) {
+		(*tail)->next = buf;
+		*tail = buf;
+		return 1;
+	}
+
+	/*
+	 * Walk the queue through the pointer that refers to each element,
+	 * so inserting at the front and in the middle look the same.
+	 */
+	for (slot = head; *slot; slot = &(*slot)->next) {
+		u32 queued = msg_seqno(buf_msg(*slot));
+
+		if (less(seq_no, queued)) {
+			buf->next = *slot;
+			*slot = buf;
+			return 1;
+		}
+		if (seq_no == queued)
+			break;
+	}
+
+	/* Message is a duplicate of an existing message */
+
+	buf_discard(buf);
+	return 0;
+}
+
+/**
+ * link_handle_out_of_seq_msg - handle arrival of out-of-sequence packet
+ * @l_ptr: link the packet arrived on
+ * @buf: the packet; ownership passes to this function
+ *
+ * Link protocol messages are passed straight to link_recv_proto_msg().
+ * Anything else is either dropped as a duplicate or sorted into the
+ * deferred queue.
+ */
+
+static void link_handle_out_of_seq_msg(struct link *l_ptr,
+				       struct sk_buff *buf)
+{
+	struct tipc_msg *hdr = buf_msg(buf);
+	u32 seq_no = msg_seqno(hdr);
+
+	if (likely(msg_user(hdr) == LINK_PROTOCOL)) {
+		link_recv_proto_msg(l_ptr, buf);
+		return;
+	}
+
+	dbg("rx OOS msg: seq_no %u, expecting %u (%u)\n", 
+	    seq_no, mod(l_ptr->next_in_no), l_ptr->next_in_no);
+
+	/* Record OOS packet arrival (force mismatch on next timeout) */
+
+	l_ptr->checkpoint--;
+
+	/* Older than what is expected next: a duplicate, drop it */
+
+	if (less(seq_no, mod(l_ptr->next_in_no))) {
+		buf_discard(buf);
+		l_ptr->stats.duplicates++;
+		return;
+	}
+
+	/* The deferred queue discards buf itself when it is already queued */
+
+	if (!tipc_link_defer_pkt(&l_ptr->oldest_deferred_in,
+				 &l_ptr->newest_deferred_in, buf)) {
+		l_ptr->stats.duplicates++;
+		return;
+	}
+
+	/* Queued: notify peer of gap as per protocol specification */
+
+	l_ptr->deferred_inqueue_sz++;
+	l_ptr->stats.deferred_recv++;
+	if ((l_ptr->deferred_inqueue_sz % 16) == 1)
+		tipc_link_send_proto_msg(l_ptr, STATE_MSG, 0, 0, 0, 0, 0);
+}
+
+/*
+ * tipc_link_send_proto_msg - send protocol message to the other endpoint.
+ * @l_ptr: link to send on; the header at l_ptr->pmsg is filled in and copied
+ * @msg_typ: STATE_MSG, or else RESET_MSG/ACTIVATE_MSG (handled alike here)
+ * @probe_msg: non-zero marks a STATE_MSG as a probe
+ * @gap: sequence gap reported in a STATE_MSG; recomputed from the deferred
+ *       queue when that queue is not empty
+ * @tolerance: link tolerance field of a STATE_MSG
+ * @priority: link priority field of a STATE_MSG (see the note in the body)
+ * @ack_mtu: max_pkt field of a STATE_MSG
+ *
+ * Returns without sending when the link is blocked, when a STATE_MSG is
+ * requested on a link that is not up, or when no buffer can be acquired.
+ * While the bearer is congested the message is only stored in
+ * l_ptr->proto_msg_queue.
+ */
+void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
+			      u32 gap, u32 tolerance, u32 priority, u32 ack_mtu)
+{
+	struct sk_buff *buf = 0;
+	struct tipc_msg *msg = l_ptr->pmsg;
+        u32 msg_size = sizeof(l_ptr->proto_msg);
+
+	if (link_blocked(l_ptr))
+		return;
+	msg_set_type(msg, msg_typ);
+	msg_set_net_plane(msg, l_ptr->b_ptr->net_plane);
+	msg_set_bcast_ack(msg, mod(l_ptr->owner->bclink.last_in)); 
+	msg_set_last_bcast(msg, tipc_bclink_get_last_sent());
+
+	if (msg_typ == STATE_MSG) {
+		u32 next_sent = mod(l_ptr->next_out_no);
+
+		if (!tipc_link_is_up(l_ptr))
+			return;
+		if (l_ptr->next_out)	/* unsent buffers queued: report the first of them */
+			next_sent = msg_seqno(buf_msg(l_ptr->next_out));
+		msg_set_next_sent(msg, next_sent);
+		if (l_ptr->oldest_deferred_in) {	/* overrides the caller's gap */
+			u32 rec = msg_seqno(buf_msg(l_ptr->oldest_deferred_in));
+			gap = mod(rec - mod(l_ptr->next_in_no));
+		}
+		msg_set_seq_gap(msg, gap);
+		if (gap)
+			l_ptr->stats.sent_nacks++;
+		msg_set_link_tolerance(msg, tolerance);
+		msg_set_linkprio(msg, priority);
+		msg_set_max_pkt(msg, ack_mtu);
+		msg_set_ack(msg, mod(l_ptr->next_in_no - 1));
+		msg_set_probe(msg, probe_msg != 0);
+		if (probe_msg) { 
+			u32 mtu = l_ptr->max_pkt;
+
+                        if ((mtu < l_ptr->max_pkt_target) &&
+			    link_working_working(l_ptr) &&
+			    l_ptr->fsm_msg_cnt) {
+				msg_size = (mtu + (l_ptr->max_pkt_target - mtu)/2 + 2) & ~3;	/* about halfway to the target, multiple of 4 */
+                                if (l_ptr->max_pkt_probes == 10) {
+                                        l_ptr->max_pkt_target = (msg_size - 4);	/* ten probes at this size: lower the target below it */
+                                        l_ptr->max_pkt_probes = 0;
+					msg_size = (mtu + (l_ptr->max_pkt_target - mtu)/2 + 2) & ~3;
+                                }
+				l_ptr->max_pkt_probes++;
+                        }
+
+			l_ptr->stats.sent_probes++;
+                }
+		l_ptr->stats.sent_states++;
+	} else {		/* RESET_MSG or ACTIVATE_MSG: advertise the link's own settings */
+		msg_set_ack(msg, mod(l_ptr->reset_checkpoint - 1));
+		msg_set_seq_gap(msg, 0);
+		msg_set_next_sent(msg, 1);
+		msg_set_link_tolerance(msg, l_ptr->tolerance);
+		msg_set_linkprio(msg, l_ptr->priority);
+		msg_set_max_pkt(msg, l_ptr->max_pkt_target);
+	}
+
+	if (tipc_node_has_redundant_links(l_ptr->owner)) {
+		msg_set_redundant_link(msg);
+	} else {
+		msg_clear_redundant_link(msg);
+	}
+	msg_set_linkprio(msg, l_ptr->priority);	/* NOTE(review): replaces the @priority value written for a STATE_MSG above - confirm intended */
+
+	/* Ensure sequence number will not fit : offset next_out_no by 0xffff/2 */
+
+	msg_set_seqno(msg, mod(l_ptr->next_out_no + (0xffff/2)));
+
+	/*
+	 * Congestion? Then only store the message: it overwrites whatever
+	 * proto_msg_queue already holds, and nothing is sent from here.
+	 */
+
+	if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr)) {
+		if (!l_ptr->proto_msg_queue) {
+			l_ptr->proto_msg_queue =
+				buf_acquire(sizeof(l_ptr->proto_msg));
+		}
+		buf = l_ptr->proto_msg_queue;
+		if (!buf)
+			return;
+		memcpy(buf->data, (unchar *)msg, sizeof(l_ptr->proto_msg));
+		return;
+	}
+	msg_set_timestamp(msg, jiffies_to_msecs(jiffies));
+
+	/*
+	 * Message can be sent.  The buffer is msg_size bytes, which exceeds
+	 * sizeof(l_ptr->proto_msg) for an enlarged probe, but only
+	 * sizeof(l_ptr->proto_msg) bytes are copied into it.
+	 * NOTE(review): confirm what the remaining bytes contain.
+	 */
+
+	msg_dbg(msg, ">>");
+
+	buf = buf_acquire(msg_size);
+	if (!buf)
+		return;
+
+	memcpy(buf->data, (unchar *)msg, sizeof(l_ptr->proto_msg));
+        msg_set_size(buf_msg(buf), msg_size);
+
+	if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) {	/* non-zero return is treated as "sent" */
+		l_ptr->unacked_window = 0;
+		buf_discard(buf);
+		return;
+	}
+
+	/* New congestion: schedule the link on the bearer and keep buf queued */
+	tipc_bearer_schedule(l_ptr->b_ptr, l_ptr);
+	l_ptr->proto_msg_queue = buf;
+	l_ptr->stats.bearer_congs++;
+}
+
+/*
+ * link_recv_proto_msg - receive protocol message.
+ * @l_ptr: link the message arrived on
+ * @buf: buffer holding the message; discarded at "exit" on every path
+ *
+ * Nothing but the discard happens while the link is blocked.  RESET_MSG and
+ * ACTIVATE_MSG take over settings advertised by the peer and feed the
+ * message type into link_state_event().  STATE_MSG handles tolerance and
+ * priority changes, computes the receive gap, tracks MTU probing and
+ * triggers replies and retransmission.
+ *
+ * Note that network plane id propagates through the network, and may 
+ * change at any time. The node with lowest address rules    
+ */
+
+static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf)
+{
+	u32 rec_gap = 0;
+	u32 max_pkt_info;
+        u32 max_pkt_ack;
+	u32 msg_tol;
+	struct tipc_msg *msg = buf_msg(buf);
+
+	dbg("AT(%u):", jiffies_to_msecs(jiffies));
+	msg_dbg(msg, "<<");
+	if (link_blocked(l_ptr))
+		goto exit;
+
+	/* record unnumbered packet arrival (force mismatch on next timeout) */
+
+	l_ptr->checkpoint--;
+
+	if (l_ptr->b_ptr->net_plane != msg_net_plane(msg))
+		if (tipc_own_addr > msg_prevnode(msg))	/* sender has the lower address: adopt its plane */
+			l_ptr->b_ptr->net_plane = msg_net_plane(msg);
+
+	l_ptr->owner->permit_changeover = msg_redundant_link(msg);
+
+	switch (msg_type(msg)) {
+	
+	case RESET_MSG:
+		if (!link_working_unknown(l_ptr) && l_ptr->peer_session) {
+			if (msg_session(msg) == l_ptr->peer_session) {
+				dbg("Duplicate RESET: %u<->%u\n",
+				    msg_session(msg), l_ptr->peer_session);                                     
+				break; /* duplicate: ignore */
+			}
+		}
+		/* fall thru' */
+	case ACTIVATE_MSG:
+		/*
+		 * Update link settings according other endpoint's values.
+		 * The message data replaces the part of l_ptr->name that
+		 * follows its last ':'.
+		 * NOTE(review): the strcpy() below is unbounded and the
+		 * strrchr() result is not checked - confirm both are safe
+		 * for data received from a peer.
+		 */
+
+		strcpy((strrchr(l_ptr->name, ':') + 1), (char *)msg_data(msg));
+
+		if ((msg_tol = msg_link_tolerance(msg)) &&
+		    (msg_tol > l_ptr->tolerance))
+			link_set_supervision_props(l_ptr, msg_tol);
+
+		if (msg_linkprio(msg) > l_ptr->priority)
+			l_ptr->priority = msg_linkprio(msg);
+
+		max_pkt_info = msg_max_pkt(msg);
+                if (max_pkt_info) {
+			if (max_pkt_info < l_ptr->max_pkt_target)
+				l_ptr->max_pkt_target = max_pkt_info;
+			if (l_ptr->max_pkt > l_ptr->max_pkt_target)
+				l_ptr->max_pkt = l_ptr->max_pkt_target;
+		} else {
+                        l_ptr->max_pkt = l_ptr->max_pkt_target;
+		}
+		l_ptr->owner->bclink.supported = (max_pkt_info != 0);	/* a zero max_pkt field marks the peer as lacking broadcast link support */
+
+		link_state_event(l_ptr, msg_type(msg));
+
+		l_ptr->peer_session = msg_session(msg);
+		l_ptr->peer_bearer_id = msg_bearer_id(msg);
+
+		/* Synchronize broadcast sequence numbers */
+		if (!tipc_node_has_redundant_links(l_ptr->owner)) {
+			l_ptr->owner->bclink.last_in = mod(msg_last_bcast(msg));
+		}
+		break;
+	case STATE_MSG:
+
+		if ((msg_tol = msg_link_tolerance(msg)))
+			link_set_supervision_props(l_ptr, msg_tol);
+		
+		if (msg_linkprio(msg) && 
+		    (msg_linkprio(msg) != l_ptr->priority)) {
+			warn("Changing prio <%s>: %u->%u\n",
+			     l_ptr->name, l_ptr->priority, msg_linkprio(msg));
+			l_ptr->priority = msg_linkprio(msg);
+			tipc_link_reset(l_ptr); /* Enforce change to take effect */
+			break;
+		}
+		link_state_event(l_ptr, TRAFFIC_MSG_EVT);
+		l_ptr->stats.recv_states++;
+		if (link_reset_unknown(l_ptr))
+			break;
+
+		if (less_eq(mod(l_ptr->next_in_no), msg_next_sent(msg))) {	/* peer has sent packets not yet received here */
+			rec_gap = mod(msg_next_sent(msg) - 
+				      mod(l_ptr->next_in_no));
+		}
+
+		max_pkt_ack = msg_max_pkt(msg);
+                if (max_pkt_ack > l_ptr->max_pkt) {
+                        dbg("Link <%s> updated MTU %u -> %u\n",
+                            l_ptr->name, l_ptr->max_pkt, max_pkt_ack);
+                        l_ptr->max_pkt = max_pkt_ack;
+                        l_ptr->max_pkt_probes = 0;
+                }
+
+		max_pkt_ack = 0;	/* reused from here on: probe size to echo back, 0 for none */
+                if (msg_probe(msg)) {
+			l_ptr->stats.recv_probes++;
+                        if (msg_size(msg) > sizeof(l_ptr->proto_msg)) {
+                                max_pkt_ack = msg_size(msg);
+                        }
+                }
+
+		/* Protocol message before retransmits, reduce loss risk */
+
+		tipc_bclink_check_gap(l_ptr->owner, msg_last_bcast(msg));
+
+		if (rec_gap || (msg_probe(msg))) {
+			tipc_link_send_proto_msg(l_ptr, STATE_MSG,
+						 0, rec_gap, 0, 0, max_pkt_ack);
+		}
+		if (msg_seq_gap(msg)) {	/* peer reports a gap: retransmit from first_out */
+			msg_dbg(msg, "With Gap:");
+			l_ptr->stats.recv_nacks++;
+			tipc_link_retransmit(l_ptr, l_ptr->first_out,
+					     msg_seq_gap(msg));
+		}
+		break;
+	default:
+		msg_dbg(buf_msg(buf), "<DISCARDING UNKNOWN<");
+	}
+exit:
+	buf_discard(buf);
+}
+
+
+/*
+ * tipc_link_tunnel(): Wrap one message in a tunnel header and send the
+ * result over one of the owner node's active links, selected by the low
+ * bit of 'selector'. Owner node is locked.
+ *
+ * Nothing is sent when the selected link is not up or when no buffer can
+ * be acquired.
+ */
+void tipc_link_tunnel(struct link *l_ptr,
+		      struct tipc_msg *tunnel_hdr,
+		      struct tipc_msg  *msg,
+		      u32 selector)
+{
+	u32 inner_sz = msg_size(msg);
+	u32 total_sz = inner_sz + INT_H_SIZE;
+	struct link *tunnel = l_ptr->owner->active_links[selector & 1];
+	struct sk_buff *buf;
+
+	if (!tipc_link_is_up(tunnel))
+		return;
+
+	msg_set_size(tunnel_hdr, total_sz);
+	buf = buf_acquire(total_sz);
+	if (buf == NULL)
+		return;
+
+	/* Tunnel header first, the wrapped message right behind it */
+	memcpy(buf->data, (unchar *)tunnel_hdr, INT_H_SIZE);
+	memcpy(buf->data + INT_H_SIZE, (unchar *)msg, inner_sz);
+	dbg("%c->%c:", l_ptr->b_ptr->net_plane, tunnel->b_ptr->net_plane);
+	msg_dbg(buf_msg(buf), ">SEND>");
+	assert(tunnel);
+	tipc_link_send_buf(tunnel, buf);
+}
+
+
+
+/*
+ * changeover(): Send whole message queue via the remaining link
+ *               Owner node is locked.
+ *
+ * Every queued message is wrapped as an ORIGINAL_MSG tunnel packet.  An
+ * empty queue is announced with a single header-only tunnel packet.
+ * Bundles are taken apart and tunnelled message by message when the owner
+ * node has redundant links.
+ */
+
+void tipc_link_changeover(struct link *l_ptr)
+{
+	u32 msgcount = l_ptr->out_queue_size;
+	struct link *tunnel = l_ptr->owner->active_links[0];
+	int split_bundles = tipc_node_has_redundant_links(l_ptr->owner);
+	struct tipc_msg tunnel_hdr;
+	struct sk_buff *crs;
+
+	if (!tunnel || !l_ptr->owner->permit_changeover)
+		return;
+
+	msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL,
+		 ORIGINAL_MSG, TIPC_OK, INT_H_SIZE, l_ptr->addr);
+	msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id);
+	msg_set_msgcnt(&tunnel_hdr, msgcount);
+
+	/* Empty queue: the bare tunnel header is all there is to send */
+	if (!l_ptr->first_out) {
+		struct sk_buff *buf;
+
+		assert(!msgcount);
+		buf = buf_acquire(INT_H_SIZE);
+		if (!buf) {
+			warn("Memory squeeze; link changeover failed\n");
+			return;
+		}
+		memcpy(buf->data, (unchar *)&tunnel_hdr, INT_H_SIZE);
+		msg_set_size(&tunnel_hdr, INT_H_SIZE);
+		dbg("%c->%c:", l_ptr->b_ptr->net_plane,
+		    tunnel->b_ptr->net_plane);
+		msg_dbg(&tunnel_hdr, "EMPTY>SEND>");
+		tipc_link_send_buf(tunnel, buf);
+		return;
+	}
+
+	for (crs = l_ptr->first_out; crs; crs = crs->next) {
+		struct tipc_msg *msg = buf_msg(crs);
+		struct tipc_msg *m;
+		unchar *pos;
+		u32 bundled;
+
+		if ((msg_user(msg) != MSG_BUNDLER) || !split_bundles) {
+			tipc_link_tunnel(l_ptr, &tunnel_hdr, msg,
+					 msg_link_selector(msg));
+			continue;
+		}
+
+		/* Tunnel each bundled message under the bundle's seqno */
+		bundled = msg_msgcnt(msg);
+		m = msg_get_wrapped(msg);
+		pos = (unchar *)m;
+		while (bundled--) {
+			msg_set_seqno(m,msg_seqno(msg));
+			tipc_link_tunnel(l_ptr, &tunnel_hdr, m,
+					 msg_link_selector(m));
+			pos += align(msg_size(m));
+			m = (struct tipc_msg *)pos;
+		}
+	}
+}
+
+/*
+ * tipc_link_send_duplicate(): Send a DUPLICATE_MSG tunnel copy of every
+ * buffer in l_ptr's send queue via 'tunnel'.  Stops early when a buffer
+ * cannot be acquired or when l_ptr is no longer up after a send.
+ */
+void tipc_link_send_duplicate(struct link *l_ptr, struct link *tunnel)
+{
+	struct tipc_msg tunnel_hdr;
+	struct sk_buff *iter;
+
+	msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL,
+		 DUPLICATE_MSG, TIPC_OK, INT_H_SIZE, l_ptr->addr);
+	msg_set_msgcnt(&tunnel_hdr, l_ptr->out_queue_size);
+	msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id);
+
+	for (iter = l_ptr->first_out; iter; iter = iter->next) {
+		struct tipc_msg *msg = buf_msg(iter);
+		u32 length = msg_size(msg);
+		u32 wrapped_sz = length + INT_H_SIZE;
+		struct sk_buff *outbuf;
+
+		if (msg_user(msg) == MSG_BUNDLER)
+			msg_set_type(msg, CLOSED_MSG);
+		msg_set_ack(msg, mod(l_ptr->next_in_no - 1));	/* Update */
+		msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); 
+		msg_set_size(&tunnel_hdr, wrapped_sz);
+
+		outbuf = buf_acquire(wrapped_sz);
+		if (!outbuf) {
+			warn("Memory squeeze; buffer duplication failed\n");
+			return;
+		}
+		memcpy(outbuf->data, (unchar *)&tunnel_hdr, INT_H_SIZE);
+		memcpy(outbuf->data + INT_H_SIZE, iter->data, length);
+		dbg("%c->%c:", l_ptr->b_ptr->net_plane,
+		    tunnel->b_ptr->net_plane);
+		msg_dbg(buf_msg(outbuf), ">SEND>");
+		tipc_link_send_buf(tunnel, outbuf);
+		if (!tipc_link_is_up(l_ptr))
+			return;
+	}
+}
+
+
+
+/**
+ * buf_extract - extracts embedded TIPC message from another message
+ * @skb: encapsulating message buffer
+ * @from_pos: offset to extract from
+ *
+ * The size of the copy is taken from the embedded message's own header.
+ * Returns a new message buffer containing the embedded message, or NULL
+ * when no buffer could be acquired.  The encapsulating message itself is
+ * left unchanged.
+ */
+
+static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos)
+{
+	struct tipc_msg *inner = (struct tipc_msg *)(skb->data + from_pos);
+	u32 inner_sz = msg_size(inner);
+	struct sk_buff *eb = buf_acquire(inner_sz);
+
+	if (eb == NULL)
+		return NULL;
+
+	memcpy(eb->data, (unchar *)inner, inner_sz);
+	return eb;
+}
+
+/* 
+ *  link_recv_changeover_msg(): Receive tunneled packet sent
+ *  via other link. Node is locked. Return extracted buffer.
+ *  @l_ptr: in: link the tunnel packet arrived on; out: the link named by
+ *          the tunnel header's bearer id (replaced only if that link exists)
+ *  @buf: in: the tunnel packet, which is discarded on every path;
+ *        out: a copy of the wrapped message, or 0 if nothing is delivered
+ *
+ *  Returns 1 when *buf holds an extracted message, otherwise 0.
+ */
+
+static int link_recv_changeover_msg(struct link **l_ptr,
+				    struct sk_buff **buf)
+{
+	struct sk_buff *tunnel_buf = *buf;
+	struct link *dest_link;
+	struct tipc_msg *msg;
+	struct tipc_msg *tunnel_msg = buf_msg(tunnel_buf);
+	u32 msg_typ = msg_type(tunnel_msg);
+	u32 msg_count = msg_msgcnt(tunnel_msg);
+
+	dest_link = (*l_ptr)->owner->links[msg_bearer_id(tunnel_msg)];	/* NOTE(review): index comes straight from the received header - confirm it is range-checked */
+	assert(dest_link != *l_ptr);
+	if (!dest_link) {
+		msg_dbg(tunnel_msg, "NOLINK/<REC<");
+		goto exit;
+	}
+	dbg("%c<-%c:", dest_link->b_ptr->net_plane,
+	    (*l_ptr)->b_ptr->net_plane);
+	*l_ptr = dest_link;
+	msg = msg_get_wrapped(tunnel_msg);
+
+	if (msg_typ == DUPLICATE_MSG) {
+		if (less(msg_seqno(msg), mod(dest_link->next_in_no))) {	/* older than what dest_link expects next */
+			msg_dbg(tunnel_msg, "DROP/<REC<");
+			goto exit;
+		}
+		*buf = buf_extract(tunnel_buf,INT_H_SIZE);
+		if (*buf == NULL) {
+			warn("Memory squeeze; failed to extract msg\n");
+			goto exit;
+		}
+		msg_dbg(tunnel_msg, "TNL<REC<");
+		buf_discard(tunnel_buf);
+		return 1;
+	}
+
+	/*
+	 * First original message ?:  if dest_link is still up it is reset
+	 * here; in that case, or when exp_msg_count still holds
+	 * START_CHANGEOVER, the count carried by the tunnel header becomes
+	 * the number of tunnelled messages to expect.
+	 */
+
+	if (tipc_link_is_up(dest_link)) {
+		msg_dbg(tunnel_msg, "UP/FIRST/<REC<");
+		tipc_link_reset(dest_link);
+		dest_link->exp_msg_count = msg_count;
+		if (!msg_count)
+			goto exit;
+	} else if (dest_link->exp_msg_count == START_CHANGEOVER) {
+		msg_dbg(tunnel_msg, "BLK/FIRST/<REC<");
+		dest_link->exp_msg_count = msg_count;
+		if (!msg_count)
+			goto exit;
+	}
+
+	/*
+	 * Receive original message: one expected message is counted off
+	 * even if the message is then dropped or cannot be extracted.
+	 */
+
+	if (dest_link->exp_msg_count == 0) {
+		msg_dbg(tunnel_msg, "OVERDUE/DROP/<REC<");
+		dbg_print_link(dest_link, "LINK:");
+		goto exit;
+	}
+	dest_link->exp_msg_count--;
+	if (less(msg_seqno(msg), dest_link->reset_checkpoint)) {
+		msg_dbg(tunnel_msg, "DROP/DUPL/<REC<");
+		goto exit;
+	} else {
+		*buf = buf_extract(tunnel_buf, INT_H_SIZE);
+		if (*buf != NULL) {
+			msg_dbg(tunnel_msg, "TNL<REC<");
+			buf_discard(tunnel_buf);
+			return 1;
+		} else {
+			warn("Memory squeeze; dropped incoming msg\n");	/* falls through to exit */
+		}
+	}
+exit:
+	*buf = 0;
+	buf_discard(tunnel_buf);
+	return 0;
+}
+
+/*
+ *  Bundler functionality:
+ */
+
+/**
+ * tipc_link_recv_bundle - unpack a message bundle and route its contents
+ * @buf: buffer holding the bundle; released before returning on every path
+ *
+ * Extracts as many embedded messages as the bundle header announces,
+ * starting right behind the INT_H_SIZE byte bundle header, and passes each
+ * copy to tipc_net_route_msg().  If a copy cannot be allocated the
+ * remaining messages are lost, but the bundle buffer is still released.
+ */
+void tipc_link_recv_bundle(struct sk_buff *buf)
+{
+	u32 msgcount = msg_msgcnt(buf_msg(buf));
+	u32 pos = INT_H_SIZE;
+	struct sk_buff *obuf;
+
+	msg_dbg(buf_msg(buf), "<BNDL<: ");
+	while (msgcount--) {
+		obuf = buf_extract(buf, pos);
+		if (obuf == NULL) {
+			char addr_string[16];
+
+			warn("Buffer allocation failure;\n");
+			warn("  incoming message(s) from %s lost\n",
+			     addr_string_fill(addr_string,
+					      msg_orignode(buf_msg(buf))));
+			/* Leave the loop rather than return, so buf is freed */
+			break;
+		}
+		/* Embedded messages are laid out at aligned offsets */
+		pos += align(msg_size(buf_msg(obuf)));
+		msg_dbg(buf_msg(obuf), "     /");
+		tipc_net_route_msg(obuf);
+	}
+	buf_discard(buf);
+}
+
+/*
+ *  Fragmentation/defragmentation:
+ */
+
+
+/*
+ * tipc_link_send_long_buf: Entry for buffers needing fragmentation.
+ * The buffer is complete, inclusive total message length.
+ * @l_ptr: link to send the fragments on
+ * @buf: buffer holding the whole message; released here on every path
+ *
+ * The message is cut into pieces of at most link_max_pkt() - INT_H_SIZE
+ * bytes, each sent behind its own fragment header.  Sending stops early
+ * when a fragment buffer cannot be acquired or when the link is found to
+ * be down after a send.
+ *
+ * Returns user data length, or -ENOMEM when a fragment buffer could not
+ * be acquired.
+ */
+int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf)
+{
+	struct tipc_msg *inmsg = buf_msg(buf);
+	struct tipc_msg fragm_hdr;
+	u32 insize = msg_size(inmsg);
+	u32 dsz = msg_data_sz(inmsg);
+	unchar *crs = buf->data;
+	u32 rest = insize;
+	u32 pack_sz = link_max_pkt(l_ptr);
+	u32 fragm_sz = pack_sz - INT_H_SIZE;
+	u32 fragm_no = 1;
+	u32 destaddr = msg_destnode(inmsg);
+
+	if (msg_short(inmsg))
+		destaddr = l_ptr->addr;
+
+	if (msg_routed(inmsg))
+		msg_set_prevnode(inmsg, tipc_own_addr);
+
+	/* Prepare reusable fragment header: */
+
+	msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT,
+		 TIPC_OK, INT_H_SIZE, destaddr);
+	msg_set_link_selector(&fragm_hdr, msg_link_selector(inmsg));
+	msg_set_long_msgno(&fragm_hdr, mod(l_ptr->long_msg_seq_no++));
+	msg_set_fragm_no(&fragm_hdr, fragm_no);
+	l_ptr->stats.sent_fragmented++;
+
+	/* Chop up message: */
+
+	while (rest > 0) {
+		struct sk_buff *fragm;
+
+		/* The last piece may be shorter than the others */
+		if (rest <= fragm_sz) {
+			fragm_sz = rest;
+			msg_set_type(&fragm_hdr, LAST_FRAGMENT);
+		}
+		fragm = buf_acquire(fragm_sz + INT_H_SIZE);
+		if (fragm == NULL) {
+			warn("Memory squeeze; failed to fragment msg\n");
+			dsz = -ENOMEM;
+			goto exit;
+		}
+		msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE);
+		memcpy(fragm->data, (unchar *)&fragm_hdr, INT_H_SIZE);
+		memcpy(fragm->data + INT_H_SIZE, crs, fragm_sz);
+
+		/*  Send queued messages first, if any: */
+
+		l_ptr->stats.sent_fragments++;
+		tipc_link_send_buf(l_ptr, fragm);
+
+		/*
+		 * Link went down: stop, but still release the original
+		 * buffer, which nothing else refers to.
+		 */
+		if (!tipc_link_is_up(l_ptr))
+			goto exit;
+		msg_set_fragm_no(&fragm_hdr, ++fragm_no);
+		rest -= fragm_sz;
+		crs += fragm_sz;
+		msg_set_type(&fragm_hdr, FRAGMENT);
+	}
+exit:
+	buf_discard(buf);
+	return dsz;
+}
+
+/* 
+ * A pending message being re-assembled must store certain values 
+ * to handle subsequent fragments correctly. The following functions 
+ * help storing these values in unused, available fields in the
+ * pending message. This makes dynamic memory allocation unnecessary.
+ *
+ * Header fields of the pending message as used by the helpers below:
+ *   seqno field       - long message sequence number
+ *   ack field         - fragment size
+ *   bcast_ack field   - number of fragments still expected
+ *   reroute_cnt field - timer count
+ */
+
+static inline u32 get_long_msg_seqno(struct sk_buff *buf)
+{
+	return msg_seqno(buf_msg(buf));
+}
+
+static inline void set_long_msg_seqno(struct sk_buff *buf, u32 seqno)
+{
+	msg_set_seqno(buf_msg(buf), seqno);
+}
+
+static inline u32 get_fragm_size(struct sk_buff *buf)
+{
+	return msg_ack(buf_msg(buf));
+}
+
+static inline void set_fragm_size(struct sk_buff *buf, u32 sz)
+{
+	msg_set_ack(buf_msg(buf), sz);
+}
+
+static inline u32 get_expected_frags(struct sk_buff *buf)
+{
+	return msg_bcast_ack(buf_msg(buf));
+}
+
+static inline void set_expected_frags(struct sk_buff *buf, u32 exp)
+{
+	msg_set_bcast_ack(buf_msg(buf), exp);
+}
+
+static inline u32 get_timer_cnt(struct sk_buff *buf)
+{
+	return msg_reroute_cnt(buf_msg(buf));
+}
+
+static inline void incr_timer_cnt(struct sk_buff *buf)
+{
+	msg_incr_reroute_cnt(buf_msg(buf));
+}
+
+/*
+ * tipc_link_recv_fragment(): Called with node lock on.
+ * @pending: head of the list of messages still being reassembled
+ * @fb: in: the received fragment, which is always consumed;
+ *      out: the reassembled buffer when the message is complete, else NULL
+ * @m: out: header of the reassembled message; written only on completion
+ *
+ * A FIRST_FRAGMENT with no matching pending message starts a new
+ * reassembly buffer; any other fragment with a matching pending message is
+ * copied into place.  Everything else is discarded as an orphan.  A
+ * FIRST_FRAGMENT that carries no data, or more data than the size declared
+ * for the whole message, is discarded as malformed.
+ *
+ * Returns 1 if the message is complete, otherwise 0.
+ */
+int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
+			    struct tipc_msg **m)
+{
+	struct sk_buff *prev = NULL;
+	struct sk_buff *fbuf = *fb;
+	struct tipc_msg *fragm = buf_msg(fbuf);
+	struct sk_buff *pbuf = *pending;
+	u32 long_msg_seq_no = msg_long_msgno(fragm);
+
+	*fb = NULL;
+	msg_dbg(fragm,"FRG<REC<");
+
+	/* Is there an incomplete message waiting for this fragment? */
+
+	while (pbuf && ((msg_seqno(buf_msg(pbuf)) != long_msg_seq_no)
+			|| (msg_orignode(fragm) != msg_orignode(buf_msg(pbuf))))) {
+		prev = pbuf;
+		pbuf = pbuf->next;
+	}
+
+	if (!pbuf && (msg_type(fragm) == FIRST_FRAGMENT)) {
+		struct tipc_msg *imsg = (struct tipc_msg *)msg_data(fragm);
+		u32 fragm_sz = msg_data_sz(fragm);
+		u32 max =  TIPC_MAX_USER_MSG_SIZE + LONG_H_SIZE;
+		u32 exp_fragm_cnt;
+		u32 msg_sz;
+
+		/*
+		 * An empty fragment would make the fragment count below
+		 * divide by zero, and it holds no inner header to read.
+		 */
+		if (fragm_sz == 0) {
+			msg_dbg(fragm,"<REC<Malformed: ");
+			buf_discard(fbuf);
+			return 0;
+		}
+		msg_sz = msg_size(imsg);
+
+		/*
+		 * The reassembly buffer is msg_sz bytes and the fragment's
+		 * data is copied to its start, so it must not be longer.
+		 */
+		if (fragm_sz > msg_sz) {
+			msg_dbg(fragm,"<REC<Malformed: ");
+			buf_discard(fbuf);
+			return 0;
+		}
+		exp_fragm_cnt = msg_sz/fragm_sz + !!(msg_sz % fragm_sz);
+
+		if (msg_type(imsg) == TIPC_MCAST_MSG)
+			max = TIPC_MAX_USER_MSG_SIZE + MCAST_H_SIZE;
+		if (msg_sz > max) {
+			msg_dbg(fragm,"<REC<Oversized: ");
+			buf_discard(fbuf);
+			return 0;
+		}
+		pbuf = buf_acquire(msg_sz);
+		if (pbuf != NULL) {
+			pbuf->next = *pending;
+			*pending = pbuf;
+			memcpy(pbuf->data, (unchar *)imsg, fragm_sz);
+
+			/*  Prepare buffer for subsequent fragments. */
+
+			set_long_msg_seqno(pbuf, long_msg_seq_no);
+			set_fragm_size(pbuf,fragm_sz);
+			set_expected_frags(pbuf,exp_fragm_cnt - 1);
+		} else {
+			warn("Memory squeeze; got no defragmenting buffer\n");
+		}
+		buf_discard(fbuf);
+		return 0;
+	} else if (pbuf && (msg_type(fragm) != FIRST_FRAGMENT)) {
+		u32 dsz = msg_data_sz(fragm);
+		u32 fsz = get_fragm_size(pbuf);
+		u32 crs = ((msg_fragm_no(fragm) - 1) * fsz);
+		u32 exp_frags = get_expected_frags(pbuf) - 1;
+
+		/*
+		 * NOTE(review): crs and dsz both come from the received
+		 * fragment and are not checked against the size of pbuf
+		 * before this copy - confirm an upper bound exists.
+		 */
+		memcpy(pbuf->data + crs, msg_data(fragm), dsz);
+		buf_discard(fbuf);
+
+		/* Is message complete? */
+
+		if (exp_frags == 0) {
+			if (prev)
+				prev->next = pbuf->next;
+			else
+				*pending = pbuf->next;
+			msg_reset_reroute_cnt(buf_msg(pbuf));
+			*fb = pbuf;
+			*m = buf_msg(pbuf);
+			return 1;
+		}
+		set_expected_frags(pbuf,exp_frags);
+		return 0;
+	}
+	dbg(" Discarding orphan fragment %x\n",fbuf);
+	msg_dbg(fragm,"ORPHAN:");
+	dbg("Pending long buffers:\n");
+	dbg_print_buf_chain(*pending);
+	buf_discard(fbuf);
+	return 0;
+}
+
+/**
+ * link_check_defragm_bufs - flush stale incoming message fragments
+ * @l_ptr: pointer to link
+ *
+ * Does nothing unless the link has pending reassembly buffers and is in
+ * the working-working state.  Each pending buffer's timer count is bumped;
+ * a buffer whose count has already reached 4 is unlinked and discarded.
+ */
+
+static void link_check_defragm_bufs(struct link *l_ptr)
+{
+	struct sk_buff **slot = &l_ptr->defragm_buf;
+	struct sk_buff *buf;
+
+	if (!l_ptr->defragm_buf || !link_working_working(l_ptr))
+		return;
+
+	while ((buf = *slot) != NULL) {
+		if (get_timer_cnt(buf) < 4) {
+			/* Still fresh: age it and step past it */
+			incr_timer_cnt(buf);
+			slot = &buf->next;
+			continue;
+		}
+
+		dbg(" Discarding incomplete long buffer\n");
+		msg_dbg(buf_msg(buf), "LONG:");
+		dbg_print_link(l_ptr, "curr:");
+		dbg("Pending long buffers:\n");
+		dbg_print_buf_chain(l_ptr->defragm_buf);
+
+		/* Unlink through the referring pointer, then free */
+		*slot = buf->next;
+		buf_discard(buf);
+	}
+}
+
+
+
+/**
+ * link_set_supervision_props - derive link supervision timing from tolerance
+ * @l_ptr: link to update
+ * @tolerance: new link tolerance
+ *
+ * The continuity interval is a quarter of the tolerance, capped at 500, and
+ * the abort limit is the tolerance divided by a quarter of that interval.
+ * A tolerance below 16 would make that divisor zero, so such a value is
+ * ignored and the link keeps its current settings.
+ */
+static void link_set_supervision_props(struct link *l_ptr, u32 tolerance)
+{
+	u32 interval = ((tolerance / 4) > 500) ? 500 : tolerance / 4;
+
+	/* Callers pass values taken from received protocol messages */
+	if ((interval / 4) == 0)
+		return;
+
+	l_ptr->tolerance = tolerance;
+	l_ptr->continuity_interval = interval;
+	l_ptr->abort_limit = tolerance / (interval / 4);
+}
+
+
+/*
+ * tipc_link_set_queue_limits(): Fill in the link's per-importance and
+ * per-user send queue limits.  Only the four limits for data messages
+ * originating on this node depend on 'window'; all others are fixed.
+ */
+void tipc_link_set_queue_limits(struct link *l_ptr, u32 window)
+{
+	u32 third = window / 3;
+
+	/* Data messages from this node, inclusive FIRST_FRAGM */
+	l_ptr->queue_limit[DATA_LOW] = window;
+	l_ptr->queue_limit[DATA_MEDIUM] = third * 4;
+	l_ptr->queue_limit[DATA_HIGH] = third * 5;
+	l_ptr->queue_limit[DATA_CRITICAL] = third * 6;
+
+	/* Transiting data messages,inclusive FIRST_FRAGM */
+	l_ptr->queue_limit[DATA_LOW + 4] = 300;
+	l_ptr->queue_limit[DATA_MEDIUM + 4] = 600;
+	l_ptr->queue_limit[DATA_HIGH + 4] = 900;
+	l_ptr->queue_limit[DATA_CRITICAL + 4] = 1200;
+
+	/* Internal message users */
+	l_ptr->queue_limit[CONN_MANAGER] = 1200;
+	l_ptr->queue_limit[ROUTE_DISTRIBUTOR] = 1200;
+	l_ptr->queue_limit[CHANGEOVER_PROTOCOL] = 2500;
+	l_ptr->queue_limit[NAME_DISTRIBUTOR] = 3000;
+
+	/* FRAGMENT and LAST_FRAGMENT packets */
+	l_ptr->queue_limit[MSG_FRAGMENTER] = 4000;
+}
+
+/**
+ * link_find_link - locate link by name
+ * @name - ptr to link name string
+ * @node - ptr to area to be filled with ptr to associated node
+ *
+ * Caller must hold 'tipc_net_lock' to ensure node and bearer are not deleted;
+ * this also prevents link deletion.
+ *
+ * The name is split into its parts, the local interface part selects the
+ * bearer and the peer address part selects the node; the link found there
+ * must carry exactly the requested name.
+ *
+ * Returns pointer to link (or 0 if invalid link name).
+ */
+
+static struct link *link_find_link(const char *name, struct node **node)
+{
+	struct link_name parts;
+	struct bearer *b_ptr;
+	struct link *l_ptr;
+
+	if (!link_name_validate(name, &parts))
+		return NULL;
+
+	b_ptr = tipc_bearer_find_interface(parts.if_local);
+	if (b_ptr == NULL)
+		return NULL;
+
+	*node = tipc_node_find(parts.addr_peer);
+	if (*node == NULL)
+		return NULL;
+
+	l_ptr = (*node)->links[b_ptr->identity];
+	if (l_ptr && (strcmp(l_ptr->name, name) == 0))
+		return l_ptr;
+
+	return NULL;
+}
+
+/*
+ * tipc_link_cmd_config(): Apply a tolerance, priority or window setting
+ * carried in a TIPC_TLV_LINK_CONFIG request to the named link.  On the
+ * broadcast link only the window can be changed.  Returns a reply buffer:
+ * empty on success, an error string otherwise.
+ */
+struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space,
+				     u16 cmd)
+{
+	struct tipc_link_config *args;
+	struct link *l_ptr;
+	struct node *node;
+	u32 new_value;
+	int res = -EINVAL;
+
+	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_CONFIG))
+		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+
+	args = (struct tipc_link_config *)TLV_DATA(req_tlv_area);
+	new_value = ntohl(args->value);
+
+	/* Broadcast link: handled on its own, without the net lock */
+	if (strcmp(args->name, tipc_bclink_name) == 0) {
+		if ((cmd != TIPC_CMD_SET_LINK_WINDOW) ||
+		    (tipc_bclink_set_queue_limits(new_value) != 0))
+			return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
+							   " (cannot change setting on broadcast link)");
+		return tipc_cfg_reply_none();
+	}
+
+	read_lock_bh(&tipc_net_lock);
+	l_ptr = link_find_link(args->name, &node);
+	if (l_ptr == NULL) {
+		read_unlock_bh(&tipc_net_lock);
+		return tipc_cfg_reply_error_string("link not found");
+	}
+
+	tipc_node_lock(node);
+	switch (cmd) {
+	case TIPC_CMD_SET_LINK_TOL:
+		if ((new_value < TIPC_MIN_LINK_TOL) ||
+		    (new_value > TIPC_MAX_LINK_TOL))
+			break;
+		link_set_supervision_props(l_ptr, new_value);
+		tipc_link_send_proto_msg(l_ptr, STATE_MSG,
+					 0, 0, new_value, 0, 0);
+		res = TIPC_OK;
+		break;
+	case TIPC_CMD_SET_LINK_PRI:
+		if ((new_value < TIPC_MIN_LINK_PRI) ||
+		    (new_value > TIPC_MAX_LINK_PRI))
+			break;
+		l_ptr->priority = new_value;
+		tipc_link_send_proto_msg(l_ptr, STATE_MSG,
+					 0, 0, 0, new_value, 0);
+		res = TIPC_OK;
+		break;
+	case TIPC_CMD_SET_LINK_WINDOW:
+		if ((new_value < TIPC_MIN_LINK_WIN) ||
+		    (new_value > TIPC_MAX_LINK_WIN))
+			break;
+		tipc_link_set_queue_limits(l_ptr, new_value);
+		res = TIPC_OK;
+		break;
+	}
+	tipc_node_unlock(node);
+	read_unlock_bh(&tipc_net_lock);
+
+	return res ? tipc_cfg_reply_error_string("cannot change link setting")
+		   : tipc_cfg_reply_none();
+}
+
+/**
+ * link_reset_statistics - reset link statistics
+ * @l_ptr: pointer to link
+ *
+ * All counters are zeroed; the current next_out_no and next_in_no are then
+ * recorded in sent_info and recv_info.
+ */
+
+static void link_reset_statistics(struct link *l_ptr)
+{
+	memset(&l_ptr->stats, 0, sizeof(l_ptr->stats));
+	l_ptr->stats.recv_info = l_ptr->next_in_no;
+	l_ptr->stats.sent_info = l_ptr->next_out_no;
+}
+
+/*
+ * tipc_link_cmd_reset_stats(): Reset the statistics of the link named in a
+ * TIPC_TLV_LINK_NAME request.  Returns a reply buffer: empty on success,
+ * an error string otherwise.
+ */
+struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_space)
+{
+	struct node *node;
+	struct link *l_ptr;
+	char *link_name;
+
+	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_NAME))
+		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+
+	link_name = (char *)TLV_DATA(req_tlv_area);
+
+	/* The broadcast link keeps its statistics elsewhere */
+	if (strcmp(link_name, tipc_bclink_name) == 0)
+		return tipc_bclink_reset_stats()
+			? tipc_cfg_reply_error_string("link not found")
+			: tipc_cfg_reply_none();
+
+	read_lock_bh(&tipc_net_lock);
+	l_ptr = link_find_link(link_name, &node);
+	if (l_ptr != NULL) {
+		tipc_node_lock(node);
+		link_reset_statistics(l_ptr);
+		tipc_node_unlock(node);
+	}
+	read_unlock_bh(&tipc_net_lock);
+
+	/* l_ptr is only tested here, never dereferenced, after the unlock */
+	return l_ptr ? tipc_cfg_reply_none()
+		     : tipc_cfg_reply_error_string("link not found");
+}
+
+/**
+ * percent - convert count to a percentage of total (rounding up or down)
+ * @count: number of occurrences
+ * @total: number the percentage is relative to; used as the divisor
+ *
+ * Half of @total is added before dividing so the result is rounded to the
+ * nearest whole percent.
+ */
+
+static u32 percent(u32 count, u32 total)
+{
+	u32 half = total / 2;
+	u32 scaled = count * 100;
+
+	return (scaled + half) / total;
+}
+
+/**
+ * tipc_link_stats - print link statistics
+ * @name: link name
+ * @buf: print buffer area
+ * @buf_size: size of print buffer area
+ *
+ * Returns length of print buffer data string (or 0 if error, e.g. unknown link)
+ */
+
+static int tipc_link_stats(const char *name, char *buf, const u32 buf_size)
+{
+	struct print_buf pb;
+	struct link *l_ptr;
+	struct node *node;
+	char *status;
+	u32 profile_total = 0;
+
+	if (!strcmp(name, tipc_bclink_name))	/* matching name: use the bclink printer */
+		return tipc_bclink_stats(buf, buf_size);
+
+	tipc_printbuf_init(&pb, buf, buf_size);
+
+	read_lock_bh(&tipc_net_lock);
+	l_ptr = link_find_link(name, &node);
+	if (!l_ptr) {
+		read_unlock_bh(&tipc_net_lock);
+		return 0;
+	}
+	tipc_node_lock(node);
+
+	if (tipc_link_is_active(l_ptr))
+		status = "ACTIVE";
+	else if (tipc_link_is_up(l_ptr))
+		status = "STANDBY";
+	else
+		status = "DEFUNCT";
+	tipc_printf(&pb, "Link <%s>\n"
+		         "  %s  MTU:%u  Priority:%u  Tolerance:%u ms"
+		         "  Window:%u packets\n",
+		    l_ptr->name, status, link_max_pkt(l_ptr),
+		    l_ptr->priority, l_ptr->tolerance, l_ptr->queue_limit[0]);
+	tipc_printf(&pb, "  RX packets:%u fragments:%u/%u bundles:%u/%u\n",
+		    l_ptr->next_in_no - l_ptr->stats.recv_info,
+		    l_ptr->stats.recv_fragments,
+		    l_ptr->stats.recv_fragmented,
+		    l_ptr->stats.recv_bundles,
+		    l_ptr->stats.recv_bundled);
+	tipc_printf(&pb, "  TX packets:%u fragments:%u/%u bundles:%u/%u\n",
+		    l_ptr->next_out_no - l_ptr->stats.sent_info,
+		    l_ptr->stats.sent_fragments,
+		    l_ptr->stats.sent_fragmented,
+		    l_ptr->stats.sent_bundles,
+		    l_ptr->stats.sent_bundled);
+	profile_total = l_ptr->stats.msg_length_counts;
+	if (!profile_total)
+		profile_total = 1;	/* keeps the divisions below well-defined */
+	tipc_printf(&pb, "  TX profile sample:%u packets  average:%u octets\n"
+		         "  0-64:%u%% -256:%u%% -1024:%u%% -4096:%u%% "
+		         "-16384:%u%% -32768:%u%% -66000:%u%%\n",
+		    l_ptr->stats.msg_length_counts,
+		    l_ptr->stats.msg_lengths_total / profile_total,
+		    percent(l_ptr->stats.msg_length_profile[0], profile_total),
+		    percent(l_ptr->stats.msg_length_profile[1], profile_total),
+		    percent(l_ptr->stats.msg_length_profile[2], profile_total),
+		    percent(l_ptr->stats.msg_length_profile[3], profile_total),
+		    percent(l_ptr->stats.msg_length_profile[4], profile_total),
+		    percent(l_ptr->stats.msg_length_profile[5], profile_total),
+		    percent(l_ptr->stats.msg_length_profile[6], profile_total));
+	tipc_printf(&pb, "  RX states:%u probes:%u naks:%u defs:%u dups:%u\n",
+		    l_ptr->stats.recv_states,
+		    l_ptr->stats.recv_probes,
+		    l_ptr->stats.recv_nacks,
+		    l_ptr->stats.deferred_recv,
+		    l_ptr->stats.duplicates);
+	tipc_printf(&pb, "  TX states:%u probes:%u naks:%u acks:%u dups:%u\n",
+		    l_ptr->stats.sent_states,
+		    l_ptr->stats.sent_probes,
+		    l_ptr->stats.sent_nacks,
+		    l_ptr->stats.sent_acks,
+		    l_ptr->stats.retransmitted);
+	tipc_printf(&pb, "  Congestion bearer:%u link:%u  Send queue max:%u avg:%u\n",
+		    l_ptr->stats.bearer_congs,
+		    l_ptr->stats.link_congs,
+		    l_ptr->stats.max_queue_sz,
+		    l_ptr->stats.queue_sz_counts
+		    ? (l_ptr->stats.accu_queue_sz / l_ptr->stats.queue_sz_counts)
+		    : 0);
+
+	tipc_node_unlock(node);
+	read_unlock_bh(&tipc_net_lock);
+	return tipc_printbuf_validate(&pb);
+}
+
+#define MAX_LINK_STATS_INFO 2000
+
+struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_space)
+{
+	struct tlv_desc *reply_tlv;
+	struct sk_buff *reply;
+	int text_len;
+
+	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_NAME))
+		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+
+	/* Size the reply for the longest statistics text we allow */
+	reply = tipc_cfg_reply_alloc(TLV_SPACE(MAX_LINK_STATS_INFO));
+	if (!reply)
+		return NULL;
+	reply_tlv = (struct tlv_desc *)reply->data;
+
+	/* Let tipc_link_stats() write the text into the reply TLV's data area */
+	text_len = tipc_link_stats((char *)TLV_DATA(req_tlv_area),
+				   (char *)TLV_DATA(reply_tlv), MAX_LINK_STATS_INFO);
+	if (text_len == 0) {
+		buf_discard(reply);
+		return tipc_cfg_reply_error_string("link not found");
+	}
+
+	skb_put(reply, TLV_SPACE(text_len));
+	TLV_SET(reply_tlv, TIPC_TLV_ULTRA_STRING, NULL, text_len);
+	return reply;
+}
+
+#if 0	/* compiled out: remove, block or unblock the link identified by name */
+int link_control(const char *name, u32 op, u32 val)	/* val is not used */
+{
+	int res = -EINVAL;	/* stays -EINVAL when the node or link is not found */
+	struct link *l_ptr;
+	u32 bearer_id;
+	struct node * node;
+	u32 a;
+
+	a = link_name2addr(name, &bearer_id);
+	read_lock_bh(&tipc_net_lock);
+	node = tipc_node_find(a);
+	if (node) {
+		tipc_node_lock(node);
+		l_ptr = node->links[bearer_id];
+		if (l_ptr) {
+			if (op == TIPC_REMOVE_LINK) {
+				struct bearer *b_ptr = l_ptr->b_ptr;
+				spin_lock_bh(&b_ptr->publ.lock);
+				tipc_link_delete(l_ptr);
+				spin_unlock_bh(&b_ptr->publ.lock);
+			}
+			if (op == TIPC_CMD_BLOCK_LINK) {
+				tipc_link_reset(l_ptr);
+				l_ptr->blocked = 1;
+			}
+			if (op == TIPC_CMD_UNBLOCK_LINK) {
+				l_ptr->blocked = 0;
+			}
+			res = TIPC_OK;	/* also set for any other op value once the link exists */
+		}
+		tipc_node_unlock(node);
+	}
+	read_unlock_bh(&tipc_net_lock);
+	return res;
+}
+#endif
+
+/**
+ * tipc_link_get_max_pkt - get maximum packet size to use when sending to destination
+ * @dest: network address of destination node
+ * @selector: used to select from set of active links
+ * 
+ * If no active link can be found, uses default maximum packet size.
+ */
+
+u32 tipc_link_get_max_pkt(u32 dest, u32 selector)
+{
+	u32 max_pkt = MAX_PKT_DEFAULT;	/* fallback when no active link is found */
+	struct node *peer;
+	struct link *active;
+
+	if (dest == tipc_own_addr)
+		return MAX_MSG_SIZE;
+
+	read_lock_bh(&tipc_net_lock);
+	peer = tipc_node_select(dest, selector);
+	if (peer) {
+		tipc_node_lock(peer);
+		active = peer->active_links[selector & 1];
+		if (active)
+			max_pkt = link_max_pkt(active);
+		tipc_node_unlock(peer);
+	}
+	read_unlock_bh(&tipc_net_lock);
+	return max_pkt;
+}
+
+#if 0	/* compiled out: log every buffer on the link's deferred inbound queue */
+static void link_dump_rec_queue(struct link *l_ptr)
+{
+	struct sk_buff *crs;
+
+	if (!l_ptr->oldest_deferred_in) {
+		info("Reception queue empty\n");
+		return;
+	}
+	info("Contents of Reception queue:\n");
+	crs = l_ptr->oldest_deferred_in;
+	while (crs) {
+		if (crs->data == (void *)0x0000a3a3) {	/* NOTE(review): presumably a poison marker - confirm */
+			info("buffer %x invalid\n", crs);	/* NOTE(review): pointer printed with %x */
+			return;
+		}
+		msg_dbg(buf_msg(crs), "In rec queue: \n");
+		crs = crs->next;
+	}
+}
+#endif
+
+static void link_dump_send_queue(struct link *l_ptr)
+{
+	if (l_ptr->next_out) {		/* unsent part of the send queue, if any */
+		info("\nContents of unsent queue:\n");
+		dbg_print_buf_chain(l_ptr->next_out);
+	}
+	info("\nContents of send queue:\n");
+	if (l_ptr->first_out)
+		dbg_print_buf_chain(l_ptr->first_out);
+	else	/* say "empty" only when there is nothing to dump */
+		info("Empty send queue\n");
+}
+
+/* Append @str and then a one-line summary of the link's queues and FSM state */
+static void link_print(struct link *l_ptr, struct print_buf *buf,
+		       const char *str)
+{
+	tipc_printf(buf, "%s", str);	/* never use caller text as the format */
+	if (link_reset_reset(l_ptr) || link_reset_unknown(l_ptr))
+		return;		/* only @str is printed for links in a reset state */
+	tipc_printf(buf, "Link %x<%s>:", l_ptr->addr, l_ptr->b_ptr->publ.name);
+	tipc_printf(buf, ": NXO(%u):", mod(l_ptr->next_out_no));
+	tipc_printf(buf, "NXI(%u):", mod(l_ptr->next_in_no));
+	tipc_printf(buf, "SQUE");
+	if (l_ptr->first_out) {
+		tipc_printf(buf, "[%u..", msg_seqno(buf_msg(l_ptr->first_out)));
+		if (l_ptr->next_out)
+			tipc_printf(buf, "%u..",
+				    msg_seqno(buf_msg(l_ptr->next_out)));
+		tipc_printf(buf, "%u]", msg_seqno(buf_msg(l_ptr->last_out)));
+		/* first..last seqno span must match the queue length, list must end */
+		if ((mod(msg_seqno(buf_msg(l_ptr->last_out)) -
+			 msg_seqno(buf_msg(l_ptr->first_out)))
+		     != (l_ptr->out_queue_size - 1))
+		    || (l_ptr->last_out->next != NULL)) {
+			tipc_printf(buf, "\nSend queue inconsistency\n");
+			tipc_printf(buf, "first_out= %p ", l_ptr->first_out);
+			tipc_printf(buf, "next_out= %p ", l_ptr->next_out);
+			tipc_printf(buf, "last_out= %p ", l_ptr->last_out);
+			link_dump_send_queue(l_ptr);
+		}
+	} else
+		tipc_printf(buf, "[]");
+	tipc_printf(buf, "SQSIZ(%u)", l_ptr->out_queue_size);
+	if (l_ptr->oldest_deferred_in) {
+		u32 o = msg_seqno(buf_msg(l_ptr->oldest_deferred_in));
+		u32 n = msg_seqno(buf_msg(l_ptr->newest_deferred_in));
+		tipc_printf(buf, ":RQUE[%u..%u]", o, n);
+		/* queue size is shown only when it disagrees with the seqno span */
+		if (l_ptr->deferred_inqueue_sz != mod((n + 1) - o)) {
+			tipc_printf(buf, ":RQSIZ(%u)",
+				    l_ptr->deferred_inqueue_sz);
+		}
+	}
+	if (link_working_unknown(l_ptr))
+		tipc_printf(buf, ":WU");
+	if (link_reset_reset(l_ptr))	/* NOTE(review): looks unreachable after the early return */
+		tipc_printf(buf, ":RR");
+	if (link_reset_unknown(l_ptr))	/* NOTE(review): looks unreachable after the early return */
+		tipc_printf(buf, ":RU");
+	if (link_working_working(l_ptr))
+		tipc_printf(buf, ":WW");
+	tipc_printf(buf, "\n");
+}
+
diff --git a/net/tipc/link.h b/net/tipc/link.h
new file mode 100644
index 00000000000..2d3c157f707
--- /dev/null
+++ b/net/tipc/link.h
@@ -0,0 +1,295 @@
+/*
+ * net/tipc/link.h: Include file for TIPC link code
+ * 
+ * Copyright (c) 1995-2006, Ericsson AB
+ * Copyright (c) 2004-2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_LINK_H
+#define _TIPC_LINK_H
+
+#include "dbg.h"
+#include "msg.h"
+#include "bearer.h"
+#include "node.h"
+
+#define PUSH_FAILED   1
+#define PUSH_FINISHED 2
+
+/* 
+ * Link states 
+ */
+
+#define WORKING_WORKING 560810u
+#define WORKING_UNKNOWN 560811u
+#define RESET_UNKNOWN   560812u
+#define RESET_RESET     560813u
+
+/* 
+ * Starting value for maximum packet size negotiation on unicast links
+ * (unless bearer MTU is less)
+ */
+
+#define MAX_PKT_DEFAULT 1500
+
+/**
+ * struct link - TIPC link data structure
+ * @addr: network address of link's peer node
+ * @name: link name character string
+ * @media_addr: media address to use when sending messages over link
+ * @timer: link timer
+ * @owner: pointer to peer node
+ * @link_list: adjacent links in bearer's list of links
+ * @started: indicates if link has been started
+ * @checkpoint: reference point for triggering link continuity checking
+ * @peer_session: link session # being used by peer end of link
+ * @peer_bearer_id: bearer id used by link's peer endpoint
+ * @b_ptr: pointer to bearer used by link
+ * @tolerance: minimum link continuity loss needed to reset link [in ms] 
+ * @continuity_interval: link continuity testing interval [in ms]
+ * @abort_limit: # of unacknowledged continuity probes needed to reset link
+ * @state: current state of link FSM
+ * @blocked: indicates if link has been administratively blocked
+ * @fsm_msg_cnt: # of protocol messages link FSM has sent in current state
+ * @proto_msg: template for control messages generated by link
+ * @pmsg: convenience pointer to "proto_msg" field
+ * @priority: current link priority
+ * @queue_limit: outbound message queue congestion thresholds (indexed by user)
+ * @exp_msg_count: # of tunnelled messages expected during link changeover
+ * @reset_checkpoint: seq # of last acknowledged message at time of link reset
+ * @max_pkt: current maximum packet size for this link
+ * @max_pkt_target: desired maximum packet size for this link
+ * @max_pkt_probes: # of probes based on current (max_pkt, max_pkt_target)
+ * @out_queue_size: # of messages in outbound message queue
+ * @first_out: ptr to first outbound message in queue
+ * @last_out: ptr to last outbound message in queue
+ * @next_out_no: next sequence number to use for outbound messages
+ * @last_retransmitted: sequence number of most recently retransmitted message
+ * @stale_count: # of identical retransmit requests made by peer
+ * @next_in_no: next sequence number to expect for inbound messages
+ * @deferred_inqueue_sz: # of messages in inbound message queue
+ * @oldest_deferred_in: ptr to first inbound message in queue
+ * @newest_deferred_in: ptr to last inbound message in queue
+ * @unacked_window: # of inbound messages rx'd without ack'ing back to peer
+ * @proto_msg_queue: ptr to (single) outbound control message
+ * @retransm_queue_size: number of messages to retransmit
+ * @retransm_queue_head: sequence number of first message to retransmit
+ * @next_out: ptr to first unsent outbound message in queue
+ * @waiting_ports: linked list of ports waiting for link congestion to abate
+ * @long_msg_seq_no: next identifier to use for outbound fragmented messages
+ * @defragm_buf: list of partially reassembled inbound message fragments
+ * @stats: collects statistics regarding link activity
+ * @print_buf: print buffer used to log link activity
+ */
+ 
+struct link {
+	u32 addr;
+	char name[TIPC_MAX_LINK_NAME];
+	struct tipc_media_addr media_addr;
+	struct timer_list timer;
+	struct node *owner;
+	struct list_head link_list;
+
+	/* Management and link supervision data */
+	int started;
+	u32 checkpoint;
+	u32 peer_session;
+	u32 peer_bearer_id;
+	struct bearer *b_ptr;
+	u32 tolerance;
+	u32 continuity_interval;
+	u32 abort_limit;
+	int state;
+	int blocked;
+	u32 fsm_msg_cnt;
+	struct {
+		unchar hdr[INT_H_SIZE];
+		unchar body[TIPC_MAX_IF_NAME];
+	} proto_msg;
+	struct tipc_msg *pmsg;
+	u32 priority;
+	u32 queue_limit[15];	/* queue_limit[0]==window limit */
+
+	/* Changeover */
+	u32 exp_msg_count;
+	u32 reset_checkpoint;
+
+        /* Max packet negotiation */
+        u32 max_pkt;
+        u32 max_pkt_target;
+        u32 max_pkt_probes;
+
+	/* Sending */
+	u32 out_queue_size;
+	struct sk_buff *first_out;
+	struct sk_buff *last_out;
+	u32 next_out_no;
+        u32 last_retransmitted;
+        u32 stale_count;
+
+	/* Reception */
+	u32 next_in_no;
+	u32 deferred_inqueue_sz;
+	struct sk_buff *oldest_deferred_in;
+	struct sk_buff *newest_deferred_in;
+	u32 unacked_window;
+
+	/* Congestion handling */
+	struct sk_buff *proto_msg_queue;
+	u32 retransm_queue_size;
+	u32 retransm_queue_head;
+	struct sk_buff *next_out;
+	struct list_head waiting_ports;
+
+	/* Fragmentation/defragmentation */
+	u32 long_msg_seq_no;
+	struct sk_buff *defragm_buf;
+
+        /* Statistics */
+	struct {
+		u32 sent_info;		/* used in counting # sent packets */
+		u32 recv_info;		/* used in counting # recv'd packets */
+		u32 sent_states;
+		u32 recv_states;
+		u32 sent_probes;
+		u32 recv_probes;
+		u32 sent_nacks;
+		u32 recv_nacks;
+		u32 sent_acks;
+		u32 sent_bundled;
+		u32 sent_bundles;
+		u32 recv_bundled;
+		u32 recv_bundles;
+		u32 retransmitted;	/* reported as TX "dups" in link statistics */
+		u32 sent_fragmented;
+		u32 sent_fragments;
+		u32 recv_fragmented;
+		u32 recv_fragments;
+		u32 link_congs;		/* # port sends blocked by congestion */
+		u32 bearer_congs;	/* reported as "Congestion bearer" */
+		u32 deferred_recv;	/* reported as RX "defs" in link statistics */
+		u32 duplicates;		/* reported as RX "dups" in link statistics */
+
+		/* for statistical profiling of send queue size */
+
+		u32 max_queue_sz;
+		u32 accu_queue_sz;	/* reported average = accu_queue_sz / queue_sz_counts */
+		u32 queue_sz_counts;
+
+		/* for statistical profiling of message lengths */
+
+		u32 msg_length_counts;	/* reported as the "TX profile sample" size */
+		u32 msg_lengths_total;	/* reported average = total / msg_length_counts */
+		u32 msg_length_profile[7];	/* one counter per reported length range */
+#if 0
+		u32 sent_tunneled;
+		u32 recv_tunneled;
+#endif
+	} stats;
+
+	struct print_buf print_buf;
+};
+
+struct port;
+
+struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
+			      const struct tipc_media_addr *media_addr);
+void tipc_link_delete(struct link *l_ptr);
+void tipc_link_changeover(struct link *l_ptr);
+void tipc_link_send_duplicate(struct link *l_ptr, struct link *dest);
+void tipc_link_reset_fragments(struct link *l_ptr);
+int tipc_link_is_up(struct link *l_ptr);
+int tipc_link_is_active(struct link *l_ptr);
+void tipc_link_start(struct link *l_ptr);
+u32 tipc_link_push_packet(struct link *l_ptr);
+void tipc_link_stop(struct link *l_ptr);
+struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space, u16 cmd);
+struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_space);
+struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_space);
+void tipc_link_reset(struct link *l_ptr);
+int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector);
+int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf);
+u32 tipc_link_get_max_pkt(u32 dest,u32 selector);
+int tipc_link_send_sections_fast(struct port* sender, 
+				 struct iovec const *msg_sect,
+				 const u32 num_sect, 
+				 u32 destnode);
+int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf);
+void tipc_link_tunnel(struct link *l_ptr, struct tipc_msg *tnl_hdr,
+		      struct tipc_msg *msg, u32 selector);
+void tipc_link_recv_bundle(struct sk_buff *buf);
+int  tipc_link_recv_fragment(struct sk_buff **pending,
+			     struct sk_buff **fb,
+			     struct tipc_msg **msg);
+void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int prob, u32 gap, 
+			      u32 tolerance, u32 priority, u32 acked_mtu);
+void tipc_link_push_queue(struct link *l_ptr);
+u32 tipc_link_defer_pkt(struct sk_buff **head, struct sk_buff **tail,
+		   struct sk_buff *buf);
+void tipc_link_wakeup_ports(struct link *l_ptr, int all);
+void tipc_link_set_queue_limits(struct link *l_ptr, u32 window);
+void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *start, u32 retransmits);
+
+/*
+ * Link sequence number manipulation routines (uses modulo 2**16 arithmetic)
+ */
+
+static inline u32 mod(u32 x)
+{
+	return x % 0x10000u;	/* reduce modulo 2**16 */
+}
+
+static inline int between(u32 lower, u32 upper, u32 n)
+{
+	int above = (n > lower);	/* n lies past the lower bound */
+	int below = (n < upper);	/* n lies before the upper bound */
+
+	/* plain range needs both; a wrapped range (upper < lower) needs either */
+	return (above && below) || ((upper < lower) && (above || below));
+}
+
+static inline int less_eq(u32 left, u32 right)
+{
+	return mod(right - left) <= 0x7fffu;	/* right is 0..32767 steps ahead of left */
+}
+
+static inline int less(u32 left, u32 right)
+{
+	return (mod(left) != mod(right)) && less_eq(left, right);	/* strictly before */
+}
+
+static inline u32 lesser(u32 left, u32 right)
+{
+	return !less_eq(left, right) ? right : left;	/* left wins when less_eq() holds */
+}
+
+#endif
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
new file mode 100644
index 00000000000..3bd345a344e
--- /dev/null
+++ b/net/tipc/msg.c
@@ -0,0 +1,323 @@
+/*
+ * net/tipc/msg.c: TIPC message header routines
+ *     
+ * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "addr.h"
+#include "dbg.h"
+#include "msg.h"
+#include "bearer.h"
+
+
+/* Append @str, then a decoded dump of @msg's header fields, to print buffer @buf */
+void tipc_msg_print(struct print_buf *buf, struct tipc_msg *msg, const char *str)
+{
+	u32 usr = msg_user(msg);
+	tipc_printf(buf, "%s", str);	/* never use caller text as the format */
+
+	switch (usr) {	/* message user and, where relevant, message type */
+	case MSG_BUNDLER:
+		tipc_printf(buf, "BNDL::");
+		tipc_printf(buf, "MSGS(%u):", msg_msgcnt(msg));
+		break;
+	case BCAST_PROTOCOL:
+		tipc_printf(buf, "BCASTP::");
+		break;
+	case MSG_FRAGMENTER:
+		tipc_printf(buf, "FRAGM::");
+		switch (msg_type(msg)) {
+		case FIRST_FRAGMENT:
+			tipc_printf(buf, "FIRST:");
+			break;
+		case FRAGMENT:
+			tipc_printf(buf, "BODY:");
+			break;
+		case LAST_FRAGMENT:
+			tipc_printf(buf, "LAST:");
+			break;
+		default:
+			tipc_printf(buf, "UNKNOWN:%x",msg_type(msg));
+		}
+		tipc_printf(buf, "NO(%u/%u):",msg_long_msgno(msg),
+			    msg_fragm_no(msg));
+		break;
+	case DATA_LOW:
+	case DATA_MEDIUM:
+	case DATA_HIGH:
+	case DATA_CRITICAL:
+		tipc_printf(buf, "DAT%u:", msg_user(msg));
+		if (msg_short(msg)) {
+			tipc_printf(buf, "CON:");
+			break;
+		}
+		switch (msg_type(msg)) {
+		case TIPC_CONN_MSG:
+			tipc_printf(buf, "CON:");
+			break;
+		case TIPC_MCAST_MSG:
+			tipc_printf(buf, "MCST:");
+			break;
+		case TIPC_NAMED_MSG:
+			tipc_printf(buf, "NAM:");
+			break;
+		case TIPC_DIRECT_MSG:
+			tipc_printf(buf, "DIR:");
+			break;
+		default:
+			tipc_printf(buf, "UNKNOWN TYPE %u",msg_type(msg));
+		}
+		if (msg_routed(msg) && !msg_non_seq(msg))
+			tipc_printf(buf, "ROUT:");
+		if (msg_reroute_cnt(msg))
+			tipc_printf(buf, "REROUTED(%u):",
+				    msg_reroute_cnt(msg));
+		break;
+	case NAME_DISTRIBUTOR:
+		tipc_printf(buf, "NMD::");
+		switch (msg_type(msg)) {
+		case PUBLICATION:
+			tipc_printf(buf, "PUBL(%u):", (msg_size(msg) - msg_hdr_sz(msg)) / 20);	/* Items */
+			break;
+		case WITHDRAWAL:
+			tipc_printf(buf, "WDRW:");
+			break;
+		default:
+			tipc_printf(buf, "UNKNOWN:%x",msg_type(msg));
+		}
+		if (msg_routed(msg))
+			tipc_printf(buf, "ROUT:");
+		if (msg_reroute_cnt(msg))
+			tipc_printf(buf, "REROUTED(%u):",
+				    msg_reroute_cnt(msg));
+		break;
+	case CONN_MANAGER:
+		tipc_printf(buf, "CONN_MNG:");
+		switch (msg_type(msg)) {
+		case CONN_PROBE:
+			tipc_printf(buf, "PROBE:");
+			break;
+		case CONN_PROBE_REPLY:
+			tipc_printf(buf, "PROBE_REPLY:");
+			break;
+		case CONN_ACK:
+			tipc_printf(buf, "CONN_ACK:");
+			tipc_printf(buf, "ACK(%u):",msg_msgcnt(msg));
+			break;
+		default:
+			tipc_printf(buf, "UNKNOWN TYPE:%x",msg_type(msg));
+		}
+		if (msg_routed(msg))
+			tipc_printf(buf, "ROUT:");
+		if (msg_reroute_cnt(msg))
+			tipc_printf(buf, "REROUTED(%u):",msg_reroute_cnt(msg));
+		break;
+	case LINK_PROTOCOL:
+		tipc_printf(buf, "PROT:TIM(%u):",msg_timestamp(msg));
+		switch (msg_type(msg)) {
+		case STATE_MSG:
+			tipc_printf(buf, "STATE:");
+			tipc_printf(buf, "%s:",msg_probe(msg) ? "PRB" :"");
+			tipc_printf(buf, "NXS(%u):",msg_next_sent(msg));
+			tipc_printf(buf, "GAP(%u):",msg_seq_gap(msg));
+			tipc_printf(buf, "LSTBC(%u):",msg_last_bcast(msg));
+			break;
+		case RESET_MSG:
+			tipc_printf(buf, "RESET:");
+			if (msg_size(msg) != msg_hdr_sz(msg))
+				tipc_printf(buf, "BEAR:%s:",msg_data(msg));
+			break;
+		case ACTIVATE_MSG:
+			tipc_printf(buf, "ACTIVATE:");
+			break;
+		default:
+			tipc_printf(buf, "UNKNOWN TYPE:%x",msg_type(msg));
+		}
+		tipc_printf(buf, "PLANE(%c):",msg_net_plane(msg));
+		tipc_printf(buf, "SESS(%u):",msg_session(msg));
+		break;
+	case CHANGEOVER_PROTOCOL:
+		tipc_printf(buf, "TUNL:");
+		switch (msg_type(msg)) {
+		case DUPLICATE_MSG:
+			tipc_printf(buf, "DUPL:");
+			break;
+		case ORIGINAL_MSG:
+			tipc_printf(buf, "ORIG:");
+			tipc_printf(buf, "EXP(%u)",msg_msgcnt(msg));
+			break;
+		default:
+			tipc_printf(buf, "UNKNOWN TYPE:%x",msg_type(msg));
+		}
+		break;
+	case ROUTE_DISTRIBUTOR:
+		tipc_printf(buf, "ROUTING_MNG:");
+		switch (msg_type(msg)) {
+		case EXT_ROUTING_TABLE:
+			tipc_printf(buf, "EXT_TBL:");
+			tipc_printf(buf, "TO:%x:",msg_remote_node(msg));
+			break;
+		case LOCAL_ROUTING_TABLE:
+			tipc_printf(buf, "LOCAL_TBL:");
+			tipc_printf(buf, "TO:%x:",msg_remote_node(msg));
+			break;
+		case SLAVE_ROUTING_TABLE:
+			tipc_printf(buf, "DP_TBL:");
+			tipc_printf(buf, "TO:%x:",msg_remote_node(msg));
+			break;
+		case ROUTE_ADDITION:
+			tipc_printf(buf, "ADD:");
+			tipc_printf(buf, "TO:%x:",msg_remote_node(msg));
+			break;
+		case ROUTE_REMOVAL:
+			tipc_printf(buf, "REMOVE:");
+			tipc_printf(buf, "TO:%x:",msg_remote_node(msg));
+			break;
+		default:
+			tipc_printf(buf, "UNKNOWN TYPE:%x",msg_type(msg));
+		}
+		break;
+	case LINK_CONFIG:
+		tipc_printf(buf, "CFG:");
+		switch (msg_type(msg)) {
+		case DSC_REQ_MSG:
+			tipc_printf(buf, "DSC_REQ:");
+			break;
+		case DSC_RESP_MSG:
+			tipc_printf(buf, "DSC_RESP:");
+			break;
+		default:
+			tipc_printf(buf, "UNKNOWN TYPE:%x:",msg_type(msg));
+			break;
+		}
+		break;
+	default:
+		tipc_printf(buf, "UNKNOWN USER:");
+	}
+
+	switch (usr) {	/* error code, printed only for the users listed here */
+	case CONN_MANAGER:
+	case NAME_DISTRIBUTOR:
+	case DATA_LOW:
+	case DATA_MEDIUM:
+	case DATA_HIGH:
+	case DATA_CRITICAL:
+		if (msg_short(msg))
+			break;	/* No error */
+		switch (msg_errcode(msg)) {
+		case TIPC_OK:
+			break;
+		case TIPC_ERR_NO_NAME:
+			tipc_printf(buf, "NO_NAME:");
+			break;
+		case TIPC_ERR_NO_PORT:
+			tipc_printf(buf, "NO_PORT:");
+			break;
+		case TIPC_ERR_NO_NODE:
+			tipc_printf(buf, "NO_PROC:");
+			break;
+		case TIPC_ERR_OVERLOAD:
+			tipc_printf(buf, "OVERLOAD:");
+			break;
+		case TIPC_CONN_SHUTDOWN:
+			tipc_printf(buf, "SHUTDOWN:");
+			break;
+		default:
+			tipc_printf(buf, "UNKNOWN ERROR(%x):", msg_errcode(msg));
+		}
+		break;	/* explicit: do not rely on falling into the empty default */
+	default:{}
+	}
+
+	tipc_printf(buf, "HZ(%u):", msg_hdr_sz(msg));
+	tipc_printf(buf, "SZ(%u):", msg_size(msg));
+	tipc_printf(buf, "SQNO(%u):", msg_seqno(msg));
+
+	if (msg_non_seq(msg))
+		tipc_printf(buf, "NOSEQ:");
+	else {
+		tipc_printf(buf, "ACK(%u):", msg_ack(msg));
+	}
+	tipc_printf(buf, "BACK(%u):", msg_bcast_ack(msg));
+	tipc_printf(buf, "PRND(%x)", msg_prevnode(msg));
+
+	if (msg_isdata(msg)) {
+		if (msg_named(msg)) {
+			tipc_printf(buf, "NTYP(%u):", msg_nametype(msg));
+			tipc_printf(buf, "NINST(%u)", msg_nameinst(msg));
+		}
+	}
+
+	if ((usr != LINK_PROTOCOL) && (usr != LINK_CONFIG) &&
+	    (usr != MSG_BUNDLER)) {
+		if (!msg_short(msg)) {
+			tipc_printf(buf, ":ORIG(%x:%u):",
+				    msg_orignode(msg), msg_origport(msg));
+			tipc_printf(buf, ":DEST(%x:%u):",
+				    msg_destnode(msg), msg_destport(msg));
+		} else {
+			tipc_printf(buf, ":OPRT(%u):", msg_origport(msg));
+			tipc_printf(buf, ":DPRT(%u):", msg_destport(msg));
+		}
+		if (msg_routed(msg) && !msg_non_seq(msg))
+			tipc_printf(buf, ":TSEQN(%u)", msg_transp_seqno(msg));
+	}
+	if (msg_user(msg) == NAME_DISTRIBUTOR) {
+		tipc_printf(buf, ":ONOD(%x):", msg_orignode(msg));
+		tipc_printf(buf, ":DNOD(%x):", msg_destnode(msg));
+		if (msg_routed(msg)) {
+			tipc_printf(buf, ":CSEQN(%u)", msg_transp_seqno(msg));
+		}
+	}
+
+	if (msg_user(msg) ==  LINK_CONFIG) {
+		u32* raw = (u32*)msg;
+		struct tipc_media_addr* orig = (struct tipc_media_addr*)&raw[5];	/* header word 5 onward */
+		tipc_printf(buf, ":REQL(%u):", msg_req_links(msg));
+		tipc_printf(buf, ":DDOM(%x):", msg_dest_domain(msg));
+		tipc_printf(buf, ":NETID(%u):", msg_bc_netid(msg));
+		tipc_media_addr_printf(buf, orig);
+	}
+	if (msg_user(msg) == BCAST_PROTOCOL) {
+		tipc_printf(buf, "BCNACK:AFTER(%u):", msg_bcgap_after(msg));
+		tipc_printf(buf, "TO(%u):", msg_bcgap_to(msg));
+	}
+	tipc_printf(buf, "\n");
+	if ((usr == CHANGEOVER_PROTOCOL) && (msg_msgcnt(msg))) {	/* dump the wrapped message too */
+		tipc_msg_print(buf,msg_get_wrapped(msg),"      /");
+	}
+	if ((usr == MSG_FRAGMENTER) && (msg_type(msg) == FIRST_FRAGMENT)) {
+		tipc_msg_print(buf,msg_get_wrapped(msg),"      /");
+	}
+}
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
new file mode 100644
index 00000000000..6699aaf7bd4
--- /dev/null
+++ b/net/tipc/msg.h
@@ -0,0 +1,818 @@
+/*
+ * net/tipc/msg.h: Include file for TIPC message header routines
+ * 
+ * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_MSG_H
+#define _TIPC_MSG_H
+
+#include "core.h"
+
+#define TIPC_VERSION              2
+#define DATA_LOW                  TIPC_LOW_IMPORTANCE
+#define DATA_MEDIUM               TIPC_MEDIUM_IMPORTANCE
+#define DATA_HIGH                 TIPC_HIGH_IMPORTANCE
+#define DATA_CRITICAL             TIPC_CRITICAL_IMPORTANCE
+#define SHORT_H_SIZE              24	/* Connected,in cluster */
+#define DIR_MSG_H_SIZE            32	/* Directly addressed messages */
+#define CONN_MSG_H_SIZE           36	/* Routed connected msgs*/
+#define LONG_H_SIZE               40	/* Named Messages */
+#define MCAST_H_SIZE              44	/* Multicast messages */
+#define MAX_H_SIZE                60	/* Inclusive full options */
+#define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE)
+#define LINK_CONFIG               13
+
+
+/*
+		TIPC user data message header format, version 2
+		
+	- Fundamental definitions available to privileged TIPC users
+	  are located in tipc_msg.h.
+	- Remaining definitions available to TIPC internal users appear below. 
+*/
+
+
+static inline void msg_set_word(struct tipc_msg *m, u32 w, u32 val)
+{
+	m->hdr[w] = htonl(val);
+}
+
+static inline void msg_set_bits(struct tipc_msg *m, u32 w,
+				u32 pos, u32 mask, u32 val)
+{
+	u32 word = msg_word(m,w) & ~(mask << pos);
+	msg_set_word(m, w, (word |= (val << pos)));
+}
+
+/* 
+ * Word 0
+ */
+
+static inline u32 msg_version(struct tipc_msg *m)
+{
+	return msg_bits(m, 0, 29, 7);
+}
+
+static inline void msg_set_version(struct tipc_msg *m)
+{
+	msg_set_bits(m, 0, 29, 7, TIPC_VERSION);	/* 3-bit field, same mask as msg_version() */
+}
+
+static inline u32 msg_user(struct tipc_msg *m)
+{
+	return msg_bits(m, 0, 25, 0xf);
+}
+
+static inline u32 msg_isdata(struct tipc_msg *m)
+{
+	return (msg_user(m) <= DATA_CRITICAL);
+}
+
+static inline void msg_set_user(struct tipc_msg *m, u32 n) 
+{
+	msg_set_bits(m, 0, 25, 0xf, n);
+}
+
+static inline void msg_set_importance(struct tipc_msg *m, u32 i) 
+{
+	msg_set_user(m, i);
+}
+
+static inline void msg_set_hdr_sz(struct tipc_msg *m,u32 n) 
+{
+	msg_set_bits(m, 0, 21, 0xf, n>>2);
+}
+
+static inline int msg_non_seq(struct tipc_msg *m) 
+{
+	return msg_bits(m, 0, 20, 1);
+}
+
+static inline void msg_set_non_seq(struct tipc_msg *m) 
+{
+	msg_set_bits(m, 0, 20, 1, 1);
+}
+
+static inline int msg_dest_droppable(struct tipc_msg *m) 
+{
+	return msg_bits(m, 0, 19, 1);
+}
+
+static inline void msg_set_dest_droppable(struct tipc_msg *m, u32 d) 
+{
+	msg_set_bits(m, 0, 19, 1, d);
+}
+
+static inline int msg_src_droppable(struct tipc_msg *m) 
+{
+	return msg_bits(m, 0, 18, 1);
+}
+
+static inline void msg_set_src_droppable(struct tipc_msg *m, u32 d) 
+{
+	msg_set_bits(m, 0, 18, 1, d);
+}
+
+static inline void msg_set_size(struct tipc_msg *m, u32 sz)
+{
+	m->hdr[0] = htonl((msg_word(m, 0) & ~0x1ffff) | sz);
+}
+
+
+/* 
+ * Word 1
+ */
+
+static inline void msg_set_type(struct tipc_msg *m, u32 n) 
+{
+	msg_set_bits(m, 1, 29, 0x7, n);
+}
+
+static inline void msg_set_errcode(struct tipc_msg *m, u32 err) 
+{
+	msg_set_bits(m, 1, 25, 0xf, err);
+}
+
+static inline u32 msg_reroute_cnt(struct tipc_msg *m) 
+{
+	return msg_bits(m, 1, 21, 0xf);
+}
+
+static inline void msg_incr_reroute_cnt(struct tipc_msg *m) 
+{
+	msg_set_bits(m, 1, 21, 0xf, msg_reroute_cnt(m) + 1);
+}
+
+static inline void msg_reset_reroute_cnt(struct tipc_msg *m) 
+{
+	msg_set_bits(m, 1, 21, 0xf, 0);
+}
+
+static inline u32 msg_lookup_scope(struct tipc_msg *m)
+{
+	return msg_bits(m, 1, 19, 0x3);
+}
+
+static inline void msg_set_lookup_scope(struct tipc_msg *m, u32 n) 
+{
+	msg_set_bits(m, 1, 19, 0x3, n);
+}
+
+static inline void msg_set_options(struct tipc_msg *m, const char *opt, u32 sz) 
+{
+	u32 hsz = msg_hdr_sz(m);	/* header size before the options are appended */
+	char *to = (char *)&m->hdr[hsz/4];	/* options are written at the end of the current header */
+
+	if ((hsz < DIR_MSG_H_SIZE) || ((hsz + sz) > MAX_H_SIZE))
+		return;		/* silently ignored: header too short, or result would exceed MAX_H_SIZE */
+	msg_set_bits(m, 1, 16, 0x7, (hsz - 28)/4);	/* word 1, bits 16-18: derived from the old header size */
+	msg_set_hdr_sz(m, hsz + sz);	/* header now includes the option bytes */
+	memcpy(to, opt, sz);
+}
+
+static inline u32 msg_bcast_ack(struct tipc_msg *m)
+{
+	return msg_bits(m, 1, 0, 0xffff);
+}
+
+static inline void msg_set_bcast_ack(struct tipc_msg *m, u32 n) 
+{
+	msg_set_bits(m, 1, 0, 0xffff, n);
+}
+
+
+/* 
+ * Word 2
+ */
+
+static inline u32 msg_ack(struct tipc_msg *m)
+{
+	return msg_bits(m, 2, 16, 0xffff);
+}
+
+static inline void msg_set_ack(struct tipc_msg *m, u32 n) 
+{
+	msg_set_bits(m, 2, 16, 0xffff, n);
+}
+
+static inline u32 msg_seqno(struct tipc_msg *m)
+{
+	return msg_bits(m, 2, 0, 0xffff);
+}
+
+static inline void msg_set_seqno(struct tipc_msg *m, u32 n) 
+{
+	msg_set_bits(m, 2, 0, 0xffff, n);
+}
+
+
+/* 
+ * Words 3-10
+ */
+
+
+static inline void msg_set_prevnode(struct tipc_msg *m, u32 a) 
+{
+	msg_set_word(m, 3, a);
+}
+
+static inline void msg_set_origport(struct tipc_msg *m, u32 p) 
+{
+	msg_set_word(m, 4, p);
+}
+
+static inline void msg_set_destport(struct tipc_msg *m, u32 p) 
+{
+	msg_set_word(m, 5, p);
+}
+
+static inline void msg_set_mc_netid(struct tipc_msg *m, u32 p) 
+{
+	msg_set_word(m, 5, p);
+}
+
+static inline void msg_set_orignode(struct tipc_msg *m, u32 a) 
+{
+	msg_set_word(m, 6, a);
+}
+
+static inline void msg_set_destnode(struct tipc_msg *m, u32 a) 
+{
+	msg_set_word(m, 7, a);
+}
+
+static inline int msg_is_dest(struct tipc_msg *m, u32 d) 
+{
+	return(msg_short(m) || (msg_destnode(m) == d));
+}
+
+static inline u32 msg_routed(struct tipc_msg *m)
+{
+	if (likely(msg_short(m)))
+		return 0;
+	return(msg_destnode(m) ^ msg_orignode(m)) >> 11;
+}
+
+static inline void msg_set_nametype(struct tipc_msg *m, u32 n) 
+{
+	msg_set_word(m, 8, n);
+}
+
+static inline u32 msg_transp_seqno(struct tipc_msg *m)
+{
+	return msg_word(m, 8);
+}
+
+static inline void msg_set_timestamp(struct tipc_msg *m, u32 n)
+{
+	msg_set_word(m, 8, n);
+}
+
+static inline u32 msg_timestamp(struct tipc_msg *m)
+{
+	return msg_word(m, 8);
+}
+
+static inline void msg_set_transp_seqno(struct tipc_msg *m, u32 n)
+{
+	msg_set_word(m, 8, n);
+}
+
+static inline void msg_set_namelower(struct tipc_msg *m, u32 n) 
+{
+	msg_set_word(m, 9, n);
+}
+
+static inline void msg_set_nameinst(struct tipc_msg *m, u32 n) 
+{
+	msg_set_namelower(m, n);
+}
+
+static inline void msg_set_nameupper(struct tipc_msg *m, u32 n) 
+{
+	msg_set_word(m, 10, n);
+}
+
+static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m)
+{
+	return (struct tipc_msg *)msg_data(m);
+}
+
+static inline void msg_expand(struct tipc_msg *m, u32 destnode) 
+{
+	if (!msg_short(m))
+		return;
+	msg_set_hdr_sz(m, LONG_H_SIZE);
+	msg_set_orignode(m, msg_prevnode(m));
+	msg_set_destnode(m, destnode);
+	memset(&m->hdr[8], 0, 12);
+}
+
+
+
+/*
+		TIPC internal message header format, version 2
+
+       1 0 9 8 7 6 5 4|3 2 1 0 9 8 7 6|5 4 3 2 1 0 9 8|7 6 5 4 3 2 1 0 
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   w0:|vers |msg usr|hdr sz |n|resrv|            packet size          |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   w1:|m typ|rsv=0|   sequence gap    |       broadcast ack no        |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   w2:| link level ack no/bc_gap_from |     seq no / bcast_gap_to     |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   w3:|                       previous node                           |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   w4:|  next sent broadcast/fragm no | next sent pkt/ fragm msg no   |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   w5:|          session no           |rsv=0|r|berid|link prio|netpl|p|
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   w6:|                      originating node                         |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   w7:|                      destination node                         |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   w8:|                   transport sequence number                   |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   w9:|   msg count / bcast tag       |       link tolerance          |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      \                                                               \
+      /                     User Specific Data                        /
+      \                                                               \
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+      NB: CONN_MANAGER uses the data message format. LINK_CONFIG has its own format.
+*/   
+
+/* 
+ * Internal users
+ */
+
+#define  BCAST_PROTOCOL       5
+#define  MSG_BUNDLER          6
+#define  LINK_PROTOCOL        7
+#define  CONN_MANAGER         8
+#define  ROUTE_DISTRIBUTOR    9
+#define  CHANGEOVER_PROTOCOL  10
+#define  NAME_DISTRIBUTOR     11
+#define  MSG_FRAGMENTER       12
+#define  LINK_CONFIG          13
+#define  INT_H_SIZE           40
+#define  DSC_H_SIZE           40
+
+/* 
+ *  Connection management protocol messages
+ */
+
+#define CONN_PROBE        0
+#define CONN_PROBE_REPLY  1
+#define CONN_ACK          2
+
+/* 
+ * Name distributor messages
+ */
+
+#define PUBLICATION       0
+#define WITHDRAWAL        1
+
+
+/* 
+ * Word 1
+ */
+
+static inline u32 msg_seq_gap(struct tipc_msg *m)
+{
+	return msg_bits(m, 1, 16, 0xff);
+}
+
+static inline void msg_set_seq_gap(struct tipc_msg *m, u32 n)
+{
+	msg_set_bits(m, 1, 16, 0xff, n);
+}
+
+static inline u32 msg_req_links(struct tipc_msg *m)
+{
+	return msg_bits(m, 1, 16, 0xfff);
+}
+
+static inline void msg_set_req_links(struct tipc_msg *m, u32 n) 
+{
+	msg_set_bits(m, 1, 16, 0xfff, n);
+}
+
+
+/* 
+ * Word 2
+ */
+
+static inline u32 msg_dest_domain(struct tipc_msg *m)
+{
+	return msg_word(m, 2);
+}
+
+static inline void msg_set_dest_domain(struct tipc_msg *m, u32 n) 
+{
+	msg_set_word(m, 2, n);
+}
+
+static inline u32 msg_bcgap_after(struct tipc_msg *m)
+{
+	return msg_bits(m, 2, 16, 0xffff);
+}
+
+static inline void msg_set_bcgap_after(struct tipc_msg *m, u32 n)
+{
+	msg_set_bits(m, 2, 16, 0xffff, n);
+}
+
+static inline u32 msg_bcgap_to(struct tipc_msg *m)
+{
+	return msg_bits(m, 2, 0, 0xffff);
+}
+
+static inline void msg_set_bcgap_to(struct tipc_msg *m, u32 n) 
+{
+	msg_set_bits(m, 2, 0, 0xffff, n);
+}
+
+
+/* 
+ * Word 4
+ */
+
+static inline u32 msg_last_bcast(struct tipc_msg *m)
+{
+	return msg_bits(m, 4, 16, 0xffff);
+}
+
+static inline void msg_set_last_bcast(struct tipc_msg *m, u32 n)
+{
+	msg_set_bits(m, 4, 16, 0xffff, n);
+}
+
+
+static inline u32 msg_fragm_no(struct tipc_msg *m)
+{
+	return msg_bits(m, 4, 16, 0xffff);
+}
+
+static inline void msg_set_fragm_no(struct tipc_msg *m, u32 n)
+{
+	msg_set_bits(m, 4, 16, 0xffff, n);
+}
+
+
+static inline u32 msg_next_sent(struct tipc_msg *m)
+{
+	return msg_bits(m, 4, 0, 0xffff);
+}
+
+static inline void msg_set_next_sent(struct tipc_msg *m, u32 n)
+{
+	msg_set_bits(m, 4, 0, 0xffff, n);
+}
+
+
+static inline u32 msg_long_msgno(struct tipc_msg *m)
+{
+	return msg_bits(m, 4, 0, 0xffff);
+}
+
+static inline void msg_set_long_msgno(struct tipc_msg *m, u32 n)
+{
+	msg_set_bits(m, 4, 0, 0xffff, n);
+}
+
+static inline u32 msg_bc_netid(struct tipc_msg *m)
+{
+	return msg_word(m, 4);
+}
+
+static inline void msg_set_bc_netid(struct tipc_msg *m, u32 id)
+{
+	msg_set_word(m, 4, id);
+}
+
+static inline u32 msg_link_selector(struct tipc_msg *m)
+{
+	return msg_bits(m, 4, 0, 1);
+}
+
+static inline void msg_set_link_selector(struct tipc_msg *m, u32 n)
+{
+	msg_set_bits(m, 4, 0, 1, (n & 1));
+}
+
+/* 
+ * Word 5
+ */
+
+static inline u32 msg_session(struct tipc_msg *m)
+{
+	return msg_bits(m, 5, 16, 0xffff);
+}
+
+static inline void msg_set_session(struct tipc_msg *m, u32 n)
+{
+	msg_set_bits(m, 5, 16, 0xffff, n);
+}
+
+static inline u32 msg_probe(struct tipc_msg *m)
+{
+	return msg_bits(m, 5, 0, 1);
+}
+
+static inline void msg_set_probe(struct tipc_msg *m, u32 val)
+{
+	msg_set_bits(m, 5, 0, 1, (val & 1));
+}
+
+static inline char msg_net_plane(struct tipc_msg *m)
+{
+	return msg_bits(m, 5, 1, 7) + 'A';
+}
+
+static inline void msg_set_net_plane(struct tipc_msg *m, char n)
+{
+	msg_set_bits(m, 5, 1, 7, (n - 'A'));
+}
+
+static inline u32 msg_linkprio(struct tipc_msg *m)
+{
+	return msg_bits(m, 5, 4, 0x1f);
+}
+
+static inline void msg_set_linkprio(struct tipc_msg *m, u32 n)
+{
+	msg_set_bits(m, 5, 4, 0x1f, n);
+}
+
+static inline u32 msg_bearer_id(struct tipc_msg *m)
+{
+	return msg_bits(m, 5, 9, 0x7);
+}
+
+static inline void msg_set_bearer_id(struct tipc_msg *m, u32 n)
+{
+	msg_set_bits(m, 5, 9, 0x7, n);
+}
+
+static inline u32 msg_redundant_link(struct tipc_msg *m)
+{
+	return msg_bits(m, 5, 12, 0x1);
+}
+
+static inline void msg_set_redundant_link(struct tipc_msg *m)
+{
+	msg_set_bits(m, 5, 12, 0x1, 1);
+}
+
+static inline void msg_clear_redundant_link(struct tipc_msg *m)
+{
+	msg_set_bits(m, 5, 12, 0x1, 0);
+}
+
+
+/* 
+ * Word 9
+ */
+
+static inline u32 msg_msgcnt(struct tipc_msg *m)
+{
+	return msg_bits(m, 9, 16, 0xffff);
+}
+
+static inline void msg_set_msgcnt(struct tipc_msg *m, u32 n)
+{
+	msg_set_bits(m, 9, 16, 0xffff, n);
+}
+
+static inline u32 msg_bcast_tag(struct tipc_msg *m)
+{
+	return msg_bits(m, 9, 16, 0xffff);
+}
+
+static inline void msg_set_bcast_tag(struct tipc_msg *m, u32 n)
+{
+	msg_set_bits(m, 9, 16, 0xffff, n);
+}
+
+static inline u32 msg_max_pkt(struct tipc_msg *m) 
+{
+	return (msg_bits(m, 9, 16, 0xffff) * 4);
+}
+
+static inline void msg_set_max_pkt(struct tipc_msg *m, u32 n) 
+{
+	msg_set_bits(m, 9, 16, 0xffff, (n / 4));
+}
+
+static inline u32 msg_link_tolerance(struct tipc_msg *m)
+{
+	return msg_bits(m, 9, 0, 0xffff);
+}
+
+static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n)
+{
+	msg_set_bits(m, 9, 0, 0xffff, n);
+}
+
+/* 
+ * Routing table message data
+ */
+
+
+static inline u32 msg_remote_node(struct tipc_msg *m)
+{
+	return msg_word(m, msg_hdr_sz(m)/4);
+}
+
+static inline void msg_set_remote_node(struct tipc_msg *m, u32 a)
+{
+	msg_set_word(m, msg_hdr_sz(m)/4, a);
+}
+
+static inline int msg_dataoctet(struct tipc_msg *m, u32 pos)
+{
+	return(msg_data(m)[pos + 4] != 0);
+}
+
+static inline void msg_set_dataoctet(struct tipc_msg *m, u32 pos)
+{
+	msg_data(m)[pos + 4] = 1;
+}
+
+/* 
+ * Segmentation message types
+ */
+
+#define FIRST_FRAGMENT     0
+#define FRAGMENT           1
+#define LAST_FRAGMENT      2
+
+/* 
+ * Link management protocol message types
+ */
+
+#define STATE_MSG       0
+#define RESET_MSG       1
+#define ACTIVATE_MSG    2
+
+/* 
+ * Changeover tunnel message types
+ */
+#define DUPLICATE_MSG    0
+#define ORIGINAL_MSG     1
+
+/* 
+ * Routing table message types
+ */
+#define EXT_ROUTING_TABLE    0
+#define LOCAL_ROUTING_TABLE  1
+#define SLAVE_ROUTING_TABLE  2
+#define ROUTE_ADDITION       3
+#define ROUTE_REMOVAL        4
+
+/* 
+ * Config protocol message types
+ */
+
+#define DSC_REQ_MSG          0
+#define DSC_RESP_MSG         1
+
+static inline u32 msg_tot_importance(struct tipc_msg *m)
+{
+	if (likely(msg_isdata(m))) {
+		if (likely(msg_orignode(m) == tipc_own_addr))
+			return msg_importance(m);
+		return msg_importance(m) + 4;
+	}
+	if ((msg_user(m) == MSG_FRAGMENTER)  &&
+	    (msg_type(m) == FIRST_FRAGMENT))
+		return msg_importance(msg_get_wrapped(m));
+	return msg_importance(m);
+}
+
+
+static inline void msg_init(struct tipc_msg *m, u32 user, u32 type, 
+			    u32 err, u32 hsize, u32 destnode)
+{
+	memset(m, 0, hsize);
+	msg_set_version(m);
+	msg_set_user(m, user);
+	msg_set_hdr_sz(m, hsize);
+	msg_set_size(m, hsize);
+	msg_set_prevnode(m, tipc_own_addr);
+	msg_set_type(m, type);
+	msg_set_errcode(m, err);
+	if (!msg_short(m)) {
+		msg_set_orignode(m, tipc_own_addr);
+		msg_set_destnode(m, destnode);
+	}
+}
+
+/** 
+ * msg_calc_data_size - determine total data size for message
+ */
+
+static inline int msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect)
+{
+	int dsz = 0;	/* NOTE(review): iov_len values are summed into an int with no overflow check */
+	int i;
+
+	for (i = 0; i < num_sect; i++)
+		dsz += msg_sect[i].iov_len;
+	return dsz;	/* sum of all section lengths, in bytes */
+}
+
+/** 
+ * msg_build - create message using specified header and data
+ * 
+ * Note: Caller must not hold any locks in case copy_from_user() is interrupted!
+ * 
+ * Returns message data size or errno
+ */
+
+static inline int msg_build(struct tipc_msg *hdr, 
+			    struct iovec const *msg_sect, u32 num_sect,
+			    int max_size, int usrmem, struct sk_buff** buf)
+{
+	int dsz, sz, hsz, pos, res, cnt;	/* pos: write offset in buffer; res: 0 once a user copy fails */
+
+	dsz = msg_calc_data_size(msg_sect, num_sect);
+	if (unlikely(dsz > TIPC_MAX_USER_MSG_SIZE)) {
+		*buf = NULL;
+		return -EINVAL;
+	}
+
+	pos = hsz = msg_hdr_sz(hdr);	/* data is placed directly after the header */
+	sz = hsz + dsz;
+	msg_set_size(hdr, sz);	/* hdr is updated even if no buffer is built below */
+	if (unlikely(sz > max_size)) {
+		*buf = NULL;
+		return dsz;	/* not an error: data size is returned but *buf is NULL */
+	}
+
+	*buf = buf_acquire(sz);
+	if (!(*buf))
+		return -ENOMEM;
+	memcpy((*buf)->data, (unchar *)hdr, hsz);	/* header first */
+	for (res = 1, cnt = 0; res && (cnt < num_sect); cnt++) {
+		if (likely(usrmem))	/* usrmem selects copy_from_user() over memcpy() */
+			res = !copy_from_user((*buf)->data + pos, 
+					      msg_sect[cnt].iov_base, 
+					      msg_sect[cnt].iov_len);
+		else
+			memcpy((*buf)->data + pos, msg_sect[cnt].iov_base, 
+			       msg_sect[cnt].iov_len);
+		pos += msg_sect[cnt].iov_len;	/* next section follows this one */
+	}
+	if (likely(res))
+		return dsz;
+
+	buf_discard(*buf);	/* a copy_from_user() call failed: drop the partial message */
+	*buf = NULL;
+	return -EFAULT;
+}
+
+static inline void msg_set_media_addr(struct tipc_msg *m, struct tipc_media_addr *a)
+{
+	memcpy(&((int *)m)[5], a, sizeof(*a));
+}
+
+static inline void msg_get_media_addr(struct tipc_msg *m, struct tipc_media_addr *a)
+{
+	memcpy(a, &((int*)m)[5], sizeof(*a));
+}
+
+#endif
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
new file mode 100644
index 00000000000..830f9099904
--- /dev/null
+++ b/net/tipc/name_distr.c
@@ -0,0 +1,309 @@
+/*
+ * net/tipc/name_distr.c: TIPC name distribution code
+ * 
+ * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "cluster.h"
+#include "dbg.h"
+#include "link.h"
+#include "msg.h"
+#include "name_distr.h"
+
+#undef  DBG_OUTPUT
+#define DBG_OUTPUT NULL
+
+#define ITEM_SIZE sizeof(struct distr_item)
+
+/**
+ * struct distr_item - publication info distributed to other nodes
+ * @type: name sequence type
+ * @lower: name sequence lower bound
+ * @upper: name sequence upper bound
+ * @ref: publishing port reference
+ * @key: publication key
+ * 
+ * ===> All fields are stored in network byte order. <===
+ * 
+ * First 3 fields identify (name or) name sequence being published.
+ * Reference field uniquely identifies port that published name sequence.
+ * Key field uniquely identifies publication, in the event a port has
+ * multiple publications of the same name sequence.
+ * 
+ * Note: There is no field that identifies the publishing node because it is 
+ * the same for all items contained within a publication message.
+ */
+
+struct distr_item {
+	u32 type;
+	u32 lower;
+	u32 upper;
+	u32 ref;
+	u32 key;
+};
+
+/**
+ * List of externally visible publications by this node -- 
+ * that is, all publications having scope > TIPC_NODE_SCOPE.
+ */
+
+static LIST_HEAD(publ_root);
+static u32 publ_cnt = 0;		
+
+/**
+ * publ_to_item - add publication info to a publication message
+ */
+
+static void publ_to_item(struct distr_item *i, struct publication *p)
+{
+	i->type = htonl(p->type);
+	i->lower = htonl(p->lower);
+	i->upper = htonl(p->upper);
+	i->ref = htonl(p->ref);
+	i->key = htonl(p->key);
+	dbg("publ_to_item: %u, %u, %u\n", p->type, p->lower, p->upper);
+}
+
+/**
+ * named_prepare_buf - allocate & initialize a publication message
+ */
+
+static struct sk_buff *named_prepare_buf(u32 type, u32 size, u32 dest)
+{
+	struct sk_buff *buf = buf_acquire(LONG_H_SIZE + size);  
+	struct tipc_msg *msg;
+
+	if (buf != NULL) {
+		msg = buf_msg(buf);
+		msg_init(msg, NAME_DISTRIBUTOR, type, TIPC_OK, 
+			 LONG_H_SIZE, dest);
+		msg_set_size(msg, LONG_H_SIZE + size);
+	}
+	return buf;
+}
+
+/**
+ * tipc_named_publish - tell other nodes about a new publication by this node
+ */
+
+void tipc_named_publish(struct publication *publ)
+{
+	struct sk_buff *buf;
+	struct distr_item *item;
+
+	list_add(&publ->local_list, &publ_root);	/* recorded locally even if the send below fails */
+	publ_cnt++;
+
+	buf = named_prepare_buf(PUBLICATION, ITEM_SIZE, 0);	/* room for exactly one item */
+	if (!buf) {
+		warn("Memory squeeze; failed to distribute publication\n");
+		return;
+	}
+
+	item = (struct distr_item *)msg_data(buf_msg(buf));
+	publ_to_item(item, publ);	/* fills the item in network byte order */
+	dbg("tipc_named_publish: broadcasting publish msg\n");
+	tipc_cltr_broadcast(buf);
+}
+
+/**
+ * tipc_named_withdraw - tell other nodes about a withdrawn publication by this node
+ */
+
+void tipc_named_withdraw(struct publication *publ)
+{
+	struct sk_buff *buf;
+	struct distr_item *item;
+
+	list_del(&publ->local_list);
+	publ_cnt--;
+
+	buf = named_prepare_buf(WITHDRAWAL, ITEM_SIZE, 0);
+	if (!buf) {
+		warn("Memory squeeze; failed to distribute withdrawal\n");
+		return;
+	}
+
+	item = (struct distr_item *)msg_data(buf_msg(buf));
+	publ_to_item(item, publ);
+	dbg("tipc_named_withdraw: broadcasting withdraw msg\n");
+	tipc_cltr_broadcast(buf);
+}
+
+/**
+ * tipc_named_node_up - tell specified node about all publications by this node
+ */
+
+void tipc_named_node_up(unsigned long node)
+{
+	struct publication *publ;
+	struct distr_item *item = NULL;
+	struct sk_buff *buf = NULL;	/* message currently being filled, if any */
+	u32 left = 0;			/* bytes still to fill in the current message */
+	u32 rest;			/* bytes not yet assigned to any message */
+	u32 max_item_buf;
+
+	assert(in_own_cluster(node));
+	read_lock_bh(&tipc_nametbl_lock);
+	max_item_buf = TIPC_MAX_USER_MSG_SIZE / ITEM_SIZE;	/* whole items per message ... */
+	max_item_buf *= ITEM_SIZE;				/* ... converted back to bytes */
+	rest = publ_cnt * ITEM_SIZE;
+
+	list_for_each_entry(publ, &publ_root, local_list) {
+		if (!buf) {
+			left = (rest <= max_item_buf) ? rest : max_item_buf;
+			rest -= left;
+			buf = named_prepare_buf(PUBLICATION, left, node);
+			if (buf == NULL) {
+				warn("Memory Squeeze; could not send publication\n");
+				goto exit;
+			}
+			item = (struct distr_item *)msg_data(buf_msg(buf));
+		}
+		publ_to_item(item, publ);
+		item++;
+		left -= ITEM_SIZE;
+		if (!left) {	/* message is full: send it and start a new one next round */
+			msg_set_link_selector(buf_msg(buf), node);
+			dbg("tipc_named_node_up: sending publish msg to "
+			    "<%u.%u.%u>\n", tipc_zone(node), 
+			    tipc_cluster(node), tipc_node(node));
+			tipc_link_send(buf, node, node);
+			buf = NULL;
+		}
+	}
+exit:
+	read_unlock_bh(&tipc_nametbl_lock);
+}
+
+/**
+ * node_is_down - remove publication associated with a failed node
+ * 
+ * Invoked for each publication issued by a newly failed node.  
+ * Removes publication structure from name table & deletes it.
+ * In rare cases the link may have come back up again when this
+ * function is called, and we have two items representing the same
+ * publication. Nudge this item's key to distinguish it from the other.
+ * (Note: Publication's node subscription is already unsubscribed.)
+ */
+
+static void node_is_down(struct publication *publ)
+{
+	struct publication *p;
+	write_lock_bh(&tipc_nametbl_lock);
+	dbg("node_is_down: withdrawing %u, %u, %u\n",
+	    publ->type, publ->lower, publ->upper);
+	publ->key += 1222345;	/* nudge the key so this item differs from a re-published twin */
+	p = tipc_nametbl_remove_publ(publ->type, publ->lower,
+				     publ->node, publ->ref, publ->key);
+	assert(p == publ);
+	write_unlock_bh(&tipc_nametbl_lock);
+	/* publ was dereferenced above, so no NULL check is needed before freeing */
+	kfree(publ);
+}
+
+/**
+ * tipc_named_recv - process name table update message sent by another node
+ */
+
+void tipc_named_recv(struct sk_buff *buf)
+{
+	struct publication *publ;
+	struct tipc_msg *msg = buf_msg(buf);
+	struct distr_item *item = (struct distr_item *)msg_data(msg);
+	u32 count = msg_data_sz(msg) / ITEM_SIZE;	/* whole items only; any remainder is ignored */
+
+	write_lock_bh(&tipc_nametbl_lock); 
+	while (count--) {	/* item fields are in network byte order, hence the ntohl() calls */
+		if (msg_type(msg) == PUBLICATION) {
+			dbg("tipc_named_recv: got publication for %u, %u, %u\n", 
+			    ntohl(item->type), ntohl(item->lower),
+			    ntohl(item->upper));
+			publ = tipc_nametbl_insert_publ(ntohl(item->type), 
+							ntohl(item->lower),
+							ntohl(item->upper),
+							TIPC_CLUSTER_SCOPE,
+							msg_orignode(msg), 
+							ntohl(item->ref),
+							ntohl(item->key));
+			if (publ) {	/* register node_is_down() with publ as its argument */
+				tipc_nodesub_subscribe(&publ->subscr, 
+						       msg_orignode(msg), 
+						       publ,
+						       (net_ev_handler)node_is_down);
+			}
+		} else if (msg_type(msg) == WITHDRAWAL) {
+			dbg("tipc_named_recv: got withdrawl for %u, %u, %u\n", 
+			    ntohl(item->type), ntohl(item->lower),
+			    ntohl(item->upper));
+			publ = tipc_nametbl_remove_publ(ntohl(item->type),
+							ntohl(item->lower),
+							msg_orignode(msg),
+							ntohl(item->ref),
+							ntohl(item->key));
+
+			if (publ) {	/* removed from the table: drop its subscription and free it */
+				tipc_nodesub_unsubscribe(&publ->subscr);
+        			kfree(publ);
+			}
+		} else {
+			warn("tipc_named_recv: unknown msg\n");	/* issued once per item */
+		}
+		item++;
+	}
+	write_unlock_bh(&tipc_nametbl_lock); 
+	buf_discard(buf);	/* the received buffer is released here on every path */
+}
+
+/**
+ * tipc_named_reinit - re-initialize local publication list
+ * 
+ * This routine is called whenever TIPC networking is (re)enabled.
+ * All existing publications by this node that have "cluster" or "zone" scope
+ * are updated to reflect the node's current network address.
+ * (If the node's address is unchanged, the update loop terminates immediately.)
+ */
+
+void tipc_named_reinit(void)
+{
+	struct publication *publ;
+
+	write_lock_bh(&tipc_nametbl_lock); 
+	list_for_each_entry(publ, &publ_root, local_list) {
+		if (publ->node == tipc_own_addr)
+			break;
+		publ->node = tipc_own_addr;
+	}
+	write_unlock_bh(&tipc_nametbl_lock); 
+}
diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h
new file mode 100644
index 00000000000..843da0172f4
--- /dev/null
+++ b/net/tipc/name_distr.h
@@ -0,0 +1,48 @@
+/*
+ * net/tipc/name_distr.h: Include file for TIPC name distribution code
+ * 
+ * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_NAME_DISTR_H
+#define _TIPC_NAME_DISTR_H
+
+#include "name_table.h"
+
+void tipc_named_publish(struct publication *publ);
+void tipc_named_withdraw(struct publication *publ);
+void tipc_named_node_up(unsigned long node);
+void tipc_named_recv(struct sk_buff *buf);
+void tipc_named_reinit(void);
+
+#endif
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
new file mode 100644
index 00000000000..3f4b23bd08f
--- /dev/null
+++ b/net/tipc/name_table.c
@@ -0,0 +1,1079 @@
+/*
+ * net/tipc/name_table.c: TIPC name table code
+ * 
+ * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2004-2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "config.h"
+#include "dbg.h"
+#include "name_table.h"
+#include "name_distr.h"
+#include "addr.h"
+#include "node_subscr.h"
+#include "subscr.h"
+#include "port.h"
+#include "cluster.h"
+#include "bcast.h"
+
+int tipc_nametbl_size = 1024;		/* hash buckets; power of 2, as hash() masks */
+
+/**
+ * struct sub_seq - container for all published instances of a name sequence
+ * @lower: name sequence lower bound
+ * @upper: name sequence upper bound
+ * @node_list: circular list of publications made by this node (NULL if none)
+ * @cluster_list: circular list of publications made by nodes in own cluster
+ * @zone_list: circular list of every publication of this sub-sequence
+ * The lists are chained through the publications' '*_list_next' pointers.
+ */
+struct sub_seq {
+	u32 lower;
+	u32 upper;
+	struct publication *node_list;
+	struct publication *cluster_list;
+	struct publication *zone_list;
+};
+
+/**
+ * struct name_seq - container for all published instances of a name type
+ * @type: 32 bit 'type' value for name sequence
+ * @sseqs: pointer to dynamically-sized array of sub-sequences of this 'type';
+ *         sub-sequences are sorted in ascending order
+ * @alloc: number of entries the sub-sequence array currently has room for
+ * @first_free: array index of the first unused sub-sequence entry
+ * @ns_list: links to adjacent name sequences in hash chain
+ * @subscriptions: list of subscriptions for this 'type'
+ * @lock: spinlock controlling access to name sequence structure
+ */
+
+struct name_seq {
+	u32 type;
+	struct sub_seq *sseqs;
+	u32 alloc;
+	u32 first_free;
+	struct hlist_node ns_list;
+	struct list_head subscriptions;
+	spinlock_t lock;
+};
+
+/**
+ * struct name_table - table containing all existing port name publications
+ * @types: pointer to array of 'tipc_nametbl_size' name sequence lists,
+ *         accessed via hashing on 'type'; name sequence lists are *not* sorted
+ * @local_publ_count: number of publications issued by this node; new local
+ *                    publications are refused at 'tipc_max_publications'
+ */
+struct name_table {
+	struct hlist_head *types;
+	u32 local_publ_count;
+};
+
+static struct name_table table = { NULL } ;	/* 'types' is set by tipc_nametbl_init() */
+static atomic_t rsv_publ_ok = ATOMIC_INIT(0);	/* non-zero while reserved types may be published */
+rwlock_t tipc_nametbl_lock = RW_LOCK_UNLOCKED;	/* taken around accesses to 'table' */
+
+/* hash - map a name type onto an index into table.types[] */
+static inline int hash(int x)
+{
+	return x & (tipc_nametbl_size - 1);
+}
+
+/**
+ * publ_create - allocate a zeroed publication and fill in its identity
+ * Returns the new publication, or NULL (after a warning) if kmalloc() fails.
+ */
+static struct publication *publ_create(u32 type, u32 lower, u32 upper,
+				       u32 scope, u32 node, u32 port_ref,
+				       u32 key)
+{
+	struct publication *p;
+
+	p = kmalloc(sizeof(*p), GFP_ATOMIC);
+	if (!p) {
+		warn("Memory squeeze; failed to create publication\n");
+		return NULL;
+	}
+	memset(p, 0, sizeof(*p));
+	p->key = key;
+	p->ref = port_ref;
+	p->node = node;
+	p->scope = scope;
+	p->upper = upper;
+	p->lower = lower;
+	p->type = type;
+	INIT_LIST_HEAD(&p->local_list);
+	INIT_LIST_HEAD(&p->pport_list);
+	INIT_LIST_HEAD(&p->subscr.nodesub_list);
+	return p;
+}
+
+/**
+ * tipc_subseq_alloc - allocate a specified number of sub-sequence structures
+ * @cnt: number of array entries wanted
+ *
+ * Returns a zero-filled array of @cnt entries, or NULL if allocation fails.
+ * kcalloc() rejects a @cnt whose byte size would overflow, instead of
+ * silently allocating a truncated array as "cnt * sizeof()" could.
+ */
+
+struct sub_seq *tipc_subseq_alloc(u32 cnt)
+{
+	return kcalloc(cnt, sizeof(struct sub_seq), GFP_ATOMIC);
+}
+
+/**
+ * tipc_nameseq_create - create a name sequence structure for the specified 'type'
+ *
+ * Allocates the sequence plus a one-entry, zeroed sub-sequence array and links
+ * the sequence at the head of 'seq_head'.  Returns NULL if allocation fails.
+ */
+struct name_seq *tipc_nameseq_create(u32 type, struct hlist_head *seq_head)
+{
+	struct name_seq *nseq = kmalloc(sizeof(*nseq), GFP_ATOMIC);
+	struct sub_seq *sseq = tipc_subseq_alloc(1);
+
+	if (!nseq || !sseq) {
+		warn("Memory squeeze; failed to create name sequence\n");
+		kfree(nseq);
+		kfree(sseq);
+		return NULL;
+	}
+
+	memset(nseq, 0, sizeof(*nseq));
+	/* spin_lock_init() is the supported way to set up a dynamic lock */
+	spin_lock_init(&nseq->lock);
+	nseq->type = type;
+	nseq->sseqs = sseq;
+	dbg("tipc_nameseq_create() nseq = %x type %u, ssseqs %x, ff: %u\n",
+	    nseq, type, nseq->sseqs, nseq->first_free);
+	nseq->alloc = 1;
+	INIT_HLIST_NODE(&nseq->ns_list);
+	INIT_LIST_HEAD(&nseq->subscriptions);
+	hlist_add_head(&nseq->ns_list, seq_head);
+	return nseq;
+}
+
+/**
+ * nameseq_find_subseq - look up the sub-sequence covering a name instance
+ *
+ * Binary search over the sorted sub-sequence array (this is a hot path).
+ * Returns the entry whose [lower, upper] range holds 'instance', else NULL.
+ */
+static inline struct sub_seq *nameseq_find_subseq(struct name_seq *nseq,
+						  u32 instance)
+{
+	struct sub_seq *cur;
+	int first = 0;
+	int last = nseq->first_free - 1;
+
+	while (first <= last) {
+		int probe = (first + last) / 2;
+		cur = &nseq->sseqs[probe];
+		if (instance < cur->lower)
+			last = probe - 1;
+		else if (instance > cur->upper)
+			first = probe + 1;
+		else
+			return cur;
+	}
+	return NULL;
+}
+
+/**
+ * nameseq_locate_subseq - find the array slot that belongs to a name instance
+ *
+ * Binary search, like nameseq_find_subseq(), but always yields an index:
+ * the slot of the sub-sequence whose range contains 'instance' when one
+ * exists, otherwise the slot at which a new sub-sequence starting at
+ * 'instance' has to be inserted to keep the array sorted.
+ */
+static u32 nameseq_locate_subseq(struct name_seq *nseq, u32 instance)
+{
+	struct sub_seq *cur;
+	int first = 0;
+	int last = nseq->first_free - 1;
+
+	while (first <= last) {
+		int probe = (first + last) / 2;
+
+		cur = &nseq->sseqs[probe];
+		if (instance < cur->lower) {
+			last = probe - 1;
+			continue;
+		}
+		if (instance <= cur->upper)
+			return probe;
+		first = probe + 1;
+	}
+	return first;
+}
+
+/**
+ * tipc_nameseq_insert_publ - add a publication to a name sequence
+ * Returns the new publication, or NULL on overlap or allocation failure.
+ */
+struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
+					u32 type, u32 lower, u32 upper,
+					u32 scope, u32 node, u32 port, u32 key)
+{
+	struct subscription *s;
+	struct subscription *st;
+	struct publication *publ;
+	struct sub_seq *sseq;
+	int created_subseq = 0;
+
+	assert(nseq->first_free <= nseq->alloc);
+	sseq = nameseq_find_subseq(nseq, lower);
+	dbg("nameseq_ins: for seq %x,<%u,%u>, found sseq %x\n",
+	    nseq, type, lower, sseq);
+	if (sseq) {
+
+		/* Lower end overlaps existing entry => need an exact match */
+
+		if ((sseq->lower != lower) || (sseq->upper != upper)) {
+			warn("Overlapping publ <%u,%u,%u>\n", type, lower, upper);
+			return 0;
+		}
+	} else {
+		u32 inspos;
+		struct sub_seq *freesseq;
+
+		/* Find where lower end should be inserted */
+
+		inspos = nameseq_locate_subseq(nseq, lower);
+
+		/* Fail if upper end overlaps into an existing entry */
+
+		if ((inspos < nseq->first_free) &&
+		    (upper >= nseq->sseqs[inspos].lower)) {
+			warn("Overlapping publ <%u,%u,%u>\n", type, lower, upper);
+			return 0;
+		}
+
+		/* Ensure there is space for new sub-sequence */
+
+		if (nseq->first_free == nseq->alloc) {
+			struct sub_seq *sseqs = tipc_subseq_alloc(nseq->alloc * 2);
+
+			if (sseqs == NULL) {
+				warn("Memory squeeze; failed to create sub-sequence\n");
+				return 0;	/* old array is left intact */
+			}
+			memcpy(sseqs, nseq->sseqs,
+			       nseq->alloc * sizeof (struct sub_seq));
+			kfree(nseq->sseqs);
+			nseq->sseqs = sseqs;
+			dbg("Allocated %u sseqs\n", nseq->alloc);
+			nseq->alloc *= 2;
+		}
+		dbg("Have %u sseqs for type %u\n", nseq->alloc, type);
+
+		/* Insert new sub-sequence */
+
+		dbg("ins in pos %u, ff = %u\n", inspos, nseq->first_free);
+		sseq = &nseq->sseqs[inspos];
+		freesseq = &nseq->sseqs[nseq->first_free];
+		memmove(sseq + 1, sseq, (freesseq - sseq) * sizeof (*sseq));
+		memset(sseq, 0, sizeof (*sseq));
+		nseq->first_free++;
+		sseq->lower = lower;
+		sseq->upper = upper;
+		created_subseq = 1;
+	}
+	dbg("inserting (%u %u %u) from %x:%u into sseq %x(%u,%u) of seq %x\n",
+	    type, lower, upper, node, port, sseq,
+	    sseq->lower, sseq->upper, nseq);
+
+	/* Insert a publication: */
+
+	publ = publ_create(type, lower, upper, scope, node, port, key);
+	if (!publ)
+		return 0;	/* NOTE(review): a just-created sub-sequence stays, empty */
+	dbg("inserting publ %x, node=%x publ->node=%x, subscr->node=%x\n",
+	    publ, node, publ->node, publ->subscr.node);
+
+	if (!sseq->zone_list)
+		sseq->zone_list = publ->zone_list_next = publ;
+	else {
+		publ->zone_list_next = sseq->zone_list->zone_list_next;
+		sseq->zone_list->zone_list_next = publ;
+	}
+
+	if (in_own_cluster(node)) {
+		if (!sseq->cluster_list)
+			sseq->cluster_list = publ->cluster_list_next = publ;
+		else {
+			publ->cluster_list_next =
+			sseq->cluster_list->cluster_list_next;
+			sseq->cluster_list->cluster_list_next = publ;
+		}
+	}
+
+	if (node == tipc_own_addr) {
+		if (!sseq->node_list)
+			sseq->node_list = publ->node_list_next = publ;
+		else {
+			publ->node_list_next = sseq->node_list->node_list_next;
+			sseq->node_list->node_list_next = publ;
+		}
+	}
+
+	/* 
+	 * Any subscriptions waiting for notification? 
+	 */
+	list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
+		dbg("calling report_overlap()\n");
+		tipc_subscr_report_overlap(s,
+					   publ->lower,
+					   publ->upper,
+					   TIPC_PUBLISHED,
+					   publ->ref, 
+					   publ->node,
+					   created_subseq);
+	}
+	return publ;
+}
+
+/**
+ * tipc_nameseq_remove_publ - unlink a publication from a name sequence
+ * Returns the unlinked publication (not freed), or NULL if 'inst' is unknown.
+ */
+struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 inst,
+					     u32 node, u32 ref, u32 key)
+{
+	struct publication *publ;
+	struct publication *prev;
+	struct sub_seq *sseq;
+	struct sub_seq *free;
+	struct subscription *s, *st;
+	int removed_subseq = 0;
+
+	assert(nseq);		/* validate before the first dereference */
+	sseq = nameseq_find_subseq(nseq, inst);
+	if (!sseq) {
+		int i;
+
+		warn("Withdraw unknown <%u,%u>?\n", nseq->type, inst);
+		assert(nseq->sseqs);
+		dbg("Dumping subseqs %x for %x, alloc = %u,ff=%u\n",
+		    nseq->sseqs, nseq, nseq->alloc, 
+		    nseq->first_free);
+		for (i = 0; i < nseq->first_free; i++) {
+			dbg("Subseq %u(%x): lower = %u,upper = %u\n",
+			    i, &nseq->sseqs[i], nseq->sseqs[i].lower,
+			    nseq->sseqs[i].upper);
+		}
+		return 0;
+	}
+	dbg("nameseq_remove: seq: %x, sseq %x, <%u,%u> key %u\n",
+	    nseq, sseq, nseq->type, inst, key);
+
+	prev = sseq->zone_list;
+	publ = sseq->zone_list->zone_list_next;
+	while ((publ->key != key) || (publ->ref != ref) || 
+	       (publ->node && (publ->node != node))) {
+		prev = publ;
+		publ = publ->zone_list_next;
+		assert(prev != sseq->zone_list);
+	}
+	if (publ != sseq->zone_list)
+		prev->zone_list_next = publ->zone_list_next;
+	else if (publ->zone_list_next != publ) {
+		prev->zone_list_next = publ->zone_list_next;
+		sseq->zone_list = publ->zone_list_next;
+	} else {
+		sseq->zone_list = 0;
+	}
+
+	if (in_own_cluster(node)) {
+		prev = sseq->cluster_list;
+		publ = sseq->cluster_list->cluster_list_next;
+		while ((publ->key != key) || (publ->ref != ref) || 
+		       (publ->node && (publ->node != node))) {
+			prev = publ;
+			publ = publ->cluster_list_next;
+			assert(prev != sseq->cluster_list);
+		}
+		if (publ != sseq->cluster_list)
+			prev->cluster_list_next = publ->cluster_list_next;
+		else if (publ->cluster_list_next != publ) {
+			prev->cluster_list_next = publ->cluster_list_next;
+			sseq->cluster_list = publ->cluster_list_next;
+		} else {
+			sseq->cluster_list = 0;
+		}
+	}
+
+	if (node == tipc_own_addr) {
+		prev = sseq->node_list;
+		publ = sseq->node_list->node_list_next;
+		while ((publ->key != key) || (publ->ref != ref) || 
+		       (publ->node && (publ->node != node))) {
+			prev = publ;
+			publ = publ->node_list_next;
+			assert(prev != sseq->node_list);
+		}
+		if (publ != sseq->node_list)
+			prev->node_list_next = publ->node_list_next;
+		else if (publ->node_list_next != publ) {
+			prev->node_list_next = publ->node_list_next;
+			sseq->node_list = publ->node_list_next;
+		} else {
+			sseq->node_list = 0;
+		}
+	}
+	assert(!publ->node || (publ->node == node));
+	assert(publ->ref == ref);
+	assert(publ->key == key);
+
+	/* 
+	 * Contract subseq list if no more publications:
+	 */
+	if (!sseq->node_list && !sseq->cluster_list && !sseq->zone_list) {
+		free = &nseq->sseqs[nseq->first_free--];
+		memmove(sseq, sseq + 1, (free - (sseq + 1)) * sizeof (*sseq));
+		removed_subseq = 1;
+	}
+
+	/* 
+	 * Any subscriptions waiting ? 
+	 */
+	list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
+		tipc_subscr_report_overlap(s,
+					   publ->lower,
+					   publ->upper,
+					   TIPC_WITHDRAWN, 
+					   publ->ref, 
+					   publ->node,
+					   removed_subseq);
+	}
+	return publ;
+}
+
+/**
+ * tipc_nameseq_subscribe - attach a subscription to a name sequence
+ *
+ * Links 's' into the sequence's subscription list, then reports every
+ * publication of each overlapping sub-sequence to it as TIPC_PUBLISHED;
+ * only the first report per sub-sequence carries a non-zero final argument.
+ */
+
+void tipc_nameseq_subscribe(struct name_seq *nseq, struct subscription *s)
+{
+	struct sub_seq *cur;
+
+	list_add(&s->nameseq_list, &nseq->subscriptions);
+
+	if (!nseq->sseqs)
+		return;
+
+	for (cur = nseq->sseqs; cur != &nseq->sseqs[nseq->first_free]; cur++) {
+		struct publication *head = cur->zone_list;
+		struct publication *p = head;
+		int first_report = 1;
+
+		if (!head || !tipc_subscr_overlap(s, cur->lower, cur->upper))
+			continue;
+
+		do {
+			tipc_subscr_report_overlap(s,
+						   cur->lower, cur->upper,
+						   TIPC_PUBLISHED,
+						   p->ref, p->node,
+						   first_report);
+			first_report = 0;
+			p = p->zone_list_next;
+		} while (p != head);
+	}
+}
+
+/* nametbl_find_seq - return the name sequence for 'type', or NULL if none */
+static struct name_seq *nametbl_find_seq(u32 type)
+{
+	struct hlist_head *chain = &table.types[hash(type)];
+	struct hlist_node *pos;
+	struct name_seq *found;
+
+	dbg("find_seq %u,(%u,0x%x) table = %p, hash[type] = %u\n",
+	    type, ntohl(type), type, table.types, hash(type));
+
+	hlist_for_each_entry(found, pos, chain, ns_list) {
+		if (found->type != type)
+			continue;
+		dbg("found %x\n", found);
+		return found;
+	}
+
+	return NULL;
+}
+
+/* Add a publication to the table, creating its name sequence on demand */
+struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper,
+					     u32 scope, u32 node, u32 port, u32 key)
+{
+	struct name_seq *nseq = nametbl_find_seq(type);
+
+	dbg("ins_publ: <%u,%x,%x> found %x\n", type, lower, upper, nseq);
+	if (lower > upper) {
+		warn("Failed to publish illegal <%u,%u,%u>\n",
+		     type, lower, upper);
+		return NULL;
+	}
+
+	dbg("Publishing <%u,%u,%u> from %x\n", type, lower, upper, node);
+	if (nseq == NULL) {
+		nseq = tipc_nameseq_create(type, &table.types[hash(type)]);
+		dbg("tipc_nametbl_insert_publ: created %x\n", nseq);
+		if (nseq == NULL)
+			return NULL;
+	}
+	assert(nseq->type == type);
+	return tipc_nameseq_insert_publ(nseq, type, lower, upper,
+					scope, node, port, key);
+}
+
+/* Remove a publication; frees its name sequence once that is left unused */
+struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower,
+					     u32 node, u32 ref, u32 key)
+{
+	struct name_seq *nseq = nametbl_find_seq(type);
+	struct publication *removed;
+
+	if (nseq == NULL)
+		return NULL;
+
+	dbg("Withdrawing <%u,%u> from %x\n", type, lower, node);
+	removed = tipc_nameseq_remove_publ(nseq, lower, node, ref, key);
+	if ((nseq->first_free == 0) && list_empty(&nseq->subscriptions)) {
+		hlist_del_init(&nseq->ns_list);
+		kfree(nseq->sseqs);
+		kfree(nseq);
+	}
+	return removed;
+}
+
+/*
+ * tipc_nametbl_translate(): Translate tipc_name -> tipc_portid.
+ *                      Very time-critical.
+ *
+ * Note: on entry 'destnode' is the search domain used during translation;
+ *       on exit it passes back the node address of the matching port (if any)
+ * Returns the matching port's reference, or 0 if there is none.
+ */
+u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *destnode)
+{
+	struct sub_seq *sseq;
+	struct publication *publ = 0;
+	struct name_seq *seq;
+	u32 ref;
+
+	if (!in_scope(*destnode, tipc_own_addr))
+		return 0;	/* '*destnode' is left untouched on this path */
+
+	read_lock_bh(&tipc_nametbl_lock);
+	seq = nametbl_find_seq(type);
+	if (unlikely(!seq))
+		goto not_found;
+	sseq = nameseq_find_subseq(seq, instance);
+	if (unlikely(!sseq))
+		goto not_found;
+	spin_lock_bh(&seq->lock);	/* held while a list head is advanced */
+
+	/* Closest-First Algorithm: try node list, then cluster, then zone */
+	if (likely(!*destnode)) {
+		publ = sseq->node_list;
+		if (publ) {
+			sseq->node_list = publ->node_list_next;	/* advance head */
+found:			/* every successful lookup below jumps here */
+			ref = publ->ref;
+			*destnode = publ->node;
+			spin_unlock_bh(&seq->lock);
+			read_unlock_bh(&tipc_nametbl_lock);
+			return ref;
+		}
+		publ = sseq->cluster_list;
+		if (publ) {
+			sseq->cluster_list = publ->cluster_list_next;
+			goto found;
+		}
+		publ = sseq->zone_list;
+		if (publ) {
+			sseq->zone_list = publ->zone_list_next;
+			goto found;
+		}
+	}
+
+	/* Round-Robin Algorithm: use only the list selected by '*destnode' */
+	else if (*destnode == tipc_own_addr) {
+		publ = sseq->node_list;
+		if (publ) {
+			sseq->node_list = publ->node_list_next;
+			goto found;
+		}
+	} else if (in_own_cluster(*destnode)) {
+		publ = sseq->cluster_list;
+		if (publ) {
+			sseq->cluster_list = publ->cluster_list_next;
+			goto found;
+		}
+	} else {
+		publ = sseq->zone_list;
+		if (publ) {
+			sseq->zone_list = publ->zone_list_next;
+			goto found;
+		}
+	}
+	spin_unlock_bh(&seq->lock);
+not_found:
+	*destnode = 0;
+	read_unlock_bh(&tipc_nametbl_lock);
+	return 0;
+}
+
+/**
+ * tipc_nametbl_mc_translate - find multicast destinations
+ *
+ * Adds to 'dports' every port published by this node in a sub-sequence
+ * that overlaps the multicast range <type,lower,upper>, and notes whether
+ * any overlapping publication on a cluster list comes from another node.
+ *
+ * Note: a sub-sequence is skipped when the scope value of the publication
+ * at the head of its cluster list is numerically greater than 'limit'
+ * (so local node-scope publications don't receive multicast from elsewhere).
+ *
+ * Returns non-zero if any off-node ports overlap
+ */
+int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit,
+			      struct port_list *dports)
+{
+	struct name_seq *nseq;
+	struct sub_seq *cur;
+	struct sub_seq *end;
+	int off_node = 0;
+
+	read_lock_bh(&tipc_nametbl_lock);
+	nseq = nametbl_find_seq(type);
+	if (nseq == NULL) {
+		read_unlock_bh(&tipc_nametbl_lock);
+		return off_node;
+	}
+
+	spin_lock_bh(&nseq->lock);
+	cur = &nseq->sseqs[nameseq_locate_subseq(nseq, lower)];
+	end = &nseq->sseqs[nseq->first_free];
+	while ((cur != end) && (cur->lower <= upper)) {
+		struct publication *head = cur->cluster_list;
+		struct publication *p = head;
+
+		if (head && (head->scope <= limit)) {
+			do {
+				if (p->node != tipc_own_addr)
+					off_node = 1;
+				else
+					tipc_port_list_add(dports, p->ref);
+				p = p->cluster_list_next;
+			} while (p != cur->cluster_list);
+		}
+		cur++;
+	}
+	spin_unlock_bh(&nseq->lock);
+
+	read_unlock_bh(&tipc_nametbl_lock);
+	return off_node;
+}
+
+/**
+ * tipc_nametbl_publish_rsv - publish port name using a reserved name type
+ * Keeps 'rsv_publ_ok' raised around tipc_publish() and returns its result.
+ */
+int tipc_nametbl_publish_rsv(u32 ref, unsigned int scope,
+			struct tipc_name_seq const *seq)
+{
+	int rc;
+
+	atomic_inc(&rsv_publ_ok);
+	rc = tipc_publish(ref, scope, seq);
+	atomic_dec(&rsv_publ_ok);
+	return rc;
+}
+
+/**
+ * tipc_nametbl_publish - add name publication to network name tables
+ * Returns the publication, or NULL if a limit/reserved check or insert fails.
+ */
+struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper,
+				    u32 scope, u32 port_ref, u32 key)
+{
+	struct publication *publ;
+
+	if (table.local_publ_count >= tipc_max_publications) {
+		warn("Failed publish: max %u local publication\n", 
+		     tipc_max_publications);
+		return 0;
+	}
+	if ((type < TIPC_RESERVED_TYPES) && !atomic_read(&rsv_publ_ok)) {
+		warn("Failed to publish reserved name <%u,%u,%u>\n",
+		     type, lower, upper);
+		return 0;
+	}
+
+	write_lock_bh(&tipc_nametbl_lock);
+	publ = tipc_nametbl_insert_publ(type, lower, upper, scope,
+				   tipc_own_addr, port_ref, key);
+	if (publ)
+		table.local_publ_count++;	/* failed inserts use no quota */
+	if (publ && (scope != TIPC_NODE_SCOPE))
+		tipc_named_publish(publ);
+	write_unlock_bh(&tipc_nametbl_lock);
+	return publ;
+}
+
+/**
+ * tipc_nametbl_withdraw - withdraw name publication from network name tables
+ * Returns 1 if the publication was found, removed and freed, otherwise 0.
+ */
+int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key)
+{
+	struct publication *gone;
+
+	dbg("tipc_nametbl_withdraw:<%d,%d,%d>\n", type, lower, key);
+	write_lock_bh(&tipc_nametbl_lock);
+	gone = tipc_nametbl_remove_publ(type, lower, tipc_own_addr, ref, key);
+	if (!gone) {
+		write_unlock_bh(&tipc_nametbl_lock);
+		return 0;
+	}
+	table.local_publ_count--;
+	if (gone->scope != TIPC_NODE_SCOPE)
+		tipc_named_withdraw(gone);
+	write_unlock_bh(&tipc_nametbl_lock);
+	list_del_init(&gone->pport_list);
+	kfree(gone);
+	return 1;
+}
+
+/**
+ * tipc_nametbl_subscribe - add a subscription object to the name table
+ *
+ * The name sequence for the subscribed type is created if it does not exist
+ * yet; if that fails the subscription is silently left unattached.
+ */
+void tipc_nametbl_subscribe(struct subscription *s)
+{
+	u32 type = s->seq.type;
+	struct name_seq *nseq;
+
+	write_lock_bh(&tipc_nametbl_lock);
+	nseq = nametbl_find_seq(type);
+	if (nseq == NULL)
+		nseq = tipc_nameseq_create(type, &table.types[hash(type)]);
+	if (nseq != NULL) {
+		spin_lock_bh(&nseq->lock);
+		dbg("tipc_nametbl_subscribe:found %x for <%u,%u,%u>\n",
+		    nseq, type, s->seq.lower, s->seq.upper);
+		assert(nseq->type == type);
+		tipc_nameseq_subscribe(nseq, s);
+		spin_unlock_bh(&nseq->lock);
+	}
+	write_unlock_bh(&tipc_nametbl_lock);
+}
+
+/**
+ * tipc_nametbl_unsubscribe - remove a subscription object from name table
+ *
+ * Frees the name sequence too once it has no sub-sequences or subscriptions.
+ */
+void tipc_nametbl_unsubscribe(struct subscription *s)
+{
+	struct name_seq *nseq;
+
+	write_lock_bh(&tipc_nametbl_lock);
+	nseq = nametbl_find_seq(s->seq.type);
+	if (nseq) {
+		spin_lock_bh(&nseq->lock);
+		list_del_init(&s->nameseq_list);
+		spin_unlock_bh(&nseq->lock);
+		if (!nseq->first_free && list_empty(&nseq->subscriptions)) {
+			hlist_del_init(&nseq->ns_list);
+			kfree(nseq->sseqs);
+			kfree(nseq);
+		}
+	}
+	write_unlock_bh(&tipc_nametbl_lock);
+}
+
+
+/**
+ * subseq_list - print one sub-sequence (bounds, and at depth other than 2
+ * every publication on its zone list) into the given buffer
+ */
+static void subseq_list(struct sub_seq *sseq, struct print_buf *buf, u32 depth,
+			u32 index)
+{
+	struct publication *p = sseq->zone_list;
+	char port_id[27];
+	char *scope_name;
+
+	tipc_printf(buf, "%-10u %-10u ", sseq->lower, sseq->upper);
+
+	if ((depth == 2) || (p == NULL)) {
+		tipc_printf(buf, "\n");
+		return;
+	}
+
+	for (;;) {
+		sprintf(port_id, "<%u.%u.%u:%u>",
+			tipc_zone(p->node), tipc_cluster(p->node),
+			tipc_node(p->node), p->ref);
+		tipc_printf(buf, "%-26s ", port_id);
+		if (depth > 3) {
+			scope_name = "zone";
+			if (p->node != tipc_own_addr)
+				scope_name = "";
+			else if (p->scope == TIPC_NODE_SCOPE)
+				scope_name = "node";
+			else if (p->scope == TIPC_CLUSTER_SCOPE)
+				scope_name = "cluster";
+			tipc_printf(buf, "%-10u %s", p->key, scope_name);
+		}
+
+		p = p->zone_list_next;
+		if (p == sseq->zone_list)
+			break;
+
+		/* More to come: start an indented continuation line */
+		tipc_printf(buf, "\n%33s", " ");
+	}
+
+	tipc_printf(buf, "\n");
+}
+
+/**
+ * nameseq_list - print one name sequence into the given buffer
+ *
+ * Depth 1 prints just the type; otherwise every sub-sequence overlapping
+ * [lowbound, upbound] is printed, with the type shown on the first one only.
+ */
+static void nameseq_list(struct name_seq *seq, struct print_buf *buf, u32 depth,
+			 u32 type, u32 lowbound, u32 upbound, u32 index)
+{
+	char type_col[11];
+	struct sub_seq *cur;
+
+	sprintf(type_col, "%-10u", seq->type);
+	if (depth == 1) {
+		tipc_printf(buf, "%s\n", type_col);
+		return;
+	}
+	for (cur = seq->sseqs; cur != &seq->sseqs[seq->first_free]; cur++) {
+		if ((cur->upper < lowbound) || (cur->lower > upbound))
+			continue;
+		tipc_printf(buf, "%s ", type_col);
+		subseq_list(cur, buf, depth, index);
+		sprintf(type_col, "%10s", " ");
+	}
+}
+
+/**
+ * nametbl_header - print name table header into the given buffer; both the
+ * title row and the underline row get one more column group per depth level
+ */
+static void nametbl_header(struct print_buf *buf, u32 depth)
+{
+	int bounds = (depth > 1);
+	int ports = (depth > 2);
+	int publs = (depth > 3);
+
+	tipc_printf(buf, "Type       ");
+	if (bounds)
+		tipc_printf(buf, "Lower      Upper      ");
+	if (ports)
+		tipc_printf(buf, "Port Identity              ");
+	if (publs)
+		tipc_printf(buf, "Publication");
+	tipc_printf(buf, "\n-----------");
+	if (bounds)
+		tipc_printf(buf, "--------------------- ");
+	if (ports)
+		tipc_printf(buf, "-------------------------- ");
+	if (publs)
+		tipc_printf(buf, "------------------");
+	tipc_printf(buf, "\n");
+}
+
+/**
+ * nametbl_list - print specified name table contents into the given buffer
+ *
+ * 'depth_info' carries the print depth plus the TIPC_NTQ_ALLTYPES flag; with
+ * the flag set every type is listed and the type/bound arguments are ignored.
+ * A depth of 0 prints nothing.
+ */
+
+static void nametbl_list(struct print_buf *buf, u32 depth_info,
+			 u32 type, u32 lowbound, u32 upbound)
+{
+	struct hlist_head *chain;
+	struct hlist_node *pos;
+	struct name_seq *nseq;
+	u32 depth = (depth_info & ~TIPC_NTQ_ALLTYPES);
+	u32 bucket;
+
+	if (depth == 0)
+		return;
+
+	if (depth_info & TIPC_NTQ_ALLTYPES) {
+		/* display all entries in name table to specified depth */
+		nametbl_header(buf, depth);
+		for (bucket = 0; bucket < tipc_nametbl_size; bucket++) {
+			chain = &table.types[bucket];
+			hlist_for_each_entry(nseq, pos, chain, ns_list) {
+				nameseq_list(nseq, buf, depth, nseq->type,
+					     0, ~0, bucket);
+			}
+		}
+		return;
+	}
+
+	/* display only the sequence that matches the specified type */
+	if (upbound < lowbound) {
+		tipc_printf(buf, "invalid name sequence specified\n");
+		return;
+	}
+
+	nametbl_header(buf, depth);
+	bucket = hash(type);
+	chain = &table.types[bucket];
+	hlist_for_each_entry(nseq, pos, chain, ns_list) {
+		if (nseq->type != type)
+			continue;
+		nameseq_list(nseq, buf, depth, type,
+			     lowbound, upbound, bucket);
+		break;
+	}
+}
+
+void tipc_nametbl_print(struct print_buf *buf, const char *str)
+{
+	tipc_printf(buf, "%s", str);	/* 'str' is data, never a format */
+	read_lock_bh(&tipc_nametbl_lock);
+	nametbl_list(buf, 0, 0, 0, 0);	/* NOTE(review): depth 0 lists nothing */
+	read_unlock_bh(&tipc_nametbl_lock);
+}
+
+#define MAX_NAME_TBL_QUERY 32768	/* size given to reply alloc and print buffer */
+
+/* Build the reply to a name table query: the listing as an ULTRA_STRING TLV */
+struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space)
+{
+	struct tipc_name_table_query *query;
+	struct sk_buff *reply;
+	struct tlv_desc *tlv;
+	struct print_buf pb;
+	int len;
+
+	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NAME_TBL_QUERY))
+		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+
+	reply = tipc_cfg_reply_alloc(TLV_SPACE(MAX_NAME_TBL_QUERY));
+	if (reply == NULL)
+		return NULL;
+
+	tlv = (struct tlv_desc *)reply->data;
+	tipc_printbuf_init(&pb, TLV_DATA(tlv), MAX_NAME_TBL_QUERY);
+	query = (struct tipc_name_table_query *)TLV_DATA(req_tlv_area);
+	read_lock_bh(&tipc_nametbl_lock);
+	nametbl_list(&pb, ntohl(query->depth), ntohl(query->type),
+		     ntohl(query->lowbound), ntohl(query->upbound));
+	read_unlock_bh(&tipc_nametbl_lock);
+
+	len = tipc_printbuf_validate(&pb);
+	skb_put(reply, TLV_SPACE(len));
+	TLV_SET(tlv, TIPC_TLV_ULTRA_STRING, NULL, len);
+	return reply;
+}
+
+void tipc_nametbl_dump(void)	/* list the name table on TIPC_CONS */
+{
+	nametbl_list(TIPC_CONS, 0, 0, 0, 0);	/* NOTE(review): depth 0 prints nothing; no lock taken */
+}
+
+/* Allocate and clear the hash bucket array; returns 0 or -ENOMEM */
+int tipc_nametbl_init(void)
+{
+	int bytes = sizeof(struct hlist_head) * tipc_nametbl_size;
+
+	table.types = kmalloc(bytes, GFP_ATOMIC);
+	if (table.types == NULL)
+		return -ENOMEM;
+	write_lock_bh(&tipc_nametbl_lock);
+	table.local_publ_count = 0;
+	memset(table.types, 0, bytes);
+	write_unlock_bh(&tipc_nametbl_lock);
+	return 0;
+}
+
+/* Free all publications, sub-sequence arrays and name sequences, then the table */
+void tipc_nametbl_stop(void)
+{
+	struct hlist_head *seq_head;
+	struct hlist_node *seq_node;
+	struct hlist_node *tmp;
+	struct name_seq *seq;
+	u32 i;
+
+	if (!table.types)
+		return;
+	write_lock_bh(&tipc_nametbl_lock);
+	for (i = 0; i < tipc_nametbl_size; i++) {
+		seq_head = &table.types[i];
+		hlist_for_each_entry_safe(seq, seq_node, tmp, seq_head, ns_list) {
+			struct sub_seq *sseq = seq->sseqs;
+			for (; sseq != &seq->sseqs[seq->first_free]; sseq++) {
+				struct publication *publ = sseq->zone_list;
+				assert(publ);
+				do {
+					struct publication *next =
+						publ->zone_list_next;
+					kfree(publ);
+					publ = next;
+				} while (publ != sseq->zone_list);
+			}
+			kfree(seq->sseqs);	/* previously leaked */
+			kfree(seq);		/* safe: iterator already holds 'tmp' */
+		}
+	}
+	kfree(table.types);
+	table.types = NULL;
+	write_unlock_bh(&tipc_nametbl_lock);
+}
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
new file mode 100644
index 00000000000..e8a3d71763c
--- /dev/null
+++ b/net/tipc/name_table.h
@@ -0,0 +1,108 @@
+/*
+ * net/tipc/name_table.h: Include file for TIPC name table code
+ * 
+ * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2004-2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_NAME_TABLE_H
+#define _TIPC_NAME_TABLE_H
+
+#include "node_subscr.h"
+
+struct subscription;
+struct port_list;
+
+/*
+ * TIPC name types reserved for internal TIPC use (both current and planned)
+ */
+
+#define TIPC_ZM_SRV 3  		/* zone master service name type */
+
+
+/**
+ * struct publication - info about a published (name or) name sequence
+ * @type: name sequence type
+ * @lower: name sequence lower bound
+ * @upper: name sequence upper bound
+ * @scope: scope of publication
+ * @node: network address of publishing port's node
+ * @ref: publishing port
+ * @key: publication key
+ * @subscr: subscription to "node down" event (for off-node publications only)
+ * @local_list: adjacent entries in list of publications made by this node
+ * @pport_list: adjacent entries in list of publications made by this port
+ * @node_list_next: next publication on the sub-sequence's node list
+ * @cluster_list_next: next publication on the sub-sequence's cluster list
+ * @zone_list_next: next publication on the sub-sequence's zone list
+ * 
+ * Note that the node list, cluster list, and zone list are circular lists.
+ */
+
+struct publication {
+	u32 type;
+	u32 lower;
+	u32 upper;
+	u32 scope;
+	u32 node;
+	u32 ref;
+	u32 key;
+	struct node_subscr subscr;
+	struct list_head local_list;
+	struct list_head pport_list;
+	struct publication *node_list_next;
+	struct publication *cluster_list_next;
+	struct publication *zone_list_next;
+};
+
+
+extern rwlock_t tipc_nametbl_lock;	/* defined in name_table.c */
+/* insert_publ/remove_publ take no lock themselves; publish/withdraw lock around them */
+struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space);
+u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *node);
+int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, 
+			 struct port_list *dports);
+int tipc_nametbl_publish_rsv(u32 ref, unsigned int scope, 
+			struct tipc_name_seq const *seq);
+struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper,
+				    u32 scope, u32 port_ref, u32 key);
+int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key);
+struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper,
+					u32 scope, u32 node, u32 ref, u32 key);
+struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, 
+					u32 node, u32 ref, u32 key);
+void tipc_nametbl_subscribe(struct subscription *s);
+void tipc_nametbl_unsubscribe(struct subscription *s);
+int tipc_nametbl_init(void);
+void tipc_nametbl_stop(void);
+
+#endif /* _TIPC_NAME_TABLE_H */
diff --git a/net/tipc/net.c b/net/tipc/net.c
new file mode 100644
index 00000000000..074891ad4f0
--- /dev/null
+++ b/net/tipc/net.c
@@ -0,0 +1,311 @@
+/*
+ * net/tipc/net.c: TIPC network routing code
+ * 
+ * Copyright (c) 1995-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "bearer.h"
+#include "net.h"
+#include "zone.h"
+#include "addr.h"
+#include "name_table.h"
+#include "name_distr.h"
+#include "subscr.h"
+#include "link.h"
+#include "msg.h"
+#include "port.h"
+#include "bcast.h"
+#include "discover.h"
+#include "config.h"
+
+/* 
+ * The TIPC locking policy is designed to ensure a very fine locking
+ * granularity, permitting complete parallel access to individual
+ * port and node/link instances. The code consists of three major 
+ * locking domains, each protected with their own disjunct set of locks.
+ *
+ * 1: The routing hierarchy.
+ *    Comprises the structures 'zone', 'cluster', 'node', 'link' 
+ *    and 'bearer'. The whole hierarchy is protected by a big 
+ *    read/write lock, tipc_net_lock, to ensure that nothing is added
+ *    or removed while code is accessing any of these structures. 
+ *    This layer must not be called from the two others while they 
+ *    hold any of their own locks.
+ *    Neither must it itself do any upcalls to the other two before
+ *    it has released tipc_net_lock and other protective locks.
+ *
+ *   Within the tipc_net_lock domain there are two sub-domains;'node' and 
+ *   'bearer', where local write operations are permitted,
+ *   provided that those are protected by individual spin_locks
+ *   per instance. Code holding tipc_net_lock(read) and a node spin_lock 
+ *   is permitted to poke around in both the node itself and its
+ *   subordinate links. I.e, it can update link counters and queues, 
+ *   change link state, send protocol messages, and alter the 
+ *   "active_links" array in the node; but it can _not_ remove a link 
+ *   or a node from the overall structure.
+ *   Correspondingly, individual bearers may change status within a 
+ *   tipc_net_lock(read), protected by an individual spin_lock per bearer
+ *   instance, but it needs tipc_net_lock(write) to remove/add any bearers.
+ *     
+ *
+ *  2: The transport level of the protocol. 
+ *     This consists of the structures port, (and its user level 
+ *     representations, such as user_port and tipc_sock), reference and 
+ *     tipc_user (port.c, reg.c, socket.c). 
+ *
+ *     This layer has four different locks:
+ *     - The tipc_port spin_lock. This is protecting each port instance
+ *       from parallel data access and removal. Since we can not place 
+ *       this lock in the port itself, it has been placed in the 
+ *       corresponding reference table entry, which has the same life
+ *       cycle as the module. This entry is difficult to access from 
+ *       outside the TIPC core, however, so a pointer to the lock has 
+ *       been added in the port instance, -to be used for unlocking 
+ *       only.
+ *     - A read/write lock to protect the reference table itself (ref.c).
+ *       (Nobody is using read-only access to this, so it can just as 
+ *       well be changed to a spin_lock)
+ *     - A spin lock to protect the registry of kernel/driver users (reg.c)
+ *     - A global spin_lock (tipc_port_lock), whose only task is to ensure
+ *       consistency where more than one port is involved in an operation,
+ *       i.e., when a port is part of a linked list of ports.
+ *       There are two such lists; 'port_list', which is used for management,
+ *       and 'wait_list', which is used to queue ports during congestion.
+ *     
+ *  3: The name table (name_table.c, name_distr.c, subscr.c)
+ *     - There is one big read/write-lock (tipc_nametbl_lock) protecting the 
+ *       overall name table structure. Nothing must be added/removed to 
+ *       this structure without holding write access to it.
+ *     - There is one local spin_lock per sub_sequence, which can be seen
+ *       as a sub-domain to the tipc_nametbl_lock domain. It is used only
+ *       for translation operations, and is needed because a translation
+ *       steps the root of the 'publication' linked list between each lookup.
+ *       This is always used within the scope of a tipc_nametbl_lock(read).
+ *     - A local spin_lock protecting the queue of subscriber events.
+*/
+
+rwlock_t tipc_net_lock = RW_LOCK_UNLOCKED;	/* guards the routing hierarchy */
+struct network tipc_net = { 0 };	/* zones[] is allocated by net_init() */
+
+struct node *tipc_net_select_remote_node(u32 addr, u32 ref) 
+{	/* delegate to the zone slot selected by the zone part of @addr */
+	return tipc_zone_select_remote_node(tipc_net.zones[tipc_zone(addr)], addr, ref);
+}
+
+u32 tipc_net_select_router(u32 addr, u32 ref)
+{	/* delegate to the zone slot selected by the zone part of @addr */
+	return tipc_zone_select_router(tipc_net.zones[tipc_zone(addr)], addr, ref);
+}
+
+
+u32 tipc_net_next_node(u32 a)
+{
+	struct _zone *z_ptr = tipc_net.zones[tipc_zone(a)];
+
+	return z_ptr ? tipc_zone_next_node(a) : 0;	/* 0: zone not present */
+}
+
+void tipc_net_remove_as_router(u32 router)
+{
+	u32 z;
+
+	/* Zone numbers start at 1; unpopulated slots are skipped */
+	for (z = 1; z <= tipc_max_zones; z++) {
+		if (tipc_net.zones[z])
+			tipc_zone_remove_as_router(tipc_net.zones[z], router);
+	}
+}
+
+void tipc_net_send_external_routes(u32 dest)
+{
+	u32 z;	/* zone numbers run from 1 to tipc_max_zones */
+
+	for (z = 1; z <= tipc_max_zones; z++) {
+		if (tipc_net.zones[z])
+			tipc_zone_send_external_routes(tipc_net.zones[z], dest);
+	}
+}
+
+static int net_init(void)
+{
+	memset(&tipc_net, 0, sizeof(tipc_net));
+
+	/*
+	 * One pointer per zone number 0..tipc_max_zones.  kcalloc() returns
+	 * zeroed memory and rejects a count * size product that overflows.
+	 */
+	tipc_net.zones = kcalloc(tipc_max_zones + 1, sizeof(*tipc_net.zones),
+				 GFP_ATOMIC);
+	return tipc_net.zones ? TIPC_OK : -ENOMEM;
+}
+
+static void net_stop(void)
+{
+	u32 z;
+
+	if (tipc_net.zones == NULL)
+		return;		/* no zone array to release */
+
+	/* Every slot is handed to tipc_zone_delete(), populated or not */
+	for (z = 1; z <= tipc_max_zones; z++)
+		tipc_zone_delete(tipc_net.zones[z]);
+	kfree(tipc_net.zones);
+	tipc_net.zones = NULL;
+}
+
+static void net_route_named_msg(struct sk_buff *buf)
+{
+	struct tipc_msg *msg = buf_msg(buf);
+	u32 dnode;
+	u32 dport;
+
+	/* Only named messages can be resolved through the name table */
+	if (!msg_named(msg)) {
+		msg_dbg(msg, "tipc_net->drop_nam:");
+		buf_discard(buf);
+		return;
+	}
+	dnode = addr_domain(msg_lookup_scope(msg));
+	dport = tipc_nametbl_translate(msg_nametype(msg), msg_nameinst(msg), &dnode);
+	dbg("tipc_net->lookup<%u,%u>-><%u,%x>\n",
+	    msg_nametype(msg), msg_nameinst(msg), dport, dnode);
+	if (!dport) {
+		msg_dbg(msg, "tipc_net->rej:NO NAME: ");
+		tipc_reject_msg(buf, TIPC_ERR_NO_NAME);
+		return;
+	}
+	msg_set_destnode(msg, dnode);
+	msg_set_destport(msg, dport);
+	tipc_net_route_msg(buf);
+}
+
+void tipc_net_route_msg(struct sk_buff *buf)
+{
+	struct tipc_msg *msg;
+	u32 dnode;
+
+	if (!buf)
+		return;
+	msg = buf_msg(buf);
+	/* Give up on a message that has been rerouted more than 6 times */
+	msg_incr_reroute_cnt(msg);
+	if (msg_reroute_cnt(msg) > 6) {
+		if (msg_errcode(msg)) {	/* error code already set: just drop */
+			msg_dbg(msg, "NET>DISC>:");
+			buf_discard(buf);
+		} else {
+			msg_dbg(msg, "NET>REJ>:");
+			tipc_reject_msg(buf, msg_destport(msg) ? 
+					TIPC_ERR_NO_PORT : TIPC_ERR_NO_NAME);
+		}
+		return;
+	}
+
+	msg_dbg(msg, "tipc_net->rout: ");
+
+	/* Handle message for this node (short messages use tipc_own_addr) */
+	dnode = msg_short(msg) ? tipc_own_addr : msg_destnode(msg);
+	if (in_scope(dnode, tipc_own_addr)) {
+		if (msg_isdata(msg)) {
+			if (msg_mcast(msg)) 
+				tipc_port_recv_mcast(buf, NULL);
+			else if (msg_destport(msg))
+				tipc_port_recv_msg(buf);
+			else
+				net_route_named_msg(buf);
+			return;
+		}
+		switch (msg_user(msg)) {
+		case ROUTE_DISTRIBUTOR:
+			tipc_cltr_recv_routing_table(buf);
+			break;
+		case NAME_DISTRIBUTOR:
+			tipc_named_recv(buf);
+			break;
+		case CONN_MANAGER:
+			tipc_port_recv_proto_msg(buf);
+			break;
+		default:
+			msg_dbg(msg,"DROP/NET/<REC<");
+			buf_discard(buf);
+		}
+		return;
+	}
+
+	/* Handle message for another node */
+	msg_dbg(msg, "NET>SEND>: ");
+	tipc_link_send(buf, dnode, msg_link_selector(msg));
+}
+
+int tipc_net_start(void)
+{
+	char addr_string[16];
+	int res;
+
+	if (tipc_mode != TIPC_NODE_MODE)
+		return -ENOPROTOOPT;
+
+	tipc_mode = TIPC_NET_MODE;
+	tipc_named_reinit();
+	tipc_port_reinit();
+
+	/* NOTE(review): on failure tipc_mode stays TIPC_NET_MODE -- intended? */
+	if ((res = tipc_bearer_init()) ||
+	    (res = net_init()) ||
+	    (res = tipc_cltr_init()) ||
+	    (res = tipc_bclink_init()))
+		return res;
+	tipc_subscr_stop();
+	tipc_cfg_stop();
+	tipc_k_signal((Handler)tipc_subscr_start, 0);
+	tipc_k_signal((Handler)tipc_cfg_init, 0);
+	info("Started in network mode\n");
+	info("Own node address %s, network identity %u\n",
+	     addr_string_fill(addr_string, tipc_own_addr), tipc_net_id);
+	return TIPC_OK;
+}
+
+void tipc_net_stop(void)
+{
+	if (tipc_mode != TIPC_NET_MODE)
+		return;		/* not in network mode: nothing to tear down */
+	write_lock_bh(&tipc_net_lock);
+	tipc_bearer_stop();
+	tipc_mode = TIPC_NODE_MODE;
+	tipc_bclink_stop();
+	net_stop();		/* releases tipc_net.zones under the write lock */
+	write_unlock_bh(&tipc_net_lock);
+	info("Left network mode \n");
+}
+
diff --git a/net/tipc/net.h b/net/tipc/net.h
new file mode 100644
index 00000000000..f3e0b85e647
--- /dev/null
+++ b/net/tipc/net.h
@@ -0,0 +1,64 @@
+/*
+ * net/tipc/net.h: Include file for TIPC network routing code
+ * 
+ * Copyright (c) 1995-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_NET_H
+#define _TIPC_NET_H
+
+struct _zone;
+
+/**
+ * struct network - TIPC network structure
+ * @zones: array of pointers to all zones within network, indexed by zone number
+ */
+ 
+struct network {
+	struct _zone **zones;
+};
+
+
+extern struct network tipc_net;
+extern rwlock_t tipc_net_lock;
+
+void tipc_net_remove_as_router(u32 router);
+void tipc_net_send_external_routes(u32 dest);
+void tipc_net_route_msg(struct sk_buff *buf);
+struct node *tipc_net_select_remote_node(u32 addr, u32 ref);
+u32 tipc_net_select_router(u32 addr, u32 ref);
+
+int tipc_net_start(void);
+void tipc_net_stop(void);
+
+#endif
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
new file mode 100644
index 00000000000..eb1bb4dce7a
--- /dev/null
+++ b/net/tipc/netlink.c
@@ -0,0 +1,112 @@
+/*
+ * net/tipc/netlink.c: TIPC configuration handling
+ * 
+ * Copyright (c) 2005-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "config.h"
+#include <net/genetlink.h>
+
+static int handle_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *rep_buf;
+	struct nlmsghdr *rep_nlh;
+	struct nlmsghdr *req_nlh = info->nlhdr;
+	struct tipc_genlmsghdr *req_userhdr = info->userhdr;
+	int hdr_space = NLMSG_SPACE(GENL_HDRLEN + TIPC_GENL_HDRLEN);
+
+	/* Commands with either of the 0xC000 bits set need CAP_NET_ADMIN */
+	if ((req_userhdr->cmd & 0xC000) && (!capable(CAP_NET_ADMIN)))
+		rep_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_NET_ADMIN);
+	else
+		rep_buf = tipc_cfg_do_cmd(req_userhdr->dest, req_userhdr->cmd,
+					  NLMSG_DATA(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN,
+					  NLMSG_PAYLOAD(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN),
+					  hdr_space);
+	if (!rep_buf)
+		return 0;
+
+	/* Copy the request's first hdr_space bytes in front of the reply */
+	skb_push(rep_buf, hdr_space);
+	rep_nlh = (struct nlmsghdr *)rep_buf->data;
+	memcpy(rep_nlh, req_nlh, hdr_space);
+	rep_nlh->nlmsg_len = rep_buf->len;
+	genlmsg_unicast(rep_buf, req_nlh->nlmsg_pid);
+	return 0;
+}
+
+static struct genl_family family = {
+        .id		= GENL_ID_GENERATE,
+        .name		= TIPC_GENL_NAME,
+        .version	= TIPC_GENL_VERSION,
+        .hdrsize	= TIPC_GENL_HDRLEN,
+        .maxattr	= 0,
+};
+
+static struct genl_ops ops = {
+	.cmd		= TIPC_GENL_CMD,
+	.doit		= handle_cmd,	/* the only operation registered here */
+};
+
+static int family_registered = 0;	/* 1 while 'family' is registered */
+
+int tipc_netlink_start(void)
+{
+	int res;
+
+	/* Register the family, then its one operation; pass failures up */
+	res = genl_register_family(&family);
+	if (res)
+		goto err;
+	family_registered = 1;
+	res = genl_register_ops(&family, &ops);
+	if (res)
+		goto err_unregister;
+	return 0;
+
+ err_unregister:
+	genl_unregister_family(&family);
+	family_registered = 0;
+ err:
+	err("Failed to register netlink interface\n");
+	return res;
+}
+
+void tipc_netlink_stop(void)
+{
+	if (!family_registered)
+		return;		/* nothing was registered */
+	genl_unregister_family(&family);
+	family_registered = 0;
+}
diff --git a/net/tipc/node.c b/net/tipc/node.c
new file mode 100644
index 00000000000..6d65010e5fa
--- /dev/null
+++ b/net/tipc/node.c
@@ -0,0 +1,678 @@
+/*
+ * net/tipc/node.c: TIPC node management routines
+ * 
+ * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "config.h"
+#include "node.h"
+#include "cluster.h"
+#include "net.h"
+#include "addr.h"
+#include "node_subscr.h"
+#include "link.h"
+#include "port.h"
+#include "bearer.h"
+#include "name_distr.h"
+
+void node_print(struct print_buf *buf, struct node *n_ptr, char *str);
+static void node_lost_contact(struct node *n_ptr);
+static void node_established_contact(struct node *n_ptr);
+
+struct node *tipc_nodes = NULL;	/* sorted list of nodes within cluster */
+
+u32 tipc_own_tag = 0;	/* # of bclink peers with addr < tipc_own_addr */
+
+/* Allocate a node for @addr, attach it to its cluster (created on demand)
+ * and insert it into tipc_nodes.  Returns NULL if no node or cluster. */
+struct node *tipc_node_create(u32 addr)
+{
+	struct cluster *c_ptr;
+	struct node *n_ptr;
+	struct node **slot;
+
+	n_ptr = kmalloc(sizeof(*n_ptr), GFP_ATOMIC);
+	if (!n_ptr)
+		return NULL;
+
+	memset(n_ptr, 0, sizeof(*n_ptr));
+	n_ptr->addr = addr;
+	n_ptr->lock = SPIN_LOCK_UNLOCKED;
+	INIT_LIST_HEAD(&n_ptr->nsub);
+
+	/* Every node hangs off a cluster; create the cluster on first use */
+	c_ptr = tipc_cltr_find(addr);
+	if (!c_ptr)
+		c_ptr = tipc_cltr_create(addr);
+	if (!c_ptr) {
+		kfree(n_ptr);
+		return NULL;
+	}
+	n_ptr->owner = c_ptr;
+	tipc_cltr_attach_node(c_ptr, n_ptr);
+	n_ptr->last_router = -1;	/* no routers known yet */
+
+	/* Keep tipc_nodes sorted by ascending address */
+	slot = &tipc_nodes;
+	while (*slot && (addr >= (*slot)->addr))
+		slot = &(*slot)->next;
+	n_ptr->next = *slot;
+	*slot = n_ptr;
+	return n_ptr;
+}
+
+/*
+ * tipc_node_delete - free a node structure
+ *
+ * A NULL @n_ptr is accepted and ignored.  Links are not torn down here; a
+ * note left with this code says they are already deleted via
+ * tipc_bearer_stop().
+ *
+ * NOTE(review): the node is freed without being unlinked from tipc_nodes;
+ * presumably the caller resets that list -- confirm.
+ */
+void tipc_node_delete(struct node *n_ptr)
+{
+	if (!n_ptr)
+		return;
+
+	dbg("node %x deleted\n", n_ptr->addr);
+	kfree(n_ptr);
+}
+
+
+/**
+ * tipc_node_link_up - handle addition of link
+ *
+ * Link becomes active (alone or shared) or standby, depending on its priority.
+ */
+
+void tipc_node_link_up(struct node *n_ptr, struct link *l_ptr)
+{
+	struct link **active = &n_ptr->active_links[0];
+
+	info("Established link <%s> on network plane %c\n",
+	     l_ptr->name, l_ptr->b_ptr->net_plane);
+	if (!active[0]) {
+		/* First usable link: it takes both active slots */
+		dbg(" link %p into %p/%p\n", l_ptr, &active[0], &active[1]);
+		active[0] = active[1] = l_ptr;
+		node_established_contact(n_ptr);
+		return;
+	}
+	if (l_ptr->priority < active[0]->priority) {
+		info("Link is standby\n");
+		return;
+	}
+	tipc_link_send_duplicate(active[0], l_ptr);
+	if (l_ptr->priority == active[0]->priority) {
+		active[0] = l_ptr;	/* equal priority: share the active role */
+		return;
+	}
+	info("Link <%s> on network plane %c becomes standby\n",
+	     active[0]->name, active[0]->b_ptr->net_plane);
+	active[0] = active[1] = l_ptr;	/* higher priority: take over alone */
+}
+
+/**
+ * node_select_active_links - select active link
+ *
+ * Rebuilds active_links[] from the links that are up with highest priority.
+ */
+static void node_select_active_links(struct node *n_ptr)
+{
+	struct link **active = &n_ptr->active_links[0];
+	u32 top_prio = 0;
+	u32 b;
+
+	active[0] = active[1] = NULL;
+	for (b = 0; b < MAX_BEARERS; b++) {
+		struct link *l_ptr = n_ptr->links[b];
+
+		if (!l_ptr || !tipc_link_is_up(l_ptr))
+			continue;
+		if (l_ptr->priority < top_prio)
+			continue;
+		if (l_ptr->priority == top_prio) {
+			active[1] = l_ptr;	/* shares the top priority */
+			continue;
+		}
+		top_prio = l_ptr->priority;
+		active[0] = active[1] = l_ptr;
+	}
+}
+
+/**
+ * tipc_node_link_down - handle loss of link
+ */
+
+void tipc_node_link_down(struct node *n_ptr, struct link *l_ptr)
+{
+	struct link **active;
+
+	if (!tipc_link_is_active(l_ptr)) {
+		info("Lost standby link <%s> on network plane %c\n",
+		     l_ptr->name, l_ptr->b_ptr->net_plane);
+		return;
+	}
+	info("Lost link <%s> on network plane %c\n",
+		l_ptr->name, l_ptr->b_ptr->net_plane);
+	/* Drop l_ptr from the active pair; rescan if it held both slots */
+	active = &n_ptr->active_links[0];
+	if (active[0] == l_ptr)
+		active[0] = active[1];
+	if (active[1] == l_ptr)
+		active[1] = active[0];
+	if (active[0] == l_ptr)
+		node_select_active_links(n_ptr);
+	if (tipc_node_is_up(n_ptr)) 
+		tipc_link_changeover(l_ptr);	/* node still reachable */
+	else 
+		node_lost_contact(n_ptr);	/* no links and no routes left */
+}
+
+int tipc_node_has_active_links(struct node *n_ptr)
+{
+	/* NULL node counts as "no links"; either active slot is enough */
+	return n_ptr && (n_ptr->active_links[0] || n_ptr->active_links[1]);
+}
+
+int tipc_node_has_redundant_links(struct node *n_ptr)
+{
+	return tipc_node_has_active_links(n_ptr) &&	/* also rejects NULL */
+	       (n_ptr->active_links[0] != n_ptr->active_links[1]);
+}
+
+int tipc_node_has_active_routes(struct node *n_ptr)
+{
+	return n_ptr && (n_ptr->last_router >= 0);	/* negative: no routers */
+}
+
+int tipc_node_is_up(struct node *n_ptr)
+{	/* "up" = at least one active link or at least one known router */
+	return (tipc_node_has_active_links(n_ptr) || tipc_node_has_active_routes(n_ptr));
+}
+
+/* Bind @l_ptr to the node owning its peer address, creating the node if
+ * needed.  Returns the node, or NULL when the link cannot be attached. */
+struct node *tipc_node_attach_link(struct link *l_ptr)
+{
+	struct node *n_ptr = tipc_node_find(l_ptr->addr);
+	char addr_string[16];
+	u32 bearer_id;
+
+	if (!n_ptr)
+		n_ptr = tipc_node_create(l_ptr->addr);
+	if (!n_ptr)
+		return NULL;
+	bearer_id = l_ptr->b_ptr->identity;
+	assert(bearer_id < MAX_BEARERS);
+	if (n_ptr->link_cnt >= 2) {
+		err("Attempt to create third link to %s\n",
+		    addr_string_fill(addr_string, n_ptr->addr));
+		return NULL;
+	}
+	if (n_ptr->links[bearer_id]) {
+		/* At most one link per bearer towards any given node */
+		err("Attempt to establish second link on <%s> to <%s> \n",
+		    l_ptr->b_ptr->publ.name,
+		    addr_string_fill(addr_string, l_ptr->addr));
+		return NULL;
+	}
+	n_ptr->links[bearer_id] = l_ptr;
+	tipc_net.zones[tipc_zone(l_ptr->addr)]->links++;
+	n_ptr->link_cnt++;
+	return n_ptr;
+}
+
+void tipc_node_detach_link(struct node *n_ptr, struct link *l_ptr)
+{
+	n_ptr->links[l_ptr->b_ptr->identity] = NULL;	/* free bearer slot */
+	n_ptr->link_cnt--;
+	tipc_net.zones[tipc_zone(l_ptr->addr)]->links--;
+}
+
+/*
+ * Routing table management - five cases to handle:
+ *
+ * 1: A link towards a zone/cluster external node comes up.
+ *    => Send a multicast message updating routing tables of all 
+ *    system nodes within own cluster that the new destination 
+ *    can be reached via this node. 
+ *    (node.establishedContact()=>cluster.multicastNewRoute())
+ *
+ * 2: A link towards a slave node comes up.
+ *    => Send a multicast message updating routing tables of all 
+ *    system nodes within own cluster that the new destination 
+ *    can be reached via this node. 
+ *    (node.establishedContact()=>cluster.multicastNewRoute())
+ *    => Send a  message to the slave node about existence 
+ *    of all system nodes within cluster:
+ *    (node.establishedContact()=>cluster.sendLocalRoutes())
+ *
+ * 3: A new cluster local system node becomes available.
+ *    => Send message(s) to this particular node containing
+ *    information about all cluster external and slave
+ *     nodes which can be reached via this node.
+ *    (node.establishedContact()==>network.sendExternalRoutes())
+ *    (node.establishedContact()==>network.sendSlaveRoutes())
+ *    => Send messages to all directly connected slave nodes 
+ *    containing information about the existence of the new node
+ *    (node.establishedContact()=>cluster.multicastNewRoute())
+ *    
+ * 4: The link towards a zone/cluster external node or slave
+ *    node goes down.
+ *    => Send a multicast message updating routing tables of all
+ *    nodes within cluster that the new destination can not any
+ *    longer be reached via this node.
+ *    (node.lostAllLinks()=>cluster.bcastLostRoute())
+ *
+ * 5: A cluster local system node becomes unavailable.
+ *    => Remove all references to this node from the local
+ *    routing tables. Note: This is a completely node
+ *    local operation.
+ *    (node.lostAllLinks()=>network.removeAsRouter())
+ *    => Send messages to all directly connected slave nodes 
+ *    containing information about loss of the node
+ *    (node.establishedContact()=>cluster.multicastLostRoute())
+ *
+ */
+
+static void node_established_contact(struct node *n_ptr)
+{
+	struct cluster *c_ptr;
+
+	dbg("node_established_contact:-> %x\n", n_ptr->addr);
+	/* Name table is told only if the node was not already routable */
+	if (!tipc_node_has_active_routes(n_ptr))
+		tipc_k_signal((Handler)tipc_named_node_up, n_ptr->addr);
+
+	/* Synchronize broadcast acks */
+	n_ptr->bclink.acked = tipc_bclink_get_last_sent();
+
+	if (is_slave(tipc_own_addr))
+		return;
+	if (!in_own_cluster(n_ptr->addr)) {
+		/* Usage case 1 (see above) */
+		c_ptr = tipc_cltr_find(tipc_own_addr);
+		if (!c_ptr)
+			c_ptr = tipc_cltr_create(tipc_own_addr);
+		if (c_ptr)
+			tipc_cltr_bcast_new_route(c_ptr, n_ptr->addr, 1,
+						  tipc_max_nodes);
+		return;
+	}
+
+	c_ptr = n_ptr->owner;
+	if (is_slave(n_ptr->addr)) {
+		/* Usage case 2 (see above) */
+		tipc_cltr_bcast_new_route(c_ptr, n_ptr->addr, 1, tipc_max_nodes);
+		tipc_cltr_send_local_routes(c_ptr, n_ptr->addr);
+		return;
+	}
+
+	if (n_ptr->bclink.supported) {
+		tipc_nmap_add(&tipc_cltr_bcast_nodes, n_ptr->addr);
+		if (n_ptr->addr < tipc_own_addr)
+			tipc_own_tag++;
+	}
+
+	/* Case 3 (see above) */
+	tipc_net_send_external_routes(n_ptr->addr);
+	tipc_cltr_send_slave_routes(c_ptr, n_ptr->addr);
+	tipc_cltr_bcast_new_route(c_ptr, n_ptr->addr, LOWEST_SLAVE,
+				  tipc_highest_allowed_slave);
+}
+
+/* Handle loss of contact with @n_ptr: flush broadcast receive state, update
+ * routing tables, abort link changeover and notify node subscribers. */
+static void node_lost_contact(struct node *n_ptr)
+{
+	struct cluster *c_ptr;
+	struct node_subscr *ns, *tns;
+	char addr_string[16];
+	u32 i;
+
+	/* Clean up broadcast reception remains */
+	n_ptr->bclink.gap_after = n_ptr->bclink.gap_to = 0;
+	while (n_ptr->bclink.deferred_head) {
+		struct sk_buff *buf = n_ptr->bclink.deferred_head;
+		n_ptr->bclink.deferred_head = buf->next;
+		buf_discard(buf);
+	}
+	if (n_ptr->bclink.defragm) {
+		buf_discard(n_ptr->bclink.defragm);
+		n_ptr->bclink.defragm = NULL;
+	}
+	if (in_own_cluster(n_ptr->addr) && n_ptr->bclink.supported)
+		tipc_bclink_acknowledge(n_ptr, mod(n_ptr->bclink.acked + 10000));
+
+	/* Update routing tables */
+	if (is_slave(tipc_own_addr)) {
+		tipc_net_remove_as_router(n_ptr->addr);
+	} else if (!in_own_cluster(n_ptr->addr)) {
+		/* Case 4 (see above) */
+		/* NOTE(review): c_ptr is used unchecked here -- confirm non-NULL */
+		c_ptr = tipc_cltr_find(tipc_own_addr);
+		tipc_cltr_bcast_lost_route(c_ptr, n_ptr->addr, 1,
+					   tipc_max_nodes);
+	} else {
+		/* Case 5 (see above) */
+		c_ptr = tipc_cltr_find(n_ptr->addr);
+		if (is_slave(n_ptr->addr)) {
+			tipc_cltr_bcast_lost_route(c_ptr, n_ptr->addr, 1,
+						   tipc_max_nodes);
+		} else {
+			if (n_ptr->bclink.supported) {
+				tipc_nmap_remove(&tipc_cltr_bcast_nodes,
+						 n_ptr->addr);
+				if (n_ptr->addr < tipc_own_addr)
+					tipc_own_tag--;
+			}
+			tipc_net_remove_as_router(n_ptr->addr);
+			tipc_cltr_bcast_lost_route(c_ptr, n_ptr->addr,
+						   LOWEST_SLAVE,
+						   tipc_highest_allowed_slave);
+		}
+	}
+	if (tipc_node_has_active_routes(n_ptr))
+		return;
+
+	info("Lost contact with %s\n",
+	     addr_string_fill(addr_string, n_ptr->addr));
+
+	/* Abort link changeover */
+	for (i = 0; i < MAX_BEARERS; i++) {
+		struct link *l_ptr = n_ptr->links[i];
+		if (!l_ptr)
+			continue;
+		l_ptr->reset_checkpoint = l_ptr->next_in_no;
+		l_ptr->exp_msg_count = 0;
+		tipc_link_reset_fragments(l_ptr);
+	}
+
+	/* Notify subscribers */
+	list_for_each_entry_safe(ns, tns, &n_ptr->nsub, nodesub_list) {
+		ns->node = 0;
+		list_del_init(&ns->nodesub_list);
+		tipc_k_signal((Handler)ns->handle_node_down,
+			      (unsigned long)ns->usr_handle);
+	}
+}
+
+/**
+ * tipc_node_select_next_hop - find the next-hop node for a message
+ *
+ * Called when cluster local lookup has failed.  Candidates are tried in
+ * this order: a direct link, a cluster-local router, any node in the
+ * remote cluster, and finally any router towards the remote zone.
+ * Returns NULL if none of them is available.
+ */
+
+struct node *tipc_node_select_next_hop(u32 addr, u32 selector)
+{
+	struct node *n_ptr;
+	u32 router_addr;
+
+	if (!tipc_addr_domain_valid(addr))
+		return NULL;
+
+	/* Look for direct link to destination processor */
+	n_ptr = tipc_node_find(addr);
+	if (tipc_node_has_active_links(n_ptr))
+		return n_ptr;
+
+	/* Cluster local system nodes *must* have direct links */
+	if (!is_slave(addr) && in_own_cluster(addr))
+		return NULL;
+
+	/* Look for cluster local router with direct link to node */
+	router_addr = tipc_node_select_router(n_ptr, selector);
+	if (router_addr)
+		return tipc_node_select(router_addr, selector);
+
+	/* Slave nodes can only be accessed within own cluster via a
+	   known router with direct link -- if no router was found, give up */
+	if (is_slave(addr))
+		return NULL;
+
+	/* Inter zone/cluster -- find any direct link to remote cluster */
+	addr = tipc_addr(tipc_zone(addr), tipc_cluster(addr), 0);
+	n_ptr = tipc_net_select_remote_node(addr, selector);
+	if (tipc_node_has_active_links(n_ptr))
+		return n_ptr;
+
+	/* Last resort -- look for any router to anywhere in remote zone */
+	router_addr = tipc_net_select_router(addr, selector);
+	return router_addr ? tipc_node_select(router_addr, selector) : NULL;
+}
+
+/**
+ * tipc_node_select_router - select router to reach specified node
+ * 
+ * Uses a deterministic and fair algorithm for selecting router node. 
+ * Returns 0 if n_ptr is NULL or has no routers, else an own-cluster address.
+ */
+u32 tipc_node_select_router(struct node *n_ptr, u32 ref)
+{
+	u32 ulim;
+	u32 mask;
+	u32 start;
+	u32 r;
+
+        if (!n_ptr)
+                return 0;
+
+	if (n_ptr->last_router < 0)
+		return 0;
+	ulim = ((n_ptr->last_router + 1) * 32) - 1;	/* top bit of last used word */
+
+	/* Start entry must be random */
+	mask = tipc_max_nodes;
+	while (mask > ulim)
+		mask >>= 1;
+	start = ref & mask;	/* mask <= ulim here, so start <= ulim */
+	r = start;
+
+	/* Lookup upwards with wrap-around */
+	do {
+		if (((n_ptr->routers[r / 32]) >> (r % 32)) & 1)
+			break;
+	} while (++r <= ulim);
+	if (r > ulim) {
+		r = 1;	/* wrap-around scan restarts at bit 1, not bit 0 */
+		do {
+			if (((n_ptr->routers[r / 32]) >> (r % 32)) & 1)
+				break;
+		} while (++r < start);
+		assert(r != start);
+	}
+	assert(r && (r <= ulim));
+	return tipc_addr(own_zone(), own_cluster(), r);
+}
+
+void tipc_node_add_router(struct node *n_ptr, u32 router)
+{
+	u32 r_num = tipc_node(router);
+
+	n_ptr->routers[r_num / 32] = 
+		((1 << (r_num % 32)) | n_ptr->routers[r_num / 32]);
+	n_ptr->last_router = tipc_max_nodes / 32;
+	while ((--n_ptr->last_router >= 0) && 
+	       !n_ptr->routers[n_ptr->last_router]);
+}
+
+void tipc_node_remove_router(struct node *n_ptr, u32 router)
+{
+	u32 r_num = tipc_node(router);
+
+	if (n_ptr->last_router < 0)
+		return;		/* No routes */
+
+	n_ptr->routers[r_num / 32] =
+		((~(1 << (r_num % 32))) & (n_ptr->routers[r_num / 32]));
+	n_ptr->last_router = tipc_max_nodes / 32;
+	while ((--n_ptr->last_router >= 0) && 
+	       !n_ptr->routers[n_ptr->last_router]);
+
+	if (!tipc_node_is_up(n_ptr))
+		node_lost_contact(n_ptr);
+}
+
+#if 0	/* compiled out: prints a node's link pointers and active links */
+void node_print(struct print_buf *buf, struct node *n_ptr, char *str)
+{
+	u32 i;
+
+	tipc_printf(buf, "\n\n%s", str);
+	for (i = 0; i < MAX_BEARERS; i++) {
+		if (!n_ptr->links[i]) 
+			continue;
+		tipc_printf(buf, "Links[%u]: %x, ", i, n_ptr->links[i]);
+	}
+	tipc_printf(buf, "Active links: [%x,%x]\n",
+		    n_ptr->active_links[0], n_ptr->active_links[1]);
+}
+#endif	/* 0 */
+
+/* Count the listed nodes within 'domain' that tipc_node_is_up() reports up */
+u32 tipc_available_nodes(const u32 domain)
+{
+	struct node *node;
+	u32 up_cnt = 0;
+
+	for (node = tipc_nodes; node != NULL; node = node->next) {
+		/* out-of-scope nodes are never asked for their state */
+		if (in_scope(domain, node->addr) && tipc_node_is_up(node))
+			up_cnt++;
+	}
+	return up_cnt;
+}
+
+/* Build a config reply holding one TIPC_TLV_NODE_INFO TLV (address and up
+ * state, both via htonl) per listed node in 'domain'; NULL if alloc fails. */
+struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
+{
+	u32 domain;
+	struct sk_buff *buf;
+	struct node *n_ptr;
+        struct tipc_node_info node_info;
+
+	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR))
+		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+
+	domain = *(u32 *)TLV_DATA(req_tlv_area);
+	domain = ntohl(domain);
+	if (!tipc_addr_domain_valid(domain))
+		return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
+						   " (network address)");
+
+        if (!tipc_nodes)
+                return tipc_cfg_reply_none();
+
+	/* For now, get space for all other nodes 
+	   (will need to modify this when slave nodes are supported) */
+	buf = tipc_cfg_reply_alloc(TLV_SPACE(sizeof(node_info)) *
+				   (tipc_max_nodes - 1));
+	if (!buf)
+		return NULL;
+
+	/* Add TLVs for all nodes in scope */
+	for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) {
+		if (!in_scope(domain, n_ptr->addr))
+			continue;
+                node_info.addr = htonl(n_ptr->addr);
+                node_info.up = htonl(tipc_node_is_up(n_ptr));
+		tipc_cfg_append_tlv(buf, TIPC_TLV_NODE_INFO, 
+				    &node_info, sizeof(node_info));
+	}
+
+	return buf;
+}
+
+/* Build a config reply: one TIPC_TLV_LINK_INFO TLV for the broadcast link
+ * plus one per link to each listed node within the requested domain. */
+struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
+{
+	u32 domain;
+	struct sk_buff *buf;
+	struct node *n_ptr;
+	struct tipc_link_info link_info;
+	u32 i;
+
+	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR))
+		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+
+	domain = *(u32 *)TLV_DATA(req_tlv_area);
+	domain = ntohl(domain);
+	if (!tipc_addr_domain_valid(domain))
+		return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
+						   " (network address)");
+
+	if (!tipc_nodes)
+		return tipc_cfg_reply_none();
+
+	/* For now, get space for 2 links to all other nodes + bcast link
+	   (will need to modify this when slave nodes are supported) */
+	buf = tipc_cfg_reply_alloc(TLV_SPACE(sizeof(link_info)) *
+				   (2 * (tipc_max_nodes - 1) + 1));
+	if (!buf)
+		return NULL;
+
+	/* Zero the record so no uninitialised stack bytes reach the reply */
+	memset(&link_info, 0, sizeof(link_info));
+
+	/* Add TLV for broadcast link; NOTE(review): confirm the 0xfffff00 mask */
+	link_info.dest = htonl(tipc_own_addr & 0xfffff00);
+	link_info.up = htonl(1);
+	/* strcpy, not sprintf: the link name must never act as a format string */
+	strcpy(link_info.str, tipc_bclink_name);
+	tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info));
+
+	/* Add TLVs for any other links in scope */
+	for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) {
+		if (!in_scope(domain, n_ptr->addr))
+			continue;
+		for (i = 0; i < MAX_BEARERS; i++) {
+			if (!n_ptr->links[i])
+				continue;
+			link_info.dest = htonl(n_ptr->addr);
+			link_info.up = htonl(tipc_link_is_up(n_ptr->links[i]));
+			strcpy(link_info.str, n_ptr->links[i]->name);
+			tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO,
+					    &link_info, sizeof(link_info));
+		}
+	}
+	return buf;
+}
diff --git a/net/tipc/node.h b/net/tipc/node.h
new file mode 100644
index 00000000000..29f7ae6992d
--- /dev/null
+++ b/net/tipc/node.h
@@ -0,0 +1,144 @@
+/*
+ * net/tipc/node.h: Include file for TIPC node management routines
+ * 
+ * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_NODE_H
+#define _TIPC_NODE_H
+
+#include "node_subscr.h"
+#include "addr.h"
+#include "cluster.h"
+#include "bearer.h"
+
+/**
+ * struct node - TIPC node structure
+ * @addr: network address of node
+ * @lock: spinlock governing access to structure
+ * @owner: pointer to cluster that node belongs to
+ * @next: pointer to next node in sorted list of cluster's nodes
+ * @nsub: list of "node down" subscriptions monitoring node
+ * @active_links: pointers to active links to node
+ * @links: pointers to all links to node
+ * @link_cnt: number of links to node
+ * @permit_changeover: non-zero if node has redundant links to this system
+ * @routers: bitmap indexed by router node number (multicluster communication)
+ * @last_router: index of highest non-zero word in @routers, or -1 if none
+ * @bclink: broadcast-related info
+ *    @supported: non-zero if node supports TIPC b'cast capability
+ *    @acked: sequence # of last outbound b'cast message acknowledged by node
+ *    @last_in: sequence # of last in-sequence b'cast message received from node
+ *    @gap_after: sequence # of last message not requiring a NAK request
+ *    @gap_to: sequence # of last message requiring a NAK request
+ *    @nack_sync: counter that determines when NAK requests should be sent
+ *    @deferred_head: oldest OOS b'cast message received from node
+ *    @deferred_tail: newest OOS b'cast message received from node
+ *    @defragm: list of partially reassembled b'cast message fragments from node
+ */
+ 
+struct node {
+	u32 addr;
+	spinlock_t lock;
+	struct cluster *owner;
+	struct node *next;
+	struct list_head nsub;
+	struct link *active_links[2];
+	struct link *links[MAX_BEARERS];
+	int link_cnt;
+	int permit_changeover;
+	u32 routers[512/32];
+	int last_router;
+	struct {
+		int supported;
+		u32 acked;
+		u32 last_in;
+		u32 gap_after; 
+		u32 gap_to; 
+		u32 nack_sync;
+		struct sk_buff *deferred_head;
+		struct sk_buff *deferred_tail;
+		struct sk_buff *defragm;
+	} bclink;
+};
+
+extern struct node *tipc_nodes;
+extern u32 tipc_own_tag;
+
+struct node *tipc_node_create(u32 addr);
+void tipc_node_delete(struct node *n_ptr);
+struct node *tipc_node_attach_link(struct link *l_ptr);
+void tipc_node_detach_link(struct node *n_ptr, struct link *l_ptr);
+void tipc_node_link_down(struct node *n_ptr, struct link *l_ptr);
+void tipc_node_link_up(struct node *n_ptr, struct link *l_ptr);
+int tipc_node_has_active_links(struct node *n_ptr);
+int tipc_node_has_redundant_links(struct node *n_ptr);
+u32 tipc_node_select_router(struct node *n_ptr, u32 ref);
+struct node *tipc_node_select_next_hop(u32 addr, u32 selector);
+int tipc_node_is_up(struct node *n_ptr);
+void tipc_node_add_router(struct node *n_ptr, u32 router);
+void tipc_node_remove_router(struct node *n_ptr, u32 router);
+struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space);
+struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space);
+
+/* Look up the node entry for 'addr' (local table or its cluster), else NULL */
+static inline struct node *tipc_node_find(u32 addr)
+{
+	struct cluster *c_ptr;
+
+	if (likely(in_own_cluster(addr)))
+		return tipc_local_nodes[tipc_node(addr)];
+	if (!tipc_addr_domain_valid(addr))
+		return NULL;
+	c_ptr = tipc_cltr_find(addr);
+	return c_ptr ? c_ptr->nodes[tipc_node(addr)] : NULL;
+}
+
+static inline struct node *tipc_node_select(u32 addr, u32 selector)
+{
+	if (unlikely(!in_own_cluster(addr)))	/* off-cluster: find next hop */
+		return tipc_node_select_next_hop(addr, selector);
+	return tipc_local_nodes[tipc_node(addr)];
+}
+
+static inline void tipc_node_lock(struct node *n_ptr)
+{
+	spin_lock_bh(&n_ptr->lock);	/* take the node's own lock (_bh variant) */
+}
+
+static inline void tipc_node_unlock(struct node *n_ptr)
+{
+	spin_unlock_bh(&n_ptr->lock);	/* release the lock taken by tipc_node_lock() */
+}
+
+#endif
diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c
new file mode 100644
index 00000000000..afeea121d8b
--- /dev/null
+++ b/net/tipc/node_subscr.c
@@ -0,0 +1,79 @@
+/*
+ * net/tipc/node_subscr.c: TIPC "node down" subscription handling
+ * 
+ * Copyright (c) 1995-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "dbg.h"
+#include "node_subscr.h"
+#include "node.h"
+#include "addr.h"
+
+/**
+ * tipc_nodesub_subscribe - create "node down" subscription for specified node
+ * node_sub->node stays NULL (nothing subscribed) for own/invalid/unknown addr.
+ */
+void tipc_nodesub_subscribe(struct node_subscr *node_sub, u32 addr, 
+		       void *usr_handle, net_ev_handler handle_down)
+{
+	node_sub->node = NULL;
+	if (addr == tipc_own_addr)
+		return;
+	if (!tipc_addr_node_valid(addr)) {
+		warn("node_subscr with illegal %x\n", addr);
+		return;
+	}
+	node_sub->node = tipc_node_find(addr);
+	if (!node_sub->node)	/* was assert(): fail soft instead of NULL deref */
+		return;
+	node_sub->handle_node_down = handle_down;
+	node_sub->usr_handle = usr_handle;
+	tipc_node_lock(node_sub->node);
+	list_add_tail(&node_sub->nodesub_list, &node_sub->node->nsub);
+	tipc_node_unlock(node_sub->node);
+}
+
+/**
+ * tipc_nodesub_unsubscribe - cancel "node down" subscription (if any)
+ */
+
+void tipc_nodesub_unsubscribe(struct node_subscr *node_sub)
+{
+	/* an entry without a node was never linked into a list */
+	if (node_sub->node) {
+		tipc_node_lock(node_sub->node);
+		list_del_init(&node_sub->nodesub_list);
+		tipc_node_unlock(node_sub->node);
+	}
+}
diff --git a/net/tipc/node_subscr.h b/net/tipc/node_subscr.h
new file mode 100644
index 00000000000..01751c4fbb4
--- /dev/null
+++ b/net/tipc/node_subscr.h
@@ -0,0 +1,63 @@
+/*
+ * net/tipc/node_subscr.h: Include file for TIPC "node down" subscription handling
+ * 
+ * Copyright (c) 1995-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_NODE_SUBSCR_H
+#define _TIPC_NODE_SUBSCR_H
+
+#include "addr.h"
+
+typedef void (*net_ev_handler) (void *usr_handle);
+
+/**
+ * struct node_subscr - "node down" subscription entry
+ * @node: ptr to node structure of interest (or NULL, if none)
+ * @handle_node_down: routine to invoke when node fails
+ * @usr_handle: argument to pass to routine when node fails
+ * @nodesub_list: links this entry into the monitored node's subscription list
+ */
+
+struct node_subscr {
+	struct node *node;
+	net_ev_handler handle_node_down;
+	void *usr_handle;
+	struct list_head nodesub_list;
+};
+
+void tipc_nodesub_subscribe(struct node_subscr *node_sub, u32 addr,
+			    void *usr_handle, net_ev_handler handle_down);
+void tipc_nodesub_unsubscribe(struct node_subscr *node_sub);
+
+#endif
diff --git a/net/tipc/port.c b/net/tipc/port.c
new file mode 100644
index 00000000000..72aae52bfec
--- /dev/null
+++ b/net/tipc/port.c
@@ -0,0 +1,1708 @@
+/*
+ * net/tipc/port.c: TIPC port code
+ * 
+ * Copyright (c) 1992-2006, Ericsson AB
+ * Copyright (c) 2004-2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "config.h"
+#include "dbg.h"
+#include "port.h"
+#include "addr.h"
+#include "link.h"
+#include "node.h"
+#include "port.h"
+#include "name_table.h"
+#include "user_reg.h"
+#include "msg.h"
+#include "bcast.h"
+
+/* Connection management: */
+#define PROBING_INTERVAL 3600000	/* [ms] => 1 h */
+#define CONFIRMED 0
+#define PROBING 1
+
+#define MAX_REJECT_SIZE 1024
+
+static struct sk_buff *msg_queue_head = 0;
+static struct sk_buff *msg_queue_tail = 0;
+
+spinlock_t tipc_port_list_lock = SPIN_LOCK_UNLOCKED;
+static spinlock_t queue_lock = SPIN_LOCK_UNLOCKED;
+
+static LIST_HEAD(ports);
+static void port_handle_node_down(unsigned long ref);
+static struct sk_buff* port_build_self_abort_msg(struct port *,u32 err);
+static struct sk_buff* port_build_peer_abort_msg(struct port *,u32 err);
+static void port_timeout(unsigned long ref);
+
+
+static inline u32 port_peernode(struct port *p_ptr)
+{
+	return msg_destnode(&p_ptr->publ.phdr);	/* peer node = header's dest node */
+}
+
+static inline u32 port_peerport(struct port *p_ptr)
+{
+	return msg_destport(&p_ptr->publ.phdr);	/* peer port = header's dest port */
+}
+
+static inline u32 port_out_seqno(struct port *p_ptr)
+{
+	return msg_transp_seqno(&p_ptr->publ.phdr);	/* kept in the port's header */
+}
+
+static inline void port_set_out_seqno(struct port *p_ptr, u32 seqno) 
+{
+	msg_set_transp_seqno(&p_ptr->publ.phdr,seqno);	/* stored in the port's header */
+}
+
+static inline void port_incr_out_seqno(struct port *p_ptr)
+{
+	struct tipc_msg *hdr = &p_ptr->publ.phdr;
+
+	/* the sequence number advances for routed headers only */
+	if (unlikely(msg_routed(hdr)))
+		msg_set_transp_seqno(hdr, msg_transp_seqno(hdr) + 1);
+}
+
+/**
+ * tipc_multicast - send a multicast message to local and remote destinations
+ * Returns -EINVAL if 'ref' yields no port and -ENOMEM if the local copy fails.
+ */
+int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, u32 domain,
+		   u32 num_sect, struct iovec const *msg_sect)
+{
+	struct tipc_msg *hdr;
+	struct sk_buff *buf;
+	struct sk_buff *ibuf = NULL;
+	struct port_list dports = {0, NULL, };
+	struct port *oport = tipc_port_deref(ref);
+	int ext_targets;
+	int res;
+
+	if (unlikely(!oport))
+		return -EINVAL;
+
+	/* Create multicast message */
+
+	hdr = &oport->publ.phdr;
+	msg_set_type(hdr, TIPC_MCAST_MSG);
+	msg_set_nametype(hdr, seq->type);
+	msg_set_namelower(hdr, seq->lower);
+	msg_set_nameupper(hdr, seq->upper);
+	msg_set_hdr_sz(hdr, MCAST_H_SIZE);
+	res = msg_build(hdr, msg_sect, num_sect, MAX_MSG_SIZE,
+			!oport->user_port, &buf);
+	if (unlikely(!buf))
+		return res;	/* msg_build() produced no buffer */
+
+	/* Figure out where to send multicast message */
+
+	ext_targets = tipc_nametbl_mc_translate(seq->type, seq->lower, seq->upper,
+						TIPC_NODE_SCOPE, &dports);
+	
+	/* Send message to destinations (duplicate it only if necessary) */ 
+
+	if (ext_targets) {
+		if (dports.count != 0) {
+			ibuf = skb_copy(buf, GFP_ATOMIC);
+			if (ibuf == NULL) {
+				tipc_port_list_free(&dports);
+				buf_discard(buf);
+				return -ENOMEM;
+			}
+		}
+		res = tipc_bclink_send_msg(buf);
+		if ((res < 0) && (dports.count != 0)) {
+			buf_discard(ibuf);
+		}
+	} else {
+		ibuf = buf;	/* no external targets: deliver the original locally */
+	}
+
+	if (res >= 0) {
+		if (ibuf)
+			tipc_port_recv_mcast(ibuf, &dports);
+	} else {
+		tipc_port_list_free(&dports);
+	}
+	return res;
+}
+
+/**
+ * tipc_port_recv_mcast - deliver multicast message to all destination ports
+ * 
+ * If there is no port list, perform a lookup to create one.
+ * The port list is freed on every return path.
+ */
+void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp)
+{
+	struct tipc_msg* msg;
+	struct port_list dports = {0, NULL, };
+	struct port_list *item = dp;
+	int cnt = 0;
+
+	assert(buf);
+	msg = buf_msg(buf);
+
+	/* Create destination port list, if one wasn't supplied */
+
+	if (dp == NULL) {
+		tipc_nametbl_mc_translate(msg_nametype(msg),
+				     msg_namelower(msg),
+				     msg_nameupper(msg),
+				     TIPC_CLUSTER_SCOPE,
+				     &dports);
+		item = dp = &dports;
+	}
+
+	/* Deliver a copy of message to each destination port */
+
+	if (dp->count != 0) {
+		if (dp->count == 1) {
+			msg_set_destport(msg, dp->ports[0]);
+			tipc_port_recv_msg(buf);
+			tipc_port_list_free(dp);
+			return;
+		}
+		for (; cnt < dp->count; cnt++) {
+			int index = cnt % PLSIZE;
+			struct sk_buff *b = skb_clone(buf, GFP_ATOMIC);
+
+			if (b == NULL) {
+				warn("Buffer allocation failure\n");
+				msg_dbg(msg, "LOST:");
+				goto exit;
+			}
+			if ((index == 0) && (cnt != 0)) {
+				item = item->next;
+			}
+			msg_set_destport(buf_msg(b),item->ports[index]);
+			tipc_port_recv_msg(b);
+		}
+	}
+exit:
+	buf_discard(buf);
+	tipc_port_list_free(dp);
+}
+
+/**
+ * tipc_createport_raw - create a native TIPC port
+ * 
+ * Returns local port reference, or 0 if memory or references are exhausted
+ */
+
+u32 tipc_createport_raw(void *usr_handle,
+			u32 (*dispatcher)(struct tipc_port *, struct sk_buff *),
+			void (*wakeup)(struct tipc_port *),
+			const u32 importance)
+{
+	struct port *p_ptr;
+	struct tipc_msg *msg;
+	u32 ref;
+
+	p_ptr = kmalloc(sizeof(*p_ptr), GFP_ATOMIC);
+	if (p_ptr == NULL) {
+		warn("Memory squeeze; failed to create port\n");
+		return 0;
+	}
+	memset(p_ptr, 0, sizeof(*p_ptr));
+	ref = tipc_ref_acquire(p_ptr, &p_ptr->publ.lock);
+	if (!ref) {
+		warn("Reference Table Exhausted\n");
+		kfree(p_ptr);
+		return 0;
+	}
+
+	tipc_port_lock(ref);	/* result unused; unlocked via p_ptr at the end */
+	p_ptr->publ.ref = ref;
+	msg = &p_ptr->publ.phdr;
+	msg_init(msg, DATA_LOW, TIPC_NAMED_MSG, TIPC_OK, LONG_H_SIZE, 0);
+	msg_set_orignode(msg, tipc_own_addr);
+	msg_set_prevnode(msg, tipc_own_addr);
+	msg_set_origport(msg, ref);
+	msg_set_importance(msg,importance);
+	p_ptr->last_in_seqno = 41;	/* NOTE(review): why 41 is not explained here */
+	p_ptr->sent = 1;
+	p_ptr->publ.usr_handle = usr_handle;
+	INIT_LIST_HEAD(&p_ptr->wait_list);
+	INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list);
+	p_ptr->congested_link = 0;
+	p_ptr->max_pkt = MAX_PKT_DEFAULT;
+	p_ptr->dispatcher = dispatcher;
+	p_ptr->wakeup = wakeup;
+	p_ptr->user_port = 0;
+	k_init_timer(&p_ptr->timer, (Handler)port_timeout, ref);
+	spin_lock_bh(&tipc_port_list_lock);
+	INIT_LIST_HEAD(&p_ptr->publications);
+	INIT_LIST_HEAD(&p_ptr->port_list);
+	list_add_tail(&p_ptr->port_list, &ports);
+	spin_unlock_bh(&tipc_port_list_lock);
+	tipc_port_unlock(p_ptr);
+	return ref;
+}
+
+/* Tear down port 'ref'; -EINVAL if the reference is unknown, else TIPC_OK */
+int tipc_deleteport(u32 ref)
+{
+	struct port *p_ptr;
+	struct sk_buff *buf = 0;
+
+	tipc_withdraw(ref, 0, 0);
+	p_ptr = tipc_port_lock(ref);
+	if (!p_ptr) 
+		return -EINVAL;
+
+	tipc_ref_discard(ref);
+	tipc_port_unlock(p_ptr);
+	k_cancel_timer(&p_ptr->timer);
+	if (p_ptr->publ.connected) {
+		buf = port_build_peer_abort_msg(p_ptr, TIPC_ERR_NO_PORT);
+		tipc_nodesub_unsubscribe(&p_ptr->subscription);
+	}
+	if (p_ptr->user_port) {
+		tipc_reg_remove_port(p_ptr->user_port);
+		kfree(p_ptr->user_port);
+	}
+
+	spin_lock_bh(&tipc_port_list_lock);
+	list_del(&p_ptr->port_list);
+	list_del(&p_ptr->wait_list);
+	spin_unlock_bh(&tipc_port_list_lock);
+	k_term_timer(&p_ptr->timer);
+	kfree(p_ptr);
+	dbg("Deleted port %u\n", ref);
+	tipc_net_route_msg(buf);	/* buf is still 0 if the port was not connected */
+	return TIPC_OK;
+}
+
+/**
+ * tipc_get_port() - return port associated with 'ref'
+ * 
+ * Note: Port is not locked; the tipc_ref_deref() result is returned as-is.
+ */
+
+struct tipc_port *tipc_get_port(const u32 ref)
+{
+	return (struct tipc_port *)tipc_ref_deref(ref);
+}
+
+/**
+ * tipc_get_handle - return user handle associated to port 'ref'
+ */
+
+void *tipc_get_handle(const u32 ref)
+{
+	void *usr_handle = NULL;
+	struct port *port = tipc_port_lock(ref);
+
+	/* no port for 'ref': nothing was locked, report no handle */
+	if (port) {
+		usr_handle = port->publ.usr_handle;
+		tipc_port_unlock(port);
+	}
+	return usr_handle;
+}
+
+static inline int port_unreliable(struct port *p_ptr)
+{
+	return msg_src_droppable(&p_ptr->publ.phdr);	/* flag kept in port header */
+}
+
+/* Report via *isunreliable whether the port's header is source-droppable */
+int tipc_portunreliable(u32 ref, unsigned int *isunreliable)
+{
+	struct port *p_ptr = tipc_port_lock(ref);
+
+	if (!p_ptr)
+		return -EINVAL;
+	*isunreliable = port_unreliable(p_ptr);
+	tipc_port_unlock(p_ptr);	/* pair tipc_port_lock() with its unlock helper */
+	return TIPC_OK;
+}
+
+/* Set or clear the source-droppable flag in the port's header */
+int tipc_set_portunreliable(u32 ref, unsigned int isunreliable)
+{
+	struct port *port = tipc_port_lock(ref);
+
+	if (port == NULL)
+		return -EINVAL;
+	msg_set_src_droppable(&port->publ.phdr, (isunreliable != 0));
+	tipc_port_unlock(port);
+	return TIPC_OK;
+}
+
+static inline int port_unreturnable(struct port *p_ptr)
+{
+	return msg_dest_droppable(&p_ptr->publ.phdr);	/* flag kept in port header */
+}
+
+/* Report via *isunrejectable whether the port's header is dest-droppable */
+int tipc_portunreturnable(u32 ref, unsigned int *isunrejectable)
+{
+	struct port *p_ptr = tipc_port_lock(ref);
+
+	if (!p_ptr)
+		return -EINVAL;
+	*isunrejectable = port_unreturnable(p_ptr);
+	tipc_port_unlock(p_ptr);	/* pair tipc_port_lock() with its unlock helper */
+	return TIPC_OK;
+}
+
+/* Set or clear the destination-droppable flag in the port's header */
+int tipc_set_portunreturnable(u32 ref, unsigned int isunrejectable)
+{
+	struct port *port = tipc_port_lock(ref);
+
+	if (port == NULL)
+		return -EINVAL;
+	msg_set_dest_droppable(&port->publ.phdr, (isunrejectable != 0));
+	tipc_port_unlock(port);
+	return TIPC_OK;
+}
+
+/*
+ * port_build_proto_msg(): allocate and fill in a header-only message,
+ * used for port level protocol messages and for connection aborts.
+ * Yields no buffer when allocation fails. Called with tipc_port lock on.
+ */
+static struct sk_buff *port_build_proto_msg(u32 destport, u32 destnode,
+					    u32 origport, u32 orignode,
+					    u32 usr, u32 type, u32 err,
+					    u32 seqno, u32 ack)
+{
+	struct sk_buff *buf;
+	struct tipc_msg *msg;
+
+	buf = buf_acquire(LONG_H_SIZE);
+	if (!buf)
+		return NULL;
+	msg = buf_msg(buf);
+	msg_init(msg, usr, type, err, LONG_H_SIZE, destnode);
+	msg_set_destport(msg, destport);
+	msg_set_origport(msg, origport);
+	msg_set_destnode(msg, destnode);
+	msg_set_orignode(msg, orignode);
+	msg_set_transp_seqno(msg, seqno);
+	msg_set_msgcnt(msg, ack);
+	msg_dbg(msg, "PORT>SEND>:");
+	return buf;
+}
+
+int tipc_set_msg_option(struct tipc_port *tp_ptr, const char *opt, const u32 sz)
+{
+	msg_expand(&tp_ptr->phdr, msg_destnode(&tp_ptr->phdr));	/* NOTE(review): presumably makes room for options -- confirm */
+	msg_set_options(&tp_ptr->phdr, opt, sz);
+	return TIPC_OK;	/* unconditional: no failure path in this function */
+}
+
+/* Reject 'buf' with error code 'err'.  'buf' is always discarded; the return
+ * value is its data size, capped at MAX_REJECT_SIZE. */
+int tipc_reject_msg(struct sk_buff *buf, u32 err)
+{
+	struct tipc_msg *msg = buf_msg(buf);
+	struct sk_buff *rbuf;
+	struct tipc_msg *rmsg;
+	int hdr_sz;
+	u32 imp = msg_importance(msg);
+	u32 data_sz = msg_data_sz(msg);
+
+	if (data_sz > MAX_REJECT_SIZE)
+		data_sz = MAX_REJECT_SIZE;
+	if (msg_connected(msg) && (imp < TIPC_CRITICAL_IMPORTANCE))
+		imp++;
+	msg_dbg(msg, "port->rej: ");
+	/* discard rejected message if it shouldn't be returned to sender */
+	if (msg_errcode(msg) || msg_dest_droppable(msg)) {
+		buf_discard(buf);
+		return data_sz;
+	}
+	/* construct rejected message */
+	if (msg_mcast(msg))
+		hdr_sz = MCAST_H_SIZE;
+	else
+		hdr_sz = LONG_H_SIZE;
+	rbuf = buf_acquire(data_sz + hdr_sz);
+	if (rbuf == NULL) {
+		buf_discard(buf);
+		return data_sz;
+	}
+	rmsg = buf_msg(rbuf);
+	msg_init(rmsg, imp, msg_type(msg), err, hdr_sz, msg_orignode(msg));
+	msg_set_destport(rmsg, msg_origport(msg));
+	msg_set_prevnode(rmsg, tipc_own_addr);
+	msg_set_origport(rmsg, msg_destport(msg));
+	if (msg_short(msg))
+		msg_set_orignode(rmsg, tipc_own_addr);
+	else
+		msg_set_orignode(rmsg, msg_destnode(msg));
+	msg_set_size(rmsg, data_sz + hdr_sz); 
+	msg_set_nametype(rmsg, msg_nametype(msg));
+	msg_set_nameinst(rmsg, msg_nameinst(msg));
+	memcpy(rbuf->data + hdr_sz, msg_data(msg), data_sz);
+
+	/* send self-abort message when rejecting on a connected port */
+	if (msg_connected(msg)) {
+		struct sk_buff *abuf = 0;
+		struct port *p_ptr = tipc_port_lock(msg_destport(msg));
+
+		if (p_ptr) {
+			if (p_ptr->publ.connected)
+				abuf = port_build_self_abort_msg(p_ptr, err);
+			tipc_port_unlock(p_ptr);
+		}
+		tipc_net_route_msg(abuf);
+	}
+
+	/* send rejected message */
+	buf_discard(buf);
+	tipc_net_route_msg(rbuf);
+	return data_sz;
+}
+
+/* Build a message from the given sections and reject it straight away */
+int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr,
+			      struct iovec const *msg_sect, u32 num_sect,
+			      int err)
+{
+	struct sk_buff *rej_buf;
+	int build_res = msg_build(hdr, msg_sect, num_sect, MAX_MSG_SIZE,
+				  !p_ptr->user_port, &rej_buf);
+
+	/* no buffer: hand msg_build()'s result back to the caller */
+	if (rej_buf == NULL)
+		return build_res;
+	return tipc_reject_msg(rej_buf, err);
+}
+
+/* Timer handler: abort if the last probe is still unanswered, else re-probe */
+static void port_timeout(unsigned long ref)
+{
+	struct port *p_ptr = tipc_port_lock(ref);
+	struct sk_buff *buf = 0;
+
+	if (!p_ptr)
+		return;		/* no port, so nothing was locked */
+	if (!p_ptr->publ.connected) {
+		tipc_port_unlock(p_ptr);	/* the old early return leaked this lock */
+		return;
+	}
+	/* Last probe answered ? */
+	if (p_ptr->probing_state == PROBING) {
+		buf = port_build_self_abort_msg(p_ptr, TIPC_ERR_NO_PORT);
+	} else {
+		buf = port_build_proto_msg(port_peerport(p_ptr),
+					   port_peernode(p_ptr),
+					   p_ptr->publ.ref, tipc_own_addr,
+					   CONN_MANAGER, CONN_PROBE, TIPC_OK,
+					   port_out_seqno(p_ptr), 0);
+		port_incr_out_seqno(p_ptr);
+		p_ptr->probing_state = PROBING;
+		k_start_timer(&p_ptr->timer, p_ptr->probing_interval);
+	}
+	tipc_port_unlock(p_ptr);
+	tipc_net_route_msg(buf);
+}
+
+
+static void port_handle_node_down(unsigned long ref)
+{
+	struct sk_buff *abort_buf;
+	struct port *port = tipc_port_lock(ref);
+
+	if (port == NULL)
+		return;		/* no port for 'ref': nothing to abort */
+	abort_buf = port_build_self_abort_msg(port, TIPC_ERR_NO_NODE);
+	tipc_port_unlock(port);
+	tipc_net_route_msg(abort_buf);
+}
+
+
+/*
+ * port_build_self_abort_msg(): build a TIPC_CONN_MSG carrying 'err' whose
+ * first two address arguments are this port and whose next two are its
+ * peer (presumably destination then origin -- confirm against
+ * port_build_proto_msg()).  Returns NULL when the port is not connected.
+ */
+static struct sk_buff *port_build_self_abort_msg(struct port *p_ptr, u32 err)
+{
+	u32 abort_imp = msg_importance(&p_ptr->publ.phdr);
+
+	if (!p_ptr->publ.connected)
+		return NULL;
+
+	/* One importance level above the port's own, capped at critical */
+	if (abort_imp < TIPC_CRITICAL_IMPORTANCE)
+		abort_imp += 1;
+
+	return port_build_proto_msg(p_ptr->publ.ref, tipc_own_addr,
+				    port_peerport(p_ptr), port_peernode(p_ptr),
+				    abort_imp, TIPC_CONN_MSG, err,
+				    p_ptr->last_in_seqno + 1, 0);
+}
+
+
+/*
+ * port_build_peer_abort_msg(): build a TIPC_CONN_MSG carrying 'err' whose
+ * first two address arguments are the peer and whose next two are this
+ * port (presumably destination then origin -- confirm against
+ * port_build_proto_msg()).  Returns NULL when the port is not connected.
+ */
+static struct sk_buff *port_build_peer_abort_msg(struct port *p_ptr, u32 err)
+{
+	u32 abort_imp = msg_importance(&p_ptr->publ.phdr);
+
+	if (!p_ptr->publ.connected)
+		return NULL;
+
+	/* One importance level above the port's own, capped at critical */
+	if (abort_imp < TIPC_CRITICAL_IMPORTANCE)
+		abort_imp += 1;
+
+	return port_build_proto_msg(port_peerport(p_ptr), port_peernode(p_ptr),
+				    p_ptr->publ.ref, tipc_own_addr,
+				    abort_imp, TIPC_CONN_MSG, err,
+				    port_out_seqno(p_ptr), 0);
+}
+
+/*
+ * tipc_port_recv_proto_msg(): handle a connection protocol message
+ * addressed to a local port.
+ *
+ * The message is checked against the destination port's state.  On a
+ * mismatch an error message is built for the sender (plus a self-abort when
+ * a routed message arrives out of sequence).  CONN_ACK updates the
+ * flow-control counter and may invoke the port's wakeup callback;
+ * CONN_PROBE is answered with CONN_PROBE_REPLY.  Any messages built here
+ * are routed after the port has been unlocked, and 'buf' is always
+ * discarded.
+ */
+void tipc_port_recv_proto_msg(struct sk_buff *buf)
+{
+	struct tipc_msg *msg = buf_msg(buf);
+	struct port *p_ptr = tipc_port_lock(msg_destport(msg));
+	u32 err = TIPC_OK;
+	struct sk_buff *r_buf = 0;
+	struct sk_buff *abort_buf = 0;
+
+	msg_dbg(msg, "PORT<RECV<:");
+
+	if (!p_ptr) {
+		err = TIPC_ERR_NO_PORT;
+	} else if (p_ptr->publ.connected) {
+		/* Sender must be the peer this port is connected to */
+		if (port_peernode(p_ptr) != msg_orignode(msg))
+			err = TIPC_ERR_NO_PORT;
+		if (port_peerport(p_ptr) != msg_origport(msg))
+			err = TIPC_ERR_NO_PORT;
+		if (!err && msg_routed(msg)) {
+			/* Routed messages must arrive in sequence */
+			u32 seqno = msg_transp_seqno(msg);
+			u32 myno =  ++p_ptr->last_in_seqno;
+			if (seqno != myno) {
+				err = TIPC_ERR_NO_PORT;
+				abort_buf = port_build_self_abort_msg(p_ptr, err);
+			}
+		}
+		/* CONN_ACK always leaves via 'exit', whatever 'err' holds */
+		if (msg_type(msg) == CONN_ACK) {
+			/* Evaluated before 'acked' is updated below */
+			int wakeup = tipc_port_congested(p_ptr) && 
+				     p_ptr->publ.congested &&
+				     p_ptr->wakeup;
+			p_ptr->acked += msg_msgcnt(msg);
+			if (tipc_port_congested(p_ptr))
+				goto exit;
+			p_ptr->publ.congested = 0;
+			if (!wakeup)
+				goto exit;
+			p_ptr->wakeup(&p_ptr->publ);
+			goto exit;
+		}
+	} else if (p_ptr->publ.published) {
+		err = TIPC_ERR_NO_PORT;
+	}
+	if (err) {
+		/* Tell the sender; msg_destport() is used since p_ptr may be NULL */
+		r_buf = port_build_proto_msg(msg_origport(msg),
+					     msg_orignode(msg), 
+					     msg_destport(msg), 
+					     tipc_own_addr,
+					     DATA_HIGH,
+					     TIPC_CONN_MSG,
+					     err,
+					     0,
+					     0);
+		goto exit;
+	}
+
+	/* All is fine */
+	if (msg_type(msg) == CONN_PROBE) {
+		r_buf = port_build_proto_msg(msg_origport(msg), 
+					     msg_orignode(msg), 
+					     msg_destport(msg), 
+					     tipc_own_addr, 
+					     CONN_MANAGER,
+					     CONN_PROBE_REPLY,
+					     TIPC_OK,
+					     port_out_seqno(p_ptr),
+					     0);
+	}
+	/* Any accepted protocol message marks the connection as confirmed */
+	p_ptr->probing_state = CONFIRMED;
+	port_incr_out_seqno(p_ptr);
+exit:
+	if (p_ptr)
+		tipc_port_unlock(p_ptr);
+	/* NOTE(review): both may be NULL here; tipc_net_route_msg() is assumed to accept that */
+	tipc_net_route_msg(r_buf);
+	tipc_net_route_msg(abort_buf);
+	buf_discard(buf);
+}
+
+/*
+ * port_print(): append a one-line description of a port to print buffer
+ * 'buf'.
+ *
+ * The line starts with the port reference (prefixed by this node's
+ * <zone.cluster.node> address when 'full_id' is set), followed by either
+ * the connected peer or the list of published names.
+ */
+static void port_print(struct port *p_ptr, struct print_buf *buf, int full_id)
+{
+	struct publication *entry;
+
+	if (!full_id) {
+		tipc_printf(buf, "%-10u:", p_ptr->publ.ref);
+	} else {
+		tipc_printf(buf, "<%u.%u.%u:%u>:",
+			    tipc_zone(tipc_own_addr),
+			    tipc_cluster(tipc_own_addr),
+			    tipc_node(tipc_own_addr),
+			    p_ptr->publ.ref);
+	}
+
+	if (p_ptr->publ.connected) {
+		u32 peer_ref = port_peerport(p_ptr);
+		u32 peer_addr = port_peernode(p_ptr);
+
+		tipc_printf(buf, " connected to <%u.%u.%u:%u>",
+			    tipc_zone(peer_addr), tipc_cluster(peer_addr),
+			    tipc_node(peer_addr), peer_ref);
+		if (p_ptr->publ.conn_type != 0) {
+			tipc_printf(buf, " via {%u,%u}",
+				    p_ptr->publ.conn_type,
+				    p_ptr->publ.conn_instance);
+		}
+	} else if (p_ptr->publ.published) {
+		tipc_printf(buf, " bound to");
+		list_for_each_entry(entry, &p_ptr->publications, pport_list) {
+			/* A single-instance range is printed without 'upper' */
+			if (entry->lower != entry->upper)
+				tipc_printf(buf, " {%u,%u,%u}", entry->type,
+					    entry->lower, entry->upper);
+			else
+				tipc_printf(buf, " {%u,%u}", entry->type,
+					    entry->lower);
+		}
+	}
+	tipc_printf(buf, "\n");
+}
+
+/* Size of the print buffer area reserved for the port listing reply */
+#define MAX_PORT_QUERY 32768
+
+/*
+ * tipc_port_get_ports(): build a configuration reply listing every port.
+ *
+ * Walks the global port list under tipc_port_list_lock, printing each port
+ * (short id form) while holding that port's own lock.  The text is wrapped
+ * in a TIPC_TLV_ULTRA_STRING TLV.  Returns NULL if the reply buffer cannot
+ * be allocated.
+ */
+struct sk_buff *tipc_port_get_ports(void)
+{
+	struct sk_buff *buf;
+	struct tlv_desc *rep_tlv;
+	struct print_buf pb;
+	struct port *p_ptr;
+	int str_len;
+
+	buf = tipc_cfg_reply_alloc(TLV_SPACE(MAX_PORT_QUERY));
+	if (!buf)
+		return NULL;
+	rep_tlv = (struct tlv_desc *)buf->data;
+
+	/* Print straight into the TLV payload area of the reply buffer */
+	tipc_printbuf_init(&pb, TLV_DATA(rep_tlv), MAX_PORT_QUERY);
+	spin_lock_bh(&tipc_port_list_lock);
+	list_for_each_entry(p_ptr, &ports, port_list) {
+		spin_lock_bh(p_ptr->publ.lock);
+		port_print(p_ptr, &pb, 0);
+		spin_unlock_bh(p_ptr->publ.lock);
+	}
+	spin_unlock_bh(&tipc_port_list_lock);
+	str_len = tipc_printbuf_validate(&pb);
+
+	/* Size the reply to the text actually produced */
+	skb_put(buf, TLV_SPACE(str_len));
+	TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len);
+
+	return buf;
+}
+
+#if 0	/* Disabled: per-port statistics are not filled in yet (see below) */
+
+#define MAX_PORT_STATS 2000
+
+/*
+ * port_show_stats(): build a configuration reply describing one port.
+ *
+ * The request TLV must be a TIPC_TLV_PORT_REF holding the port reference in
+ * network byte order.  The reply is a TIPC_TLV_ULTRA_STRING containing the
+ * port_print() output in full-id form.
+ */
+struct sk_buff *port_show_stats(const void *req_tlv_area, int req_tlv_space)
+{
+	u32 ref;
+	struct port *p_ptr;
+	struct sk_buff *buf;
+	struct tlv_desc *rep_tlv;
+	struct print_buf pb;
+	int str_len;
+
+	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_PORT_REF))
+		return cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+
+	ref = *(u32 *)TLV_DATA(req_tlv_area);
+	ref = ntohl(ref);
+
+	p_ptr = tipc_port_lock(ref);
+	if (!p_ptr)
+		return cfg_reply_error_string("port not found");
+
+	buf = tipc_cfg_reply_alloc(TLV_SPACE(MAX_PORT_STATS));
+	if (!buf) {
+		/* Do not leave the port locked on allocation failure */
+		tipc_port_unlock(p_ptr);
+		return NULL;
+	}
+	rep_tlv = (struct tlv_desc *)buf->data;
+
+	tipc_printbuf_init(&pb, TLV_DATA(rep_tlv), MAX_PORT_STATS);
+	port_print(p_ptr, &pb, 1);
+	/* NEED TO FILL IN ADDITIONAL PORT STATISTICS HERE */
+	tipc_port_unlock(p_ptr);
+	str_len = tipc_printbuf_validate(&pb);
+
+	skb_put(buf, TLV_SPACE(str_len));
+	TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len);
+
+	return buf;
+}
+
+#endif
+
+/*
+ * tipc_port_reinit(): stamp this node's current address into the message
+ * header template of the ports on the global list.
+ *
+ * The walk ends at the first port whose header already carries
+ * tipc_own_addr; ports after it on the list are left untouched.
+ */
+void tipc_port_reinit(void)
+{
+	struct port *cur;
+
+	spin_lock_bh(&tipc_port_list_lock);
+	list_for_each_entry(cur, &ports, port_list) {
+		struct tipc_msg *hdr = &cur->publ.phdr;
+
+		if (msg_orignode(hdr) == tipc_own_addr)
+			break;
+		msg_set_orignode(hdr, tipc_own_addr);
+	}
+	spin_unlock_bh(&tipc_port_list_lock);
+}
+
+
+/*
+ *  port_dispatcher_sigh(): Signal handler for messages destined
+ *                          to the tipc_port interface.
+ *
+ *  Detaches the whole pending queue under queue_lock, then hands each
+ *  buffer to the matching callback of the destination user port.  Messages
+ *  carrying an error code go to the error callbacks; messages that fail
+ *  validation are passed to tipc_reject_msg().  The port lock is released
+ *  on every path before a callback, reject or discard runs.  Callbacks
+ *  receive '&buf' and may clear it; a buffer still set afterwards is
+ *  discarded here.
+ */
+
+static void port_dispatcher_sigh(void *dummy)
+{
+	struct sk_buff *buf;
+
+	spin_lock_bh(&queue_lock);
+	buf = msg_queue_head;
+	msg_queue_head = NULL;
+	spin_unlock_bh(&queue_lock);
+
+	while (buf) {
+		struct port *p_ptr;
+		struct user_port *up_ptr;
+		struct tipc_portid orig;
+		struct tipc_name_seq dseq;
+		void *usr_handle;
+		int connected;
+		int published;
+
+		struct sk_buff *next = buf->next;
+		struct tipc_msg *msg = buf_msg(buf);
+		u32 dref = msg_destport(msg);
+
+		p_ptr = tipc_port_lock(dref);
+		if (!p_ptr) {
+			/* Port deleted while msg in queue */
+			tipc_reject_msg(buf, TIPC_ERR_NO_PORT);
+			buf = next;
+			continue;
+		}
+		/* Snapshot port state while the port is locked */
+		orig.ref = msg_origport(msg);
+		orig.node = msg_orignode(msg);
+		up_ptr = p_ptr->user_port;
+		usr_handle = up_ptr->usr_handle;
+		connected = p_ptr->publ.connected;
+		published = p_ptr->publ.published;
+
+		if (unlikely(msg_errcode(msg)))
+			goto err;
+
+		switch (msg_type(msg)) {
+
+		case TIPC_CONN_MSG:{
+				tipc_conn_msg_event cb = up_ptr->conn_msg_cb;
+				u32 peer_port = port_peerport(p_ptr);
+				u32 peer_node = port_peernode(p_ptr);
+
+				spin_unlock_bh(p_ptr->publ.lock);
+				if (unlikely(!connected)) {
+					if (unlikely(published))
+						goto reject;
+					/*
+					 * Implicit connect to the sender.  The
+					 * peer values read above predate the
+					 * connection, so they must not be used
+					 * to validate this first message.
+					 */
+					if (tipc_connect2port(dref, &orig) != TIPC_OK)
+						goto reject;
+				} else if (unlikely(msg_origport(msg) != peer_port ||
+						    msg_orignode(msg) != peer_node)) {
+					goto reject;
+				}
+				if (unlikely(!cb))
+					goto reject;
+				if (unlikely(++p_ptr->publ.conn_unacked >=
+					     TIPC_FLOW_CONTROL_WIN))
+					tipc_acknowledge(dref,
+							 p_ptr->publ.conn_unacked);
+				skb_pull(buf, msg_hdr_sz(msg));
+				cb(usr_handle, dref, &buf, msg_data(msg),
+				   msg_data_sz(msg));
+				break;
+			}
+		case TIPC_DIRECT_MSG:{
+				tipc_msg_event cb = up_ptr->msg_cb;
+
+				spin_unlock_bh(p_ptr->publ.lock);
+				if (unlikely(connected))
+					goto reject;
+				if (unlikely(!cb))
+					goto reject;
+				skb_pull(buf, msg_hdr_sz(msg));
+				cb(usr_handle, dref, &buf, msg_data(msg),
+				   msg_data_sz(msg), msg_importance(msg),
+				   &orig);
+				break;
+			}
+		case TIPC_NAMED_MSG:{
+				tipc_named_msg_event cb = up_ptr->named_msg_cb;
+
+				spin_unlock_bh(p_ptr->publ.lock);
+				if (unlikely(connected))
+					goto reject;
+				if (unlikely(!cb))
+					goto reject;
+				if (unlikely(!published))
+					goto reject;
+				dseq.type =  msg_nametype(msg);
+				dseq.lower = msg_nameinst(msg);
+				dseq.upper = dseq.lower;
+				skb_pull(buf, msg_hdr_sz(msg));
+				cb(usr_handle, dref, &buf, msg_data(msg),
+				   msg_data_sz(msg), msg_importance(msg),
+				   &orig, &dseq);
+				break;
+			}
+		default:
+			/* Unknown type: drop it, but never keep the port locked */
+			spin_unlock_bh(p_ptr->publ.lock);
+			break;
+		}
+		if (buf)
+			buf_discard(buf);
+		buf = next;
+		continue;
+err:
+		switch (msg_type(msg)) {
+
+		case TIPC_CONN_MSG:{
+				tipc_conn_shutdown_event cb =
+					up_ptr->conn_err_cb;
+				u32 peer_port = port_peerport(p_ptr);
+				u32 peer_node = port_peernode(p_ptr);
+
+				spin_unlock_bh(p_ptr->publ.lock);
+				if (!connected || !cb)
+					break;
+				if (msg_origport(msg) != peer_port)
+					break;
+				if (msg_orignode(msg) != peer_node)
+					break;
+				tipc_disconnect(dref);
+				skb_pull(buf, msg_hdr_sz(msg));
+				cb(usr_handle, dref, &buf, msg_data(msg),
+				   msg_data_sz(msg), msg_errcode(msg));
+				break;
+			}
+		case TIPC_DIRECT_MSG:{
+				tipc_msg_err_event cb = up_ptr->err_cb;
+
+				spin_unlock_bh(p_ptr->publ.lock);
+				if (connected || !cb)
+					break;
+				skb_pull(buf, msg_hdr_sz(msg));
+				cb(usr_handle, dref, &buf, msg_data(msg),
+				   msg_data_sz(msg), msg_errcode(msg), &orig);
+				break;
+			}
+		case TIPC_NAMED_MSG:{
+				tipc_named_msg_err_event cb =
+					up_ptr->named_err_cb;
+
+				spin_unlock_bh(p_ptr->publ.lock);
+				if (connected || !cb)
+					break;
+				dseq.type =  msg_nametype(msg);
+				dseq.lower = msg_nameinst(msg);
+				dseq.upper = dseq.lower;
+				skb_pull(buf, msg_hdr_sz(msg));
+				cb(usr_handle, dref, &buf, msg_data(msg),
+				   msg_data_sz(msg), msg_errcode(msg), &dseq);
+				break;
+			}
+		default:
+			/* Unknown type: drop it, but never keep the port locked */
+			spin_unlock_bh(p_ptr->publ.lock);
+			break;
+		}
+		if (buf)
+			buf_discard(buf);
+		buf = next;
+		continue;
+reject:
+		tipc_reject_msg(buf, TIPC_ERR_NO_PORT);
+		buf = next;
+	}
+}
+
+/*
+ *  port_dispatcher(): Dispatcher for messages destined
+ *  to the tipc_port interface. Called with port locked.
+ *
+ *  Appends 'buf' to the pending queue; the signal handler is scheduled
+ *  only when the queue was empty beforehand.
+ */
+
+static u32 port_dispatcher(struct tipc_port *dummy, struct sk_buff *buf)
+{
+	buf->next = NULL;
+	spin_lock_bh(&queue_lock);
+	if (!msg_queue_head) {
+		msg_queue_head = buf;
+		msg_queue_tail = buf;
+		tipc_k_signal((Handler)port_dispatcher_sigh, 0);
+	} else {
+		msg_queue_tail->next = buf;
+		msg_queue_tail = buf;
+	}
+	spin_unlock_bh(&queue_lock);
+	return TIPC_OK;
+}
+
+/*
+ * Wake up port after congestion: signal handler half.
+ *
+ * Locks the port itself, copies the user's continue callback and handle,
+ * and invokes the callback only after the port has been unlocked.
+ */
+
+static void port_wakeup_sh(unsigned long ref)
+{
+	tipc_continue_event notify = NULL;
+	void *handle = NULL;
+	struct port *p_ptr = tipc_port_lock(ref);
+
+	if (p_ptr) {
+		struct user_port *uport = p_ptr->user_port;
+
+		if (uport) {
+			notify = uport->continue_event_cb;
+			handle = uport->usr_handle;
+		}
+		tipc_port_unlock(p_ptr);
+	}
+
+	if (!notify)
+		return;
+	notify(handle, ref);
+}
+
+
+/*
+ * port_wakeup(): wakeup hook given to tipc_createport_raw(); defers the
+ * user notification to port_wakeup_sh() via tipc_k_signal().
+ */
+static void port_wakeup(struct tipc_port *p_ptr)
+{
+	tipc_k_signal((Handler)port_wakeup_sh, p_ptr->ref);
+}
+
+/*
+ * tipc_acknowledge(): acknowledge 'ack' messages on a connected port.
+ *
+ * Lowers the port's unacknowledged count and builds a CONN_ACK for the
+ * peer, which is routed after the port is unlocked.  Does nothing for an
+ * unknown reference; an unconnected port builds no message.
+ */
+void tipc_acknowledge(u32 ref, u32 ack)
+{
+	struct sk_buff *ack_buf = NULL;
+	struct port *p_ptr = tipc_port_lock(ref);
+
+	if (!p_ptr)
+		return;
+
+	if (p_ptr->publ.connected) {
+		p_ptr->publ.conn_unacked -= ack;
+		ack_buf = port_build_proto_msg(port_peerport(p_ptr),
+					       port_peernode(p_ptr),
+					       ref, tipc_own_addr,
+					       CONN_MANAGER, CONN_ACK, TIPC_OK,
+					       port_out_seqno(p_ptr), ack);
+	}
+	tipc_port_unlock(p_ptr);
+	tipc_net_route_msg(ack_buf);
+}
+
+/*
+ * tipc_createport(): user level call. Will add port to
+ *                    registry if non-zero user_ref.
+ *
+ * Allocates a user_port, creates the underlying raw port with
+ * port_dispatcher/port_wakeup as its hooks, records the supplied callbacks
+ * and stores the new port reference in '*portref'.
+ *
+ * Returns TIPC_OK on success, or -ENOMEM if the user_port allocation fails
+ * or the newly created reference cannot be locked.
+ */
+
+int tipc_createport(u32 user_ref, 
+		    void *usr_handle, 
+		    unsigned int importance, 
+		    tipc_msg_err_event error_cb, 
+		    tipc_named_msg_err_event named_error_cb, 
+		    tipc_conn_shutdown_event conn_error_cb, 
+		    tipc_msg_event msg_cb, 
+		    tipc_named_msg_event named_msg_cb, 
+		    tipc_conn_msg_event conn_msg_cb, 
+		    tipc_continue_event continue_event_cb,/* May be zero */
+		    u32 *portref)
+{
+	struct user_port *up_ptr;
+	struct port *p_ptr; 
+	u32 ref;
+
+	up_ptr = (struct user_port *)kmalloc(sizeof(*up_ptr), GFP_ATOMIC);
+	if (up_ptr == NULL) {
+		return -ENOMEM;
+	}
+	ref = tipc_createport_raw(0, port_dispatcher, port_wakeup, importance);
+	p_ptr = tipc_port_lock(ref);
+	if (!p_ptr) {
+		/* Raw port creation failed: undo the allocation above */
+		kfree(up_ptr);
+		return -ENOMEM;
+	}
+
+	/* Every field of the freshly allocated user_port is set explicitly */
+	p_ptr->user_port = up_ptr;
+	up_ptr->user_ref = user_ref;
+	up_ptr->usr_handle = usr_handle;
+	up_ptr->ref = p_ptr->publ.ref;
+	up_ptr->err_cb = error_cb;
+	up_ptr->named_err_cb = named_error_cb;
+	up_ptr->conn_err_cb = conn_error_cb;
+	up_ptr->msg_cb = msg_cb;
+	up_ptr->named_msg_cb = named_msg_cb;
+	up_ptr->conn_msg_cb = conn_msg_cb;
+	up_ptr->continue_event_cb = continue_event_cb;
+	INIT_LIST_HEAD(&up_ptr->uport_list);
+	tipc_reg_add_port(up_ptr);
+	*portref = p_ptr->publ.ref;
+	/* NOTE(review): a pointer is passed for "%x" here -- confirm dbg() tolerates it */
+	dbg(" tipc_createport: %x with ref %u\n", p_ptr, p_ptr->publ.ref);        
+	tipc_port_unlock(p_ptr);
+	return TIPC_OK;
+}
+
+/*
+ * tipc_ownidentity(): fill 'id' with this node's address and the given
+ * port reference.  The reference is not validated.  Always returns TIPC_OK.
+ */
+int tipc_ownidentity(u32 ref, struct tipc_portid *id)
+{
+	id->node = tipc_own_addr;
+	id->ref = ref;
+
+	return TIPC_OK;
+}
+
+/*
+ * tipc_portimportance(): read the importance level stored in the port's
+ * message header template into '*importance'.
+ *
+ * Returns TIPC_OK, or -EINVAL if 'ref' does not identify a port.
+ */
+int tipc_portimportance(u32 ref, unsigned int *importance)
+{
+	struct port *p_ptr = tipc_port_lock(ref);
+
+	if (p_ptr == NULL)
+		return -EINVAL;
+
+	*importance = (unsigned int)msg_importance(&p_ptr->publ.phdr);
+	spin_unlock_bh(p_ptr->publ.lock);
+	return TIPC_OK;
+}
+
+/*
+ * tipc_set_portimportance(): store importance level 'imp' in the port's
+ * message header template.
+ *
+ * Returns TIPC_OK, or -EINVAL if 'imp' exceeds TIPC_CRITICAL_IMPORTANCE or
+ * 'ref' does not identify a port.
+ */
+int tipc_set_portimportance(u32 ref, unsigned int imp)
+{
+	struct port *p_ptr;
+
+	/* Range check first, so the port is never locked for a bad value */
+	if (imp > TIPC_CRITICAL_IMPORTANCE)
+		return -EINVAL;
+
+	p_ptr = tipc_port_lock(ref);
+	if (p_ptr == NULL)
+		return -EINVAL;
+
+	msg_set_importance(&p_ptr->publ.phdr, (u32)imp);
+	spin_unlock_bh(p_ptr->publ.lock);
+	return TIPC_OK;
+}
+
+
+/*
+ * tipc_publish(): bind name sequence 'seq' with visibility 'scope' to the
+ * port identified by 'ref'.
+ *
+ * Returns TIPC_OK on success; -EINVAL for an unknown port, a connected
+ * port, an inverted range, an out-of-range scope or a failed name table
+ * insertion; -EADDRINUSE when the publication key would wrap back onto
+ * 'ref'.
+ */
+int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq)
+{
+	struct port *p_ptr;
+	struct publication *publ;
+	u32 key;
+	int res = -EINVAL;
+
+	p_ptr = tipc_port_lock(ref);
+	if (!p_ptr)
+		return -EINVAL;
+
+	/* Trace only after the NULL check: the arguments dereference p_ptr */
+	dbg("tipc_publ %u, p_ptr = %x, conn = %x, scope = %x, "
+	    "lower = %u, upper = %u\n",
+	    ref, p_ptr, p_ptr->publ.connected, scope, seq->lower, seq->upper);
+	if (p_ptr->publ.connected)
+		goto exit;
+	if (seq->lower > seq->upper)
+		goto exit;
+	if ((scope < TIPC_ZONE_SCOPE) || (scope > TIPC_NODE_SCOPE))
+		goto exit;
+	/* Key derived from the reference and the publication count */
+	key = ref + p_ptr->pub_count + 1;
+	if (key == ref) {
+		res = -EADDRINUSE;
+		goto exit;
+	}
+	publ = tipc_nametbl_publish(seq->type, seq->lower, seq->upper,
+				    scope, p_ptr->publ.ref, key);
+	if (publ) {
+		list_add(&publ->pport_list, &p_ptr->publications);
+		p_ptr->pub_count++;
+		p_ptr->publ.published = 1;
+		res = TIPC_OK;
+	}
+exit:
+	tipc_port_unlock(p_ptr);
+	return res;
+}
+
+/*
+ * tipc_withdraw(): withdraw name publications from the port 'ref'.
+ *
+ * With 'seq' == NULL every publication on the port is withdrawn and the
+ * call succeeds.  Otherwise only the first publication matching 'scope',
+ * type and lower bound is considered, and it is withdrawn only if its upper
+ * bound matches too.
+ *
+ * Returns TIPC_OK on success, or -EINVAL for an unknown port, a port with
+ * nothing published, or no matching publication.
+ */
+int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq)
+{
+	struct port *p_ptr;
+	struct publication *publ;
+	struct publication *tpubl;
+	int res = -EINVAL;
+	
+	p_ptr = tipc_port_lock(ref);
+	if (!p_ptr)
+		return -EINVAL;
+	if (!p_ptr->publ.published)
+		goto exit;
+	/*
+	 * NOTE(review): the _safe iterator suggests tipc_nametbl_withdraw()
+	 * unlinks the entry from p_ptr->publications -- confirm.
+	 */
+	if (!seq) {
+		list_for_each_entry_safe(publ, tpubl, 
+					 &p_ptr->publications, pport_list) {
+			tipc_nametbl_withdraw(publ->type, publ->lower, 
+					      publ->ref, publ->key);
+		}
+		res = TIPC_OK;
+	} else {
+		list_for_each_entry_safe(publ, tpubl, 
+					 &p_ptr->publications, pport_list) {
+			if (publ->scope != scope)
+				continue;
+			if (publ->type != seq->type)
+				continue;
+			if (publ->lower != seq->lower)
+				continue;
+			/* Same scope/type/lower but different upper: give up */
+			if (publ->upper != seq->upper)
+				break;
+			tipc_nametbl_withdraw(publ->type, publ->lower, 
+					      publ->ref, publ->key);
+			res = TIPC_OK;
+			break;
+		}
+	}
+	if (list_empty(&p_ptr->publications))
+		p_ptr->publ.published = 0;
+exit:
+	tipc_port_unlock(p_ptr);
+	return res;
+}
+
+/*
+ * tipc_connect2port(): connect port 'ref' to the peer port 'peer'.
+ *
+ * Rewrites the port's header template for connection traffic, starts the
+ * probe timer and subscribes to node-down events for the peer's node.
+ *
+ * Returns TIPC_OK on success, or -EINVAL for an unknown port, a port that
+ * is already published or connected, or a zero peer reference.
+ */
+int tipc_connect2port(u32 ref, struct tipc_portid const *peer)
+{
+	struct port *p_ptr;
+	struct tipc_msg *msg;
+	int res = -EINVAL;
+
+	p_ptr = tipc_port_lock(ref);
+	if (!p_ptr)
+		return -EINVAL;
+	if (p_ptr->publ.published || p_ptr->publ.connected)
+		goto exit;
+	if (!peer->ref)
+		goto exit;
+
+	msg = &p_ptr->publ.phdr;
+	msg_set_destnode(msg, peer->node);
+	msg_set_destport(msg, peer->ref);
+	msg_set_orignode(msg, tipc_own_addr);
+	msg_set_origport(msg, p_ptr->publ.ref);
+	/* NOTE(review): the meaning of the initial value 42 is not visible here */
+	msg_set_transp_seqno(msg, 42);
+	msg_set_type(msg, TIPC_CONN_MSG);
+	/* Header size depends on whether the peer's node may need routing */
+	if (!may_route(peer->node))
+		msg_set_hdr_sz(msg, SHORT_H_SIZE);
+	else
+		msg_set_hdr_sz(msg, LONG_H_SIZE);
+
+	p_ptr->probing_interval = PROBING_INTERVAL;
+	p_ptr->probing_state = CONFIRMED;
+	p_ptr->publ.connected = 1;
+	k_start_timer(&p_ptr->timer, p_ptr->probing_interval);
+
+	tipc_nodesub_subscribe(&p_ptr->subscription,peer->node,
+			  (void *)(unsigned long)ref,
+			  (net_ev_handler)port_handle_node_down);
+	res = TIPC_OK;
+exit:
+	tipc_port_unlock(p_ptr);
+	/*
+	 * NOTE(review): max_pkt is written after the port is unlocked, and
+	 * also on the failure paths that jump to 'exit' -- confirm this is
+	 * intentional (e.g. to avoid nesting locks) before relying on it.
+	 */
+	p_ptr->max_pkt = tipc_link_get_max_pkt(peer->node, ref);
+	return res;
+}
+
+/*
+ * tipc_disconnect(): Disconnect port from peer.
+ *                    This is a node local operation.
+ *
+ * Returns TIPC_OK on success, -EINVAL for an unknown port, or -ENOTCONN if
+ * the port was not connected.
+ */
+
+int tipc_disconnect(u32 ref)
+{
+	struct port *p_ptr;
+	int res = -ENOTCONN;
+
+	p_ptr = tipc_port_lock(ref);
+	if (!p_ptr)
+		return -EINVAL;
+	if (p_ptr->publ.connected) {
+		p_ptr->publ.connected = 0;
+		/* let timer expire on its own to avoid deadlock! */
+		tipc_nodesub_unsubscribe(&p_ptr->subscription);
+		res = TIPC_OK;
+	}
+	tipc_port_unlock(p_ptr);
+	return res;
+}
+
+/*
+ * tipc_shutdown(): Send a SHUTDOWN msg to peer and disconnect
+ *
+ * The message is built only while the port is connected and is routed
+ * after the port is unlocked.  Returns -EINVAL for an unknown port,
+ * otherwise the result of tipc_disconnect().
+ */
+int tipc_shutdown(u32 ref)
+{
+	struct sk_buff *shutdown_buf = NULL;
+	struct port *p_ptr = tipc_port_lock(ref);
+
+	if (p_ptr == NULL)
+		return -EINVAL;
+
+	if (p_ptr->publ.connected) {
+		u32 level = msg_importance(&p_ptr->publ.phdr);
+
+		/* One importance level above the port's own, capped */
+		if (level < TIPC_CRITICAL_IMPORTANCE)
+			level += 1;
+		shutdown_buf = port_build_proto_msg(port_peerport(p_ptr),
+						    port_peernode(p_ptr),
+						    ref, tipc_own_addr,
+						    level, TIPC_CONN_MSG,
+						    TIPC_CONN_SHUTDOWN,
+						    port_out_seqno(p_ptr), 0);
+	}
+	tipc_port_unlock(p_ptr);
+	tipc_net_route_msg(shutdown_buf);
+	return tipc_disconnect(ref);
+}
+
+/*
+ * tipc_isconnected(): report the port's connected flag in '*isconnected'.
+ *
+ * Returns TIPC_OK, or -EINVAL if 'ref' does not identify a port.
+ */
+int tipc_isconnected(u32 ref, int *isconnected)
+{
+	struct port *p_ptr = tipc_port_lock(ref);
+
+	if (p_ptr == NULL)
+		return -EINVAL;
+
+	*isconnected = p_ptr->publ.connected;
+	tipc_port_unlock(p_ptr);
+
+	return TIPC_OK;
+}
+
+/*
+ * tipc_peer(): copy the identity of the connected peer into 'peer'.
+ *
+ * Returns TIPC_OK, -EINVAL if 'ref' does not identify a port, or -ENOTCONN
+ * if the port is not connected ('peer' is then left untouched).
+ */
+int tipc_peer(u32 ref, struct tipc_portid *peer)
+{
+	int res = -ENOTCONN;
+	struct port *p_ptr = tipc_port_lock(ref);
+
+	if (p_ptr == NULL)
+		return -EINVAL;
+
+	if (p_ptr->publ.connected) {
+		peer->ref = port_peerport(p_ptr);
+		peer->node = port_peernode(p_ptr);
+		res = TIPC_OK;
+	}
+	tipc_port_unlock(p_ptr);
+	return res;
+}
+
+/*
+ * tipc_ref_valid(): return 1 if 'ref' resolves to an object in the
+ * reference table, 0 otherwise.
+ */
+int tipc_ref_valid(u32 ref)
+{
+	/* Works irrespective of type */
+	return tipc_ref_deref(ref) != NULL;
+}
+
+
+/*
+ *  tipc_port_recv_sections(): Concatenate and deliver sectioned
+ *                        message for this node.
+ *
+ *  Returns msg_build()'s result whether or not a buffer was produced.
+ */
+
+int tipc_port_recv_sections(struct port *sender, unsigned int num_sect,
+		       struct iovec const *msg_sect)
+{
+	struct sk_buff *local_buf;
+	int build_res = msg_build(&sender->publ.phdr, msg_sect, num_sect,
+				  MAX_MSG_SIZE, !sender->user_port,
+				  &local_buf);
+
+	if (likely(local_buf))
+		tipc_port_recv_msg(local_buf);
+
+	return build_res;
+}
+
+/**
+ * tipc_send - send message sections on connection
+ * @ref: reference of the sending port, which must be connected
+ * @num_sect: number of entries in @msg_sect
+ * @msg_sect: iovec array describing the message data
+ *
+ * The port is resolved with tipc_port_deref(); no port lock is taken here.
+ *
+ * Returns the result of the send attempt, -EINVAL for an unknown or
+ * unconnected port, or -ELINKCONG when congested.  On an unreliable port
+ * congestion is hidden: the data size is returned instead.
+ */
+
+int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect)
+{
+	struct port *p_ptr;
+	u32 destnode;
+	int res;
+
+	p_ptr = tipc_port_deref(ref);
+	if (!p_ptr || !p_ptr->publ.connected)
+		return -EINVAL;
+
+	/* Assume congestion until the send attempt proves otherwise */
+	p_ptr->publ.congested = 1;
+	if (!tipc_port_congested(p_ptr)) {
+		destnode = port_peernode(p_ptr);
+		if (likely(destnode != tipc_own_addr))
+			res = tipc_link_send_sections_fast(p_ptr, msg_sect, num_sect,
+							   destnode);
+		else
+			res = tipc_port_recv_sections(p_ptr, num_sect, msg_sect);
+
+		if (likely(res != -ELINKCONG)) {
+			port_incr_out_seqno(p_ptr);
+			p_ptr->publ.congested = 0;
+			p_ptr->sent++;
+			return res;
+		}
+	}
+	if (port_unreliable(p_ptr)) {
+		p_ptr->publ.congested = 0;
+		/* Just calculate msg length and return */
+		return msg_calc_data_size(msg_sect, num_sect);
+	}
+	/* The congested flag stays set on this path */
+	return -ELINKCONG;
+}
+
+/**
+ * tipc_send_buf - send message buffer on connection
+ * @ref: reference of the sending port, which must be connected
+ * @buf: buffer holding @dsz bytes of message data; the port's header is
+ *       pushed in front of it
+ * @dsz: size of the message data
+ *
+ * The port is resolved with tipc_port_deref(); no port lock is taken here.
+ *
+ * Returns the result of the send attempt (the total message size for local
+ * delivery), -EINVAL for an unknown or unconnected port, -ENOMEM if
+ * headroom for the header cannot be obtained, or -ELINKCONG when congested.
+ * On an unreliable port congestion is hidden and @dsz is returned.
+ */
+
+int tipc_send_buf(u32 ref, struct sk_buff *buf, unsigned int dsz)
+{
+	struct port *p_ptr;
+	struct tipc_msg *msg;
+	u32 destnode;
+	u32 hsz;
+	u32 sz;
+	int res;	/* signed: holds negative errno values such as -ELINKCONG */
+
+	p_ptr = tipc_port_deref(ref);
+	if (!p_ptr || !p_ptr->publ.connected)
+		return -EINVAL;
+
+	msg = &p_ptr->publ.phdr;
+	hsz = msg_hdr_sz(msg);
+	sz = hsz + dsz;
+	msg_set_size(msg, sz);
+	if (skb_cow(buf, hsz))
+		return -ENOMEM;
+
+	/* Prepend a copy of the port's header template to the data */
+	skb_push(buf, hsz);
+	memcpy(buf->data, (unchar *)msg, hsz);
+	destnode = msg_destnode(msg);
+	/* Assume congestion until the send attempt proves otherwise */
+	p_ptr->publ.congested = 1;
+	if (!tipc_port_congested(p_ptr)) {
+		if (likely(destnode != tipc_own_addr))
+			res = tipc_send_buf_fast(buf, destnode);
+		else {
+			tipc_port_recv_msg(buf);
+			res = sz;
+		}
+		if (likely(res != -ELINKCONG)) {
+			port_incr_out_seqno(p_ptr);
+			p_ptr->sent++;
+			p_ptr->publ.congested = 0;
+			return res;
+		}
+	}
+	if (port_unreliable(p_ptr)) {
+		p_ptr->publ.congested = 0;
+		return dsz;
+	}
+	return -ELINKCONG;
+}
+
+/**
+ * tipc_forward2name - forward message sections to port name
+ * @ref: reference of the sending port, which must not be connected
+ * @name: destination port name
+ * @domain: lookup domain; also the initial destination node for the lookup
+ * @num_sect: number of entries in @msg_sect
+ * @msg_sect: iovec array describing the message data
+ * @orig: port identity written into the header as the originator
+ * @importance: message importance; values above TIPC_CRITICAL_IMPORTANCE
+ *              leave the port's current importance unchanged
+ *
+ * Returns the result of the delivery/send attempt, -EINVAL for an unknown
+ * or connected port, or -ELINKCONG on congestion (hidden on unreliable
+ * ports, where the data size is returned).  If the name cannot be resolved
+ * the message is rejected with TIPC_ERR_NO_NAME.
+ */
+
+int tipc_forward2name(u32 ref, 
+		      struct tipc_name const *name, 
+		      u32 domain,
+		      u32 num_sect, 
+		      struct iovec const *msg_sect,
+		      struct tipc_portid const *orig, 
+		      unsigned int importance)
+{
+	struct port *p_ptr;
+	struct tipc_msg *msg;
+	u32 destnode = domain;
+	u32 destport = 0;
+	int res;
+
+	p_ptr = tipc_port_deref(ref);
+	if (!p_ptr || p_ptr->publ.connected)
+		return -EINVAL;
+
+	/* The port's own header template is rewritten for this message */
+	msg = &p_ptr->publ.phdr;
+	msg_set_type(msg, TIPC_NAMED_MSG);
+	msg_set_orignode(msg, orig->node);
+	msg_set_origport(msg, orig->ref);
+	msg_set_hdr_sz(msg, LONG_H_SIZE);
+	msg_set_nametype(msg, name->type);
+	msg_set_nameinst(msg, name->instance);
+	msg_set_lookup_scope(msg, addr_scope(domain));
+	if (importance <= TIPC_CRITICAL_IMPORTANCE)
+		msg_set_importance(msg,importance);
+	/* destnode is passed by address and used below after the lookup */
+	destport = tipc_nametbl_translate(name->type, name->instance, &destnode);
+	msg_set_destnode(msg, destnode);
+	msg_set_destport(msg, destport);
+
+	if (likely(destport || destnode)) {
+		p_ptr->sent++;
+		if (likely(destnode == tipc_own_addr))
+			return tipc_port_recv_sections(p_ptr, num_sect, msg_sect);
+		res = tipc_link_send_sections_fast(p_ptr, msg_sect, num_sect, 
+						   destnode);
+		if (likely(res != -ELINKCONG))
+			return res;
+		if (port_unreliable(p_ptr)) {
+			/* Just calculate msg length and return */
+			return msg_calc_data_size(msg_sect, num_sect);
+		}
+		return -ELINKCONG;
+	}
+	return tipc_port_reject_sections(p_ptr, msg, msg_sect, num_sect, 
+					 TIPC_ERR_NO_NAME);
+}
+
+/**
+ * tipc_send2name - send message sections to port name
+ *
+ * Wrapper around tipc_forward2name() that names the sending port on this
+ * node as the originator and passes TIPC_PORT_IMPORTANCE.
+ */
+
+int tipc_send2name(u32 ref, 
+		   struct tipc_name const *name,
+		   unsigned int domain, 
+		   unsigned int num_sect, 
+		   struct iovec const *msg_sect)
+{
+	struct tipc_portid self;
+
+	self.node = tipc_own_addr;
+	self.ref = ref;
+
+	return tipc_forward2name(ref, name, domain, num_sect, msg_sect,
+				 &self, TIPC_PORT_IMPORTANCE);
+}
+
+/** 
+ * tipc_forward_buf2name - forward message buffer to port name
+ * @ref: reference of the sending port, which must not be connected
+ * @name: destination port name
+ * @domain: lookup domain; also the initial destination node for the lookup
+ * @buf: buffer holding @dsz bytes of message data; a LONG_H_SIZE header is
+ *       pushed in front of it
+ * @dsz: size of the message data
+ * @orig: port identity written into the header as the originator
+ * @importance: message importance; values above TIPC_CRITICAL_IMPORTANCE
+ *              leave the port's current importance unchanged
+ *
+ * Returns the result of the delivery/send attempt, -EINVAL for an unknown
+ * or connected port, -ENOMEM if headroom cannot be obtained, or -ELINKCONG
+ * on congestion (hidden on unreliable ports, where @dsz is returned).  If
+ * the name cannot be resolved the buffer is rejected with TIPC_ERR_NO_NAME.
+ */
+
+int tipc_forward_buf2name(u32 ref,
+			  struct tipc_name const *name,
+			  u32 domain,
+			  struct sk_buff *buf,
+			  unsigned int dsz,
+			  struct tipc_portid const *orig,
+			  unsigned int importance)
+{
+	struct port *p_ptr;
+	struct tipc_msg *msg;
+	u32 destnode = domain;
+	u32 destport = 0;
+	int res;
+
+	p_ptr = (struct port *)tipc_ref_deref(ref);
+	if (!p_ptr || p_ptr->publ.connected)
+		return -EINVAL;
+
+	/* The port's own header template is rewritten for this message */
+	msg = &p_ptr->publ.phdr;
+	if (importance <= TIPC_CRITICAL_IMPORTANCE)
+		msg_set_importance(msg, importance);
+	msg_set_type(msg, TIPC_NAMED_MSG);
+	msg_set_orignode(msg, orig->node);
+	msg_set_origport(msg, orig->ref);
+	msg_set_nametype(msg, name->type);
+	msg_set_nameinst(msg, name->instance);
+	msg_set_lookup_scope(msg, addr_scope(domain));
+	msg_set_hdr_sz(msg, LONG_H_SIZE);
+	msg_set_size(msg, LONG_H_SIZE + dsz);
+	destport = tipc_nametbl_translate(name->type, name->instance, &destnode);
+	msg_set_destnode(msg, destnode);
+	msg_set_destport(msg, destport);
+	msg_dbg(msg, "forw2name ==> ");
+	/* NOTE(review): buf is not released here on failure -- confirm the caller owns it */
+	if (skb_cow(buf, LONG_H_SIZE))
+		return -ENOMEM;
+	/* Prepend a copy of the header template to the data */
+	skb_push(buf, LONG_H_SIZE);
+	memcpy(buf->data, (unchar *)msg, LONG_H_SIZE);
+	msg_dbg(buf_msg(buf),"PREP:");
+	if (likely(destport || destnode)) {
+		p_ptr->sent++;
+		if (destnode == tipc_own_addr)
+			return tipc_port_recv_msg(buf);
+		res = tipc_send_buf_fast(buf, destnode);
+		if (likely(res != -ELINKCONG))
+			return res;
+		if (port_unreliable(p_ptr))
+			return dsz;
+		return -ELINKCONG;
+	}
+	return tipc_reject_msg(buf, TIPC_ERR_NO_NAME);
+}
+
+/** 
+ * tipc_send_buf2name - send message buffer to port name
+ *
+ * Wrapper around tipc_forward_buf2name() that names the sending port on
+ * this node as the originator and passes TIPC_PORT_IMPORTANCE.
+ */
+
+int tipc_send_buf2name(u32 ref, 
+		       struct tipc_name const *dest, 
+		       u32 domain,
+		       struct sk_buff *buf, 
+		       unsigned int dsz)
+{
+	struct tipc_portid self;
+
+	self.node = tipc_own_addr;
+	self.ref = ref;
+
+	return tipc_forward_buf2name(ref, dest, domain, buf, dsz,
+				     &self, TIPC_PORT_IMPORTANCE);
+}
+
+/** 
+ * tipc_forward2port - forward message sections to port identity
+ * @ref: reference of the sending port, which must not be connected
+ * @dest: destination port identity
+ * @num_sect: number of entries in @msg_sect
+ * @msg_sect: iovec array describing the message data
+ * @orig: port identity written into the header as the originator
+ * @importance: message importance; values above TIPC_CRITICAL_IMPORTANCE
+ *              leave the port's current importance unchanged
+ *
+ * Returns the result of the delivery/send attempt, -EINVAL for an unknown
+ * or connected port, or -ELINKCONG on congestion (hidden on unreliable
+ * ports, where the data size is returned).
+ */
+
+int tipc_forward2port(u32 ref,
+		      struct tipc_portid const *dest,
+		      unsigned int num_sect, 
+		      struct iovec const *msg_sect,
+		      struct tipc_portid const *orig, 
+		      unsigned int importance)
+{
+	struct port *p_ptr;
+	struct tipc_msg *msg;
+	int res;
+
+	p_ptr = tipc_port_deref(ref);
+	if (!p_ptr || p_ptr->publ.connected)
+		return -EINVAL;
+
+	/* The port's own header template is rewritten for this message */
+	msg = &p_ptr->publ.phdr;
+	msg_set_type(msg, TIPC_DIRECT_MSG);
+	msg_set_orignode(msg, orig->node);
+	msg_set_origport(msg, orig->ref);
+	msg_set_destnode(msg, dest->node);
+	msg_set_destport(msg, dest->ref);
+	msg_set_hdr_sz(msg, DIR_MSG_H_SIZE);
+	if (importance <= TIPC_CRITICAL_IMPORTANCE)
+		msg_set_importance(msg, importance);
+	p_ptr->sent++;
+	/* Destination on this node: deliver without going through a link */
+	if (dest->node == tipc_own_addr)
+		return tipc_port_recv_sections(p_ptr, num_sect, msg_sect);
+	res = tipc_link_send_sections_fast(p_ptr, msg_sect, num_sect, dest->node);
+	if (likely(res != -ELINKCONG))
+		return res;
+	if (port_unreliable(p_ptr)) {
+		/* Just calculate msg length and return */
+		return msg_calc_data_size(msg_sect, num_sect);
+	}
+	return -ELINKCONG;
+}
+
+/** 
+ * tipc_send2port - send message sections to port identity 
+ *
+ * Wrapper around tipc_forward2port() that names the sending port on this
+ * node as the originator and passes TIPC_PORT_IMPORTANCE.
+ */
+
+int tipc_send2port(u32 ref, 
+		   struct tipc_portid const *dest,
+		   unsigned int num_sect, 
+		   struct iovec const *msg_sect)
+{
+	struct tipc_portid self;
+
+	self.node = tipc_own_addr;
+	self.ref = ref;
+
+	return tipc_forward2port(ref, dest, num_sect, msg_sect,
+				 &self, TIPC_PORT_IMPORTANCE);
+}
+
+/** 
+ * tipc_forward_buf2port - forward message buffer to port identity
+ * @ref: reference of the sending port, which must not be connected
+ * @dest: destination port identity
+ * @buf: buffer holding @dsz bytes of message data; a DIR_MSG_H_SIZE header
+ *       is pushed in front of it
+ * @dsz: size of the message data
+ * @orig: port identity written into the header as the originator
+ * @importance: message importance; values above TIPC_CRITICAL_IMPORTANCE
+ *              leave the port's current importance unchanged
+ *
+ * Returns the result of the delivery/send attempt, -EINVAL for an unknown
+ * or connected port, -ENOMEM if headroom cannot be obtained, or -ELINKCONG
+ * on congestion (hidden on unreliable ports, where @dsz is returned).
+ */
+int tipc_forward_buf2port(u32 ref,
+			  struct tipc_portid const *dest,
+			  struct sk_buff *buf,
+			  unsigned int dsz,
+			  struct tipc_portid const *orig,
+			  unsigned int importance)
+{
+	struct port *p_ptr;
+	struct tipc_msg *msg;
+	int res;
+
+	p_ptr = (struct port *)tipc_ref_deref(ref);
+	if (!p_ptr || p_ptr->publ.connected)
+		return -EINVAL;
+
+	/* The port's own header template is rewritten for this message */
+	msg = &p_ptr->publ.phdr;
+	msg_set_type(msg, TIPC_DIRECT_MSG);
+	msg_set_orignode(msg, orig->node);
+	msg_set_origport(msg, orig->ref);
+	msg_set_destnode(msg, dest->node);
+	msg_set_destport(msg, dest->ref);
+	msg_set_hdr_sz(msg, DIR_MSG_H_SIZE);
+	if (importance <= TIPC_CRITICAL_IMPORTANCE)
+		msg_set_importance(msg, importance);
+	msg_set_size(msg, DIR_MSG_H_SIZE + dsz);
+	/* NOTE(review): buf is not released here on failure -- confirm the caller owns it */
+	if (skb_cow(buf, DIR_MSG_H_SIZE))
+		return -ENOMEM;
+
+	/* Prepend a copy of the header template to the data */
+	skb_push(buf, DIR_MSG_H_SIZE);
+	memcpy(buf->data, (unchar *)msg, DIR_MSG_H_SIZE);
+	msg_dbg(msg, "buf2port: ");
+	p_ptr->sent++;
+	/* Destination on this node: deliver without going through a link */
+	if (dest->node == tipc_own_addr)
+		return tipc_port_recv_msg(buf);
+	res = tipc_send_buf_fast(buf, dest->node);
+	if (likely(res != -ELINKCONG))
+		return res;
+	if (port_unreliable(p_ptr))
+		return dsz;
+	return -ELINKCONG;
+}
+
+/** 
+ * tipc_send_buf2port - send message buffer to port identity
+ *
+ * Wrapper around tipc_forward_buf2port() that names the sending port on
+ * this node as the originator and passes TIPC_PORT_IMPORTANCE.
+ */
+
+int tipc_send_buf2port(u32 ref, 
+		       struct tipc_portid const *dest,
+		       struct sk_buff *buf, 
+		       unsigned int dsz)
+{
+	struct tipc_portid self;
+
+	self.node = tipc_own_addr;
+	self.ref = ref;
+
+	return tipc_forward_buf2port(ref, dest, buf, dsz,
+				     &self, TIPC_PORT_IMPORTANCE);
+}
+
diff --git a/net/tipc/port.h b/net/tipc/port.h
new file mode 100644
index 00000000000..839f100da64
--- /dev/null
+++ b/net/tipc/port.h
@@ -0,0 +1,209 @@
+/*
+ * net/tipc/port.h: Include file for TIPC port code
+ * 
+ * Copyright (c) 1994-2006, Ericsson AB
+ * Copyright (c) 2004-2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_PORT_H
+#define _TIPC_PORT_H
+
+#include "core.h"
+#include "ref.h"
+#include "net.h"
+#include "msg.h"
+#include "dbg.h"
+#include "node_subscr.h"
+
+/**
+ * struct user_port - TIPC user port (used with native API)
+ * @user_ref: id of user who created user port
+ * @usr_handle: user-specified field
+ * @ref: object reference to associated TIPC port
+ * @err_cb ... @continue_event_cb: callback routines; judging by their typedef
+ *   names they cover errors, messages and "continue" events (TODO confirm)
+ * @uport_list: adjacent user ports in list of ports held by user
+ */
+struct user_port {
+	u32 user_ref;
+	void *usr_handle; 
+	u32 ref;
+	tipc_msg_err_event err_cb; 
+	tipc_named_msg_err_event named_err_cb; 
+	tipc_conn_shutdown_event conn_err_cb; 
+	tipc_msg_event msg_cb; 
+	tipc_named_msg_event named_msg_cb; 
+	tipc_conn_msg_event conn_msg_cb; 
+	tipc_continue_event continue_event_cb;
+	struct list_head uport_list;
+};
+
+/**
+ * struct port - TIPC port structure
+ * @publ: TIPC port info available to privileged users
+ * @port_list: adjacent ports in TIPC's global list of ports
+ * @dispatcher: ptr to routine which handles received messages
+ * @wakeup: ptr to routine to call when port is no longer congested
+ * @user_port: ptr to user port associated with port (if any)
+ * @wait_list: adjacent ports in list of ports waiting on link congestion
+ * @congested_link: ptr to congested link port is waiting on
+ * @waiting_pkts: (not described yet)
+ * @sent: running count of messages sent by port
+ * @acked: compared with @sent by tipc_port_congested(); presumably # acked
+ * @publications: list of publications for port
+ * @pub_count: total # of publications port has made during its lifetime
+ * @max_pkt: maximum packet size "hint" used when building messages sent by port
+ * @probing_state: (not described yet)
+ * @probing_interval: (not described yet)
+ * @last_in_seqno: (not described yet)
+ * @timer: timer embedded in the port (its use is not visible in this header)
+ * @subscription: "node down" subscription used to terminate failed connections
+ */
+
+struct port {
+	struct tipc_port publ;
+	struct list_head port_list;
+	u32 (*dispatcher)(struct tipc_port *, struct sk_buff *);
+	void (*wakeup)(struct tipc_port *);
+	struct user_port *user_port;
+	struct list_head wait_list;
+	struct link *congested_link;
+	u32 waiting_pkts;
+	u32 sent;
+	u32 acked;
+	struct list_head publications;
+	u32 pub_count;
+	u32 max_pkt;
+	u32 probing_state;
+	u32 probing_interval;
+	u32 last_in_seqno;
+	struct timer_list timer;
+	struct node_subscr subscription;
+};
+
+extern spinlock_t tipc_port_list_lock;
+struct port_list;	/* forward declaration: only used via pointer below */
+
+int tipc_port_recv_sections(struct port *p_ptr, u32 num_sect, 
+			    struct iovec const *msg_sect);
+int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr,
+			      struct iovec const *msg_sect, u32 num_sect,
+			      int err);
+struct sk_buff *tipc_port_get_ports(void);
+struct sk_buff *port_show_stats(const void *req_tlv_area, int req_tlv_space);
+void tipc_port_recv_proto_msg(struct sk_buff *buf);
+void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp);
+void tipc_port_reinit(void);
+
+/**
+ * tipc_port_lock - lock port instance referred to and return its pointer
+ * (yields whatever tipc_ref_lock() returns for @ref, typed as a port)
+ */
+static inline struct port *tipc_port_lock(u32 ref)
+{
+	return tipc_ref_lock(ref);
+}
+
+/**
+ * tipc_port_unlock - unlock a port instance
+ *
+ * The lock is reached through the port itself (p_ptr->publ.lock), so no
+ * tipc_ref_unlock() lookup is needed; the port is expected to be locked.
+ */
+static inline void tipc_port_unlock(struct port *p_ptr)
+{
+	spin_unlock_bh(p_ptr->publ.lock);
+}
+
+static inline struct port *tipc_port_deref(u32 ref)
+{
+	return tipc_ref_deref(ref);	/* unlocked lookup of the port for @ref */
+}
+
+static inline u32 tipc_peer_port(struct port *p_ptr)
+{
+	return msg_destport(&p_ptr->publ.phdr);	/* dest port kept in publ.phdr */
+}
+
+static inline u32 tipc_peer_node(struct port *p_ptr)
+{
+	return msg_destnode(&p_ptr->publ.phdr);	/* dest node kept in publ.phdr */
+}
+
+static inline int tipc_port_congested(struct port *p_ptr)
+{	/* true once sent-but-unacked messages reach two flow control windows */
+	return (p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2);
+}
+
+/**
+ * tipc_port_recv_msg - receive message from lower layer and deliver to port user
+ *
+ * Returns the message data size once routed or delivered, else tipc_reject_msg()
+ */
+static inline int tipc_port_recv_msg(struct sk_buff *buf)
+{
+	struct tipc_msg *hdr = buf_msg(buf);
+	u32 dest_ref = msg_destport(hdr);
+	u32 data_len = msg_data_sz(hdr);
+	struct port *port;
+	u32 err;
+
+	/* forward unresolved named message */
+	if (unlikely(!dest_ref)) {
+		tipc_net_route_msg(buf);
+		return data_len;
+	}
+
+	port = tipc_port_lock(dest_ref);
+	if (unlikely(!port)) {
+		err = TIPC_ERR_NO_PORT;
+		goto reject;
+	}
+	/* a connected port only accepts connection traffic from its peer */
+	if (likely(port->publ.connected) &&
+	    (unlikely(msg_origport(hdr) != tipc_peer_port(port)) ||
+	     unlikely(msg_orignode(hdr) != tipc_peer_node(port)) ||
+	     unlikely(!msg_connected(hdr)))) {
+		tipc_port_unlock(port);
+		err = TIPC_ERR_NO_PORT;
+		goto reject;
+	}
+	err = port->dispatcher(&port->publ, buf);
+	tipc_port_unlock(port);
+	if (likely(!err))
+		return data_len;
+reject:
+	dbg("port->rejecting, err = %x..\n",err);
+	return tipc_reject_msg(buf, err);
+}
+
+#endif
diff --git a/net/tipc/ref.c b/net/tipc/ref.c
new file mode 100644
index 00000000000..5a13c2defe4
--- /dev/null
+++ b/net/tipc/ref.c
@@ -0,0 +1,189 @@
+/*
+ * net/tipc/ref.c: TIPC object registry code
+ * 
+ * Copyright (c) 1991-2006, Ericsson AB
+ * Copyright (c) 2004-2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "ref.h"
+#include "port.h"
+#include "subscr.h"
+#include "name_distr.h"
+#include "name_table.h"
+#include "config.h"
+#include "discover.h"
+#include "bearer.h"
+#include "node.h"
+#include "bcast.h"
+
+/*
+ * Object reference table consists of 2**N entries.
+ *
+ * A used entry has object ptr != 0, reference == XXXX|own index
+ *				     (XXXX changes each time entry is acquired) 
+ * A free entry has object ptr == 0, reference == YYYY|next free index
+ *				     (YYYY is one more than last used XXXX)
+ *
+ * Free list is initially chained from entry (2**N)-1 to entry 1. 
+ * Entry 0 is not used to allow index 0 to indicate the end of the free list.
+ *
+ * Note: Any accidental reference of the form XXXX|0--0 won't match entry 0
+ * because entry 0's reference field has the form XXXX|1--1.
+ */
+
+struct ref_table tipc_ref_table = { 0 };	/* no entries until tipc_ref_table_init() */
+/* held for writing while the table is set up or an entry is acquired/discarded */
+static rwlock_t ref_table_lock = RW_LOCK_UNLOCKED;
+
+/**
+ * tipc_ref_table_init - create table of >= @requested_size entries (power of 2,
+ * minimum 16) whose reference values take their upper bits from @start
+ */
+int tipc_ref_table_init(u32 requested_size, u32 start)
+{
+	struct reference *table;
+	u32 sz = 1 << 4;
+	u32 index_mask;
+	u32 i;
+
+	if (requested_size > (1u << 31))
+		return -EINVAL;	/* doubling sz would wrap to 0 and never end */
+	while (sz < requested_size)
+		sz <<= 1;
+	table = vmalloc(sz * sizeof(*table));
+	if (table == NULL)
+		return -ENOMEM;
+	write_lock_bh(&ref_table_lock);
+	index_mask = sz - 1;
+	for (i = 0; i < sz; i++) {
+		table[i].object = NULL;
+		spin_lock_init(&table[i].lock);
+		table[i].data.next_plus_upper = (start & ~index_mask) + i - 1;
+	}
+	tipc_ref_table.entries = table;
+	tipc_ref_table.index_mask = index_mask;
+	tipc_ref_table.first_free = sz - 1;
+	tipc_ref_table.last_free = 1;
+	write_unlock_bh(&ref_table_lock);
+	return TIPC_OK;
+}
+
+/**
+ * tipc_ref_table_stop - free the reference table, if any, and mark it absent
+ */
+void tipc_ref_table_stop(void)
+{
+	struct reference *old_entries = tipc_ref_table.entries;
+
+	if (old_entries) {
+		vfree(old_entries);
+		tipc_ref_table.entries = 0;
+	}
+}
+
+/**
+ * tipc_ref_acquire - create reference to an object
+ *
+ * Returns a reference value that maps back to 'object' later, or 0 when no
+ * entry is free.  A non-null 'lock' receives the entry's lock, left unlocked.
+ */
+u32 tipc_ref_acquire(void *object, spinlock_t **lock)
+{
+	struct reference *slot;
+	u32 slot_idx;
+	u32 mask;
+	u32 link;
+	u32 new_ref = 0;
+
+	assert(tipc_ref_table.entries && object);
+
+	write_lock_bh(&ref_table_lock);
+	slot_idx = tipc_ref_table.first_free;
+	if (!slot_idx)
+		goto done;	/* free list exhausted: report 0 */
+
+	slot = &tipc_ref_table.entries[slot_idx];
+	mask = tipc_ref_table.index_mask;
+	/* take lock in case a previous user of entry still holds it */
+	spin_lock_bh(&slot->lock);
+	link = slot->data.next_plus_upper;
+	tipc_ref_table.first_free = link & mask;
+	new_ref = (link & ~mask) + slot_idx;
+	slot->data.reference = new_ref;
+	slot->object = object;
+	if (lock != 0)
+		*lock = &slot->lock;
+	spin_unlock_bh(&slot->lock);
+done:
+	write_unlock_bh(&ref_table_lock);
+	return new_ref;
+}
+
+/**
+ * tipc_ref_discard - invalidate references to an object
+ *
+ * Frees the entry for re-use.  Its spin_lock may still be busy after discard.
+ */
+void tipc_ref_discard(u32 ref)
+{
+	struct reference *slot;
+	struct reference *tail;
+	u32 slot_idx;
+	u32 mask;
+
+	assert(tipc_ref_table.entries);
+	assert(ref != 0);
+
+	write_lock_bh(&ref_table_lock);
+	mask = tipc_ref_table.index_mask;
+	slot_idx = ref & mask;
+	slot = &tipc_ref_table.entries[slot_idx];
+	assert(slot->object != 0);
+	assert(slot->data.reference == ref);
+
+	/* mark entry as unused and append it to the free list */
+	slot->object = 0;
+	if (tipc_ref_table.first_free != 0) {
+		/* next_plus_upper is always XXXX|0--0 for last free entry */
+		tail = &tipc_ref_table.entries[tipc_ref_table.last_free];
+		tail->data.next_plus_upper |= slot_idx;
+	} else {
+		tipc_ref_table.first_free = slot_idx;
+	}
+	tipc_ref_table.last_free = slot_idx;
+
+	/* increment upper bits of entry to invalidate subsequent references */
+	slot->data.next_plus_upper = (ref & ~mask) + (mask + 1);
+	write_unlock_bh(&ref_table_lock);
+}
+
diff --git a/net/tipc/ref.h b/net/tipc/ref.h
new file mode 100644
index 00000000000..4f8f9f40dca
--- /dev/null
+++ b/net/tipc/ref.h
@@ -0,0 +1,131 @@
+/*
+ * net/tipc/ref.h: Include file for TIPC object registry code
+ * 
+ * Copyright (c) 1991-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_REF_H
+#define _TIPC_REF_H
+
+/**
+ * struct reference - TIPC object reference entry
+ * @object: pointer to object associated with reference entry
+ * @lock: spinlock controlling access to object
+ * @data: reference value associated with object (or link to next unused entry);
+ *        a single u32 viewed either as 'reference' or as 'next_plus_upper'
+ */
+struct reference {
+	void *object;
+	spinlock_t lock;
+	union {
+		u32 next_plus_upper;
+		u32 reference;
+	} data;
+};
+
+/**
+ * struct ref_table - table of TIPC object reference entries
+ * @entries: pointer to array of reference entries
+ * @index_mask: bitmask for array index portion of reference values
+ * @first_free: array index of first unused object reference entry
+ *              (0 means no entry is free; entry 0 itself is never handed out)
+ * @last_free: array index of last unused object reference entry
+ */
+struct ref_table {
+	struct reference *entries;
+	u32 index_mask;
+	u32 first_free;
+	u32 last_free;
+};
+
+extern struct ref_table tipc_ref_table;
+
+int tipc_ref_table_init(u32 requested_size, u32 start);
+void tipc_ref_table_stop(void);
+
+u32 tipc_ref_acquire(void *object, spinlock_t **lock);	/* 0 if no entry is free */
+void tipc_ref_discard(u32 ref);
+
+
+/**
+ * tipc_ref_lock - lock referenced object and return pointer to it
+ * (returns 0, with nothing left locked, if table is absent or @ref mismatches)
+ */
+static inline void *tipc_ref_lock(u32 ref)
+{
+	struct reference *r;
+
+	if (unlikely(!tipc_ref_table.entries))
+		return 0;
+	r = &tipc_ref_table.entries[ref & tipc_ref_table.index_mask];
+	spin_lock_bh(&r->lock);
+	if (likely(r->data.reference == ref))
+		return r->object;
+	spin_unlock_bh(&r->lock);
+	return 0;
+}
+
+/**
+ * tipc_ref_unlock - unlock referenced object
+ * (logs an error and leaves the lock alone if @ref no longer matches its entry)
+ */
+static inline void tipc_ref_unlock(u32 ref)
+{
+	struct reference *r;
+
+	if (unlikely(!tipc_ref_table.entries))
+		return;
+	r = &tipc_ref_table.entries[ref & tipc_ref_table.index_mask];
+	if (unlikely(r->data.reference != ref))
+		err("tipc_ref_unlock() invoked using obsolete reference\n");
+	else
+		spin_unlock_bh(&r->lock);
+}
+
+/**
+ * tipc_ref_deref - return pointer to referenced object (without locking it)
+ * (returns 0 if the table is absent or @ref does not match its entry)
+ */
+static inline void *tipc_ref_deref(u32 ref)
+{
+	struct reference *r;
+
+	if (unlikely(!tipc_ref_table.entries))
+		return 0;
+	r = &tipc_ref_table.entries[ref & tipc_ref_table.index_mask];
+	if (likely(r->data.reference == ref))
+		return r->object;
+	return 0;
+}
+
+#endif
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
new file mode 100644
index 00000000000..67253bfcd70
--- /dev/null
+++ b/net/tipc/socket.c
@@ -0,0 +1,1724 @@
+/*
+ * net/tipc/socket.c: TIPC socket API
+ * 
+ * Copyright (c) 2001-2006, Ericsson AB
+ * Copyright (c) 2004-2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/net.h>
+#include <linux/socket.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/poll.h>
+#include <linux/fcntl.h>
+#include <asm/semaphore.h>
+#include <asm/string.h>
+#include <asm/atomic.h>
+#include <net/sock.h>
+
+#include <linux/tipc.h>
+#include <linux/tipc_config.h>
+#include <net/tipc/tipc_msg.h>
+#include <net/tipc/tipc_port.h>
+
+#include "core.h"
+
+#define SS_LISTENING	(-1)	/* socket is listening */
+#define SS_READY	(-2)	/* socket is connectionless */
+
+#define OVERLOAD_LIMIT_BASE    5000
+/* 'sk' must stay the first member: tipc_sk() casts a struct sock pointer */
+struct tipc_sock {
+	struct sock sk;
+	struct tipc_port *p;		/* TIPC port attached in tipc_create() */
+	struct semaphore sem;		/* taken by the socket API routines */
+};
+
+#define tipc_sk(sk) ((struct tipc_sock *)(sk))
+
+static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf);
+static void wakeupdispatch(struct tipc_port *tport);
+/* operation tables; tipc_create() picks one according to socket type */
+static struct proto_ops packet_ops;
+static struct proto_ops stream_ops;
+static struct proto_ops msg_ops;
+
+static struct proto tipc_proto;
+
+static int sockets_enabled = 0;
+/* decremented whenever a buffer is taken off a socket receive queue */
+static atomic_t tipc_queue_size = ATOMIC_INIT(0);
+
+
+/*
+ * sock_lock(): Lock a port/socket pair through the port's lock.
+ * lock_sock() cannot be used here, since the same lock must also
+ * protect ports with non-socket interfaces.
+ * See net.c for description of locking policy.
+ */
+static inline void sock_lock(struct tipc_sock *tsock)
+{
+	spin_lock_bh(tsock->p->lock);
+}
+
+/*
+ * sock_unlock(): Unlock a port/socket pair (counterpart of sock_lock())
+ */
+static inline void sock_unlock(struct tipc_sock *tsock)
+{
+	spin_unlock_bh(tsock->p->lock);
+}
+
+/**
+ * pollmask - determine the current set of poll() events for a socket
+ * @sock: socket structure
+ *
+ * TIPC sets the returned events as follows:
+ * a) POLLRDNORM and POLLIN are set if the socket's receive queue is non-empty
+ *    or if a connection-oriented socket does not have an active connection
+ *    (i.e. a read operation will not block).
+ * b) POLLOUT is set except when a socket's connection has been terminated
+ *    (i.e. a write operation will not block).
+ * c) POLLHUP is set when a socket's connection has been terminated.
+ *
+ * IMPORTANT: The fact that a read or write operation will not block does NOT
+ * imply that the operation will succeed!
+ *
+ * Returns pollmask value
+ */
+
+static inline u32 pollmask(struct socket *sock)
+{
+	u32 events = 0;
+
+	/* case a): a read will not block */
+	if ((skb_queue_len(&sock->sk->sk_receive_queue) != 0) ||
+	    (sock->state == SS_UNCONNECTED) ||
+	    (sock->state == SS_DISCONNECTING))
+		events = (POLLRDNORM | POLLIN);
+
+	/* cases b) and c) exclude each other */
+	if (sock->state != SS_DISCONNECTING)
+		events |= POLLOUT;
+	else
+		events |= POLLHUP;
+
+	return events;
+}
+
+
+/**
+ * advance_queue - discard first buffer in queue
+ * @tsock: TIPC socket
+ * The dequeue runs under the port/socket lock; tipc_queue_size drops by one.
+ */
+static inline void advance_queue(struct tipc_sock *tsock)
+{
+	sock_lock(tsock);
+	buf_discard(skb_dequeue(&tsock->sk.sk_receive_queue));
+	sock_unlock(tsock);
+	atomic_dec(&tipc_queue_size);
+}
+
+/**
+ * tipc_create - create a TIPC socket
+ * @sock: pre-allocated socket structure
+ * @protocol: protocol indicator (must be 0)
+ *
+ * Creates a raw TIPC port, selects the operations and initial state that
+ * match the socket type, then allocates a 'struct sock' and links it to
+ * both the 'struct socket' and the port.
+ *
+ * Returns 0 on success; -EPROTOTYPE for an unsupported socket type,
+ * -EPROTONOSUPPORT for a non-zero protocol, -ENOMEM on allocation failure.
+ */
+static int tipc_create(struct socket *sock, int protocol)
+{
+	struct tipc_sock *tsock;
+	struct tipc_port *tport;
+	struct sock *sk;
+	u32 portref;
+
+	switch (sock->type) {
+	case SOCK_STREAM:
+	case SOCK_SEQPACKET:
+	case SOCK_DGRAM:
+	case SOCK_RDM:
+		break;
+	default:
+		return -EPROTOTYPE;
+	}
+
+	if (unlikely(protocol != 0))
+		return -EPROTONOSUPPORT;
+
+	portref = tipc_createport_raw(0, &dispatch, &wakeupdispatch, TIPC_LOW_IMPORTANCE);
+	if (unlikely(!portref))
+		return -ENOMEM;
+
+	/* connection-oriented types start unconnected; the others are ready */
+	if (sock->type == SOCK_STREAM) {
+		sock->ops = &stream_ops;
+		sock->state = SS_UNCONNECTED;
+	} else if (sock->type == SOCK_SEQPACKET) {
+		sock->ops = &packet_ops;
+		sock->state = SS_UNCONNECTED;
+	} else {
+		if (sock->type == SOCK_DGRAM)
+			tipc_set_portunreliable(portref, 1);
+		tipc_set_portunreturnable(portref, 1);
+		sock->ops = &msg_ops;
+		sock->state = SS_READY;
+	}
+
+	sk = sk_alloc(AF_TIPC, GFP_KERNEL, &tipc_proto, 1);
+	if (!sk) {
+		tipc_deleteport(portref);
+		return -ENOMEM;
+	}
+
+	sock_init_data(sock, sk);
+	init_waitqueue_head(sk->sk_sleep);
+	sk->sk_rcvtimeo = 8 * HZ;   /* default connect timeout = 8s */
+
+	tsock = tipc_sk(sk);
+	tport = tipc_get_port(portref);
+	tsock->p = tport;
+	tport->usr_handle = tsock;
+	init_MUTEX(&tsock->sem);
+
+	dbg("sock_create: %x\n",tsock);
+
+	atomic_inc(&tipc_user_count);
+
+	return 0;
+}
+
+/**
+ * release - destroy a TIPC socket
+ * @sock: socket to destroy
+ *
+ * This routine cleans up any messages that are still queued on the socket.
+ * For DGRAM and RDM socket types, all queued messages are rejected.
+ * For SEQPACKET and STREAM socket types, the first message is rejected
+ * and any others are discarded.  (If the first message on a STREAM socket
+ * is partially-read, it is discarded and the next one is rejected instead.)
+ *
+ * NOTE: Rejected messages are not necessarily returned to the sender!  They
+ * are returned or discarded according to the "destination droppable" setting
+ * specified for the message by the sender.
+ *
+ * Returns 0 on success, errno otherwise
+ */
+static int release(struct socket *sock)
+{
+	struct tipc_sock *tsock = tipc_sk(sock->sk);
+	struct sock *sk = sock->sk;
+	int res = TIPC_OK;
+	struct sk_buff *buf;
+
+	dbg("sock_delete: %x\n",tsock);
+	if (!tsock)
+		return 0;
+	/* uninterruptible: teardown must not proceed without the semaphore */
+	down(&tsock->sem);
+	if (!sock->sk) {
+		up(&tsock->sem);
+		return 0;
+	}
+
+	/* Reject unreceived messages, unless no longer connected */
+
+	while (sock->state != SS_DISCONNECTING) {
+		sock_lock(tsock);
+		buf = skb_dequeue(&sk->sk_receive_queue);
+		if (!buf)
+			tsock->p->usr_handle = 0;
+		sock_unlock(tsock);
+		if (!buf)
+			break;
+		if (TIPC_SKB_CB(buf)->handle != msg_data(buf_msg(buf)))
+			buf_discard(buf);
+		else
+			tipc_reject_msg(buf, TIPC_ERR_NO_PORT);
+		atomic_dec(&tipc_queue_size);
+	}
+
+	/* Delete TIPC port */
+
+	res = tipc_deleteport(tsock->p->ref);
+	sock->sk = NULL;
+
+	/* Discard any remaining messages */
+
+	while ((buf = skb_dequeue(&sk->sk_receive_queue))) {
+		buf_discard(buf);
+		atomic_dec(&tipc_queue_size);
+	}
+
+	up(&tsock->sem);
+
+	sock_put(sk);
+
+	atomic_dec(&tipc_user_count);
+	return res;
+}
+
+/**
+ * bind - associate or disassociate TIPC name(s) with a socket
+ * @sock: socket structure
+ * @uaddr: socket address describing name(s) and desired operation
+ * @uaddr_len: size of socket address data structure
+ *
+ * Name and name sequence binding is indicated using a positive scope value;
+ * a negative scope value unbinds the specified name.  Specifying no name
+ * (i.e. a socket address length of 0) unbinds all names from the socket.
+ *
+ * A TIPC_ADDR_NAME address is handled as a name sequence whose upper bound
+ * equals its lower bound (the address structure is updated in place).
+ *
+ * Returns the tipc_publish()/tipc_withdraw() result (0 on success), or:
+ * -ERESTARTSYS if interrupted while waiting for the socket semaphore,
+ * -EINVAL if a non-zero @uaddr_len is smaller than struct sockaddr_tipc,
+ * -EAFNOSUPPORT for a non-TIPC family or an unsupported address type.
+ */
+
+static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len)
+{
+	struct tipc_sock *tsock = tipc_sk(sock->sk);
+	struct sockaddr_tipc *taddr = (struct sockaddr_tipc *)uaddr;
+	int rc;
+
+	if (down_interruptible(&tsock->sem))
+		return -ERESTARTSYS;
+
+	if (unlikely(!uaddr_len)) {
+		/* no name given: unbind everything */
+		rc = tipc_withdraw(tsock->p->ref, 0, 0);
+	} else if (uaddr_len < sizeof(struct sockaddr_tipc)) {
+		rc = -EINVAL;
+	} else if (taddr->family != AF_TIPC) {
+		rc = -EAFNOSUPPORT;
+	} else if ((taddr->addrtype != TIPC_ADDR_NAME) &&
+		   (taddr->addrtype != TIPC_ADDR_NAMESEQ)) {
+		rc = -EAFNOSUPPORT;
+	} else {
+		/* a single name is the sequence lower..lower */
+		if (taddr->addrtype == TIPC_ADDR_NAME)
+			taddr->addr.nameseq.upper = taddr->addr.nameseq.lower;
+		if (taddr->scope > 0)
+			rc = tipc_publish(tsock->p->ref, taddr->scope,
+					  &taddr->addr.nameseq);
+		else
+			rc = tipc_withdraw(tsock->p->ref, -taddr->scope,
+					   &taddr->addr.nameseq);
+	}
+
+	up(&tsock->sem);
+	return rc;
+}
+
+/**
+ * get_name - get port ID of socket or peer socket
+ * @sock: socket structure
+ * @uaddr: area for returned socket address
+ * @uaddr_len: area for returned length of socket address
+ * @peer: 0 to obtain socket name, 1 to obtain peer socket name
+ *
+ * The address is always reported as a TIPC_ADDR_ID with scope 0; it is
+ * filled in even when the port lookup itself reports a failure.
+ *
+ * Returns 0 on success, errno otherwise
+ */
+static int get_name(struct socket *sock, struct sockaddr *uaddr,
+		    int *uaddr_len, int peer)
+{
+	struct tipc_sock *tsock = tipc_sk(sock->sk);
+	struct sockaddr_tipc *taddr = (struct sockaddr_tipc *)uaddr;
+	u32 rc;
+
+	if (down_interruptible(&tsock->sem))
+		return -ERESTARTSYS;
+
+	*uaddr_len = sizeof(*taddr);
+	taddr->family = AF_TIPC;
+	taddr->addrtype = TIPC_ADDR_ID;
+	taddr->scope = 0;
+	rc = peer ? tipc_peer(tsock->p->ref, &taddr->addr.id)
+		  : tipc_ownidentity(tsock->p->ref, &taddr->addr.id);
+	taddr->addr.name.domain = 0;
+
+	up(&tsock->sem);
+	return rc;
+}
+
+/**
+ * poll - read and possibly block on pollmask
+ * @file: file structure associated with the socket
+ * @sock: socket for which to calculate the poll bits
+ * @wait: poll table, passed on to poll_wait() with the socket's wait queue
+ * Returns the pollmask
+ */
+static unsigned int poll(struct file *file, struct socket *sock,
+			 poll_table *wait)
+{
+	u32 events;
+
+	poll_wait(file, sock->sk->sk_sleep, wait);
+	events = pollmask(sock);	/* NEED LOCK HERE? */
+	return events;
+}
+
+/**
+ * dest_name_check - verify user is permitted to send to specified port name
+ * @dest: destination address
+ * @m: descriptor for message to be sent
+ *
+ * Prevents restricted configuration commands from being issued by
+ * unauthorized users: a command whose type has any 0xC000 bit set is
+ * only accepted from a caller with CAP_NET_ADMIN.
+ * Returns 0 if permission is granted, otherwise errno
+ */
+static inline int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m)
+{
+	struct tipc_cfg_msg_hdr hdr;
+
+	if (likely(dest->addr.name.name.type >= TIPC_RESERVED_TYPES))
+		return 0;
+	if (likely(dest->addr.name.name.type == TIPC_TOP_SRV))
+		return 0;
+	if (likely(dest->addr.name.name.type != TIPC_CFG_SRV))
+		return -EACCES;
+
+	/* configuration service: inspect the command header */
+	if (copy_from_user(&hdr, m->msg_iov[0].iov_base, sizeof(hdr)))
+		return -EFAULT;
+	/* both tests must hold, hence logical (not bitwise) AND */
+	if ((ntohs(hdr.tcm_type) & 0xC000) && (!capable(CAP_NET_ADMIN)))
+		return -EACCES;
+
+	return 0;
+}
+
+/**
+ * send_msg - send message in connectionless manner
+ * @iocb: (unused)
+ * @sock: socket structure
+ * @m: message to send
+ * @total_len: message length (only examined for the SOCK_STREAM 'SYN' check)
+ *
+ * Message must have a destination specified explicitly, as a complete
+ * struct sockaddr_tipc (anything shorter is rejected with -EINVAL).
+ * Used for SOCK_RDM and SOCK_DGRAM messages,
+ * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections.
+ * (Note: 'SYN+' is prohibited on SOCK_STREAM.)
+ *
+ * Returns the number of bytes sent on success, or errno otherwise
+ */
+
+static int send_msg(struct kiocb *iocb, struct socket *sock,
+		    struct msghdr *m, size_t total_len)
+{
+	struct tipc_sock *tsock = tipc_sk(sock->sk);
+	struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name;
+	struct sk_buff *buf;
+	int needs_conn;
+	int res = -EINVAL;
+
+	if (unlikely(!dest))
+		return -EDESTADDRREQ;
+	if (unlikely((m->msg_namelen < sizeof(*dest)) ||
+		     (dest->family != AF_TIPC)))
+		return -EINVAL;
+
+	needs_conn = (sock->state != SS_READY);
+	if (unlikely(needs_conn)) {
+		if (sock->state == SS_LISTENING)
+			return -EPIPE;
+		if (sock->state != SS_UNCONNECTED)
+			return -EISCONN;
+		if ((tsock->p->published) ||
+		    ((sock->type == SOCK_STREAM) && (total_len != 0)))
+			return -EOPNOTSUPP;
+	}
+
+	if (down_interruptible(&tsock->sem))
+		return -ERESTARTSYS;
+
+	if (needs_conn) {
+		/* Abort any pending connection attempts (very unlikely) */
+		while ((buf = skb_dequeue(&sock->sk->sk_receive_queue))) {
+			tipc_reject_msg(buf, TIPC_ERR_NO_PORT);
+			atomic_dec(&tipc_queue_size);
+		}
+
+		sock->state = SS_CONNECTING;
+	}
+
+	/* retry for as long as the send fails on link congestion */
+	for (;;) {
+		if (dest->addrtype == TIPC_ADDR_NAME) {
+			if ((res = dest_name_check(dest, m)))
+				break;
+			res = tipc_send2name(tsock->p->ref,
+					     &dest->addr.name.name,
+					     dest->addr.name.domain,
+					     m->msg_iovlen,
+					     m->msg_iov);
+		} else if (dest->addrtype == TIPC_ADDR_ID) {
+			res = tipc_send2port(tsock->p->ref,
+					     &dest->addr.id,
+					     m->msg_iovlen,
+					     m->msg_iov);
+		} else if (dest->addrtype == TIPC_ADDR_MCAST) {
+			if (needs_conn) {
+				res = -EOPNOTSUPP;
+				break;
+			}
+			if ((res = dest_name_check(dest, m)))
+				break;
+			res = tipc_multicast(tsock->p->ref,
+					     &dest->addr.nameseq,
+					     0,
+					     m->msg_iovlen,
+					     m->msg_iov);
+		}
+		/* any other address type leaves res at -EINVAL */
+		if (likely(res != -ELINKCONG))
+			break;
+		if (m->msg_flags & MSG_DONTWAIT) {
+			res = -EWOULDBLOCK;
+			break;
+		}
+		if (wait_event_interruptible(*sock->sk->sk_sleep,
+					     !tsock->p->congested)) {
+			res = -ERESTARTSYS;
+			break;
+		}
+	}
+
+	up(&tsock->sem);
+	return res;
+}
+
+/** 
+ * send_packet - send a connection-oriented message
+ * @iocb: (unused here; passed through to send_msg() for an implied connect)
+ * @sock: socket structure
+ * @m: message to send
+ * @total_len: length of message data (only passed through to send_msg())
+ * 
+ * Used for SOCK_SEQPACKET messages and SOCK_STREAM data.
+ * 
+ * Returns the number of bytes sent on success, or errno otherwise
+ */
+
+static int send_packet(struct kiocb *iocb, struct socket *sock,
+		       struct msghdr *m, size_t total_len)
+{
+	struct tipc_sock *tsock = tipc_sk(sock->sk);
+        struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name;
+	int res;
+
+	/* Handle implied connection establishment */
+
+	if (unlikely(dest))	/* a destination address was supplied */
+		return send_msg(iocb, sock, m, total_len);
+
+	if (down_interruptible(&tsock->sem)) {
+		return -ERESTARTSYS;
+        }
+
+        if (unlikely(sock->state != SS_CONNECTED)) {	/* checked under tsock->sem */
+                if (sock->state == SS_DISCONNECTING)
+                        res = -EPIPE;   
+                else
+                        res = -ENOTCONN;
+                goto exit;
+        }
+
+        do {	/* retried only while the send reports link congestion */
+                res = tipc_send(tsock->p->ref, m->msg_iovlen, m->msg_iov);
+                if (likely(res != -ELINKCONG)) {
+exit:	/* common exit: releases tsock->sem on every path that took it */
+                        up(&tsock->sem);
+                        return res;
+                }
+		if (m->msg_flags & MSG_DONTWAIT) {	/* caller refuses to block */
+			res = -EWOULDBLOCK;
+			goto exit;
+		}
+                if (wait_event_interruptible(*sock->sk->sk_sleep,	/* sleeps with tsock->sem still held */
+                                             !tsock->p->congested)) {
+                    res = -ERESTARTSYS;
+                    goto exit;
+                }
+        } while (1);
+}
+
+/** 
+ * send_stream - send stream-oriented data
+ * @iocb: (unused)
+ * @sock: socket structure
+ * @m: data to send
+ * @total_len: total length of data to be sent
+ * 
+ * Used for SOCK_STREAM data.
+ * 
+ * Returns the number of bytes sent (possibly fewer than @total_len if a
+ * failure interrupts a multi-message send), or errno if nothing was sent
+ */
+
+static int send_stream(struct kiocb *iocb, struct socket *sock,
+		       struct msghdr *m, size_t total_len)
+{
+	struct msghdr my_msg;
+	struct iovec my_iov;
+	struct iovec *curr_iov;
+	int curr_iovlen;
+	char __user *curr_start;
+	int curr_left;
+	int bytes_to_send;
+	int bytes_sent = 0;
+	int res;
+
+	if (likely(total_len <= TIPC_MAX_USER_MSG_SIZE))
+		return send_packet(iocb, sock, m, total_len);
+
+	/* Can only send large data streams if already connected */
+
+	if (unlikely(sock->state != SS_CONNECTED)) {
+		if (sock->state == SS_DISCONNECTING)
+			return -EPIPE;
+		else
+			return -ENOTCONN;
+	}
+
+	/*
+	 * Send each iovec entry using one or more messages.  This is good for
+	 * the most likely case (i.e. one large iovec entry), but could be
+	 * improved to pass sets of small iovec entries into send_packet().
+	 */
+
+	my_msg = *m;
+	curr_iov = my_msg.msg_iov;
+	curr_iovlen = my_msg.msg_iovlen;
+	my_msg.msg_iov = &my_iov;	/* each chunk goes out as a one-entry iovec */
+	my_msg.msg_iovlen = 1;
+
+	while (curr_iovlen--) {
+		curr_start = curr_iov->iov_base;
+		curr_left = curr_iov->iov_len;
+
+		while (curr_left) {
+			bytes_to_send = (curr_left < TIPC_MAX_USER_MSG_SIZE)
+				? curr_left : TIPC_MAX_USER_MSG_SIZE;
+			my_iov.iov_base = curr_start;
+			my_iov.iov_len = bytes_to_send;
+			if ((res = send_packet(iocb, sock, &my_msg, 0)) < 0)
+				return bytes_sent ? bytes_sent : res;	/* don't hide data already sent */
+			curr_left -= bytes_to_send;
+			curr_start += bytes_to_send;
+			bytes_sent += bytes_to_send;
+		}
+
+		curr_iov++;
+	}
+
+	return bytes_sent;
+}
+
+/**
+ * auto_connect - finish an implied connect using the peer's reply
+ * @sock: socket structure
+ * @tsock: TIPC-specific socket structure
+ * @msg: peer's response message
+ * 
+ * Returns 0 once connected, -ECONNREFUSED if @msg carries an error code
+ */
+
+static int auto_connect(struct socket *sock, struct tipc_sock *tsock, 
+			struct tipc_msg *msg)
+{
+	struct tipc_portid origin;
+
+	/* An error code in the reply ends the attempt */
+	if (msg_errcode(msg)) {
+		sock->state = SS_DISCONNECTING;
+		return -ECONNREFUSED;
+	}
+	origin.node = msg_orignode(msg);
+	origin.ref = msg_origport(msg);
+	tipc_connect2port(tsock->p->ref, &origin);
+	tipc_set_portimportance(tsock->p->ref, msg_importance(msg));
+	sock->state = SS_CONNECTED;
+	return 0;
+}
+
+/**
+ * set_orig_addr - report the sender of a received message to the caller
+ * @m: descriptor for message info
+ * @msg: received message header
+ * 
+ * Note: Nothing is written when the receiver supplied no address buffer.
+ */
+
+static inline void set_orig_addr(struct msghdr *m, struct tipc_msg *msg)
+{
+	struct sockaddr_tipc *from = (struct sockaddr_tipc *)m->msg_name;
+
+	if (!from)
+		return;
+	from->family = AF_TIPC;
+	from->addrtype = TIPC_ADDR_ID;
+	from->addr.id.ref = msg_origport(msg);
+	from->addr.id.node = msg_orignode(msg);
+	from->addr.name.domain = 0;	/* not required; could be left unset */
+	from->scope = 0;		/* not required; could be left unset */
+	m->msg_namelen = sizeof(struct sockaddr_tipc);
+}
+
+/**
+ * anc_data_recv - optionally capture ancillary data for received message 
+ * @m: descriptor for message info
+ * @msg: received message header (NULL is tolerated: nothing is captured)
+ * @tport: TIPC port associated with message
+ * 
+ * Note: Ancillary data is not captured if not requested by receiver.
+ * 
+ * Returns 0 if successful, otherwise the error returned by put_cmsg()
+ */
+
+static inline int anc_data_recv(struct msghdr *m, struct tipc_msg *msg, 
+				struct tipc_port *tport)
+{
+	u32 anc_data[3];
+	u32 err;
+	u32 dest_type;
+	int res;
+
+	if (likely(m->msg_controllen == 0))	/* no control buffer supplied */
+		return 0;
+
+	/* Optionally capture errored message object(s) */
+
+	err = msg ? msg_errcode(msg) : 0;
+	if (unlikely(err)) {
+		anc_data[0] = err;	/* TIPC_ERRINFO is two u32s (8 bytes): error code ... */
+		anc_data[1] = msg_data_sz(msg);	/* ... and size of the returned data */
+		if ((res = put_cmsg(m, SOL_SOCKET, TIPC_ERRINFO, 8, anc_data)))
+			return res;
+		if (anc_data[1] &&	/* TIPC_RETDATA only if there is data to return */
+		    (res = put_cmsg(m, SOL_SOCKET, TIPC_RETDATA, anc_data[1], 
+				    msg_data(msg))))
+			return res;
+	}
+
+	/* Optionally capture message destination object */
+
+	dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG;
+	switch (dest_type) {	/* fills { type, lower, upper } for TIPC_DESTNAME */
+	case TIPC_NAMED_MSG:
+		anc_data[0] = msg_nametype(msg);
+		anc_data[1] = msg_namelower(msg);
+		anc_data[2] = msg_namelower(msg);	/* same accessor as lower bound */
+		break;
+	case TIPC_MCAST_MSG:
+		anc_data[0] = msg_nametype(msg);
+		anc_data[1] = msg_namelower(msg);
+		anc_data[2] = msg_nameupper(msg);
+		break;
+	case TIPC_CONN_MSG:
+		anc_data[0] = tport->conn_type;
+		anc_data[1] = tport->conn_instance;
+		anc_data[2] = tport->conn_instance;
+		break;
+	default:
+		anc_data[0] = 0;	/* zero type suppresses TIPC_DESTNAME below */
+	}
+	if (anc_data[0] &&
+	    (res = put_cmsg(m, SOL_SOCKET, TIPC_DESTNAME, 12, anc_data)))	/* 3 x u32 */
+		return res;
+
+	return 0;
+}
+
+/** 
+ * recv_msg - receive packet-oriented message
+ * @iocb: (unused)
+ * @sock: socket structure
+ * @m: descriptor for message info
+ * @buf_len: total size of user buffer area
+ * @flags: receive flags
+ * 
+ * Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages.
+ * If the complete message doesn't fit in user area, truncate it.
+ * Returns size of returned message data, errno otherwise
+ */
+
+static int recv_msg(struct kiocb *iocb, struct socket *sock,
+		    struct msghdr *m, size_t buf_len, int flags)
+{
+	struct tipc_sock *tsock = tipc_sk(sock->sk);
+	struct sk_buff *buf;
+	struct tipc_msg *msg;
+	unsigned int q_len;
+	unsigned int sz;
+	u32 err;
+	int res;
+
+	/* Currently doesn't support receiving into multiple iovec entries */
+
+	if (m->msg_iovlen != 1)
+		return -EOPNOTSUPP;
+
+	/* Catch invalid receive attempts */
+
+	if (unlikely(!buf_len))
+		return -EINVAL;
+
+	if (sock->type == SOCK_SEQPACKET) {	/* these checks run before tsock->sem is taken */
+		if (unlikely(sock->state == SS_UNCONNECTED))
+			return -ENOTCONN;
+		if (unlikely((sock->state == SS_DISCONNECTING) && 
+			     (skb_queue_len(&sock->sk->sk_receive_queue) == 0)))
+		       	return -ENOTCONN;
+	}
+
+	/* Look for a message in receive queue; wait if necessary */
+
+	if (unlikely(down_interruptible(&tsock->sem)))
+		return -ERESTARTSYS;
+
+restart:	/* re-entered after an empty, non-errored message is discarded */
+	if (unlikely((skb_queue_len(&sock->sk->sk_receive_queue) == 0) &&
+		     (flags & MSG_DONTWAIT))) {
+		res = -EWOULDBLOCK;
+		goto exit;
+	}
+
+	if ((res = wait_event_interruptible(	/* sleeps with tsock->sem still held */
+		*sock->sk->sk_sleep, 
+		((q_len = skb_queue_len(&sock->sk->sk_receive_queue)) ||
+		 (sock->state == SS_DISCONNECTING))) )) {
+		goto exit;
+	}
+
+	/* Catch attempt to receive on an already terminated connection */
+	/* [THIS CHECK MAY OVERLAP WITH AN EARLIER CHECK] */
+
+	if (!q_len) {	/* woken by SS_DISCONNECTING with nothing queued */
+		res = -ENOTCONN;
+		goto exit;
+	}
+
+	/* Get access to first message in receive queue */
+
+	buf = skb_peek(&sock->sk->sk_receive_queue);	/* stays queued until advance_queue() */
+	msg = buf_msg(buf);
+	sz = msg_data_sz(msg);
+	err = msg_errcode(msg);
+
+	/* Complete connection setup for an implied connect */
+
+	if (unlikely(sock->state == SS_CONNECTING)) {
+		if ((res = auto_connect(sock, tsock, msg)))	/* message is left on the queue */
+			goto exit;
+	}
+
+	/* Discard an empty non-errored message & try again */
+
+	if ((!sz) && (!err)) {
+		advance_queue(tsock);
+		goto restart;
+	}
+
+	/* Capture sender's address (optional) */
+
+	set_orig_addr(m, msg);
+
+	/* Capture ancillary data (optional) */
+
+	if ((res = anc_data_recv(m, msg, tsock->p)))
+		goto exit;
+
+	/* Capture message data (if valid) & compute return value (always) */
+	
+	if (!err) {
+		if (unlikely(buf_len < sz)) {	/* truncate to the user's buffer */
+			sz = buf_len;
+			m->msg_flags |= MSG_TRUNC;
+		}
+		if (unlikely(copy_to_user(m->msg_iov->iov_base, msg_data(msg),
+					  sz))) {
+			res = -EFAULT;
+			goto exit;
+		}
+		res = sz;
+	} else {	/* errored message: no data is copied */
+		if ((sock->state == SS_READY) ||
+		    ((err == TIPC_CONN_SHUTDOWN) || m->msg_control))
+			res = 0;
+		else
+			res = -ECONNRESET;
+	}
+
+	/* Consume received message (optional) */
+
+	if (likely(!(flags & MSG_PEEK))) {	/* acknowledge once the unacked count reaches the window */
+                if (unlikely(++tsock->p->conn_unacked >= TIPC_FLOW_CONTROL_WIN))
+                        tipc_acknowledge(tsock->p->ref, tsock->p->conn_unacked);
+		advance_queue(tsock);
+        }
+exit:
+	up(&tsock->sem);
+	return res;
+}
+
+/** 
+ * recv_stream - receive stream-oriented data
+ * @iocb: (unused)
+ * @sock: socket structure
+ * @m: descriptor for message info
+ * @buf_len: total size of user buffer area
+ * @flags: receive flags
+ * 
+ * Used for SOCK_STREAM messages only.  If not enough data is available 
+ * will optionally wait for more; never truncates data.
+ * Returns number of bytes copied if any were, otherwise 0 or errno
+ */
+
+static int recv_stream(struct kiocb *iocb, struct socket *sock,
+		       struct msghdr *m, size_t buf_len, int flags)
+{
+	struct tipc_sock *tsock = tipc_sk(sock->sk);
+	struct sk_buff *buf;
+	struct tipc_msg *msg;
+	unsigned int q_len;
+	unsigned int sz;
+	int sz_to_copy;
+	int sz_copied = 0;
+	int needed;
+	char *crs = m->msg_iov->iov_base;
+	unsigned char *buf_crs;
+	u32 err;
+	int res;
+
+	/* Currently doesn't support receiving into multiple iovec entries */
+
+	if (m->msg_iovlen != 1)
+		return -EOPNOTSUPP;
+
+	/* Catch invalid receive attempts */
+
+	if (unlikely(!buf_len))
+		return -EINVAL;
+
+	if (unlikely(sock->state == SS_DISCONNECTING)) {
+		if (skb_queue_len(&sock->sk->sk_receive_queue) == 0)
+			return -ENOTCONN;
+	} else if (unlikely(sock->state != SS_CONNECTED))
+		return -ENOTCONN;
+
+	/* Look for a message in receive queue; wait if necessary */
+
+	if (unlikely(down_interruptible(&tsock->sem)))
+		return -ERESTARTSYS;
+
+restart:	/* re-entered to gather more data or after discarding an empty message */
+	if (unlikely((skb_queue_len(&sock->sk->sk_receive_queue) == 0) &&
+		     (flags & MSG_DONTWAIT))) {
+		res = (sz_copied == 0) ? -EWOULDBLOCK : 0;
+		goto exit;
+	}
+
+	if ((res = wait_event_interruptible(	/* sleeps with tsock->sem still held */
+		*sock->sk->sk_sleep, 
+		((q_len = skb_queue_len(&sock->sk->sk_receive_queue)) ||
+		 (sock->state == SS_DISCONNECTING))) )) {
+		goto exit;
+	}
+
+	/* Catch attempt to receive on an already terminated connection */
+	/* [THIS CHECK MAY OVERLAP WITH AN EARLIER CHECK] */
+
+	if (!q_len) {
+		res = -ENOTCONN;
+		goto exit;
+	}
+
+	/* Get access to first message in receive queue */
+
+	buf = skb_peek(&sock->sk->sk_receive_queue);
+	msg = buf_msg(buf);
+	sz = msg_data_sz(msg);
+	err = msg_errcode(msg);
+
+	/* Discard an empty non-errored message & try again */
+
+	if ((!sz) && (!err)) {
+		advance_queue(tsock);
+		goto restart;
+	}
+
+	/* Optionally capture sender's address & ancillary data of first msg */
+
+	if (sz_copied == 0) {
+		set_orig_addr(m, msg);
+		if ((res = anc_data_recv(m, msg, tsock->p)))
+			goto exit;
+	}
+
+	/* Capture message data (if valid) & compute return value (always) */
+	
+	if (!err) {
+		buf_crs = (unsigned char *)(TIPC_SKB_CB(buf)->handle);	/* first unread byte */
+		sz = buf->tail - buf_crs;	/* unread bytes left in this message */
+
+		needed = (buf_len - sz_copied);
+		sz_to_copy = (sz <= needed) ? sz : needed;
+		if (unlikely(copy_to_user(crs, buf_crs, sz_to_copy))) {
+			res = -EFAULT;
+			goto exit;
+		}
+		sz_copied += sz_to_copy;
+
+		if (sz_to_copy < sz) {	/* user buffer is full; message only partly read */
+			if (!(flags & MSG_PEEK))
+				TIPC_SKB_CB(buf)->handle = buf_crs + sz_to_copy;
+			goto exit;
+		}
+
+		crs += sz_to_copy;
+	} else {
+		if (sz_copied != 0)
+			goto exit; /* can't add error msg to valid data */
+
+		if ((err == TIPC_CONN_SHUTDOWN) || m->msg_control)
+			res = 0;
+		else
+			res = -ECONNRESET;
+	}
+
+	/* Consume received message (optional) */
+
+	if (likely(!(flags & MSG_PEEK))) {
+                if (unlikely(++tsock->p->conn_unacked >= TIPC_FLOW_CONTROL_WIN))
+                        tipc_acknowledge(tsock->p->ref, tsock->p->conn_unacked);
+		advance_queue(tsock);
+        }
+
+	/* Loop around if more data is required */
+
+	if ((sz_copied < buf_len)    /* didn't get all requested data */ 
+	    && (flags & MSG_WAITALL) /* ... and need to wait for more */
+	    && (!(flags & MSG_PEEK)) /* ... and aren't just peeking at data */
+	    && (!err)                /* ... and haven't reached a FIN */
+	    )
+		goto restart;
+
+exit:
+	up(&tsock->sem);
+	return sz_copied ? sz_copied : res;	/* data already handed over wins over a later error */
+}
+
+/**
+ * queue_overloaded - decide whether a queue is too full to accept a message
+ * @queue_size: current size of queue
+ * @base: nominal maximum size of queue
+ * @msg: message to be added to queue
+ * 
+ * Returns 1 if @queue_size exceeds the importance-scaled limit, 0 otherwise
+ */
+
+static int queue_overloaded(u32 queue_size, u32 base, struct tipc_msg *msg)
+{
+	u32 limit;
+
+	switch (msg_importance(msg)) {
+	case TIPC_LOW_IMPORTANCE:
+		limit = base;
+		break;
+	case TIPC_MEDIUM_IMPORTANCE:
+		limit = base * 2;
+		break;
+	case TIPC_HIGH_IMPORTANCE:
+		limit = base * 100;
+		break;
+	default:
+		return 0;	/* any other importance level is never refused */
+	}
+	return queue_size > (msg_connected(msg) ? limit * 4 : limit);
+}
+
+/** 
+ * async_disconnect - wrapper that lets tipc_disconnect() run as a signal handler
+ * @portref: TIPC port reference (passed as pointer-sized value)
+ */
+
+static void async_disconnect(unsigned long portref)
+{
+	tipc_disconnect((u32)portref);	/* narrow back to the u32 port reference */
+}
+
+/** 
+ * dispatch - handle arriving message
+ * @tport: TIPC port that received message
+ * @buf: message
+ * 
+ * Called with port locked.  Must not take socket lock to avoid deadlock risk.
+ * 
+ * Returns TIPC error status code (TIPC_OK if message is not to be rejected)
+ */
+
+static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf)
+{
+	struct tipc_msg *msg = buf_msg(buf);
+	struct tipc_sock *tsock = (struct tipc_sock *)tport->usr_handle;
+	struct socket *sock;
+	u32 recv_q_len;
+
+	/* Reject message if socket is closing */
+
+	if (!tsock)	/* port no longer has a socket attached */
+		return TIPC_ERR_NO_PORT;
+
+	/* Reject message if it is wrong sort of message for socket */
+
+	/*
+	 * WOULD IT BE BETTER TO JUST DISCARD THESE MESSAGES INSTEAD?
+	 * "NO PORT" ISN'T REALLY THE RIGHT ERROR CODE, AND THERE MAY
+	 * BE SECURITY IMPLICATIONS INHERENT IN REJECTING INVALID TRAFFIC
+	 */
+	sock = tsock->sk.sk_socket;
+	if (sock->state == SS_READY) {	/* SS_READY sockets take no connection traffic */
+		if (msg_connected(msg)) {
+			msg_dbg(msg, "dispatch filter 1\n");
+			return TIPC_ERR_NO_PORT;
+		}
+	} else {	/* every other state refuses multicast */
+		if (msg_mcast(msg)) {
+			msg_dbg(msg, "dispatch filter 2\n");
+			return TIPC_ERR_NO_PORT;
+		}
+		if (sock->state == SS_CONNECTED) {
+			if (!msg_connected(msg)) {
+				msg_dbg(msg, "dispatch filter 3\n");
+				return TIPC_ERR_NO_PORT;
+			}
+		}
+		else if (sock->state == SS_CONNECTING) {	/* only a connected or errored reply passes */
+			if (!msg_connected(msg) && (msg_errcode(msg) == 0)) {
+				msg_dbg(msg, "dispatch filter 4\n");
+				return TIPC_ERR_NO_PORT;
+			}
+		} 
+		else if (sock->state == SS_LISTENING) {
+			if (msg_connected(msg) || msg_errcode(msg)) {
+				msg_dbg(msg, "dispatch filter 5\n");
+				return TIPC_ERR_NO_PORT;
+			}
+		} 
+		else if (sock->state == SS_DISCONNECTING) {	/* nothing is accepted any more */
+			msg_dbg(msg, "dispatch filter 6\n");
+			return TIPC_ERR_NO_PORT;
+		}
+		else /* (sock->state == SS_UNCONNECTED) */ {
+			if (msg_connected(msg) || msg_errcode(msg)) {
+				msg_dbg(msg, "dispatch filter 7\n");
+				return TIPC_ERR_NO_PORT;
+			}
+		}
+	}
+
+	/* Reject message if there isn't room to queue it */
+
+	if (unlikely((u32)atomic_read(&tipc_queue_size) > 	/* first the tipc_queue_size counter ... */
+		     OVERLOAD_LIMIT_BASE)) {
+		if (queue_overloaded(atomic_read(&tipc_queue_size), 
+				     OVERLOAD_LIMIT_BASE, msg))
+			return TIPC_ERR_OVERLOAD;
+        }
+	recv_q_len = skb_queue_len(&tsock->sk.sk_receive_queue);	/* ... then this socket's own queue */
+	if (unlikely(recv_q_len > (OVERLOAD_LIMIT_BASE / 2))) {
+		if (queue_overloaded(recv_q_len, 
+				     OVERLOAD_LIMIT_BASE / 2, msg)) 
+			return TIPC_ERR_OVERLOAD;
+        }
+
+	/* Initiate connection termination for an incoming 'FIN' */
+
+	if (unlikely(msg_errcode(msg) && (sock->state == SS_CONNECTED))) {
+		sock->state = SS_DISCONNECTING;
+		/* Note: Use signal since port lock is already taken! */
+		tipc_k_signal((Handler)async_disconnect, tport->ref);
+	}
+
+	/* Enqueue message (finally!) */
+
+	msg_dbg(msg,"<DISP<: ");
+	TIPC_SKB_CB(buf)->handle = msg_data(msg);	/* read cursor; recv_stream() advances it */
+	atomic_inc(&tipc_queue_size);
+	skb_queue_tail(&sock->sk->sk_receive_queue, buf);
+
+        wake_up_interruptible(sock->sk->sk_sleep);	/* wake any sleeper on the socket's wait queue */
+	return TIPC_OK;
+}
+
+/** 
+ * wakeupdispatch - wake up sleepers on a port's socket after congestion
+ * @tport: port to wakeup
+ * 
+ * Called with port lock on.
+ */
+
+static void wakeupdispatch(struct tipc_port *tport)
+{
+	struct tipc_sock *owner = (struct tipc_sock *)tport->usr_handle;
+
+	wake_up_interruptible(owner->sk.sk_sleep);
+}
+
+/**
+ * connect - establish a connection to another TIPC port
+ * @sock: socket structure
+ * @dest: socket address for destination port
+ * @destlen: size of @dest in bytes; must cover a full struct sockaddr_tipc
+ * @flags: (unused)
+ *
+ * Returns 0 on success, errno otherwise
+ */
+
+static int connect(struct socket *sock, struct sockaddr *dest, int destlen, 
+		   int flags)
+{
+   struct tipc_sock *tsock = tipc_sk(sock->sk);
+   struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest;
+   struct msghdr m = {0,};
+   struct sk_buff *buf;
+   struct tipc_msg *msg;
+   int res;
+
+   /* For now, TIPC does not allow use of connect() with DGRAM or RDM types */
+
+   if (sock->state == SS_READY)
+	   return -EOPNOTSUPP;
+
+   /* MOVE THE REST OF THIS ERROR CHECKING TO send_msg()? */
+   if (sock->state == SS_LISTENING)
+	   return -EOPNOTSUPP;
+   if (sock->state == SS_CONNECTING)
+	   return -EALREADY;
+   if (sock->state != SS_UNCONNECTED)
+           return -EISCONN;
+
+   if ((destlen < (int)sizeof(*dst)) || (dst->family != AF_TIPC) ||
+       ((dst->addrtype != TIPC_ADDR_NAME) && (dst->addrtype != TIPC_ADDR_ID)))
+           return -EINVAL;	/* length is checked before any field is read */
+
+   /* Send a 'SYN-' to destination */
+
+   m.msg_name = dest;
+   if ((res = send_msg(NULL, sock, &m, 0)) < 0) {
+	   sock->state = SS_DISCONNECTING;
+	   return res;
+   }
+
+   if (down_interruptible(&tsock->sem)) 
+           return -ERESTARTSYS;
+	
+   /* Wait for destination's 'ACK' response */
+
+   res = wait_event_interruptible_timeout(*sock->sk->sk_sleep,
+                                          skb_queue_len(&sock->sk->sk_receive_queue),
+					  sock->sk->sk_rcvtimeo);
+   buf = skb_peek(&sock->sk->sk_receive_queue);	/* only used when res > 0 */
+   if (res > 0) {
+	   msg = buf_msg(buf);
+           res = auto_connect(sock, tsock, msg);
+           if (!res) {
+		   if (dst->addrtype == TIPC_ADDR_NAME) {
+			   tsock->p->conn_type = dst->addr.name.name.type;
+			   tsock->p->conn_instance = dst->addr.name.name.instance;
+		   }
+		   if (!msg_data_sz(msg))	/* an empty reply is dropped from the queue */
+			   advance_queue(tsock);
+	   }
+   } else {
+	   if (res == 0) {
+		   res = -ETIMEDOUT;
+	   } else
+	           { /* leave "res" unchanged */ }
+	   sock->state = SS_DISCONNECTING;
+   }
+
+   up(&tsock->sem);
+   return res;
+}
+
+/** 
+ * listen - put an unconnected socket into the listening state
+ * @sock: socket structure
+ * @len: (unused)
+ * 
+ * Returns 0 on success, errno otherwise
+ */
+
+static int listen(struct socket *sock, int len)
+{
+	/* REQUIRES SOCKET LOCKING OF SOME SORT? */
+
+	if (sock->state == SS_UNCONNECTED) {
+		sock->state = SS_LISTENING;
+		return 0;
+	}
+
+	return (sock->state == SS_READY) ? -EOPNOTSUPP : -EINVAL;
+}
+
+/** 
+ * accept - wait for connection request
+ * @sock: listening socket
+ * @newsock: new socket that is to be connected
+ * @flags: file-related flags associated with socket
+ * 
+ * Returns 0 on success, errno otherwise
+ */
+
+static int accept(struct socket *sock, struct socket *newsock, int flags)
+{
+	struct tipc_sock *tsock = tipc_sk(sock->sk);
+	struct sk_buff *buf;
+	int res = -EFAULT;	/* overwritten on every path before it is returned */
+
+	if (sock->state == SS_READY)
+		return -EOPNOTSUPP;
+	if (sock->state != SS_LISTENING)
+		return -EINVAL;
+	
+	if (unlikely((skb_queue_len(&sock->sk->sk_receive_queue) == 0) && 	/* tested before tsock->sem is taken */
+		     (flags & O_NONBLOCK)))
+		return -EWOULDBLOCK;
+
+	if (down_interruptible(&tsock->sem))
+		return -ERESTARTSYS;
+
+	if (wait_event_interruptible(*sock->sk->sk_sleep, 	/* sleeps with tsock->sem still held */
+				     skb_queue_len(&sock->sk->sk_receive_queue))) {
+		res = -ERESTARTSYS;
+		goto exit;
+	}
+	buf = skb_peek(&sock->sk->sk_receive_queue);	/* request stays queued for now */
+
+	res = tipc_create(newsock, 0);	/* NOTE(review): on failure the request is left on the queue */
+	if (!res) {
+		struct tipc_sock *new_tsock = tipc_sk(newsock->sk);
+		struct tipc_portid id;
+		struct tipc_msg *msg = buf_msg(buf);
+		u32 new_ref = new_tsock->p->ref;
+
+		id.ref = msg_origport(msg);
+		id.node = msg_orignode(msg);
+		tipc_connect2port(new_ref, &id);
+		newsock->state = SS_CONNECTED;
+
+		tipc_set_portimportance(new_ref, msg_importance(msg));
+		if (msg_named(msg)) {
+			new_tsock->p->conn_type = msg_nametype(msg);
+			new_tsock->p->conn_instance = msg_nameinst(msg);
+		}
+
+               /* 
+		 * Respond to 'SYN-' by discarding it & returning 'ACK'-.
+		 * Respond to 'SYN+' by queuing it on new socket.
+		 */
+
+		msg_dbg(msg,"<ACC<: ");
+                if (!msg_data_sz(msg)) {
+                        struct msghdr m = {0,};
+
+                        send_packet(0, newsock, &m, 0);      /* NOTE(review): return value is ignored */
+                        advance_queue(tsock);
+                } else {
+			sock_lock(tsock);
+			skb_dequeue(&sock->sk->sk_receive_queue);	/* presumably removes buf, the head just peeked */
+			sock_unlock(tsock);
+			skb_queue_head(&newsock->sk->sk_receive_queue, buf);
+		}
+	}
+exit:
+	up(&tsock->sem);
+	return res;
+}
+
+/**
+ * shutdown - shutdown socket connection
+ * @sock: socket structure
+ * @how: direction to close (always treated as read + write)
+ *
+ * Terminates connection (if necessary), then purges socket's receive queue.
+ * 
+ * Returns 0 on success, errno otherwise
+ */
+
+static int shutdown(struct socket *sock, int how)
+{
+	struct tipc_sock* tsock = tipc_sk(sock->sk);
+	struct sk_buff *buf;
+	int res;
+
+	/* Could return -EINVAL for an invalid "how", but why bother? */
+
+	if (down_interruptible(&tsock->sem))
+		return -ERESTARTSYS;
+
+	sock_lock(tsock);
+
+	switch (sock->state) {
+	case SS_CONNECTED:
+
+		/* Send 'FIN+' or 'FIN-' message to peer */
+
+		sock_unlock(tsock);	/* dropped around the calls below, retaken before falling through */
+restart:
+		if ((buf = skb_dequeue(&sock->sk->sk_receive_queue))) {
+			atomic_dec(&tipc_queue_size);
+			if (TIPC_SKB_CB(buf)->handle != msg_data(buf_msg(buf))) {	/* partly-read message */
+				buf_discard(buf);
+				goto restart;
+			}
+			tipc_reject_msg(buf, TIPC_CONN_SHUTDOWN);	/* bounce an unread message as the FIN */
+		}
+		else {	/* queue is empty */
+			tipc_shutdown(tsock->p->ref);
+		}
+		sock_lock(tsock);
+
+		/* fall through */
+
+	case SS_DISCONNECTING:
+
+		/* Discard any unreceived messages */
+
+		while ((buf = skb_dequeue(&sock->sk->sk_receive_queue))) {
+			atomic_dec(&tipc_queue_size);
+			buf_discard(buf);
+		}
+		tsock->p->conn_unacked = 0;
+
+		/* fall through */
+
+	case SS_CONNECTING:
+		sock->state = SS_DISCONNECTING;
+		res = 0;
+		break;
+
+	default:	/* every other state has no connection to shut down */
+		res = -ENOTCONN;
+	}
+
+	sock_unlock(tsock);
+
+	up(&tsock->sem);
+	return res;
+}
+
+/**
+ * setsockopt - set socket option
+ * @sock: socket structure
+ * @lvl: option level
+ * @opt: option identifier
+ * @ov: pointer to new option value
+ * @ol: length of option value
+ * 
+ * For stream sockets only, accepts and ignores all IPPROTO_TCP options 
+ * (to ease compatibility).
+ * 
+ * Returns 0 on success, errno otherwise
+ */
+
+static int setsockopt(struct socket *sock, int lvl, int opt, char *ov, int ol)
+{
+	struct tipc_sock *tsock = tipc_sk(sock->sk);
+	u32 value;
+	int res;
+
+        if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
+                return 0;
+	if (lvl != SOL_TIPC)
+		return -ENOPROTOOPT;
+	if (ol < sizeof(value))	/* every supported option is a single u32 */
+		return -EINVAL;
+        if ((res = get_user(value, (u32 *)ov)))	/* leaves res == 0 on success */
+		return res;
+
+	if (down_interruptible(&tsock->sem)) 
+		return -ERESTARTSYS;
+	
+	switch (opt) {
+	case TIPC_IMPORTANCE:
+		res = tipc_set_portimportance(tsock->p->ref, value);
+		break;
+	case TIPC_SRC_DROPPABLE:
+		if (sock->type != SOCK_STREAM)
+			res = tipc_set_portunreliable(tsock->p->ref, value);
+		else 
+			res = -ENOPROTOOPT;
+		break;
+	case TIPC_DEST_DROPPABLE:
+		res = tipc_set_portunreturnable(tsock->p->ref, value);
+		break;
+	case TIPC_CONN_TIMEOUT:	/* res is still 0 from get_user() above */
+		sock->sk->sk_rcvtimeo = (value * HZ / 1000);	/* value presumably in ms; stored in HZ ticks */
+		break;
+	default:
+		res = -EINVAL;
+	}
+
+	up(&tsock->sem);
+	return res;
+}
+
+/**
+ * getsockopt - get socket option
+ * @sock: socket structure
+ * @lvl: option level
+ * @opt: option identifier
+ * @ov: receptacle for option value
+ * @ol: receptacle for length of option value
+ * 
+ * For stream sockets only, returns 0 length result for all IPPROTO_TCP options 
+ * (to ease compatibility).
+ * 
+ * Returns 0 on success, errno otherwise
+ */
+
+static int getsockopt(struct socket *sock, int lvl, int opt, char *ov, int *ol)
+{
+	struct tipc_sock *tsock = tipc_sk(sock->sk);
+        int len;
+	u32 value;
+        int res;
+
+        if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
+                return put_user(0, ol);
+	if (lvl != SOL_TIPC)
+		return -ENOPROTOOPT;
+        if ((res = get_user(len, ol)))	/* leaves res == 0 on success */
+                return res;
+
+	if (down_interruptible(&tsock->sem)) 
+		return -ERESTARTSYS;
+
+	switch (opt) {
+	case TIPC_IMPORTANCE:
+		res = tipc_portimportance(tsock->p->ref, &value);
+		break;
+	case TIPC_SRC_DROPPABLE:
+		res = tipc_portunreliable(tsock->p->ref, &value);
+		break;
+	case TIPC_DEST_DROPPABLE:
+		res = tipc_portunreturnable(tsock->p->ref, &value);
+		break;
+	case TIPC_CONN_TIMEOUT:	/* res is still 0 from get_user() above */
+		value = (sock->sk->sk_rcvtimeo * 1000) / HZ;
+		break;
+	default:
+		res = -EINVAL;
+	}
+
+	if (res) {
+		/* "get" failed */
+	}
+	else if (len < sizeof(value)) {
+		res = -EINVAL;
+	}
+	else if (copy_to_user(ov, &value, sizeof(value))) {
+		res = -EFAULT;	/* copy_to_user() returns a byte count, not an errno */
+	}
+	else {
+		res = put_user(sizeof(value), ol);
+	}
+
+        up(&tsock->sem);
+	return res;
+}
+
+/**
+ * Placeholders for non-implemented functionality
+ * 
+ * Each returns a fixed error code (POSIX-compliant where defined)
+ */
+
+static int ioctl(struct socket *s, u32 cmd, unsigned long arg)
+{
+        return -EINVAL;	/* every ioctl command is refused */
+}
+
+static int no_mmap(struct file *file, struct socket *sock,
+                   struct vm_area_struct *vma)
+{
+        return -EINVAL;	/* placeholder: the .mmap operation always fails */
+}
+static ssize_t no_sendpage(struct socket *sock, struct page *page,
+                           int offset, size_t size, int flags)
+{
+        return -EINVAL;	/* placeholder: the .sendpage operation always fails */
+}
+
+static int no_skpair(struct socket *s1, struct socket *s2)
+{
+	return -EOPNOTSUPP;	/* placeholder: the .socketpair operation always fails */
+}
+
+/**
+ * Protocol switches for the various types of TIPC sockets
+ */
+
+static struct proto_ops msg_ops = {	/* pairs send_msg() with recv_msg() */
+	.owner 		= THIS_MODULE,
+	.family		= AF_TIPC,
+	.release	= release,
+	.bind		= bind,
+	.connect	= connect,
+	.socketpair	= no_skpair,
+	.accept		= accept,
+	.getname	= get_name,
+	.poll		= poll,
+	.ioctl		= ioctl,
+	.listen		= listen,
+	.shutdown	= shutdown,
+	.setsockopt	= setsockopt,
+	.getsockopt	= getsockopt,
+	.sendmsg	= send_msg,	/* requires a destination address in msg_name */
+	.recvmsg	= recv_msg,
+        .mmap		= no_mmap,
+        .sendpage	= no_sendpage
+};	/* NOTE(review): which socket type uses this table is decided elsewhere - confirm */
+
+static struct proto_ops packet_ops = {	/* pairs send_packet() with recv_msg() */
+	.owner 		= THIS_MODULE,
+	.family		= AF_TIPC,
+	.release	= release,
+	.bind		= bind,
+	.connect	= connect,
+	.socketpair	= no_skpair,
+	.accept		= accept,
+	.getname	= get_name,
+	.poll		= poll,
+	.ioctl		= ioctl,
+	.listen		= listen,
+	.shutdown	= shutdown,
+	.setsockopt	= setsockopt,
+	.getsockopt	= getsockopt,
+	.sendmsg	= send_packet,	/* hands off to send_msg() when msg_name is set */
+	.recvmsg	= recv_msg,
+        .mmap		= no_mmap,
+        .sendpage	= no_sendpage
+};	/* differs from msg_ops only in .sendmsg */
+
+static struct proto_ops stream_ops = {	/* pairs send_stream() with recv_stream() */
+	.owner 		= THIS_MODULE,
+	.family		= AF_TIPC,
+	.release	= release,
+	.bind		= bind,
+	.connect	= connect,
+	.socketpair	= no_skpair,
+	.accept		= accept,
+	.getname	= get_name,
+	.poll		= poll,
+	.ioctl		= ioctl,
+	.listen		= listen,
+	.shutdown	= shutdown,
+	.setsockopt	= setsockopt,
+	.getsockopt	= getsockopt,
+	.sendmsg	= send_stream,	/* splits large sends into send_packet() calls */
+	.recvmsg	= recv_stream,	/* may gather data from several messages */
+        .mmap		= no_mmap,
+        .sendpage	= no_sendpage
+};	/* differs from msg_ops only in .sendmsg and .recvmsg */
+
+static struct net_proto_family tipc_family_ops = {	/* passed to sock_register() in tipc_socket_init() */
+	.owner 		= THIS_MODULE,
+	.family		= AF_TIPC,
+	.create		= tipc_create	/* also called directly by accept() */
+};
+
+static struct proto tipc_proto = {	/* passed to proto_register() in tipc_socket_init() */
+	.name		= "TIPC",
+	.owner		= THIS_MODULE,
+	.obj_size	= sizeof(struct tipc_sock)	/* presumably the per-socket allocation size - confirm */
+};
+
+/**
+ * tipc_socket_init - register the TIPC protocol and socket family
+ * 
+ * Returns 0 on success, errno otherwise
+ */
+int tipc_socket_init(void)
+{
+	int res;
+
+	res = proto_register(&tipc_proto, 1);
+	if (res) {
+		err("Failed to register TIPC protocol type\n");
+		return res;
+	}
+
+	res = sock_register(&tipc_family_ops);
+	if (res) {
+		err("Failed to register TIPC socket type\n");
+		proto_unregister(&tipc_proto);	/* undo the first registration */
+		return res;
+	}
+
+	sockets_enabled = 1;
+
+	return 0;
+}
+
+/**
+ * tipc_socket_stop - tear down TIPC socket interface, if it was started
+ */
+void tipc_socket_stop(void)
+{
+	/* sockets_enabled is set by tipc_socket_init() once both registrations succeed */
+	if (sockets_enabled) {
+		sockets_enabled = 0;
+		sock_unregister(tipc_family_ops.family);
+		proto_unregister(&tipc_proto);
+	}
+}
+
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
new file mode 100644
index 00000000000..5ff38b9f319
--- /dev/null
+++ b/net/tipc/subscr.c
@@ -0,0 +1,527 @@
+/*
+ * net/tipc/subscr.c: TIPC subscription service
+ * 
+ * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "dbg.h"
+#include "subscr.h"
+#include "name_table.h"
+#include "ref.h"
+
+/**
+ * struct subscriber - TIPC network topology subscriber
+ * @ref: object reference to subscriber object itself (from tipc_ref_acquire)
+ * @lock: pointer to spinlock controlling access to subscriber object
+ * @subscriber_list: adjacent subscribers in topology server's subscriber list
+ * @subscription_list: list of subscription objects for this subscriber
+ * @port_ref: object reference to port used to communicate with subscriber
+ * @swap: non-zero if subscriber uses opposite endianness in its messages
+ */
+ 
+struct subscriber {
+	u32 ref;
+        spinlock_t *lock;	/* filled in by tipc_ref_acquire() */
+	struct list_head subscriber_list;
+	struct list_head subscription_list;
+	u32 port_ref;
+	int swap;	/* handed to htohl() as its swap argument */
+};
+
+/**
+ * struct top_srv - TIPC network topology subscription service
+ * @user_ref: TIPC userid of subscription service
+ * @setup_port: reference to TIPC port that handles subscription requests
+ * @subscription_count: number of active subscriptions (not subscribers!)
+ * @subscriber_list: list of subscriber objects using the service
+ * @lock: spinlock governing access to subscriber list
+ */
+
+struct top_srv {
+	u32 user_ref;
+	u32 setup_port;
+	atomic_t subscription_count;
+	struct list_head subscriber_list;
+	spinlock_t lock;
+};
+
+static struct top_srv topsrv = { 0 };	/* re-initialized by tipc_subscr_start() */
+
+/**
+ * htohl - convert value to endianness used by destination
+ * @in: value to convert
+ * @swap: non-zero if endianness must be reversed
+ * Returns @in with its four bytes reversed if @swap is set, else @in as is
+ */
+
+static inline u32 htohl(u32 in, int swap)
+{
+	if (swap)	/* byte order 0,1,2,3 becomes 3,2,1,0 */
+		in = (in << 24) | ((in & 0xff00) << 8) |
+		     ((in >> 8) & 0xff00) | (in >> 24);
+	return in;
+}
+
+/**
+ * subscr_send_event - fill in sub->evt (via htohl) and send it to the owner
+ */
+
+static void subscr_send_event(struct subscription *sub, u32 found_lower,
+			      u32 found_upper, u32 event, u32 port_ref,
+			      u32 node)
+{
+	struct subscriber *owner = sub->owner;
+	struct tipc_event *evt = &sub->evt;
+	struct iovec msg_sect;
+	int swap = owner->swap;
+
+	evt->event = htohl(event, swap);
+	evt->found_lower = htohl(found_lower, swap);
+	evt->found_upper = htohl(found_upper, swap);
+	evt->port.ref = htohl(port_ref, swap);
+	evt->port.node = htohl(node, swap);
+
+	msg_sect.iov_base = (void *)evt;
+	msg_sect.iov_len = sizeof(struct tipc_event);
+	tipc_send(owner->port_ref, 1, &msg_sect);
+}
+
+/**
+ * tipc_subscr_overlap - test for subscription overlap with the given values
+ *
+ * The found range is clipped to [sub->seq.lower, sub->seq.upper]; there is
+ * overlap when the clipped range is not empty.
+ *
+ * Returns 1 if there is overlap, otherwise 0.
+ */
+
+int tipc_subscr_overlap(struct subscription *sub,
+			u32 found_lower,
+			u32 found_upper)
+{
+	/* Clip both ends of the found range to the subscribed range */
+	u32 lo = (found_lower < sub->seq.lower) ? sub->seq.lower : found_lower;
+	u32 hi = (found_upper > sub->seq.upper) ? sub->seq.upper : found_upper;
+
+	return (lo > hi) ? 0 : 1;
+}
+
+/**
+ * tipc_subscr_report_overlap - issue event if there is subscription overlap
+ *
+ * An event is sent only when the found range overlaps the subscription and
+ * either @must is non-zero or the subscription filter is TIPC_SUB_PORTS.
+ *
+ * Protected by nameseq.lock in name_table.c
+ */
+
+void tipc_subscr_report_overlap(struct subscription *sub, u32 found_lower,
+				u32 found_upper, u32 event, u32 port_ref,
+				u32 node, int must)
+{
+	dbg("Rep overlap %u:%u,%u<->%u,%u\n", sub->seq.type, sub->seq.lower,
+	    sub->seq.upper, found_lower, found_upper);
+
+	/* Both conditions must hold before an event goes out */
+	if (tipc_subscr_overlap(sub, found_lower, found_upper) &&
+	    (must || (sub->filter == TIPC_SUB_PORTS)))
+		subscr_send_event(sub, found_lower, found_upper, event,
+				  port_ref, node);
+}
+
+/**
+ * subscr_timeout - subscription timeout has occurred
+ * @sub: expired subscription (installed as timer argument by subscr_subscribe)
+ */
+static void subscr_timeout(struct subscription *sub)
+{
+	struct subscriber *subscriber;
+	u32 subscriber_ref;
+
+	/* Validate subscriber reference (in case subscriber is terminating) */
+
+	subscriber_ref = sub->owner->ref;
+	subscriber = (struct subscriber *)tipc_ref_lock(subscriber_ref);
+	if (subscriber == NULL)
+		return;
+
+	/* Unlink subscription from name table */
+
+	tipc_nametbl_unsubscribe(sub);
+
+	/* Notify subscriber of timeout, then unlink subscription */
+	/* NOTE(review): evt.s is the raw request copy, yet is htohl()'d again - confirm */
+	subscr_send_event(sub, 
+			  sub->evt.s.seq.lower, 
+			  sub->evt.s.seq.upper,
+			  TIPC_SUBSCR_TIMEOUT, 
+			  0, 
+			  0);
+	list_del(&sub->subscription_list);
+
+	/* Now destroy subscription (subscriber lock is dropped first) */
+
+	tipc_ref_unlock(subscriber_ref);
+	k_term_timer(&sub->timer);
+	kfree(sub);
+	atomic_dec(&topsrv.subscription_count);
+}
+
+/**
+ * subscr_terminate - terminate communication with a subscriber
+ *
+ * Called with subscriber locked.  Routine must temporarily release this lock
+ * to enable subscription timeout routine(s) to finish without deadlocking;
+ * the lock is then reclaimed to allow caller to release it upon return.
+ * (This should work even in the unlikely event some other thread creates
+ * a new object reference in the interim that uses this lock; this routine will
+ * simply wait for it to be released, then claim it.)
+ * The subscriber is freed here, so callers save subscriber->lock beforehand.
+ */
+static void subscr_terminate(struct subscriber *subscriber)
+{
+	struct subscription *sub;
+	struct subscription *sub_temp;
+
+	/* Invalidate subscriber reference */
+
+	tipc_ref_discard(subscriber->ref);
+	spin_unlock_bh(subscriber->lock);
+
+	/* Destroy any existing subscriptions for subscriber */
+	
+	list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list,
+				 subscription_list) {
+		if (sub->timeout != TIPC_WAIT_FOREVER) {	/* a timer was set up */
+			k_cancel_timer(&sub->timer);
+			k_term_timer(&sub->timer);
+		}
+		tipc_nametbl_unsubscribe(sub);
+		list_del(&sub->subscription_list);
+		dbg("Term: Removed sub %u,%u,%u from subscriber %x list\n",
+		    sub->seq.type, sub->seq.lower, sub->seq.upper, subscriber);
+		kfree(sub);
+		atomic_dec(&topsrv.subscription_count);
+	}
+
+	/* Sever connection to subscriber */
+
+	tipc_shutdown(subscriber->port_ref);
+	tipc_deleteport(subscriber->port_ref);
+
+	/* Remove subscriber from topology server's subscriber list */
+
+	spin_lock_bh(&topsrv.lock);
+	list_del(&subscriber->subscriber_list);
+	spin_unlock_bh(&topsrv.lock);
+
+	/* Now destroy subscriber (lock is re-taken first, for caller to drop) */
+
+	spin_lock_bh(subscriber->lock);
+	kfree(subscriber);
+}
+
+/**
+ * subscr_subscribe - create subscription for subscriber
+ * @s: subscription request as received (possibly in opposite endianness)
+ * @subscriber: subscriber issuing the request
+ *
+ * Called with subscriber locked.  If the request cannot be honoured the
+ * subscriber is terminated (and freed) via subscr_terminate().
+ */
+static void subscr_subscribe(struct tipc_subscr *s,
+			     struct subscriber *subscriber)
+{
+	struct subscription *sub;
+
+	/* Refuse subscription if global limit exceeded */
+	if (atomic_read(&topsrv.subscription_count) >= tipc_max_subscriptions) {
+		warn("Failed: max %u subscriptions\n", tipc_max_subscriptions);
+		subscr_terminate(subscriber);
+		return;
+	}
+
+	/* Allocate subscription object */
+	sub = kmalloc(sizeof(*sub), GFP_ATOMIC);
+	if (sub == NULL) {
+		warn("Memory squeeze; ignoring subscription\n");
+		subscr_terminate(subscriber);
+		return;
+	}
+
+	/* Determine/update subscriber's endianness */
+	if ((s->filter == TIPC_SUB_PORTS) || (s->filter == TIPC_SUB_SERVICE))
+		subscriber->swap = 0;
+	else
+		subscriber->swap = 1;
+
+	/* Initialize subscription object */
+	memset(sub, 0, sizeof(*sub));
+	sub->seq.type = htohl(s->seq.type, subscriber->swap);
+	sub->seq.lower = htohl(s->seq.lower, subscriber->swap);
+	sub->seq.upper = htohl(s->seq.upper, subscriber->swap);
+	sub->timeout = htohl(s->timeout, subscriber->swap);
+	sub->filter = htohl(s->filter, subscriber->swap);
+	if ((((sub->filter != TIPC_SUB_PORTS) 
+	      && (sub->filter != TIPC_SUB_SERVICE)))
+	    || (sub->seq.lower > sub->seq.upper)) {
+		warn("Rejecting illegal subscription %u,%u,%u\n",
+		     sub->seq.type, sub->seq.lower, sub->seq.upper);
+		kfree(sub);
+		subscr_terminate(subscriber);
+		return;
+	}
+	memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr));
+	INIT_LIST_HEAD(&sub->subscription_list);
+	INIT_LIST_HEAD(&sub->nameseq_list);
+	list_add(&sub->subscription_list, &subscriber->subscription_list);
+	atomic_inc(&topsrv.subscription_count);
+
+	/* Set owner before arming timer: subscr_timeout() dereferences it */
+	sub->owner = subscriber;
+	if (sub->timeout != TIPC_WAIT_FOREVER) {
+		k_init_timer(&sub->timer,
+			     (Handler)subscr_timeout, (unsigned long)sub);
+		k_start_timer(&sub->timer, sub->timeout);
+	}
+	tipc_nametbl_subscribe(sub);
+}
+
+/**
+ * subscr_conn_shutdown_event - handle termination request from subscriber
+ *
+ * @usr_handle carries the subscriber's reference; nothing is done if that
+ * reference can no longer be locked.
+ */
+
+static void subscr_conn_shutdown_event(void *usr_handle, u32 portref,
+				       struct sk_buff **buf,
+				       unsigned char const *data,
+				       unsigned int size, int reason)
+{
+	u32 ref = (u32)(unsigned long)usr_handle;
+	struct subscriber *sbr = tipc_ref_lock(ref);
+	spinlock_t *lock;
+
+	if (!sbr)
+		return;
+	lock = sbr->lock;	/* sbr itself is freed by subscr_terminate() */
+	subscr_terminate(sbr);
+	spin_unlock_bh(lock);
+}
+
+/**
+ * subscr_conn_msg_event - handle new subscription request from subscriber
+ *
+ * A message of exactly sizeof(struct tipc_subscr) bytes is treated as a
+ * subscription request; any other size terminates the subscriber.
+ */
+
+static void subscr_conn_msg_event(void *usr_handle, u32 port_ref,
+				  struct sk_buff **buf, const unchar *data,
+				  u32 size)
+{
+	u32 ref = (u32)(unsigned long)usr_handle;
+	struct subscriber *sbr = tipc_ref_lock(ref);
+	spinlock_t *lock;
+
+	if (!sbr)
+		return;
+
+	lock = sbr->lock;	/* saved first: either path may free sbr */
+	if (size == sizeof(struct tipc_subscr))
+		subscr_subscribe((struct tipc_subscr *)data, sbr);
+	else
+		subscr_terminate(sbr);
+	spin_unlock_bh(lock);
+}
+
+/**
+ * subscr_named_msg_event - handle request to establish a new subscriber
+ * (@size must be 0 or sizeof(struct tipc_subscr), else request is rejected)
+ */
+static void subscr_named_msg_event(void *usr_handle,
+				   u32 port_ref,
+				   struct sk_buff **buf,
+				   const unchar *data,
+				   u32 size,
+				   u32 importance, 
+				   struct tipc_portid const *orig,
+				   struct tipc_name_seq const *dest)
+{
+	struct subscriber *subscriber;
+	struct iovec msg_sect = {0, 0};
+	spinlock_t *subscriber_lock;
+
+	dbg("subscr_named_msg_event: orig = %x own = %x,\n",
+	    orig->node, tipc_own_addr);
+	if (size && (size != sizeof(struct tipc_subscr))) {
+		warn("Received tipc_subscr of invalid size\n");
+		return;
+	}
+
+	/* Create subscriber object */
+
+	subscriber = kmalloc(sizeof(struct subscriber), GFP_ATOMIC);
+	if (subscriber == NULL) {
+		warn("Memory squeeze; ignoring subscriber setup\n");
+		return;
+	}
+	memset(subscriber, 0, sizeof(struct subscriber));
+	INIT_LIST_HEAD(&subscriber->subscription_list);
+	INIT_LIST_HEAD(&subscriber->subscriber_list);
+	subscriber->ref = tipc_ref_acquire(subscriber, &subscriber->lock);
+	if (subscriber->ref == 0) {
+		warn("Failed to acquire subscriber reference\n");
+		kfree(subscriber);
+		return;
+	}
+
+	/* Establish a connection to subscriber */
+
+	tipc_createport(topsrv.user_ref,
+			(void *)(unsigned long)subscriber->ref,
+			importance,
+			0,
+			0,
+			subscr_conn_shutdown_event,
+			0,
+			0,
+			subscr_conn_msg_event,
+			0,
+			&subscriber->port_ref);
+	if (subscriber->port_ref == 0) {
+		warn("Memory squeeze; failed to create subscription port\n");
+		tipc_ref_discard(subscriber->ref);
+		kfree(subscriber);
+		return;
+	}
+	tipc_connect2port(subscriber->port_ref, orig);
+
+
+	/* Add subscriber to topology server's subscriber list */
+
+	tipc_ref_lock(subscriber->ref);	/* dropped via subscriber_lock below */
+	spin_lock_bh(&topsrv.lock);
+	list_add(&subscriber->subscriber_list, &topsrv.subscriber_list);
+	spin_unlock_bh(&topsrv.lock);
+
+	/*
+	 * Subscribe now if message contains a subscription,
+	 * otherwise send an empty response to complete connection handshaking
+	 */
+
+	subscriber_lock = subscriber->lock;	/* subscr_subscribe() may free subscriber */
+	if (size)
+		subscr_subscribe((struct tipc_subscr *)data, subscriber);
+	else
+		tipc_send(subscriber->port_ref, 1, &msg_sect);
+
+	spin_unlock_bh(subscriber_lock);
+}
+
+int tipc_subscr_start(void)
+{
+	struct tipc_name_seq seq = {TIPC_TOP_SRV, TIPC_TOP_SRV, TIPC_TOP_SRV};
+	int res = -1;
+	/* Bring up topology service; returns 0, or error code of failing step */
+	memset(&topsrv, 0, sizeof (topsrv));
+	topsrv.lock = SPIN_LOCK_UNLOCKED;
+	INIT_LIST_HEAD(&topsrv.subscriber_list);
+	/* Whole setup runs under topsrv.lock: attach, create port, publish it */
+	spin_lock_bh(&topsrv.lock);
+	res = tipc_attach(&topsrv.user_ref, 0, 0);
+	if (res) {
+		spin_unlock_bh(&topsrv.lock);
+		return res;
+	}
+
+ 	res = tipc_createport(topsrv.user_ref,
+ 			      0,
+ 			      TIPC_CRITICAL_IMPORTANCE,
+ 			      0,
+ 			      0,
+ 			      0,
+ 			      0,
+ 			      subscr_named_msg_event,
+ 			      0,
+ 			      0,
+ 			      &topsrv.setup_port);
+ 	if (res)
+		goto failed;
+
+ 	res = tipc_nametbl_publish_rsv(topsrv.setup_port, TIPC_NODE_SCOPE, &seq);
+ 	if (res)
+		goto failed;
+
+	spin_unlock_bh(&topsrv.lock);
+	return 0;
+
+failed:	/* reached only after tipc_attach() succeeded, so undo it */
+	err("Failed to create subscription service\n");
+	tipc_detach(topsrv.user_ref);
+	topsrv.user_ref = 0;
+	spin_unlock_bh(&topsrv.lock);
+	return res;
+}
+
+void tipc_subscr_stop(void)
+{
+	struct subscriber *cur, *next;
+	spinlock_t *lock;
+
+	/* Nothing to do unless tipc_subscr_start() left a user reference */
+	if (!topsrv.user_ref)
+		return;
+
+	tipc_deleteport(topsrv.setup_port);
+	list_for_each_entry_safe(cur, next, &topsrv.subscriber_list,
+				 subscriber_list) {
+		tipc_ref_lock(cur->ref);
+		lock = cur->lock;	/* cur is freed by subscr_terminate() */
+		subscr_terminate(cur);
+		spin_unlock_bh(lock);
+	}
+	tipc_detach(topsrv.user_ref);
+	topsrv.user_ref = 0;
+}
+
+
+int tipc_ispublished(struct tipc_name const *name)
+{
+	u32 domain = 0;	/* handed by address to tipc_nametbl_translate() */
+
+	return tipc_nametbl_translate(name->type, name->instance, &domain) ? 1 : 0;
+}
+
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
new file mode 100644
index 00000000000..1e5090465d2
--- /dev/null
+++ b/net/tipc/subscr.h
@@ -0,0 +1,80 @@
+/*
+ * net/tipc/subscr.h: Include file for TIPC subscription service
+ * 
+ * Copyright (c) 2003-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_SUBSCR_H
+#define _TIPC_SUBSCR_H
+
+/**
+ * struct subscription - TIPC network topology subscription object
+ * @seq: name sequence associated with subscription
+ * @timeout: duration of subscription (in ms)
+ * @filter: event filtering to be done for subscription
+ * @evt: template for events generated by subscription
+ * @subscription_list: adjacent subscriptions in subscriber's subscription list
+ * @nameseq_list: adjacent subscriptions in name sequence's subscription list
+ * @timer: timer governing subscription duration (unused if no timeout is set)
+ * @owner: pointer to subscriber object associated with this subscription
+ */
+ 
+struct subscription {
+	struct tipc_name_seq seq;
+	u32 timeout;
+	u32 filter;
+	struct tipc_event evt;
+	struct list_head subscription_list;
+	struct list_head nameseq_list;
+	struct timer_list timer;
+	struct subscriber *owner;
+};
+
+int tipc_subscr_overlap(struct subscription * sub, 
+			u32 found_lower, 
+			u32 found_upper);	/* returns 1 on overlap, else 0 */
+
+void tipc_subscr_report_overlap(struct subscription * sub, 
+				u32 found_lower, 
+				u32 found_upper,
+				u32 event, 
+				u32 port_ref, 
+				u32 node,
+				int must_report);	/* non-zero: ignore filter */
+
+int tipc_subscr_start(void);	/* returns 0 on success, else an error code */
+
+void tipc_subscr_stop(void);	/* no-op unless service holds a user reference */
+
+
+#endif
diff --git a/net/tipc/user_reg.c b/net/tipc/user_reg.c
new file mode 100644
index 00000000000..106200d7658
--- /dev/null
+++ b/net/tipc/user_reg.c
@@ -0,0 +1,265 @@
+/*
+ * net/tipc/user_reg.c: TIPC user registry code
+ * 
+ * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2004-2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "user_reg.h"
+
+/*
+ * TIPC user registry keeps track of users of the tipc_port interface.
+ *
+ * The registry utilizes an array of "TIPC user" entries; 
+ * a user's ID is the index of their associated array entry.
+ * Array entry 0 is not used, so userid 0 is not valid;
+ * TIPC sometimes uses this value to denote an anonymous user.
+ * The list of free entries is initially chained from last entry to entry 1.
+ */
+
+/**
+ * struct tipc_user - registered TIPC user info
+ * @next: next free entry's index, 0 if last (or -1 for an allocated entry)
+ * @callback: ptr to routine to call when TIPC mode changes (NULL if none)
+ * @usr_handle: user-defined value passed to callback routine
+ * @ports: list of user ports owned by the user (linked through uport_list)
+ */
+
+struct tipc_user {
+	int next;
+	tipc_mode_event callback;
+	void *usr_handle;
+	struct list_head ports;
+};
+
+#define MAX_USERID 64	/* highest valid user ID; array entry 0 is unused */
+#define USER_LIST_SIZE ((MAX_USERID + 1) * sizeof(struct tipc_user))
+
+static struct tipc_user *users = 0;	/* registry array; allocated by reg_init() */
+static u32 next_free_user = MAX_USERID + 1;	/* free-list head; 0 = none free */
+static spinlock_t reg_lock = SPIN_LOCK_UNLOCKED;
+
+/**
+ * reg_init - create TIPC user registry (but don't activate it)
+ *
+ * If registry has been pre-initialized it is left "as is".
+ * Returns TIPC_OK if a registry exists on exit, otherwise -ENOMEM.
+ * NOTE: This routine may be called when TIPC is inactive.
+ */
+static int reg_init(void)
+{
+	u32 id;
+
+	spin_lock_bh(&reg_lock);
+	if (users)
+		goto unlock;
+	users = kmalloc(USER_LIST_SIZE, GFP_ATOMIC);
+	if (!users)
+		goto unlock;
+	memset(users, 0, USER_LIST_SIZE);
+	for (id = 1; id <= MAX_USERID; id++)	/* entry id links to id - 1 */
+		users[id].next = id - 1;
+	next_free_user = MAX_USERID;
+unlock:
+	spin_unlock_bh(&reg_lock);
+	return users ? TIPC_OK : -ENOMEM;
+}
+
+/**
+ * reg_callback - inform TIPC user about current operating mode
+ * (handler and argument are read under reg_lock, handler runs unlocked)
+ */
+static void reg_callback(struct tipc_user *user_ptr)
+{
+	tipc_mode_event handler;
+	void *handle;
+
+	spin_lock_bh(&reg_lock);
+	handle = user_ptr->usr_handle;
+	handler = user_ptr->callback;
+	spin_unlock_bh(&reg_lock);
+
+	if (handler)
+		handler(handle, tipc_mode, tipc_own_addr);
+}
+
+/**
+ * tipc_reg_start - activate TIPC user registry
+ *
+ * Schedules reg_callback(), via tipc_k_signal(), for each user with a callback
+ */
+int tipc_reg_start(void)
+{
+	u32 id;
+	int res = reg_init();
+
+	if (res)
+		return res;
+	for (id = 1; id <= MAX_USERID; id++) {
+		if (!users[id].callback)
+			continue;
+		tipc_k_signal((Handler)reg_callback, (unsigned long)&users[id]);
+	}
+	return TIPC_OK;
+}
+
+/**
+ * tipc_reg_stop - shut down & delete TIPC user registry
+ *
+ * Every user that still has a callback is notified directly before the
+ * registry array is freed; a registry that was never created is ignored.
+ */
+void tipc_reg_stop(void)
+{
+	struct tipc_user *user_ptr;
+
+	if (users == NULL)
+		return;
+	for (user_ptr = users + 1; user_ptr <= users + MAX_USERID; user_ptr++)
+		if (user_ptr->callback)
+			reg_callback(user_ptr);
+	kfree(users);
+	users = NULL;
+}
+
+/**
+ * tipc_attach - register a TIPC user
+ * @userid: receives the ID (registry index) allocated to the new user
+ * @cb: mode-change callback; must be non-NULL if TIPC is not running
+ * @usr_handle: value passed to @cb
+ * Returns TIPC_OK, -ENOPROTOOPT, -ENOMEM (no registry) or -EBUSY (no free ID)
+ * NOTE: This routine may be called when TIPC is inactive.
+ */
+int tipc_attach(u32 *userid, tipc_mode_event cb, void *usr_handle)
+{
+	struct tipc_user *user_ptr;
+
+	if ((tipc_mode == TIPC_NOT_RUNNING) && !cb)
+		return -ENOPROTOOPT;
+	if (!users && (reg_init() != TIPC_OK))	/* registry could not be created */
+		return -ENOMEM;
+
+	spin_lock_bh(&reg_lock);
+	if (!next_free_user) {
+		spin_unlock_bh(&reg_lock);
+		return -EBUSY;
+	}
+	user_ptr = &users[next_free_user];
+	*userid = next_free_user;
+	next_free_user = user_ptr->next;
+	user_ptr->next = -1;	/* marks the entry as allocated */
+	spin_unlock_bh(&reg_lock);
+	user_ptr->callback = cb;
+	user_ptr->usr_handle = usr_handle;
+	INIT_LIST_HEAD(&user_ptr->ports);
+	atomic_inc(&tipc_user_count);
+	if (cb && (tipc_mode != TIPC_NOT_RUNNING))
+		tipc_k_signal((Handler)reg_callback, (unsigned long)user_ptr);
+	return TIPC_OK;
+}
+
+/**
+ * tipc_detach - deregister a TIPC user
+ * @userid: ID from tipc_attach(); invalid or unallocated IDs are ignored
+ */
+void tipc_detach(u32 userid)
+{
+	struct tipc_user *user_ptr;
+	struct list_head ports_temp;
+	struct user_port *up_ptr, *temp_up_ptr;
+
+	if ((userid == 0) || (userid > MAX_USERID))
+		return;
+
+	spin_lock_bh(&reg_lock);
+	if ((!users) || (users[userid].next >= 0)) {	/* entry not allocated */
+		spin_unlock_bh(&reg_lock);
+		return;
+	}
+
+	user_ptr = &users[userid];
+        user_ptr->callback = NULL;              
+	INIT_LIST_HEAD(&ports_temp);
+        list_splice(&user_ptr->ports, &ports_temp);	/* deleted below, unlocked */
+	user_ptr->next = next_free_user;	/* push entry back onto free list */
+	next_free_user = userid;
+	spin_unlock_bh(&reg_lock);
+
+	atomic_dec(&tipc_user_count);
+
+        list_for_each_entry_safe(up_ptr, temp_up_ptr, &ports_temp, uport_list) {
+		tipc_deleteport(up_ptr->ref);
+	}
+}
+
+/**
+ * tipc_reg_add_port - register a user's driver port
+ *
+ * Links the port onto its user's port list.  A user_ref of 0 is accepted
+ * without linking anything.
+ * Returns TIPC_OK, -EINVAL (user_ref too large) or -ENOPROTOOPT
+ */
+int tipc_reg_add_port(struct user_port *up_ptr)
+{
+	if (!up_ptr->user_ref)
+		return TIPC_OK;
+	if (up_ptr->user_ref > MAX_USERID)
+		return -EINVAL;
+	if (!users || (tipc_mode == TIPC_NOT_RUNNING))
+		return -ENOPROTOOPT;
+
+	spin_lock_bh(&reg_lock);
+	list_add(&up_ptr->uport_list, &users[up_ptr->user_ref].ports);
+	spin_unlock_bh(&reg_lock);
+	return TIPC_OK;
+}
+
+/**
+ * tipc_reg_remove_port - deregister a user's driver port
+ *
+ * Returns TIPC_OK, -EINVAL (user_ref above MAX_USERID) or -ENOPROTOOPT
+ */
+int tipc_reg_remove_port(struct user_port *up_ptr)
+{
+	if (!up_ptr->user_ref)
+		return TIPC_OK;
+	if (up_ptr->user_ref > MAX_USERID)
+		return -EINVAL;
+	if (users == NULL)
+		return -ENOPROTOOPT;
+	spin_lock_bh(&reg_lock);
+	list_del_init(&up_ptr->uport_list);
+	spin_unlock_bh(&reg_lock);
+	return TIPC_OK;
+}
+
diff --git a/net/tipc/user_reg.h b/net/tipc/user_reg.h
new file mode 100644
index 00000000000..d0e88794ed1
--- /dev/null
+++ b/net/tipc/user_reg.h
@@ -0,0 +1,48 @@
+/*
+ * net/tipc/user_reg.h: Include file for TIPC user registry code
+ * 
+ * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_USER_REG_H
+#define _TIPC_USER_REG_H
+
+#include "port.h"
+
+int tipc_reg_start(void);	/* returns TIPC_OK or the reg_init() error */
+void tipc_reg_stop(void);	/* notifies users, then frees the registry */
+
+int tipc_reg_add_port(struct user_port *up_ptr);	/* TIPC_OK or -errno */
+int tipc_reg_remove_port(struct user_port *up_ptr);	/* TIPC_OK or -errno */
+
+#endif
diff --git a/net/tipc/zone.c b/net/tipc/zone.c
new file mode 100644
index 00000000000..7c11f7f83a2
--- /dev/null
+++ b/net/tipc/zone.c
@@ -0,0 +1,169 @@
+/*
+ * net/tipc/zone.c: TIPC zone management routines
+ * 
+ * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "zone.h"
+#include "net.h"
+#include "addr.h"
+#include "node_subscr.h"
+#include "cluster.h"
+#include "node.h"
+
+struct _zone *tipc_zone_create(u32 addr)
+{
+	struct _zone *zone;
+	u32 z_num;
+
+	/* Returns new zone (also stored in tipc_net.zones[]), or NULL */
+	if (!tipc_addr_domain_valid(addr))
+		return NULL;
+	zone = kmalloc(sizeof(*zone), GFP_ATOMIC);
+	if (zone == NULL)
+		return NULL;
+	memset(zone, 0, sizeof(*zone));
+	z_num = tipc_zone(addr);
+	zone->addr = tipc_addr(z_num, 0, 0);
+	tipc_net.zones[z_num] = zone;
+	return zone;
+}
+
+void tipc_zone_delete(struct _zone *z_ptr)
+{
+	u32 c_num;
+
+	if (z_ptr == NULL)
+		return;
+	/* Delete cluster slots 1..tipc_max_clusters, then the zone itself */
+	for (c_num = 1; c_num <= tipc_max_clusters; c_num++)
+		tipc_cltr_delete(z_ptr->clusters[c_num]);
+	kfree(z_ptr);
+}
+
+void tipc_zone_attach_cluster(struct _zone *z_ptr, struct cluster *c_ptr)
+{
+	u32 c_num = tipc_cluster(c_ptr->addr);	/* slot index taken from address */
+
+	assert(c_ptr->addr);
+	assert(c_num <= tipc_max_clusters);
+	assert(z_ptr->clusters[c_num] == 0);	/* slot must still be empty */
+	z_ptr->clusters[c_num] = c_ptr;
+}
+
+void tipc_zone_remove_as_router(struct _zone *z_ptr, u32 router)
+{
+	struct cluster *c_ptr;
+	u32 c_num;
+
+	for (c_num = 1; c_num <= tipc_max_clusters; c_num++) {
+		c_ptr = z_ptr->clusters[c_num];
+		if (c_ptr)	/* empty slots are skipped */
+			tipc_cltr_remove_as_router(c_ptr, router);
+	}
+}
+
+void tipc_zone_send_external_routes(struct _zone *z_ptr, u32 dest)
+{
+	struct cluster *c_ptr;
+	u32 c_num;
+
+	for (c_num = 1; c_num <= tipc_max_clusters; c_num++) {
+		c_ptr = z_ptr->clusters[c_num];
+		if (!c_ptr || in_own_cluster(z_ptr->addr))
+			continue;	/* empty slot, or zone addr in own cluster */
+		tipc_cltr_send_ext_routes(c_ptr, dest);
+	}
+}
+
+struct node *tipc_zone_select_remote_node(struct _zone *z_ptr, u32 addr, u32 ref)
+{
+	struct cluster *c_ptr;
+	struct node *n_ptr;
+	u32 c_num;
+
+	/* Returns a node, preferably from addr's own cluster, or NULL if none */
+	if (!z_ptr)
+		return 0;
+	c_ptr = z_ptr->clusters[tipc_cluster(addr)];
+	if (!c_ptr)
+		return 0;
+	n_ptr = tipc_cltr_select_node(c_ptr, ref);
+	if (n_ptr)
+		return n_ptr;
+	/* Links to any other clusters within this zone ? */
+	for (c_num = 1; c_num <= tipc_max_clusters; c_num++) {
+		c_ptr = z_ptr->clusters[c_num];
+		if (!c_ptr)
+			continue;	/* empty slot must not end the search */
+		n_ptr = tipc_cltr_select_node(c_ptr, ref);
+		if (n_ptr)
+			return n_ptr;
+	}
+	return 0;
+}
+
+u32 tipc_zone_select_router(struct _zone *z_ptr, u32 addr, u32 ref)
+{
+	struct cluster *c_ptr;
+	u32 router = 0;
+	u32 c_num;
+
+	if (z_ptr == NULL)
+		return 0;
+
+	/* First choice: the cluster that addr itself belongs to */
+	c_ptr = z_ptr->clusters[tipc_cluster(addr)];
+	if (c_ptr)
+		router = tipc_cltr_select_router(c_ptr, ref);
+
+	/* Otherwise try each cluster slot of the zone in ascending order */
+	for (c_num = 1; !router && (c_num <= tipc_max_clusters); c_num++) {
+		c_ptr = z_ptr->clusters[c_num];
+		if (c_ptr)
+			router = tipc_cltr_select_router(c_ptr, ref);
+	}
+	return router;
+}
+
+
+u32 tipc_zone_next_node(u32 addr)
+{
+	struct cluster *c_ptr = tipc_cltr_find(addr);
+
+	if (c_ptr == NULL)
+		return 0;	/* no cluster found for addr */
+	return tipc_cltr_next_node(c_ptr, addr);
+}
+
diff --git a/net/tipc/zone.h b/net/tipc/zone.h
new file mode 100644
index 00000000000..267999c5a24
--- /dev/null
+++ b/net/tipc/zone.h
@@ -0,0 +1,71 @@
+/*
+ * net/tipc/zone.h: Include file for TIPC zone management routines
+ * 
+ * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2005, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_ZONE_H
+#define _TIPC_ZONE_H
+
+#include "node_subscr.h"
+#include "net.h"
+
+
+/**
+ * struct _zone - TIPC zone structure
+ * @addr: network address of zone
+ * @clusters: pointers to the zone's clusters, indexed by cluster number (NULL if absent)
+ * @links: (used for inter-zone communication)
+ */
+ 
+struct _zone {
+	u32 addr;
+	struct cluster *clusters[2]; /* currently limited to just 1 cluster; zone.c loops start at index 1 */
+	u32 links;
+};
+
+struct node *tipc_zone_select_remote_node(struct _zone *z_ptr, u32 addr, u32 ref);
+u32 tipc_zone_select_router(struct _zone *z_ptr, u32 addr, u32 ref);
+void tipc_zone_remove_as_router(struct _zone *z_ptr, u32 router);
+void tipc_zone_send_external_routes(struct _zone *z_ptr, u32 dest);
+struct _zone *tipc_zone_create(u32 addr);
+void tipc_zone_delete(struct _zone *z_ptr);
+void tipc_zone_attach_cluster(struct _zone *z_ptr, struct cluster *c_ptr);
+u32 tipc_zone_next_node(u32 addr);
+
+static inline struct _zone *tipc_zone_find(u32 addr)
+{
+	return tipc_net.zones[tipc_zone(addr)];	/* slot picked by tipc_zone(addr); no bounds or NULL check here */
+}
+
+#endif
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index acc73ba8bad..1b5989b1b67 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -121,7 +121,7 @@
 int sysctl_unix_max_dgram_qlen = 10;
 
 struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
-DEFINE_RWLOCK(unix_table_lock);
+DEFINE_SPINLOCK(unix_table_lock);
 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
 
 #define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])
@@ -130,7 +130,7 @@ static atomic_t unix_nr_socks = ATOMIC_INIT(0);
 
 /*
  *  SMP locking strategy:
- *    hash table is protected with rwlock unix_table_lock
+ *    hash table is protected with spinlock unix_table_lock
  *    each socket state is protected by separate rwlock.
  */
 
@@ -214,16 +214,16 @@ static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
 
 static inline void unix_remove_socket(struct sock *sk)
 {
-	write_lock(&unix_table_lock);
+	spin_lock(&unix_table_lock);
 	__unix_remove_socket(sk);
-	write_unlock(&unix_table_lock);
+	spin_unlock(&unix_table_lock);
 }
 
 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
 {
-	write_lock(&unix_table_lock);
+	spin_lock(&unix_table_lock);
 	__unix_insert_socket(list, sk);
-	write_unlock(&unix_table_lock);
+	spin_unlock(&unix_table_lock);
 }
 
 static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
@@ -250,11 +250,11 @@ static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
 {
 	struct sock *s;
 
-	read_lock(&unix_table_lock);
+	spin_lock(&unix_table_lock);
 	s = __unix_find_socket_byname(sunname, len, type, hash);
 	if (s)
 		sock_hold(s);
-	read_unlock(&unix_table_lock);
+	spin_unlock(&unix_table_lock);
 	return s;
 }
 
@@ -263,7 +263,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i)
 	struct sock *s;
 	struct hlist_node *node;
 
-	read_lock(&unix_table_lock);
+	spin_lock(&unix_table_lock);
 	sk_for_each(s, node,
 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 		struct dentry *dentry = unix_sk(s)->dentry;
@@ -276,7 +276,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i)
 	}
 	s = NULL;
 found:
-	read_unlock(&unix_table_lock);
+	spin_unlock(&unix_table_lock);
 	return s;
 }
 
@@ -473,7 +473,7 @@ static int unix_dgram_connect(struct socket *, struct sockaddr *,
 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
 				  struct msghdr *, size_t);
 
-static struct proto_ops unix_stream_ops = {
+static const struct proto_ops unix_stream_ops = {
 	.family =	PF_UNIX,
 	.owner =	THIS_MODULE,
 	.release =	unix_release,
@@ -494,7 +494,7 @@ static struct proto_ops unix_stream_ops = {
 	.sendpage =	sock_no_sendpage,
 };
 
-static struct proto_ops unix_dgram_ops = {
+static const struct proto_ops unix_dgram_ops = {
 	.family =	PF_UNIX,
 	.owner =	THIS_MODULE,
 	.release =	unix_release,
@@ -515,7 +515,7 @@ static struct proto_ops unix_dgram_ops = {
 	.sendpage =	sock_no_sendpage,
 };
 
-static struct proto_ops unix_seqpacket_ops = {
+static const struct proto_ops unix_seqpacket_ops = {
 	.family =	PF_UNIX,
 	.owner =	THIS_MODULE,
 	.release =	unix_release,
@@ -564,7 +564,7 @@ static struct sock * unix_create1(struct socket *sock)
 	u	  = unix_sk(sk);
 	u->dentry = NULL;
 	u->mnt	  = NULL;
-	rwlock_init(&u->lock);
+	spin_lock_init(&u->lock);
 	atomic_set(&u->inflight, sock ? 0 : -1);
 	init_MUTEX(&u->readsem); /* single task reading lock */
 	init_waitqueue_head(&u->peer_wait);
@@ -642,12 +642,12 @@ retry:
 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 	addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
 
-	write_lock(&unix_table_lock);
+	spin_lock(&unix_table_lock);
 	ordernum = (ordernum+1)&0xFFFFF;
 
 	if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
 				      addr->hash)) {
-		write_unlock(&unix_table_lock);
+		spin_unlock(&unix_table_lock);
 		/* Sanity yield. It is unusual case, but yet... */
 		if (!(ordernum&0xFF))
 			yield();
@@ -658,7 +658,7 @@ retry:
 	__unix_remove_socket(sk);
 	u->addr = addr;
 	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
-	write_unlock(&unix_table_lock);
+	spin_unlock(&unix_table_lock);
 	err = 0;
 
 out:	up(&u->readsem);
@@ -784,14 +784,14 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
 		if (err)
 			goto out_mknod_dput;
-		up(&nd.dentry->d_inode->i_sem);
+		mutex_unlock(&nd.dentry->d_inode->i_mutex);
 		dput(nd.dentry);
 		nd.dentry = dentry;
 
 		addr->hash = UNIX_HASH_SIZE;
 	}
 
-	write_lock(&unix_table_lock);
+	spin_lock(&unix_table_lock);
 
 	if (!sunaddr->sun_path[0]) {
 		err = -EADDRINUSE;
@@ -814,7 +814,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	__unix_insert_socket(list, sk);
 
 out_unlock:
-	write_unlock(&unix_table_lock);
+	spin_unlock(&unix_table_lock);
 out_up:
 	up(&u->readsem);
 out:
@@ -823,7 +823,7 @@ out:
 out_mknod_dput:
 	dput(dentry);
 out_mknod_unlock:
-	up(&nd.dentry->d_inode->i_sem);
+	mutex_unlock(&nd.dentry->d_inode->i_mutex);
 	path_release(&nd);
 out_mknod_parent:
 	if (err==-EEXIST)
@@ -1063,10 +1063,12 @@ restart:
 	/* Set credentials */
 	sk->sk_peercred = other->sk_peercred;
 
-	sock_hold(newsk);
-	unix_peer(sk)	= newsk;
 	sock->state	= SS_CONNECTED;
 	sk->sk_state	= TCP_ESTABLISHED;
+	sock_hold(newsk);
+
+	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
+	unix_peer(sk)	= newsk;
 
 	unix_state_wunlock(sk);
 
@@ -1414,7 +1416,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	} else {
 		sunaddr = NULL;
 		err = -ENOTCONN;
-		other = unix_peer_get(sk);
+		other = unix_peer(sk);
 		if (!other)
 			goto out_err;
 	}
@@ -1476,7 +1478,6 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		other->sk_data_ready(other, size);
 		sent+=size;
 	}
-	sock_put(other);
 
 	scm_destroy(siocb->scm);
 	siocb->scm = NULL;
@@ -1491,8 +1492,6 @@ pipe_err:
 		send_sig(SIGPIPE,current,0);
 	err = -EPIPE;
 out_err:
-        if (other)
-		sock_put(other);
 	scm_destroy(siocb->scm);
 	siocb->scm = NULL;
 	return sent ? : err;
@@ -1860,7 +1859,7 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		}
 
 		default:
-			err = dev_ioctl(cmd, (void __user *)arg);
+			err = -ENOIOCTLCMD;
 			break;
 	}
 	return err;
@@ -1917,7 +1916,7 @@ static struct sock *unix_seq_idx(int *iter, loff_t pos)
 
 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	read_lock(&unix_table_lock);
+	spin_lock(&unix_table_lock);
 	return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1);
 }
 
@@ -1932,7 +1931,7 @@ static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 
 static void unix_seq_stop(struct seq_file *seq, void *v)
 {
-	read_unlock(&unix_table_lock);
+	spin_unlock(&unix_table_lock);
 }
 
 static int unix_seq_show(struct seq_file *seq, void *v)
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 6ffc64e1712..411802bd4d3 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -182,7 +182,7 @@ void unix_gc(void)
 	if (down_trylock(&unix_gc_sem))
 		return;
 
-	read_lock(&unix_table_lock);
+	spin_lock(&unix_table_lock);
 
 	forall_unix_sockets(i, s)
 	{
@@ -301,7 +301,7 @@ void unix_gc(void)
 		}
 		u->gc_tree = GC_ORPHAN;
 	}
-	read_unlock(&unix_table_lock);
+	spin_unlock(&unix_table_lock);
 
 	/*
 	 *	Here we are. Hitlist is filled. Die.
diff --git a/net/wanrouter/af_wanpipe.c b/net/wanrouter/af_wanpipe.c
index 59fec59b213..8b9bf4a763b 100644
--- a/net/wanrouter/af_wanpipe.c
+++ b/net/wanrouter/af_wanpipe.c
@@ -36,6 +36,7 @@
 #include <linux/types.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/capability.h>
 #include <linux/fcntl.h>
 #include <linux/socket.h>
 #include <linux/in.h>
@@ -181,7 +182,7 @@ struct wanpipe_opt
 #endif
 
 static int sk_count;
-extern struct proto_ops wanpipe_ops;
+extern const struct proto_ops wanpipe_ops;
 static unsigned long find_free_critical;
 
 static void wanpipe_unlink_driver(struct sock *sk);
@@ -1839,7 +1840,7 @@ static int wanpipe_ioctl(struct socket *sock, unsigned int cmd, unsigned long ar
 #endif
 
 		default:
-			return dev_ioctl(cmd,(void __user *) arg);
+			return -ENOIOCTLCMD;
 	}
 	/*NOTREACHED*/
 }
@@ -2546,7 +2547,7 @@ static int wanpipe_connect(struct socket *sock, struct sockaddr *uaddr, int addr
 	return 0;
 }
 
-struct proto_ops wanpipe_ops = {
+const struct proto_ops wanpipe_ops = {
 	.family = 	PF_WANPIPE,
 	.owner =	THIS_MODULE,
 	.release = 	wanpipe_release,
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index bcf7b3faa76..c34833dc7cc 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -44,6 +44,7 @@
 
 #include <linux/config.h>
 #include <linux/stddef.h>	/* offsetof(), etc. */
+#include <linux/capability.h>
 #include <linux/errno.h>	/* return codes */
 #include <linux/kernel.h>
 #include <linux/init.h>
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 020d73cc841..72b6ff3299b 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -37,6 +37,7 @@
 
 #include <linux/config.h>
 #include <linux/module.h>
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
@@ -64,7 +65,7 @@ int sysctl_x25_ack_holdback_timeout    = X25_DEFAULT_T2;
 HLIST_HEAD(x25_list);
 DEFINE_RWLOCK(x25_list_lock);
 
-static struct proto_ops x25_proto_ops;
+static const struct proto_ops x25_proto_ops;
 
 static struct x25_address null_x25_address = {"               "};
 
@@ -540,12 +541,7 @@ static struct sock *x25_make_new(struct sock *osk)
 	sk->sk_state       = TCP_ESTABLISHED;
 	sk->sk_sleep       = osk->sk_sleep;
 	sk->sk_backlog_rcv = osk->sk_backlog_rcv;
-
-	if (sock_flag(osk, SOCK_ZAPPED))
-		sock_set_flag(sk, SOCK_ZAPPED);
-	
-	if (sock_flag(osk, SOCK_DBG))
-		sock_set_flag(sk, SOCK_DBG);
+	sock_copy_flags(sk, osk);
 
 	ox25 = x25_sk(osk);
 	x25->t21        = ox25->t21;
@@ -1378,7 +1374,7 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		}
 
  		default:
-			rc = dev_ioctl(cmd, argp);
+			rc = -ENOIOCTLCMD;
 			break;
 	}
 
@@ -1391,7 +1387,7 @@ static struct net_proto_family x25_family_ops = {
 	.owner	=	THIS_MODULE,
 };
 
-static struct proto_ops SOCKOPS_WRAPPED(x25_proto_ops) = {
+static const struct proto_ops SOCKOPS_WRAPPED(x25_proto_ops) = {
 	.family =	AF_X25,
 	.owner =	THIS_MODULE,
 	.release =	x25_release,
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 2f4531fcaca..6ed3302312f 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -540,8 +540,7 @@ void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm,
 			start = end;
 		}
 	}
-	if (len)
-		BUG();
+	BUG_ON(len);
 }
 EXPORT_SYMBOL_GPL(skb_icv_walk);
 
@@ -610,8 +609,7 @@ skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
 			start = end;
 		}
 	}
-	if (len)
-		BUG();
+	BUG_ON(len);
 	return elt;
 }
 EXPORT_SYMBOL_GPL(skb_to_sgvec);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 0db9e57013f..077bbf9fb9b 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -10,7 +10,7 @@
  * 	YOSHIFUJI Hideaki
  * 		Split up af-specific portion
  *	Derek Atkins <derek@ihtfp.com>		Add the post_input processor
- * 	
+ *
  */
 
 #include <asm/bug.h>
@@ -22,6 +22,7 @@
 #include <linux/workqueue.h>
 #include <linux/notifier.h>
 #include <linux/netdevice.h>
+#include <linux/netfilter.h>
 #include <linux/module.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
@@ -247,15 +248,14 @@ EXPORT_SYMBOL(xfrm_policy_alloc);
 
 void __xfrm_policy_destroy(struct xfrm_policy *policy)
 {
-	if (!policy->dead)
-		BUG();
+	BUG_ON(!policy->dead);
 
-	if (policy->bundles)
-		BUG();
+	BUG_ON(policy->bundles);
 
 	if (del_timer(&policy->timer))
 		BUG();
 
+	security_xfrm_policy_free(policy);
 	kfree(policy);
 }
 EXPORT_SYMBOL(__xfrm_policy_destroy);
@@ -346,10 +346,12 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	struct xfrm_policy *pol, **p;
 	struct xfrm_policy *delpol = NULL;
 	struct xfrm_policy **newpos = NULL;
+	struct dst_entry *gc_list;
 
 	write_lock_bh(&xfrm_policy_lock);
 	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) {
-		if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0) {
+		if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0 &&
+		    xfrm_sec_ctx_match(pol->security, policy->security)) {
 			if (excl) {
 				write_unlock_bh(&xfrm_policy_lock);
 				return -EEXIST;
@@ -381,21 +383,49 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 		xfrm_pol_hold(policy);
 	write_unlock_bh(&xfrm_policy_lock);
 
-	if (delpol) {
+	if (delpol)
 		xfrm_policy_kill(delpol);
+
+	read_lock_bh(&xfrm_policy_lock);
+	gc_list = NULL;
+	for (policy = policy->next; policy; policy = policy->next) {
+		struct dst_entry *dst;
+
+		write_lock(&policy->lock);
+		dst = policy->bundles;
+		if (dst) {
+			struct dst_entry *tail = dst;
+			while (tail->next)
+				tail = tail->next;
+			tail->next = gc_list;
+			gc_list = dst;
+
+			policy->bundles = NULL;
+		}
+		write_unlock(&policy->lock);
+	}
+	read_unlock_bh(&xfrm_policy_lock);
+
+	while (gc_list) {
+		struct dst_entry *dst = gc_list;
+
+		gc_list = dst->next;
+		dst_free(dst);
 	}
+
 	return 0;
 }
 EXPORT_SYMBOL(xfrm_policy_insert);
 
-struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel,
-				      int delete)
+struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel,
+					  struct xfrm_sec_ctx *ctx, int delete)
 {
 	struct xfrm_policy *pol, **p;
 
 	write_lock_bh(&xfrm_policy_lock);
 	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
-		if (memcmp(sel, &pol->selector, sizeof(*sel)) == 0) {
+		if ((memcmp(sel, &pol->selector, sizeof(*sel)) == 0) &&
+		    (xfrm_sec_ctx_match(ctx, pol->security))) {
 			xfrm_pol_hold(pol);
 			if (delete)
 				*p = pol->next;
@@ -410,7 +440,7 @@ struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel,
 	}
 	return pol;
 }
-EXPORT_SYMBOL(xfrm_policy_bysel);
+EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
 
 struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete)
 {
@@ -491,7 +521,7 @@ EXPORT_SYMBOL(xfrm_policy_walk);
 
 /* Find policy to apply to this flow. */
 
-static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
+static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir,
 			       void **objp, atomic_t **obj_refp)
 {
 	struct xfrm_policy *pol;
@@ -505,9 +535,12 @@ static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
 			continue;
 
 		match = xfrm_selector_match(sel, fl, family);
+
 		if (match) {
-			xfrm_pol_hold(pol);
-			break;
+ 			if (!security_xfrm_policy_lookup(pol, sk_sid, dir)) {
+				xfrm_pol_hold(pol);
+				break;
+			}
 		}
 	}
 	read_unlock_bh(&xfrm_policy_lock);
@@ -515,15 +548,37 @@ static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
 		*obj_refp = &pol->refcnt;
 }
 
-static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
+/* Map an XFRM_POLICY_* direction onto the corresponding FLOW_DIR_* value. */
+static inline int policy_to_flow_dir(int dir)
+{
+	/* When the two sets of constants coincide, dir is returned as is. */
+	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
+	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
+	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
+		return dir;
+
+	/* Otherwise translate; anything unrecognised maps to FLOW_DIR_IN. */
+	if (dir == XFRM_POLICY_OUT)
+		return FLOW_DIR_OUT;
+	if (dir == XFRM_POLICY_FWD)
+		return FLOW_DIR_FWD;
+	return FLOW_DIR_IN;
+}
+
+static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl, u32 sk_sid)
 {
 	struct xfrm_policy *pol;
 
 	read_lock_bh(&xfrm_policy_lock);
 	if ((pol = sk->sk_policy[dir]) != NULL) {
-		int match = xfrm_selector_match(&pol->selector, fl,
+ 		int match = xfrm_selector_match(&pol->selector, fl,
 						sk->sk_family);
+ 		int err = 0;
+
 		if (match)
+		  err = security_xfrm_policy_lookup(pol, sk_sid, policy_to_flow_dir(dir));
+
+ 		if (match && !err)
 			xfrm_pol_hold(pol);
 		else
 			pol = NULL;
@@ -596,6 +651,10 @@ static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
 
 	if (newp) {
 		newp->selector = old->selector;
+		if (security_xfrm_policy_clone(old, newp)) {
+			kfree(newp);
+			return NULL;  /* ENOMEM */
+		}
 		newp->lft = old->lft;
 		newp->curlft = old->curlft;
 		newp->action = old->action;
@@ -707,22 +766,6 @@ xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
 	return err;
 }
 
-static inline int policy_to_flow_dir(int dir)
-{
-	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
-	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
-	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
-		return dir;
-	switch (dir) {
-	default:
-	case XFRM_POLICY_IN:
-		return FLOW_DIR_IN;
-	case XFRM_POLICY_OUT:
-		return FLOW_DIR_OUT;
-	case XFRM_POLICY_FWD:
-		return FLOW_DIR_FWD;
-	};
-}
 
 static int stale_bundle(struct dst_entry *dst);
 
@@ -741,19 +784,20 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
 	int err;
 	u32 genid;
 	u16 family = dst_orig->ops->family;
+	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
+	u32 sk_sid = security_sk_sid(sk, fl, dir);
 restart:
 	genid = atomic_read(&flow_cache_genid);
 	policy = NULL;
 	if (sk && sk->sk_policy[1])
-		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
+		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, sk_sid);
 
 	if (!policy) {
 		/* To accelerate a bit...  */
 		if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT])
 			return 0;
 
-		policy = flow_cache_lookup(fl, family,
-					   policy_to_flow_dir(XFRM_POLICY_OUT),
+		policy = flow_cache_lookup(fl, sk_sid, family, dir,
 					   xfrm_policy_lookup);
 	}
 
@@ -906,8 +950,8 @@ xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
 	return start;
 }
 
-static int
-_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
+int
+xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
 {
 	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
 
@@ -918,6 +962,7 @@ _decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
 	xfrm_policy_put_afinfo(afinfo);
 	return 0;
 }
+EXPORT_SYMBOL(xfrm_decode_session);
 
 static inline int secpath_has_tunnel(struct sec_path *sp, int k)
 {
@@ -934,16 +979,21 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 {
 	struct xfrm_policy *pol;
 	struct flowi fl;
+	u8 fl_dir = policy_to_flow_dir(dir);
+	u32 sk_sid;
 
-	if (_decode_session(skb, &fl, family) < 0)
+	if (xfrm_decode_session(skb, &fl, family) < 0)
 		return 0;
+	nf_nat_decode_session(skb, &fl, family);
+
+	sk_sid = security_sk_sid(sk, &fl, fl_dir);
 
 	/* First, check used SA against their selectors. */
 	if (skb->sp) {
 		int i;
 
 		for (i=skb->sp->len-1; i>=0; i--) {
-		  struct sec_decap_state *xvec = &(skb->sp->x[i]);
+			struct sec_decap_state *xvec = &(skb->sp->x[i]);
 			if (!xfrm_selector_match(&xvec->xvec->sel, &fl, family))
 				return 0;
 
@@ -958,11 +1008,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 
 	pol = NULL;
 	if (sk && sk->sk_policy[dir])
-		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
+		pol = xfrm_sk_policy_lookup(sk, dir, &fl, sk_sid);
 
 	if (!pol)
-		pol = flow_cache_lookup(&fl, family,
-					policy_to_flow_dir(dir),
+		pol = flow_cache_lookup(&fl, sk_sid, family, fl_dir,
 					xfrm_policy_lookup);
 
 	if (!pol)
@@ -1007,20 +1056,19 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 {
 	struct flowi fl;
 
-	if (_decode_session(skb, &fl, family) < 0)
+	if (xfrm_decode_session(skb, &fl, family) < 0)
 		return 0;
 
 	return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
 }
 EXPORT_SYMBOL(__xfrm_route_forward);
 
-/* Optimize later using cookies and generation ids. */
-
 static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
 {
-	if (!stale_bundle(dst))
-		return dst;
-
+	/* If it is marked obsolete, which is how we even get here,
+	 * then we have purged it from the policy bundle list and we
+	 * did that for a good reason.
+	 */
 	return NULL;
 }
 
@@ -1104,6 +1152,16 @@ int xfrm_flush_bundles(void)
 	return 0;
 }
 
+static int always_true(struct dst_entry *dst)
+{
+	return 1;	/* unconditional "yes": dst is ignored */
+}
+
+void xfrm_flush_all_bundles(void)
+{
+	xfrm_prune_bundles(always_true);	/* predicate accepts every dst; presumably prunes all bundles */
+}
+
 void xfrm_init_pmtu(struct dst_entry *dst)
 {
 	do {
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 7cf48aa6c95..e12d0be5f97 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -10,7 +10,7 @@
  * 		Split up af-specific functions
  *	Derek Atkins <derek@ihtfp.com>
  *		Add UDP Encapsulation
- * 	
+ *
  */
 
 #include <linux/workqueue.h>
@@ -70,6 +70,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
 		x->type->destructor(x);
 		xfrm_put_type(x->type);
 	}
+	security_xfrm_state_free(x);
 	kfree(x);
 }
 
@@ -343,7 +344,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 			      selector.
 			 */
 			if (x->km.state == XFRM_STATE_VALID) {
-				if (!xfrm_selector_match(&x->sel, fl, family))
+				if (!xfrm_selector_match(&x->sel, fl, family) ||
+				    !xfrm_sec_ctx_match(pol->security, x->security))
 					continue;
 				if (!best ||
 				    best->km.dying > x->km.dying ||
@@ -354,7 +356,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 				acquire_in_progress = 1;
 			} else if (x->km.state == XFRM_STATE_ERROR ||
 				   x->km.state == XFRM_STATE_EXPIRED) {
-				if (xfrm_selector_match(&x->sel, fl, family))
+ 				if (xfrm_selector_match(&x->sel, fl, family) &&
+				    xfrm_sec_ctx_match(pol->security, x->security))
 					error = -ESRCH;
 			}
 		}
@@ -431,6 +434,8 @@ void xfrm_state_insert(struct xfrm_state *x)
 	spin_lock_bh(&xfrm_state_lock);
 	__xfrm_state_insert(x);
 	spin_unlock_bh(&xfrm_state_lock);
+
+	xfrm_flush_all_bundles();
 }
 EXPORT_SYMBOL(xfrm_state_insert);
 
@@ -478,6 +483,9 @@ out:
 	spin_unlock_bh(&xfrm_state_lock);
 	xfrm_state_put_afinfo(afinfo);
 
+	if (!err)
+		xfrm_flush_all_bundles();
+
 	if (x1) {
 		xfrm_state_delete(x1);
 		xfrm_state_put(x1);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 0cdd9a07e04..ac87a09ba83 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -7,7 +7,7 @@
  * 	Kazunori MIYAZAWA @USAGI
  * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
  * 		IPv6 support
- * 	
+ *
  */
 
 #include <linux/module.h>
@@ -88,6 +88,34 @@ static int verify_encap_tmpl(struct rtattr **xfrma)
 	return 0;
 }
 
+
+/*
+ * Sanity-check an optional XFRMA_SEC_CTX attribute before it is handed to
+ * the security hooks: the payload must hold the fixed header, and the
+ * self-described length must be header + ctx_len and fit in the payload.
+ */
+static inline int verify_sec_ctx_len(struct rtattr **xfrma)
+{
+	struct rtattr *rt = xfrma[XFRMA_SEC_CTX - 1];
+	struct xfrm_user_sec_ctx *uctx;
+
+	if (!rt)
+		return 0;
+
+	/* rta_len counts the rtattr header too, so test the payload size */
+	if (RTA_PAYLOAD(rt) < (int)sizeof(*uctx))
+		return -EINVAL;
+
+	uctx = RTA_DATA(rt);
+	if (uctx->ctx_len > PAGE_SIZE ||
+	    uctx->len != sizeof(*uctx) + uctx->ctx_len ||
+	    uctx->len > RTA_PAYLOAD(rt))
+		return -EINVAL;
+
+	return 0;
+}
+
+
 static int verify_newsa_info(struct xfrm_usersa_info *p,
 			     struct rtattr **xfrma)
 {
@@ -145,6 +173,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 		goto out;
 	if ((err = verify_encap_tmpl(xfrma)))
 		goto out;
+	if ((err = verify_sec_ctx_len(xfrma)))
+		goto out;
 
 	err = -EINVAL;
 	switch (p->mode) {
@@ -209,6 +239,30 @@ static int attach_encap_tmpl(struct xfrm_encap_tmpl **encapp, struct rtattr *u_a
 	return 0;
 }
 
+
+static inline int xfrm_user_sec_ctx_size(struct xfrm_policy *xp)
+{
+	struct xfrm_sec_ctx *ctx = xp->security;
+
+	/* A policy without a security context contributes no bytes. */
+	if (!ctx)
+		return 0;
+
+	/* user-visible header size plus ctx_len bytes */
+	return sizeof(struct xfrm_user_sec_ctx) + ctx->ctx_len;
+}
+
+static int attach_sec_ctx(struct xfrm_state *x, struct rtattr *u_arg)
+{
+	struct xfrm_user_sec_ctx *uctx = u_arg ? RTA_DATA(u_arg) : NULL;
+
+	/* attribute absent: nothing to attach, report success */
+	if (!uctx)
+		return 0;
+
+	return security_xfrm_state_alloc(x, uctx);
+}
+
 static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
 {
 	memcpy(&x->id, &p->id, sizeof(x->id));
@@ -253,6 +307,9 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p,
 	if (err)
 		goto error;
 
+	if ((err = attach_sec_ctx(x, xfrma[XFRMA_SEC_CTX-1])))
+		goto error;
+
 	x->km.seq = p->seq;
 
 	return x;
@@ -272,11 +329,11 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
 	int err;
 	struct km_event c;
 
-	err = verify_newsa_info(p, (struct rtattr **) xfrma);
+	err = verify_newsa_info(p, (struct rtattr **)xfrma);
 	if (err)
 		return err;
 
-	x = xfrm_state_construct(p, (struct rtattr **) xfrma, &err);
+	x = xfrm_state_construct(p, (struct rtattr **)xfrma, &err);
 	if (!x)
 		return err;
 
@@ -390,6 +447,19 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
 	if (x->encap)
 		RTA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap);
 
+	if (x->security) {
+		int ctx_size = sizeof(struct xfrm_sec_ctx) +
+				x->security->ctx_len;
+		struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size);
+		struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt);
+
+		uctx->exttype = XFRMA_SEC_CTX;
+		uctx->len = ctx_size;
+		uctx->ctx_doi = x->security->ctx_doi;
+		uctx->ctx_alg = x->security->ctx_alg;
+		uctx->ctx_len = x->security->ctx_len;
+		memcpy(uctx + 1, x->security->ctx_str, x->security->ctx_len);
+	}
 	nlh->nlmsg_len = skb->tail - b;
 out:
 	sp->this_idx++;
@@ -603,6 +673,18 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
 	return verify_policy_dir(p->dir);
 }
 
+static int copy_from_user_sec_ctx(struct xfrm_policy *pol, struct rtattr **xfrma)
+{
+	struct rtattr *attr = xfrma[XFRMA_SEC_CTX-1];
+
+	/* No XFRMA_SEC_CTX attribute supplied: succeed without touching pol. */
+	if (attr == NULL)
+		return 0;
+
+	/* Pass the attribute payload on to security_xfrm_policy_alloc(). */
+	return security_xfrm_policy_alloc(pol, RTA_DATA(attr));
+}
+
 static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut,
 			   int nr)
 {
@@ -681,7 +763,10 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p,
 	}
 
 	copy_from_user_policy(xp, p);
-	err = copy_from_user_tmpl(xp, xfrma);
+
+	if (!(err = copy_from_user_tmpl(xp, xfrma)))
+		err = copy_from_user_sec_ctx(xp, xfrma);
+
 	if (err) {
 		*errp = err;
 		kfree(xp);
@@ -702,8 +787,11 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
 	err = verify_newpolicy_info(p);
 	if (err)
 		return err;
+	err = verify_sec_ctx_len((struct rtattr **)xfrma);
+	if (err)
+		return err;
 
-	xp = xfrm_policy_construct(p, (struct rtattr **) xfrma, &err);
+	xp = xfrm_policy_construct(p, (struct rtattr **)xfrma, &err);
 	if (!xp)
 		return err;
 
@@ -714,6 +802,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
 	excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY;
 	err = xfrm_policy_insert(p->dir, xp, excl);
 	if (err) {
+		security_xfrm_policy_free(xp);
 		kfree(xp);
 		return err;
 	}
@@ -761,6 +850,27 @@ rtattr_failure:
 	return -1;
 }
 
+static int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb)
+{
+	struct xfrm_sec_ctx *s = xp->security;
+	struct xfrm_user_sec_ctx *uctx;
+	int ctx_size;
+
+	if (!s)
+		return 0;	/* policy has no security context: emit no attribute */
+	ctx_size = sizeof(*uctx) + s->ctx_len;	/* user header + string, as xfrm_user_sec_ctx_size() sizes it */
+	uctx = RTA_DATA(__RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size));
+	uctx->exttype = XFRMA_SEC_CTX;
+	uctx->len = ctx_size;
+	uctx->ctx_doi = s->ctx_doi;
+	uctx->ctx_alg = s->ctx_alg;
+	uctx->ctx_len = s->ctx_len;
+	memcpy(uctx + 1, s->ctx_str, s->ctx_len);
+	return 0;
+ rtattr_failure:	/* target of the goto hidden inside __RTA_PUT() */
+	return -1;
+}
+
 static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr)
 {
 	struct xfrm_dump_info *sp = ptr;
@@ -782,6 +892,8 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
 	copy_to_user_policy(xp, p, dir);
 	if (copy_to_user_tmpl(xp, skb) < 0)
 		goto nlmsg_failure;
+	if (copy_to_user_sec_ctx(xp, skb))
+		goto nlmsg_failure;
 
 	nlh->nlmsg_len = skb->tail - b;
 out:
@@ -852,8 +964,25 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
 
 	if (p->index)
 		xp = xfrm_policy_byid(p->dir, p->index, delete);
-	else
-		xp = xfrm_policy_bysel(p->dir, &p->sel, delete);
+	else {
+		struct rtattr **rtattrs = (struct rtattr **)xfrma;
+		struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1];
+		struct xfrm_policy tmp;
+
+		err = verify_sec_ctx_len(rtattrs);
+		if (err)
+			return err;
+
+		memset(&tmp, 0, sizeof(struct xfrm_policy));
+		if (rt) {
+			struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt);
+
+			if ((err = security_xfrm_policy_alloc(&tmp, uctx)))
+				return err;
+		}
+		xp = xfrm_policy_bysel_ctx(p->dir, &p->sel, tmp.security, delete);
+		security_xfrm_policy_free(&tmp);
+	}
 	if (xp == NULL)
 		return -ENOENT;
 
@@ -1224,6 +1353,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
 
 	if (copy_to_user_tmpl(xp, skb) < 0)
 		goto nlmsg_failure;
+	if (copy_to_user_sec_ctx(xp, skb))
+		goto nlmsg_failure;
 
 	nlh->nlmsg_len = skb->tail - b;
 	return skb->len;
@@ -1241,6 +1372,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
 
 	len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
 	len += NLMSG_SPACE(sizeof(struct xfrm_user_acquire));
+	len += RTA_SPACE(xfrm_user_sec_ctx_size(xp));
 	skb = alloc_skb(len, GFP_ATOMIC);
 	if (skb == NULL)
 		return -ENOMEM;
@@ -1324,6 +1456,8 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
 	copy_to_user_policy(xp, &upe->pol, dir);
 	if (copy_to_user_tmpl(xp, skb) < 0)
 		goto nlmsg_failure;
+	if (copy_to_user_sec_ctx(xp, skb))
+		goto nlmsg_failure;
 	upe->hard = !!hard;
 
 	nlh->nlmsg_len = skb->tail - b;
@@ -1341,6 +1475,7 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_eve
 
 	len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
 	len += NLMSG_SPACE(sizeof(struct xfrm_user_polexpire));
+	len += RTA_SPACE(xfrm_user_sec_ctx_size(xp));
 	skb = alloc_skb(len, GFP_ATOMIC);
 	if (skb == NULL)
 		return -ENOMEM;