Diffstat (limited to 'net')
220 files changed, 8191 insertions, 2971 deletions
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 7982656b9c8..a5144e43aae 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -63,7 +63,7 @@ #include <linux/atalk.h> struct datalink_proto *ddp_dl, *aarp_dl; -static struct proto_ops atalk_dgram_ops; +static const struct proto_ops atalk_dgram_ops; /**************************************************************************\ * * @@ -1763,7 +1763,7 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr */ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { - int rc = -EINVAL; + int rc = -ENOIOCTLCMD; struct sock *sk = sock->sk; void __user *argp = (void __user *)arg; @@ -1813,23 +1813,6 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) rc = atif_ioctl(cmd, argp); rtnl_unlock(); break; - /* Physical layer ioctl calls */ - case SIOCSIFLINK: - case SIOCGIFHWADDR: - case SIOCSIFHWADDR: - case SIOCGIFFLAGS: - case SIOCSIFFLAGS: - case SIOCGIFTXQLEN: - case SIOCSIFTXQLEN: - case SIOCGIFMTU: - case SIOCGIFCONF: - case SIOCADDMULTI: - case SIOCDELMULTI: - case SIOCGIFCOUNT: - case SIOCGIFINDEX: - case SIOCGIFNAME: - rc = dev_ioctl(cmd, argp); - break; } return rc; @@ -1841,7 +1824,7 @@ static struct net_proto_family atalk_family_ops = { .owner = THIS_MODULE, }; -static struct proto_ops SOCKOPS_WRAPPED(atalk_dgram_ops) = { +static const struct proto_ops SOCKOPS_WRAPPED(atalk_dgram_ops) = { .family = PF_APPLETALK, .owner = THIS_MODULE, .release = atalk_release, diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 72f3f7b8de8..bdb4d89730d 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -295,7 +295,7 @@ static inline __be16 br_type_trans(struct sk_buff *skb, struct net_device *dev) unsigned char *rawp; eth = eth_hdr(skb); - if (*eth->h_dest & 1) { + if (is_multicast_ether_addr(eth->h_dest)) { if (memcmp(eth->h_dest, dev->broadcast, ETH_ALEN) == 0) skb->pkt_type = PACKET_BROADCAST; else diff --git a/net/atm/pvc.c b/net/atm/pvc.c index 2684a92da22..f2c541774dc 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -102,7 +102,7 @@ static int pvc_getname(struct socket *sock,struct sockaddr *sockaddr, } -static struct proto_ops pvc_proto_ops = { +static const struct proto_ops pvc_proto_ops = { .family = PF_ATMPVC, .owner = THIS_MODULE, diff --git a/net/atm/svc.c b/net/atm/svc.c index d7b266136bf..3a180cfd7b4 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -613,7 +613,7 @@ static int svc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return error; } -static struct proto_ops svc_proto_ops = { +static const struct proto_ops svc_proto_ops = { .family = PF_ATMSVC, .owner = THIS_MODULE, diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 1b683f30265..e8753c7fcad 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -54,7 +54,7 @@ HLIST_HEAD(ax25_list); DEFINE_SPINLOCK(ax25_list_lock); -static struct proto_ops ax25_proto_ops; +static const struct proto_ops ax25_proto_ops; static void ax25_free_sock(struct sock *sk) { @@ -1827,7 +1827,7 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) break; default: - res = dev_ioctl(cmd, argp); + res = -ENOIOCTLCMD; break; } release_sock(sk); @@ -1944,7 +1944,7 @@ static struct net_proto_family ax25_family_ops = { .owner = THIS_MODULE, }; -static struct proto_ops ax25_proto_ops = { +static const struct proto_ops ax25_proto_ops = { .family = PF_AX25, .owner = THIS_MODULE, .release = ax25_release, diff --git a/net/bluetooth/af_bluetooth.c 
b/net/bluetooth/af_bluetooth.c index ea616e3fc98..fb031fe9be9 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -287,10 +287,9 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo) timeo = schedule_timeout(timeo); lock_sock(sk); - if (sk->sk_err) { - err = sock_error(sk); + err = sock_error(sk); + if (err) break; - } } set_current_state(TASK_RUNNING); remove_wait_queue(sk->sk_sleep, &wait); diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 9778c6acd53..ccbaf69afc5 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -146,7 +146,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long return 0; } -static struct proto_ops bnep_sock_ops = { +static const struct proto_ops bnep_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = bnep_sock_release, diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index beb045bf571..5e22343b609 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -137,7 +137,7 @@ static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long return -EINVAL; } -static struct proto_ops cmtp_sock_ops = { +static const struct proto_ops cmtp_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = cmtp_sock_release, diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 1d6d0a15c09..84e6c93a044 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -575,7 +575,7 @@ static int hci_sock_getsockopt(struct socket *sock, int level, int optname, char return 0; } -static struct proto_ops hci_sock_ops = { +static const struct proto_ops hci_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = hci_sock_release, diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index bd7568ac87f..0ed38740388 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -78,7 +78,7 @@ static struct class_device_attribute *bt_attrs[] = { }; #ifdef CONFIG_HOTPLUG -static int bt_hotplug(struct class_device *cdev, char **envp, int num_envp, char *buf, int size) +static int bt_uevent(struct class_device *cdev, char **envp, int num_envp, char *buf, int size) { struct hci_dev *hdev = class_get_devdata(cdev); int n, i = 0; @@ -107,7 +107,7 @@ struct class bt_class = { .name = "bluetooth", .release = bt_release, #ifdef CONFIG_HOTPLUG - .hotplug = bt_hotplug, + .uevent = bt_uevent, #endif }; diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index f8986f88143..8f8dd931b29 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -143,7 +143,7 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long return -EINVAL; } -static struct proto_ops hidp_sock_ops = { +static const struct proto_ops hidp_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = hidp_sock_release, diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index e3bb11ca423..7f0781e4326 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -57,7 +57,7 @@ #define VERSION "2.8" -static struct proto_ops l2cap_sock_ops; +static const struct proto_ops l2cap_sock_ops; static struct bt_sock_list l2cap_sk_list = { .lock = RW_LOCK_UNLOCKED @@ -767,8 +767,9 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms BT_DBG("sock %p, sk %p", sock, sk); - if (sk->sk_err) - return sock_error(sk); + err = sock_error(sk); + if (err) + return err; if (msg->msg_flags & MSG_OOB) 
return -EOPNOTSUPP; @@ -2160,7 +2161,7 @@ static ssize_t l2cap_sysfs_show(struct class *dev, char *buf) static CLASS_ATTR(l2cap, S_IRUGO, l2cap_sysfs_show, NULL); -static struct proto_ops l2cap_sock_ops = { +static const struct proto_ops l2cap_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = l2cap_sock_release, diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 6c34261b232..757d2dd3b02 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -58,7 +58,7 @@ #define BT_DBG(D...) #endif -static struct proto_ops rfcomm_sock_ops; +static const struct proto_ops rfcomm_sock_ops; static struct bt_sock_list rfcomm_sk_list = { .lock = RW_LOCK_UNLOCKED @@ -907,7 +907,7 @@ static ssize_t rfcomm_sock_sysfs_show(struct class *dev, char *buf) static CLASS_ATTR(rfcomm, S_IRUGO, rfcomm_sock_sysfs_show, NULL); -static struct proto_ops rfcomm_sock_ops = { +static const struct proto_ops rfcomm_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = rfcomm_sock_release, diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 9cb00dc6c08..6b61323ce23 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -56,7 +56,7 @@ #define VERSION "0.5" -static struct proto_ops sco_sock_ops; +static const struct proto_ops sco_sock_ops; static struct bt_sock_list sco_sk_list = { .lock = RW_LOCK_UNLOCKED @@ -637,8 +637,9 @@ static int sco_sock_sendmsg(struct kiocb *iocb, struct socket *sock, BT_DBG("sock %p, sk %p", sock, sk); - if (sk->sk_err) - return sock_error(sk); + err = sock_error(sk); + if (err) + return err; if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; @@ -913,7 +914,7 @@ static ssize_t sco_sysfs_show(struct class *dev, char *buf) static CLASS_ATTR(sco, S_IRUGO, sco_sysfs_show, NULL); -static struct proto_ops sco_sock_ops = { +static const struct proto_ops sco_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = sco_sock_release, diff --git a/net/bridge/br.c b/net/bridge/br.c index f8f184942aa..188cc1ac49e 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -67,3 +67,4 @@ EXPORT_SYMBOL(br_should_route_hook); module_init(br_init) module_exit(br_deinit) MODULE_LICENSE("GPL"); +MODULE_VERSION(BR_VERSION); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index f564ee99782..0b33a7b3a00 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -15,7 +15,9 @@ #include <linux/kernel.h> #include <linux/netdevice.h> -#include <linux/module.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> + #include <asm/uaccess.h> #include "br_private.h" @@ -82,6 +84,87 @@ static int br_change_mtu(struct net_device *dev, int new_mtu) return 0; } +/* Allow setting mac address of pseudo-bridge to be same as + * any of the bound interfaces + */ +static int br_set_mac_address(struct net_device *dev, void *p) +{ + struct net_bridge *br = netdev_priv(dev); + struct sockaddr *addr = p; + struct net_bridge_port *port; + int err = -EADDRNOTAVAIL; + + spin_lock_bh(&br->lock); + list_for_each_entry(port, &br->port_list, list) { + if (!compare_ether_addr(port->dev->dev_addr, addr->sa_data)) { + br_stp_change_bridge_id(br, addr->sa_data); + err = 0; + break; + } + } + spin_unlock_bh(&br->lock); + + return err; +} + +static void br_getinfo(struct net_device *dev, struct ethtool_drvinfo *info) +{ + strcpy(info->driver, "bridge"); + strcpy(info->version, BR_VERSION); + strcpy(info->fw_version, "N/A"); + strcpy(info->bus_info, "N/A"); +} + +static int br_set_sg(struct net_device *dev, u32 data) +{ + struct 
net_bridge *br = netdev_priv(dev); + + if (data) + br->feature_mask |= NETIF_F_SG; + else + br->feature_mask &= ~NETIF_F_SG; + + br_features_recompute(br); + return 0; +} + +static int br_set_tso(struct net_device *dev, u32 data) +{ + struct net_bridge *br = netdev_priv(dev); + + if (data) + br->feature_mask |= NETIF_F_TSO; + else + br->feature_mask &= ~NETIF_F_TSO; + + br_features_recompute(br); + return 0; +} + +static int br_set_tx_csum(struct net_device *dev, u32 data) +{ + struct net_bridge *br = netdev_priv(dev); + + if (data) + br->feature_mask |= NETIF_F_IP_CSUM; + else + br->feature_mask &= ~NETIF_F_IP_CSUM; + + br_features_recompute(br); + return 0; +} + +static struct ethtool_ops br_ethtool_ops = { + .get_drvinfo = br_getinfo, + .get_link = ethtool_op_get_link, + .get_sg = ethtool_op_get_sg, + .set_sg = br_set_sg, + .get_tx_csum = ethtool_op_get_tx_csum, + .set_tx_csum = br_set_tx_csum, + .get_tso = ethtool_op_get_tso, + .set_tso = br_set_tso, +}; + void br_dev_setup(struct net_device *dev) { memset(dev->dev_addr, 0, ETH_ALEN); @@ -96,8 +179,12 @@ void br_dev_setup(struct net_device *dev) dev->change_mtu = br_change_mtu; dev->destructor = free_netdev; SET_MODULE_OWNER(dev); + SET_ETHTOOL_OPS(dev, &br_ethtool_ops); dev->stop = br_dev_stop; dev->tx_queue_len = 0; - dev->set_mac_address = NULL; + dev->set_mac_address = br_set_mac_address; dev->priv_flags = IFF_EBRIDGE; + + dev->features = NETIF_F_SG | NETIF_F_FRAGLIST + | NETIF_F_HIGHDMA | NETIF_F_TSO | NETIF_F_IP_CSUM; } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 975abe254b7..ba442883e87 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -20,6 +20,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/rtnetlink.h> +#include <linux/if_ether.h> #include <net/sock.h> #include "br_private.h" @@ -32,9 +33,8 @@ * ethtool, use ethtool_ops. Also, since driver might sleep need to * not be holding any locks. */ -static int br_initial_port_cost(struct net_device *dev) +static int port_cost(struct net_device *dev) { - struct ethtool_cmd ecmd = { ETHTOOL_GSET }; struct ifreq ifr; mm_segment_t old_fs; @@ -58,10 +58,6 @@ static int br_initial_port_cost(struct net_device *dev) return 2; case SPEED_10: return 100; - default: - pr_info("bridge: can't decode speed from %s: %d\n", - dev->name, ecmd.speed); - return 100; } } @@ -75,6 +71,35 @@ static int br_initial_port_cost(struct net_device *dev) return 100; /* assume old 10Mbps */ } + +/* + * Check for port carrier transitions. + * Called from work queue to allow for calling functions that + * might sleep (such as speed check), and to debounce.
+ */ +static void port_carrier_check(void *arg) +{ + struct net_bridge_port *p = arg; + + rtnl_lock(); + if (netif_carrier_ok(p->dev)) { + u32 cost = port_cost(p->dev); + + spin_lock_bh(&p->br->lock); + if (p->state == BR_STATE_DISABLED) { + p->path_cost = cost; + br_stp_enable_port(p); + } + spin_unlock_bh(&p->br->lock); + } else { + spin_lock_bh(&p->br->lock); + if (p->state != BR_STATE_DISABLED) + br_stp_disable_port(p); + spin_unlock_bh(&p->br->lock); + } + rtnl_unlock(); +} + static void destroy_nbp(struct net_bridge_port *p) { struct net_device *dev = p->dev; @@ -102,6 +127,9 @@ static void del_nbp(struct net_bridge_port *p) dev->br_port = NULL; dev_set_promiscuity(dev, -1); + cancel_delayed_work(&p->carrier_check); + flush_scheduled_work(); + spin_lock_bh(&br->lock); br_stp_disable_port(p); spin_unlock_bh(&br->lock); @@ -155,6 +183,7 @@ static struct net_device *new_bridge_dev(const char *name) br->bridge_id.prio[1] = 0x00; memset(br->bridge_id.addr, 0, ETH_ALEN); + br->feature_mask = dev->features; br->stp_enabled = 0; br->designated_root = br->bridge_id; br->root_path_cost = 0; @@ -195,10 +224,9 @@ static int find_portno(struct net_bridge *br) return (index >= BR_MAX_PORTS) ? -EXFULL : index; } -/* called with RTNL */ +/* called with RTNL but without bridge lock */ static struct net_bridge_port *new_nbp(struct net_bridge *br, - struct net_device *dev, - unsigned long cost) + struct net_device *dev) { int index; struct net_bridge_port *p; @@ -215,12 +243,13 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, p->br = br; dev_hold(dev); p->dev = dev; - p->path_cost = cost; + p->path_cost = port_cost(dev); p->priority = 0x8000 >> BR_PORT_BITS; dev->br_port = p; p->port_no = index; br_init_port(p); p->state = BR_STATE_DISABLED; + INIT_WORK(&p->carrier_check, port_carrier_check, p); kobject_init(&p->kobj); return p; @@ -295,7 +324,7 @@ int br_del_bridge(const char *name) return ret; } -/* Mtu of the bridge pseudo-device 1500 or the minimum of the ports */ +/* MTU of the bridge pseudo-device: ETH_DATA_LEN or the minimum of the ports */ int br_min_mtu(const struct net_bridge *br) { const struct net_bridge_port *p; @@ -304,7 +333,7 @@ int br_min_mtu(const struct net_bridge *br) ASSERT_RTNL(); if (list_empty(&br->port_list)) - mtu = 1500; + mtu = ETH_DATA_LEN; else { list_for_each_entry(p, &br->port_list, list) { if (!mtu || p->dev->mtu < mtu) @@ -322,9 +351,8 @@ void br_features_recompute(struct net_bridge *br) struct net_bridge_port *p; unsigned long features, checksum; - features = NETIF_F_SG | NETIF_F_FRAGLIST - | NETIF_F_HIGHDMA | NETIF_F_TSO; - checksum = NETIF_F_IP_CSUM; /* least commmon subset */ + features = br->feature_mask &~ NETIF_F_IP_CSUM; + checksum = br->feature_mask & NETIF_F_IP_CSUM; list_for_each_entry(p, &br->port_list, list) { if (!(p->dev->features @@ -351,7 +379,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (dev->br_port != NULL) return -EBUSY; - if (IS_ERR(p = new_nbp(br, dev, br_initial_port_cost(dev)))) + if (IS_ERR(p = new_nbp(br, dev))) return PTR_ERR(p); if ((err = br_fdb_insert(br, p, dev->dev_addr))) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index b88220a64cd..e3a73cead6b 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -53,6 +53,11 @@ int br_handle_frame_finish(struct sk_buff *skb) /* insert into forwarding database after filtering to avoid spoofing */ br_fdb_update(p->br, p, eth_hdr(skb)->h_source); + if (p->state == BR_STATE_LEARNING) { + kfree_skb(skb); + goto out; + } + if 
(br->dev->flags & IFF_PROMISC) { struct sk_buff *skb2; @@ -63,7 +68,7 @@ int br_handle_frame_finish(struct sk_buff *skb) } } - if (dest[0] & 1) { + if (is_multicast_ether_addr(dest)) { br_flood_forward(br, skb, !passedup); if (!passedup) br_pass_frame_up(br, skb); @@ -107,9 +112,6 @@ int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb) if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) goto err; - if (p->state == BR_STATE_LEARNING) - br_fdb_update(p->br, p, eth_hdr(skb)->h_source); - if (p->br->stp_enabled && !memcmp(dest, bridge_ula, 5) && !(dest[5] & 0xF0)) { @@ -118,9 +120,10 @@ int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb) NULL, br_stp_handle_bpdu); return 1; } + goto err; } - else if (p->state == BR_STATE_FORWARDING) { + if (p->state == BR_STATE_FORWARDING || p->state == BR_STATE_LEARNING) { if (br_should_route_hook) { if (br_should_route_hook(pskb)) return 0; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 23422bd53a5..7cac3fb9f80 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -26,6 +26,7 @@ #include <linux/ip.h> #include <linux/netdevice.h> #include <linux/skbuff.h> +#include <linux/if_arp.h> #include <linux/if_ether.h> #include <linux/if_vlan.h> #include <linux/netfilter_bridge.h> @@ -33,8 +34,11 @@ #include <linux/netfilter_ipv6.h> #include <linux/netfilter_arp.h> #include <linux/in_route.h> + #include <net/ip.h> #include <net/ipv6.h> +#include <net/route.h> + #include <asm/uaccess.h> #include <asm/checksum.h> #include "br_private.h" @@ -390,8 +394,9 @@ inhdr_error: * target in particular. Save the original destination IP * address to be able to detect DNAT afterwards. */ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { struct iphdr *iph; __u32 len; @@ -408,8 +413,10 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb, goto out; if (skb->protocol == __constant_htons(ETH_P_8021Q)) { + u8 *vhdr = skb->data; skb_pull(skb, VLAN_HLEN); - (skb)->nh.raw += VLAN_HLEN; + skb_postpull_rcsum(skb, vhdr, VLAN_HLEN); + skb->nh.raw += VLAN_HLEN; } return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn); } @@ -425,8 +432,10 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb, goto out; if (skb->protocol == __constant_htons(ETH_P_8021Q)) { + u8 *vhdr = skb->data; skb_pull(skb, VLAN_HLEN); - (skb)->nh.raw += VLAN_HLEN; + skb_postpull_rcsum(skb, vhdr, VLAN_HLEN); + skb->nh.raw += VLAN_HLEN; } if (!pskb_may_pull(skb, sizeof(struct iphdr))) diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index 917311c6828..a43a9c1d50d 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -52,17 +52,9 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v br_stp_recalculate_bridge_id(br); break; - case NETDEV_CHANGE: /* device is up but carrier changed */ - if (!(br->dev->flags & IFF_UP)) - break; - - if (netif_carrier_ok(dev)) { - if (p->state == BR_STATE_DISABLED) - br_stp_enable_port(p); - } else { - if (p->state != BR_STATE_DISABLED) - br_stp_disable_port(p); - } + case NETDEV_CHANGE: + if (br->dev->flags & IFF_UP) + schedule_delayed_work(&p->carrier_check, BR_PORT_DEBOUNCE); break; case NETDEV_FEAT_CHANGE: diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 
bdf95a74d8c..c5bd631ffcd 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -27,6 +27,10 @@ #define BR_PORT_BITS 10 #define BR_MAX_PORTS (1<<BR_PORT_BITS) +#define BR_PORT_DEBOUNCE (HZ/10) + +#define BR_VERSION "2.1" + typedef struct bridge_id bridge_id; typedef struct mac_addr mac_addr; typedef __u16 port_id; @@ -78,6 +82,7 @@ struct net_bridge_port struct timer_list hold_timer; struct timer_list message_age_timer; struct kobject kobj; + struct work_struct carrier_check; struct rcu_head rcu; }; @@ -90,6 +95,7 @@ struct net_bridge spinlock_t hash_lock; struct hlist_head hash[BR_HASH_SIZE]; struct list_head age_list; + unsigned long feature_mask; /* STP */ bridge_id designated_root; @@ -201,6 +207,7 @@ extern void br_stp_disable_bridge(struct net_bridge *br); extern void br_stp_enable_port(struct net_bridge_port *p); extern void br_stp_disable_port(struct net_bridge_port *p); extern void br_stp_recalculate_bridge_id(struct net_bridge *br); +extern void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *a); extern void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio); extern void br_stp_set_port_priority(struct net_bridge_port *p, diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index ac09b6a2352..cc047f7fb6e 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -120,8 +120,7 @@ void br_stp_disable_port(struct net_bridge_port *p) } /* called under bridge lock */ -static void br_stp_change_bridge_id(struct net_bridge *br, - const unsigned char *addr) +void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr) { unsigned char oldaddr[6]; struct net_bridge_port *p; @@ -158,7 +157,7 @@ void br_stp_recalculate_bridge_id(struct net_bridge *br) list_for_each_entry(p, &br->port_list, list) { if (addr == br_mac_zero || - compare_ether_addr(p->dev->dev_addr, addr) < 0) + memcmp(p->dev->dev_addr, addr, ETH_ALEN) < 0) addr = p->dev->dev_addr; } diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index f6a19d53eae..2ebdc23bbe2 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -248,7 +248,7 @@ int br_sysfs_addif(struct net_bridge_port *p) if (err) goto out2; - kobject_hotplug(&p->kobj, KOBJ_ADD); + kobject_uevent(&p->kobj, KOBJ_ADD); return 0; out2: kobject_del(&p->kobj); @@ -260,7 +260,7 @@ void br_sysfs_removeif(struct net_bridge_port *p) { pr_debug("br_sysfs_removeif\n"); sysfs_remove_link(&p->br->ifobj, p->dev->name); - kobject_hotplug(&p->kobj, KOBJ_REMOVE); + kobject_uevent(&p->kobj, KOBJ_REMOVE); kobject_del(&p->kobj); } diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index c70b3be2302..b84fc6075fe 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -196,9 +196,13 @@ config BRIDGE_EBT_LOG To compile it as a module, choose M here. If unsure, say N. config BRIDGE_EBT_ULOG - tristate "ebt: ulog support" + tristate "ebt: ulog support (OBSOLETE)" depends on BRIDGE_NF_EBTABLES help + This option enables the old bridge-specific "ebt_ulog" implementation + which has been obsoleted by the new "nfnetlink_log" code (see + CONFIG_NETFILTER_NETLINK_LOG). + This option adds the ulog watcher, that you can use in any rule in any ebtables table. The packet is passed to a userspace logging daemon using netlink multicast sockets. 
This differs diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 662975be3d1..9f6e0193ae1 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -3,13 +3,16 @@ * * Authors: * Bart De Schuymer <bdschuym@pandora.be> + * Harald Welte <laforge@netfilter.org> * * April, 2002 * */ +#include <linux/in.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_log.h> +#include <linux/netfilter.h> #include <linux/module.h> #include <linux/ip.h> #include <linux/if_arp.h> @@ -55,27 +58,30 @@ static void print_MAC(unsigned char *p) } #define myNIPQUAD(a) a[0], a[1], a[2], a[3] -static void ebt_log(const struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static void +ebt_log_packet(unsigned int pf, unsigned int hooknum, + const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const struct nf_loginfo *loginfo, + const char *prefix) { - struct ebt_log_info *info = (struct ebt_log_info *)data; - char level_string[4] = "< >"; + unsigned int bitmask; - level_string[1] = '0' + info->loglevel; spin_lock_bh(&ebt_log_lock); - printk(level_string); - printk("%s IN=%s OUT=%s ", info->prefix, in ? in->name : "", - out ? out->name : ""); + printk("<%c>%s IN=%s OUT=%s MAC source = ", '0' + loginfo->u.log.level, + prefix, in ? in->name : "", out ? out->name : ""); - printk("MAC source = "); print_MAC(eth_hdr(skb)->h_source); printk("MAC dest = "); print_MAC(eth_hdr(skb)->h_dest); printk("proto = 0x%04x", ntohs(eth_hdr(skb)->h_proto)); - if ((info->bitmask & EBT_LOG_IP) && eth_hdr(skb)->h_proto == + if (loginfo->type == NF_LOG_TYPE_LOG) + bitmask = loginfo->u.log.logflags; + else + bitmask = NF_LOG_MASK; + + if ((bitmask & EBT_LOG_IP) && eth_hdr(skb)->h_proto == htons(ETH_P_IP)){ struct iphdr _iph, *ih; @@ -84,10 +90,9 @@ static void ebt_log(const struct sk_buff *skb, unsigned int hooknr, printk(" INCOMPLETE IP header"); goto out; } - printk(" IP SRC=%u.%u.%u.%u IP DST=%u.%u.%u.%u,", - NIPQUAD(ih->saddr), NIPQUAD(ih->daddr)); - printk(" IP tos=0x%02X, IP proto=%d", ih->tos, - ih->protocol); + printk(" IP SRC=%u.%u.%u.%u IP DST=%u.%u.%u.%u, IP " + "tos=0x%02X, IP proto=%d", NIPQUAD(ih->saddr), + NIPQUAD(ih->daddr), ih->tos, ih->protocol); if (ih->protocol == IPPROTO_TCP || ih->protocol == IPPROTO_UDP) { struct tcpudphdr _ports, *pptr; @@ -104,7 +109,7 @@ static void ebt_log(const struct sk_buff *skb, unsigned int hooknr, goto out; } - if ((info->bitmask & EBT_LOG_ARP) && + if ((bitmask & EBT_LOG_ARP) && ((eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) || (eth_hdr(skb)->h_proto == htons(ETH_P_RARP)))) { struct arphdr _arph, *ah; @@ -144,6 +149,21 @@ static void ebt_log(const struct sk_buff *skb, unsigned int hooknr, out: printk("\n"); spin_unlock_bh(&ebt_log_lock); + +} + +static void ebt_log(const struct sk_buff *skb, unsigned int hooknr, + const struct net_device *in, const struct net_device *out, + const void *data, unsigned int datalen) +{ + struct ebt_log_info *info = (struct ebt_log_info *)data; + struct nf_loginfo li; + + li.type = NF_LOG_TYPE_LOG; + li.u.log.level = info->loglevel; + li.u.log.logflags = info->bitmask; + + nf_log_packet(PF_BRIDGE, hooknr, skb, in, out, &li, info->prefix); } static struct ebt_watcher log = @@ -154,13 +174,32 @@ static struct ebt_watcher log = .me = THIS_MODULE, }; +static struct nf_logger ebt_log_logger = { + .name = "ebt_log", + .logfn = &ebt_log_packet, + .me = 
THIS_MODULE, +}; + static int __init init(void) { - return ebt_register_watcher(&log); + int ret; + + ret = ebt_register_watcher(&log); + if (ret < 0) + return ret; + if (nf_log_register(PF_BRIDGE, &ebt_log_logger) < 0) { + printk(KERN_WARNING "ebt_log: not logging via system console " + "since somebody else already registered for PF_BRIDGE\n"); + /* we cannot make module load fail here, since otherwise + * ebtables userspace would abort */ + } + + return 0; } static void __exit fini(void) { + nf_log_unregister_logger(&ebt_log_logger); ebt_unregister_watcher(&log); } diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index aae26ae2e61..ce617b3dbbb 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -3,6 +3,7 @@ * * Authors: * Bart De Schuymer <bdschuym@pandora.be> + * Harald Welte <laforge@netfilter.org> * * November, 2004 * @@ -115,14 +116,13 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size) return skb; } -static void ebt_ulog(const struct sk_buff *skb, unsigned int hooknr, +static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) + const struct ebt_ulog_info *uloginfo, const char *prefix) { ebt_ulog_packet_msg_t *pm; size_t size, copy_len; struct nlmsghdr *nlh; - struct ebt_ulog_info *uloginfo = (struct ebt_ulog_info *)data; unsigned int group = uloginfo->nlgroup; ebt_ulog_buff_t *ub = &ulog_buffers[group]; spinlock_t *lock = &ub->lock; @@ -216,6 +216,39 @@ alloc_failure: goto unlock; } +/* this function is registered with the netfilter core */ +static void ebt_log_packet(unsigned int pf, unsigned int hooknum, + const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const struct nf_loginfo *li, + const char *prefix) +{ + struct ebt_ulog_info loginfo; + + if (!li || li->type != NF_LOG_TYPE_ULOG) { + loginfo.nlgroup = EBT_ULOG_DEFAULT_NLGROUP; + loginfo.cprange = 0; + loginfo.qthreshold = EBT_ULOG_DEFAULT_QTHRESHOLD; + loginfo.prefix[0] = '\0'; + } else { + loginfo.nlgroup = li->u.ulog.group; + loginfo.cprange = li->u.ulog.copy_len; + loginfo.qthreshold = li->u.ulog.qthreshold; + strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix)); + } + + ebt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); +} + +static void ebt_ulog(const struct sk_buff *skb, unsigned int hooknr, + const struct net_device *in, const struct net_device *out, + const void *data, unsigned int datalen) +{ + struct ebt_ulog_info *uloginfo = (struct ebt_ulog_info *)data; + + ebt_ulog_packet(hooknr, skb, in, out, uloginfo, NULL); +} + + static int ebt_ulog_check(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *data, unsigned int datalen) { @@ -240,6 +273,12 @@ static struct ebt_watcher ulog = { .me = THIS_MODULE, }; +static struct nf_logger ebt_ulog_logger = { + .name = EBT_ULOG_WATCHER, + .logfn = &ebt_log_packet, + .me = THIS_MODULE, +}; + static int __init init(void) { int i, ret = 0; @@ -265,6 +304,13 @@ static int __init init(void) else if ((ret = ebt_register_watcher(&ulog))) sock_release(ebtulognl->sk_socket); + if (nf_log_register(PF_BRIDGE, &ebt_ulog_logger) < 0) { + printk(KERN_WARNING "ebt_ulog: not logging via ulog " + "since somebody else already registered for PF_BRIDGE\n"); + /* we cannot make module load fail here, since otherwise + * ebtables userspace would abort */ + } + + return ret; } @@ -273,6 +319,7 @@ static void __exit fini(void) ebt_ulog_buff_t
*ub; int i; + nf_log_unregister_logger(&ebt_ulog_logger); ebt_unregister_watcher(&ulog); for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { ub = &ulog_buffers[i]; diff --git a/net/core/datagram.c b/net/core/datagram.c index 1bcfef51ac5..f8d322e1ea9 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -47,6 +47,7 @@ #include <linux/rtnetlink.h> #include <linux/poll.h> #include <linux/highmem.h> +#include <linux/spinlock.h> #include <net/protocol.h> #include <linux/skbuff.h> @@ -200,6 +201,41 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb) } /** + * skb_kill_datagram - Free a datagram skbuff forcibly + * @sk: socket + * @skb: datagram skbuff + * @flags: MSG_ flags + * + * This function frees a datagram skbuff that was received by + * skb_recv_datagram. The flags argument must match the one + * used for skb_recv_datagram. + * + * If the MSG_PEEK flag is set, and the packet is still on the + * receive queue of the socket, it will be taken off the queue + * before it is freed. + * + * This function currently only disables BH when acquiring the + * sk_receive_queue lock. Therefore it must not be used in a + * context where that lock is acquired in an IRQ context. + */ + +void skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) +{ + if (flags & MSG_PEEK) { + spin_lock_bh(&sk->sk_receive_queue.lock); + if (skb == skb_peek(&sk->sk_receive_queue)) { + __skb_unlink(skb, &sk->sk_receive_queue); + atomic_dec(&skb->users); + } + spin_unlock_bh(&sk->sk_receive_queue.lock); + } + + kfree_skb(skb); +} + +EXPORT_SYMBOL(skb_kill_datagram); + +/** * skb_copy_datagram_iovec - Copy a datagram to an iovec. * @skb: buffer to copy * @offset: offset in the buffer to start copying from diff --git a/net/core/dev.c b/net/core/dev.c index a5efc9ae010..5081287923d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -626,7 +626,7 @@ struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mas * Network device names need to be valid file names to * to allow sysfs to work */ -static int dev_valid_name(const char *name) +int dev_valid_name(const char *name) { return !(*name == '\0' || !strcmp(name, ".") @@ -3270,13 +3270,13 @@ EXPORT_SYMBOL(__dev_get_by_index); EXPORT_SYMBOL(__dev_get_by_name); EXPORT_SYMBOL(__dev_remove_pack); EXPORT_SYMBOL(__skb_linearize); +EXPORT_SYMBOL(dev_valid_name); EXPORT_SYMBOL(dev_add_pack); EXPORT_SYMBOL(dev_alloc_name); EXPORT_SYMBOL(dev_close); EXPORT_SYMBOL(dev_get_by_flags); EXPORT_SYMBOL(dev_get_by_index); EXPORT_SYMBOL(dev_get_by_name); -EXPORT_SYMBOL(dev_ioctl); EXPORT_SYMBOL(dev_open); EXPORT_SYMBOL(dev_queue_xmit); EXPORT_SYMBOL(dev_remove_pack); diff --git a/net/core/filter.c b/net/core/filter.c index 3a10e0bc90e..9eb9d0017a0 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -13,6 +13,7 @@ * 2 of the License, or (at your option) any later version. * * Andi Kleen - Fix a few bad bugs and races. + * Kris Katterjohn - Added many additional checks in sk_chk_filter() */ #include <linux/module.h> @@ -74,7 +75,7 @@ static inline void *load_pointer(struct sk_buff *skb, int k, * len is the number of filter blocks in the array. 
*/ -int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) +unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) { struct sock_filter *fentry; /* We walk down these */ void *ptr; @@ -240,9 +241,9 @@ load_b: A = X; continue; case BPF_RET|BPF_K: - return ((unsigned int)fentry->k); + return fentry->k; case BPF_RET|BPF_A: - return ((unsigned int)A); + return A; case BPF_ST: mem[fentry->k] = A; continue; @@ -250,7 +251,7 @@ load_b: mem[fentry->k] = X; continue; default: - /* Invalid instruction counts as RET */ + WARN_ON(1); return 0; } @@ -283,8 +284,8 @@ load_b: * * Check the user's filter code. If we let some ugly * filter code slip through kaboom! The filter must contain - * no references or jumps that are out of range, no illegal instructions - * and no backward jumps. It must end with a RET instruction + * no references or jumps that are out of range, no illegal + * instructions, and must end with a RET instruction. * * Returns 0 if the rule set is legal or a negative errno code if not. */ @@ -300,38 +301,85 @@ int sk_chk_filter(struct sock_filter *filter, int flen) for (pc = 0; pc < flen; pc++) { /* all jumps are forward as they are not signed */ ftest = &filter[pc]; - if (BPF_CLASS(ftest->code) == BPF_JMP) { - /* but they mustn't jump off the end */ - if (BPF_OP(ftest->code) == BPF_JA) { - /* - * Note, the large ftest->k might cause loops. - * Compare this with conditional jumps below, - * where offsets are limited. --ANK (981016) - */ - if (ftest->k >= (unsigned)(flen-pc-1)) - return -EINVAL; - } else { - /* for conditionals both must be safe */ - if (pc + ftest->jt +1 >= flen || - pc + ftest->jf +1 >= flen) - return -EINVAL; - } - } - /* check for division by zero -Kris Katterjohn 2005-10-30 */ - if (ftest->code == (BPF_ALU|BPF_DIV|BPF_K) && ftest->k == 0) - return -EINVAL; + /* Only allow valid instructions */ + switch (ftest->code) { + case BPF_ALU|BPF_ADD|BPF_K: + case BPF_ALU|BPF_ADD|BPF_X: + case BPF_ALU|BPF_SUB|BPF_K: + case BPF_ALU|BPF_SUB|BPF_X: + case BPF_ALU|BPF_MUL|BPF_K: + case BPF_ALU|BPF_MUL|BPF_X: + case BPF_ALU|BPF_DIV|BPF_X: + case BPF_ALU|BPF_AND|BPF_K: + case BPF_ALU|BPF_AND|BPF_X: + case BPF_ALU|BPF_OR|BPF_K: + case BPF_ALU|BPF_OR|BPF_X: + case BPF_ALU|BPF_LSH|BPF_K: + case BPF_ALU|BPF_LSH|BPF_X: + case BPF_ALU|BPF_RSH|BPF_K: + case BPF_ALU|BPF_RSH|BPF_X: + case BPF_ALU|BPF_NEG: + case BPF_LD|BPF_W|BPF_ABS: + case BPF_LD|BPF_H|BPF_ABS: + case BPF_LD|BPF_B|BPF_ABS: + case BPF_LD|BPF_W|BPF_LEN: + case BPF_LD|BPF_W|BPF_IND: + case BPF_LD|BPF_H|BPF_IND: + case BPF_LD|BPF_B|BPF_IND: + case BPF_LD|BPF_IMM: + case BPF_LDX|BPF_W|BPF_LEN: + case BPF_LDX|BPF_B|BPF_MSH: + case BPF_LDX|BPF_IMM: + case BPF_MISC|BPF_TAX: + case BPF_MISC|BPF_TXA: + case BPF_RET|BPF_K: + case BPF_RET|BPF_A: + break; + + /* Some instructions need special checks */ - /* check that memory operations use valid addresses. */ - if (ftest->k >= BPF_MEMWORDS) { - /* but it might not be a memory operation... */ - switch (ftest->code) { - case BPF_ST: - case BPF_STX: - case BPF_LD|BPF_MEM: - case BPF_LDX|BPF_MEM: + case BPF_ALU|BPF_DIV|BPF_K: + /* check for division by zero */ + if (ftest->k == 0) return -EINVAL; - } + break; + + case BPF_LD|BPF_MEM: + case BPF_LDX|BPF_MEM: + case BPF_ST: + case BPF_STX: + /* check for invalid memory addresses */ + if (ftest->k >= BPF_MEMWORDS) + return -EINVAL; + break; + + case BPF_JMP|BPF_JA: + /* + * Note, the large ftest->k might cause loops. 
+ * Compare this with conditional jumps below, + * where offsets are limited. --ANK (981016) + */ + if (ftest->k >= (unsigned)(flen-pc-1)) + return -EINVAL; + break; + + case BPF_JMP|BPF_JEQ|BPF_K: + case BPF_JMP|BPF_JEQ|BPF_X: + case BPF_JMP|BPF_JGE|BPF_K: + case BPF_JMP|BPF_JGE|BPF_X: + case BPF_JMP|BPF_JGT|BPF_K: + case BPF_JMP|BPF_JGT|BPF_X: + case BPF_JMP|BPF_JSET|BPF_K: + case BPF_JMP|BPF_JSET|BPF_X: + /* for conditionals both must be safe */ + if (pc + ftest->jt + 1 >= flen || + pc + ftest->jf + 1 >= flen) + return -EINVAL; + break; + + default: + return -EINVAL; } } diff --git a/net/core/flow.c b/net/core/flow.c index 7e95b39de9f..c4f25385029 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -23,6 +23,7 @@ #include <net/flow.h> #include <asm/atomic.h> #include <asm/semaphore.h> +#include <linux/security.h> struct flow_cache_entry { struct flow_cache_entry *next; @@ -30,6 +31,7 @@ struct flow_cache_entry { u8 dir; struct flowi key; u32 genid; + u32 sk_sid; void *object; atomic_t *object_ref; }; @@ -162,7 +164,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) return 0; } -void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, +void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir, flow_resolve_t resolver) { struct flow_cache_entry *fle, **head; @@ -186,6 +188,7 @@ void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, for (fle = *head; fle; fle = fle->next) { if (fle->family == family && fle->dir == dir && + fle->sk_sid == sk_sid && flow_key_compare(key, &fle->key) == 0) { if (fle->genid == atomic_read(&flow_cache_genid)) { void *ret = fle->object; @@ -210,6 +213,7 @@ void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, *head = fle; fle->family = family; fle->dir = dir; + fle->sk_sid = sk_sid; memcpy(&fle->key, key, sizeof(*key)); fle->object = NULL; flow_count(cpu)++; @@ -221,7 +225,7 @@ nocache: void *obj; atomic_t *obj_ref; - resolver(key, family, dir, &obj, &obj_ref); + resolver(key, sk_sid, family, dir, &obj, &obj_ref); if (fle) { fle->genid = atomic_read(&flow_cache_genid); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index e2137f3e489..e1da81d261d 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -84,16 +84,11 @@ static ssize_t netdev_store(struct class_device *dev, return ret; } -/* generate a read-only network device class attribute */ -#define NETDEVICE_ATTR(field, format_string) \ -NETDEVICE_SHOW(field, format_string) \ -static CLASS_DEVICE_ATTR(field, S_IRUGO, show_##field, NULL) \ - -NETDEVICE_ATTR(addr_len, fmt_dec); -NETDEVICE_ATTR(iflink, fmt_dec); -NETDEVICE_ATTR(ifindex, fmt_dec); -NETDEVICE_ATTR(features, fmt_long_hex); -NETDEVICE_ATTR(type, fmt_dec); +NETDEVICE_SHOW(addr_len, fmt_dec); +NETDEVICE_SHOW(iflink, fmt_dec); +NETDEVICE_SHOW(ifindex, fmt_dec); +NETDEVICE_SHOW(features, fmt_long_hex); +NETDEVICE_SHOW(type, fmt_dec); /* use same locking rules as GIFHWADDR ioctl's */ static ssize_t format_addr(char *buf, const unsigned char *addr, int len) @@ -136,10 +131,6 @@ static ssize_t show_carrier(struct class_device *dev, char *buf) return -EINVAL; } -static CLASS_DEVICE_ATTR(address, S_IRUGO, show_address, NULL); -static CLASS_DEVICE_ATTR(broadcast, S_IRUGO, show_broadcast, NULL); -static CLASS_DEVICE_ATTR(carrier, S_IRUGO, show_carrier, NULL); - /* read-write attributes */ NETDEVICE_SHOW(mtu, fmt_dec); @@ -153,8 +144,6 @@ static ssize_t store_mtu(struct class_device *dev, const char *buf, size_t len) return netdev_store(dev, buf, len, change_mtu); } -static 
CLASS_DEVICE_ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu); - NETDEVICE_SHOW(flags, fmt_hex); static int change_flags(struct net_device *net, unsigned long new_flags) @@ -167,8 +156,6 @@ static ssize_t store_flags(struct class_device *dev, const char *buf, size_t len return netdev_store(dev, buf, len, change_flags); } -static CLASS_DEVICE_ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags); - NETDEVICE_SHOW(tx_queue_len, fmt_ulong); static int change_tx_queue_len(struct net_device *net, unsigned long new_len) @@ -182,9 +169,6 @@ static ssize_t store_tx_queue_len(struct class_device *dev, const char *buf, siz return netdev_store(dev, buf, len, change_tx_queue_len); } -static CLASS_DEVICE_ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, - store_tx_queue_len); - NETDEVICE_SHOW(weight, fmt_dec); static int change_weight(struct net_device *net, unsigned long new_weight) @@ -198,24 +182,21 @@ static ssize_t store_weight(struct class_device *dev, const char *buf, size_t le return netdev_store(dev, buf, len, change_weight); } -static CLASS_DEVICE_ATTR(weight, S_IRUGO | S_IWUSR, show_weight, - store_weight); - - -static struct class_device_attribute *net_class_attributes[] = { - &class_device_attr_ifindex, - &class_device_attr_iflink, - &class_device_attr_addr_len, - &class_device_attr_tx_queue_len, - &class_device_attr_features, - &class_device_attr_mtu, - &class_device_attr_flags, - &class_device_attr_weight, - &class_device_attr_type, - &class_device_attr_address, - &class_device_attr_broadcast, - &class_device_attr_carrier, - NULL +static struct class_device_attribute net_class_attributes[] = { + __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), + __ATTR(iflink, S_IRUGO, show_iflink, NULL), + __ATTR(ifindex, S_IRUGO, show_ifindex, NULL), + __ATTR(features, S_IRUGO, show_features, NULL), + __ATTR(type, S_IRUGO, show_type, NULL), + __ATTR(address, S_IRUGO, show_address, NULL), + __ATTR(broadcast, S_IRUGO, show_broadcast, NULL), + __ATTR(carrier, S_IRUGO, show_carrier, NULL), + __ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu), + __ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags), + __ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, + store_tx_queue_len), + __ATTR(weight, S_IRUGO | S_IWUSR, show_weight, store_weight), + {} }; /* Show a given an attribute in the statistics group */ @@ -369,14 +350,14 @@ static struct attribute_group wireless_group = { #endif #ifdef CONFIG_HOTPLUG -static int netdev_hotplug(struct class_device *cd, char **envp, - int num_envp, char *buf, int size) +static int netdev_uevent(struct class_device *cd, char **envp, + int num_envp, char *buf, int size) { struct net_device *dev = to_net_dev(cd); int i = 0; int n; - /* pass interface in env to hotplug. */ + /* pass interface to uevent. 
*/ envp[i++] = buf; n = snprintf(buf, size, "INTERFACE=%s", dev->name) + 1; buf += n; @@ -407,8 +388,9 @@ static void netdev_release(struct class_device *cd) static struct class net_class = { .name = "net", .release = netdev_release, + .class_dev_attrs = net_class_attributes, #ifdef CONFIG_HOTPLUG - .hotplug = netdev_hotplug, + .uevent = netdev_uevent, #endif }; @@ -431,8 +413,6 @@ void netdev_unregister_sysfs(struct net_device * net) int netdev_register_sysfs(struct net_device *net) { struct class_device *class_dev = &(net->class_dev); - int i; - struct class_device_attribute *attr; int ret; class_dev->class = &net_class; @@ -442,12 +422,6 @@ int netdev_register_sysfs(struct net_device *net) if ((ret = class_device_register(class_dev))) goto out; - for (i = 0; (attr = net_class_attributes[i]) != NULL; i++) { - if ((ret = class_device_create_file(class_dev, attr))) - goto out_unreg; - } - - if (net->get_stats && (ret = sysfs_create_group(&class_dev->kobj, &netstat_group))) goto out_unreg; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 49424a42a2c..281a632fa6a 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -13,6 +13,7 @@ #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/string.h> +#include <linux/if_arp.h> #include <linux/inetdevice.h> #include <linux/inet.h> #include <linux/interrupt.h> diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 7fc3e9e28c3..631056d44b7 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -473,7 +473,6 @@ static char version[] __initdata = VERSION; static int pktgen_remove_device(struct pktgen_thread* t, struct pktgen_dev *i); static int pktgen_add_device(struct pktgen_thread* t, const char* ifname); -static struct pktgen_thread* pktgen_find_thread(const char* name); static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread* t, const char* ifname); static int pktgen_device_event(struct notifier_block *, unsigned long, void *); static void pktgen_run_all_threads(void); @@ -487,9 +486,9 @@ static unsigned int fmt_ip6(char *s,const char ip[16]); /* Module parameters, defaults. 
*/ static int pg_count_d = 1000; /* 1000 pkts by default */ -static int pg_delay_d = 0; -static int pg_clone_skb_d = 0; -static int debug = 0; +static int pg_delay_d; +static int pg_clone_skb_d; +static int debug; static DECLARE_MUTEX(pktgen_sem); static struct pktgen_thread *pktgen_threads = NULL; @@ -2883,7 +2882,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char* ifname) return add_dev_to_thread(t, pkt_dev); } -static struct pktgen_thread *pktgen_find_thread(const char* name) +static struct pktgen_thread * __init pktgen_find_thread(const char* name) { struct pktgen_thread *t = NULL; @@ -2900,7 +2899,7 @@ static struct pktgen_thread *pktgen_find_thread(const char* name) return t; } -static int pktgen_create_thread(const char* name, int cpu) +static int __init pktgen_create_thread(const char* name, int cpu) { struct pktgen_thread *t = NULL; struct proc_dir_entry *pe; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 83fee37de38..070f91cfde5 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -135,17 +135,13 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, int fclone) { + struct skb_shared_info *shinfo; struct sk_buff *skb; u8 *data; /* Get the HEAD */ - if (fclone) - skb = kmem_cache_alloc(skbuff_fclone_cache, - gfp_mask & ~__GFP_DMA); - else - skb = kmem_cache_alloc(skbuff_head_cache, - gfp_mask & ~__GFP_DMA); - + skb = kmem_cache_alloc(fclone ? skbuff_fclone_cache : skbuff_head_cache, + gfp_mask & ~__GFP_DMA); if (!skb) goto out; @@ -162,6 +158,16 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, skb->data = data; skb->tail = data; skb->end = data + size; + /* make sure we initialize shinfo sequentially */ + shinfo = skb_shinfo(skb); + atomic_set(&shinfo->dataref, 1); + shinfo->nr_frags = 0; + shinfo->tso_size = 0; + shinfo->tso_segs = 0; + shinfo->ufo_size = 0; + shinfo->ip6_frag_id = 0; + shinfo->frag_list = NULL; + if (fclone) { struct sk_buff *child = skb + 1; atomic_t *fclone_ref = (atomic_t *) (child + 1); @@ -171,13 +177,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, child->fclone = SKB_FCLONE_UNAVAILABLE; } - atomic_set(&(skb_shinfo(skb)->dataref), 1); - skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->tso_size = 0; - skb_shinfo(skb)->tso_segs = 0; - skb_shinfo(skb)->frag_list = NULL; - skb_shinfo(skb)->ufo_size = 0; - skb_shinfo(skb)->ip6_frag_id = 0; out: return skb; nodata: diff --git a/net/core/sock.c b/net/core/sock.c index 13cc3be4f05..6465b0e4c8c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1488,7 +1488,7 @@ int proto_register(struct proto *prot, int alloc_slab) } } - if (prot->twsk_obj_size) { + if (prot->twsk_prot != NULL) { static const char mask[] = "tw_sock_%s"; timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); @@ -1497,11 +1497,12 @@ int proto_register(struct proto *prot, int alloc_slab) goto out_free_request_sock_slab; sprintf(timewait_sock_slab_name, mask, prot->name); - prot->twsk_slab = kmem_cache_create(timewait_sock_slab_name, - prot->twsk_obj_size, - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (prot->twsk_slab == NULL) + prot->twsk_prot->twsk_slab = + kmem_cache_create(timewait_sock_slab_name, + prot->twsk_prot->twsk_obj_size, + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (prot->twsk_prot->twsk_slab == NULL) goto out_free_timewait_sock_slab_name; } } @@ -1548,12 +1549,12 @@ void proto_unregister(struct proto *prot) prot->rsk_prot->slab = NULL; } - if (prot->twsk_slab != 
NULL) { - const char *name = kmem_cache_name(prot->twsk_slab); + if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) { + const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab); - kmem_cache_destroy(prot->twsk_slab); + kmem_cache_destroy(prot->twsk_prot->twsk_slab); kfree(name); - prot->twsk_slab = NULL; + prot->twsk_prot->twsk_slab = NULL; } } diff --git a/net/core/stream.c b/net/core/stream.c index 15bfd03e802..35e25259fd9 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -55,8 +55,9 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) int done; do { - if (sk->sk_err) - return sock_error(sk); + int err = sock_error(sk); + if (err) + return err; if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) return -EPIPE; if (!*timeo_p) @@ -67,6 +68,7 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); sk->sk_write_pending++; done = sk_wait_event(sk, timeo_p, + !sk->sk_err && !((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))); finish_wait(sk->sk_sleep, &wait); @@ -137,7 +139,9 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); sk->sk_write_pending++; - sk_wait_event(sk, &current_timeo, sk_stream_memory_free(sk) && + sk_wait_event(sk, &current_timeo, !sk->sk_err && + !(sk->sk_shutdown & SEND_SHUTDOWN) && + sk_stream_memory_free(sk) && vm_wait); sk->sk_write_pending--; diff --git a/net/core/utils.c b/net/core/utils.c index 7b5970fc9e4..ac1d1fcf867 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -162,7 +162,7 @@ EXPORT_SYMBOL(net_srandom); * is otherwise not dependent on the TCP/IP stack. */ -__u32 in_aton(const char *str) +__be32 in_aton(const char *str) { unsigned long l; unsigned int val; @@ -175,7 +175,7 @@ __u32 in_aton(const char *str) if (*str != '\0') { val = 0; - while (*str != '\0' && *str != '.'
&& *str != '\n') { val *= 10; val += *str - '0'; diff --git a/net/dccp/Makefile b/net/dccp/Makefile index 344a8da153f..87b27fff6e3 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -1,3 +1,7 @@ +obj-$(CONFIG_IPV6) += dccp_ipv6.o + +dccp_ipv6-y := ipv6.o + obj-$(CONFIG_IP_DCCP) += dccp.o dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \ diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index c9a62cca22f..ce9cb77c5c2 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -55,8 +55,8 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) from = av->dccpav_buf + av->dccpav_buf_head; /* Check if buf_head wraps */ - if (av->dccpav_buf_head + len > av->dccpav_vec_len) { - const u32 tailsize = (av->dccpav_vec_len - av->dccpav_buf_head); + if ((int)av->dccpav_buf_head + len > av->dccpav_vec_len) { + const u32 tailsize = av->dccpav_vec_len - av->dccpav_buf_head; memcpy(to, from, tailsize); to += tailsize; @@ -93,8 +93,14 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) struct dccp_ackvec *dccp_ackvec_alloc(const unsigned int len, const gfp_t priority) { - struct dccp_ackvec *av = kmalloc(sizeof(*av) + len, priority); + struct dccp_ackvec *av; + BUG_ON(len == 0); + + if (len > DCCP_MAX_ACKVEC_LEN) + return NULL; + + av = kmalloc(sizeof(*av) + len, priority); if (av != NULL) { av->dccpav_buf_len = len; av->dccpav_buf_head = @@ -117,13 +123,13 @@ void dccp_ackvec_free(struct dccp_ackvec *av) } static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av, - const unsigned int index) + const u8 index) { return av->dccpav_buf[index] & DCCP_ACKVEC_STATE_MASK; } static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av, - const unsigned int index) + const u8 index) { return av->dccpav_buf[index] & DCCP_ACKVEC_LEN_MASK; } @@ -135,7 +141,7 @@ static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av, */ static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av, const unsigned int packets, - const unsigned char state) + const unsigned char state) { unsigned int gap; signed long new_head; @@ -223,7 +229,7 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, * could reduce the complexity of this scan.) */ u64 delta = dccp_delta_seqno(ackno, av->dccpav_buf_ackno); - unsigned int index = av->dccpav_buf_head; + u8 index = av->dccpav_buf_head; while (1) { const u8 len = dccp_ackvec_len(av, index); @@ -291,7 +297,7 @@ void dccp_ackvec_print(const struct dccp_ackvec *av) } #endif -static void dccp_ackvec_trow_away_ack_record(struct dccp_ackvec *av) +static void dccp_ackvec_throw_away_ack_record(struct dccp_ackvec *av) { /* * As we're keeping track of the ack vector size (dccpav_vec_len) and @@ -301,9 +307,10 @@ static void dccp_ackvec_trow_away_ack_record(struct dccp_ackvec *av) * draft-ietf-dccp-spec-11.txt Appendix A. 
-acme */ #if 0 - av->dccpav_buf_tail = av->dccpav_ack_ptr + 1; - if (av->dccpav_buf_tail >= av->dccpav_vec_len) - av->dccpav_buf_tail -= av->dccpav_vec_len; + u32 new_buf_tail = av->dccpav_ack_ptr + 1; + if (new_buf_tail >= av->dccpav_vec_len) + new_buf_tail -= av->dccpav_vec_len; + av->dccpav_buf_tail = new_buf_tail; #endif av->dccpav_vec_len -= av->dccpav_sent_len; } @@ -326,7 +333,7 @@ void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk, debug_prefix, 1, (unsigned long long)av->dccpav_ack_seqno, (unsigned long long)av->dccpav_ack_ackno); - dccp_ackvec_trow_away_ack_record(av); + dccp_ackvec_throw_away_ack_record(av); av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1; } } @@ -389,7 +396,7 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av, av->dccpav_ack_seqno, (unsigned long long) av->dccpav_ack_ackno); - dccp_ackvec_trow_away_ack_record(av); + dccp_ackvec_throw_away_ack_record(av); } /* * If dccpav_ack_seqno was not received, no problem diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index d0fd6c60c57..f7dfb5f67b8 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h @@ -54,16 +54,16 @@ * @dccpav_buf - circular buffer of acknowledgeable packets */ struct dccp_ackvec { - unsigned int dccpav_buf_head; - unsigned int dccpav_buf_tail; u64 dccpav_buf_ackno; u64 dccpav_ack_seqno; u64 dccpav_ack_ackno; - unsigned int dccpav_ack_ptr; - unsigned int dccpav_sent_len; - unsigned int dccpav_vec_len; - unsigned int dccpav_buf_len; struct timeval dccpav_time; + u8 dccpav_buf_head; + u8 dccpav_buf_tail; + u8 dccpav_ack_ptr; + u8 dccpav_sent_len; + u8 dccpav_vec_len; + u8 dccpav_buf_len; u8 dccpav_buf_nonce; u8 dccpav_ack_nonce; u8 dccpav_buf[0]; diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index c37eeeaf5c6..de681c6ad08 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -21,6 +21,8 @@ #define CCID_MAX 255 +struct tcp_info; + struct ccid { unsigned char ccid_id; const char *ccid_name; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index f97b85d55ad..93f26dd6e6c 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -59,7 +59,7 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo); #define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */ -extern struct proto dccp_v4_prot; +extern struct proto dccp_prot; /* is seq1 < seq2 ? 
*/ static inline int before48(const u64 seq1, const u64 seq2) @@ -228,6 +228,9 @@ extern int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, const struct dccp_hdr *dh, const unsigned len); +extern int dccp_v4_init_sock(struct sock *sk); +extern int dccp_v4_destroy_sock(struct sock *sk); + extern void dccp_close(struct sock *sk, long timeout); extern struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, @@ -238,6 +241,7 @@ extern struct sk_buff *dccp_make_reset(struct sock *sk, extern int dccp_connect(struct sock *sk); extern int dccp_disconnect(struct sock *sk, int flags); +extern void dccp_unhash(struct sock *sk); extern int dccp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); extern int dccp_setsockopt(struct sock *sk, int level, int optname, @@ -249,6 +253,13 @@ extern int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); extern void dccp_shutdown(struct sock *sk, int how); +extern int inet_dccp_listen(struct socket *sock, int backlog); +extern unsigned int dccp_poll(struct file *file, struct socket *sock, + poll_table *wait); +extern void dccp_v4_send_check(struct sock *sk, int len, + struct sk_buff *skb); +extern int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len); extern int dccp_v4_checksum(const struct sk_buff *skb, const u32 saddr, const u32 daddr); @@ -256,6 +267,17 @@ extern int dccp_v4_checksum(const struct sk_buff *skb, extern int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code); extern void dccp_send_close(struct sock *sk, const int active); +extern int dccp_invalid_packet(struct sk_buff *skb); + +static inline int dccp_bad_service_code(const struct sock *sk, + const __u32 service) +{ + const struct dccp_sock *dp = dccp_sk(sk); + + if (dp->dccps_service == service) + return 0; + return !dccp_list_has_service(dp->dccps_service_list, service); +} struct dccp_skb_cb { __u8 dccpd_type:4; diff --git a/net/dccp/diag.c b/net/dccp/diag.c index f675d8e642d..3f78c00e382 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -28,7 +28,7 @@ static void dccp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_retransmits = icsk->icsk_retransmits; info->tcpi_probes = icsk->icsk_probes_out; info->tcpi_backoff = icsk->icsk_backoff; - info->tcpi_pmtu = dp->dccps_pmtu_cookie; + info->tcpi_pmtu = icsk->icsk_pmtu_cookie; if (dp->dccps_options.dccpo_send_ack_vector) info->tcpi_options |= TCPI_OPT_SACK; diff --git a/net/dccp/input.c b/net/dccp/input.c index 3454d594190..b6cba72b44e 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -151,29 +151,12 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) return 0; } -int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct dccp_hdr *dh, const unsigned len) +static inline int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb, + const struct dccp_hdr *dh, + const unsigned len) { struct dccp_sock *dp = dccp_sk(sk); - if (dccp_check_seqno(sk, skb)) - goto discard; - - if (dccp_parse_options(sk, skb)) - goto discard; - - if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) - dccp_event_ack_recv(sk, skb); - - if (dp->dccps_options.dccpo_send_ack_vector && - dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, - DCCP_SKB_CB(skb)->dccpd_seq, - DCCP_ACKVEC_STATE_RECEIVED)) - goto discard; - - ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, 
skb); - ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); - switch (dccp_hdr(skb)->dccph_type) { case DCCP_PKT_DATAACK: case DCCP_PKT_DATA: @@ -250,6 +233,37 @@ discard: return 0; } +int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, + const struct dccp_hdr *dh, const unsigned len) +{ + struct dccp_sock *dp = dccp_sk(sk); + + if (dccp_check_seqno(sk, skb)) + goto discard; + + if (dccp_parse_options(sk, skb)) + goto discard; + + if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) + dccp_event_ack_recv(sk, skb); + + if (dp->dccps_options.dccpo_send_ack_vector && + dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, + DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_ACKVEC_STATE_RECEIVED)) + goto discard; + + ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); + ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); + + return __dccp_rcv_established(sk, skb, dh, len); +discard: + __kfree_skb(skb); + return 0; +} + +EXPORT_SYMBOL_GPL(dccp_rcv_established); + static int dccp_rcv_request_sent_state_process(struct sock *sk, struct sk_buff *skb, const struct dccp_hdr *dh, @@ -286,6 +300,12 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, goto out_invalid_packet; } + if (dp->dccps_options.dccpo_send_ack_vector && + dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, + DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_ACKVEC_STATE_RECEIVED)) + goto out_invalid_packet; /* FIXME: change error code */ + dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq; dccp_update_gsr(sk, dp->dccps_isr); /* @@ -309,7 +329,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, goto out_invalid_packet; } - dccp_sync_mss(sk, dp->dccps_pmtu_cookie); + dccp_sync_mss(sk, icsk->icsk_pmtu_cookie); /* * Step 10: Process REQUEST state (second part) @@ -329,7 +349,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, dccp_set_state(sk, DCCP_PARTOPEN); /* Make sure socket is routed, for correct metrics. 
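 *    (The rebuild now dispatches through icsk->icsk_af_ops instead of a
 *    hard-coded IPv4 helper, so this REQUEST-state code serves both
 *    families: inet_sk_rebuild_header() via dccp_ipv4_af_ops and
 *    inet6_sk_rebuild_header() via dccp_ipv6_af_ops, both defined later
 *    in this patch.)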
*/ - inet_sk_rebuild_header(sk); + icsk->icsk_af_ops->rebuild_header(sk); if (!sock_flag(sk, SOCK_DEAD)) { sk->sk_state_change(sk); @@ -398,9 +418,9 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk, if (dh->dccph_type == DCCP_PKT_DATAACK || dh->dccph_type == DCCP_PKT_DATA) { - dccp_rcv_established(sk, skb, dh, len); + __dccp_rcv_established(sk, skb, dh, len); queued = 1; /* packet was queued - (by dccp_rcv_established) */ + (by __dccp_rcv_established) */ } break; } @@ -444,7 +464,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, */ if (sk->sk_state == DCCP_LISTEN) { if (dh->dccph_type == DCCP_PKT_REQUEST) { - if (dccp_v4_conn_request(sk, skb) < 0) + if (inet_csk(sk)->icsk_af_ops->conn_request(sk, + skb) < 0) return 1; /* FIXME: do congestion control initialization */ @@ -471,14 +492,14 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_event_ack_recv(sk, skb); - ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); - ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); - if (dp->dccps_options.dccpo_send_ack_vector && dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_ACKVEC_STATE_RECEIVED)) goto discard; + + ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); + ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); } /* @@ -566,3 +587,5 @@ discard: } return 0; } + +EXPORT_SYMBOL_GPL(dccp_rcv_state_process); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 656e13e38cf..3f244670764 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -19,7 +19,9 @@ #include <net/icmp.h> #include <net/inet_hashtables.h> +#include <net/inet_sock.h> #include <net/sock.h> +#include <net/timewait_sock.h> #include <net/tcp_states.h> #include <net/xfrm.h> @@ -37,7 +39,8 @@ EXPORT_SYMBOL_GPL(dccp_hashinfo); static int dccp_v4_get_port(struct sock *sk, const unsigned short snum) { - return inet_csk_get_port(&dccp_hashinfo, sk, snum); + return inet_csk_get_port(&dccp_hashinfo, sk, snum, + inet_csk_bind_conflict); } static void dccp_v4_hash(struct sock *sk) @@ -45,171 +48,14 @@ static void dccp_v4_hash(struct sock *sk) inet_hash(&dccp_hashinfo, sk); } -static void dccp_v4_unhash(struct sock *sk) +void dccp_unhash(struct sock *sk) { inet_unhash(&dccp_hashinfo, sk); } -/* called with local bh disabled */ -static int __dccp_v4_check_established(struct sock *sk, const __u16 lport, - struct inet_timewait_sock **twp) -{ - struct inet_sock *inet = inet_sk(sk); - const u32 daddr = inet->rcv_saddr; - const u32 saddr = inet->daddr; - const int dif = sk->sk_bound_dev_if; - INET_ADDR_COOKIE(acookie, saddr, daddr) - const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); - struct inet_ehash_bucket *head = inet_ehash_bucket(&dccp_hashinfo, hash); - const struct sock *sk2; - const struct hlist_node *node; - struct inet_timewait_sock *tw; - - prefetch(head->chain.first); - write_lock(&head->lock); - - /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) { - tw = inet_twsk(sk2); - - if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) - goto not_unique; - } - tw = NULL; - - /* And established part... */ - sk_for_each(sk2, node, &head->chain) { - if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) - goto not_unique; - } +EXPORT_SYMBOL_GPL(dccp_unhash); - /* Must record num and sport now. 
Otherwise we will see - * in hash table socket with a funny identity. */ - inet->num = lport; - inet->sport = htons(lport); - sk->sk_hash = hash; - BUG_TRAP(sk_unhashed(sk)); - __sk_add_node(sk, &head->chain); - sock_prot_inc_use(sk->sk_prot); - write_unlock(&head->lock); - - if (twp != NULL) { - *twp = tw; - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - } else if (tw != NULL) { - /* Silly. Should hash-dance instead... */ - inet_twsk_deschedule(tw, &dccp_death_row); - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - - inet_twsk_put(tw); - } - - return 0; - -not_unique: - write_unlock(&head->lock); - return -EADDRNOTAVAIL; -} - -/* - * Bind a port for a connect operation and hash it. - */ -static int dccp_v4_hash_connect(struct sock *sk) -{ - const unsigned short snum = inet_sk(sk)->num; - struct inet_bind_hashbucket *head; - struct inet_bind_bucket *tb; - int ret; - - if (snum == 0) { - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int remaining = (high - low) + 1; - int rover = net_random() % (high - low) + low; - struct hlist_node *node; - struct inet_timewait_sock *tw = NULL; - - local_bh_disable(); - do { - head = &dccp_hashinfo.bhash[inet_bhashfn(rover, - dccp_hashinfo.bhash_size)]; - spin_lock(&head->lock); - - /* Does not bother with rcv_saddr checks, - * because the established check is already - * unique enough. - */ - inet_bind_bucket_for_each(tb, node, &head->chain) { - if (tb->port == rover) { - BUG_TRAP(!hlist_empty(&tb->owners)); - if (tb->fastreuse >= 0) - goto next_port; - if (!__dccp_v4_check_established(sk, - rover, - &tw)) - goto ok; - goto next_port; - } - } - - tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep, - head, rover); - if (tb == NULL) { - spin_unlock(&head->lock); - break; - } - tb->fastreuse = -1; - goto ok; - - next_port: - spin_unlock(&head->lock); - if (++rover > high) - rover = low; - } while (--remaining > 0); - - local_bh_enable(); - - return -EADDRNOTAVAIL; - -ok: - /* All locks still held and bhs disabled */ - inet_bind_hash(sk, tb, rover); - if (sk_unhashed(sk)) { - inet_sk(sk)->sport = htons(rover); - __inet_hash(&dccp_hashinfo, sk, 0); - } - spin_unlock(&head->lock); - - if (tw != NULL) { - inet_twsk_deschedule(tw, &dccp_death_row); - inet_twsk_put(tw); - } - - ret = 0; - goto out; - } - - head = &dccp_hashinfo.bhash[inet_bhashfn(snum, - dccp_hashinfo.bhash_size)]; - tb = inet_csk(sk)->icsk_bind_hash; - spin_lock_bh(&head->lock); - if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) { - __inet_hash(&dccp_hashinfo, sk, 0); - spin_unlock_bh(&head->lock); - return 0; - } else { - spin_unlock(&head->lock); - /* No definite answer... Walk to established hash table */ - ret = __dccp_v4_check_established(sk, snum, NULL); -out: - local_bh_enable(); - return ret; - } -} - -static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, - int addr_len) +int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct dccp_sock *dp = dccp_sk(sk); @@ -259,9 +105,9 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, inet->dport = usin->sin_port; inet->daddr = daddr; - dp->dccps_ext_header_len = 0; + inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet->opt != NULL) - dp->dccps_ext_header_len = inet->opt->optlen; + inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; /* * Socket identity is still unknown (sport may be zero). 
* However we set the state to DCCP_REQUESTING and do not release the socket @@ -269,7 +115,7 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, * complete initialization after this. */ dccp_set_state(sk, DCCP_REQUESTING); - err = dccp_v4_hash_connect(sk); + err = inet_hash_connect(&dccp_death_row, sk); if (err != 0) goto failure; @@ -287,16 +133,6 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, usin->sin_port); dccp_update_gss(sk, dp->dccps_iss); - /* - * SWL and AWL are initially adjusted so that they are not less than - * the initial Sequence Numbers received and sent, respectively: - * SWL := max(GSR + 1 - floor(W/4), ISR), - * AWL := max(GSS - W' + 1, ISS). - * These adjustments MUST be applied only at the beginning of the - * connection. - */ - dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss)); - inet->id = dp->dccps_iss ^ jiffies; err = dccp_connect(sk); @@ -316,6 +152,8 @@ failure: goto out; } +EXPORT_SYMBOL_GPL(dccp_v4_connect); + /* * This routine does path mtu discovery as defined in RFC1191. */ @@ -354,7 +192,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, mtu = dst_mtu(dst); if (inet->pmtudisc != IP_PMTUDISC_DONT && - dp->dccps_pmtu_cookie > mtu) { + inet_csk(sk)->icsk_pmtu_cookie > mtu) { dccp_sync_mss(sk, mtu); /* @@ -606,6 +444,17 @@ out: sock_put(sk); } +/* This routine computes an IPv4 DCCP checksum. */ +void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) +{ + const struct inet_sock *inet = inet_sk(sk); + struct dccp_hdr *dh = dccp_hdr(skb); + + dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr, inet->daddr); +} + +EXPORT_SYMBOL_GPL(dccp_v4_send_check); + int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code) { struct sk_buff *skb; @@ -641,16 +490,6 @@ static inline u64 dccp_v4_init_sequence(const struct sock *sk, dccp_hdr(skb)->dccph_sport); } -static inline int dccp_bad_service_code(const struct sock *sk, - const __u32 service) -{ - const struct dccp_sock *dp = dccp_sk(sk); - - if (dp->dccps_service == service) - return 0; - return !dccp_list_has_service(dp->dccps_service_list, service); -} - int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { struct inet_request_sock *ireq; @@ -662,7 +501,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) const __u32 service = dccp_hdr_request(skb)->dccph_req_service; struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; - struct dst_entry *dst = NULL; /* Never answer to DCCP_PKT_REQUESTs sent to broadcast or multicast */ if (((struct rtable *)skb->dst)->rt_flags &
three way handshake has completed - we got a valid ACK or DATAACK - * now create the new socket. @@ -792,6 +628,8 @@ exit: return NULL; } +EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock); + static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) { const struct dccp_hdr *dh = dccp_hdr(skb); @@ -1011,7 +849,9 @@ discard: return 0; } -static inline int dccp_invalid_packet(struct sk_buff *skb) +EXPORT_SYMBOL_GPL(dccp_v4_do_rcv); + +int dccp_invalid_packet(struct sk_buff *skb) { const struct dccp_hdr *dh; @@ -1065,29 +905,30 @@ static inline int dccp_invalid_packet(struct sk_buff *skb) return 1; } - /* If the header checksum is incorrect, drop packet and return */ - if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr, - skb->nh.iph->daddr) < 0) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: header checksum is " - "incorrect\n"); - return 1; - } - return 0; } +EXPORT_SYMBOL_GPL(dccp_invalid_packet); + /* this is called when real data arrives */ int dccp_v4_rcv(struct sk_buff *skb) { const struct dccp_hdr *dh; struct sock *sk; - int rc; /* Step 1: Check header basics: */ if (dccp_invalid_packet(skb)) goto discard_it; + /* If the header checksum is incorrect, drop packet and return */ + if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr, + skb->nh.iph->daddr) < 0) { + LIMIT_NETDEBUG(KERN_WARNING "%s: incorrect header checksum\n", + __FUNCTION__); + goto discard_it; + } + dh = dccp_hdr(skb); DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); @@ -1143,28 +984,10 @@ int dccp_v4_rcv(struct sk_buff *skb) goto do_time_wait; } - if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { - dccp_pr_debug("xfrm4_policy_check failed\n"); + if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; - } - - if (sk_filter(sk, skb, 0)) { - dccp_pr_debug("sk_filter failed\n"); - goto discard_and_relse; - } - - skb->dev = NULL; - - bh_lock_sock(sk); - rc = 0; - if (!sock_owned_by_user(sk)) - rc = dccp_v4_do_rcv(sk, skb); - else - sk_add_backlog(sk, skb); - bh_unlock_sock(sk); - sock_put(sk); - return rc; + return sk_receive_skb(sk, skb); no_dccp_socket: if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) @@ -1194,9 +1017,23 @@ do_time_wait: goto no_dccp_socket; } -static int dccp_v4_init_sock(struct sock *sk) +struct inet_connection_sock_af_ops dccp_ipv4_af_ops = { + .queue_xmit = ip_queue_xmit, + .send_check = dccp_v4_send_check, + .rebuild_header = inet_sk_rebuild_header, + .conn_request = dccp_v4_conn_request, + .syn_recv_sock = dccp_v4_request_recv_sock, + .net_header_len = sizeof(struct iphdr), + .setsockopt = ip_setsockopt, + .getsockopt = ip_getsockopt, + .addr2sockaddr = inet_csk_addr2sockaddr, + .sockaddr_len = sizeof(struct sockaddr_in), +}; + +int dccp_v4_init_sock(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); static int dccp_ctl_socket_init = 1; dccp_options_init(&dp->dccps_options); @@ -1236,9 +1073,11 @@ static int dccp_v4_init_sock(struct sock *sk) dccp_ctl_socket_init = 0; dccp_init_xmit_timers(sk); - inet_csk(sk)->icsk_rto = DCCP_TIMEOUT_INIT; + icsk->icsk_rto = DCCP_TIMEOUT_INIT; sk->sk_state = DCCP_CLOSED; sk->sk_write_space = dccp_write_space; + icsk->icsk_af_ops = &dccp_ipv4_af_ops; + icsk->icsk_sync_mss = dccp_sync_mss; dp->dccps_mss_cache = 536; dp->dccps_role = DCCP_ROLE_UNDEFINED; dp->dccps_service = DCCP_SERVICE_INVALID_VALUE; @@ -1246,7 +1085,9 @@ static int dccp_v4_init_sock(struct sock *sk) return 0; } -static int dccp_v4_destroy_sock(struct sock *sk) +EXPORT_SYMBOL_GPL(dccp_v4_init_sock); + +int 
dccp_v4_destroy_sock(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); @@ -1279,6 +1120,8 @@ static int dccp_v4_destroy_sock(struct sock *sk) return 0; } +EXPORT_SYMBOL_GPL(dccp_v4_destroy_sock); + static void dccp_v4_reqsk_destructor(struct request_sock *req) { kfree(inet_rsk(req)->opt); @@ -1293,7 +1136,11 @@ static struct request_sock_ops dccp_request_sock_ops = { .send_reset = dccp_v4_ctl_send_reset, }; -struct proto dccp_v4_prot = { +static struct timewait_sock_ops dccp_timewait_sock_ops = { + .twsk_obj_size = sizeof(struct inet_timewait_sock), +}; + +struct proto dccp_prot = { .name = "DCCP", .owner = THIS_MODULE, .close = dccp_close, @@ -1307,7 +1154,7 @@ struct proto dccp_v4_prot = { .recvmsg = dccp_recvmsg, .backlog_rcv = dccp_v4_do_rcv, .hash = dccp_v4_hash, - .unhash = dccp_v4_unhash, + .unhash = dccp_unhash, .accept = inet_csk_accept, .get_port = dccp_v4_get_port, .shutdown = dccp_shutdown, @@ -1316,5 +1163,7 @@ struct proto dccp_v4_prot = { .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp_sock), .rsk_prot = &dccp_request_sock_ops, - .twsk_obj_size = sizeof(struct inet_timewait_sock), + .twsk_prot = &dccp_timewait_sock_ops, }; + +EXPORT_SYMBOL_GPL(dccp_prot); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c new file mode 100644 index 00000000000..c609dc78f48 --- /dev/null +++ b/net/dccp/ipv6.c @@ -0,0 +1,1261 @@ +/* + * DCCP over IPv6 + * Linux INET6 implementation + * + * Based on net/dccp6/ipv6.c + * + * Arnaldo Carvalho de Melo <acme@ghostprotocols.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/random.h> +#include <linux/xfrm.h> + +#include <net/addrconf.h> +#include <net/inet_common.h> +#include <net/inet_hashtables.h> +#include <net/inet_sock.h> +#include <net/inet6_connection_sock.h> +#include <net/inet6_hashtables.h> +#include <net/ip6_route.h> +#include <net/ipv6.h> +#include <net/protocol.h> +#include <net/transp_v6.h> +#include <net/xfrm.h> + +#include "dccp.h" +#include "ipv6.h" + +static void dccp_v6_ctl_send_reset(struct sk_buff *skb); +static void dccp_v6_reqsk_send_ack(struct sk_buff *skb, + struct request_sock *req); +static void dccp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb); + +static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); + +static struct inet_connection_sock_af_ops dccp_ipv6_mapped; +static struct inet_connection_sock_af_ops dccp_ipv6_af_ops; + +static int dccp_v6_get_port(struct sock *sk, unsigned short snum) +{ + return inet_csk_get_port(&dccp_hashinfo, sk, snum, + inet6_csk_bind_conflict); +} + +static void dccp_v6_hash(struct sock *sk) +{ + if (sk->sk_state != DCCP_CLOSED) { + if (inet_csk(sk)->icsk_af_ops == &dccp_ipv6_mapped) { + dccp_prot.hash(sk); + return; + } + local_bh_disable(); + __inet6_hash(&dccp_hashinfo, sk); + local_bh_enable(); + } +} + +static inline u16 dccp_v6_check(struct dccp_hdr *dh, int len, + struct in6_addr *saddr, + struct in6_addr *daddr, + unsigned long base) +{ + return csum_ipv6_magic(saddr, daddr, len, IPPROTO_DCCP, base); +} + +static __u32 dccp_v6_init_sequence(struct sock *sk, struct sk_buff *skb) +{ + const struct dccp_hdr *dh = dccp_hdr(skb); + + if (skb->protocol == htons(ETH_P_IPV6)) + return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32, + 
skb->nh.ipv6h->saddr.s6_addr32, + dh->dccph_dport, + dh->dccph_sport); + else + return secure_dccp_sequence_number(skb->nh.iph->daddr, + skb->nh.iph->saddr, + dh->dccph_dport, + dh->dccph_sport); +} + +static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len) +{ + struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; + struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct dccp_sock *dp = dccp_sk(sk); + struct in6_addr *saddr = NULL, *final_p = NULL, final; + struct flowi fl; + struct dst_entry *dst; + int addr_type; + int err; + + dp->dccps_role = DCCP_ROLE_CLIENT; + + if (addr_len < SIN6_LEN_RFC2133) + return -EINVAL; + + if (usin->sin6_family != AF_INET6) + return -EAFNOSUPPORT; + + memset(&fl, 0, sizeof(fl)); + + if (np->sndflow) { + fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; + IP6_ECN_flow_init(fl.fl6_flowlabel); + if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) { + struct ip6_flowlabel *flowlabel; + flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + if (flowlabel == NULL) + return -EINVAL; + ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); + fl6_sock_release(flowlabel); + } + } + + /* + * connect() to INADDR_ANY means loopback (BSD'ism). + */ + + if (ipv6_addr_any(&usin->sin6_addr)) + usin->sin6_addr.s6_addr[15] = 0x1; + + addr_type = ipv6_addr_type(&usin->sin6_addr); + + if(addr_type & IPV6_ADDR_MULTICAST) + return -ENETUNREACH; + + if (addr_type & IPV6_ADDR_LINKLOCAL) { + if (addr_len >= sizeof(struct sockaddr_in6) && + usin->sin6_scope_id) { + /* If interface is set while binding, indices + * must coincide. + */ + if (sk->sk_bound_dev_if && + sk->sk_bound_dev_if != usin->sin6_scope_id) + return -EINVAL; + + sk->sk_bound_dev_if = usin->sin6_scope_id; + } + + /* Connect to link-local address requires an interface */ + if (!sk->sk_bound_dev_if) + return -EINVAL; + } + + ipv6_addr_copy(&np->daddr, &usin->sin6_addr); + np->flow_label = fl.fl6_flowlabel; + + /* + * DCCP over IPv4 + */ + + if (addr_type == IPV6_ADDR_MAPPED) { + u32 exthdrlen = icsk->icsk_ext_hdr_len; + struct sockaddr_in sin; + + SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); + + if (__ipv6_only_sock(sk)) + return -ENETUNREACH; + + sin.sin_family = AF_INET; + sin.sin_port = usin->sin6_port; + sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; + + icsk->icsk_af_ops = &dccp_ipv6_mapped; + sk->sk_backlog_rcv = dccp_v4_do_rcv; + + err = dccp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); + + if (err) { + icsk->icsk_ext_hdr_len = exthdrlen; + icsk->icsk_af_ops = &dccp_ipv6_af_ops; + sk->sk_backlog_rcv = dccp_v6_do_rcv; + goto failure; + } else { + ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF), + inet->saddr); + ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF), + inet->rcv_saddr); + } + + return err; + } + + if (!ipv6_addr_any(&np->rcv_saddr)) + saddr = &np->rcv_saddr; + + fl.proto = IPPROTO_DCCP; + ipv6_addr_copy(&fl.fl6_dst, &np->daddr); + ipv6_addr_copy(&fl.fl6_src, saddr ? 
saddr : &np->saddr); + fl.oif = sk->sk_bound_dev_if; + fl.fl_ip_dport = usin->sin6_port; + fl.fl_ip_sport = inet->sport; + + if (np->opt && np->opt->srcrt) { + struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; + ipv6_addr_copy(&final, &fl.fl6_dst); + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); + final_p = &final; + } + + err = ip6_dst_lookup(sk, &dst, &fl); + if (err) + goto failure; + if (final_p) + ipv6_addr_copy(&fl.fl6_dst, final_p); + + if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) + goto failure; + + if (saddr == NULL) { + saddr = &fl.fl6_src; + ipv6_addr_copy(&np->rcv_saddr, saddr); + } + + /* set the source address */ + ipv6_addr_copy(&np->saddr, saddr); + inet->rcv_saddr = LOOPBACK4_IPV6; + + ip6_dst_store(sk, dst, NULL); + + icsk->icsk_ext_hdr_len = 0; + if (np->opt) + icsk->icsk_ext_hdr_len = (np->opt->opt_flen + + np->opt->opt_nflen); + + inet->dport = usin->sin6_port; + + dccp_set_state(sk, DCCP_REQUESTING); + err = inet6_hash_connect(&dccp_death_row, sk); + if (err) + goto late_failure; + /* FIXME */ +#if 0 + dp->dccps_gar = secure_dccp_v6_sequence_number(np->saddr.s6_addr32, + np->daddr.s6_addr32, + inet->sport, + inet->dport); +#endif + err = dccp_connect(sk); + if (err) + goto late_failure; + + return 0; + +late_failure: + dccp_set_state(sk, DCCP_CLOSED); + __sk_dst_reset(sk); +failure: + inet->dport = 0; + sk->sk_route_caps = 0; + return err; +} + +static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + int type, int code, int offset, __u32 info) +{ + struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data; + const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); + struct ipv6_pinfo *np; + struct sock *sk; + int err; + __u64 seq; + + sk = inet6_lookup(&dccp_hashinfo, &hdr->daddr, dh->dccph_dport, + &hdr->saddr, dh->dccph_sport, skb->dev->ifindex); + + if (sk == NULL) { + ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); + return; + } + + if (sk->sk_state == DCCP_TIME_WAIT) { + inet_twsk_put((struct inet_timewait_sock *)sk); + return; + } + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) + NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); + + if (sk->sk_state == DCCP_CLOSED) + goto out; + + np = inet6_sk(sk); + + if (type == ICMPV6_PKT_TOOBIG) { + struct dst_entry *dst = NULL; + + if (sock_owned_by_user(sk)) + goto out; + if ((1 << sk->sk_state) & (DCCPF_LISTEN | DCCPF_CLOSED)) + goto out; + + /* icmp should have updated the destination cache entry */ + dst = __sk_dst_check(sk, np->dst_cookie); + + if (dst == NULL) { + struct inet_sock *inet = inet_sk(sk); + struct flowi fl; + + /* BUGGG_FUTURE: Again, it is not clear how + to handle rthdr case. Ignore this complexity + for now. 
+ */ + memset(&fl, 0, sizeof(fl)); + fl.proto = IPPROTO_DCCP; + ipv6_addr_copy(&fl.fl6_dst, &np->daddr); + ipv6_addr_copy(&fl.fl6_src, &np->saddr); + fl.oif = sk->sk_bound_dev_if; + fl.fl_ip_dport = inet->dport; + fl.fl_ip_sport = inet->sport; + + if ((err = ip6_dst_lookup(sk, &dst, &fl))) { + sk->sk_err_soft = -err; + goto out; + } + + if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + sk->sk_err_soft = -err; + goto out; + } + + } else + dst_hold(dst); + + if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { + dccp_sync_mss(sk, dst_mtu(dst)); + } /* else let the usual retransmit timer handle it */ + dst_release(dst); + goto out; + } + + icmpv6_err_convert(type, code, &err); + + seq = DCCP_SKB_CB(skb)->dccpd_seq; + /* Might be for a request_sock */ + switch (sk->sk_state) { + struct request_sock *req, **prev; + case DCCP_LISTEN: + if (sock_owned_by_user(sk)) + goto out; + + req = inet6_csk_search_req(sk, &prev, dh->dccph_dport, + &hdr->daddr, &hdr->saddr, + inet6_iif(skb)); + if (!req) + goto out; + + /* ICMPs are not backlogged, hence we cannot get + * an established socket here. + */ + BUG_TRAP(req->sk == NULL); + + if (seq != dccp_rsk(req)->dreq_iss) { + NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); + goto out; + } + + inet_csk_reqsk_queue_drop(sk, req, prev); + goto out; + + case DCCP_REQUESTING: + case DCCP_RESPOND: /* Cannot happen. It can, if SYNs are crossed. --ANK */ + if (!sock_owned_by_user(sk)) { + DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + sk->sk_err = err; + /* + * Wake people up to see the error + * (see connect in sock.c) + */ + sk->sk_error_report(sk); + + dccp_done(sk); + } else + sk->sk_err_soft = err; + goto out; + } + + if (!sock_owned_by_user(sk) && np->recverr) { + sk->sk_err = err; + sk->sk_error_report(sk); + } else + sk->sk_err_soft = err; + +out: + bh_unlock_sock(sk); + sock_put(sk); +} + + +static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, + struct dst_entry *dst) +{ + struct inet6_request_sock *ireq6 = inet6_rsk(req); + struct ipv6_pinfo *np = inet6_sk(sk); + struct sk_buff *skb; + struct ipv6_txoptions *opt = NULL; + struct in6_addr *final_p = NULL, final; + struct flowi fl; + int err = -1; + + memset(&fl, 0, sizeof(fl)); + fl.proto = IPPROTO_DCCP; + ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); + ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); + fl.fl6_flowlabel = 0; + fl.oif = ireq6->iif; + fl.fl_ip_dport = inet_rsk(req)->rmt_port; + fl.fl_ip_sport = inet_sk(sk)->sport; + + if (dst == NULL) { + opt = np->opt; + if (opt == NULL && + np->rxopt.bits.osrcrt == 2 && + ireq6->pktopts) { + struct sk_buff *pktopts = ireq6->pktopts; + struct inet6_skb_parm *rxopt = IP6CB(pktopts); + if (rxopt->srcrt) + opt = ipv6_invert_rthdr(sk, + (struct ipv6_rt_hdr *)(pktopts->nh.raw + + rxopt->srcrt)); + } + + if (opt && opt->srcrt) { + struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt; + ipv6_addr_copy(&final, &fl.fl6_dst); + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); + final_p = &final; + } + + err = ip6_dst_lookup(sk, &dst, &fl); + if (err) + goto done; + if (final_p) + ipv6_addr_copy(&fl.fl6_dst, final_p); + if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) + goto done; + } + + skb = dccp_make_response(sk, dst, req); + if (skb != NULL) { + struct dccp_hdr *dh = dccp_hdr(skb); + dh->dccph_checksum = dccp_v6_check(dh, skb->len, + &ireq6->loc_addr, + &ireq6->rmt_addr, + csum_partial((char *)dh, + skb->len, + skb->csum)); + ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); + err = ip6_xmit(sk, skb, &fl, opt, 0); + if (err == NET_XMIT_CN) + err = 0; + } + 
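+	/*
+	 * Note: NET_XMIT_CN from ip6_xmit() only signals local congestion,
+	 * not a failed transmit; the RESPONSE was still queued, so it is
+	 * reported to the caller as success above.
+	 */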
+done: + if (opt && opt != np->opt) + sock_kfree_s(sk, opt, opt->tot_len); + return err; +} + +static void dccp_v6_reqsk_destructor(struct request_sock *req) +{ + if (inet6_rsk(req)->pktopts != NULL) + kfree_skb(inet6_rsk(req)->pktopts); +} + +static struct request_sock_ops dccp6_request_sock_ops = { + .family = AF_INET6, + .obj_size = sizeof(struct dccp6_request_sock), + .rtx_syn_ack = dccp_v6_send_response, + .send_ack = dccp_v6_reqsk_send_ack, + .destructor = dccp_v6_reqsk_destructor, + .send_reset = dccp_v6_ctl_send_reset, +}; + +static struct timewait_sock_ops dccp6_timewait_sock_ops = { + .twsk_obj_size = sizeof(struct dccp6_timewait_sock), +}; + +static void dccp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + struct dccp_hdr *dh = dccp_hdr(skb); + + dh->dccph_checksum = csum_ipv6_magic(&np->saddr, &np->daddr, + len, IPPROTO_DCCP, + csum_partial((char *)dh, + dh->dccph_doff << 2, + skb->csum)); +} + +static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb) +{ + struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; + const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) + + sizeof(struct dccp_hdr_ext) + + sizeof(struct dccp_hdr_reset); + struct sk_buff *skb; + struct flowi fl; + u64 seqno; + + if (rxdh->dccph_type == DCCP_PKT_RESET) + return; + + if (!ipv6_unicast_destination(rxskb)) + return; + + /* + * We need to grab some memory, and put together an RST, + * and then put it into the queue to be sent. + */ + + skb = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + + dccp_hdr_reset_len, GFP_ATOMIC); + if (skb == NULL) + return; + + skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr) + + dccp_hdr_reset_len); + + skb->h.raw = skb_push(skb, dccp_hdr_reset_len); + dh = dccp_hdr(skb); + memset(dh, 0, dccp_hdr_reset_len); + + /* Swap the send and the receive. */ + dh->dccph_type = DCCP_PKT_RESET; + dh->dccph_sport = rxdh->dccph_dport; + dh->dccph_dport = rxdh->dccph_sport; + dh->dccph_doff = dccp_hdr_reset_len / 4; + dh->dccph_x = 1; + dccp_hdr_reset(skb)->dccph_reset_code = + DCCP_SKB_CB(rxskb)->dccpd_reset_code; + + /* See "8.3.1. Abnormal Termination" in draft-ietf-dccp-spec-11 */ + seqno = 0; + if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) + dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1); + + dccp_hdr_set_seq(dh, seqno); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), + DCCP_SKB_CB(rxskb)->dccpd_seq); + + memset(&fl, 0, sizeof(fl)); + ipv6_addr_copy(&fl.fl6_dst, &rxskb->nh.ipv6h->saddr); + ipv6_addr_copy(&fl.fl6_src, &rxskb->nh.ipv6h->daddr); + dh->dccph_checksum = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst, + sizeof(*dh), IPPROTO_DCCP, + skb->csum); + fl.proto = IPPROTO_DCCP; + fl.oif = inet6_iif(rxskb); + fl.fl_ip_dport = dh->dccph_dport; + fl.fl_ip_sport = dh->dccph_sport; + + /* sk = NULL, but it is safe for now. RST socket required. 
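+ *    (Everything below is stateless: the reset is routed and transmitted
+ *    with sk == NULL because the offending packet may not match any local
+ *    socket at all; a dedicated control socket would be the cleaner
+ *    long-term fix.)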
*/ + if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) { + if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) { + ip6_xmit(NULL, skb, &fl, NULL, 0); + DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); + DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); + return; + } + } + + kfree_skb(skb); +} + +static void dccp_v6_ctl_send_ack(struct sk_buff *rxskb) +{ + struct flowi fl; + struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; + const int dccp_hdr_ack_len = sizeof(struct dccp_hdr) + + sizeof(struct dccp_hdr_ext) + + sizeof(struct dccp_hdr_ack_bits); + struct sk_buff *skb; + + skb = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + + dccp_hdr_ack_len, GFP_ATOMIC); + if (skb == NULL) + return; + + skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr) + + dccp_hdr_ack_len); + + skb->h.raw = skb_push(skb, dccp_hdr_ack_len); + dh = dccp_hdr(skb); + memset(dh, 0, dccp_hdr_ack_len); + + /* Build DCCP header and checksum it. */ + dh->dccph_type = DCCP_PKT_ACK; + dh->dccph_sport = rxdh->dccph_dport; + dh->dccph_dport = rxdh->dccph_sport; + dh->dccph_doff = dccp_hdr_ack_len / 4; + dh->dccph_x = 1; + + dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), + DCCP_SKB_CB(rxskb)->dccpd_seq); + + memset(&fl, 0, sizeof(fl)); + ipv6_addr_copy(&fl.fl6_dst, &rxskb->nh.ipv6h->saddr); + ipv6_addr_copy(&fl.fl6_src, &rxskb->nh.ipv6h->daddr); + + /* FIXME: calculate checksum, IPv4 also should... */ + + fl.proto = IPPROTO_DCCP; + fl.oif = inet6_iif(rxskb); + fl.fl_ip_dport = dh->dccph_dport; + fl.fl_ip_sport = dh->dccph_sport; + + if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) { + if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) { + ip6_xmit(NULL, skb, &fl, NULL, 0); + DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); + return; + } + } + + kfree_skb(skb); +} + +static void dccp_v6_reqsk_send_ack(struct sk_buff *skb, + struct request_sock *req) +{ + dccp_v6_ctl_send_ack(skb); +} + +static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) +{ + const struct dccp_hdr *dh = dccp_hdr(skb); + const struct ipv6hdr *iph = skb->nh.ipv6h; + struct sock *nsk; + struct request_sock **prev; + /* Find possible connection requests. */ + struct request_sock *req = inet6_csk_search_req(sk, &prev, + dh->dccph_sport, + &iph->saddr, + &iph->daddr, + inet6_iif(skb)); + if (req != NULL) + return dccp_check_req(sk, skb, req, prev); + + nsk = __inet6_lookup_established(&dccp_hashinfo, + &iph->saddr, dh->dccph_sport, + &iph->daddr, ntohs(dh->dccph_dport), + inet6_iif(skb)); + + if (nsk != NULL) { + if (nsk->sk_state != DCCP_TIME_WAIT) { + bh_lock_sock(nsk); + return nsk; + } + inet_twsk_put((struct inet_timewait_sock *)nsk); + return NULL; + } + + return sk; +} + +static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) +{ + struct inet_request_sock *ireq; + struct dccp_sock dp; + struct request_sock *req; + struct dccp_request_sock *dreq; + struct inet6_request_sock *ireq6; + struct ipv6_pinfo *np = inet6_sk(sk); + const __u32 service = dccp_hdr_request(skb)->dccph_req_service; + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); + __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; + + if (skb->protocol == htons(ETH_P_IP)) + return dccp_v4_conn_request(sk, skb); + + if (!ipv6_unicast_destination(skb)) + goto drop; + + if (dccp_bad_service_code(sk, service)) { + reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; + goto drop; + } + /* + * There are no SYN attacks on IPv6, yet... 
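+ *    but the listen path keeps the TCP-style defences anyway, as the two
+ *    checks below show: drop the REQUEST when the request_sock queue is
+ *    full, and also when the accept queue is full while more than one
+ *    "young" (not yet retransmitted) request is pending.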
+ */ + if (inet_csk_reqsk_queue_is_full(sk)) + goto drop; + + if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) + goto drop; + + req = inet6_reqsk_alloc(sk->sk_prot->rsk_prot); + if (req == NULL) + goto drop; + + /* FIXME: process options */ + + dccp_openreq_init(req, &dp, skb); + + ireq6 = inet6_rsk(req); + ireq = inet_rsk(req); + ipv6_addr_copy(&ireq6->rmt_addr, &skb->nh.ipv6h->saddr); + ipv6_addr_copy(&ireq6->loc_addr, &skb->nh.ipv6h->daddr); + req->rcv_wnd = 100; /* Fake, option parsing will get the + right value */ + ireq6->pktopts = NULL; + + if (ipv6_opt_accepted(sk, skb) || + np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || + np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { + atomic_inc(&skb->users); + ireq6->pktopts = skb; + } + ireq6->iif = sk->sk_bound_dev_if; + + /* So that link locals have meaning */ + if (!sk->sk_bound_dev_if && + ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL) + ireq6->iif = inet6_iif(skb); + + /* + * Step 3: Process LISTEN state + * + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie + * + * In fact we defer setting S.GSR, S.SWL, S.SWH to + * dccp_create_openreq_child. + */ + dreq = dccp_rsk(req); + dreq->dreq_isr = dcb->dccpd_seq; + dreq->dreq_iss = dccp_v6_init_sequence(sk, skb); + dreq->dreq_service = service; + + if (dccp_v6_send_response(sk, req, NULL)) + goto drop_and_free; + + inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); + return 0; + +drop_and_free: + reqsk_free(req); +drop: + DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + dcb->dccpd_reset_code = reset_code; + return -1; +} + +static struct sock *dccp_v6_request_recv_sock(struct sock *sk, + struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst) +{ + struct inet6_request_sock *ireq6 = inet6_rsk(req); + struct ipv6_pinfo *newnp, *np = inet6_sk(sk); + struct inet_sock *newinet; + struct dccp_sock *newdp; + struct dccp6_sock *newdp6; + struct sock *newsk; + struct ipv6_txoptions *opt; + + if (skb->protocol == htons(ETH_P_IP)) { + /* + * v6 mapped + */ + + newsk = dccp_v4_request_recv_sock(sk, skb, req, dst); + if (newsk == NULL) + return NULL; + + newdp6 = (struct dccp6_sock *)newsk; + newdp = dccp_sk(newsk); + newinet = inet_sk(newsk); + newinet->pinet6 = &newdp6->inet6; + newnp = inet6_sk(newsk); + + memcpy(newnp, np, sizeof(struct ipv6_pinfo)); + + ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF), + newinet->daddr); + + ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF), + newinet->saddr); + + ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr); + + inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped; + newsk->sk_backlog_rcv = dccp_v4_do_rcv; + newnp->pktoptions = NULL; + newnp->opt = NULL; + newnp->mcast_oif = inet6_iif(skb); + newnp->mcast_hops = skb->nh.ipv6h->hop_limit; + + /* + * No need to charge this sock to the relevant IPv6 refcnt debug socks count + * here, dccp_create_openreq_child now does this for us, see the comment in + * that function for the gory details. -acme + */ + + /* It is tricky place. Until this moment IPv4 tcp + worked with IPv6 icsk.icsk_af_ops. + Sync it now. 
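+	   (Until this point the v6-mapped child was built by the IPv4
+	    request code while the socket still carried IPv6-sized af_ops
+	    state, so anything derived from net_header_len may be stale;
+	    re-running dccp_sync_mss() against the cached icsk_pmtu_cookie
+	    repairs the MSS before the child is returned.)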
+ */ + dccp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); + + return newsk; + } + + opt = np->opt; + + if (sk_acceptq_is_full(sk)) + goto out_overflow; + + if (np->rxopt.bits.osrcrt == 2 && + opt == NULL && ireq6->pktopts) { + struct inet6_skb_parm *rxopt = IP6CB(ireq6->pktopts); + if (rxopt->srcrt) + opt = ipv6_invert_rthdr(sk, + (struct ipv6_rt_hdr *)(ireq6->pktopts->nh.raw + + rxopt->srcrt)); + } + + if (dst == NULL) { + struct in6_addr *final_p = NULL, final; + struct flowi fl; + + memset(&fl, 0, sizeof(fl)); + fl.proto = IPPROTO_DCCP; + ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); + if (opt && opt->srcrt) { + struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; + ipv6_addr_copy(&final, &fl.fl6_dst); + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); + final_p = &final; + } + ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); + fl.oif = sk->sk_bound_dev_if; + fl.fl_ip_dport = inet_rsk(req)->rmt_port; + fl.fl_ip_sport = inet_sk(sk)->sport; + + if (ip6_dst_lookup(sk, &dst, &fl)) + goto out; + + if (final_p) + ipv6_addr_copy(&fl.fl6_dst, final_p); + + if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0) + goto out; + } + + newsk = dccp_create_openreq_child(sk, req, skb); + if (newsk == NULL) + goto out; + + /* + * No need to charge this sock to the relevant IPv6 refcnt debug socks + * count here, dccp_create_openreq_child now does this for us, see the + * comment in that function for the gory details. -acme + */ + + ip6_dst_store(newsk, dst, NULL); + newsk->sk_route_caps = dst->dev->features & + ~(NETIF_F_IP_CSUM | NETIF_F_TSO); + + newdp6 = (struct dccp6_sock *)newsk; + newinet = inet_sk(newsk); + newinet->pinet6 = &newdp6->inet6; + newdp = dccp_sk(newsk); + newnp = inet6_sk(newsk); + + memcpy(newnp, np, sizeof(struct ipv6_pinfo)); + + ipv6_addr_copy(&newnp->daddr, &ireq6->rmt_addr); + ipv6_addr_copy(&newnp->saddr, &ireq6->loc_addr); + ipv6_addr_copy(&newnp->rcv_saddr, &ireq6->loc_addr); + newsk->sk_bound_dev_if = ireq6->iif; + + /* Now IPv6 options... + + First: no IPv4 options. + */ + newinet->opt = NULL; + + /* Clone RX bits */ + newnp->rxopt.all = np->rxopt.all; + + /* Clone pktoptions received with SYN */ + newnp->pktoptions = NULL; + if (ireq6->pktopts != NULL) { + newnp->pktoptions = skb_clone(ireq6->pktopts, GFP_ATOMIC); + kfree_skb(ireq6->pktopts); + ireq6->pktopts = NULL; + if (newnp->pktoptions) + skb_set_owner_r(newnp->pktoptions, newsk); + } + newnp->opt = NULL; + newnp->mcast_oif = inet6_iif(skb); + newnp->mcast_hops = skb->nh.ipv6h->hop_limit; + + /* Clone native IPv6 options from listening socket (if any) + + Yes, keeping a reference count would be much more clever, + but we do one more thing here: reattach optmem + to newsk. + */ + if (opt) { + newnp->opt = ipv6_dup_options(newsk, opt); + if (opt != np->opt) + sock_kfree_s(sk, opt, opt->tot_len); + } + + inet_csk(newsk)->icsk_ext_hdr_len = 0; + if (newnp->opt) + inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + + newnp->opt->opt_flen); + + dccp_sync_mss(newsk, dst_mtu(dst)); + + newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6; + + __inet6_hash(&dccp_hashinfo, newsk); + inet_inherit_port(&dccp_hashinfo, sk, newsk); + + return newsk; + +out_overflow: + NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS); +out: + NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS); + if (opt && opt != np->opt) + sock_kfree_s(sk, opt, opt->tot_len); + dst_release(dst); + return NULL; +} + +/* The socket must have its spinlock held when we get + * here.
+ * + * We have a potential double-lock case here, so even when + * doing backlog processing we use the BH locking scheme. + * This is because we cannot sleep with the original spinlock + * held. + */ +static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + struct sk_buff *opt_skb = NULL; + + /* Imagine: socket is IPv6. IPv4 packet arrives, + goes to IPv4 receive handler and backlogged. + From backlog it always goes here. Kerboom... + Fortunately, dccp_rcv_established and rcv_established + handle them correctly, but it is not the case with + dccp_v6_hnd_req and dccp_v6_ctl_send_reset(). --ANK + */ + + if (skb->protocol == htons(ETH_P_IP)) + return dccp_v4_do_rcv(sk, skb); + + if (sk_filter(sk, skb, 0)) + goto discard; + + /* + * socket locking is here for SMP purposes as backlog rcv + * is currently called with bh processing disabled. + */ + + /* Do Stevens' IPV6_PKTOPTIONS. + + Yes, guys, it is the only place in our code where we + can make it not affect IPv4. + The rest of the code is protocol independent, + and I do not like the idea of uglifying IPv4. + + Actually, the whole idea behind IPV6_PKTOPTIONS + does not look very well thought out. For now we latch + options received in the last packet, enqueued + by tcp. Feel free to propose a better solution. + --ANK (980728) + */ + if (np->rxopt.all) + opt_skb = skb_clone(skb, GFP_ATOMIC); + + if (sk->sk_state == DCCP_OPEN) { /* Fast path */ + if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len)) + goto reset; + return 0; + } + + if (sk->sk_state == DCCP_LISTEN) { + struct sock *nsk = dccp_v6_hnd_req(sk, skb); + if (!nsk) + goto discard; + + /* + * Queue it on the new socket if the new socket is active, + * otherwise we just shortcircuit this and continue with + * the new socket. + */ + if (nsk != sk) { + if (dccp_child_process(sk, nsk, skb)) + goto reset; + if (opt_skb) + __kfree_skb(opt_skb); + return 0; + } + } + + if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len)) + goto reset; + return 0; + +reset: + dccp_v6_ctl_send_reset(skb); +discard: + if (opt_skb) + __kfree_skb(opt_skb); + kfree_skb(skb); + return 0; +} + +static int dccp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +{ + const struct dccp_hdr *dh; + struct sk_buff *skb = *pskb; + struct sock *sk; + + /* Step 1: Check header basics: */ + + if (dccp_invalid_packet(skb)) + goto discard_it; + + dh = dccp_hdr(skb); + + DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); + DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; + + if (dccp_packet_without_ack(skb)) + DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ; + else + DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); + + /* Step 2: + * Look up flow ID in table and get corresponding socket */ + sk = __inet6_lookup(&dccp_hashinfo, &skb->nh.ipv6h->saddr, + dh->dccph_sport, + &skb->nh.ipv6h->daddr, ntohs(dh->dccph_dport), + inet6_iif(skb)); + /* + * Step 2: + * If no socket ... + * Generate Reset(No Connection) unless P.type == Reset + * Drop packet and return + */ + if (sk == NULL) + goto no_dccp_socket; + + /* + * Step 2: + * ... or S.state == TIMEWAIT, + * Generate Reset(No Connection) unless P.type == Reset + * Drop packet and return + */ + + if (sk->sk_state == DCCP_TIME_WAIT) + goto do_time_wait; + + if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) + goto discard_and_relse; + + return sk_receive_skb(sk, skb) ?
-1 : 0; + +no_dccp_socket: + if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto discard_it; + /* + * Step 2: + * Generate Reset(No Connection) unless P.type == Reset + * Drop packet and return + */ + if (dh->dccph_type != DCCP_PKT_RESET) { + DCCP_SKB_CB(skb)->dccpd_reset_code = + DCCP_RESET_CODE_NO_CONNECTION; + dccp_v6_ctl_send_reset(skb); + } +discard_it: + + /* + * Discard frame + */ + + kfree_skb(skb); + return 0; + +discard_and_relse: + sock_put(sk); + goto discard_it; + +do_time_wait: + inet_twsk_put((struct inet_timewait_sock *)sk); + goto no_dccp_socket; +} + +static struct inet_connection_sock_af_ops dccp_ipv6_af_ops = { + .queue_xmit = inet6_csk_xmit, + .send_check = dccp_v6_send_check, + .rebuild_header = inet6_sk_rebuild_header, + .conn_request = dccp_v6_conn_request, + .syn_recv_sock = dccp_v6_request_recv_sock, + .net_header_len = sizeof(struct ipv6hdr), + .setsockopt = ipv6_setsockopt, + .getsockopt = ipv6_getsockopt, + .addr2sockaddr = inet6_csk_addr2sockaddr, + .sockaddr_len = sizeof(struct sockaddr_in6) +}; + +/* + * DCCP over IPv4 via INET6 API + */ +static struct inet_connection_sock_af_ops dccp_ipv6_mapped = { + .queue_xmit = ip_queue_xmit, + .send_check = dccp_v4_send_check, + .rebuild_header = inet_sk_rebuild_header, + .conn_request = dccp_v6_conn_request, + .syn_recv_sock = dccp_v6_request_recv_sock, + .net_header_len = sizeof(struct iphdr), + .setsockopt = ipv6_setsockopt, + .getsockopt = ipv6_getsockopt, + .addr2sockaddr = inet6_csk_addr2sockaddr, + .sockaddr_len = sizeof(struct sockaddr_in6) +}; + +/* NOTE: A lot of things set to zero explicitly by call to + * sk_alloc() so need not be done here. + */ +static int dccp_v6_init_sock(struct sock *sk) +{ + int err = dccp_v4_init_sock(sk); + + if (err == 0) + inet_csk(sk)->icsk_af_ops = &dccp_ipv6_af_ops; + + return err; +} + +static int dccp_v6_destroy_sock(struct sock *sk) +{ + dccp_v4_destroy_sock(sk); + return inet6_destroy_sock(sk); +} + +static struct proto dccp_v6_prot = { + .name = "DCCPv6", + .owner = THIS_MODULE, + .close = dccp_close, + .connect = dccp_v6_connect, + .disconnect = dccp_disconnect, + .ioctl = dccp_ioctl, + .init = dccp_v6_init_sock, + .setsockopt = dccp_setsockopt, + .getsockopt = dccp_getsockopt, + .sendmsg = dccp_sendmsg, + .recvmsg = dccp_recvmsg, + .backlog_rcv = dccp_v6_do_rcv, + .hash = dccp_v6_hash, + .unhash = dccp_unhash, + .accept = inet_csk_accept, + .get_port = dccp_v6_get_port, + .shutdown = dccp_shutdown, + .destroy = dccp_v6_destroy_sock, + .orphan_count = &dccp_orphan_count, + .max_header = MAX_DCCP_HEADER, + .obj_size = sizeof(struct dccp6_sock), + .rsk_prot = &dccp6_request_sock_ops, + .twsk_prot = &dccp6_timewait_sock_ops, +}; + +static struct inet6_protocol dccp_v6_protocol = { + .handler = dccp_v6_rcv, + .err_handler = dccp_v6_err, + .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL, +}; + +static struct proto_ops inet6_dccp_ops = { + .family = PF_INET6, + .owner = THIS_MODULE, + .release = inet6_release, + .bind = inet6_bind, + .connect = inet_stream_connect, + .socketpair = sock_no_socketpair, + .accept = inet_accept, + .getname = inet6_getname, + .poll = dccp_poll, + .ioctl = inet6_ioctl, + .listen = inet_dccp_listen, + .shutdown = inet_shutdown, + .setsockopt = sock_common_setsockopt, + .getsockopt = sock_common_getsockopt, + .sendmsg = inet_sendmsg, + .recvmsg = sock_common_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static struct inet_protosw dccp_v6_protosw = { + .type = SOCK_DCCP, + .protocol = IPPROTO_DCCP, + .prot = 
&dccp_v6_prot, + .ops = &inet6_dccp_ops, + .capability = -1, + .flags = INET_PROTOSW_ICSK, +}; + +static int __init dccp_v6_init(void) +{ + int err = proto_register(&dccp_v6_prot, 1); + + if (err != 0) + goto out; + + err = inet6_add_protocol(&dccp_v6_protocol, IPPROTO_DCCP); + if (err != 0) + goto out_unregister_proto; + + inet6_register_protosw(&dccp_v6_protosw); +out: + return err; +out_unregister_proto: + proto_unregister(&dccp_v6_prot); + goto out; +} + +static void __exit dccp_v6_exit(void) +{ + inet6_del_protocol(&dccp_v6_protocol, IPPROTO_DCCP); + inet6_unregister_protosw(&dccp_v6_protosw); + proto_unregister(&dccp_v6_prot); +} + +module_init(dccp_v6_init); +module_exit(dccp_v6_exit); + +/* + * __stringify doesn't like enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33) + * values directly. Also cover the case where the protocol is not specified, + * i.e. net-pf-PF_INET6-proto-0-type-SOCK_DCCP + */ +MODULE_ALIAS("net-pf-" __stringify(PF_INET6) "-proto-33-type-6"); +MODULE_ALIAS("net-pf-" __stringify(PF_INET6) "-proto-0-type-6"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>"); +MODULE_DESCRIPTION("DCCPv6 - Datagram Congestion Controlled Protocol"); diff --git a/net/dccp/ipv6.h b/net/dccp/ipv6.h new file mode 100644 index 00000000000..e4d4e930927 --- /dev/null +++ b/net/dccp/ipv6.h @@ -0,0 +1,37 @@ +#ifndef _DCCP_IPV6_H +#define _DCCP_IPV6_H +/* + * net/dccp/ipv6.h + * + * An implementation of the DCCP protocol + * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/config.h> +#include <linux/dccp.h> +#include <linux/ipv6.h> + +struct dccp6_sock { + struct dccp_sock dccp; + /* + * ipv6_pinfo has to be the last member of dccp6_sock, + * see inet6_sk_generic. + */ + struct ipv6_pinfo inet6; +}; + +struct dccp6_request_sock { + struct dccp_request_sock dccp; + struct inet6_request_sock inet6; +}; + +struct dccp6_timewait_sock { + struct inet_timewait_sock inet; + struct inet6_timewait_sock tw6; +}; + +#endif /* _DCCP_IPV6_H */ diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 1393461898b..29261fc198e 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -40,6 +40,8 @@ struct inet_timewait_death_row dccp_death_row = { (unsigned long)&dccp_death_row), }; +EXPORT_SYMBOL_GPL(dccp_death_row); + void dccp_time_wait(struct sock *sk, int state, int timeo) { struct inet_timewait_sock *tw = NULL; @@ -50,7 +52,18 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) if (tw != NULL) { const struct inet_connection_sock *icsk = inet_csk(sk); const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); - +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + if (tw->tw_family == PF_INET6) { + const struct ipv6_pinfo *np = inet6_sk(sk); + struct inet6_timewait_sock *tw6; + + tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot); + tw6 = inet6_twsk((struct sock *)tw); + ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr); + ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr); + tw->tw_ipv6only = np->ipv6only; + } +#endif /* Linkage updates. */ __inet_twsk_hashdance(tw, sk, &dccp_hashinfo); @@ -170,6 +183,8 @@ out_free: return newsk; } +EXPORT_SYMBOL_GPL(dccp_create_openreq_child); + /* * Process an incoming packet for RESPOND sockets represented * as a request_sock.
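(An aside on the pattern at work in the next hunk: dccp_check_req() stops calling dccp_v4_request_recv_sock() directly and instead dispatches through the icsk_af_ops table installed at socket-init time, which is what lets one DCCP state machine serve both address families. A self-contained toy model of that dispatch follows; every name in it is illustrative, not the kernel's own.)

#include <stdio.h>

/* Toy model of the icsk_af_ops indirection introduced in this patch:
 * each address family supplies its own ops table and the shared code
 * only ever calls through the pointer. Illustrative names only. */
struct toy_af_ops {
	const char	*name;
	int		net_header_len;
	void		(*send_check)(const char *what);
};

static void toy_v4_send_check(const char *what)
{
	printf("IPv4 checksum over %s\n", what);
}

static void toy_v6_send_check(const char *what)
{
	printf("IPv6 pseudo-header checksum over %s\n", what);
}

static const struct toy_af_ops toy_v4_ops = { "IPv4", 20, toy_v4_send_check };
static const struct toy_af_ops toy_v6_ops = { "IPv6", 40, toy_v6_send_check };

/* one body, two behaviours: compare dccp_transmit_skb() and
 * dccp_sync_mss() after this patch */
static void toy_transmit(const struct toy_af_ops *ops)
{
	printf("%s: mss <= pmtu(1500) - %d - dccp headers\n",
	       ops->name, ops->net_header_len);
	ops->send_check("packet");
}

int main(void)
{
	toy_transmit(&toy_v4_ops);
	toy_transmit(&toy_v6_ops);
	return 0;
}

Compiled and run, the same toy_transmit() body produces family-specific output purely through the table it is handed, which is the property the DCCP rework relies on.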
@@ -214,7 +229,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, goto drop; } - child = dccp_v4_request_recv_sock(sk, skb, req, NULL); + child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); if (child == NULL) goto listen_overflow; @@ -236,6 +251,8 @@ drop: goto out; } +EXPORT_SYMBOL_GPL(dccp_check_req); + /* * Queue segment on the new socket if the new socket is active, * otherwise we just shortcircuit this and continue with @@ -266,3 +283,5 @@ int dccp_child_process(struct sock *parent, struct sock *child, sock_put(child); return ret; } + +EXPORT_SYMBOL_GPL(dccp_child_process); diff --git a/net/dccp/output.c b/net/dccp/output.c index 74ff8702587..efd7ffb903a 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -15,6 +15,7 @@ #include <linux/kernel.h> #include <linux/skbuff.h> +#include <net/inet_sock.h> #include <net/sock.h> #include "ackvec.h" @@ -43,6 +44,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) { if (likely(skb != NULL)) { const struct inet_sock *inet = inet_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); struct dccp_sock *dp = dccp_sk(sk); struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); struct dccp_hdr *dh; @@ -108,8 +110,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) break; } - dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr, - inet->daddr); + icsk->icsk_af_ops->send_check(sk, skb->len, skb); if (set_ack) dccp_event_ack_sent(sk); @@ -117,7 +118,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) DCCP_INC_STATS(DCCP_MIB_OUTSEGS); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - err = ip_queue_xmit(skb, 0); + err = icsk->icsk_af_ops->queue_xmit(skb, 0); if (err <= 0) return err; @@ -134,20 +135,13 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) { + struct inet_connection_sock *icsk = inet_csk(sk); struct dccp_sock *dp = dccp_sk(sk); - int mss_now; - - /* - * FIXME: we really should be using the af_specific thing to support - * IPv6. - * mss_now = pmtu - tp->af_specific->net_header_len - - * sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext); - */ - mss_now = pmtu - sizeof(struct iphdr) - sizeof(struct dccp_hdr) - - sizeof(struct dccp_hdr_ext); + int mss_now = (pmtu - icsk->icsk_af_ops->net_header_len - + sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext)); /* Now subtract optional transport overhead */ - mss_now -= dp->dccps_ext_header_len; + mss_now -= icsk->icsk_ext_hdr_len; /* * FIXME: this should come from the CCID infrastructure, where, say, @@ -160,12 +154,14 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4; /* And store cached results */ - dp->dccps_pmtu_cookie = pmtu; + icsk->icsk_pmtu_cookie = pmtu; dp->dccps_mss_cache = mss_now; return mss_now; } +EXPORT_SYMBOL_GPL(dccp_sync_mss); + void dccp_write_space(struct sock *sk) { read_lock(&sk->sk_callback_lock); @@ -266,7 +262,7 @@ int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo) int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb) { - if (inet_sk_rebuild_header(sk) != 0) + if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk) != 0) return -EHOSTUNREACH; /* Routing failure or similar. */ return dccp_transmit_skb(sk, (skb_cloned(skb) ? 
@@ -321,6 +317,8 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, return skb; } +EXPORT_SYMBOL_GPL(dccp_make_response); + struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, const enum dccp_reset_codes code) @@ -377,6 +375,7 @@ struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, */ static inline void dccp_connect_init(struct sock *sk) { + struct dccp_sock *dp = dccp_sk(sk); struct dst_entry *dst = __sk_dst_get(sk); struct inet_connection_sock *icsk = inet_csk(sk); @@ -385,10 +384,16 @@ static inline void dccp_connect_init(struct sock *sk) dccp_sync_mss(sk, dst_mtu(dst)); - /* - * FIXME: set dp->{dccps_swh,dccps_swl}, with - * something like dccp_inc_seq - */ + dccp_update_gss(sk, dp->dccps_iss); + /* + * SWL and AWL are initially adjusted so that they are not less than + * the initial Sequence Numbers received and sent, respectively: + * SWL := max(GSR + 1 - floor(W/4), ISR), + * AWL := max(GSS - W' + 1, ISS). + * These adjustments MUST be applied only at the beginning of the + * connection. + */ + dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss)); icsk->icsk_retransmits = 0; } @@ -420,6 +425,8 @@ int dccp_connect(struct sock *sk) return 0; } +EXPORT_SYMBOL_GPL(dccp_connect); + void dccp_send_ack(struct sock *sk) { /* If we have been reset, we may not send again. */ diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 8a6b2a9e458..65b11ea90d8 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -24,7 +24,7 @@ #include <net/checksum.h> #include <net/inet_common.h> -#include <net/ip.h> +#include <net/inet_sock.h> #include <net/protocol.h> #include <net/sock.h> #include <net/xfrm.h> @@ -34,15 +34,18 @@ #include <linux/timer.h> #include <linux/delay.h> #include <linux/poll.h> -#include <linux/dccp.h> #include "ccid.h" #include "dccp.h" DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly; +EXPORT_SYMBOL_GPL(dccp_statistics); + atomic_t dccp_orphan_count = ATOMIC_INIT(0); +EXPORT_SYMBOL_GPL(dccp_orphan_count); + static struct net_protocol dccp_protocol = { .handler = dccp_v4_rcv, .err_handler = dccp_v4_err, @@ -149,6 +152,8 @@ int dccp_disconnect(struct sock *sk, int flags) return err; } +EXPORT_SYMBOL_GPL(dccp_disconnect); + /* * Wait for a DCCP event. * @@ -156,8 +161,8 @@ int dccp_disconnect(struct sock *sk, int flags) * take care of normal races (between the test and the event) and we don't * go look at any of the socket buffers directly. 
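 *    (dccp_poll() and inet_dccp_listen() lose their static linkage in the
 *    hunks below because the new net/dccp/ipv6.c wires the very same
 *    implementations into its PF_INET6 proto_ops table.)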
*/ -static unsigned int dccp_poll(struct file *file, struct socket *sock, - poll_table *wait) +unsigned int dccp_poll(struct file *file, struct socket *sock, + poll_table *wait) { unsigned int mask; struct sock *sk = sock->sk; @@ -205,12 +210,16 @@ static unsigned int dccp_poll(struct file *file, struct socket *sock, return mask; } +EXPORT_SYMBOL_GPL(dccp_poll); + int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) { dccp_pr_debug("entry\n"); return -ENOIOCTLCMD; } +EXPORT_SYMBOL_GPL(dccp_ioctl); + static int dccp_setsockopt_service(struct sock *sk, const u32 service, char __user *optval, int optlen) { @@ -254,7 +263,9 @@ int dccp_setsockopt(struct sock *sk, int level, int optname, int val; if (level != SOL_DCCP) - return ip_setsockopt(sk, level, optname, optval, optlen); + return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level, + optname, optval, + optlen); if (optlen < sizeof(int)) return -EINVAL; @@ -282,6 +293,8 @@ int dccp_setsockopt(struct sock *sk, int level, int optname, return err; } +EXPORT_SYMBOL_GPL(dccp_setsockopt); + static int dccp_getsockopt_service(struct sock *sk, int len, u32 __user *optval, int __user *optlen) @@ -320,8 +333,9 @@ int dccp_getsockopt(struct sock *sk, int level, int optname, int val, len; if (level != SOL_DCCP) - return ip_getsockopt(sk, level, optname, optval, optlen); - + return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level, + optname, optval, + optlen); if (get_user(len, optlen)) return -EFAULT; @@ -354,6 +368,8 @@ int dccp_getsockopt(struct sock *sk, int level, int optname, return 0; } +EXPORT_SYMBOL_GPL(dccp_getsockopt); + int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { @@ -410,6 +426,8 @@ out_discard: goto out_release; } +EXPORT_SYMBOL_GPL(dccp_sendmsg); + int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len) { @@ -507,7 +525,9 @@ out: return len; } -static int inet_dccp_listen(struct socket *sock, int backlog) +EXPORT_SYMBOL_GPL(dccp_recvmsg); + +int inet_dccp_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; unsigned char old_state; @@ -543,6 +563,8 @@ out: return err; } +EXPORT_SYMBOL_GPL(inet_dccp_listen); + static const unsigned char dccp_new_state[] = { /* current state: new state: action: */ [0] = DCCP_CLOSED, @@ -648,12 +670,16 @@ adjudge_to_death: sock_put(sk); } +EXPORT_SYMBOL_GPL(dccp_close); + void dccp_shutdown(struct sock *sk, int how) { dccp_pr_debug("entry\n"); } -static struct proto_ops inet_dccp_ops = { +EXPORT_SYMBOL_GPL(dccp_shutdown); + +static const struct proto_ops inet_dccp_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, @@ -681,11 +707,11 @@ extern struct net_proto_family inet_family_ops; static struct inet_protosw dccp_v4_protosw = { .type = SOCK_DCCP, .protocol = IPPROTO_DCCP, - .prot = &dccp_v4_prot, + .prot = &dccp_prot, .ops = &inet_dccp_ops, .capability = -1, .no_check = 0, - .flags = 0, + .flags = INET_PROTOSW_ICSK, }; /* @@ -760,13 +786,15 @@ MODULE_PARM_DESC(thash_entries, "Number of ehash buckets"); int dccp_debug; module_param(dccp_debug, int, 0444); MODULE_PARM_DESC(dccp_debug, "Enable debug messages"); + +EXPORT_SYMBOL_GPL(dccp_debug); #endif static int __init dccp_init(void) { unsigned long goal; int ehash_order, bhash_order, i; - int rc = proto_register(&dccp_v4_prot, 1); + int rc = proto_register(&dccp_prot, 1); if (rc) goto out; @@ -869,7 +897,7 @@ out_free_bind_bucket_cachep: kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); 
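A reading aid for the DCCP conversion in the hunks above and below: calls that used to be hard-wired to IPv4 helpers (ip_queue_xmit(), dccp_v4_checksum(), inet_sk_rebuild_header(), ip_setsockopt()/ip_getsockopt()) now go through icsk->icsk_af_ops. Reconstructed from just the call sites visible in this diff, that ops table has roughly the shape below; this is a partial sketch, not the full struct inet_connection_sock_af_ops definition:

struct inet_connection_sock_af_ops {
	int	     (*queue_xmit)(struct sk_buff *skb, int ipfragok);
	void	     (*send_check)(struct sock *sk, int len,
				   struct sk_buff *skb);
	int	     (*rebuild_header)(struct sock *sk);
	struct sock *(*syn_recv_sock)(struct sock *sk, struct sk_buff *skb,
				      struct request_sock *req,
				      struct dst_entry *dst);
	int	     (*setsockopt)(struct sock *sk, int level, int optname,
				   char __user *optval, int optlen);
	int	     (*getsockopt)(struct sock *sk, int level, int optname,
				   char __user *optval, int __user *optlen);
	u16	      net_header_len;	/* sizeof(struct iphdr) for IPv4 */
};

One dccp_transmit_skb() or dccp_setsockopt() body then serves both address families; only the table behind icsk_af_ops differs.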
dccp_hashinfo.bind_bucket_cachep = NULL; out_proto_unregister: - proto_unregister(&dccp_v4_prot); + proto_unregister(&dccp_prot); goto out; } @@ -892,7 +920,7 @@ static void __exit dccp_fini(void) get_order(dccp_hashinfo.ehash_size * sizeof(struct inet_ehash_bucket))); kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); - proto_unregister(&dccp_v4_prot); + proto_unregister(&dccp_prot); } module_init(dccp_init); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index d402e9020c6..78ec5344be8 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -149,7 +149,7 @@ static void dn_keepalive(struct sock *sk); #define DN_SK_HASH_MASK (DN_SK_HASH_SIZE - 1) -static struct proto_ops dn_proto_ops; +static const struct proto_ops dn_proto_ops; static DEFINE_RWLOCK(dn_hash_lock); static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE]; static struct hlist_head dn_wild_sk; @@ -1252,7 +1252,7 @@ static int dn_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) break; default: - err = dev_ioctl(cmd, (void __user *)arg); + err = -ENOIOCTLCMD; break; } @@ -2342,7 +2342,7 @@ static struct net_proto_family dn_family_ops = { .owner = THIS_MODULE, }; -static struct proto_ops dn_proto_ops = { +static const struct proto_ops dn_proto_ops = { .family = AF_DECnet, .owner = THIS_MODULE, .release = dn_release, diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 8d0cc3cf3e4..33ab256cfd4 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -408,11 +408,14 @@ int dn_neigh_router_hello(struct sk_buff *skb) } } - if (!dn_db->router) { - dn_db->router = neigh_clone(neigh); - } else { - if (msg->priority > ((struct dn_neigh *)dn_db->router)->priority) - neigh_release(xchg(&dn_db->router, neigh_clone(neigh))); + /* Only use routers in our area */ + if ((dn_ntohs(src)>>10) == dn_ntohs((decnet_address)>>10)) { + if (!dn_db->router) { + dn_db->router = neigh_clone(neigh); + } else { + if (msg->priority > ((struct dn_neigh *)dn_db->router)->priority) + neigh_release(xchg(&dn_db->router, neigh_clone(neigh))); + } } write_unlock(&neigh->lock); neigh_release(neigh); diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index 369f25b60f3..44bda85e678 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -793,7 +793,6 @@ static int dn_nsp_rx_packet(struct sk_buff *skb) got_it: if (sk != NULL) { struct dn_scp *scp = DN_SK(sk); - int ret; /* Reset backoff */ scp->nsp_rxtshift = 0; @@ -807,21 +806,7 @@ got_it: goto free_out; } - bh_lock_sock(sk); - ret = NET_RX_SUCCESS; - if (decnet_debug_level & 8) - printk(KERN_DEBUG "NSP: 0x%02x 0x%02x 0x%04x 0x%04x %d\n", - (int)cb->rt_flags, (int)cb->nsp_flags, - (int)cb->src_port, (int)cb->dst_port, - !!sock_owned_by_user(sk)); - if (!sock_owned_by_user(sk)) - ret = dn_nsp_backlog_rcv(sk, skb); - else - sk_add_backlog(sk, skb); - bh_unlock_sock(sk); - sock_put(sk); - - return ret; + return sk_receive_skb(sk, skb); } return dn_nsp_no_socket(skb, reason); diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 34fdac51df9..c792994d795 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -31,6 +31,7 @@ #include <linux/if_arp.h> #include <linux/wireless.h> #include <linux/skbuff.h> +#include <linux/udp.h> #include <net/sock.h> #include <net/inet_common.h> #include <linux/stat.h> @@ -45,7 +46,7 @@ #include <asm/uaccess.h> #include <asm/system.h> -static struct proto_ops econet_ops; +static const struct proto_ops econet_ops; static struct hlist_head econet_sklist; static DEFINE_RWLOCK(econet_lock); 
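The dn_nsp_rx_packet() hunk above replaces DECnet's open-coded deliver-or-backlog logic with the generic sk_receive_skb(). Reconstructed from the deleted lines (minus the debug printk), the pattern that helper centralises looks roughly like this; a sketch of the idea, not the actual net/core/sock.c code:

static int receive_or_backlog(struct sock *sk, struct sk_buff *skb)
{
	int ret = NET_RX_SUCCESS;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk))
		ret = sk->sk_backlog_rcv(sk, skb); /* deliver immediately */
	else
		sk_add_backlog(sk, skb); /* owner drains it in release_sock() */
	bh_unlock_sock(sk);
	sock_put(sk);

	return ret;
}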
@@ -56,7 +57,7 @@ static struct net_device *net2dev_map[256]; #define EC_PORT_IP 0xd2 #ifdef CONFIG_ECONET_AUNUDP -static spinlock_t aun_queue_lock; +static DEFINE_SPINLOCK(aun_queue_lock); static struct socket *udpsock; #define AUN_PORT 0x8000 @@ -686,7 +687,7 @@ static int econet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg break; default: - return dev_ioctl(cmd, argp); + return -ENOIOCTLCMD; } /*NOTREACHED*/ return 0; @@ -698,7 +699,7 @@ static struct net_proto_family econet_family_ops = { .owner = THIS_MODULE, }; -static struct proto_ops SOCKOPS_WRAPPED(econet_ops) = { +static const struct proto_ops SOCKOPS_WRAPPED(econet_ops) = { .family = PF_ECONET, .owner = THIS_MODULE, .release = econet_release, diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index e2457736727..9890fd97e53 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -53,6 +53,7 @@ #include <linux/errno.h> #include <linux/config.h> #include <linux/init.h> +#include <linux/if_ether.h> #include <net/dst.h> #include <net/arp.h> #include <net/sock.h> @@ -162,7 +163,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) skb_pull(skb,ETH_HLEN); eth = eth_hdr(skb); - if (*eth->h_dest&1) { + if (is_multicast_ether_addr(eth->h_dest)) { if (!compare_ether_addr(eth->h_dest, dev->broadcast)) skb->pkt_type = PACKET_BROADCAST; else @@ -251,7 +252,7 @@ static int eth_mac_addr(struct net_device *dev, void *p) static int eth_change_mtu(struct net_device *dev, int new_mtu) { - if ((new_mtu < 68) || (new_mtu > 1500)) + if (new_mtu < 68 || new_mtu > ETH_DATA_LEN) return -EINVAL; dev->mtu = new_mtu; return 0; @@ -272,7 +273,7 @@ void ether_setup(struct net_device *dev) dev->type = ARPHRD_ETHER; dev->hard_header_len = ETH_HLEN; - dev->mtu = 1500; /* eth_mtu */ + dev->mtu = ETH_DATA_LEN; dev->addr_len = ETH_ALEN; dev->tx_queue_len = 1000; /* Ethernet wants good queues */ dev->flags = IFF_BROADCAST|IFF_MULTICAST; diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c index 03efaacbdb7..4cc6f41c693 100644 --- a/net/ieee80211/ieee80211_rx.c +++ b/net/ieee80211/ieee80211_rx.c @@ -410,9 +410,8 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, return 1; } - if ((is_multicast_ether_addr(hdr->addr1) || - is_broadcast_ether_addr(hdr->addr2)) ? ieee->host_mc_decrypt : - ieee->host_decrypt) { + if (is_multicast_ether_addr(hdr->addr1) + ? ieee->host_mc_decrypt : ieee->host_decrypt) { int idx = 0; if (skb->len >= hdrlen + 3) idx = skb->data[hdrlen + 3] >> 6; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index e55136ae09f..011cca7ae02 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -456,6 +456,14 @@ config TCP_CONG_BIC increase provides TCP friendliness. See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/ +config TCP_CONG_CUBIC + tristate "CUBIC TCP" + default m + ---help--- + This is version 2.0 of BIC-TCP which uses a cubic growth function + among other techniques. 
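The new Kconfig entry's help text is terse, so a gloss: CUBIC grows the congestion window as a cubic function of the time since the last loss event, W(t) = C*(t - K)^3 + W_max with K = cbrt(W_max*beta/C), as described in the paper linked just below. A floating-point sketch of that curve only; the tcp_cubic.c added by this series works in fixed-point arithmetic instead:

#include <math.h>

/* beta: multiplicative-decrease factor, so W(0) = w_max * (1 - beta) */
static double cubic_window(double t, double w_max, double c, double beta)
{
	double k = cbrt(w_max * beta / c); /* time until W(t) == w_max again */

	return c * pow(t - k, 3.0) + w_max;
}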
+ See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/cubic-paper.pdf + config TCP_CONG_WESTWOOD tristate "TCP Westwood+" default m diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index f0435d00db6..c54edd76de0 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_INET_DIAG) += inet_diag.o obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o +obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o obj-$(CONFIG_TCP_CONG_HYBLA) += tcp_hybla.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index d368cf24900..966a071a408 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -93,6 +93,7 @@ #include <linux/smp_lock.h> #include <linux/inet.h> #include <linux/igmp.h> +#include <linux/inetdevice.h> #include <linux/netdevice.h> #include <net/ip.h> #include <net/protocol.h> @@ -302,6 +303,7 @@ lookup_protocol: sk->sk_reuse = 1; inet = inet_sk(sk); + inet->is_icsk = INET_PROTOSW_ICSK & answer_flags; if (SOCK_RAW == sock->type) { inet->num = protocol; @@ -775,16 +777,16 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) err = devinet_ioctl(cmd, (void __user *)arg); break; default: - if (!sk->sk_prot->ioctl || - (err = sk->sk_prot->ioctl(sk, cmd, arg)) == - -ENOIOCTLCMD) - err = dev_ioctl(cmd, (void __user *)arg); + if (sk->sk_prot->ioctl) + err = sk->sk_prot->ioctl(sk, cmd, arg); + else + err = -ENOIOCTLCMD; break; } return err; } -struct proto_ops inet_stream_ops = { +const struct proto_ops inet_stream_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, @@ -805,7 +807,7 @@ struct proto_ops inet_stream_ops = { .sendpage = tcp_sendpage }; -struct proto_ops inet_dgram_ops = { +const struct proto_ops inet_dgram_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, @@ -830,7 +832,7 @@ struct proto_ops inet_dgram_ops = { * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without * udp_poll */ -static struct proto_ops inet_sockraw_ops = { +static const struct proto_ops inet_sockraw_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, @@ -869,7 +871,8 @@ static struct inet_protosw inetsw_array[] = .ops = &inet_stream_ops, .capability = -1, .no_check = 0, - .flags = INET_PROTOSW_PERMANENT, + .flags = INET_PROTOSW_PERMANENT | + INET_PROTOSW_ICSK, }, { diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 035ad2c9e1b..aed537fa2c8 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -6,6 +6,7 @@ #include <linux/crypto.h> #include <linux/pfkeyv2.h> #include <net/icmp.h> +#include <net/protocol.h> #include <asm/scatterlist.h> diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index b425748f02d..37432088fe6 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -86,6 +86,7 @@ #include <linux/in.h> #include <linux/mm.h> #include <linux/inet.h> +#include <linux/inetdevice.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/fddidevice.h> diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 04a6fe3e95a..7b9bb28e2ee 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -58,6 +58,7 @@ #endif #include <linux/kmod.h> +#include <net/arp.h> #include <net/ip.h> #include <net/route.h> #include <net/ip_fib.h> diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 1b18ce66e7b..73bfcae8af9 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -9,6 +9,7 @@ #include <linux/pfkeyv2.h> #include 
<linux/random.h> #include <net/icmp.h> +#include <net/protocol.h> #include <net/udp.h> /* decapsulation data for use when post-processing */ diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 19b1b984d68..18f5e509281 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -30,6 +30,7 @@ #include <linux/errno.h> #include <linux/in.h> #include <linux/inet.h> +#include <linux/inetdevice.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/skbuff.h> diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 7ea0209cb16..e2890ec8159 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -29,6 +29,7 @@ #include <linux/errno.h> #include <linux/in.h> #include <linux/inet.h> +#include <linux/inetdevice.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/proc_fs.h> diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 0b298bbc151..0dd4d06e456 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -33,6 +33,7 @@ #include <linux/errno.h> #include <linux/in.h> #include <linux/inet.h> +#include <linux/inetdevice.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/proc_fs.h> diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 6d2a6ac070e..ef4724de735 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -29,6 +29,7 @@ #include <linux/errno.h> #include <linux/in.h> #include <linux/inet.h> +#include <linux/inetdevice.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/proc_fs.h> @@ -36,6 +37,7 @@ #include <linux/netlink.h> #include <linux/init.h> +#include <net/arp.h> #include <net/ip.h> #include <net/protocol.h> #include <net/route.h> diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 705e3ce86df..e320b32373e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -41,6 +41,13 @@ * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. + * + * Substantial contributions to this work comes from: + * + * David S. Miller, <davem@davemloft.net> + * Stephen Hemminger <shemminger@osdl.org> + * Paul E. 
McKenney <paulmck@us.ibm.com> + * Patrick McHardy <kaber@trash.net> */ #define VERSION "0.404" @@ -59,6 +66,7 @@ #include <linux/errno.h> #include <linux/in.h> #include <linux/inet.h> +#include <linux/inetdevice.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/proc_fs.h> diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 92e23b2ad4d..be5a519cd2f 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -73,6 +73,7 @@ #include <linux/socket.h> #include <linux/in.h> #include <linux/inet.h> +#include <linux/inetdevice.h> #include <linux/netdevice.h> #include <linux/string.h> #include <linux/netfilter_ipv4.h> diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 4a195c724f0..34758118c10 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -91,6 +91,8 @@ #include <linux/if_arp.h> #include <linux/rtnetlink.h> #include <linux/times.h> + +#include <net/arp.h> #include <net/ip.h> #include <net/protocol.h> #include <net/route.h> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 3fe021f1a56..ae20281d8de 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -37,7 +37,8 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg); */ int sysctl_local_port_range[2] = { 1024, 4999 }; -static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) +int inet_csk_bind_conflict(const struct sock *sk, + const struct inet_bind_bucket *tb) { const u32 sk_rcv_saddr = inet_rcv_saddr(sk); struct sock *sk2; @@ -62,11 +63,15 @@ static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucke return node != NULL; } +EXPORT_SYMBOL_GPL(inet_csk_bind_conflict); + /* Obtain a reference to a local port for the given sock, * if snum is zero it means select any available local port. 
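Un-inlining and exporting inet_csk_bind_conflict() above pairs with the hunk just below, which makes the conflict test a parameter of inet_csk_get_port() so that IPv6 can pass an address-family-aware variant. A hypothetical IPv4-side caller (name invented; the real callers live outside this diff):

static int example_v4_get_port(struct sock *sk, unsigned short snum)
{
	return inet_csk_get_port(&tcp_hashinfo, sk, snum,
				 inet_csk_bind_conflict);
}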
*/ int inet_csk_get_port(struct inet_hashinfo *hashinfo, - struct sock *sk, unsigned short snum) + struct sock *sk, unsigned short snum, + int (*bind_conflict)(const struct sock *sk, + const struct inet_bind_bucket *tb)) { struct inet_bind_hashbucket *head; struct hlist_node *node; @@ -125,7 +130,7 @@ tb_found: goto success; } else { ret = 1; - if (inet_csk_bind_conflict(sk, tb)) + if (bind_conflict(sk, tb)) goto fail_unlock; } } @@ -380,7 +385,7 @@ struct request_sock *inet_csk_search_req(const struct sock *sk, EXPORT_SYMBOL_GPL(inet_csk_search_req); void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, - const unsigned timeout) + unsigned long timeout) { struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; @@ -631,3 +636,15 @@ void inet_csk_listen_stop(struct sock *sk) } EXPORT_SYMBOL_GPL(inet_csk_listen_stop); + +void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; + const struct inet_sock *inet = inet_sk(sk); + + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = inet->daddr; + sin->sin_port = inet->dport; +} + +EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 39061ed53cf..c4990819204 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -112,12 +112,12 @@ static int inet_diag_fill(struct sk_buff *skb, struct sock *sk, r->idiag_inode = 0; #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) if (r->idiag_family == AF_INET6) { - const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); + const struct inet6_timewait_sock *tw6 = inet6_twsk(sk); ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, - &tcp6tw->tw_v6_rcv_saddr); + &tw6->tw_v6_rcv_saddr); ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, - &tcp6tw->tw_v6_daddr); + &tw6->tw_v6_daddr); } #endif nlh->nlmsg_len = skb->tail - b; @@ -489,9 +489,9 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) if (r->idiag_family == AF_INET6) { ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, - &tcp6_rsk(req)->loc_addr); + &inet6_rsk(req)->loc_addr); ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, - &tcp6_rsk(req)->rmt_addr); + &inet6_rsk(req)->rmt_addr); } #endif nlh->nlmsg_len = skb->tail - b; @@ -553,13 +553,13 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, entry.saddr = #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) (entry.family == AF_INET6) ? - tcp6_rsk(req)->loc_addr.s6_addr32 : + inet6_rsk(req)->loc_addr.s6_addr32 : #endif &ireq->loc_addr; entry.daddr = #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) (entry.family == AF_INET6) ? - tcp6_rsk(req)->rmt_addr.s6_addr32 : + inet6_rsk(req)->rmt_addr.s6_addr32 : #endif &ireq->rmt_addr; entry.dport = ntohs(ireq->rmt_port); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index e8d29fe736d..33228115cda 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -15,12 +15,14 @@ #include <linux/config.h> #include <linux/module.h> +#include <linux/random.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/wait.h> #include <net/inet_connection_sock.h> #include <net/inet_hashtables.h> +#include <net/ip.h> /* * Allocate and initialize a new local port bind bucket. 
@@ -163,3 +165,179 @@ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 dad } EXPORT_SYMBOL_GPL(__inet_lookup_listener); + +/* called with local bh disabled */ +static int __inet_check_established(struct inet_timewait_death_row *death_row, + struct sock *sk, __u16 lport, + struct inet_timewait_sock **twp) +{ + struct inet_hashinfo *hinfo = death_row->hashinfo; + struct inet_sock *inet = inet_sk(sk); + u32 daddr = inet->rcv_saddr; + u32 saddr = inet->daddr; + int dif = sk->sk_bound_dev_if; + INET_ADDR_COOKIE(acookie, saddr, daddr) + const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); + unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); + struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); + struct sock *sk2; + const struct hlist_node *node; + struct inet_timewait_sock *tw; + + prefetch(head->chain.first); + write_lock(&head->lock); + + /* Check TIME-WAIT sockets first. */ + sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) { + tw = inet_twsk(sk2); + + if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) { + if (twsk_unique(sk, sk2, twp)) + goto unique; + else + goto not_unique; + } + } + tw = NULL; + + /* And established part... */ + sk_for_each(sk2, node, &head->chain) { + if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) + goto not_unique; + } + +unique: + /* Must record num and sport now. Otherwise we will see + * in hash table socket with a funny identity. */ + inet->num = lport; + inet->sport = htons(lport); + sk->sk_hash = hash; + BUG_TRAP(sk_unhashed(sk)); + __sk_add_node(sk, &head->chain); + sock_prot_inc_use(sk->sk_prot); + write_unlock(&head->lock); + + if (twp) { + *twp = tw; + NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); + } else if (tw) { + /* Silly. Should hash-dance instead... */ + inet_twsk_deschedule(tw, death_row); + NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); + + inet_twsk_put(tw); + } + + return 0; + +not_unique: + write_unlock(&head->lock); + return -EADDRNOTAVAIL; +} + +static inline u32 inet_sk_port_offset(const struct sock *sk) +{ + const struct inet_sock *inet = inet_sk(sk); + return secure_ipv4_port_ephemeral(inet->rcv_saddr, inet->daddr, + inet->dport); +} + +/* + * Bind a port for a connect operation and hash it. + */ +int inet_hash_connect(struct inet_timewait_death_row *death_row, + struct sock *sk) +{ + struct inet_hashinfo *hinfo = death_row->hashinfo; + const unsigned short snum = inet_sk(sk)->num; + struct inet_bind_hashbucket *head; + struct inet_bind_bucket *tb; + int ret; + + if (!snum) { + int low = sysctl_local_port_range[0]; + int high = sysctl_local_port_range[1]; + int range = high - low; + int i; + int port; + static u32 hint; + u32 offset = hint + inet_sk_port_offset(sk); + struct hlist_node *node; + struct inet_timewait_sock *tw = NULL; + + local_bh_disable(); + for (i = 1; i <= range; i++) { + port = low + (i + offset) % range; + head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; + spin_lock(&head->lock); + + /* Does not bother with rcv_saddr checks, + * because the established check is already + * unique enough. 
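The port search in inet_hash_connect() above works as follows: secure_ipv4_port_ephemeral() randomises the starting point per (saddr, daddr, dport) triple, while the static hint remembers how far earlier searches had to walk so later ones begin past recently taken ports. Stripped of locking and hash-table details it reduces to this sketch, where port_is_usable() is a hypothetical stand-in for the bind-bucket and TIME-WAIT checks:

/* stand-in for the fastreuse and __inet_check_established() tests */
static int port_is_usable(int port)
{
	(void)port;
	return 1;
}

static int pick_ephemeral_port(int low, int high,
			       unsigned int hint, unsigned int offset)
{
	int range = high - low;
	int i;

	for (i = 1; i <= range; i++) {
		int port = low + (i + hint + offset) % range;

		if (port_is_usable(port))
			return port;	/* the caller also advances hint by i */
	}
	return -1;	/* -EADDRNOTAVAIL in the real code */
}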
+ */ + inet_bind_bucket_for_each(tb, node, &head->chain) { + if (tb->port == port) { + BUG_TRAP(!hlist_empty(&tb->owners)); + if (tb->fastreuse >= 0) + goto next_port; + if (!__inet_check_established(death_row, + sk, port, + &tw)) + goto ok; + goto next_port; + } + } + + tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, port); + if (!tb) { + spin_unlock(&head->lock); + break; + } + tb->fastreuse = -1; + goto ok; + + next_port: + spin_unlock(&head->lock); + } + local_bh_enable(); + + return -EADDRNOTAVAIL; + +ok: + hint += i; + + /* Head lock still held and bh's disabled */ + inet_bind_hash(sk, tb, port); + if (sk_unhashed(sk)) { + inet_sk(sk)->sport = htons(port); + __inet_hash(hinfo, sk, 0); + } + spin_unlock(&head->lock); + + if (tw) { + inet_twsk_deschedule(tw, death_row); + inet_twsk_put(tw); + } + + ret = 0; + goto out; + } + + head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)]; + tb = inet_csk(sk)->icsk_bind_hash; + spin_lock_bh(&head->lock); + if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { + __inet_hash(hinfo, sk, 0); + spin_unlock_bh(&head->lock); + return 0; + } else { + spin_unlock(&head->lock); + /* No definite answer... Walk to established hash table */ + ret = __inet_check_established(death_row, sk, snum, NULL); +out: + local_bh_enable(); + return ret; + } +} + +EXPORT_SYMBOL_GPL(inet_hash_connect); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index a010e9a6881..417f126c749 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -90,8 +90,9 @@ EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state) { - struct inet_timewait_sock *tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_slab, - SLAB_ATOMIC); + struct inet_timewait_sock *tw = + kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab, + SLAB_ATOMIC); if (tw != NULL) { const struct inet_sock *inet = inet_sk(sk); diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 2fc3fd38924..ce5fe3f74a3 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -401,6 +401,7 @@ struct inet_peer *inet_getpeer(__u32 daddr, int create) return NULL; n->v4daddr = daddr; atomic_set(&n->refcnt, 1); + atomic_set(&n->rid, 0); n->ip_id_count = secure_ip_id(daddr); n->tcp_ts_stamp = 0; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 8ce0ce2ee48..2a8adda15e1 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -22,6 +22,7 @@ * Patrick McHardy : LRU queue of frag heads for evictor. */ +#include <linux/compiler.h> #include <linux/config.h> #include <linux/module.h> #include <linux/types.h> @@ -38,6 +39,7 @@ #include <net/ip.h> #include <net/icmp.h> #include <net/checksum.h> +#include <net/inetpeer.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/inet.h> @@ -56,6 +58,8 @@ int sysctl_ipfrag_high_thresh = 256*1024; int sysctl_ipfrag_low_thresh = 192*1024; +int sysctl_ipfrag_max_dist = 64; + /* Important NOTE! Fragment queue must be destroyed before MSL expires. * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. */ @@ -89,8 +93,10 @@ struct ipq { spinlock_t lock; atomic_t refcnt; struct timer_list timer; /* when will this queue expire? */ - int iif; struct timeval stamp; + int iif; + unsigned int rid; + struct inet_peer *peer; }; /* Hash table. 
*/ @@ -195,6 +201,9 @@ static void ip_frag_destroy(struct ipq *qp, int *work) BUG_TRAP(qp->last_in&COMPLETE); BUG_TRAP(del_timer(&qp->timer) == 0); + if (qp->peer) + inet_putpeer(qp->peer); + /* Release all fragment data. */ fp = qp->fragments; while (fp) { @@ -353,6 +362,7 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph, u32 user) qp->meat = 0; qp->fragments = NULL; qp->iif = 0; + qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL; /* Initialize a timer for this entry. */ init_timer(&qp->timer); @@ -373,7 +383,7 @@ out_nomem: */ static inline struct ipq *ip_find(struct iphdr *iph, u32 user) { - __u16 id = iph->id; + __be16 id = iph->id; __u32 saddr = iph->saddr; __u32 daddr = iph->daddr; __u8 protocol = iph->protocol; @@ -398,6 +408,56 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user) return ip_frag_create(hash, iph, user); } +/* Is the fragment too far ahead to be part of ipq? */ +static inline int ip_frag_too_far(struct ipq *qp) +{ + struct inet_peer *peer = qp->peer; + unsigned int max = sysctl_ipfrag_max_dist; + unsigned int start, end; + + int rc; + + if (!peer || !max) + return 0; + + start = qp->rid; + end = atomic_inc_return(&peer->rid); + qp->rid = end; + + rc = qp->fragments && (end - start) > max; + + if (rc) { + IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); + } + + return rc; +} + +static int ip_frag_reinit(struct ipq *qp) +{ + struct sk_buff *fp; + + if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) { + atomic_inc(&qp->refcnt); + return -ETIMEDOUT; + } + + fp = qp->fragments; + do { + struct sk_buff *xp = fp->next; + frag_kfree_skb(fp, NULL); + fp = xp; + } while (fp); + + qp->last_in = 0; + qp->len = 0; + qp->meat = 0; + qp->fragments = NULL; + qp->iif = 0; + + return 0; +} + /* Add new segment to existing queue. 
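The rid/peer fields added to struct ipq above feed the ip_frag_too_far() check that the next hunk wires into ip_frag_queue(): every fragment from a peer bumps the shared peer->rid counter, and a queue whose recorded rid has fallen more than sysctl_ipfrag_max_dist behind is treated as stale (possibly attack traffic) and reinitialised. A worked example with invented numbers; note the unsigned subtraction keeps the test valid across counter wrap:

static int example_too_far(void)
{
	unsigned int start = 100; /* qp->rid when this queue last saw a fragment */
	unsigned int end   = 180; /* atomic_inc_return(&peer->rid) for this one  */
	unsigned int max   = 64;  /* sysctl_ipfrag_max_dist                      */

	return (end - start) > max; /* 80 > 64: flush and restart the queue */
}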
*/ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) { @@ -408,6 +468,12 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (qp->last_in & COMPLETE) goto err; + if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) && + unlikely(ip_frag_too_far(qp)) && unlikely(ip_frag_reinit(qp))) { + ipq_kill(qp); + goto err; + } + offset = ntohs(skb->nh.iph->frag_off); flags = offset & ~IP_OFFSET; offset &= IP_OFFSET; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 46f9d9cf7a5..912c42f57c7 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -28,6 +28,7 @@ #include <linux/inetdevice.h> #include <linux/igmp.h> #include <linux/netfilter_ipv4.h> +#include <linux/if_ether.h> #include <net/sock.h> #include <net/ip.h> @@ -1140,7 +1141,7 @@ static void ipgre_tunnel_setup(struct net_device *dev) dev->type = ARPHRD_IPGRE; dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4; - dev->mtu = 1500 - sizeof(struct iphdr) - 4; + dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4; dev->flags = IFF_NOARP; dev->iflink = 0; dev->addr_len = 4; @@ -1152,7 +1153,7 @@ static int ipgre_tunnel_init(struct net_device *dev) struct ip_tunnel *tunnel; struct iphdr *iph; int hlen = LL_MAX_HEADER; - int mtu = 1500; + int mtu = ETH_DATA_LEN; int addend = sizeof(struct iphdr) + 4; tunnel = (struct ip_tunnel*)dev->priv; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 473d0f2b2e0..e45846ae570 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -128,6 +128,7 @@ #include <linux/sockios.h> #include <linux/in.h> #include <linux/inet.h> +#include <linux/inetdevice.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index dbe12da8d8b..d3f6c468faf 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -22,6 +22,7 @@ #include <net/sock.h> #include <net/ip.h> #include <net/icmp.h> +#include <net/route.h> /* * Write options to IP header, record destination address to diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index eba64e2bd39..8b1c9bd0091 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -202,13 +202,11 @@ static inline int ip_finish_output2(struct sk_buff *skb) static inline int ip_finish_output(struct sk_buff *skb) { - struct net_device *dev = skb->dst->dev; - - skb->dev = dev; - skb->protocol = htons(ETH_P_IP); - - return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev, - ip_finish_output2); + if (skb->len > dst_mtu(skb->dst) && + !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size)) + return ip_fragment(skb, ip_finish_output2); + else + return ip_finish_output2(skb); } int ip_mc_output(struct sk_buff *skb) @@ -265,21 +263,21 @@ int ip_mc_output(struct sk_buff *skb) newskb->dev, ip_dev_loopback_xmit); } - if (skb->len > dst_mtu(&rt->u.dst)) - return ip_fragment(skb, ip_finish_output); - else - return ip_finish_output(skb); + return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev, + ip_finish_output); } int ip_output(struct sk_buff *skb) { + struct net_device *dev = skb->dst->dev; + IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS); - if (skb->len > dst_mtu(skb->dst) && - !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size)) - return ip_fragment(skb, ip_finish_output); - else - return ip_finish_output(skb); + skb->dev = dev; + skb->protocol = htons(ETH_P_IP); + + return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev, + ip_finish_output); } int ip_queue_xmit(struct sk_buff *skb, int ipfragok) @@ -420,7 +418,7 @@ int ip_fragment(struct sk_buff *skb, int 
(*output)(struct sk_buff*)) struct sk_buff *skb2; unsigned int mtu, hlen, left, len, ll_rs; int offset; - int not_last_frag; + __be16 not_last_frag; struct rtable *rt = (struct rtable*)skb->dst; int err = 0; @@ -445,6 +443,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) hlen = iph->ihl * 4; mtu = dst_mtu(&rt->u.dst) - hlen; /* Size of data space */ + IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE; /* When frag_list is given, use it. First, check its validity: * some transformers could create wrong frag_list or break existing @@ -1181,7 +1180,7 @@ int ip_push_pending_frames(struct sock *sk) struct ip_options *opt = NULL; struct rtable *rt = inet->cork.rt; struct iphdr *iph; - int df = 0; + __be16 df = 0; __u8 ttl; int err = 0; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 4f2d8725730..6986e11d65c 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -25,12 +25,12 @@ #include <linux/skbuff.h> #include <linux/ip.h> #include <linux/icmp.h> +#include <linux/inetdevice.h> #include <linux/netdevice.h> #include <net/sock.h> #include <net/ip.h> #include <net/icmp.h> -#include <net/tcp.h> -#include <linux/tcp.h> +#include <net/tcp_states.h> #include <linux/udp.h> #include <linux/igmp.h> #include <linux/netfilter.h> @@ -427,8 +427,8 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, err = ip_options_get_from_user(&opt, optval, optlen); if (err) break; - if (sk->sk_type == SOCK_STREAM) { - struct tcp_sock *tp = tcp_sk(sk); + if (inet->is_icsk) { + struct inet_connection_sock *icsk = inet_csk(sk); #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (sk->sk_family == PF_INET || (!((1 << sk->sk_state) & @@ -436,10 +436,10 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, inet->daddr != LOOPBACK4_IPV6)) { #endif if (inet->opt) - tp->ext_header_len -= inet->opt->optlen; + icsk->icsk_ext_hdr_len -= inet->opt->optlen; if (opt) - tp->ext_header_len += opt->optlen; - tcp_sync_mss(sk, tp->pmtu_cookie); + icsk->icsk_ext_hdr_len += opt->optlen; + icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) } #endif diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index fc718df17b4..d64e2ec8da7 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -28,6 +28,7 @@ #include <net/xfrm.h> #include <net/icmp.h> #include <net/ipcomp.h> +#include <net/protocol.h> struct ipcomp_tfms { struct list_head list; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index e8674baaa8d..bb3613ec448 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -42,6 +42,7 @@ #include <linux/in.h> #include <linux/if.h> #include <linux/inet.h> +#include <linux/inetdevice.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/skbuff.h> @@ -58,6 +59,7 @@ #include <net/arp.h> #include <net/ip.h> #include <net/ipconfig.h> +#include <net/route.h> #include <asm/uaccess.h> #include <net/checksum.h> diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index c05c1df0bb0..35571cff81c 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -108,6 +108,7 @@ #include <linux/mroute.h> #include <linux/init.h> #include <linux/netfilter_ipv4.h> +#include <linux/if_ether.h> #include <net/sock.h> #include <net/ip.h> @@ -786,7 +787,7 @@ static void ipip_tunnel_setup(struct net_device *dev) dev->type = ARPHRD_TUNNEL; dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); - dev->mtu = 1500 - sizeof(struct iphdr); + dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr); 
dev->flags = IFF_NOARP; dev->iflink = 0; dev->addr_len = 4; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 302b7eb507c..9a5c0ce7ff3 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -49,9 +49,11 @@ #include <linux/seq_file.h> #include <linux/mroute.h> #include <linux/init.h> +#include <linux/if_ether.h> #include <net/ip.h> #include <net/protocol.h> #include <linux/skbuff.h> +#include <net/route.h> #include <net/sock.h> #include <net/icmp.h> #include <net/udp.h> @@ -192,7 +194,7 @@ static struct net_device_stats *reg_vif_get_stats(struct net_device *dev) static void reg_vif_setup(struct net_device *dev) { dev->type = ARPHRD_PIMREG; - dev->mtu = 1500 - sizeof(struct iphdr) - 8; + dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; dev->flags = IFF_NOARP; dev->hard_start_xmit = reg_vif_xmit; dev->get_stats = reg_vif_get_stats; diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c index d7eb680101c..9b176a942ac 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/ipv4/ipvs/ip_vs_app.c @@ -224,34 +224,6 @@ void unregister_ip_vs_app(struct ip_vs_app *app) } -#if 0000 -/* - * Get reference to app by name (called from user context) - */ -struct ip_vs_app *ip_vs_app_get_by_name(char *appname) -{ - struct ip_vs_app *app, *a = NULL; - - down(&__ip_vs_app_mutex); - - list_for_each_entry(ent, &ip_vs_app_list, a_list) { - if (strcmp(app->name, appname)) - continue; - - /* softirq may call ip_vs_app_get too, so the caller - must disable softirq on the current CPU */ - if (ip_vs_app_get(app)) - a = app; - break; - } - - up(&__ip_vs_app_mutex); - - return a; -} -#endif - - /* * Bind ip_vs_conn to its ip_vs_app (called by cp constructor) */ diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index 2a3a8c59c65..87b83813cf2 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -24,7 +24,11 @@ * */ +#include <linux/interrupt.h> +#include <linux/in.h> +#include <linux/net.h> #include <linux/kernel.h> +#include <linux/module.h> #include <linux/vmalloc.h> #include <linux/proc_fs.h> /* for proc_net_* */ #include <linux/seq_file.h> @@ -219,7 +223,7 @@ struct ip_vs_conn *ip_vs_conn_in_get if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port); - IP_VS_DBG(7, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", + IP_VS_DBG(9, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", ip_vs_proto_name(protocol), NIPQUAD(s_addr), ntohs(s_port), NIPQUAD(d_addr), ntohs(d_port), @@ -254,7 +258,7 @@ struct ip_vs_conn *ip_vs_ct_in_get out: ct_read_unlock(hash); - IP_VS_DBG(7, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", + IP_VS_DBG(9, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", ip_vs_proto_name(protocol), NIPQUAD(s_addr), ntohs(s_port), NIPQUAD(d_addr), ntohs(d_port), @@ -295,7 +299,7 @@ struct ip_vs_conn *ip_vs_conn_out_get ct_read_unlock(hash); - IP_VS_DBG(7, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", + IP_VS_DBG(9, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", ip_vs_proto_name(protocol), NIPQUAD(s_addr), ntohs(s_port), NIPQUAD(d_addr), ntohs(d_port), @@ -391,8 +395,9 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) cp->flags |= atomic_read(&dest->conn_flags); cp->dest = dest; - IP_VS_DBG(9, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " - "d:%u.%u.%u.%u:%d fwd:%c s:%u flg:%X cnt:%d destcnt:%d\n", + IP_VS_DBG(7, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " + "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d " + "dest->refcnt:%d\n", 
ip_vs_proto_name(cp->protocol), NIPQUAD(cp->caddr), ntohs(cp->cport), NIPQUAD(cp->vaddr), ntohs(cp->vport), @@ -430,8 +435,9 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) if (!dest) return; - IP_VS_DBG(9, "Unbind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " - "d:%u.%u.%u.%u:%d fwd:%c s:%u flg:%X cnt:%d destcnt:%d\n", + IP_VS_DBG(7, "Unbind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " + "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d " + "dest->refcnt:%d\n", ip_vs_proto_name(cp->protocol), NIPQUAD(cp->caddr), ntohs(cp->cport), NIPQUAD(cp->vaddr), ntohs(cp->vport), @@ -571,7 +577,7 @@ static void ip_vs_conn_expire(unsigned long data) ip_vs_conn_hash(cp); expire_later: - IP_VS_DBG(7, "delayed: refcnt-1=%d conn.n_control=%d\n", + IP_VS_DBG(7, "delayed: conn->refcnt-1=%d conn->n_control=%d\n", atomic_read(&cp->refcnt)-1, atomic_read(&cp->n_control)); diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 1a0843cd58a..3f47ad8e1ca 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -426,7 +426,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) return NULL; IP_VS_DBG(6, "Schedule fwd:%c c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u " - "d:%u.%u.%u.%u:%u flg:%X cnt:%d\n", + "d:%u.%u.%u.%u:%u conn->flags:%X conn->refcnt:%d\n", ip_vs_fwd_tag(cp), NIPQUAD(cp->caddr), ntohs(cp->cport), NIPQUAD(cp->vaddr), ntohs(cp->vport), @@ -532,11 +532,8 @@ static unsigned int ip_vs_post_routing(unsigned int hooknum, { if (!((*pskb)->ipvs_property)) return NF_ACCEPT; - /* The packet was sent from IPVS, exit this chain */ - (*okfn)(*pskb); - - return NF_STOLEN; + return NF_STOP; } u16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 9bdcf31b760..c935c5086d3 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -35,6 +35,7 @@ #include <linux/netfilter_ipv4.h> #include <net/ip.h> +#include <net/route.h> #include <net/sock.h> #include <asm/uaccess.h> @@ -447,7 +448,7 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __u32 vaddr, __u16 vport) out: read_unlock(&__ip_vs_svc_lock); - IP_VS_DBG(6, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n", + IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n", fwmark, ip_vs_proto_name(protocol), NIPQUAD(vaddr), ntohs(vport), svc?"hit":"not hit"); @@ -597,7 +598,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport) */ list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, " - "refcnt=%d\n", + "dest->refcnt=%d\n", dest->vfwmark, NIPQUAD(dest->addr), ntohs(dest->port), atomic_read(&dest->refcnt)); @@ -804,7 +805,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) dest = ip_vs_trash_get_dest(svc, daddr, dport); if (dest != NULL) { IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, " - "refcnt=%d, service %u/%u.%u.%u.%u:%u\n", + "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n", NIPQUAD(daddr), ntohs(dport), atomic_read(&dest->refcnt), dest->vfwmark, @@ -949,7 +950,8 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest) atomic_dec(&dest->svc->refcnt); kfree(dest); } else { - IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, refcnt=%d\n", + IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, " + "dest->refcnt=%d\n", NIPQUAD(dest->addr), ntohs(dest->port), atomic_read(&dest->refcnt)); list_add(&dest->n_list, &ip_vs_dest_trash); diff --git a/net/ipv4/ipvs/ip_vs_dh.c 
b/net/ipv4/ipvs/ip_vs_dh.c index f3bc320dce9..9fee19c4c61 100644 --- a/net/ipv4/ipvs/ip_vs_dh.c +++ b/net/ipv4/ipvs/ip_vs_dh.c @@ -37,8 +37,10 @@ * */ +#include <linux/ip.h> #include <linux/module.h> #include <linux/kernel.h> +#include <linux/skbuff.h> #include <net/ip_vs.h> diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c index 67b3e2fc1fa..c453e1e57f4 100644 --- a/net/ipv4/ipvs/ip_vs_est.c +++ b/net/ipv4/ipvs/ip_vs_est.c @@ -13,8 +13,12 @@ * Changes: * */ +#include <linux/config.h> #include <linux/kernel.h> +#include <linux/jiffies.h> +#include <linux/slab.h> #include <linux/types.h> +#include <linux/interrupt.h> #include <net/ip_vs.h> diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c index 561cda326fa..6e5cb92a5c8 100644 --- a/net/ipv4/ipvs/ip_vs_lblc.c +++ b/net/ipv4/ipvs/ip_vs_lblc.c @@ -41,8 +41,10 @@ * me to write this module. */ +#include <linux/ip.h> #include <linux/module.h> #include <linux/kernel.h> +#include <linux/skbuff.h> /* for sysctl */ #include <linux/fs.h> @@ -228,33 +230,6 @@ ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en) } -#if 0000 -/* - * Unhash ip_vs_lblc_entry from ip_vs_lblc_table. - * returns bool success. - */ -static int ip_vs_lblc_unhash(struct ip_vs_lblc_table *tbl, - struct ip_vs_lblc_entry *en) -{ - if (list_empty(&en->list)) { - IP_VS_ERR("ip_vs_lblc_unhash(): request for not hashed entry, " - "called from %p\n", __builtin_return_address(0)); - return 0; - } - - /* - * Remove it from the table - */ - write_lock(&tbl->lock); - list_del(&en->list); - INIT_LIST_HEAD(&en->list); - write_unlock(&tbl->lock); - - return 1; -} -#endif - - /* * Get ip_vs_lblc_entry associated with supplied parameters. */ diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c index ce456dbf09a..32ba37ba72d 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/ipv4/ipvs/ip_vs_lblcr.c @@ -39,8 +39,10 @@ * */ +#include <linux/ip.h> #include <linux/module.h> #include <linux/kernel.h> +#include <linux/skbuff.h> /* for sysctl */ #include <linux/fs.h> @@ -414,33 +416,6 @@ ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en) } -#if 0000 -/* - * Unhash ip_vs_lblcr_entry from ip_vs_lblcr_table. - * returns bool success. - */ -static int ip_vs_lblcr_unhash(struct ip_vs_lblcr_table *tbl, - struct ip_vs_lblcr_entry *en) -{ - if (list_empty(&en->list)) { - IP_VS_ERR("ip_vs_lblcr_unhash(): request for not hashed entry, " - "called from %p\n", __builtin_return_address(0)); - return 0; - } - - /* - * Remove it from the table - */ - write_lock(&tbl->lock); - list_del(&en->list); - INIT_LIST_HEAD(&en->list); - write_unlock(&tbl->lock); - - return 1; -} -#endif - - /* * Get ip_vs_lblcr_entry associated with supplied parameters. 
*/ diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c index 453e94a0bbd..8b0505b0931 100644 --- a/net/ipv4/ipvs/ip_vs_proto_ah.c +++ b/net/ipv4/ipvs/ip_vs_proto_ah.c @@ -12,6 +12,8 @@ * */ +#include <linux/in.h> +#include <linux/ip.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/netfilter.h> diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c index 478e5c7c7e8..c36ccf057a1 100644 --- a/net/ipv4/ipvs/ip_vs_proto_esp.c +++ b/net/ipv4/ipvs/ip_vs_proto_esp.c @@ -12,6 +12,8 @@ * */ +#include <linux/in.h> +#include <linux/ip.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/netfilter.h> diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c index 0e878fd6215..bc28b1160a3 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c @@ -275,28 +275,6 @@ static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = { [IP_VS_TCP_S_LAST] = 2*HZ, }; - -#if 0 - -/* FIXME: This is going to die */ - -static int tcp_timeouts_dos[IP_VS_TCP_S_LAST+1] = { - [IP_VS_TCP_S_NONE] = 2*HZ, - [IP_VS_TCP_S_ESTABLISHED] = 8*60*HZ, - [IP_VS_TCP_S_SYN_SENT] = 60*HZ, - [IP_VS_TCP_S_SYN_RECV] = 10*HZ, - [IP_VS_TCP_S_FIN_WAIT] = 60*HZ, - [IP_VS_TCP_S_TIME_WAIT] = 60*HZ, - [IP_VS_TCP_S_CLOSE] = 10*HZ, - [IP_VS_TCP_S_CLOSE_WAIT] = 60*HZ, - [IP_VS_TCP_S_LAST_ACK] = 30*HZ, - [IP_VS_TCP_S_LISTEN] = 2*60*HZ, - [IP_VS_TCP_S_SYNACK] = 100*HZ, - [IP_VS_TCP_S_LAST] = 2*HZ, -}; - -#endif - static char * tcp_state_name_table[IP_VS_TCP_S_LAST+1] = { [IP_VS_TCP_S_NONE] = "NONE", [IP_VS_TCP_S_ESTABLISHED] = "ESTABLISHED", @@ -448,7 +426,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, struct ip_vs_dest *dest = cp->dest; IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->" - "%u.%u.%u.%u:%d state: %s->%s cnt:%d\n", + "%u.%u.%u.%u:%d state: %s->%s conn->refcnt:%d\n", pp->name, (state_off==TCP_DIR_OUTPUT)?"output ":"input ", th->syn? 
'S' : '.', diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c index 8ae5f2e0aef..89d9175d8f2 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/ipv4/ipvs/ip_vs_proto_udp.c @@ -15,8 +15,11 @@ * */ +#include <linux/in.h> +#include <linux/ip.h> #include <linux/kernel.h> #include <linux/netfilter_ipv4.h> +#include <linux/udp.h> #include <net/ip_vs.h> diff --git a/net/ipv4/ipvs/ip_vs_sched.c b/net/ipv4/ipvs/ip_vs_sched.c index 0f7c56a225b..8bc42b76223 100644 --- a/net/ipv4/ipvs/ip_vs_sched.c +++ b/net/ipv4/ipvs/ip_vs_sched.c @@ -22,6 +22,7 @@ #include <linux/module.h> #include <linux/sched.h> #include <linux/spinlock.h> +#include <linux/interrupt.h> #include <asm/string.h> #include <linux/kmod.h> diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c index 6f7c50e44a3..7775e6cc68b 100644 --- a/net/ipv4/ipvs/ip_vs_sh.c +++ b/net/ipv4/ipvs/ip_vs_sh.c @@ -34,8 +34,10 @@ * */ +#include <linux/ip.h> #include <linux/module.h> #include <linux/kernel.h> +#include <linux/skbuff.h> #include <net/ip_vs.h> diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 2e5ced3d806..1bca714bda3 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -21,12 +21,14 @@ #include <linux/module.h> #include <linux/slab.h> +#include <linux/inetdevice.h> #include <linux/net.h> #include <linux/completion.h> #include <linux/delay.h> #include <linux/skbuff.h> #include <linux/in.h> #include <linux/igmp.h> /* for ip_mc_join_group */ +#include <linux/udp.h> #include <net/ip.h> #include <net/sock.h> diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c index 3b87482049c..52c12e9edbb 100644 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/ipv4/ipvs/ip_vs_xmit.c @@ -322,7 +322,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct net_device *tdev; /* Device to other host */ struct iphdr *old_iph = skb->nh.iph; u8 tos = old_iph->tos; - u16 df = old_iph->frag_off; + __be16 df = old_iph->frag_off; struct iphdr *iph; /* Our new IP header */ int max_headroom; /* The extra header space needed */ int mtu; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 3c2e9639bba..bba15630469 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -68,19 +68,14 @@ struct arpt_table_info { unsigned int initial_entries; unsigned int hook_entry[NF_ARP_NUMHOOKS]; unsigned int underflow[NF_ARP_NUMHOOKS]; - char entries[0] __attribute__((aligned(SMP_CACHE_BYTES))); + void *entries[NR_CPUS]; }; static LIST_HEAD(arpt_target); static LIST_HEAD(arpt_tables); +#define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0) #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) -#ifdef CONFIG_SMP -#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p)) -#else -#define TABLE_OFFSET(t,p) 0 -#endif - static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, char *hdr_addr, int len) { @@ -269,9 +264,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb, outdev = out ? 
out->name : nulldevname; read_lock_bh(&table->lock); - table_base = (void *)table->private->entries - + TABLE_OFFSET(table->private, - smp_processor_id()); + table_base = (void *)table->private->entries[smp_processor_id()]; e = get_entry(table_base, table->private->hook_entry[hook]); back = get_entry(table_base, table->private->underflow[hook]); @@ -462,7 +455,8 @@ static inline int unconditional(const struct arpt_arp *arp) /* Figures out from what hook each rule can be called: returns 0 if * there are loops. Puts hook bitmask in comefrom. */ -static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int valid_hooks) +static int mark_source_chains(struct arpt_table_info *newinfo, + unsigned int valid_hooks, void *entry0) { unsigned int hook; @@ -472,7 +466,7 @@ static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int vali for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) { unsigned int pos = newinfo->hook_entry[hook]; struct arpt_entry *e - = (struct arpt_entry *)(newinfo->entries + pos); + = (struct arpt_entry *)(entry0 + pos); if (!(valid_hooks & (1 << hook))) continue; @@ -514,13 +508,13 @@ static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int vali goto next; e = (struct arpt_entry *) - (newinfo->entries + pos); + (entry0 + pos); } while (oldpos == pos + e->next_offset); /* Move along one */ size = e->next_offset; e = (struct arpt_entry *) - (newinfo->entries + pos + size); + (entry0 + pos + size); e->counters.pcnt = pos; pos += size; } else { @@ -537,7 +531,7 @@ static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int vali newpos = pos + e->next_offset; } e = (struct arpt_entry *) - (newinfo->entries + newpos); + (entry0 + newpos); e->counters.pcnt = pos; pos = newpos; } @@ -689,6 +683,7 @@ static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i) static int translate_table(const char *name, unsigned int valid_hooks, struct arpt_table_info *newinfo, + void *entry0, unsigned int size, unsigned int number, const unsigned int *hook_entries, @@ -710,11 +705,11 @@ static int translate_table(const char *name, i = 0; /* Walk through entries, checking offsets. 
*/ - ret = ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, + ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size, check_entry_size_and_hooks, newinfo, - newinfo->entries, - newinfo->entries + size, + entry0, + entry0 + size, hook_entries, underflows, &i); duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); if (ret != 0) @@ -743,29 +738,26 @@ static int translate_table(const char *name, } } - if (!mark_source_chains(newinfo, valid_hooks)) { + if (!mark_source_chains(newinfo, valid_hooks, entry0)) { duprintf("Looping hook\n"); return -ELOOP; } /* Finally, each sanity check must pass */ i = 0; - ret = ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, + ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size, check_entry, name, size, &i); if (ret != 0) { - ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, + ARPT_ENTRY_ITERATE(entry0, newinfo->size, cleanup_entry, &i); return ret; } /* And one copy for every other CPU */ for_each_cpu(i) { - if (i == 0) - continue; - memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i, - newinfo->entries, - SMP_ALIGN(newinfo->size)); + if (newinfo->entries[i] && newinfo->entries[i] != entry0) + memcpy(newinfo->entries[i], entry0, newinfo->size); } return ret; @@ -807,15 +799,42 @@ static inline int add_entry_to_counter(const struct arpt_entry *e, return 0; } +static inline int set_entry_to_counter(const struct arpt_entry *e, + struct arpt_counters total[], + unsigned int *i) +{ + SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); + + (*i)++; + return 0; +} + static void get_counters(const struct arpt_table_info *t, struct arpt_counters counters[]) { unsigned int cpu; unsigned int i; + unsigned int curcpu; + + /* Instead of clearing (by a previous call to memset()) + * the counters and using adds, we set the counters + * with data used by 'current' CPU + * We dont care about preemption here. + */ + curcpu = raw_smp_processor_id(); + + i = 0; + ARPT_ENTRY_ITERATE(t->entries[curcpu], + t->size, + set_entry_to_counter, + counters, + &i); for_each_cpu(cpu) { + if (cpu == curcpu) + continue; i = 0; - ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), + ARPT_ENTRY_ITERATE(t->entries[cpu], t->size, add_entry_to_counter, counters, @@ -831,6 +850,7 @@ static int copy_entries_to_user(unsigned int total_size, struct arpt_entry *e; struct arpt_counters *counters; int ret = 0; + void *loc_cpu_entry; /* We need atomic snapshot of counters: rest doesn't change * (other than comefrom, which userspace doesn't care @@ -843,13 +863,13 @@ static int copy_entries_to_user(unsigned int total_size, return -ENOMEM; /* First, sum counters... */ - memset(counters, 0, countersize); write_lock_bh(&table->lock); get_counters(table->private, counters); write_unlock_bh(&table->lock); - /* ... then copy entire thing from CPU 0... */ - if (copy_to_user(userptr, table->private->entries, total_size) != 0) { + loc_cpu_entry = table->private->entries[raw_smp_processor_id()]; + /* ... then copy entire thing ... 
*/ + if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { ret = -EFAULT; goto free_counters; } @@ -859,7 +879,7 @@ static int copy_entries_to_user(unsigned int total_size, for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ struct arpt_entry_target *t; - e = (struct arpt_entry *)(table->private->entries + off); + e = (struct arpt_entry *)(loc_cpu_entry + off); if (copy_to_user(userptr + off + offsetof(struct arpt_entry, counters), &counters[num], @@ -911,6 +931,47 @@ static int get_entries(const struct arpt_get_entries *entries, return ret; } +static void free_table_info(struct arpt_table_info *info) +{ + int cpu; + for_each_cpu(cpu) { + if (info->size <= PAGE_SIZE) + kfree(info->entries[cpu]); + else + vfree(info->entries[cpu]); + } + kfree(info); +} + +static struct arpt_table_info *alloc_table_info(unsigned int size) +{ + struct arpt_table_info *newinfo; + int cpu; + + newinfo = kzalloc(sizeof(struct arpt_table_info), GFP_KERNEL); + if (!newinfo) + return NULL; + + newinfo->size = size; + + for_each_cpu(cpu) { + if (size <= PAGE_SIZE) + newinfo->entries[cpu] = kmalloc_node(size, + GFP_KERNEL, + cpu_to_node(cpu)); + else + newinfo->entries[cpu] = vmalloc_node(size, + cpu_to_node(cpu)); + + if (newinfo->entries[cpu] == NULL) { + free_table_info(newinfo); + return NULL; + } + } + + return newinfo; +} + static int do_replace(void __user *user, unsigned int len) { int ret; @@ -918,6 +979,7 @@ static int do_replace(void __user *user, unsigned int len) struct arpt_table *t; struct arpt_table_info *newinfo, *oldinfo; struct arpt_counters *counters; + void *loc_cpu_entry, *loc_cpu_old_entry; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -930,13 +992,13 @@ static int do_replace(void __user *user, unsigned int len) if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages) return -ENOMEM; - newinfo = vmalloc(sizeof(struct arpt_table_info) - + SMP_ALIGN(tmp.size) * - (highest_possible_processor_id()+1)); + newinfo = alloc_table_info(tmp.size); if (!newinfo) return -ENOMEM; - if (copy_from_user(newinfo->entries, user + sizeof(tmp), + /* choose the copy that is on our node/cpu */ + loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; @@ -947,10 +1009,9 @@ static int do_replace(void __user *user, unsigned int len) ret = -ENOMEM; goto free_newinfo; } - memset(counters, 0, tmp.num_counters * sizeof(struct arpt_counters)); ret = translate_table(tmp.name, tmp.valid_hooks, - newinfo, tmp.size, tmp.num_entries, + newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, tmp.hook_entry, tmp.underflow); if (ret != 0) goto free_newinfo_counters; @@ -989,8 +1050,10 @@ static int do_replace(void __user *user, unsigned int len) /* Get the old counters. 
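Summing up the arp_tables rework above: the old layout kept one flat blob, with each CPU locating its slice by SMP_ALIGN() offset arithmetic; the new one gives every possible CPU its own copy, allocated on its own NUMA node by alloc_table_info(). The difference on the packet-processing path, sketched with a hypothetical name for the old flat array (the real field was char entries[0]):

/* old: one blob, per-CPU slices located by offset arithmetic */
void *base_old = (void *)priv->entries_flat +
		 SMP_ALIGN(priv->size) * smp_processor_id();

/* new: direct per-CPU, node-local pointer */
void *base_new = priv->entries[smp_processor_id()];

Counter reads follow suit: get_counters() now seeds the totals from the current CPU's copy with SET_COUNTER() and accumulates the remaining CPUs with ADD_COUNTER(), which is why the memset() that used to zero the counter array could be dropped.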
*/ get_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ - ARPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL); - vfree(oldinfo); + loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; + ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL); + + free_table_info(oldinfo); if (copy_to_user(tmp.counters, counters, sizeof(struct arpt_counters) * tmp.num_counters) != 0) ret = -EFAULT; @@ -1002,11 +1065,11 @@ static int do_replace(void __user *user, unsigned int len) module_put(t->me); up(&arpt_mutex); free_newinfo_counters_untrans: - ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry, NULL); + ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); free_newinfo_counters: vfree(counters); free_newinfo: - vfree(newinfo); + free_table_info(newinfo); return ret; } @@ -1030,6 +1093,7 @@ static int do_add_counters(void __user *user, unsigned int len) struct arpt_counters_info tmp, *paddc; struct arpt_table *t; int ret = 0; + void *loc_cpu_entry; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -1059,7 +1123,9 @@ static int do_add_counters(void __user *user, unsigned int len) } i = 0; - ARPT_ENTRY_ITERATE(t->private->entries, + /* Choose the copy that is on our node */ + loc_cpu_entry = t->private->entries[smp_processor_id()]; + ARPT_ENTRY_ITERATE(loc_cpu_entry, t->private->size, add_counter_to_entry, paddc->counters, @@ -1220,30 +1286,32 @@ int arpt_register_table(struct arpt_table *table, struct arpt_table_info *newinfo; static struct arpt_table_info bootstrap = { 0, 0, 0, { 0 }, { 0 }, { } }; + void *loc_cpu_entry; - newinfo = vmalloc(sizeof(struct arpt_table_info) - + SMP_ALIGN(repl->size) * - (highest_possible_processor_id()+1)); + newinfo = alloc_table_info(repl->size); if (!newinfo) { ret = -ENOMEM; return ret; } - memcpy(newinfo->entries, repl->entries, repl->size); + + /* choose the copy on our node/cpu */ + loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + memcpy(loc_cpu_entry, repl->entries, repl->size); ret = translate_table(table->name, table->valid_hooks, - newinfo, repl->size, + newinfo, loc_cpu_entry, repl->size, repl->num_entries, repl->hook_entry, repl->underflow); duprintf("arpt_register_table: translate table gives %d\n", ret); if (ret != 0) { - vfree(newinfo); + free_table_info(newinfo); return ret; } ret = down_interruptible(&arpt_mutex); if (ret != 0) { - vfree(newinfo); + free_table_info(newinfo); return ret; } @@ -1272,20 +1340,23 @@ int arpt_register_table(struct arpt_table *table, return ret; free_unlock: - vfree(newinfo); + free_table_info(newinfo); goto unlock; } void arpt_unregister_table(struct arpt_table *table) { + void *loc_cpu_entry; + down(&arpt_mutex); LIST_DELETE(&arpt_tables, table); up(&arpt_mutex); /* Decrease module usage counts and free resources */ - ARPT_ENTRY_ITERATE(table->private->entries, table->private->size, + loc_cpu_entry = table->private->entries[raw_smp_processor_id()]; + ARPT_ENTRY_ITERATE(loc_cpu_entry, table->private->size, cleanup_entry, NULL); - vfree(table->private); + free_table_info(table->private); } /* The built-in targets: standard (NULL) and error. 
*/ diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c index e52847fa10f..84e4f79b7ff 100644 --- a/net/ipv4/netfilter/ip_conntrack_amanda.c +++ b/net/ipv4/netfilter/ip_conntrack_amanda.c @@ -18,11 +18,13 @@ * */ +#include <linux/in.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/netfilter.h> #include <linux/ip.h> #include <linux/moduleparam.h> +#include <linux/udp.h> #include <net/checksum.h> #include <net/udp.h> @@ -34,7 +36,7 @@ static unsigned int master_timeout = 300; MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>"); MODULE_DESCRIPTION("Amanda connection tracking module"); MODULE_LICENSE("GPL"); -module_param(master_timeout, int, 0600); +module_param(master_timeout, uint, 0600); MODULE_PARM_DESC(master_timeout, "timeout for the master connection"); static const char *conns[] = { "DATA ", "MESG ", "INDEX " }; diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c index 68b173bcda6..e627e585617 100644 --- a/net/ipv4/netfilter/ip_conntrack_ftp.c +++ b/net/ipv4/netfilter/ip_conntrack_ftp.c @@ -34,7 +34,7 @@ static int ports_c; module_param_array(ports, ushort, &ports_c, 0400); static int loose; -module_param(loose, int, 0600); +module_param(loose, bool, 0600); unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c index d7c40421d0d..c51a2cf71b4 100644 --- a/net/ipv4/netfilter/ip_conntrack_irc.c +++ b/net/ipv4/netfilter/ip_conntrack_irc.c @@ -36,7 +36,7 @@ #define MAX_PORTS 8 static unsigned short ports[MAX_PORTS]; static int ports_c; -static int max_dcc_channels = 8; +static unsigned int max_dcc_channels = 8; static unsigned int dcc_timeout = 300; /* This is slow, but it's simple. 
--RR */ static char *irc_buffer; @@ -54,9 +54,9 @@ MODULE_DESCRIPTION("IRC (DCC) connection tracking helper"); MODULE_LICENSE("GPL"); module_param_array(ports, ushort, &ports_c, 0400); MODULE_PARM_DESC(ports, "port numbers of IRC servers"); -module_param(max_dcc_channels, int, 0400); +module_param(max_dcc_channels, uint, 0400); MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per IRC session"); -module_param(dcc_timeout, int, 0400); +module_param(dcc_timeout, uint, 0400); MODULE_PARM_DESC(dcc_timeout, "timeout on for unestablished DCC channels"); static const char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " }; @@ -254,10 +254,6 @@ static int __init init(void) printk("ip_conntrack_irc: max_dcc_channels must be a positive integer\n"); return -EBUSY; } - if (dcc_timeout < 0) { - printk("ip_conntrack_irc: dcc_timeout must be a positive integer\n"); - return -EBUSY; - } irc_buffer = kmalloc(65536, GFP_KERNEL); if (!irc_buffer) diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c index 186646eb249..4e68e16a261 100644 --- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c +++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c @@ -37,7 +37,7 @@ MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper"); MODULE_LICENSE("GPL"); static unsigned int timeout = 3; -module_param(timeout, int, 0600); +module_param(timeout, uint, 0400); MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); static int help(struct sk_buff **pskb, diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 91fe8f2e38f..c9ebbe0d2d9 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -79,6 +79,7 @@ ctnetlink_dump_tuples(struct sk_buff *skb, const struct ip_conntrack_tuple *tuple) { struct nfattr *nest_parms; + int ret; nest_parms = NFA_NEST(skb, CTA_TUPLE_IP); NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), &tuple->src.ip); @@ -86,10 +87,10 @@ ctnetlink_dump_tuples(struct sk_buff *skb, NFA_NEST_END(skb, nest_parms); nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO); - ctnetlink_dump_tuples_proto(skb, tuple); + ret = ctnetlink_dump_tuples_proto(skb, tuple); NFA_NEST_END(skb, nest_parms); - return 0; + return ret; nfattr_failure: return -1; @@ -160,7 +161,7 @@ ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct ip_conntrack *ct) return 0; nest_helper = NFA_NEST(skb, CTA_HELP); - NFA_PUT(skb, CTA_HELP_NAME, CTA_HELP_MAXNAMESIZE, &ct->helper->name); + NFA_PUT(skb, CTA_HELP_NAME, strlen(ct->helper->name), ct->helper->name); if (ct->helper->to_nfattr) ct->helper->to_nfattr(skb, ct); @@ -229,7 +230,7 @@ nfattr_failure: static inline int ctnetlink_dump_use(struct sk_buff *skb, const struct ip_conntrack *ct) { - unsigned int use = htonl(atomic_read(&ct->ct_general.use)); + u_int32_t use = htonl(atomic_read(&ct->ct_general.use)); NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use); return 0; @@ -311,29 +312,22 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, if (events & IPCT_DESTROY) { type = IPCTNL_MSG_CT_DELETE; group = NFNLGRP_CONNTRACK_DESTROY; - goto alloc_skb; - } - if (events & (IPCT_NEW | IPCT_RELATED)) { + } else if (events & (IPCT_NEW | IPCT_RELATED)) { type = IPCTNL_MSG_CT_NEW; flags = NLM_F_CREATE|NLM_F_EXCL; /* dump everything */ events = ~0UL; group = NFNLGRP_CONNTRACK_NEW; - goto alloc_skb; - } - if (events & (IPCT_STATUS | + } else if (events & (IPCT_STATUS | IPCT_PROTOINFO | 
IPCT_HELPER | IPCT_HELPINFO | IPCT_NATINFO)) { type = IPCTNL_MSG_CT_NEW; group = NFNLGRP_CONNTRACK_UPDATE; - goto alloc_skb; - } + } else + return NOTIFY_DONE; - return NOTIFY_DONE; - -alloc_skb: /* FIXME: Check if there are any listeners before, don't hurt performance */ skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); @@ -1037,6 +1031,11 @@ ctnetlink_create_conntrack(struct nfattr *cda[], return err; } +#if defined(CONFIG_IP_NF_CONNTRACK_MARK) + if (cda[CTA_MARK-1]) + ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1])); +#endif + ct->helper = ip_conntrack_helper_find_get(rtuple); add_timer(&ct->timeout); @@ -1045,11 +1044,6 @@ ctnetlink_create_conntrack(struct nfattr *cda[], if (ct->helper) ip_conntrack_helper_put(ct->helper); -#if defined(CONFIG_IP_NF_CONNTRACK_MARK) - if (cda[CTA_MARK-1]) - ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1])); -#endif - DEBUGP("conntrack with id %u inserted\n", ct->id); return 0; @@ -1209,7 +1203,6 @@ static int ctnetlink_expect_event(struct notifier_block *this, unsigned int type; unsigned char *b; int flags = 0; - u16 proto; if (events & IPEXP_NEW) { type = IPCTNL_MSG_EXP_NEW; @@ -1236,7 +1229,6 @@ static int ctnetlink_expect_event(struct notifier_block *this, goto nfattr_failure; nlh->nlmsg_len = skb->tail - b; - proto = exp->tuple.dst.protonum; nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0); return NOTIFY_DONE; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c index 744abb9d377..57956dee60c 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c @@ -31,6 +31,7 @@ #include <linux/ip.h> #include <linux/in.h> #include <linux/list.h> +#include <linux/seq_file.h> static DEFINE_RWLOCK(ip_ct_gre_lock); #define ASSERT_READ_LOCK(x) diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c index 5f9925db608..30fc21d6165 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c @@ -47,20 +47,21 @@ static int icmp_pkt_to_tuple(const struct sk_buff *skb, return 1; } +/* Add 1; spaces filled with 0. */ +static const u_int8_t invmap[] = { + [ICMP_ECHO] = ICMP_ECHOREPLY + 1, + [ICMP_ECHOREPLY] = ICMP_ECHO + 1, + [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1, + [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1, + [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1, + [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1, + [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1, + [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1 +}; + static int icmp_invert_tuple(struct ip_conntrack_tuple *tuple, const struct ip_conntrack_tuple *orig) { - /* Add 1; spaces filled with 0. */ - static const u_int8_t invmap[] - = { [ICMP_ECHO] = ICMP_ECHOREPLY + 1, - [ICMP_ECHOREPLY] = ICMP_ECHO + 1, - [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1, - [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1, - [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1, - [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1, - [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1, - [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1}; - if (orig->dst.u.icmp.type >= sizeof(invmap) || !invmap[orig->dst.u.icmp.type]) return 0; @@ -110,17 +111,17 @@ static int icmp_packet(struct ip_conntrack *ct, return NF_ACCEPT; } -static const u_int8_t valid_new[] = { - [ICMP_ECHO] = 1, - [ICMP_TIMESTAMP] = 1, - [ICMP_INFO_REQUEST] = 1, - [ICMP_ADDRESS] = 1 -}; - /* Called when a new connection for this protocol found. 
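
The invmap[] table above encodes "inverse of type T" as invmap[T] - 1, so a zero slot can mean "no inverse exists"; icmp_invert_tuple() rejects any type whose slot is 0 or out of range. A compilable illustration of the same encoding (user-space, only two ICMP types shown):

#include <stdio.h>

#define ICMP_ECHOREPLY 0
#define ICMP_ECHO      8

/* Add 1; spaces filled with 0 -- exactly the conntrack convention. */
static const unsigned char invmap[] = {
        [ICMP_ECHO]      = ICMP_ECHOREPLY + 1,
        [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
};

/* Returns the inverse ICMP type, or -1 when the type is not invertible. */
static int invert_type(unsigned int type)
{
        if (type >= sizeof(invmap) || !invmap[type])
                return -1;
        return invmap[type] - 1;
}

int main(void)
{
        printf("echo -> %d\n", invert_type(ICMP_ECHO));  /* 0, i.e. echo reply */
        printf("type 3 -> %d\n", invert_type(3));        /* -1, no inverse */
        return 0;
}
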
*/ static int icmp_new(struct ip_conntrack *conntrack, const struct sk_buff *skb) { + static const u_int8_t valid_new[] = { + [ICMP_ECHO] = 1, + [ICMP_TIMESTAMP] = 1, + [ICMP_INFO_REQUEST] = 1, + [ICMP_ADDRESS] = 1 + }; + if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) { /* Can't create a new ICMP `conn' with this. */ @@ -279,10 +280,6 @@ static int icmp_tuple_to_nfattr(struct sk_buff *skb, NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t), &t->dst.u.icmp.code); - if (t->dst.u.icmp.type >= sizeof(valid_new) - || !valid_new[t->dst.u.icmp.type]) - return -EINVAL; - return 0; nfattr_failure: @@ -295,7 +292,7 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[], if (!tb[CTA_PROTO_ICMP_TYPE-1] || !tb[CTA_PROTO_ICMP_CODE-1] || !tb[CTA_PROTO_ICMP_ID-1]) - return -1; + return -EINVAL; tuple->dst.u.icmp.type = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]); @@ -304,6 +301,10 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[], tuple->src.u.icmp.id = *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]); + if (tuple->dst.u.icmp.type >= sizeof(invmap) + || !invmap[tuple->dst.u.icmp.type]) + return -EINVAL; + return 0; } #endif diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c index f2dcac7c766..46becbe4fe5 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c @@ -11,6 +11,7 @@ #include <linux/timer.h> #include <linux/netfilter.h> #include <linux/in.h> +#include <linux/ip.h> #include <linux/udp.h> #include <linux/seq_file.h> #include <net/checksum.h> diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index dd476b191f4..7ba97783e74 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -27,6 +27,7 @@ #endif #include <net/checksum.h> #include <net/ip.h> +#include <net/route.h> #define ASSERT_READ_LOCK(x) #define ASSERT_WRITE_LOCK(x) @@ -450,30 +451,6 @@ static unsigned int ip_conntrack_defrag(unsigned int hooknum, return NF_ACCEPT; } -static unsigned int ip_refrag(unsigned int hooknum, - struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct rtable *rt = (struct rtable *)(*pskb)->dst; - - /* We've seen it coming out the other side: confirm */ - if (ip_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT) - return NF_DROP; - - /* Local packets are never produced too large for their - interface. We degfragment them at LOCAL_OUT, however, - so we have to refragment them here. */ - if ((*pskb)->len > dst_mtu(&rt->u.dst) && - !skb_shinfo(*pskb)->tso_size) { - /* No hook can be after us, so this should be OK. */ - ip_fragment(*pskb, okfn); - return NF_STOLEN; - } - return NF_ACCEPT; -} - static unsigned int ip_conntrack_local(unsigned int hooknum, struct sk_buff **pskb, const struct net_device *in, @@ -543,7 +520,7 @@ static struct nf_hook_ops ip_conntrack_helper_in_ops = { /* Refragmenter; last chance. 
*/ static struct nf_hook_ops ip_conntrack_out_ops = { - .hook = ip_refrag, + .hook = ip_confirm, .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_IP_POST_ROUTING, diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c index d83757a70d9..b8daab3c64a 100644 --- a/net/ipv4/netfilter/ip_nat_ftp.c +++ b/net/ipv4/netfilter/ip_nat_ftp.c @@ -171,7 +171,7 @@ static int __init init(void) /* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */ static int warn_set(const char *val, struct kernel_param *kp) { - printk(KERN_INFO __stringify(KBUILD_MODNAME) + printk(KERN_INFO KBUILD_MODNAME ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); return 0; } diff --git a/net/ipv4/netfilter/ip_nat_irc.c b/net/ipv4/netfilter/ip_nat_irc.c index de31942babe..461c833eaca 100644 --- a/net/ipv4/netfilter/ip_nat_irc.c +++ b/net/ipv4/netfilter/ip_nat_irc.c @@ -113,7 +113,7 @@ static int __init init(void) /* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */ static int warn_set(const char *val, struct kernel_param *kp) { - printk(KERN_INFO __stringify(KBUILD_MODNAME) + printk(KERN_INFO KBUILD_MODNAME ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); return 0; } diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c index 8acb7ed40b4..4f95d477805 100644 --- a/net/ipv4/netfilter/ip_nat_snmp_basic.c +++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c @@ -44,6 +44,7 @@ * */ #include <linux/config.h> +#include <linux/in.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> @@ -53,6 +54,7 @@ #include <linux/netfilter_ipv4/ip_conntrack_helper.h> #include <linux/netfilter_ipv4/ip_nat_helper.h> #include <linux/ip.h> +#include <linux/udp.h> #include <net/checksum.h> #include <net/udp.h> #include <asm/uaccess.h> diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 30cd4e18c12..f04111f74e0 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -190,23 +190,6 @@ ip_nat_out(unsigned int hooknum, || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) return NF_ACCEPT; - /* We can hit fragment here; forwarded packets get - defragmented by connection tracking coming in, then - fragmented (grr) by the forward code. - - In future: If we have nfct != NULL, AND we have NAT - initialized, AND there is no helper, then we can do full - NAPT on the head, and IP-address-only NAT on the rest. - - I'm starting to have nightmares about fragments. */ - - if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { - *pskb = ip_ct_gather_frags(*pskb, IP_DEFRAG_NAT_OUT); - - if (!*pskb) - return NF_STOLEN; - } - return ip_nat_fn(hooknum, pskb, in, out, okfn); } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 45886c8475e..2a26d167e14 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -83,11 +83,6 @@ static DECLARE_MUTEX(ipt_mutex); context stops packets coming through and allows user context to read the counters or update the rules. - To be cache friendly on SMP, we arrange them like so: - [ n-entries ] - ... cache-align padding ... - [ n-entries ] - Hence the start of any table is given by get_table() below. 
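
Both arp_tables and ip_tables switch here from one contiguous buffer addressed as base + SMP_ALIGN(size) * cpu to an array of per-CPU pointers, so each copy can be allocated on its owning CPU's NUMA node. A simplified user-space analogue of alloc_table_info() (malloc() stands in for the kmalloc_node()/vmalloc_node() split, and NR_CPUS is fixed for illustration):

#include <stdlib.h>

#define NR_CPUS 4

struct table_info {
        unsigned int size;
        void *entries[NR_CPUS];         /* entries[cpu]: that CPU's copy */
};

static struct table_info *alloc_table_info(unsigned int size)
{
        struct table_info *info = calloc(1, sizeof(*info));
        int cpu;

        if (!info)
                return NULL;
        info->size = size;
        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                info->entries[cpu] = malloc(size);
                if (!info->entries[cpu]) {      /* unwind partial allocation */
                        while (cpu--)
                                free(info->entries[cpu]);
                        free(info);
                        return NULL;
                }
        }
        return info;
}
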
*/ /* The table itself */ @@ -105,20 +100,15 @@ struct ipt_table_info unsigned int underflow[NF_IP_NUMHOOKS]; /* ipt_entry tables: one per CPU */ - char entries[0] ____cacheline_aligned; + void *entries[NR_CPUS]; }; static LIST_HEAD(ipt_target); static LIST_HEAD(ipt_match); static LIST_HEAD(ipt_tables); +#define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0) #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) -#ifdef CONFIG_SMP -#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p)) -#else -#define TABLE_OFFSET(t,p) 0 -#endif - #if 0 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0) #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; }) @@ -290,8 +280,7 @@ ipt_do_table(struct sk_buff **pskb, read_lock_bh(&table->lock); IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - table_base = (void *)table->private->entries - + TABLE_OFFSET(table->private, smp_processor_id()); + table_base = (void *)table->private->entries[smp_processor_id()]; e = get_entry(table_base, table->private->hook_entry[hook]); #ifdef CONFIG_NETFILTER_DEBUG @@ -563,7 +552,8 @@ unconditional(const struct ipt_ip *ip) /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int -mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks) +mark_source_chains(struct ipt_table_info *newinfo, + unsigned int valid_hooks, void *entry0) { unsigned int hook; @@ -572,7 +562,7 @@ mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks) for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) { unsigned int pos = newinfo->hook_entry[hook]; struct ipt_entry *e - = (struct ipt_entry *)(newinfo->entries + pos); + = (struct ipt_entry *)(entry0 + pos); if (!(valid_hooks & (1 << hook))) continue; @@ -622,13 +612,13 @@ mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks) goto next; e = (struct ipt_entry *) - (newinfo->entries + pos); + (entry0 + pos); } while (oldpos == pos + e->next_offset); /* Move along one */ size = e->next_offset; e = (struct ipt_entry *) - (newinfo->entries + pos + size); + (entry0 + pos + size); e->counters.pcnt = pos; pos += size; } else { @@ -645,7 +635,7 @@ mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks) newpos = pos + e->next_offset; } e = (struct ipt_entry *) - (newinfo->entries + newpos); + (entry0 + newpos); e->counters.pcnt = pos; pos = newpos; } @@ -855,6 +845,7 @@ static int translate_table(const char *name, unsigned int valid_hooks, struct ipt_table_info *newinfo, + void *entry0, unsigned int size, unsigned int number, const unsigned int *hook_entries, @@ -875,11 +866,11 @@ translate_table(const char *name, duprintf("translate_table: size %u\n", newinfo->size); i = 0; /* Walk through entries, checking offsets. 
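
mark_source_chains(), now handed the base pointer entry0 explicitly, walks every path reachable from each hook entry point and fails (mapped to -ELOOP) if a rule chain can reach itself. A heavily reduced illustration of that cycle check, with rules[] holding plain jump indices instead of real ipt_entry offsets (assumes at most 64 rules):

#include <stdio.h>

/* rules[i] is the index of the next rule, or -1 for a final verdict. */
static int chain_loops(const int *rules, int n, int start)
{
        char visited[64] = { 0 };       /* plays the role of e->comefrom */
        int pos = start;

        while (pos >= 0 && pos < n) {
                if (visited[pos])
                        return 1;       /* revisited a rule: loop */
                visited[pos] = 1;
                pos = rules[pos];
        }
        return 0;
}

int main(void)
{
        int ok[]     = { 1, 2, -1 };    /* 0 -> 1 -> 2 -> verdict */
        int looped[] = { 1, 2, 0 };     /* 0 -> 1 -> 2 -> 0 ... */
        printf("%d %d\n", chain_loops(ok, 3, 0), chain_loops(looped, 3, 0));
        return 0;
}
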
*/ - ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, + ret = IPT_ENTRY_ITERATE(entry0, newinfo->size, check_entry_size_and_hooks, newinfo, - newinfo->entries, - newinfo->entries + size, + entry0, + entry0 + size, hook_entries, underflows, &i); if (ret != 0) return ret; @@ -907,27 +898,24 @@ translate_table(const char *name, } } - if (!mark_source_chains(newinfo, valid_hooks)) + if (!mark_source_chains(newinfo, valid_hooks, entry0)) return -ELOOP; /* Finally, each sanity check must pass */ i = 0; - ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, + ret = IPT_ENTRY_ITERATE(entry0, newinfo->size, check_entry, name, size, &i); if (ret != 0) { - IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, + IPT_ENTRY_ITERATE(entry0, newinfo->size, cleanup_entry, &i); return ret; } /* And one copy for every other CPU */ for_each_cpu(i) { - if (i == 0) - continue; - memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i, - newinfo->entries, - SMP_ALIGN(newinfo->size)); + if (newinfo->entries[i] && newinfo->entries[i] != entry0) + memcpy(newinfo->entries[i], entry0, newinfo->size); } return ret; @@ -943,15 +931,12 @@ replace_table(struct ipt_table *table, #ifdef CONFIG_NETFILTER_DEBUG { - struct ipt_entry *table_base; - unsigned int i; + int cpu; - for_each_cpu(i) { - table_base = - (void *)newinfo->entries - + TABLE_OFFSET(newinfo, i); - - table_base->comefrom = 0xdead57ac; + for_each_cpu(cpu) { + struct ipt_entry *table_base = newinfo->entries[cpu]; + if (table_base) + table_base->comefrom = 0xdead57ac; } } #endif @@ -986,16 +971,44 @@ add_entry_to_counter(const struct ipt_entry *e, return 0; } +static inline int +set_entry_to_counter(const struct ipt_entry *e, + struct ipt_counters total[], + unsigned int *i) +{ + SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); + + (*i)++; + return 0; +} + static void get_counters(const struct ipt_table_info *t, struct ipt_counters counters[]) { unsigned int cpu; unsigned int i; + unsigned int curcpu; + + /* Instead of clearing (by a previous call to memset()) + * the counters and using adds, we set the counters + * with data used by 'current' CPU + * We dont care about preemption here. + */ + curcpu = raw_smp_processor_id(); + + i = 0; + IPT_ENTRY_ITERATE(t->entries[curcpu], + t->size, + set_entry_to_counter, + counters, + &i); for_each_cpu(cpu) { + if (cpu == curcpu) + continue; i = 0; - IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), + IPT_ENTRY_ITERATE(t->entries[cpu], t->size, add_entry_to_counter, counters, @@ -1012,24 +1025,29 @@ copy_entries_to_user(unsigned int total_size, struct ipt_entry *e; struct ipt_counters *counters; int ret = 0; + void *loc_cpu_entry; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care about). */ countersize = sizeof(struct ipt_counters) * table->private->number; - counters = vmalloc(countersize); + counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) return -ENOMEM; /* First, sum counters... */ - memset(counters, 0, countersize); write_lock_bh(&table->lock); get_counters(table->private, counters); write_unlock_bh(&table->lock); - /* ... then copy entire thing from CPU 0... */ - if (copy_to_user(userptr, table->private->entries, total_size) != 0) { + /* choose the copy that is on our node/cpu, ... + * This choice is lazy (because current thread is + * allowed to migrate to another cpu) + */ + loc_cpu_entry = table->private->entries[raw_smp_processor_id()]; + /* ... then copy entire thing ... 
*/ + if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { ret = -EFAULT; goto free_counters; } @@ -1041,7 +1059,7 @@ copy_entries_to_user(unsigned int total_size, struct ipt_entry_match *m; struct ipt_entry_target *t; - e = (struct ipt_entry *)(table->private->entries + off); + e = (struct ipt_entry *)(loc_cpu_entry + off); if (copy_to_user(userptr + off + offsetof(struct ipt_entry, counters), &counters[num], @@ -1110,6 +1128,45 @@ get_entries(const struct ipt_get_entries *entries, return ret; } +static void free_table_info(struct ipt_table_info *info) +{ + int cpu; + for_each_cpu(cpu) { + if (info->size <= PAGE_SIZE) + kfree(info->entries[cpu]); + else + vfree(info->entries[cpu]); + } + kfree(info); +} + +static struct ipt_table_info *alloc_table_info(unsigned int size) +{ + struct ipt_table_info *newinfo; + int cpu; + + newinfo = kzalloc(sizeof(struct ipt_table_info), GFP_KERNEL); + if (!newinfo) + return NULL; + + newinfo->size = size; + + for_each_cpu(cpu) { + if (size <= PAGE_SIZE) + newinfo->entries[cpu] = kmalloc_node(size, + GFP_KERNEL, + cpu_to_node(cpu)); + else + newinfo->entries[cpu] = vmalloc_node(size, cpu_to_node(cpu)); + if (newinfo->entries[cpu] == 0) { + free_table_info(newinfo); + return NULL; + } + } + + return newinfo; +} + static int do_replace(void __user *user, unsigned int len) { @@ -1118,6 +1175,7 @@ do_replace(void __user *user, unsigned int len) struct ipt_table *t; struct ipt_table_info *newinfo, *oldinfo; struct ipt_counters *counters; + void *loc_cpu_entry, *loc_cpu_old_entry; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -1130,13 +1188,13 @@ do_replace(void __user *user, unsigned int len) if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages) return -ENOMEM; - newinfo = vmalloc(sizeof(struct ipt_table_info) - + SMP_ALIGN(tmp.size) * - (highest_possible_processor_id()+1)); + newinfo = alloc_table_info(tmp.size); if (!newinfo) return -ENOMEM; - if (copy_from_user(newinfo->entries, user + sizeof(tmp), + /* choose the copy that is our node/cpu */ + loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; @@ -1147,10 +1205,9 @@ do_replace(void __user *user, unsigned int len) ret = -ENOMEM; goto free_newinfo; } - memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters)); ret = translate_table(tmp.name, tmp.valid_hooks, - newinfo, tmp.size, tmp.num_entries, + newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, tmp.hook_entry, tmp.underflow); if (ret != 0) goto free_newinfo_counters; @@ -1189,8 +1246,9 @@ do_replace(void __user *user, unsigned int len) /* Get the old counters. 
*/ get_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ - IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL); - vfree(oldinfo); + loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; + IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL); + free_table_info(oldinfo); if (copy_to_user(tmp.counters, counters, sizeof(struct ipt_counters) * tmp.num_counters) != 0) ret = -EFAULT; @@ -1202,11 +1260,11 @@ do_replace(void __user *user, unsigned int len) module_put(t->me); up(&ipt_mutex); free_newinfo_counters_untrans: - IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL); + IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL); free_newinfo_counters: vfree(counters); free_newinfo: - vfree(newinfo); + free_table_info(newinfo); return ret; } @@ -1239,6 +1297,7 @@ do_add_counters(void __user *user, unsigned int len) struct ipt_counters_info tmp, *paddc; struct ipt_table *t; int ret = 0; + void *loc_cpu_entry; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -1246,7 +1305,7 @@ do_add_counters(void __user *user, unsigned int len) if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters)) return -EINVAL; - paddc = vmalloc(len); + paddc = vmalloc_node(len, numa_node_id()); if (!paddc) return -ENOMEM; @@ -1268,7 +1327,9 @@ do_add_counters(void __user *user, unsigned int len) } i = 0; - IPT_ENTRY_ITERATE(t->private->entries, + /* Choose the copy that is on our node */ + loc_cpu_entry = t->private->entries[raw_smp_processor_id()]; + IPT_ENTRY_ITERATE(loc_cpu_entry, t->private->size, add_counter_to_entry, paddc->counters, @@ -1460,28 +1521,31 @@ int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl) struct ipt_table_info *newinfo; static struct ipt_table_info bootstrap = { 0, 0, 0, { 0 }, { 0 }, { } }; + void *loc_cpu_entry; - newinfo = vmalloc(sizeof(struct ipt_table_info) - + SMP_ALIGN(repl->size) * - (highest_possible_processor_id()+1)); + newinfo = alloc_table_info(repl->size); if (!newinfo) return -ENOMEM; - memcpy(newinfo->entries, repl->entries, repl->size); + /* choose the copy on our node/cpu + * but dont care of preemption + */ + loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + memcpy(loc_cpu_entry, repl->entries, repl->size); ret = translate_table(table->name, table->valid_hooks, - newinfo, repl->size, + newinfo, loc_cpu_entry, repl->size, repl->num_entries, repl->hook_entry, repl->underflow); if (ret != 0) { - vfree(newinfo); + free_table_info(newinfo); return ret; } ret = down_interruptible(&ipt_mutex); if (ret != 0) { - vfree(newinfo); + free_table_info(newinfo); return ret; } @@ -1510,20 +1574,23 @@ int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl) return ret; free_unlock: - vfree(newinfo); + free_table_info(newinfo); goto unlock; } void ipt_unregister_table(struct ipt_table *table) { + void *loc_cpu_entry; + down(&ipt_mutex); LIST_DELETE(&ipt_tables, table); up(&ipt_mutex); /* Decrease module usage counts and free resources */ - IPT_ENTRY_ITERATE(table->private->entries, table->private->size, + loc_cpu_entry = table->private->entries[raw_smp_processor_id()]; + IPT_ENTRY_ITERATE(loc_cpu_entry, table->private->size, cleanup_entry, NULL); - vfree(table->private); + free_table_info(table->private); } /* Returns 1 if the port is matched by the range, 0 otherwise */ diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 275a174c6fe..27860510ca6 100644 
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -11,6 +11,7 @@ #include <linux/config.h> #include <linux/types.h> +#include <linux/inetdevice.h> #include <linux/ip.h> #include <linux/timer.h> #include <linux/module.h> @@ -18,6 +19,7 @@ #include <net/protocol.h> #include <net/ip.h> #include <net/checksum.h> +#include <net/route.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv4/ip_nat_rule.h> #include <linux/netfilter_ipv4/ip_tables.h> diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index f057025a719..6693526ae12 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -203,7 +203,7 @@ static void send_reset(struct sk_buff *oldskb, int hook) sizeof(struct tcphdr), 0)); /* Adjust IP TTL, DF */ - nskb->nh.iph->ttl = MAXTTL; + nskb->nh.iph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT); /* Set DF, id = 0 */ nskb->nh.iph->frag_off = htons(IP_DF); nskb->nh.iph->id = 0; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 2883ccd8a91..38641cd0612 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -77,15 +77,15 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG); #define PRINTR(format, args...) do { if (net_ratelimit()) printk(format , ## args); } while (0) static unsigned int nlbufsiz = 4096; -module_param(nlbufsiz, uint, 0600); /* FIXME: Check size < 128k --RR */ +module_param(nlbufsiz, uint, 0400); MODULE_PARM_DESC(nlbufsiz, "netlink buffer size"); static unsigned int flushtimeout = 10; -module_param(flushtimeout, int, 0600); +module_param(flushtimeout, uint, 0600); MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)"); -static unsigned int nflog = 1; -module_param(nflog, int, 0400); +static int nflog = 1; +module_param(nflog, bool, 0400); MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); /* global data structures */ @@ -376,7 +376,7 @@ static int __init init(void) DEBUGP("ipt_ULOG: init module\n"); - if (nlbufsiz >= 128*1024) { + if (nlbufsiz > 128*1024) { printk("Netlink buffer has to be <= 128kB\n"); return -EINVAL; } diff --git a/net/ipv4/netfilter/ipt_helper.c b/net/ipv4/netfilter/ipt_helper.c index bf14e1c7798..aef649e393a 100644 --- a/net/ipv4/netfilter/ipt_helper.c +++ b/net/ipv4/netfilter/ipt_helper.c @@ -13,6 +13,7 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netfilter.h> +#include <linux/interrupt.h> #if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE) #include <linux/netfilter_ipv4/ip_conntrack.h> #include <linux/netfilter_ipv4/ip_conntrack_core.h> diff --git a/net/ipv4/netfilter/ipt_physdev.c b/net/ipv4/netfilter/ipt_physdev.c index 1a53924041f..03f554857a4 100644 --- a/net/ipv4/netfilter/ipt_physdev.c +++ b/net/ipv4/netfilter/ipt_physdev.c @@ -9,6 +9,7 @@ */ #include <linux/module.h> +#include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/netfilter_ipv4/ipt_physdev.h> #include <linux/netfilter_ipv4/ip_tables.h> diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 261cbb4d4c4..5ddccb18c65 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -24,10 +24,10 @@ #define HASH_LOG 9 /* Defaults, these can be overridden on the module command-line. 
*/ -static int ip_list_tot = 100; -static int ip_pkt_list_tot = 20; -static int ip_list_hash_size = 0; -static int ip_list_perms = 0644; +static unsigned int ip_list_tot = 100; +static unsigned int ip_pkt_list_tot = 20; +static unsigned int ip_list_hash_size = 0; +static unsigned int ip_list_perms = 0644; #ifdef DEBUG static int debug = 1; #endif @@ -38,13 +38,13 @@ KERN_INFO RECENT_NAME " " RECENT_VER ": Stephen Frost <sfrost@snowman.net>. htt MODULE_AUTHOR("Stephen Frost <sfrost@snowman.net>"); MODULE_DESCRIPTION("IP tables recently seen matching module " RECENT_VER); MODULE_LICENSE("GPL"); -module_param(ip_list_tot, int, 0400); -module_param(ip_pkt_list_tot, int, 0400); -module_param(ip_list_hash_size, int, 0400); -module_param(ip_list_perms, int, 0400); +module_param(ip_list_tot, uint, 0400); +module_param(ip_pkt_list_tot, uint, 0400); +module_param(ip_list_hash_size, uint, 0400); +module_param(ip_list_perms, uint, 0400); #ifdef DEBUG -module_param(debug, int, 0600); -MODULE_PARM_DESC(debug,"debugging level, defaults to 1"); +module_param(debug, bool, 0600); +MODULE_PARM_DESC(debug,"enable debugging output"); #endif MODULE_PARM_DESC(ip_list_tot,"number of IPs to remember per list"); MODULE_PARM_DESC(ip_pkt_list_tot,"number of packets per IP to remember"); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 8202c1c0afa..9bdbb779397 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -22,6 +22,7 @@ #include <linux/skbuff.h> #include <linux/icmp.h> #include <linux/sysctl.h> +#include <net/route.h> #include <net/ip.h> #include <linux/netfilter_ipv4.h> @@ -180,30 +181,6 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, return NF_ACCEPT; } -static unsigned int ipv4_refrag(unsigned int hooknum, - struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct rtable *rt = (struct rtable *)(*pskb)->dst; - - /* We've seen it coming out the other side: confirm */ - if (ipv4_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT) - return NF_DROP; - - /* Local packets are never produced too large for their - interface. We degfragment them at LOCAL_OUT, however, - so we have to refragment them here. */ - if ((*pskb)->len > dst_mtu(&rt->u.dst) && - !skb_shinfo(*pskb)->tso_size) { - /* No hook can be after us, so this should be OK. */ - ip_fragment(*pskb, okfn); - return NF_STOLEN; - } - return NF_ACCEPT; -} - static unsigned int ipv4_conntrack_in(unsigned int hooknum, struct sk_buff **pskb, const struct net_device *in, @@ -283,7 +260,7 @@ static struct nf_hook_ops ipv4_conntrack_helper_in_ops = { /* Refragmenter; last chance. 
*/ static struct nf_hook_ops ipv4_conntrack_out_ops = { - .hook = ipv4_refrag, + .hook = ipv4_confirm, .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_IP_POST_ROUTING, @@ -392,6 +369,48 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) return -ENOENT; } +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_conntrack.h> + +static int ipv4_tuple_to_nfattr(struct sk_buff *skb, + const struct nf_conntrack_tuple *tuple) +{ + NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), + &tuple->src.u3.ip); + NFA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t), + &tuple->dst.u3.ip); + return 0; + +nfattr_failure: + return -1; +} + +static const size_t cta_min_ip[CTA_IP_MAX] = { + [CTA_IP_V4_SRC-1] = sizeof(u_int32_t), + [CTA_IP_V4_DST-1] = sizeof(u_int32_t), +}; + +static int ipv4_nfattr_to_tuple(struct nfattr *tb[], + struct nf_conntrack_tuple *t) +{ + if (!tb[CTA_IP_V4_SRC-1] || !tb[CTA_IP_V4_DST-1]) + return -EINVAL; + + if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip)) + return -EINVAL; + + t->src.u3.ip = + *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_SRC-1]); + t->dst.u3.ip = + *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_DST-1]); + + return 0; +} +#endif + static struct nf_sockopt_ops so_getorigdst = { .pf = PF_INET, .get_optmin = SO_ORIGINAL_DST, @@ -408,6 +427,11 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = { .print_conntrack = ipv4_print_conntrack, .prepare = ipv4_prepare, .get_features = ipv4_get_features, +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + .tuple_to_nfattr = ipv4_tuple_to_nfattr, + .nfattr_to_tuple = ipv4_nfattr_to_tuple, +#endif .me = THIS_MODULE, }; diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 7ddb5c08f7b..52dc175be39 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -50,20 +50,21 @@ static int icmp_pkt_to_tuple(const struct sk_buff *skb, return 1; } +/* Add 1; spaces filled with 0. */ +static const u_int8_t invmap[] = { + [ICMP_ECHO] = ICMP_ECHOREPLY + 1, + [ICMP_ECHOREPLY] = ICMP_ECHO + 1, + [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1, + [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1, + [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1, + [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1, + [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1, + [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1 +}; + static int icmp_invert_tuple(struct nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple *orig) { - /* Add 1; spaces filled with 0. 
*/ - static u_int8_t invmap[] - = { [ICMP_ECHO] = ICMP_ECHOREPLY + 1, - [ICMP_ECHOREPLY] = ICMP_ECHO + 1, - [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1, - [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1, - [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1, - [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1, - [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1, - [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1}; - if (orig->dst.u.icmp.type >= sizeof(invmap) || !invmap[orig->dst.u.icmp.type]) return 0; @@ -120,11 +121,12 @@ static int icmp_packet(struct nf_conn *ct, static int icmp_new(struct nf_conn *conntrack, const struct sk_buff *skb, unsigned int dataoff) { - static u_int8_t valid_new[] - = { [ICMP_ECHO] = 1, - [ICMP_TIMESTAMP] = 1, - [ICMP_INFO_REQUEST] = 1, - [ICMP_ADDRESS] = 1 }; + static const u_int8_t valid_new[] = { + [ICMP_ECHO] = 1, + [ICMP_TIMESTAMP] = 1, + [ICMP_INFO_REQUEST] = 1, + [ICMP_ADDRESS] = 1 + }; if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) { @@ -168,7 +170,7 @@ icmp_error_message(struct sk_buff *skb, return -NF_ACCEPT; } - innerproto = nf_ct_find_proto(PF_INET, inside->ip.protocol); + innerproto = __nf_ct_proto_find(PF_INET, inside->ip.protocol); dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp); /* Are they talking about one of our connections? */ if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET, @@ -281,6 +283,60 @@ checksum_skipped: return icmp_error_message(skb, ctinfo, hooknum); } +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_conntrack.h> + +static int icmp_tuple_to_nfattr(struct sk_buff *skb, + const struct nf_conntrack_tuple *t) +{ + NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t), + &t->src.u.icmp.id); + NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t), + &t->dst.u.icmp.type); + NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t), + &t->dst.u.icmp.code); + + return 0; + +nfattr_failure: + return -1; +} + +static const size_t cta_min_proto[CTA_PROTO_MAX] = { + [CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t), + [CTA_PROTO_ICMP_CODE-1] = sizeof(u_int8_t), + [CTA_PROTO_ICMP_ID-1] = sizeof(u_int16_t) +}; + +static int icmp_nfattr_to_tuple(struct nfattr *tb[], + struct nf_conntrack_tuple *tuple) +{ + if (!tb[CTA_PROTO_ICMP_TYPE-1] + || !tb[CTA_PROTO_ICMP_CODE-1] + || !tb[CTA_PROTO_ICMP_ID-1]) + return -EINVAL; + + if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto)) + return -EINVAL; + + tuple->dst.u.icmp.type = + *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]); + tuple->dst.u.icmp.code = + *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]); + tuple->src.u.icmp.id = + *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]); + + if (tuple->dst.u.icmp.type >= sizeof(invmap) + || !invmap[tuple->dst.u.icmp.type]) + return -EINVAL; + + return 0; +} +#endif + struct nf_conntrack_protocol nf_conntrack_protocol_icmp = { .list = { NULL, NULL }, @@ -295,7 +351,12 @@ struct nf_conntrack_protocol nf_conntrack_protocol_icmp = .new = icmp_new, .error = icmp_error, .destroy = NULL, - .me = NULL + .me = NULL, +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + .tuple_to_nfattr = icmp_tuple_to_nfattr, + .nfattr_to_tuple = icmp_nfattr_to_tuple, +#endif }; EXPORT_SYMBOL(nf_conntrack_protocol_icmp); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 0d7dc668db4..39d49dc333a 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -38,6 +38,7 @@ #include <net/protocol.h> #include 
<net/tcp.h> #include <net/udp.h> +#include <linux/inetdevice.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <net/sock.h> diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index a34e60ea48a..e20be3331f6 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -173,10 +173,10 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); struct sock *child; - child = tp->af_specific->syn_recv_sock(sk, skb, req, dst); + child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst); if (child) inet_csk_reqsk_queue_add(sk, req, child); else diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 01444a02b48..16984d4a8a0 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -12,6 +12,7 @@ #include <linux/sysctl.h> #include <linux/config.h> #include <linux/igmp.h> +#include <linux/inetdevice.h> #include <net/snmp.h> #include <net/icmp.h> #include <net/ip.h> @@ -22,6 +23,7 @@ extern int sysctl_ip_nonlocal_bind; #ifdef CONFIG_SYSCTL +static int zero; static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; @@ -614,6 +616,15 @@ ctl_table ipv4_table[] = { .strategy = &sysctl_jiffies }, { + .ctl_name = NET_IPV4_IPFRAG_MAX_DIST, + .procname = "ipfrag_max_dist", + .data = &sysctl_ipfrag_max_dist, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .extra1 = &zero + }, + { .ctl_name = NET_TCP_NO_METRICS_SAVE, .procname = "tcp_no_metrics_save", .data = &sysctl_tcp_nometrics_save, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ef98b14ac56..00aa80e9324 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1696,8 +1696,8 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int err = 0; if (level != SOL_TCP) - return tp->af_specific->setsockopt(sk, level, optname, - optval, optlen); + return icsk->icsk_af_ops->setsockopt(sk, level, optname, + optval, optlen); /* This is a string value all the others are int's */ if (optname == TCP_CONGESTION) { @@ -1914,7 +1914,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime); info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp); - info->tcpi_pmtu = tp->pmtu_cookie; + info->tcpi_pmtu = icsk->icsk_pmtu_cookie; info->tcpi_rcv_ssthresh = tp->rcv_ssthresh; info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3; info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2; @@ -1939,8 +1939,8 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int val, len; if (level != SOL_TCP) - return tp->af_specific->getsockopt(sk, level, optname, - optval, optlen); + return icsk->icsk_af_ops->getsockopt(sk, level, optname, + optval, optlen); if (get_user(len, optlen)) return -EFAULT; diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 1d0cd86621b..035f2092d73 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -30,8 +30,6 @@ static int fast_convergence = 1; static int max_increment = 16; static int low_window = 14; static int beta = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */ -static int low_utilization_threshold = 153; -static int low_utilization_period = 2; static int initial_ssthresh = 100; static int smooth_part = 20; @@ -43,10 +41,6 @@ module_param(low_window, int, 0644); 
MODULE_PARM_DESC(low_window, "lower bound on congestion window (for TCP friendliness)"); module_param(beta, int, 0644); MODULE_PARM_DESC(beta, "beta for multiplicative increase"); -module_param(low_utilization_threshold, int, 0644); -MODULE_PARM_DESC(low_utilization_threshold, "percent (scaled by 1024) for low utilization mode"); -module_param(low_utilization_period, int, 0644); -MODULE_PARM_DESC(low_utilization_period, "if average delay exceeds then goto to low utilization mode (seconds)"); module_param(initial_ssthresh, int, 0644); MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold"); module_param(smooth_part, int, 0644); @@ -60,11 +54,6 @@ struct bictcp { u32 loss_cwnd; /* congestion window at last loss */ u32 last_cwnd; /* the last snd_cwnd */ u32 last_time; /* time when updated last_cwnd */ - u32 delay_min; /* min delay */ - u32 delay_max; /* max delay */ - u32 last_delay; - u8 low_utilization;/* 0: high; 1: low */ - u32 low_utilization_start; /* starting time of low utilization detection*/ u32 epoch_start; /* beginning of an epoch */ #define ACK_RATIO_SHIFT 4 u32 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ @@ -77,11 +66,6 @@ static inline void bictcp_reset(struct bictcp *ca) ca->loss_cwnd = 0; ca->last_cwnd = 0; ca->last_time = 0; - ca->delay_min = 0; - ca->delay_max = 0; - ca->last_delay = 0; - ca->low_utilization = 0; - ca->low_utilization_start = 0; ca->epoch_start = 0; ca->delayed_ack = 2 << ACK_RATIO_SHIFT; } @@ -143,8 +127,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) } /* if in slow start or link utilization is very low */ - if ( ca->loss_cwnd == 0 || - (cwnd > ca->loss_cwnd && ca->low_utilization)) { + if (ca->loss_cwnd == 0) { if (ca->cnt > 20) /* increase cwnd 5% per RTT */ ca->cnt = 20; } @@ -154,69 +137,12 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) ca->cnt = 1; } - -/* Detect low utilization in congestion avoidance */ -static inline void bictcp_low_utilization(struct sock *sk, int flag) -{ - const struct tcp_sock *tp = tcp_sk(sk); - struct bictcp *ca = inet_csk_ca(sk); - u32 dist, delay; - - /* No time stamp */ - if (!(tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) || - /* Discard delay samples right after fast recovery */ - tcp_time_stamp < ca->epoch_start + HZ || - /* this delay samples may not be accurate */ - flag == 0) { - ca->last_delay = 0; - goto notlow; - } - - delay = ca->last_delay<<3; /* use the same scale as tp->srtt*/ - ca->last_delay = tcp_time_stamp - tp->rx_opt.rcv_tsecr; - if (delay == 0) /* no previous delay sample */ - goto notlow; - - /* first time call or link delay decreases */ - if (ca->delay_min == 0 || ca->delay_min > delay) { - ca->delay_min = ca->delay_max = delay; - goto notlow; - } - - if (ca->delay_max < delay) - ca->delay_max = delay; - - /* utilization is low, if avg delay < dist*threshold - for checking_period time */ - dist = ca->delay_max - ca->delay_min; - if (dist <= ca->delay_min>>6 || - tp->srtt - ca->delay_min >= (dist*low_utilization_threshold)>>10) - goto notlow; - - if (ca->low_utilization_start == 0) { - ca->low_utilization = 0; - ca->low_utilization_start = tcp_time_stamp; - } else if ((s32)(tcp_time_stamp - ca->low_utilization_start) - > low_utilization_period*HZ) { - ca->low_utilization = 1; - } - - return; - - notlow: - ca->low_utilization = 0; - ca->low_utilization_start = 0; - -} - static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 seq_rtt, u32 in_flight, int data_acked) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = 
inet_csk_ca(sk); - bictcp_low_utilization(sk, data_acked); - if (!tcp_is_cwnd_limited(sk, in_flight)) return; @@ -249,11 +175,6 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk) ca->epoch_start = 0; /* end of epoch */ - /* in case of wrong delay_max*/ - if (ca->delay_min > 0 && ca->delay_max > ca->delay_min) - ca->delay_max = ca->delay_min - + ((ca->delay_max - ca->delay_min)* 90) / 100; - /* Wmax and fast convergence */ if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence) ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta)) @@ -289,14 +210,14 @@ static void bictcp_state(struct sock *sk, u8 new_state) bictcp_reset(inet_csk_ca(sk)); } -/* Track delayed acknowledgement ratio using sliding window +/* Track delayed acknowledgment ratio using sliding window * ratio = (15*ratio + sample) / 16 */ static void bictcp_acked(struct sock *sk, u32 cnt) { const struct inet_connection_sock *icsk = inet_csk(sk); - if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) { + if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) { struct bictcp *ca = inet_csk_ca(sk); cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; ca->delayed_ack += cnt; diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index c7cc62c8dc1..e688c687d62 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -174,6 +174,34 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) return err; } + +/* + * Linear increase during slow start + */ +void tcp_slow_start(struct tcp_sock *tp) +{ + if (sysctl_tcp_abc) { + /* RFC3465: Slow Start + * TCP sender SHOULD increase cwnd by the number of + * previously unacknowledged bytes ACKed by each incoming + * acknowledgment, provided the increase is not more than L + */ + if (tp->bytes_acked < tp->mss_cache) + return; + + /* We MAY increase by 2 if discovered delayed ack */ + if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) { + if (tp->snd_cwnd < tp->snd_cwnd_clamp) + tp->snd_cwnd++; + } + } + tp->bytes_acked = 0; + + if (tp->snd_cwnd < tp->snd_cwnd_clamp) + tp->snd_cwnd++; +} +EXPORT_SYMBOL_GPL(tcp_slow_start); + /* * TCP Reno congestion control * This is special case used for fallback as well. diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c new file mode 100644 index 00000000000..31a4986dfbf --- /dev/null +++ b/net/ipv4/tcp_cubic.c @@ -0,0 +1,411 @@ +/* + * TCP CUBIC: Binary Increase Congestion control for TCP v2.0 + * + * This is from the implementation of CUBIC TCP in + * Injong Rhee, Lisong Xu. + * "CUBIC: A New TCP-Friendly High-Speed TCP Variant + * in PFLDnet 2005 + * Available from: + * http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/cubic-paper.pdf + * + * Unless CUBIC is enabled and congestion window is large + * this behaves the same as the original Reno. 
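
The tcp_slow_start() helper moved into tcp_cong.c above implements RFC 3465 Appropriate Byte Counting: with sysctl_tcp_abc set, cwnd grows only once a full MSS worth of data has been ACKed, and may grow by two segments when sysctl_tcp_abc > 1 and a delayed ACK covered more than 2*MSS. A user-space restatement of that decision logic (struct conn and its field names are illustrative):

#include <stdio.h>

struct conn {
        unsigned int cwnd, cwnd_clamp;  /* congestion window, in segments */
        unsigned int bytes_acked, mss;
        int abc;                        /* mirrors sysctl_tcp_abc: 0, 1 or 2 */
};

static void slow_start(struct conn *c)
{
        if (c->abc) {
                if (c->bytes_acked < c->mss)    /* wait for a full MSS */
                        return;
                /* ACK covered more than 2*MSS: MAY grow by an extra segment */
                if (c->abc > 1 && c->bytes_acked > 2 * c->mss &&
                    c->cwnd < c->cwnd_clamp)
                        c->cwnd++;
        }
        c->bytes_acked = 0;
        if (c->cwnd < c->cwnd_clamp)
                c->cwnd++;
}

int main(void)
{
        struct conn c = { .cwnd = 10, .cwnd_clamp = 100,
                          .bytes_acked = 3000, .mss = 1460, .abc = 2 };
        slow_start(&c);
        printf("cwnd=%u\n", c.cwnd);    /* 12: 3000 > 2*1460, so +2 */
        return 0;
}
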
+ */ + +#include <linux/config.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <net/tcp.h> +#include <asm/div64.h> + +#define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation + * max_cwnd = snd_cwnd * beta + */ +#define BICTCP_B 4 /* + * In binary search, + * go to point (max+min)/N + */ +#define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */ + +static int fast_convergence = 1; +static int max_increment = 16; +static int beta = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */ +static int initial_ssthresh = 100; +static int bic_scale = 41; +static int tcp_friendliness = 1; + +static u32 cube_rtt_scale; +static u32 beta_scale; +static u64 cube_factor; + +/* Note parameters that are used for precomputing scale factors are read-only */ +module_param(fast_convergence, int, 0644); +MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence"); +module_param(max_increment, int, 0644); +MODULE_PARM_DESC(max_increment, "Limit on increment allowed during binary search"); +module_param(beta, int, 0444); +MODULE_PARM_DESC(beta, "beta for multiplicative increase"); +module_param(initial_ssthresh, int, 0644); +MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold"); +module_param(bic_scale, int, 0444); +MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)"); +module_param(tcp_friendliness, int, 0644); +MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness"); + +#include <asm/div64.h> + +/* BIC TCP Parameters */ +struct bictcp { + u32 cnt; /* increase cwnd by 1 after ACKs */ + u32 last_max_cwnd; /* last maximum snd_cwnd */ + u32 loss_cwnd; /* congestion window at last loss */ + u32 last_cwnd; /* the last snd_cwnd */ + u32 last_time; /* time when updated last_cwnd */ + u32 bic_origin_point;/* origin point of bic function */ + u32 bic_K; /* time to origin point from the beginning of the current epoch */ + u32 delay_min; /* min delay */ + u32 epoch_start; /* beginning of an epoch */ + u32 ack_cnt; /* number of acks */ + u32 tcp_cwnd; /* estimated tcp cwnd */ +#define ACK_RATIO_SHIFT 4 + u32 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ +}; + +static inline void bictcp_reset(struct bictcp *ca) +{ + ca->cnt = 0; + ca->last_max_cwnd = 0; + ca->loss_cwnd = 0; + ca->last_cwnd = 0; + ca->last_time = 0; + ca->bic_origin_point = 0; + ca->bic_K = 0; + ca->delay_min = 0; + ca->epoch_start = 0; + ca->delayed_ack = 2 << ACK_RATIO_SHIFT; + ca->ack_cnt = 0; + ca->tcp_cwnd = 0; +} + +static void bictcp_init(struct sock *sk) +{ + bictcp_reset(inet_csk_ca(sk)); + if (initial_ssthresh) + tcp_sk(sk)->snd_ssthresh = initial_ssthresh; +} + +/* 64bit divisor, dividend and result. dynamic precision */ +static inline u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor) +{ + u_int32_t d = divisor; + + if (divisor > 0xffffffffULL) { + unsigned int shift = fls(divisor >> 32); + + d = divisor >> shift; + dividend >>= shift; + } + + /* avoid 64 bit division if possible */ + if (dividend >> 32) + do_div(dividend, d); + else + dividend = (uint32_t) dividend / d; + + return dividend; +} + +/* + * calculate the cubic root of x using Newton-Raphson + */ +static u32 cubic_root(u64 a) +{ + u32 x, x1; + + /* Initial estimate is based on: + * cbrt(x) = exp(log(x) / 3) + */ + x = 1u << (fls64(a)/3); + + /* + * Iteration based on: + * 2 + * x = ( 2 * x + a / x ) / 3 + * k+1 k k + */ + do { + x1 = x; + x = (2 * x + (uint32_t) div64_64(a, x*x)) / 3; + } while (abs(x1 - x) > 1); + + return x; +} + +/* + * Compute congestion window to use. 
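
cubic_root() above seeds Newton's method with 2^(fls64(a)/3), a power of two near the true cube root, so the iteration x <- (2x + a/x^2)/3 settles within a few steps. The same iteration in portable C (the squaring is widened to 64 bits here, and plain division replaces div64_64()):

#include <stdio.h>
#include <stdint.h>

static unsigned int fls64_(uint64_t x)          /* 1-based highest set bit */
{
        unsigned int r = 0;
        while (x) { r++; x >>= 1; }
        return r;
}

static uint32_t cubic_root(uint64_t a)
{
        uint32_t x = 1u << (fls64_(a) / 3), x1;

        do {                                    /* x = (2*x + a/x^2) / 3 */
                x1 = x;
                x = (2 * x + (uint32_t)(a / ((uint64_t)x * x))) / 3;
        } while ((x1 > x ? x1 - x : x - x1) > 1);

        return x;
}

int main(void)
{
        printf("%u\n", cubic_root(27));         /* 3 */
        printf("%u\n", cubic_root(1000000));    /* 100 */
        return 0;
}
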
+ */ +static inline void bictcp_update(struct bictcp *ca, u32 cwnd) +{ + u64 offs; + u32 delta, t, bic_target, min_cnt, max_cnt; + + ca->ack_cnt++; /* count the number of ACKs */ + + if (ca->last_cwnd == cwnd && + (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32) + return; + + ca->last_cwnd = cwnd; + ca->last_time = tcp_time_stamp; + + if (ca->epoch_start == 0) { + ca->epoch_start = tcp_time_stamp; /* record the beginning of an epoch */ + ca->ack_cnt = 1; /* start counting */ + ca->tcp_cwnd = cwnd; /* sync with cubic */ + + if (ca->last_max_cwnd <= cwnd) { + ca->bic_K = 0; + ca->bic_origin_point = cwnd; + } else { + /* Compute new K based on + * (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3*bictcp_HZ) + */ + ca->bic_K = cubic_root(cube_factor + * (ca->last_max_cwnd - cwnd)); + ca->bic_origin_point = ca->last_max_cwnd; + } + } + + /* cubic function - calc */ + /* calculate c * time^3 / rtt, + * while considering overflow in calculation of time^3 + * (so time^3 is done by using 64 bit) + * and without the support of division of 64bit numbers + * (so all divisions are done by using 32 bit) + * also NOTE the units of these variables + * time = (t - K) / 2^bictcp_HZ + * c = bic_scale >> 10 + * rtt = (srtt >> 3) / HZ + * !!! The following code does not have overflow problems, + * if the cwnd < 1 million packets !!! + */ + + /* change the unit from HZ to bictcp_HZ */ + t = ((tcp_time_stamp + ca->delay_min - ca->epoch_start) + << BICTCP_HZ) / HZ; + + if (t < ca->bic_K) /* t - K */ + offs = ca->bic_K - t; + else + offs = t - ca->bic_K; + + /* c/rtt * (t-K)^3 */ + delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ); + if (t < ca->bic_K) /* below origin */ + bic_target = ca->bic_origin_point - delta; + else /* above origin */ + bic_target = ca->bic_origin_point + delta; + + /* cubic function - calc bictcp_cnt */ + if (bic_target > cwnd) { + ca->cnt = cwnd / (bic_target - cwnd); + } else { + ca->cnt = 100 * cwnd; /* very small increment */ + } + + if (ca->delay_min > 0) { + /* max increment = Smax * rtt / 0.1 */ + min_cnt = (cwnd * HZ * 8)/(10 * max_increment * ca->delay_min); + if (ca->cnt < min_cnt) + ca->cnt = min_cnt; + } + + /* slow start and low utilization */ + if (ca->loss_cwnd == 0) /* could be aggressive in slow start */ + ca->cnt = 50; + + /* TCP Friendly */ + if (tcp_friendliness) { + u32 scale = beta_scale; + delta = (cwnd * scale) >> 3; + while (ca->ack_cnt > delta) { /* update tcp cwnd */ + ca->ack_cnt -= delta; + ca->tcp_cwnd++; + } + + if (ca->tcp_cwnd > cwnd) { /* if bic is slower than tcp */ + delta = ca->tcp_cwnd - cwnd; + max_cnt = cwnd / delta; + if (ca->cnt > max_cnt) + ca->cnt = max_cnt; + } + } + + ca->cnt = (ca->cnt << ACK_RATIO_SHIFT) / ca->delayed_ack; + if (ca->cnt == 0) /* cannot be zero */ + ca->cnt = 1; +} + + +/* Keep track of minimum rtt */ +static inline void measure_delay(struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); + u32 delay; + + /* No time stamp */ + if (!(tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) || + /* Discard delay samples right after fast recovery */ + (s32)(tcp_time_stamp - ca->epoch_start) < HZ) + return; + + delay = tcp_time_stamp - tp->rx_opt.rcv_tsecr; + if (delay == 0) + delay = 1; + + /* first time call or link delay decreases */ + if (ca->delay_min == 0 || ca->delay_min > delay) + ca->delay_min = delay; +} + +static void bictcp_cong_avoid(struct sock *sk, u32 ack, + u32 seq_rtt, u32 in_flight, int data_acked) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); + +
if (data_acked) + measure_delay(sk); + + if (!tcp_is_cwnd_limited(sk, in_flight)) + return; + + if (tp->snd_cwnd <= tp->snd_ssthresh) + tcp_slow_start(tp); + else { + bictcp_update(ca, tp->snd_cwnd); + + /* In dangerous area, increase slowly. + * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd + */ + if (tp->snd_cwnd_cnt >= ca->cnt) { + if (tp->snd_cwnd < tp->snd_cwnd_clamp) + tp->snd_cwnd++; + tp->snd_cwnd_cnt = 0; + } else + tp->snd_cwnd_cnt++; + } + +} + +static u32 bictcp_recalc_ssthresh(struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); + + ca->epoch_start = 0; /* end of epoch */ + + /* Wmax and fast convergence */ + if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence) + ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta)) + / (2 * BICTCP_BETA_SCALE); + else + ca->last_max_cwnd = tp->snd_cwnd; + + ca->loss_cwnd = tp->snd_cwnd; + + return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U); +} + +static u32 bictcp_undo_cwnd(struct sock *sk) +{ + struct bictcp *ca = inet_csk_ca(sk); + + return max(tcp_sk(sk)->snd_cwnd, ca->last_max_cwnd); +} + +static u32 bictcp_min_cwnd(struct sock *sk) +{ + return tcp_sk(sk)->snd_ssthresh; +} + +static void bictcp_state(struct sock *sk, u8 new_state) +{ + if (new_state == TCP_CA_Loss) + bictcp_reset(inet_csk_ca(sk)); +} + +/* Track delayed acknowledgment ratio using sliding window + * ratio = (15*ratio + sample) / 16 + */ +static void bictcp_acked(struct sock *sk, u32 cnt) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) { + struct bictcp *ca = inet_csk_ca(sk); + cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; + ca->delayed_ack += cnt; + } +} + + +static struct tcp_congestion_ops cubictcp = { + .init = bictcp_init, + .ssthresh = bictcp_recalc_ssthresh, + .cong_avoid = bictcp_cong_avoid, + .set_state = bictcp_state, + .undo_cwnd = bictcp_undo_cwnd, + .min_cwnd = bictcp_min_cwnd, + .pkts_acked = bictcp_acked, + .owner = THIS_MODULE, + .name = "cubic", +}; + +static int __init cubictcp_register(void) +{ + BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); + + /* Precompute a bunch of the scaling factors that are used per-packet + * based on SRTT of 100ms + */ + + beta_scale = 8*(BICTCP_BETA_SCALE+beta)/ 3 / (BICTCP_BETA_SCALE - beta); + + cube_rtt_scale = (bic_scale << 3) / 10; /* 1024*c/rtt */ + + /* calculate the "K" for (wmax-cwnd) = c/rtt * K^3 + * so K = cubic_root( (wmax-cwnd)*rtt/c ) + * the unit of K is bictcp_HZ=2^10, not HZ + * + * c = bic_scale >> 10 + * rtt = 100ms + * + * the following code has been designed and tested for + * cwnd < 1 million packets + * RTT < 100 seconds + * HZ < 100,000,000 (corresponding to a 10 nanosecond tick) + */ + + /* 1/c * 2^(3*bictcp_HZ) * srtt */ + cube_factor = 1ull << (10+3*BICTCP_HZ); /* 2^40 */ + + /* divide by bic_scale and by constant Srtt (100ms) */ + do_div(cube_factor, bic_scale * 10); + + return tcp_register_congestion_control(&cubictcp); +} + +static void __exit cubictcp_unregister(void) +{ + tcp_unregister_congestion_control(&cubictcp); +} + +module_init(cubictcp_register); +module_exit(cubictcp_unregister); + +MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("CUBIC TCP"); +MODULE_VERSION("2.0"); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bf2e23086bc..0a461232329 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -115,8 +115,8 @@ int sysctl_tcp_abc = 1; /* Adapt the MSS value used to make delayed ack decision
to the * real world. */ -static inline void tcp_measure_rcv_mss(struct sock *sk, - const struct sk_buff *skb) +static void tcp_measure_rcv_mss(struct sock *sk, + const struct sk_buff *skb) { struct inet_connection_sock *icsk = inet_csk(sk); const unsigned int lss = icsk->icsk_ack.last_seg_size; @@ -246,8 +246,8 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp, return 0; } -static inline void tcp_grow_window(struct sock *sk, struct tcp_sock *tp, - struct sk_buff *skb) +static void tcp_grow_window(struct sock *sk, struct tcp_sock *tp, + struct sk_buff *skb) { /* Check #1 */ if (tp->rcv_ssthresh < tp->window_clamp && @@ -341,6 +341,26 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss); } + +/* Initialize RCV_MSS value. + * RCV_MSS is our guess about the MSS used by the peer. + * We have no direct information about the MSS. + * It's better to underestimate the RCV_MSS rather than overestimate. + * Overestimating it makes us ACK less frequently than needed. + * Underestimates are easier to detect and fix by tcp_measure_rcv_mss(). + */ +void tcp_initialize_rcv_mss(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); + + hint = min(hint, tp->rcv_wnd/2); + hint = min(hint, TCP_MIN_RCVMSS); + hint = max(hint, TCP_MIN_MSS); + + inet_csk(sk)->icsk_ack.rcv_mss = hint; +} + /* Receiver "autotuning" code. * * The algorithm for RTT estimation w/o timestamps is based on @@ -735,6 +755,27 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) return min_t(__u32, cwnd, tp->snd_cwnd_clamp); } +/* Set the slow start threshold and cwnd without falling back into slow start */ +void tcp_enter_cwr(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + tp->prior_ssthresh = 0; + tp->bytes_acked = 0; + if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { + tp->undo_marker = 0; + tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); + tp->snd_cwnd = min(tp->snd_cwnd, + tcp_packets_in_flight(tp) + 1U); + tp->snd_cwnd_cnt = 0; + tp->high_seq = tp->snd_nxt; + tp->snd_cwnd_stamp = tcp_time_stamp; + TCP_ECN_queue_cwr(tp); + + tcp_set_ca_state(sk, TCP_CA_CWR); + } +} + /* Initialize metrics on socket. */ static void tcp_init_metrics(struct sock *sk) @@ -2070,8 +2111,8 @@ static inline void tcp_ack_update_rtt(struct sock *sk, const int flag, tcp_ack_no_tstamp(sk, seq_rtt, flag); } -static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, - u32 in_flight, int good) +static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, + u32 in_flight, int good) { const struct inet_connection_sock *icsk = inet_csk(sk); icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, good); @@ -2082,7 +2123,7 @@ static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, * RFC2988 recommends restarting the timer to now+rto.
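tcp_initialize_rcv_mss(), newly exported above, deliberately under-guesses the peer's MSS: start from the advertised MSS, never exceed half the receive window, and clamp into a fixed band. A compact sketch of the same clamping; the two TOY_ constants are assumed stand-ins for the kernel's TCP_MIN_MSS and TCP_MIN_RCVMSS (88 and 536 in kernels of this vintage), not verbatim definitions.

    /* Clamp an initial receiver-MSS guess, as tcp_initialize_rcv_mss()
     * does above.  Constants are assumptions, not kernel definitions. */
    #include <stdio.h>

    #define TOY_TCP_MIN_MSS      88u
    #define TOY_TCP_MIN_RCVMSS  536u

    static unsigned int umin(unsigned int a, unsigned int b) { return a < b ? a : b; }
    static unsigned int umax(unsigned int a, unsigned int b) { return a > b ? a : b; }

    static unsigned int initial_rcv_mss(unsigned int advmss,
                                        unsigned int mss_cache,
                                        unsigned int rcv_wnd)
    {
        unsigned int hint = umin(advmss, mss_cache);

        hint = umin(hint, rcv_wnd / 2);       /* prefer underestimating */
        hint = umin(hint, TOY_TCP_MIN_RCVMSS);
        return umax(hint, TOY_TCP_MIN_MSS);
    }

    int main(void)
    {
        /* A 1460-byte advertised MSS is capped at 536 until real
         * segments let tcp_measure_rcv_mss() raise the estimate. */
        printf("%u\n", initial_rcv_mss(1460, 1460, 65535));
        return 0;
    }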
*/ -static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp) +static void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp) { if (!tp->packets_out) { inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); @@ -2147,7 +2188,7 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb, return acked; } -static inline u32 tcp_usrtt(const struct sk_buff *skb) +static u32 tcp_usrtt(const struct sk_buff *skb) { struct timeval tv, now; @@ -2342,7 +2383,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp, if (nwin > tp->max_window) { tp->max_window = nwin; - tcp_sync_mss(sk, tp->pmtu_cookie); + tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie); } } } @@ -2583,8 +2624,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, /* Fast parse options. This hopes to only see timestamps. * If it is wrong it falls back on tcp_parse_options(). */ -static inline int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, - struct tcp_sock *tp) +static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, + struct tcp_sock *tp) { if (th->doff == sizeof(struct tcphdr)>>2) { tp->rx_opt.saw_tstamp = 0; @@ -2804,8 +2845,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) } } -static __inline__ int -tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq) +static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq) { if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) { if (before(seq, sp->start_seq)) @@ -2817,7 +2857,7 @@ tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq) return 0; } -static inline void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq) +static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq) { if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) { if (before(seq, tp->rcv_nxt)) @@ -2832,7 +2872,7 @@ static inline void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq) } } -static inline void tcp_dsack_extend(struct tcp_sock *tp, u32 seq, u32 end_seq) +static void tcp_dsack_extend(struct tcp_sock *tp, u32 seq, u32 end_seq) { if (!tp->rx_opt.dsack) tcp_dsack_set(tp, seq, end_seq); @@ -2890,7 +2930,7 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp) } } -static __inline__ void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2) +static inline void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2) { __u32 tmp; @@ -3455,7 +3495,7 @@ void tcp_cwnd_application_limited(struct sock *sk) tp->snd_cwnd_stamp = tcp_time_stamp; } -static inline int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp) +static int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp) { /* If the user specified a specific send buffer setting, do * not modify it. 
@@ -3502,7 +3542,7 @@ static void tcp_new_space(struct sock *sk) sk->sk_write_space(sk); } -static inline void tcp_check_space(struct sock *sk) +static void tcp_check_space(struct sock *sk) { if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) { sock_reset_flag(sk, SOCK_QUEUE_SHRUNK); @@ -3512,7 +3552,7 @@ static inline void tcp_check_space(struct sock *sk) } } -static __inline__ void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp) +static inline void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp) { tcp_push_pending_frames(sk, tp); tcp_check_space(sk); @@ -3544,7 +3584,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) } } -static __inline__ void tcp_ack_snd_check(struct sock *sk) +static inline void tcp_ack_snd_check(struct sock *sk) { if (!inet_csk_ack_scheduled(sk)) { /* We sent a data segment already. */ @@ -3692,8 +3732,7 @@ static int __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) return result; } -static __inline__ int -tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) +static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) { return skb->ip_summed != CHECKSUM_UNNECESSARY && __tcp_checksum_complete_user(sk, skb); @@ -3967,12 +4006,12 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, struct tcphdr *th, unsigned len) { struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); int saved_clamp = tp->rx_opt.mss_clamp; tcp_parse_options(skb, &tp->rx_opt, 0); if (th->ack) { - struct inet_connection_sock *icsk; /* rfc793: * "If the state is SYN-SENT then * first check the ACK bit @@ -4061,7 +4100,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, if (tp->rx_opt.sack_ok && sysctl_tcp_fack) tp->rx_opt.sack_ok |= 2; - tcp_sync_mss(sk, tp->pmtu_cookie); + tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); tcp_initialize_rcv_mss(sk); /* Remember, tcp_poll() does not lock socket! @@ -4072,7 +4111,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tcp_set_state(sk, TCP_ESTABLISHED); /* Make sure socket is routed, for correct metrics. */ - tp->af_specific->rebuild_header(sk); + icsk->icsk_af_ops->rebuild_header(sk); tcp_init_metrics(sk); @@ -4098,8 +4137,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, sk_wake_async(sk, 0, POLL_OUT); } - icsk = inet_csk(sk); - if (sk->sk_write_pending || icsk->icsk_accept_queue.rskq_defer_accept || icsk->icsk_ack.pingpong) { @@ -4173,7 +4210,7 @@ discard: if (tp->ecn_flags&TCP_ECN_OK) sock_set_flag(sk, SOCK_NO_LARGESEND); - tcp_sync_mss(sk, tp->pmtu_cookie); + tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); tcp_initialize_rcv_mss(sk); @@ -4220,6 +4257,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, struct tcphdr *th, unsigned len) { struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); int queued = 0; tp->rx_opt.saw_tstamp = 0; @@ -4236,7 +4274,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, goto discard; if(th->syn) { - if(tp->af_specific->conn_request(sk, skb) < 0) + if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) return 1; /* Now we have several options: In theory there is @@ -4349,7 +4387,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, /* Make sure socket is routed, for * correct metrics. 
*/ - tp->af_specific->rebuild_header(sk); + icsk->icsk_af_ops->rebuild_header(sk); tcp_init_metrics(sk); @@ -4475,3 +4513,4 @@ EXPORT_SYMBOL(sysctl_tcp_abc); EXPORT_SYMBOL(tcp_parse_options); EXPORT_SYMBOL(tcp_rcv_established); EXPORT_SYMBOL(tcp_rcv_state_process); +EXPORT_SYMBOL(tcp_initialize_rcv_mss); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4d5021e1929..e9f83e5b28c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -69,6 +69,7 @@ #include <net/transp_v6.h> #include <net/ipv6.h> #include <net/inet_common.h> +#include <net/timewait_sock.h> #include <net/xfrm.h> #include <linux/inet.h> @@ -86,8 +87,7 @@ int sysctl_tcp_low_latency; /* Socket used for sending RSTs */ static struct socket *tcp_socket; -void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, - struct sk_buff *skb); +void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { .lhash_lock = RW_LOCK_UNLOCKED, @@ -97,7 +97,8 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { static int tcp_v4_get_port(struct sock *sk, unsigned short snum) { - return inet_csk_get_port(&tcp_hashinfo, sk, snum); + return inet_csk_get_port(&tcp_hashinfo, sk, snum, + inet_csk_bind_conflict); } static void tcp_v4_hash(struct sock *sk) @@ -118,202 +119,38 @@ static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb) skb->h.th->source); } -/* called with local bh disabled */ -static int __tcp_v4_check_established(struct sock *sk, __u16 lport, - struct inet_timewait_sock **twp) +int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) { - struct inet_sock *inet = inet_sk(sk); - u32 daddr = inet->rcv_saddr; - u32 saddr = inet->daddr; - int dif = sk->sk_bound_dev_if; - INET_ADDR_COOKIE(acookie, saddr, daddr) - const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); - struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash); - struct sock *sk2; - const struct hlist_node *node; - struct inet_timewait_sock *tw; - - prefetch(head->chain.first); - write_lock(&head->lock); - - /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { - tw = inet_twsk(sk2); - - if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) { - const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2); - struct tcp_sock *tp = tcp_sk(sk); - - /* With PAWS, it is safe from the viewpoint - of data integrity. Even without PAWS it - is safe provided sequence spaces do not - overlap i.e. at data rates <= 80Mbit/sec. - - Actually, the idea is close to VJ's one, - only timestamp cache is held not per host, - but per port pair and TW bucket is used - as state holder. + const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); + struct tcp_sock *tp = tcp_sk(sk); - If TW bucket has been already destroyed we - fall back to VJ's scheme and use initial - timestamp retrieved from peer table. - */ - if (tcptw->tw_ts_recent_stamp && - (!twp || (sysctl_tcp_tw_reuse && - xtime.tv_sec - - tcptw->tw_ts_recent_stamp > 1))) { - tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; - if (tp->write_seq == 0) - tp->write_seq = 1; - tp->rx_opt.ts_recent = tcptw->tw_ts_recent; - tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; - sock_hold(sk2); - goto unique; - } else - goto not_unique; - } - } - tw = NULL; + /* With PAWS, it is safe from the viewpoint + of data integrity. 
Even without PAWS it is safe provided sequence + spaces do not overlap i.e. at data rates <= 80Mbit/sec. - /* And established part... */ - sk_for_each(sk2, node, &head->chain) { - if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) - goto not_unique; - } + Actually, the idea is close to VJ's one, only timestamp cache is + held not per host, but per port pair and TW bucket is used as state + holder. -unique: - /* Must record num and sport now. Otherwise we will see - * in hash table socket with a funny identity. */ - inet->num = lport; - inet->sport = htons(lport); - sk->sk_hash = hash; - BUG_TRAP(sk_unhashed(sk)); - __sk_add_node(sk, &head->chain); - sock_prot_inc_use(sk->sk_prot); - write_unlock(&head->lock); - - if (twp) { - *twp = tw; - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - } else if (tw) { - /* Silly. Should hash-dance instead... */ - inet_twsk_deschedule(tw, &tcp_death_row); - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - - inet_twsk_put(tw); + If TW bucket has been already destroyed we fall back to VJ's scheme + and use initial timestamp retrieved from peer table. + */ + if (tcptw->tw_ts_recent_stamp && + (twp == NULL || (sysctl_tcp_tw_reuse && + xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) { + tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; + if (tp->write_seq == 0) + tp->write_seq = 1; + tp->rx_opt.ts_recent = tcptw->tw_ts_recent; + tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; + sock_hold(sktw); + return 1; } return 0; - -not_unique: - write_unlock(&head->lock); - return -EADDRNOTAVAIL; } -static inline u32 connect_port_offset(const struct sock *sk) -{ - const struct inet_sock *inet = inet_sk(sk); - - return secure_tcp_port_ephemeral(inet->rcv_saddr, inet->daddr, - inet->dport); -} - -/* - * Bind a port for a connect operation and hash it. - */ -static inline int tcp_v4_hash_connect(struct sock *sk) -{ - const unsigned short snum = inet_sk(sk)->num; - struct inet_bind_hashbucket *head; - struct inet_bind_bucket *tb; - int ret; - - if (!snum) { - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int range = high - low; - int i; - int port; - static u32 hint; - u32 offset = hint + connect_port_offset(sk); - struct hlist_node *node; - struct inet_timewait_sock *tw = NULL; - - local_bh_disable(); - for (i = 1; i <= range; i++) { - port = low + (i + offset) % range; - head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)]; - spin_lock(&head->lock); - - /* Does not bother with rcv_saddr checks, - * because the established check is already - * unique enough. 
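When tcp_twsk_unique() above agrees to reuse a TIME-WAIT port pair, it starts the new incarnation 65537 bytes past the old snd_nxt, beyond anything a maximal 65535-byte window could still have in flight, and it avoids 0 because a zero write_seq would later be taken to mean "pick a fresh ISN". A tiny sketch of that arithmetic; the unsigned wraparound is intentional:

    /* Sequence bump applied on TIME-WAIT reuse, per tcp_twsk_unique(). */
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t reuse_write_seq(uint32_t tw_snd_nxt)
    {
        uint32_t seq = tw_snd_nxt + 65535 + 2;  /* mod 2^32 by design */

        return seq ? seq : 1;  /* 0 would mean "choose a new ISN" later */
    }

    int main(void)
    {
        printf("%u\n", reuse_write_seq(0xffffffffu));  /* wraps to 65536 */
        return 0;
    }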
- */ - inet_bind_bucket_for_each(tb, node, &head->chain) { - if (tb->port == port) { - BUG_TRAP(!hlist_empty(&tb->owners)); - if (tb->fastreuse >= 0) - goto next_port; - if (!__tcp_v4_check_established(sk, - port, - &tw)) - goto ok; - goto next_port; - } - } - - tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port); - if (!tb) { - spin_unlock(&head->lock); - break; - } - tb->fastreuse = -1; - goto ok; - - next_port: - spin_unlock(&head->lock); - } - local_bh_enable(); - - return -EADDRNOTAVAIL; - -ok: - hint += i; - - /* Head lock still held and bh's disabled */ - inet_bind_hash(sk, tb, port); - if (sk_unhashed(sk)) { - inet_sk(sk)->sport = htons(port); - __inet_hash(&tcp_hashinfo, sk, 0); - } - spin_unlock(&head->lock); - - if (tw) { - inet_twsk_deschedule(tw, &tcp_death_row);; - inet_twsk_put(tw); - } - - ret = 0; - goto out; - } - - head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; - tb = inet_csk(sk)->icsk_bind_hash; - spin_lock_bh(&head->lock); - if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { - __inet_hash(&tcp_hashinfo, sk, 0); - spin_unlock_bh(&head->lock); - return 0; - } else { - spin_unlock(&head->lock); - /* No definite answer... Walk to established hash table */ - ret = __tcp_v4_check_established(sk, snum, NULL); -out: - local_bh_enable(); - return ret; - } -} +EXPORT_SYMBOL_GPL(tcp_twsk_unique); /* This will initiate an outgoing connection. */ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) @@ -383,9 +220,9 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->dport = usin->sin_port; inet->daddr = daddr; - tp->ext_header_len = 0; + inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet->opt) - tp->ext_header_len = inet->opt->optlen; + inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; tp->rx_opt.mss_clamp = 536; @@ -395,7 +232,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) * complete initialization after this. */ tcp_set_state(sk, TCP_SYN_SENT); - err = tcp_v4_hash_connect(sk); + err = inet_hash_connect(&tcp_death_row, sk); if (err) goto failure; @@ -433,12 +270,10 @@ failure: /* * This routine does path mtu discovery as defined in RFC1191. */ -static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, - u32 mtu) +static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu) { struct dst_entry *dst; struct inet_sock *inet = inet_sk(sk); - struct tcp_sock *tp = tcp_sk(sk); /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs * send out by Linux are always <576bytes so they should go through @@ -467,7 +302,7 @@ static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, mtu = dst_mtu(dst); if (inet->pmtudisc != IP_PMTUDISC_DONT && - tp->pmtu_cookie > mtu) { + inet_csk(sk)->icsk_pmtu_cookie > mtu) { tcp_sync_mss(sk, mtu); /* Resend the TCP packet because it's @@ -644,10 +479,10 @@ out: } /* This routine computes an IPv4 TCP checksum. 
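tcp_v4_send_check(), whose signature is reworked just below, still reduces to the standard Internet one's-complement sum. For reference, a plain-buffer version; this is a sketch only, since the kernel's csum helpers are arch-optimized and fold the IPv4 pseudo-header separately:

    /* 16-bit one's-complement checksum over a byte buffer. */
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint16_t csum16(const uint8_t *buf, size_t len)
    {
        uint32_t sum = 0;
        size_t i;

        for (i = 0; i + 1 < len; i += 2)
            sum += (uint32_t)buf[i] << 8 | buf[i + 1];
        if (len & 1)
            sum += (uint32_t)buf[len - 1] << 8;  /* pad the odd byte */
        while (sum >> 16)
            sum = (sum & 0xffff) + (sum >> 16);  /* fold the carries */
        return (uint16_t)~sum;
    }

    int main(void)
    {
        const uint8_t data[] = { 0x45, 0x00, 0x00, 0x3c };

        printf("0x%04x\n", csum16(data, sizeof(data)));  /* 0xbac3 */
        return 0;
    }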
*/ -void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, - struct sk_buff *skb) +void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) { struct inet_sock *inet = inet_sk(sk); + struct tcphdr *th = skb->h.th; if (skb->ip_summed == CHECKSUM_HW) { th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0); @@ -826,7 +661,8 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) kfree(inet_rsk(req)->opt); } -static inline void syn_flood_warning(struct sk_buff *skb) +#ifdef CONFIG_SYN_COOKIES +static void syn_flood_warning(struct sk_buff *skb) { static unsigned long warntime; @@ -837,12 +673,13 @@ static inline void syn_flood_warning(struct sk_buff *skb) ntohs(skb->h.th->dest)); } } +#endif /* * Save and compile IPv4 options into the request_sock if needed. */ -static inline struct ip_options *tcp_v4_save_options(struct sock *sk, - struct sk_buff *skb) +static struct ip_options *tcp_v4_save_options(struct sock *sk, + struct sk_buff *skb) { struct ip_options *opt = &(IPCB(skb)->opt); struct ip_options *dopt = NULL; @@ -869,6 +706,11 @@ struct request_sock_ops tcp_request_sock_ops = { .send_reset = tcp_v4_send_reset, }; +static struct timewait_sock_ops tcp_timewait_sock_ops = { + .twsk_obj_size = sizeof(struct tcp_timewait_sock), + .twsk_unique = tcp_twsk_unique, +}; + int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { struct inet_request_sock *ireq; @@ -1053,9 +895,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, ireq->opt = NULL; newinet->mc_index = inet_iif(skb); newinet->mc_ttl = skb->nh.iph->ttl; - newtp->ext_header_len = 0; + inet_csk(newsk)->icsk_ext_hdr_len = 0; if (newinet->opt) - newtp->ext_header_len = newinet->opt->optlen; + inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; newinet->id = newtp->write_seq ^ jiffies; tcp_sync_mss(newsk, dst_mtu(dst)); @@ -1314,16 +1156,6 @@ do_time_wait: goto discard_it; } -static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) -{ - struct sockaddr_in *sin = (struct sockaddr_in *) uaddr; - struct inet_sock *inet = inet_sk(sk); - - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = inet->daddr; - sin->sin_port = inet->dport; -} - /* VJ's idea. 
Save last timestamp seen from this destination * and hold it at least for normal timewait interval to use for duplicate * segment detection in subsequent connections, before they enter synchronized @@ -1382,7 +1214,7 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) return 0; } -struct tcp_func ipv4_specific = { +struct inet_connection_sock_af_ops ipv4_specific = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, @@ -1392,7 +1224,7 @@ struct tcp_func ipv4_specific = { .net_header_len = sizeof(struct iphdr), .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, - .addr2sockaddr = v4_addr2sockaddr, + .addr2sockaddr = inet_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in), }; @@ -1433,7 +1265,8 @@ static int tcp_v4_init_sock(struct sock *sk) sk->sk_write_space = sk_stream_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); - tp->af_specific = &ipv4_specific; + icsk->icsk_af_ops = &ipv4_specific; + icsk->icsk_sync_mss = tcp_sync_mss; sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; @@ -1989,7 +1822,7 @@ struct proto tcp_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp_sock), - .twsk_obj_size = sizeof(struct tcp_timewait_sock), + .twsk_prot = &tcp_timewait_sock_ops, .rsk_prot = &tcp_request_sock_ops, }; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 1b66a2ac432..2b9b7f6c7f7 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -274,18 +274,18 @@ kill: void tcp_time_wait(struct sock *sk, int state, int timeo) { struct inet_timewait_sock *tw = NULL; + const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); int recycle_ok = 0; if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) - recycle_ok = tp->af_specific->remember_stamp(sk); + recycle_ok = icsk->icsk_af_ops->remember_stamp(sk); if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) tw = inet_twsk_alloc(sk, state); if (tw != NULL) { struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); - const struct inet_connection_sock *icsk = inet_csk(sk); const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; @@ -298,10 +298,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (tw->tw_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); - struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw); + struct inet6_timewait_sock *tw6; - ipv6_addr_copy(&tcp6tw->tw_v6_daddr, &np->daddr); - ipv6_addr_copy(&tcp6tw->tw_v6_rcv_saddr, &np->rcv_saddr); + tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot); + tw6 = inet6_twsk((struct sock *)tw); + ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr); + ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr); tw->tw_ipv6only = np->ipv6only; } #endif @@ -456,7 +458,6 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, struct request_sock **prev) { struct tcphdr *th = skb->h.th; - struct tcp_sock *tp = tcp_sk(sk); u32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); int paws_reject = 0; struct tcp_options_received tmp_opt; @@ -613,7 +614,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, * ESTABLISHED STATE. If it will be dropped after * socket is created, wait for troubles. 
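A small detail in the tcp_time_wait() hunk above: the local rto is 3.5 times the retransmission timeout, computed with shifts as (icsk_rto << 2) - (icsk_rto >> 1), i.e. 4*RTO minus RTO/2. A one-line demonstration:

    /* 3.5 * RTO from shifts, as computed in tcp_time_wait() above. */
    #include <stdio.h>

    int main(void)
    {
        int rto = 200;  /* illustrative jiffies value */
        int timeo = (rto << 2) - (rto >> 1);  /* 4*rto - rto/2 */

        printf("%d\n", timeo);  /* 700 == 3.5 * 200 */
        return 0;
    }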
*/ - child = tp->af_specific->syn_recv_sock(sk, skb, req, NULL); + child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, + req, NULL); if (child == NULL) goto listen_overflow; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b7325e0b406..a7623ead39a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -51,8 +51,8 @@ int sysctl_tcp_retrans_collapse = 1; */ int sysctl_tcp_tso_win_divisor = 3; -static inline void update_send_head(struct sock *sk, struct tcp_sock *tp, - struct sk_buff *skb) +static void update_send_head(struct sock *sk, struct tcp_sock *tp, + struct sk_buff *skb) { sk->sk_send_head = skb->next; if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue) @@ -124,8 +124,8 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst) tp->snd_cwnd_used = 0; } -static inline void tcp_event_data_sent(struct tcp_sock *tp, - struct sk_buff *skb, struct sock *sk) +static void tcp_event_data_sent(struct tcp_sock *tp, + struct sk_buff *skb, struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); const u32 now = tcp_time_stamp; @@ -142,7 +142,7 @@ static inline void tcp_event_data_sent(struct tcp_sock *tp, icsk->icsk_ack.pingpong = 1; } -static __inline__ void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) +static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) { tcp_dec_quickack_mode(sk, pkts); inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); @@ -212,7 +212,7 @@ void tcp_select_initial_window(int __space, __u32 mss, * value can be stuffed directly into th->window for an outgoing * frame. */ -static __inline__ u16 tcp_select_window(struct sock *sk) +static u16 tcp_select_window(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); u32 cur_win = tcp_receive_window(tp); @@ -250,6 +250,75 @@ static __inline__ u16 tcp_select_window(struct sock *sk) return new_win; } +static void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp, + __u32 tstamp) +{ + if (tp->rx_opt.tstamp_ok) { + *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); + *ptr++ = htonl(tstamp); + *ptr++ = htonl(tp->rx_opt.ts_recent); + } + if (tp->rx_opt.eff_sacks) { + struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks; + int this_sack; + + *ptr++ = htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_SACK << 8) | + (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks * + TCPOLEN_SACK_PERBLOCK))); + for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) { + *ptr++ = htonl(sp[this_sack].start_seq); + *ptr++ = htonl(sp[this_sack].end_seq); + } + if (tp->rx_opt.dsack) { + tp->rx_opt.dsack = 0; + tp->rx_opt.eff_sacks--; + } + } +} + +/* Construct a tcp options header for a SYN or SYN_ACK packet. + * If this is ever changed, make sure to change the definition of + * MAX_SYN_SIZE to match the new maximum number of options that you + * can generate. + */ +static void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack, + int offer_wscale, int wscale, __u32 tstamp, + __u32 ts_recent) +{ + /* We always get an MSS option. + * The option bytes which will be seen in normal data + * packets, should timestamps be used, must be in the MSS + * advertised. But we subtract them from tp->mss_cache so + * that calculations in tcp_sendmsg are simpler etc. + * So account for this fact here if necessary.
If we + * don't do this correctly, as a receiver we won't + * recognize data packets as being full sized when we + * should, and thus we won't abide by the delayed ACK + * rules correctly. + * SACKs don't matter, we never delay an ACK when we + * have any of those going out. + */ + *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss); + if (ts) { + if(sack) + *ptr++ = __constant_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) | + (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); + else + *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); + *ptr++ = htonl(tstamp); /* TSVAL */ + *ptr++ = htonl(ts_recent); /* TSECR */ + } else if(sack) + *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | + (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM); + if (offer_wscale) + *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale)); +} /* This routine actually transmits TCP packets queued in by * tcp_do_sendmsg(). This is used by both the initial @@ -371,7 +440,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, TCP_ECN_send(sk, tp, skb, tcp_header_size); } - tp->af_specific->send_check(sk, th, skb->len, skb); + icsk->icsk_af_ops->send_check(sk, skb->len, skb); if (likely(tcb->flags & TCPCB_FLAG_ACK)) tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); @@ -381,7 +450,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, TCP_INC_STATS(TCP_MIB_OUTSEGS); - err = tp->af_specific->queue_xmit(skb, 0); + err = icsk->icsk_af_ops->queue_xmit(skb, 0); if (unlikely(err <= 0)) return err; @@ -621,7 +690,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) It is minimum of user_mss and mss received with SYN. It also does not include TCP options. - tp->pmtu_cookie is last pmtu, seen by this function. + inet_csk(sk)->icsk_pmtu_cookie is last pmtu, seen by this function. tp->mss_cache is current effective sending mss, including all tcp options except for SACKs. It is evaluated, @@ -631,26 +700,26 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) NOTE1. rfc1122 clearly states that advertised MSS DOES NOT include either tcp or ip options. - NOTE2. tp->pmtu_cookie and tp->mss_cache are READ ONLY outside - this function. --ANK (980731) + NOTE2. inet_csk(sk)->icsk_pmtu_cookie and tp->mss_cache + are READ ONLY outside this function. 
--ANK (980731) */ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) { struct tcp_sock *tp = tcp_sk(sk); - int mss_now; - + struct inet_connection_sock *icsk = inet_csk(sk); /* Calculate base mss without TCP options: It is MMS_S - sizeof(tcphdr) of rfc1122 */ - mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct tcphdr); + int mss_now = (pmtu - icsk->icsk_af_ops->net_header_len - + sizeof(struct tcphdr)); /* Clamp it (mss_clamp does not include tcp options) */ if (mss_now > tp->rx_opt.mss_clamp) mss_now = tp->rx_opt.mss_clamp; /* Now subtract optional transport overhead */ - mss_now -= tp->ext_header_len; + mss_now -= icsk->icsk_ext_hdr_len; /* Then reserve room for full set of TCP options and 8 bytes of data */ if (mss_now < 48) @@ -664,7 +733,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) mss_now = max((tp->max_window>>1), 68U - tp->tcp_header_len); /* And store cached results */ - tp->pmtu_cookie = pmtu; + icsk->icsk_pmtu_cookie = pmtu; tp->mss_cache = mss_now; return mss_now; @@ -694,7 +763,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) if (dst) { u32 mtu = dst_mtu(dst); - if (mtu != tp->pmtu_cookie) + if (mtu != inet_csk(sk)->icsk_pmtu_cookie) mss_now = tcp_sync_mss(sk, mtu); } @@ -705,9 +774,10 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) xmit_size_goal = mss_now; if (doing_tso) { - xmit_size_goal = 65535 - - tp->af_specific->net_header_len - - tp->ext_header_len - tp->tcp_header_len; + xmit_size_goal = (65535 - + inet_csk(sk)->icsk_af_ops->net_header_len - + inet_csk(sk)->icsk_ext_hdr_len - + tp->tcp_header_len); if (tp->max_window && (xmit_size_goal > (tp->max_window >> 1))) @@ -723,7 +793,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) /* Congestion window validation. (RFC2861) */ -static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp) +static void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp) { __u32 packets_out = tp->packets_out; @@ -772,7 +842,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *sk /* This must be invoked the first time we consider transmitting * SKB onto the wire. */ -static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) +static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { int tso_segs = tcp_skb_pcount(skb); @@ -1422,7 +1492,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) (sysctl_tcp_retrans_collapse != 0)) tcp_retrans_try_collapse(sk, skb, cur_mss); - if(tp->af_specific->rebuild_header(sk)) + if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) return -EHOSTUNREACH; /* Routing failure or similar. */ /* Some Solaris stacks overoptimize and ignore the FIN on a @@ -1793,7 +1863,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, /* * Do all connect socket setups that can be done AF independent. 
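tcp_sync_mss(), now reading icsk_pmtu_cookie and icsk_ext_hdr_len as shown above, derives the effective sending MSS by peeling fixed headers off the path MTU, clamping to the peer's advertised MSS, and subtracting extension-header overhead. A condensed sketch with IPv4's 20-byte network and TCP headers hard-coded (the kernel takes both lengths from the socket):

    /* Effective-MSS derivation along the lines of tcp_sync_mss() above;
     * a sketch with fixed IPv4/TCP header sizes, not the kernel routine. */
    #include <stdio.h>

    static int toy_sync_mss(int pmtu, int net_hdr_len, int mss_clamp,
                            int ext_hdr_len)
    {
        int mss_now = pmtu - net_hdr_len - 20;  /* sizeof(struct tcphdr) */

        if (mss_now > mss_clamp)                /* peer's advertised MSS */
            mss_now = mss_clamp;
        mss_now -= ext_hdr_len;                 /* IP options, IPv6 exthdrs */
        if (mss_now < 48)
            mss_now = 48;
        return mss_now;
    }

    int main(void)
    {
        printf("%d\n", toy_sync_mss(1500, 20, 1460, 0));  /* 1460 */
        return 0;
    }

The 48-byte floor preserves room for a full TCP option set plus 8 data bytes, as the function's own comment notes.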
*/ -static inline void tcp_connect_init(struct sock *sk) +static void tcp_connect_init(struct sock *sk) { struct dst_entry *dst = __sk_dst_get(sk); struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 13e7e6e8df1..3b740349505 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -330,6 +330,10 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, vegas->cntRTT = 0; vegas->minRTT = 0x7fffffff; } + /* Use normal slow start */ + else if (tp->snd_cwnd <= tp->snd_ssthresh) + tcp_slow_start(tp); + } /* Extract info for Tcp socket info provided via netlink. */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2422a5f7195..223abaa72bc 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -86,6 +86,7 @@ #include <linux/module.h> #include <linux/socket.h> #include <linux/sockios.h> +#include <linux/igmp.h> #include <linux/in.h> #include <linux/errno.h> #include <linux/timer.h> @@ -846,20 +847,7 @@ out: csum_copy_err: UDP_INC_STATS_BH(UDP_MIB_INERRORS); - /* Clear queue. */ - if (flags&MSG_PEEK) { - int clear = 0; - spin_lock_bh(&sk->sk_receive_queue.lock); - if (skb == skb_peek(&sk->sk_receive_queue)) { - __skb_unlink(skb, &sk->sk_receive_queue); - clear = 1; - } - spin_unlock_bh(&sk->sk_receive_queue.lock); - if (clear) - kfree_skb(skb); - } - - skb_free_datagram(sk, skb); + skb_kill_datagram(sk, skb, flags); if (noblock) return -EAGAIN; @@ -1094,7 +1082,7 @@ static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh, * Otherwise, csum completion requires checksumming packet body, * including udp header and folding it to skb->csum. */ -static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, +static void udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, unsigned short ulen, u32 saddr, u32 daddr) { if (uh->check == 0) { @@ -1108,7 +1096,6 @@ static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, /* Probably, we should checksum udp header (it should be in cache * in any case) and data in tiny packets (< rx copybreak).
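The udp_checksum_init() hunk above leans on RFC 768's checksum conventions: a zero checksum field on the wire means the sender computed none (legal for IPv4 UDP), so verification is skipped, while a transmitted sum that computes to zero is sent as its one's-complement alias 0xffff. Two illustrative one-liners (names hypothetical):

    /* RFC 768 zero-checksum conventions referenced above. */
    #include <stdint.h>
    #include <stdio.h>

    static int rx_needs_verify(uint16_t check)
    {
        return check != 0;  /* 0 on the wire == "no checksum computed" */
    }

    static uint16_t tx_checksum(uint16_t computed)
    {
        return computed ? computed : 0xffff;  /* never send a real sum as 0 */
    }

    int main(void)
    {
        printf("%d 0x%04x\n", rx_needs_verify(0), tx_checksum(0));
        return 0;
    }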
*/ - return 0; } /* @@ -1141,8 +1128,7 @@ int udp_rcv(struct sk_buff *skb) if (pskb_trim_rcsum(skb, ulen)) goto short_packet; - if (udp_checksum_init(skb, uh, ulen, saddr, daddr) < 0) - goto csum_error; + udp_checksum_init(skb, uh, ulen, saddr, daddr); if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) return udp_v4_mcast_deliver(skb, uh, saddr, daddr); diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 6460eec834b..9601fd7f9d6 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -8,7 +8,8 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \ protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \ - ip6_flowlabel.o ipv6_syms.o netfilter.o + ip6_flowlabel.o ipv6_syms.o netfilter.o \ + inet6_connection_sock.o ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ xfrm6_output.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a60585fd85a..704fb73e6c5 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1195,7 +1195,7 @@ struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device * int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; - const struct in6_addr *sk2_rcv_saddr6 = tcp_v6_rcv_saddr(sk2); + const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index d9546380fa0..68afc53be66 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -167,6 +167,7 @@ lookup_protocol: sk->sk_reuse = 1; inet = inet_sk(sk); + inet->is_icsk = INET_PROTOSW_ICSK & answer_flags; if (SOCK_RAW == sock->type) { inet->num = protocol; @@ -389,6 +390,8 @@ int inet6_destroy_sock(struct sock *sk) return 0; } +EXPORT_SYMBOL_GPL(inet6_destroy_sock); + /* * This does both peername and sockname. */ @@ -431,7 +434,6 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; - int err = -EINVAL; switch(cmd) { @@ -450,16 +452,15 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCSIFDSTADDR: return addrconf_set_dstaddr((void __user *) arg); default: - if (!sk->sk_prot->ioctl || - (err = sk->sk_prot->ioctl(sk, cmd, arg)) == -ENOIOCTLCMD) - return(dev_ioctl(cmd,(void __user *) arg)); - return err; + if (!sk->sk_prot->ioctl) + return -ENOIOCTLCMD; + return sk->sk_prot->ioctl(sk, cmd, arg); } /*NOTREACHED*/ return(0); } -struct proto_ops inet6_stream_ops = { +const struct proto_ops inet6_stream_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, @@ -480,7 +481,7 @@ struct proto_ops inet6_stream_ops = { .sendpage = tcp_sendpage }; -struct proto_ops inet6_dgram_ops = { +const struct proto_ops inet6_dgram_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, @@ -508,7 +509,7 @@ static struct net_proto_family inet6_family_ops = { }; /* Same as inet6_dgram_ops, sans udp_poll. 
*/ -static struct proto_ops inet6_sockraw_ops = { +static const struct proto_ops inet6_sockraw_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, @@ -609,6 +610,79 @@ inet6_unregister_protosw(struct inet_protosw *p) } } +int inet6_sk_rebuild_header(struct sock *sk) +{ + int err; + struct dst_entry *dst; + struct ipv6_pinfo *np = inet6_sk(sk); + + dst = __sk_dst_check(sk, np->dst_cookie); + + if (dst == NULL) { + struct inet_sock *inet = inet_sk(sk); + struct in6_addr *final_p = NULL, final; + struct flowi fl; + + memset(&fl, 0, sizeof(fl)); + fl.proto = sk->sk_protocol; + ipv6_addr_copy(&fl.fl6_dst, &np->daddr); + ipv6_addr_copy(&fl.fl6_src, &np->saddr); + fl.fl6_flowlabel = np->flow_label; + fl.oif = sk->sk_bound_dev_if; + fl.fl_ip_dport = inet->dport; + fl.fl_ip_sport = inet->sport; + + if (np->opt && np->opt->srcrt) { + struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; + ipv6_addr_copy(&final, &fl.fl6_dst); + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); + final_p = &final; + } + + err = ip6_dst_lookup(sk, &dst, &fl); + if (err) { + sk->sk_route_caps = 0; + return err; + } + if (final_p) + ipv6_addr_copy(&fl.fl6_dst, final_p); + + if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + sk->sk_err_soft = -err; + return err; + } + + ip6_dst_store(sk, dst, NULL); + sk->sk_route_caps = dst->dev->features & + ~(NETIF_F_IP_CSUM | NETIF_F_TSO); + } + + return 0; +} + +EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header); + +int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + struct inet6_skb_parm *opt = IP6CB(skb); + + if (np->rxopt.all) { + if ((opt->hop && (np->rxopt.bits.hopopts || + np->rxopt.bits.ohopopts)) || + ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && + np->rxopt.bits.rxflow) || + (opt->srcrt && (np->rxopt.bits.srcrt || + np->rxopt.bits.osrcrt)) || + ((opt->dst1 || opt->dst0) && + (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts))) + return 1; + } + return 0; +} + +EXPORT_SYMBOL_GPL(ipv6_opt_accepted); + int snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign) { diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index f3629730eb1..13cc7f89558 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -33,6 +33,7 @@ #include <linux/string.h> #include <net/icmp.h> #include <net/ipv6.h> +#include <net/protocol.h> #include <net/xfrm.h> #include <asm/scatterlist.h> diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 8bfbe997079..6de8ee1a5ad 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -36,6 +36,7 @@ #include <linux/random.h> #include <net/icmp.h> #include <net/ipv6.h> +#include <net/protocol.h> #include <linux/icmpv6.h> static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index be6faf31138..113374dc342 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -413,6 +413,8 @@ ipv6_invert_rthdr(struct sock *sk, struct ipv6_rt_hdr *hdr) return opt; } +EXPORT_SYMBOL_GPL(ipv6_invert_rthdr); + /********************************** Hop-by-hop options. 
**********************************/ @@ -579,6 +581,8 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt) return opt2; } +EXPORT_SYMBOL_GPL(ipv6_dup_options); + static int ipv6_renew_option(void *ohdr, struct ipv6_opt_hdr __user *newopt, int newoptlen, int inherit, diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c new file mode 100644 index 00000000000..792f90f0f9e --- /dev/null +++ b/net/ipv6/inet6_connection_sock.c @@ -0,0 +1,199 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Support for INET6 connection oriented protocols. + * + * Authors: See the TCPv6 sources + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or(at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/in6.h> +#include <linux/ipv6.h> +#include <linux/jhash.h> + +#include <net/addrconf.h> +#include <net/inet_connection_sock.h> +#include <net/inet_ecn.h> +#include <net/inet_hashtables.h> +#include <net/ip6_route.h> +#include <net/sock.h> + +int inet6_csk_bind_conflict(const struct sock *sk, + const struct inet_bind_bucket *tb) +{ + const struct sock *sk2; + const struct hlist_node *node; + + /* We must walk the whole port owner list in this case. -DaveM */ + sk_for_each_bound(sk2, node, &tb->owners) { + if (sk != sk2 && + (!sk->sk_bound_dev_if || + !sk2->sk_bound_dev_if || + sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && + (!sk->sk_reuse || !sk2->sk_reuse || + sk2->sk_state == TCP_LISTEN) && + ipv6_rcv_saddr_equal(sk, sk2)) + break; + } + + return node != NULL; +} + +EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict); + +/* + * request_sock (formerly open request) hash tables. 
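Both inet6_csk_search_req() and the queue-add helper below index request sockets through inet6_synq_hash(), a Bob Jenkins lookup2-style mix over the peer address, port and a per-listener random value. A userspace rendering follows; the mix macro and the 0x9e3779b9 golden-ratio constant are transcribed from the era's linux/jhash.h and should be treated as assumptions rather than verbatim kernel code:

    /* lookup2-style mix as used by inet6_synq_hash() below. */
    #include <stdint.h>
    #include <stdio.h>

    #define GOLDEN_RATIO 0x9e3779b9u  /* assumed JHASH_GOLDEN_RATIO */

    #define JHASH_MIX(a, b, c) do {              \
        a -= b; a -= c; a ^= (c >> 13);          \
        b -= c; b -= a; b ^= (a << 8);           \
        c -= a; c -= b; c ^= (b >> 13);          \
        a -= b; a -= c; a ^= (c >> 12);          \
        b -= c; b -= a; b ^= (a << 16);          \
        c -= a; c -= b; c ^= (b >> 12);          \
        a -= b; a -= c; a ^= (c >> 3);           \
        b -= c; b -= a; b ^= (a << 10);          \
        c -= a; c -= b; c ^= (b >> 15);          \
    } while (0)

    static uint32_t toy_synq_hash(const uint32_t addr[4], uint16_t rport,
                                  uint32_t rnd, uint16_t synq_hsize)
    {
        uint32_t a = addr[0] + GOLDEN_RATIO;
        uint32_t b = addr[1] + GOLDEN_RATIO;
        uint32_t c = addr[2] + rnd;

        JHASH_MIX(a, b, c);
        a += addr[3];          /* fold in the last word and the port */
        b += rport;
        JHASH_MIX(a, b, c);

        return c & (synq_hsize - 1);  /* table size is a power of two */
    }

    int main(void)
    {
        uint32_t v6[4] = { 0x20010db8u, 0, 0, 1 };

        printf("bucket %u\n", toy_synq_hash(v6, 80, 0x12345678u, 512));
        return 0;
    }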
+ */ +static u32 inet6_synq_hash(const struct in6_addr *raddr, const u16 rport, + const u32 rnd, const u16 synq_hsize) +{ + u32 a = raddr->s6_addr32[0]; + u32 b = raddr->s6_addr32[1]; + u32 c = raddr->s6_addr32[2]; + + a += JHASH_GOLDEN_RATIO; + b += JHASH_GOLDEN_RATIO; + c += rnd; + __jhash_mix(a, b, c); + + a += raddr->s6_addr32[3]; + b += (u32)rport; + __jhash_mix(a, b, c); + + return c & (synq_hsize - 1); +} + +struct request_sock *inet6_csk_search_req(const struct sock *sk, + struct request_sock ***prevp, + const __u16 rport, + const struct in6_addr *raddr, + const struct in6_addr *laddr, + const int iif) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; + struct request_sock *req, **prev; + + for (prev = &lopt->syn_table[inet6_synq_hash(raddr, rport, + lopt->hash_rnd, + lopt->nr_table_entries)]; + (req = *prev) != NULL; + prev = &req->dl_next) { + const struct inet6_request_sock *treq = inet6_rsk(req); + + if (inet_rsk(req)->rmt_port == rport && + req->rsk_ops->family == AF_INET6 && + ipv6_addr_equal(&treq->rmt_addr, raddr) && + ipv6_addr_equal(&treq->loc_addr, laddr) && + (!treq->iif || treq->iif == iif)) { + BUG_TRAP(req->sk == NULL); + *prevp = prev; + return req; + } + } + + return NULL; +} + +EXPORT_SYMBOL_GPL(inet6_csk_search_req); + +void inet6_csk_reqsk_queue_hash_add(struct sock *sk, + struct request_sock *req, + const unsigned long timeout) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; + const u32 h = inet6_synq_hash(&inet6_rsk(req)->rmt_addr, + inet_rsk(req)->rmt_port, + lopt->hash_rnd, lopt->nr_table_entries); + + reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); + inet_csk_reqsk_queue_added(sk, timeout); +} + +EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add); + +void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr; + + sin6->sin6_family = AF_INET6; + ipv6_addr_copy(&sin6->sin6_addr, &np->daddr); + sin6->sin6_port = inet_sk(sk)->dport; + /* We do not store received flowlabel for TCP */ + sin6->sin6_flowinfo = 0; + sin6->sin6_scope_id = 0; + if (sk->sk_bound_dev_if && + ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) + sin6->sin6_scope_id = sk->sk_bound_dev_if; +} + +EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); + +int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) +{ + struct sock *sk = skb->sk; + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct flowi fl; + struct dst_entry *dst; + struct in6_addr *final_p = NULL, final; + + memset(&fl, 0, sizeof(fl)); + fl.proto = sk->sk_protocol; + ipv6_addr_copy(&fl.fl6_dst, &np->daddr); + ipv6_addr_copy(&fl.fl6_src, &np->saddr); + fl.fl6_flowlabel = np->flow_label; + IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); + fl.oif = sk->sk_bound_dev_if; + fl.fl_ip_sport = inet->sport; + fl.fl_ip_dport = inet->dport; + + if (np->opt && np->opt->srcrt) { + struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; + ipv6_addr_copy(&final, &fl.fl6_dst); + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); + final_p = &final; + } + + dst = __sk_dst_check(sk, np->dst_cookie); + + if (dst == NULL) { + int err = ip6_dst_lookup(sk, &dst, &fl); + + if (err) { + sk->sk_err_soft = -err; + return err; + } + + if (final_p) + ipv6_addr_copy(&fl.fl6_dst, final_p); + + if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + sk->sk_route_caps = 0; + return 
err; + } + + ip6_dst_store(sk, dst, NULL); + sk->sk_route_caps = dst->dev->features & + ~(NETIF_F_IP_CSUM | NETIF_F_TSO); + } + + skb->dst = dst_clone(dst); + + /* Restore final destination back after routing done */ + ipv6_addr_copy(&fl.fl6_dst, &np->daddr); + + return ip6_xmit(sk, skb, &fl, np->opt, 0); +} + +EXPORT_SYMBOL_GPL(inet6_csk_xmit); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 01d5f46d4e4..4154f3a8b6c 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -5,7 +5,8 @@ * * Generic INET6 transport hashtables * - * Authors: Lotsa people, from code originally in tcp + * Authors: Lotsa people, from code originally in tcp, generalised here + * by Arnaldo Carvalho de Melo <acme@mandriva.com> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -14,12 +15,13 @@ */ #include <linux/config.h> - #include <linux/module.h> +#include <linux/random.h> #include <net/inet_connection_sock.h> #include <net/inet_hashtables.h> #include <net/inet6_hashtables.h> +#include <net/ip.h> struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, const struct in6_addr *daddr, @@ -79,3 +81,180 @@ struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, } EXPORT_SYMBOL_GPL(inet6_lookup); + +static int __inet6_check_established(struct inet_timewait_death_row *death_row, + struct sock *sk, const __u16 lport, + struct inet_timewait_sock **twp) +{ + struct inet_hashinfo *hinfo = death_row->hashinfo; + const struct inet_sock *inet = inet_sk(sk); + const struct ipv6_pinfo *np = inet6_sk(sk); + const struct in6_addr *daddr = &np->rcv_saddr; + const struct in6_addr *saddr = &np->daddr; + const int dif = sk->sk_bound_dev_if; + const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); + const unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, + inet->dport); + struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); + struct sock *sk2; + const struct hlist_node *node; + struct inet_timewait_sock *tw; + + prefetch(head->chain.first); + write_lock(&head->lock); + + /* Check TIME-WAIT sockets first. */ + sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) { + const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2); + + tw = inet_twsk(sk2); + + if(*((__u32 *)&(tw->tw_dport)) == ports && + sk2->sk_family == PF_INET6 && + ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) && + ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) && + sk2->sk_bound_dev_if == sk->sk_bound_dev_if) { + if (twsk_unique(sk, sk2, twp)) + goto unique; + else + goto not_unique; + } + } + tw = NULL; + + /* And established part... */ + sk_for_each(sk2, node, &head->chain) { + if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif)) + goto not_unique; + } + +unique: + BUG_TRAP(sk_unhashed(sk)); + __sk_add_node(sk, &head->chain); + sk->sk_hash = hash; + sock_prot_inc_use(sk->sk_prot); + write_unlock(&head->lock); + + if (twp != NULL) { + *twp = tw; + NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); + } else if (tw != NULL) { + /* Silly. Should hash-dance instead... 
*/ + inet_twsk_deschedule(tw, death_row); + NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); + + inet_twsk_put(tw); + } + return 0; + +not_unique: + write_unlock(&head->lock); + return -EADDRNOTAVAIL; +} + +static inline u32 inet6_sk_port_offset(const struct sock *sk) +{ + const struct inet_sock *inet = inet_sk(sk); + const struct ipv6_pinfo *np = inet6_sk(sk); + return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32, + np->daddr.s6_addr32, + inet->dport); +} + +int inet6_hash_connect(struct inet_timewait_death_row *death_row, + struct sock *sk) +{ + struct inet_hashinfo *hinfo = death_row->hashinfo; + const unsigned short snum = inet_sk(sk)->num; + struct inet_bind_hashbucket *head; + struct inet_bind_bucket *tb; + int ret; + + if (snum == 0) { + const int low = sysctl_local_port_range[0]; + const int high = sysctl_local_port_range[1]; + const int range = high - low; + int i, port; + static u32 hint; + const u32 offset = hint + inet6_sk_port_offset(sk); + struct hlist_node *node; + struct inet_timewait_sock *tw = NULL; + + local_bh_disable(); + for (i = 1; i <= range; i++) { + port = low + (i + offset) % range; + head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; + spin_lock(&head->lock); + + /* Does not bother with rcv_saddr checks, + * because the established check is already + * unique enough. + */ + inet_bind_bucket_for_each(tb, node, &head->chain) { + if (tb->port == port) { + BUG_TRAP(!hlist_empty(&tb->owners)); + if (tb->fastreuse >= 0) + goto next_port; + if (!__inet6_check_established(death_row, + sk, port, + &tw)) + goto ok; + goto next_port; + } + } + + tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, + head, port); + if (!tb) { + spin_unlock(&head->lock); + break; + } + tb->fastreuse = -1; + goto ok; + + next_port: + spin_unlock(&head->lock); + } + local_bh_enable(); + + return -EADDRNOTAVAIL; + +ok: + hint += i; + + /* Head lock still held and bh's disabled */ + inet_bind_hash(sk, tb, port); + if (sk_unhashed(sk)) { + inet_sk(sk)->sport = htons(port); + __inet6_hash(hinfo, sk); + } + spin_unlock(&head->lock); + + if (tw) { + inet_twsk_deschedule(tw, death_row); + inet_twsk_put(tw); + } + + ret = 0; + goto out; + } + + head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)]; + tb = inet_csk(sk)->icsk_bind_hash; + spin_lock_bh(&head->lock); + + if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) { + __inet6_hash(hinfo, sk); + spin_unlock_bh(&head->lock); + return 0; + } else { + spin_unlock(&head->lock); + /* No definite answer... 
Walk to established hash table */ + ret = __inet6_check_established(death_row, sk, snum, NULL); +out: + local_bh_enable(); + return ret; + } +} + +EXPORT_SYMBOL_GPL(inet6_hash_connect); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 1cf02765fb5..89d12b4817a 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -200,6 +200,8 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, u32 label) return NULL; } +EXPORT_SYMBOL_GPL(fl6_sock_lookup); + void fl6_free_socklist(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 8523c76ebf7..b4c4beba0ed 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -775,6 +775,8 @@ out_err_release: return err; } +EXPORT_SYMBOL_GPL(ip6_dst_lookup); + static inline int ip6_ufo_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 55917fb1709..626dd39685f 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -47,6 +47,7 @@ #include <linux/rtnetlink.h> #include <net/icmp.h> #include <net/ipv6.h> +#include <net/protocol.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 3620718defe..c63868dd2ca 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -163,17 +163,17 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, sk_refcnt_debug_dec(sk); if (sk->sk_protocol == IPPROTO_TCP) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); local_bh_disable(); sock_prot_dec_use(sk->sk_prot); sock_prot_inc_use(&tcp_prot); local_bh_enable(); sk->sk_prot = &tcp_prot; - tp->af_specific = &ipv4_specific; + icsk->icsk_af_ops = &ipv4_specific; sk->sk_socket->ops = &inet_stream_ops; sk->sk_family = PF_INET; - tcp_sync_mss(sk, tp->pmtu_cookie); + tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } else { local_bh_disable(); sock_prot_dec_use(sk->sk_prot); @@ -317,14 +317,15 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, } retv = 0; - if (sk->sk_type == SOCK_STREAM) { + if (inet_sk(sk)->is_icsk) { if (opt) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); if (!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && inet_sk(sk)->daddr != LOOPBACK4_IPV6) { - tp->ext_header_len = opt->opt_flen + opt->opt_nflen; - tcp_sync_mss(sk, tp->pmtu_cookie); + icsk->icsk_ext_hdr_len = + opt->opt_flen + opt->opt_nflen; + icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); } } opt = xchg(&np->opt, opt); @@ -380,14 +381,15 @@ sticky_done: goto done; update: retv = 0; - if (sk->sk_type == SOCK_STREAM) { + if (inet_sk(sk)->is_icsk) { if (opt) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); if (!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && inet_sk(sk)->daddr != LOOPBACK4_IPV6) { - tp->ext_header_len = opt->opt_flen + opt->opt_nflen; - tcp_sync_mss(sk, tp->pmtu_cookie); + icsk->icsk_ext_hdr_len = + opt->opt_flen + opt->opt_nflen; + icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); } } opt = xchg(&np->opt, opt); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index f829a4ad3cc..1cf305a9f8d 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -224,7 +224,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr) mc_lst->ifindex = dev->ifindex; mc_lst->sfmode = MCAST_EXCLUDE; - mc_lst->sflock = RW_LOCK_UNLOCKED; 
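/* Editor's note (not part of the patch): RW_LOCK_UNLOCKED is a static
 * initializer; assigning it to a lock embedded in a kmalloc()ed object
 * bypasses the per-lock setup that the debugging variants of the lock
 * types rely on.  The runtime initializers are the fix made here and
 * echoed later in this patch for nf_conntrack_reasm.c
 * (RW_LOCK_UNLOCKED -> DEFINE_RWLOCK for the static lock,
 * SPIN_LOCK_UNLOCKED -> spin_lock_init for the per-queue one):
 *
 *	static DEFINE_RWLOCK(some_lock);	// compile-time init
 *	rwlock_init(&obj->lock);		// run-time init, after alloc
 *	spin_lock_init(&obj->slock);
 */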
+ rwlock_init(&mc_lst->sflock); mc_lst->sflist = NULL; /* diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 95d469271c4..925b42d4834 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -15,6 +15,7 @@ * - new extension header parser code */ #include <linux/config.h> +#include <linux/in.h> #include <linux/skbuff.h> #include <linux/kmod.h> #include <linux/vmalloc.h> @@ -86,11 +87,6 @@ static DECLARE_MUTEX(ip6t_mutex); context stops packets coming through and allows user context to read the counters or update the rules. - To be cache friendly on SMP, we arrange them like so: - [ n-entries ] - ... cache-align padding ... - [ n-entries ] - Hence the start of any table is given by get_table() below. */ /* The table itself */ @@ -108,33 +104,29 @@ struct ip6t_table_info unsigned int underflow[NF_IP6_NUMHOOKS]; /* ip6t_entry tables: one per CPU */ - char entries[0] ____cacheline_aligned; + void *entries[NR_CPUS]; }; static LIST_HEAD(ip6t_target); static LIST_HEAD(ip6t_match); static LIST_HEAD(ip6t_tables); +#define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0) #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) -#ifdef CONFIG_SMP -#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p)) -#else -#define TABLE_OFFSET(t,p) 0 -#endif - #if 0 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0) #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; }) #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0) #endif -static int ip6_masked_addrcmp(struct in6_addr addr1, struct in6_addr mask, - struct in6_addr addr2) +int +ip6_masked_addrcmp(const struct in6_addr *addr1, const struct in6_addr *mask, + const struct in6_addr *addr2) { int i; for( i = 0; i < 16; i++){ - if((addr1.s6_addr[i] & mask.s6_addr[i]) != - (addr2.s6_addr[i] & mask.s6_addr[i])) + if((addr1->s6_addr[i] & mask->s6_addr[i]) != + (addr2->s6_addr[i] & mask->s6_addr[i])) return 1; } return 0; @@ -168,10 +160,10 @@ ip6_packet_match(const struct sk_buff *skb, #define FWINV(bool,invflg) ((bool) ^ !!(ip6info->invflags & invflg)) - if (FWINV(ip6_masked_addrcmp(ipv6->saddr,ip6info->smsk,ip6info->src), - IP6T_INV_SRCIP) - || FWINV(ip6_masked_addrcmp(ipv6->daddr,ip6info->dmsk,ip6info->dst), - IP6T_INV_DSTIP)) { + if (FWINV(ip6_masked_addrcmp(&ipv6->saddr, &ip6info->smsk, + &ip6info->src), IP6T_INV_SRCIP) + || FWINV(ip6_masked_addrcmp(&ipv6->daddr, &ip6info->dmsk, + &ip6info->dst), IP6T_INV_DSTIP)) { dprintf("Source or dest mismatch.\n"); /* dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr, @@ -214,69 +206,21 @@ ip6_packet_match(const struct sk_buff *skb, /* look for the desired protocol header */ if((ip6info->flags & IP6T_F_PROTO)) { - u_int8_t currenthdr = ipv6->nexthdr; - struct ipv6_opt_hdr _hdr, *hp; - u_int16_t ptr; /* Header offset in skb */ - u_int16_t hdrlen; /* Header */ - u_int16_t _fragoff = 0, *fp = NULL; - - ptr = IPV6_HDR_LEN; - - while (ip6t_ext_hdr(currenthdr)) { - /* Is there enough space for the next ext header? 
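/* Editor's sketch (not part of the patch): ip6_masked_addrcmp() after
 * the signature change above.  The old version took three in6_addr
 * structs by value, copying 48 bytes per call on a hot path; const
 * pointers avoid that and let the helper be exported.  Standalone
 * rendition: */
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>

static int masked_addrcmp(const struct in6_addr *a1,
			  const struct in6_addr *mask,
			  const struct in6_addr *a2)
{
	int i;

	for (i = 0; i < 16; i++)
		if ((a1->s6_addr[i] & mask->s6_addr[i]) !=
		    (a2->s6_addr[i] & mask->s6_addr[i]))
			return 1;	/* differs inside the mask */
	return 0;			/* equal under the mask */
}

int main(void)
{
	struct in6_addr a = IN6ADDR_LOOPBACK_INIT;	/* ::1 */
	struct in6_addr b = IN6ADDR_ANY_INIT;		/* ::  */
	struct in6_addr mask;

	memset(&mask, 0, sizeof(mask));
	memset(mask.s6_addr, 0xff, 8);	/* compare the /64 prefix only */

	printf("%d\n", masked_addrcmp(&a, &mask, &b));	/* 0: same /64 */
	return 0;
}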
*/ - if (skb->len - ptr < IPV6_OPTHDR_LEN) - return 0; - - /* NONE or ESP: there isn't protocol part */ - /* If we want to count these packets in '-p all', - * we will change the return 0 to 1*/ - if ((currenthdr == IPPROTO_NONE) || - (currenthdr == IPPROTO_ESP)) - break; + int protohdr; + unsigned short _frag_off; - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Size calculation */ - if (currenthdr == IPPROTO_FRAGMENT) { - fp = skb_header_pointer(skb, - ptr+offsetof(struct frag_hdr, - frag_off), - sizeof(_fragoff), - &_fragoff); - if (fp == NULL) - return 0; - - _fragoff = ntohs(*fp) & ~0x7; - hdrlen = 8; - } else if (currenthdr == IPPROTO_AH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - currenthdr = hp->nexthdr; - ptr += hdrlen; - /* ptr is too large */ - if ( ptr > skb->len ) - return 0; - if (_fragoff) { - if (ip6t_ext_hdr(currenthdr)) - return 0; - break; - } - } - - *protoff = ptr; - *fragoff = _fragoff; + protohdr = ipv6_find_hdr(skb, protoff, -1, &_frag_off); + if (protohdr < 0) + return 0; - /* currenthdr contains the protocol header */ + *fragoff = _frag_off; dprintf("Packet protocol %hi ?= %s%hi.\n", - currenthdr, + protohdr, ip6info->invflags & IP6T_INV_PROTO ? "!":"", ip6info->proto); - if (ip6info->proto == currenthdr) { + if (ip6info->proto == protohdr) { if(ip6info->invflags & IP6T_INV_PROTO) { return 0; } @@ -376,8 +320,7 @@ ip6t_do_table(struct sk_buff **pskb, read_lock_bh(&table->lock); IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - table_base = (void *)table->private->entries - + TABLE_OFFSET(table->private, smp_processor_id()); + table_base = (void *)table->private->entries[smp_processor_id()]; e = get_entry(table_base, table->private->hook_entry[hook]); #ifdef CONFIG_NETFILTER_DEBUG @@ -649,7 +592,8 @@ unconditional(const struct ip6t_ip6 *ipv6) /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. 
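/* Editor's note (not part of the patch): this function, and
 * translate_table() below, now receive the rule blob as an explicit
 * entry0 argument rather than reaching for newinfo->entries, because
 * the entries are no longer one flat, SMP_ALIGNed buffer but a
 * separate allocation per CPU.  Validation runs once on the caller's
 * copy; translate_table() then memcpy()s the checked blob into every
 * other CPU's copy. */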
*/ static int -mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks) +mark_source_chains(struct ip6t_table_info *newinfo, + unsigned int valid_hooks, void *entry0) { unsigned int hook; @@ -658,7 +602,7 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks) for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) { unsigned int pos = newinfo->hook_entry[hook]; struct ip6t_entry *e - = (struct ip6t_entry *)(newinfo->entries + pos); + = (struct ip6t_entry *)(entry0 + pos); if (!(valid_hooks & (1 << hook))) continue; @@ -708,13 +652,13 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks) goto next; e = (struct ip6t_entry *) - (newinfo->entries + pos); + (entry0 + pos); } while (oldpos == pos + e->next_offset); /* Move along one */ size = e->next_offset; e = (struct ip6t_entry *) - (newinfo->entries + pos + size); + (entry0 + pos + size); e->counters.pcnt = pos; pos += size; } else { @@ -731,7 +675,7 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks) newpos = pos + e->next_offset; } e = (struct ip6t_entry *) - (newinfo->entries + newpos); + (entry0 + newpos); e->counters.pcnt = pos; pos = newpos; } @@ -941,6 +885,7 @@ static int translate_table(const char *name, unsigned int valid_hooks, struct ip6t_table_info *newinfo, + void *entry0, unsigned int size, unsigned int number, const unsigned int *hook_entries, @@ -961,11 +906,11 @@ translate_table(const char *name, duprintf("translate_table: size %u\n", newinfo->size); i = 0; /* Walk through entries, checking offsets. */ - ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, + ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size, check_entry_size_and_hooks, newinfo, - newinfo->entries, - newinfo->entries + size, + entry0, + entry0 + size, hook_entries, underflows, &i); if (ret != 0) return ret; @@ -993,27 +938,24 @@ translate_table(const char *name, } } - if (!mark_source_chains(newinfo, valid_hooks)) + if (!mark_source_chains(newinfo, valid_hooks, entry0)) return -ELOOP; /* Finally, each sanity check must pass */ i = 0; - ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, + ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size, check_entry, name, size, &i); if (ret != 0) { - IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, + IP6T_ENTRY_ITERATE(entry0, newinfo->size, cleanup_entry, &i); return ret; } /* And one copy for every other CPU */ for_each_cpu(i) { - if (i == 0) - continue; - memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i, - newinfo->entries, - SMP_ALIGN(newinfo->size)); + if (newinfo->entries[i] && newinfo->entries[i] != entry0) + memcpy(newinfo->entries[i], entry0, newinfo->size); } return ret; @@ -1029,15 +971,12 @@ replace_table(struct ip6t_table *table, #ifdef CONFIG_NETFILTER_DEBUG { - struct ip6t_entry *table_base; - unsigned int i; + int cpu; - for_each_cpu(i) { - table_base = - (void *)newinfo->entries - + TABLE_OFFSET(newinfo, i); - - table_base->comefrom = 0xdead57ac; + for_each_cpu(cpu) { + struct ip6t_entry *table_base = newinfo->entries[cpu]; + if (table_base) + table_base->comefrom = 0xdead57ac; } } #endif @@ -1072,16 +1011,44 @@ add_entry_to_counter(const struct ip6t_entry *e, return 0; } +static inline int +set_entry_to_counter(const struct ip6t_entry *e, + struct ip6t_counters total[], + unsigned int *i) +{ + SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); + + (*i)++; + return 0; +} + static void get_counters(const struct ip6t_table_info *t, struct ip6t_counters counters[]) { unsigned int cpu; unsigned 
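/* Editor's sketch (not part of the patch): the SET-then-ADD pass used
 * by get_counters() below.  Seeding the totals from one CPU's copy
 * with SET_COUNTER removes the memset() the callers used to need
 * (note the "- memset(counters, 0, countersize);" hunks further on).
 * Standalone illustration: */
#include <stdio.h>

#define SET_COUNTER(c, b, p) do { (c).bcnt  = (b); (c).pcnt  = (p); } while (0)
#define ADD_COUNTER(c, b, p) do { (c).bcnt += (b); (c).pcnt += (p); } while (0)

struct cnt { unsigned long bcnt, pcnt; };

int main(void)
{
	struct cnt percpu[2][2] = {	/* [cpu][rule] */
		{ { 100, 1 }, { 200, 2 } },
		{ {  10, 1 }, {  20, 2 } },
	};
	struct cnt total[2];		/* no zero-fill required */
	int cpu, i;

	for (i = 0; i < 2; i++)		/* current CPU seeds the totals */
		SET_COUNTER(total[i], percpu[0][i].bcnt, percpu[0][i].pcnt);
	for (cpu = 1; cpu < 2; cpu++)	/* every other CPU accumulates */
		for (i = 0; i < 2; i++)
			ADD_COUNTER(total[i], percpu[cpu][i].bcnt,
				    percpu[cpu][i].pcnt);

	printf("%lu %lu\n", total[0].bcnt, total[1].bcnt);	/* 110 220 */
	return 0;
}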
int i; + unsigned int curcpu; + + /* Instead of clearing (by a previous call to memset()) + * the counters and using adds, we set the counters + * with data used by 'current' CPU + * We don't care about preemption here. + */ + curcpu = raw_smp_processor_id(); + + i = 0; + IP6T_ENTRY_ITERATE(t->entries[curcpu], + t->size, + set_entry_to_counter, + counters, + &i); for_each_cpu(cpu) { + if (cpu == curcpu) + continue; i = 0; - IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), + IP6T_ENTRY_ITERATE(t->entries[cpu], t->size, add_entry_to_counter, counters, @@ -1098,6 +1065,7 @@ copy_entries_to_user(unsigned int total_size, struct ip6t_entry *e; struct ip6t_counters *counters; int ret = 0; + void *loc_cpu_entry; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -1109,13 +1077,13 @@ copy_entries_to_user(unsigned int total_size, return -ENOMEM; /* First, sum counters... */ - memset(counters, 0, countersize); write_lock_bh(&table->lock); get_counters(table->private, counters); write_unlock_bh(&table->lock); - /* ... then copy entire thing from CPU 0... */ - if (copy_to_user(userptr, table->private->entries, total_size) != 0) { + /* choose the copy that is on our node/cpu */ + loc_cpu_entry = table->private->entries[raw_smp_processor_id()]; + if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { ret = -EFAULT; goto free_counters; } @@ -1127,7 +1095,7 @@ copy_entries_to_user(unsigned int total_size, struct ip6t_entry_match *m; struct ip6t_entry_target *t; - e = (struct ip6t_entry *)(table->private->entries + off); + e = (struct ip6t_entry *)(loc_cpu_entry + off); if (copy_to_user(userptr + off + offsetof(struct ip6t_entry, counters), &counters[num], @@ -1196,6 +1164,46 @@ get_entries(const struct ip6t_get_entries *entries, return ret; } +static void free_table_info(struct ip6t_table_info *info) +{ + int cpu; + for_each_cpu(cpu) { + if (info->size <= PAGE_SIZE) + kfree(info->entries[cpu]); + else + vfree(info->entries[cpu]); + } + kfree(info); +} + +static struct ip6t_table_info *alloc_table_info(unsigned int size) +{ + struct ip6t_table_info *newinfo; + int cpu; + + newinfo = kzalloc(sizeof(struct ip6t_table_info), GFP_KERNEL); + if (!newinfo) + return NULL; + + newinfo->size = size; + + for_each_cpu(cpu) { + if (size <= PAGE_SIZE) + newinfo->entries[cpu] = kmalloc_node(size, + GFP_KERNEL, + cpu_to_node(cpu)); + else + newinfo->entries[cpu] = vmalloc_node(size, + cpu_to_node(cpu)); + if (newinfo->entries[cpu] == NULL) { + free_table_info(newinfo); + return NULL; + } + } + + return newinfo; +} + static int do_replace(void __user *user, unsigned int len) { @@ -1204,6 +1212,7 @@ do_replace(void __user *user, unsigned int len) struct ip6t_table *t; struct ip6t_table_info *newinfo, *oldinfo; struct ip6t_counters *counters; + void *loc_cpu_entry, *loc_cpu_old_entry; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -1212,13 +1221,13 @@ do_replace(void __user *user, unsigned int len) if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages) return -ENOMEM; - newinfo = vmalloc(sizeof(struct ip6t_table_info) - + SMP_ALIGN(tmp.size) * - (highest_possible_processor_id()+1)); + newinfo = alloc_table_info(tmp.size); if (!newinfo) return -ENOMEM; - if (copy_from_user(newinfo->entries, user + sizeof(tmp), + /* choose the copy that is on our node/cpu */ + loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; } @@
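/* Editor's note (not part of the patch): alloc_table_info() above is
 * the allocation-side half of the per-CPU conversion: each CPU's rule
 * copy is placed on that CPU's own NUMA node, via kmalloc_node() when
 * the table fits in a page and vmalloc_node() otherwise, so rule
 * evaluation never reads rule memory from a remote node.
 * free_table_info() mirrors the same size test when releasing. */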
-1229,10 +1238,9 @@ do_replace(void __user *user, unsigned int len) ret = -ENOMEM; goto free_newinfo; } - memset(counters, 0, tmp.num_counters * sizeof(struct ip6t_counters)); ret = translate_table(tmp.name, tmp.valid_hooks, - newinfo, tmp.size, tmp.num_entries, + newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, tmp.hook_entry, tmp.underflow); if (ret != 0) goto free_newinfo_counters; @@ -1271,8 +1279,9 @@ do_replace(void __user *user, unsigned int len) /* Get the old counters. */ get_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ - IP6T_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL); - vfree(oldinfo); + loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; + IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL); + free_table_info(oldinfo); if (copy_to_user(tmp.counters, counters, sizeof(struct ip6t_counters) * tmp.num_counters) != 0) ret = -EFAULT; @@ -1284,11 +1293,11 @@ do_replace(void __user *user, unsigned int len) module_put(t->me); up(&ip6t_mutex); free_newinfo_counters_untrans: - IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL); + IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL); free_newinfo_counters: vfree(counters); free_newinfo: - vfree(newinfo); + free_table_info(newinfo); return ret; } @@ -1321,6 +1330,7 @@ do_add_counters(void __user *user, unsigned int len) struct ip6t_counters_info tmp, *paddc; struct ip6t_table *t; int ret = 0; + void *loc_cpu_entry; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -1350,7 +1360,9 @@ do_add_counters(void __user *user, unsigned int len) } i = 0; - IP6T_ENTRY_ITERATE(t->private->entries, + /* Choose the copy that is on our node */ + loc_cpu_entry = t->private->entries[smp_processor_id()]; + IP6T_ENTRY_ITERATE(loc_cpu_entry, t->private->size, add_counter_to_entry, paddc->counters, @@ -1543,28 +1555,29 @@ int ip6t_register_table(struct ip6t_table *table, struct ip6t_table_info *newinfo; static struct ip6t_table_info bootstrap = { 0, 0, 0, { 0 }, { 0 }, { } }; + void *loc_cpu_entry; - newinfo = vmalloc(sizeof(struct ip6t_table_info) - + SMP_ALIGN(repl->size) * - (highest_possible_processor_id()+1)); + newinfo = alloc_table_info(repl->size); if (!newinfo) return -ENOMEM; - memcpy(newinfo->entries, repl->entries, repl->size); + /* choose the copy on our node/cpu */ + loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + memcpy(loc_cpu_entry, repl->entries, repl->size); ret = translate_table(table->name, table->valid_hooks, - newinfo, repl->size, + newinfo, loc_cpu_entry, repl->size, repl->num_entries, repl->hook_entry, repl->underflow); if (ret != 0) { - vfree(newinfo); + free_table_info(newinfo); return ret; } ret = down_interruptible(&ip6t_mutex); if (ret != 0) { - vfree(newinfo); + free_table_info(newinfo); return ret; } @@ -1593,20 +1606,23 @@ int ip6t_register_table(struct ip6t_table *table, return ret; free_unlock: - vfree(newinfo); + free_table_info(newinfo); goto unlock; } void ip6t_unregister_table(struct ip6t_table *table) { + void *loc_cpu_entry; + down(&ip6t_mutex); LIST_DELETE(&ip6t_tables, table); up(&ip6t_mutex); /* Decrease module usage counts and free resources */ - IP6T_ENTRY_ITERATE(table->private->entries, table->private->size, + loc_cpu_entry = table->private->entries[raw_smp_processor_id()]; + IP6T_ENTRY_ITERATE(loc_cpu_entry, table->private->size, cleanup_entry, NULL); - vfree(table->private); + free_table_info(table->private); } /* Returns 1 if the port is matched by the 
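/* Editor's note (not part of the patch): cleanup_entry and
 * add_counter_to_entry iterate over a single local copy
 * (loc_cpu_entry) even though every CPU now holds its own table image.
 * That is sufficient: match/target module references are taken once
 * per rule when the table is translated, not once per copy, and the
 * copies are identical apart from their counters. */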
range, 0 otherwise */ @@ -2035,26 +2051,39 @@ static void __exit fini(void) } /* - * find specified header up to transport protocol header. - * If found target header, the offset to the header is set to *offset - * and return 0. otherwise, return -1. + * find the offset to specified header or the protocol number of last header + * if target < 0. "last header" is transport protocol header, ESP, or + * "No next header". + * + * If target header is found, its offset is set in *offset and return protocol + * number. Otherwise, return -1. + * + * Note that non-1st fragment is special case that "the protocol number + * of last header" is "next header" field in Fragment header. In this case, + * *offset is meaningless and fragment offset is stored in *fragoff if fragoff + * isn't NULL. * - * Notes: - non-1st Fragment Header isn't skipped. - * - ESP header isn't skipped. - * - The target header may be trancated. */ -int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target) +int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, + int target, unsigned short *fragoff) { unsigned int start = (u8*)(skb->nh.ipv6h + 1) - skb->data; u8 nexthdr = skb->nh.ipv6h->nexthdr; unsigned int len = skb->len - start; + if (fragoff) + *fragoff = 0; + while (nexthdr != target) { struct ipv6_opt_hdr _hdr, *hp; unsigned int hdrlen; - if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) + if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) { + if (target < 0) + break; return -1; + } + hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); if (hp == NULL) return -1; @@ -2068,8 +2097,17 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target) if (fp == NULL) return -1; - if (ntohs(*fp) & ~0x7) + _frag_off = ntohs(*fp) & ~0x7; + if (_frag_off) { + if (target < 0 && + ((!ipv6_ext_hdr(hp->nexthdr)) || + nexthdr == NEXTHDR_NONE)) { + if (fragoff) + *fragoff = _frag_off; + return hp->nexthdr; + } return -1; + } hdrlen = 8; } else if (nexthdr == NEXTHDR_AUTH) hdrlen = (hp->hdrlen + 2) << 2; @@ -2082,7 +2120,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target) } *offset = start; - return 0; + return nexthdr; } EXPORT_SYMBOL(ip6t_register_table); @@ -2094,6 +2132,7 @@ EXPORT_SYMBOL(ip6t_register_target); EXPORT_SYMBOL(ip6t_unregister_target); EXPORT_SYMBOL(ip6t_ext_hdr); EXPORT_SYMBOL(ipv6_find_hdr); +EXPORT_SYMBOL(ip6_masked_addrcmp); module_init(init); module_exit(fini); diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 0cd1d1bd903..ae4653bfd65 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -13,6 +13,7 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/skbuff.h> +#include <linux/if_arp.h> #include <linux/ip.h> #include <linux/spinlock.h> #include <linux/icmpv6.h> diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index dde37793d20..f5c1a7ff4a1 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -9,6 +9,7 @@ #include <linux/module.h> #include <linux/skbuff.h> +#include <linux/ip.h> #include <linux/ipv6.h> #include <linux/types.h> #include <net/checksum.h> @@ -53,7 +54,7 @@ match(const struct sk_buff *skb, unsigned int ptr; unsigned int hdrlen = 0; - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH) < 0) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL) < 0) return 0; ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah); diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c index 
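/* Editor's sketch (kernel-style, not part of the patch): the two ways
 * the extended ipv6_find_hdr() is called.  Match modules ask for one
 * specific header; ip6_packet_match() passes target = -1 to learn the
 * last header's protocol number plus the fragment offset. */

unsigned int ptr;
unsigned short frag_off;
int proto;

/* 1. look for one header; on success *ptr is its offset */
if (ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL) < 0)
	return 0;			/* no AH header in this packet */

/* 2. walk to the end: returns the final protocol number.  For a
 * non-first fragment it returns the Fragment header's next-header
 * value instead; frag_off becomes non-zero and ptr is meaningless. */
proto = ipv6_find_hdr(skb, &ptr, -1, &frag_off);
if (proto < 0)
	return 0;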
c450a635e54..48cf5f9efc9 100644 --- a/net/ipv6/netfilter/ip6t_dst.c +++ b/net/ipv6/netfilter/ip6t_dst.c @@ -71,9 +71,9 @@ match(const struct sk_buff *skb, unsigned int optlen; #if HOPBYHOP - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0) #else - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0) #endif return 0; diff --git a/net/ipv6/netfilter/ip6t_esp.c b/net/ipv6/netfilter/ip6t_esp.c index 24bc0cde43a..e1828f6d0a4 100644 --- a/net/ipv6/netfilter/ip6t_esp.c +++ b/net/ipv6/netfilter/ip6t_esp.c @@ -9,6 +9,7 @@ #include <linux/module.h> #include <linux/skbuff.h> +#include <linux/ip.h> #include <linux/ipv6.h> #include <linux/types.h> #include <net/checksum.h> @@ -55,7 +56,7 @@ match(const struct sk_buff *skb, /* Make sure this isn't an evil packet */ /*DEBUGP("ipv6_esp entered \n");*/ - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ESP) < 0) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ESP, NULL) < 0) return 0; eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp); diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index 085d5f8eea2..d1549b26866 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -52,7 +52,7 @@ match(const struct sk_buff *skb, const struct ip6t_frag *fraginfo = matchinfo; unsigned int ptr; - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT) < 0) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL) < 0) return 0; fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag); diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 1d09485111d..e3bc8e2700e 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -71,9 +71,9 @@ match(const struct sk_buff *skb, unsigned int optlen; #if HOPBYHOP - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0) #else - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0) #endif return 0; diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index beb2fd5cebb..c1e770e4554 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -58,7 +58,7 @@ match(const struct sk_buff *skb, unsigned int ret = 0; struct in6_addr *ap, _addr; - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING) < 0) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL) < 0) return 0; rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 753a3ae8502..704fbbe7487 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -401,6 +401,48 @@ static ctl_table nf_ct_net_table[] = { }; #endif +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_conntrack.h> + +static int ipv6_tuple_to_nfattr(struct sk_buff *skb, + const struct nf_conntrack_tuple *tuple) +{ + NFA_PUT(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4, + &tuple->src.u3.ip6); + NFA_PUT(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4, + &tuple->dst.u3.ip6); + return 0; + +nfattr_failure: + return -1; +} + +static const size_t cta_min_ip[CTA_IP_MAX] = { + [CTA_IP_V6_SRC-1] = sizeof(u_int32_t)*4, + [CTA_IP_V6_DST-1] = sizeof(u_int32_t)*4, +}; + +static int ipv6_nfattr_to_tuple(struct nfattr *tb[], + struct nf_conntrack_tuple *t) +{ + if 
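/* Editor's note (not part of the patch): NFA_PUT() hides an skb
 * tailroom check that jumps to a caller-provided nfattr_failure label
 * when the attribute does not fit, which is why each builder above
 * ends with a bare "nfattr_failure: return -1;".  On the parse side,
 * cta_min_ip[] records the minimum payload per attribute so
 * nfattr_bad_size() can reject short attributes before the memcpy()s
 * below dereference them. */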
(!tb[CTA_IP_V6_SRC-1] || !tb[CTA_IP_V6_DST-1]) + return -EINVAL; + + if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip)) + return -EINVAL; + + memcpy(&t->src.u3.ip6, NFA_DATA(tb[CTA_IP_V6_SRC-1]), + sizeof(u_int32_t) * 4); + memcpy(&t->dst.u3.ip6, NFA_DATA(tb[CTA_IP_V6_DST-1]), + sizeof(u_int32_t) * 4); + + return 0; +} +#endif + struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = { .l3proto = PF_INET6, .name = "ipv6", @@ -409,6 +451,11 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = { .print_tuple = ipv6_print_tuple, .print_conntrack = ipv6_print_conntrack, .prepare = ipv6_prepare, +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + .tuple_to_nfattr = ipv6_tuple_to_nfattr, + .nfattr_to_tuple = ipv6_nfattr_to_tuple, +#endif .get_features = ipv6_get_features, .me = THIS_MODULE, }; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index a7e03cfacd0..09945c33305 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -57,17 +57,17 @@ static int icmpv6_pkt_to_tuple(const struct sk_buff *skb, return 1; } +/* Add 1; spaces filled with 0. */ +static u_int8_t invmap[] = { + [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1, + [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1, + [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_QUERY + 1, + [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_REPLY +1 +}; + static int icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple *orig) { - /* Add 1; spaces filled with 0. */ - static u_int8_t invmap[] = { - [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1, - [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1, - [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_QUERY + 1, - [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_REPLY +1 - }; - int type = orig->dst.u.icmp.type - 128; if (type < 0 || type >= sizeof(invmap) || !invmap[type]) return 0; @@ -185,7 +185,7 @@ icmpv6_error_message(struct sk_buff *skb, return -NF_ACCEPT; } - inproto = nf_ct_find_proto(PF_INET6, inprotonum); + inproto = __nf_ct_proto_find(PF_INET6, inprotonum); /* Are they talking about one of our connections? 
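/* Editor's sketch (not part of the patch): the invmap encoding used
 * above.  Entries hold "inverse type + 1" so that a zero slot means
 * "no inverse defined"; indices are offset by 128, the base of the
 * ICMPv6 informational range.  Hoisting the table to file scope lets
 * icmpv6_nfattr_to_tuple() (added below) validate types against it.
 * Standalone rendition: */
#include <stdio.h>

#define ECHO_REQUEST	128
#define ECHO_REPLY	129

static unsigned char invmap[] = {
	[ECHO_REQUEST - 128] = ECHO_REPLY + 1,	/* stored off by one */
	[ECHO_REPLY   - 128] = ECHO_REQUEST + 1,
};

static int invert_type(int type)
{
	int i = type - 128;

	if (i < 0 || i >= (int)sizeof(invmap) || !invmap[i])
		return -1;		/* no known inverse */
	return invmap[i] - 1;
}

int main(void)
{
	printf("%d\n", invert_type(ECHO_REQUEST));	/* 129 */
	printf("%d\n", invert_type(130));		/* -1  */
	return 0;
}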
*/ if (!nf_ct_get_tuple(skb, inip6off, inprotoff, PF_INET6, inprotonum, @@ -255,6 +255,60 @@ skipped: return icmpv6_error_message(skb, dataoff, ctinfo, hooknum); } +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_conntrack.h> +static int icmpv6_tuple_to_nfattr(struct sk_buff *skb, + const struct nf_conntrack_tuple *t) +{ + NFA_PUT(skb, CTA_PROTO_ICMPV6_ID, sizeof(u_int16_t), + &t->src.u.icmp.id); + NFA_PUT(skb, CTA_PROTO_ICMPV6_TYPE, sizeof(u_int8_t), + &t->dst.u.icmp.type); + NFA_PUT(skb, CTA_PROTO_ICMPV6_CODE, sizeof(u_int8_t), + &t->dst.u.icmp.code); + + return 0; + +nfattr_failure: + return -1; +} + +static const size_t cta_min_proto[CTA_PROTO_MAX] = { + [CTA_PROTO_ICMPV6_TYPE-1] = sizeof(u_int8_t), + [CTA_PROTO_ICMPV6_CODE-1] = sizeof(u_int8_t), + [CTA_PROTO_ICMPV6_ID-1] = sizeof(u_int16_t) +}; + +static int icmpv6_nfattr_to_tuple(struct nfattr *tb[], + struct nf_conntrack_tuple *tuple) +{ + if (!tb[CTA_PROTO_ICMPV6_TYPE-1] + || !tb[CTA_PROTO_ICMPV6_CODE-1] + || !tb[CTA_PROTO_ICMPV6_ID-1]) + return -EINVAL; + + if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto)) + return -EINVAL; + + tuple->dst.u.icmp.type = + *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_TYPE-1]); + tuple->dst.u.icmp.code = + *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_CODE-1]); + tuple->src.u.icmp.id = + *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_ID-1]); + + if (tuple->dst.u.icmp.type < 128 + || tuple->dst.u.icmp.type - 128 >= sizeof(invmap) + || !invmap[tuple->dst.u.icmp.type - 128]) + return -EINVAL; + + return 0; +} +#endif + struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 = { .l3proto = PF_INET6, @@ -267,6 +321,11 @@ struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 = .packet = icmpv6_packet, .new = icmpv6_new, .error = icmpv6_error, +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + .tuple_to_nfattr = icmpv6_tuple_to_nfattr, + .nfattr_to_tuple = icmpv6_nfattr_to_tuple, +#endif }; EXPORT_SYMBOL(nf_conntrack_protocol_icmpv6); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index c2c52af9e56..f3e5ffbd592 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -98,7 +98,7 @@ struct nf_ct_frag6_queue #define FRAG6Q_HASHSZ 64 static struct nf_ct_frag6_queue *nf_ct_frag6_hash[FRAG6Q_HASHSZ]; -static rwlock_t nf_ct_frag6_lock = RW_LOCK_UNLOCKED; +static DEFINE_RWLOCK(nf_ct_frag6_lock); static u32 nf_ct_frag6_hash_rnd; static LIST_HEAD(nf_ct_frag6_lru_list); int nf_ct_frag6_nqueues = 0; @@ -371,7 +371,7 @@ nf_ct_frag6_create(unsigned int hash, u32 id, struct in6_addr *src, struct init_timer(&fq->timer); fq->timer.function = nf_ct_frag6_expire; fq->timer.data = (long) fq; - fq->lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&fq->lock); atomic_set(&fq->refcnt, 1); return nf_ct_frag6_intern(hash, fq); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a66900cda2a..66f1d12ea57 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -32,6 +32,7 @@ #include <linux/icmpv6.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> +#include <linux/skbuff.h> #include <asm/uaccess.h> #include <asm/ioctls.h> #include <asm/bug.h> @@ -433,25 +434,14 @@ out: return err; csum_copy_err: - /* Clear queue. 
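/* Editor's note (not part of the patch): the open-coded block removed
 * below (take sk_receive_queue.lock, unlink the skb only if it is
 * still at the queue head after a MSG_PEEK, then free it) is exactly
 * what the new skb_kill_datagram(sk, skb, flags) helper does; udp.c
 * further down makes the same substitution, so the checksum-error
 * unwind is no longer duplicated in every datagram protocol. */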
*/ - if (flags&MSG_PEEK) { - int clear = 0; - spin_lock_bh(&sk->sk_receive_queue.lock); - if (skb == skb_peek(&sk->sk_receive_queue)) { - __skb_unlink(skb, &sk->sk_receive_queue); - clear = 1; - } - spin_unlock_bh(&sk->sk_receive_queue.lock); - if (clear) - kfree_skb(skb); - } + skb_kill_datagram(sk, skb, flags); /* Error for blocking case is chosen to masquerade as some normal condition. */ err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH; /* FIXME: increment a raw6 drops counter here */ - goto out_free; + goto out; } static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index c3123c9e1a8..577d49732b0 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -33,6 +33,7 @@ #include <asm/uaccess.h> #include <linux/init.h> #include <linux/netfilter_ipv4.h> +#include <linux/if_ether.h> #include <net/sock.h> #include <net/snmp.h> @@ -720,7 +721,7 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->type = ARPHRD_SIT; dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); - dev->mtu = 1500 - sizeof(struct iphdr); + dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr); dev->flags = IFF_NOARP; dev->iflink = 0; dev->addr_len = 4; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8827389abaf..2947bc56d8a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -48,6 +48,7 @@ #include <net/tcp.h> #include <net/ndisc.h> #include <net/inet6_hashtables.h> +#include <net/inet6_connection_sock.h> #include <net/ipv6.h> #include <net/transp_v6.h> #include <net/addrconf.h> @@ -59,6 +60,7 @@ #include <net/addrconf.h> #include <net/snmp.h> #include <net/dsfield.h> +#include <net/timewait_sock.h> #include <asm/uaccess.h> @@ -67,224 +69,33 @@ static void tcp_v6_send_reset(struct sk_buff *skb); static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req); -static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, +static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb); static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); -static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok); -static struct tcp_func ipv6_mapped; -static struct tcp_func ipv6_specific; +static struct inet_connection_sock_af_ops ipv6_mapped; +static struct inet_connection_sock_af_ops ipv6_specific; -static inline int tcp_v6_bind_conflict(const struct sock *sk, - const struct inet_bind_bucket *tb) -{ - const struct sock *sk2; - const struct hlist_node *node; - - /* We must walk the whole port owner list in this case. -DaveM */ - sk_for_each_bound(sk2, node, &tb->owners) { - if (sk != sk2 && - (!sk->sk_bound_dev_if || - !sk2->sk_bound_dev_if || - sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && - (!sk->sk_reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) && - ipv6_rcv_saddr_equal(sk, sk2)) - break; - } - - return node != NULL; -} - -/* Grrr, addr_type already calculated by caller, but I don't want - * to add some silly "cookie" argument to this method just for that. - * But it doesn't matter, the recalculation is in the rarest path - * this function ever takes. 
- */ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) { - struct inet_bind_hashbucket *head; - struct inet_bind_bucket *tb; - struct hlist_node *node; - int ret; - - local_bh_disable(); - if (snum == 0) { - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int remaining = (high - low) + 1; - int rover = net_random() % (high - low) + low; - - do { - head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)]; - spin_lock(&head->lock); - inet_bind_bucket_for_each(tb, node, &head->chain) - if (tb->port == rover) - goto next; - break; - next: - spin_unlock(&head->lock); - if (++rover > high) - rover = low; - } while (--remaining > 0); - - /* Exhausted local port range during search? It is not - * possible for us to be holding one of the bind hash - * locks if this test triggers, because if 'remaining' - * drops to zero, we broke out of the do/while loop at - * the top level, not from the 'break;' statement. - */ - ret = 1; - if (unlikely(remaining <= 0)) - goto fail; - - /* OK, here is the one we will use. */ - snum = rover; - } else { - head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; - spin_lock(&head->lock); - inet_bind_bucket_for_each(tb, node, &head->chain) - if (tb->port == snum) - goto tb_found; - } - tb = NULL; - goto tb_not_found; -tb_found: - if (tb && !hlist_empty(&tb->owners)) { - if (tb->fastreuse > 0 && sk->sk_reuse && - sk->sk_state != TCP_LISTEN) { - goto success; - } else { - ret = 1; - if (tcp_v6_bind_conflict(sk, tb)) - goto fail_unlock; - } - } -tb_not_found: - ret = 1; - if (tb == NULL) { - tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum); - if (tb == NULL) - goto fail_unlock; - } - if (hlist_empty(&tb->owners)) { - if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) - tb->fastreuse = 1; - else - tb->fastreuse = 0; - } else if (tb->fastreuse && - (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) - tb->fastreuse = 0; - -success: - if (!inet_csk(sk)->icsk_bind_hash) - inet_bind_hash(sk, tb, snum); - BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb); - ret = 0; - -fail_unlock: - spin_unlock(&head->lock); -fail: - local_bh_enable(); - return ret; -} - -static __inline__ void __tcp_v6_hash(struct sock *sk) -{ - struct hlist_head *list; - rwlock_t *lock; - - BUG_TRAP(sk_unhashed(sk)); - - if (sk->sk_state == TCP_LISTEN) { - list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)]; - lock = &tcp_hashinfo.lhash_lock; - inet_listen_wlock(&tcp_hashinfo); - } else { - unsigned int hash; - sk->sk_hash = hash = inet6_sk_ehashfn(sk); - hash &= (tcp_hashinfo.ehash_size - 1); - list = &tcp_hashinfo.ehash[hash].chain; - lock = &tcp_hashinfo.ehash[hash].lock; - write_lock(lock); - } - - __sk_add_node(sk, list); - sock_prot_inc_use(sk->sk_prot); - write_unlock(lock); + return inet_csk_get_port(&tcp_hashinfo, sk, snum, + inet6_csk_bind_conflict); } - static void tcp_v6_hash(struct sock *sk) { if (sk->sk_state != TCP_CLOSE) { - struct tcp_sock *tp = tcp_sk(sk); - - if (tp->af_specific == &ipv6_mapped) { + if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) { tcp_prot.hash(sk); return; } local_bh_disable(); - __tcp_v6_hash(sk); + __inet6_hash(&tcp_hashinfo, sk); local_bh_enable(); } } -/* - * Open request hash tables. 
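/* Editor's note (not part of the patch): the long tcp_v6_get_port()
 * body deleted above was a near-verbatim copy of the IPv4 port
 * allocator; the only family-specific part was the bind-conflict test,
 * which for IPv6 compares addresses with ipv6_rcv_saddr_equal().
 * inet_csk_get_port() therefore takes that test as a function pointer,
 * and tcp_v6_get_port() passes inet6_csk_bind_conflict, which carries
 * the removed tcp_v6_bind_conflict() logic in generic code. */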
- */ - -static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd) -{ - u32 a, b, c; - - a = raddr->s6_addr32[0]; - b = raddr->s6_addr32[1]; - c = raddr->s6_addr32[2]; - - a += JHASH_GOLDEN_RATIO; - b += JHASH_GOLDEN_RATIO; - c += rnd; - __jhash_mix(a, b, c); - - a += raddr->s6_addr32[3]; - b += (u32) rport; - __jhash_mix(a, b, c); - - return c & (TCP_SYNQ_HSIZE - 1); -} - -static struct request_sock *tcp_v6_search_req(const struct sock *sk, - struct request_sock ***prevp, - __u16 rport, - struct in6_addr *raddr, - struct in6_addr *laddr, - int iif) -{ - const struct inet_connection_sock *icsk = inet_csk(sk); - struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - struct request_sock *req, **prev; - - for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)]; - (req = *prev) != NULL; - prev = &req->dl_next) { - const struct tcp6_request_sock *treq = tcp6_rsk(req); - - if (inet_rsk(req)->rmt_port == rport && - req->rsk_ops->family == AF_INET6 && - ipv6_addr_equal(&treq->rmt_addr, raddr) && - ipv6_addr_equal(&treq->loc_addr, laddr) && - (!treq->iif || treq->iif == iif)) { - BUG_TRAP(req->sk == NULL); - *prevp = prev; - return req; - } - } - - return NULL; -} - static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len, struct in6_addr *saddr, struct in6_addr *daddr, @@ -308,195 +119,12 @@ static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb) } } -static int __tcp_v6_check_established(struct sock *sk, const __u16 lport, - struct inet_timewait_sock **twp) -{ - struct inet_sock *inet = inet_sk(sk); - const struct ipv6_pinfo *np = inet6_sk(sk); - const struct in6_addr *daddr = &np->rcv_saddr; - const struct in6_addr *saddr = &np->daddr; - const int dif = sk->sk_bound_dev_if; - const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport); - struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash); - struct sock *sk2; - const struct hlist_node *node; - struct inet_timewait_sock *tw; - - prefetch(head->chain.first); - write_lock(&head->lock); - - /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { - const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2); - - tw = inet_twsk(sk2); - - if(*((__u32 *)&(tw->tw_dport)) == ports && - sk2->sk_family == PF_INET6 && - ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) && - ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) && - sk2->sk_bound_dev_if == sk->sk_bound_dev_if) { - const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2); - struct tcp_sock *tp = tcp_sk(sk); - - if (tcptw->tw_ts_recent_stamp && - (!twp || - (sysctl_tcp_tw_reuse && - xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) { - /* See comment in tcp_ipv4.c */ - tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; - if (!tp->write_seq) - tp->write_seq = 1; - tp->rx_opt.ts_recent = tcptw->tw_ts_recent; - tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; - sock_hold(sk2); - goto unique; - } else - goto not_unique; - } - } - tw = NULL; - - /* And established part... */ - sk_for_each(sk2, node, &head->chain) { - if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif)) - goto not_unique; - } - -unique: - BUG_TRAP(sk_unhashed(sk)); - __sk_add_node(sk, &head->chain); - sk->sk_hash = hash; - sock_prot_inc_use(sk->sk_prot); - write_unlock(&head->lock); - - if (twp) { - *twp = tw; - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - } else if (tw) { - /* Silly. Should hash-dance instead... 
*/ - inet_twsk_deschedule(tw, &tcp_death_row); - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - - inet_twsk_put(tw); - } - return 0; - -not_unique: - write_unlock(&head->lock); - return -EADDRNOTAVAIL; -} - -static inline u32 tcpv6_port_offset(const struct sock *sk) -{ - const struct inet_sock *inet = inet_sk(sk); - const struct ipv6_pinfo *np = inet6_sk(sk); - - return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32, - np->daddr.s6_addr32, - inet->dport); -} - -static int tcp_v6_hash_connect(struct sock *sk) -{ - unsigned short snum = inet_sk(sk)->num; - struct inet_bind_hashbucket *head; - struct inet_bind_bucket *tb; - int ret; - - if (!snum) { - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int range = high - low; - int i; - int port; - static u32 hint; - u32 offset = hint + tcpv6_port_offset(sk); - struct hlist_node *node; - struct inet_timewait_sock *tw = NULL; - - local_bh_disable(); - for (i = 1; i <= range; i++) { - port = low + (i + offset) % range; - head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)]; - spin_lock(&head->lock); - - /* Does not bother with rcv_saddr checks, - * because the established check is already - * unique enough. - */ - inet_bind_bucket_for_each(tb, node, &head->chain) { - if (tb->port == port) { - BUG_TRAP(!hlist_empty(&tb->owners)); - if (tb->fastreuse >= 0) - goto next_port; - if (!__tcp_v6_check_established(sk, - port, - &tw)) - goto ok; - goto next_port; - } - } - - tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port); - if (!tb) { - spin_unlock(&head->lock); - break; - } - tb->fastreuse = -1; - goto ok; - - next_port: - spin_unlock(&head->lock); - } - local_bh_enable(); - - return -EADDRNOTAVAIL; - -ok: - hint += i; - - /* Head lock still held and bh's disabled */ - inet_bind_hash(sk, tb, port); - if (sk_unhashed(sk)) { - inet_sk(sk)->sport = htons(port); - __tcp_v6_hash(sk); - } - spin_unlock(&head->lock); - - if (tw) { - inet_twsk_deschedule(tw, &tcp_death_row); - inet_twsk_put(tw); - } - - ret = 0; - goto out; - } - - head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; - tb = inet_csk(sk)->icsk_bind_hash; - spin_lock_bh(&head->lock); - - if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { - __tcp_v6_hash(sk); - spin_unlock_bh(&head->lock); - return 0; - } else { - spin_unlock(&head->lock); - /* No definite answer... 
Walk to established hash table */ - ret = __tcp_v6_check_established(sk, snum, NULL); -out: - local_bh_enable(); - return ret; - } -} - static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; - struct inet_sock *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct in6_addr *saddr = NULL, *final_p = NULL, final; @@ -571,7 +199,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, */ if (addr_type == IPV6_ADDR_MAPPED) { - u32 exthdrlen = tp->ext_header_len; + u32 exthdrlen = icsk->icsk_ext_hdr_len; struct sockaddr_in sin; SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); @@ -583,14 +211,14 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sin.sin_port = usin->sin6_port; sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; - tp->af_specific = &ipv6_mapped; + icsk->icsk_af_ops = &ipv6_mapped; sk->sk_backlog_rcv = tcp_v4_do_rcv; err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); if (err) { - tp->ext_header_len = exthdrlen; - tp->af_specific = &ipv6_specific; + icsk->icsk_ext_hdr_len = exthdrlen; + icsk->icsk_af_ops = &ipv6_specific; sk->sk_backlog_rcv = tcp_v6_do_rcv; goto failure; } else { @@ -643,16 +271,17 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | NETIF_F_TSO); - tp->ext_header_len = 0; + icsk->icsk_ext_hdr_len = 0; if (np->opt) - tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen; + icsk->icsk_ext_hdr_len = (np->opt->opt_flen + + np->opt->opt_nflen); tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); inet->dport = usin->sin6_port; tcp_set_state(sk, TCP_SYN_SENT); - err = tcp_v6_hash_connect(sk); + err = inet6_hash_connect(&tcp_death_row, sk); if (err) goto late_failure; @@ -758,7 +387,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } else dst_hold(dst); - if (tp->pmtu_cookie > dst_mtu(dst)) { + if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { tcp_sync_mss(sk, dst_mtu(dst)); tcp_simple_retransmit(sk); } /* else let the usual retransmit timer handle it */ @@ -775,8 +404,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sock_owned_by_user(sk)) goto out; - req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr, - &hdr->saddr, inet6_iif(skb)); + req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr, + &hdr->saddr, inet6_iif(skb)); if (!req) goto out; @@ -822,7 +451,7 @@ out: static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, struct dst_entry *dst) { - struct tcp6_request_sock *treq = tcp6_rsk(req); + struct inet6_request_sock *treq = inet6_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff * skb; struct ipv6_txoptions *opt = NULL; @@ -888,8 +517,8 @@ done: static void tcp_v6_reqsk_destructor(struct request_sock *req) { - if (tcp6_rsk(req)->pktopts) - kfree_skb(tcp6_rsk(req)->pktopts); + if (inet6_rsk(req)->pktopts) + kfree_skb(inet6_rsk(req)->pktopts); } static struct request_sock_ops tcp6_request_sock_ops = { @@ -901,26 +530,15 @@ static struct request_sock_ops tcp6_request_sock_ops = { .send_reset = tcp_v6_send_reset }; -static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - struct inet6_skb_parm *opt = IP6CB(skb); - - if (np->rxopt.all) { - 
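/* Editor's sketch (not part of the patch): the v4-mapped fallback in
 * tcp_v6_connect() above.  When the destination is ::ffff:a.b.c.d the
 * socket swaps in the ipv6_mapped ops table and defers to
 * tcp_v4_connect(), restoring icsk_af_ops and icsk_ext_hdr_len if that
 * fails.  Standalone check for the mapped form: */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>

int main(void)
{
	struct in6_addr a;

	inet_pton(AF_INET6, "::ffff:192.0.2.1", &a);
	printf("v4-mapped: %d\n", IN6_IS_ADDR_V4MAPPED(&a) != 0); /* 1 */
	return 0;
}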
if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) || - ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && np->rxopt.bits.rxflow) || - (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) || - ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts))) - return 1; - } - return 0; -} - +static struct timewait_sock_ops tcp6_timewait_sock_ops = { + .twsk_obj_size = sizeof(struct tcp6_timewait_sock), + .twsk_unique = tcp_twsk_unique, +}; -static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, - struct sk_buff *skb) +static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) { struct ipv6_pinfo *np = inet6_sk(sk); + struct tcphdr *th = skb->h.th; if (skb->ip_summed == CHECKSUM_HW) { th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0); @@ -1091,8 +709,9 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) struct sock *nsk; /* Find possible connection requests. */ - req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr, - &skb->nh.ipv6h->daddr, inet6_iif(skb)); + req = inet6_csk_search_req(sk, &prev, th->source, + &skb->nh.ipv6h->saddr, + &skb->nh.ipv6h->daddr, inet6_iif(skb)); if (req) return tcp_check_req(sk, skb, req, prev); @@ -1116,23 +735,12 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) return sk; } -static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd); - - reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT); - inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT); -} - - /* FIXME: this is substantially similar to the ipv4 code. * Can some kind of merge be done? 
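/* Editor's note (not part of the patch): tcp6_timewait_sock_ops above
 * replaces the bare .twsk_obj_size field in struct proto (see the
 * .twsk_prot hunk near the end of this file).  Pairing the object size
 * with a twsk_unique() hook is what lets the generic
 * __inet6_check_established() earlier in this patch ask the protocol
 * whether a TIME-WAIT socket may be recycled; tcp_twsk_unique()
 * carries the timestamp test that the removed
 * __tcp_v6_check_established() used to inline. */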
-- erics */ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { - struct tcp6_request_sock *treq; + struct inet6_request_sock *treq; struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_options_received tmp_opt; struct tcp_sock *tp = tcp_sk(sk); @@ -1157,7 +765,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; - req = reqsk_alloc(&tcp6_request_sock_ops); + req = inet6_reqsk_alloc(&tcp6_request_sock_ops); if (req == NULL) goto drop; @@ -1170,7 +778,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; tcp_openreq_init(req, &tmp_opt, skb); - treq = tcp6_rsk(req); + treq = inet6_rsk(req); ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr); ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr); TCP_ECN_create_request(req, skb->h.th); @@ -1196,8 +804,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (tcp_v6_send_synack(sk, req, NULL)) goto drop; - tcp_v6_synq_add(sk, req); - + inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); return 0; drop: @@ -1212,7 +819,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst) { - struct tcp6_request_sock *treq = tcp6_rsk(req); + struct inet6_request_sock *treq = inet6_rsk(req); struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct tcp6_sock *newtcp6sk; struct inet_sock *newinet; @@ -1247,7 +854,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr); - newtp->af_specific = &ipv6_mapped; + inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; newsk->sk_backlog_rcv = tcp_v4_do_rcv; newnp->pktoptions = NULL; newnp->opt = NULL; @@ -1261,10 +868,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, */ /* It is tricky place. Until this moment IPv4 tcp - worked with IPv6 af_tcp.af_specific. + worked with IPv6 icsk.icsk_af_ops. Sync it now. 
*/ - tcp_sync_mss(newsk, newtp->pmtu_cookie); + tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); return newsk; } @@ -1371,10 +978,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, sock_kfree_s(sk, opt, opt->tot_len); } - newtp->ext_header_len = 0; + inet_csk(newsk)->icsk_ext_hdr_len = 0; if (newnp->opt) - newtp->ext_header_len = newnp->opt->opt_nflen + - newnp->opt->opt_flen; + inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + + newnp->opt->opt_flen); tcp_sync_mss(newsk, dst_mtu(dst)); newtp->advmss = dst_metric(dst, RTAX_ADVMSS); @@ -1382,7 +989,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6; - __tcp_v6_hash(newsk); + __inet6_hash(&tcp_hashinfo, newsk); inet_inherit_port(&tcp_hashinfo, sk, newsk); return newsk; @@ -1679,139 +1286,16 @@ do_time_wait: goto discard_it; } -static int tcp_v6_rebuild_header(struct sock *sk) -{ - int err; - struct dst_entry *dst; - struct ipv6_pinfo *np = inet6_sk(sk); - - dst = __sk_dst_check(sk, np->dst_cookie); - - if (dst == NULL) { - struct inet_sock *inet = inet_sk(sk); - struct in6_addr *final_p = NULL, final; - struct flowi fl; - - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.fl6_flowlabel = np->flow_label; - fl.oif = sk->sk_bound_dev_if; - fl.fl_ip_dport = inet->dport; - fl.fl_ip_sport = inet->sport; - - if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } - - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) { - sk->sk_route_caps = 0; - return err; - } - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { - sk->sk_err_soft = -err; - return err; - } - - ip6_dst_store(sk, dst, NULL); - sk->sk_route_caps = dst->dev->features & - ~(NETIF_F_IP_CSUM | NETIF_F_TSO); - } - - return 0; -} - -static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok) -{ - struct sock *sk = skb->sk; - struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *np = inet6_sk(sk); - struct flowi fl; - struct dst_entry *dst; - struct in6_addr *final_p = NULL, final; - - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.fl6_flowlabel = np->flow_label; - IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); - fl.oif = sk->sk_bound_dev_if; - fl.fl_ip_sport = inet->sport; - fl.fl_ip_dport = inet->dport; - - if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } - - dst = __sk_dst_check(sk, np->dst_cookie); - - if (dst == NULL) { - int err = ip6_dst_lookup(sk, &dst, &fl); - - if (err) { - sk->sk_err_soft = -err; - return err; - } - - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { - sk->sk_route_caps = 0; - return err; - } - - ip6_dst_store(sk, dst, NULL); - sk->sk_route_caps = dst->dev->features & - ~(NETIF_F_IP_CSUM | NETIF_F_TSO); - } - - skb->dst = dst_clone(dst); - - /* Restore final destination back after routing done */ - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - - return ip6_xmit(sk, skb, &fl, np->opt, 0); -} - -static void v6_addr2sockaddr(struct sock 
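/* Editor's note (not part of the patch): tcp_v6_rebuild_header() and
 * tcp_v6_xmit(), removed above, were essentially family-generic: the
 * same flowi setup, dst lookup and xfrm_lookup() apply to any
 * connection-oriented IPv6 socket.  Their bodies live on as
 * inet6_sk_rebuild_header() and inet6_csk_xmit() (the latter opens
 * this excerpt), wired up through ipv6_specific just below;
 * v6_addr2sockaddr() likewise becomes the shared
 * inet6_csk_addr2sockaddr(). */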
*sk, struct sockaddr * uaddr) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr; - - sin6->sin6_family = AF_INET6; - ipv6_addr_copy(&sin6->sin6_addr, &np->daddr); - sin6->sin6_port = inet_sk(sk)->dport; - /* We do not store received flowlabel for TCP */ - sin6->sin6_flowinfo = 0; - sin6->sin6_scope_id = 0; - if (sk->sk_bound_dev_if && - ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) - sin6->sin6_scope_id = sk->sk_bound_dev_if; -} - static int tcp_v6_remember_stamp(struct sock *sk) { /* Alas, not yet... */ return 0; } -static struct tcp_func ipv6_specific = { - .queue_xmit = tcp_v6_xmit, +static struct inet_connection_sock_af_ops ipv6_specific = { + .queue_xmit = inet6_csk_xmit, .send_check = tcp_v6_send_check, - .rebuild_header = tcp_v6_rebuild_header, + .rebuild_header = inet6_sk_rebuild_header, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, .remember_stamp = tcp_v6_remember_stamp, @@ -1819,7 +1303,7 @@ static struct tcp_func ipv6_specific = { .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, - .addr2sockaddr = v6_addr2sockaddr, + .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6) }; @@ -1827,7 +1311,7 @@ static struct tcp_func ipv6_specific = { * TCP over IPv4 via INET6 API */ -static struct tcp_func ipv6_mapped = { +static struct inet_connection_sock_af_ops ipv6_mapped = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, @@ -1838,7 +1322,7 @@ static struct tcp_func ipv6_mapped = { .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, - .addr2sockaddr = v6_addr2sockaddr, + .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6) }; @@ -1877,8 +1361,9 @@ static int tcp_v6_init_sock(struct sock *sk) sk->sk_state = TCP_CLOSE; - tp->af_specific = &ipv6_specific; + icsk->icsk_af_ops = &ipv6_specific; icsk->icsk_ca_ops = &tcp_init_congestion_ops; + icsk->icsk_sync_mss = tcp_sync_mss; sk->sk_write_space = sk_stream_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); @@ -1900,14 +1385,13 @@ static int tcp_v6_destroy_sock(struct sock *sk) static void get_openreq6(struct seq_file *seq, struct sock *sk, struct request_sock *req, int i, int uid) { - struct in6_addr *dest, *src; int ttd = req->expires - jiffies; + struct in6_addr *src = &inet6_rsk(req)->loc_addr; + struct in6_addr *dest = &inet6_rsk(req)->rmt_addr; if (ttd < 0) ttd = 0; - src = &tcp6_rsk(req)->loc_addr; - dest = &tcp6_rsk(req)->rmt_addr; seq_printf(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n", @@ -1988,14 +1472,14 @@ static void get_timewait6_sock(struct seq_file *seq, { struct in6_addr *dest, *src; __u16 destp, srcp; - struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw); + struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); int ttd = tw->tw_ttd - jiffies; if (ttd < 0) ttd = 0; - dest = &tcp6tw->tw_v6_daddr; - src = &tcp6tw->tw_v6_rcv_saddr; + dest = &tw6->tw_v6_daddr; + src = &tw6->tw_v6_rcv_saddr; destp = ntohs(tw->tw_dport); srcp = ntohs(tw->tw_sport); @@ -2093,7 +1577,7 @@ struct proto tcpv6_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp6_sock), - .twsk_obj_size = sizeof(struct tcp6_timewait_sock), + .twsk_prot = &tcp6_timewait_sock_ops, .rsk_prot = &tcp6_request_sock_ops, }; @@ -2110,7 +1594,8 @@ static struct inet_protosw tcpv6_protosw = { .ops = 
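/* Editor's note (not part of the patch): with struct tcp_func
 * generalised into inet_connection_sock_af_ops and reached through
 * icsk->icsk_af_ops, code that is generic across families and
 * transports can dispatch without knowing TCP or IPv6 internals, e.g.:
 *
 *	icsk->icsk_af_ops->queue_xmit(skb, 0);
 *	icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
 *
 * which is what lets other connection-oriented IPv6 protocols (DCCPv6
 * in this series) share inet6_csk_xmit(), inet6_sk_rebuild_header()
 * and inet6_csk_addr2sockaddr() rather than keep private copies of the
 * TCP versions. */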
&inet6_stream_ops, .capability = -1, .no_check = 0, - .flags = INET_PROTOSW_PERMANENT, + .flags = INET_PROTOSW_PERMANENT | + INET_PROTOSW_ICSK, }; void __init tcpv6_init(void) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 5cc8731eb55..d8538dcea81 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -36,6 +36,7 @@ #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/init.h> +#include <linux/skbuff.h> #include <asm/uaccess.h> #include <net/sock.h> @@ -300,20 +301,7 @@ out: return err; csum_copy_err: - /* Clear queue. */ - if (flags&MSG_PEEK) { - int clear = 0; - spin_lock_bh(&sk->sk_receive_queue.lock); - if (skb == skb_peek(&sk->sk_receive_queue)) { - __skb_unlink(skb, &sk->sk_receive_queue); - clear = 1; - } - spin_unlock_bh(&sk->sk_receive_queue.lock); - if (clear) - kfree_skb(skb); - } - - skb_free_datagram(sk, skb); + skb_kill_datagram(sk, skb, flags); if (flags & MSG_DONTWAIT) { UDP6_INC_STATS_USER(UDP_MIB_INERRORS); diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 34b3bb86840..0dc519b4040 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -75,7 +75,7 @@ static struct datalink_proto *pEII_datalink; static struct datalink_proto *p8023_datalink; static struct datalink_proto *pSNAP_datalink; -static struct proto_ops ipx_dgram_ops; +static const struct proto_ops ipx_dgram_ops; LIST_HEAD(ipx_interfaces); DEFINE_SPINLOCK(ipx_interfaces_lock); @@ -1884,7 +1884,7 @@ static int ipx_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) rc = -EINVAL; break; default: - rc = dev_ioctl(cmd, argp); + rc = -ENOIOCTLCMD; break; } @@ -1901,7 +1901,7 @@ static struct net_proto_family ipx_family_ops = { .owner = THIS_MODULE, }; -static struct proto_ops SOCKOPS_WRAPPED(ipx_dgram_ops) = { +static const struct proto_ops SOCKOPS_WRAPPED(ipx_dgram_ops) = { .family = PF_IPX, .owner = THIS_MODULE, .release = ipx_release, diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 6f92f9c6299..fbfa9675441 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -62,12 +62,12 @@ static int irda_create(struct socket *sock, int protocol); -static struct proto_ops irda_stream_ops; -static struct proto_ops irda_seqpacket_ops; -static struct proto_ops irda_dgram_ops; +static const struct proto_ops irda_stream_ops; +static const struct proto_ops irda_seqpacket_ops; +static const struct proto_ops irda_dgram_ops; #ifdef CONFIG_IRDA_ULTRA -static struct proto_ops irda_ultra_ops; +static const struct proto_ops irda_ultra_ops; #define ULTRA_MAX_DATA 382 #endif /* CONFIG_IRDA_ULTRA */ @@ -1438,8 +1438,9 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, /* * POSIX 1003.1g mandates this order. 
*/ - if (sk->sk_err) - ret = sock_error(sk); + ret = sock_error(sk); + if (ret) + break; else if (sk->sk_shutdown & RCV_SHUTDOWN) ; else if (noblock) @@ -1821,7 +1822,7 @@ static int irda_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return -EINVAL; default: IRDA_DEBUG(1, "%s(), doing device ioctl!\n", __FUNCTION__); - return dev_ioctl(cmd, (void __user *) arg); + return -ENOIOCTLCMD; } /*NOTREACHED*/ @@ -2463,7 +2464,7 @@ static struct net_proto_family irda_family_ops = { .owner = THIS_MODULE, }; -static struct proto_ops SOCKOPS_WRAPPED(irda_stream_ops) = { +static const struct proto_ops SOCKOPS_WRAPPED(irda_stream_ops) = { .family = PF_IRDA, .owner = THIS_MODULE, .release = irda_release, @@ -2484,7 +2485,7 @@ static struct proto_ops SOCKOPS_WRAPPED(irda_stream_ops) = { .sendpage = sock_no_sendpage, }; -static struct proto_ops SOCKOPS_WRAPPED(irda_seqpacket_ops) = { +static const struct proto_ops SOCKOPS_WRAPPED(irda_seqpacket_ops) = { .family = PF_IRDA, .owner = THIS_MODULE, .release = irda_release, @@ -2505,7 +2506,7 @@ static struct proto_ops SOCKOPS_WRAPPED(irda_seqpacket_ops) = { .sendpage = sock_no_sendpage, }; -static struct proto_ops SOCKOPS_WRAPPED(irda_dgram_ops) = { +static const struct proto_ops SOCKOPS_WRAPPED(irda_dgram_ops) = { .family = PF_IRDA, .owner = THIS_MODULE, .release = irda_release, @@ -2527,7 +2528,7 @@ static struct proto_ops SOCKOPS_WRAPPED(irda_dgram_ops) = { }; #ifdef CONFIG_IRDA_ULTRA -static struct proto_ops SOCKOPS_WRAPPED(irda_ultra_ops) = { +static const struct proto_ops SOCKOPS_WRAPPED(irda_ultra_ops) = { .family = PF_IRDA, .owner = THIS_MODULE, .release = irda_release, diff --git a/net/irda/iriap.c b/net/irda/iriap.c index b8bb78af8b8..254f9074690 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -364,7 +364,7 @@ static void iriap_disconnect_request(struct iriap_cb *self) /* * Function iriap_getvaluebyclass (addr, name, attr) * - * Retreive all values from attribute in all objects with given class + * Retrieve all values from attribute in all objects with given class * name */ int iriap_getvaluebyclass_request(struct iriap_cb *self, diff --git a/net/key/af_key.c b/net/key/af_key.c index 39031684b65..52efd04cbed 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -113,7 +113,7 @@ static __inline__ void pfkey_unlock_table(void) } -static struct proto_ops pfkey_ops; +static const struct proto_ops pfkey_ops; static void pfkey_insert(struct sock *sk) { @@ -336,6 +336,7 @@ static u8 sadb_ext_min_len[] = { [SADB_X_EXT_NAT_T_SPORT] = (u8) sizeof(struct sadb_x_nat_t_port), [SADB_X_EXT_NAT_T_DPORT] = (u8) sizeof(struct sadb_x_nat_t_port), [SADB_X_EXT_NAT_T_OA] = (u8) sizeof(struct sadb_address), + [SADB_X_EXT_SEC_CTX] = (u8) sizeof(struct sadb_x_sec_ctx), }; /* Verify sadb_address_{len,prefixlen} against sa_family. 
*/ @@ -383,6 +384,55 @@ static int verify_address_len(void *p) return 0; } +static inline int pfkey_sec_ctx_len(struct sadb_x_sec_ctx *sec_ctx) +{ + int len = 0; + + len += sizeof(struct sadb_x_sec_ctx); + len += sec_ctx->sadb_x_ctx_len; + len += sizeof(uint64_t) - 1; + len /= sizeof(uint64_t); + + return len; +} + +static inline int verify_sec_ctx_len(void *p) +{ + struct sadb_x_sec_ctx *sec_ctx = (struct sadb_x_sec_ctx *)p; + int len; + + if (sec_ctx->sadb_x_ctx_len > PAGE_SIZE) + return -EINVAL; + + len = pfkey_sec_ctx_len(sec_ctx); + + if (sec_ctx->sadb_x_sec_len != len) + return -EINVAL; + + return 0; +} + +static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(struct sadb_x_sec_ctx *sec_ctx) +{ + struct xfrm_user_sec_ctx *uctx = NULL; + int ctx_size = sec_ctx->sadb_x_ctx_len; + + uctx = kmalloc((sizeof(*uctx)+ctx_size), GFP_KERNEL); + + if (!uctx) + return NULL; + + uctx->len = pfkey_sec_ctx_len(sec_ctx); + uctx->exttype = sec_ctx->sadb_x_sec_exttype; + uctx->ctx_doi = sec_ctx->sadb_x_ctx_doi; + uctx->ctx_alg = sec_ctx->sadb_x_ctx_alg; + uctx->ctx_len = sec_ctx->sadb_x_ctx_len; + memcpy(uctx + 1, sec_ctx + 1, + uctx->ctx_len); + + return uctx; +} + static int present_and_same_family(struct sadb_address *src, struct sadb_address *dst) { @@ -438,6 +488,10 @@ static int parse_exthdrs(struct sk_buff *skb, struct sadb_msg *hdr, void **ext_h if (verify_address_len(p)) return -EINVAL; } + if (ext_type == SADB_X_EXT_SEC_CTX) { + if (verify_sec_ctx_len(p)) + return -EINVAL; + } ext_hdrs[ext_type-1] = p; } p += ext_len; @@ -586,6 +640,9 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys, struct sadb_key *key; struct sadb_x_sa2 *sa2; struct sockaddr_in *sin; + struct sadb_x_sec_ctx *sec_ctx; + struct xfrm_sec_ctx *xfrm_ctx; + int ctx_size = 0; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) struct sockaddr_in6 *sin6; #endif @@ -609,6 +666,12 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys, sizeof(struct sadb_address)*2 + sockaddr_size*2 + sizeof(struct sadb_x_sa2); + + if ((xfrm_ctx = x->security)) { + ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len); + size += sizeof(struct sadb_x_sec_ctx) + ctx_size; + } + /* identity & sensitivity */ if ((x->props.family == AF_INET && @@ -899,6 +962,20 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys, n_port->sadb_x_nat_t_port_reserved = 0; } + /* security context */ + if (xfrm_ctx) { + sec_ctx = (struct sadb_x_sec_ctx *) skb_put(skb, + sizeof(struct sadb_x_sec_ctx) + ctx_size); + sec_ctx->sadb_x_sec_len = + (sizeof(struct sadb_x_sec_ctx) + ctx_size) / sizeof(uint64_t); + sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX; + sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi; + sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg; + sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len; + memcpy(sec_ctx + 1, xfrm_ctx->ctx_str, + xfrm_ctx->ctx_len); + } + return skb; } @@ -909,6 +986,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr, struct sadb_lifetime *lifetime; struct sadb_sa *sa; struct sadb_key *key; + struct sadb_x_sec_ctx *sec_ctx; uint16_t proto; int err; @@ -993,6 +1071,21 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr, x->lft.soft_add_expires_seconds = lifetime->sadb_lifetime_addtime; x->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime; } + + sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1]; + if (sec_ctx != NULL) { + struct xfrm_user_sec_ctx *uctx = 
pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); + + if (!uctx) + goto out; + + err = security_xfrm_state_alloc(x, uctx); + kfree(uctx); + + if (err) + goto out; + } + key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1]; if (sa->sadb_sa_auth) { int keysize = 0; @@ -1720,6 +1813,18 @@ parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol) return 0; } +static inline int pfkey_xfrm_policy2sec_ctx_size(struct xfrm_policy *xp) +{ + struct xfrm_sec_ctx *xfrm_ctx = xp->security; + + if (xfrm_ctx) { + int len = sizeof(struct sadb_x_sec_ctx); + len += xfrm_ctx->ctx_len; + return PFKEY_ALIGN8(len); + } + return 0; +} + static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp) { int sockaddr_size = pfkey_sockaddr_size(xp->family); @@ -1733,7 +1838,8 @@ static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp) (sockaddr_size * 2) + sizeof(struct sadb_x_policy) + (xp->xfrm_nr * (sizeof(struct sadb_x_ipsecrequest) + - (socklen * 2))); + (socklen * 2))) + + pfkey_xfrm_policy2sec_ctx_size(xp); } static struct sk_buff * pfkey_xfrm_policy2msg_prep(struct xfrm_policy *xp) @@ -1757,6 +1863,8 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i struct sadb_lifetime *lifetime; struct sadb_x_policy *pol; struct sockaddr_in *sin; + struct sadb_x_sec_ctx *sec_ctx; + struct xfrm_sec_ctx *xfrm_ctx; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) struct sockaddr_in6 *sin6; #endif @@ -1941,6 +2049,21 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i } } } + + /* security context */ + if ((xfrm_ctx = xp->security)) { + int ctx_size = pfkey_xfrm_policy2sec_ctx_size(xp); + + sec_ctx = (struct sadb_x_sec_ctx *) skb_put(skb, ctx_size); + sec_ctx->sadb_x_sec_len = ctx_size / sizeof(uint64_t); + sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX; + sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi; + sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg; + sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len; + memcpy(sec_ctx + 1, xfrm_ctx->ctx_str, + xfrm_ctx->ctx_len); + } + hdr->sadb_msg_len = size / sizeof(uint64_t); hdr->sadb_msg_reserved = atomic_read(&xp->refcnt); } @@ -1976,12 +2099,13 @@ out: static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { - int err; + int err = 0; struct sadb_lifetime *lifetime; struct sadb_address *sa; struct sadb_x_policy *pol; struct xfrm_policy *xp; struct km_event c; + struct sadb_x_sec_ctx *sec_ctx; if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1]) || @@ -2028,6 +2152,22 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h if (xp->selector.dport) xp->selector.dport_mask = ~0; + sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1]; + if (sec_ctx != NULL) { + struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); + + if (!uctx) { + err = -ENOBUFS; + goto out; + } + + err = security_xfrm_policy_alloc(xp, uctx); + kfree(uctx); + + if (err) + goto out; + } + xp->lft.soft_byte_limit = XFRM_INF; xp->lft.hard_byte_limit = XFRM_INF; xp->lft.soft_packet_limit = XFRM_INF; @@ -2051,10 +2191,9 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp, hdr->sadb_msg_type != SADB_X_SPDUPDATE); - if (err) { - kfree(xp); - return err; - } + + if (err) + goto out; if (hdr->sadb_msg_type == SADB_X_SPDUPDATE) c.event = XFRM_MSG_UPDPOLICY; @@ -2069,6 +2208,7 @@ static int pfkey_spdadd(struct sock *sk, struct 
sk_buff *skb, struct sadb_msg *h return 0; out: + security_xfrm_policy_free(xp); kfree(xp); return err; } @@ -2078,9 +2218,10 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg int err; struct sadb_address *sa; struct sadb_x_policy *pol; - struct xfrm_policy *xp; + struct xfrm_policy *xp, tmp; struct xfrm_selector sel; struct km_event c; + struct sadb_x_sec_ctx *sec_ctx; if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1]) || @@ -2109,7 +2250,24 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg if (sel.dport) sel.dport_mask = ~0; - xp = xfrm_policy_bysel(pol->sadb_x_policy_dir-1, &sel, 1); + sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1]; + memset(&tmp, 0, sizeof(struct xfrm_policy)); + + if (sec_ctx != NULL) { + struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); + + if (!uctx) + return -ENOMEM; + + err = security_xfrm_policy_alloc(&tmp, uctx); + kfree(uctx); + + if (err) + return err; + } + + xp = xfrm_policy_bysel_ctx(pol->sadb_x_policy_dir-1, &sel, tmp.security, 1); + security_xfrm_policy_free(&tmp); if (xp == NULL) return -ENOENT; @@ -2660,6 +2818,7 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, { struct xfrm_policy *xp; struct sadb_x_policy *pol = (struct sadb_x_policy*)data; + struct sadb_x_sec_ctx *sec_ctx; switch (family) { case AF_INET: @@ -2709,10 +2868,32 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, (*dir = parse_ipsecrequests(xp, pol)) < 0) goto out; + /* security context too */ + if (len >= (pol->sadb_x_policy_len*8 + + sizeof(struct sadb_x_sec_ctx))) { + char *p = (char *)pol; + struct xfrm_user_sec_ctx *uctx; + + p += pol->sadb_x_policy_len*8; + sec_ctx = (struct sadb_x_sec_ctx *)p; + if (len < pol->sadb_x_policy_len*8 + + sec_ctx->sadb_x_sec_len) + goto out; + if ((*dir = verify_sec_ctx_len(p))) + goto out; + uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); + *dir = security_xfrm_policy_alloc(xp, uctx); + kfree(uctx); + + if (*dir) + goto out; + } + *dir = pol->sadb_x_policy_dir-1; return xp; out: + security_xfrm_policy_free(xp); kfree(xp); return NULL; } @@ -2946,7 +3127,7 @@ out: return err; } -static struct proto_ops pfkey_ops = { +static const struct proto_ops pfkey_ops = { .family = PF_KEY, .owner = THIS_MODULE, /* Operations that make no sense on pfkey sockets. */ diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index c3f0b078345..8171c53bc0e 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -36,7 +36,7 @@ static u16 llc_ui_sap_last_autoport = LLC_SAP_DYN_START; static u16 llc_ui_sap_link_no_max[256]; static struct sockaddr_llc llc_ui_addrnull; -static struct proto_ops llc_ui_ops; +static const struct proto_ops llc_ui_ops; static int llc_ui_wait_for_conn(struct sock *sk, long timeout); static int llc_ui_wait_for_disc(struct sock *sk, long timeout); @@ -566,10 +566,9 @@ static int llc_wait_data(struct sock *sk, long timeo) /* * POSIX 1003.1g mandates this order. 
*/ - if (sk->sk_err) { - rc = sock_error(sk); + rc = sock_error(sk); + if (rc) break; - } rc = 0; if (sk->sk_shutdown & RCV_SHUTDOWN) break; @@ -960,7 +959,7 @@ out: static int llc_ui_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { - return dev_ioctl(cmd, (void __user *)arg); + return -ENOIOCTLCMD; } /** @@ -1099,7 +1098,7 @@ static struct net_proto_family llc_ui_family_ops = { .owner = THIS_MODULE, }; -static struct proto_ops llc_ui_ops = { +static const struct proto_ops llc_ui_ops = { .family = PF_LLC, .owner = THIS_MODULE, .release = llc_ui_release, diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 794c41d19b2..7d55f9cbd85 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -95,4 +95,11 @@ config NF_CONNTRACK_FTP To compile it as a module, choose M here. If unsure, say N. +config NF_CT_NETLINK + tristate 'Connection tracking netlink interface (EXPERIMENTAL)' + depends on EXPERIMENTAL && NF_CONNTRACK && NETFILTER_NETLINK + depends on NF_CONNTRACK!=y || NETFILTER_NETLINK!=m + help + This option enables support for a netlink-based userspace interface + endmenu diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 55f019ad2c0..cb2183145c3 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -13,3 +13,6 @@ obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o # SCTP protocol connection tracking obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o + +# netlink interface for nf_conntrack +obj-$(CONFIG_NF_CT_NETLINK) += nf_conntrack_netlink.o diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index a7c7b490cf2..62bb509f05d 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -82,6 +82,8 @@ unsigned int nf_ct_log_invalid; static LIST_HEAD(unconfirmed); static int nf_conntrack_vmalloc; +static unsigned int nf_conntrack_next_id = 1; +static unsigned int nf_conntrack_expect_next_id = 1; #ifdef CONFIG_NF_CONNTRACK_EVENTS struct notifier_block *nf_conntrack_chain; struct notifier_block *nf_conntrack_expect_chain; @@ -184,7 +186,7 @@ DECLARE_MUTEX(nf_ct_cache_mutex); extern struct nf_conntrack_protocol nf_conntrack_generic_protocol; struct nf_conntrack_protocol * -nf_ct_find_proto(u_int16_t l3proto, u_int8_t protocol) +__nf_ct_proto_find(u_int16_t l3proto, u_int8_t protocol) { if (unlikely(nf_ct_protos[l3proto] == NULL)) return &nf_conntrack_generic_protocol; @@ -192,6 +194,50 @@ nf_ct_find_proto(u_int16_t l3proto, u_int8_t protocol) return nf_ct_protos[l3proto][protocol]; } +/* this is guaranteed to always return a valid protocol helper, since + * it falls back to generic_protocol */ +struct nf_conntrack_protocol * +nf_ct_proto_find_get(u_int16_t l3proto, u_int8_t protocol) +{ + struct nf_conntrack_protocol *p; + + preempt_disable(); + p = __nf_ct_proto_find(l3proto, protocol); + if (p) { + if (!try_module_get(p->me)) + p = &nf_conntrack_generic_protocol; + } + preempt_enable(); + + return p; +} + +void nf_ct_proto_put(struct nf_conntrack_protocol *p) +{ + module_put(p->me); +} + +struct nf_conntrack_l3proto * +nf_ct_l3proto_find_get(u_int16_t l3proto) +{ + struct nf_conntrack_l3proto *p; + + preempt_disable(); + p = __nf_ct_l3proto_find(l3proto); + if (p) { + if (!try_module_get(p->me)) + p = &nf_conntrack_generic_l3proto; + } + preempt_enable(); + + return p; +} + +void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p) +{ + module_put(p->me); +} + static int nf_conntrack_hash_rnd_initted; static unsigned int nf_conntrack_hash_rnd; @@ -384,7 +430,7 @@ 
nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, } /* nf_conntrack_expect helper functions */ -static void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) +void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) { ASSERT_WRITE_LOCK(&nf_conntrack_lock); NF_CT_ASSERT(!timer_pending(&exp->timeout)); @@ -404,6 +450,33 @@ static void expectation_timed_out(unsigned long ul_expect) nf_conntrack_expect_put(exp); } +struct nf_conntrack_expect * +__nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple) +{ + struct nf_conntrack_expect *i; + + list_for_each_entry(i, &nf_conntrack_expect_list, list) { + if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) { + atomic_inc(&i->use); + return i; + } + } + return NULL; +} + +/* Just find an expectation corresponding to a tuple. */ +struct nf_conntrack_expect * +nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple) +{ + struct nf_conntrack_expect *i; + + read_lock_bh(&nf_conntrack_lock); + i = __nf_conntrack_expect_find(tuple); + read_unlock_bh(&nf_conntrack_lock); + + return i; +} + /* If an expectation for this connection is found, it gets deleted from * the global list, then returned. */ static struct nf_conntrack_expect * @@ -432,7 +505,7 @@ find_expectation(const struct nf_conntrack_tuple *tuple) } /* delete all expectations for this conntrack */ -static void remove_expectations(struct nf_conn *ct) +void nf_ct_remove_expectations(struct nf_conn *ct) { struct nf_conntrack_expect *i, *tmp; @@ -462,7 +535,7 @@ clean_from_lists(struct nf_conn *ct) LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); /* Destroy all pending expectations */ - remove_expectations(ct); + nf_ct_remove_expectations(ct); } static void @@ -482,12 +555,11 @@ destroy_conntrack(struct nf_conntrack *nfct) /* To make sure we don't get any weird locking issues here: * destroy_conntrack() MUST NOT be called with a write lock * to nf_conntrack_lock!!! -HW */ - l3proto = nf_ct_find_l3proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num); + l3proto = __nf_ct_l3proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num); if (l3proto && l3proto->destroy) l3proto->destroy(ct); - proto = nf_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, - ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum); + proto = __nf_ct_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum); if (proto && proto->destroy) proto->destroy(ct); @@ -499,7 +571,7 @@ destroy_conntrack(struct nf_conntrack *nfct) * except TFTP can create an expectation on the first packet, * before connection is in the list, so we need to clean here, * too. */ - remove_expectations(ct); + nf_ct_remove_expectations(ct); /* We overload first tuple to link into unconfirmed list.
*/ if (!nf_ct_is_confirmed(ct)) { @@ -540,7 +612,7 @@ conntrack_tuple_cmp(const struct nf_conntrack_tuple_hash *i, && nf_ct_tuple_equal(tuple, &i->tuple); } -static struct nf_conntrack_tuple_hash * +struct nf_conntrack_tuple_hash * __nf_conntrack_find(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack) { @@ -575,6 +647,29 @@ nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple, return h; } +static void __nf_conntrack_hash_insert(struct nf_conn *ct, + unsigned int hash, + unsigned int repl_hash) +{ + ct->id = ++nf_conntrack_next_id; + list_prepend(&nf_conntrack_hash[hash], + &ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + list_prepend(&nf_conntrack_hash[repl_hash], + &ct->tuplehash[IP_CT_DIR_REPLY].list); +} + +void nf_conntrack_hash_insert(struct nf_conn *ct) +{ + unsigned int hash, repl_hash; + + hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + + write_lock_bh(&nf_conntrack_lock); + __nf_conntrack_hash_insert(ct, hash, repl_hash); + write_unlock_bh(&nf_conntrack_lock); +} + /* Confirm a connection given skb; places it in hash table */ int __nf_conntrack_confirm(struct sk_buff **pskb) @@ -621,10 +716,7 @@ __nf_conntrack_confirm(struct sk_buff **pskb) /* Remove from unconfirmed list */ list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); - list_prepend(&nf_conntrack_hash[hash], - &ct->tuplehash[IP_CT_DIR_ORIGINAL]); - list_prepend(&nf_conntrack_hash[repl_hash], - &ct->tuplehash[IP_CT_DIR_REPLY]); + __nf_conntrack_hash_insert(ct, hash, repl_hash); /* Timer relative to confirmation time, not original setting time, otherwise we'd get timer wrap in weird delay cases. */ @@ -708,13 +800,41 @@ static inline int helper_cmp(const struct nf_conntrack_helper *i, } static struct nf_conntrack_helper * -nf_ct_find_helper(const struct nf_conntrack_tuple *tuple) +__nf_ct_helper_find(const struct nf_conntrack_tuple *tuple) { return LIST_FIND(&helpers, helper_cmp, struct nf_conntrack_helper *, tuple); } +struct nf_conntrack_helper * +nf_ct_helper_find_get( const struct nf_conntrack_tuple *tuple) +{ + struct nf_conntrack_helper *helper; + + /* need nf_conntrack_lock to assure that helper exists until + * try_module_get() is called */ + read_lock_bh(&nf_conntrack_lock); + + helper = __nf_ct_helper_find(tuple); + if (helper) { + /* need to increase module usage count to assure helper will + * not go away while the caller is e.g. busy putting a + * conntrack in the hash that uses the helper */ + if (!try_module_get(helper->me)) + helper = NULL; + } + + read_unlock_bh(&nf_conntrack_lock); + + return helper; +} + +void nf_ct_helper_put(struct nf_conntrack_helper *helper) +{ + module_put(helper->me); +} + static struct nf_conn * __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, @@ -744,7 +864,7 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, /* find features needed by this conntrack. 
*/ features = l3proto->get_features(orig); read_lock_bh(&nf_conntrack_lock); - if (nf_ct_find_helper(repl) != NULL) + if (__nf_ct_helper_find(repl) != NULL) features |= NF_CT_F_HELP; read_unlock_bh(&nf_conntrack_lock); @@ -794,7 +914,7 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, { struct nf_conntrack_l3proto *l3proto; - l3proto = nf_ct_find_l3proto(orig->src.l3num); + l3proto = __nf_ct_l3proto_find(orig->src.l3num); return __nf_conntrack_alloc(orig, repl, l3proto); } @@ -853,7 +973,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, nf_conntrack_get(&conntrack->master->ct_general); NF_CT_STAT_INC(expect_new); } else { - conntrack->helper = nf_ct_find_helper(&repl_tuple); + conntrack->helper = __nf_ct_helper_find(&repl_tuple); NF_CT_STAT_INC(new); } @@ -947,13 +1067,13 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) return NF_ACCEPT; } - l3proto = nf_ct_find_l3proto((u_int16_t)pf); + l3proto = __nf_ct_l3proto_find((u_int16_t)pf); if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) { DEBUGP("not prepared to track yet or error occurred\n"); return -ret; } - proto = nf_ct_find_proto((u_int16_t)pf, protonum); + proto = __nf_ct_proto_find((u_int16_t)pf, protonum); /* It may be a special packet, error, unclean... * inverse of the return code tells the netfilter @@ -1002,9 +1122,9 @@ int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, const struct nf_conntrack_tuple *orig) { return nf_ct_invert_tuple(inverse, orig, - nf_ct_find_l3proto(orig->src.l3num), - nf_ct_find_proto(orig->src.l3num, - orig->dst.protonum)); + __nf_ct_l3proto_find(orig->src.l3num), + __nf_ct_proto_find(orig->src.l3num, + orig->dst.protonum)); } /* Would two expected things clash? */ @@ -1096,6 +1216,7 @@ static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp) exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ; add_timer(&exp->timeout); + exp->id = ++nf_conntrack_expect_next_id; atomic_inc(&exp->use); NF_CT_STAT_INC(expect_create); } @@ -1129,6 +1250,7 @@ static inline int refresh_timer(struct nf_conntrack_expect *i) int nf_conntrack_expect_related(struct nf_conntrack_expect *expect) { struct nf_conntrack_expect *i; + struct nf_conn *master = expect->master; int ret; DEBUGP("nf_conntrack_expect_related %p\n", expect); @@ -1149,9 +1271,9 @@ int nf_conntrack_expect_related(struct nf_conntrack_expect *expect) } } /* Will be over limit?
*/ - if (expect->master->helper->max_expected && - expect->master->expecting >= expect->master->helper->max_expected) - evict_oldest_expect(expect->master); + if (master->helper->max_expected && + master->expecting >= master->helper->max_expected) + evict_oldest_expect(master); nf_conntrack_expect_insert(expect); nf_conntrack_expect_event(IPEXP_NEW, expect); @@ -1175,7 +1297,7 @@ void nf_conntrack_alter_reply(struct nf_conn *conntrack, conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; if (!conntrack->master && conntrack->expecting == 0) - conntrack->helper = nf_ct_find_helper(newreply); + conntrack->helper = __nf_ct_helper_find(newreply); write_unlock_bh(&nf_conntrack_lock); } @@ -1200,6 +1322,19 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) return 0; } +struct nf_conntrack_helper * +__nf_conntrack_helper_find_byname(const char *name) +{ + struct nf_conntrack_helper *h; + + list_for_each_entry(h, &helpers, list) { + if (!strcmp(h->name, name)) + return h; + } + + return NULL; +} + static inline int unhelp(struct nf_conntrack_tuple_hash *i, const struct nf_conntrack_helper *me) { @@ -1283,6 +1418,51 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, nf_conntrack_event_cache(event, skb); } +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_conntrack.h> + +/* Generic function for tcp/udp/sctp/dccp and the like. This needs to be + * in nf_conntrack_core, since we don't want the protocols to autoload + * or depend on ctnetlink */ +int nf_ct_port_tuple_to_nfattr(struct sk_buff *skb, + const struct nf_conntrack_tuple *tuple) +{ + NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t), + &tuple->src.u.tcp.port); + NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t), + &tuple->dst.u.tcp.port); + return 0; + +nfattr_failure: + return -1; +} + +static const size_t cta_min_proto[CTA_PROTO_MAX] = { + [CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t), + [CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t) +}; + +int nf_ct_port_nfattr_to_tuple(struct nfattr *tb[], + struct nf_conntrack_tuple *t) +{ + if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1]) + return -EINVAL; + + if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto)) + return -EINVAL; + + t->src.u.tcp.port = + *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]); + t->dst.u.tcp.port = + *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]); + + return 0; +} +#endif + /* Used by ipt_REJECT and ip6t_REJECT. */ void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) { @@ -1365,6 +1545,11 @@ static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size) get_order(sizeof(struct list_head) * size)); } +void nf_conntrack_flush(void) +{ + nf_ct_iterate_cleanup(kill_all, NULL); +} + /* Mishearing the voices in his head, our hero wonders how he's supposed to kill the mall.
*/ void nf_conntrack_cleanup(void) @@ -1378,7 +1563,7 @@ void nf_conntrack_cleanup(void) nf_ct_event_cache_flush(); i_see_dead_people: - nf_ct_iterate_cleanup(kill_all, NULL); + nf_conntrack_flush(); if (atomic_read(&nf_conntrack_count) != 0) { schedule(); goto i_see_dead_people; diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 65080e269f2..d5a6eaf4a1d 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -44,7 +44,7 @@ static unsigned int ports_c; module_param_array(ports, ushort, &ports_c, 0400); static int loose; -module_param(loose, int, 0600); +module_param(loose, bool, 0600); unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c new file mode 100644 index 00000000000..73ab16bc7d4 --- /dev/null +++ b/net/netfilter/nf_conntrack_netlink.c @@ -0,0 +1,1653 @@ +/* Connection tracking via netlink socket. Allows for user space + * protocol helpers and general trouble making from userspace. + * + * (C) 2001 by Jay Schulist <jschlst@samba.org> + * (C) 2002-2005 by Harald Welte <laforge@gnumonks.org> + * (C) 2003 by Patrick McHardy <kaber@trash.net> + * (C) 2005 by Pablo Neira Ayuso <pablo@eurodev.net> + * + * I've reworked this stuff to use attributes instead of conntrack + * structures. 5.44 am. I need more tea. --pablo 05/07/11. + * + * Initial connection tracking via netlink development funded and + * generally made possible by Network Robots, Inc. (www.networkrobots.com) + * + * Further development of this code funded by Astaro AG (http://www.astaro.com) + * + * This software may be used and distributed according to the terms + * of the GNU General Public License, incorporated herein by reference. + * + * Derived from ip_conntrack_netlink.c: Port by Pablo Neira Ayuso (05/11/14) + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/timer.h> +#include <linux/skbuff.h> +#include <linux/errno.h> +#include <linux/netlink.h> +#include <linux/spinlock.h> +#include <linux/notifier.h> + +#include <linux/netfilter.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_l3proto.h> +#include <net/netfilter/nf_conntrack_protocol.h> +#include <linux/netfilter_ipv4/ip_nat_protocol.h> + +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_conntrack.h> + +MODULE_LICENSE("GPL"); + +static char __initdata version[] = "0.92"; + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...)
+#endif + + +static inline int +ctnetlink_dump_tuples_proto(struct sk_buff *skb, + const struct nf_conntrack_tuple *tuple) +{ + struct nf_conntrack_protocol *proto; + int ret = 0; + + NFA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum); + + /* If no protocol helper is found, this function will return the + * generic protocol helper, so proto won't *ever* be NULL */ + proto = nf_ct_proto_find_get(tuple->src.l3num, tuple->dst.protonum); + if (likely(proto->tuple_to_nfattr)) + ret = proto->tuple_to_nfattr(skb, tuple); + + nf_ct_proto_put(proto); + + return ret; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_tuples(struct sk_buff *skb, + const struct nf_conntrack_tuple *tuple) +{ + struct nfattr *nest_parms; + struct nf_conntrack_l3proto *l3proto; + int ret = 0; + + l3proto = nf_ct_l3proto_find_get(tuple->src.l3num); + + nest_parms = NFA_NEST(skb, CTA_TUPLE_IP); + if (likely(l3proto->tuple_to_nfattr)) + ret = l3proto->tuple_to_nfattr(skb, tuple); + NFA_NEST_END(skb, nest_parms); + + nf_ct_l3proto_put(l3proto); + + if (unlikely(ret < 0)) + return ret; + + nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO); + ret = ctnetlink_dump_tuples_proto(skb, tuple); + NFA_NEST_END(skb, nest_parms); + + return ret; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct) +{ + u_int32_t status = htonl((u_int32_t) ct->status); + NFA_PUT(skb, CTA_STATUS, sizeof(status), &status); + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct) +{ + long timeout_l = ct->timeout.expires - jiffies; + u_int32_t timeout; + + if (timeout_l < 0) + timeout = 0; + else + timeout = htonl(timeout_l / HZ); + + NFA_PUT(skb, CTA_TIMEOUT, sizeof(timeout), &timeout); + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct nf_conn *ct) +{ + struct nf_conntrack_protocol *proto = nf_ct_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum); + struct nfattr *nest_proto; + int ret; + + if (!proto->to_nfattr) { + nf_ct_proto_put(proto); + return 0; + } + + nest_proto = NFA_NEST(skb, CTA_PROTOINFO); + + ret = proto->to_nfattr(skb, nest_proto, ct); + + nf_ct_proto_put(proto); + + NFA_NEST_END(skb, nest_proto); + + return ret; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct nf_conn *ct) +{ + struct nfattr *nest_helper; + + if (!ct->helper) + return 0; + + nest_helper = NFA_NEST(skb, CTA_HELP); + NFA_PUT(skb, CTA_HELP_NAME, strlen(ct->helper->name), ct->helper->name); + + if (ct->helper->to_nfattr) + ct->helper->to_nfattr(skb, ct); + + NFA_NEST_END(skb, nest_helper); + + return 0; + +nfattr_failure: + return -1; +} + +#ifdef CONFIG_NF_CT_ACCT +static inline int +ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, + enum ip_conntrack_dir dir) +{ + enum ctattr_type type = dir ? 
CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; + struct nfattr *nest_count = NFA_NEST(skb, type); + u_int32_t tmp; + + tmp = htonl(ct->counters[dir].packets); + NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp); + + tmp = htonl(ct->counters[dir].bytes); + NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp); + + NFA_NEST_END(skb, nest_count); + + return 0; + +nfattr_failure: + return -1; +} +#else +#define ctnetlink_dump_counters(a, b, c) (0) +#endif + +#ifdef CONFIG_NF_CONNTRACK_MARK +static inline int +ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) +{ + u_int32_t mark = htonl(ct->mark); + + NFA_PUT(skb, CTA_MARK, sizeof(u_int32_t), &mark); + return 0; + +nfattr_failure: + return -1; +} +#else +#define ctnetlink_dump_mark(a, b) (0) +#endif + +static inline int +ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) +{ + u_int32_t id = htonl(ct->id); + NFA_PUT(skb, CTA_ID, sizeof(u_int32_t), &id); + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct) +{ + u_int32_t use = htonl(atomic_read(&ct->ct_general.use)); + + NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use); + return 0; + +nfattr_failure: + return -1; +} + +#define tuple(ct, dir) (&(ct)->tuplehash[dir].tuple) + +static int +ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, + int event, int nowait, + const struct nf_conn *ct) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + struct nfattr *nest_parms; + unsigned char *b; + + b = skb->tail; + + event |= NFNL_SUBSYS_CTNETLINK << 8; + nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg)); + nfmsg = NLMSG_DATA(nlh); + + nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0; + nfmsg->nfgen_family = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG); + if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) + goto nfattr_failure; + NFA_NEST_END(skb, nest_parms); + + nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY); + if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0) + goto nfattr_failure; + NFA_NEST_END(skb, nest_parms); + + if (ctnetlink_dump_status(skb, ct) < 0 || + ctnetlink_dump_timeout(skb, ct) < 0 || + ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || + ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || + ctnetlink_dump_protoinfo(skb, ct) < 0 || + ctnetlink_dump_helpinfo(skb, ct) < 0 || + ctnetlink_dump_mark(skb, ct) < 0 || + ctnetlink_dump_id(skb, ct) < 0 || + ctnetlink_dump_use(skb, ct) < 0) + goto nfattr_failure; + + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +nfattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +#ifdef CONFIG_NF_CONNTRACK_EVENTS +static int ctnetlink_conntrack_event(struct notifier_block *this, + unsigned long events, void *ptr) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + struct nfattr *nest_parms; + struct nf_conn *ct = (struct nf_conn *)ptr; + struct sk_buff *skb; + unsigned int type; + unsigned char *b; + unsigned int flags = 0, group; + + /* ignore our fake conntrack entry */ + if (ct == &nf_conntrack_untracked) + return NOTIFY_DONE; + + if (events & IPCT_DESTROY) { + type = IPCTNL_MSG_CT_DELETE; + group = NFNLGRP_CONNTRACK_DESTROY; + } else if (events & (IPCT_NEW | IPCT_RELATED)) { + type = IPCTNL_MSG_CT_NEW; + flags = NLM_F_CREATE|NLM_F_EXCL; + /* dump everything */ + events = ~0UL; + group = NFNLGRP_CONNTRACK_NEW; + } else if 
(events & (IPCT_STATUS | + IPCT_PROTOINFO | + IPCT_HELPER | + IPCT_HELPINFO | + IPCT_NATINFO)) { + type = IPCTNL_MSG_CT_NEW; + group = NFNLGRP_CONNTRACK_UPDATE; + } else + return NOTIFY_DONE; + + /* FIXME: Check whether there are any listeners first, so we don't hurt performance */ + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); + if (!skb) + return NOTIFY_DONE; + + b = skb->tail; + + type |= NFNL_SUBSYS_CTNETLINK << 8; + nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg)); + nfmsg = NLMSG_DATA(nlh); + + nlh->nlmsg_flags = flags; + nfmsg->nfgen_family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG); + if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) + goto nfattr_failure; + NFA_NEST_END(skb, nest_parms); + + nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY); + if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0) + goto nfattr_failure; + NFA_NEST_END(skb, nest_parms); + + /* NAT stuff is now a status flag */ + if ((events & IPCT_STATUS || events & IPCT_NATINFO) + && ctnetlink_dump_status(skb, ct) < 0) + goto nfattr_failure; + if (events & IPCT_REFRESH + && ctnetlink_dump_timeout(skb, ct) < 0) + goto nfattr_failure; + if (events & IPCT_PROTOINFO + && ctnetlink_dump_protoinfo(skb, ct) < 0) + goto nfattr_failure; + if (events & IPCT_HELPINFO + && ctnetlink_dump_helpinfo(skb, ct) < 0) + goto nfattr_failure; + + if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || + ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) + goto nfattr_failure; + + nlh->nlmsg_len = skb->tail - b; + nfnetlink_send(skb, 0, group, 0); + return NOTIFY_DONE; + +nlmsg_failure: +nfattr_failure: + kfree_skb(skb); + return NOTIFY_DONE; +} +#endif /* CONFIG_NF_CONNTRACK_EVENTS */ + +static int ctnetlink_done(struct netlink_callback *cb) +{ + DEBUGP("entered %s\n", __FUNCTION__); + return 0; +} + +#define L3PROTO(ct) ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num + +static int +ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct nf_conn *ct = NULL; + struct nf_conntrack_tuple_hash *h; + struct list_head *i; + u_int32_t *id = (u_int32_t *) &cb->args[1]; + struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh); + u_int8_t l3proto = nfmsg->nfgen_family; + + DEBUGP("entered %s, last bucket=%lu id=%u\n", __FUNCTION__, + cb->args[0], *id); + + read_lock_bh(&nf_conntrack_lock); + for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++, *id = 0) { + list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) { + h = (struct nf_conntrack_tuple_hash *) i; + if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) + continue; + ct = nf_ct_tuplehash_to_ctrack(h); + /* Dump entries of a given L3 protocol number. + * If it is not specified, i.e. l3proto == 0, + * then dump everything.
*/ + if (l3proto && L3PROTO(ct) != l3proto) + continue; + if (ct->id <= *id) + continue; + if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + IPCTNL_MSG_CT_NEW, + 1, ct) < 0) + goto out; + *id = ct->id; + } + } +out: + read_unlock_bh(&nf_conntrack_lock); + + DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); + + return skb->len; +} + +#ifdef CONFIG_NF_CT_ACCT +static int +ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct nf_conn *ct = NULL; + struct nf_conntrack_tuple_hash *h; + struct list_head *i; + u_int32_t *id = (u_int32_t *) &cb->args[1]; + struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh); + u_int8_t l3proto = nfmsg->nfgen_family; + + DEBUGP("entered %s, last bucket=%lu id=%u\n", __FUNCTION__, + cb->args[0], *id); + + write_lock_bh(&nf_conntrack_lock); + for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++, *id = 0) { + list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) { + h = (struct nf_conntrack_tuple_hash *) i; + if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) + continue; + ct = nf_ct_tuplehash_to_ctrack(h); + if (l3proto && L3PROTO(ct) != l3proto) + continue; + if (ct->id <= *id) + continue; + if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + IPCTNL_MSG_CT_NEW, + 1, ct) < 0) + goto out; + *id = ct->id; + + memset(&ct->counters, 0, sizeof(ct->counters)); + } + } +out: + write_unlock_bh(&nf_conntrack_lock); + + DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); + + return skb->len; +} +#endif + +static inline int +ctnetlink_parse_tuple_ip(struct nfattr *attr, struct nf_conntrack_tuple *tuple) +{ + struct nfattr *tb[CTA_IP_MAX]; + struct nf_conntrack_l3proto *l3proto; + int ret = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + nfattr_parse_nested(tb, CTA_IP_MAX, attr); + + l3proto = nf_ct_l3proto_find_get(tuple->src.l3num); + + if (likely(l3proto->nfattr_to_tuple)) + ret = l3proto->nfattr_to_tuple(tb, tuple); + + nf_ct_l3proto_put(l3proto); + + DEBUGP("leaving\n"); + + return ret; +} + +static const size_t cta_min_proto[CTA_PROTO_MAX] = { + [CTA_PROTO_NUM-1] = sizeof(u_int8_t), +}; + +static inline int +ctnetlink_parse_tuple_proto(struct nfattr *attr, + struct nf_conntrack_tuple *tuple) +{ + struct nfattr *tb[CTA_PROTO_MAX]; + struct nf_conntrack_protocol *proto; + int ret = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + nfattr_parse_nested(tb, CTA_PROTO_MAX, attr); + + if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto)) + return -EINVAL; + + if (!tb[CTA_PROTO_NUM-1]) + return -EINVAL; + tuple->dst.protonum = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]); + + proto = nf_ct_proto_find_get(tuple->src.l3num, tuple->dst.protonum); + + if (likely(proto->nfattr_to_tuple)) + ret = proto->nfattr_to_tuple(tb, tuple); + + nf_ct_proto_put(proto); + + return ret; +} + +static inline int +ctnetlink_parse_tuple(struct nfattr *cda[], struct nf_conntrack_tuple *tuple, + enum ctattr_tuple type, u_int8_t l3num) +{ + struct nfattr *tb[CTA_TUPLE_MAX]; + int err; + + DEBUGP("entered %s\n", __FUNCTION__); + + memset(tuple, 0, sizeof(*tuple)); + + nfattr_parse_nested(tb, CTA_TUPLE_MAX, cda[type-1]); + + if (!tb[CTA_TUPLE_IP-1]) + return -EINVAL; + + tuple->src.l3num = l3num; + + err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP-1], tuple); + if (err < 0) + return err; + + if (!tb[CTA_TUPLE_PROTO-1]) + return -EINVAL; + + err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO-1], tuple); + if (err < 0) + return err; + + /* orig and expect tuples get DIR_ORIGINAL */ + if (type == CTA_TUPLE_REPLY) +
tuple->dst.dir = IP_CT_DIR_REPLY; + else + tuple->dst.dir = IP_CT_DIR_ORIGINAL; + + NF_CT_DUMP_TUPLE(tuple); + + DEBUGP("leaving\n"); + + return 0; +} + +#ifdef CONFIG_IP_NF_NAT_NEEDED +static const size_t cta_min_protonat[CTA_PROTONAT_MAX] = { + [CTA_PROTONAT_PORT_MIN-1] = sizeof(u_int16_t), + [CTA_PROTONAT_PORT_MAX-1] = sizeof(u_int16_t), +}; + +static int ctnetlink_parse_nat_proto(struct nfattr *attr, + const struct nf_conn *ct, + struct ip_nat_range *range) +{ + struct nfattr *tb[CTA_PROTONAT_MAX]; + struct ip_nat_protocol *npt; + + DEBUGP("entered %s\n", __FUNCTION__); + + nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr); + + if (nfattr_bad_size(tb, CTA_PROTONAT_MAX, cta_min_protonat)) + return -EINVAL; + + npt = ip_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum); + + if (!npt->nfattr_to_range) { + ip_nat_proto_put(npt); + return 0; + } + + /* nfattr_to_range returns 1 if it parsed, 0 if not, neg. on error */ + if (npt->nfattr_to_range(tb, range) > 0) + range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED; + + ip_nat_proto_put(npt); + + DEBUGP("leaving\n"); + return 0; +} + +static const size_t cta_min_nat[CTA_NAT_MAX] = { + [CTA_NAT_MINIP-1] = sizeof(u_int32_t), + [CTA_NAT_MAXIP-1] = sizeof(u_int32_t), +}; + +static inline int +ctnetlink_parse_nat(struct nfattr *cda[], + const struct nf_conn *ct, struct ip_nat_range *range) +{ + struct nfattr *tb[CTA_NAT_MAX]; + int err; + + DEBUGP("entered %s\n", __FUNCTION__); + + memset(range, 0, sizeof(*range)); + + nfattr_parse_nested(tb, CTA_NAT_MAX, cda[CTA_NAT-1]); + + if (nfattr_bad_size(tb, CTA_NAT_MAX, cta_min_nat)) + return -EINVAL; + + if (tb[CTA_NAT_MINIP-1]) + range->min_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MINIP-1]); + + if (!tb[CTA_NAT_MAXIP-1]) + range->max_ip = range->min_ip; + else + range->max_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MAXIP-1]); + + if (range->min_ip) + range->flags |= IP_NAT_RANGE_MAP_IPS; + + if (!tb[CTA_NAT_PROTO-1]) + return 0; + + err = ctnetlink_parse_nat_proto(tb[CTA_NAT_PROTO-1], ct, range); + if (err < 0) + return err; + + DEBUGP("leaving\n"); + return 0; +} +#endif + +static inline int +ctnetlink_parse_help(struct nfattr *attr, char **helper_name) +{ + struct nfattr *tb[CTA_HELP_MAX]; + + DEBUGP("entered %s\n", __FUNCTION__); + + nfattr_parse_nested(tb, CTA_HELP_MAX, attr); + + if (!tb[CTA_HELP_NAME-1]) + return -EINVAL; + + *helper_name = NFA_DATA(tb[CTA_HELP_NAME-1]); + + return 0; +} + +static const size_t cta_min[CTA_MAX] = { + [CTA_STATUS-1] = sizeof(u_int32_t), + [CTA_TIMEOUT-1] = sizeof(u_int32_t), + [CTA_MARK-1] = sizeof(u_int32_t), + [CTA_USE-1] = sizeof(u_int32_t), + [CTA_ID-1] = sizeof(u_int32_t) +}; + +static int +ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct nf_conntrack_tuple_hash *h; + struct nf_conntrack_tuple tuple; + struct nf_conn *ct; + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + u_int8_t u3 = nfmsg->nfgen_family; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + if (nfattr_bad_size(cda, CTA_MAX, cta_min)) + return -EINVAL; + + if (cda[CTA_TUPLE_ORIG-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3); + else if (cda[CTA_TUPLE_REPLY-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); + else { + /* Flush the whole table */ + nf_conntrack_flush(); + return 0; + } + + if (err < 0) + return err; + + h = nf_conntrack_find_get(&tuple, NULL); + if (!h) { + DEBUGP("tuple not found in conntrack hash\n"); + return -ENOENT; + } + + ct = 
nf_ct_tuplehash_to_ctrack(h); + + if (cda[CTA_ID-1]) { + u_int32_t id = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_ID-1])); + if (ct->id != id) { + nf_ct_put(ct); + return -ENOENT; + } + } + if (del_timer(&ct->timeout)) + ct->timeout.function((unsigned long)ct); + + nf_ct_put(ct); + DEBUGP("leaving\n"); + + return 0; +} + +static int +ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct nf_conntrack_tuple_hash *h; + struct nf_conntrack_tuple tuple; + struct nf_conn *ct; + struct sk_buff *skb2 = NULL; + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + u_int8_t u3 = nfmsg->nfgen_family; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + u32 rlen; + + if (NFNL_MSG_TYPE(nlh->nlmsg_type) == + IPCTNL_MSG_CT_GET_CTRZERO) { +#ifdef CONFIG_NF_CT_ACCT + if ((*errp = netlink_dump_start(ctnl, skb, nlh, + ctnetlink_dump_table_w, + ctnetlink_done)) != 0) + return -EINVAL; +#else + return -ENOTSUPP; +#endif + } else { + if ((*errp = netlink_dump_start(ctnl, skb, nlh, + ctnetlink_dump_table, + ctnetlink_done)) != 0) + return -EINVAL; + } + + rlen = NLMSG_ALIGN(nlh->nlmsg_len); + if (rlen > skb->len) + rlen = skb->len; + skb_pull(skb, rlen); + return 0; + } + + if (nfattr_bad_size(cda, CTA_MAX, cta_min)) + return -EINVAL; + + if (cda[CTA_TUPLE_ORIG-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3); + else if (cda[CTA_TUPLE_REPLY-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); + else + return -EINVAL; + + if (err < 0) + return err; + + h = nf_conntrack_find_get(&tuple, NULL); + if (!h) { + DEBUGP("tuple not found in conntrack hash\n"); + return -ENOENT; + } + DEBUGP("tuple found\n"); + ct = nf_ct_tuplehash_to_ctrack(h); + + err = -ENOMEM; + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb2) { + nf_ct_put(ct); + return -ENOMEM; + } + NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid; + + err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, + IPCTNL_MSG_CT_NEW, 1, ct); + nf_ct_put(ct); + if (err <= 0) + goto free; + + err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + if (err < 0) + goto out; + + DEBUGP("leaving\n"); + return 0; + +free: + kfree_skb(skb2); +out: + return err; +} + +static inline int +ctnetlink_change_status(struct nf_conn *ct, struct nfattr *cda[]) +{ + unsigned long d; + unsigned status = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1])); + d = ct->status ^ status; + + if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING)) + /* unchangeable */ + return -EINVAL; + + if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY)) + /* SEEN_REPLY bit can only be set */ + return -EINVAL; + + + if (d & IPS_ASSURED && !(status & IPS_ASSURED)) + /* ASSURED bit can only be set */ + return -EINVAL; + + if (cda[CTA_NAT-1]) { +#ifndef CONFIG_IP_NF_NAT_NEEDED + return -EINVAL; +#else + unsigned int hooknum; + struct ip_nat_range range; + + if (ctnetlink_parse_nat(cda, ct, &range) < 0) + return -EINVAL; + + DEBUGP("NAT: %u.%u.%u.%u-%u.%u.%u.%u:%u-%u\n", + NIPQUAD(range.min_ip), NIPQUAD(range.max_ip), + htons(range.min.all), htons(range.max.all)); + + /* This is tricky but it works.
ip_nat_setup_info needs the + * hook number as parameter, so let's do the correct + * conversion and run away */ + if (status & IPS_SRC_NAT_DONE) + hooknum = NF_IP_POST_ROUTING; /* IP_NAT_MANIP_SRC */ + else if (status & IPS_DST_NAT_DONE) + hooknum = NF_IP_PRE_ROUTING; /* IP_NAT_MANIP_DST */ + else + return -EINVAL; /* Missing NAT flags */ + + DEBUGP("NAT status: %lu\n", + status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK)); + + if (ip_nat_initialized(ct, HOOK2MANIP(hooknum))) + return -EEXIST; + ip_nat_setup_info(ct, &range, hooknum); + + DEBUGP("NAT status after setup_info: %lu\n", + ct->status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK)); +#endif + } + + /* Be careful here, modifying NAT bits can screw up things, + * so don't let users modify them directly if they don't pass + * ip_nat_range. */ + ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK); + return 0; +} + + +static inline int +ctnetlink_change_helper(struct nf_conn *ct, struct nfattr *cda[]) +{ + struct nf_conntrack_helper *helper; + char *helpname; + int err; + + DEBUGP("entered %s\n", __FUNCTION__); + + /* don't change helper of sibling connections */ + if (ct->master) + return -EINVAL; + + err = ctnetlink_parse_help(cda[CTA_HELP-1], &helpname); + if (err < 0) + return err; + + helper = __nf_conntrack_helper_find_byname(helpname); + if (!helper) { + if (!strcmp(helpname, "")) + helper = NULL; + else + return -EINVAL; + } + + if (ct->helper) { + if (!helper) { + /* we had a helper before ... */ + nf_ct_remove_expectations(ct); + ct->helper = NULL; + } else { + /* need to zero data of old helper */ + memset(&ct->help, 0, sizeof(ct->help)); + } + } + + ct->helper = helper; + + return 0; +} + +static inline int +ctnetlink_change_timeout(struct nf_conn *ct, struct nfattr *cda[]) +{ + u_int32_t timeout = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1])); + + if (!del_timer(&ct->timeout)) + return -ETIME; + + ct->timeout.expires = jiffies + timeout * HZ; + add_timer(&ct->timeout); + + return 0; +} + +static inline int +ctnetlink_change_protoinfo(struct nf_conn *ct, struct nfattr *cda[]) +{ + struct nfattr *tb[CTA_PROTOINFO_MAX], *attr = cda[CTA_PROTOINFO-1]; + struct nf_conntrack_protocol *proto; + u_int16_t npt = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; + u_int16_t l3num = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; + int err = 0; + + nfattr_parse_nested(tb, CTA_PROTOINFO_MAX, attr); + + proto = nf_ct_proto_find_get(l3num, npt); + + if (proto->from_nfattr) + err = proto->from_nfattr(tb, ct); + nf_ct_proto_put(proto); + + return err; +} + +static int +ctnetlink_change_conntrack(struct nf_conn *ct, struct nfattr *cda[]) +{ + int err; + + DEBUGP("entered %s\n", __FUNCTION__); + + if (cda[CTA_HELP-1]) { + err = ctnetlink_change_helper(ct, cda); + if (err < 0) + return err; + } + + if (cda[CTA_TIMEOUT-1]) { + err = ctnetlink_change_timeout(ct, cda); + if (err < 0) + return err; + } + + if (cda[CTA_STATUS-1]) { + err = ctnetlink_change_status(ct, cda); + if (err < 0) + return err; + } + + if (cda[CTA_PROTOINFO-1]) { + err = ctnetlink_change_protoinfo(ct, cda); + if (err < 0) + return err; + } + +#if defined(CONFIG_NF_CONNTRACK_MARK) + if (cda[CTA_MARK-1]) + ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1])); +#endif + + DEBUGP("all done\n"); + return 0; +} + +static int +ctnetlink_create_conntrack(struct nfattr *cda[], + struct nf_conntrack_tuple *otuple, + struct nf_conntrack_tuple *rtuple) +{ + struct nf_conn *ct; + int err = -EINVAL; + + DEBUGP("entered %s\n", __FUNCTION__); + + ct = nf_conntrack_alloc(otuple,
rtuple); + if (ct == NULL || IS_ERR(ct)) + return -ENOMEM; + + if (!cda[CTA_TIMEOUT-1]) + goto err; + ct->timeout.expires = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1])); + + ct->timeout.expires = jiffies + ct->timeout.expires * HZ; + ct->status |= IPS_CONFIRMED; + + err = ctnetlink_change_status(ct, cda); + if (err < 0) + goto err; + + if (cda[CTA_PROTOINFO-1]) { + err = ctnetlink_change_protoinfo(ct, cda); + if (err < 0) + return err; + } + +#if defined(CONFIG_NF_CONNTRACK_MARK) + if (cda[CTA_MARK-1]) + ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1])); +#endif + + ct->helper = nf_ct_helper_find_get(rtuple); + + add_timer(&ct->timeout); + nf_conntrack_hash_insert(ct); + + if (ct->helper) + nf_ct_helper_put(ct->helper); + + DEBUGP("conntrack with id %u inserted\n", ct->id); + return 0; + +err: + nf_conntrack_free(ct); + return err; +} + +static int +ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct nf_conntrack_tuple otuple, rtuple; + struct nf_conntrack_tuple_hash *h = NULL; + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + u_int8_t u3 = nfmsg->nfgen_family; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + if (nfattr_bad_size(cda, CTA_MAX, cta_min)) + return -EINVAL; + + if (cda[CTA_TUPLE_ORIG-1]) { + err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG, u3); + if (err < 0) + return err; + } + + if (cda[CTA_TUPLE_REPLY-1]) { + err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY, u3); + if (err < 0) + return err; + } + + write_lock_bh(&nf_conntrack_lock); + if (cda[CTA_TUPLE_ORIG-1]) + h = __nf_conntrack_find(&otuple, NULL); + else if (cda[CTA_TUPLE_REPLY-1]) + h = __nf_conntrack_find(&rtuple, NULL); + + if (h == NULL) { + write_unlock_bh(&nf_conntrack_lock); + DEBUGP("no such conntrack, create new\n"); + err = -ENOENT; + if (nlh->nlmsg_flags & NLM_F_CREATE) + err = ctnetlink_create_conntrack(cda, &otuple, &rtuple); + return err; + } + /* implicit 'else' */ + + /* we only allow nat config for new conntracks */ + if (cda[CTA_NAT-1]) { + err = -EINVAL; + goto out_unlock; + } + + /* We manipulate the conntrack inside the global conntrack table lock, + * so there's no need to increase the refcount */ + DEBUGP("conntrack found\n"); + err = -EEXIST; + if (!(nlh->nlmsg_flags & NLM_F_EXCL)) + err = ctnetlink_change_conntrack(nf_ct_tuplehash_to_ctrack(h), cda); + +out_unlock: + write_unlock_bh(&nf_conntrack_lock); + return err; +} + +/*********************************************************************** + * EXPECT + ***********************************************************************/ + +static inline int +ctnetlink_exp_dump_tuple(struct sk_buff *skb, + const struct nf_conntrack_tuple *tuple, + enum ctattr_expect type) +{ + struct nfattr *nest_parms = NFA_NEST(skb, type); + + if (ctnetlink_dump_tuples(skb, tuple) < 0) + goto nfattr_failure; + + NFA_NEST_END(skb, nest_parms); + + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_exp_dump_expect(struct sk_buff *skb, + const struct nf_conntrack_expect *exp) +{ + struct nf_conn *master = exp->master; + u_int32_t timeout = htonl((exp->timeout.expires - jiffies) / HZ); + u_int32_t id = htonl(exp->id); + + if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0) + goto nfattr_failure; + if (ctnetlink_exp_dump_tuple(skb, &exp->mask, CTA_EXPECT_MASK) < 0) + goto nfattr_failure; + if (ctnetlink_exp_dump_tuple(skb, + &master->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + CTA_EXPECT_MASTER) < 0) + goto
nfattr_failure; + + NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(timeout), &timeout); + NFA_PUT(skb, CTA_EXPECT_ID, sizeof(u_int32_t), &id); + + return 0; + +nfattr_failure: + return -1; +} + +static int +ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq, + int event, + int nowait, + const struct nf_conntrack_expect *exp) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + unsigned char *b; + + b = skb->tail; + + event |= NFNL_SUBSYS_CTNETLINK_EXP << 8; + nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg)); + nfmsg = NLMSG_DATA(nlh); + + nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0; + nfmsg->nfgen_family = exp->tuple.src.l3num; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (ctnetlink_exp_dump_expect(skb, exp) < 0) + goto nfattr_failure; + + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +nfattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +#ifdef CONFIG_NF_CONNTRACK_EVENTS +static int ctnetlink_expect_event(struct notifier_block *this, + unsigned long events, void *ptr) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + struct nf_conntrack_expect *exp = (struct nf_conntrack_expect *)ptr; + struct sk_buff *skb; + unsigned int type; + unsigned char *b; + int flags = 0; + + if (events & IPEXP_NEW) { + type = IPCTNL_MSG_EXP_NEW; + flags = NLM_F_CREATE|NLM_F_EXCL; + } else + return NOTIFY_DONE; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); + if (!skb) + return NOTIFY_DONE; + + b = skb->tail; + + type |= NFNL_SUBSYS_CTNETLINK << 8; + nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg)); + nfmsg = NLMSG_DATA(nlh); + + nlh->nlmsg_flags = flags; + nfmsg->nfgen_family = exp->tuple.src.l3num; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (ctnetlink_exp_dump_expect(skb, exp) < 0) + goto nfattr_failure; + + nlh->nlmsg_len = skb->tail - b; + nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0); + return NOTIFY_DONE; + +nlmsg_failure: +nfattr_failure: + kfree_skb(skb); + return NOTIFY_DONE; +} +#endif + +static int +ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct nf_conntrack_expect *exp = NULL; + struct list_head *i; + u_int32_t *id = (u_int32_t *) &cb->args[0]; + struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh); + u_int8_t l3proto = nfmsg->nfgen_family; + + DEBUGP("entered %s, last id=%llu\n", __FUNCTION__, *id); + + read_lock_bh(&nf_conntrack_lock); + list_for_each_prev(i, &nf_conntrack_expect_list) { + exp = (struct nf_conntrack_expect *) i; + if (l3proto && exp->tuple.src.l3num != l3proto) + continue; + if (exp->id <= *id) + continue; + if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + IPCTNL_MSG_EXP_NEW, + 1, exp) < 0) + goto out; + *id = exp->id; + } +out: + read_unlock_bh(&nf_conntrack_lock); + + DEBUGP("leaving, last id=%llu\n", *id); + + return skb->len; +} + +static const size_t cta_min_exp[CTA_EXPECT_MAX] = { + [CTA_EXPECT_TIMEOUT-1] = sizeof(u_int32_t), + [CTA_EXPECT_ID-1] = sizeof(u_int32_t) +}; + +static int +ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct nf_conntrack_tuple tuple; + struct nf_conntrack_expect *exp; + struct sk_buff *skb2; + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + u_int8_t u3 = nfmsg->nfgen_family; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp)) + return -EINVAL; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + u32 rlen; + + if ((*errp = 
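[Editorial sketch] Both fill routines above emit the same two-part header: an nlmsghdr whose type packs subsystem and message number, followed by a struct nfgenmsg. A userspace sketch of that layout, assuming the sanitized kernel headers of this era are available; buf must hold at least NLMSG_SPACE(sizeof(struct nfgenmsg)) bytes:

#include <string.h>
#include <linux/netlink.h>              /* struct nlmsghdr, NLMSG_* macros */
#include <linux/netfilter/nfnetlink.h>  /* struct nfgenmsg, NFNETLINK_V0 */

static struct nlmsghdr *nfnl_msg_init(void *buf, unsigned char subsys,
                                      unsigned char msg_type,
                                      unsigned char family)
{
        struct nlmsghdr *nlh = buf;
        struct nfgenmsg *nfg;

        memset(buf, 0, NLMSG_SPACE(sizeof(*nfg)));
        nlh->nlmsg_len   = NLMSG_LENGTH(sizeof(*nfg));
        nlh->nlmsg_type  = (subsys << 8) | msg_type;  /* as in the code above */
        nlh->nlmsg_flags = NLM_F_REQUEST;

        nfg = NLMSG_DATA(nlh);
        nfg->nfgen_family = family;     /* e.g. AF_INET */
        nfg->version      = NFNETLINK_V0;
        nfg->res_id       = 0;
        return nlh;
}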
netlink_dump_start(ctnl, skb, nlh, + ctnetlink_exp_dump_table, + ctnetlink_done)) != 0) + return -EINVAL; + rlen = NLMSG_ALIGN(nlh->nlmsg_len); + if (rlen > skb->len) + rlen = skb->len; + skb_pull(skb, rlen); + return 0; + } + + if (cda[CTA_EXPECT_MASTER-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3); + else + return -EINVAL; + + if (err < 0) + return err; + + exp = nf_conntrack_expect_find(&tuple); + if (!exp) + return -ENOENT; + + if (cda[CTA_EXPECT_ID-1]) { + u_int32_t id = *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]); + if (exp->id != ntohl(id)) { + nf_conntrack_expect_put(exp); + return -ENOENT; + } + } + + err = -ENOMEM; + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb2) + goto out; + NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid; + + err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid, + nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, + 1, exp); + if (err <= 0) + goto free; + + nf_conntrack_expect_put(exp); + + return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + +free: + kfree_skb(skb2); +out: + nf_conntrack_expect_put(exp); + return err; +} + +static int +ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct nf_conntrack_expect *exp, *tmp; + struct nf_conntrack_tuple tuple; + struct nf_conntrack_helper *h; + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + u_int8_t u3 = nfmsg->nfgen_family; + int err; + + if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp)) + return -EINVAL; + + if (cda[CTA_EXPECT_TUPLE-1]) { + /* delete a single expect by tuple */ + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); + if (err < 0) + return err; + + /* bump usage count to 2 */ + exp = nf_conntrack_expect_find(&tuple); + if (!exp) + return -ENOENT; + + if (cda[CTA_EXPECT_ID-1]) { + u_int32_t id = + *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]); + if (exp->id != ntohl(id)) { + nf_conntrack_expect_put(exp); + return -ENOENT; + } + } + + /* after list removal, usage count == 1 */ + nf_conntrack_unexpect_related(exp); + /* have to put what we 'get' above. 
+ * after this line usage count == 0 */ + nf_conntrack_expect_put(exp); + } else if (cda[CTA_EXPECT_HELP_NAME-1]) { + char *name = NFA_DATA(cda[CTA_EXPECT_HELP_NAME-1]); + + /* delete all expectations for this helper */ + write_lock_bh(&nf_conntrack_lock); + h = __nf_conntrack_helper_find_byname(name); + if (!h) { + write_unlock_bh(&nf_conntrack_lock); + return -EINVAL; + } + list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, + list) { + if (exp->master->helper == h + && del_timer(&exp->timeout)) { + nf_ct_unlink_expect(exp); + nf_conntrack_expect_put(exp); + } + } + write_unlock_bh(&nf_conntrack_lock); + } else { + /* This basically means we have to flush everything*/ + write_lock_bh(&nf_conntrack_lock); + list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, + list) { + if (del_timer(&exp->timeout)) { + nf_ct_unlink_expect(exp); + nf_conntrack_expect_put(exp); + } + } + write_unlock_bh(&nf_conntrack_lock); + } + + return 0; +} +static int +ctnetlink_change_expect(struct nf_conntrack_expect *x, struct nfattr *cda[]) +{ + return -EOPNOTSUPP; +} + +static int +ctnetlink_create_expect(struct nfattr *cda[], u_int8_t u3) +{ + struct nf_conntrack_tuple tuple, mask, master_tuple; + struct nf_conntrack_tuple_hash *h = NULL; + struct nf_conntrack_expect *exp; + struct nf_conn *ct; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + /* caller guarantees that those three CTA_EXPECT_* exist */ + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); + if (err < 0) + return err; + err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3); + if (err < 0) + return err; + err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3); + if (err < 0) + return err; + + /* Look for master conntrack of this expectation */ + h = nf_conntrack_find_get(&master_tuple, NULL); + if (!h) + return -ENOENT; + ct = nf_ct_tuplehash_to_ctrack(h); + + if (!ct->helper) { + /* such conntrack hasn't got any helper, abort */ + err = -EINVAL; + goto out; + } + + exp = nf_conntrack_expect_alloc(ct); + if (!exp) { + err = -ENOMEM; + goto out; + } + + exp->expectfn = NULL; + exp->flags = 0; + exp->master = ct; + memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple)); + memcpy(&exp->mask, &mask, sizeof(struct nf_conntrack_tuple)); + + err = nf_conntrack_expect_related(exp); + nf_conntrack_expect_put(exp); + +out: + nf_ct_put(nf_ct_tuplehash_to_ctrack(h)); + return err; +} + +static int +ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct nf_conntrack_tuple tuple; + struct nf_conntrack_expect *exp; + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + u_int8_t u3 = nfmsg->nfgen_family; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp)) + return -EINVAL; + + if (!cda[CTA_EXPECT_TUPLE-1] + || !cda[CTA_EXPECT_MASK-1] + || !cda[CTA_EXPECT_MASTER-1]) + return -EINVAL; + + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); + if (err < 0) + return err; + + write_lock_bh(&nf_conntrack_lock); + exp = __nf_conntrack_expect_find(&tuple); + + if (!exp) { + write_unlock_bh(&nf_conntrack_lock); + err = -ENOENT; + if (nlh->nlmsg_flags & NLM_F_CREATE) + err = ctnetlink_create_expect(cda, u3); + return err; + } + + err = -EEXIST; + if (!(nlh->nlmsg_flags & NLM_F_EXCL)) + err = ctnetlink_change_expect(exp, cda); + write_unlock_bh(&nf_conntrack_lock); + + DEBUGP("leaving\n"); + + return err; +} + +#ifdef CONFIG_NF_CONNTRACK_EVENTS +static struct 
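[Editorial sketch] The delete and flush paths above only tear an expectation down when del_timer() succeeds, so exactly one context wins the race against the expiry timer and performs the unlink-and-put. A userspace analogue of that ownership idiom, with an atomic flag standing in for the pending timer:

#include <stdatomic.h>
#include <stdbool.h>

struct entry {
        atomic_int       armed;         /* 1 while the "timer" is pending */
        struct entry    *prev, *next;   /* circular list with sentinel head */
        int              refcnt;
};

/* Succeeds for exactly one caller, like del_timer() returning nonzero
 * only if the timer was still queued. */
static bool try_cancel(struct entry *e)
{
        int one = 1;

        return atomic_compare_exchange_strong(&e->armed, &one, 0);
}

static void flush(struct entry *head)
{
        struct entry *e = head->next, *next;

        for (; e != head; e = next) {
                next = e->next;                 /* _safe-style iteration */
                if (!try_cancel(e))
                        continue;               /* timer already fired */
                e->prev->next = e->next;        /* unlink */
                e->next->prev = e->prev;
                e->refcnt--;                    /* drop the list's reference */
        }
}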
notifier_block ctnl_notifier = { + .notifier_call = ctnetlink_conntrack_event, +}; + +static struct notifier_block ctnl_notifier_exp = { + .notifier_call = ctnetlink_expect_event, +}; +#endif + +static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = { + [IPCTNL_MSG_CT_NEW] = { .call = ctnetlink_new_conntrack, + .attr_count = CTA_MAX, }, + [IPCTNL_MSG_CT_GET] = { .call = ctnetlink_get_conntrack, + .attr_count = CTA_MAX, }, + [IPCTNL_MSG_CT_DELETE] = { .call = ctnetlink_del_conntrack, + .attr_count = CTA_MAX, }, + [IPCTNL_MSG_CT_GET_CTRZERO] = { .call = ctnetlink_get_conntrack, + .attr_count = CTA_MAX, }, +}; + +static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = { + [IPCTNL_MSG_EXP_GET] = { .call = ctnetlink_get_expect, + .attr_count = CTA_EXPECT_MAX, }, + [IPCTNL_MSG_EXP_NEW] = { .call = ctnetlink_new_expect, + .attr_count = CTA_EXPECT_MAX, }, + [IPCTNL_MSG_EXP_DELETE] = { .call = ctnetlink_del_expect, + .attr_count = CTA_EXPECT_MAX, }, +}; + +static struct nfnetlink_subsystem ctnl_subsys = { + .name = "conntrack", + .subsys_id = NFNL_SUBSYS_CTNETLINK, + .cb_count = IPCTNL_MSG_MAX, + .cb = ctnl_cb, +}; + +static struct nfnetlink_subsystem ctnl_exp_subsys = { + .name = "conntrack_expect", + .subsys_id = NFNL_SUBSYS_CTNETLINK_EXP, + .cb_count = IPCTNL_MSG_EXP_MAX, + .cb = ctnl_exp_cb, +}; + +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK); + +static int __init ctnetlink_init(void) +{ + int ret; + + printk("ctnetlink v%s: registering with nfnetlink.\n", version); + ret = nfnetlink_subsys_register(&ctnl_subsys); + if (ret < 0) { + printk("ctnetlink_init: cannot register with nfnetlink.\n"); + goto err_out; + } + + ret = nfnetlink_subsys_register(&ctnl_exp_subsys); + if (ret < 0) { + printk("ctnetlink_init: cannot register exp with nfnetlink.\n"); + goto err_unreg_subsys; + } + +#ifdef CONFIG_NF_CONNTRACK_EVENTS + ret = nf_conntrack_register_notifier(&ctnl_notifier); + if (ret < 0) { + printk("ctnetlink_init: cannot register notifier.\n"); + goto err_unreg_exp_subsys; + } + + ret = nf_conntrack_expect_register_notifier(&ctnl_notifier_exp); + if (ret < 0) { + printk("ctnetlink_init: cannot expect register notifier.\n"); + goto err_unreg_notifier; + } +#endif + + return 0; + +#ifdef CONFIG_NF_CONNTRACK_EVENTS +err_unreg_notifier: + nf_conntrack_unregister_notifier(&ctnl_notifier); +err_unreg_exp_subsys: + nfnetlink_subsys_unregister(&ctnl_exp_subsys); +#endif +err_unreg_subsys: + nfnetlink_subsys_unregister(&ctnl_subsys); +err_out: + return ret; +} + +static void __exit ctnetlink_exit(void) +{ + printk("ctnetlink: unregistering from nfnetlink.\n"); + +#ifdef CONFIG_NF_CONNTRACK_EVENTS + nf_conntrack_unregister_notifier(&ctnl_notifier_exp); + nf_conntrack_unregister_notifier(&ctnl_notifier); +#endif + + nfnetlink_subsys_unregister(&ctnl_exp_subsys); + nfnetlink_subsys_unregister(&ctnl_subsys); + return; +} + +module_init(ctnetlink_init); +module_exit(ctnetlink_exit); diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 6035633d822..6167137a5cb 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1147,6 +1147,63 @@ static int tcp_new(struct nf_conn *conntrack, receiver->td_scale); return 1; } + +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_conntrack.h> + +static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa, + const struct nf_conn *ct) +{ + struct nfattr *nest_parms; + + 
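[Editorial sketch] ctnetlink_init() above uses the usual goto ladder: every successful registration adds one more label to fall back through when a later step fails, so teardown always happens in reverse order. Reduced to its shape, with placeholder register/unregister pairs:

static int  register_a(void)   { return 0; }    /* placeholder, succeeds */
static int  register_b(void)   { return -1; }   /* placeholder, fails */
static void unregister_a(void) { }

static int init_two_stage(void)
{
        int ret;

        ret = register_a();
        if (ret < 0)
                goto err_out;

        ret = register_b();
        if (ret < 0)
                goto err_unreg_a;       /* undo only what succeeded */

        return 0;

err_unreg_a:
        unregister_a();
err_out:
        return ret;
}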
read_lock_bh(&tcp_lock); + nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP); + NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t), + &ct->proto.tcp.state); + read_unlock_bh(&tcp_lock); + + NFA_NEST_END(skb, nest_parms); + + return 0; + +nfattr_failure: + read_unlock_bh(&tcp_lock); + return -1; +} + +static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = { + [CTA_PROTOINFO_TCP_STATE-1] = sizeof(u_int8_t), +}; + +static int nfattr_to_tcp(struct nfattr *cda[], struct nf_conn *ct) +{ + struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1]; + struct nfattr *tb[CTA_PROTOINFO_TCP_MAX]; + + /* updates could not contain anything about the private + * protocol info, in that case skip the parsing */ + if (!attr) + return 0; + + nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr); + + if (nfattr_bad_size(tb, CTA_PROTOINFO_TCP_MAX, cta_min_tcp)) + return -EINVAL; + + if (!tb[CTA_PROTOINFO_TCP_STATE-1]) + return -EINVAL; + + write_lock_bh(&tcp_lock); + ct->proto.tcp.state = + *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]); + write_unlock_bh(&tcp_lock); + + return 0; +} +#endif struct nf_conntrack_protocol nf_conntrack_protocol_tcp4 = { @@ -1160,6 +1217,13 @@ struct nf_conntrack_protocol nf_conntrack_protocol_tcp4 = .packet = tcp_packet, .new = tcp_new, .error = tcp_error4, +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + .to_nfattr = tcp_to_nfattr, + .from_nfattr = nfattr_to_tcp, + .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, +#endif }; struct nf_conntrack_protocol nf_conntrack_protocol_tcp6 = @@ -1174,6 +1238,13 @@ struct nf_conntrack_protocol nf_conntrack_protocol_tcp6 = .packet = tcp_packet, .new = tcp_new, .error = tcp_error6, +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + .to_nfattr = tcp_to_nfattr, + .from_nfattr = nfattr_to_tcp, + .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, +#endif }; EXPORT_SYMBOL(nf_conntrack_protocol_tcp4); diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 3cae7ce420d..1a592a55618 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -196,6 +196,11 @@ struct nf_conntrack_protocol nf_conntrack_protocol_udp4 = .packet = udp_packet, .new = udp_new, .error = udp_error4, +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, +#endif }; struct nf_conntrack_protocol nf_conntrack_protocol_udp6 = @@ -210,6 +215,11 @@ struct nf_conntrack_protocol nf_conntrack_protocol_udp6 = .packet = udp_packet, .new = udp_new, .error = udp_error6, +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, +#endif }; EXPORT_SYMBOL(nf_conntrack_protocol_udp4); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 5af381f9fe3..d17e42b28c7 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -161,14 +161,14 @@ static int ct_seq_show(struct seq_file *s, void *v) if (NF_CT_DIRECTION(hash)) return 0; - l3proto = nf_ct_find_l3proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] - .tuple.src.l3num); + l3proto = __nf_ct_l3proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.src.l3num); NF_CT_ASSERT(l3proto); - proto = 
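[Editorial sketch] tcp_to_nfattr()/nfattr_to_tcp() above wrap the TCP conntrack state in a nested attribute. A generic sketch of that reserve-children-backpatch pattern; the attribute layout, alignment, and type numbers here are simplified stand-ins for the real nfattr encoding, not byte-compatible with it:

#include <stdint.h>
#include <string.h>

struct tlv { uint16_t len; uint16_t type; };

static size_t tlv_put(uint8_t *buf, size_t off, uint16_t type,
                      const void *data, uint16_t dlen)
{
        struct tlv a = { (uint16_t)(sizeof(a) + dlen), type };

        memcpy(buf + off, &a, sizeof(a));
        memcpy(buf + off + sizeof(a), data, dlen);
        return off + sizeof(a) + dlen;
}

/* buf is assumed large enough for the whole nest. */
static size_t example_nest(uint8_t *buf)
{
        uint8_t tcp_state = 3;          /* illustrative state value */
        size_t start = 0, off;

        off = start + sizeof(struct tlv);       /* skip parent header for now */
        off = tlv_put(buf, off, 1 /* child type, illustrative */,
                      &tcp_state, sizeof(tcp_state));

        /* backpatch the parent: the NFA_NEST_END() step */
        struct tlv parent = { (uint16_t)(off - start),
                              2 /* nest type, illustrative */ };
        memcpy(buf + start, &parent, sizeof(parent));
        return off;
}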
nf_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] - .tuple.src.l3num, - conntrack->tuplehash[IP_CT_DIR_ORIGINAL] - .tuple.dst.protonum); + proto = __nf_ct_proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.src.l3num, + conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.protonum); NF_CT_ASSERT(proto); if (seq_printf(s, "%-8s %u %-8s %u %ld ", @@ -307,9 +307,9 @@ static int exp_seq_show(struct seq_file *s, void *v) expect->tuple.src.l3num, expect->tuple.dst.protonum); print_tuple(s, &expect->tuple, - nf_ct_find_l3proto(expect->tuple.src.l3num), - nf_ct_find_proto(expect->tuple.src.l3num, - expect->tuple.dst.protonum)); + __nf_ct_l3proto_find(expect->tuple.src.l3num), + __nf_ct_proto_find(expect->tuple.src.l3num, + expect->tuple.dst.protonum)); return seq_putc(s, '\n'); } @@ -847,7 +847,11 @@ EXPORT_SYMBOL(nf_conntrack_helper_unregister); EXPORT_SYMBOL(nf_ct_iterate_cleanup); EXPORT_SYMBOL(__nf_ct_refresh_acct); EXPORT_SYMBOL(nf_ct_protos); -EXPORT_SYMBOL(nf_ct_find_proto); +EXPORT_SYMBOL(__nf_ct_proto_find); +EXPORT_SYMBOL(nf_ct_proto_find_get); +EXPORT_SYMBOL(nf_ct_proto_put); +EXPORT_SYMBOL(nf_ct_l3proto_find_get); +EXPORT_SYMBOL(nf_ct_l3proto_put); EXPORT_SYMBOL(nf_ct_l3protos); EXPORT_SYMBOL(nf_conntrack_expect_alloc); EXPORT_SYMBOL(nf_conntrack_expect_put); @@ -867,3 +871,21 @@ EXPORT_SYMBOL(nf_ct_get_tuple); EXPORT_SYMBOL(nf_ct_invert_tuple); EXPORT_SYMBOL(nf_conntrack_in); EXPORT_SYMBOL(__nf_conntrack_attach); +EXPORT_SYMBOL(nf_conntrack_alloc); +EXPORT_SYMBOL(nf_conntrack_free); +EXPORT_SYMBOL(nf_conntrack_flush); +EXPORT_SYMBOL(nf_ct_remove_expectations); +EXPORT_SYMBOL(nf_ct_helper_find_get); +EXPORT_SYMBOL(nf_ct_helper_put); +EXPORT_SYMBOL(__nf_conntrack_helper_find_byname); +EXPORT_SYMBOL(__nf_conntrack_find); +EXPORT_SYMBOL(nf_ct_unlink_expect); +EXPORT_SYMBOL(nf_conntrack_hash_insert); +EXPORT_SYMBOL(__nf_conntrack_expect_find); +EXPORT_SYMBOL(nf_conntrack_expect_find); +EXPORT_SYMBOL(nf_conntrack_expect_list); +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) +EXPORT_SYMBOL(nf_ct_port_tuple_to_nfattr); +EXPORT_SYMBOL(nf_ct_port_nfattr_to_tuple); +#endif diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index cba63729313..e10512e229b 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -151,7 +151,7 @@ instance_create(u_int16_t group_num, int pid) goto out_unlock; INIT_HLIST_NODE(&inst->hlist); - inst->lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&inst->lock); /* needs to be two, since we _put() after creation */ atomic_set(&inst->use, 2); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index f28460b61e4..18ed9c5d209 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -148,7 +148,7 @@ instance_create(u_int16_t queue_num, int pid) atomic_set(&inst->id_sequence, 0); /* needs to be two, since we _put() after creation */ atomic_set(&inst->use, 2); - inst->lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&inst->lock); INIT_LIST_HEAD(&inst->queue_list); if (!try_module_get(THIS_MODULE)) @@ -345,6 +345,10 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, struct nfqnl_msg_packet_hdr pmsg; struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; + struct nf_info *entinf = entry->info; + struct sk_buff *entskb = entry->skb; + struct net_device *indev; + struct net_device *outdev; unsigned int tmp_uint; QDEBUG("entered\n"); @@ -361,6 +365,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, + NLMSG_SPACE(sizeof(struct 
nfqnl_msg_packet_hw)) + NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_timestamp)); + outdev = entinf->outdev; + spin_lock_bh(&queue->lock); switch (queue->copy_mode) { @@ -370,15 +376,15 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, break; case NFQNL_COPY_PACKET: - if (entry->skb->ip_summed == CHECKSUM_HW && - (*errp = skb_checksum_help(entry->skb, - entry->info->outdev == NULL))) { + if (entskb->ip_summed == CHECKSUM_HW && + (*errp = skb_checksum_help(entskb, + outdev == NULL))) { spin_unlock_bh(&queue->lock); return NULL; } if (queue->copy_range == 0 - || queue->copy_range > entry->skb->len) - data_len = entry->skb->len; + || queue->copy_range > entskb->len) + data_len = entskb->len; else data_len = queue->copy_range; @@ -402,29 +408,30 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, sizeof(struct nfgenmsg)); nfmsg = NLMSG_DATA(nlh); - nfmsg->nfgen_family = entry->info->pf; + nfmsg->nfgen_family = entinf->pf; nfmsg->version = NFNETLINK_V0; nfmsg->res_id = htons(queue->queue_num); pmsg.packet_id = htonl(entry->id); - pmsg.hw_protocol = htons(entry->skb->protocol); - pmsg.hook = entry->info->hook; + pmsg.hw_protocol = htons(entskb->protocol); + pmsg.hook = entinf->hook; NFA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg); - if (entry->info->indev) { - tmp_uint = htonl(entry->info->indev->ifindex); + indev = entinf->indev; + if (indev) { + tmp_uint = htonl(indev->ifindex); #ifndef CONFIG_BRIDGE_NETFILTER NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); #else - if (entry->info->pf == PF_BRIDGE) { + if (entinf->pf == PF_BRIDGE) { /* Case 1: indev is physical input device, we need to * look for bridge group (when called from * netfilter_bridge) */ NFA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, sizeof(tmp_uint), &tmp_uint); /* this is the bridge group "brX" */ - tmp_uint = htonl(entry->info->indev->br_port->br->dev->ifindex); + tmp_uint = htonl(indev->br_port->br->dev->ifindex); NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); } else { @@ -432,9 +439,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, * physical device (when called from ipv4) */ NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); - if (entry->skb->nf_bridge - && entry->skb->nf_bridge->physindev) { - tmp_uint = htonl(entry->skb->nf_bridge->physindev->ifindex); + if (entskb->nf_bridge + && entskb->nf_bridge->physindev) { + tmp_uint = htonl(entskb->nf_bridge->physindev->ifindex); NFA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, sizeof(tmp_uint), &tmp_uint); } @@ -442,19 +449,19 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, #endif } - if (entry->info->outdev) { - tmp_uint = htonl(entry->info->outdev->ifindex); + if (outdev) { + tmp_uint = htonl(outdev->ifindex); #ifndef CONFIG_BRIDGE_NETFILTER NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); #else - if (entry->info->pf == PF_BRIDGE) { + if (entinf->pf == PF_BRIDGE) { /* Case 1: outdev is physical output device, we need to * look for bridge group (when called from * netfilter_bridge) */ NFA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, sizeof(tmp_uint), &tmp_uint); /* this is the bridge group "brX" */ - tmp_uint = htonl(entry->info->outdev->br_port->br->dev->ifindex); + tmp_uint = htonl(outdev->br_port->br->dev->ifindex); NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); } else { @@ -462,9 +469,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, * physical output device (when called from ipv4) */ NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), 
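[Editorial sketch] The NFQNL_COPY_PACKET branch above calls skb_checksum_help() so that a packet whose checksum was deferred to hardware (CHECKSUM_HW) reaches userspace with a finished checksum. For reference, a standalone version of the 16-bit ones'-complement sum such a software fallback has to compute:

#include <stddef.h>
#include <stdint.h>

static uint16_t csum16(const uint8_t *data, size_t len)
{
        uint32_t sum = 0;

        while (len > 1) {
                sum  += (uint32_t)(data[0] << 8 | data[1]);
                data += 2;
                len  -= 2;
        }
        if (len)                        /* trailing odd byte */
                sum += (uint32_t)data[0] << 8;
        while (sum >> 16)               /* fold carries back in */
                sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)~sum;
}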
&tmp_uint); - if (entry->skb->nf_bridge - && entry->skb->nf_bridge->physoutdev) { - tmp_uint = htonl(entry->skb->nf_bridge->physoutdev->ifindex); + if (entskb->nf_bridge + && entskb->nf_bridge->physoutdev) { + tmp_uint = htonl(entskb->nf_bridge->physoutdev->ifindex); NFA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, sizeof(tmp_uint), &tmp_uint); } @@ -472,27 +479,27 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, #endif } - if (entry->skb->nfmark) { - tmp_uint = htonl(entry->skb->nfmark); + if (entskb->nfmark) { + tmp_uint = htonl(entskb->nfmark); NFA_PUT(skb, NFQA_MARK, sizeof(u_int32_t), &tmp_uint); } - if (entry->info->indev && entry->skb->dev - && entry->skb->dev->hard_header_parse) { + if (indev && entskb->dev + && entskb->dev->hard_header_parse) { struct nfqnl_msg_packet_hw phw; phw.hw_addrlen = - entry->skb->dev->hard_header_parse(entry->skb, + entskb->dev->hard_header_parse(entskb, phw.hw_addr); phw.hw_addrlen = htons(phw.hw_addrlen); NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw); } - if (entry->skb->tstamp.off_sec) { + if (entskb->tstamp.off_sec) { struct nfqnl_msg_packet_timestamp ts; - ts.sec = cpu_to_be64(entry->skb->tstamp.off_sec); - ts.usec = cpu_to_be64(entry->skb->tstamp.off_usec); + ts.sec = cpu_to_be64(entskb->tstamp.off_sec); + ts.usec = cpu_to_be64(entskb->tstamp.off_usec); NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts); } @@ -510,7 +517,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, nfa->nfa_type = NFQA_PAYLOAD; nfa->nfa_len = size; - if (skb_copy_bits(entry->skb, 0, NFA_DATA(nfa), data_len)) + if (skb_copy_bits(entskb, 0, NFA_DATA(nfa), data_len)) BUG(); } @@ -667,12 +674,14 @@ nfqnl_set_mode(struct nfqnl_instance *queue, static int dev_cmp(struct nfqnl_queue_entry *entry, unsigned long ifindex) { - if (entry->info->indev) - if (entry->info->indev->ifindex == ifindex) + struct nf_info *entinf = entry->info; + + if (entinf->indev) + if (entinf->indev->ifindex == ifindex) return 1; - if (entry->info->outdev) - if (entry->info->outdev->ifindex == ifindex) + if (entinf->outdev) + if (entinf->outdev->ifindex == ifindex) return 1; return 0; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 96020d7087e..7849cac14d3 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -293,7 +293,7 @@ static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len) return 0; } -static struct proto_ops netlink_ops; +static const struct proto_ops netlink_ops; static int netlink_insert(struct sock *sk, u32 pid) { @@ -1656,7 +1656,7 @@ int netlink_unregister_notifier(struct notifier_block *nb) return notifier_chain_unregister(&netlink_chain, nb); } -static struct proto_ops netlink_ops = { +static const struct proto_ops netlink_ops = { .family = PF_NETLINK, .owner = THIS_MODULE, .release = netlink_release, diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 287cfcc5695..3b1378498d5 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -441,7 +441,7 @@ errout: } static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid, - int seq, int cmd) + int seq, u8 cmd) { struct sk_buff *skb; int err; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index e5d82d711ca..63b0e4afeb3 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -63,7 +63,7 @@ static unsigned short circuit = 0x101; static HLIST_HEAD(nr_list); static DEFINE_SPINLOCK(nr_list_lock); -static struct proto_ops nr_proto_ops; +static const struct proto_ops nr_proto_ops; /* * Socket removal during an interrupt is 
now safe. @@ -1166,10 +1166,11 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) void __user *argp = (void __user *)arg; int ret; - lock_sock(sk); switch (cmd) { case TIOCOUTQ: { long amount; + + lock_sock(sk); amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); if (amount < 0) amount = 0; @@ -1180,6 +1181,8 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case TIOCINQ: { struct sk_buff *skb; long amount = 0L; + + lock_sock(sk); /* These two are safe on a single CPU system as only user tasks fiddle here */ if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) amount = skb->len; @@ -1188,6 +1191,7 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } case SIOCGSTAMP: + lock_sock(sk); ret = sock_get_timestamp(sk, argp); release_sock(sk); return ret; @@ -1202,21 +1206,17 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCSIFNETMASK: case SIOCGIFMETRIC: case SIOCSIFMETRIC: - release_sock(sk); return -EINVAL; case SIOCADDRT: case SIOCDELRT: case SIOCNRDECOBS: - release_sock(sk); if (!capable(CAP_NET_ADMIN)) return -EPERM; return nr_rt_ioctl(cmd, argp); default: - release_sock(sk); - return dev_ioctl(cmd, argp); + return -ENOIOCTLCMD; } - release_sock(sk); return 0; } @@ -1337,7 +1337,7 @@ static struct net_proto_family nr_family_ops = { .owner = THIS_MODULE, }; -static struct proto_ops nr_proto_ops = { +static const struct proto_ops nr_proto_ops = { .family = PF_NETROM, .owner = THIS_MODULE, .release = nr_release, diff --git a/net/nonet.c b/net/nonet.c index e5241dceaa5..1230f0ae832 100644 --- a/net/nonet.c +++ b/net/nonet.c @@ -14,11 +14,6 @@ #include <linux/init.h> #include <linux/kernel.h> -void __init sock_init(void) -{ - printk(KERN_INFO "Linux NoNET1.0 for Linux 2.6\n"); -} - static int sock_no_open(struct inode *irrelevant, struct file *dontcare) { return -ENXIO; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 3e246276041..f69e5ed9bd0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -251,10 +251,10 @@ static void packet_sock_destruct(struct sock *sk) } -static struct proto_ops packet_ops; +static const struct proto_ops packet_ops; #ifdef CONFIG_SOCK_PACKET -static struct proto_ops packet_ops_spkt; +static const struct proto_ops packet_ops_spkt; static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { @@ -1521,7 +1521,7 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, #endif default: - return dev_ioctl(cmd, (void __user *)arg); + return -ENOIOCTLCMD; } return 0; } @@ -1784,7 +1784,7 @@ out: #ifdef CONFIG_SOCK_PACKET -static struct proto_ops packet_ops_spkt = { +static const struct proto_ops packet_ops_spkt = { .family = PF_PACKET, .owner = THIS_MODULE, .release = packet_release, @@ -1806,7 +1806,7 @@ static struct proto_ops packet_ops_spkt = { }; #endif -static struct proto_ops packet_ops = { +static const struct proto_ops packet_ops = { .family = PF_PACKET, .owner = THIS_MODULE, .release = packet_release, diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 829fdbc4400..63090be2315 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1320,7 +1320,7 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return 0; default: - return dev_ioctl(cmd, argp); + return -ENOIOCTLCMD; } return 0; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 82fb07aa06a..ba528320483 100644 --- 
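[Editorial sketch] The nr_ioctl() rework above pushes lock_sock() into the individual cases that actually touch socket state and answers unknown commands with -ENOIOCTLCMD so common socket code can fall back to generic handling, replacing the old direct dev_ioctl() call. The same shape as a self-contained sketch; the lock functions are stubs and ENOIOCTLCMD is kernel-internal (value shown for illustration, it never reaches userspace):

#define ENOIOCTLCMD 515                 /* kernel-internal "try generic" code */

struct sock { int dummy; };
static void lock_sock(struct sock *sk)    { (void)sk; /* stub */ }
static void release_sock(struct sock *sk) { (void)sk; /* stub */ }

static int example_ioctl(struct sock *sk, unsigned int cmd)
{
        switch (cmd) {
        case 1: {                       /* touches sk state: lock around it */
                int ret;

                lock_sock(sk);
                ret = 0;                /* ... inspect/modify sk here ... */
                release_sock(sk);
                return ret;
        }
        case 2:                         /* pure table lookup: no lock needed */
                return 0;
        default:
                return -ENOIOCTLCMD;    /* let common code fall back */
        }
}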
a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -25,7 +25,7 @@ #include <net/pkt_sched.h> -#define VERSION "1.1" +#define VERSION "1.2" /* Network Emulation Queuing algorithm. ==================================== @@ -65,11 +65,12 @@ struct netem_sched_data { u32 jitter; u32 duplicate; u32 reorder; + u32 corrupt; struct crndstate { unsigned long last; unsigned long rho; - } delay_cor, loss_cor, dup_cor, reorder_cor; + } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor; struct disttable { u32 size; @@ -183,6 +184,23 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) q->duplicate = dupsave; } + /* + * Randomized packet corruption. + * Make copy if needed since we are modifying + * If packet is going to be hardware checksummed, then + * do it now in software before we mangle it. + */ + if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { + if (!(skb = skb_unshare(skb, GFP_ATOMIC)) + || (skb->ip_summed == CHECKSUM_HW + && skb_checksum_help(skb, 0))) { + sch->qstats.drops++; + return NET_XMIT_DROP; + } + + skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8); + } + if (q->gap == 0 /* not doing reordering */ || q->counter < q->gap /* inside last reordering gap */ || q->reorder < get_crandom(&q->reorder_cor)) { @@ -382,6 +400,20 @@ static int get_reorder(struct Qdisc *sch, const struct rtattr *attr) return 0; } +static int get_corrupt(struct Qdisc *sch, const struct rtattr *attr) +{ + struct netem_sched_data *q = qdisc_priv(sch); + const struct tc_netem_corrupt *r = RTA_DATA(attr); + + if (RTA_PAYLOAD(attr) != sizeof(*r)) + return -EINVAL; + + q->corrupt = r->probability; + init_crandom(&q->corrupt_cor, r->correlation); + return 0; +} + +/* Parse netlink message to set options */ static int netem_change(struct Qdisc *sch, struct rtattr *opt) { struct netem_sched_data *q = qdisc_priv(sch); @@ -432,13 +464,19 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt) if (ret) return ret; } + if (tb[TCA_NETEM_REORDER-1]) { ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]); if (ret) return ret; } - } + if (tb[TCA_NETEM_CORRUPT-1]) { + ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT-1]); + if (ret) + return ret; + } + } return 0; } @@ -564,6 +602,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) struct tc_netem_qopt qopt; struct tc_netem_corr cor; struct tc_netem_reorder reorder; + struct tc_netem_corrupt corrupt; qopt.latency = q->latency; qopt.jitter = q->jitter; @@ -582,6 +621,10 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) reorder.correlation = q->reorder_cor.rho; RTA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder); + corrupt.probability = q->corrupt; + corrupt.correlation = q->corrupt_cor.rho; + RTA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt); + rta->rta_len = skb->tail - b; return skb->len; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 6cf0342706b..c4a2a8c4c33 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -22,6 +22,7 @@ #include <linux/in.h> #include <linux/errno.h> #include <linux/interrupt.h> +#include <linux/if_arp.h> #include <linux/if_ether.h> #include <linux/inet.h> #include <linux/netdevice.h> diff --git a/net/sctp/associola.c b/net/sctp/associola.c index dec68a60477..9d05e13e92f 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -110,7 +110,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->cookie_life.tv_sec = sp->assocparams.sasoc_cookie_life / 1000; asoc->cookie_life.tv_usec = 
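[Editorial sketch] netem's new corruption step above flips a single uniformly chosen bit with the configured probability, after unsharing the skb and finishing any pending hardware checksum so the mangling is visible and the checksum is genuinely wrong. A userspace rendition of just the bit flip:

#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

/* With probability p, flip exactly one random bit in the buffer. */
static void maybe_corrupt(uint8_t *data, size_t len, double p)
{
        if (len == 0 || (double)rand() / RAND_MAX >= p)
                return;
        data[rand() % len] ^= (uint8_t)(1u << (rand() % 8));
}

With an iproute2 recent enough to know TCA_NETEM_CORRUPT, something like "tc qdisc change dev eth0 root netem corrupt 0.1%" should drive this path, optionally with a correlation argument feeding corrupt_cor.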
(sp->assocparams.sasoc_cookie_life % 1000) * 1000; - asoc->pmtu = 0; asoc->frag_point = 0; /* Set the association max_retrans and RTO values from the @@ -123,6 +122,25 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->overall_error_count = 0; + /* Initialize the association's heartbeat interval based on the + * sock configured value. + */ + asoc->hbinterval = msecs_to_jiffies(sp->hbinterval); + + /* Initialize path max retrans value. */ + asoc->pathmaxrxt = sp->pathmaxrxt; + + /* Initialize default path MTU. */ + asoc->pathmtu = sp->pathmtu; + + /* Set association default SACK delay */ + asoc->sackdelay = msecs_to_jiffies(sp->sackdelay); + + /* Set the association default flags controlling + * Heartbeat, SACK delay, and Path MTU Discovery. + */ + asoc->param_flags = sp->param_flags; + /* Initialize the maximum mumber of new data packets that can be sent * in a burst. */ @@ -144,8 +162,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a = 5 * asoc->rto_max; asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0; - asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = - SCTP_DEFAULT_TIMEOUT_SACK; + asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay; asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = sp->autoclose * HZ; @@ -540,23 +557,46 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, sctp_transport_set_owner(peer, asoc); + /* Initialize the peer's heartbeat interval based on the + * association configured value. + */ + peer->hbinterval = asoc->hbinterval; + + /* Set the path max_retrans. */ + peer->pathmaxrxt = asoc->pathmaxrxt; + + /* Initialize the peer's SACK delay timeout based on the + * association configured value. + */ + peer->sackdelay = asoc->sackdelay; + + /* Enable/disable heartbeat, SACK delay, and path MTU discovery + * based on association setting. + */ + peer->param_flags = asoc->param_flags; + /* Initialize the pmtu of the transport. */ - sctp_transport_pmtu(peer); + if (peer->param_flags & SPP_PMTUD_ENABLE) + sctp_transport_pmtu(peer); + else if (asoc->pathmtu) + peer->pathmtu = asoc->pathmtu; + else + peer->pathmtu = SCTP_DEFAULT_MAXSEGMENT; /* If this is the first transport addr on this association, * initialize the association PMTU to the peer's PMTU. * If not and the current association PMTU is higher than the new * peer's PMTU, reset the association PMTU to the new peer's PMTU. */ - if (asoc->pmtu) - asoc->pmtu = min_t(int, peer->pmtu, asoc->pmtu); + if (asoc->pathmtu) + asoc->pathmtu = min_t(int, peer->pathmtu, asoc->pathmtu); else - asoc->pmtu = peer->pmtu; + asoc->pathmtu = peer->pathmtu; SCTP_DEBUG_PRINTK("sctp_assoc_add_peer:association %p PMTU set to " - "%d\n", asoc, asoc->pmtu); + "%d\n", asoc, asoc->pathmtu); - asoc->frag_point = sctp_frag_point(sp, asoc->pmtu); + asoc->frag_point = sctp_frag_point(sp, asoc->pathmtu); /* The asoc->peer.port might not be meaningful yet, but * initialize the packet structure anyway. @@ -574,7 +614,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, * (for example, implementations MAY use the size of the * receiver advertised window). 
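[Editorial sketch] The hunks above give each tunable three homes: a socket default, an association copy seeded from the socket in sctp_association_init(), and a transport copy seeded from the association in sctp_assoc_add_peer(). The seeding order, compressed into stand-in structs (field names follow the patch):

struct level {
        unsigned int hbinterval, pathmaxrxt, pathmtu, sackdelay, flags;
};

static void seed_assoc(struct level *asoc, const struct level *sock)
{
        *asoc = *sock;          /* sctp_association_init(): sock -> asoc */
}

static void seed_transport(struct level *trans, const struct level *asoc)
{
        *trans = *asoc;         /* sctp_assoc_add_peer(): asoc -> trans */
}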
*/ - peer->cwnd = min(4*asoc->pmtu, max_t(__u32, 2*asoc->pmtu, 4380)); + peer->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380)); /* At this point, we may not have the receiver's advertised window, * so initialize ssthresh to the default value and it will be set @@ -585,17 +625,6 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, peer->partial_bytes_acked = 0; peer->flight_size = 0; - /* By default, enable heartbeat for peer address. */ - peer->hb_allowed = 1; - - /* Initialize the peer's heartbeat interval based on the - * sock configured value. - */ - peer->hb_interval = msecs_to_jiffies(sp->paddrparam.spp_hbinterval); - - /* Set the path max_retrans. */ - peer->max_retrans = sp->paddrparam.spp_pathmaxrxt; - /* Set the transport's RTO.initial value */ peer->rto = asoc->rto_initial; @@ -1155,18 +1184,18 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc) /* Get the lowest pmtu of all the transports. */ list_for_each(pos, &asoc->peer.transport_addr_list) { t = list_entry(pos, struct sctp_transport, transports); - if (!pmtu || (t->pmtu < pmtu)) - pmtu = t->pmtu; + if (!pmtu || (t->pathmtu < pmtu)) + pmtu = t->pathmtu; } if (pmtu) { struct sctp_sock *sp = sctp_sk(asoc->base.sk); - asoc->pmtu = pmtu; + asoc->pathmtu = pmtu; asoc->frag_point = sctp_frag_point(sp, pmtu); } SCTP_DEBUG_PRINTK("%s: asoc:%p, pmtu:%d, frag_point:%d\n", - __FUNCTION__, asoc, asoc->pmtu, asoc->frag_point); + __FUNCTION__, asoc, asoc->pathmtu, asoc->frag_point); } /* Should we send a SACK to update our peer? */ @@ -1179,7 +1208,7 @@ static inline int sctp_peer_needs_update(struct sctp_association *asoc) case SCTP_STATE_SHUTDOWN_SENT: if ((asoc->rwnd > asoc->a_rwnd) && ((asoc->rwnd - asoc->a_rwnd) >= - min_t(__u32, (asoc->base.sk->sk_rcvbuf >> 1), asoc->pmtu))) + min_t(__u32, (asoc->base.sk->sk_rcvbuf >> 1), asoc->pathmtu))) return 1; break; default: diff --git a/net/sctp/input.c b/net/sctp/input.c index b24ff2c1aef..238f1bffa68 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -305,18 +305,36 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, struct sctp_transport *t, __u32 pmtu) { - if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { - printk(KERN_WARNING "%s: Reported pmtu %d too low, " - "using default minimum of %d\n", __FUNCTION__, pmtu, - SCTP_DEFAULT_MINSEGMENT); - pmtu = SCTP_DEFAULT_MINSEGMENT; - } + if (sock_owned_by_user(sk) || !t || (t->pathmtu == pmtu)) + return; - if (!sock_owned_by_user(sk) && t && (t->pmtu != pmtu)) { - t->pmtu = pmtu; + if (t->param_flags & SPP_PMTUD_ENABLE) { + if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { + printk(KERN_WARNING "%s: Reported pmtu %d too low, " + "using default minimum of %d\n", + __FUNCTION__, pmtu, + SCTP_DEFAULT_MINSEGMENT); + /* Use default minimum segment size and disable + * pmtu discovery on this transport. + */ + t->pathmtu = SCTP_DEFAULT_MINSEGMENT; + t->param_flags = (t->param_flags & ~SPP_HB) | + SPP_PMTUD_DISABLE; + } else { + t->pathmtu = pmtu; + } + + /* Update association pmtu. */ sctp_assoc_sync_pmtu(asoc); - sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD); } + + /* Retransmit with the new pmtu setting. + * Normally, if PMTU discovery is disabled, an ICMP Fragmentation + * Needed will never be sent, but if a message was sent before + * PMTU discovery was disabled that was larger than the PMTU, it + * would not be fragmented, so it must be re-transmitted fragmented. 
+ */ + sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD); } /* diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index fa3be2b8fb5..15c05165c90 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -866,7 +866,7 @@ static int sctp_inet6_supported_addrs(const struct sctp_sock *opt, return 2; } -static struct proto_ops inet6_seqpacket_ops = { +static const struct proto_ops inet6_seqpacket_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, diff --git a/net/sctp/output.c b/net/sctp/output.c index 93137163346..a40991ef72c 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -234,8 +234,8 @@ sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet, goto finish; pmtu = ((packet->transport->asoc) ? - (packet->transport->asoc->pmtu) : - (packet->transport->pmtu)); + (packet->transport->asoc->pathmtu) : + (packet->transport->pathmtu)); too_big = (psize + chunk_len > pmtu); @@ -482,7 +482,9 @@ int sctp_packet_transmit(struct sctp_packet *packet) if (!dst || (dst->obsolete > 1)) { dst_release(dst); sctp_transport_route(tp, NULL, sctp_sk(sk)); - sctp_assoc_sync_pmtu(asoc); + if (asoc->param_flags & SPP_PMTUD_ENABLE) { + sctp_assoc_sync_pmtu(asoc); + } } nskb->dst = dst_clone(tp->dst); @@ -492,7 +494,10 @@ int sctp_packet_transmit(struct sctp_packet *packet) SCTP_DEBUG_PRINTK("***sctp_transmit_packet*** skb len %d\n", nskb->len); - (*tp->af_specific->sctp_xmit)(nskb, tp, packet->ipfragok); + if (tp->param_flags & SPP_PMTUD_ENABLE) + (*tp->af_specific->sctp_xmit)(nskb, tp, packet->ipfragok); + else + (*tp->af_specific->sctp_xmit)(nskb, tp, 1); out: packet->size = packet->overhead; @@ -577,7 +582,7 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, * if ((flightsize + Max.Burst * MTU) < cwnd) * cwnd = flightsize + Max.Burst * MTU */ - max_burst_bytes = asoc->max_burst * asoc->pmtu; + max_burst_bytes = asoc->max_burst * asoc->pathmtu; if ((transport->flight_size + max_burst_bytes) < transport->cwnd) { transport->cwnd = transport->flight_size + max_burst_bytes; SCTP_DEBUG_PRINTK("%s: cwnd limited by max_burst: " @@ -622,7 +627,7 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, * data will fit or delay in hopes of bundling a full * sized packet. */ - if (len < asoc->pmtu - packet->overhead) { + if (len < asoc->pathmtu - packet->overhead) { retval = SCTP_XMIT_NAGLE_DELAY; goto finish; } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index f775d78aa59..de693b43c8e 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -54,6 +54,7 @@ #include <net/protocol.h> #include <net/ip.h> #include <net/ipv6.h> +#include <net/route.h> #include <net/sctp/sctp.h> #include <net/addrconf.h> #include <net/inet_common.h> @@ -829,7 +830,7 @@ static struct notifier_block sctp_inetaddr_notifier = { }; /* Socket operations. */ -static struct proto_ops inet_seqpacket_ops = { +static const struct proto_ops inet_seqpacket_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, /* Needs to be wrapped... 
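[Editorial sketch] The input and output changes above clamp an implausibly small ICMP-reported PMTU, turn discovery off for that path, and let IP fragment on transmit whenever discovery is off. The decision logic in miniature; MIN_SEG mirrors SCTP_DEFAULT_MINSEGMENT, which is 512 bytes in this tree:

#define MIN_SEG 512u

struct path { unsigned int pmtu; int pmtud_enabled; };

static void icmp_frag_needed(struct path *p, unsigned int reported)
{
        if (!p->pmtud_enabled)
                return;                 /* fixed pmtu: ignore the report */
        if (reported < MIN_SEG) {
                p->pmtu = MIN_SEG;      /* don't trust tiny values */
                p->pmtud_enabled = 0;   /* and stop probing this path */
        } else {
                p->pmtu = reported;
        }
}

static int ipfragok(const struct path *p)
{
        return !p->pmtud_enabled;       /* fragmentation allowed if PMTUD off */
}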
*/ diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 823947170a3..2d7d8a5db2a 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -157,9 +157,12 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force, { __u32 ctsn, max_tsn_seen; struct sctp_chunk *sack; + struct sctp_transport *trans = asoc->peer.last_data_from; int error = 0; - if (force) + if (force || + (!trans && (asoc->param_flags & SPP_SACKDELAY_DISABLE)) || + (trans && (trans->param_flags & SPP_SACKDELAY_DISABLE))) asoc->peer.sack_needed = 1; ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map); @@ -189,7 +192,22 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force, if (!asoc->peer.sack_needed) { /* We will need a SACK for the next packet. */ asoc->peer.sack_needed = 1; - goto out; + + /* Set the SACK delay timeout based on the + * SACK delay for the last transport + * data was received from, or the default + * for the association. + */ + if (trans) + asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = + trans->sackdelay; + else + asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = + asoc->sackdelay; + + /* Restart the SACK timer. */ + sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, + SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); } else { if (asoc->a_rwnd > asoc->rwnd) asoc->a_rwnd = asoc->rwnd; @@ -205,7 +223,7 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force, sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); } -out: + return error; nomem: error = -ENOMEM; @@ -415,7 +433,7 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, asoc->overall_error_count++; if (transport->state != SCTP_INACTIVE && - (transport->error_count++ >= transport->max_retrans)) { + (transport->error_count++ >= transport->pathmaxrxt)) { SCTP_DEBUG_PRINTK_IPADDR("transport_strike:association %p", " transport IP: port:%d failed.\n", asoc, diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 475bfb4972d..557a7d90b92 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -900,7 +900,7 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep, * HEARTBEAT is sent (see Section 8.3). */ - if (transport->hb_allowed) { + if (transport->param_flags & SPP_HB_ENABLE) { if (SCTP_DISPOSITION_NOMEM == sctp_sf_heartbeat(ep, asoc, type, arg, commands)) @@ -1051,7 +1051,7 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep, return SCTP_DISPOSITION_DISCARD; } - max_interval = link->hb_interval + link->rto; + max_interval = link->hbinterval + link->rto; /* Check if the timestamp looks valid. */ if (time_after(hbinfo->sent_at, jiffies) || @@ -2691,14 +2691,9 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, * document allow. However, an SCTP transmitter MUST NOT be * more aggressive than the following algorithms allow. */ - if (chunk->end_of_packet) { + if (chunk->end_of_packet) sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE()); - /* Start the SACK timer. */ - sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, - SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); - } - return SCTP_DISPOSITION_CONSUME; discard_force: @@ -2721,13 +2716,9 @@ discard_force: return SCTP_DISPOSITION_DISCARD; discard_noforce: - if (chunk->end_of_packet) { + if (chunk->end_of_packet) sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE()); - /* Start the SACK timer. 
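[Editorial sketch] sctp_gen_sack() above now restarts the SACK timer itself, choosing the per-transport delay when the source of the last data is known and the association default otherwise, and forcing an immediate SACK when either level disables delay. Distilled into one helper (0 means "SACK now"):

struct tmo { unsigned long sackdelay; int delay_disabled; };

static unsigned long pick_sack_timeout(const struct tmo *trans,
                                       const struct tmo *asoc)
{
        const struct tmo *src = trans ? trans : asoc;

        if (src->delay_disabled)
                return 0;               /* the added "force" condition */
        return src->sackdelay;
}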
*/ - sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, - SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); - } return SCTP_DISPOSITION_DISCARD; consume: return SCTP_DISPOSITION_CONSUME; @@ -3442,9 +3433,6 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(const struct sctp_endpoint *ep, * send another. */ sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE()); - /* Start the SACK timer. */ - sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, - SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); return SCTP_DISPOSITION_CONSUME; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9df888e932c..fc04d185fa3 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1941,107 +1941,379 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, * address's parameters: * * struct sctp_paddrparams { - * sctp_assoc_t spp_assoc_id; - * struct sockaddr_storage spp_address; - * uint32_t spp_hbinterval; - * uint16_t spp_pathmaxrxt; - * }; - * - * spp_assoc_id - (UDP style socket) This is filled in the application, - * and identifies the association for this query. + * sctp_assoc_t spp_assoc_id; + * struct sockaddr_storage spp_address; + * uint32_t spp_hbinterval; + * uint16_t spp_pathmaxrxt; + * uint32_t spp_pathmtu; + * uint32_t spp_sackdelay; + * uint32_t spp_flags; + * }; + * + * spp_assoc_id - (one-to-many style socket) This is filled in the + * application, and identifies the association for + * this query. * spp_address - This specifies which address is of interest. * spp_hbinterval - This contains the value of the heartbeat interval, - * in milliseconds. A value of 0, when modifying the - * parameter, specifies that the heartbeat on this - * address should be disabled. A value of UINT32_MAX - * (4294967295), when modifying the parameter, - * specifies that a heartbeat should be sent - * immediately to the peer address, and the current - * interval should remain unchanged. + * in milliseconds. If a value of zero + * is present in this field then no changes are to + * be made to this parameter. * spp_pathmaxrxt - This contains the maximum number of * retransmissions before this address shall be - * considered unreachable. + * considered unreachable. If a value of zero + * is present in this field then no changes are to + * be made to this parameter. + * spp_pathmtu - When Path MTU discovery is disabled the value + * specified here will be the "fixed" path mtu. + * Note that if the spp_address field is empty + * then all associations on this address will + * have this fixed path mtu set upon them. + * + * spp_sackdelay - When delayed sack is enabled, this value specifies + * the number of milliseconds that sacks will be delayed + * for. This value will apply to all addresses of an + * association if the spp_address field is empty. Note + * also, that if delayed sack is enabled and this + * value is set to 0, no change is made to the last + * recorded delayed sack timer value. + * + * spp_flags - These flags are used to control various features + * on an association. The flag field may contain + * zero or more of the following options. + * + * SPP_HB_ENABLE - Enable heartbeats on the + * specified address. Note that if the address + * field is empty all addresses for the association + * have heartbeats enabled upon them. + * + * SPP_HB_DISABLE - Disable heartbeats on the + * speicifed address. Note that if the address + * field is empty all addresses for the association + * will have their heartbeats disabled. 
Note also + * that SPP_HB_ENABLE and SPP_HB_DISABLE are + * mutually exclusive, only one of these two should + * be specified. Enabling both fields will have + * undetermined results. + * + * SPP_HB_DEMAND - Request a user initiated heartbeat + * to be made immediately. + * + * SPP_PMTUD_ENABLE - This field will enable PMTU + * discovery upon the specified address. Note that + * if the address feild is empty then all addresses + * on the association are effected. + * + * SPP_PMTUD_DISABLE - This field will disable PMTU + * discovery upon the specified address. Note that + * if the address feild is empty then all addresses + * on the association are effected. Not also that + * SPP_PMTUD_ENABLE and SPP_PMTUD_DISABLE are mutually + * exclusive. Enabling both will have undetermined + * results. + * + * SPP_SACKDELAY_ENABLE - Setting this flag turns + * on delayed sack. The time specified in spp_sackdelay + * is used to specify the sack delay for this address. Note + * that if spp_address is empty then all addresses will + * enable delayed sack and take on the sack delay + * value specified in spp_sackdelay. + * SPP_SACKDELAY_DISABLE - Setting this flag turns + * off delayed sack. If the spp_address field is blank then + * delayed sack is disabled for the entire association. Note + * also that this field is mutually exclusive to + * SPP_SACKDELAY_ENABLE, setting both will have undefined + * results. */ +int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, + struct sctp_transport *trans, + struct sctp_association *asoc, + struct sctp_sock *sp, + int hb_change, + int pmtud_change, + int sackdelay_change) +{ + int error; + + if (params->spp_flags & SPP_HB_DEMAND && trans) { + error = sctp_primitive_REQUESTHEARTBEAT (trans->asoc, trans); + if (error) + return error; + } + + if (params->spp_hbinterval) { + if (trans) { + trans->hbinterval = msecs_to_jiffies(params->spp_hbinterval); + } else if (asoc) { + asoc->hbinterval = msecs_to_jiffies(params->spp_hbinterval); + } else { + sp->hbinterval = params->spp_hbinterval; + } + } + + if (hb_change) { + if (trans) { + trans->param_flags = + (trans->param_flags & ~SPP_HB) | hb_change; + } else if (asoc) { + asoc->param_flags = + (asoc->param_flags & ~SPP_HB) | hb_change; + } else { + sp->param_flags = + (sp->param_flags & ~SPP_HB) | hb_change; + } + } + + if (params->spp_pathmtu) { + if (trans) { + trans->pathmtu = params->spp_pathmtu; + sctp_assoc_sync_pmtu(asoc); + } else if (asoc) { + asoc->pathmtu = params->spp_pathmtu; + sctp_frag_point(sp, params->spp_pathmtu); + } else { + sp->pathmtu = params->spp_pathmtu; + } + } + + if (pmtud_change) { + if (trans) { + int update = (trans->param_flags & SPP_PMTUD_DISABLE) && + (params->spp_flags & SPP_PMTUD_ENABLE); + trans->param_flags = + (trans->param_flags & ~SPP_PMTUD) | pmtud_change; + if (update) { + sctp_transport_pmtu(trans); + sctp_assoc_sync_pmtu(asoc); + } + } else if (asoc) { + asoc->param_flags = + (asoc->param_flags & ~SPP_PMTUD) | pmtud_change; + } else { + sp->param_flags = + (sp->param_flags & ~SPP_PMTUD) | pmtud_change; + } + } + + if (params->spp_sackdelay) { + if (trans) { + trans->sackdelay = + msecs_to_jiffies(params->spp_sackdelay); + } else if (asoc) { + asoc->sackdelay = + msecs_to_jiffies(params->spp_sackdelay); + } else { + sp->sackdelay = params->spp_sackdelay; + } + } + + if (sackdelay_change) { + if (trans) { + trans->param_flags = + (trans->param_flags & ~SPP_SACKDELAY) | + sackdelay_change; + } else if (asoc) { + asoc->param_flags = + (asoc->param_flags & 
~SPP_SACKDELAY) | + sackdelay_change; + } else { + sp->param_flags = + (sp->param_flags & ~SPP_SACKDELAY) | + sackdelay_change; + } + } + + if (params->spp_pathmaxrxt) { + if (trans) { + trans->pathmaxrxt = params->spp_pathmaxrxt; + } else if (asoc) { + asoc->pathmaxrxt = params->spp_pathmaxrxt; + } else { + sp->pathmaxrxt = params->spp_pathmaxrxt; + } + } + + return 0; +} + static int sctp_setsockopt_peer_addr_params(struct sock *sk, char __user *optval, int optlen) { - struct sctp_paddrparams params; - struct sctp_transport *trans; + struct sctp_paddrparams params; + struct sctp_transport *trans = NULL; + struct sctp_association *asoc = NULL; + struct sctp_sock *sp = sctp_sk(sk); int error; + int hb_change, pmtud_change, sackdelay_change; if (optlen != sizeof(struct sctp_paddrparams)) - return -EINVAL; + return - EINVAL; + if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; - /* - * API 7. Socket Options (setting the default value for the endpoint) - * All options that support specific settings on an association by - * filling in either an association id variable or a sockaddr_storage - * SHOULD also support setting of the same value for the entire endpoint - * (i.e. future associations). To accomplish this the following logic is - * used when setting one of these options: - - * c) If neither the sockaddr_storage or association identification is - * set i.e. the sockaddr_storage is set to all 0's (INADDR_ANY) and - * the association identification is 0, the settings are a default - * and to be applied to the endpoint (all future associations). - */ + /* Validate flags and value parameters. */ + hb_change = params.spp_flags & SPP_HB; + pmtud_change = params.spp_flags & SPP_PMTUD; + sackdelay_change = params.spp_flags & SPP_SACKDELAY; + + if (hb_change == SPP_HB || + pmtud_change == SPP_PMTUD || + sackdelay_change == SPP_SACKDELAY || + params.spp_sackdelay > 500 || + (params.spp_pathmtu + && params.spp_pathmtu < SCTP_DEFAULT_MINSEGMENT)) + return -EINVAL; - /* update default value for endpoint (all future associations) */ - if (!params.spp_assoc_id && - sctp_is_any(( union sctp_addr *)¶ms.spp_address)) { - /* Manual heartbeat on an endpoint is invalid. */ - if (0xffffffff == params.spp_hbinterval) + /* If an address other than INADDR_ANY is specified, and + * no transport is found, then the request is invalid. + */ + if (!sctp_is_any(( union sctp_addr *)¶ms.spp_address)) { + trans = sctp_addr_id2transport(sk, ¶ms.spp_address, + params.spp_assoc_id); + if (!trans) return -EINVAL; - else if (params.spp_hbinterval) - sctp_sk(sk)->paddrparam.spp_hbinterval = - params.spp_hbinterval; - if (params.spp_pathmaxrxt) - sctp_sk(sk)->paddrparam.spp_pathmaxrxt = - params.spp_pathmaxrxt; - return 0; } - trans = sctp_addr_id2transport(sk, ¶ms.spp_address, - params.spp_assoc_id); - if (!trans) + /* Get association, if assoc_id != 0 and the socket is a one + * to many style socket, and an association was not found, then + * the id was invalid. + */ + asoc = sctp_id2assoc(sk, params.spp_assoc_id); + if (!asoc && params.spp_assoc_id && sctp_style(sk, UDP)) return -EINVAL; - /* Applications can enable or disable heartbeats for any peer address - * of an association, modify an address's heartbeat interval, force a - * heartbeat to be sent immediately, and adjust the address's maximum - * number of retransmissions sent before an address is considered - * unreachable. - * - * The value of the heartbeat interval, in milliseconds. 
A value of - * UINT32_MAX (4294967295), when modifying the parameter, specifies - * that a heartbeat should be sent immediately to the peer address, - * and the current interval should remain unchanged. + /* Heartbeat demand can only be sent on a transport or + * association, but not a socket. */ - if (0xffffffff == params.spp_hbinterval) { - error = sctp_primitive_REQUESTHEARTBEAT (trans->asoc, trans); - if (error) - return error; - } else { - /* The value of the heartbeat interval, in milliseconds. A value of 0, - * when modifying the parameter, specifies that the heartbeat on this - * address should be disabled. + if (params.spp_flags & SPP_HB_DEMAND && !trans && !asoc) + return -EINVAL; + + /* Process parameters. */ + error = sctp_apply_peer_addr_params(&params, trans, asoc, sp, + hb_change, pmtud_change, + sackdelay_change); + + if (error) + return error; + + /* If changes are for the association, also apply parameters to each + * transport. */ - if (params.spp_hbinterval) { - trans->hb_allowed = 1; - trans->hb_interval = - msecs_to_jiffies(params.spp_hbinterval); - } else - trans->hb_allowed = 0; + if (!trans && asoc) { + struct list_head *pos; + + list_for_each(pos, &asoc->peer.transport_addr_list) { + trans = list_entry(pos, struct sctp_transport, + transports); + sctp_apply_peer_addr_params(&params, trans, asoc, sp, + hb_change, pmtud_change, + sackdelay_change); + } } - /* spp_pathmaxrxt contains the maximum number of retransmissions - * before this address shall be considered unreachable. - */ - if (params.spp_pathmaxrxt) - trans->max_retrans = params.spp_pathmaxrxt; + return 0; +} +
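For orientation, here is a minimal userspace sketch of driving the setsockopt path just added; it is illustrative only and not part of the patch, and the descriptor sd and the assoc_id value are assumed to come from the caller:

#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/sctp.h>

/* Enable heartbeats at a 5000 ms interval on every path of one
 * association; a zeroed spp_address selects the whole association.
 */
static int enable_heartbeats(int sd, sctp_assoc_t assoc_id)
{
        struct sctp_paddrparams p;

        memset(&p, 0, sizeof(p));
        p.spp_assoc_id = assoc_id;
        p.spp_hbinterval = 5000;        /* milliseconds */
        p.spp_flags = SPP_HB_ENABLE;    /* never together with SPP_HB_DISABLE */

        return setsockopt(sd, IPPROTO_SCTP, SCTP_PEER_ADDR_PARAMS,
                          &p, sizeof(p));
}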
+/* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME) + * + * This option will get or set the delayed ack timer. The time is set + * in milliseconds. If the assoc_id is 0, then this sets or gets the + * endpoint's default delayed ack timer value. If the assoc_id field is + * non-zero, then the set or get affects the specified association. + * + * struct sctp_assoc_value { + * sctp_assoc_t assoc_id; + * uint32_t assoc_value; + * }; + * + * assoc_id - This parameter indicates which association the + * user is performing an action upon. Note that if + * this field's value is zero then the endpoint's + * default value is changed (affecting future + * associations only). + * + * assoc_value - This parameter contains the number of milliseconds + * that the user is requesting the delayed ACK timer + * be set to. Note that this value is defined in + * the standard to be between 200 and 500 milliseconds. + * + * Note: a value of zero will leave the value alone, + * but disable SACK delay. A non-zero value will also + * enable SACK delay. + */ +static int sctp_setsockopt_delayed_ack_time(struct sock *sk, + char __user *optval, int optlen) +{ + struct sctp_assoc_value params; + struct sctp_transport *trans = NULL; + struct sctp_association *asoc = NULL; + struct sctp_sock *sp = sctp_sk(sk); + + if (optlen != sizeof(struct sctp_assoc_value)) + return -EINVAL; + + if (copy_from_user(&params, optval, optlen)) + return -EFAULT; + + /* Validate value parameter. */ + if (params.assoc_value > 500) + return -EINVAL; + + /* Get the association. If assoc_id != 0 and the socket is a one + * to many style socket, and an association was not found, then + * the id was invalid. + */ + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id && sctp_style(sk, UDP)) + return -EINVAL; + + if (params.assoc_value) { + if (asoc) { + asoc->sackdelay = + msecs_to_jiffies(params.assoc_value); + asoc->param_flags = + (asoc->param_flags & ~SPP_SACKDELAY) | + SPP_SACKDELAY_ENABLE; + } else { + sp->sackdelay = params.assoc_value; + sp->param_flags = + (sp->param_flags & ~SPP_SACKDELAY) | + SPP_SACKDELAY_ENABLE; + } + } else { + if (asoc) { + asoc->param_flags = + (asoc->param_flags & ~SPP_SACKDELAY) | + SPP_SACKDELAY_DISABLE; + } else { + sp->param_flags = + (sp->param_flags & ~SPP_SACKDELAY) | + SPP_SACKDELAY_DISABLE; + } + } + + /* If change is for the association, also apply to each transport. */ + if (asoc) { + struct list_head *pos; + + list_for_each(pos, &asoc->peer.transport_addr_list) { + trans = list_entry(pos, struct sctp_transport, + transports); + if (params.assoc_value) { + trans->sackdelay = + msecs_to_jiffies(params.assoc_value); + trans->param_flags = + (trans->param_flags & ~SPP_SACKDELAY) | + SPP_SACKDELAY_ENABLE; + } else { + trans->param_flags = + (trans->param_flags & ~SPP_SACKDELAY) | + SPP_SACKDELAY_DISABLE; + } + } + } + return 0; } @@ -2334,7 +2606,7 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, int optl /* Update the frag_point of the existing associations. */ list_for_each(pos, &(sp->ep->asocs)) { asoc = list_entry(pos, struct sctp_association, asocs); - asoc->frag_point = sctp_frag_point(sp, asoc->pmtu); + asoc->frag_point = sctp_frag_point(sp, asoc->pathmtu); } return 0; @@ -2491,6 +2763,10 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_peer_addr_params(sk, optval, optlen); break; + case SCTP_DELAYED_ACK_TIME: + retval = sctp_setsockopt_delayed_ack_time(sk, optval, optlen); + break; + case SCTP_INITMSG: retval = sctp_setsockopt_initmsg(sk, optval, optlen); break; @@ -2715,8 +2991,13 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) /* Default Peer Address Parameters. These defaults can * be modified via SCTP_PEER_ADDR_PARAMS */ - sp->paddrparam.spp_hbinterval = jiffies_to_msecs(sctp_hb_interval); - sp->paddrparam.spp_pathmaxrxt = sctp_max_retrans_path; + sp->hbinterval = jiffies_to_msecs(sctp_hb_interval); + sp->pathmaxrxt = sctp_max_retrans_path; + sp->pathmtu = 0; /* allow default discovery */ + sp->sackdelay = sctp_sack_timeout; + sp->param_flags = SPP_HB_ENABLE | + SPP_PMTUD_ENABLE | + SPP_SACKDELAY_ENABLE; /* If enabled no SCTP message fragmentation will be performed. * Configure through SCTP_DISABLE_FRAGMENTS socket option. 
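The new SCTP_DELAYED_ACK_TIME option added above can be exercised the same way; a hedged userspace sketch (sd and assoc_id are assumed; passing assoc_id == 0 changes the endpoint default instead):

#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/sctp.h>

/* Request a 200 ms SACK delay; a value of 0 would disable SACK delay
 * while leaving the recorded timer value alone.
 */
static int set_sack_delay(int sd, sctp_assoc_t assoc_id)
{
        struct sctp_assoc_value av;

        memset(&av, 0, sizeof(av));
        av.assoc_id = assoc_id;
        av.assoc_value = 200;   /* milliseconds, 200..500 per the spec */

        return setsockopt(sd, IPPROTO_SCTP, SCTP_DELAYED_ACK_TIME,
                          &av, sizeof(av));
}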
@@ -2865,7 +3146,7 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len, status.sstat_primary.spinfo_cwnd = transport->cwnd; status.sstat_primary.spinfo_srtt = transport->srtt; status.sstat_primary.spinfo_rto = jiffies_to_msecs(transport->rto); - status.sstat_primary.spinfo_mtu = transport->pmtu; + status.sstat_primary.spinfo_mtu = transport->pathmtu; if (status.sstat_primary.spinfo_state == SCTP_UNKNOWN) status.sstat_primary.spinfo_state = SCTP_ACTIVE; @@ -2924,7 +3205,7 @@ static int sctp_getsockopt_peer_addr_info(struct sock *sk, int len, pinfo.spinfo_cwnd = transport->cwnd; pinfo.spinfo_srtt = transport->srtt; pinfo.spinfo_rto = jiffies_to_msecs(transport->rto); - pinfo.spinfo_mtu = transport->pmtu; + pinfo.spinfo_mtu = transport->pathmtu; if (pinfo.spinfo_state == SCTP_UNKNOWN) pinfo.spinfo_state = SCTP_ACTIVE; @@ -3086,69 +3367,227 @@ out: * address's parameters: * * struct sctp_paddrparams { - * sctp_assoc_t spp_assoc_id; - * struct sockaddr_storage spp_address; - * uint32_t spp_hbinterval; - * uint16_t spp_pathmaxrxt; - * }; - * - * spp_assoc_id - (UDP style socket) This is filled in the application, - * and identifies the association for this query. + * sctp_assoc_t spp_assoc_id; + * struct sockaddr_storage spp_address; + * uint32_t spp_hbinterval; + * uint16_t spp_pathmaxrxt; + * uint32_t spp_pathmtu; + * uint32_t spp_sackdelay; + * uint32_t spp_flags; + * }; + * + * spp_assoc_id - (one-to-many style socket) This is filled in by the + * application, and identifies the association for + * this query. * spp_address - This specifies which address is of interest. * spp_hbinterval - This contains the value of the heartbeat interval, - * in milliseconds. A value of 0, when modifying the - * parameter, specifies that the heartbeat on this - * address should be disabled. A value of UINT32_MAX - * (4294967295), when modifying the parameter, - * specifies that a heartbeat should be sent - * immediately to the peer address, and the current - * interval should remain unchanged. + * in milliseconds. If a value of zero + * is present in this field then no changes are to + * be made to this parameter. * spp_pathmaxrxt - This contains the maximum number of * retransmissions before this address shall be - * considered unreachable. + * considered unreachable. If a value of zero + * is present in this field then no changes are to + * be made to this parameter. + * spp_pathmtu - When Path MTU discovery is disabled, the value + * specified here will be the "fixed" path mtu. + * Note that if the spp_address field is empty + * then all associations on this address will + * have this fixed path mtu set upon them. + * + * spp_sackdelay - When delayed sack is enabled, this value specifies + * the number of milliseconds that sacks will be delayed + * for. This value will apply to all addresses of an + * association if the spp_address field is empty. Note + * also that if delayed sack is enabled and this + * value is set to 0, no change is made to the last + * recorded delayed sack timer value. + * + * spp_flags - These flags are used to control various features + * on an association. The flag field may contain + * zero or more of the following options. + * + * SPP_HB_ENABLE - Enable heartbeats on the + * specified address. Note that if the address + * field is empty all addresses for the association + * have heartbeats enabled upon them. + * + * SPP_HB_DISABLE - Disable heartbeats on the + * specified address. 
Note that if the address + * field is empty all addresses for the association + * will have their heartbeats disabled. Note also + * that SPP_HB_ENABLE and SPP_HB_DISABLE are + * mutually exclusive; only one of these two should + * be specified. Enabling both flags will have + * undetermined results. + * + * SPP_HB_DEMAND - Request a user-initiated heartbeat + * to be made immediately. + * + * SPP_PMTUD_ENABLE - This field will enable PMTU + * discovery upon the specified address. Note that + * if the address field is empty then all addresses + * on the association are affected. + * + * SPP_PMTUD_DISABLE - This field will disable PMTU + * discovery upon the specified address. Note that + * if the address field is empty then all addresses + * on the association are affected. Note also that + * SPP_PMTUD_ENABLE and SPP_PMTUD_DISABLE are mutually + * exclusive. Enabling both will have undetermined + * results. + * + * SPP_SACKDELAY_ENABLE - Setting this flag turns + * on delayed sack. The time specified in spp_sackdelay + * is used to specify the sack delay for this address. Note + * that if spp_address is empty then all addresses will + * enable delayed sack and take on the sack delay + * value specified in spp_sackdelay. + * SPP_SACKDELAY_DISABLE - Setting this flag turns + * off delayed sack. If the spp_address field is blank then + * delayed sack is disabled for the entire association. Note + * also that this field is mutually exclusive with + * SPP_SACKDELAY_ENABLE; setting both will have undefined + * results. */ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len, - char __user *optval, int __user *optlen) + char __user *optval, int __user *optlen) { - struct sctp_paddrparams params; - struct sctp_transport *trans; + struct sctp_paddrparams params; + struct sctp_transport *trans = NULL; + struct sctp_association *asoc = NULL; + struct sctp_sock *sp = sctp_sk(sk); if (len != sizeof(struct sctp_paddrparams)) return -EINVAL; + if (copy_from_user(&params, optval, len)) return -EFAULT; - /* If no association id is specified retrieve the default value - * for the endpoint that will be used for all future associations + /* If an address other than INADDR_ANY is specified, and + * no transport is found, then the request is invalid. */ - if (!params.spp_assoc_id && - sctp_is_any(( union sctp_addr *)&params.spp_address)) { - params.spp_hbinterval = sctp_sk(sk)->paddrparam.spp_hbinterval; - params.spp_pathmaxrxt = sctp_sk(sk)->paddrparam.spp_pathmaxrxt; - - goto done; + if (!sctp_is_any(( union sctp_addr *)&params.spp_address)) { + trans = sctp_addr_id2transport(sk, &params.spp_address, + params.spp_assoc_id); + if (!trans) { + SCTP_DEBUG_PRINTK("Failed no transport\n"); + return -EINVAL; + } } - trans = sctp_addr_id2transport(sk, &params.spp_address, - params.spp_assoc_id); - if (!trans) + /* Get the association. If assoc_id != 0 and the socket is a one + * to many style socket, and an association was not found, then + * the id was invalid. + */ + asoc = sctp_id2assoc(sk, params.spp_assoc_id); + if (!asoc && params.spp_assoc_id && sctp_style(sk, UDP)) { + SCTP_DEBUG_PRINTK("Failed no association\n"); return -EINVAL; + } - /* The value of the heartbeat interval, in milliseconds. A value of 0, - * when modifying the parameter, specifies that the heartbeat on this - * address should be disabled. - */ - if (!trans->hb_allowed) - params.spp_hbinterval = 0; - else - params.spp_hbinterval = jiffies_to_msecs(trans->hb_interval); + if (trans) { + /* Fetch transport values. 
*/ + params.spp_hbinterval = jiffies_to_msecs(trans->hbinterval); + params.spp_pathmtu = trans->pathmtu; + params.spp_pathmaxrxt = trans->pathmaxrxt; + params.spp_sackdelay = jiffies_to_msecs(trans->sackdelay); + + /* draft-11 doesn't say what to return in spp_flags */ + params.spp_flags = trans->param_flags; + } else if (asoc) { + /* Fetch association values. */ + params.spp_hbinterval = jiffies_to_msecs(asoc->hbinterval); + params.spp_pathmtu = asoc->pathmtu; + params.spp_pathmaxrxt = asoc->pathmaxrxt; + params.spp_sackdelay = jiffies_to_msecs(asoc->sackdelay); + + /* draft-11 doesn't say what to return in spp_flags */ + params.spp_flags = asoc->param_flags; + } else { + /* Fetch socket values. */ + params.spp_hbinterval = sp->hbinterval; + params.spp_pathmtu = sp->pathmtu; + params.spp_sackdelay = sp->sackdelay; + params.spp_pathmaxrxt = sp->pathmaxrxt; + + /* draft-11 doesn't say what to return in spp_flags */ + params.spp_flags = sp->param_flags; + } - /* spp_pathmaxrxt contains the maximum number of retransmissions - * before this address shall be considered unreachable. - */ - params.spp_pathmaxrxt = trans->max_retrans; + if (copy_to_user(optval, &params, len)) + return -EFAULT; + + if (put_user(len, optlen)) + return -EFAULT; + + return 0; +} +
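Reading the values back mirrors the setsockopt side. A short userspace sketch, illustrative only: with a zeroed spp_address and assoc_id == 0 the call returns the endpoint defaults fetched in the final branch above:

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/sctp.h>

static void dump_endpoint_defaults(int sd)
{
        struct sctp_paddrparams p;
        socklen_t len = sizeof(p);

        memset(&p, 0, sizeof(p));
        if (getsockopt(sd, IPPROTO_SCTP, SCTP_PEER_ADDR_PARAMS,
                       &p, &len) == 0)
                printf("hb=%u ms maxrxt=%u pmtu=%u sackdelay=%u ms flags=%#x\n",
                       p.spp_hbinterval, p.spp_pathmaxrxt, p.spp_pathmtu,
                       p.spp_sackdelay, p.spp_flags);
}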
+/* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME) + * + * This option will get or set the delayed ack timer. The time is set + * in milliseconds. If the assoc_id is 0, then this sets or gets the + * endpoint's default delayed ack timer value. If the assoc_id field is + * non-zero, then the set or get affects the specified association. + * + * struct sctp_assoc_value { + * sctp_assoc_t assoc_id; + * uint32_t assoc_value; + * }; + * + * assoc_id - This parameter indicates which association the + * user is performing an action upon. Note that if + * this field's value is zero then the endpoint's + * default value is changed (affecting future + * associations only). + * + * assoc_value - This parameter contains the number of milliseconds + * that the user is requesting the delayed ACK timer + * be set to. Note that this value is defined in + * the standard to be between 200 and 500 milliseconds. + * + * Note: a value of zero will leave the value alone, + * but disable SACK delay. A non-zero value will also + * enable SACK delay. + */ +static int sctp_getsockopt_delayed_ack_time(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc = NULL; + struct sctp_sock *sp = sctp_sk(sk); + + if (len != sizeof(struct sctp_assoc_value)) + return -EINVAL; + + if (copy_from_user(&params, optval, len)) + return -EFAULT; + + /* Get the association. If assoc_id != 0 and the socket is a one + * to many style socket, and an association was not found, then + * the id was invalid. + */ + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id && sctp_style(sk, UDP)) + return -EINVAL; + + if (asoc) { + /* Fetch association values. */ + if (asoc->param_flags & SPP_SACKDELAY_ENABLE) + params.assoc_value = jiffies_to_msecs( + asoc->sackdelay); + else + params.assoc_value = 0; + } else { + /* Fetch socket values. */ + if (sp->param_flags & SPP_SACKDELAY_ENABLE) + params.assoc_value = sp->sackdelay; + else + params.assoc_value = 0; + } -done: if (copy_to_user(optval, &params, len)) return -EFAULT; @@ -4015,6 +4454,10 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, retval = sctp_getsockopt_peer_addr_params(sk, len, optval, optlen); break; + case SCTP_DELAYED_ACK_TIME: + retval = sctp_getsockopt_delayed_ack_time(sk, len, optval, + optlen); + break; case SCTP_INITMSG: retval = sctp_getsockopt_initmsg(sk, len, optval, optlen); break; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 268ddaf2dc0..68d73e2dd15 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -86,10 +86,13 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, peer->init_sent_count = 0; peer->state = SCTP_ACTIVE; - peer->hb_allowed = 0; + peer->param_flags = SPP_HB_DISABLE | + SPP_PMTUD_ENABLE | + SPP_SACKDELAY_ENABLE; + peer->hbinterval = 0; /* Initialize the default path max_retrans. */ - peer->max_retrans = sctp_max_retrans_path; + peer->pathmaxrxt = sctp_max_retrans_path; peer->error_count = 0; INIT_LIST_HEAD(&peer->transmitted); @@ -229,10 +232,10 @@ void sctp_transport_pmtu(struct sctp_transport *transport) dst = transport->af_specific->get_dst(NULL, &transport->ipaddr, NULL); if (dst) { - transport->pmtu = dst_mtu(dst); + transport->pathmtu = dst_mtu(dst); dst_release(dst); } else - transport->pmtu = SCTP_DEFAULT_MAXSEGMENT; + transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } /* Caches the dst entry and source address for a transport's destination @@ -254,8 +257,11 @@ void sctp_transport_route(struct sctp_transport *transport, af->get_saddr(asoc, dst, daddr, &transport->saddr); transport->dst = dst; + if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) { + return; + } if (dst) { - transport->pmtu = dst_mtu(dst); + transport->pathmtu = dst_mtu(dst); /* Initialize sk->sk_rcv_saddr, if the transport is the * association's active path for getsockname(). @@ -264,7 +270,7 @@ void sctp_transport_route(struct sctp_transport *transport, opt->pf->af->to_sk_saddr(&transport->saddr, asoc->base.sk); } else - transport->pmtu = SCTP_DEFAULT_MAXSEGMENT; + transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } /* Hold a reference to a transport. 
*/ @@ -369,7 +375,7 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, ssthresh = transport->ssthresh; pba = transport->partial_bytes_acked; - pmtu = transport->asoc->pmtu; + pmtu = transport->asoc->pathmtu; if (cwnd <= ssthresh) { /* RFC 2960 7.2.1, sctpimpguide-05 2.14.2 When cwnd is less @@ -441,8 +447,8 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * partial_bytes_acked = 0 */ transport->ssthresh = max(transport->cwnd/2, - 4*transport->asoc->pmtu); - transport->cwnd = transport->asoc->pmtu; + 4*transport->asoc->pathmtu); + transport->cwnd = transport->asoc->pathmtu; break; case SCTP_LOWER_CWND_FAST_RTX: @@ -459,7 +465,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * partial_bytes_acked = 0 */ transport->ssthresh = max(transport->cwnd/2, - 4*transport->asoc->pmtu); + 4*transport->asoc->pathmtu); transport->cwnd = transport->ssthresh; break; @@ -479,7 +485,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, if ((jiffies - transport->last_time_ecne_reduced) > transport->rtt) { transport->ssthresh = max(transport->cwnd/2, - 4*transport->asoc->pmtu); + 4*transport->asoc->pathmtu); transport->cwnd = transport->ssthresh; transport->last_time_ecne_reduced = jiffies; } @@ -496,7 +502,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, */ if ((jiffies - transport->last_time_used) > transport->rto) transport->cwnd = max(transport->cwnd/2, - 4*transport->asoc->pmtu); + 4*transport->asoc->pathmtu); break; }; @@ -511,7 +517,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, unsigned long sctp_transport_timeout(struct sctp_transport *t) { unsigned long timeout; - timeout = t->hb_interval + t->rto + sctp_jitter(t->rto); + timeout = t->hbinterval + t->rto + sctp_jitter(t->rto); timeout += jiffies; return timeout; } diff --git a/net/socket.c b/net/socket.c index 3145103cdf5..06fa217f58a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -640,154 +640,150 @@ static void sock_aio_dtor(struct kiocb *iocb) kfree(iocb->private); } -/* - * Read data from a socket. ubuf is a user mode pointer. We make sure the user - * area ubuf...ubuf+size-1 is writable before asking the protocol. - */ - -static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf, - size_t size, loff_t pos) +static ssize_t sock_sendpage(struct file *file, struct page *page, + int offset, size_t size, loff_t *ppos, int more) { - struct sock_iocb *x, siocb; struct socket *sock; int flags; - if (pos != 0) - return -ESPIPE; - if (size==0) /* Match SYS5 behaviour */ - return 0; + sock = file->private_data; - if (is_sync_kiocb(iocb)) - x = &siocb; - else { - x = kmalloc(sizeof(struct sock_iocb), GFP_KERNEL); - if (!x) - return -ENOMEM; + flags = !(file->f_flags & O_NONBLOCK) ? 
0 : MSG_DONTWAIT; + if (more) + flags |= MSG_MORE; + + return sock->ops->sendpage(sock, page, offset, size, flags); +} + +static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, + char __user *ubuf, size_t size, struct sock_iocb *siocb) +{ + if (!is_sync_kiocb(iocb)) { + siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); + if (!siocb) + return NULL; iocb->ki_dtor = sock_aio_dtor; } - iocb->private = x; - x->kiocb = iocb; - sock = iocb->ki_filp->private_data; - x->async_msg.msg_name = NULL; - x->async_msg.msg_namelen = 0; - x->async_msg.msg_iov = &x->async_iov; - x->async_msg.msg_iovlen = 1; - x->async_msg.msg_control = NULL; - x->async_msg.msg_controllen = 0; - x->async_iov.iov_base = ubuf; - x->async_iov.iov_len = size; - flags = !(iocb->ki_filp->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT; + siocb->kiocb = iocb; + siocb->async_iov.iov_base = ubuf; + siocb->async_iov.iov_len = size; - return __sock_recvmsg(iocb, sock, &x->async_msg, size, flags); + iocb->private = siocb; + return siocb; } +static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, + struct file *file, struct iovec *iov, unsigned long nr_segs) +{ + struct socket *sock = file->private_data; + size_t size = 0; + int i; -/* - * Write data to a socket. We verify that the user area ubuf..ubuf+size-1 - * is readable by the user process. - */ + for (i = 0 ; i < nr_segs ; i++) + size += iov[i].iov_len; -static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf, - size_t size, loff_t pos) + msg->msg_name = NULL; + msg->msg_namelen = 0; + msg->msg_control = NULL; + msg->msg_controllen = 0; + msg->msg_iov = (struct iovec *) iov; + msg->msg_iovlen = nr_segs; + msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; + + return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags); +} + +static ssize_t sock_readv(struct file *file, const struct iovec *iov, + unsigned long nr_segs, loff_t *ppos) { - struct sock_iocb *x, siocb; - struct socket *sock; - + struct kiocb iocb; + struct sock_iocb siocb; + struct msghdr msg; + int ret; + + init_sync_kiocb(&iocb, NULL); + iocb.private = &siocb; + + ret = do_sock_read(&msg, &iocb, file, (struct iovec *)iov, nr_segs); + if (-EIOCBQUEUED == ret) + ret = wait_on_sync_kiocb(&iocb); + return ret; +} + +static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf, + size_t count, loff_t pos) +{ + struct sock_iocb siocb, *x; + if (pos != 0) return -ESPIPE; - if(size==0) /* Match SYS5 behaviour */ + if (count == 0) /* Match SYS5 behaviour */ return 0; - if (is_sync_kiocb(iocb)) - x = &siocb; - else { - x = kmalloc(sizeof(struct sock_iocb), GFP_KERNEL); - if (!x) - return -ENOMEM; - iocb->ki_dtor = sock_aio_dtor; - } - iocb->private = x; - x->kiocb = iocb; - sock = iocb->ki_filp->private_data; - - x->async_msg.msg_name = NULL; - x->async_msg.msg_namelen = 0; - x->async_msg.msg_iov = &x->async_iov; - x->async_msg.msg_iovlen = 1; - x->async_msg.msg_control = NULL; - x->async_msg.msg_controllen = 0; - x->async_msg.msg_flags = !(iocb->ki_filp->f_flags & O_NONBLOCK) ? 
0 : MSG_DONTWAIT; - if (sock->type == SOCK_SEQPACKET) - x->async_msg.msg_flags |= MSG_EOR; - x->async_iov.iov_base = (void __user *)ubuf; - x->async_iov.iov_len = size; - - return __sock_sendmsg(iocb, sock, &x->async_msg, size); + x = alloc_sock_iocb(iocb, ubuf, count, &siocb); + if (!x) + return -ENOMEM; + return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, + &x->async_iov, 1); } -static ssize_t sock_sendpage(struct file *file, struct page *page, - int offset, size_t size, loff_t *ppos, int more) +static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, + struct file *file, struct iovec *iov, unsigned long nr_segs) { - struct socket *sock; - int flags; + struct socket *sock = file->private_data; + size_t size = 0; + int i; - sock = file->private_data; + for (i = 0 ; i < nr_segs ; i++) + size += iov[i].iov_len; - flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT; - if (more) - flags |= MSG_MORE; + msg->msg_name = NULL; + msg->msg_namelen = 0; + msg->msg_control = NULL; + msg->msg_controllen = 0; + msg->msg_iov = (struct iovec *) iov; + msg->msg_iovlen = nr_segs; + msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; + if (sock->type == SOCK_SEQPACKET) + msg->msg_flags |= MSG_EOR; - return sock->ops->sendpage(sock, page, offset, size, flags); + return __sock_sendmsg(iocb, sock, msg, size); } -static int sock_readv_writev(int type, - struct file * file, const struct iovec * iov, - long count, size_t size) +static ssize_t sock_writev(struct file *file, const struct iovec *iov, + unsigned long nr_segs, loff_t *ppos) { struct msghdr msg; - struct socket *sock; + struct kiocb iocb; + struct sock_iocb siocb; + int ret; - sock = file->private_data; + init_sync_kiocb(&iocb, NULL); + iocb.private = &siocb; - msg.msg_name = NULL; - msg.msg_namelen = 0; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_iov = (struct iovec *) iov; - msg.msg_iovlen = count; - msg.msg_flags = (file->f_flags & O_NONBLOCK) ? 
MSG_DONTWAIT : 0; + ret = do_sock_write(&msg, &iocb, file, (struct iovec *)iov, nr_segs); + if (-EIOCBQUEUED == ret) + ret = wait_on_sync_kiocb(&iocb); + return ret; +} - /* read() does a VERIFY_WRITE */ - if (type == VERIFY_WRITE) - return sock_recvmsg(sock, &msg, size, msg.msg_flags); +static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf, + size_t count, loff_t pos) +{ + struct sock_iocb siocb, *x; - if (sock->type == SOCK_SEQPACKET) - msg.msg_flags |= MSG_EOR; + if (pos != 0) + return -ESPIPE; + if (count == 0) /* Match SYS5 behaviour */ + return 0; - return sock_sendmsg(sock, &msg, size); -} + x = alloc_sock_iocb(iocb, (void __user *)ubuf, count, &siocb); + if (!x) + return -ENOMEM; -static ssize_t sock_readv(struct file *file, const struct iovec *vector, - unsigned long count, loff_t *ppos) -{ - size_t tot_len = 0; - int i; - for (i = 0 ; i < count ; i++) - tot_len += vector[i].iov_len; - return sock_readv_writev(VERIFY_WRITE, - file, vector, count, tot_len); -} - -static ssize_t sock_writev(struct file *file, const struct iovec *vector, - unsigned long count, loff_t *ppos) -{ - size_t tot_len = 0; - int i; - for (i = 0 ; i < count ; i++) - tot_len += vector[i].iov_len; - return sock_readv_writev(VERIFY_READ, - file, vector, count, tot_len); + return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, + &x->async_iov, 1); } @@ -904,6 +900,13 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) break; default: err = sock->ops->ioctl(sock, cmd, arg); + + /* + * If this ioctl is unknown, try to hand it down + * to the NIC driver. + */ + if (err == -ENOIOCTLCMD) + err = dev_ioctl(cmd, argp); break; } return err;
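The point of the sock_ioctl() hunk above is that protocol handlers may now return -ENOIOCTLCMD for commands they do not implement and let the core forward them to the device layer. A sketch of a conforming protocol ioctl, in kernel context (foo_ioctl is illustrative, not part of this patch):

static int foo_ioctl(struct socket *sock, unsigned int cmd,
                     unsigned long arg)
{
        void __user *argp = (void __user *)arg;

        switch (cmd) {
        case SIOCGSTAMP:
                /* A command this protocol actually implements. */
                return sock_get_timestamp(sock->sk,
                                          (struct timeval __user *)argp);
        default:
                /* Unknown ioctl: let sock_ioctl() try dev_ioctl(). */
                return -ENOIOCTLCMD;
        }
}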
@@ -2036,7 +2039,7 @@ int sock_unregister(int family) return 0; } -void __init sock_init(void) +static int __init sock_init(void) { /* * Initialize sock SLAB cache. */ sk_init(); -#ifdef SLAB_SKB /* * Initialize skbuff SLAB cache */ skb_init(); -#endif /* * Initialize the protocols module. */ init_inodecache(); register_filesystem(&sock_fs_type); sock_mnt = kern_mount(&sock_fs_type); - /* The real protocol initialization is performed when - * do_initcalls is run. + + /* The real protocol initialization is performed in later initcalls. */ #ifdef CONFIG_NETFILTER netfilter_init(); #endif + + return 0; } +core_initcall(sock_init); /* early initcall */ + #ifdef CONFIG_PROC_FS void socket_seq_show(struct seq_file *seq) { diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 5f1f806a0b1..129e2bd36af 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -97,13 +97,17 @@ get_key(const void *p, const void *end, struct crypto_tfm **res) alg_mode = CRYPTO_TFM_MODE_CBC; break; default: - dprintk("RPC: get_key: unsupported algorithm %d\n", alg); + printk("gss_kerberos_mech: unsupported algorithm %d\n", alg); goto out_err_free_key; } - if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) + if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) { + printk("gss_kerberos_mech: unable to initialize crypto algorithm %s\n", alg_name); goto out_err_free_key; - if (crypto_cipher_setkey(*res, key.data, key.len)) + } + if (crypto_cipher_setkey(*res, key.data, key.len)) { + printk("gss_kerberos_mech: error setting key for crypto algorithm %s\n", alg_name); goto out_err_free_tfm; + } kfree(key.data); return p; diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index 39b3edc1469..58400807d4d 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -111,14 +111,18 @@ get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg) setkey = 0; break; default: - dprintk("RPC: SPKM3 get_key: unsupported algorithm %d", *resalg); + dprintk("gss_spkm3_mech: unsupported algorithm %d\n", *resalg); goto out_err_free_key; } - if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) + if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) { + printk("gss_spkm3_mech: unable to initialize crypto algorithm %s\n", alg_name); goto out_err_free_key; + } if (setkey) { - if (crypto_cipher_setkey(*res, key.data, key.len)) + if (crypto_cipher_setkey(*res, key.data, key.len)) { + printk("gss_spkm3_mech: error setting key for crypto algorithm %s\n", alg_name); goto out_err_free_tfm; + } } if(key.len > 0) diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c index d1e12b25d6e..86fbf7c3e39 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_seal.c +++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c @@ -59,7 +59,7 @@ spkm3_make_token(struct spkm3_ctx *ctx, char tokhdrbuf[25]; struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; struct xdr_netobj mic_hdr = {.len = 0, .data = tokhdrbuf}; - int tmsglen, tokenlen = 0; + int tokenlen = 0; unsigned char *ptr; s32 now; int ctxelen = 0, ctxzbit = 0; @@ -92,24 +92,23 @@ spkm3_make_token(struct spkm3_ctx *ctx, } if (toktype == SPKM_MIC_TOK) { - tmsglen = 0; /* Calculate checksum over the mic-header */ asn1_bitstring_len(&ctx->ctx_id, &ctxelen, &ctxzbit); spkm3_mic_header(&mic_hdr.data, &mic_hdr.len, ctx->ctx_id.data, ctxelen, ctxzbit); if (make_checksum(checksum_type, mic_hdr.data, mic_hdr.len, - text, &md5cksum)) + text, 0, &md5cksum)) goto out_err; asn1_bitstring_len(&md5cksum, &md5elen, &md5zbit); - tokenlen = 10 + ctxelen + 1 + 2 + md5elen + 1; + tokenlen = 10 + ctxelen + 1 + md5elen + 1; /* Create token header using generic routines */ - token->len = g_token_size(&ctx->mech_used, tokenlen + tmsglen); + token->len = g_token_size(&ctx->mech_used, tokenlen); ptr = token->data; - g_make_token_header(&ctx->mech_used, tokenlen + tmsglen, &ptr); + g_make_token_header(&ctx->mech_used, tokenlen, &ptr); spkm3_make_mic_token(&ptr, tokenlen, &mic_hdr, &md5cksum, 
md5elen, md5zbit); } else if (toktype == SPKM_WRAP_TOK) { /* Not Supported */ diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c index 1f824578d77..af0d7ce7468 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_token.c +++ b/net/sunrpc/auth_gss/gss_spkm3_token.c @@ -182,6 +182,7 @@ spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, unsigned char *ct * *tokp points to the beginning of the SPKM_MIC token described * in rfc 2025, section 3.2.1: * + * toklen is the inner token length */ void spkm3_make_mic_token(unsigned char **tokp, int toklen, struct xdr_netobj *mic_hdr, struct xdr_netobj *md5cksum, int md5elen, int md5zbit) @@ -189,7 +190,7 @@ spkm3_make_mic_token(unsigned char **tokp, int toklen, struct xdr_netobj *mic_hd unsigned char *ict = *tokp; *(u8 *)ict++ = 0xa4; - *(u8 *)ict++ = toklen - 2; + *(u8 *)ict++ = toklen; memcpy(ict, mic_hdr->data, mic_hdr->len); ict += mic_hdr->len; diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c index 241d5b30dfc..96851b0ba1b 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_unseal.c +++ b/net/sunrpc/auth_gss/gss_spkm3_unseal.c @@ -95,7 +95,7 @@ spkm3_read_token(struct spkm3_ctx *ctx, ret = GSS_S_DEFECTIVE_TOKEN; code = make_checksum(CKSUMTYPE_RSA_MD5, ptr + 2, mic_hdrlen + 2, - message_buffer, &md5cksum); + message_buffer, 0, &md5cksum); if (code) goto out; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 61c3abeacca..5530ac8c6df 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -374,19 +374,23 @@ out: * Default callback for async RPC calls */ static void -rpc_default_callback(struct rpc_task *task) +rpc_default_callback(struct rpc_task *task, void *data) { } +static const struct rpc_call_ops rpc_default_ops = { + .rpc_call_done = rpc_default_callback, +}; + /* * Export the signal mask handling for synchronous code that * sleeps on RPC calls */ -#define RPC_INTR_SIGNALS (sigmask(SIGINT) | sigmask(SIGQUIT) | sigmask(SIGKILL)) +#define RPC_INTR_SIGNALS (sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT) | sigmask(SIGTERM)) static void rpc_save_sigmask(sigset_t *oldset, int intr) { - unsigned long sigallow = 0; + unsigned long sigallow = sigmask(SIGKILL); sigset_t sigmask; /* Block all signals except those listed in sigallow */ @@ -432,7 +436,7 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) BUG_ON(flags & RPC_TASK_ASYNC); status = -ENOMEM; - task = rpc_new_task(clnt, NULL, flags); + task = rpc_new_task(clnt, flags, &rpc_default_ops, NULL); if (task == NULL) goto out; @@ -442,14 +446,15 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) rpc_call_setup(task, msg, 0); /* Set up the call info struct and execute the task */ - if (task->tk_status == 0) { + status = task->tk_status; + if (status == 0) { + atomic_inc(&task->tk_count); status = rpc_execute(task); - } else { - status = task->tk_status; - rpc_release_task(task); + if (status == 0) + status = task->tk_status; } - rpc_restore_sigmask(&oldset); + rpc_release_task(task); out: return status; } @@ -459,7 +464,7 @@ out: */ int rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags, - rpc_action callback, void *data) + const struct rpc_call_ops *tk_ops, void *data) { struct rpc_task *task; sigset_t oldset; @@ -472,12 +477,9 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags, flags |= RPC_TASK_ASYNC; /* Create/initialize a new RPC task */ - if (!callback) - callback = rpc_default_callback; status = -ENOMEM; 
- if (!(task = rpc_new_task(clnt, callback, flags))) + if (!(task = rpc_new_task(clnt, flags, tk_ops, data))) goto out; - task->tk_calldata = data; /* Mask signals on GSS_AUTH upcalls */ rpc_task_sigmask(task, &oldset); @@ -511,7 +513,7 @@ rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags) if (task->tk_status == 0) task->tk_action = call_start; else - task->tk_action = NULL; + task->tk_action = rpc_exit_task; } void @@ -536,6 +538,18 @@ size_t rpc_max_payload(struct rpc_clnt *clnt) } EXPORT_SYMBOL(rpc_max_payload); +/** + * rpc_force_rebind - force transport to check that remote port is unchanged + * @clnt: client to rebind + * + */ +void rpc_force_rebind(struct rpc_clnt *clnt) +{ + if (clnt->cl_autobind) + clnt->cl_port = 0; +} +EXPORT_SYMBOL(rpc_force_rebind); + /* * Restart an (async) RPC call. Usually called from within the * exit handler. @@ -642,24 +656,26 @@ call_reserveresult(struct rpc_task *task) /* * 2. Allocate the buffer. For details, see sched.c:rpc_malloc. - * (Note: buffer memory is freed in rpc_task_release). + * (Note: buffer memory is freed in xprt_release). */ static void call_allocate(struct rpc_task *task) { + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = task->tk_xprt; unsigned int bufsiz; dprintk("RPC: %4d call_allocate (status %d)\n", task->tk_pid, task->tk_status); task->tk_action = call_bind; - if (task->tk_buffer) + if (req->rq_buffer) return; /* FIXME: compute buffer requirements more exactly using * auth->au_wslack */ bufsiz = task->tk_msg.rpc_proc->p_bufsiz + RPC_SLACK_SPACE; - if (rpc_malloc(task, bufsiz << 1) != NULL) + if (xprt->ops->buf_alloc(task, bufsiz << 1) != NULL) return; printk(KERN_INFO "RPC: buffer allocation failed for task %p\n", task); @@ -702,14 +718,14 @@ call_encode(struct rpc_task *task) task->tk_pid, task->tk_status); /* Default buffer setup */ - bufsiz = task->tk_bufsize >> 1; - sndbuf->head[0].iov_base = (void *)task->tk_buffer; + bufsiz = req->rq_bufsize >> 1; + sndbuf->head[0].iov_base = (void *)req->rq_buffer; sndbuf->head[0].iov_len = bufsiz; sndbuf->tail[0].iov_len = 0; sndbuf->page_len = 0; sndbuf->len = 0; sndbuf->buflen = bufsiz; - rcvbuf->head[0].iov_base = (void *)((char *)task->tk_buffer + bufsiz); + rcvbuf->head[0].iov_base = (void *)((char *)req->rq_buffer + bufsiz); rcvbuf->head[0].iov_len = bufsiz; rcvbuf->tail[0].iov_len = 0; rcvbuf->page_len = 0; @@ -849,8 +865,7 @@ call_connect_status(struct rpc_task *task) } /* Something failed: remote service port may have changed */ - if (clnt->cl_autobind) - clnt->cl_port = 0; + rpc_force_rebind(clnt); switch (status) { case -ENOTCONN: @@ -892,7 +907,7 @@ call_transmit(struct rpc_task *task) if (task->tk_status < 0) return; if (!task->tk_msg.rpc_proc->p_decode) { - task->tk_action = NULL; + task->tk_action = rpc_exit_task; rpc_wake_up_task(task); } return; @@ -931,8 +946,7 @@ call_status(struct rpc_task *task) break; case -ECONNREFUSED: case -ENOTCONN: - if (clnt->cl_autobind) - clnt->cl_port = 0; + rpc_force_rebind(clnt); task->tk_action = call_bind; break; case -EAGAIN: @@ -943,8 +957,7 @@ call_status(struct rpc_task *task) rpc_exit(task, status); break; default: - if (clnt->cl_chatty) - printk("%s: RPC call returned error %d\n", + printk("%s: RPC call returned error %d\n", clnt->cl_protname, -status); rpc_exit(task, status); break; @@ -979,20 +992,18 @@ call_timeout(struct rpc_task *task) dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid); if (RPC_IS_SOFT(task)) { - if (clnt->cl_chatty) - printk(KERN_NOTICE "%s: server %s not 
responding, timed out\n", + printk(KERN_NOTICE "%s: server %s not responding, timed out\n", clnt->cl_protname, clnt->cl_server); rpc_exit(task, -EIO); return; } - if (clnt->cl_chatty && !(task->tk_flags & RPC_CALL_MAJORSEEN)) { + if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) { task->tk_flags |= RPC_CALL_MAJORSEEN; printk(KERN_NOTICE "%s: server %s not responding, still trying\n", clnt->cl_protname, clnt->cl_server); } - if (clnt->cl_autobind) - clnt->cl_port = 0; + rpc_force_rebind(clnt); retry: clnt->cl_stats->rpcretrans++; @@ -1014,7 +1025,7 @@ call_decode(struct rpc_task *task) dprintk("RPC: %4d call_decode (status %d)\n", task->tk_pid, task->tk_status); - if (clnt->cl_chatty && (task->tk_flags & RPC_CALL_MAJORSEEN)) { + if (task->tk_flags & RPC_CALL_MAJORSEEN) { printk(KERN_NOTICE "%s: server %s OK\n", clnt->cl_protname, clnt->cl_server); task->tk_flags &= ~RPC_CALL_MAJORSEEN; @@ -1039,13 +1050,14 @@ call_decode(struct rpc_task *task) sizeof(req->rq_rcv_buf)) != 0); /* Verify the RPC header */ - if (!(p = call_verify(task))) { - if (task->tk_action == NULL) - return; - goto out_retry; + p = call_verify(task); + if (IS_ERR(p)) { + if (p == ERR_PTR(-EAGAIN)) + goto out_retry; + return; } - task->tk_action = NULL; + task->tk_action = rpc_exit_task; if (decode) task->tk_status = rpcauth_unwrap_resp(task, decode, req, p, @@ -1138,7 +1150,7 @@ call_verify(struct rpc_task *task) if ((n = ntohl(*p++)) != RPC_REPLY) { printk(KERN_WARNING "call_verify: not an RPC reply: %x\n", n); - goto out_retry; + goto out_garbage; } if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) { if (--len < 0) @@ -1168,7 +1180,7 @@ call_verify(struct rpc_task *task) task->tk_pid); rpcauth_invalcred(task); task->tk_action = call_refresh; - return NULL; + goto out_retry; case RPC_AUTH_BADCRED: case RPC_AUTH_BADVERF: /* possibly garbled cred/verf? 
*/ @@ -1178,7 +1190,7 @@ call_verify(struct rpc_task *task) dprintk("RPC: %4d call_verify: retry garbled creds\n", task->tk_pid); task->tk_action = call_bind; - return NULL; + goto out_retry; case RPC_AUTH_TOOWEAK: printk(KERN_NOTICE "call_verify: server requires stronger " "authentication.\n"); @@ -1193,7 +1205,7 @@ call_verify(struct rpc_task *task) } if (!(p = rpcauth_checkverf(task, p))) { printk(KERN_WARNING "call_verify: auth check failed\n"); - goto out_retry; /* bad verifier, retry */ + goto out_garbage; /* bad verifier, retry */ } len = p - (u32 *)iov->iov_base - 1; if (len < 0) @@ -1230,23 +1242,24 @@ call_verify(struct rpc_task *task) /* Also retry */ } -out_retry: +out_garbage: task->tk_client->cl_stats->rpcgarbage++; if (task->tk_garb_retry) { task->tk_garb_retry--; dprintk("RPC %s: retrying %4d\n", __FUNCTION__, task->tk_pid); task->tk_action = call_bind; - return NULL; +out_retry: + return ERR_PTR(-EAGAIN); } printk(KERN_WARNING "RPC %s: retry failed, exit EIO\n", __FUNCTION__); out_eio: error = -EIO; out_err: rpc_exit(task, error); - return NULL; + return ERR_PTR(error); out_overflow: printk(KERN_WARNING "RPC %s: server reply was truncated.\n", __FUNCTION__); - goto out_retry; + goto out_garbage; } static int rpcproc_encode_null(void *rqstp, u32 *data, void *obj) diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index a398575f94b..8139ce68e91 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -90,8 +90,7 @@ bailout: map->pm_binding = 0; rpc_wake_up(&map->pm_bindwait); spin_unlock(&pmap_lock); - task->tk_status = -EIO; - task->tk_action = NULL; + rpc_exit(task, -EIO); } #ifdef CONFIG_ROOT_NFS @@ -132,21 +131,22 @@ static void pmap_getport_done(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; + struct rpc_xprt *xprt = task->tk_xprt; struct rpc_portmap *map = clnt->cl_pmap; dprintk("RPC: %4d pmap_getport_done(status %d, port %d)\n", task->tk_pid, task->tk_status, clnt->cl_port); + + xprt->ops->set_port(xprt, 0); if (task->tk_status < 0) { /* Make the calling task exit with an error */ - task->tk_action = NULL; + task->tk_action = rpc_exit_task; } else if (clnt->cl_port == 0) { /* Program not registered */ - task->tk_status = -EACCES; - task->tk_action = NULL; + rpc_exit(task, -EACCES); } else { - /* byte-swap port number first */ + xprt->ops->set_port(xprt, clnt->cl_port); clnt->cl_port = htons(clnt->cl_port); - clnt->cl_xprt->addr.sin_port = clnt->cl_port; } spin_lock(&pmap_lock); map->pm_binding = 0; @@ -207,7 +207,7 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileg xprt = xprt_create_proto(proto, srvaddr, NULL); if (IS_ERR(xprt)) return (struct rpc_clnt *)xprt; - xprt->addr.sin_port = htons(RPC_PMAP_PORT); + xprt->ops->set_port(xprt, RPC_PMAP_PORT); if (!privileged) xprt->resvport = 0; @@ -217,7 +217,6 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileg RPC_AUTH_UNIX); if (!IS_ERR(clnt)) { clnt->cl_softrtry = 1; - clnt->cl_chatty = 1; clnt->cl_oneshot = 1; } return clnt; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 16a2458f38f..24cc23af9b9 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -70,8 +70,11 @@ rpc_timeout_upcall_queue(void *data) struct inode *inode = &rpci->vfs_inode; down(&inode->i_sem); + if (rpci->ops == NULL) + goto out; if (rpci->nreaders == 0 && !list_empty(&rpci->pipe)) __rpc_purge_upcall(inode, -ETIMEDOUT); +out: up(&inode->i_sem); } @@ -113,8 +116,6 @@ rpc_close_pipes(struct inode *inode) { struct rpc_inode 
*rpci = RPC_I(inode); - cancel_delayed_work(&rpci->queue_timeout); - flush_scheduled_work(); down(&inode->i_sem); if (rpci->ops != NULL) { rpci->nreaders = 0; @@ -127,6 +128,8 @@ rpc_close_pipes(struct inode *inode) } rpc_inode_setowner(inode, NULL); up(&inode->i_sem); + cancel_delayed_work(&rpci->queue_timeout); + flush_scheduled_work(); } static struct inode * @@ -166,7 +169,7 @@ rpc_pipe_open(struct inode *inode, struct file *filp) static int rpc_pipe_release(struct inode *inode, struct file *filp) { - struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode); + struct rpc_inode *rpci = RPC_I(inode); struct rpc_pipe_msg *msg; down(&inode->i_sem); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 54e60a65750..7415406aa1a 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -41,8 +41,6 @@ static mempool_t *rpc_buffer_mempool __read_mostly; static void __rpc_default_timer(struct rpc_task *task); static void rpciod_killall(void); -static void rpc_free(struct rpc_task *task); - static void rpc_async_schedule(void *); /* @@ -264,6 +262,35 @@ void rpc_init_wait_queue(struct rpc_wait_queue *queue, const char *qname) } EXPORT_SYMBOL(rpc_init_wait_queue); +static int rpc_wait_bit_interruptible(void *word) +{ + if (signal_pending(current)) + return -ERESTARTSYS; + schedule(); + return 0; +} + +/* + * Mark an RPC call as having completed by clearing the 'active' bit + */ +static inline void rpc_mark_complete_task(struct rpc_task *task) +{ + rpc_clear_active(task); + wake_up_bit(&task->tk_runstate, RPC_TASK_ACTIVE); +} + +/* + * Allow callers to wait for completion of an RPC call + */ +int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *)) +{ + if (action == NULL) + action = rpc_wait_bit_interruptible; + return wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE, + action, TASK_INTERRUPTIBLE); +} +EXPORT_SYMBOL(__rpc_wait_for_completion_task); + /* * Make an RPC task runnable. * @@ -299,10 +326,7 @@ static void rpc_make_runnable(struct rpc_task *task) static inline void rpc_schedule_run(struct rpc_task *task) { - /* Don't run a child twice! */ - if (RPC_IS_ACTIVATED(task)) - return; - task->tk_active = 1; + rpc_set_active(task); rpc_make_runnable(task); } @@ -324,8 +348,7 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, } /* Mark the task as being activated if so needed */ - if (!RPC_IS_ACTIVATED(task)) - task->tk_active = 1; + rpc_set_active(task); __rpc_add_wait_queue(q, task); @@ -555,36 +578,29 @@ __rpc_atrun(struct rpc_task *task) } /* - * Helper that calls task->tk_exit if it exists and then returns - * true if we should exit __rpc_execute. 
+ * Helper to call task->tk_ops->rpc_call_prepare */ -static inline int __rpc_do_exit(struct rpc_task *task) +static void rpc_prepare_task(struct rpc_task *task) { - if (task->tk_exit != NULL) { - lock_kernel(); - task->tk_exit(task); - unlock_kernel(); - /* If tk_action is non-null, we should restart the call */ - if (task->tk_action != NULL) { - if (!RPC_ASSASSINATED(task)) { - /* Release RPC slot and buffer memory */ - xprt_release(task); - rpc_free(task); - return 0; - } - printk(KERN_ERR "RPC: dead task tried to walk away.\n"); - } - } - return 1; + task->tk_ops->rpc_call_prepare(task, task->tk_calldata); } -static int rpc_wait_bit_interruptible(void *word) +/* + * Helper that calls task->tk_ops->rpc_call_done if it exists + */ +void rpc_exit_task(struct rpc_task *task) { - if (signal_pending(current)) - return -ERESTARTSYS; - schedule(); - return 0; + task->tk_action = NULL; + if (task->tk_ops->rpc_call_done != NULL) { + task->tk_ops->rpc_call_done(task, task->tk_calldata); + if (task->tk_action != NULL) { + WARN_ON(RPC_ASSASSINATED(task)); + /* Always release the RPC slot and buffer memory */ + xprt_release(task); + } + } } +EXPORT_SYMBOL(rpc_exit_task); /* * This is the RPC `scheduler' (or rather, the finite state machine). @@ -631,12 +647,11 @@ static int __rpc_execute(struct rpc_task *task) * by someone else. */ if (!RPC_IS_QUEUED(task)) { - if (task->tk_action != NULL) { - lock_kernel(); - task->tk_action(task); - unlock_kernel(); - } else if (__rpc_do_exit(task)) + if (task->tk_action == NULL) break; + lock_kernel(); + task->tk_action(task); + unlock_kernel(); } /* @@ -676,9 +691,9 @@ static int __rpc_execute(struct rpc_task *task) dprintk("RPC: %4d sync task resuming\n", task->tk_pid); } - dprintk("RPC: %4d exit() = %d\n", task->tk_pid, task->tk_status); - status = task->tk_status; - + dprintk("RPC: %4d, return %d, status %d\n", task->tk_pid, status, task->tk_status); + /* Wake up anyone who is waiting for task completion */ + rpc_mark_complete_task(task); /* Release all resources associated with the task */ rpc_release_task(task); return status; @@ -696,9 +711,7 @@ static int __rpc_execute(struct rpc_task *task) int rpc_execute(struct rpc_task *task) { - BUG_ON(task->tk_active); - - task->tk_active = 1; + rpc_set_active(task); rpc_set_running(task); return __rpc_execute(task); } @@ -708,17 +721,19 @@ static void rpc_async_schedule(void *arg) __rpc_execute((struct rpc_task *)arg); } -/* - * Allocate memory for RPC purposes. +/** + * rpc_malloc - allocate an RPC buffer + * @task: RPC task that will use this buffer + * @size: requested byte size * * We try to ensure that some NFS reads and writes can always proceed * by using a mempool when allocating 'small' buffers. * In order to avoid memory starvation triggering more writebacks of * NFS requests, we use GFP_NOFS rather than GFP_KERNEL. 
*/ -void * -rpc_malloc(struct rpc_task *task, size_t size) +void * rpc_malloc(struct rpc_task *task, size_t size) { + struct rpc_rqst *req = task->tk_rqstp; gfp_t gfp; if (task->tk_flags & RPC_TASK_SWAPPER) @@ -727,42 +742,52 @@ rpc_malloc(struct rpc_task *task, size_t size) gfp = GFP_NOFS; if (size > RPC_BUFFER_MAXSIZE) { - task->tk_buffer = kmalloc(size, gfp); - if (task->tk_buffer) - task->tk_bufsize = size; + req->rq_buffer = kmalloc(size, gfp); + if (req->rq_buffer) + req->rq_bufsize = size; } else { - task->tk_buffer = mempool_alloc(rpc_buffer_mempool, gfp); - if (task->tk_buffer) - task->tk_bufsize = RPC_BUFFER_MAXSIZE; + req->rq_buffer = mempool_alloc(rpc_buffer_mempool, gfp); + if (req->rq_buffer) + req->rq_bufsize = RPC_BUFFER_MAXSIZE; } - return task->tk_buffer; + return req->rq_buffer; } -static void -rpc_free(struct rpc_task *task) +/** + * rpc_free - free buffer allocated via rpc_malloc + * @task: RPC task with a buffer to be freed + * + */ +void rpc_free(struct rpc_task *task) { - if (task->tk_buffer) { - if (task->tk_bufsize == RPC_BUFFER_MAXSIZE) - mempool_free(task->tk_buffer, rpc_buffer_mempool); + struct rpc_rqst *req = task->tk_rqstp; + + if (req->rq_buffer) { + if (req->rq_bufsize == RPC_BUFFER_MAXSIZE) + mempool_free(req->rq_buffer, rpc_buffer_mempool); else - kfree(task->tk_buffer); - task->tk_buffer = NULL; - task->tk_bufsize = 0; + kfree(req->rq_buffer); + req->rq_buffer = NULL; + req->rq_bufsize = 0; } } /* * Creation and deletion of RPC task structures */ -void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action callback, int flags) +void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata) { memset(task, 0, sizeof(*task)); init_timer(&task->tk_timer); task->tk_timer.data = (unsigned long) task; task->tk_timer.function = (void (*)(unsigned long)) rpc_run_timer; + atomic_set(&task->tk_count, 1); task->tk_client = clnt; task->tk_flags = flags; - task->tk_exit = callback; + task->tk_ops = tk_ops; + if (tk_ops->rpc_call_prepare != NULL) + task->tk_action = rpc_prepare_task; + task->tk_calldata = calldata; /* Initialize retry counters */ task->tk_garb_retry = 2; @@ -791,6 +816,8 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action call list_add_tail(&task->tk_task, &all_tasks); spin_unlock(&rpc_sched_lock); + BUG_ON(task->tk_ops == NULL); + dprintk("RPC: %4d new task procpid %d\n", task->tk_pid, current->pid); } @@ -801,8 +828,7 @@ rpc_alloc_task(void) return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS); } -static void -rpc_default_free_task(struct rpc_task *task) +static void rpc_free_task(struct rpc_task *task) { dprintk("RPC: %4d freeing task\n", task->tk_pid); mempool_free(task, rpc_task_mempool); @@ -813,8 +839,7 @@ rpc_default_free_task(struct rpc_task *task) * clean up after an allocation failure, as the client may * have specified "oneshot". 
*/ -struct rpc_task * -rpc_new_task(struct rpc_clnt *clnt, rpc_action callback, int flags) +struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata) { struct rpc_task *task; @@ -822,10 +847,7 @@ rpc_new_task(struct rpc_clnt *clnt, rpc_action callback, int flags) if (!task) goto cleanup; - rpc_init_task(task, clnt, callback, flags); - - /* Replace tk_release */ - task->tk_release = rpc_default_free_task; + rpc_init_task(task, clnt, flags, tk_ops, calldata); dprintk("RPC: %4d allocated task\n", task->tk_pid); task->tk_flags |= RPC_TASK_DYNAMIC; @@ -845,11 +867,15 @@ cleanup: void rpc_release_task(struct rpc_task *task) { - dprintk("RPC: %4d release task\n", task->tk_pid); + const struct rpc_call_ops *tk_ops = task->tk_ops; + void *calldata = task->tk_calldata; #ifdef RPC_DEBUG BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID); #endif + if (!atomic_dec_and_test(&task->tk_count)) + return; + dprintk("RPC: %4d release task\n", task->tk_pid); /* Remove from global task list */ spin_lock(&rpc_sched_lock); @@ -857,7 +883,6 @@ void rpc_release_task(struct rpc_task *task) spin_unlock(&rpc_sched_lock); BUG_ON (RPC_IS_QUEUED(task)); - task->tk_active = 0; /* Synchronously delete any running timer */ rpc_delete_timer(task); @@ -867,7 +892,6 @@ void rpc_release_task(struct rpc_task *task) xprt_release(task); if (task->tk_msg.rpc_cred) rpcauth_unbindcred(task); - rpc_free(task); if (task->tk_client) { rpc_release_client(task->tk_client); task->tk_client = NULL; @@ -876,11 +900,34 @@ void rpc_release_task(struct rpc_task *task) #ifdef RPC_DEBUG task->tk_magic = 0; #endif - if (task->tk_release) - task->tk_release(task); + if (task->tk_flags & RPC_TASK_DYNAMIC) + rpc_free_task(task); + if (tk_ops->rpc_release) + tk_ops->rpc_release(calldata); } /** + * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it + * @clnt - pointer to RPC client + * @flags - RPC flags + * @ops - RPC call ops + * @data - user call data + */ +struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, + const struct rpc_call_ops *ops, + void *data) +{ + struct rpc_task *task; + task = rpc_new_task(clnt, flags, ops, data); + if (task == NULL) + return ERR_PTR(-ENOMEM); + atomic_inc(&task->tk_count); + rpc_execute(task); + return task; +} +EXPORT_SYMBOL(rpc_run_task); + +/** * rpc_find_parent - find the parent of a child task. * @child: child task * @@ -890,12 +937,11 @@ void rpc_release_task(struct rpc_task *task) * * Caller must hold childq.lock */ -static inline struct rpc_task *rpc_find_parent(struct rpc_task *child) +static inline struct rpc_task *rpc_find_parent(struct rpc_task *child, struct rpc_task *parent) { - struct rpc_task *task, *parent; + struct rpc_task *task; struct list_head *le; - parent = (struct rpc_task *) child->tk_calldata; task_for_each(task, le, &childq.tasks[0]) if (task == parent) return parent; @@ -903,18 +949,22 @@ static inline struct rpc_task *rpc_find_parent(struct rpc_task *child) return NULL; } -static void rpc_child_exit(struct rpc_task *child) +static void rpc_child_exit(struct rpc_task *child, void *calldata) { struct rpc_task *parent; spin_lock_bh(&childq.lock); - if ((parent = rpc_find_parent(child)) != NULL) { + if ((parent = rpc_find_parent(child, calldata)) != NULL) { parent->tk_status = child->tk_status; __rpc_wake_up_task(parent); } spin_unlock_bh(&childq.lock); } +static const struct rpc_call_ops rpc_child_ops = { + .rpc_call_done = rpc_child_exit, +}; + /* * Note: rpc_new_task releases the client after a failure. 
*/ @@ -923,11 +973,9 @@ rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent) { struct rpc_task *task; - task = rpc_new_task(clnt, NULL, RPC_TASK_ASYNC | RPC_TASK_CHILD); + task = rpc_new_task(clnt, RPC_TASK_ASYNC | RPC_TASK_CHILD, &rpc_child_ops, parent); if (!task) goto fail; - task->tk_exit = rpc_child_exit; - task->tk_calldata = parent; return task; fail: @@ -1063,7 +1111,7 @@ void rpc_show_tasks(void) return; } printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " - "-rpcwait -action- --exit--\n"); + "-rpcwait -action- ---ops--\n"); alltask_for_each(t, le, &all_tasks) { const char *rpc_waitq = "none"; @@ -1078,7 +1126,7 @@ void rpc_show_tasks(void) (t->tk_client ? t->tk_client->cl_prog : 0), t->tk_rqstp, t->tk_timeout, rpc_waitq, - t->tk_action, t->tk_exit); + t->tk_action, t->tk_ops); } spin_unlock(&rpc_sched_lock); } diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index a03d4b600c9..9f737320359 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -30,8 +30,6 @@ EXPORT_SYMBOL(rpc_init_task); EXPORT_SYMBOL(rpc_sleep_on); EXPORT_SYMBOL(rpc_wake_up_next); EXPORT_SYMBOL(rpc_wake_up_task); -EXPORT_SYMBOL(rpc_new_child); -EXPORT_SYMBOL(rpc_run_child); EXPORT_SYMBOL(rpciod_down); EXPORT_SYMBOL(rpciod_up); EXPORT_SYMBOL(rpc_new_task); @@ -45,7 +43,6 @@ EXPORT_SYMBOL(rpc_clone_client); EXPORT_SYMBOL(rpc_bind_new_program); EXPORT_SYMBOL(rpc_destroy_client); EXPORT_SYMBOL(rpc_shutdown_client); -EXPORT_SYMBOL(rpc_release_client); EXPORT_SYMBOL(rpc_killall_tasks); EXPORT_SYMBOL(rpc_call_sync); EXPORT_SYMBOL(rpc_call_async); @@ -120,7 +117,6 @@ EXPORT_SYMBOL(unix_domain_find); /* Generic XDR */ EXPORT_SYMBOL(xdr_encode_string); -EXPORT_SYMBOL(xdr_decode_string); EXPORT_SYMBOL(xdr_decode_string_inplace); EXPORT_SYMBOL(xdr_decode_netobj); EXPORT_SYMBOL(xdr_encode_netobj); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index cac2e774dd8..3e6c694bbad 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -101,10 +101,22 @@ static void ip_map_put(struct cache_head *item, struct cache_detail *cd) } } +#if IP_HASHBITS == 8 +/* hash_long on a 64 bit machine is currently REALLY BAD for + * IP addresses in reverse-endian (i.e. on a little-endian machine). 
+ * So use a trivial but reliable hash instead + */ +static inline int hash_ip(unsigned long ip) +{ + int hash = ip ^ (ip>>16); + return (hash ^ (hash>>8)) & 0xff; +} +#endif + static inline int ip_map_hash(struct ip_map *item) { return hash_str(item->m_class, IP_HASHBITS) ^ - hash_long((unsigned long)item->m_addr.s_addr, IP_HASHBITS); + hash_ip((unsigned long)item->m_addr.s_addr); } static inline int ip_map_match(struct ip_map *item, struct ip_map *tmp) { diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index c6a51911e71..e67613e4eb1 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -758,7 +758,7 @@ svc_tcp_accept(struct svc_sock *svsk) struct svc_serv *serv = svsk->sk_server; struct socket *sock = svsk->sk_sock; struct socket *newsock; - struct proto_ops *ops; + const struct proto_ops *ops; struct svc_sock *newsvsk; int err, slen; @@ -1026,7 +1026,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) } else { printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", svsk->sk_server->sv_name, -len); - svc_sock_received(svsk); + goto err_delete; } return len; diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index aaf08cdd19f..ca4bfa57e11 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -93,27 +93,6 @@ xdr_encode_string(u32 *p, const char *string) } u32 * -xdr_decode_string(u32 *p, char **sp, int *lenp, int maxlen) -{ - unsigned int len; - char *string; - - if ((len = ntohl(*p++)) > maxlen) - return NULL; - if (lenp) - *lenp = len; - if ((len % 4) != 0) { - string = (char *) p; - } else { - string = (char *) (p - 1); - memmove(string, p, len); - } - string[len] = '\0'; - *sp = string; - return p + XDR_QUADLEN(len); -} - -u32 * xdr_decode_string_inplace(u32 *p, char **sp, int *lenp, int maxlen) { unsigned int len; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 6dda3860351..8ff2c8acb22 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -119,6 +119,17 @@ out_sleep: return 0; } +static void xprt_clear_locked(struct rpc_xprt *xprt) +{ + xprt->snd_task = NULL; + if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state) || xprt->shutdown) { + smp_mb__before_clear_bit(); + clear_bit(XPRT_LOCKED, &xprt->state); + smp_mb__after_clear_bit(); + } else + schedule_work(&xprt->task_cleanup); +} + /* * xprt_reserve_xprt_cong - serialize write access to transports * @task: task that is requesting access to the transport @@ -145,9 +156,7 @@ int xprt_reserve_xprt_cong(struct rpc_task *task) } return 1; } - smp_mb__before_clear_bit(); - clear_bit(XPRT_LOCKED, &xprt->state); - smp_mb__after_clear_bit(); + xprt_clear_locked(xprt); out_sleep: dprintk("RPC: %4d failed to lock transport %p\n", task->tk_pid, xprt); task->tk_timeout = 0; @@ -193,9 +202,7 @@ static void __xprt_lock_write_next(struct rpc_xprt *xprt) return; out_unlock: - smp_mb__before_clear_bit(); - clear_bit(XPRT_LOCKED, &xprt->state); - smp_mb__after_clear_bit(); + xprt_clear_locked(xprt); } static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) @@ -222,9 +229,7 @@ static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) return; } out_unlock: - smp_mb__before_clear_bit(); - clear_bit(XPRT_LOCKED, &xprt->state); - smp_mb__after_clear_bit(); + xprt_clear_locked(xprt); } /** @@ -237,10 +242,7 @@ out_unlock: void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) { if (xprt->snd_task == task) { - xprt->snd_task = NULL; - smp_mb__before_clear_bit(); - clear_bit(XPRT_LOCKED, &xprt->state); - smp_mb__after_clear_bit(); + xprt_clear_locked(xprt); __xprt_lock_write_next(xprt); } } @@ -256,10 +258,7 @@ void 
xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) { if (xprt->snd_task == task) { - xprt->snd_task = NULL; - smp_mb__before_clear_bit(); - clear_bit(XPRT_LOCKED, &xprt->state); - smp_mb__after_clear_bit(); + xprt_clear_locked(xprt); __xprt_lock_write_next_cong(xprt); } } @@ -535,10 +534,6 @@ void xprt_connect(struct rpc_task *task) dprintk("RPC: %4d xprt_connect xprt %p %s connected\n", task->tk_pid, xprt, (xprt_connected(xprt) ? "is" : "is not")); - if (xprt->shutdown) { - task->tk_status = -EIO; - return; - } if (!xprt->addr.sin_port) { task->tk_status = -EIO; return; @@ -687,9 +682,6 @@ int xprt_prepare_transmit(struct rpc_task *task) dprintk("RPC: %4d xprt_prepare_transmit\n", task->tk_pid); - if (xprt->shutdown) - return -EIO; - spin_lock_bh(&xprt->transport_lock); if (req->rq_received && !req->rq_bytes_sent) { err = req->rq_received; @@ -814,11 +806,9 @@ void xprt_reserve(struct rpc_task *task) struct rpc_xprt *xprt = task->tk_xprt; task->tk_status = -EIO; - if (!xprt->shutdown) { - spin_lock(&xprt->reserve_lock); - do_xprt_reserve(task); - spin_unlock(&xprt->reserve_lock); - } + spin_lock(&xprt->reserve_lock); + do_xprt_reserve(task); + spin_unlock(&xprt->reserve_lock); } static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt) @@ -838,6 +828,8 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) req->rq_timeout = xprt->timeout.to_initval; req->rq_task = task; req->rq_xprt = xprt; + req->rq_buffer = NULL; + req->rq_bufsize = 0; req->rq_xid = xprt_alloc_xid(xprt); req->rq_release_snd_buf = NULL; dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, @@ -863,10 +855,11 @@ void xprt_release(struct rpc_task *task) if (!list_empty(&req->rq_list)) list_del(&req->rq_list); xprt->last_used = jiffies; - if (list_empty(&xprt->recv) && !xprt->shutdown) + if (list_empty(&xprt->recv)) mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout); spin_unlock_bh(&xprt->transport_lock); + xprt->ops->buf_free(task); task->tk_rqstp = NULL; if (req->rq_release_snd_buf) req->rq_release_snd_buf(req); @@ -974,16 +967,6 @@ struct rpc_xprt *xprt_create_proto(int proto, struct sockaddr_in *sap, struct rp return xprt; } -static void xprt_shutdown(struct rpc_xprt *xprt) -{ - xprt->shutdown = 1; - rpc_wake_up(&xprt->sending); - rpc_wake_up(&xprt->resend); - xprt_wake_pending_tasks(xprt, -EIO); - rpc_wake_up(&xprt->backlog); - del_timer_sync(&xprt->timer); -} - /** * xprt_destroy - destroy an RPC transport, killing off all requests. 
* @xprt: transport to destroy @@ -992,7 +975,8 @@ static void xprt_shutdown(struct rpc_xprt *xprt) int xprt_destroy(struct rpc_xprt *xprt) { dprintk("RPC: destroying transport %p\n", xprt); - xprt_shutdown(xprt); + xprt->shutdown = 1; + del_timer_sync(&xprt->timer); xprt->ops->destroy(xprt); kfree(xprt); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 77e8800d412..c458f8d1d6d 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -28,6 +28,7 @@ #include <linux/udp.h> #include <linux/tcp.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/sched.h> #include <linux/file.h> #include <net/sock.h> @@ -424,7 +425,7 @@ static void xs_close(struct rpc_xprt *xprt) struct sock *sk = xprt->inet; if (!sk) - return; + goto clear_close_wait; dprintk("RPC: xs_close xprt %p\n", xprt); @@ -441,6 +442,10 @@ static void xs_close(struct rpc_xprt *xprt) sk->sk_no_check = 0; sock_release(sock); +clear_close_wait: + smp_mb__before_clear_bit(); + clear_bit(XPRT_CLOSE_WAIT, &xprt->state); + smp_mb__after_clear_bit(); } /** @@ -800,9 +805,13 @@ static void xs_tcp_state_change(struct sock *sk) case TCP_SYN_SENT: case TCP_SYN_RECV: break; + case TCP_CLOSE_WAIT: + /* Try to schedule an autoclose RPC call */ + set_bit(XPRT_CLOSE_WAIT, &xprt->state); + if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) + schedule_work(&xprt->task_cleanup); default: xprt_disconnect(xprt); - break; } out: read_unlock(&sk->sk_callback_lock); @@ -920,6 +929,18 @@ static void xs_udp_timer(struct rpc_task *task) xprt_adjust_cwnd(task, -ETIMEDOUT); } +/** + * xs_set_port - reset the port number in the remote endpoint address + * @xprt: generic transport + * @port: new port number + * + */ +static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) +{ + dprintk("RPC: setting port for xprt %p to %u\n", xprt, port); + xprt->addr.sin_port = htons(port); +} + static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) { struct sockaddr_in myaddr = { @@ -1160,7 +1181,10 @@ static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, .reserve_xprt = xprt_reserve_xprt_cong, .release_xprt = xprt_release_xprt_cong, + .set_port = xs_set_port, .connect = xs_connect, + .buf_alloc = rpc_malloc, + .buf_free = rpc_free, .send_request = xs_udp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_rtt, .timer = xs_udp_timer, @@ -1172,7 +1196,10 @@ static struct rpc_xprt_ops xs_tcp_ops = { .reserve_xprt = xprt_reserve_xprt, .release_xprt = xprt_release_xprt, + .set_port = xs_set_port, .connect = xs_connect, + .buf_alloc = rpc_malloc, + .buf_free = rpc_free, .send_request = xs_tcp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, .close = xs_close, diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index acc73ba8bad..5f6ae79b8b1 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -121,7 +121,7 @@ int sysctl_unix_max_dgram_qlen = 10; struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; -DEFINE_RWLOCK(unix_table_lock); +DEFINE_SPINLOCK(unix_table_lock); static atomic_t unix_nr_socks = ATOMIC_INIT(0); #define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE]) @@ -130,7 +130,7 @@ static atomic_t unix_nr_socks = ATOMIC_INIT(0); /* * SMP locking strategy: * hash table is protected with spinlock unix_table_lock * each socket state is protected by separate rwlock.
*/ @@ -214,16 +214,16 @@ static void __unix_insert_socket(struct hlist_head *list, struct sock *sk) static inline void unix_remove_socket(struct sock *sk) { - write_lock(&unix_table_lock); + spin_lock(&unix_table_lock); __unix_remove_socket(sk); - write_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); } static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk) { - write_lock(&unix_table_lock); + spin_lock(&unix_table_lock); __unix_insert_socket(list, sk); - write_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); } static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname, @@ -250,11 +250,11 @@ static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname, { struct sock *s; - read_lock(&unix_table_lock); + spin_lock(&unix_table_lock); s = __unix_find_socket_byname(sunname, len, type, hash); if (s) sock_hold(s); - read_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); return s; } @@ -263,7 +263,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i) struct sock *s; struct hlist_node *node; - read_lock(&unix_table_lock); + spin_lock(&unix_table_lock); sk_for_each(s, node, &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { struct dentry *dentry = unix_sk(s)->dentry; @@ -276,7 +276,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i) } s = NULL; found: - read_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); return s; } @@ -473,7 +473,7 @@ static int unix_dgram_connect(struct socket *, struct sockaddr *, static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *, struct msghdr *, size_t); -static struct proto_ops unix_stream_ops = { +static const struct proto_ops unix_stream_ops = { .family = PF_UNIX, .owner = THIS_MODULE, .release = unix_release, @@ -494,7 +494,7 @@ static struct proto_ops unix_stream_ops = { .sendpage = sock_no_sendpage, }; -static struct proto_ops unix_dgram_ops = { +static const struct proto_ops unix_dgram_ops = { .family = PF_UNIX, .owner = THIS_MODULE, .release = unix_release, @@ -515,7 +515,7 @@ static struct proto_ops unix_dgram_ops = { .sendpage = sock_no_sendpage, }; -static struct proto_ops unix_seqpacket_ops = { +static const struct proto_ops unix_seqpacket_ops = { .family = PF_UNIX, .owner = THIS_MODULE, .release = unix_release, @@ -564,7 +564,7 @@ static struct sock * unix_create1(struct socket *sock) u = unix_sk(sk); u->dentry = NULL; u->mnt = NULL; - rwlock_init(&u->lock); + spin_lock_init(&u->lock); atomic_set(&u->inflight, sock ? 0 : -1); init_MUTEX(&u->readsem); /* single task reading lock */ init_waitqueue_head(&u->peer_wait); @@ -642,12 +642,12 @@ retry: addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short); addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0)); - write_lock(&unix_table_lock); + spin_lock(&unix_table_lock); ordernum = (ordernum+1)&0xFFFFF; if (__unix_find_socket_byname(addr->name, addr->len, sock->type, addr->hash)) { - write_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); /* Sanity yield. It is unusual case, but yet... 
*/ if (!(ordernum&0xFF)) yield(); @@ -658,7 +658,7 @@ retry: __unix_remove_socket(sk); u->addr = addr; __unix_insert_socket(&unix_socket_table[addr->hash], sk); - write_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); err = 0; out: up(&u->readsem); @@ -791,7 +791,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) addr->hash = UNIX_HASH_SIZE; } - write_lock(&unix_table_lock); + spin_lock(&unix_table_lock); if (!sunaddr->sun_path[0]) { err = -EADDRINUSE; @@ -814,7 +814,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) __unix_insert_socket(list, sk); out_unlock: - write_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); out_up: up(&u->readsem); out: @@ -1063,10 +1063,12 @@ restart: /* Set credentials */ sk->sk_peercred = other->sk_peercred; - sock_hold(newsk); - unix_peer(sk) = newsk; sock->state = SS_CONNECTED; sk->sk_state = TCP_ESTABLISHED; + sock_hold(newsk); + + smp_mb__after_atomic_inc(); /* sock_hold() does an atomic_inc() */ + unix_peer(sk) = newsk; unix_state_wunlock(sk); @@ -1414,7 +1416,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, } else { sunaddr = NULL; err = -ENOTCONN; - other = unix_peer_get(sk); + other = unix_peer(sk); if (!other) goto out_err; } @@ -1476,7 +1478,6 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, other->sk_data_ready(other, size); sent+=size; } - sock_put(other); scm_destroy(siocb->scm); siocb->scm = NULL; @@ -1491,8 +1492,6 @@ pipe_err: send_sig(SIGPIPE,current,0); err = -EPIPE; out_err: - if (other) - sock_put(other); scm_destroy(siocb->scm); siocb->scm = NULL; return sent ? : err; @@ -1860,7 +1859,7 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } default: - err = dev_ioctl(cmd, (void __user *)arg); + err = -ENOIOCTLCMD; break; } return err; @@ -1917,7 +1916,7 @@ static struct sock *unix_seq_idx(int *iter, loff_t pos) static void *unix_seq_start(struct seq_file *seq, loff_t *pos) { - read_lock(&unix_table_lock); + spin_lock(&unix_table_lock); return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1); } @@ -1932,7 +1931,7 @@ static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void unix_seq_stop(struct seq_file *seq, void *v) { - read_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); } static int unix_seq_show(struct seq_file *seq, void *v) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 6ffc64e1712..411802bd4d3 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -182,7 +182,7 @@ void unix_gc(void) if (down_trylock(&unix_gc_sem)) return; - read_lock(&unix_table_lock); + spin_lock(&unix_table_lock); forall_unix_sockets(i, s) { @@ -301,7 +301,7 @@ void unix_gc(void) } u->gc_tree = GC_ORPHAN; } - read_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); /* * Here we are. Hitlist is filled. Die. 
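The net/sunrpc/sched.c changes at the top of this section replace the per-task tk_exit/tk_release function pointers with a shared, const rpc_call_ops table plus an opaque calldata pointer, and make tasks reference-counted through tk_count (rpc_release_task only tears the task down once the last reference is dropped). A minimal caller sketch under the new interface, assuming only what the hunks above show; demo_data, demo_done, demo_release and demo_start are illustrative names, not part of the patch:

struct demo_data {
	int status;
};

static void demo_done(struct rpc_task *task, void *calldata)
{
	struct demo_data *d = calldata;

	d->status = task->tk_status;	/* replaces the old tk_exit hook */
}

static void demo_release(void *calldata)
{
	kfree(calldata);		/* replaces the old tk_release hook */
}

static const struct rpc_call_ops demo_ops = {
	.rpc_call_done	= demo_done,
	.rpc_release	= demo_release,
};

static int demo_start(struct rpc_clnt *clnt, struct demo_data *d)
{
	struct rpc_task *task;

	task = rpc_run_task(clnt, RPC_TASK_ASYNC, &demo_ops, d);
	if (IS_ERR(task))
		return PTR_ERR(task);
	/* rpc_run_task() returned with an extra tk_count reference;
	 * drop it once we no longer need to touch the task. */
	rpc_release_task(task);
	return 0;
}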
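The hash_ip() helper added to net/sunrpc/svcauth_unix.c above XOR-folds all 32 bits of the network-byte-order address into 8, so every octet influences the bucket; per the comment in that hunk, hash_long() distributed such values poorly on 64-bit little-endian machines. A stand-alone user-space check of the folding (illustrative only, not kernel code):

#include <stdio.h>
#include <arpa/inet.h>

/* Same folding as the hash_ip() added above. */
static int hash_ip(unsigned long ip)
{
	int hash = ip ^ (ip >> 16);

	return (hash ^ (hash >> 8)) & 0xff;
}

int main(void)
{
	const char *addrs[] = { "10.0.0.1", "10.0.0.2", "10.0.1.1", "192.168.0.1" };

	for (int i = 0; i < 4; i++) {
		/* inet_addr() returns the address in network byte order,
		 * matching m_addr.s_addr in the kernel code. */
		unsigned long ip = inet_addr(addrs[i]);

		printf("%-12s -> bucket %3d\n", addrs[i], hash_ip(ip));
	}
	return 0;
}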
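The xprt_clear_locked() helper in net/sunrpc/xprt.c and the new TCP_CLOSE_WAIT case in xs_tcp_state_change() (both above) implement a lock handoff: whoever owns, or wins, the XPRT_LOCKED bit becomes responsible for scheduling the autoclose worker once XPRT_CLOSE_WAIT is set, so task_cleanup can never run while another task still holds the transport write lock. The two halves juxtaposed (this is the patch's own code, rearranged for clarity, not new logic):

/* Socket callback side: request a close, scheduling cleanup only
 * if we can take the transport lock ourselves. */
set_bit(XPRT_CLOSE_WAIT, &xprt->state);
if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
	schedule_work(&xprt->task_cleanup);

/* Unlock side: if a close was requested while the lock was held,
 * keep XPRT_LOCKED and hand it to the cleanup worker instead of
 * releasing it. */
xprt->snd_task = NULL;
if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state) || xprt->shutdown) {
	smp_mb__before_clear_bit();
	clear_bit(XPRT_LOCKED, &xprt->state);
	smp_mb__after_clear_bit();
} else
	schedule_work(&xprt->task_cleanup);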
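Buffer management becomes a per-transport operation in the xprt.c and xprtsock.c hunks above: xprt_request_init() now zeroes rq_buffer/rq_bufsize, xprt_release() calls xprt->ops->buf_free(task), and both socket transports wire buf_alloc/buf_free to rpc_malloc/rpc_free. Those two live in net/sunrpc/sched.c and are not shown in this excerpt; a hedged sketch of the minimal contract the call sites rely on (the real implementations may differ, e.g. use a mempool or different gfp flags):

static void *demo_buf_alloc(struct rpc_task *task, size_t size)
{
	struct rpc_rqst *req = task->tk_rqstp;

	req->rq_buffer = kmalloc(size, GFP_NOFS);
	if (req->rq_buffer)
		req->rq_bufsize = size;
	return req->rq_buffer;
}

static void demo_buf_free(struct rpc_task *task)
{
	struct rpc_rqst *req = task->tk_rqstp;

	kfree(req->rq_buffer);	/* kfree(NULL) is a no-op, so this is
				 * safe even if no buffer was allocated */
	req->rq_buffer = NULL;
	req->rq_bufsize = 0;
}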
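The af_unix.c conversion of unix_table_lock from rwlock to spinlock (above) works because, after this patch, every taker of the lock, including the former read_lock() sites such as unix_find_socket_byname() and the /proc iterator, acquires it for short hash-chain walks only; with no reader parallelism worth exploiting, a plain spinlock is the cheaper primitive. The lookup-and-ref pattern all call sites now share (patch's own code, isolated):

spin_lock(&unix_table_lock);
s = __unix_find_socket_byname(sunname, len, type, hash);
if (s)
	sock_hold(s);	/* take the reference while the chain is pinned... */
spin_unlock(&unix_table_lock);
/* ...so the socket stays valid after the lock is dropped */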
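The reordering in the unix_stream_connect() hunk above (sock_hold() before the unix_peer(sk) assignment, separated by smp_mb__after_atomic_inc()) pairs with unix_stream_sendmsg() now reading unix_peer(sk) without taking its own reference: any CPU that observes the published peer pointer must also observe the elevated reference count. A stand-alone C11 model of the same publish ordering (illustrative only; the names are hypothetical):

#include <stdatomic.h>

struct peer {
	atomic_int refcnt;
};

static _Atomic(struct peer *) peer_ptr;

static void publish_peer(struct peer *p)
{
	atomic_fetch_add_explicit(&p->refcnt, 1, memory_order_relaxed);
	/* plays the role of smp_mb__after_atomic_inc(): the count bump
	 * is ordered before the pointer becomes visible to readers */
	atomic_thread_fence(memory_order_release);
	atomic_store_explicit(&peer_ptr, p, memory_order_relaxed);
}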
diff --git a/net/wanrouter/af_wanpipe.c b/net/wanrouter/af_wanpipe.c index 59fec59b213..7a43ae4721e 100644 --- a/net/wanrouter/af_wanpipe.c +++ b/net/wanrouter/af_wanpipe.c @@ -181,7 +181,7 @@ struct wanpipe_opt #endif static int sk_count; -extern struct proto_ops wanpipe_ops; +extern const struct proto_ops wanpipe_ops; static unsigned long find_free_critical; static void wanpipe_unlink_driver(struct sock *sk); @@ -1839,7 +1839,7 @@ static int wanpipe_ioctl(struct socket *sock, unsigned int cmd, unsigned long ar #endif default: - return dev_ioctl(cmd,(void __user *) arg); + return -ENOIOCTLCMD; } /*NOTREACHED*/ } @@ -2546,7 +2546,7 @@ static int wanpipe_connect(struct socket *sock, struct sockaddr *uaddr, int addr return 0; } -struct proto_ops wanpipe_ops = { +const struct proto_ops wanpipe_ops = { .family = PF_WANPIPE, .owner = THIS_MODULE, .release = wanpipe_release, diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 020d73cc841..bfabaf9cba8 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -64,7 +64,7 @@ int sysctl_x25_ack_holdback_timeout = X25_DEFAULT_T2; HLIST_HEAD(x25_list); DEFINE_RWLOCK(x25_list_lock); -static struct proto_ops x25_proto_ops; +static const struct proto_ops x25_proto_ops; static struct x25_address null_x25_address = {" "}; @@ -540,12 +540,7 @@ static struct sock *x25_make_new(struct sock *osk) sk->sk_state = TCP_ESTABLISHED; sk->sk_sleep = osk->sk_sleep; sk->sk_backlog_rcv = osk->sk_backlog_rcv; - - if (sock_flag(osk, SOCK_ZAPPED)) - sock_set_flag(sk, SOCK_ZAPPED); - - if (sock_flag(osk, SOCK_DBG)) - sock_set_flag(sk, SOCK_DBG); + sock_copy_flags(sk, osk); ox25 = x25_sk(osk); x25->t21 = ox25->t21; @@ -1378,7 +1373,7 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } default: - rc = dev_ioctl(cmd, argp); + rc = -ENOIOCTLCMD; break; } @@ -1391,7 +1386,7 @@ static struct net_proto_family x25_family_ops = { .owner = THIS_MODULE, }; -static struct proto_ops SOCKOPS_WRAPPED(x25_proto_ops) = { +static const struct proto_ops SOCKOPS_WRAPPED(x25_proto_ops) = { .family = AF_X25, .owner = THIS_MODULE, .release = x25_release, diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d19e274b9c4..64a447375fd 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -10,7 +10,7 @@ * YOSHIFUJI Hideaki * Split up af-specific portion * Derek Atkins <derek@ihtfp.com> Add the post_input processor - * + * */ #include <asm/bug.h> @@ -256,6 +256,7 @@ void __xfrm_policy_destroy(struct xfrm_policy *policy) if (del_timer(&policy->timer)) BUG(); + security_xfrm_policy_free(policy); kfree(policy); } EXPORT_SYMBOL(__xfrm_policy_destroy); @@ -350,7 +351,8 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) write_lock_bh(&xfrm_policy_lock); for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) { - if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0) { + if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0 && + xfrm_sec_ctx_match(pol->security, policy->security)) { if (excl) { write_unlock_bh(&xfrm_policy_lock); return -EEXIST; @@ -416,14 +418,15 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) } EXPORT_SYMBOL(xfrm_policy_insert); -struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel, - int delete) +struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel, + struct xfrm_sec_ctx *ctx, int delete) { struct xfrm_policy *pol, **p; write_lock_bh(&xfrm_policy_lock); for (p = &xfrm_policy_list[dir]; 
(pol=*p)!=NULL; p = &pol->next) { - if (memcmp(sel, &pol->selector, sizeof(*sel)) == 0) { + if ((memcmp(sel, &pol->selector, sizeof(*sel)) == 0) && + (xfrm_sec_ctx_match(ctx, pol->security))) { xfrm_pol_hold(pol); if (delete) *p = pol->next; @@ -438,7 +441,7 @@ struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel, } return pol; } -EXPORT_SYMBOL(xfrm_policy_bysel); +EXPORT_SYMBOL(xfrm_policy_bysel_ctx); struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete) { @@ -519,7 +522,7 @@ EXPORT_SYMBOL(xfrm_policy_walk); /* Find policy to apply to this flow. */ -static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, +static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir, void **objp, atomic_t **obj_refp) { struct xfrm_policy *pol; @@ -533,9 +536,12 @@ static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, continue; match = xfrm_selector_match(sel, fl, family); + if (match) { - xfrm_pol_hold(pol); - break; + if (!security_xfrm_policy_lookup(pol, sk_sid, dir)) { + xfrm_pol_hold(pol); + break; + } } } read_unlock_bh(&xfrm_policy_lock); @@ -543,15 +549,37 @@ static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, *obj_refp = &pol->refcnt; } -static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl) +static inline int policy_to_flow_dir(int dir) +{ + if (XFRM_POLICY_IN == FLOW_DIR_IN && + XFRM_POLICY_OUT == FLOW_DIR_OUT && + XFRM_POLICY_FWD == FLOW_DIR_FWD) + return dir; + switch (dir) { + default: + case XFRM_POLICY_IN: + return FLOW_DIR_IN; + case XFRM_POLICY_OUT: + return FLOW_DIR_OUT; + case XFRM_POLICY_FWD: + return FLOW_DIR_FWD; + }; +} + +static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl, u32 sk_sid) { struct xfrm_policy *pol; read_lock_bh(&xfrm_policy_lock); if ((pol = sk->sk_policy[dir]) != NULL) { - int match = xfrm_selector_match(&pol->selector, fl, + int match = xfrm_selector_match(&pol->selector, fl, sk->sk_family); + int err = 0; + if (match) + err = security_xfrm_policy_lookup(pol, sk_sid, policy_to_flow_dir(dir)); + + if (match && !err) xfrm_pol_hold(pol); else pol = NULL; @@ -624,6 +652,10 @@ static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) if (newp) { newp->selector = old->selector; + if (security_xfrm_policy_clone(old, newp)) { + kfree(newp); + return NULL; /* ENOMEM */ + } newp->lft = old->lft; newp->curlft = old->curlft; newp->action = old->action; @@ -735,22 +767,6 @@ xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx, return err; } -static inline int policy_to_flow_dir(int dir) -{ - if (XFRM_POLICY_IN == FLOW_DIR_IN && - XFRM_POLICY_OUT == FLOW_DIR_OUT && - XFRM_POLICY_FWD == FLOW_DIR_FWD) - return dir; - switch (dir) { - default: - case XFRM_POLICY_IN: - return FLOW_DIR_IN; - case XFRM_POLICY_OUT: - return FLOW_DIR_OUT; - case XFRM_POLICY_FWD: - return FLOW_DIR_FWD; - }; -} static int stale_bundle(struct dst_entry *dst); @@ -769,19 +785,20 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, int err; u32 genid; u16 family = dst_orig->ops->family; + u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); + u32 sk_sid = security_sk_sid(sk, fl, dir); restart: genid = atomic_read(&flow_cache_genid); policy = NULL; if (sk && sk->sk_policy[1]) - policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); + policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, sk_sid); if (!policy) { /* To accelerate a bit... 
*/ if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT]) return 0; - policy = flow_cache_lookup(fl, family, - policy_to_flow_dir(XFRM_POLICY_OUT), + policy = flow_cache_lookup(fl, sk_sid, family, dir, xfrm_policy_lookup); } @@ -962,16 +979,20 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, { struct xfrm_policy *pol; struct flowi fl; + u8 fl_dir = policy_to_flow_dir(dir); + u32 sk_sid; if (_decode_session(skb, &fl, family) < 0) return 0; + sk_sid = security_sk_sid(sk, &fl, fl_dir); + /* First, check used SA against their selectors. */ if (skb->sp) { int i; for (i=skb->sp->len-1; i>=0; i--) { - struct sec_decap_state *xvec = &(skb->sp->x[i]); + struct sec_decap_state *xvec = &(skb->sp->x[i]); if (!xfrm_selector_match(&xvec->xvec->sel, &fl, family)) return 0; @@ -986,11 +1007,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, pol = NULL; if (sk && sk->sk_policy[dir]) - pol = xfrm_sk_policy_lookup(sk, dir, &fl); + pol = xfrm_sk_policy_lookup(sk, dir, &fl, sk_sid); if (!pol) - pol = flow_cache_lookup(&fl, family, - policy_to_flow_dir(dir), + pol = flow_cache_lookup(&fl, sk_sid, family, fl_dir, xfrm_policy_lookup); if (!pol) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 479effc9766..e12d0be5f97 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -10,7 +10,7 @@ * Split up af-specific functions * Derek Atkins <derek@ihtfp.com> * Add UDP Encapsulation - * + * */ #include <linux/workqueue.h> @@ -70,6 +70,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) x->type->destructor(x); xfrm_put_type(x->type); } + security_xfrm_state_free(x); kfree(x); } @@ -343,7 +344,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, selector. */ if (x->km.state == XFRM_STATE_VALID) { - if (!xfrm_selector_match(&x->sel, fl, family)) + if (!xfrm_selector_match(&x->sel, fl, family) || + !xfrm_sec_ctx_match(pol->security, x->security)) continue; if (!best || best->km.dying > x->km.dying || @@ -354,7 +356,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, acquire_in_progress = 1; } else if (x->km.state == XFRM_STATE_ERROR || x->km.state == XFRM_STATE_EXPIRED) { - if (xfrm_selector_match(&x->sel, fl, family)) + if (xfrm_selector_match(&x->sel, fl, family) && + xfrm_sec_ctx_match(pol->security, x->security)) error = -ESRCH; } } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 0cdd9a07e04..ac87a09ba83 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -7,7 +7,7 @@ * Kazunori MIYAZAWA @USAGI * Kunihiro Ishiguro <kunihiro@ipinfusion.com> * IPv6 support - * + * */ #include <linux/module.h> @@ -88,6 +88,34 @@ static int verify_encap_tmpl(struct rtattr **xfrma) return 0; } + +static inline int verify_sec_ctx_len(struct rtattr **xfrma) +{ + struct rtattr *rt = xfrma[XFRMA_SEC_CTX - 1]; + struct xfrm_user_sec_ctx *uctx; + int len = 0; + + if (!rt) + return 0; + + if (rt->rta_len < sizeof(*uctx)) + return -EINVAL; + + uctx = RTA_DATA(rt); + + if (uctx->ctx_len > PAGE_SIZE) + return -EINVAL; + + len += sizeof(struct xfrm_user_sec_ctx); + len += uctx->ctx_len; + + if (uctx->len != len) + return -EINVAL; + + return 0; +} + + static int verify_newsa_info(struct xfrm_usersa_info *p, struct rtattr **xfrma) { @@ -145,6 +173,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, goto out; if ((err = verify_encap_tmpl(xfrma))) goto out; + if ((err = verify_sec_ctx_len(xfrma))) + goto out; err = -EINVAL; switch (p->mode) { @@ -209,6 +239,30 @@ static int 
attach_encap_tmpl(struct xfrm_encap_tmpl **encapp, struct rtattr *u_a return 0; } + +static inline int xfrm_user_sec_ctx_size(struct xfrm_policy *xp) +{ + struct xfrm_sec_ctx *xfrm_ctx = xp->security; + int len = 0; + + if (xfrm_ctx) { + len += sizeof(struct xfrm_user_sec_ctx); + len += xfrm_ctx->ctx_len; + } + return len; +} + +static int attach_sec_ctx(struct xfrm_state *x, struct rtattr *u_arg) +{ + struct xfrm_user_sec_ctx *uctx; + + if (!u_arg) + return 0; + + uctx = RTA_DATA(u_arg); + return security_xfrm_state_alloc(x, uctx); +} + static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) { memcpy(&x->id, &p->id, sizeof(x->id)); @@ -253,6 +307,9 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, if (err) goto error; + if ((err = attach_sec_ctx(x, xfrma[XFRMA_SEC_CTX-1]))) + goto error; + x->km.seq = p->seq; return x; @@ -272,11 +329,11 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) int err; struct km_event c; - err = verify_newsa_info(p, (struct rtattr **) xfrma); + err = verify_newsa_info(p, (struct rtattr **)xfrma); if (err) return err; - x = xfrm_state_construct(p, (struct rtattr **) xfrma, &err); + x = xfrm_state_construct(p, (struct rtattr **)xfrma, &err); if (!x) return err; @@ -390,6 +447,19 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) if (x->encap) RTA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); + if (x->security) { + int ctx_size = sizeof(struct xfrm_sec_ctx) + + x->security->ctx_len; + struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size); + struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); + + uctx->exttype = XFRMA_SEC_CTX; + uctx->len = ctx_size; + uctx->ctx_doi = x->security->ctx_doi; + uctx->ctx_alg = x->security->ctx_alg; + uctx->ctx_len = x->security->ctx_len; + memcpy(uctx + 1, x->security->ctx_str, x->security->ctx_len); + } nlh->nlmsg_len = skb->tail - b; out: sp->this_idx++; @@ -603,6 +673,18 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) return verify_policy_dir(p->dir); } +static int copy_from_user_sec_ctx(struct xfrm_policy *pol, struct rtattr **xfrma) +{ + struct rtattr *rt = xfrma[XFRMA_SEC_CTX-1]; + struct xfrm_user_sec_ctx *uctx; + + if (!rt) + return 0; + + uctx = RTA_DATA(rt); + return security_xfrm_policy_alloc(pol, uctx); +} + static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut, int nr) { @@ -681,7 +763,10 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, } copy_from_user_policy(xp, p); - err = copy_from_user_tmpl(xp, xfrma); + + if (!(err = copy_from_user_tmpl(xp, xfrma))) + err = copy_from_user_sec_ctx(xp, xfrma); + if (err) { *errp = err; kfree(xp); @@ -702,8 +787,11 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr err = verify_newpolicy_info(p); if (err) return err; + err = verify_sec_ctx_len((struct rtattr **)xfrma); + if (err) + return err; - xp = xfrm_policy_construct(p, (struct rtattr **) xfrma, &err); + xp = xfrm_policy_construct(p, (struct rtattr **)xfrma, &err); if (!xp) return err; @@ -714,6 +802,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY; err = xfrm_policy_insert(p->dir, xp, excl); if (err) { + security_xfrm_policy_free(xp); kfree(xp); return err; } @@ -761,6 +850,27 @@ rtattr_failure: return -1; } +static int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb) +{ + if (xp->security) { + int ctx_size = 
sizeof(struct xfrm_sec_ctx) + + xp->security->ctx_len; + struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size); + struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); + + uctx->exttype = XFRMA_SEC_CTX; + uctx->len = ctx_size; + uctx->ctx_doi = xp->security->ctx_doi; + uctx->ctx_alg = xp->security->ctx_alg; + uctx->ctx_len = xp->security->ctx_len; + memcpy(uctx + 1, xp->security->ctx_str, xp->security->ctx_len); + } + return 0; + + rtattr_failure: + return -1; +} + static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr) { struct xfrm_dump_info *sp = ptr; @@ -782,6 +892,8 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr copy_to_user_policy(xp, p, dir); if (copy_to_user_tmpl(xp, skb) < 0) goto nlmsg_failure; + if (copy_to_user_sec_ctx(xp, skb)) + goto nlmsg_failure; nlh->nlmsg_len = skb->tail - b; out: @@ -852,8 +964,25 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr if (p->index) xp = xfrm_policy_byid(p->dir, p->index, delete); - else - xp = xfrm_policy_bysel(p->dir, &p->sel, delete); + else { + struct rtattr **rtattrs = (struct rtattr **)xfrma; + struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1]; + struct xfrm_policy tmp; + + err = verify_sec_ctx_len(rtattrs); + if (err) + return err; + + memset(&tmp, 0, sizeof(struct xfrm_policy)); + if (rt) { + struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); + + if ((err = security_xfrm_policy_alloc(&tmp, uctx))) + return err; + } + xp = xfrm_policy_bysel_ctx(p->dir, &p->sel, tmp.security, delete); + security_xfrm_policy_free(&tmp); + } if (xp == NULL) return -ENOENT; @@ -1224,6 +1353,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, if (copy_to_user_tmpl(xp, skb) < 0) goto nlmsg_failure; + if (copy_to_user_sec_ctx(xp, skb)) + goto nlmsg_failure; nlh->nlmsg_len = skb->tail - b; return skb->len; @@ -1241,6 +1372,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); len += NLMSG_SPACE(sizeof(struct xfrm_user_acquire)); + len += RTA_SPACE(xfrm_user_sec_ctx_size(xp)); skb = alloc_skb(len, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; @@ -1324,6 +1456,8 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, copy_to_user_policy(xp, &upe->pol, dir); if (copy_to_user_tmpl(xp, skb) < 0) goto nlmsg_failure; + if (copy_to_user_sec_ctx(xp, skb)) + goto nlmsg_failure; upe->hard = !!hard; nlh->nlmsg_len = skb->tail - b; @@ -1341,6 +1475,7 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_eve len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); len += NLMSG_SPACE(sizeof(struct xfrm_user_polexpire)); + len += RTA_SPACE(xfrm_user_sec_ctx_size(xp)); skb = alloc_skb(len, GFP_ATOMIC); if (skb == NULL) return -ENOMEM;
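The ioctl changes in af_wanpipe.c and af_x25.c above (and the matching af_unix.c one earlier in this section) all follow the same convention: a protocol that does not recognise a command now returns -ENOIOCTLCMD instead of calling dev_ioctl() itself, so the device-ioctl fallback can live in one place in the socket core. A hedged sketch of the dispatch this enables; the core-side code is in net/socket.c, not in this patch, and may differ in detail:

err = sock->ops->ioctl(sock, cmd, arg);
/* The protocol didn't recognise the command: fall back to the
 * generic device ioctls in a single, central place. */
if (err == -ENOIOCTLCMD)
	err = dev_ioctl(cmd, (void __user *)arg);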
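xfrm_sec_ctx_match(), used by the xfrm_policy.c and xfrm_state.c hunks above to require that a policy or state carry the same security context as the one being matched, is defined outside this excerpt (in include/net/xfrm.h). One plausible shape, shown only to make the matching rule concrete; the real helper may instead compare a kernel-resolved SID rather than the raw context string:

static inline int xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1,
				     struct xfrm_sec_ctx *s2)
{
	if (s1 == s2)			/* covers the both-NULL case */
		return 1;
	if (s1 == NULL || s2 == NULL)
		return 0;
	return s1->ctx_doi == s2->ctx_doi &&
	       s1->ctx_alg == s2->ctx_alg &&
	       s1->ctx_len == s2->ctx_len &&
	       memcmp(s1->ctx_str, s2->ctx_str, s1->ctx_len) == 0;
}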
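The flow_cache_lookup() calls in the xfrm_policy.c hunks above now pass sk_sid, which means both the cached flow entries and the resolver callback are keyed by the socket's security ID in addition to the flow itself. The resolver type in include/net/flow.h presumably changed in step, along these lines (an assumption, since that header is not part of this excerpt):

typedef void (*flow_resolve_t)(struct flowi *key, u32 sk_sid,
			       u16 family, u8 dir,
			       void **objp, atomic_t **obj_refp);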
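verify_sec_ctx_len() above accepts an XFRMA_SEC_CTX attribute only when uctx->len equals sizeof(struct xfrm_user_sec_ctx) plus uctx->ctx_len, with ctx_len capped at PAGE_SIZE, and copy_to_user_sec_ctx() shows the wire layout: the fixed header immediately followed by ctx_len bytes of context string. A hedged user-space sketch of composing a payload that satisfies those checks; build_sec_ctx is a hypothetical helper, and whether ctx_len counts the trailing NUL is a userland convention assumed here:

#include <string.h>
#include <linux/xfrm.h>		/* struct xfrm_user_sec_ctx, XFRMA_SEC_CTX */

static int build_sec_ctx(struct xfrm_user_sec_ctx *uctx,
			 const char *ctx, __u8 doi, __u8 alg)
{
	__u16 ctx_len = strlen(ctx) + 1;	/* include the NUL */

	uctx->exttype = XFRMA_SEC_CTX;
	uctx->len = sizeof(*uctx) + ctx_len;	/* the verify_sec_ctx_len() rule */
	uctx->ctx_doi = doi;
	uctx->ctx_alg = alg;
	uctx->ctx_len = ctx_len;
	memcpy(uctx + 1, ctx, ctx_len);		/* string follows the header */
	return uctx->len;
}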
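Both notification paths above (xfrm_send_acquire() and xfrm_exp_policy_notify()) grow the skb budget by RTA_SPACE(xfrm_user_sec_ctx_size(xp)) before alloc_skb(); if an optional attribute were not pre-counted this way, the later __RTA_PUT() in copy_to_user_sec_ctx() would overrun the tailroom and take the rtattr_failure path. The sizing rule, isolated from the acquire case above:

/* Every attribute later emitted with RTA_PUT() must be pre-counted
 * with RTA_SPACE(), which includes the rtattr header and alignment
 * padding, on top of NLMSG_SPACE() for the fixed message body. */
len  = NLMSG_SPACE(sizeof(struct xfrm_user_acquire));
len += RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
len += RTA_SPACE(xfrm_user_sec_ctx_size(xp));	/* a few bytes of harmless
						 * slack when xp->security
						 * is NULL */
skb = alloc_skb(len, GFP_ATOMIC);
if (skb == NULL)
	return -ENOMEM;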