148 files changed, 2512 insertions, 5197 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index e55136ae09f..011cca7ae02 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -456,6 +456,14 @@ config TCP_CONG_BIC
 	increase provides TCP friendliness.
 	See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/
 
+config TCP_CONG_CUBIC
+	tristate "CUBIC TCP"
+	default m
+	---help---
+	This is version 2.0 of BIC-TCP which uses a cubic growth function
+	among other techniques.
+	See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/cubic-paper.pdf
+
 config TCP_CONG_WESTWOOD
 	tristate "TCP Westwood+"
 	default m
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index f0435d00db6..35e5f599909 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -9,7 +9,7 @@ obj-y     := route.o inetpeer.o protocol.o \
 	     tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \
 	     tcp_minisocks.o tcp_cong.o \
 	     datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \
-	     sysctl_net_ipv4.o fib_frontend.o fib_semantics.o netfilter.o
+	     sysctl_net_ipv4.o fib_frontend.o fib_semantics.o
 
 obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
 obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o
@@ -28,12 +28,13 @@ obj-$(CONFIG_IP_ROUTE_MULTIPATH_RR) += multipath_rr.o
 obj-$(CONFIG_IP_ROUTE_MULTIPATH_RANDOM) += multipath_random.o
 obj-$(CONFIG_IP_ROUTE_MULTIPATH_WRANDOM) += multipath_wrandom.o
 obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o
-obj-$(CONFIG_NETFILTER)	+= netfilter/
+obj-$(CONFIG_NETFILTER)	+= netfilter.o netfilter/
 obj-$(CONFIG_IP_VS) += ipvs/
 obj-$(CONFIG_INET_DIAG) += inet_diag.o 
 obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o
 obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
 obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
+obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o
 obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o
 obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o
 obj-$(CONFIG_TCP_CONG_HYBLA) += tcp_hybla.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d368cf24900..97c276f95b3 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -79,6 +79,7 @@
 #include <linux/string.h>
 #include <linux/sockios.h>
 #include <linux/net.h>
+#include <linux/capability.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
@@ -93,6 +94,7 @@
 #include <linux/smp_lock.h>
 #include <linux/inet.h>
 #include <linux/igmp.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <net/ip.h>
 #include <net/protocol.h>
@@ -302,6 +304,7 @@ lookup_protocol:
 		sk->sk_reuse = 1;
 
 	inet = inet_sk(sk);
+	inet->is_icsk = INET_PROTOSW_ICSK & answer_flags;
 
 	if (SOCK_RAW == sock->type) {
 		inet->num = protocol;
@@ -775,16 +778,16 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 			err = devinet_ioctl(cmd, (void __user *)arg);
 			break;
 		default:
-			if (!sk->sk_prot->ioctl ||
-			    (err = sk->sk_prot->ioctl(sk, cmd, arg)) ==
-			    					-ENOIOCTLCMD)
-				err = dev_ioctl(cmd, (void __user *)arg);
+			if (sk->sk_prot->ioctl)
+				err = sk->sk_prot->ioctl(sk, cmd, arg);
+			else
+				err = -ENOIOCTLCMD;
 			break;
 	}
 	return err;
 }
 
-struct proto_ops inet_stream_ops = {
+const struct proto_ops inet_stream_ops = {
 	.family =	PF_INET,
 	.owner =	THIS_MODULE,
 	.release =	inet_release,
@@ -805,7 +808,7 @@ struct proto_ops inet_stream_ops = {
 	.sendpage =	tcp_sendpage
 };
 
-struct proto_ops inet_dgram_ops = {
+const struct proto_ops inet_dgram_ops = {
 	.family =	PF_INET,
 	.owner =	THIS_MODULE,
 	.release =	inet_release,
@@ -830,7 +833,7 @@ struct proto_ops inet_dgram_ops = {
  * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without
  * udp_poll
  */
-static struct proto_ops inet_sockraw_ops = {
+static const struct proto_ops inet_sockraw_ops = {
 	.family =	PF_INET,
 	.owner =	THIS_MODULE,
 	.release =	inet_release,
@@ -869,7 +872,8 @@ static struct inet_protosw inetsw_array[] =
                 .ops =        &inet_stream_ops,
                 .capability = -1,
                 .no_check =   0,
-                .flags =      INET_PROTOSW_PERMANENT,
+                .flags =      INET_PROTOSW_PERMANENT |
+			      INET_PROTOSW_ICSK,
         },
 
         {
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 035ad2c9e1b..aed537fa2c8 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -6,6 +6,7 @@
 #include <linux/crypto.h>
 #include <linux/pfkeyv2.h>
 #include <net/icmp.h>
+#include <net/protocol.h>
 #include <asm/scatterlist.h>
 
 
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index b425748f02d..accdefedfed 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -79,6 +79,7 @@
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/capability.h>
 #include <linux/config.h>
 #include <linux/socket.h>
 #include <linux/sockios.h>
@@ -86,6 +87,7 @@
 #include <linux/in.h>
 #include <linux/mm.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/fddidevice.h>
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 04a6fe3e95a..95b9d81ac48 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -32,6 +32,7 @@
 #include <asm/uaccess.h>
 #include <asm/system.h>
 #include <linux/bitops.h>
+#include <linux/capability.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
@@ -58,6 +59,7 @@
 #endif
 #include <linux/kmod.h>
 
+#include <net/arp.h>
 #include <net/ip.h>
 #include <net/route.h>
 #include <net/ip_fib.h>
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 1b18ce66e7b..73bfcae8af9 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -9,6 +9,7 @@
 #include <linux/pfkeyv2.h>
 #include <linux/random.h>
 #include <net/icmp.h>
+#include <net/protocol.h>
 #include <net/udp.h>
 
 /* decapsulation data for use when post-processing */
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 19b1b984d68..4e3d3811dea 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -20,6 +20,7 @@
 #include <asm/uaccess.h>
 #include <asm/system.h>
 #include <linux/bitops.h>
+#include <linux/capability.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
@@ -30,6 +31,7 @@
 #include <linux/errno.h>
 #include <linux/in.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/skbuff.h>
@@ -287,13 +289,13 @@ static int inet_check_attr(struct rtmsg *r, struct rtattr **rta)
 {
 	int i;
 
-	for (i=1; i<=RTA_MAX; i++) {
-		struct rtattr *attr = rta[i-1];
+	for (i=1; i<=RTA_MAX; i++, rta++) {
+		struct rtattr *attr = *rta;
 		if (attr) {
 			if (RTA_PAYLOAD(attr) < 4)
 				return -EINVAL;
 			if (i != RTA_MULTIPATH && i != RTA_METRICS)
-				rta[i-1] = (struct rtattr*)RTA_DATA(attr);
+				*rta = (struct rtattr*)RTA_DATA(attr);
 		}
 	}
 	return 0;
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 7ea0209cb16..e2890ec8159 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -29,6 +29,7 @@
 #include <linux/errno.h>
 #include <linux/in.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/proc_fs.h>
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 0b298bbc151..0dd4d06e456 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -33,6 +33,7 @@
 #include <linux/errno.h>
 #include <linux/in.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/proc_fs.h>
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 6d2a6ac070e..ef4724de735 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -29,6 +29,7 @@
 #include <linux/errno.h>
 #include <linux/in.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/proc_fs.h>
@@ -36,6 +37,7 @@
 #include <linux/netlink.h>
 #include <linux/init.h>
 
+#include <net/arp.h>
 #include <net/ip.h>
 #include <net/protocol.h>
 #include <net/route.h>
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 705e3ce86df..e320b32373e 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -41,6 +41,13 @@
  *		modify it under the terms of the GNU General Public License
  *		as published by the Free Software Foundation; either version
  *		2 of the License, or (at your option) any later version.
+ *
+ * Substantial contributions to this work come from:
+ *
+ *		David S. Miller, <davem@davemloft.net>
+ *		Stephen Hemminger <shemminger@osdl.org>
+ *		Paul E. McKenney <paulmck@us.ibm.com>
+ *		Patrick McHardy <kaber@trash.net>
  */
 
 #define VERSION "0.404"
@@ -59,6 +66,7 @@
 #include <linux/errno.h>
 #include <linux/in.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/proc_fs.h>
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 92e23b2ad4d..105039eb762 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -73,6 +73,7 @@
 #include <linux/socket.h>
 #include <linux/in.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/string.h>
 #include <linux/netfilter_ipv4.h>
@@ -898,8 +899,7 @@ static void icmp_address_reply(struct sk_buff *skb)
 		u32 _mask, *mp;
 
 		mp = skb_header_pointer(skb, 0, sizeof(_mask), &_mask);
-		if (mp == NULL)
-			BUG();
+		BUG_ON(mp == NULL);
 		for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
 			if (*mp == ifa->ifa_mask &&
 			    inet_ifa_match(rt->rt_src, ifa))
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 4a195c724f0..d8ce7133cd8 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -91,6 +91,8 @@
 #include <linux/if_arp.h>
 #include <linux/rtnetlink.h>
 #include <linux/times.h>
+
+#include <net/arp.h>
 #include <net/ip.h>
 #include <net/protocol.h>
 #include <net/route.h>
@@ -231,7 +233,18 @@ static int is_in(struct ip_mc_list *pmc, struct ip_sf_list *psf, int type,
 	case IGMPV3_MODE_IS_EXCLUDE:
 		if (gdeleted || sdeleted)
 			return 0;
-		return !(pmc->gsquery && !psf->sf_gsresp);
+		if (!(pmc->gsquery && !psf->sf_gsresp)) {
+			if (pmc->sfmode == MCAST_INCLUDE)
+				return 1;
+			/* don't include if this source is excluded
+			 * in all filters
+			 */
+			if (psf->sf_count[MCAST_INCLUDE])
+				return type == IGMPV3_MODE_IS_INCLUDE;
+			return pmc->sfcount[MCAST_EXCLUDE] ==
+				psf->sf_count[MCAST_EXCLUDE];
+		}
+		return 0;
 	case IGMPV3_CHANGE_TO_INCLUDE:
 		if (gdeleted || sdeleted)
 			return 0;
@@ -383,7 +396,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
 	struct igmpv3_report *pih;
 	struct igmpv3_grec *pgr = NULL;
 	struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list;
-	int scount, first, isquery, truncate;
+	int scount, stotal, first, isquery, truncate;
 
 	if (pmc->multiaddr == IGMP_ALL_HOSTS)
 		return skb;
@@ -393,25 +406,13 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
 	truncate = type == IGMPV3_MODE_IS_EXCLUDE ||
 		    type == IGMPV3_CHANGE_TO_EXCLUDE;
 
+	stotal = scount = 0;
+
 	psf_list = sdeleted ? &pmc->tomb : &pmc->sources;
 
-	if (!*psf_list) {
-		if (type == IGMPV3_ALLOW_NEW_SOURCES ||
-		    type == IGMPV3_BLOCK_OLD_SOURCES)
-			return skb;
-		if (pmc->crcount || isquery) {
-			/* make sure we have room for group header and at
-			 * least one source.
-			 */
-			if (skb && AVAILABLE(skb) < sizeof(struct igmpv3_grec)+
-			    sizeof(__u32)) {
-				igmpv3_sendpack(skb);
-				skb = NULL; /* add_grhead will get a new one */
-			}
-			skb = add_grhead(skb, pmc, type, &pgr);
-		}
-		return skb;
-	}
+	if (!*psf_list)
+		goto empty_source;
+
 	pih = skb ? (struct igmpv3_report *)skb->h.igmph : NULL;
 
 	/* EX and TO_EX get a fresh packet, if needed */
@@ -424,7 +425,6 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
 		}
 	}
 	first = 1;
-	scount = 0;
 	psf_prev = NULL;
 	for (psf=*psf_list; psf; psf=psf_next) {
 		u32 *psrc;
@@ -458,7 +458,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
 		}
 		psrc = (u32 *)skb_put(skb, sizeof(u32));
 		*psrc = psf->sf_inaddr;
-		scount++;
+		scount++; stotal++;
 		if ((type == IGMPV3_ALLOW_NEW_SOURCES ||
 		     type == IGMPV3_BLOCK_OLD_SOURCES) && psf->sf_crcount) {
 			psf->sf_crcount--;
@@ -473,6 +473,21 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
 		}
 		psf_prev = psf;
 	}
+
+empty_source:
+	if (!stotal) {
+		if (type == IGMPV3_ALLOW_NEW_SOURCES ||
+		    type == IGMPV3_BLOCK_OLD_SOURCES)
+			return skb;
+		if (pmc->crcount || isquery) {
+			/* make sure we have room for group header */
+			if (skb && AVAILABLE(skb)<sizeof(struct igmpv3_grec)) {
+				igmpv3_sendpack(skb);
+				skb = NULL; /* add_grhead will get a new one */
+			}
+			skb = add_grhead(skb, pmc, type, &pgr);
+		}
+	}
 	if (pgr)
 		pgr->grec_nsrcs = htons(scount);
 
@@ -555,11 +570,11 @@ static void igmpv3_send_cr(struct in_device *in_dev)
 			skb = add_grec(skb, pmc, dtype, 1, 1);
 		}
 		if (pmc->crcount) {
-			pmc->crcount--;
 			if (pmc->sfmode == MCAST_EXCLUDE) {
 				type = IGMPV3_CHANGE_TO_INCLUDE;
 				skb = add_grec(skb, pmc, type, 1, 0);
 			}
+			pmc->crcount--;
 			if (pmc->crcount == 0) {
 				igmpv3_clear_zeros(&pmc->tomb);
 				igmpv3_clear_zeros(&pmc->sources);
@@ -592,12 +607,12 @@ static void igmpv3_send_cr(struct in_device *in_dev)
 
 		/* filter mode changes */
 		if (pmc->crcount) {
-			pmc->crcount--;
 			if (pmc->sfmode == MCAST_EXCLUDE)
 				type = IGMPV3_CHANGE_TO_EXCLUDE;
 			else
 				type = IGMPV3_CHANGE_TO_INCLUDE;
 			skb = add_grec(skb, pmc, type, 0, 0);
+			pmc->crcount--;
 		}
 		spin_unlock_bh(&pmc->lock);
 	}
@@ -733,7 +748,8 @@ static void igmp_timer_expire(unsigned long data)
 	ip_ma_put(im);
 }
 
-static void igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs)
+/* mark EXCLUDE-mode sources */
+static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs)
 {
 	struct ip_sf_list *psf;
 	int i, scount;
@@ -742,6 +758,37 @@ static void igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs)
 	for (psf=pmc->sources; psf; psf=psf->sf_next) {
 		if (scount == nsrcs)
 			break;
+		for (i=0; i<nsrcs; i++) {
+			/* skip inactive filters */
+			if (pmc->sfcount[MCAST_INCLUDE] ||
+			    pmc->sfcount[MCAST_EXCLUDE] !=
+			    psf->sf_count[MCAST_EXCLUDE])
+				continue;
+			if (srcs[i] == psf->sf_inaddr) {
+				scount++;
+				break;
+			}
+		}
+	}
+	pmc->gsquery = 0;
+	if (scount == nsrcs)	/* all sources excluded */
+		return 0;
+	return 1;
+}
+
+static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs)
+{
+	struct ip_sf_list *psf;
+	int i, scount;
+
+	if (pmc->sfmode == MCAST_EXCLUDE)
+		return igmp_xmarksources(pmc, nsrcs, srcs);
+
+	/* mark INCLUDE-mode sources */
+	scount = 0;
+	for (psf=pmc->sources; psf; psf=psf->sf_next) {
+		if (scount == nsrcs)
+			break;
 		for (i=0; i<nsrcs; i++)
 			if (srcs[i] == psf->sf_inaddr) {
 				psf->sf_gsresp = 1;
@@ -749,6 +796,12 @@ static void igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs)
 				break;
 			}
 	}
+	if (!scount) {
+		pmc->gsquery = 0;
+		return 0;
+	}
+	pmc->gsquery = 1;
+	return 1;
 }
 
 static void igmp_heard_report(struct in_device *in_dev, u32 group)
@@ -843,6 +896,8 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
 	 */
 	read_lock(&in_dev->mc_list_lock);
 	for (im=in_dev->mc_list; im!=NULL; im=im->next) {
+		int changed;
+
 		if (group && group != im->multiaddr)
 			continue;
 		if (im->multiaddr == IGMP_ALL_HOSTS)
@@ -852,10 +907,11 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
 			im->gsquery = im->gsquery && mark;
 		else
 			im->gsquery = mark;
-		if (im->gsquery)
-			igmp_marksources(im, ntohs(ih3->nsrcs), ih3->srcs);
+		changed = !im->gsquery ||
+		    	igmp_marksources(im, ntohs(ih3->nsrcs), ih3->srcs);
 		spin_unlock_bh(&im->lock);
-		igmp_mod_timer(im, max_delay);
+		if (changed)
+			igmp_mod_timer(im, max_delay);
 	}
 	read_unlock(&in_dev->mc_list_lock);
 }
@@ -973,7 +1029,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
 	 * for deleted items allows change reports to use common code with
 	 * non-deleted or query-response MCA's.
 	 */
-	pmc = (struct ip_mc_list *)kmalloc(sizeof(*pmc), GFP_KERNEL);
+	pmc = kmalloc(sizeof(*pmc), GFP_KERNEL);
 	if (!pmc)
 		return;
 	memset(pmc, 0, sizeof(*pmc));
@@ -1153,7 +1209,7 @@ void ip_mc_inc_group(struct in_device *in_dev, u32 addr)
 		}
 	}
 
-	im = (struct ip_mc_list *)kmalloc(sizeof(*im), GFP_KERNEL);
+	im = kmalloc(sizeof(*im), GFP_KERNEL);
 	if (!im)
 		goto out;
 
@@ -1474,7 +1530,7 @@ static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode,
 		psf_prev = psf;
 	}
 	if (!psf) {
-		psf = (struct ip_sf_list *)kmalloc(sizeof(*psf), GFP_ATOMIC);
+		psf = kmalloc(sizeof(*psf), GFP_ATOMIC);
 		if (!psf)
 			return -ENOBUFS;
 		memset(psf, 0, sizeof(*psf));
@@ -1508,7 +1564,7 @@ static void sf_markstate(struct ip_mc_list *pmc)
 
 static int sf_setstate(struct ip_mc_list *pmc)
 {
-	struct ip_sf_list *psf;
+	struct ip_sf_list *psf, *dpsf;
 	int mca_xcount = pmc->sfcount[MCAST_EXCLUDE];
 	int qrv = pmc->interface->mr_qrv;
 	int new_in, rv;
@@ -1520,8 +1576,46 @@ static int sf_setstate(struct ip_mc_list *pmc)
 				!psf->sf_count[MCAST_INCLUDE];
 		} else
 			new_in = psf->sf_count[MCAST_INCLUDE] != 0;
-		if (new_in != psf->sf_oldin) {
-			psf->sf_crcount = qrv;
+		if (new_in) {
+			if (!psf->sf_oldin) {
+				struct ip_sf_list *prev = 0;
+
+				for (dpsf=pmc->tomb; dpsf; dpsf=dpsf->sf_next) {
+					if (dpsf->sf_inaddr == psf->sf_inaddr)
+						break;
+					prev = dpsf;
+				}
+				if (dpsf) {
+					if (prev)
+						prev->sf_next = dpsf->sf_next;
+					else
+						pmc->tomb = dpsf->sf_next;
+					kfree(dpsf);
+				}
+				psf->sf_crcount = qrv;
+				rv++;
+			}
+		} else if (psf->sf_oldin) {
+
+			psf->sf_crcount = 0;
+			/*
+			 * add or update "delete" records if an active filter
+			 * is now inactive
+			 */
+			for (dpsf=pmc->tomb; dpsf; dpsf=dpsf->sf_next)
+				if (dpsf->sf_inaddr == psf->sf_inaddr)
+					break;
+			if (!dpsf) {
+				dpsf = (struct ip_sf_list *)
+					kmalloc(sizeof(*dpsf), GFP_ATOMIC);
+				if (!dpsf)
+					continue;
+				*dpsf = *psf;
+				/* pmc->lock held by callers */
+				dpsf->sf_next = pmc->tomb;
+				pmc->tomb = dpsf;
+			}
+			dpsf->sf_crcount = qrv;
 			rv++;
 		}
 	}
@@ -1657,7 +1751,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
 	err = -ENOBUFS;
 	if (count >= sysctl_igmp_max_memberships)
 		goto done;
-	iml = (struct ip_mc_socklist *)sock_kmalloc(sk,sizeof(*iml),GFP_KERNEL);
+	iml = sock_kmalloc(sk,sizeof(*iml),GFP_KERNEL);
 	if (iml == NULL)
 		goto done;
 
@@ -1821,8 +1915,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
 
 		if (psl)
 			count += psl->sl_max;
-		newpsl = (struct ip_sf_socklist *)sock_kmalloc(sk,
-			IP_SFLSIZE(count), GFP_KERNEL);
+		newpsl = sock_kmalloc(sk, IP_SFLSIZE(count), GFP_KERNEL);
 		if (!newpsl) {
 			err = -ENOBUFS;
 			goto done;
@@ -1905,8 +1998,8 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
 		goto done;
 	}
 	if (msf->imsf_numsrc) {
-		newpsl = (struct ip_sf_socklist *)sock_kmalloc(sk,
-				IP_SFLSIZE(msf->imsf_numsrc), GFP_KERNEL);
+		newpsl = sock_kmalloc(sk, IP_SFLSIZE(msf->imsf_numsrc),
+							   GFP_KERNEL);
 		if (!newpsl) {
 			err = -ENOBUFS;
 			goto done;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 3fe021f1a56..ae20281d8de 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -37,7 +37,8 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg);
  */
 int sysctl_local_port_range[2] = { 1024, 4999 };
 
-static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb)
+int inet_csk_bind_conflict(const struct sock *sk,
+			   const struct inet_bind_bucket *tb)
 {
 	const u32 sk_rcv_saddr = inet_rcv_saddr(sk);
 	struct sock *sk2;
@@ -62,11 +63,15 @@ static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucke
 	return node != NULL;
 }
 
+EXPORT_SYMBOL_GPL(inet_csk_bind_conflict);
+
 /* Obtain a reference to a local port for the given sock,
  * if snum is zero it means select any available local port.
  */
 int inet_csk_get_port(struct inet_hashinfo *hashinfo,
-		      struct sock *sk, unsigned short snum)
+		      struct sock *sk, unsigned short snum,
+		      int (*bind_conflict)(const struct sock *sk,
+					   const struct inet_bind_bucket *tb))
 {
 	struct inet_bind_hashbucket *head;
 	struct hlist_node *node;
@@ -125,7 +130,7 @@ tb_found:
 			goto success;
 		} else {
 			ret = 1;
-			if (inet_csk_bind_conflict(sk, tb))
+			if (bind_conflict(sk, tb))
 				goto fail_unlock;
 		}
 	}
@@ -380,7 +385,7 @@ struct request_sock *inet_csk_search_req(const struct sock *sk,
 EXPORT_SYMBOL_GPL(inet_csk_search_req);
 
 void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
-				   const unsigned timeout)
+				   unsigned long timeout)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
@@ -631,3 +636,15 @@ void inet_csk_listen_stop(struct sock *sk)
 }
 
 EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
+
+/* Store sk's daddr/dport into uaddr, written as an AF_INET sockaddr_in. */
+void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
+{
+	struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
+
+	sin->sin_family		= AF_INET;
+	sin->sin_addr.s_addr	= inet_sk(sk)->daddr;
+	sin->sin_port		= inet_sk(sk)->dport;
+}
+
+EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 39061ed53cf..457db99c76d 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -50,9 +50,10 @@ static struct sock *idiagnl;
 #define INET_DIAG_PUT(skb, attrtype, attrlen) \
 	RTA_DATA(__RTA_PUT(skb, attrtype, attrlen))
 
-static int inet_diag_fill(struct sk_buff *skb, struct sock *sk,
-			int ext, u32 pid, u32 seq, u16 nlmsg_flags,
-			const struct nlmsghdr *unlh)
+static int inet_csk_diag_fill(struct sock *sk,
+			      struct sk_buff *skb,
+			      int ext, u32 pid, u32 seq, u16 nlmsg_flags,
+			      const struct nlmsghdr *unlh)
 {
 	const struct inet_sock *inet = inet_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -70,20 +71,22 @@ static int inet_diag_fill(struct sk_buff *skb, struct sock *sk,
 	nlh->nlmsg_flags = nlmsg_flags;
 
 	r = NLMSG_DATA(nlh);
-	if (sk->sk_state != TCP_TIME_WAIT) {
-		if (ext & (1 << (INET_DIAG_MEMINFO - 1)))
-			minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO,
-					      sizeof(*minfo));
-		if (ext & (1 << (INET_DIAG_INFO - 1)))
-			info = INET_DIAG_PUT(skb, INET_DIAG_INFO,
-					   handler->idiag_info_size);
-		
-		if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) {
-			size_t len = strlen(icsk->icsk_ca_ops->name);
-			strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1),
-			       icsk->icsk_ca_ops->name);
-		}
+	BUG_ON(sk->sk_state == TCP_TIME_WAIT);
+
+	if (ext & (1 << (INET_DIAG_MEMINFO - 1)))
+		minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, sizeof(*minfo));
+
+	if (ext & (1 << (INET_DIAG_INFO - 1)))
+		info = INET_DIAG_PUT(skb, INET_DIAG_INFO,
+				     handler->idiag_info_size);
+
+	if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) {
+		const size_t len = strlen(icsk->icsk_ca_ops->name);
+
+		strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1),
+		       icsk->icsk_ca_ops->name);
 	}
+
 	r->idiag_family = sk->sk_family;
 	r->idiag_state = sk->sk_state;
 	r->idiag_timer = 0;
@@ -93,37 +96,6 @@ static int inet_diag_fill(struct sk_buff *skb, struct sock *sk,
 	r->id.idiag_cookie[0] = (u32)(unsigned long)sk;
 	r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
 
-	if (r->idiag_state == TCP_TIME_WAIT) {
-		const struct inet_timewait_sock *tw = inet_twsk(sk);
-		long tmo = tw->tw_ttd - jiffies;
-		if (tmo < 0)
-			tmo = 0;
-
-		r->id.idiag_sport = tw->tw_sport;
-		r->id.idiag_dport = tw->tw_dport;
-		r->id.idiag_src[0] = tw->tw_rcv_saddr;
-		r->id.idiag_dst[0] = tw->tw_daddr;
-		r->idiag_state = tw->tw_substate;
-		r->idiag_timer = 3;
-		r->idiag_expires = (tmo * 1000 + HZ - 1) / HZ;
-		r->idiag_rqueue = 0;
-		r->idiag_wqueue = 0;
-		r->idiag_uid = 0;
-		r->idiag_inode = 0;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
-		if (r->idiag_family == AF_INET6) {
-			const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk);
-
-			ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
-				       &tcp6tw->tw_v6_rcv_saddr);
-			ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
-				       &tcp6tw->tw_v6_daddr);
-		}
-#endif
-		nlh->nlmsg_len = skb->tail - b;
-		return skb->len;
-	}
-
 	r->id.idiag_sport = inet->sport;
 	r->id.idiag_dport = inet->dport;
 	r->id.idiag_src[0] = inet->rcv_saddr;
@@ -185,7 +157,75 @@ nlmsg_failure:
 	return -1;
 }
 
-static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh)
+/* Fill one inet_diag_msg record for TIME_WAIT socket tw (ext is unused).
+ * Returns skb->len, or -1 after trimming skb on the nlmsg_failure path.
+ */
+static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
+			       struct sk_buff *skb, int ext, u32 pid,
+			       u32 seq, u16 nlmsg_flags,
+			       const struct nlmsghdr *unlh)
+{
+	long tmo;
+	struct inet_diag_msg *r;
+	const unsigned char *previous_tail = skb->tail;
+	struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq,
+					 unlh->nlmsg_type, sizeof(*r));
+
+	r = NLMSG_DATA(nlh);
+	BUG_ON(tw->tw_state != TCP_TIME_WAIT);
+	nlh->nlmsg_flags = nlmsg_flags;
+
+	tmo = tw->tw_ttd - jiffies;
+	if (tmo < 0)
+		tmo = 0;
+
+	r->idiag_family	      = tw->tw_family;
+	r->idiag_retrans      = 0;
+	r->id.idiag_if	      = tw->tw_bound_dev_if;
+	r->id.idiag_cookie[0] = (u32)(unsigned long)tw;
+	r->id.idiag_cookie[1] = (u32)(((unsigned long)tw >> 31) >> 1);
+	r->id.idiag_sport     = tw->tw_sport;
+	r->id.idiag_dport     = tw->tw_dport;
+	r->id.idiag_src[0]    = tw->tw_rcv_saddr;
+	r->id.idiag_dst[0]    = tw->tw_daddr;
+	r->idiag_state	      = tw->tw_substate;
+	r->idiag_timer	      = 3;
+	r->idiag_expires      = (tmo * 1000 + HZ - 1) / HZ;
+	r->idiag_rqueue	      = 0;
+	r->idiag_wqueue	      = 0;
+	r->idiag_uid	      = 0;
+	r->idiag_inode	      = 0;
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+	if (tw->tw_family == AF_INET6) {
+		const struct inet6_timewait_sock *tw6 =
+						inet6_twsk((struct sock *)tw);
+
+		ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
+			       &tw6->tw_v6_rcv_saddr);
+		ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
+			       &tw6->tw_v6_daddr);
+	}
+#endif
+	nlh->nlmsg_len = skb->tail - previous_tail;
+	return skb->len;
+nlmsg_failure:
+	skb_trim(skb, previous_tail - skb->data);
+	return -1;
+}
+
+static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
+			int ext, u32 pid, u32 seq, u16 nlmsg_flags,
+			const struct nlmsghdr *unlh)
+{
+	/* TIME_WAIT entries go through the inet_timewait_sock fill routine */
+	return sk->sk_state != TCP_TIME_WAIT ?
+	    inet_csk_diag_fill(sk, skb, ext, pid, seq, nlmsg_flags, unlh) :
+	    inet_twsk_diag_fill((struct inet_timewait_sock *)sk, skb, ext,
+				pid, seq, nlmsg_flags, unlh);
+}
+
+static int inet_diag_get_exact(struct sk_buff *in_skb,
+			       const struct nlmsghdr *nlh)
 {
 	int err;
 	struct sock *sk;
@@ -235,7 +275,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nl
 	if (!rep)
 		goto out;
 
-	if (inet_diag_fill(rep, sk, req->idiag_ext,
+	if (sk_diag_fill(sk, rep, req->idiag_ext,
 			 NETLINK_CB(in_skb).pid,
 			 nlh->nlmsg_seq, 0, nlh) <= 0)
 		BUG();
@@ -283,7 +323,7 @@ static int bitstring_match(const u32 *a1, const u32 *a2, int bits)
 
 
 static int inet_diag_bc_run(const void *bc, int len,
-			  const struct inet_diag_entry *entry)
+			    const struct inet_diag_entry *entry)
 {
 	while (len > 0) {
 		int yes = 1;
@@ -322,7 +362,7 @@ static int inet_diag_bc_run(const void *bc, int len,
 				yes = 0;
 				break;
 			}
-			
+
 			if (cond->prefix_len == 0)
 				break;
 
@@ -331,7 +371,8 @@ static int inet_diag_bc_run(const void *bc, int len,
 			else
 				addr = entry->daddr;
 
-			if (bitstring_match(addr, cond->addr, cond->prefix_len))
+			if (bitstring_match(addr, cond->addr,
+					    cond->prefix_len))
 				break;
 			if (entry->family == AF_INET6 &&
 			    cond->family == AF_INET) {
@@ -346,7 +387,7 @@ static int inet_diag_bc_run(const void *bc, int len,
 		}
 		}
 
-		if (yes) { 
+		if (yes) {
 			len -= op->yes;
 			bc += op->yes;
 		} else {
@@ -407,14 +448,15 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
 		default:
 			return -EINVAL;
 		}
-		bc += op->yes;
+		bc  += op->yes;
 		len -= op->yes;
 	}
 	return len == 0 ? 0 : -EINVAL;
 }
 
-static int inet_diag_dump_sock(struct sk_buff *skb, struct sock *sk,
-			     struct netlink_callback *cb)
+static int inet_csk_diag_dump(struct sock *sk,
+			      struct sk_buff *skb,
+			      struct netlink_callback *cb)
 {
 	struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
 
@@ -444,14 +486,50 @@ static int inet_diag_dump_sock(struct sk_buff *skb, struct sock *sk,
 			return 0;
 	}
 
-	return inet_diag_fill(skb, sk, r->idiag_ext, NETLINK_CB(cb->skb).pid,
-			    cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
+	return inet_csk_diag_fill(sk, skb, r->idiag_ext,
+				  NETLINK_CB(cb->skb).pid,
+				  cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
+}
+
+/* Run the request's bytecode filter (if any) on tw, then emit its record. */
+static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
+			       struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
+
+	if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
+		struct rtattr *bc = (struct rtattr *)(r + 1);
+		struct inet_diag_entry entry;
+
+		entry.family = tw->tw_family;
+		entry.sport = tw->tw_num;
+		entry.dport = ntohs(tw->tw_dport);
+		entry.userlocks = 0;
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+		if (tw->tw_family == AF_INET6) {
+			struct inet6_timewait_sock *tw6 =
+						inet6_twsk((struct sock *)tw);
+			entry.saddr = tw6->tw_v6_rcv_saddr.s6_addr32;
+			entry.daddr = tw6->tw_v6_daddr.s6_addr32;
+		} else
+#endif
+		{
+			entry.saddr = &tw->tw_rcv_saddr;
+			entry.daddr = &tw->tw_daddr;
+		}
+
+		if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
+			return 0;
+	}
+
+	return inet_twsk_diag_fill(tw, skb, r->idiag_ext,
+				   NETLINK_CB(cb->skb).pid,
+				   cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
+}
 
 static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
-			    struct request_sock *req,
-			    u32 pid, u32 seq,
-			    const struct nlmsghdr *unlh)
+			      struct request_sock *req, u32 pid, u32 seq,
+			      const struct nlmsghdr *unlh)
 {
 	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct inet_sock *inet = inet_sk(sk);
@@ -489,9 +567,9 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
 	if (r->idiag_family == AF_INET6) {
 		ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
-			       &tcp6_rsk(req)->loc_addr);
+			       &inet6_rsk(req)->loc_addr);
 		ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
-			       &tcp6_rsk(req)->rmt_addr);
+			       &inet6_rsk(req)->rmt_addr);
 	}
 #endif
 	nlh->nlmsg_len = skb->tail - b;
@@ -504,7 +582,7 @@ nlmsg_failure:
 }
 
 static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
-			     struct netlink_callback *cb)
+			       struct netlink_callback *cb)
 {
 	struct inet_diag_entry entry;
 	struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
@@ -553,13 +631,13 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 				entry.saddr =
 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
 					(entry.family == AF_INET6) ?
-					tcp6_rsk(req)->loc_addr.s6_addr32 :
+					inet6_rsk(req)->loc_addr.s6_addr32 :
 #endif
 					&ireq->loc_addr;
-				entry.daddr = 
+				entry.daddr =
 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
 					(entry.family == AF_INET6) ?
-					tcp6_rsk(req)->rmt_addr.s6_addr32 :
+					inet6_rsk(req)->rmt_addr.s6_addr32 :
 #endif
 					&ireq->rmt_addr;
 				entry.dport = ntohs(ireq->rmt_port);
@@ -599,7 +677,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	handler = inet_diag_table[cb->nlh->nlmsg_type];
 	BUG_ON(handler == NULL);
 	hashinfo = handler->idiag_hashinfo;
-		
+
 	s_i = cb->args[1];
 	s_num = num = cb->args[2];
 
@@ -630,7 +708,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 				    cb->args[3] > 0)
 					goto syn_recv;
 
-				if (inet_diag_dump_sock(skb, sk, cb) < 0) {
+				if (inet_csk_diag_dump(sk, skb, cb) < 0) {
 					inet_listen_unlock(hashinfo);
 					goto done;
 				}
@@ -672,7 +750,6 @@ skip_listen_ht:
 			s_num = 0;
 
 		read_lock_bh(&head->lock);
-
 		num = 0;
 		sk_for_each(sk, node, &head->chain) {
 			struct inet_sock *inet = inet_sk(sk);
@@ -684,9 +761,10 @@ skip_listen_ht:
 			if (r->id.idiag_sport != inet->sport &&
 			    r->id.idiag_sport)
 				goto next_normal;
-			if (r->id.idiag_dport != inet->dport && r->id.idiag_dport)
+			if (r->id.idiag_dport != inet->dport &&
+			    r->id.idiag_dport)
 				goto next_normal;
-			if (inet_diag_dump_sock(skb, sk, cb) < 0) {
+			if (inet_csk_diag_dump(sk, skb, cb) < 0) {
 				read_unlock_bh(&head->lock);
 				goto done;
 			}
@@ -695,19 +773,20 @@ next_normal:
 		}
 
 		if (r->idiag_states & TCPF_TIME_WAIT) {
-			sk_for_each(sk, node,
+			struct inet_timewait_sock *tw;
+
+			inet_twsk_for_each(tw, node,
 				    &hashinfo->ehash[i + hashinfo->ehash_size].chain) {
-				struct inet_sock *inet = inet_sk(sk);
 
 				if (num < s_num)
 					goto next_dying;
-				if (r->id.idiag_sport != inet->sport &&
+				if (r->id.idiag_sport != tw->tw_sport &&
 				    r->id.idiag_sport)
 					goto next_dying;
-				if (r->id.idiag_dport != inet->dport &&
+				if (r->id.idiag_dport != tw->tw_dport &&
 				    r->id.idiag_dport)
 					goto next_dying;
-				if (inet_diag_dump_sock(skb, sk, cb) < 0) {
+				if (inet_twsk_diag_dump(tw, skb, cb) < 0) {
 					read_unlock_bh(&head->lock);
 					goto done;
 				}
@@ -724,8 +803,7 @@ done:
 	return skb->len;
 }
 
-static __inline__ int
-inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static inline int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
 		return 0;
@@ -755,9 +833,8 @@ inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		}
 		return netlink_dump_start(idiagnl, skb, nlh,
 					  inet_diag_dump, NULL);
-	} else {
+	} else
 		return inet_diag_get_exact(skb, nlh);
-	}
 
 err_inval:
 	return -EINVAL;
@@ -766,15 +843,15 @@ err_inval:
 
 static inline void inet_diag_rcv_skb(struct sk_buff *skb)
 {
-	int err;
-	struct nlmsghdr * nlh;
-
 	if (skb->len >= NLMSG_SPACE(0)) {
-		nlh = (struct nlmsghdr *)skb->data;
-		if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
+		int err;
+		struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data;
+
+		if (nlh->nlmsg_len < sizeof(*nlh) ||
+		    skb->len < nlh->nlmsg_len)
 			return;
 		err = inet_diag_rcv_msg(skb, nlh);
-		if (err || nlh->nlmsg_flags & NLM_F_ACK) 
+		if (err || nlh->nlmsg_flags & NLM_F_ACK)
 			netlink_ack(skb, nlh, err);
 	}
 }
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index e8d29fe736d..33228115cda 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -15,12 +15,14 @@
 
 #include <linux/config.h>
 #include <linux/module.h>
+#include <linux/random.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/wait.h>
 
 #include <net/inet_connection_sock.h>
 #include <net/inet_hashtables.h>
+#include <net/ip.h>
 
 /*
  * Allocate and initialize a new local port bind bucket.
@@ -163,3 +165,179 @@ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 dad
 }
 
 EXPORT_SYMBOL_GPL(__inet_lookup_listener);
+
+/* Called with local bh disabled.  Returns 0 if unique, else -EADDRNOTAVAIL. */
+static int __inet_check_established(struct inet_timewait_death_row *death_row,
+				    struct sock *sk, __u16 lport,
+				    struct inet_timewait_sock **twp)
+{
+	struct inet_hashinfo *hinfo = death_row->hashinfo;
+	struct inet_sock *inet = inet_sk(sk);
+	u32 daddr = inet->rcv_saddr;
+	u32 saddr = inet->daddr;
+	int dif = sk->sk_bound_dev_if;
+	INET_ADDR_COOKIE(acookie, saddr, daddr)
+	const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
+	unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
+	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
+	struct sock *sk2;
+	const struct hlist_node *node;
+	struct inet_timewait_sock *tw;
+
+	prefetch(head->chain.first);
+	write_lock(&head->lock);
+
+	/* Check TIME-WAIT sockets first. */
+	sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) {
+		tw = inet_twsk(sk2);
+
+		if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
+			if (twsk_unique(sk, sk2, twp))
+				goto unique;
+			else
+				goto not_unique;
+		}
+	}
+	tw = NULL;
+
+	/* And established part... */
+	sk_for_each(sk2, node, &head->chain) {
+		if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
+			goto not_unique;
+	}
+
+unique:
+	/* Must record num and sport now. Otherwise a socket with a funny
+	 * identity would be visible in the hash table. */
+	inet->num = lport;
+	inet->sport = htons(lport);
+	sk->sk_hash = hash;
+	BUG_TRAP(sk_unhashed(sk));
+	__sk_add_node(sk, &head->chain);
+	sock_prot_inc_use(sk->sk_prot);
+	write_unlock(&head->lock);
+
+	if (twp) {
+		*twp = tw;
+		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+	} else if (tw) {
+		/* Silly. Should hash-dance instead... */
+		inet_twsk_deschedule(tw, death_row);
+		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+
+		inet_twsk_put(tw);
+	}
+
+	return 0;
+
+not_unique:
+	write_unlock(&head->lock);
+	return -EADDRNOTAVAIL;
+}
+
+/* Start offset for the port search, from sk's addresses and dport. */
+static inline u32 inet_sk_port_offset(const struct sock *sk)
+{
+	const struct inet_sock *in = inet_sk(sk);
+	return secure_ipv4_port_ephemeral(in->rcv_saddr, in->daddr, in->dport);
+}
+
+/* Bind a port for a connect operation and hash it.  If no local port is set,
+ * search sysctl_local_port_range for one.  Returns 0 or -EADDRNOTAVAIL.
+ */
+int inet_hash_connect(struct inet_timewait_death_row *death_row,
+		      struct sock *sk)
+{
+	struct inet_hashinfo *hinfo = death_row->hashinfo;
+	const unsigned short snum = inet_sk(sk)->num;
+	struct inet_bind_hashbucket *head;
+	struct inet_bind_bucket *tb;
+	int ret;
+
+	if (!snum) {
+		int low = sysctl_local_port_range[0];
+		int high = sysctl_local_port_range[1];
+		int range = high - low;
+		int i;
+		int port;
+		static u32 hint;
+		u32 offset = hint + inet_sk_port_offset(sk);
+		struct hlist_node *node;
+		struct inet_timewait_sock *tw = NULL;
+
+		local_bh_disable();
+		for (i = 1; i <= range; i++) {
+			port = low + (i + offset) % range;
+			head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
+			spin_lock(&head->lock);
+
+			/* Does not bother with rcv_saddr checks,
+			 * because the established check is already
+			 * unique enough.
+			 */
+			inet_bind_bucket_for_each(tb, node, &head->chain) {
+				if (tb->port == port) {
+					BUG_TRAP(!hlist_empty(&tb->owners));
+					if (tb->fastreuse >= 0)
+						goto next_port;
+					if (!__inet_check_established(death_row,
+								      sk, port,
+								      &tw))
+						goto ok;
+					goto next_port;
+				}
+			}
+
+			tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, port);
+			if (!tb) {
+				spin_unlock(&head->lock);
+				break;
+			}
+			tb->fastreuse = -1;
+			goto ok;
+
+		next_port:
+			spin_unlock(&head->lock);
+		}
+		local_bh_enable();
+
+		return -EADDRNOTAVAIL;
+
+ok:
+		hint += i;
+
+		/* Head lock still held and bh's disabled */
+		inet_bind_hash(sk, tb, port);
+		if (sk_unhashed(sk)) {
+			inet_sk(sk)->sport = htons(port);
+			__inet_hash(hinfo, sk, 0);
+		}
+		spin_unlock(&head->lock);
+
+		if (tw) {
+			inet_twsk_deschedule(tw, death_row);
+			inet_twsk_put(tw);
+		}
+
+		ret = 0;
+		goto out;
+	}
+
+	head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
+	tb  = inet_csk(sk)->icsk_bind_hash;
+	spin_lock_bh(&head->lock);
+	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
+		__inet_hash(hinfo, sk, 0);
+		spin_unlock_bh(&head->lock);
+		return 0;
+	} else {
+		spin_unlock(&head->lock);
+		/* No definite answer... Walk to established hash table */
+		ret = __inet_check_established(death_row, sk, snum, NULL);
+out:
+		local_bh_enable();
+		return ret;
+	}
+}
+
+EXPORT_SYMBOL_GPL(inet_hash_connect);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index a010e9a6881..417f126c749 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -90,8 +90,9 @@ EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
 
 struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state)
 {
-	struct inet_timewait_sock *tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_slab,
-							 SLAB_ATOMIC);
+	struct inet_timewait_sock *tw =
+		kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
+				 SLAB_ATOMIC);
 	if (tw != NULL) {
 		const struct inet_sock *inet = inet_sk(sk);
 
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 2fc3fd38924..2160874ce7a 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -304,8 +304,7 @@ static void unlink_from_pool(struct inet_peer *p)
 			/* look for a node to insert instead of p */
 			struct inet_peer *t;
 			t = lookup_rightempty(p);
-			if (*stackptr[-1] != t)
-				BUG();
+			BUG_ON(*stackptr[-1] != t);
 			**--stackptr = t->avl_left;
 			/* t is removed, t->v4daddr > x->v4daddr for any
 			 * x in p->avl_left subtree.
@@ -314,8 +313,7 @@ static void unlink_from_pool(struct inet_peer *p)
 			t->avl_left = p->avl_left;
 			t->avl_right = p->avl_right;
 			t->avl_height = p->avl_height;
-			if (delp[1] != &p->avl_left)
-				BUG();
+			BUG_ON(delp[1] != &p->avl_left);
 			delp[1] = &t->avl_left; /* was &p->avl_left */
 		}
 		peer_avl_rebalance(stack, stackptr);
@@ -401,6 +399,7 @@ struct inet_peer *inet_getpeer(__u32 daddr, int create)
 		return NULL;
 	n->v4daddr = daddr;
 	atomic_set(&n->refcnt, 1);
+	atomic_set(&n->rid, 0);
 	n->ip_id_count = secure_ip_id(daddr);
 	n->tcp_ts_stamp = 0;
 
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 8ce0ce2ee48..2a8adda15e1 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -22,6 +22,7 @@
  *		Patrick McHardy :	LRU queue of frag heads for evictor.
  */
 
+#include <linux/compiler.h>
 #include <linux/config.h>
 #include <linux/module.h>
 #include <linux/types.h>
@@ -38,6 +39,7 @@
 #include <net/ip.h>
 #include <net/icmp.h>
 #include <net/checksum.h>
+#include <net/inetpeer.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/inet.h>
@@ -56,6 +58,8 @@
 int sysctl_ipfrag_high_thresh = 256*1024;
 int sysctl_ipfrag_low_thresh = 192*1024;
 
+int sysctl_ipfrag_max_dist = 64;
+
 /* Important NOTE! Fragment queue must be destroyed before MSL expires.
  * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
  */
@@ -89,8 +93,10 @@ struct ipq {
 	spinlock_t	lock;
 	atomic_t	refcnt;
 	struct timer_list timer;	/* when will this queue expire?		*/
-	int		iif;
 	struct timeval	stamp;
+	int             iif;
+	unsigned int    rid;
+	struct inet_peer *peer;
 };
 
 /* Hash table. */
@@ -195,6 +201,9 @@ static void ip_frag_destroy(struct ipq *qp, int *work)
 	BUG_TRAP(qp->last_in&COMPLETE);
 	BUG_TRAP(del_timer(&qp->timer) == 0);
 
+	if (qp->peer)
+		inet_putpeer(qp->peer);
+
 	/* Release all fragment data. */
 	fp = qp->fragments;
 	while (fp) {
@@ -353,6 +362,7 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph, u32 user)
 	qp->meat = 0;
 	qp->fragments = NULL;
 	qp->iif = 0;
+	qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL;
 
 	/* Initialize a timer for this entry. */
 	init_timer(&qp->timer);
@@ -373,7 +383,7 @@ out_nomem:
  */
 static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
 {
-	__u16 id = iph->id;
+	__be16 id = iph->id;
 	__u32 saddr = iph->saddr;
 	__u32 daddr = iph->daddr;
 	__u8 protocol = iph->protocol;
@@ -398,6 +408,56 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
 	return ip_frag_create(hash, iph, user);
 }
 
+/* Is the fragment too far ahead to be part of ipq?
+ * Each call bumps the peer's rid counter and stores it in qp->rid; a queue
+ * holding fragments is too far when more than max_dist bumps came between.
+ */
+static inline int ip_frag_too_far(struct ipq *qp)
+{
+	struct inet_peer *peer = qp->peer;
+	unsigned int max_dist = sysctl_ipfrag_max_dist;
+	unsigned int prev, cur;
+	int too_far;
+
+	if (!peer || !max_dist)
+		return 0;
+
+	prev = qp->rid;
+	cur = atomic_inc_return(&peer->rid);
+	qp->rid = cur;
+
+	too_far = qp->fragments && (cur - prev) > max_dist;
+	if (too_far)
+		IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+
+	return too_far;
+}
+
+/* Reset qp for a fresh reassembly; -ETIMEDOUT when mod_timer() returns 0. */
+static int ip_frag_reinit(struct ipq *qp)
+{
+	struct sk_buff *skb, *next;
+
+	if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) {
+		atomic_inc(&qp->refcnt);
+		return -ETIMEDOUT;
+	}
+
+	skb = qp->fragments;
+	do {
+		next = skb->next;
+		frag_kfree_skb(skb, NULL);
+	} while ((skb = next) != NULL);
+
+	qp->last_in = 0;
+	qp->len = 0;
+	qp->meat = 0;
+	qp->fragments = NULL;
+	qp->iif = 0;
+
+	return 0;
+}
+
 /* Add new segment to existing queue. */
 static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 {
@@ -408,6 +468,12 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	if (qp->last_in & COMPLETE)
 		goto err;
 
+	if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
+	    unlikely(ip_frag_too_far(qp)) && unlikely(ip_frag_reinit(qp))) {
+		ipq_kill(qp);
+		goto err;
+	}
+
  	offset = ntohs(skb->nh.iph->frag_off);
 	flags = offset & ~IP_OFFSET;
 	offset &= IP_OFFSET;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 46f9d9cf7a5..abe23923e4e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -10,6 +10,7 @@
  *
  */
 
+#include <linux/capability.h>
 #include <linux/config.h>
 #include <linux/module.h>
 #include <linux/types.h>
@@ -28,6 +29,7 @@
 #include <linux/inetdevice.h>
 #include <linux/igmp.h>
 #include <linux/netfilter_ipv4.h>
+#include <linux/if_ether.h>
 
 #include <net/sock.h>
 #include <net/ip.h>
@@ -187,7 +189,7 @@ static struct ip_tunnel * ipgre_tunnel_lookup(u32 remote, u32 local, u32 key)
 	}
 
 	if (ipgre_fb_tunnel_dev->flags&IFF_UP)
-		return ipgre_fb_tunnel_dev->priv;
+		return netdev_priv(ipgre_fb_tunnel_dev);
 	return NULL;
 }
 
@@ -277,7 +279,7 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int
 	  return NULL;
 
 	dev->init = ipgre_tunnel_init;
-	nt = dev->priv;
+	nt = netdev_priv(dev);
 	nt->parms = *parms;
 
 	if (register_netdevice(dev) < 0) {
@@ -285,9 +287,6 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int
 		goto failed;
 	}
 
-	nt = dev->priv;
-	nt->parms = *parms;
-
 	dev_hold(dev);
 	ipgre_tunnel_link(nt);
 	return nt;
@@ -298,7 +297,7 @@ failed:
 
 static void ipgre_tunnel_uninit(struct net_device *dev)
 {
-	ipgre_tunnel_unlink((struct ip_tunnel*)dev->priv);
+	ipgre_tunnel_unlink(netdev_priv(dev));
 	dev_put(dev);
 }
 
@@ -517,7 +516,7 @@ out:
 		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
 		rel_info = htonl(rel_info);
 	} else if (type == ICMP_TIME_EXCEEDED) {
-		struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
+		struct ip_tunnel *t = netdev_priv(skb2->dev);
 		if (t->parms.iph.ttl) {
 			rel_type = ICMP_DEST_UNREACH;
 			rel_code = ICMP_HOST_UNREACH;
@@ -668,7 +667,7 @@ drop_nolock:
 
 static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct net_device_stats *stats = &tunnel->stat;
 	struct iphdr  *old_iph = skb->nh.iph;
 	struct iphdr  *tiph;
@@ -831,6 +830,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	skb->h.raw = skb->nh.raw;
 	skb->nh.raw = skb_push(skb, gre_hlen);
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED);
 	dst_release(skb->dst);
 	skb->dst = &rt->u.dst;
 
@@ -913,7 +913,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 			t = ipgre_tunnel_locate(&p, 0);
 		}
 		if (t == NULL)
-			t = (struct ip_tunnel*)dev->priv;
+			t = netdev_priv(dev);
 		memcpy(&p, &t->parms, sizeof(p));
 		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 			err = -EFAULT;
@@ -953,7 +953,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 			} else {
 				unsigned nflags=0;
 
-				t = (struct ip_tunnel*)dev->priv;
+				t = netdev_priv(dev);
 
 				if (MULTICAST(p.iph.daddr))
 					nflags = IFF_BROADCAST;
@@ -1002,7 +1002,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 			if ((t = ipgre_tunnel_locate(&p, 0)) == NULL)
 				goto done;
 			err = -EPERM;
-			if (t == ipgre_fb_tunnel_dev->priv)
+			if (t == netdev_priv(ipgre_fb_tunnel_dev))
 				goto done;
 			dev = t->dev;
 		}
@@ -1019,12 +1019,12 @@ done:
 
 static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
 {
-	return &(((struct ip_tunnel*)dev->priv)->stat);
+	return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
 }
 
 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 {
-	struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+	struct ip_tunnel *tunnel = netdev_priv(dev);
 	if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
 		return -EINVAL;
 	dev->mtu = new_mtu;
@@ -1064,7 +1064,7 @@ static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
 			void *daddr, void *saddr, unsigned len)
 {
-	struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
+	struct ip_tunnel *t = netdev_priv(dev);
 	struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
 	u16 *p = (u16*)(iph+1);
 
@@ -1091,7 +1091,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned sh
 
 static int ipgre_open(struct net_device *dev)
 {
-	struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
+	struct ip_tunnel *t = netdev_priv(dev);
 
 	if (MULTICAST(t->parms.iph.daddr)) {
 		struct flowi fl = { .oif = t->parms.link,
@@ -1115,7 +1115,7 @@ static int ipgre_open(struct net_device *dev)
 
 static int ipgre_close(struct net_device *dev)
 {
-	struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
+	struct ip_tunnel *t = netdev_priv(dev);
 	if (MULTICAST(t->parms.iph.daddr) && t->mlink) {
 		struct in_device *in_dev = inetdev_by_index(t->mlink);
 		if (in_dev) {
@@ -1140,7 +1140,7 @@ static void ipgre_tunnel_setup(struct net_device *dev)
 
 	dev->type		= ARPHRD_IPGRE;
 	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
-	dev->mtu		= 1500 - sizeof(struct iphdr) - 4;
+	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
 	dev->flags		= IFF_NOARP;
 	dev->iflink		= 0;
 	dev->addr_len		= 4;
@@ -1152,10 +1152,10 @@ static int ipgre_tunnel_init(struct net_device *dev)
 	struct ip_tunnel *tunnel;
 	struct iphdr *iph;
 	int hlen = LL_MAX_HEADER;
-	int mtu = 1500;
+	int mtu = ETH_DATA_LEN;
 	int addend = sizeof(struct iphdr) + 4;
 
-	tunnel = (struct ip_tunnel*)dev->priv;
+	tunnel = netdev_priv(dev);
 	iph = &tunnel->parms.iph;
 
 	tunnel->dev = dev;
@@ -1219,7 +1219,7 @@ static int ipgre_tunnel_init(struct net_device *dev)
 
 static int __init ipgre_fb_tunnel_init(struct net_device *dev)
 {
-	struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct iphdr *iph = &tunnel->parms.iph;
 
 	tunnel->dev = dev;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 473d0f2b2e0..18d7fad474d 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -128,6 +128,7 @@
 #include <linux/sockios.h>
 #include <linux/in.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 
@@ -184,7 +185,6 @@ int ip_call_ra_chain(struct sk_buff *skb)
 					raw_rcv(last, skb2);
 			}
 			last = sk;
-			nf_reset(skb);
 		}
 	}
 
@@ -203,10 +203,6 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
 
 	__skb_pull(skb, ihl);
 
-	/* Free reference early: we don't need it any more, and it may
-           hold ip_conntrack module loaded indefinitely. */
-	nf_reset(skb);
-
         /* Point into the IP datagram, just past the header. */
         skb->h.raw = skb->data;
 
@@ -231,10 +227,12 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
 		if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) {
 			int ret;
 
-			if (!ipprot->no_policy &&
-			    !xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
-				kfree_skb(skb);
-				goto out;
+			if (!ipprot->no_policy) {
+				if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+					kfree_skb(skb);
+					goto out;
+				}
+				nf_reset(skb);
 			}
 			ret = ipprot->handler(skb);
 			if (ret < 0) {
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index dbe12da8d8b..9bebad07bf2 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -11,6 +11,7 @@
  *		
  */
 
+#include <linux/capability.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <asm/uaccess.h>
@@ -22,6 +23,7 @@
 #include <net/sock.h>
 #include <net/ip.h>
 #include <net/icmp.h>
+#include <net/route.h>
 
 /* 
  * Write options to IP header, record destination address to
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index eba64e2bd39..3324fbfe528 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -69,6 +69,7 @@
 #include <net/ip.h>
 #include <net/protocol.h>
 #include <net/route.h>
+#include <net/xfrm.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
 #include <net/arp.h>
@@ -85,6 +86,8 @@
 
 int sysctl_ip_default_ttl = IPDEFTTL;
 
+static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*));
+
 /* Generate a checksum for an outgoing IP datagram. */
 __inline__ void ip_send_check(struct iphdr *iph)
 {
@@ -202,13 +205,16 @@ static inline int ip_finish_output2(struct sk_buff *skb)
 
 static inline int ip_finish_output(struct sk_buff *skb)
 {
-	struct net_device *dev = skb->dst->dev;
-
-	skb->dev = dev;
-	skb->protocol = htons(ETH_P_IP);
-
-	return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
-		       ip_finish_output2);
+#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
+	/* Policy lookup after SNAT yielded a new policy */
+	if (skb->dst->xfrm != NULL)
+		return xfrm4_output_finish(skb);
+#endif
+	if (skb->len > dst_mtu(skb->dst) &&
+	    !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
+		return ip_fragment(skb, ip_finish_output2);
+	else
+		return ip_finish_output2(skb);
 }
 
 int ip_mc_output(struct sk_buff *skb)
@@ -265,21 +271,21 @@ int ip_mc_output(struct sk_buff *skb)
 				newskb->dev, ip_dev_loopback_xmit);
 	}
 
-	if (skb->len > dst_mtu(&rt->u.dst))
-		return ip_fragment(skb, ip_finish_output);
-	else
-		return ip_finish_output(skb);
+	return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev,
+		       ip_finish_output);
 }
 
 int ip_output(struct sk_buff *skb)
 {
+	struct net_device *dev = skb->dst->dev;
+
 	IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
 
-	if (skb->len > dst_mtu(skb->dst) &&
-		!(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
-		return ip_fragment(skb, ip_finish_output);
-	else
-		return ip_finish_output(skb);
+	skb->dev = dev;
+	skb->protocol = htons(ETH_P_IP);
+
+	return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
+		       ip_finish_output);
 }
 
 int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
@@ -411,7 +417,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
  *	single device frame, and queue such a frame for sending.
  */
 
-int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
+static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 {
 	struct iphdr *iph;
 	int raw = 0;
@@ -420,7 +426,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 	struct sk_buff *skb2;
 	unsigned int mtu, hlen, left, len, ll_rs;
 	int offset;
-	int not_last_frag;
+	__be16 not_last_frag;
 	struct rtable *rt = (struct rtable*)skb->dst;
 	int err = 0;
 
@@ -445,6 +451,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 
 	hlen = iph->ihl * 4;
 	mtu = dst_mtu(&rt->u.dst) - hlen;	/* Size of data space */
+	IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
 
 	/* When frag_list is given, use it. First, check its validity:
 	 * some transformers could create wrong frag_list or break existing
@@ -1181,7 +1188,7 @@ int ip_push_pending_frames(struct sock *sk)
 	struct ip_options *opt = NULL;
 	struct rtable *rt = inet->cork.rt;
 	struct iphdr *iph;
-	int df = 0;
+	__be16 df = 0;
 	__u8 ttl;
 	int err = 0;
 
@@ -1392,7 +1399,6 @@ void __init ip_init(void)
 #endif
 }
 
-EXPORT_SYMBOL(ip_fragment);
 EXPORT_SYMBOL(ip_generic_getfrag);
 EXPORT_SYMBOL(ip_queue_xmit);
 EXPORT_SYMBOL(ip_send_check);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 4f2d8725730..2bf8d782f67 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -25,12 +25,12 @@
 #include <linux/skbuff.h>
 #include <linux/ip.h>
 #include <linux/icmp.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <net/sock.h>
 #include <net/ip.h>
 #include <net/icmp.h>
-#include <net/tcp.h>
-#include <linux/tcp.h>
+#include <net/tcp_states.h>
 #include <linux/udp.h>
 #include <linux/igmp.h>
 #include <linux/netfilter.h>
@@ -427,8 +427,8 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
 			err = ip_options_get_from_user(&opt, optval, optlen);
 			if (err)
 				break;
-			if (sk->sk_type == SOCK_STREAM) {
-				struct tcp_sock *tp = tcp_sk(sk);
+			if (inet->is_icsk) {
+				struct inet_connection_sock *icsk = inet_csk(sk);
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 				if (sk->sk_family == PF_INET ||
 				    (!((1 << sk->sk_state) &
@@ -436,10 +436,10 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
 				     inet->daddr != LOOPBACK4_IPV6)) {
 #endif
 					if (inet->opt)
-						tp->ext_header_len -= inet->opt->optlen;
+						icsk->icsk_ext_hdr_len -= inet->opt->optlen;
 					if (opt)
-						tp->ext_header_len += opt->optlen;
-					tcp_sync_mss(sk, tp->pmtu_cookie);
+						icsk->icsk_ext_hdr_len += opt->optlen;
+					icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 				}
 #endif
@@ -621,7 +621,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
 				err = -ENOBUFS;
 				break;
 			}
-			msf = (struct ip_msfilter *)kmalloc(optlen, GFP_KERNEL);
+			msf = kmalloc(optlen, GFP_KERNEL);
 			if (msf == 0) {
 				err = -ENOBUFS;
 				break;
@@ -778,7 +778,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
 				err = -ENOBUFS;
 				break;
 			}
-			gsf = (struct group_filter *)kmalloc(optlen,GFP_KERNEL);
+			gsf = kmalloc(optlen,GFP_KERNEL);
 			if (gsf == 0) {
 				err = -ENOBUFS;
 				break;
@@ -798,7 +798,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
 				goto mc_msf_out;
 			}
 			msize = IP_MSFILTER_SIZE(gsf->gf_numsrc);
-			msf = (struct ip_msfilter *)kmalloc(msize,GFP_KERNEL);
+			msf = kmalloc(msize,GFP_KERNEL);
 			if (msf == 0) {
 				err = -ENOBUFS;
 				goto mc_msf_out;
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index fc718df17b4..d64e2ec8da7 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -28,6 +28,7 @@
 #include <net/xfrm.h>
 #include <net/icmp.h>
 #include <net/ipcomp.h>
+#include <net/protocol.h>
 
 struct ipcomp_tfms {
 	struct list_head list;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index e8674baaa8d..bb3613ec448 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -42,6 +42,7 @@
 #include <linux/in.h>
 #include <linux/if.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/skbuff.h>
@@ -58,6 +59,7 @@
 #include <net/arp.h>
 #include <net/ip.h>
 #include <net/ipconfig.h>
+#include <net/route.h>
 
 #include <asm/uaccess.h>
 #include <net/checksum.h>
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index c05c1df0bb0..e5cbe72c6b8 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -93,6 +93,7 @@
  */
 
  
+#include <linux/capability.h>
 #include <linux/config.h>
 #include <linux/module.h>
 #include <linux/types.h>
@@ -108,6 +109,7 @@
 #include <linux/mroute.h>
 #include <linux/init.h>
 #include <linux/netfilter_ipv4.h>
+#include <linux/if_ether.h>
 
 #include <net/sock.h>
 #include <net/ip.h>
@@ -243,7 +245,7 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c
 	if (dev == NULL)
 		return NULL;
 
-	nt = dev->priv;
+	nt = netdev_priv(dev);
 	SET_MODULE_OWNER(dev);
 	dev->init = ipip_tunnel_init;
 	nt->parms = *parms;
@@ -268,7 +270,7 @@ static void ipip_tunnel_uninit(struct net_device *dev)
 		tunnels_wc[0] = NULL;
 		write_unlock_bh(&ipip_lock);
 	} else
-		ipip_tunnel_unlink((struct ip_tunnel*)dev->priv);
+		ipip_tunnel_unlink(netdev_priv(dev));
 	dev_put(dev);
 }
 
@@ -442,7 +444,7 @@ out:
 		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
 		rel_info = htonl(rel_info);
 	} else if (type == ICMP_TIME_EXCEEDED) {
-		struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
+		struct ip_tunnel *t = netdev_priv(skb2->dev);
 		if (t->parms.iph.ttl) {
 			rel_type = ICMP_DEST_UNREACH;
 			rel_code = ICMP_HOST_UNREACH;
@@ -513,7 +515,7 @@ out:
 
 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct net_device_stats *stats = &tunnel->stat;
 	struct iphdr  *tiph = &tunnel->parms.iph;
 	u8     tos = tunnel->parms.iph.tos;
@@ -620,6 +622,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	skb->h.raw = skb->nh.raw;
 	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED);
 	dst_release(skb->dst);
 	skb->dst = &rt->u.dst;
 
@@ -672,7 +675,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 			t = ipip_tunnel_locate(&p, 0);
 		}
 		if (t == NULL)
-			t = (struct ip_tunnel*)dev->priv;
+			t = netdev_priv(dev);
 		memcpy(&p, &t->parms, sizeof(p));
 		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 			err = -EFAULT;
@@ -709,7 +712,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 					err = -EINVAL;
 					break;
 				}
-				t = (struct ip_tunnel*)dev->priv;
+				t = netdev_priv(dev);
 				ipip_tunnel_unlink(t);
 				t->parms.iph.saddr = p.iph.saddr;
 				t->parms.iph.daddr = p.iph.daddr;
@@ -763,7 +766,7 @@ done:
 
 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
 {
-	return &(((struct ip_tunnel*)dev->priv)->stat);
+	return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
 }
 
 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
@@ -786,7 +789,7 @@ static void ipip_tunnel_setup(struct net_device *dev)
 
 	dev->type		= ARPHRD_TUNNEL;
 	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr);
-	dev->mtu		= 1500 - sizeof(struct iphdr);
+	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
 	dev->flags		= IFF_NOARP;
 	dev->iflink		= 0;
 	dev->addr_len		= 4;
@@ -798,7 +801,7 @@ static int ipip_tunnel_init(struct net_device *dev)
 	struct ip_tunnel *tunnel;
 	struct iphdr *iph;
 
-	tunnel = (struct ip_tunnel*)dev->priv;
+	tunnel = netdev_priv(dev);
 	iph = &tunnel->parms.iph;
 
 	tunnel->dev = dev;
@@ -836,7 +839,7 @@ static int ipip_tunnel_init(struct net_device *dev)
 
 static int __init ipip_fb_tunnel_init(struct net_device *dev)
 {
-	struct ip_tunnel *tunnel = dev->priv;
+	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct iphdr *iph = &tunnel->parms.iph;
 
 	tunnel->dev = dev;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 302b7eb507c..5c94c222e3f 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -33,6 +33,7 @@
 #include <asm/uaccess.h>
 #include <linux/types.h>
 #include <linux/sched.h>
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/timer.h>
 #include <linux/mm.h>
@@ -49,9 +50,11 @@
 #include <linux/seq_file.h>
 #include <linux/mroute.h>
 #include <linux/init.h>
+#include <linux/if_ether.h>
 #include <net/ip.h>
 #include <net/protocol.h>
 #include <linux/skbuff.h>
+#include <net/route.h>
 #include <net/sock.h>
 #include <net/icmp.h>
 #include <net/udp.h>
@@ -176,8 +179,8 @@ static int reg_vif_num = -1;
 static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	read_lock(&mrt_lock);
-	((struct net_device_stats*)dev->priv)->tx_bytes += skb->len;
-	((struct net_device_stats*)dev->priv)->tx_packets++;
+	((struct net_device_stats*)netdev_priv(dev))->tx_bytes += skb->len;
+	((struct net_device_stats*)netdev_priv(dev))->tx_packets++;
 	ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
 	read_unlock(&mrt_lock);
 	kfree_skb(skb);
@@ -186,13 +189,13 @@ static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
 
 static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
 {
-	return (struct net_device_stats*)dev->priv;
+	return (struct net_device_stats*)netdev_priv(dev);
 }
 
 static void reg_vif_setup(struct net_device *dev)
 {
 	dev->type		= ARPHRD_PIMREG;
-	dev->mtu		= 1500 - sizeof(struct iphdr) - 8;
+	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
 	dev->flags		= IFF_NOARP;
 	dev->hard_start_xmit	= reg_vif_xmit;
 	dev->get_stats		= reg_vif_get_stats;
@@ -1147,8 +1150,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
 	if (vif->flags & VIFF_REGISTER) {
 		vif->pkt_out++;
 		vif->bytes_out+=skb->len;
-		((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len;
-		((struct net_device_stats*)vif->dev->priv)->tx_packets++;
+		((struct net_device_stats*)netdev_priv(vif->dev))->tx_bytes += skb->len;
+		((struct net_device_stats*)netdev_priv(vif->dev))->tx_packets++;
 		ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
 		kfree_skb(skb);
 		return;
@@ -1208,8 +1211,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
 	if (vif->flags & VIFF_TUNNEL) {
 		ip_encap(skb, vif->local, vif->remote);
 		/* FIXME: extra output firewall step used to be here. --RR */
-		((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++;
-		((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb->len;
+		((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_packets++;
+		((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_bytes+=skb->len;
 	}
 
 	IPCB(skb)->flags |= IPSKB_FORWARDED;
@@ -1465,8 +1468,8 @@ int pim_rcv_v1(struct sk_buff * skb)
 	skb->pkt_type = PACKET_HOST;
 	dst_release(skb->dst);
 	skb->dst = NULL;
-	((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
-	((struct net_device_stats*)reg_dev->priv)->rx_packets++;
+	((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
+	((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
 	nf_reset(skb);
 	netif_rx(skb);
 	dev_put(reg_dev);
@@ -1520,8 +1523,8 @@ static int pim_rcv(struct sk_buff * skb)
 	skb->ip_summed = 0;
 	skb->pkt_type = PACKET_HOST;
 	dst_release(skb->dst);
-	((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
-	((struct net_device_stats*)reg_dev->priv)->rx_packets++;
+	((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
+	((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
 	skb->dst = NULL;
 	nf_reset(skb);
 	netif_rx(skb);
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index d7eb680101c..9b176a942ac 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -224,34 +224,6 @@ void unregister_ip_vs_app(struct ip_vs_app *app)
 }
 
 
-#if 0000
-/*
- *	Get reference to app by name (called from user context)
- */
-struct ip_vs_app *ip_vs_app_get_by_name(char *appname)
-{
-	struct ip_vs_app *app, *a = NULL;
-
-	down(&__ip_vs_app_mutex);
-
-	list_for_each_entry(ent, &ip_vs_app_list, a_list) {
-		if (strcmp(app->name, appname))
-			continue;
-
-		/* softirq may call ip_vs_app_get too, so the caller
-		   must disable softirq on the current CPU */
-		if (ip_vs_app_get(app))
-			a = app;
-		break;
-	}
-
-	up(&__ip_vs_app_mutex);
-
-	return a;
-}
-#endif
-
-
 /*
  *	Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
  */
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 2a3a8c59c65..87b83813cf2 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -24,7 +24,11 @@
  *
  */
 
+#include <linux/interrupt.h>
+#include <linux/in.h>
+#include <linux/net.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/proc_fs.h>		/* for proc_net_* */
 #include <linux/seq_file.h>
@@ -219,7 +223,7 @@ struct ip_vs_conn *ip_vs_conn_in_get
 	if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt))
 		cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port);
 
-	IP_VS_DBG(7, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
+	IP_VS_DBG(9, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
 		  ip_vs_proto_name(protocol),
 		  NIPQUAD(s_addr), ntohs(s_port),
 		  NIPQUAD(d_addr), ntohs(d_port),
@@ -254,7 +258,7 @@ struct ip_vs_conn *ip_vs_ct_in_get
   out:
 	ct_read_unlock(hash);
 
-	IP_VS_DBG(7, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
+	IP_VS_DBG(9, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
 		  ip_vs_proto_name(protocol),
 		  NIPQUAD(s_addr), ntohs(s_port),
 		  NIPQUAD(d_addr), ntohs(d_port),
@@ -295,7 +299,7 @@ struct ip_vs_conn *ip_vs_conn_out_get
 
 	ct_read_unlock(hash);
 
-	IP_VS_DBG(7, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
+	IP_VS_DBG(9, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
 		  ip_vs_proto_name(protocol),
 		  NIPQUAD(s_addr), ntohs(s_port),
 		  NIPQUAD(d_addr), ntohs(d_port),
@@ -391,8 +395,9 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
 	cp->flags |= atomic_read(&dest->conn_flags);
 	cp->dest = dest;
 
-	IP_VS_DBG(9, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
-		  "d:%u.%u.%u.%u:%d fwd:%c s:%u flg:%X cnt:%d destcnt:%d\n",
+	IP_VS_DBG(7, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
+		  "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
+		  "dest->refcnt:%d\n",
 		  ip_vs_proto_name(cp->protocol),
 		  NIPQUAD(cp->caddr), ntohs(cp->cport),
 		  NIPQUAD(cp->vaddr), ntohs(cp->vport),
@@ -430,8 +435,9 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
 	if (!dest)
 		return;
 
-	IP_VS_DBG(9, "Unbind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
-		  "d:%u.%u.%u.%u:%d fwd:%c s:%u flg:%X cnt:%d destcnt:%d\n",
+	IP_VS_DBG(7, "Unbind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
+		  "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
+		  "dest->refcnt:%d\n",
 		  ip_vs_proto_name(cp->protocol),
 		  NIPQUAD(cp->caddr), ntohs(cp->cport),
 		  NIPQUAD(cp->vaddr), ntohs(cp->vport),
@@ -571,7 +577,7 @@ static void ip_vs_conn_expire(unsigned long data)
 	ip_vs_conn_hash(cp);
 
   expire_later:
-	IP_VS_DBG(7, "delayed: refcnt-1=%d conn.n_control=%d\n",
+	IP_VS_DBG(7, "delayed: conn->refcnt-1=%d conn->n_control=%d\n",
 		  atomic_read(&cp->refcnt)-1,
 		  atomic_read(&cp->n_control));
 
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 1a0843cd58a..3f47ad8e1ca 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -426,7 +426,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 		return NULL;
 
 	IP_VS_DBG(6, "Schedule fwd:%c c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u "
-		  "d:%u.%u.%u.%u:%u flg:%X cnt:%d\n",
+		  "d:%u.%u.%u.%u:%u conn->flags:%X conn->refcnt:%d\n",
 		  ip_vs_fwd_tag(cp),
 		  NIPQUAD(cp->caddr), ntohs(cp->cport),
 		  NIPQUAD(cp->vaddr), ntohs(cp->vport),
@@ -532,11 +532,8 @@ static unsigned int ip_vs_post_routing(unsigned int hooknum,
 {
 	if (!((*pskb)->ipvs_property))
 		return NF_ACCEPT;
-
 	/* The packet was sent from IPVS, exit this chain */
-	(*okfn)(*pskb);
-
-	return NF_STOLEN;
+	return NF_STOP;
 }
 
 u16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 9bdcf31b760..7f0288b25fa 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -23,6 +23,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/types.h>
+#include <linux/capability.h>
 #include <linux/fs.h>
 #include <linux/sysctl.h>
 #include <linux/proc_fs.h>
@@ -35,6 +36,7 @@
 #include <linux/netfilter_ipv4.h>
 
 #include <net/ip.h>
+#include <net/route.h>
 #include <net/sock.h>
 
 #include <asm/uaccess.h>
@@ -447,7 +449,7 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __u32 vaddr, __u16 vport)
   out:
 	read_unlock(&__ip_vs_svc_lock);
 
-	IP_VS_DBG(6, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
+	IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
 		  fwmark, ip_vs_proto_name(protocol),
 		  NIPQUAD(vaddr), ntohs(vport),
 		  svc?"hit":"not hit");
@@ -597,7 +599,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
 	 */
 	list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
 		IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
-			  "refcnt=%d\n",
+			  "dest->refcnt=%d\n",
 			  dest->vfwmark,
 			  NIPQUAD(dest->addr), ntohs(dest->port),
 			  atomic_read(&dest->refcnt));
@@ -804,7 +806,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
 	dest = ip_vs_trash_get_dest(svc, daddr, dport);
 	if (dest != NULL) {
 		IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
-			  "refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
+			  "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
 			  NIPQUAD(daddr), ntohs(dport),
 			  atomic_read(&dest->refcnt),
 			  dest->vfwmark,
@@ -949,7 +951,8 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
 		atomic_dec(&dest->svc->refcnt);
 		kfree(dest);
 	} else {
-		IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, refcnt=%d\n",
+		IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
+			  "dest->refcnt=%d\n",
 			  NIPQUAD(dest->addr), ntohs(dest->port),
 			  atomic_read(&dest->refcnt));
 		list_add(&dest->n_list, &ip_vs_dest_trash);
diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c
index f3bc320dce9..9fee19c4c61 100644
--- a/net/ipv4/ipvs/ip_vs_dh.c
+++ b/net/ipv4/ipvs/ip_vs_dh.c
@@ -37,8 +37,10 @@
  *
  */
 
+#include <linux/ip.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/skbuff.h>
 
 #include <net/ip_vs.h>
 
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c
index 67b3e2fc1fa..c453e1e57f4 100644
--- a/net/ipv4/ipvs/ip_vs_est.c
+++ b/net/ipv4/ipvs/ip_vs_est.c
@@ -13,8 +13,12 @@
  * Changes:
  *
  */
+#include <linux/config.h>
 #include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/slab.h>
 #include <linux/types.h>
+#include <linux/interrupt.h>
 
 #include <net/ip_vs.h>
 
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index 561cda326fa..6e5cb92a5c8 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -41,8 +41,10 @@
  * me to write this module.
  */
 
+#include <linux/ip.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/skbuff.h>
 
 /* for sysctl */
 #include <linux/fs.h>
@@ -228,33 +230,6 @@ ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
 }
 
 
-#if 0000
-/*
- *	Unhash ip_vs_lblc_entry from ip_vs_lblc_table.
- *	returns bool success.
- */
-static int ip_vs_lblc_unhash(struct ip_vs_lblc_table *tbl,
-			     struct ip_vs_lblc_entry *en)
-{
-	if (list_empty(&en->list)) {
-		IP_VS_ERR("ip_vs_lblc_unhash(): request for not hashed entry, "
-			  "called from %p\n", __builtin_return_address(0));
-		return 0;
-	}
-
-	/*
-	 * Remove it from the table
-	 */
-	write_lock(&tbl->lock);
-	list_del(&en->list);
-	INIT_LIST_HEAD(&en->list);
-	write_unlock(&tbl->lock);
-
-	return 1;
-}
-#endif
-
-
 /*
  *  Get ip_vs_lblc_entry associated with supplied parameters.
  */
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index ce456dbf09a..32ba37ba72d 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -39,8 +39,10 @@
  *
  */
 
+#include <linux/ip.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/skbuff.h>
 
 /* for sysctl */
 #include <linux/fs.h>
@@ -414,33 +416,6 @@ ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
 }
 
 
-#if 0000
-/*
- *	Unhash ip_vs_lblcr_entry from ip_vs_lblcr_table.
- *	returns bool success.
- */
-static int ip_vs_lblcr_unhash(struct ip_vs_lblcr_table *tbl,
-			     struct ip_vs_lblcr_entry *en)
-{
-	if (list_empty(&en->list)) {
-		IP_VS_ERR("ip_vs_lblcr_unhash(): request for not hashed entry, "
-			  "called from %p\n", __builtin_return_address(0));
-		return 0;
-	}
-
-	/*
-	 * Remove it from the table
-	 */
-	write_lock(&tbl->lock);
-	list_del(&en->list);
-	INIT_LIST_HEAD(&en->list);
-	write_unlock(&tbl->lock);
-
-	return 1;
-}
-#endif
-
-
 /*
  *  Get ip_vs_lblcr_entry associated with supplied parameters.
  */
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c
index 453e94a0bbd..8b0505b0931 100644
--- a/net/ipv4/ipvs/ip_vs_proto_ah.c
+++ b/net/ipv4/ipvs/ip_vs_proto_ah.c
@@ -12,6 +12,8 @@
  *
  */
 
+#include <linux/in.h>
+#include <linux/ip.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/netfilter.h>
diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c
index 478e5c7c7e8..c36ccf057a1 100644
--- a/net/ipv4/ipvs/ip_vs_proto_esp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_esp.c
@@ -12,6 +12,8 @@
  *
  */
 
+#include <linux/in.h>
+#include <linux/ip.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/netfilter.h>
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index 0e878fd6215..bc28b1160a3 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -275,28 +275,6 @@ static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
 	[IP_VS_TCP_S_LAST]		=	2*HZ,
 };
 
-
-#if 0
-
-/* FIXME: This is going to die */
-
-static int tcp_timeouts_dos[IP_VS_TCP_S_LAST+1] = {
-	[IP_VS_TCP_S_NONE]		=	2*HZ,
-	[IP_VS_TCP_S_ESTABLISHED]	=	8*60*HZ,
-	[IP_VS_TCP_S_SYN_SENT]		=	60*HZ,
-	[IP_VS_TCP_S_SYN_RECV]		=	10*HZ,
-	[IP_VS_TCP_S_FIN_WAIT]		=	60*HZ,
-	[IP_VS_TCP_S_TIME_WAIT]		=	60*HZ,
-	[IP_VS_TCP_S_CLOSE]		=	10*HZ,
-	[IP_VS_TCP_S_CLOSE_WAIT]	=	60*HZ,
-	[IP_VS_TCP_S_LAST_ACK]		=	30*HZ,
-	[IP_VS_TCP_S_LISTEN]		=	2*60*HZ,
-	[IP_VS_TCP_S_SYNACK]		=	100*HZ,
-	[IP_VS_TCP_S_LAST]		=	2*HZ,
-};
-
-#endif
-
 static char * tcp_state_name_table[IP_VS_TCP_S_LAST+1] = {
 	[IP_VS_TCP_S_NONE]		=	"NONE",
 	[IP_VS_TCP_S_ESTABLISHED]	=	"ESTABLISHED",
@@ -448,7 +426,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
 		struct ip_vs_dest *dest = cp->dest;
 
 		IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->"
-			  "%u.%u.%u.%u:%d state: %s->%s cnt:%d\n",
+			  "%u.%u.%u.%u:%d state: %s->%s conn->refcnt:%d\n",
 			  pp->name,
 			  (state_off==TCP_DIR_OUTPUT)?"output ":"input ",
 			  th->syn? 'S' : '.',
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 8ae5f2e0aef..89d9175d8f2 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -15,8 +15,11 @@
  *
  */
 
+#include <linux/in.h>
+#include <linux/ip.h>
 #include <linux/kernel.h>
 #include <linux/netfilter_ipv4.h>
+#include <linux/udp.h>
 
 #include <net/ip_vs.h>
 
diff --git a/net/ipv4/ipvs/ip_vs_sched.c b/net/ipv4/ipvs/ip_vs_sched.c
index 0f7c56a225b..8bc42b76223 100644
--- a/net/ipv4/ipvs/ip_vs_sched.c
+++ b/net/ipv4/ipvs/ip_vs_sched.c
@@ -22,6 +22,7 @@
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/spinlock.h>
+#include <linux/interrupt.h>
 #include <asm/string.h>
 #include <linux/kmod.h>
 
diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c
index 6f7c50e44a3..7775e6cc68b 100644
--- a/net/ipv4/ipvs/ip_vs_sh.c
+++ b/net/ipv4/ipvs/ip_vs_sh.c
@@ -34,8 +34,10 @@
  *
  */
 
+#include <linux/ip.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/skbuff.h>
 
 #include <net/ip_vs.h>
 
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 2e5ced3d806..1bca714bda3 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -21,12 +21,14 @@
 
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/inetdevice.h>
 #include <linux/net.h>
 #include <linux/completion.h>
 #include <linux/delay.h>
 #include <linux/skbuff.h>
 #include <linux/in.h>
 #include <linux/igmp.h>                 /* for ip_mc_join_group */
+#include <linux/udp.h>
 
 #include <net/ip.h>
 #include <net/sock.h>
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index 3b87482049c..52c12e9edbb 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -322,7 +322,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	struct net_device *tdev;		/* Device to other host */
 	struct iphdr  *old_iph = skb->nh.iph;
 	u8     tos = old_iph->tos;
-	u16    df = old_iph->frag_off;
+	__be16 df = old_iph->frag_off;
 	struct iphdr  *iph;			/* Our new IP header */
 	int    max_headroom;			/* The extra header space needed */
 	int    mtu;
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index ae0779d82c5..52a3d7c5790 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -1,17 +1,11 @@
 /* IPv4 specific functions of netfilter core */
-
-#include <linux/config.h>
-#ifdef CONFIG_NETFILTER
-
 #include <linux/kernel.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
-
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/icmp.h>
-#include <net/route.h>
 #include <linux/ip.h>
+#include <net/route.h>
+#include <net/xfrm.h>
+#include <net/ip.h>
 
 /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
 int ip_route_me_harder(struct sk_buff **pskb)
@@ -33,7 +27,6 @@ int ip_route_me_harder(struct sk_buff **pskb)
 #ifdef CONFIG_IP_ROUTE_FWMARK
 		fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark;
 #endif
-		fl.proto = iph->protocol;
 		if (ip_route_output_key(&rt, &fl) != 0)
 			return -1;
 
@@ -60,6 +53,13 @@ int ip_route_me_harder(struct sk_buff **pskb)
 	if ((*pskb)->dst->error)
 		return -1;
 
+#ifdef CONFIG_XFRM
+	if (!(IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) &&
+	    xfrm_decode_session(*pskb, &fl, AF_INET) == 0)
+		if (xfrm_lookup(&(*pskb)->dst, &fl, (*pskb)->sk, 0))
+			return -1;
+#endif
+
 	/* Change in oif may mean change in hh_len. */
 	hh_len = (*pskb)->dst->dev->hard_header_len;
 	if (skb_headroom(*pskb) < hh_len) {
@@ -78,6 +78,9 @@ int ip_route_me_harder(struct sk_buff **pskb)
 }
 EXPORT_SYMBOL(ip_route_me_harder);
 
+void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *);
+EXPORT_SYMBOL(ip_nat_decode_session);
+
 /*
  * Extra routing may needed on local out, as the QUEUE target never
  * returns control to the table.
@@ -135,5 +138,3 @@ static void fini(void)
 
 module_init(init);
 module_exit(fini);
-
-#endif /* CONFIG_NETFILTER */
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 88a60650e6b..db783036e4d 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -182,6 +182,7 @@ config IP_NF_QUEUE
 
 config IP_NF_IPTABLES
 	tristate "IP tables support (required for filtering/masq/NAT)"
+	depends on NETFILTER_XTABLES
 	help
 	  iptables is a general, extensible packet identification framework.
 	  The packet filtering and full NAT (masquerading, port forwarding,
@@ -191,16 +192,6 @@ config IP_NF_IPTABLES
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 # The matches.
-config IP_NF_MATCH_LIMIT
-	tristate "limit match support"
-	depends on IP_NF_IPTABLES
-	help
-	  limit matching allows you to control the rate at which a rule can be
-	  matched: mainly useful in combination with the LOG target ("LOG
-	  target support", below) and to avoid some Denial of Service attacks.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP_NF_MATCH_IPRANGE
 	tristate "IP range match support"
 	depends on IP_NF_IPTABLES
@@ -210,37 +201,6 @@ config IP_NF_MATCH_IPRANGE
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_MATCH_MAC
-	tristate "MAC address match support"
-	depends on IP_NF_IPTABLES
-	help
-	  MAC matching allows you to match packets based on the source
-	  Ethernet address of the packet.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
-config IP_NF_MATCH_PKTTYPE
-	tristate "Packet type match support"
-	depends on IP_NF_IPTABLES
-	help
-	  Packet type matching allows you to match a packet by
-	  its "class", eg. BROADCAST, MULTICAST, ...
-
-	  Typical usage:
-	  iptables -A INPUT -m pkttype --pkt-type broadcast -j LOG
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
-config IP_NF_MATCH_MARK
-	tristate "netfilter MARK match support"
-	depends on IP_NF_IPTABLES
-	help
-	  Netfilter mark matching allows you to match packets based on the
-	  `nfmark' value in the packet.  This can be set by the MARK target
-	  (see below).
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP_NF_MATCH_MULTIPORT
 	tristate "Multiple port match support"
 	depends on IP_NF_IPTABLES
@@ -301,15 +261,6 @@ config IP_NF_MATCH_AH_ESP
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_MATCH_LENGTH
-	tristate "LENGTH match support"
-	depends on IP_NF_IPTABLES
-	help
-	  This option allows you to match the length of a packet against a
-	  specific value or range of values.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP_NF_MATCH_TTL
 	tristate "TTL match support"
 	depends on IP_NF_IPTABLES
@@ -319,50 +270,6 @@ config IP_NF_MATCH_TTL
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_MATCH_TCPMSS
-	tristate "tcpmss match support"
-	depends on IP_NF_IPTABLES
-	help
-	  This option adds a `tcpmss' match, which allows you to examine the
-	  MSS value of TCP SYN packets, which control the maximum packet size
-	  for that connection.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
-config IP_NF_MATCH_HELPER
-	tristate "Helper match support"
-	depends on IP_NF_IPTABLES
-	depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
-	help
-	  Helper matching allows you to match packets in dynamic connections
-	  tracked by a conntrack-helper, ie. ip_conntrack_ftp
-
-	  To compile it as a module, choose M here.  If unsure, say Y.
-
-config IP_NF_MATCH_STATE
-	tristate "Connection state match support"
-	depends on IP_NF_IPTABLES
-	depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
-	help
-	  Connection state matching allows you to match packets based on their
-	  relationship to a tracked connection (ie. previous packets).  This
-	  is a powerful tool for packet classification.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
-config IP_NF_MATCH_CONNTRACK
-	tristate "Connection tracking match support"
-	depends on IP_NF_IPTABLES
-	depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
-	help
-	  This is a general conntrack match module, a superset of the state match.
-
-	  It allows matching on additional conntrack information, which is
-	  useful in complex configurations, such as NAT gateways with multiple
-	  internet links or tunnels.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP_NF_MATCH_OWNER
 	tristate "Owner match support"
 	depends on IP_NF_IPTABLES
@@ -372,15 +279,6 @@ config IP_NF_MATCH_OWNER
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_MATCH_PHYSDEV
-	tristate "Physdev match support"
-	depends on IP_NF_IPTABLES && BRIDGE_NETFILTER
-	help
-	  Physdev packet matching matches against the physical bridge ports
-	  the IP packet arrived on or will leave by.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP_NF_MATCH_ADDRTYPE
 	tristate  'address type match support'
 	depends on IP_NF_IPTABLES
@@ -391,75 +289,6 @@ config IP_NF_MATCH_ADDRTYPE
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/modules.txt>.  If unsure, say `N'.
 
-config IP_NF_MATCH_REALM
-	tristate  'realm match support'
-	depends on IP_NF_IPTABLES
-	select NET_CLS_ROUTE
-	help
-	  This option adds a `realm' match, which allows you to use the realm
-	  key from the routing subsystem inside iptables.
-	
-	  This match pretty much resembles the CONFIG_NET_CLS_ROUTE4 option 
-	  in tc world.
-	
-	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/modules.txt>.  If unsure, say `N'.
-
-config IP_NF_MATCH_SCTP
-	tristate  'SCTP protocol match support'
-	depends on IP_NF_IPTABLES
-	help
-	  With this option enabled, you will be able to use the iptables
-	  `sctp' match in order to match on SCTP source/destination ports
-	  and SCTP chunk types.
-
-	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/modules.txt>.  If unsure, say `N'.
-
-config IP_NF_MATCH_DCCP
-	tristate  'DCCP protocol match support'
-	depends on IP_NF_IPTABLES
-	help
-	  With this option enabled, you will be able to use the iptables
-	  `dccp' match in order to match on DCCP source/destination ports
-	  and DCCP flags.
-
-	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/modules.txt>.  If unsure, say `N'.
-
-config IP_NF_MATCH_COMMENT
-	tristate  'comment match support'
-	depends on IP_NF_IPTABLES
-	help
-	  This option adds a `comment' dummy-match, which allows you to put
-	  comments in your iptables ruleset.
-
-	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/modules.txt>.  If unsure, say `N'.
-
-config IP_NF_MATCH_CONNMARK
-	tristate  'Connection mark match support'
-	depends on IP_NF_IPTABLES
-	depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
-	help
-	  This option adds a `connmark' match, which allows you to match the
-	  connection mark value previously set for the session by `CONNMARK'. 
-	
-	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/modules.txt>.  The module will be called
-	  ipt_connmark.o.  If unsure, say `N'.
-
-config IP_NF_MATCH_CONNBYTES
-	tristate  'Connection byte/packet counter match support'
-	depends on IP_NF_IPTABLES
-	depends on (IP_NF_CONNTRACK && IP_NF_CT_ACCT) || (NF_CT_ACCT && NF_CONNTRACK_IPV4)
-	help
-	  This option adds a `connbytes' match, which allows you to match the
-	  number of bytes and/or packets for each direction within a connection.
-
-	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/modules.txt>.  If unsure, say `N'.
-
 config IP_NF_MATCH_HASHLIMIT
 	tristate  'hashlimit match support'
 	depends on IP_NF_IPTABLES
@@ -474,18 +303,15 @@ config IP_NF_MATCH_HASHLIMIT
 	  destination IP' or `500pps from any given source IP'  with a single
 	  IPtables rule.
 
-config IP_NF_MATCH_STRING
-	tristate  'string match support'
-	depends on IP_NF_IPTABLES 
-	select TEXTSEARCH
-	select TEXTSEARCH_KMP
-	select TEXTSEARCH_BM
-	select TEXTSEARCH_FSM
-	help
-	  This option adds a `string' match, which allows you to look for
-	  pattern matchings in packets.
+config IP_NF_MATCH_POLICY
+       tristate "IPsec policy match support"
+       depends on IP_NF_IPTABLES && XFRM
+       help
+         Policy matching allows you to match packets based on the
+         IPsec policy that was used during decapsulation/will
+         be used during encapsulation.
 
-	  To compile it as a module, choose M here.  If unsure, say N.
+         To compile it as a module, choose M here.  If unsure, say N.
 
 # `filter', generic and specific targets
 config IP_NF_FILTER
@@ -562,17 +388,6 @@ config IP_NF_TARGET_TCPMSS
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_TARGET_NFQUEUE
-	tristate "NFQUEUE Target Support"
-	depends on IP_NF_IPTABLES
-	help
-	  This Target replaced the old obsolete QUEUE target.
-
-	  As opposed to QUEUE, it supports 65535 different queues,
-	  not just one.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 # NAT + specific targets
 config IP_NF_NAT
 	tristate "Full NAT"
@@ -725,31 +540,6 @@ config IP_NF_TARGET_DSCP
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_TARGET_MARK
-	tristate "MARK target support"
-	depends on IP_NF_MANGLE
-	help
-	  This option adds a `MARK' target, which allows you to create rules
-	  in the `mangle' table which alter the netfilter mark (nfmark) field
-	  associated with the packet prior to routing. This can change
-	  the routing method (see `Use netfilter MARK value as routing
-	  key') and can also be used by other subsystems to change their
-	  behavior.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
-config IP_NF_TARGET_CLASSIFY
-	tristate "CLASSIFY target support"
-	depends on IP_NF_MANGLE
-	help
-	  This option adds a `CLASSIFY' target, which enables the user to set
-	  the priority of a packet. Some qdiscs can use this value for
-	  classification, among these are:
-
-  	  atm, cbq, dsmark, pfifo_fast, htb, prio
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP_NF_TARGET_TTL
 	tristate  'TTL target support'
 	depends on IP_NF_MANGLE
@@ -764,19 +554,6 @@ config IP_NF_TARGET_TTL
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_TARGET_CONNMARK
-	tristate  'CONNMARK target support'
-	depends on IP_NF_MANGLE
-	depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
-	help
-	  This option adds a `CONNMARK' target, which allows one to manipulate
-	  the connection mark value.  Similar to the MARK target, but
-	  affects the connection mark value rather than the packet mark value.
-	
-	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/modules.txt>.  The module will be called
-	  ipt_CONNMARK.o.  If unsure, say `N'.
-
 config IP_NF_TARGET_CLUSTERIP
 	tristate "CLUSTERIP target support (EXPERIMENTAL)"
 	depends on IP_NF_MANGLE && EXPERIMENTAL
@@ -800,23 +577,10 @@ config IP_NF_RAW
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/modules.txt>.  If unsure, say `N'.
 
-config IP_NF_TARGET_NOTRACK
-	tristate  'NOTRACK target support'
-	depends on IP_NF_RAW
-	depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
-	help
-	  The NOTRACK target allows a select rule to specify
-	  which packets *not* to enter the conntrack/NAT
-	  subsystem with all the consequences (no ICMP error tracking,
-	  no protocol helpers for the selected packets).
-	
-	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/modules.txt>.  If unsure, say `N'.
-
-
 # ARP tables
 config IP_NF_ARPTABLES
 	tristate "ARP tables support"
+	depends on NETFILTER_XTABLES
 	help
 	  arptables is a general, extensible packet identification framework.
 	  The ARP packet filtering and mangling (manipulation)subsystems
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 058c48e258f..e5c5b3202f0 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -12,6 +12,7 @@ ip_nat_pptp-objs	:= ip_nat_helper_pptp.o ip_nat_proto_gre.o
 
 # connection tracking
 obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
+obj-$(CONFIG_IP_NF_NAT) += ip_nat.o
 
 # conntrack netlink interface
 obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o
@@ -41,19 +42,12 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
 # the three instances of ip_tables
 obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
 obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
-obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o ip_nat.o
+obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o
 obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
 
 # matches
-obj-$(CONFIG_IP_NF_MATCH_HELPER) += ipt_helper.o
-obj-$(CONFIG_IP_NF_MATCH_LIMIT) += ipt_limit.o
 obj-$(CONFIG_IP_NF_MATCH_HASHLIMIT) += ipt_hashlimit.o
-obj-$(CONFIG_IP_NF_MATCH_SCTP) += ipt_sctp.o
-obj-$(CONFIG_IP_NF_MATCH_DCCP) += ipt_dccp.o
-obj-$(CONFIG_IP_NF_MATCH_MARK) += ipt_mark.o
-obj-$(CONFIG_IP_NF_MATCH_MAC) += ipt_mac.o
 obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o
-obj-$(CONFIG_IP_NF_MATCH_PKTTYPE) += ipt_pkttype.o
 obj-$(CONFIG_IP_NF_MATCH_MULTIPORT) += ipt_multiport.o
 obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o
 obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o
@@ -61,39 +55,25 @@ obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o
 obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
 obj-$(CONFIG_IP_NF_MATCH_DSCP) += ipt_dscp.o
 obj-$(CONFIG_IP_NF_MATCH_AH_ESP) += ipt_ah.o ipt_esp.o
-obj-$(CONFIG_IP_NF_MATCH_LENGTH) += ipt_length.o
 obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
-obj-$(CONFIG_IP_NF_MATCH_STATE) += ipt_state.o
-obj-$(CONFIG_IP_NF_MATCH_CONNMARK) += ipt_connmark.o
-obj-$(CONFIG_IP_NF_MATCH_CONNTRACK) += ipt_conntrack.o
-obj-$(CONFIG_IP_NF_MATCH_CONNBYTES) += ipt_connbytes.o
-obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o
-obj-$(CONFIG_IP_NF_MATCH_REALM) += ipt_realm.o
 obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
-obj-$(CONFIG_IP_NF_MATCH_PHYSDEV) += ipt_physdev.o
-obj-$(CONFIG_IP_NF_MATCH_COMMENT) += ipt_comment.o
-obj-$(CONFIG_IP_NF_MATCH_STRING) += ipt_string.o
+obj-$(CONFIG_IP_NF_MATCH_POLICY) += ipt_policy.o
 
 # targets
 obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
 obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o
 obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
 obj-$(CONFIG_IP_NF_TARGET_DSCP) += ipt_DSCP.o
-obj-$(CONFIG_IP_NF_TARGET_MARK) += ipt_MARK.o
 obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
 obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
 obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
 obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o
-obj-$(CONFIG_IP_NF_TARGET_CLASSIFY) += ipt_CLASSIFY.o
 obj-$(CONFIG_IP_NF_NAT_SNMP_BASIC) += ip_nat_snmp_basic.o
 obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o
-obj-$(CONFIG_IP_NF_TARGET_CONNMARK) += ipt_CONNMARK.o
 obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
 obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o
-obj-$(CONFIG_IP_NF_TARGET_NOTRACK) += ipt_NOTRACK.o
 obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
 obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o
-obj-$(CONFIG_IP_NF_TARGET_NFQUEUE) += ipt_NFQUEUE.o
 
 # generic ARP tables
 obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 3c2e9639bba..afe3d8f8177 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -13,6 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
+#include <linux/capability.h>
 #include <linux/if_arp.h>
 #include <linux/kmod.h>
 #include <linux/vmalloc.h>
@@ -23,6 +24,7 @@
 #include <asm/uaccess.h>
 #include <asm/semaphore.h>
 
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_arp/arp_tables.h>
 
 MODULE_LICENSE("GPL");
@@ -54,33 +56,9 @@ do {								\
 #else
 #define ARP_NF_ASSERT(x)
 #endif
-#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
 
-static DECLARE_MUTEX(arpt_mutex);
-
-#define ASSERT_READ_LOCK(x) ARP_NF_ASSERT(down_trylock(&arpt_mutex) != 0)
-#define ASSERT_WRITE_LOCK(x) ARP_NF_ASSERT(down_trylock(&arpt_mutex) != 0)
 #include <linux/netfilter_ipv4/listhelp.h>
 
-struct arpt_table_info {
-	unsigned int size;
-	unsigned int number;
-	unsigned int initial_entries;
-	unsigned int hook_entry[NF_ARP_NUMHOOKS];
-	unsigned int underflow[NF_ARP_NUMHOOKS];
-	char entries[0] __attribute__((aligned(SMP_CACHE_BYTES)));
-};
-
-static LIST_HEAD(arpt_target);
-static LIST_HEAD(arpt_tables);
-#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
-
-#ifdef CONFIG_SMP
-#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
-#else
-#define TABLE_OFFSET(t,p) 0
-#endif
-
 static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
 				      char *hdr_addr, int len)
 {
@@ -227,9 +205,9 @@ static inline int arp_checkentry(const struct arpt_arp *arp)
 }
 
 static unsigned int arpt_error(struct sk_buff **pskb,
-			       unsigned int hooknum,
 			       const struct net_device *in,
 			       const struct net_device *out,
+			       unsigned int hooknum,
 			       const void *targinfo,
 			       void *userinfo)
 {
@@ -258,6 +236,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
 	struct arpt_entry *e, *back;
 	const char *indev, *outdev;
 	void *table_base;
+	struct xt_table_info *private = table->private;
 
 	/* ARP header, plus 2 device addresses, plus 2 IP addresses.  */
 	if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) +
@@ -269,11 +248,9 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
 	outdev = out ? out->name : nulldevname;
 
 	read_lock_bh(&table->lock);
-	table_base = (void *)table->private->entries
-		+ TABLE_OFFSET(table->private,
-			       smp_processor_id());
-	e = get_entry(table_base, table->private->hook_entry[hook]);
-	back = get_entry(table_base, table->private->underflow[hook]);
+	table_base = (void *)private->entries[smp_processor_id()];
+	e = get_entry(table_base, private->hook_entry[hook]);
+	back = get_entry(table_base, private->underflow[hook]);
 
 	arp = (*pskb)->nh.arph;
 	do {
@@ -321,8 +298,8 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
 				 * abs. verdicts
 				 */
 				verdict = t->u.kernel.target->target(pskb,
-								     hook,
 								     in, out,
+								     hook,
 								     t->data,
 								     userdata);
 
@@ -347,106 +324,6 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
 		return verdict;
 }
 
-/*
- * These are weird, but module loading must not be done with mutex
- * held (since they will register), and we have to have a single
- * function to use try_then_request_module().
- */
-
-/* Find table by name, grabs mutex & ref.  Returns ERR_PTR() on error. */
-static inline struct arpt_table *find_table_lock(const char *name)
-{
-	struct arpt_table *t;
-
-	if (down_interruptible(&arpt_mutex) != 0)
-		return ERR_PTR(-EINTR);
-
-	list_for_each_entry(t, &arpt_tables, list)
-		if (strcmp(t->name, name) == 0 && try_module_get(t->me))
-			return t;
-	up(&arpt_mutex);
-	return NULL;
-}
-
-
-/* Find target, grabs ref.  Returns ERR_PTR() on error. */
-static inline struct arpt_target *find_target(const char *name, u8 revision)
-{
-	struct arpt_target *t;
-	int err = 0;
-
-	if (down_interruptible(&arpt_mutex) != 0)
-		return ERR_PTR(-EINTR);
-
-	list_for_each_entry(t, &arpt_target, list) {
-		if (strcmp(t->name, name) == 0) {
-			if (t->revision == revision) {
-				if (try_module_get(t->me)) {
-					up(&arpt_mutex);
-					return t;
-				}
-			} else
-				err = -EPROTOTYPE; /* Found something. */
-		}
-	}
-	up(&arpt_mutex);
-	return ERR_PTR(err);
-}
-
-struct arpt_target *arpt_find_target(const char *name, u8 revision)
-{
-	struct arpt_target *target;
-
-	target = try_then_request_module(find_target(name, revision),
-					 "arpt_%s", name);
-	if (IS_ERR(target) || !target)
-		return NULL;
-	return target;
-}
-
-static int target_revfn(const char *name, u8 revision, int *bestp)
-{
-	struct arpt_target *t;
-	int have_rev = 0;
-
-	list_for_each_entry(t, &arpt_target, list) {
-		if (strcmp(t->name, name) == 0) {
-			if (t->revision > *bestp)
-				*bestp = t->revision;
-			if (t->revision == revision)
-				have_rev =1;
-		}
-	}
-	return have_rev;
-}
-
-/* Returns true or false (if no such extension at all) */
-static inline int find_revision(const char *name, u8 revision,
-				int (*revfn)(const char *, u8, int *),
-				int *err)
-{
-	int have_rev, best = -1;
-
-	if (down_interruptible(&arpt_mutex) != 0) {
-		*err = -EINTR;
-		return 1;
-	}
-	have_rev = revfn(name, revision, &best);
-	up(&arpt_mutex);
-
-	/* Nothing at all?  Return 0 to try loading module. */
-	if (best == -1) {
-		*err = -ENOENT;
-		return 0;
-	}
-
-	*err = best;
-	if (!have_rev)
-		*err = -EPROTONOSUPPORT;
-	return 1;
-}
-
-
 /* All zeroes == unconditional rule. */
 static inline int unconditional(const struct arpt_arp *arp)
 {
@@ -462,7 +339,8 @@ static inline int unconditional(const struct arpt_arp *arp)
 /* Figures out from what hook each rule can be called: returns 0 if
  * there are loops.  Puts hook bitmask in comefrom.
  */
-static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int valid_hooks)
+static int mark_source_chains(struct xt_table_info *newinfo,
+			      unsigned int valid_hooks, void *entry0)
 {
 	unsigned int hook;
 
@@ -472,7 +350,7 @@ static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int vali
 	for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) {
 		unsigned int pos = newinfo->hook_entry[hook];
 		struct arpt_entry *e
-			= (struct arpt_entry *)(newinfo->entries + pos);
+			= (struct arpt_entry *)(entry0 + pos);
 
 		if (!(valid_hooks & (1 << hook)))
 			continue;
@@ -514,13 +392,13 @@ static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int vali
 						goto next;
 
 					e = (struct arpt_entry *)
-						(newinfo->entries + pos);
+						(entry0 + pos);
 				} while (oldpos == pos + e->next_offset);
 
 				/* Move along one */
 				size = e->next_offset;
 				e = (struct arpt_entry *)
-					(newinfo->entries + pos + size);
+					(entry0 + pos + size);
 				e->counters.pcnt = pos;
 				pos += size;
 			} else {
@@ -537,7 +415,7 @@ static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int vali
 					newpos = pos + e->next_offset;
 				}
 				e = (struct arpt_entry *)
-					(newinfo->entries + newpos);
+					(entry0 + newpos);
 				e->counters.pcnt = pos;
 				pos = newpos;
 			}
@@ -592,8 +470,8 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i
 	}
 
 	t = arpt_get_target(e);
-	target = try_then_request_module(find_target(t->u.user.name,
-						     t->u.user.revision),
+	target = try_then_request_module(xt_find_target(NF_ARP, t->u.user.name,
+							t->u.user.revision),
 					 "arpt_%s", t->u.user.name);
 	if (IS_ERR(target) || !target) {
 		duprintf("check_entry: `%s' not found\n", t->u.user.name);
@@ -627,7 +505,7 @@ out:
 }
 
 static inline int check_entry_size_and_hooks(struct arpt_entry *e,
-					     struct arpt_table_info *newinfo,
+					     struct xt_table_info *newinfo,
 					     unsigned char *base,
 					     unsigned char *limit,
 					     const unsigned int *hook_entries,
@@ -661,7 +539,7 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
            < 0 (not ARPT_RETURN). --RR */
 
 	/* Clear counters and comefrom */
-	e->counters = ((struct arpt_counters) { 0, 0 });
+	e->counters = ((struct xt_counters) { 0, 0 });
 	e->comefrom = 0;
 
 	(*i)++;
@@ -688,7 +566,8 @@ static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i)
  */
 static int translate_table(const char *name,
 			   unsigned int valid_hooks,
-			   struct arpt_table_info *newinfo,
+			   struct xt_table_info *newinfo,
+			   void *entry0,
 			   unsigned int size,
 			   unsigned int number,
 			   const unsigned int *hook_entries,
@@ -710,11 +589,11 @@ static int translate_table(const char *name,
 	i = 0;
 
 	/* Walk through entries, checking offsets. */
-	ret = ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+	ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size,
 				 check_entry_size_and_hooks,
 				 newinfo,
-				 newinfo->entries,
-				 newinfo->entries + size,
+				 entry0,
+				 entry0 + size,
 				 hook_entries, underflows, &i);
 	duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
 	if (ret != 0)
@@ -743,79 +622,78 @@ static int translate_table(const char *name,
 		}
 	}
 
-	if (!mark_source_chains(newinfo, valid_hooks)) {
+	if (!mark_source_chains(newinfo, valid_hooks, entry0)) {
 		duprintf("Looping hook\n");
 		return -ELOOP;
 	}
 
 	/* Finally, each sanity check must pass */
 	i = 0;
-	ret = ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+	ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size,
 				 check_entry, name, size, &i);
 
 	if (ret != 0) {
-		ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+		ARPT_ENTRY_ITERATE(entry0, newinfo->size,
 				   cleanup_entry, &i);
 		return ret;
 	}
 
 	/* And one copy for every other CPU */
 	for_each_cpu(i) {
-		if (i == 0)
-			continue;
-		memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
-		       newinfo->entries,
-		       SMP_ALIGN(newinfo->size));
+		if (newinfo->entries[i] && newinfo->entries[i] != entry0)
+			memcpy(newinfo->entries[i], entry0, newinfo->size);
 	}
 
 	return ret;
 }
 
-static struct arpt_table_info *replace_table(struct arpt_table *table,
-					     unsigned int num_counters,
-					     struct arpt_table_info *newinfo,
-					     int *error)
+/* Gets counters. */
+static inline int add_entry_to_counter(const struct arpt_entry *e,
+				       struct xt_counters total[],
+				       unsigned int *i)
 {
-	struct arpt_table_info *oldinfo;
-
-	/* Do the substitution. */
-	write_lock_bh(&table->lock);
-	/* Check inside lock: is the old number correct? */
-	if (num_counters != table->private->number) {
-		duprintf("num_counters != table->private->number (%u/%u)\n",
-			 num_counters, table->private->number);
-		write_unlock_bh(&table->lock);
-		*error = -EAGAIN;
-		return NULL;
-	}
-	oldinfo = table->private;
-	table->private = newinfo;
-	newinfo->initial_entries = oldinfo->initial_entries;
-	write_unlock_bh(&table->lock);
+	ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
 
-	return oldinfo;
+	(*i)++;
+	return 0;
 }
 
-/* Gets counters. */
-static inline int add_entry_to_counter(const struct arpt_entry *e,
-				       struct arpt_counters total[],
+static inline int set_entry_to_counter(const struct arpt_entry *e,
+				       struct xt_counters total[],
 				       unsigned int *i)
 {
-	ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
+	SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
 
 	(*i)++;
 	return 0;
 }
 
-static void get_counters(const struct arpt_table_info *t,
-			 struct arpt_counters counters[])
+static void get_counters(const struct xt_table_info *t,
+			 struct xt_counters counters[])
 {
 	unsigned int cpu;
 	unsigned int i;
+	unsigned int curcpu;
+
+	/* Instead of clearing (by a previous call to memset())
+	 * the counters and using adds, we set the counters
+	 * with data used by 'current' CPU
+	 * We don't care about preemption here.
+	 */
+	curcpu = raw_smp_processor_id();
+
+	i = 0;
+	ARPT_ENTRY_ITERATE(t->entries[curcpu],
+			   t->size,
+			   set_entry_to_counter,
+			   counters,
+			   &i);
 
 	for_each_cpu(cpu) {
+		if (cpu == curcpu)
+			continue;
 		i = 0;
-		ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
+		ARPT_ENTRY_ITERATE(t->entries[cpu],
 				   t->size,
 				   add_entry_to_counter,
 				   counters,
@@ -829,27 +707,29 @@ static int copy_entries_to_user(unsigned int total_size,
 {
 	unsigned int off, num, countersize;
 	struct arpt_entry *e;
-	struct arpt_counters *counters;
+	struct xt_counters *counters;
+	struct xt_table_info *private = table->private;
 	int ret = 0;
+	void *loc_cpu_entry;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	 * (other than comefrom, which userspace doesn't care
 	 * about).
 	 */
-	countersize = sizeof(struct arpt_counters) * table->private->number;
-	counters = vmalloc(countersize);
+	countersize = sizeof(struct xt_counters) * private->number;
+	counters = vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
 		return -ENOMEM;
 
 	/* First, sum counters... */
-	memset(counters, 0, countersize);
 	write_lock_bh(&table->lock);
-	get_counters(table->private, counters);
+	get_counters(private, counters);
 	write_unlock_bh(&table->lock);
 
-	/* ... then copy entire thing from CPU 0... */
-	if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
+	loc_cpu_entry = private->entries[raw_smp_processor_id()];
+	/* ... then copy entire thing ... */
+	if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
 		ret = -EFAULT;
 		goto free_counters;
 	}
@@ -859,7 +739,7 @@ static int copy_entries_to_user(unsigned int total_size,
 	for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
 		struct arpt_entry_target *t;
 
-		e = (struct arpt_entry *)(table->private->entries + off);
+		e = (struct arpt_entry *)(loc_cpu_entry + off);
 		if (copy_to_user(userptr + off
 				 + offsetof(struct arpt_entry, counters),
 				 &counters[num],
@@ -890,21 +770,21 @@ static int get_entries(const struct arpt_get_entries *entries,
 	int ret;
 	struct arpt_table *t;
 
-	t = find_table_lock(entries->name);
-	if (t || !IS_ERR(t)) {
+	t = xt_find_table_lock(NF_ARP, entries->name);
+	if (t && !IS_ERR(t)) {
+		struct xt_table_info *private = t->private;
 		duprintf("t->private->number = %u\n",
-			 t->private->number);
-		if (entries->size == t->private->size)
-			ret = copy_entries_to_user(t->private->size,
+			 private->number);
+		if (entries->size == private->size)
+			ret = copy_entries_to_user(private->size,
 						   t, uptr->entrytable);
 		else {
 			duprintf("get_entries: I've got %u not %u!\n",
-				 t->private->size,
-				 entries->size);
+				 private->size, entries->size);
 			ret = -EINVAL;
 		}
 		module_put(t->me);
-		up(&arpt_mutex);
+		xt_table_unlock(t);
 	} else
 		ret = t ? PTR_ERR(t) : -ENOENT;
 
@@ -916,8 +796,9 @@ static int do_replace(void __user *user, unsigned int len)
 	int ret;
 	struct arpt_replace tmp;
 	struct arpt_table *t;
-	struct arpt_table_info *newinfo, *oldinfo;
-	struct arpt_counters *counters;
+	struct xt_table_info *newinfo, *oldinfo;
+	struct xt_counters *counters;
+	void *loc_cpu_entry, *loc_cpu_old_entry;
 
 	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
 		return -EFAULT;
@@ -926,38 +807,33 @@ static int do_replace(void __user *user, unsigned int len)
 	if (len != sizeof(tmp) + tmp.size)
 		return -ENOPROTOOPT;
 
-	/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
-	if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
-		return -ENOMEM;
-
-	newinfo = vmalloc(sizeof(struct arpt_table_info)
-			  + SMP_ALIGN(tmp.size) *
-			  		(highest_possible_processor_id()+1));
+	newinfo = xt_alloc_table_info(tmp.size);
 	if (!newinfo)
 		return -ENOMEM;
 
-	if (copy_from_user(newinfo->entries, user + sizeof(tmp),
+	/* choose the copy that is on our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
 			   tmp.size) != 0) {
 		ret = -EFAULT;
 		goto free_newinfo;
 	}
 
-	counters = vmalloc(tmp.num_counters * sizeof(struct arpt_counters));
+	counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
 	if (!counters) {
 		ret = -ENOMEM;
 		goto free_newinfo;
 	}
-	memset(counters, 0, tmp.num_counters * sizeof(struct arpt_counters));
 
 	ret = translate_table(tmp.name, tmp.valid_hooks,
-			      newinfo, tmp.size, tmp.num_entries,
+			      newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
 			      tmp.hook_entry, tmp.underflow);
 	if (ret != 0)
 		goto free_newinfo_counters;
 
 	duprintf("arp_tables: Translated table\n");
 
-	t = try_then_request_module(find_table_lock(tmp.name),
+	t = try_then_request_module(xt_find_table_lock(NF_ARP, tmp.name),
 				    "arptable_%s", tmp.name);
 	if (!t || IS_ERR(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
@@ -972,7 +848,7 @@ static int do_replace(void __user *user, unsigned int len)
 		goto put_module;
 	}
 
-	oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
+	oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret);
 	if (!oldinfo)
 		goto put_module;
 
@@ -989,24 +865,26 @@ static int do_replace(void __user *user, unsigned int len)
 	/* Get the old counters. */
 	get_counters(oldinfo, counters);
 	/* Decrease module usage counts and free resource */
-	ARPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
-	vfree(oldinfo);
+	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
+	ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
+
+	xt_free_table_info(oldinfo);
 	if (copy_to_user(tmp.counters, counters,
-			 sizeof(struct arpt_counters) * tmp.num_counters) != 0)
+			 sizeof(struct xt_counters) * tmp.num_counters) != 0)
 		ret = -EFAULT;
 	vfree(counters);
-	up(&arpt_mutex);
+	xt_table_unlock(t);
 	return ret;
 
  put_module:
 	module_put(t->me);
-	up(&arpt_mutex);
+	xt_table_unlock(t);
  free_newinfo_counters_untrans:
-	ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry, NULL);
+	ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
  free_newinfo_counters:
 	vfree(counters);
  free_newinfo:
-	vfree(newinfo);
+	xt_free_table_info(newinfo);
 	return ret;
 }
 
@@ -1014,7 +892,7 @@ static int do_replace(void __user *user, unsigned int len)
  * and everything is OK.
  */
 static inline int add_counter_to_entry(struct arpt_entry *e,
-				       const struct arpt_counters addme[],
+				       const struct xt_counters addme[],
 				       unsigned int *i)
 {
 
@@ -1027,14 +905,16 @@ static inline int add_counter_to_entry(struct arpt_entry *e,
 static int do_add_counters(void __user *user, unsigned int len)
 {
 	unsigned int i;
-	struct arpt_counters_info tmp, *paddc;
+	struct xt_counters_info tmp, *paddc;
 	struct arpt_table *t;
+	struct xt_table_info *private;
 	int ret = 0;
+	void *loc_cpu_entry;
 
 	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
 		return -EFAULT;
 
-	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct arpt_counters))
+	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
 		return -EINVAL;
 
 	paddc = vmalloc(len);
@@ -1046,27 +926,30 @@ static int do_add_counters(void __user *user, unsigned int len)
 		goto free;
 	}
 
-	t = find_table_lock(tmp.name);
+	t = xt_find_table_lock(NF_ARP, tmp.name);
 	if (!t || IS_ERR(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
 		goto free;
 	}
 
 	write_lock_bh(&t->lock);
-	if (t->private->number != paddc->num_counters) {
+	private = t->private;
+	if (private->number != paddc->num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
 
 	i = 0;
-	ARPT_ENTRY_ITERATE(t->private->entries,
-			   t->private->size,
+	/* Choose the copy that is on our node */
+	loc_cpu_entry = private->entries[smp_processor_id()];
+	ARPT_ENTRY_ITERATE(loc_cpu_entry,
+			   private->size,
 			   add_counter_to_entry,
 			   paddc->counters,
 			   &i);
  unlock_up_free:
 	write_unlock_bh(&t->lock);
-	up(&arpt_mutex);
+	xt_table_unlock(t);
 	module_put(t->me);
  free:
 	vfree(paddc);
@@ -1123,25 +1006,26 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
 		}
 		name[ARPT_TABLE_MAXNAMELEN-1] = '\0';
 
-		t = try_then_request_module(find_table_lock(name),
+		t = try_then_request_module(xt_find_table_lock(NF_ARP, name),
 					    "arptable_%s", name);
 		if (t && !IS_ERR(t)) {
 			struct arpt_getinfo info;
+			struct xt_table_info *private = t->private;
 
 			info.valid_hooks = t->valid_hooks;
-			memcpy(info.hook_entry, t->private->hook_entry,
+			memcpy(info.hook_entry, private->hook_entry,
 			       sizeof(info.hook_entry));
-			memcpy(info.underflow, t->private->underflow,
+			memcpy(info.underflow, private->underflow,
 			       sizeof(info.underflow));
-			info.num_entries = t->private->number;
-			info.size = t->private->size;
+			info.num_entries = private->number;
+			info.size = private->size;
 			strcpy(info.name, name);
 
 			if (copy_to_user(user, &info, *len) != 0)
 				ret = -EFAULT;
 			else
 				ret = 0;
-			up(&arpt_mutex);
+			xt_table_unlock(t);
 			module_put(t->me);
 		} else
 			ret = t ? PTR_ERR(t) : -ENOENT;
@@ -1166,7 +1050,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
 	}
 
 	case ARPT_SO_GET_REVISION_TARGET: {
-		struct arpt_get_revision rev;
+		struct xt_get_revision rev;
 
 		if (*len != sizeof(rev)) {
 			ret = -EINVAL;
@@ -1177,8 +1061,8 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
 			break;
 		}
 
-		try_then_request_module(find_revision(rev.name, rev.revision,
-						      target_revfn, &ret),
+		try_then_request_module(xt_find_revision(NF_ARP, rev.name,
+							 rev.revision, 1, &ret),
 					"arpt_%s", rev.name);
 		break;
 	}
@@ -1191,101 +1075,57 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
 	return ret;
 }
 
-/* Registration hooks for targets. */
-int arpt_register_target(struct arpt_target *target)
-{
-	int ret;
-
-	ret = down_interruptible(&arpt_mutex);
-	if (ret != 0)
-		return ret;
-
-	list_add(&target->list, &arpt_target);
-	up(&arpt_mutex);
-
-	return ret;
-}
-
-void arpt_unregister_target(struct arpt_target *target)
-{
-	down(&arpt_mutex);
-	LIST_DELETE(&arpt_target, target);
-	up(&arpt_mutex);
-}
-
 int arpt_register_table(struct arpt_table *table,
 			const struct arpt_replace *repl)
 {
 	int ret;
-	struct arpt_table_info *newinfo;
-	static struct arpt_table_info bootstrap
+	struct xt_table_info *newinfo;
+	static struct xt_table_info bootstrap
 		= { 0, 0, 0, { 0 }, { 0 }, { } };
+	void *loc_cpu_entry;
 
-	newinfo = vmalloc(sizeof(struct arpt_table_info)
-			  + SMP_ALIGN(repl->size) *
-			  		(highest_possible_processor_id()+1));
+	newinfo = xt_alloc_table_info(repl->size);
 	if (!newinfo) {
 		ret = -ENOMEM;
 		return ret;
 	}
-	memcpy(newinfo->entries, repl->entries, repl->size);
+
+	/* choose the copy on our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	memcpy(loc_cpu_entry, repl->entries, repl->size);
 
 	ret = translate_table(table->name, table->valid_hooks,
-			      newinfo, repl->size,
+			      newinfo, loc_cpu_entry, repl->size,
 			      repl->num_entries,
 			      repl->hook_entry,
 			      repl->underflow);
 	duprintf("arpt_register_table: translate table gives %d\n", ret);
 	if (ret != 0) {
-		vfree(newinfo);
+		xt_free_table_info(newinfo);
 		return ret;
 	}
 
-	ret = down_interruptible(&arpt_mutex);
-	if (ret != 0) {
-		vfree(newinfo);
+	ret = xt_register_table(table, &bootstrap, newinfo);
+	if (ret != 0) {
+		xt_free_table_info(newinfo);
 		return ret;
 	}
 
-	/* Don't autoload: we'd eat our tail... */
-	if (list_named_find(&arpt_tables, table->name)) {
-		ret = -EEXIST;
-		goto free_unlock;
-	}
-
-	/* Simplifies replace_table code. */
-	table->private = &bootstrap;
-	if (!replace_table(table, 0, newinfo, &ret))
-		goto free_unlock;
-
-	duprintf("table->private->number = %u\n",
-		 table->private->number);
-	
-	/* save number of initial entries */
-	table->private->initial_entries = table->private->number;
-
-	rwlock_init(&table->lock);
-	list_prepend(&arpt_tables, table);
-
- unlock:
-	up(&arpt_mutex);
-	return ret;
-
- free_unlock:
-	vfree(newinfo);
-	goto unlock;
+	return 0;
 }
 
 void arpt_unregister_table(struct arpt_table *table)
 {
-	down(&arpt_mutex);
-	LIST_DELETE(&arpt_tables, table);
-	up(&arpt_mutex);
+	struct xt_table_info *private;
+	void *loc_cpu_entry;
+
+	private = xt_unregister_table(table);
 
 	/* Decrease module usage counts and free resources */
-	ARPT_ENTRY_ITERATE(table->private->entries, table->private->size,
+	loc_cpu_entry = private->entries[raw_smp_processor_id()];
+	ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size,
 			   cleanup_entry, NULL);
-	vfree(table->private);
+	xt_free_table_info(private);
 }
 
 /* The built-in targets: standard (NULL) and error. */
@@ -1308,52 +1148,15 @@ static struct nf_sockopt_ops arpt_sockopts = {
 	.get		= do_arpt_get_ctl,
 };
 
-#ifdef CONFIG_PROC_FS
-static inline int print_name(const struct arpt_table *t,
-			     off_t start_offset, char *buffer, int length,
-			     off_t *pos, unsigned int *count)
-{
-	if ((*count)++ >= start_offset) {
-		unsigned int namelen;
-
-		namelen = sprintf(buffer + *pos, "%s\n", t->name);
-		if (*pos + namelen > length) {
-			/* Stop iterating */
-			return 1;
-		}
-		*pos += namelen;
-	}
-	return 0;
-}
-
-static int arpt_get_tables(char *buffer, char **start, off_t offset, int length)
-{
-	off_t pos = 0;
-	unsigned int count = 0;
-
-	if (down_interruptible(&arpt_mutex) != 0)
-		return 0;
-
-	LIST_FIND(&arpt_tables, print_name, struct arpt_table *,
-		  offset, buffer, length, &pos, &count);
-
-	up(&arpt_mutex);
-
-	/* `start' hack - see fs/proc/generic.c line ~105 */
-	*start=(char *)((unsigned long)count-offset);
-	return pos;
-}
-#endif /*CONFIG_PROC_FS*/
-
 static int __init init(void)
 {
 	int ret;
 
+	xt_proto_init(NF_ARP);
+
 	/* Noone else will be downing sem now, so we won't sleep */
-	down(&arpt_mutex);
-	list_append(&arpt_target, &arpt_standard_target);
-	list_append(&arpt_target, &arpt_error_target);
-	up(&arpt_mutex);
+	xt_register_target(NF_ARP, &arpt_standard_target);
+	xt_register_target(NF_ARP, &arpt_error_target);
 
 	/* Register setsockopt */
 	ret = nf_register_sockopt(&arpt_sockopts);
@@ -1362,19 +1165,6 @@ static int __init init(void)
 		return ret;
 	}
 
-#ifdef CONFIG_PROC_FS
-	{
-		struct proc_dir_entry *proc;
-
-		proc = proc_net_create("arp_tables_names", 0, arpt_get_tables);
-		if (!proc) {
-			nf_unregister_sockopt(&arpt_sockopts);
-			return -ENOMEM;
-		}
-		proc->owner = THIS_MODULE;
-	}
-#endif
-
 	printk("arp_tables: (C) 2002 David S. Miller\n");
 	return 0;
 }
@@ -1382,16 +1172,15 @@ static int __init init(void)
 static void __exit fini(void)
 {
 	nf_unregister_sockopt(&arpt_sockopts);
-#ifdef CONFIG_PROC_FS
-	proc_net_remove("arp_tables_names");
-#endif
+	/* Undo init(): drop the built-in targets before the module goes away. */
+	xt_unregister_target(NF_ARP, &arpt_error_target);
+	xt_unregister_target(NF_ARP, &arpt_standard_target);
+	xt_proto_fini(NF_ARP);
 }
 
 EXPORT_SYMBOL(arpt_register_table);
 EXPORT_SYMBOL(arpt_unregister_table);
 EXPORT_SYMBOL(arpt_do_table);
-EXPORT_SYMBOL(arpt_register_target);
-EXPORT_SYMBOL(arpt_unregister_target);
 
 module_init(init);
 module_exit(fini);
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index 3e592ec8648..c97650a16a5 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -8,8 +8,9 @@ MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
 MODULE_DESCRIPTION("arptables arp payload mangle target");
 
 static unsigned int
-target(struct sk_buff **pskb, unsigned int hooknum, const struct net_device *in,
-   const struct net_device *out, const void *targinfo, void *userinfo)
+target(struct sk_buff **pskb, const struct net_device *in,
+   const struct net_device *out, unsigned int hooknum, const void *targinfo,
+   void *userinfo)
 {
 	const struct arpt_mangle *mangle = targinfo;
 	struct arphdr *arp;
@@ -64,7 +65,7 @@ target(struct sk_buff **pskb, unsigned int hooknum, const struct net_device *in,
 }
 
 static int
-checkentry(const char *tablename, const struct arpt_entry *e, void *targinfo,
+checkentry(const char *tablename, const void *e, void *targinfo,
    unsigned int targinfosize, unsigned int hook_mask)
 {
 	const struct arpt_mangle *mangle = targinfo;
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 0d759f5a4ef..f6ab45f4868 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -145,6 +145,7 @@ static struct arpt_table packet_filter = {
 	.lock		= RW_LOCK_UNLOCKED,
 	.private	= NULL,
 	.me		= THIS_MODULE,
+	.af		= NF_ARP,
 };
 
 /* The work comes in here from netfilter.c */
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c
index e52847fa10f..84e4f79b7ff 100644
--- a/net/ipv4/netfilter/ip_conntrack_amanda.c
+++ b/net/ipv4/netfilter/ip_conntrack_amanda.c
@@ -18,11 +18,13 @@
  *
  */
 
+#include <linux/in.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/netfilter.h>
 #include <linux/ip.h>
 #include <linux/moduleparam.h>
+#include <linux/udp.h>
 #include <net/checksum.h>
 #include <net/udp.h>
 
@@ -34,7 +36,7 @@ static unsigned int master_timeout = 300;
 MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
 MODULE_DESCRIPTION("Amanda connection tracking module");
 MODULE_LICENSE("GPL");
-module_param(master_timeout, int, 0600);
+module_param(master_timeout, uint, 0600);
 MODULE_PARM_DESC(master_timeout, "timeout for the master connection");
 
 static const char *conns[] = { "DATA ", "MESG ", "INDEX " };
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
index 68b173bcda6..e627e585617 100644
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_ftp.c
@@ -34,7 +34,7 @@ static int ports_c;
 module_param_array(ports, ushort, &ports_c, 0400);
 
 static int loose;
-module_param(loose, int, 0600);
+module_param(loose, bool, 0600);
 
 unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
 				enum ip_conntrack_info ctinfo,
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index 4108a5e12b3..d716bba798f 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -762,7 +762,7 @@ static struct ip_conntrack_helper pptp = {
 	.help = conntrack_pptp_help
 };
 
-extern void __exit ip_ct_proto_gre_fini(void);
+extern void ip_ct_proto_gre_fini(void);
 extern int __init ip_ct_proto_gre_init(void);
 
 /* ip_conntrack_pptp initialization */
diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c
index d7c40421d0d..c51a2cf71b4 100644
--- a/net/ipv4/netfilter/ip_conntrack_irc.c
+++ b/net/ipv4/netfilter/ip_conntrack_irc.c
@@ -36,7 +36,7 @@
 #define MAX_PORTS 8
 static unsigned short ports[MAX_PORTS];
 static int ports_c;
-static int max_dcc_channels = 8;
+static unsigned int max_dcc_channels = 8;
 static unsigned int dcc_timeout = 300;
 /* This is slow, but it's simple. --RR */
 static char *irc_buffer;
@@ -54,9 +54,9 @@ MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
 MODULE_LICENSE("GPL");
 module_param_array(ports, ushort, &ports_c, 0400);
 MODULE_PARM_DESC(ports, "port numbers of IRC servers");
-module_param(max_dcc_channels, int, 0400);
+module_param(max_dcc_channels, uint, 0400);
 MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per IRC session");
-module_param(dcc_timeout, int, 0400);
+module_param(dcc_timeout, uint, 0400);
 MODULE_PARM_DESC(dcc_timeout, "timeout on for unestablished DCC channels");
 
 static const char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " };
@@ -254,10 +254,6 @@ static int __init init(void)
 		printk("ip_conntrack_irc: max_dcc_channels must be a positive integer\n");
 		return -EBUSY;
 	}
-	if (dcc_timeout < 0) {
-		printk("ip_conntrack_irc: dcc_timeout must be a positive integer\n");
-		return -EBUSY;
-	}
 
 	irc_buffer = kmalloc(65536, GFP_KERNEL);
 	if (!irc_buffer)
diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
index 186646eb249..4e68e16a261 100644
--- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
+++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
@@ -37,7 +37,7 @@ MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper");
 MODULE_LICENSE("GPL");
 
 static unsigned int timeout = 3;
-module_param(timeout, int, 0600);
+module_param(timeout, uint, 0400);
 MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
 
 static int help(struct sk_buff **pskb,
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 91fe8f2e38f..c9ebbe0d2d9 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -79,6 +79,7 @@ ctnetlink_dump_tuples(struct sk_buff *skb,
 		      const struct ip_conntrack_tuple *tuple)
 {
 	struct nfattr *nest_parms;
+	int ret;
 	
 	nest_parms = NFA_NEST(skb, CTA_TUPLE_IP);
 	NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), &tuple->src.ip);
@@ -86,10 +87,10 @@ ctnetlink_dump_tuples(struct sk_buff *skb,
 	NFA_NEST_END(skb, nest_parms);
 
 	nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO);
-	ctnetlink_dump_tuples_proto(skb, tuple);
+	ret = ctnetlink_dump_tuples_proto(skb, tuple);
 	NFA_NEST_END(skb, nest_parms);
 
-	return 0;
+	return ret;
 
 nfattr_failure:
 	return -1;
@@ -160,7 +161,7 @@ ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct ip_conntrack *ct)
 		return 0;
 		
 	nest_helper = NFA_NEST(skb, CTA_HELP);
-	NFA_PUT(skb, CTA_HELP_NAME, CTA_HELP_MAXNAMESIZE, &ct->helper->name);
+	NFA_PUT(skb, CTA_HELP_NAME, strlen(ct->helper->name), ct->helper->name);
 
 	if (ct->helper->to_nfattr)
 		ct->helper->to_nfattr(skb, ct);
@@ -229,7 +230,7 @@ nfattr_failure:
 static inline int
 ctnetlink_dump_use(struct sk_buff *skb, const struct ip_conntrack *ct)
 {
-	unsigned int use = htonl(atomic_read(&ct->ct_general.use));
+	u_int32_t use = htonl(atomic_read(&ct->ct_general.use));
 	
 	NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use);
 	return 0;
@@ -311,29 +312,22 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 	if (events & IPCT_DESTROY) {
 		type = IPCTNL_MSG_CT_DELETE;
 		group = NFNLGRP_CONNTRACK_DESTROY;
-		goto alloc_skb;
-	}
-	if (events & (IPCT_NEW | IPCT_RELATED)) {
+	} else if (events & (IPCT_NEW | IPCT_RELATED)) {
 		type = IPCTNL_MSG_CT_NEW;
 		flags = NLM_F_CREATE|NLM_F_EXCL;
 		/* dump everything */
 		events = ~0UL;
 		group = NFNLGRP_CONNTRACK_NEW;
-		goto alloc_skb;
-	}
-	if (events & (IPCT_STATUS |
+	} else if (events & (IPCT_STATUS |
 		      IPCT_PROTOINFO |
 		      IPCT_HELPER |
 		      IPCT_HELPINFO |
 		      IPCT_NATINFO)) {
 		type = IPCTNL_MSG_CT_NEW;
 		group = NFNLGRP_CONNTRACK_UPDATE;
-		goto alloc_skb;
-	} 
+	} else 
+		return NOTIFY_DONE;
 	
-	return NOTIFY_DONE;
-
-alloc_skb:
   /* FIXME: Check if there are any listeners before, don't hurt performance */
 	
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
@@ -1037,6 +1031,11 @@ ctnetlink_create_conntrack(struct nfattr *cda[],
 			return err;
 	}
 
+#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
+	if (cda[CTA_MARK-1])
+		ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1]));
+#endif
+
 	ct->helper = ip_conntrack_helper_find_get(rtuple);
 
 	add_timer(&ct->timeout);
@@ -1045,11 +1044,6 @@ ctnetlink_create_conntrack(struct nfattr *cda[],
 	if (ct->helper)
 		ip_conntrack_helper_put(ct->helper);
 
-#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
-	if (cda[CTA_MARK-1])
-		ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1]));
-#endif
-
 	DEBUGP("conntrack with id %u inserted\n", ct->id);
 	return 0;
 
@@ -1209,7 +1203,6 @@ static int ctnetlink_expect_event(struct notifier_block *this,
 	unsigned int type;
 	unsigned char *b;
 	int flags = 0;
-	u16 proto;
 
 	if (events & IPEXP_NEW) {
 		type = IPCTNL_MSG_EXP_NEW;
@@ -1236,7 +1229,6 @@ static int ctnetlink_expect_event(struct notifier_block *this,
 		goto nfattr_failure;
 
 	nlh->nlmsg_len = skb->tail - b;
-	proto = exp->tuple.dst.protonum;
 	nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0);
 	return NOTIFY_DONE;
 
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
index 88c3712bd25..f891308b5e4 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
@@ -12,7 +12,7 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
 
-unsigned long ip_ct_generic_timeout = 600*HZ;
+unsigned int ip_ct_generic_timeout = 600*HZ;
 
 static int generic_pkt_to_tuple(const struct sk_buff *skb,
 				unsigned int dataoff,
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
index 744abb9d377..56794797d55 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
@@ -31,6 +31,8 @@
 #include <linux/ip.h>
 #include <linux/in.h>
 #include <linux/list.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
 
 static DEFINE_RWLOCK(ip_ct_gre_lock);
 #define ASSERT_READ_LOCK(x)
@@ -308,7 +310,10 @@ int __init ip_ct_proto_gre_init(void)
 	return ip_conntrack_protocol_register(&gre);
 }
 
-void __exit ip_ct_proto_gre_fini(void)
+/* This cannot be __exit, as it is invoked from ip_conntrack_helper_pptp.c's
+ * init() code on errors.
+ */
+void ip_ct_proto_gre_fini(void)
 {
 	struct list_head *pos, *n;
 
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index 5f9925db608..3021af0910f 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -16,13 +16,12 @@
 #include <linux/skbuff.h>
 #include <net/ip.h>
 #include <net/checksum.h>
-#include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv4/ip_conntrack.h>
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
 
-unsigned long ip_ct_icmp_timeout = 30*HZ;
+unsigned int ip_ct_icmp_timeout = 30*HZ;
 
 #if 0
 #define DEBUGP printk
@@ -47,20 +46,21 @@ static int icmp_pkt_to_tuple(const struct sk_buff *skb,
 	return 1;
 }
 
+/* invmap[type] = inverse ICMP type + 1; unlisted types stay 0 (no inverse). */
+static const u_int8_t invmap[] = {
+	[ICMP_ECHO] = ICMP_ECHOREPLY + 1,
+	[ICMP_ECHOREPLY] = ICMP_ECHO + 1,
+	[ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
+	[ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
+	[ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
+	[ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
+	[ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
+	[ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
+};
+
 static int icmp_invert_tuple(struct ip_conntrack_tuple *tuple,
 			     const struct ip_conntrack_tuple *orig)
 {
-	/* Add 1; spaces filled with 0. */
-	static const u_int8_t invmap[]
-		= { [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
-		    [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
-		    [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
-		    [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
-		    [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
-		    [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
-		    [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
-		    [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1};
-
 	if (orig->dst.u.icmp.type >= sizeof(invmap)
 	    || !invmap[orig->dst.u.icmp.type])
 		return 0;
@@ -110,17 +110,17 @@ static int icmp_packet(struct ip_conntrack *ct,
 	return NF_ACCEPT;
 }
 
-static const u_int8_t valid_new[] = { 
-	[ICMP_ECHO] = 1,
-	[ICMP_TIMESTAMP] = 1,
-	[ICMP_INFO_REQUEST] = 1,
-	[ICMP_ADDRESS] = 1 
-};
-
 /* Called when a new connection for this protocol found. */
 static int icmp_new(struct ip_conntrack *conntrack,
 		    const struct sk_buff *skb)
 {
+	static const u_int8_t valid_new[] = { 
+		[ICMP_ECHO] = 1,
+		[ICMP_TIMESTAMP] = 1,
+		[ICMP_INFO_REQUEST] = 1,
+		[ICMP_ADDRESS] = 1 
+	};
+
 	if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
 	    || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) {
 		/* Can't create a new ICMP `conn' with this. */
@@ -279,10 +279,6 @@ static int icmp_tuple_to_nfattr(struct sk_buff *skb,
 	NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t),
 		&t->dst.u.icmp.code);
 
-	if (t->dst.u.icmp.type >= sizeof(valid_new) 
-	    || !valid_new[t->dst.u.icmp.type])
-		return -EINVAL;
-
 	return 0;
 
 nfattr_failure:
@@ -295,7 +291,7 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[],
 	if (!tb[CTA_PROTO_ICMP_TYPE-1]
 	    || !tb[CTA_PROTO_ICMP_CODE-1]
 	    || !tb[CTA_PROTO_ICMP_ID-1])
-		return -1;
+		return -EINVAL;
 
 	tuple->dst.u.icmp.type = 
 			*(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]);
@@ -304,6 +300,10 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[],
 	tuple->src.u.icmp.id =
 			*(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
 
+	if (tuple->dst.u.icmp.type >= sizeof(invmap)
+	    || !invmap[tuple->dst.u.icmp.type])
+		return -EINVAL;
+
 	return 0;
 }
 #endif
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
index 977fb59d456..be602e8aeab 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -16,6 +16,7 @@
 #include <linux/types.h>
 #include <linux/sched.h>
 #include <linux/timer.h>
+#include <linux/interrupt.h>
 #include <linux/netfilter.h>
 #include <linux/module.h>
 #include <linux/in.h>
@@ -57,15 +58,15 @@ static const char *sctp_conntrack_names[] = {
 #define HOURS * 60 MINS
 #define DAYS  * 24 HOURS
 
-static unsigned long ip_ct_sctp_timeout_closed            =  10 SECS;
-static unsigned long ip_ct_sctp_timeout_cookie_wait       =   3 SECS;
-static unsigned long ip_ct_sctp_timeout_cookie_echoed     =   3 SECS;
-static unsigned long ip_ct_sctp_timeout_established       =   5 DAYS;
-static unsigned long ip_ct_sctp_timeout_shutdown_sent     = 300 SECS / 1000;
-static unsigned long ip_ct_sctp_timeout_shutdown_recd     = 300 SECS / 1000;
-static unsigned long ip_ct_sctp_timeout_shutdown_ack_sent =   3 SECS;
+static unsigned int ip_ct_sctp_timeout_closed            =  10 SECS;
+static unsigned int ip_ct_sctp_timeout_cookie_wait       =   3 SECS;
+static unsigned int ip_ct_sctp_timeout_cookie_echoed     =   3 SECS;
+static unsigned int ip_ct_sctp_timeout_established       =   5 DAYS;
+static unsigned int ip_ct_sctp_timeout_shutdown_sent     = 300 SECS / 1000;
+static unsigned int ip_ct_sctp_timeout_shutdown_recd     = 300 SECS / 1000;
+static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent =   3 SECS;
 
-static const unsigned long * sctp_timeouts[]
+static const unsigned int * sctp_timeouts[]
 = { NULL,                                  /* SCTP_CONNTRACK_NONE  */
     &ip_ct_sctp_timeout_closed,	           /* SCTP_CONNTRACK_CLOSED */
     &ip_ct_sctp_timeout_cookie_wait,       /* SCTP_CONNTRACK_COOKIE_WAIT */
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index e7fa29e576d..e0dc3706354 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -32,7 +32,6 @@
 
 #include <net/tcp.h>
 
-#include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv4/ip_conntrack.h>
 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
@@ -85,21 +84,21 @@ static const char *tcp_conntrack_names[] = {
 #define HOURS * 60 MINS
 #define DAYS * 24 HOURS
 
-unsigned long ip_ct_tcp_timeout_syn_sent =      2 MINS;
-unsigned long ip_ct_tcp_timeout_syn_recv =     60 SECS;
-unsigned long ip_ct_tcp_timeout_established =   5 DAYS;
-unsigned long ip_ct_tcp_timeout_fin_wait =      2 MINS;
-unsigned long ip_ct_tcp_timeout_close_wait =   60 SECS;
-unsigned long ip_ct_tcp_timeout_last_ack =     30 SECS;
-unsigned long ip_ct_tcp_timeout_time_wait =     2 MINS;
-unsigned long ip_ct_tcp_timeout_close =        10 SECS;
+unsigned int ip_ct_tcp_timeout_syn_sent =      2 MINS;
+unsigned int ip_ct_tcp_timeout_syn_recv =     60 SECS;
+unsigned int ip_ct_tcp_timeout_established =   5 DAYS;
+unsigned int ip_ct_tcp_timeout_fin_wait =      2 MINS;
+unsigned int ip_ct_tcp_timeout_close_wait =   60 SECS;
+unsigned int ip_ct_tcp_timeout_last_ack =     30 SECS;
+unsigned int ip_ct_tcp_timeout_time_wait =     2 MINS;
+unsigned int ip_ct_tcp_timeout_close =        10 SECS;
 
 /* RFC1122 says the R2 limit should be at least 100 seconds.
    Linux uses 15 packets as limit, which corresponds 
    to ~13-30min depending on RTO. */
-unsigned long ip_ct_tcp_timeout_max_retrans =     5 MINS;
+unsigned int ip_ct_tcp_timeout_max_retrans =     5 MINS;
  
-static const unsigned long * tcp_timeouts[]
+static const unsigned int * tcp_timeouts[]
 = { NULL,                              /*      TCP_CONNTRACK_NONE */
     &ip_ct_tcp_timeout_syn_sent,       /*      TCP_CONNTRACK_SYN_SENT, */
     &ip_ct_tcp_timeout_syn_recv,       /*      TCP_CONNTRACK_SYN_RECV, */
@@ -995,7 +994,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
 		        || (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
 		            && conntrack->proto.tcp.last_index == TCP_ACK_SET))
 		    && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
-			/* RST sent to invalid SYN or ACK we had let trough
+			/* RST sent to invalid SYN or ACK we had let through
 			 * at a) and c) above:
 			 *
 			 * a) SYN was in window then
@@ -1006,7 +1005,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
 			 * segments we ignored. */
 			goto in_window;
 		}
-		/* Just fall trough */
+		/* Just fall through */
 	default:
 		/* Keep compilers happy. */
 		break;
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
index f2dcac7c766..55b7d3210ad 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -11,15 +11,15 @@
 #include <linux/timer.h>
 #include <linux/netfilter.h>
 #include <linux/in.h>
+#include <linux/ip.h>
 #include <linux/udp.h>
 #include <linux/seq_file.h>
 #include <net/checksum.h>
-#include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
 
-unsigned long ip_ct_udp_timeout = 30*HZ;
-unsigned long ip_ct_udp_timeout_stream = 180*HZ;
+unsigned int ip_ct_udp_timeout = 30*HZ;
+unsigned int ip_ct_udp_timeout_stream = 180*HZ;
 
 static int udp_pkt_to_tuple(const struct sk_buff *skb,
 			     unsigned int dataoff,
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index dd476b191f4..833fcb4be5e 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -27,6 +27,7 @@
 #endif
 #include <net/checksum.h>
 #include <net/ip.h>
+#include <net/route.h>
 
 #define ASSERT_READ_LOCK(x)
 #define ASSERT_WRITE_LOCK(x)
@@ -450,30 +451,6 @@ static unsigned int ip_conntrack_defrag(unsigned int hooknum,
 	return NF_ACCEPT;
 }
 
-static unsigned int ip_refrag(unsigned int hooknum,
-			      struct sk_buff **pskb,
-			      const struct net_device *in,
-			      const struct net_device *out,
-			      int (*okfn)(struct sk_buff *))
-{
-	struct rtable *rt = (struct rtable *)(*pskb)->dst;
-
-	/* We've seen it coming out the other side: confirm */
-	if (ip_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT)
-		return NF_DROP;
-
-	/* Local packets are never produced too large for their
-	   interface.  We degfragment them at LOCAL_OUT, however,
-	   so we have to refragment them here. */
-	if ((*pskb)->len > dst_mtu(&rt->u.dst) &&
-	    !skb_shinfo(*pskb)->tso_size) {
-		/* No hook can be after us, so this should be OK. */
-		ip_fragment(*pskb, okfn);
-		return NF_STOLEN;
-	}
-	return NF_ACCEPT;
-}
-
 static unsigned int ip_conntrack_local(unsigned int hooknum,
 				       struct sk_buff **pskb,
 				       const struct net_device *in,
@@ -543,7 +520,7 @@ static struct nf_hook_ops ip_conntrack_helper_in_ops = {
 
 /* Refragmenter; last chance. */
 static struct nf_hook_ops ip_conntrack_out_ops = {
-	.hook		= ip_refrag,
+	.hook		= ip_confirm,
 	.owner		= THIS_MODULE,
 	.pf		= PF_INET,
 	.hooknum	= NF_IP_POST_ROUTING,
@@ -567,28 +544,28 @@ extern int ip_conntrack_max;
 extern unsigned int ip_conntrack_htable_size;
 
 /* From ip_conntrack_proto_tcp.c */
-extern unsigned long ip_ct_tcp_timeout_syn_sent;
-extern unsigned long ip_ct_tcp_timeout_syn_recv;
-extern unsigned long ip_ct_tcp_timeout_established;
-extern unsigned long ip_ct_tcp_timeout_fin_wait;
-extern unsigned long ip_ct_tcp_timeout_close_wait;
-extern unsigned long ip_ct_tcp_timeout_last_ack;
-extern unsigned long ip_ct_tcp_timeout_time_wait;
-extern unsigned long ip_ct_tcp_timeout_close;
-extern unsigned long ip_ct_tcp_timeout_max_retrans;
+extern unsigned int ip_ct_tcp_timeout_syn_sent;
+extern unsigned int ip_ct_tcp_timeout_syn_recv;
+extern unsigned int ip_ct_tcp_timeout_established;
+extern unsigned int ip_ct_tcp_timeout_fin_wait;
+extern unsigned int ip_ct_tcp_timeout_close_wait;
+extern unsigned int ip_ct_tcp_timeout_last_ack;
+extern unsigned int ip_ct_tcp_timeout_time_wait;
+extern unsigned int ip_ct_tcp_timeout_close;
+extern unsigned int ip_ct_tcp_timeout_max_retrans;
 extern int ip_ct_tcp_loose;
 extern int ip_ct_tcp_be_liberal;
 extern int ip_ct_tcp_max_retrans;
 
 /* From ip_conntrack_proto_udp.c */
-extern unsigned long ip_ct_udp_timeout;
-extern unsigned long ip_ct_udp_timeout_stream;
+extern unsigned int ip_ct_udp_timeout;
+extern unsigned int ip_ct_udp_timeout_stream;
 
 /* From ip_conntrack_proto_icmp.c */
-extern unsigned long ip_ct_icmp_timeout;
+extern unsigned int ip_ct_icmp_timeout;
 
 /* From ip_conntrack_proto_icmp.c */
-extern unsigned long ip_ct_generic_timeout;
+extern unsigned int ip_ct_generic_timeout;
 
 /* Log invalid packets of a given protocol */
 static int log_invalid_proto_min = 0;
@@ -967,7 +944,7 @@ module_exit(fini);
 
 /* Some modules need us, but don't depend directly on any symbol.
    They should call this. */
-void need_ip_conntrack(void)
+void need_conntrack(void)
 {
 }
 
@@ -985,7 +962,7 @@ EXPORT_SYMBOL(ip_ct_get_tuple);
 EXPORT_SYMBOL(invert_tuplepr);
 EXPORT_SYMBOL(ip_conntrack_alter_reply);
 EXPORT_SYMBOL(ip_conntrack_destroyed);
-EXPORT_SYMBOL(need_ip_conntrack);
+EXPORT_SYMBOL(need_conntrack);
 EXPORT_SYMBOL(ip_conntrack_helper_register);
 EXPORT_SYMBOL(ip_conntrack_helper_unregister);
 EXPORT_SYMBOL(ip_ct_iterate_cleanup);
diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c
index d83757a70d9..b8daab3c64a 100644
--- a/net/ipv4/netfilter/ip_nat_ftp.c
+++ b/net/ipv4/netfilter/ip_nat_ftp.c
@@ -171,7 +171,7 @@ static int __init init(void)
 /* Prior to 2.6.11, we had a ports param.  No longer, but don't break users. */
 static int warn_set(const char *val, struct kernel_param *kp)
 {
-	printk(KERN_INFO __stringify(KBUILD_MODNAME)
+	printk(KERN_INFO KBUILD_MODNAME
 	       ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
 	return 0;
 }
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
index e546203f566..ac004895781 100644
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -148,14 +148,14 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 {
 	struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
 	struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
-
-	u_int16_t msg, *cid = NULL, new_callid;
+	u_int16_t msg, new_callid;
+	unsigned int cid_off;
 
 	new_callid = htons(ct_pptp_info->pns_call_id);
 	
 	switch (msg = ntohs(ctlh->messageType)) {
 		case PPTP_OUT_CALL_REQUEST:
-			cid = &pptpReq->ocreq.callID;
+			cid_off = offsetof(union pptp_ctrl_union, ocreq.callID);
 			/* FIXME: ideally we would want to reserve a call ID
 			 * here.  current netfilter NAT core is not able to do
 			 * this :( For now we use TCP source port. This breaks
@@ -172,10 +172,10 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 			ct_pptp_info->pns_call_id = ntohs(new_callid);
 			break;
 		case PPTP_IN_CALL_REPLY:
-			cid = &pptpReq->icreq.callID;
+			cid_off = offsetof(union pptp_ctrl_union, icreq.callID);
 			break;
 		case PPTP_CALL_CLEAR_REQUEST:
-			cid = &pptpReq->clrreq.callID;
+			cid_off = offsetof(union pptp_ctrl_union, clrreq.callID);
 			break;
 		default:
 			DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
@@ -197,18 +197,15 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 
 	/* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass
 	 * down to here */
-
-	IP_NF_ASSERT(cid);
-
 	DEBUGP("altering call id from 0x%04x to 0x%04x\n",
-		ntohs(*cid), ntohs(new_callid));
+		ntohs(*(u_int16_t *)((char *)pptpReq + cid_off)), ntohs(new_callid));
 
 	/* mangle packet */
 	if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
-		(void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)),
-				 	sizeof(new_callid), 
-					(char *)&new_callid,
-				 	sizeof(new_callid)) == 0)
+	                             cid_off + sizeof(struct pptp_pkt_hdr) +
+	                             sizeof(struct PptpControlHeader),
+	                             sizeof(new_callid), (char *)&new_callid,
+	                             sizeof(new_callid)) == 0)
 		return NF_DROP;
 
 	return NF_ACCEPT;
@@ -299,31 +296,30 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 		 union pptp_ctrl_union *pptpReq)
 {
 	struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
-	u_int16_t msg, new_cid = 0, new_pcid, *pcid = NULL, *cid = NULL;
-
-	int ret = NF_ACCEPT, rv;
+	u_int16_t msg, new_cid = 0, new_pcid;
+	unsigned int pcid_off, cid_off = 0;
 
 	new_pcid = htons(nat_pptp_info->pns_call_id);
 
 	switch (msg = ntohs(ctlh->messageType)) {
 	case PPTP_OUT_CALL_REPLY:
-		pcid = &pptpReq->ocack.peersCallID;	
-		cid = &pptpReq->ocack.callID;
+		pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID);
+		cid_off = offsetof(union pptp_ctrl_union, ocack.callID);
 		break;
 	case PPTP_IN_CALL_CONNECT:
-		pcid = &pptpReq->iccon.peersCallID;
+		pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID);
 		break;
 	case PPTP_IN_CALL_REQUEST:
 		/* only need to nat in case PAC is behind NAT box */
-		break;
+		return NF_ACCEPT;
 	case PPTP_WAN_ERROR_NOTIFY:
-		pcid = &pptpReq->wanerr.peersCallID;
+		pcid_off = offsetof(union pptp_ctrl_union, wanerr.peersCallID);
 		break;
 	case PPTP_CALL_DISCONNECT_NOTIFY:
-		pcid = &pptpReq->disc.callID;
+		pcid_off = offsetof(union pptp_ctrl_union, disc.callID);
 		break;
 	case PPTP_SET_LINK_INFO:
-		pcid = &pptpReq->setlink.peersCallID;
+		pcid_off = offsetof(union pptp_ctrl_union, setlink.peersCallID);
 		break;
 
 	default:
@@ -345,35 +341,26 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 	 * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */
 
 	/* mangle packet */
-	IP_NF_ASSERT(pcid);
 	DEBUGP("altering peer call id from 0x%04x to 0x%04x\n",
-		ntohs(*pcid), ntohs(new_pcid));
-	
-	rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, 
-				      (void *)pcid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)),
-				      sizeof(new_pcid), (char *)&new_pcid, 
-				      sizeof(new_pcid));
-	if (rv != NF_ACCEPT) 
-		return rv;
+		ntohs(*(u_int16_t *)((char *)pptpReq + pcid_off)), ntohs(new_pcid));
+
+	if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
+	                             pcid_off + sizeof(struct pptp_pkt_hdr) +
+				     sizeof(struct PptpControlHeader),
+				     sizeof(new_pcid), (char *)&new_pcid,
+				     sizeof(new_pcid)) == 0)
+		return NF_DROP;
 
 	if (new_cid) {
-		IP_NF_ASSERT(cid);
 		DEBUGP("altering call id from 0x%04x to 0x%04x\n",
-			ntohs(*cid), ntohs(new_cid));
-		rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, 
-					      (void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)), 
-					      sizeof(new_cid),
-					      (char *)&new_cid, 
-					      sizeof(new_cid));
-		if (rv != NF_ACCEPT)
-			return rv;
+			ntohs(*(u_int16_t *)((char *)pptpReq + cid_off)), ntohs(new_cid));
+		if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
+		                             cid_off + sizeof(struct pptp_pkt_hdr) +
+					     sizeof(struct PptpControlHeader),
+					     sizeof(new_cid), (char *)&new_cid,
+					     sizeof(new_cid)) == 0)
+			return NF_DROP;
 	}
-
-	/* check for earlier return value of 'switch' above */
-	if (ret != NF_ACCEPT)
-		return ret;
-
-	/* great, at least we don't need to resize packets */
 	return NF_ACCEPT;
 }
 
diff --git a/net/ipv4/netfilter/ip_nat_irc.c b/net/ipv4/netfilter/ip_nat_irc.c
index de31942babe..461c833eaca 100644
--- a/net/ipv4/netfilter/ip_nat_irc.c
+++ b/net/ipv4/netfilter/ip_nat_irc.c
@@ -113,7 +113,7 @@ static int __init init(void)
 /* Prior to 2.6.11, we had a ports param.  No longer, but don't break users. */
 static int warn_set(const char *val, struct kernel_param *kp)
 {
-	printk(KERN_INFO __stringify(KBUILD_MODNAME)
+	printk(KERN_INFO KBUILD_MODNAME
 	       ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
 	return 0;
 }
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
index f7cad7cf1ae..6c4899d8046 100644
--- a/net/ipv4/netfilter/ip_nat_proto_gre.c
+++ b/net/ipv4/netfilter/ip_nat_proto_gre.c
@@ -151,42 +151,6 @@ gre_manip_pkt(struct sk_buff **pskb,
 	return 1;
 }
 
-/* print out a nat tuple */
-static unsigned int 
-gre_print(char *buffer, 
-	  const struct ip_conntrack_tuple *match,
-	  const struct ip_conntrack_tuple *mask)
-{
-	unsigned int len = 0;
-
-	if (mask->src.u.gre.key)
-		len += sprintf(buffer + len, "srckey=0x%x ", 
-				ntohl(match->src.u.gre.key));
-
-	if (mask->dst.u.gre.key)
-		len += sprintf(buffer + len, "dstkey=0x%x ",
-				ntohl(match->src.u.gre.key));
-
-	return len;
-}
-
-/* print a range of keys */
-static unsigned int 
-gre_print_range(char *buffer, const struct ip_nat_range *range)
-{
-	if (range->min.gre.key != 0 
-	    || range->max.gre.key != 0xFFFF) {
-		if (range->min.gre.key == range->max.gre.key)
-			return sprintf(buffer, "key 0x%x ",
-					ntohl(range->min.gre.key));
-		else
-			return sprintf(buffer, "keys 0x%u-0x%u ",
-					ntohl(range->min.gre.key),
-					ntohl(range->max.gre.key));
-	} else
-		return 0;
-}
-
 /* nat helper struct */
 static struct ip_nat_protocol gre = { 
 	.name		= "GRE", 
@@ -194,8 +158,6 @@ static struct ip_nat_protocol gre = {
 	.manip_pkt	= gre_manip_pkt,
 	.in_range	= gre_in_range,
 	.unique_tuple	= gre_unique_tuple,
-	.print		= gre_print,
-	.print_range	= gre_print_range,
 #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
     defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
 	.range_to_nfattr	= ip_nat_port_range_to_nfattr,
diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c
index 93871904399..31a3f4ccb99 100644
--- a/net/ipv4/netfilter/ip_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c
@@ -74,38 +74,6 @@ icmp_manip_pkt(struct sk_buff **pskb,
 	return 1;
 }
 
-static unsigned int
-icmp_print(char *buffer,
-	   const struct ip_conntrack_tuple *match,
-	   const struct ip_conntrack_tuple *mask)
-{
-	unsigned int len = 0;
-
-	if (mask->src.u.icmp.id)
-		len += sprintf(buffer + len, "id=%u ",
-			       ntohs(match->src.u.icmp.id));
-
-	if (mask->dst.u.icmp.type)
-		len += sprintf(buffer + len, "type=%u ",
-			       ntohs(match->dst.u.icmp.type));
-
-	if (mask->dst.u.icmp.code)
-		len += sprintf(buffer + len, "code=%u ",
-			       ntohs(match->dst.u.icmp.code));
-
-	return len;
-}
-
-static unsigned int
-icmp_print_range(char *buffer, const struct ip_nat_range *range)
-{
-	if (range->min.icmp.id != 0 || range->max.icmp.id != 0xFFFF)
-		return sprintf(buffer, "id %u-%u ",
-			       ntohs(range->min.icmp.id),
-			       ntohs(range->max.icmp.id));
-	else return 0;
-}
-
 struct ip_nat_protocol ip_nat_protocol_icmp = {
 	.name			= "ICMP",
 	.protonum		= IPPROTO_ICMP,
@@ -113,8 +81,6 @@ struct ip_nat_protocol ip_nat_protocol_icmp = {
 	.manip_pkt		= icmp_manip_pkt,
 	.in_range		= icmp_in_range,
 	.unique_tuple		= icmp_unique_tuple,
-	.print			= icmp_print,
-	.print_range		= icmp_print_range,
 #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
     defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
 	.range_to_nfattr	= ip_nat_port_range_to_nfattr,
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c
index 1d381bf6857..a3d14079eba 100644
--- a/net/ipv4/netfilter/ip_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c
@@ -136,40 +136,6 @@ tcp_manip_pkt(struct sk_buff **pskb,
 	return 1;
 }
 
-static unsigned int
-tcp_print(char *buffer,
-	  const struct ip_conntrack_tuple *match,
-	  const struct ip_conntrack_tuple *mask)
-{
-	unsigned int len = 0;
-
-	if (mask->src.u.tcp.port)
-		len += sprintf(buffer + len, "srcpt=%u ",
-			       ntohs(match->src.u.tcp.port));
-
-
-	if (mask->dst.u.tcp.port)
-		len += sprintf(buffer + len, "dstpt=%u ",
-			       ntohs(match->dst.u.tcp.port));
-
-	return len;
-}
-
-static unsigned int
-tcp_print_range(char *buffer, const struct ip_nat_range *range)
-{
-	if (range->min.tcp.port != 0 || range->max.tcp.port != 0xFFFF) {
-		if (range->min.tcp.port == range->max.tcp.port)
-			return sprintf(buffer, "port %u ",
-				       ntohs(range->min.tcp.port));
-		else
-			return sprintf(buffer, "ports %u-%u ",
-				       ntohs(range->min.tcp.port),
-				       ntohs(range->max.tcp.port));
-	}
-	else return 0;
-}
-
 struct ip_nat_protocol ip_nat_protocol_tcp = {
 	.name			= "TCP",
 	.protonum		= IPPROTO_TCP,
@@ -177,8 +143,6 @@ struct ip_nat_protocol ip_nat_protocol_tcp = {
 	.manip_pkt		= tcp_manip_pkt,
 	.in_range		= tcp_in_range,
 	.unique_tuple		= tcp_unique_tuple,
-	.print			= tcp_print,
-	.print_range		= tcp_print_range,
 #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
     defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
 	.range_to_nfattr	= ip_nat_port_range_to_nfattr,
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c
index c4906e1aa24..ec6053fdc86 100644
--- a/net/ipv4/netfilter/ip_nat_proto_udp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_udp.c
@@ -122,40 +122,6 @@ udp_manip_pkt(struct sk_buff **pskb,
 	return 1;
 }
 
-static unsigned int
-udp_print(char *buffer,
-	  const struct ip_conntrack_tuple *match,
-	  const struct ip_conntrack_tuple *mask)
-{
-	unsigned int len = 0;
-
-	if (mask->src.u.udp.port)
-		len += sprintf(buffer + len, "srcpt=%u ",
-			       ntohs(match->src.u.udp.port));
-
-
-	if (mask->dst.u.udp.port)
-		len += sprintf(buffer + len, "dstpt=%u ",
-			       ntohs(match->dst.u.udp.port));
-
-	return len;
-}
-
-static unsigned int
-udp_print_range(char *buffer, const struct ip_nat_range *range)
-{
-	if (range->min.udp.port != 0 || range->max.udp.port != 0xFFFF) {
-		if (range->min.udp.port == range->max.udp.port)
-			return sprintf(buffer, "port %u ",
-				       ntohs(range->min.udp.port));
-		else
-			return sprintf(buffer, "ports %u-%u ",
-				       ntohs(range->min.udp.port),
-				       ntohs(range->max.udp.port));
-	}
-	else return 0;
-}
-
 struct ip_nat_protocol ip_nat_protocol_udp = {
 	.name			= "UDP",
 	.protonum		= IPPROTO_UDP,
@@ -163,8 +129,6 @@ struct ip_nat_protocol ip_nat_protocol_udp = {
 	.manip_pkt		= udp_manip_pkt,
 	.in_range		= udp_in_range,
 	.unique_tuple		= udp_unique_tuple,
-	.print			= udp_print,
-	.print_range		= udp_print_range,
 #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
     defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
 	.range_to_nfattr	= ip_nat_port_range_to_nfattr,
diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c
index f0099a646a0..3bf04951724 100644
--- a/net/ipv4/netfilter/ip_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/ip_nat_proto_unknown.c
@@ -46,26 +46,10 @@ unknown_manip_pkt(struct sk_buff **pskb,
 	return 1;
 }
 
-static unsigned int
-unknown_print(char *buffer,
-	      const struct ip_conntrack_tuple *match,
-	      const struct ip_conntrack_tuple *mask)
-{
-	return 0;
-}
-
-static unsigned int
-unknown_print_range(char *buffer, const struct ip_nat_range *range)
-{
-	return 0;
-}
-
 struct ip_nat_protocol ip_nat_unknown_protocol = {
 	.name			= "unknown",
 	/* .me isn't set: getting a ref to this cannot fail. */
 	.manip_pkt		= unknown_manip_pkt,
 	.in_range		= unknown_in_range,
 	.unique_tuple		= unknown_unique_tuple,
-	.print			= unknown_print,
-	.print_range		= unknown_print_range
 };
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
index cb66b8bddeb..1de86282d23 100644
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ b/net/ipv4/netfilter/ip_nat_rule.c
@@ -95,6 +95,7 @@ static struct ipt_table nat_table = {
 	.valid_hooks	= NAT_VALID_HOOKS,
 	.lock		= RW_LOCK_UNLOCKED,
 	.me		= THIS_MODULE,
+	.af		= AF_INET,
 };
 
 /* Source NAT */
@@ -168,7 +169,7 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb,
 }
 
 static int ipt_snat_checkentry(const char *tablename,
-			       const struct ipt_entry *e,
+			       const void *entry,
 			       void *targinfo,
 			       unsigned int targinfosize,
 			       unsigned int hook_mask)
@@ -201,7 +202,7 @@ static int ipt_snat_checkentry(const char *tablename,
 }
 
 static int ipt_dnat_checkentry(const char *tablename,
-			       const struct ipt_entry *e,
+			       const void *entry,
 			       void *targinfo,
 			       unsigned int targinfosize,
 			       unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c
index 8acb7ed40b4..4f95d477805 100644
--- a/net/ipv4/netfilter/ip_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c
@@ -44,6 +44,7 @@
  *
  */
 #include <linux/config.h>
+#include <linux/in.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
@@ -53,6 +54,7 @@
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 #include <linux/netfilter_ipv4/ip_nat_helper.h>
 #include <linux/ip.h>
+#include <linux/udp.h>
 #include <net/checksum.h>
 #include <net/udp.h>
 #include <asm/uaccess.h>
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 30cd4e18c12..ad438fb185b 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -55,6 +55,44 @@
 			         : ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN"  \
 				    : "*ERROR*")))
 
+#ifdef CONFIG_XFRM
+static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
+{
+	struct ip_conntrack *ct;
+	struct ip_conntrack_tuple *t;
+	enum ip_conntrack_info ctinfo;
+	enum ip_conntrack_dir dir;
+	unsigned long statusbit;
+
+	ct = ip_conntrack_get(skb, &ctinfo);
+	if (ct == NULL)
+		return;
+	dir = CTINFO2DIR(ctinfo);
+	t = &ct->tuplehash[dir].tuple;
+
+	if (dir == IP_CT_DIR_ORIGINAL)
+		statusbit = IPS_DST_NAT;
+	else
+		statusbit = IPS_SRC_NAT;
+
+	if (ct->status & statusbit) {
+		fl->fl4_dst = t->dst.ip;
+		if (t->dst.protonum == IPPROTO_TCP ||
+		    t->dst.protonum == IPPROTO_UDP)
+			fl->fl_ip_dport = t->dst.u.tcp.port;
+	}
+
+	statusbit ^= IPS_NAT_MASK;
+
+	if (ct->status & statusbit) {
+		fl->fl4_src = t->src.ip;
+		if (t->dst.protonum == IPPROTO_TCP ||
+		    t->dst.protonum == IPPROTO_UDP)
+			fl->fl_ip_sport = t->src.u.tcp.port;
+	}
+}
+#endif
+		
 static unsigned int
 ip_nat_fn(unsigned int hooknum,
 	  struct sk_buff **pskb,
@@ -162,18 +200,20 @@ ip_nat_in(unsigned int hooknum,
           const struct net_device *out,
           int (*okfn)(struct sk_buff *))
 {
-	u_int32_t saddr, daddr;
+	struct ip_conntrack *ct;
+	enum ip_conntrack_info ctinfo;
 	unsigned int ret;
 
-	saddr = (*pskb)->nh.iph->saddr;
-	daddr = (*pskb)->nh.iph->daddr;
-
 	ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
 	if (ret != NF_DROP && ret != NF_STOLEN
-	    && ((*pskb)->nh.iph->saddr != saddr
-	        || (*pskb)->nh.iph->daddr != daddr)) {
-		dst_release((*pskb)->dst);
-		(*pskb)->dst = NULL;
+	    && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
+		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+		if (ct->tuplehash[dir].tuple.src.ip !=
+		    ct->tuplehash[!dir].tuple.dst.ip) {
+			dst_release((*pskb)->dst);
+			(*pskb)->dst = NULL;
+		}
 	}
 	return ret;
 }
@@ -185,29 +225,30 @@ ip_nat_out(unsigned int hooknum,
 	   const struct net_device *out,
 	   int (*okfn)(struct sk_buff *))
 {
+	struct ip_conntrack *ct;
+	enum ip_conntrack_info ctinfo;
+	unsigned int ret;
+
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr)
 	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
 		return NF_ACCEPT;
 
-	/* We can hit fragment here; forwarded packets get
-	   defragmented by connection tracking coming in, then
-	   fragmented (grr) by the forward code.
-
-	   In future: If we have nfct != NULL, AND we have NAT
-	   initialized, AND there is no helper, then we can do full
-	   NAPT on the head, and IP-address-only NAT on the rest.
-
-	   I'm starting to have nightmares about fragments.  */
-
-	if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
-		*pskb = ip_ct_gather_frags(*pskb, IP_DEFRAG_NAT_OUT);
-
-		if (!*pskb)
-			return NF_STOLEN;
+	ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
+	if (ret != NF_DROP && ret != NF_STOLEN
+	    && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
+		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+		if (ct->tuplehash[dir].tuple.src.ip !=
+		    ct->tuplehash[!dir].tuple.dst.ip
+#ifdef CONFIG_XFRM
+		    || ct->tuplehash[dir].tuple.src.u.all !=
+		       ct->tuplehash[!dir].tuple.dst.u.all
+#endif
+		    )
+			return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
 	}
-
-	return ip_nat_fn(hooknum, pskb, in, out, okfn);
+	return ret;
 }
 
 static unsigned int
@@ -217,7 +258,8 @@ ip_nat_local_fn(unsigned int hooknum,
 		const struct net_device *out,
 		int (*okfn)(struct sk_buff *))
 {
-	u_int32_t saddr, daddr;
+	struct ip_conntrack *ct;
+	enum ip_conntrack_info ctinfo;
 	unsigned int ret;
 
 	/* root is playing with raw sockets. */
@@ -225,14 +267,20 @@ ip_nat_local_fn(unsigned int hooknum,
 	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
 		return NF_ACCEPT;
 
-	saddr = (*pskb)->nh.iph->saddr;
-	daddr = (*pskb)->nh.iph->daddr;
-
 	ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
 	if (ret != NF_DROP && ret != NF_STOLEN
-	    && ((*pskb)->nh.iph->saddr != saddr
-		|| (*pskb)->nh.iph->daddr != daddr))
-		return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
+	    && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
+		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+		if (ct->tuplehash[dir].tuple.dst.ip !=
+		    ct->tuplehash[!dir].tuple.src.ip
+#ifdef CONFIG_XFRM
+		    || ct->tuplehash[dir].tuple.dst.u.all !=
+		       ct->tuplehash[!dir].tuple.src.u.all
+#endif
+		    )
+			return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
+	}
 	return ret;
 }
 
@@ -316,14 +364,18 @@ static int init_or_cleanup(int init)
 {
 	int ret = 0;
 
-	need_ip_conntrack();
+	need_conntrack();
 
 	if (!init) goto cleanup;
 
+#ifdef CONFIG_XFRM
+	BUG_ON(ip_nat_decode_session != NULL);
+	ip_nat_decode_session = nat_decode_session;
+#endif
 	ret = ip_nat_rule_init();
 	if (ret < 0) {
 		printk("ip_nat_init: can't setup rules.\n");
-		goto cleanup_nothing;
+		goto cleanup_decode_session;
 	}
 	ret = nf_register_hook(&ip_nat_in_ops);
 	if (ret < 0) {
@@ -371,7 +423,11 @@ static int init_or_cleanup(int init)
 	nf_unregister_hook(&ip_nat_in_ops);
  cleanup_rule_init:
 	ip_nat_rule_cleanup();
- cleanup_nothing:
+ cleanup_decode_session:
+#ifdef CONFIG_XFRM
+	ip_nat_decode_session = NULL;
+	synchronize_net();
+#endif
 	return ret;
 }
 
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 45886c8475e..2371b2062c2 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -2,7 +2,7 @@
  * Packet matching code.
  *
  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
- * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -11,16 +11,17 @@
  * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
  * 	- increase module usage count as soon as we have rules inside
  * 	  a table
+ * 08 Oct 2005 Harald Welte <laforge@netfilter.org>
+ * 	- Generalize into "x_tables" layer and "{ip,ip6,arp}_tables"
  */
 #include <linux/config.h>
 #include <linux/cache.h>
+#include <linux/capability.h>
 #include <linux/skbuff.h>
 #include <linux/kmod.h>
 #include <linux/vmalloc.h>
 #include <linux/netdevice.h>
 #include <linux/module.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
 #include <linux/icmp.h>
 #include <net/ip.h>
 #include <asm/uaccess.h>
@@ -29,6 +30,7 @@
 #include <linux/err.h>
 #include <linux/cpumask.h>
 
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 
 MODULE_LICENSE("GPL");
@@ -61,14 +63,6 @@ do {								\
 #else
 #define IP_NF_ASSERT(x)
 #endif
-#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
-
-static DECLARE_MUTEX(ipt_mutex);
-
-/* Must have mutex */
-#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
-#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
-#include <linux/netfilter_ipv4/listhelp.h>
 
 #if 0
 /* All the better to debug you with... */
@@ -83,48 +77,8 @@ static DECLARE_MUTEX(ipt_mutex);
    context stops packets coming through and allows user context to read
    the counters or update the rules.
 
-   To be cache friendly on SMP, we arrange them like so:
-   [ n-entries ]
-   ... cache-align padding ...
-   [ n-entries ]
-
    Hence the start of any table is given by get_table() below.  */
 
-/* The table itself */
-struct ipt_table_info
-{
-	/* Size per table */
-	unsigned int size;
-	/* Number of entries: FIXME. --RR */
-	unsigned int number;
-	/* Initial number of entries. Needed for module usage count */
-	unsigned int initial_entries;
-
-	/* Entry points and underflows */
-	unsigned int hook_entry[NF_IP_NUMHOOKS];
-	unsigned int underflow[NF_IP_NUMHOOKS];
-
-	/* ipt_entry tables: one per CPU */
-	char entries[0] ____cacheline_aligned;
-};
-
-static LIST_HEAD(ipt_target);
-static LIST_HEAD(ipt_match);
-static LIST_HEAD(ipt_tables);
-#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
-
-#ifdef CONFIG_SMP
-#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
-#else
-#define TABLE_OFFSET(t,p) 0
-#endif
-
-#if 0
-#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
-#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
-#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
-#endif
-
 /* Returns whether matches rule or not. */
 static inline int
 ip_packet_match(const struct iphdr *ip,
@@ -243,7 +197,8 @@ int do_match(struct ipt_entry_match *m,
 	     int *hotdrop)
 {
 	/* Stop iteration if it doesn't match */
-	if (!m->u.kernel.match->match(skb, in, out, m->data, offset, hotdrop))
+	if (!m->u.kernel.match->match(skb, in, out, m->data, offset, 
+	    skb->nh.iph->ihl*4, hotdrop))
 		return 1;
 	else
 		return 0;
@@ -274,6 +229,7 @@ ipt_do_table(struct sk_buff **pskb,
 	const char *indev, *outdev;
 	void *table_base;
 	struct ipt_entry *e, *back;
+	struct xt_table_info *private = table->private;
 
 	/* Initialization */
 	ip = (*pskb)->nh.iph;
@@ -290,25 +246,11 @@ ipt_do_table(struct sk_buff **pskb,
 
 	read_lock_bh(&table->lock);
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
-	table_base = (void *)table->private->entries
-		+ TABLE_OFFSET(table->private, smp_processor_id());
-	e = get_entry(table_base, table->private->hook_entry[hook]);
-
-#ifdef CONFIG_NETFILTER_DEBUG
-	/* Check noone else using our table */
-	if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
-	    && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
-		printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
-		       smp_processor_id(),
-		       table->name,
-		       &((struct ipt_entry *)table_base)->comefrom,
-		       ((struct ipt_entry *)table_base)->comefrom);
-	}
-	((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
-#endif
+	table_base = (void *)private->entries[smp_processor_id()];
+	e = get_entry(table_base, private->hook_entry[hook]);
 
 	/* For return from builtin chain */
-	back = get_entry(table_base, table->private->underflow[hook]);
+	back = get_entry(table_base, private->underflow[hook]);
 
 	do {
 		IP_NF_ASSERT(e);
@@ -394,9 +336,6 @@ ipt_do_table(struct sk_buff **pskb,
 		}
 	} while (!hotdrop);
 
-#ifdef CONFIG_NETFILTER_DEBUG
-	((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
-#endif
 	read_unlock_bh(&table->lock);
 
 #ifdef DEBUG_ALLOW_ALL
@@ -408,145 +347,6 @@ ipt_do_table(struct sk_buff **pskb,
 #endif
 }
 
-/*
- * These are weird, but module loading must not be done with mutex
- * held (since they will register), and we have to have a single
- * function to use try_then_request_module().
- */
-
-/* Find table by name, grabs mutex & ref.  Returns ERR_PTR() on error. */
-static inline struct ipt_table *find_table_lock(const char *name)
-{
-	struct ipt_table *t;
-
-	if (down_interruptible(&ipt_mutex) != 0)
-		return ERR_PTR(-EINTR);
-
-	list_for_each_entry(t, &ipt_tables, list)
-		if (strcmp(t->name, name) == 0 && try_module_get(t->me))
-			return t;
-	up(&ipt_mutex);
-	return NULL;
-}
-
-/* Find match, grabs ref.  Returns ERR_PTR() on error. */
-static inline struct ipt_match *find_match(const char *name, u8 revision)
-{
-	struct ipt_match *m;
-	int err = 0;
-
-	if (down_interruptible(&ipt_mutex) != 0)
-		return ERR_PTR(-EINTR);
-
-	list_for_each_entry(m, &ipt_match, list) {
-		if (strcmp(m->name, name) == 0) {
-			if (m->revision == revision) {
-				if (try_module_get(m->me)) {
-					up(&ipt_mutex);
-					return m;
-				}
-			} else
-				err = -EPROTOTYPE; /* Found something. */
-		}
-	}
-	up(&ipt_mutex);
-	return ERR_PTR(err);
-}
-
-/* Find target, grabs ref.  Returns ERR_PTR() on error. */
-static inline struct ipt_target *find_target(const char *name, u8 revision)
-{
-	struct ipt_target *t;
-	int err = 0;
-
-	if (down_interruptible(&ipt_mutex) != 0)
-		return ERR_PTR(-EINTR);
-
-	list_for_each_entry(t, &ipt_target, list) {
-		if (strcmp(t->name, name) == 0) {
-			if (t->revision == revision) {
-				if (try_module_get(t->me)) {
-					up(&ipt_mutex);
-					return t;
-				}
-			} else
-				err = -EPROTOTYPE; /* Found something. */
-		}
-	}
-	up(&ipt_mutex);
-	return ERR_PTR(err);
-}
-
-struct ipt_target *ipt_find_target(const char *name, u8 revision)
-{
-	struct ipt_target *target;
-
-	target = try_then_request_module(find_target(name, revision),
-					 "ipt_%s", name);
-	if (IS_ERR(target) || !target)
-		return NULL;
-	return target;
-}
-
-static int match_revfn(const char *name, u8 revision, int *bestp)
-{
-	struct ipt_match *m;
-	int have_rev = 0;
-
-	list_for_each_entry(m, &ipt_match, list) {
-		if (strcmp(m->name, name) == 0) {
-			if (m->revision > *bestp)
-				*bestp = m->revision;
-			if (m->revision == revision)
-				have_rev = 1;
-		}
-	}
-	return have_rev;
-}
-
-static int target_revfn(const char *name, u8 revision, int *bestp)
-{
-	struct ipt_target *t;
-	int have_rev = 0;
-
-	list_for_each_entry(t, &ipt_target, list) {
-		if (strcmp(t->name, name) == 0) {
-			if (t->revision > *bestp)
-				*bestp = t->revision;
-			if (t->revision == revision)
-				have_rev = 1;
-		}
-	}
-	return have_rev;
-}
-
-/* Returns true or false (if no such extension at all) */
-static inline int find_revision(const char *name, u8 revision,
-				int (*revfn)(const char *, u8, int *),
-				int *err)
-{
-	int have_rev, best = -1;
-
-	if (down_interruptible(&ipt_mutex) != 0) {
-		*err = -EINTR;
-		return 1;
-	}
-	have_rev = revfn(name, revision, &best);
-	up(&ipt_mutex);
-
-	/* Nothing at all?  Return 0 to try loading module. */
-	if (best == -1) {
-		*err = -ENOENT;
-		return 0;
-	}
-
-	*err = best;
-	if (!have_rev)
-		*err = -EPROTONOSUPPORT;
-	return 1;
-}
-
-
 /* All zeroes == unconditional rule. */
 static inline int
 unconditional(const struct ipt_ip *ip)
@@ -563,7 +363,8 @@ unconditional(const struct ipt_ip *ip)
 /* Figures out from what hook each rule can be called: returns 0 if
    there are loops.  Puts hook bitmask in comefrom. */
 static int
-mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
+mark_source_chains(struct xt_table_info *newinfo,
+		   unsigned int valid_hooks, void *entry0)
 {
 	unsigned int hook;
 
@@ -572,7 +373,7 @@ mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
 	for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
 		unsigned int pos = newinfo->hook_entry[hook];
 		struct ipt_entry *e
-			= (struct ipt_entry *)(newinfo->entries + pos);
+			= (struct ipt_entry *)(entry0 + pos);
 
 		if (!(valid_hooks & (1 << hook)))
 			continue;
@@ -622,13 +423,13 @@ mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
 						goto next;
 
 					e = (struct ipt_entry *)
-						(newinfo->entries + pos);
+						(entry0 + pos);
 				} while (oldpos == pos + e->next_offset);
 
 				/* Move along one */
 				size = e->next_offset;
 				e = (struct ipt_entry *)
-					(newinfo->entries + pos + size);
+					(entry0 + pos + size);
 				e->counters.pcnt = pos;
 				pos += size;
 			} else {
@@ -645,7 +446,7 @@ mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
 					newpos = pos + e->next_offset;
 				}
 				e = (struct ipt_entry *)
-					(newinfo->entries + newpos);
+					(entry0 + newpos);
 				e->counters.pcnt = pos;
 				pos = newpos;
 			}
@@ -708,7 +509,7 @@ check_match(struct ipt_entry_match *m,
 {
 	struct ipt_match *match;
 
-	match = try_then_request_module(find_match(m->u.user.name,
+	match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
 						   m->u.user.revision),
 					"ipt_%s", m->u.user.name);
 	if (IS_ERR(match) || !match) {
@@ -753,7 +554,8 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size,
 		goto cleanup_matches;
 
 	t = ipt_get_target(e);
-	target = try_then_request_module(find_target(t->u.user.name,
+	target = try_then_request_module(xt_find_target(AF_INET,
+						     t->u.user.name,
 						     t->u.user.revision),
 					 "ipt_%s", t->u.user.name);
 	if (IS_ERR(target) || !target) {
@@ -790,7 +592,7 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size,
 
 static inline int
 check_entry_size_and_hooks(struct ipt_entry *e,
-			   struct ipt_table_info *newinfo,
+			   struct xt_table_info *newinfo,
 			   unsigned char *base,
 			   unsigned char *limit,
 			   const unsigned int *hook_entries,
@@ -824,7 +626,7 @@ check_entry_size_and_hooks(struct ipt_entry *e,
            < 0 (not IPT_RETURN). --RR */
 
 	/* Clear counters and comefrom */
-	e->counters = ((struct ipt_counters) { 0, 0 });
+	e->counters = ((struct xt_counters) { 0, 0 });
 	e->comefrom = 0;
 
 	(*i)++;
@@ -854,7 +656,8 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i)
 static int
 translate_table(const char *name,
 		unsigned int valid_hooks,
-		struct ipt_table_info *newinfo,
+		struct xt_table_info *newinfo,
+		void *entry0,
 		unsigned int size,
 		unsigned int number,
 		const unsigned int *hook_entries,
@@ -875,11 +678,11 @@ translate_table(const char *name,
 	duprintf("translate_table: size %u\n", newinfo->size);
 	i = 0;
 	/* Walk through entries, checking offsets. */
-	ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+	ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
 				check_entry_size_and_hooks,
 				newinfo,
-				newinfo->entries,
-				newinfo->entries + size,
+				entry0,
+				entry0 + size,
 				hook_entries, underflows, &i);
 	if (ret != 0)
 		return ret;
@@ -907,95 +710,79 @@ translate_table(const char *name,
 		}
 	}
 
-	if (!mark_source_chains(newinfo, valid_hooks))
+	if (!mark_source_chains(newinfo, valid_hooks, entry0))
 		return -ELOOP;
 
 	/* Finally, each sanity check must pass */
 	i = 0;
-	ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+	ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
 				check_entry, name, size, &i);
 
 	if (ret != 0) {
-		IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+		IPT_ENTRY_ITERATE(entry0, newinfo->size,
 				  cleanup_entry, &i);
 		return ret;
 	}
 
 	/* And one copy for every other CPU */
 	for_each_cpu(i) {
-		if (i == 0)
-			continue;
-		memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
-		       newinfo->entries,
-		       SMP_ALIGN(newinfo->size));
+		if (newinfo->entries[i] && newinfo->entries[i] != entry0)
+			memcpy(newinfo->entries[i], entry0, newinfo->size);
 	}
 
 	return ret;
 }
 
-static struct ipt_table_info *
-replace_table(struct ipt_table *table,
-	      unsigned int num_counters,
-	      struct ipt_table_info *newinfo,
-	      int *error)
+/* Gets counters. */
+static inline int
+add_entry_to_counter(const struct ipt_entry *e,
+		     struct xt_counters total[],
+		     unsigned int *i)
 {
-	struct ipt_table_info *oldinfo;
-
-#ifdef CONFIG_NETFILTER_DEBUG
-	{
-		struct ipt_entry *table_base;
-		unsigned int i;
-
-		for_each_cpu(i) {
-			table_base =
-				(void *)newinfo->entries
-				+ TABLE_OFFSET(newinfo, i);
-
-			table_base->comefrom = 0xdead57ac;
-		}
-	}
-#endif
-
-	/* Do the substitution. */
-	write_lock_bh(&table->lock);
-	/* Check inside lock: is the old number correct? */
-	if (num_counters != table->private->number) {
-		duprintf("num_counters != table->private->number (%u/%u)\n",
-			 num_counters, table->private->number);
-		write_unlock_bh(&table->lock);
-		*error = -EAGAIN;
-		return NULL;
-	}
-	oldinfo = table->private;
-	table->private = newinfo;
-	newinfo->initial_entries = oldinfo->initial_entries;
-	write_unlock_bh(&table->lock);
+	ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
 
-	return oldinfo;
+	(*i)++;
+	return 0;
 }
 
-/* Gets counters. */
 static inline int
-add_entry_to_counter(const struct ipt_entry *e,
+set_entry_to_counter(const struct ipt_entry *e,
 		     struct ipt_counters total[],
 		     unsigned int *i)
 {
-	ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
+	SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
 
 	(*i)++;
 	return 0;
 }
 
 static void
-get_counters(const struct ipt_table_info *t,
-	     struct ipt_counters counters[])
+get_counters(const struct xt_table_info *t,
+	     struct xt_counters counters[])
 {
 	unsigned int cpu;
 	unsigned int i;
+	unsigned int curcpu;
+
+	/* Instead of clearing (by a previous call to memset())
+	 * the counters and using adds, we set the counters
+	 * with data used by 'current' CPU
+	 * We don't care about preemption here.
+	 */
+	curcpu = raw_smp_processor_id();
+
+	i = 0;
+	IPT_ENTRY_ITERATE(t->entries[curcpu],
+			  t->size,
+			  set_entry_to_counter,
+			  counters,
+			  &i);
 
 	for_each_cpu(cpu) {
+		if (cpu == curcpu)
+			continue;
 		i = 0;
-		IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
+		IPT_ENTRY_ITERATE(t->entries[cpu],
 				  t->size,
 				  add_entry_to_counter,
 				  counters,
@@ -1010,26 +797,32 @@ copy_entries_to_user(unsigned int total_size,
 {
 	unsigned int off, num, countersize;
 	struct ipt_entry *e;
-	struct ipt_counters *counters;
+	struct xt_counters *counters;
+	struct xt_table_info *private = table->private;
 	int ret = 0;
+	void *loc_cpu_entry;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	   (other than comefrom, which userspace doesn't care
 	   about). */
-	countersize = sizeof(struct ipt_counters) * table->private->number;
-	counters = vmalloc(countersize);
+	countersize = sizeof(struct xt_counters) * private->number;
+	counters = vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
 		return -ENOMEM;
 
 	/* First, sum counters... */
-	memset(counters, 0, countersize);
 	write_lock_bh(&table->lock);
-	get_counters(table->private, counters);
+	get_counters(private, counters);
 	write_unlock_bh(&table->lock);
 
-	/* ... then copy entire thing from CPU 0... */
-	if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
+	/* choose the copy that is on our node/cpu, ...
+	 * This choice is lazy (because current thread is
+	 * allowed to migrate to another cpu)
+	 */
+	loc_cpu_entry = private->entries[raw_smp_processor_id()];
+	/* ... then copy entire thing ... */
+	if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
 		ret = -EFAULT;
 		goto free_counters;
 	}
@@ -1041,7 +834,7 @@ copy_entries_to_user(unsigned int total_size,
 		struct ipt_entry_match *m;
 		struct ipt_entry_target *t;
 
-		e = (struct ipt_entry *)(table->private->entries + off);
+		e = (struct ipt_entry *)(loc_cpu_entry + off);
 		if (copy_to_user(userptr + off
 				 + offsetof(struct ipt_entry, counters),
 				 &counters[num],
@@ -1089,21 +882,22 @@ get_entries(const struct ipt_get_entries *entries,
 	int ret;
 	struct ipt_table *t;
 
-	t = find_table_lock(entries->name);
+	t = xt_find_table_lock(AF_INET, entries->name);
 	if (t && !IS_ERR(t)) {
+		struct xt_table_info *private = t->private;
 		duprintf("t->private->number = %u\n",
-			 t->private->number);
-		if (entries->size == t->private->size)
-			ret = copy_entries_to_user(t->private->size,
+			 private->number);
+		if (entries->size == private->size)
+			ret = copy_entries_to_user(private->size,
 						   t, uptr->entrytable);
 		else {
 			duprintf("get_entries: I've got %u not %u!\n",
-				 t->private->size,
+				 private->size,
 				 entries->size);
 			ret = -EINVAL;
 		}
 		module_put(t->me);
-		up(&ipt_mutex);
+		xt_table_unlock(t);
 	} else
 		ret = t ? PTR_ERR(t) : -ENOENT;
 
@@ -1116,8 +910,9 @@ do_replace(void __user *user, unsigned int len)
 	int ret;
 	struct ipt_replace tmp;
 	struct ipt_table *t;
-	struct ipt_table_info *newinfo, *oldinfo;
-	struct ipt_counters *counters;
+	struct xt_table_info *newinfo, *oldinfo;
+	struct xt_counters *counters;
+	void *loc_cpu_entry, *loc_cpu_old_entry;
 
 	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
 		return -EFAULT;
@@ -1126,38 +921,33 @@ do_replace(void __user *user, unsigned int len)
 	if (len != sizeof(tmp) + tmp.size)
 		return -ENOPROTOOPT;
 
-	/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
-	if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
-		return -ENOMEM;
-
-	newinfo = vmalloc(sizeof(struct ipt_table_info)
-			  + SMP_ALIGN(tmp.size) * 
-			  	(highest_possible_processor_id()+1));
+	newinfo = xt_alloc_table_info(tmp.size);
 	if (!newinfo)
 		return -ENOMEM;
 
-	if (copy_from_user(newinfo->entries, user + sizeof(tmp),
+	/* choose the copy that is our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
 			   tmp.size) != 0) {
 		ret = -EFAULT;
 		goto free_newinfo;
 	}
 
-	counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
+	counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
 	if (!counters) {
 		ret = -ENOMEM;
 		goto free_newinfo;
 	}
-	memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
 
 	ret = translate_table(tmp.name, tmp.valid_hooks,
-			      newinfo, tmp.size, tmp.num_entries,
+			      newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
 			      tmp.hook_entry, tmp.underflow);
 	if (ret != 0)
 		goto free_newinfo_counters;
 
 	duprintf("ip_tables: Translated table\n");
 
-	t = try_then_request_module(find_table_lock(tmp.name),
+	t = try_then_request_module(xt_find_table_lock(AF_INET, tmp.name),
 				    "iptable_%s", tmp.name);
 	if (!t || IS_ERR(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
@@ -1172,7 +962,7 @@ do_replace(void __user *user, unsigned int len)
 		goto put_module;
 	}
 
-	oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
+	oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret);
 	if (!oldinfo)
 		goto put_module;
 
@@ -1189,24 +979,25 @@ do_replace(void __user *user, unsigned int len)
 	/* Get the old counters. */
 	get_counters(oldinfo, counters);
 	/* Decrease module usage counts and free resource */
-	IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
-	vfree(oldinfo);
+	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
+	IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
+	xt_free_table_info(oldinfo);
 	if (copy_to_user(tmp.counters, counters,
-			 sizeof(struct ipt_counters) * tmp.num_counters) != 0)
+			 sizeof(struct xt_counters) * tmp.num_counters) != 0)
 		ret = -EFAULT;
 	vfree(counters);
-	up(&ipt_mutex);
+	xt_table_unlock(t);
 	return ret;
 
  put_module:
 	module_put(t->me);
-	up(&ipt_mutex);
+	xt_table_unlock(t);
  free_newinfo_counters_untrans:
-	IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
+	IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
  free_newinfo_counters:
 	vfree(counters);
  free_newinfo:
-	vfree(newinfo);
+	xt_free_table_info(newinfo);
 	return ret;
 }
 
@@ -1214,7 +1005,7 @@ do_replace(void __user *user, unsigned int len)
  * and everything is OK. */
 static inline int
 add_counter_to_entry(struct ipt_entry *e,
-		     const struct ipt_counters addme[],
+		     const struct xt_counters addme[],
 		     unsigned int *i)
 {
 #if 0
@@ -1236,17 +1027,19 @@ static int
 do_add_counters(void __user *user, unsigned int len)
 {
 	unsigned int i;
-	struct ipt_counters_info tmp, *paddc;
+	struct xt_counters_info tmp, *paddc;
 	struct ipt_table *t;
+	struct xt_table_info *private;
 	int ret = 0;
+	void *loc_cpu_entry;
 
 	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
 		return -EFAULT;
 
-	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
+	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
 		return -EINVAL;
 
-	paddc = vmalloc(len);
+	paddc = vmalloc_node(len, numa_node_id());
 	if (!paddc)
 		return -ENOMEM;
 
@@ -1255,27 +1048,30 @@ do_add_counters(void __user *user, unsigned int len)
 		goto free;
 	}
 
-	t = find_table_lock(tmp.name);
+	t = xt_find_table_lock(AF_INET, tmp.name);
 	if (!t || IS_ERR(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
 		goto free;
 	}
 
 	write_lock_bh(&t->lock);
-	if (t->private->number != paddc->num_counters) {
+	private = t->private;
+	if (private->number != paddc->num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
 
 	i = 0;
-	IPT_ENTRY_ITERATE(t->private->entries,
-			  t->private->size,
+	/* Choose the copy that is on our node */
+	loc_cpu_entry = private->entries[raw_smp_processor_id()];
+	IPT_ENTRY_ITERATE(loc_cpu_entry,
+			  private->size,
 			  add_counter_to_entry,
 			  paddc->counters,
 			  &i);
  unlock_up_free:
 	write_unlock_bh(&t->lock);
-	up(&ipt_mutex);
+	xt_table_unlock(t);
 	module_put(t->me);
  free:
 	vfree(paddc);
@@ -1334,25 +1130,26 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 		}
 		name[IPT_TABLE_MAXNAMELEN-1] = '\0';
 
-		t = try_then_request_module(find_table_lock(name),
+		t = try_then_request_module(xt_find_table_lock(AF_INET, name),
 					    "iptable_%s", name);
 		if (t && !IS_ERR(t)) {
 			struct ipt_getinfo info;
+			struct xt_table_info *private = t->private;
 
 			info.valid_hooks = t->valid_hooks;
-			memcpy(info.hook_entry, t->private->hook_entry,
+			memcpy(info.hook_entry, private->hook_entry,
 			       sizeof(info.hook_entry));
-			memcpy(info.underflow, t->private->underflow,
+			memcpy(info.underflow, private->underflow,
 			       sizeof(info.underflow));
-			info.num_entries = t->private->number;
-			info.size = t->private->size;
+			info.num_entries = private->number;
+			info.size = private->size;
 			memcpy(info.name, name, sizeof(info.name));
 
 			if (copy_to_user(user, &info, *len) != 0)
 				ret = -EFAULT;
 			else
 				ret = 0;
-			up(&ipt_mutex);
+			xt_table_unlock(t);
 			module_put(t->me);
 		} else
 			ret = t ? PTR_ERR(t) : -ENOENT;
@@ -1379,7 +1176,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 	case IPT_SO_GET_REVISION_MATCH:
 	case IPT_SO_GET_REVISION_TARGET: {
 		struct ipt_get_revision rev;
-		int (*revfn)(const char *, u8, int *);
+		int target;
 
 		if (*len != sizeof(rev)) {
 			ret = -EINVAL;
@@ -1391,12 +1188,13 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 		}
 
 		if (cmd == IPT_SO_GET_REVISION_TARGET)
-			revfn = target_revfn;
+			target = 1;
 		else
-			revfn = match_revfn;
+			target = 0;
 
-		try_then_request_module(find_revision(rev.name, rev.revision,
-						      revfn, &ret),
+		try_then_request_module(xt_find_revision(AF_INET, rev.name,
+							 rev.revision,
+							 target, &ret),
 					"ipt_%s", rev.name);
 		break;
 	}
@@ -1409,309 +1207,53 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 	return ret;
 }
 
-/* Registration hooks for targets. */
-int
-ipt_register_target(struct ipt_target *target)
+int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
 {
 	int ret;
-
-	ret = down_interruptible(&ipt_mutex);
-	if (ret != 0)
-		return ret;
-	list_add(&target->list, &ipt_target);
-	up(&ipt_mutex);
-	return ret;
-}
-
-void
-ipt_unregister_target(struct ipt_target *target)
-{
-	down(&ipt_mutex);
-	LIST_DELETE(&ipt_target, target);
-	up(&ipt_mutex);
-}
-
-int
-ipt_register_match(struct ipt_match *match)
-{
-	int ret;
-
-	ret = down_interruptible(&ipt_mutex);
-	if (ret != 0)
-		return ret;
-
-	list_add(&match->list, &ipt_match);
-	up(&ipt_mutex);
-
-	return ret;
-}
-
-void
-ipt_unregister_match(struct ipt_match *match)
-{
-	down(&ipt_mutex);
-	LIST_DELETE(&ipt_match, match);
-	up(&ipt_mutex);
-}
-
-int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
-{
-	int ret;
-	struct ipt_table_info *newinfo;
-	static struct ipt_table_info bootstrap
+	struct xt_table_info *newinfo;
+	static struct xt_table_info bootstrap
 		= { 0, 0, 0, { 0 }, { 0 }, { } };
+	void *loc_cpu_entry;
 
-	newinfo = vmalloc(sizeof(struct ipt_table_info)
-			  + SMP_ALIGN(repl->size) * 
-			  		(highest_possible_processor_id()+1));
+	newinfo = xt_alloc_table_info(repl->size);
 	if (!newinfo)
 		return -ENOMEM;
 
-	memcpy(newinfo->entries, repl->entries, repl->size);
+	/* Choose the copy on our node/cpu,
+	 * but don't care about preemption.
+	 */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	memcpy(loc_cpu_entry, repl->entries, repl->size);
 
 	ret = translate_table(table->name, table->valid_hooks,
-			      newinfo, repl->size,
+			      newinfo, loc_cpu_entry, repl->size,
 			      repl->num_entries,
 			      repl->hook_entry,
 			      repl->underflow);
 	if (ret != 0) {
-		vfree(newinfo);
+		xt_free_table_info(newinfo);
 		return ret;
 	}
 
-	ret = down_interruptible(&ipt_mutex);
-	if (ret != 0) {
-		vfree(newinfo);
+	if ((ret = xt_register_table(table, &bootstrap, newinfo)) != 0) {
+		xt_free_table_info(newinfo);
 		return ret;
 	}
 
-	/* Don't autoload: we'd eat our tail... */
-	if (list_named_find(&ipt_tables, table->name)) {
-		ret = -EEXIST;
-		goto free_unlock;
-	}
-
-	/* Simplifies replace_table code. */
-	table->private = &bootstrap;
-	if (!replace_table(table, 0, newinfo, &ret))
-		goto free_unlock;
-
-	duprintf("table->private->number = %u\n",
-		 table->private->number);
-	
-	/* save number of initial entries */
-	table->private->initial_entries = table->private->number;
-
-	rwlock_init(&table->lock);
-	list_prepend(&ipt_tables, table);
-
- unlock:
-	up(&ipt_mutex);
-	return ret;
-
- free_unlock:
-	vfree(newinfo);
-	goto unlock;
+	return 0;
 }
 
 void ipt_unregister_table(struct ipt_table *table)
 {
-	down(&ipt_mutex);
-	LIST_DELETE(&ipt_tables, table);
-	up(&ipt_mutex);
+	struct xt_table_info *private;
+	void *loc_cpu_entry;
 
-	/* Decrease module usage counts and free resources */
-	IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
-			  cleanup_entry, NULL);
-	vfree(table->private);
-}
-
-/* Returns 1 if the port is matched by the range, 0 otherwise */
-static inline int
-port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
-{
-	int ret;
-
-	ret = (port >= min && port <= max) ^ invert;
-	return ret;
-}
-
-static int
-tcp_find_option(u_int8_t option,
-		const struct sk_buff *skb,
-		unsigned int optlen,
-		int invert,
-		int *hotdrop)
-{
-	/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
-	u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
-	unsigned int i;
-
-	duprintf("tcp_match: finding option\n");
-
-	if (!optlen)
-		return invert;
-
-	/* If we don't have the whole header, drop packet. */
-	op = skb_header_pointer(skb,
-				skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
-				optlen, _opt);
-	if (op == NULL) {
-		*hotdrop = 1;
-		return 0;
-	}
-
-	for (i = 0; i < optlen; ) {
-		if (op[i] == option) return !invert;
-		if (op[i] < 2) i++;
-		else i += op[i+1]?:1;
-	}
-
-	return invert;
-}
-
-static int
-tcp_match(const struct sk_buff *skb,
-	  const struct net_device *in,
-	  const struct net_device *out,
-	  const void *matchinfo,
-	  int offset,
-	  int *hotdrop)
-{
-	struct tcphdr _tcph, *th;
-	const struct ipt_tcp *tcpinfo = matchinfo;
-
-	if (offset) {
-		/* To quote Alan:
-
-		   Don't allow a fragment of TCP 8 bytes in. Nobody normal
-		   causes this. Its a cracker trying to break in by doing a
-		   flag overwrite to pass the direction checks.
-		*/
-		if (offset == 1) {
-			duprintf("Dropping evil TCP offset=1 frag.\n");
-			*hotdrop = 1;
-		}
-		/* Must not be a fragment. */
-		return 0;
-	}
-
-#define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
-
-	th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
-				sizeof(_tcph), &_tcph);
-	if (th == NULL) {
-		/* We've been asked to examine this packet, and we
-		   can't.  Hence, no choice but to drop. */
-		duprintf("Dropping evil TCP offset=0 tinygram.\n");
-		*hotdrop = 1;
-		return 0;
-	}
-
-	if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
-			ntohs(th->source),
-			!!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)))
-		return 0;
-	if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
-			ntohs(th->dest),
-			!!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)))
-		return 0;
-	if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
-		      == tcpinfo->flg_cmp,
-		      IPT_TCP_INV_FLAGS))
-		return 0;
-	if (tcpinfo->option) {
-		if (th->doff * 4 < sizeof(_tcph)) {
-			*hotdrop = 1;
-			return 0;
-		}
-		if (!tcp_find_option(tcpinfo->option, skb,
-				     th->doff*4 - sizeof(_tcph),
-				     tcpinfo->invflags & IPT_TCP_INV_OPTION,
-				     hotdrop))
-			return 0;
-	}
-	return 1;
-}
-
-/* Called when user tries to insert an entry of this type. */
-static int
-tcp_checkentry(const char *tablename,
-	       const struct ipt_ip *ip,
-	       void *matchinfo,
-	       unsigned int matchsize,
-	       unsigned int hook_mask)
-{
-	const struct ipt_tcp *tcpinfo = matchinfo;
-
-	/* Must specify proto == TCP, and no unknown invflags */
-	return ip->proto == IPPROTO_TCP
-		&& !(ip->invflags & IPT_INV_PROTO)
-		&& matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
-		&& !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
-}
-
-static int
-udp_match(const struct sk_buff *skb,
-	  const struct net_device *in,
-	  const struct net_device *out,
-	  const void *matchinfo,
-	  int offset,
-	  int *hotdrop)
-{
-	struct udphdr _udph, *uh;
-	const struct ipt_udp *udpinfo = matchinfo;
-
-	/* Must not be a fragment. */
-	if (offset)
-		return 0;
-
-	uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
-				sizeof(_udph), &_udph);
-	if (uh == NULL) {
-		/* We've been asked to examine this packet, and we
-		   can't.  Hence, no choice but to drop. */
-		duprintf("Dropping evil UDP tinygram.\n");
-		*hotdrop = 1;
-		return 0;
-	}
+ 	private = xt_unregister_table(table);
 
-	return port_match(udpinfo->spts[0], udpinfo->spts[1],
-			  ntohs(uh->source),
-			  !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
-		&& port_match(udpinfo->dpts[0], udpinfo->dpts[1],
-			      ntohs(uh->dest),
-			      !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
-}
-
-/* Called when user tries to insert an entry of this type. */
-static int
-udp_checkentry(const char *tablename,
-	       const struct ipt_ip *ip,
-	       void *matchinfo,
-	       unsigned int matchinfosize,
-	       unsigned int hook_mask)
-{
-	const struct ipt_udp *udpinfo = matchinfo;
-
-	/* Must specify proto == UDP, and no unknown invflags */
-	if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
-		duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
-			 IPPROTO_UDP);
-		return 0;
-	}
-	if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
-		duprintf("ipt_udp: matchsize %u != %u\n",
-			 matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
-		return 0;
-	}
-	if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
-		duprintf("ipt_udp: unknown flags %X\n",
-			 udpinfo->invflags);
-		return 0;
-	}
-
-	return 1;
+	/* Decrease module usage counts and free resources */
+	loc_cpu_entry = private->entries[raw_smp_processor_id()];
+	IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
+	xt_free_table_info(private);
 }
 
 /* Returns 1 if the type and code is matched by the range, 0 otherwise */
@@ -1730,6 +1272,7 @@ icmp_match(const struct sk_buff *skb,
 	   const struct net_device *out,
 	   const void *matchinfo,
 	   int offset,
+	   unsigned int protoff,
 	   int *hotdrop)
 {
 	struct icmphdr _icmph, *ic;
@@ -1739,8 +1282,7 @@ icmp_match(const struct sk_buff *skb,
 	if (offset)
 		return 0;
 
-	ic = skb_header_pointer(skb, skb->nh.iph->ihl*4,
-				sizeof(_icmph), &_icmph);
+	ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph);
 	if (ic == NULL) {
 		/* We've been asked to examine this packet, and we
 		 * can't.  Hence, no choice but to drop.
@@ -1760,11 +1302,12 @@ icmp_match(const struct sk_buff *skb,
 /* Called when user tries to insert an entry of this type. */
 static int
 icmp_checkentry(const char *tablename,
-	   const struct ipt_ip *ip,
+	   const void *info,
 	   void *matchinfo,
 	   unsigned int matchsize,
 	   unsigned int hook_mask)
 {
+	const struct ipt_ip *ip = info;
 	const struct ipt_icmp *icmpinfo = matchinfo;
 
 	/* Must specify proto == ICMP, and no unknown invflags */
@@ -1794,123 +1337,22 @@ static struct nf_sockopt_ops ipt_sockopts = {
 	.get		= do_ipt_get_ctl,
 };
 
-static struct ipt_match tcp_matchstruct = {
-	.name		= "tcp",
-	.match		= &tcp_match,
-	.checkentry	= &tcp_checkentry,
-};
-
-static struct ipt_match udp_matchstruct = {
-	.name		= "udp",
-	.match		= &udp_match,
-	.checkentry	= &udp_checkentry,
-};
-
 static struct ipt_match icmp_matchstruct = {
 	.name		= "icmp",
 	.match		= &icmp_match,
 	.checkentry	= &icmp_checkentry,
 };
 
-#ifdef CONFIG_PROC_FS
-static inline int print_name(const char *i,
-			     off_t start_offset, char *buffer, int length,
-			     off_t *pos, unsigned int *count)
-{
-	if ((*count)++ >= start_offset) {
-		unsigned int namelen;
-
-		namelen = sprintf(buffer + *pos, "%s\n",
-				  i + sizeof(struct list_head));
-		if (*pos + namelen > length) {
-			/* Stop iterating */
-			return 1;
-		}
-		*pos += namelen;
-	}
-	return 0;
-}
-
-static inline int print_target(const struct ipt_target *t,
-                               off_t start_offset, char *buffer, int length,
-                               off_t *pos, unsigned int *count)
-{
-	if (t == &ipt_standard_target || t == &ipt_error_target)
-		return 0;
-	return print_name((char *)t, start_offset, buffer, length, pos, count);
-}
-
-static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
-{
-	off_t pos = 0;
-	unsigned int count = 0;
-
-	if (down_interruptible(&ipt_mutex) != 0)
-		return 0;
-
-	LIST_FIND(&ipt_tables, print_name, void *,
-		  offset, buffer, length, &pos, &count);
-
-	up(&ipt_mutex);
-
-	/* `start' hack - see fs/proc/generic.c line ~105 */
-	*start=(char *)((unsigned long)count-offset);
-	return pos;
-}
-
-static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
-{
-	off_t pos = 0;
-	unsigned int count = 0;
-
-	if (down_interruptible(&ipt_mutex) != 0)
-		return 0;
-
-	LIST_FIND(&ipt_target, print_target, struct ipt_target *,
-		  offset, buffer, length, &pos, &count);
-	
-	up(&ipt_mutex);
-
-	*start = (char *)((unsigned long)count - offset);
-	return pos;
-}
-
-static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
-{
-	off_t pos = 0;
-	unsigned int count = 0;
-
-	if (down_interruptible(&ipt_mutex) != 0)
-		return 0;
-	
-	LIST_FIND(&ipt_match, print_name, void *,
-		  offset, buffer, length, &pos, &count);
-
-	up(&ipt_mutex);
-
-	*start = (char *)((unsigned long)count - offset);
-	return pos;
-}
-
-static const struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
-{ { "ip_tables_names", ipt_get_tables },
-  { "ip_tables_targets", ipt_get_targets },
-  { "ip_tables_matches", ipt_get_matches },
-  { NULL, NULL} };
-#endif /*CONFIG_PROC_FS*/
-
 static int __init init(void)
 {
 	int ret;
 
+	xt_proto_init(AF_INET);
+
 	/* Noone else will be downing sem now, so we won't sleep */
-	down(&ipt_mutex);
-	list_append(&ipt_target, &ipt_standard_target);
-	list_append(&ipt_target, &ipt_error_target);
-	list_append(&ipt_match, &tcp_matchstruct);
-	list_append(&ipt_match, &udp_matchstruct);
-	list_append(&ipt_match, &icmp_matchstruct);
-	up(&ipt_mutex);
+	xt_register_target(AF_INET, &ipt_standard_target);
+	xt_register_target(AF_INET, &ipt_error_target);
+	xt_register_match(AF_INET, &icmp_matchstruct);
 
 	/* Register setsockopt */
 	ret = nf_register_sockopt(&ipt_sockopts);
@@ -1919,49 +1361,23 @@ static int __init init(void)
 		return ret;
 	}
 
-#ifdef CONFIG_PROC_FS
-	{
-	struct proc_dir_entry *proc;
-	int i;
-
-	for (i = 0; ipt_proc_entry[i].name; i++) {
-		proc = proc_net_create(ipt_proc_entry[i].name, 0,
-				       ipt_proc_entry[i].get_info);
-		if (!proc) {
-			while (--i >= 0)
-				proc_net_remove(ipt_proc_entry[i].name);
-			nf_unregister_sockopt(&ipt_sockopts);
-			return -ENOMEM;
-		}
-		proc->owner = THIS_MODULE;
-	}
-	}
-#endif
-
-	printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
+	printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n");
 	return 0;
 }
 
 static void __exit fini(void)
 {
 	nf_unregister_sockopt(&ipt_sockopts);
-#ifdef CONFIG_PROC_FS
-	{
-	int i;
-	for (i = 0; ipt_proc_entry[i].name; i++)
-		proc_net_remove(ipt_proc_entry[i].name);
-	}
-#endif
+
+	xt_unregister_match(AF_INET, &icmp_matchstruct);
+	xt_unregister_target(AF_INET, &ipt_error_target);
+	xt_unregister_target(AF_INET, &ipt_standard_target);
+
+	xt_proto_fini(AF_INET);
 }
 
 EXPORT_SYMBOL(ipt_register_table);
 EXPORT_SYMBOL(ipt_unregister_table);
-EXPORT_SYMBOL(ipt_register_match);
-EXPORT_SYMBOL(ipt_unregister_match);
 EXPORT_SYMBOL(ipt_do_table);
-EXPORT_SYMBOL(ipt_register_target);
-EXPORT_SYMBOL(ipt_unregister_target);
-EXPORT_SYMBOL(ipt_find_target);
-
 module_init(init);
 module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_CLASSIFY.c b/net/ipv4/netfilter/ipt_CLASSIFY.c
deleted file mode 100644
index dab78d8bd49..00000000000
--- a/net/ipv4/netfilter/ipt_CLASSIFY.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * This is a module which is used for setting the skb->priority field
- * of an skb for qdisc classification.
- */
-
-/* (C) 2001-2002 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_CLASSIFY.h>
-
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("iptables qdisc classification target module");
-
-static unsigned int
-target(struct sk_buff **pskb,
-       const struct net_device *in,
-       const struct net_device *out,
-       unsigned int hooknum,
-       const void *targinfo,
-       void *userinfo)
-{
-	const struct ipt_classify_target_info *clinfo = targinfo;
-
-	if((*pskb)->priority != clinfo->priority) 
-		(*pskb)->priority = clinfo->priority;
-
-	return IPT_CONTINUE;
-}
-
-static int
-checkentry(const char *tablename,
-           const struct ipt_entry *e,
-           void *targinfo,
-           unsigned int targinfosize,
-           unsigned int hook_mask)
-{
-	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_classify_target_info))){
-		printk(KERN_ERR "CLASSIFY: invalid size (%u != %Zu).\n",
-		       targinfosize,
-		       IPT_ALIGN(sizeof(struct ipt_classify_target_info)));
-		return 0;
-	}
-	
-	if (hook_mask & ~((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) |
-	                  (1 << NF_IP_POST_ROUTING))) {
-		printk(KERN_ERR "CLASSIFY: only valid in LOCAL_OUT, FORWARD "
-		                "and POST_ROUTING.\n");
-		return 0;
-	}
-
-	if (strcmp(tablename, "mangle") != 0) {
-		printk(KERN_ERR "CLASSIFY: can only be called from "
-		                "\"mangle\" table, not \"%s\".\n",
-		                tablename);
-		return 0;
-	}
-
-	return 1;
-}
-
-static struct ipt_target ipt_classify_reg = { 
-	.name 		= "CLASSIFY", 
-	.target 	= target,
-	.checkentry	= checkentry,
-	.me 		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	return ipt_register_target(&ipt_classify_reg);
-}
-
-static void __exit fini(void)
-{
-	ipt_unregister_target(&ipt_classify_reg);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 45c52d8f4d9..d9bc971f03a 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -379,12 +379,13 @@ target(struct sk_buff **pskb,
 
 static int
 checkentry(const char *tablename,
-	   const struct ipt_entry *e,
+	   const void *e_void,
            void *targinfo,
            unsigned int targinfosize,
            unsigned int hook_mask)
 {
 	struct ipt_clusterip_tgt_info *cipinfo = targinfo;
+	const struct ipt_entry *e = e_void;
 
 	struct clusterip_config *config;
 
diff --git a/net/ipv4/netfilter/ipt_CONNMARK.c b/net/ipv4/netfilter/ipt_CONNMARK.c
deleted file mode 100644
index 8acac5a40a9..00000000000
--- a/net/ipv4/netfilter/ipt_CONNMARK.c
+++ /dev/null
@@ -1,122 +0,0 @@
-/* This kernel module is used to modify the connection mark values, or
- * to optionally restore the skb nfmark from the connection mark
- *
- * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
- * by Henrik Nordstrom <hno@marasystems.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-
-MODULE_AUTHOR("Henrik Nordstrom <hno@marasytems.com>");
-MODULE_DESCRIPTION("IP tables CONNMARK matching module");
-MODULE_LICENSE("GPL");
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_CONNMARK.h>
-#include <net/netfilter/nf_conntrack_compat.h>
-
-static unsigned int
-target(struct sk_buff **pskb,
-       const struct net_device *in,
-       const struct net_device *out,
-       unsigned int hooknum,
-       const void *targinfo,
-       void *userinfo)
-{
-	const struct ipt_connmark_target_info *markinfo = targinfo;
-	u_int32_t diff;
-	u_int32_t nfmark;
-	u_int32_t newmark;
-	u_int32_t ctinfo;
-	u_int32_t *ctmark = nf_ct_get_mark(*pskb, &ctinfo);
-
-	if (ctmark) {
-	    switch(markinfo->mode) {
-	    case IPT_CONNMARK_SET:
-		newmark = (*ctmark & ~markinfo->mask) | markinfo->mark;
-		if (newmark != *ctmark)
-		    *ctmark = newmark;
-		break;
-	    case IPT_CONNMARK_SAVE:
-		newmark = (*ctmark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask);
-		if (*ctmark != newmark)
-		    *ctmark = newmark;
-		break;
-	    case IPT_CONNMARK_RESTORE:
-		nfmark = (*pskb)->nfmark;
-		diff = (*ctmark ^ nfmark) & markinfo->mask;
-		if (diff != 0)
-		    (*pskb)->nfmark = nfmark ^ diff;
-		break;
-	    }
-	}
-
-	return IPT_CONTINUE;
-}
-
-static int
-checkentry(const char *tablename,
-	   const struct ipt_entry *e,
-	   void *targinfo,
-	   unsigned int targinfosize,
-	   unsigned int hook_mask)
-{
-	struct ipt_connmark_target_info *matchinfo = targinfo;
-	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_connmark_target_info))) {
-		printk(KERN_WARNING "CONNMARK: targinfosize %u != %Zu\n",
-		       targinfosize,
-		       IPT_ALIGN(sizeof(struct ipt_connmark_target_info)));
-		return 0;
-	}
-
-	if (matchinfo->mode == IPT_CONNMARK_RESTORE) {
-	    if (strcmp(tablename, "mangle") != 0) {
-		    printk(KERN_WARNING "CONNMARK: restore can only be called from \"mangle\" table, not \"%s\"\n", tablename);
-		    return 0;
-	    }
-	}
-
-	if (matchinfo->mark > 0xffffffff || matchinfo->mask > 0xffffffff) {
-		printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n");
-		return 0;
-	}
-
-	return 1;
-}
-
-static struct ipt_target ipt_connmark_reg = {
-	.name = "CONNMARK",
-	.target = &target,
-	.checkentry = &checkentry,
-	.me = THIS_MODULE
-};
-
-static int __init init(void)
-{
-	need_ip_conntrack();
-	return ipt_register_target(&ipt_connmark_reg);
-}
-
-static void __exit fini(void)
-{
-	ipt_unregister_target(&ipt_connmark_reg);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c
index 6e319570a28..898cdf79ce1 100644
--- a/net/ipv4/netfilter/ipt_DSCP.c
+++ b/net/ipv4/netfilter/ipt_DSCP.c
@@ -57,7 +57,7 @@ target(struct sk_buff **pskb,
 
 static int
 checkentry(const char *tablename,
-	   const struct ipt_entry *e,
+	   const void *e_void,
            void *targinfo,
            unsigned int targinfosize,
            unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index a1319693f64..706445426a6 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -113,12 +113,13 @@ target(struct sk_buff **pskb,
 
 static int
 checkentry(const char *tablename,
-	   const struct ipt_entry *e,
+	   const void *e_void,
            void *targinfo,
            unsigned int targinfosize,
            unsigned int hook_mask)
 {
 	const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo;
+	const struct ipt_entry *e = e_void;
 
 	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_ECN_info))) {
 		printk(KERN_WARNING "ECN: targinfosize %u != %Zu\n",
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 30be0f1dae3..6606ddb66a2 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -431,7 +431,7 @@ ipt_log_target(struct sk_buff **pskb,
 }
 
 static int ipt_log_checkentry(const char *tablename,
-			      const struct ipt_entry *e,
+			      const void *e,
 			      void *targinfo,
 			      unsigned int targinfosize,
 			      unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_MARK.c b/net/ipv4/netfilter/ipt_MARK.c
deleted file mode 100644
index 52b4f2c296b..00000000000
--- a/net/ipv4/netfilter/ipt_MARK.c
+++ /dev/null
@@ -1,172 +0,0 @@
-/* This is a module which is used for setting the NFMARK field of an skb. */
-
-/* (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_MARK.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
-MODULE_DESCRIPTION("iptables MARK modification module");
-
-static unsigned int
-target_v0(struct sk_buff **pskb,
-	  const struct net_device *in,
-	  const struct net_device *out,
-	  unsigned int hooknum,
-	  const void *targinfo,
-	  void *userinfo)
-{
-	const struct ipt_mark_target_info *markinfo = targinfo;
-
-	if((*pskb)->nfmark != markinfo->mark)
-		(*pskb)->nfmark = markinfo->mark;
-
-	return IPT_CONTINUE;
-}
-
-static unsigned int
-target_v1(struct sk_buff **pskb,
-	  const struct net_device *in,
-	  const struct net_device *out,
-	  unsigned int hooknum,
-	  const void *targinfo,
-	  void *userinfo)
-{
-	const struct ipt_mark_target_info_v1 *markinfo = targinfo;
-	int mark = 0;
-
-	switch (markinfo->mode) {
-	case IPT_MARK_SET:
-		mark = markinfo->mark;
-		break;
-		
-	case IPT_MARK_AND:
-		mark = (*pskb)->nfmark & markinfo->mark;
-		break;
-		
-	case IPT_MARK_OR:
-		mark = (*pskb)->nfmark | markinfo->mark;
-		break;
-	}
-
-	if((*pskb)->nfmark != mark)
-		(*pskb)->nfmark = mark;
-
-	return IPT_CONTINUE;
-}
-
-
-static int
-checkentry_v0(const char *tablename,
-	      const struct ipt_entry *e,
-	      void *targinfo,
-	      unsigned int targinfosize,
-	      unsigned int hook_mask)
-{
-	struct ipt_mark_target_info *markinfo = targinfo;
-
-	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_mark_target_info))) {
-		printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n",
-		       targinfosize,
-		       IPT_ALIGN(sizeof(struct ipt_mark_target_info)));
-		return 0;
-	}
-
-	if (strcmp(tablename, "mangle") != 0) {
-		printk(KERN_WARNING "MARK: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
-		return 0;
-	}
-
-	if (markinfo->mark > 0xffffffff) {
-		printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n");
-		return 0;
-	}
-
-	return 1;
-}
-
-static int
-checkentry_v1(const char *tablename,
-	      const struct ipt_entry *e,
-	      void *targinfo,
-	      unsigned int targinfosize,
-	      unsigned int hook_mask)
-{
-	struct ipt_mark_target_info_v1 *markinfo = targinfo;
-
-	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_mark_target_info_v1))){
-		printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n",
-		       targinfosize,
-		       IPT_ALIGN(sizeof(struct ipt_mark_target_info_v1)));
-		return 0;
-	}
-
-	if (strcmp(tablename, "mangle") != 0) {
-		printk(KERN_WARNING "MARK: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
-		return 0;
-	}
-
-	if (markinfo->mode != IPT_MARK_SET
-	    && markinfo->mode != IPT_MARK_AND
-	    && markinfo->mode != IPT_MARK_OR) {
-		printk(KERN_WARNING "MARK: unknown mode %u\n",
-		       markinfo->mode);
-		return 0;
-	}
-
-	if (markinfo->mark > 0xffffffff) {
-		printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n");
-		return 0;
-	}
-
-	return 1;
-}
-
-static struct ipt_target ipt_mark_reg_v0 = {
-	.name		= "MARK",
-	.target		= target_v0,
-	.checkentry	= checkentry_v0,
-	.me		= THIS_MODULE,
-	.revision	= 0,
-};
-
-static struct ipt_target ipt_mark_reg_v1 = {
-	.name		= "MARK",
-	.target		= target_v1,
-	.checkentry	= checkentry_v1,
-	.me		= THIS_MODULE,
-	.revision	= 1,
-};
-
-static int __init init(void)
-{
-	int err;
-
-	err = ipt_register_target(&ipt_mark_reg_v0);
-	if (!err) {
-		err = ipt_register_target(&ipt_mark_reg_v1);
-		if (err)
-			ipt_unregister_target(&ipt_mark_reg_v0);
-	}
-	return err;
-}
-
-static void __exit fini(void)
-{
-	ipt_unregister_target(&ipt_mark_reg_v0);
-	ipt_unregister_target(&ipt_mark_reg_v1);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 275a174c6fe..12c56d3343c 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -11,6 +11,7 @@
 
 #include <linux/config.h>
 #include <linux/types.h>
+#include <linux/inetdevice.h>
 #include <linux/ip.h>
 #include <linux/timer.h>
 #include <linux/module.h>
@@ -18,6 +19,7 @@
 #include <net/protocol.h>
 #include <net/ip.h>
 #include <net/checksum.h>
+#include <net/route.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv4/ip_nat_rule.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
@@ -38,7 +40,7 @@ static DEFINE_RWLOCK(masq_lock);
 /* FIXME: Multiple targets. --RR */
 static int
 masquerade_check(const char *tablename,
-		 const struct ipt_entry *e,
+		 const void *e,
 		 void *targinfo,
 		 unsigned int targinfosize,
 		 unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index e6e7b609536..b074467fe67 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -31,7 +31,7 @@ MODULE_DESCRIPTION("iptables 1:1 NAT mapping of IP networks target");
 
 static int
 check(const char *tablename,
-      const struct ipt_entry *e,
+      const void *e,
       void *targinfo,
       unsigned int targinfosize,
       unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_NFQUEUE.c b/net/ipv4/netfilter/ipt_NFQUEUE.c
deleted file mode 100644
index 3cedc9be880..00000000000
--- a/net/ipv4/netfilter/ipt_NFQUEUE.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/* iptables module for using new netfilter netlink queue
- *
- * (C) 2005 by Harald Welte <laforge@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as 
- * published by the Free Software Foundation.
- * 
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_NFQUEUE.h>
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("iptables NFQUEUE target");
-MODULE_LICENSE("GPL");
-
-static unsigned int
-target(struct sk_buff **pskb,
-       const struct net_device *in,
-       const struct net_device *out,
-       unsigned int hooknum,
-       const void *targinfo,
-       void *userinfo)
-{
-	const struct ipt_NFQ_info *tinfo = targinfo;
-
-	return NF_QUEUE_NR(tinfo->queuenum);
-}
-
-static int
-checkentry(const char *tablename,
-	   const struct ipt_entry *e,
-           void *targinfo,
-           unsigned int targinfosize,
-           unsigned int hook_mask)
-{
-	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_NFQ_info))) {
-		printk(KERN_WARNING "NFQUEUE: targinfosize %u != %Zu\n",
-		       targinfosize,
-		       IPT_ALIGN(sizeof(struct ipt_NFQ_info)));
-		return 0;
-	}
-
-	return 1;
-}
-
-static struct ipt_target ipt_NFQ_reg = {
-	.name		= "NFQUEUE",
-	.target		= target,
-	.checkentry	= checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	return ipt_register_target(&ipt_NFQ_reg);
-}
-
-static void __exit fini(void)
-{
-	ipt_unregister_target(&ipt_NFQ_reg);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_NOTRACK.c b/net/ipv4/netfilter/ipt_NOTRACK.c
deleted file mode 100644
index e3c69d072c6..00000000000
--- a/net/ipv4/netfilter/ipt_NOTRACK.c
+++ /dev/null
@@ -1,76 +0,0 @@
-/* This is a module which is used for setting up fake conntracks
- * on packets so that they are not seen by the conntrack/NAT code.
- */
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <net/netfilter/nf_conntrack_compat.h>
-
-static unsigned int
-target(struct sk_buff **pskb,
-       const struct net_device *in,
-       const struct net_device *out,
-       unsigned int hooknum,
-       const void *targinfo,
-       void *userinfo)
-{
-	/* Previously seen (loopback)? Ignore. */
-	if ((*pskb)->nfct != NULL)
-		return IPT_CONTINUE;
-
-	/* Attach fake conntrack entry. 
-	   If there is a real ct entry correspondig to this packet, 
-	   it'll hang aroun till timing out. We don't deal with it
-	   for performance reasons. JK */
-	nf_ct_untrack(*pskb);
-	(*pskb)->nfctinfo = IP_CT_NEW;
-	nf_conntrack_get((*pskb)->nfct);
-
-	return IPT_CONTINUE;
-}
-
-static int
-checkentry(const char *tablename,
-	   const struct ipt_entry *e,
-           void *targinfo,
-           unsigned int targinfosize,
-           unsigned int hook_mask)
-{
-	if (targinfosize != 0) {
-		printk(KERN_WARNING "NOTRACK: targinfosize %u != 0\n",
-		       targinfosize);
-		return 0;
-	}
-
-	if (strcmp(tablename, "raw") != 0) {
-		printk(KERN_WARNING "NOTRACK: can only be called from \"raw\" table, not \"%s\"\n", tablename);
-		return 0;
-	}
-
-	return 1;
-}
-
-static struct ipt_target ipt_notrack_reg = { 
-	.name = "NOTRACK", 
-	.target = target, 
-	.checkentry = checkentry,
-	.me = THIS_MODULE 
-};
-
-static int __init init(void)
-{
-	if (ipt_register_target(&ipt_notrack_reg))
-		return -EINVAL;
-
-	return 0;
-}
-
-static void __exit fini(void)
-{
-	ipt_unregister_target(&ipt_notrack_reg);
-}
-
-module_init(init);
-module_exit(fini);
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 5245bfd33d5..140be51f2f0 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -33,7 +33,7 @@ MODULE_DESCRIPTION("iptables REDIRECT target module");
 /* FIXME: Take multiple ranges --RR */
 static int
 redirect_check(const char *tablename,
-	       const struct ipt_entry *e,
+	       const void *e,
 	       void *targinfo,
 	       unsigned int targinfosize,
 	       unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index f057025a719..3eb47aae78c 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -203,7 +203,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 						sizeof(struct tcphdr), 0));
 
 	/* Adjust IP TTL, DF */
-	nskb->nh.iph->ttl = MAXTTL;
+	nskb->nh.iph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
 	/* Set DF, id = 0 */
 	nskb->nh.iph->frag_off = htons(IP_DF);
 	nskb->nh.iph->id = 0;
@@ -282,12 +282,13 @@ static unsigned int reject(struct sk_buff **pskb,
 }
 
 static int check(const char *tablename,
-		 const struct ipt_entry *e,
+		 const void *e_void,
 		 void *targinfo,
 		 unsigned int targinfosize,
 		 unsigned int hook_mask)
 {
  	const struct ipt_reject_info *rejinfo = targinfo;
+	const struct ipt_entry *e = e_void;
 
  	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_reject_info))) {
   		DEBUGP("REJECT: targinfosize %u != 0\n", targinfosize);
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index 7a0536d864a..a22de59bba0 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -49,7 +49,7 @@ MODULE_DESCRIPTION("iptables special SNAT module for consistent sourceip");
 
 static int
 same_check(const char *tablename,
-	      const struct ipt_entry *e,
+	      const void *e,
 	      void *targinfo,
 	      unsigned int targinfosize,
 	      unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c
index 8db70d6908c..c122841e182 100644
--- a/net/ipv4/netfilter/ipt_TCPMSS.c
+++ b/net/ipv4/netfilter/ipt_TCPMSS.c
@@ -210,12 +210,13 @@ static inline int find_syn_match(const struct ipt_entry_match *m)
 /* Must specify -p tcp --syn/--tcp-flags SYN */
 static int
 ipt_tcpmss_checkentry(const char *tablename,
-		      const struct ipt_entry *e,
+		      const void *e_void,
 		      void *targinfo,
 		      unsigned int targinfosize,
 		      unsigned int hook_mask)
 {
 	const struct ipt_tcpmss_info *tcpmssinfo = targinfo;
+	const struct ipt_entry *e = e_void;
 
 	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_tcpmss_info))) {
 		DEBUGP("ipt_tcpmss_checkentry: targinfosize %u != %u\n",
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index deadb36d442..3a44a56db23 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -52,7 +52,7 @@ target(struct sk_buff **pskb,
 
 static int
 checkentry(const char *tablename,
-	   const struct ipt_entry *e,
+	   const void *e_void,
            void *targinfo,
            unsigned int targinfosize,
            unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index b9ae6a9382f..b769eb23197 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -66,7 +66,7 @@ ipt_ttl_target(struct sk_buff **pskb, const struct net_device *in,
 }
 
 static int ipt_ttl_checkentry(const char *tablename,
-		const struct ipt_entry *e,
+		const void *e,
 		void *targinfo,
 		unsigned int targinfosize,
 		unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 2883ccd8a91..641dbc47765 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -77,15 +77,15 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG);
 #define PRINTR(format, args...) do { if (net_ratelimit()) printk(format , ## args); } while (0)
 
 static unsigned int nlbufsiz = 4096;
-module_param(nlbufsiz, uint, 0600); /* FIXME: Check size < 128k --RR */
+module_param(nlbufsiz, uint, 0400);
 MODULE_PARM_DESC(nlbufsiz, "netlink buffer size");
 
 static unsigned int flushtimeout = 10;
-module_param(flushtimeout, int, 0600);
+module_param(flushtimeout, uint, 0600);
 MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)");
 
-static unsigned int nflog = 1;
-module_param(nflog, int, 0400);
+static int nflog = 1;
+module_param(nflog, bool, 0400);
 MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
 
 /* global data structures */
@@ -330,7 +330,7 @@ static void ipt_logfn(unsigned int pf,
 }
 
 static int ipt_ulog_checkentry(const char *tablename,
-			       const struct ipt_entry *e,
+			       const void *e,
 			       void *targinfo,
 			       unsigned int targinfosize,
 			       unsigned int hookmask)
@@ -376,7 +376,7 @@ static int __init init(void)
 
 	DEBUGP("ipt_ULOG: init module\n");
 
-	if (nlbufsiz >= 128*1024) {
+	if (nlbufsiz > 128*1024) {
 		printk("Netlink buffer has to be <= 128kB\n");
 		return -EINVAL;
 	}
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index e19c2a52d00..d6b83a97651 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -29,7 +29,7 @@ static inline int match_type(u_int32_t addr, u_int16_t mask)
 
 static int match(const struct sk_buff *skb, const struct net_device *in,
 		 const struct net_device *out, const void *matchinfo,
-		 int offset, int *hotdrop)
+		 int offset, unsigned int protoff, int *hotdrop)
 {
 	const struct ipt_addrtype_info *info = matchinfo;
 	const struct iphdr *iph = skb->nh.iph;
@@ -43,7 +43,7 @@ static int match(const struct sk_buff *skb, const struct net_device *in,
 	return ret;
 }
 
-static int checkentry(const char *tablename, const struct ipt_ip *ip,
+static int checkentry(const char *tablename, const void *ip,
 		      void *matchinfo, unsigned int matchsize,
 		      unsigned int hook_mask)
 {
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index a0fea847cb7..144adfec13c 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -41,6 +41,7 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protoff,
       int *hotdrop)
 {
 	struct ip_auth_hdr _ahdr, *ah;
@@ -50,7 +51,7 @@ match(const struct sk_buff *skb,
 	if (offset)
 		return 0;
 
-	ah = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+	ah = skb_header_pointer(skb, protoff,
 				sizeof(_ahdr), &_ahdr);
 	if (ah == NULL) {
 		/* We've been asked to examine this packet, and we
@@ -69,12 +70,13 @@ match(const struct sk_buff *skb,
 /* Called when user tries to insert an entry of this type. */
 static int
 checkentry(const char *tablename,
-	   const struct ipt_ip *ip,
+	   const void *ip_void,
 	   void *matchinfo,
 	   unsigned int matchinfosize,
 	   unsigned int hook_mask)
 {
 	const struct ipt_ah *ahinfo = matchinfo;
+	const struct ipt_ip *ip = ip_void;
 
 	/* Must specify proto == AH, and no unknown invflags */
 	if (ip->proto != IPPROTO_AH || (ip->invflags & IPT_INV_PROTO)) {
diff --git a/net/ipv4/netfilter/ipt_comment.c b/net/ipv4/netfilter/ipt_comment.c
deleted file mode 100644
index 6b76a1ea524..00000000000
--- a/net/ipv4/netfilter/ipt_comment.c
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Implements a dummy match to allow attaching comments to rules
- *
- * 2003-05-13 Brad Fisher (brad@info-link.net)
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_comment.h>
-
-MODULE_AUTHOR("Brad Fisher <brad@info-link.net>");
-MODULE_DESCRIPTION("iptables comment match module");
-MODULE_LICENSE("GPL");
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      int *hotdrop)
-{
-	/* We always match */
-	return 1;
-}
-
-static int
-checkentry(const char *tablename,
-           const struct ipt_ip *ip,
-           void *matchinfo,
-           unsigned int matchsize,
-           unsigned int hook_mask)
-{
-	/* Check the size */
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_comment_info)))
-		return 0;
-	return 1;
-}
-
-static struct ipt_match comment_match = {
-	.name		= "comment",
-	.match		= match,
-	.checkentry	= checkentry,
-	.me		= THIS_MODULE
-};
-
-static int __init init(void)
-{
-	return ipt_register_match(&comment_match);
-}
-
-static void __exit fini(void)
-{
-	ipt_unregister_match(&comment_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_connbytes.c b/net/ipv4/netfilter/ipt_connbytes.c
deleted file mode 100644
index d68a048b717..00000000000
--- a/net/ipv4/netfilter/ipt_connbytes.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/* Kernel module to match connection tracking byte counter.
- * GPL (C) 2002 Martin Devera (devik@cdi.cz).
- *
- * 2004-07-20 Harald Welte <laforge@netfilter.org>
- * 	- reimplemented to use per-connection accounting counters
- * 	- add functionality to match number of packets
- * 	- add functionality to match average packet size
- * 	- add support to match directions seperately
- *
- */
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <net/netfilter/nf_conntrack_compat.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_connbytes.h>
-
-#include <asm/div64.h>
-#include <asm/bitops.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("iptables match for matching number of pkts/bytes per connection");
-
-/* 64bit divisor, dividend and result. dynamic precision */
-static u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
-{
-	u_int32_t d = divisor;
-
-	if (divisor > 0xffffffffULL) {
-		unsigned int shift = fls(divisor >> 32);
-
-		d = divisor >> shift;
-		dividend >>= shift;
-	}
-
-	do_div(dividend, d);
-	return dividend;
-}
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      int *hotdrop)
-{
-	const struct ipt_connbytes_info *sinfo = matchinfo;
-	u_int64_t what = 0;	/* initialize to make gcc happy */
-	const struct ip_conntrack_counter *counters;
-
-	if (!(counters = nf_ct_get_counters(skb)))
-		return 0; /* no match */
-
-	switch (sinfo->what) {
-	case IPT_CONNBYTES_PKTS:
-		switch (sinfo->direction) {
-		case IPT_CONNBYTES_DIR_ORIGINAL:
-			what = counters[IP_CT_DIR_ORIGINAL].packets;
-			break;
-		case IPT_CONNBYTES_DIR_REPLY:
-			what = counters[IP_CT_DIR_REPLY].packets;
-			break;
-		case IPT_CONNBYTES_DIR_BOTH:
-			what = counters[IP_CT_DIR_ORIGINAL].packets;
-			what += counters[IP_CT_DIR_REPLY].packets;
-			break;
-		}
-		break;
-	case IPT_CONNBYTES_BYTES:
-		switch (sinfo->direction) {
-		case IPT_CONNBYTES_DIR_ORIGINAL:
-			what = counters[IP_CT_DIR_ORIGINAL].bytes;
-			break;
-		case IPT_CONNBYTES_DIR_REPLY:
-			what = counters[IP_CT_DIR_REPLY].bytes;
-			break;
-		case IPT_CONNBYTES_DIR_BOTH:
-			what = counters[IP_CT_DIR_ORIGINAL].bytes;
-			what += counters[IP_CT_DIR_REPLY].bytes;
-			break;
-		}
-		break;
-	case IPT_CONNBYTES_AVGPKT:
-		switch (sinfo->direction) {
-		case IPT_CONNBYTES_DIR_ORIGINAL:
-			what = div64_64(counters[IP_CT_DIR_ORIGINAL].bytes,
-					counters[IP_CT_DIR_ORIGINAL].packets);
-			break;
-		case IPT_CONNBYTES_DIR_REPLY:
-			what = div64_64(counters[IP_CT_DIR_REPLY].bytes,
-					counters[IP_CT_DIR_REPLY].packets);
-			break;
-		case IPT_CONNBYTES_DIR_BOTH:
-			{
-				u_int64_t bytes;
-				u_int64_t pkts;
-				bytes = counters[IP_CT_DIR_ORIGINAL].bytes +
-					counters[IP_CT_DIR_REPLY].bytes;
-				pkts = counters[IP_CT_DIR_ORIGINAL].packets+
-					counters[IP_CT_DIR_REPLY].packets;
-
-				/* FIXME_THEORETICAL: what to do if sum
-				 * overflows ? */
-
-				what = div64_64(bytes, pkts);
-			}
-			break;
-		}
-		break;
-	}
-
-	if (sinfo->count.to)
-		return (what <= sinfo->count.to && what >= sinfo->count.from);
-	else
-		return (what >= sinfo->count.from);
-}
-
-static int check(const char *tablename,
-		 const struct ipt_ip *ip,
-		 void *matchinfo,
-		 unsigned int matchsize,
-		 unsigned int hook_mask)
-{
-	const struct ipt_connbytes_info *sinfo = matchinfo;
-
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_connbytes_info)))
-		return 0;
-
-	if (sinfo->what != IPT_CONNBYTES_PKTS &&
-	    sinfo->what != IPT_CONNBYTES_BYTES &&
-	    sinfo->what != IPT_CONNBYTES_AVGPKT)
-		return 0;
-
-	if (sinfo->direction != IPT_CONNBYTES_DIR_ORIGINAL &&
-	    sinfo->direction != IPT_CONNBYTES_DIR_REPLY &&
-	    sinfo->direction != IPT_CONNBYTES_DIR_BOTH)
-		return 0;
-
-	return 1;
-}
-
-static struct ipt_match state_match = {
-	.name		= "connbytes",
-	.match		= &match,
-	.checkentry	= &check,
-	.me		= THIS_MODULE
-};
-
-static int __init init(void)
-{
-	return ipt_register_match(&state_match);
-}
-
-static void __exit fini(void)
-{
-	ipt_unregister_match(&state_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_connmark.c b/net/ipv4/netfilter/ipt_connmark.c
deleted file mode 100644
index 5306ef293b9..00000000000
--- a/net/ipv4/netfilter/ipt_connmark.c
+++ /dev/null
@@ -1,88 +0,0 @@
-/* This kernel module matches connection mark values set by the
- * CONNMARK target
- *
- * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
- * by Henrik Nordstrom <hno@marasystems.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-MODULE_AUTHOR("Henrik Nordstrom <hno@marasytems.com>");
-MODULE_DESCRIPTION("IP tables connmark match module");
-MODULE_LICENSE("GPL");
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_connmark.h>
-#include <net/netfilter/nf_conntrack_compat.h>
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      int *hotdrop)
-{
-	const struct ipt_connmark_info *info = matchinfo;
-	u_int32_t ctinfo;
-	const u_int32_t *ctmark = nf_ct_get_mark(skb, &ctinfo);
-	if (!ctmark)
-		return 0;
-
-	return (((*ctmark) & info->mask) == info->mark) ^ info->invert;
-}
-
-static int
-checkentry(const char *tablename,
-	   const struct ipt_ip *ip,
-	   void *matchinfo,
-	   unsigned int matchsize,
-	   unsigned int hook_mask)
-{
-	struct ipt_connmark_info *cm = 
-				(struct ipt_connmark_info *)matchinfo;
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_connmark_info)))
-		return 0;
-
-	if (cm->mark > 0xffffffff || cm->mask > 0xffffffff) {
-		printk(KERN_WARNING "connmark: only support 32bit mark\n");
-		return 0;
-	}
-
-	return 1;
-}
-
-static struct ipt_match connmark_match = {
-	.name = "connmark",
-	.match = &match,
-	.checkentry = &checkentry,
-	.me = THIS_MODULE
-};
-
-static int __init init(void)
-{
-	return ipt_register_match(&connmark_match);
-}
-
-static void __exit fini(void)
-{
-	ipt_unregister_match(&connmark_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_conntrack.c b/net/ipv4/netfilter/ipt_conntrack.c
deleted file mode 100644
index c8d18705469..00000000000
--- a/net/ipv4/netfilter/ipt_conntrack.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/* Kernel module to match connection tracking information.
- * Superset of Rusty's minimalistic state match.
- *
- * (C) 2001  Marc Boucher (marc@mbsi.ca).
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
-#else
-#include <net/netfilter/nf_conntrack.h>
-#endif
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_conntrack.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
-MODULE_DESCRIPTION("iptables connection tracking match module");
-
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      int *hotdrop)
-{
-	const struct ipt_conntrack_info *sinfo = matchinfo;
-	struct ip_conntrack *ct;
-	enum ip_conntrack_info ctinfo;
-	unsigned int statebit;
-
-	ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
-
-#define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg))
-
-	if (ct == &ip_conntrack_untracked)
-		statebit = IPT_CONNTRACK_STATE_UNTRACKED;
-	else if (ct)
- 		statebit = IPT_CONNTRACK_STATE_BIT(ctinfo);
- 	else
- 		statebit = IPT_CONNTRACK_STATE_INVALID;
- 
-	if(sinfo->flags & IPT_CONNTRACK_STATE) {
-		if (ct) {
-			if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip !=
-			    ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip)
-				statebit |= IPT_CONNTRACK_STATE_SNAT;
-
-			if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip !=
-			    ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip)
-				statebit |= IPT_CONNTRACK_STATE_DNAT;
-		}
-
-		if (FWINV((statebit & sinfo->statemask) == 0, IPT_CONNTRACK_STATE))
-			return 0;
-	}
-
-	if(sinfo->flags & IPT_CONNTRACK_PROTO) {
-		if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, IPT_CONNTRACK_PROTO))
-                	return 0;
-	}
-
-	if(sinfo->flags & IPT_CONNTRACK_ORIGSRC) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, IPT_CONNTRACK_ORIGSRC))
-			return 0;
-	}
-
-	if(sinfo->flags & IPT_CONNTRACK_ORIGDST) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, IPT_CONNTRACK_ORIGDST))
-			return 0;
-	}
-
-	if(sinfo->flags & IPT_CONNTRACK_REPLSRC) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, IPT_CONNTRACK_REPLSRC))
-			return 0;
-	}
-
-	if(sinfo->flags & IPT_CONNTRACK_REPLDST) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, IPT_CONNTRACK_REPLDST))
-			return 0;
-	}
-
-	if(sinfo->flags & IPT_CONNTRACK_STATUS) {
-		if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, IPT_CONNTRACK_STATUS))
-			return 0;
-	}
-
-	if(sinfo->flags & IPT_CONNTRACK_EXPIRES) {
-		unsigned long expires;
-
-		if(!ct)
-			return 0;
-
-		expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0;
-
-		if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), IPT_CONNTRACK_EXPIRES))
-			return 0;
-	}
-
-	return 1;
-}
-
-#else /* CONFIG_IP_NF_CONNTRACK */
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      int *hotdrop)
-{
-	const struct ipt_conntrack_info *sinfo = matchinfo;
-	struct nf_conn *ct;
-	enum ip_conntrack_info ctinfo;
-	unsigned int statebit;
-
-	ct = nf_ct_get((struct sk_buff *)skb, &ctinfo);
-
-#define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg))
-
-	if (ct == &nf_conntrack_untracked)
-		statebit = IPT_CONNTRACK_STATE_UNTRACKED;
-	else if (ct)
- 		statebit = IPT_CONNTRACK_STATE_BIT(ctinfo);
- 	else
- 		statebit = IPT_CONNTRACK_STATE_INVALID;
- 
-	if(sinfo->flags & IPT_CONNTRACK_STATE) {
-		if (ct) {
-			if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip !=
-			    ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip)
-				statebit |= IPT_CONNTRACK_STATE_SNAT;
-
-			if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip !=
-			    ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip)
-				statebit |= IPT_CONNTRACK_STATE_DNAT;
-		}
-
-		if (FWINV((statebit & sinfo->statemask) == 0, IPT_CONNTRACK_STATE))
-			return 0;
-	}
-
-	if(sinfo->flags & IPT_CONNTRACK_PROTO) {
-		if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, IPT_CONNTRACK_PROTO))
-                	return 0;
-	}
-
-	if(sinfo->flags & IPT_CONNTRACK_ORIGSRC) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, IPT_CONNTRACK_ORIGSRC))
-			return 0;
-	}
-
-	if(sinfo->flags & IPT_CONNTRACK_ORIGDST) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, IPT_CONNTRACK_ORIGDST))
-			return 0;
-	}
-
-	if(sinfo->flags & IPT_CONNTRACK_REPLSRC) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, IPT_CONNTRACK_REPLSRC))
-			return 0;
-	}
-
-	if(sinfo->flags & IPT_CONNTRACK_REPLDST) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, IPT_CONNTRACK_REPLDST))
-			return 0;
-	}
-
-	if(sinfo->flags & IPT_CONNTRACK_STATUS) {
-		if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, IPT_CONNTRACK_STATUS))
-			return 0;
-	}
-
-	if(sinfo->flags & IPT_CONNTRACK_EXPIRES) {
-		unsigned long expires;
-
-		if(!ct)
-			return 0;
-
-		expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0;
-
-		if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), IPT_CONNTRACK_EXPIRES))
-			return 0;
-	}
-
-	return 1;
-}
-
-#endif /* CONFIG_NF_IP_CONNTRACK */
-
-static int check(const char *tablename,
-		 const struct ipt_ip *ip,
-		 void *matchinfo,
-		 unsigned int matchsize,
-		 unsigned int hook_mask)
-{
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_conntrack_info)))
-		return 0;
-
-	return 1;
-}
-
-static struct ipt_match conntrack_match = {
-	.name		= "conntrack",
-	.match		= &match,
-	.checkentry	= &check,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	need_ip_conntrack();
-	return ipt_register_match(&conntrack_match);
-}
-
-static void __exit fini(void)
-{
-	ipt_unregister_match(&conntrack_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_dccp.c b/net/ipv4/netfilter/ipt_dccp.c
deleted file mode 100644
index ad3278bba6c..00000000000
--- a/net/ipv4/netfilter/ipt_dccp.c
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * iptables module for DCCP protocol header matching
- *
- * (C) 2005 by Harald Welte <laforge@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/spinlock.h>
-#include <net/ip.h>
-#include <linux/dccp.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_dccp.h>
-
-#define DCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \
-		                  || (!!((invflag) & (option)) ^ (cond)))
-
-static unsigned char *dccp_optbuf;
-static DEFINE_SPINLOCK(dccp_buflock);
-
-static inline int
-dccp_find_option(u_int8_t option,
-		 const struct sk_buff *skb,
-		 const struct dccp_hdr *dh,
-		 int *hotdrop)
-{
-	/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
-	unsigned char *op;
-	unsigned int optoff = __dccp_hdr_len(dh);
-	unsigned int optlen = dh->dccph_doff*4 - __dccp_hdr_len(dh);
-	unsigned int i;
-
-	if (dh->dccph_doff * 4 < __dccp_hdr_len(dh)) {
-		*hotdrop = 1;
-		return 0;
-	}
-
-	if (!optlen)
-		return 0;
-
-	spin_lock_bh(&dccp_buflock);
-	op = skb_header_pointer(skb,
-				skb->nh.iph->ihl*4 + optoff,
-				optlen, dccp_optbuf);
-	if (op == NULL) {
-		/* If we don't have the whole header, drop packet. */
-		spin_unlock_bh(&dccp_buflock);
-		*hotdrop = 1;
-		return 0;
-	}
-
-	for (i = 0; i < optlen; ) {
-		if (op[i] == option) {
-			spin_unlock_bh(&dccp_buflock);
-			return 1;
-		}
-
-		if (op[i] < 2) 
-			i++;
-		else 
-			i += op[i+1]?:1;
-	}
-
-	spin_unlock_bh(&dccp_buflock);
-	return 0;
-}
-
-
-static inline int
-match_types(const struct dccp_hdr *dh, u_int16_t typemask)
-{
-	return (typemask & (1 << dh->dccph_type));
-}
-
-static inline int
-match_option(u_int8_t option, const struct sk_buff *skb,
-	     const struct dccp_hdr *dh, int *hotdrop)
-{
-	return dccp_find_option(option, skb, dh, hotdrop);
-}
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      int *hotdrop)
-{
-	const struct ipt_dccp_info *info = 
-				(const struct ipt_dccp_info *)matchinfo;
-	struct dccp_hdr _dh, *dh;
-
-	if (offset)
-		return 0;
-	
-	dh = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_dh), &_dh);
-	if (dh == NULL) {
-		*hotdrop = 1;
-		return 0;
-       	}
-
-	return  DCCHECK(((ntohs(dh->dccph_sport) >= info->spts[0]) 
-			&& (ntohs(dh->dccph_sport) <= info->spts[1])), 
-		   	IPT_DCCP_SRC_PORTS, info->flags, info->invflags)
-		&& DCCHECK(((ntohs(dh->dccph_dport) >= info->dpts[0]) 
-			&& (ntohs(dh->dccph_dport) <= info->dpts[1])), 
-			IPT_DCCP_DEST_PORTS, info->flags, info->invflags)
-		&& DCCHECK(match_types(dh, info->typemask),
-			   IPT_DCCP_TYPE, info->flags, info->invflags)
-		&& DCCHECK(match_option(info->option, skb, dh, hotdrop),
-			   IPT_DCCP_OPTION, info->flags, info->invflags);
-}
-
-static int
-checkentry(const char *tablename,
-	   const struct ipt_ip *ip,
-	   void *matchinfo,
-	   unsigned int matchsize,
-	   unsigned int hook_mask)
-{
-	const struct ipt_dccp_info *info;
-
-	info = (const struct ipt_dccp_info *)matchinfo;
-
-	return ip->proto == IPPROTO_DCCP
-		&& !(ip->invflags & IPT_INV_PROTO)
-		&& matchsize == IPT_ALIGN(sizeof(struct ipt_dccp_info))
-		&& !(info->flags & ~IPT_DCCP_VALID_FLAGS)
-		&& !(info->invflags & ~IPT_DCCP_VALID_FLAGS)
-		&& !(info->invflags & ~info->flags);
-}
-
-static struct ipt_match dccp_match = 
-{ 
-	.name 		= "dccp",
-	.match		= &match,
-	.checkentry	= &checkentry,
-	.me 		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	int ret;
-
-	/* doff is 8 bits, so the maximum option size is (4*256).  Don't put
-	 * this in BSS since DaveM is worried about locked TLB's for kernel
-	 * BSS. */
-	dccp_optbuf = kmalloc(256 * 4, GFP_KERNEL);
-	if (!dccp_optbuf)
-		return -ENOMEM;
-	ret = ipt_register_match(&dccp_match);
-	if (ret)
-		kfree(dccp_optbuf);
-
-	return ret;
-}
-
-static void __exit fini(void)
-{
-	ipt_unregister_match(&dccp_match);
-	kfree(dccp_optbuf);
-}
-
-module_init(init);
-module_exit(fini);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("Match for DCCP protocol packets");
-
diff --git a/net/ipv4/netfilter/ipt_dscp.c b/net/ipv4/netfilter/ipt_dscp.c
index 5df52a64a5d..92063b4f860 100644
--- a/net/ipv4/netfilter/ipt_dscp.c
+++ b/net/ipv4/netfilter/ipt_dscp.c
@@ -21,7 +21,7 @@ MODULE_LICENSE("GPL");
 
 static int match(const struct sk_buff *skb, const struct net_device *in,
 		 const struct net_device *out, const void *matchinfo,
-		 int offset, int *hotdrop)
+		 int offset, unsigned int protoff, int *hotdrop)
 {
 	const struct ipt_dscp_info *info = matchinfo;
 	const struct iphdr *iph = skb->nh.iph;
@@ -31,7 +31,7 @@ static int match(const struct sk_buff *skb, const struct net_device *in,
 	return ((iph->tos&IPT_DSCP_MASK) == sh_dscp) ^ info->invert;
 }
 
-static int checkentry(const char *tablename, const struct ipt_ip *ip,
+static int checkentry(const char *tablename, const void *ip,
 		      void *matchinfo, unsigned int matchsize,
 		      unsigned int hook_mask)
 {
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index b6f7181e89c..e68b0c7981f 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -67,7 +67,7 @@ static inline int match_tcp(const struct sk_buff *skb,
 
 static int match(const struct sk_buff *skb, const struct net_device *in,
 		 const struct net_device *out, const void *matchinfo,
-		 int offset, int *hotdrop)
+		 int offset, unsigned int protoff, int *hotdrop)
 {
 	const struct ipt_ecn_info *info = matchinfo;
 
@@ -85,11 +85,12 @@ static int match(const struct sk_buff *skb, const struct net_device *in,
 	return 1;
 }
 
-static int checkentry(const char *tablename, const struct ipt_ip *ip,
+static int checkentry(const char *tablename, const void *ip_void,
 		      void *matchinfo, unsigned int matchsize,
 		      unsigned int hook_mask)
 {
 	const struct ipt_ecn_info *info = matchinfo;
+	const struct ipt_ip *ip = ip_void;
 
 	if (matchsize != IPT_ALIGN(sizeof(struct ipt_ecn_info)))
 		return 0;
diff --git a/net/ipv4/netfilter/ipt_esp.c b/net/ipv4/netfilter/ipt_esp.c
index e1d0dd31e11..9de191a8162 100644
--- a/net/ipv4/netfilter/ipt_esp.c
+++ b/net/ipv4/netfilter/ipt_esp.c
@@ -42,6 +42,7 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protoff,
       int *hotdrop)
 {
 	struct ip_esp_hdr _esp, *eh;
@@ -51,7 +52,7 @@ match(const struct sk_buff *skb,
 	if (offset)
 		return 0;
 
-	eh = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+	eh = skb_header_pointer(skb, protoff,
 				sizeof(_esp), &_esp);
 	if (eh == NULL) {
 		/* We've been asked to examine this packet, and we
@@ -70,12 +71,13 @@ match(const struct sk_buff *skb,
 /* Called when user tries to insert an entry of this type. */
 static int
 checkentry(const char *tablename,
-	   const struct ipt_ip *ip,
+	   const void *ip_void,
 	   void *matchinfo,
 	   unsigned int matchinfosize,
 	   unsigned int hook_mask)
 {
 	const struct ipt_esp *espinfo = matchinfo;
+	const struct ipt_ip *ip = ip_void;
 
 	/* Must specify proto == ESP, and no unknown invflags */
 	if (ip->proto != IPPROTO_ESP || (ip->invflags & IPT_INV_PROTO)) {
diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c
index 2dd1cccbdab..4fe48c1bd5f 100644
--- a/net/ipv4/netfilter/ipt_hashlimit.c
+++ b/net/ipv4/netfilter/ipt_hashlimit.c
@@ -429,6 +429,7 @@ hashlimit_match(const struct sk_buff *skb,
 		const struct net_device *out,
 		const void *matchinfo,
 		int offset,
+		unsigned int protoff,
 		int *hotdrop)
 {
 	struct ipt_hashlimit_info *r = 
@@ -504,7 +505,7 @@ hashlimit_match(const struct sk_buff *skb,
 
 static int
 hashlimit_checkentry(const char *tablename,
-		     const struct ipt_ip *ip,
+		     const void *inf,
 		     void *matchinfo,
 		     unsigned int matchsize,
 		     unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_helper.c b/net/ipv4/netfilter/ipt_helper.c
deleted file mode 100644
index bf14e1c7798..00000000000
--- a/net/ipv4/netfilter/ipt_helper.c
+++ /dev/null
@@ -1,167 +0,0 @@
-/* iptables module to match on related connections */
-/*
- * (C) 2001 Martin Josefsson <gandalf@wlug.westbo.se>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- *   19 Mar 2002 Harald Welte <laforge@gnumonks.org>:
- *   		 - Port to newnat infrastructure
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/netfilter.h>
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#else
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_helper.h>
-#endif
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_helper.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>");
-MODULE_DESCRIPTION("iptables helper match module");
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      int *hotdrop)
-{
-	const struct ipt_helper_info *info = matchinfo;
-	struct ip_conntrack *ct;
-	enum ip_conntrack_info ctinfo;
-	int ret = info->invert;
-	
-	ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
-	if (!ct) {
-		DEBUGP("ipt_helper: Eek! invalid conntrack?\n");
-		return ret;
-	}
-
-	if (!ct->master) {
-		DEBUGP("ipt_helper: conntrack %p has no master\n", ct);
-		return ret;
-	}
-
-	read_lock_bh(&ip_conntrack_lock);
-	if (!ct->master->helper) {
-		DEBUGP("ipt_helper: master ct %p has no helper\n", 
-			exp->expectant);
-		goto out_unlock;
-	}
-
-	DEBUGP("master's name = %s , info->name = %s\n", 
-		ct->master->helper->name, info->name);
-
-	if (info->name[0] == '\0')
-		ret ^= 1;
-	else
-		ret ^= !strncmp(ct->master->helper->name, info->name, 
-		                strlen(ct->master->helper->name));
-out_unlock:
-	read_unlock_bh(&ip_conntrack_lock);
-	return ret;
-}
-
-#else /* CONFIG_IP_NF_CONNTRACK */
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      int *hotdrop)
-{
-	const struct ipt_helper_info *info = matchinfo;
-	struct nf_conn *ct;
-	enum ip_conntrack_info ctinfo;
-	int ret = info->invert;
-	
-	ct = nf_ct_get((struct sk_buff *)skb, &ctinfo);
-	if (!ct) {
-		DEBUGP("ipt_helper: Eek! invalid conntrack?\n");
-		return ret;
-	}
-
-	if (!ct->master) {
-		DEBUGP("ipt_helper: conntrack %p has no master\n", ct);
-		return ret;
-	}
-
-	read_lock_bh(&nf_conntrack_lock);
-	if (!ct->master->helper) {
-		DEBUGP("ipt_helper: master ct %p has no helper\n", 
-			exp->expectant);
-		goto out_unlock;
-	}
-
-	DEBUGP("master's name = %s , info->name = %s\n", 
-		ct->master->helper->name, info->name);
-
-	if (info->name[0] == '\0')
-		ret ^= 1;
-	else
-		ret ^= !strncmp(ct->master->helper->name, info->name, 
-		                strlen(ct->master->helper->name));
-out_unlock:
-	read_unlock_bh(&nf_conntrack_lock);
-	return ret;
-}
-#endif
-
-static int check(const char *tablename,
-		 const struct ipt_ip *ip,
-		 void *matchinfo,
-		 unsigned int matchsize,
-		 unsigned int hook_mask)
-{
-	struct ipt_helper_info *info = matchinfo;
-
-	info->name[29] = '\0';
-
-	/* verify size */
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_helper_info)))
-		return 0;
-
-	return 1;
-}
-
-static struct ipt_match helper_match = {
-	.name		= "helper",
-	.match		= &match,
-	.checkentry	= &check,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	need_ip_conntrack();
-	return ipt_register_match(&helper_match);
-}
-
-static void __exit fini(void)
-{
-	ipt_unregister_match(&helper_match);
-}
-
-module_init(init);
-module_exit(fini);
-
diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c
index b835b7b2e56..13fb16fb789 100644
--- a/net/ipv4/netfilter/ipt_iprange.c
+++ b/net/ipv4/netfilter/ipt_iprange.c
@@ -28,7 +28,7 @@ match(const struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
       const void *matchinfo,
-      int offset, int *hotdrop)
+      int offset, unsigned int protoff, int *hotdrop)
 {
 	const struct ipt_iprange_info *info = matchinfo;
 	const struct iphdr *iph = skb->nh.iph;
@@ -63,7 +63,7 @@ match(const struct sk_buff *skb,
 }
 
 static int check(const char *tablename,
-		 const struct ipt_ip *ip,
+		 const void *inf,
 		 void *matchinfo,
 		 unsigned int matchsize,
 		 unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_length.c b/net/ipv4/netfilter/ipt_length.c
deleted file mode 100644
index 4eabcfbda9d..00000000000
--- a/net/ipv4/netfilter/ipt_length.c
+++ /dev/null
@@ -1,64 +0,0 @@
-/* Kernel module to match packet length. */
-/* (C) 1999-2001 James Morris <jmorros@intercode.com.au>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#include <linux/netfilter_ipv4/ipt_length.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
-MODULE_DESCRIPTION("IP tables packet length matching module");
-MODULE_LICENSE("GPL");
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      int *hotdrop)
-{
-	const struct ipt_length_info *info = matchinfo;
-	u_int16_t pktlen = ntohs(skb->nh.iph->tot_len);
-	
-	return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
-}
-
-static int
-checkentry(const char *tablename,
-           const struct ipt_ip *ip,
-           void *matchinfo,
-           unsigned int matchsize,
-           unsigned int hook_mask)
-{
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_length_info)))
-		return 0;
-
-	return 1;
-}
-
-static struct ipt_match length_match = {
-	.name		= "length",
-	.match		= &match,
-	.checkentry	= &checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	return ipt_register_match(&length_match);
-}
-
-static void __exit fini(void)
-{
-	ipt_unregister_match(&length_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_limit.c b/net/ipv4/netfilter/ipt_limit.c
deleted file mode 100644
index 0c24dcc703a..00000000000
--- a/net/ipv4/netfilter/ipt_limit.c
+++ /dev/null
@@ -1,157 +0,0 @@
-/* Kernel module to control the rate
- *
- * 2 September 1999: Changed from the target RATE to the match
- *                   `limit', removed logging.  Did I mention that
- *                   Alexey is a fucking genius?
- *                   Rusty Russell (rusty@rustcorp.com.au).  */
-
-/* (C) 1999 J�r�me de Vivie <devivie@info.enserb.u-bordeaux.fr>
- * (C) 1999 Herv� Eychenne <eychenne@info.enserb.u-bordeaux.fr>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/spinlock.h>
-#include <linux/interrupt.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_limit.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>");
-MODULE_DESCRIPTION("iptables rate limit match");
-
-/* The algorithm used is the Simple Token Bucket Filter (TBF)
- * see net/sched/sch_tbf.c in the linux source tree
- */
-
-static DEFINE_SPINLOCK(limit_lock);
-
-/* Rusty: This is my (non-mathematically-inclined) understanding of
-   this algorithm.  The `average rate' in jiffies becomes your initial
-   amount of credit `credit' and the most credit you can ever have
-   `credit_cap'.  The `peak rate' becomes the cost of passing the
-   test, `cost'.
-
-   `prev' tracks the last packet hit: you gain one credit per jiffy.
-   If you get credit balance more than this, the extra credit is
-   discarded.  Every time the match passes, you lose `cost' credits;
-   if you don't have that many, the test fails.
-
-   See Alexey's formal explanation in net/sched/sch_tbf.c.
-
-   To get the maxmum range, we multiply by this factor (ie. you get N
-   credits per jiffy).  We want to allow a rate as low as 1 per day
-   (slowest userspace tool allows), which means
-   CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32. ie. */
-#define MAX_CPJ (0xFFFFFFFF / (HZ*60*60*24))
-
-/* Repeated shift and or gives us all 1s, final shift and add 1 gives
- * us the power of 2 below the theoretical max, so GCC simply does a
- * shift. */
-#define _POW2_BELOW2(x) ((x)|((x)>>1))
-#define _POW2_BELOW4(x) (_POW2_BELOW2(x)|_POW2_BELOW2((x)>>2))
-#define _POW2_BELOW8(x) (_POW2_BELOW4(x)|_POW2_BELOW4((x)>>4))
-#define _POW2_BELOW16(x) (_POW2_BELOW8(x)|_POW2_BELOW8((x)>>8))
-#define _POW2_BELOW32(x) (_POW2_BELOW16(x)|_POW2_BELOW16((x)>>16))
-#define POW2_BELOW32(x) ((_POW2_BELOW32(x)>>1) + 1)
-
-#define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
-
-static int
-ipt_limit_match(const struct sk_buff *skb,
-		const struct net_device *in,
-		const struct net_device *out,
-		const void *matchinfo,
-		int offset,
-		int *hotdrop)
-{
-	struct ipt_rateinfo *r = ((struct ipt_rateinfo *)matchinfo)->master;
-	unsigned long now = jiffies;
-
-	spin_lock_bh(&limit_lock);
-	r->credit += (now - xchg(&r->prev, now)) * CREDITS_PER_JIFFY;
-	if (r->credit > r->credit_cap)
-		r->credit = r->credit_cap;
-
-	if (r->credit >= r->cost) {
-		/* We're not limited. */
-		r->credit -= r->cost;
-		spin_unlock_bh(&limit_lock);
-		return 1;
-	}
-
-       	spin_unlock_bh(&limit_lock);
-	return 0;
-}
-
-/* Precision saver. */
-static u_int32_t
-user2credits(u_int32_t user)
-{
-	/* If multiplying would overflow... */
-	if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
-		/* Divide first. */
-		return (user / IPT_LIMIT_SCALE) * HZ * CREDITS_PER_JIFFY;
-
-	return (user * HZ * CREDITS_PER_JIFFY) / IPT_LIMIT_SCALE;
-}
-
-static int
-ipt_limit_checkentry(const char *tablename,
-		     const struct ipt_ip *ip,
-		     void *matchinfo,
-		     unsigned int matchsize,
-		     unsigned int hook_mask)
-{
-	struct ipt_rateinfo *r = matchinfo;
-
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_rateinfo)))
-		return 0;
-
-	/* Check for overflow. */
-	if (r->burst == 0
-	    || user2credits(r->avg * r->burst) < user2credits(r->avg)) {
-		printk("Overflow in ipt_limit, try lower: %u/%u\n",
-		       r->avg, r->burst);
-		return 0;
-	}
-
-	/* User avg in seconds * IPT_LIMIT_SCALE: convert to jiffies *
-	   128. */
-	r->prev = jiffies;
-	r->credit = user2credits(r->avg * r->burst);	 /* Credits full. */
-	r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */
-	r->cost = user2credits(r->avg);
-
-	/* For SMP, we only want to use one set of counters. */
-	r->master = r;
-
-	return 1;
-}
-
-static struct ipt_match ipt_limit_reg = {
-	.name		= "limit",
-	.match		= ipt_limit_match,
-	.checkentry	= ipt_limit_checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	if (ipt_register_match(&ipt_limit_reg))
-		return -EINVAL;
-	return 0;
-}
-
-static void __exit fini(void)
-{
-	ipt_unregister_match(&ipt_limit_reg);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_mac.c b/net/ipv4/netfilter/ipt_mac.c
deleted file mode 100644
index 11a459e33f2..00000000000
--- a/net/ipv4/netfilter/ipt_mac.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/* Kernel module to match MAC address parameters. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/if_ether.h>
-
-#include <linux/netfilter_ipv4/ipt_mac.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-MODULE_DESCRIPTION("iptables mac matching module");
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      int *hotdrop)
-{
-    const struct ipt_mac_info *info = matchinfo;
-
-    /* Is mac pointer valid? */
-    return (skb->mac.raw >= skb->head
-	    && (skb->mac.raw + ETH_HLEN) <= skb->data
-	    /* If so, compare... */
-	    && ((memcmp(eth_hdr(skb)->h_source, info->srcaddr, ETH_ALEN)
-		== 0) ^ info->invert));
-}
-
-static int
-ipt_mac_checkentry(const char *tablename,
-		   const struct ipt_ip *ip,
-		   void *matchinfo,
-		   unsigned int matchsize,
-		   unsigned int hook_mask)
-{
-	/* FORWARD isn't always valid, but it's nice to be able to do --RR */
-	if (hook_mask
-	    & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN)
-		| (1 << NF_IP_FORWARD))) {
-		printk("ipt_mac: only valid for PRE_ROUTING, LOCAL_IN or FORWARD.\n");
-		return 0;
-	}
-
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_mac_info)))
-		return 0;
-
-	return 1;
-}
-
-static struct ipt_match mac_match = {
-	.name		= "mac",
-	.match		= &match,
-	.checkentry	= &ipt_mac_checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	return ipt_register_match(&mac_match);
-}
-
-static void __exit fini(void)
-{
-	ipt_unregister_match(&mac_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_mark.c b/net/ipv4/netfilter/ipt_mark.c
deleted file mode 100644
index 00bef6cdd3f..00000000000
--- a/net/ipv4/netfilter/ipt_mark.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/* Kernel module to match NFMARK values. */
-
-/* (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#include <linux/netfilter_ipv4/ipt_mark.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
-MODULE_DESCRIPTION("iptables mark matching module");
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      int *hotdrop)
-{
-	const struct ipt_mark_info *info = matchinfo;
-
-	return ((skb->nfmark & info->mask) == info->mark) ^ info->invert;
-}
-
-static int
-checkentry(const char *tablename,
-           const struct ipt_ip *ip,
-           void *matchinfo,
-           unsigned int matchsize,
-           unsigned int hook_mask)
-{
-	struct ipt_mark_info *minfo = (struct ipt_mark_info *) matchinfo;
-
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_mark_info)))
-		return 0;
-
-	if (minfo->mark > 0xffffffff || minfo->mask > 0xffffffff) {
-		printk(KERN_WARNING "mark: only supports 32bit mark\n");
-		return 0;
-	}
-
-	return 1;
-}
-
-static struct ipt_match mark_match = {
-	.name		= "mark",
-	.match		= &match,
-	.checkentry	= &checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	return ipt_register_match(&mark_match);
-}
-
-static void __exit fini(void)
-{
-	ipt_unregister_match(&mark_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_multiport.c b/net/ipv4/netfilter/ipt_multiport.c
index 99e8188162e..2d52326553f 100644
--- a/net/ipv4/netfilter/ipt_multiport.c
+++ b/net/ipv4/netfilter/ipt_multiport.c
@@ -97,6 +97,7 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protoff,
       int *hotdrop)
 {
 	u16 _ports[2], *pptr;
@@ -105,7 +106,7 @@ match(const struct sk_buff *skb,
 	if (offset)
 		return 0;
 
-	pptr = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+	pptr = skb_header_pointer(skb, protoff,
 				  sizeof(_ports), _ports);
 	if (pptr == NULL) {
 		/* We've been asked to examine this packet, and we
@@ -128,6 +129,7 @@ match_v1(const struct sk_buff *skb,
 	 const struct net_device *out,
 	 const void *matchinfo,
 	 int offset,
+	 unsigned int protoff,
 	 int *hotdrop)
 {
 	u16 _ports[2], *pptr;
@@ -136,7 +138,7 @@ match_v1(const struct sk_buff *skb,
 	if (offset)
 		return 0;
 
-	pptr = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+	pptr = skb_header_pointer(skb, protoff,
 				  sizeof(_ports), _ports);
 	if (pptr == NULL) {
 		/* We've been asked to examine this packet, and we
@@ -154,7 +156,7 @@ match_v1(const struct sk_buff *skb,
 /* Called when user tries to insert an entry of this type. */
 static int
 checkentry(const char *tablename,
-	   const struct ipt_ip *ip,
+	   const void *ip,
 	   void *matchinfo,
 	   unsigned int matchsize,
 	   unsigned int hook_mask)
@@ -164,7 +166,7 @@ checkentry(const char *tablename,
 
 static int
 checkentry_v1(const char *tablename,
-	      const struct ipt_ip *ip,
+	      const void *ip,
 	      void *matchinfo,
 	      unsigned int matchsize,
 	      unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c
index 0cee2862ed8..4843d0c9734 100644
--- a/net/ipv4/netfilter/ipt_owner.c
+++ b/net/ipv4/netfilter/ipt_owner.c
@@ -27,6 +27,7 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protoff,
       int *hotdrop)
 {
 	const struct ipt_owner_info *info = matchinfo;
@@ -51,7 +52,7 @@ match(const struct sk_buff *skb,
 
 static int
 checkentry(const char *tablename,
-           const struct ipt_ip *ip,
+           const void *ip,
            void *matchinfo,
            unsigned int matchsize,
            unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_physdev.c b/net/ipv4/netfilter/ipt_physdev.c
deleted file mode 100644
index 1a53924041f..00000000000
--- a/net/ipv4/netfilter/ipt_physdev.c
+++ /dev/null
@@ -1,134 +0,0 @@
-/* Kernel module to match the bridge port in and
- * out device for IP packets coming into contact with a bridge. */
-
-/* (C) 2001-2003 Bart De Schuymer <bdschuym@pandora.be>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/netfilter_ipv4/ipt_physdev.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_bridge.h>
-#define MATCH   1
-#define NOMATCH 0
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
-MODULE_DESCRIPTION("iptables bridge physical device match module");
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      int *hotdrop)
-{
-	int i;
-	static const char nulldevname[IFNAMSIZ];
-	const struct ipt_physdev_info *info = matchinfo;
-	unsigned int ret;
-	const char *indev, *outdev;
-	struct nf_bridge_info *nf_bridge;
-
-	/* Not a bridged IP packet or no info available yet:
-	 * LOCAL_OUT/mangle and LOCAL_OUT/nat don't know if
-	 * the destination device will be a bridge. */
-	if (!(nf_bridge = skb->nf_bridge)) {
-		/* Return MATCH if the invert flags of the used options are on */
-		if ((info->bitmask & IPT_PHYSDEV_OP_BRIDGED) &&
-		    !(info->invert & IPT_PHYSDEV_OP_BRIDGED))
-			return NOMATCH;
-		if ((info->bitmask & IPT_PHYSDEV_OP_ISIN) &&
-		    !(info->invert & IPT_PHYSDEV_OP_ISIN))
-			return NOMATCH;
-		if ((info->bitmask & IPT_PHYSDEV_OP_ISOUT) &&
-		    !(info->invert & IPT_PHYSDEV_OP_ISOUT))
-			return NOMATCH;
-		if ((info->bitmask & IPT_PHYSDEV_OP_IN) &&
-		    !(info->invert & IPT_PHYSDEV_OP_IN))
-			return NOMATCH;
-		if ((info->bitmask & IPT_PHYSDEV_OP_OUT) &&
-		    !(info->invert & IPT_PHYSDEV_OP_OUT))
-			return NOMATCH;
-		return MATCH;
-	}
-
-	/* This only makes sense in the FORWARD and POSTROUTING chains */
-	if ((info->bitmask & IPT_PHYSDEV_OP_BRIDGED) &&
-	    (!!(nf_bridge->mask & BRNF_BRIDGED) ^
-	    !(info->invert & IPT_PHYSDEV_OP_BRIDGED)))
-		return NOMATCH;
-
-	if ((info->bitmask & IPT_PHYSDEV_OP_ISIN &&
-	    (!nf_bridge->physindev ^ !!(info->invert & IPT_PHYSDEV_OP_ISIN))) ||
-	    (info->bitmask & IPT_PHYSDEV_OP_ISOUT &&
-	    (!nf_bridge->physoutdev ^ !!(info->invert & IPT_PHYSDEV_OP_ISOUT))))
-		return NOMATCH;
-
-	if (!(info->bitmask & IPT_PHYSDEV_OP_IN))
-		goto match_outdev;
-	indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname;
-	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned int); i++) {
-		ret |= (((const unsigned int *)indev)[i]
-			^ ((const unsigned int *)info->physindev)[i])
-			& ((const unsigned int *)info->in_mask)[i];
-	}
-
-	if ((ret == 0) ^ !(info->invert & IPT_PHYSDEV_OP_IN))
-		return NOMATCH;
-
-match_outdev:
-	if (!(info->bitmask & IPT_PHYSDEV_OP_OUT))
-		return MATCH;
-	outdev = nf_bridge->physoutdev ?
-		 nf_bridge->physoutdev->name : nulldevname;
-	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned int); i++) {
-		ret |= (((const unsigned int *)outdev)[i]
-			^ ((const unsigned int *)info->physoutdev)[i])
-			& ((const unsigned int *)info->out_mask)[i];
-	}
-
-	return (ret != 0) ^ !(info->invert & IPT_PHYSDEV_OP_OUT);
-}
-
-static int
-checkentry(const char *tablename,
-		       const struct ipt_ip *ip,
-		       void *matchinfo,
-		       unsigned int matchsize,
-		       unsigned int hook_mask)
-{
-	const struct ipt_physdev_info *info = matchinfo;
-
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_physdev_info)))
-		return 0;
-	if (!(info->bitmask & IPT_PHYSDEV_OP_MASK) ||
-	    info->bitmask & ~IPT_PHYSDEV_OP_MASK)
-		return 0;
-	return 1;
-}
-
-static struct ipt_match physdev_match = {
-	.name		= "physdev",
-	.match		= &match,
-	.checkentry	= &checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	return ipt_register_match(&physdev_match);
-}
-
-static void __exit fini(void)
-{
-	ipt_unregister_match(&physdev_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_pkttype.c b/net/ipv4/netfilter/ipt_pkttype.c
deleted file mode 100644
index 8ddb1dc5e5a..00000000000
--- a/net/ipv4/netfilter/ipt_pkttype.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/* (C) 1999-2001 Michal Ludvig <michal@logix.cz>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-
-#include <linux/netfilter_ipv4/ipt_pkttype.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Michal Ludvig <michal@logix.cz>");
-MODULE_DESCRIPTION("IP tables match to match on linklayer packet type");
-
-static int match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      int *hotdrop)
-{
-    const struct ipt_pkttype_info *info = matchinfo;
-
-    return (skb->pkt_type == info->pkttype) ^ info->invert;
-}
-
-static int checkentry(const char *tablename,
-		   const struct ipt_ip *ip,
-		   void *matchinfo,
-		   unsigned int matchsize,
-		   unsigned int hook_mask)
-{
-/*
-	if (hook_mask
-	    & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN)
-		| (1 << NF_IP_FORWARD))) {
-		printk("ipt_pkttype: only valid for PRE_ROUTING, LOCAL_IN or FORWARD.\n");
-		return 0;
-	}
-*/
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_pkttype_info)))
-		return 0;
-
-	return 1;
-}
-
-static struct ipt_match pkttype_match = {
-	.name		= "pkttype",
-	.match		= &match,
-	.checkentry	= &checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	return ipt_register_match(&pkttype_match);
-}
-
-static void __exit fini(void)
-{
-	ipt_unregister_match(&pkttype_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_policy.c b/net/ipv4/netfilter/ipt_policy.c
new file mode 100644
index 00000000000..18ca8258a1c
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_policy.c
@@ -0,0 +1,173 @@
+/* IP tables module for matching IPsec policy
+ *
+ * Copyright (c) 2004,2005 Patrick McHardy, <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <net/xfrm.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_policy.h>
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("IPtables IPsec policy matching module");
+MODULE_LICENSE("GPL");
+
+
+static inline int
+match_xfrm_state(struct xfrm_state *x, const struct ipt_policy_elem *e)
+{
+#define MATCH(x,y)	(!e->match.x || ((e->x == (y)) ^ e->invert.x))
+
+	return MATCH(saddr, x->props.saddr.a4 & e->smask) &&
+	       MATCH(daddr, x->id.daddr.a4 & e->dmask) &&
+	       MATCH(proto, x->id.proto) &&
+	       MATCH(mode, x->props.mode) &&
+	       MATCH(spi, x->id.spi) &&
+	       MATCH(reqid, x->props.reqid);
+}
+
+static int
+match_policy_in(const struct sk_buff *skb, const struct ipt_policy_info *info)
+{
+	const struct ipt_policy_elem *e;
+	struct sec_path *sp = skb->sp;
+	int strict = info->flags & IPT_POLICY_MATCH_STRICT;
+	int i, pos;
+
+	if (sp == NULL)
+		return -1;
+	if (strict && info->len != sp->len)
+		return 0;
+
+	for (i = sp->len - 1; i >= 0; i--) {
+		pos = strict ? i - sp->len + 1 : 0;
+		if (pos >= info->len)
+			return 0;
+		e = &info->pol[pos];
+
+		if (match_xfrm_state(sp->x[i].xvec, e)) {
+			if (!strict)
+				return 1;
+		} else if (strict)
+			return 0;
+	}
+
+	return strict ? 1 : 0;
+}
+
+static int
+match_policy_out(const struct sk_buff *skb, const struct ipt_policy_info *info)
+{
+	const struct ipt_policy_elem *e;
+	struct dst_entry *dst = skb->dst;
+	int strict = info->flags & IPT_POLICY_MATCH_STRICT;
+	int i, pos;
+
+	if (dst->xfrm == NULL)
+		return -1;
+
+	for (i = 0; dst && dst->xfrm; dst = dst->child, i++) {
+		pos = strict ? i : 0;
+		if (pos >= info->len)
+			return 0;
+		e = &info->pol[pos];
+
+		if (match_xfrm_state(dst->xfrm, e)) {
+			if (!strict)
+				return 1;
+		} else if (strict)
+			return 0;
+	}
+
+	return strict ? 1 : 0;
+}
+
+static int match(const struct sk_buff *skb,
+                 const struct net_device *in,
+                 const struct net_device *out,
+                 const void *matchinfo,
+                 int offset,
+                 unsigned int protoff,
+                 int *hotdrop)
+{
+	const struct ipt_policy_info *info = matchinfo;
+	int ret;
+
+	if (info->flags & IPT_POLICY_MATCH_IN)
+		ret = match_policy_in(skb, info);
+	else
+		ret = match_policy_out(skb, info);
+
+	if (ret < 0)
+		ret = info->flags & IPT_POLICY_MATCH_NONE ? 1 : 0;
+	else if (info->flags & IPT_POLICY_MATCH_NONE)
+		ret = 0;
+
+	return ret;
+}
+
+static int checkentry(const char *tablename, const void *ip_void,
+                      void *matchinfo, unsigned int matchsize,
+                      unsigned int hook_mask)
+{
+	struct ipt_policy_info *info = matchinfo;
+
+	if (matchsize != IPT_ALIGN(sizeof(*info))) {
+		printk(KERN_ERR "ipt_policy: matchsize %u != %zu\n",
+		       matchsize, IPT_ALIGN(sizeof(*info)));
+		return 0;
+	}
+	if (!(info->flags & (IPT_POLICY_MATCH_IN|IPT_POLICY_MATCH_OUT))) {
+		printk(KERN_ERR "ipt_policy: neither incoming nor "
+		                "outgoing policy selected\n");
+		return 0;
+	}
+	if (hook_mask & (1 << NF_IP_PRE_ROUTING | 1 << NF_IP_LOCAL_IN)
+	    && info->flags & IPT_POLICY_MATCH_OUT) {
+		printk(KERN_ERR "ipt_policy: output policy not valid in "
+		                "PRE_ROUTING and INPUT\n");
+		return 0;
+	}
+	if (hook_mask & (1 << NF_IP_POST_ROUTING | 1 << NF_IP_LOCAL_OUT)
+	    && info->flags & IPT_POLICY_MATCH_IN) {
+		printk(KERN_ERR "ipt_policy: input policy not valid in "
+		                "POST_ROUTING and OUTPUT\n");
+		return 0;
+	}
+	if (info->len > IPT_POLICY_MAX_ELEM) {
+		printk(KERN_ERR "ipt_policy: too many policy elements\n");
+		return 0;
+	}
+
+	return 1;
+}
+
+static struct ipt_match policy_match = {
+	.name		= "policy",
+	.match		= match,
+	.checkentry 	= checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ipt_register_match(&policy_match);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_match(&policy_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_realm.c b/net/ipv4/netfilter/ipt_realm.c
deleted file mode 100644
index 54a6897ebaa..00000000000
--- a/net/ipv4/netfilter/ipt_realm.c
+++ /dev/null
@@ -1,76 +0,0 @@
-/* IP tables module for matching the routing realm
- *
- * $Id: ipt_realm.c,v 1.3 2004/03/05 13:25:40 laforge Exp $
- *
- * (C) 2003 by Sampsa Ranta <sampsa@netsonic.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <net/route.h>
-
-#include <linux/netfilter_ipv4/ipt_realm.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-MODULE_AUTHOR("Sampsa Ranta <sampsa@netsonic.fi>");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("iptables realm match");
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      int *hotdrop)
-{
-	const struct ipt_realm_info *info = matchinfo;
-	struct dst_entry *dst = skb->dst;
-    
-	return (info->id == (dst->tclassid & info->mask)) ^ info->invert;
-}
-
-static int check(const char *tablename,
-                 const struct ipt_ip *ip,
-                 void *matchinfo,
-                 unsigned int matchsize,
-                 unsigned int hook_mask)
-{
-	if (hook_mask
-	    & ~((1 << NF_IP_POST_ROUTING) | (1 << NF_IP_FORWARD) |
-	        (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_LOCAL_IN))) {
-		printk("ipt_realm: only valid for POST_ROUTING, LOCAL_OUT, "
-		       "LOCAL_IN or FORWARD.\n");
-		return 0;
-	}
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_realm_info))) {
-		printk("ipt_realm: invalid matchsize.\n");
-		return 0;
-	}
-	return 1;
-}
-
-static struct ipt_match realm_match = {
-	.name		= "realm",
-	.match		= match, 
-	.checkentry	= check,
-	.me		= THIS_MODULE
-};
-
-static int __init init(void)
-{
-	return ipt_register_match(&realm_match);
-}
-
-static void __exit fini(void)
-{
-	ipt_unregister_match(&realm_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 261cbb4d4c4..44611d6d14f 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -24,10 +24,10 @@
 #define HASH_LOG 9
 
 /* Defaults, these can be overridden on the module command-line. */
-static int ip_list_tot = 100;
-static int ip_pkt_list_tot = 20;
-static int ip_list_hash_size = 0;
-static int ip_list_perms = 0644;
+static unsigned int ip_list_tot = 100;
+static unsigned int ip_pkt_list_tot = 20;
+static unsigned int ip_list_hash_size = 0;
+static unsigned int ip_list_perms = 0644;
 #ifdef DEBUG
 static int debug = 1;
 #endif
@@ -38,13 +38,13 @@ KERN_INFO RECENT_NAME " " RECENT_VER ": Stephen Frost <sfrost@snowman.net>.  htt
 MODULE_AUTHOR("Stephen Frost <sfrost@snowman.net>");
 MODULE_DESCRIPTION("IP tables recently seen matching module " RECENT_VER);
 MODULE_LICENSE("GPL");
-module_param(ip_list_tot, int, 0400);
-module_param(ip_pkt_list_tot, int, 0400);
-module_param(ip_list_hash_size, int, 0400);
-module_param(ip_list_perms, int, 0400);
+module_param(ip_list_tot, uint, 0400);
+module_param(ip_pkt_list_tot, uint, 0400);
+module_param(ip_list_hash_size, uint, 0400);
+module_param(ip_list_perms, uint, 0400);
 #ifdef DEBUG
-module_param(debug, int, 0600);
-MODULE_PARM_DESC(debug,"debugging level, defaults to 1");
+module_param(debug, bool, 0600);
+MODULE_PARM_DESC(debug,"enable debugging output");
 #endif
 MODULE_PARM_DESC(ip_list_tot,"number of IPs to remember per list");
 MODULE_PARM_DESC(ip_pkt_list_tot,"number of packets per IP to remember");
@@ -104,6 +104,7 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protoff,
       int *hotdrop);
 
 /* Function to hash a given address into the hash table of table_size size */
@@ -317,7 +318,7 @@ static int ip_recent_ctrl(struct file *file, const char __user *input, unsigned
 	skb->nh.iph->daddr = 0;
 	/* Clear ttl since we have no way of knowing it */
 	skb->nh.iph->ttl = 0;
-	match(skb,NULL,NULL,info,0,NULL);
+	match(skb,NULL,NULL,info,0,0,NULL);
 
 	kfree(skb->nh.iph);
 out_free_skb:
@@ -357,6 +358,7 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protoff,
       int *hotdrop)
 {
 	int pkt_count, hits_found, ans;
@@ -654,7 +656,7 @@ match(const struct sk_buff *skb,
  */
 static int
 checkentry(const char *tablename,
-           const struct ipt_ip *ip,
+           const void *ip,
            void *matchinfo,
            unsigned int matchsize,
            unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_sctp.c b/net/ipv4/netfilter/ipt_sctp.c
deleted file mode 100644
index fe2b327bcaa..00000000000
--- a/net/ipv4/netfilter/ipt_sctp.c
+++ /dev/null
@@ -1,203 +0,0 @@
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <net/ip.h>
-#include <linux/sctp.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_sctp.h>
-
-#ifdef DEBUG_SCTP
-#define duprintf(format, args...) printk(format , ## args)
-#else
-#define duprintf(format, args...)
-#endif
-
-#define SCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \
-					      || (!!((invflag) & (option)) ^ (cond)))
-
-static int
-match_flags(const struct ipt_sctp_flag_info *flag_info,
-	    const int flag_count,
-	    u_int8_t chunktype,
-	    u_int8_t chunkflags)
-{
-	int i;
-
-	for (i = 0; i < flag_count; i++) {
-		if (flag_info[i].chunktype == chunktype) {
-			return (chunkflags & flag_info[i].flag_mask) == flag_info[i].flag;
-		}
-	}
-
-	return 1;
-}
-
-static int
-match_packet(const struct sk_buff *skb,
-	     const u_int32_t *chunkmap,
-	     int chunk_match_type,
-	     const struct ipt_sctp_flag_info *flag_info,
-	     const int flag_count,
-	     int *hotdrop)
-{
-	int offset;
-	u_int32_t chunkmapcopy[256 / sizeof (u_int32_t)];
-	sctp_chunkhdr_t _sch, *sch;
-
-#ifdef DEBUG_SCTP
-	int i = 0;
-#endif
-
-	if (chunk_match_type == SCTP_CHUNK_MATCH_ALL) {
-		SCTP_CHUNKMAP_COPY(chunkmapcopy, chunkmap);
-	}
-
-	offset = skb->nh.iph->ihl * 4 + sizeof (sctp_sctphdr_t);
-	do {
-		sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch);
-		if (sch == NULL) {
-			duprintf("Dropping invalid SCTP packet.\n");
-			*hotdrop = 1;
-			return 0;
-        	}
-
-		duprintf("Chunk num: %d\toffset: %d\ttype: %d\tlength: %d\tflags: %x\n", 
-				++i, offset, sch->type, htons(sch->length), sch->flags);
-
-		offset += (htons(sch->length) + 3) & ~3;
-
-		duprintf("skb->len: %d\toffset: %d\n", skb->len, offset);
-
-		if (SCTP_CHUNKMAP_IS_SET(chunkmap, sch->type)) {
-			switch (chunk_match_type) {
-			case SCTP_CHUNK_MATCH_ANY:
-				if (match_flags(flag_info, flag_count, 
-					sch->type, sch->flags)) {
-					return 1;
-				}
-				break;
-
-			case SCTP_CHUNK_MATCH_ALL:
-				if (match_flags(flag_info, flag_count, 
-					sch->type, sch->flags)) {
-					SCTP_CHUNKMAP_CLEAR(chunkmapcopy, sch->type);
-				}
-				break;
-
-			case SCTP_CHUNK_MATCH_ONLY:
-				if (!match_flags(flag_info, flag_count, 
-					sch->type, sch->flags)) {
-					return 0;
-				}
-				break;
-			}
-		} else {
-			switch (chunk_match_type) {
-			case SCTP_CHUNK_MATCH_ONLY:
-				return 0;
-			}
-		}
-	} while (offset < skb->len);
-
-	switch (chunk_match_type) {
-	case SCTP_CHUNK_MATCH_ALL:
-		return SCTP_CHUNKMAP_IS_CLEAR(chunkmap);
-	case SCTP_CHUNK_MATCH_ANY:
-		return 0;
-	case SCTP_CHUNK_MATCH_ONLY:
-		return 1;
-	}
-
-	/* This will never be reached, but required to stop compiler whine */
-	return 0;
-}
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      int *hotdrop)
-{
-	const struct ipt_sctp_info *info;
-	sctp_sctphdr_t _sh, *sh;
-
-	info = (const struct ipt_sctp_info *)matchinfo;
-
-	if (offset) {
-		duprintf("Dropping non-first fragment.. FIXME\n");
-		return 0;
-	}
-	
-	sh = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_sh), &_sh);
-	if (sh == NULL) {
-		duprintf("Dropping evil TCP offset=0 tinygram.\n");
-		*hotdrop = 1;
-		return 0;
-       	}
-	duprintf("spt: %d\tdpt: %d\n", ntohs(sh->source), ntohs(sh->dest));
-
-	return  SCCHECK(((ntohs(sh->source) >= info->spts[0]) 
-			&& (ntohs(sh->source) <= info->spts[1])), 
-		   	IPT_SCTP_SRC_PORTS, info->flags, info->invflags)
-		&& SCCHECK(((ntohs(sh->dest) >= info->dpts[0]) 
-			&& (ntohs(sh->dest) <= info->dpts[1])), 
-			IPT_SCTP_DEST_PORTS, info->flags, info->invflags)
-		&& SCCHECK(match_packet(skb, info->chunkmap, info->chunk_match_type,
- 					info->flag_info, info->flag_count, 
-					hotdrop),
-			   IPT_SCTP_CHUNK_TYPES, info->flags, info->invflags);
-}
-
-static int
-checkentry(const char *tablename,
-	   const struct ipt_ip *ip,
-	   void *matchinfo,
-	   unsigned int matchsize,
-	   unsigned int hook_mask)
-{
-	const struct ipt_sctp_info *info;
-
-	info = (const struct ipt_sctp_info *)matchinfo;
-
-	return ip->proto == IPPROTO_SCTP
-		&& !(ip->invflags & IPT_INV_PROTO)
-		&& matchsize == IPT_ALIGN(sizeof(struct ipt_sctp_info))
-		&& !(info->flags & ~IPT_SCTP_VALID_FLAGS)
-		&& !(info->invflags & ~IPT_SCTP_VALID_FLAGS)
-		&& !(info->invflags & ~info->flags)
-		&& ((!(info->flags & IPT_SCTP_CHUNK_TYPES)) || 
-			(info->chunk_match_type &
-				(SCTP_CHUNK_MATCH_ALL 
-				| SCTP_CHUNK_MATCH_ANY
-				| SCTP_CHUNK_MATCH_ONLY)));
-}
-
-static struct ipt_match sctp_match = 
-{ 
-	.list = { NULL, NULL},
-	.name = "sctp",
-	.match = &match,
-	.checkentry = &checkentry,
-	.destroy = NULL,
-	.me = THIS_MODULE
-};
-
-static int __init init(void)
-{
-	return ipt_register_match(&sctp_match);
-}
-
-static void __exit fini(void)
-{
-	ipt_unregister_match(&sctp_match);
-}
-
-module_init(init);
-module_exit(fini);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Kiran Kumar Immidi");
-MODULE_DESCRIPTION("Match for SCTP protocol packets");
-
diff --git a/net/ipv4/netfilter/ipt_state.c b/net/ipv4/netfilter/ipt_state.c
deleted file mode 100644
index 4d7f16b70ce..00000000000
--- a/net/ipv4/netfilter/ipt_state.c
+++ /dev/null
@@ -1,74 +0,0 @@
-/* Kernel module to match connection tracking information. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <net/netfilter/nf_conntrack_compat.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_state.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
-MODULE_DESCRIPTION("iptables connection tracking state match module");
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      int *hotdrop)
-{
-	const struct ipt_state_info *sinfo = matchinfo;
-	enum ip_conntrack_info ctinfo;
-	unsigned int statebit;
-
-	if (nf_ct_is_untracked(skb))
-		statebit = IPT_STATE_UNTRACKED;
-	else if (!nf_ct_get_ctinfo(skb, &ctinfo))
-		statebit = IPT_STATE_INVALID;
-	else
-		statebit = IPT_STATE_BIT(ctinfo);
-
-	return (sinfo->statemask & statebit);
-}
-
-static int check(const char *tablename,
-		 const struct ipt_ip *ip,
-		 void *matchinfo,
-		 unsigned int matchsize,
-		 unsigned int hook_mask)
-{
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_state_info)))
-		return 0;
-
-	return 1;
-}
-
-static struct ipt_match state_match = {
-	.name		= "state",
-	.match		= &match,
-	.checkentry	= &check,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	need_ip_conntrack();
-	return ipt_register_match(&state_match);
-}
-
-static void __exit fini(void)
-{
-	ipt_unregister_match(&state_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_string.c b/net/ipv4/netfilter/ipt_string.c
deleted file mode 100644
index b5def204d79..00000000000
--- a/net/ipv4/netfilter/ipt_string.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/* String matching match for iptables
- * 
- * (C) 2005 Pablo Neira Ayuso <pablo@eurodev.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/skbuff.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_string.h>
-#include <linux/textsearch.h>
-
-MODULE_AUTHOR("Pablo Neira Ayuso <pablo@eurodev.net>");
-MODULE_DESCRIPTION("IP tables string match module");
-MODULE_LICENSE("GPL");
-
-static int match(const struct sk_buff *skb,
-		 const struct net_device *in,
-		 const struct net_device *out,
-		 const void *matchinfo,
-		 int offset,
-		 int *hotdrop)
-{
-	struct ts_state state;
-	struct ipt_string_info *conf = (struct ipt_string_info *) matchinfo;
-
-	memset(&state, 0, sizeof(struct ts_state));
-
-	return (skb_find_text((struct sk_buff *)skb, conf->from_offset, 
-			     conf->to_offset, conf->config, &state) 
-			     != UINT_MAX) && !conf->invert;
-}
-
-#define STRING_TEXT_PRIV(m) ((struct ipt_string_info *) m)
-
-static int checkentry(const char *tablename,
-		      const struct ipt_ip *ip,
-		      void *matchinfo,
-		      unsigned int matchsize,
-		      unsigned int hook_mask)
-{
-	struct ipt_string_info *conf = matchinfo;
-	struct ts_config *ts_conf;
-
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_string_info)))
-		return 0;
-
-	/* Damn, can't handle this case properly with iptables... */
-	if (conf->from_offset > conf->to_offset)
-		return 0;
-
-	ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen,
-				     GFP_KERNEL, TS_AUTOLOAD);
-	if (IS_ERR(ts_conf))
-		return 0;
-
-	conf->config = ts_conf;
-
-	return 1;
-}
-
-static void destroy(void *matchinfo, unsigned int matchsize)
-{
-	textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config);
-}
-
-static struct ipt_match string_match = {
-	.name 		= "string",
-	.match 		= match,
-	.checkentry	= checkentry,
-	.destroy 	= destroy,
-	.me 		= THIS_MODULE
-};
-
-static int __init init(void)
-{
-	return ipt_register_match(&string_match);
-}
-
-static void __exit fini(void)
-{
-	ipt_unregister_match(&string_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_tcpmss.c b/net/ipv4/netfilter/ipt_tcpmss.c
deleted file mode 100644
index 4dc9b16ab4a..00000000000
--- a/net/ipv4/netfilter/ipt_tcpmss.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/* Kernel module to match TCP MSS values. */
-
-/* Copyright (C) 2000 Marc Boucher <marc@mbsi.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4/ipt_tcpmss.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-#define TH_SYN 0x02
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
-MODULE_DESCRIPTION("iptables TCP MSS match module");
-
-/* Returns 1 if the mss option is set and matched by the range, 0 otherwise */
-static inline int
-mssoption_match(u_int16_t min, u_int16_t max,
-		const struct sk_buff *skb,
-		int invert,
-		int *hotdrop)
-{
-	struct tcphdr _tcph, *th;
-	/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
-	u8 _opt[15 * 4 - sizeof(_tcph)], *op;
-	unsigned int i, optlen;
-
-	/* If we don't have the whole header, drop packet. */
-	th = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
-				sizeof(_tcph), &_tcph);
-	if (th == NULL)
-		goto dropit;
-
-	/* Malformed. */
-	if (th->doff*4 < sizeof(*th))
-		goto dropit;
-
-	optlen = th->doff*4 - sizeof(*th);
-	if (!optlen)
-		goto out;
-
-	/* Truncated options. */
-	op = skb_header_pointer(skb, skb->nh.iph->ihl * 4 + sizeof(*th),
-				optlen, _opt);
-	if (op == NULL)
-		goto dropit;
-
-	for (i = 0; i < optlen; ) {
-		if (op[i] == TCPOPT_MSS
-		    && (optlen - i) >= TCPOLEN_MSS
-		    && op[i+1] == TCPOLEN_MSS) {
-			u_int16_t mssval;
-
-			mssval = (op[i+2] << 8) | op[i+3];
-			
-			return (mssval >= min && mssval <= max) ^ invert;
-		}
-		if (op[i] < 2) i++;
-		else i += op[i+1]?:1;
-	}
-out:
-	return invert;
-
- dropit:
-	*hotdrop = 1;
-	return 0;
-}
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      int *hotdrop)
-{
-	const struct ipt_tcpmss_match_info *info = matchinfo;
-
-	return mssoption_match(info->mss_min, info->mss_max, skb,
-			       info->invert, hotdrop);
-}
-
-static int
-checkentry(const char *tablename,
-           const struct ipt_ip *ip,
-           void *matchinfo,
-           unsigned int matchsize,
-           unsigned int hook_mask)
-{
-	if (matchsize != IPT_ALIGN(sizeof(struct ipt_tcpmss_match_info)))
-		return 0;
-
-	/* Must specify -p tcp */
-	if (ip->proto != IPPROTO_TCP || (ip->invflags & IPT_INV_PROTO)) {
-		printk("tcpmss: Only works on TCP packets\n");
-		return 0;
-	}
-
-	return 1;
-}
-
-static struct ipt_match tcpmss_match = {
-	.name		= "tcpmss",
-	.match		= &match,
-	.checkentry	= &checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	return ipt_register_match(&tcpmss_match);
-}
-
-static void __exit fini(void)
-{
-	ipt_unregister_match(&tcpmss_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c
index 086a1bb61e3..9ab765e126f 100644
--- a/net/ipv4/netfilter/ipt_tos.c
+++ b/net/ipv4/netfilter/ipt_tos.c
@@ -23,6 +23,7 @@ match(const struct sk_buff *skb,
       const struct net_device *out,
       const void *matchinfo,
       int offset,
+      unsigned int protoff,
       int *hotdrop)
 {
 	const struct ipt_tos_info *info = matchinfo;
@@ -32,7 +33,7 @@ match(const struct sk_buff *skb,
 
 static int
 checkentry(const char *tablename,
-           const struct ipt_ip *ip,
+           const void *ip,
            void *matchinfo,
            unsigned int matchsize,
            unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c
index 219aa9de88c..82da53f430a 100644
--- a/net/ipv4/netfilter/ipt_ttl.c
+++ b/net/ipv4/netfilter/ipt_ttl.c
@@ -21,7 +21,7 @@ MODULE_LICENSE("GPL");
 
 static int match(const struct sk_buff *skb, const struct net_device *in,
 		 const struct net_device *out, const void *matchinfo,
-		 int offset, int *hotdrop)
+		 int offset, unsigned int protoff, int *hotdrop)
 {
 	const struct ipt_ttl_info *info = matchinfo;
 
@@ -47,7 +47,7 @@ static int match(const struct sk_buff *skb, const struct net_device *in,
 	return 0;
 }
 
-static int checkentry(const char *tablename, const struct ipt_ip *ip,
+static int checkentry(const char *tablename, const void  *ip,
 		      void *matchinfo, unsigned int matchsize,
 		      unsigned int hook_mask)
 {
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 260a4f0a2a9..212a3079085 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -78,7 +78,8 @@ static struct ipt_table packet_filter = {
 	.name		= "filter",
 	.valid_hooks	= FILTER_VALID_HOOKS,
 	.lock		= RW_LOCK_UNLOCKED,
-	.me		= THIS_MODULE
+	.me		= THIS_MODULE,
+	.af		= AF_INET,
 };
 
 /* The work comes in here from netfilter.c. */
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 160eb11b6e2..3212a5cc4b6 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -109,6 +109,7 @@ static struct ipt_table packet_mangler = {
 	.valid_hooks	= MANGLE_VALID_HOOKS,
 	.lock		= RW_LOCK_UNLOCKED,
 	.me		= THIS_MODULE,
+	.af		= AF_INET,
 };
 
 /* The work comes in here from netfilter.c. */
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 47449ba83eb..fdb9e9c81e8 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -83,7 +83,8 @@ static struct ipt_table packet_raw = {
 	.name = "raw", 
 	.valid_hooks =  RAW_VALID_HOOKS, 
 	.lock = RW_LOCK_UNLOCKED, 
-	.me = THIS_MODULE
+	.me = THIS_MODULE,
+	.af = AF_INET,
 };
 
 /* The work comes in here from netfilter.c. */
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 8202c1c0afa..167619f638c 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -22,6 +22,7 @@
 #include <linux/skbuff.h>
 #include <linux/icmp.h>
 #include <linux/sysctl.h>
+#include <net/route.h>
 #include <net/ip.h>
 
 #include <linux/netfilter_ipv4.h>
@@ -180,30 +181,6 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
 	return NF_ACCEPT;
 }
 
-static unsigned int ipv4_refrag(unsigned int hooknum,
-				struct sk_buff **pskb,
-				const struct net_device *in,
-				const struct net_device *out,
-				int (*okfn)(struct sk_buff *))
-{
-	struct rtable *rt = (struct rtable *)(*pskb)->dst;
-
-	/* We've seen it coming out the other side: confirm */
-	if (ipv4_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT)
-		return NF_DROP;
-
-	/* Local packets are never produced too large for their
-	   interface.  We degfragment them at LOCAL_OUT, however,
-	   so we have to refragment them here. */
-	if ((*pskb)->len > dst_mtu(&rt->u.dst) &&
-	    !skb_shinfo(*pskb)->tso_size) {
-		/* No hook can be after us, so this should be OK. */
-		ip_fragment(*pskb, okfn);
-		return NF_STOLEN;
-	}
-	return NF_ACCEPT;
-}
-
 static unsigned int ipv4_conntrack_in(unsigned int hooknum,
 				      struct sk_buff **pskb,
 				      const struct net_device *in,
@@ -283,7 +260,7 @@ static struct nf_hook_ops ipv4_conntrack_helper_in_ops = {
 
 /* Refragmenter; last chance. */
 static struct nf_hook_ops ipv4_conntrack_out_ops = {
-	.hook		= ipv4_refrag,
+	.hook		= ipv4_confirm,
 	.owner		= THIS_MODULE,
 	.pf		= PF_INET,
 	.hooknum	= NF_IP_POST_ROUTING,
@@ -300,7 +277,7 @@ static struct nf_hook_ops ipv4_conntrack_local_in_ops = {
 
 #ifdef CONFIG_SYSCTL
 /* From nf_conntrack_proto_icmp.c */
-extern unsigned long nf_ct_icmp_timeout;
+extern unsigned int nf_ct_icmp_timeout;
 static struct ctl_table_header *nf_ct_ipv4_sysctl_header;
 
 static ctl_table nf_ct_sysctl_table[] = {
@@ -392,6 +369,48 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
 	return -ENOENT;
 }
 
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static int ipv4_tuple_to_nfattr(struct sk_buff *skb,
+				const struct nf_conntrack_tuple *tuple)
+{
+	NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t),
+		&tuple->src.u3.ip);
+	NFA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t),
+		&tuple->dst.u3.ip);
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+static const size_t cta_min_ip[CTA_IP_MAX] = {
+	[CTA_IP_V4_SRC-1]       = sizeof(u_int32_t),
+	[CTA_IP_V4_DST-1]       = sizeof(u_int32_t),
+};
+
+static int ipv4_nfattr_to_tuple(struct nfattr *tb[],
+				struct nf_conntrack_tuple *t)
+{
+	if (!tb[CTA_IP_V4_SRC-1] || !tb[CTA_IP_V4_DST-1])
+		return -EINVAL;
+
+	if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip))
+		return -EINVAL;
+
+	t->src.u3.ip =
+		*(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_SRC-1]);
+	t->dst.u3.ip =
+		*(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_DST-1]);
+
+	return 0;
+}
+#endif
+
 static struct nf_sockopt_ops so_getorigdst = {
 	.pf		= PF_INET,
 	.get_optmin	= SO_ORIGINAL_DST,
@@ -408,6 +427,11 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
 	.print_conntrack = ipv4_print_conntrack,
 	.prepare	 = ipv4_prepare,
 	.get_features	 = ipv4_get_features,
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.tuple_to_nfattr = ipv4_tuple_to_nfattr,
+	.nfattr_to_tuple = ipv4_nfattr_to_tuple,
+#endif
 	.me		 = THIS_MODULE,
 };
 
@@ -551,7 +575,7 @@ MODULE_LICENSE("GPL");
 
 static int __init init(void)
 {
-	need_nf_conntrack();
+	need_conntrack();
 	return init_or_cleanup(1);
 }
 
@@ -563,9 +587,4 @@ static void __exit fini(void)
 module_init(init);
 module_exit(fini);
 
-void need_ip_conntrack(void)
-{
-}
-
-EXPORT_SYMBOL(need_ip_conntrack);
 EXPORT_SYMBOL(nf_ct_ipv4_gather_frags);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 7ddb5c08f7b..52dc175be39 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -50,20 +50,21 @@ static int icmp_pkt_to_tuple(const struct sk_buff *skb,
 	return 1;
 }
 
+/* Add 1; spaces filled with 0. */
+static const u_int8_t invmap[] = {
+	[ICMP_ECHO] = ICMP_ECHOREPLY + 1,
+	[ICMP_ECHOREPLY] = ICMP_ECHO + 1,
+	[ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
+	[ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
+	[ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
+	[ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
+	[ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
+	[ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
+};
+
 static int icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
 			     const struct nf_conntrack_tuple *orig)
 {
-	/* Add 1; spaces filled with 0. */
-	static u_int8_t invmap[]
-		= { [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
-		    [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
-		    [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
-		    [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
-		    [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
-		    [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
-		    [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
-		    [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1};
-
 	if (orig->dst.u.icmp.type >= sizeof(invmap)
 	    || !invmap[orig->dst.u.icmp.type])
 		return 0;
@@ -120,11 +121,12 @@ static int icmp_packet(struct nf_conn *ct,
 static int icmp_new(struct nf_conn *conntrack,
 		    const struct sk_buff *skb, unsigned int dataoff)
 {
-	static u_int8_t valid_new[]
-		= { [ICMP_ECHO] = 1,
-		    [ICMP_TIMESTAMP] = 1,
-		    [ICMP_INFO_REQUEST] = 1,
-		    [ICMP_ADDRESS] = 1 };
+	static const u_int8_t valid_new[] = {
+		[ICMP_ECHO] = 1,
+		[ICMP_TIMESTAMP] = 1,
+		[ICMP_INFO_REQUEST] = 1,
+		[ICMP_ADDRESS] = 1
+	};
 
 	if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
 	    || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) {
@@ -168,7 +170,7 @@ icmp_error_message(struct sk_buff *skb,
 		return -NF_ACCEPT;
 	}
 
-	innerproto = nf_ct_find_proto(PF_INET, inside->ip.protocol);
+	innerproto = __nf_ct_proto_find(PF_INET, inside->ip.protocol);
 	dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp);
 	/* Are they talking about one of our connections? */
 	if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET,
@@ -281,6 +283,60 @@ checksum_skipped:
 	return icmp_error_message(skb, ctinfo, hooknum);
 }
 
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static int icmp_tuple_to_nfattr(struct sk_buff *skb,
+				const struct nf_conntrack_tuple *t)
+{
+	NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t),
+		&t->src.u.icmp.id);
+	NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t),
+		&t->dst.u.icmp.type);
+	NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t),
+		&t->dst.u.icmp.code);
+
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+static const size_t cta_min_proto[CTA_PROTO_MAX] = {
+	[CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t),
+	[CTA_PROTO_ICMP_CODE-1] = sizeof(u_int8_t),
+	[CTA_PROTO_ICMP_ID-1]   = sizeof(u_int16_t)
+};
+
+static int icmp_nfattr_to_tuple(struct nfattr *tb[],
+				struct nf_conntrack_tuple *tuple)
+{
+	if (!tb[CTA_PROTO_ICMP_TYPE-1]
+	    || !tb[CTA_PROTO_ICMP_CODE-1]
+	    || !tb[CTA_PROTO_ICMP_ID-1])
+		return -EINVAL;
+
+	if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
+		return -EINVAL;
+
+	tuple->dst.u.icmp.type = 
+			*(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]);
+	tuple->dst.u.icmp.code =
+			*(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]);
+	tuple->src.u.icmp.id =
+			*(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
+
+	if (tuple->dst.u.icmp.type >= sizeof(invmap)
+	    || !invmap[tuple->dst.u.icmp.type])
+		return -EINVAL;
+
+	return 0;
+}
+#endif
+
 struct nf_conntrack_protocol nf_conntrack_protocol_icmp =
 {
 	.list			= { NULL, NULL },
@@ -295,7 +351,12 @@ struct nf_conntrack_protocol nf_conntrack_protocol_icmp =
 	.new			= icmp_new,
 	.error			= icmp_error,
 	.destroy		= NULL,
-	.me			= NULL
+	.me			= NULL,
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.tuple_to_nfattr	= icmp_tuple_to_nfattr,
+	.nfattr_to_tuple	= icmp_nfattr_to_tuple,
+#endif
 };
 
 EXPORT_SYMBOL(nf_conntrack_protocol_icmp);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 0d7dc668db4..39d49dc333a 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -38,6 +38,7 @@
 #include <net/protocol.h>
 #include <net/tcp.h>
 #include <net/udp.h>
+#include <linux/inetdevice.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <net/sock.h>
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 4b0d7e4d626..f29a12da510 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -40,12 +40,12 @@
  */
  
 #include <linux/config.h> 
+#include <linux/types.h>
 #include <asm/atomic.h>
 #include <asm/byteorder.h>
 #include <asm/current.h>
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
-#include <linux/types.h>
 #include <linux/stddef.h>
 #include <linux/slab.h>
 #include <linux/errno.h>
@@ -255,6 +255,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
 		kfree_skb(skb);
 		return NET_RX_DROP;
 	}
+	nf_reset(skb);
 
 	skb_push(skb, skb->data - skb->nh.raw);
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f701a136a6a..d82c242ea70 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -240,9 +240,9 @@ static unsigned			rt_hash_mask;
 static int			rt_hash_log;
 static unsigned int		rt_hash_rnd;
 
-static struct rt_cache_stat *rt_cache_stat;
-#define RT_CACHE_STAT_INC(field)					  \
-		(per_cpu_ptr(rt_cache_stat, raw_smp_processor_id())->field++)
+static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
+#define RT_CACHE_STAT_INC(field) \
+	(per_cpu(rt_cache_stat, raw_smp_processor_id()).field++)
 
 static int rt_intern_hash(unsigned hash, struct rtable *rth,
 				struct rtable **res);
@@ -401,7 +401,7 @@ static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
 		if (!cpu_possible(cpu))
 			continue;
 		*pos = cpu+1;
-		return per_cpu_ptr(rt_cache_stat, cpu);
+		return &per_cpu(rt_cache_stat, cpu);
 	}
 	return NULL;
 }
@@ -414,7 +414,7 @@ static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 		if (!cpu_possible(cpu))
 			continue;
 		*pos = cpu+1;
-		return per_cpu_ptr(rt_cache_stat, cpu);
+		return &per_cpu(rt_cache_stat, cpu);
 	}
 	return NULL;
 	
@@ -3160,10 +3160,6 @@ int __init ip_rt_init(void)
 	ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1);
 	ip_rt_max_size = (rt_hash_mask + 1) * 16;
 
-	rt_cache_stat = alloc_percpu(struct rt_cache_stat);
-	if (!rt_cache_stat)
-		return -ENOMEM;
-
 	devinet_init();
 	ip_fib_init();
 
@@ -3191,7 +3187,6 @@ int __init ip_rt_init(void)
 	if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) ||
 	    !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO, 
 			    		     proc_net_stat))) {
-		free_percpu(rt_cache_stat);
 		return -ENOMEM;
 	}
 	rtstat_pde->proc_fops = &rt_cpu_seq_fops;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index a34e60ea48a..e20be3331f6 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -173,10 +173,10 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 					   struct request_sock *req,
 					   struct dst_entry *dst)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct sock *child;
 
-	child = tp->af_specific->syn_recv_sock(sk, skb, req, dst);
+	child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
 	if (child)
 		inet_csk_reqsk_queue_add(sk, req, child);
 	else
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 01444a02b48..16984d4a8a0 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -12,6 +12,7 @@
 #include <linux/sysctl.h>
 #include <linux/config.h>
 #include <linux/igmp.h>
+#include <linux/inetdevice.h>
 #include <net/snmp.h>
 #include <net/icmp.h>
 #include <net/ip.h>
@@ -22,6 +23,7 @@
 extern int sysctl_ip_nonlocal_bind;
 
 #ifdef CONFIG_SYSCTL
+static int zero;
 static int tcp_retr1_max = 255; 
 static int ip_local_port_range_min[] = { 1, 1 };
 static int ip_local_port_range_max[] = { 65535, 65535 };
@@ -614,6 +616,15 @@ ctl_table ipv4_table[] = {
 		.strategy	= &sysctl_jiffies
 	},
 	{
+		.ctl_name	= NET_IPV4_IPFRAG_MAX_DIST,
+		.procname	= "ipfrag_max_dist",
+		.data		= &sysctl_ipfrag_max_dist,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.extra1		= &zero
+	},
+	{
 		.ctl_name	= NET_TCP_NO_METRICS_SAVE,
 		.procname	= "tcp_no_metrics_save",
 		.data		= &sysctl_tcp_nometrics_save,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ef98b14ac56..00aa80e9324 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1696,8 +1696,8 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
 	int err = 0;
 
 	if (level != SOL_TCP)
-		return tp->af_specific->setsockopt(sk, level, optname,
-						   optval, optlen);
+		return icsk->icsk_af_ops->setsockopt(sk, level, optname,
+						     optval, optlen);
 
 	/* This is a string value all the others are int's */
 	if (optname == TCP_CONGESTION) {
@@ -1914,7 +1914,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime);
 	info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp);
 
-	info->tcpi_pmtu = tp->pmtu_cookie;
+	info->tcpi_pmtu = icsk->icsk_pmtu_cookie;
 	info->tcpi_rcv_ssthresh = tp->rcv_ssthresh;
 	info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3;
 	info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2;
@@ -1939,8 +1939,8 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
 	int val, len;
 
 	if (level != SOL_TCP)
-		return tp->af_specific->getsockopt(sk, level, optname,
-						   optval, optlen);
+		return icsk->icsk_af_ops->getsockopt(sk, level, optname,
+						     optval, optlen);
 
 	if (get_user(len, optlen))
 		return -EFAULT;
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 1d0cd86621b..035f2092d73 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -30,8 +30,6 @@ static int fast_convergence = 1;
 static int max_increment = 16;
 static int low_window = 14;
 static int beta = 819;		/* = 819/1024 (BICTCP_BETA_SCALE) */
-static int low_utilization_threshold = 153;
-static int low_utilization_period = 2;
 static int initial_ssthresh = 100;
 static int smooth_part = 20;
 
@@ -43,10 +41,6 @@ module_param(low_window, int, 0644);
 MODULE_PARM_DESC(low_window, "lower bound on congestion window (for TCP friendliness)");
 module_param(beta, int, 0644);
 MODULE_PARM_DESC(beta, "beta for multiplicative increase");
-module_param(low_utilization_threshold, int, 0644);
-MODULE_PARM_DESC(low_utilization_threshold, "percent (scaled by 1024) for low utilization mode");
-module_param(low_utilization_period, int, 0644);
-MODULE_PARM_DESC(low_utilization_period, "if average delay exceeds then goto to low utilization mode (seconds)");
 module_param(initial_ssthresh, int, 0644);
 MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");
 module_param(smooth_part, int, 0644);
@@ -60,11 +54,6 @@ struct bictcp {
 	u32	loss_cwnd;	/* congestion window at last loss */
 	u32	last_cwnd;	/* the last snd_cwnd */
 	u32	last_time;	/* time when updated last_cwnd */
-	u32	delay_min;	/* min delay */
-	u32	delay_max;	/* max delay */
-	u32	last_delay;
-	u8	low_utilization;/* 0: high; 1: low */
-	u32	low_utilization_start;	/* starting time of low utilization detection*/
 	u32	epoch_start;	/* beginning of an epoch */
 #define ACK_RATIO_SHIFT	4
 	u32	delayed_ack;	/* estimate the ratio of Packets/ACKs << 4 */
@@ -77,11 +66,6 @@ static inline void bictcp_reset(struct bictcp *ca)
 	ca->loss_cwnd = 0;
 	ca->last_cwnd = 0;
 	ca->last_time = 0;
-	ca->delay_min = 0;
-	ca->delay_max = 0;
-	ca->last_delay = 0;
-	ca->low_utilization = 0;
-	ca->low_utilization_start = 0;
 	ca->epoch_start = 0;
 	ca->delayed_ack = 2 << ACK_RATIO_SHIFT;
 }
@@ -143,8 +127,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
 	}
 
 	/* if in slow start or link utilization is very low */
-	if ( ca->loss_cwnd == 0 ||
-	     (cwnd > ca->loss_cwnd && ca->low_utilization)) {
+	if (ca->loss_cwnd == 0) {
 		if (ca->cnt > 20) /* increase cwnd 5% per RTT */
 			ca->cnt = 20;
 	}
@@ -154,69 +137,12 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
 		ca->cnt = 1;
 }
 
-
-/* Detect low utilization in congestion avoidance */
-static inline void bictcp_low_utilization(struct sock *sk, int flag)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	struct bictcp *ca = inet_csk_ca(sk);
-	u32 dist, delay;
-
-	/* No time stamp */
-	if (!(tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) ||
-	     /* Discard delay samples right after fast recovery */
-	     tcp_time_stamp < ca->epoch_start + HZ ||
-	     /* this delay samples may not be accurate */
-	     flag == 0) {
-		ca->last_delay = 0;
-		goto notlow;
-	}
-
-	delay = ca->last_delay<<3;	/* use the same scale as tp->srtt*/
-	ca->last_delay = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
-	if (delay == 0) 		/* no previous delay sample */
-		goto notlow;
-
-	/* first time call or link delay decreases */
-	if (ca->delay_min == 0 || ca->delay_min > delay) {
-		ca->delay_min = ca->delay_max = delay;
-		goto notlow;
-	}
-
-	if (ca->delay_max < delay)
-		ca->delay_max = delay;
-
-	/* utilization is low, if avg delay < dist*threshold
-	   for checking_period time */
-	dist = ca->delay_max - ca->delay_min;
-	if (dist <= ca->delay_min>>6 ||
-	    tp->srtt - ca->delay_min >=  (dist*low_utilization_threshold)>>10)
-		goto notlow;
-
-	if (ca->low_utilization_start == 0) {
-		ca->low_utilization = 0;
-		ca->low_utilization_start = tcp_time_stamp;
-	} else if ((s32)(tcp_time_stamp - ca->low_utilization_start)
-			> low_utilization_period*HZ) {
-		ca->low_utilization = 1;
-	}
-
-	return;
-
- notlow:
-	ca->low_utilization = 0;
-	ca->low_utilization_start = 0;
-
-}
-
 static void bictcp_cong_avoid(struct sock *sk, u32 ack,
 			      u32 seq_rtt, u32 in_flight, int data_acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bictcp *ca = inet_csk_ca(sk);
 
-	bictcp_low_utilization(sk, data_acked);
-
 	if (!tcp_is_cwnd_limited(sk, in_flight))
 		return;
 
@@ -249,11 +175,6 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk)
 
 	ca->epoch_start = 0;	/* end of epoch */
 
-	/* in case of wrong delay_max*/
-	if (ca->delay_min > 0 && ca->delay_max > ca->delay_min)
-		ca->delay_max = ca->delay_min
-			+ ((ca->delay_max - ca->delay_min)* 90) / 100;
-
 	/* Wmax and fast convergence */
 	if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
 		ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
@@ -289,14 +210,14 @@ static void bictcp_state(struct sock *sk, u8 new_state)
 		bictcp_reset(inet_csk_ca(sk));
 }
 
-/* Track delayed acknowledgement ratio using sliding window
+/* Track delayed acknowledgment ratio using sliding window
  * ratio = (15*ratio + sample) / 16
  */
 static void bictcp_acked(struct sock *sk, u32 cnt)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 
-	if (cnt > 0 && 	icsk->icsk_ca_state == TCP_CA_Open) {
+	if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) {
 		struct bictcp *ca = inet_csk_ca(sk);
 		cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT;
 		ca->delayed_ack += cnt;
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index c7cc62c8dc1..e688c687d62 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -174,6 +174,34 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
 	return err;
 }
 
+
+/*
+ * Slow start: grow cwnd by one segment per call, or two under ABC (RFC3465)
+ */
+void tcp_slow_start(struct tcp_sock *tp)
+{
+	if (sysctl_tcp_abc) {
+		/* RFC3465: Slow Start
+		 * TCP sender SHOULD increase cwnd by the number of
+		 * previously unacknowledged bytes ACKed by each incoming
+		 * acknowledgment, provided the increase is not more than L
+		 */
+		if (tp->bytes_acked < tp->mss_cache)
+			return;
+
+		/* L = 2*SMSS: exactly two full segments ACKed also earns +2 */
+		if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache) {
+			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+				tp->snd_cwnd++;
+		}
+	}
+	tp->bytes_acked = 0;
+
+	if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+		tp->snd_cwnd++;
+}
+EXPORT_SYMBOL_GPL(tcp_slow_start);
+
 /*
  * TCP Reno congestion control
  * This is special case used for fallback as well.
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
new file mode 100644
index 00000000000..31a4986dfbf
--- /dev/null
+++ b/net/ipv4/tcp_cubic.c
@@ -0,0 +1,411 @@
+/*
+ * TCP CUBIC: Binary Increase Congestion control for TCP v2.0
+ *
+ * This is from the implementation of CUBIC TCP in
+ * Injong Rhee, Lisong Xu.
+ *  "CUBIC: A New TCP-Friendly High-Speed TCP Variant"
+ *  in PFLDnet 2005
+ * Available from:
+ *  http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/cubic-paper.pdf
+ *
+ * Unless CUBIC is enabled and congestion window is large
+ * this behaves the same as the original Reno.
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <net/tcp.h>
+#include <asm/div64.h>
+
+#define BICTCP_BETA_SCALE    1024	/* Scale factor beta calculation
+					 * max_cwnd = snd_cwnd * beta
+					 */
+#define BICTCP_B		4	 /*
+					  * In binary search,
+					  * go to point (max+min)/N
+					  */
+#define	BICTCP_HZ		10	/* BIC HZ 2^10 = 1024 */
+
+static int fast_convergence = 1;
+static int max_increment = 16;
+static int beta = 819;		/* = 819/1024 (BICTCP_BETA_SCALE) */
+static int initial_ssthresh = 100;
+static int bic_scale = 41;
+static int tcp_friendliness = 1;
+
+static u32 cube_rtt_scale;
+static u32 beta_scale;
+static u64 cube_factor;
+
+/* Note parameters that are used for precomputing scale factors are read-only */
+module_param(fast_convergence, int, 0644);
+MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence");
+module_param(max_increment, int, 0644);
+MODULE_PARM_DESC(max_increment, "Limit on increment allowed during binary search");
+module_param(beta, int, 0444);
+MODULE_PARM_DESC(beta, "beta for multiplicative increase");
+module_param(initial_ssthresh, int, 0644);
+MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");
+module_param(bic_scale, int, 0444);
+MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)");
+module_param(tcp_friendliness, int, 0644);
+MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
+
+#include <asm/div64.h>
+
+/* BIC TCP Parameters */
+struct bictcp {
+	u32	cnt;		/* increase cwnd by 1 after ACKs */
+	u32 	last_max_cwnd;	/* last maximum snd_cwnd */
+	u32	loss_cwnd;	/* congestion window at last loss */
+	u32	last_cwnd;	/* the last snd_cwnd */
+	u32	last_time;	/* time when updated last_cwnd */
+	u32	bic_origin_point;/* origin point of bic function */
+	u32	bic_K;		/* time to origin point from the beginning of the current epoch */
+	u32	delay_min;	/* min delay */
+	u32	epoch_start;	/* beginning of an epoch */
+	u32	ack_cnt;	/* number of acks */
+	u32	tcp_cwnd;	/* estimated tcp cwnd */
+#define ACK_RATIO_SHIFT	4
+	u32	delayed_ack;	/* estimate the ratio of Packets/ACKs << 4 */
+};
+
+static inline void bictcp_reset(struct bictcp *ca)
+{
+	ca->cnt = 0;
+	ca->last_max_cwnd = 0;
+	ca->loss_cwnd = 0;
+	ca->last_cwnd = 0;
+	ca->last_time = 0;
+	ca->bic_origin_point = 0;
+	ca->bic_K = 0;
+	ca->delay_min = 0;
+	ca->epoch_start = 0;
+	ca->delayed_ack = 2 << ACK_RATIO_SHIFT;
+	ca->ack_cnt = 0;
+	ca->tcp_cwnd = 0;
+}
+
+static void bictcp_init(struct sock *sk)
+{
+	bictcp_reset(inet_csk_ca(sk));
+	if (initial_ssthresh)
+		tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
+}
+
+/* 64bit divisor, dividend and result. dynamic precision */
+static inline u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
+{
+	u_int32_t d = divisor;
+
+	if (divisor > 0xffffffffULL) {
+		unsigned int shift = fls(divisor >> 32);
+
+		d = divisor >> shift;
+		dividend >>= shift;
+	}
+
+	/* avoid 64 bit division if possible */
+	if (dividend >> 32)
+		do_div(dividend, d);
+	else
+		dividend = (uint32_t) dividend / d;
+
+	return dividend;
+}
+
+/*
+ * Approximate the cube root of a by Newton-Raphson iteration.
+ */
+static u32 cubic_root(u64 a)
+{
+	u32 x, x1;
+
+	/* Initial estimate is a power of two based on:
+	 * cbrt(a) = exp(log(a) / 3), using fls64(a) as log2(a)
+	 */
+	x = 1u << (fls64(a)/3);
+
+	/*
+	 * Newton step: x[k+1] = (2 * x[k] + a / x[k]^2) / 3.
+	 * The first estimate can be as large as 2^21, so x^2 must be
+	 * formed in 64 bits: a 32-bit product wraps to 0 at x = 2^16
+	 * and would make div64_64() divide by zero.
+	 */
+	do {
+		x1 = x;
+		x = (2 * x + (uint32_t) div64_64(a, (u64)x * x)) / 3;
+	} while (abs(x1 - x) > 1);
+
+	return x;
+}
+
+/*
+ * Compute congestion window to use.
+ */
+static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
+{
+	u64 offs;
+	u32 delta, t, bic_target, min_cnt, max_cnt;
+
+	ca->ack_cnt++;	/* count the number of ACKs */
+
+	if (ca->last_cwnd == cwnd &&
+	    (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32)
+		return;
+
+	ca->last_cwnd = cwnd;
+	ca->last_time = tcp_time_stamp;
+
+	if (ca->epoch_start == 0) {
+		ca->epoch_start = tcp_time_stamp;	/* record the beginning of an epoch */
+		ca->ack_cnt = 1;			/* start counting */
+		ca->tcp_cwnd = cwnd;			/* syn with cubic */
+
+		if (ca->last_max_cwnd <= cwnd) {
+			ca->bic_K = 0;
+			ca->bic_origin_point = cwnd;
+		} else {
+			/* Compute new K based on
+			 * (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3*bictcp_HZ)
+			 */
+			ca->bic_K = cubic_root(cube_factor
+					       * (ca->last_max_cwnd - cwnd));
+			ca->bic_origin_point = ca->last_max_cwnd;
+		}
+	}
+
+        /* cubic function - calc*/
+        /* calculate c * time^3 / rtt,
+         *  while considering overflow in calculation of time^3
+	 * (so time^3 is done by using 64 bit)
+	 * and without the support of division of 64bit numbers
+	 * (so all divisions are done by using 32 bit)
+         *  also NOTE the unit of those variables
+         *	  time  = (t - K) / 2^bictcp_HZ
+         *	  c = bic_scale >> 10
+	 * rtt  = (srtt >> 3) / HZ
+	 * !!! The following code does not have overflow problems,
+	 * if the cwnd < 1 million packets !!!
+         */
+
+	/* change the unit from HZ to bictcp_HZ */
+        t = ((tcp_time_stamp + ca->delay_min - ca->epoch_start)
+	     << BICTCP_HZ) / HZ;
+
+        if (t < ca->bic_K)		/* t - K */
+		offs = ca->bic_K - t;
+        else
+                offs = t - ca->bic_K;
+
+	/* c/rtt * (t-K)^3 */
+	delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ);
+        if (t < ca->bic_K)                                	/* below origin*/
+                bic_target = ca->bic_origin_point - delta;
+        else                                                	/* above origin*/
+                bic_target = ca->bic_origin_point + delta;
+
+        /* cubic function - calc bictcp_cnt*/
+        if (bic_target > cwnd) {
+		ca->cnt = cwnd / (bic_target - cwnd);
+        } else {
+                ca->cnt = 100 * cwnd;              /* very small increment*/
+        }
+
+	if (ca->delay_min > 0) {
+		/* max increment = Smax * rtt / 0.1  */
+		min_cnt = (cwnd * HZ * 8)/(10 * max_increment * ca->delay_min);
+		if (ca->cnt < min_cnt)
+			ca->cnt = min_cnt;
+	}
+
+        /* slow start and low utilization  */
+	if (ca->loss_cwnd == 0)		/* could be aggressive in slow start */
+		ca->cnt = 50;
+
+	/* TCP Friendly */
+	if (tcp_friendliness) {
+		u32 scale = beta_scale;
+		delta = (cwnd * scale) >> 3;
+	        while (ca->ack_cnt > delta) {		/* update tcp cwnd */
+	                ca->ack_cnt -= delta;
+        	        ca->tcp_cwnd++;
+		}
+
+		if (ca->tcp_cwnd > cwnd){	/* if bic is slower than tcp */
+			delta = ca->tcp_cwnd - cwnd;
+			max_cnt = cwnd / delta;
+			if (ca->cnt > max_cnt)
+				ca->cnt = max_cnt;
+		}
+        }
+
+	ca->cnt = (ca->cnt << ACK_RATIO_SHIFT) / ca->delayed_ack;
+	if (ca->cnt == 0)			/* cannot be zero */
+		ca->cnt = 1;
+}
+
+
+/* Keep track of minimum rtt */
+static inline void measure_delay(struct sock *sk)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+	struct bictcp *ca = inet_csk_ca(sk);
+	u32 delay;
+
+	/* No time stamp */
+	if (!(tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) ||
+	     /* Discard delay samples right after fast recovery */
+	    (s32)(tcp_time_stamp - ca->epoch_start) < HZ)
+		return;
+
+	delay = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
+	if (delay == 0)
+		delay = 1;
+
+	/* first time call or link delay decreases */
+	if (ca->delay_min == 0 || ca->delay_min > delay)
+		ca->delay_min = delay;
+}
+
+static void bictcp_cong_avoid(struct sock *sk, u32 ack,
+			      u32 seq_rtt, u32 in_flight, int data_acked)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bictcp *ca = inet_csk_ca(sk);
+
+	if (data_acked)
+		measure_delay(sk);
+
+	if (!tcp_is_cwnd_limited(sk, in_flight))
+		return;
+
+	if (tp->snd_cwnd <= tp->snd_ssthresh)
+		tcp_slow_start(tp);
+	else {
+		bictcp_update(ca, tp->snd_cwnd);
+
+		/* In dangerous area, increase slowly.
+		 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
+		 */
+		if (tp->snd_cwnd_cnt >= ca->cnt) {
+			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+				tp->snd_cwnd++;
+			tp->snd_cwnd_cnt = 0;
+		} else
+			tp->snd_cwnd_cnt++;
+	}
+
+}
+
+static u32 bictcp_recalc_ssthresh(struct sock *sk)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+	struct bictcp *ca = inet_csk_ca(sk);
+
+	ca->epoch_start = 0;	/* end of epoch */
+
+	/* Wmax and fast convergence */
+	if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
+		ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
+			/ (2 * BICTCP_BETA_SCALE);
+	else
+		ca->last_max_cwnd = tp->snd_cwnd;
+
+	ca->loss_cwnd = tp->snd_cwnd;
+
+	return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
+}
+
+static u32 bictcp_undo_cwnd(struct sock *sk)
+{
+	struct bictcp *ca = inet_csk_ca(sk);
+
+	return max(tcp_sk(sk)->snd_cwnd, ca->last_max_cwnd);
+}
+
+static u32 bictcp_min_cwnd(struct sock *sk)
+{
+	return tcp_sk(sk)->snd_ssthresh;
+}
+
+static void bictcp_state(struct sock *sk, u8 new_state)
+{
+	if (new_state == TCP_CA_Loss)
+		bictcp_reset(inet_csk_ca(sk));
+}
+
+/* Track delayed acknowledgment ratio using sliding window
+ * ratio = (15*ratio + sample) / 16
+ */
+static void bictcp_acked(struct sock *sk, u32 cnt)
+{
+	const struct inet_connection_sock *icsk = inet_csk(sk);
+
+	if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) {
+		struct bictcp *ca = inet_csk_ca(sk);
+		cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT;
+		ca->delayed_ack += cnt;
+	}
+}
+
+
+static struct tcp_congestion_ops cubictcp = {
+	.init		= bictcp_init,
+	.ssthresh	= bictcp_recalc_ssthresh,
+	.cong_avoid	= bictcp_cong_avoid,
+	.set_state	= bictcp_state,
+	.undo_cwnd	= bictcp_undo_cwnd,
+	.min_cwnd	= bictcp_min_cwnd,
+	.pkts_acked     = bictcp_acked,
+	.owner		= THIS_MODULE,
+	.name		= "cubic",
+};
+
+static int __init cubictcp_register(void)
+{
+	BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
+
+	/* Precompute a bunch of the scaling factors that are used per-packet
+	 * based on SRTT of 100ms
+	 */
+
+	beta_scale = 8*(BICTCP_BETA_SCALE+beta)/ 3 / (BICTCP_BETA_SCALE - beta);
+
+	cube_rtt_scale = (bic_scale << 3) / 10;	/* 1024*c/rtt */
+
+	/* calculate the "K" for (wmax-cwnd) = c/rtt * K^3
+	 *  so K = cubic_root( (wmax-cwnd)*rtt/c )
+	 * the unit of K is bictcp_HZ=2^10, not HZ
+	 *
+	 *  c = bic_scale >> 10
+	 *  rtt = 100ms
+	 *
+	 * the following code has been designed and tested for
+	 * cwnd < 1 million packets
+	 * RTT < 100 seconds
+	 * HZ < 1,000,00  (corresponding to 10 nano-second)
+	 */
+
+	/* 1/c * 2^(3*bictcp_HZ) * srtt */
+	cube_factor = 1ull << (10+3*BICTCP_HZ); /* 2^40 */
+
+	/* divide by bic_scale and by constant Srtt (100ms) */
+	do_div(cube_factor, bic_scale * 10);
+
+	return tcp_register_congestion_control(&cubictcp);
+}
+
+static void __exit cubictcp_unregister(void)
+{
+	tcp_unregister_congestion_control(&cubictcp);
+}
+
+module_init(cubictcp_register);
+module_exit(cubictcp_unregister);
+
+MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("CUBIC TCP");
+MODULE_VERSION("2.0");
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bf2e23086bc..a97ed5416c2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -115,8 +115,8 @@ int sysctl_tcp_abc = 1;
 /* Adapt the MSS value used to make delayed ack decision to the 
  * real world.
  */ 
-static inline void tcp_measure_rcv_mss(struct sock *sk,
-				       const struct sk_buff *skb)
+static void tcp_measure_rcv_mss(struct sock *sk,
+				const struct sk_buff *skb)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	const unsigned int lss = icsk->icsk_ack.last_seg_size; 
@@ -246,8 +246,8 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
 	return 0;
 }
 
-static inline void tcp_grow_window(struct sock *sk, struct tcp_sock *tp,
-				   struct sk_buff *skb)
+static void tcp_grow_window(struct sock *sk, struct tcp_sock *tp,
+			    struct sk_buff *skb)
 {
 	/* Check #1 */
 	if (tp->rcv_ssthresh < tp->window_clamp &&
@@ -341,6 +341,26 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
 		tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss);
 }
 
+
+/* Initialize RCV_MSS value.
+ * RCV_MSS is our guess about the MSS used by the peer.
+ * We have no direct information about the MSS.
+ * It's better to underestimate the RCV_MSS rather than overestimate.
+ * Overestimations make us ACK less frequently than needed.
+ * Underestimations are easier to detect and fix by tcp_measure_rcv_mss().
+ */
+void tcp_initialize_rcv_mss(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
+
+	hint = min(hint, tp->rcv_wnd/2);
+	hint = min(hint, TCP_MIN_RCVMSS);
+	hint = max(hint, TCP_MIN_MSS);
+
+	inet_csk(sk)->icsk_ack.rcv_mss = hint;
+}
+
 /* Receiver "autotuning" code.
  *
  * The algorithm for RTT estimation w/o timestamps is based on
@@ -735,6 +755,27 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
 	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
 }
 
+/* Set slow start threshold and cwnd not falling to slow start */
+void tcp_enter_cwr(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	tp->prior_ssthresh = 0;
+	tp->bytes_acked = 0;
+	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
+		tp->undo_marker = 0;
+		tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
+		tp->snd_cwnd = min(tp->snd_cwnd,
+				   tcp_packets_in_flight(tp) + 1U);
+		tp->snd_cwnd_cnt = 0;
+		tp->high_seq = tp->snd_nxt;
+		tp->snd_cwnd_stamp = tcp_time_stamp;
+		TCP_ECN_queue_cwr(tp);
+
+		tcp_set_ca_state(sk, TCP_CA_CWR);
+	}
+}
+
 /* Initialize metrics on socket. */
 
 static void tcp_init_metrics(struct sock *sk)
@@ -2070,8 +2111,8 @@ static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
 		tcp_ack_no_tstamp(sk, seq_rtt, flag);
 }
 
-static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
-				  u32 in_flight, int good)
+static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
+			   u32 in_flight, int good)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, good);
@@ -2082,7 +2123,7 @@ static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
  * RFC2988 recommends to restart timer to now+rto.
  */
 
-static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp)
+static void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp)
 {
 	if (!tp->packets_out) {
 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
@@ -2147,7 +2188,7 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
 	return acked;
 }
 
-static inline u32 tcp_usrtt(const struct sk_buff *skb)
+static u32 tcp_usrtt(const struct sk_buff *skb)
 {
 	struct timeval tv, now;
 
@@ -2342,7 +2383,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
 
 			if (nwin > tp->max_window) {
 				tp->max_window = nwin;
-				tcp_sync_mss(sk, tp->pmtu_cookie);
+				tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
 			}
 		}
 	}
@@ -2583,8 +2624,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 /* Fast parse options. This hopes to only see timestamps.
  * If it is wrong it falls back on tcp_parse_options().
  */
-static inline int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
-					 struct tcp_sock *tp)
+static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
+				  struct tcp_sock *tp)
 {
 	if (th->doff == sizeof(struct tcphdr)>>2) {
 		tp->rx_opt.saw_tstamp = 0;
@@ -2804,8 +2845,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
 	}
 }
 
-static __inline__ int
-tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq)
+static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq)
 {
 	if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
 		if (before(seq, sp->start_seq))
@@ -2817,7 +2857,7 @@ tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq)
 	return 0;
 }
 
-static inline void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq)
+static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq)
 {
 	if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) {
 		if (before(seq, tp->rcv_nxt))
@@ -2832,7 +2872,7 @@ static inline void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq)
 	}
 }
 
-static inline void tcp_dsack_extend(struct tcp_sock *tp, u32 seq, u32 end_seq)
+static void tcp_dsack_extend(struct tcp_sock *tp, u32 seq, u32 end_seq)
 {
 	if (!tp->rx_opt.dsack)
 		tcp_dsack_set(tp, seq, end_seq);
@@ -2890,7 +2930,7 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
 	}
 }
 
-static __inline__ void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2)
+static inline void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2)
 {
 	__u32 tmp;
 
@@ -3307,7 +3347,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 			int offset = start - TCP_SKB_CB(skb)->seq;
 			int size = TCP_SKB_CB(skb)->end_seq - start;
 
-			if (offset < 0) BUG();
+			BUG_ON(offset < 0);
 			if (size > 0) {
 				size = min(copy, size);
 				if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
@@ -3455,7 +3495,7 @@ void tcp_cwnd_application_limited(struct sock *sk)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-static inline int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp)
+static int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp)
 {
 	/* If the user specified a specific send buffer setting, do
 	 * not modify it.
@@ -3502,7 +3542,7 @@ static void tcp_new_space(struct sock *sk)
 	sk->sk_write_space(sk);
 }
 
-static inline void tcp_check_space(struct sock *sk)
+static void tcp_check_space(struct sock *sk)
 {
 	if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
 		sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
@@ -3512,7 +3552,7 @@ static inline void tcp_check_space(struct sock *sk)
 	}
 }
 
-static __inline__ void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp)
+static inline void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp)
 {
 	tcp_push_pending_frames(sk, tp);
 	tcp_check_space(sk);
@@ -3544,7 +3584,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
 	}
 }
 
-static __inline__ void tcp_ack_snd_check(struct sock *sk)
+static inline void tcp_ack_snd_check(struct sock *sk)
 {
 	if (!inet_csk_ack_scheduled(sk)) {
 		/* We sent a data segment already. */
@@ -3692,8 +3732,7 @@ static int __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
 	return result;
 }
 
-static __inline__ int
-tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
+static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
 {
 	return skb->ip_summed != CHECKSUM_UNNECESSARY &&
 		__tcp_checksum_complete_user(sk, skb);
@@ -3967,12 +4006,12 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 					 struct tcphdr *th, unsigned len)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	int saved_clamp = tp->rx_opt.mss_clamp;
 
 	tcp_parse_options(skb, &tp->rx_opt, 0);
 
 	if (th->ack) {
-		struct inet_connection_sock *icsk;
 		/* rfc793:
 		 * "If the state is SYN-SENT then
 		 *    first check the ACK bit
@@ -4061,7 +4100,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		if (tp->rx_opt.sack_ok && sysctl_tcp_fack)
 			tp->rx_opt.sack_ok |= 2;
 
-		tcp_sync_mss(sk, tp->pmtu_cookie);
+		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 		tcp_initialize_rcv_mss(sk);
 
 		/* Remember, tcp_poll() does not lock socket!
@@ -4072,7 +4111,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		tcp_set_state(sk, TCP_ESTABLISHED);
 
 		/* Make sure socket is routed, for correct metrics.  */
-		tp->af_specific->rebuild_header(sk);
+		icsk->icsk_af_ops->rebuild_header(sk);
 
 		tcp_init_metrics(sk);
 
@@ -4098,8 +4137,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 			sk_wake_async(sk, 0, POLL_OUT);
 		}
 
-		icsk = inet_csk(sk);
-
 		if (sk->sk_write_pending ||
 		    icsk->icsk_accept_queue.rskq_defer_accept ||
 		    icsk->icsk_ack.pingpong) {
@@ -4173,7 +4210,7 @@ discard:
 		if (tp->ecn_flags&TCP_ECN_OK)
 			sock_set_flag(sk, SOCK_NO_LARGESEND);
 
-		tcp_sync_mss(sk, tp->pmtu_cookie);
+		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 		tcp_initialize_rcv_mss(sk);
 
 
@@ -4220,6 +4257,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 			  struct tcphdr *th, unsigned len)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	int queued = 0;
 
 	tp->rx_opt.saw_tstamp = 0;
@@ -4236,7 +4274,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 			goto discard;
 
 		if(th->syn) {
-			if(tp->af_specific->conn_request(sk, skb) < 0)
+			if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
 				return 1;
 
 			/* Now we have several options: In theory there is 
@@ -4349,7 +4387,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 				/* Make sure socket is routed, for
 				 * correct metrics.
 				 */
-				tp->af_specific->rebuild_header(sk);
+				icsk->icsk_af_ops->rebuild_header(sk);
 
 				tcp_init_metrics(sk);
 
@@ -4475,3 +4513,4 @@ EXPORT_SYMBOL(sysctl_tcp_abc);
 EXPORT_SYMBOL(tcp_parse_options);
 EXPORT_SYMBOL(tcp_rcv_established);
 EXPORT_SYMBOL(tcp_rcv_state_process);
+EXPORT_SYMBOL(tcp_initialize_rcv_mss);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4d5021e1929..6ea353907af 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -69,6 +69,7 @@
 #include <net/transp_v6.h>
 #include <net/ipv6.h>
 #include <net/inet_common.h>
+#include <net/timewait_sock.h>
 #include <net/xfrm.h>
 
 #include <linux/inet.h>
@@ -86,8 +87,7 @@ int sysctl_tcp_low_latency;
 /* Socket used for sending RSTs */
 static struct socket *tcp_socket;
 
-void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
-		       struct sk_buff *skb);
+void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
 
 struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
 	.lhash_lock	= RW_LOCK_UNLOCKED,
@@ -97,7 +97,8 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
 
 static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
 {
-	return inet_csk_get_port(&tcp_hashinfo, sk, snum);
+	return inet_csk_get_port(&tcp_hashinfo, sk, snum,
+				 inet_csk_bind_conflict);
 }
 
 static void tcp_v4_hash(struct sock *sk)
@@ -118,202 +119,38 @@ static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
 					  skb->h.th->source);
 }
 
-/* called with local bh disabled */
-static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
-				      struct inet_timewait_sock **twp)
+int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 {
-	struct inet_sock *inet = inet_sk(sk);
-	u32 daddr = inet->rcv_saddr;
-	u32 saddr = inet->daddr;
-	int dif = sk->sk_bound_dev_if;
-	INET_ADDR_COOKIE(acookie, saddr, daddr)
-	const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
-	unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
-	struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
-	struct sock *sk2;
-	const struct hlist_node *node;
-	struct inet_timewait_sock *tw;
-
-	prefetch(head->chain.first);
-	write_lock(&head->lock);
-
-	/* Check TIME-WAIT sockets first. */
-	sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
-		tw = inet_twsk(sk2);
-
-		if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
-			const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
-			struct tcp_sock *tp = tcp_sk(sk);
-
-			/* With PAWS, it is safe from the viewpoint
-			   of data integrity. Even without PAWS it
-			   is safe provided sequence spaces do not
-			   overlap i.e. at data rates <= 80Mbit/sec.
-
-			   Actually, the idea is close to VJ's one,
-			   only timestamp cache is held not per host,
-			   but per port pair and TW bucket is used
-			   as state holder.
+	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
+	struct tcp_sock *tp = tcp_sk(sk);
 
-			   If TW bucket has been already destroyed we
-			   fall back to VJ's scheme and use initial
-			   timestamp retrieved from peer table.
-			 */
-			if (tcptw->tw_ts_recent_stamp &&
-			    (!twp || (sysctl_tcp_tw_reuse &&
-				      xtime.tv_sec -
-				      tcptw->tw_ts_recent_stamp > 1))) {
-				tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
-				if (tp->write_seq == 0)
-					tp->write_seq = 1;
-				tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
-				tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
-				sock_hold(sk2);
-				goto unique;
-			} else
-				goto not_unique;
-		}
-	}
-	tw = NULL;
+	/* With PAWS, it is safe from the viewpoint
+	   of data integrity. Even without PAWS it is safe provided sequence
+	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.
 
-	/* And established part... */
-	sk_for_each(sk2, node, &head->chain) {
-		if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
-			goto not_unique;
-	}
+	   Actually, the idea is close to VJ's one, only timestamp cache is
+	   held not per host, but per port pair and TW bucket is used as state
+	   holder.
 
-unique:
-	/* Must record num and sport now. Otherwise we will see
-	 * in hash table socket with a funny identity. */
-	inet->num = lport;
-	inet->sport = htons(lport);
-	sk->sk_hash = hash;
-	BUG_TRAP(sk_unhashed(sk));
-	__sk_add_node(sk, &head->chain);
-	sock_prot_inc_use(sk->sk_prot);
-	write_unlock(&head->lock);
-
-	if (twp) {
-		*twp = tw;
-		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
-	} else if (tw) {
-		/* Silly. Should hash-dance instead... */
-		inet_twsk_deschedule(tw, &tcp_death_row);
-		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
-
-		inet_twsk_put(tw);
+	   If TW bucket has been already destroyed we fall back to VJ's scheme
+	   and use initial timestamp retrieved from peer table.
+	 */
+	if (tcptw->tw_ts_recent_stamp &&
+	    (twp == NULL || (sysctl_tcp_tw_reuse &&
+			     xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
+		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
+		if (tp->write_seq == 0)
+			tp->write_seq = 1;
+		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
+		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
+		sock_hold(sktw);
+		return 1;
 	}
 
 	return 0;
-
-not_unique:
-	write_unlock(&head->lock);
-	return -EADDRNOTAVAIL;
 }
 
-static inline u32 connect_port_offset(const struct sock *sk)
-{
-	const struct inet_sock *inet = inet_sk(sk);
-
-	return secure_tcp_port_ephemeral(inet->rcv_saddr, inet->daddr, 
-					 inet->dport);
-}
-
-/*
- * Bind a port for a connect operation and hash it.
- */
-static inline int tcp_v4_hash_connect(struct sock *sk)
-{
-	const unsigned short snum = inet_sk(sk)->num;
- 	struct inet_bind_hashbucket *head;
- 	struct inet_bind_bucket *tb;
-	int ret;
-
- 	if (!snum) {
- 		int low = sysctl_local_port_range[0];
- 		int high = sysctl_local_port_range[1];
-		int range = high - low;
- 		int i;
-		int port;
-		static u32 hint;
-		u32 offset = hint + connect_port_offset(sk);
-		struct hlist_node *node;
- 		struct inet_timewait_sock *tw = NULL;
-
- 		local_bh_disable();
-		for (i = 1; i <= range; i++) {
-			port = low + (i + offset) % range;
- 			head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
- 			spin_lock(&head->lock);
-
- 			/* Does not bother with rcv_saddr checks,
- 			 * because the established check is already
- 			 * unique enough.
- 			 */
-			inet_bind_bucket_for_each(tb, node, &head->chain) {
- 				if (tb->port == port) {
- 					BUG_TRAP(!hlist_empty(&tb->owners));
- 					if (tb->fastreuse >= 0)
- 						goto next_port;
- 					if (!__tcp_v4_check_established(sk,
-									port,
-									&tw))
- 						goto ok;
- 					goto next_port;
- 				}
- 			}
-
- 			tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
- 			if (!tb) {
- 				spin_unlock(&head->lock);
- 				break;
- 			}
- 			tb->fastreuse = -1;
- 			goto ok;
-
- 		next_port:
- 			spin_unlock(&head->lock);
- 		}
- 		local_bh_enable();
-
- 		return -EADDRNOTAVAIL;
-
-ok:
-		hint += i;
-
- 		/* Head lock still held and bh's disabled */
- 		inet_bind_hash(sk, tb, port);
-		if (sk_unhashed(sk)) {
- 			inet_sk(sk)->sport = htons(port);
- 			__inet_hash(&tcp_hashinfo, sk, 0);
- 		}
- 		spin_unlock(&head->lock);
-
- 		if (tw) {
- 			inet_twsk_deschedule(tw, &tcp_death_row);;
- 			inet_twsk_put(tw);
- 		}
-
-		ret = 0;
-		goto out;
- 	}
-
- 	head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
- 	tb  = inet_csk(sk)->icsk_bind_hash;
-	spin_lock_bh(&head->lock);
-	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
-		__inet_hash(&tcp_hashinfo, sk, 0);
-		spin_unlock_bh(&head->lock);
-		return 0;
-	} else {
-		spin_unlock(&head->lock);
-		/* No definite answer... Walk to established hash table */
-		ret = __tcp_v4_check_established(sk, snum, NULL);
-out:
-		local_bh_enable();
-		return ret;
-	}
-}
+EXPORT_SYMBOL_GPL(tcp_twsk_unique);
 
 /* This will initiate an outgoing connection. */
 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
@@ -383,9 +220,9 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	inet->dport = usin->sin_port;
 	inet->daddr = daddr;
 
-	tp->ext_header_len = 0;
+	inet_csk(sk)->icsk_ext_hdr_len = 0;
 	if (inet->opt)
-		tp->ext_header_len = inet->opt->optlen;
+		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
 
 	tp->rx_opt.mss_clamp = 536;
 
@@ -395,7 +232,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	 * complete initialization after this.
 	 */
 	tcp_set_state(sk, TCP_SYN_SENT);
-	err = tcp_v4_hash_connect(sk);
+	err = inet_hash_connect(&tcp_death_row, sk);
 	if (err)
 		goto failure;
 
@@ -433,12 +270,10 @@ failure:
 /*
  * This routine does path mtu discovery as defined in RFC1191.
  */
-static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph,
-				     u32 mtu)
+static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
 {
 	struct dst_entry *dst;
 	struct inet_sock *inet = inet_sk(sk);
-	struct tcp_sock *tp = tcp_sk(sk);
 
 	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
 	 * send out by Linux are always <576bytes so they should go through
@@ -467,7 +302,7 @@ static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph,
 	mtu = dst_mtu(dst);
 
 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
-	    tp->pmtu_cookie > mtu) {
+	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
 		tcp_sync_mss(sk, mtu);
 
 		/* Resend the TCP packet because it's
@@ -644,10 +479,10 @@ out:
 }
 
 /* This routine computes an IPv4 TCP checksum. */
-void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
-		       struct sk_buff *skb)
+void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
 {
 	struct inet_sock *inet = inet_sk(sk);
+	struct tcphdr *th = skb->h.th;
 
 	if (skb->ip_summed == CHECKSUM_HW) {
 		th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
@@ -826,7 +661,8 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
 	kfree(inet_rsk(req)->opt);
 }
 
-static inline void syn_flood_warning(struct sk_buff *skb)
+#ifdef CONFIG_SYN_COOKIES
+static void syn_flood_warning(struct sk_buff *skb)
 {
 	static unsigned long warntime;
 
@@ -837,12 +673,13 @@ static inline void syn_flood_warning(struct sk_buff *skb)
 		       ntohs(skb->h.th->dest));
 	}
 }
+#endif
 
 /*
  * Save and compile IPv4 options into the request_sock if needed.
  */
-static inline struct ip_options *tcp_v4_save_options(struct sock *sk,
-						     struct sk_buff *skb)
+static struct ip_options *tcp_v4_save_options(struct sock *sk,
+					      struct sk_buff *skb)
 {
 	struct ip_options *opt = &(IPCB(skb)->opt);
 	struct ip_options *dopt = NULL;
@@ -869,6 +706,11 @@ struct request_sock_ops tcp_request_sock_ops = {
 	.send_reset	=	tcp_v4_send_reset,
 };
 
+static struct timewait_sock_ops tcp_timewait_sock_ops = {
+	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
+	.twsk_unique	= tcp_twsk_unique,
+};
+
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	struct inet_request_sock *ireq;
@@ -1053,9 +895,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	ireq->opt	      = NULL;
 	newinet->mc_index     = inet_iif(skb);
 	newinet->mc_ttl	      = skb->nh.iph->ttl;
-	newtp->ext_header_len = 0;
+	inet_csk(newsk)->icsk_ext_hdr_len = 0;
 	if (newinet->opt)
-		newtp->ext_header_len = newinet->opt->optlen;
+		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
 	newinet->id = newtp->write_seq ^ jiffies;
 
 	tcp_sync_mss(newsk, dst_mtu(dst));
@@ -1238,6 +1080,7 @@ process:
 
 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
 		goto discard_and_relse;
+	nf_reset(skb);
 
 	if (sk_filter(sk, skb, 0))
 		goto discard_and_relse;
@@ -1314,16 +1157,6 @@ do_time_wait:
 	goto discard_it;
 }
 
-static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
-{
-	struct sockaddr_in *sin = (struct sockaddr_in *) uaddr;
-	struct inet_sock *inet = inet_sk(sk);
-
-	sin->sin_family		= AF_INET;
-	sin->sin_addr.s_addr	= inet->daddr;
-	sin->sin_port		= inet->dport;
-}
-
 /* VJ's idea. Save last timestamp seen from this destination
  * and hold it at least for normal timewait interval to use for duplicate
  * segment detection in subsequent connections, before they enter synchronized
@@ -1382,7 +1215,7 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
 	return 0;
 }
 
-struct tcp_func ipv4_specific = {
+struct inet_connection_sock_af_ops ipv4_specific = {
 	.queue_xmit	=	ip_queue_xmit,
 	.send_check	=	tcp_v4_send_check,
 	.rebuild_header	=	inet_sk_rebuild_header,
@@ -1392,7 +1225,7 @@ struct tcp_func ipv4_specific = {
 	.net_header_len	=	sizeof(struct iphdr),
 	.setsockopt	=	ip_setsockopt,
 	.getsockopt	=	ip_getsockopt,
-	.addr2sockaddr	=	v4_addr2sockaddr,
+	.addr2sockaddr	=	inet_csk_addr2sockaddr,
 	.sockaddr_len	=	sizeof(struct sockaddr_in),
 };
 
@@ -1433,7 +1266,8 @@ static int tcp_v4_init_sock(struct sock *sk)
 	sk->sk_write_space = sk_stream_write_space;
 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
 
-	tp->af_specific = &ipv4_specific;
+	icsk->icsk_af_ops = &ipv4_specific;
+	icsk->icsk_sync_mss = tcp_sync_mss;
 
 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
@@ -1989,7 +1823,7 @@ struct proto tcp_prot = {
 	.sysctl_rmem		= sysctl_tcp_rmem,
 	.max_header		= MAX_TCP_HEADER,
 	.obj_size		= sizeof(struct tcp_sock),
-	.twsk_obj_size		= sizeof(struct tcp_timewait_sock),
+	.twsk_prot		= &tcp_timewait_sock_ops,
 	.rsk_prot		= &tcp_request_sock_ops,
 };
 
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 1b66a2ac432..2b9b7f6c7f7 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -274,18 +274,18 @@ kill:
 void tcp_time_wait(struct sock *sk, int state, int timeo)
 {
 	struct inet_timewait_sock *tw = NULL;
+	const struct inet_connection_sock *icsk = inet_csk(sk);
 	const struct tcp_sock *tp = tcp_sk(sk);
 	int recycle_ok = 0;
 
 	if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
-		recycle_ok = tp->af_specific->remember_stamp(sk);
+		recycle_ok = icsk->icsk_af_ops->remember_stamp(sk);
 
 	if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets)
 		tw = inet_twsk_alloc(sk, state);
 
 	if (tw != NULL) {
 		struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
-		const struct inet_connection_sock *icsk = inet_csk(sk);
 		const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
 
 		tw->tw_rcv_wscale	= tp->rx_opt.rcv_wscale;
@@ -298,10 +298,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 		if (tw->tw_family == PF_INET6) {
 			struct ipv6_pinfo *np = inet6_sk(sk);
-			struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
+			struct inet6_timewait_sock *tw6;
 
-			ipv6_addr_copy(&tcp6tw->tw_v6_daddr, &np->daddr);
-			ipv6_addr_copy(&tcp6tw->tw_v6_rcv_saddr, &np->rcv_saddr);
+			tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
+			tw6 = inet6_twsk((struct sock *)tw);
+			ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr);
+			ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr);
 			tw->tw_ipv6only = np->ipv6only;
 		}
 #endif
@@ -456,7 +458,6 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 			   struct request_sock **prev)
 {
 	struct tcphdr *th = skb->h.th;
-	struct tcp_sock *tp = tcp_sk(sk);
 	u32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
 	int paws_reject = 0;
 	struct tcp_options_received tmp_opt;
@@ -613,7 +614,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 		 * ESTABLISHED STATE. If it will be dropped after
 		 * socket is created, wait for troubles.
 		 */
-		child = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
+		child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb,
+								 req, NULL);
 		if (child == NULL)
 			goto listen_overflow;
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b7325e0b406..a7623ead39a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -51,8 +51,8 @@ int sysctl_tcp_retrans_collapse = 1;
  */
 int sysctl_tcp_tso_win_divisor = 3;
 
-static inline void update_send_head(struct sock *sk, struct tcp_sock *tp,
-				    struct sk_buff *skb)
+static void update_send_head(struct sock *sk, struct tcp_sock *tp,
+			     struct sk_buff *skb)
 {
 	sk->sk_send_head = skb->next;
 	if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue)
@@ -124,8 +124,8 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst)
 	tp->snd_cwnd_used = 0;
 }
 
-static inline void tcp_event_data_sent(struct tcp_sock *tp,
-				       struct sk_buff *skb, struct sock *sk)
+static void tcp_event_data_sent(struct tcp_sock *tp,
+				struct sk_buff *skb, struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	const u32 now = tcp_time_stamp;
@@ -142,7 +142,7 @@ static inline void tcp_event_data_sent(struct tcp_sock *tp,
 		icsk->icsk_ack.pingpong = 1;
 }
 
-static __inline__ void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
+static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
 {
 	tcp_dec_quickack_mode(sk, pkts);
 	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
@@ -212,7 +212,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
  * value can be stuffed directly into th->window for an outgoing
  * frame.
  */
-static __inline__ u16 tcp_select_window(struct sock *sk)
+static u16 tcp_select_window(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 cur_win = tcp_receive_window(tp);
@@ -250,6 +250,75 @@ static __inline__ u16 tcp_select_window(struct sock *sk)
 	return new_win;
 }
 
+static void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp,
+					 __u32 tstamp)
+{
+	if (tp->rx_opt.tstamp_ok) {
+		*ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
+					  (TCPOPT_NOP << 16) |
+					  (TCPOPT_TIMESTAMP << 8) |
+					  TCPOLEN_TIMESTAMP);
+		*ptr++ = htonl(tstamp);
+		*ptr++ = htonl(tp->rx_opt.ts_recent);
+	}
+	if (tp->rx_opt.eff_sacks) {
+		struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks;
+		int this_sack;
+
+		*ptr++ = htonl((TCPOPT_NOP  << 24) |
+			       (TCPOPT_NOP  << 16) |
+			       (TCPOPT_SACK <<  8) |
+			       (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks *
+						     TCPOLEN_SACK_PERBLOCK)));
+		for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
+			*ptr++ = htonl(sp[this_sack].start_seq);
+			*ptr++ = htonl(sp[this_sack].end_seq);
+		}
+		if (tp->rx_opt.dsack) {
+			tp->rx_opt.dsack = 0;
+			tp->rx_opt.eff_sacks--;
+		}
+	}
+}
+
+/* Construct a tcp options header for a SYN or SYN_ACK packet.
+ * If this is ever changed make sure to change the definition of
+ * MAX_SYN_SIZE to match the new maximum number of options that you
+ * can generate.
+ */
+static void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack,
+				  int offer_wscale, int wscale, __u32 tstamp,
+				  __u32 ts_recent)
+{
+	/* We always get an MSS option.
+	 * The option bytes which will be seen in normal data
+	 * packets should timestamps be used, must be in the MSS
+	 * advertised.  But we subtract them from tp->mss_cache so
+	 * that calculations in tcp_sendmsg are simpler etc.
+	 * So account for this fact here if necessary.  If we
+	 * don't do this correctly, as a receiver we won't
+	 * recognize data packets as being full sized when we
+	 * should, and thus we won't abide by the delayed ACK
+	 * rules correctly.
+	 * SACKs don't matter, we never delay an ACK when we
+	 * have any of those going out.
+	 */
+	*ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
+	if (ts) {
+		if(sack)
+			*ptr++ = __constant_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) |
+						  (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
+		else
+			*ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+						  (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
+		*ptr++ = htonl(tstamp);		/* TSVAL */
+		*ptr++ = htonl(ts_recent);	/* TSECR */
+	} else if(sack)
+		*ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+					  (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM);
+	if (offer_wscale)
+		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale));
+}
 
 /* This routine actually transmits TCP packets queued in by
  * tcp_do_sendmsg().  This is used by both the initial
@@ -371,7 +440,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 		TCP_ECN_send(sk, tp, skb, tcp_header_size);
 	}
 
-	tp->af_specific->send_check(sk, th, skb->len, skb);
+	icsk->icsk_af_ops->send_check(sk, skb->len, skb);
 
 	if (likely(tcb->flags & TCPCB_FLAG_ACK))
 		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
@@ -381,7 +450,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
 	TCP_INC_STATS(TCP_MIB_OUTSEGS);
 
-	err = tp->af_specific->queue_xmit(skb, 0);
+	err = icsk->icsk_af_ops->queue_xmit(skb, 0);
 	if (unlikely(err <= 0))
 		return err;
 
@@ -621,7 +690,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
    It is minimum of user_mss and mss received with SYN.
    It also does not include TCP options.
 
-   tp->pmtu_cookie is last pmtu, seen by this function.
+   inet_csk(sk)->icsk_pmtu_cookie is last pmtu, seen by this function.
 
    tp->mss_cache is current effective sending mss, including
    all tcp options except for SACKs. It is evaluated,
@@ -631,26 +700,26 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
    NOTE1. rfc1122 clearly states that advertised MSS
    DOES NOT include either tcp or ip options.
 
-   NOTE2. tp->pmtu_cookie and tp->mss_cache are READ ONLY outside
-   this function.			--ANK (980731)
+   NOTE2. inet_csk(sk)->icsk_pmtu_cookie and tp->mss_cache
+   are READ ONLY outside this function.		--ANK (980731)
  */
 
 unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	int mss_now;
-
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	/* Calculate base mss without TCP options:
 	   It is MMS_S - sizeof(tcphdr) of rfc1122
 	 */
-	mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct tcphdr);
+	int mss_now = (pmtu - icsk->icsk_af_ops->net_header_len -
+		       sizeof(struct tcphdr));
 
 	/* Clamp it (mss_clamp does not include tcp options) */
 	if (mss_now > tp->rx_opt.mss_clamp)
 		mss_now = tp->rx_opt.mss_clamp;
 
 	/* Now subtract optional transport overhead */
-	mss_now -= tp->ext_header_len;
+	mss_now -= icsk->icsk_ext_hdr_len;
 
 	/* Then reserve room for full set of TCP options and 8 bytes of data */
 	if (mss_now < 48)
@@ -664,7 +733,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
 		mss_now = max((tp->max_window>>1), 68U - tp->tcp_header_len);
 
 	/* And store cached results */
-	tp->pmtu_cookie = pmtu;
+	icsk->icsk_pmtu_cookie = pmtu;
 	tp->mss_cache = mss_now;
 
 	return mss_now;
@@ -694,7 +763,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 
 	if (dst) {
 		u32 mtu = dst_mtu(dst);
-		if (mtu != tp->pmtu_cookie)
+		if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
 			mss_now = tcp_sync_mss(sk, mtu);
 	}
 
@@ -705,9 +774,10 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 	xmit_size_goal = mss_now;
 
 	if (doing_tso) {
-		xmit_size_goal = 65535 -
-			tp->af_specific->net_header_len -
-			tp->ext_header_len - tp->tcp_header_len;
+		xmit_size_goal = (65535 -
+				  inet_csk(sk)->icsk_af_ops->net_header_len -
+				  inet_csk(sk)->icsk_ext_hdr_len -
+				  tp->tcp_header_len);
 
 		if (tp->max_window &&
 		    (xmit_size_goal > (tp->max_window >> 1)))
@@ -723,7 +793,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 
 /* Congestion window validation. (RFC2861) */
 
-static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
+static void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
 {
 	__u32 packets_out = tp->packets_out;
 
@@ -772,7 +842,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *sk
 /* This must be invoked the first time we consider transmitting
  * SKB onto the wire.
  */
-static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
+static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
 {
 	int tso_segs = tcp_skb_pcount(skb);
 
@@ -1422,7 +1492,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	   (sysctl_tcp_retrans_collapse != 0))
 		tcp_retrans_try_collapse(sk, skb, cur_mss);
 
-	if(tp->af_specific->rebuild_header(sk))
+	if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
 		return -EHOSTUNREACH; /* Routing failure or similar. */
 
 	/* Some Solaris stacks overoptimize and ignore the FIN on a
@@ -1793,7 +1863,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 /* 
  * Do all connect socket setups that can be done AF independent.
  */ 
-static inline void tcp_connect_init(struct sock *sk)
+static void tcp_connect_init(struct sock *sk)
 {
 	struct dst_entry *dst = __sk_dst_get(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 13e7e6e8df1..3b740349505 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -330,6 +330,10 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
 		vegas->cntRTT = 0;
 		vegas->minRTT = 0x7fffffff;
 	}
+	/* Use normal slow start */
+	else if (tp->snd_cwnd <= tp->snd_ssthresh) 
+		tcp_slow_start(tp);
+	
 }
 
 /* Extract info for Tcp socket info provided via netlink. */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 2422a5f7195..00840474a44 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -86,6 +86,7 @@
 #include <linux/module.h>
 #include <linux/socket.h>
 #include <linux/sockios.h>
+#include <linux/igmp.h>
 #include <linux/in.h>
 #include <linux/errno.h>
 #include <linux/timer.h>
@@ -846,20 +847,7 @@ out:
 csum_copy_err:
 	UDP_INC_STATS_BH(UDP_MIB_INERRORS);
 
-	/* Clear queue. */
-	if (flags&MSG_PEEK) {
-		int clear = 0;
-		spin_lock_bh(&sk->sk_receive_queue.lock);
-		if (skb == skb_peek(&sk->sk_receive_queue)) {
-			__skb_unlink(skb, &sk->sk_receive_queue);
-			clear = 1;
-		}
-		spin_unlock_bh(&sk->sk_receive_queue.lock);
-		if (clear)
-			kfree_skb(skb);
-	}
-
-	skb_free_datagram(sk, skb);
+	skb_kill_datagram(sk, skb, flags);
 
 	if (noblock)
 		return -EAGAIN;	
@@ -1001,6 +989,7 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 		kfree_skb(skb);
 		return -1;
 	}
+	nf_reset(skb);
 
 	if (up->encap_type) {
 		/*
@@ -1094,7 +1083,7 @@ static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh,
  * Otherwise, csum completion requires chacksumming packet body,
  * including udp header and folding it to skb->csum.
  */
-static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
+static void udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
 			     unsigned short ulen, u32 saddr, u32 daddr)
 {
 	if (uh->check == 0) {
@@ -1108,7 +1097,6 @@ static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
 	/* Probably, we should checksum udp header (it should be in cache
 	 * in any case) and data in tiny packets (< rx copybreak).
 	 */
-	return 0;
 }
 
 /*
@@ -1141,8 +1129,7 @@ int udp_rcv(struct sk_buff *skb)
 	if (pskb_trim_rcsum(skb, ulen))
 		goto short_packet;
 
-	if (udp_checksum_init(skb, uh, ulen, saddr, daddr) < 0)
-		goto csum_error;
+	udp_checksum_init(skb, uh, ulen, saddr, daddr);
 
 	if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
 		return udp_v4_mcast_deliver(skb, uh, saddr, daddr);
@@ -1163,6 +1150,7 @@ int udp_rcv(struct sk_buff *skb)
 
 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
 		goto drop;
+	nf_reset(skb);
 
 	/* No socket. Drop packet silently, if checksum is wrong */
 	if (udp_checksum_complete(skb))
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 2d3849c38a0..850d919591d 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -11,6 +11,8 @@
 
 #include <linux/module.h>
 #include <linux/string.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
 #include <net/inet_ecn.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
@@ -45,6 +47,23 @@ static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq)
 	return xfrm_parse_spi(skb, nexthdr, spi, seq);
 }
 
+#ifdef CONFIG_NETFILTER
+static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
+{
+	struct iphdr *iph = skb->nh.iph;
+
+	if (skb->dst == NULL) {
+		if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
+		                   skb->dev))
+			goto drop;
+	}
+	return dst_input(skb);
+drop:
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
+#endif
+
 int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
 {
 	int err;
@@ -137,6 +156,8 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
 	memcpy(skb->sp->x+skb->sp->len, xfrm_vec, xfrm_nr*sizeof(struct sec_decap_state));
 	skb->sp->len += xfrm_nr;
 
+	nf_reset(skb);
+
 	if (decaps) {
 		if (!(skb->dev->flags&IFF_LOOPBACK)) {
 			dst_release(skb->dst);
@@ -145,7 +166,17 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
 		netif_rx(skb);
 		return 0;
 	} else {
+#ifdef CONFIG_NETFILTER
+		__skb_push(skb, skb->data - skb->nh.raw);
+		skb->nh.iph->tot_len = htons(skb->len);
+		ip_send_check(skb->nh.iph);
+
+		NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
+		        xfrm4_rcv_encap_finish);
+		return 0;
+#else
 		return -skb->nh.iph->protocol;
+#endif
 	}
 
 drop_unlock:
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 66620a95942..d4df0ddd424 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -8,8 +8,10 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/compiler.h>
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
+#include <linux/netfilter_ipv4.h>
 #include <net/inet_ecn.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
@@ -95,7 +97,7 @@ out:
 	return ret;
 }
 
-int xfrm4_output(struct sk_buff *skb)
+static int xfrm4_output_one(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb->dst;
 	struct xfrm_state *x = dst->xfrm;
@@ -113,27 +115,33 @@ int xfrm4_output(struct sk_buff *skb)
 			goto error_nolock;
 	}
 
-	spin_lock_bh(&x->lock);
-	err = xfrm_state_check(x, skb);
-	if (err)
-		goto error;
+	do {
+		spin_lock_bh(&x->lock);
+		err = xfrm_state_check(x, skb);
+		if (err)
+			goto error;
 
-	xfrm4_encap(skb);
+		xfrm4_encap(skb);
 
-	err = x->type->output(x, skb);
-	if (err)
-		goto error;
+		err = x->type->output(x, skb);
+		if (err)
+			goto error;
 
-	x->curlft.bytes += skb->len;
-	x->curlft.packets++;
+		x->curlft.bytes += skb->len;
+		x->curlft.packets++;
 
-	spin_unlock_bh(&x->lock);
+		spin_unlock_bh(&x->lock);
 	
-	if (!(skb->dst = dst_pop(dst))) {
-		err = -EHOSTUNREACH;
-		goto error_nolock;
-	}
-	err = NET_XMIT_BYPASS;
+		if (!(skb->dst = dst_pop(dst))) {
+			err = -EHOSTUNREACH;
+			goto error_nolock;
+		}
+		dst = skb->dst;
+		x = dst->xfrm;
+	} while (x && !x->props.mode);
+
+	IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
+	err = 0;
 
 out_exit:
 	return err;
@@ -143,3 +151,33 @@ error_nolock:
 	kfree_skb(skb);
 	goto out_exit;
 }
+
+int xfrm4_output_finish(struct sk_buff *skb)
+{
+	int err;
+
+	while (likely((err = xfrm4_output_one(skb)) == 0)) {
+		nf_reset(skb);
+
+		err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, &skb, NULL,
+			      skb->dst->dev, dst_output);
+		if (unlikely(err != 1))
+			break;
+
+		if (!skb->dst->xfrm)
+			return dst_output(skb);
+
+		err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL,
+			      skb->dst->dev, xfrm4_output_finish);
+		if (unlikely(err != 1))
+			break;
+	}
+
+	return err;
+}
+
+int xfrm4_output(struct sk_buff *skb)
+{
+	return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev,
+		       xfrm4_output_finish);
+}
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index b2b60f3e9cd..42196ba3b0b 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -182,6 +182,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl)
 		case IPPROTO_UDP:
 		case IPPROTO_TCP:
 		case IPPROTO_SCTP:
+		case IPPROTO_DCCP:
 			if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
 				u16 *ports = (u16 *)xprth;
 
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index d23e07fc81f..dbabf81a9b7 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -42,6 +42,21 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl,
 	x->props.saddr = tmpl->saddr;
 	if (x->props.saddr.a4 == 0)
 		x->props.saddr.a4 = saddr->a4;
+	if (tmpl->mode && x->props.saddr.a4 == 0) {
+		struct rtable *rt;
+	        struct flowi fl_tunnel = {
+        	        .nl_u = {
+        			.ip4_u = {
+					.daddr = x->id.daddr.a4,
+				}
+			}
+		};
+		if (!xfrm_dst_lookup((struct xfrm_dst **)&rt,
+		                     &fl_tunnel, AF_INET)) {
+			x->props.saddr.a4 = rt->rt_src;
+			dst_release(&rt->u.dst);
+		}
+	}
 	x->props.mode = tmpl->mode;
 	x->props.reqid = tmpl->reqid;
 	x->props.family = AF_INET;