Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/devinet.c                      |    3
-rw-r--r--  net/ipv4/fib_frontend.c                 |    2
-rw-r--r--  net/ipv4/fib_trie.c                     |    2
-rw-r--r--  net/ipv4/icmp.c                         |    5
-rw-r--r--  net/ipv4/ip_output.c                    |   91
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_core.c  |  132
-rw-r--r--  net/ipv4/netfilter/ipt_addrtype.c       |    2
-rw-r--r--  net/ipv4/proc.c                         |    4
-rw-r--r--  net/ipv4/tcp_input.c                    |    1
-rw-r--r--  net/ipv4/tcp_output.c                   |   12
10 files changed, 186 insertions(+), 68 deletions(-)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 74f2207e131..4ec4b2ca6ab 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -715,6 +715,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
 			break;
 		ret = 0;
 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
+			u32 old_mask = ifa->ifa_mask;
 			inet_del_ifa(in_dev, ifap, 0);
 			ifa->ifa_mask = sin->sin_addr.s_addr;
 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
@@ -728,7 +729,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
 			if ((dev->flags & IFF_BROADCAST) &&
 			    (ifa->ifa_prefixlen < 31) &&
 			    (ifa->ifa_broadcast ==
-			     (ifa->ifa_local|~ifa->ifa_mask))) {
+			     (ifa->ifa_local|~old_mask))) {
 				ifa->ifa_broadcast = (ifa->ifa_local |
 						      ~sin->sin_addr.s_addr);
 			}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index e61bc7177eb..990633c09df 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -591,7 +591,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
 		break;
 	case NETDEV_DOWN:
 		fib_del_ifaddr(ifa);
-		if (ifa->ifa_dev && ifa->ifa_dev->ifa_list == NULL) {
+		if (ifa->ifa_dev->ifa_list == NULL) {
 			/* Last address was deleted from this
 			   interface. Disable IP.
 			 */
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 0093ea08c7f..66247f38b37 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2404,7 +2404,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
 		prefix = htonl(l->key);
 
 		list_for_each_entry_rcu(fa, &li->falh, fa_list) {
-			const struct fib_info *fi = rcu_dereference(fa->fa_info);
+			const struct fib_info *fi = fa->fa_info;
 			unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
 
 			if (fa->fa_type == RTN_BROADCAST
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 90dca711ac9..175e093ec56 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1108,12 +1108,9 @@ void __init icmp_init(struct net_proto_family *ops)
 	struct inet_sock *inet;
 	int i;
 
-	for (i = 0; i < NR_CPUS; i++) {
+	for_each_cpu(i) {
 		int err;
 
-		if (!cpu_possible(i))
-			continue;
-
 		err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP,
 				       &per_cpu(__icmp_socket, i));
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 3f1a263e124..17758234a3e 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -275,7 +275,8 @@ int ip_output(struct sk_buff *skb)
 {
 	IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
 
-	if (skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->tso_size)
+	if (skb->len > dst_mtu(skb->dst) &&
+	    !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
 		return ip_fragment(skb, ip_finish_output);
 	else
 		return ip_finish_output(skb);
@@ -391,6 +392,9 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	to->nfct = from->nfct;
 	nf_conntrack_get(to->nfct);
 	to->nfctinfo = from->nfctinfo;
+#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
+	to->ipvs_property = from->ipvs_property;
+#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	nf_bridge_put(to->nf_bridge);
 	to->nf_bridge = from->nf_bridge;
@@ -685,6 +689,60 @@ csum_page(struct page *page, int offset, int copy)
 	return csum;
 }
 
+inline int ip_ufo_append_data(struct sock *sk,
+			int getfrag(void *from, char *to, int offset, int len,
+				    int odd, struct sk_buff *skb),
+			void *from, int length, int hh_len, int fragheaderlen,
+			int transhdrlen, int mtu, unsigned int flags)
+{
+	struct sk_buff *skb;
+	int err;
+
+	/* There is support for UDP fragmentation offload by the network
+	 * device, so create one single skb packet containing the complete
+	 * udp datagram
+	 */
+	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
+		skb = sock_alloc_send_skb(sk,
+			hh_len + fragheaderlen + transhdrlen + 20,
+			(flags & MSG_DONTWAIT), &err);
+
+		if (skb == NULL)
+			return err;
+
+		/* reserve space for Hardware header */
+		skb_reserve(skb, hh_len);
+
+		/* create space for UDP/IP header */
+		skb_put(skb, fragheaderlen + transhdrlen);
+
+		/* initialize network header pointer */
+		skb->nh.raw = skb->data;
+
+		/* initialize protocol header pointer */
+		skb->h.raw = skb->data + fragheaderlen;
+
+		skb->ip_summed = CHECKSUM_HW;
+		skb->csum = 0;
+		sk->sk_sndmsg_off = 0;
+	}
+
+	err = skb_append_datato_frags(sk, skb, getfrag, from,
+				      (length - transhdrlen));
+	if (!err) {
+		/* specify the length of each IP datagram fragment */
+		skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen);
+		__skb_queue_tail(&sk->sk_write_queue, skb);
+
+		return 0;
+	}
+	/* There is not enough support to do UFO,
+	 * so follow the normal path
+	 */
+	kfree_skb(skb);
+	return err;
+}
+
 /*
  * ip_append_data() and ip_append_page() can make one large IP datagram
  * from many pieces of data. Each pieces will be holded on the socket
@@ -774,6 +832,15 @@ int ip_append_data(struct sock *sk,
 		csummode = CHECKSUM_HW;
 
 	inet->cork.length += length;
+	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
+	    (rt->u.dst.dev->features & NETIF_F_UFO)) {
+
+		if (ip_ufo_append_data(sk, getfrag, from, length, hh_len,
+				       fragheaderlen, transhdrlen, mtu, flags))
+			goto error;
+
+		return 0;
+	}
 
 	/* So, what's going on in the loop below?
 	 *
@@ -1005,14 +1072,23 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
 		return -EINVAL;
 
 	inet->cork.length += size;
+	if ((sk->sk_protocol == IPPROTO_UDP) &&
+	    (rt->u.dst.dev->features & NETIF_F_UFO))
+		skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen);
+
 
 	while (size > 0) {
 		int i;
 
-		/* Check if the remaining data fits into current packet. */
-		len = mtu - skb->len;
-		if (len < size)
-			len = maxfraglen - skb->len;
+		if (skb_shinfo(skb)->ufo_size)
+			len = size;
+		else {
+
+			/* Check if the remaining data fits into current packet. */
+			len = mtu - skb->len;
+			if (len < size)
+				len = maxfraglen - skb->len;
+		}
 		if (len <= 0) {
 			struct sk_buff *skb_prev;
 			char *data;
@@ -1020,10 +1096,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
 			int alloclen;
 
 			skb_prev = skb;
-			if (skb_prev)
-				fraggap = skb_prev->len - maxfraglen;
-			else
-				fraggap = 0;
+			fraggap = skb_prev->len - maxfraglen;
 
 			alloclen = fragheaderlen + hh_len + fraggap + 15;
 			skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 07a80b56e8d..422ab68ee7f 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -50,7 +50,7 @@
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
 #include <linux/netfilter_ipv4/listhelp.h>
 
-#define IP_CONNTRACK_VERSION	"2.3"
+#define IP_CONNTRACK_VERSION	"2.4"
 
 #if 0
 #define DEBUGP printk
@@ -148,16 +148,20 @@ DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
 static int ip_conntrack_hash_rnd_initted;
 static unsigned int ip_conntrack_hash_rnd;
 
-static u_int32_t
-hash_conntrack(const struct ip_conntrack_tuple *tuple)
+static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple,
+				  unsigned int size, unsigned int rnd)
 {
-#if 0
-	dump_tuple(tuple);
-#endif
 	return (jhash_3words(tuple->src.ip,
			     (tuple->dst.ip ^ tuple->dst.protonum),
			     (tuple->src.u.all | (tuple->dst.u.all << 16)),
-			     ip_conntrack_hash_rnd) % ip_conntrack_htable_size);
+			     rnd) % size);
+}
+
+static u_int32_t
+hash_conntrack(const struct ip_conntrack_tuple *tuple)
+{
+	return __hash_conntrack(tuple, ip_conntrack_htable_size,
+				ip_conntrack_hash_rnd);
 }
 
 int
@@ -1341,14 +1345,13 @@ static int kill_all(struct ip_conntrack *i, void *data)
 	return 1;
 }
 
-static void free_conntrack_hash(void)
+static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size)
 {
-	if (ip_conntrack_vmalloc)
-		vfree(ip_conntrack_hash);
+	if (vmalloced)
+		vfree(hash);
 	else
-		free_pages((unsigned long)ip_conntrack_hash,
-			   get_order(sizeof(struct list_head)
-				     * ip_conntrack_htable_size));
+		free_pages((unsigned long)hash,
+			   get_order(sizeof(struct list_head) * size));
 }
 
 void ip_conntrack_flush()
@@ -1378,12 +1381,83 @@ void ip_conntrack_cleanup(void)
 	ip_conntrack_flush();
 	kmem_cache_destroy(ip_conntrack_cachep);
 	kmem_cache_destroy(ip_conntrack_expect_cachep);
-	free_conntrack_hash();
+	free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
+			    ip_conntrack_htable_size);
 	nf_unregister_sockopt(&so_getorigdst);
 }
 
-static int hashsize;
-module_param(hashsize, int, 0400);
+static struct list_head *alloc_hashtable(int size, int *vmalloced)
+{
+	struct list_head *hash;
+	unsigned int i;
+
+	*vmalloced = 0;
+	hash = (void *)__get_free_pages(GFP_KERNEL,
+					get_order(sizeof(struct list_head)
+						  * size));
+	if (!hash) {
+		*vmalloced = 1;
+		printk(KERN_WARNING "ip_conntrack: falling back to vmalloc.\n");
+		hash = vmalloc(sizeof(struct list_head) * size);
+	}
+
+	if (hash)
+		for (i = 0; i < size; i++)
+			INIT_LIST_HEAD(&hash[i]);
+
+	return hash;
+}
+
+int set_hashsize(const char *val, struct kernel_param *kp)
+{
+	int i, bucket, hashsize, vmalloced;
+	int old_vmalloced, old_size;
+	int rnd;
+	struct list_head *hash, *old_hash;
+	struct ip_conntrack_tuple_hash *h;
+
+	/* On boot, we can set this without any fancy locking. */
+	if (!ip_conntrack_htable_size)
+		return param_set_int(val, kp);
+
+	hashsize = simple_strtol(val, NULL, 0);
+	if (!hashsize)
+		return -EINVAL;
+
+	hash = alloc_hashtable(hashsize, &vmalloced);
+	if (!hash)
+		return -ENOMEM;
+
+	/* We have to rehash for the new table anyway, so we also can
+	 * use a new random seed */
+	get_random_bytes(&rnd, 4);
+
+	write_lock_bh(&ip_conntrack_lock);
+	for (i = 0; i < ip_conntrack_htable_size; i++) {
+		while (!list_empty(&ip_conntrack_hash[i])) {
+			h = list_entry(ip_conntrack_hash[i].next,
+				       struct ip_conntrack_tuple_hash, list);
+			list_del(&h->list);
+			bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
+			list_add_tail(&h->list, &hash[bucket]);
+		}
+	}
+	old_size = ip_conntrack_htable_size;
+	old_vmalloced = ip_conntrack_vmalloc;
+	old_hash = ip_conntrack_hash;
+
+	ip_conntrack_htable_size = hashsize;
+	ip_conntrack_vmalloc = vmalloced;
+	ip_conntrack_hash = hash;
+	ip_conntrack_hash_rnd = rnd;
+	write_unlock_bh(&ip_conntrack_lock);
+
+	free_conntrack_hash(old_hash, old_vmalloced, old_size);
+	return 0;
+}
+
+module_param_call(hashsize, set_hashsize, param_get_uint,
+		  &ip_conntrack_htable_size, 0600);
 
 int __init ip_conntrack_init(void)
 {
@@ -1392,9 +1466,7 @@ int __init ip_conntrack_init(void)
 
 	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
 	 * machine has 256 buckets. >= 1GB machines have 8192 buckets. */
-	if (hashsize) {
-		ip_conntrack_htable_size = hashsize;
-	} else {
+	if (!ip_conntrack_htable_size) {
 		ip_conntrack_htable_size
 			= (((num_physpages << PAGE_SHIFT) / 16384)
 			   / sizeof(struct list_head));
@@ -1416,20 +1488,8 @@ int __init ip_conntrack_init(void)
 		return ret;
 	}
 
-	/* AK: the hash table is twice as big than needed because it
-	   uses list_head.  it would be much nicer to caches to use a
-	   single pointer list head here. */
-	ip_conntrack_vmalloc = 0;
-	ip_conntrack_hash
-		=(void*)__get_free_pages(GFP_KERNEL,
-					 get_order(sizeof(struct list_head)
-						   *ip_conntrack_htable_size));
-	if (!ip_conntrack_hash) {
-		ip_conntrack_vmalloc = 1;
-		printk(KERN_WARNING "ip_conntrack: falling back to vmalloc.\n");
-		ip_conntrack_hash = vmalloc(sizeof(struct list_head)
-					    * ip_conntrack_htable_size);
-	}
+	ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
+					    &ip_conntrack_vmalloc);
 	if (!ip_conntrack_hash) {
 		printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
 		goto err_unreg_sockopt;
@@ -1461,9 +1521,6 @@ int __init ip_conntrack_init(void)
 	ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
 	write_unlock_bh(&ip_conntrack_lock);
 
-	for (i = 0; i < ip_conntrack_htable_size; i++)
-		INIT_LIST_HEAD(&ip_conntrack_hash[i]);
-
 	/* For use by ipt_REJECT */
 	ip_ct_attach = ip_conntrack_attach;
@@ -1478,7 +1535,8 @@ int __init ip_conntrack_init(void)
 err_free_conntrack_slab:
 	kmem_cache_destroy(ip_conntrack_cachep);
 err_free_hash:
-	free_conntrack_hash();
+	free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
+			    ip_conntrack_htable_size);
 err_unreg_sockopt:
 	nf_unregister_sockopt(&so_getorigdst);
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index f5909a4c3fc..e19c2a52d00 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -48,7 +48,7 @@ static int checkentry(const char *tablename, const struct ipt_ip *ip,
 		      unsigned int hook_mask)
 {
 	if (matchsize != IPT_ALIGN(sizeof(struct ipt_addrtype_info))) {
-		printk(KERN_ERR "ipt_addrtype: invalid size (%u != %Zu)\n.",
+		printk(KERN_ERR "ipt_addrtype: invalid size (%u != %Zu)\n",
		       matchsize, IPT_ALIGN(sizeof(struct ipt_addrtype_info)));
 		return 0;
 	}
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index f7943ba1f43..a65e508fbd4 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -90,9 +90,7 @@ fold_field(void *mib[], int offt)
 	unsigned long res = 0;
 	int i;
 
-	for (i = 0; i < NR_CPUS; i++) {
-		if (!cpu_possible(i))
-			continue;
+	for_each_cpu(i) {
 		res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt);
 		res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt);
 	}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 677419d0c9a..3e98b57578d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2239,6 +2239,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
 			/* Note, it is the only place, where
 			 * fast path is recovered for sending TCP.
 			 */
+			tp->pred_flags = 0;
 			tcp_fast_path_check(sk, tp);
 
 			if (nwin > tp->max_window) {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7114031fdc7..b907456a79f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -435,17 +435,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 	int nsize, old_factor;
 	u16 flags;
 
-	if (unlikely(len >= skb->len)) {
-		if (net_ratelimit()) {
-			printk(KERN_DEBUG "TCP: seg_size=%u, mss=%u, seq=%u, "
-			       "end_seq=%u, skb->len=%u.\n", len, mss_now,
-			       TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
-			       skb->len);
-			WARN_ON(1);
-		}
-		return 0;
-	}
-
+	BUG_ON(len > skb->len);
 	nsize = skb_headlen(skb) - len;
 	if (nsize < 0)
 		nsize = 0;