From 1cf9e8a7865c0ac216034e519cf6b8505055ea50 Mon Sep 17 00:00:00 2001
From: Olaf Hering <olh@suse.de>
Date: Thu, 1 Dec 2005 21:22:37 +0100
Subject: [PATCH] ieee80211_crypt_tkip depends on NET_RADIO

*** Warning: ".wireless_send_event" [net/ieee80211/ieee80211_crypt_tkip.ko] undefined!

Signed-off-by: Olaf Hering <olh@suse.de>

 net/ieee80211/Kconfig |    2 +-
 1 files changed, 1 insertion(+), 1 deletion(-)
Signed-off-by: Jeff Garzik <jgarzik@pobox.com>
---
 net/ieee80211/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ieee80211/Kconfig b/net/ieee80211/Kconfig
index 91b16fbf91f..d18ccba3ea9 100644
--- a/net/ieee80211/Kconfig
+++ b/net/ieee80211/Kconfig
@@ -55,7 +55,7 @@ config IEEE80211_CRYPT_CCMP
 
 config IEEE80211_CRYPT_TKIP
 	tristate "IEEE 802.11i TKIP encryption"
-	depends on IEEE80211
+	depends on IEEE80211 && NET_RADIO
 	select CRYPTO
 	select CRYPTO_MICHAEL_MIC
 	---help---
-- 
cgit v1.2.3-70-g09d2


From a1493d9cd1aaed06860d128a37df1bdfbc61f7c8 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 13 Dec 2005 22:59:36 -0800
Subject: [IPV6] addrconf: Do not print device pointer in privacy log message.

Noticed by Andi Kleen, it is pointless to emit the device
structure pointer in the kernel logs like this.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 76ff9f4fe89..73a23b4130a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -379,8 +379,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
 		    dev->type == ARPHRD_NONE ||
 		    dev->type == ARPHRD_SIT) {
 			printk(KERN_INFO
-				"Disabled Privacy Extensions on device %p(%s)\n",
-				dev, dev->name);
+			       "%s: Disabled Privacy Extensions\n",
+			       dev->name);
 			ndev->cnf.use_tempaddr = -1;
 		} else {
 			in6_dev_hold(ndev);
-- 
cgit v1.2.3-70-g09d2


From 2edc2689f8183dd21c45621a01580b340ac420ba Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 13 Dec 2005 22:59:50 -0800
Subject: [PKT_SCHED]: Disable debug tracing logs by default in packet action
 API.

Noticed by Andi Kleen.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 8aebe8f6d27..2ce1cb2aa2e 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -34,7 +34,7 @@
 #include <net/sch_generic.h>
 #include <net/act_api.h>
 
-#if 1 /* control */
+#if 0 /* control */
 #define DPRINTK(format, args...) printk(KERN_DEBUG format, ##args)
 #else
 #define DPRINTK(format, args...)
-- 
cgit v1.2.3-70-g09d2


From 1542272a60ab9c0655a13ead8b7d7a661365f9fb Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 14 Dec 2005 12:55:24 -0800
Subject: [GRE]: Fix hardware checksum modification

The skb_postpull_rcsum introduced a bug to the checksum modification.
Although the length pulled is offset bytes, the origin of the pulling
is the GRE header, not the IP header.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index a4c347c3b8e..46f9d9cf7a5 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -618,7 +618,7 @@ static int ipgre_rcv(struct sk_buff *skb)
 
 		skb->mac.raw = skb->nh.raw;
 		skb->nh.raw = __pskb_pull(skb, offset);
-		skb_postpull_rcsum(skb, skb->mac.raw, offset);
+		skb_postpull_rcsum(skb, skb->h.raw, offset);
 		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
 		skb->pkt_type = PACKET_HOST;
 #ifdef CONFIG_NET_IPGRE_BROADCAST
-- 
cgit v1.2.3-70-g09d2


From a388442c3798a345d131ff8b9d6dea0bfda3fefc Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Wed, 14 Dec 2005 16:23:16 -0800
Subject: [VLAN]: Fix hardware rx csum errors

Receiving VLAN packets over a device (without VLAN assist) that is
doing hardware checksumming (CHECKSUM_HW), causes errors because the
VLAN code forgets to adjust the hardware checksum.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index b7486488967..f2a8750bbf1 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -165,6 +165,9 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 
 	skb_pull(skb, VLAN_HLEN); /* take off the VLAN header (4 bytes currently) */
 
+	/* Need to correct hardware checksum */
+	skb_postpull_rcsum(skb, vhdr, VLAN_HLEN);
+
 	/* Ok, lets check to make sure the device (dev) we
 	 * came in on is what this VLAN is attached to.
 	 */
-- 
cgit v1.2.3-70-g09d2


From d3a880e1ff6713b4c846e4d2526a8c7e6ad8469c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Thu, 15 Dec 2005 09:18:30 +0000
Subject: [PATCH] Address of void __user * is void __user * *, not void *
 __user *

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 net/sctp/socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index d890dfa8818..1f7f244806b 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3425,7 +3425,7 @@ static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_add
 }
 
 static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port,
-				    void * __user *to, size_t space_left)
+				    void __user **to, size_t space_left)
 {
 	struct list_head *pos;
 	struct sctp_sockaddr_entry *addr;
-- 
cgit v1.2.3-70-g09d2


From 0476f171affa6eca62021fca2ae9f5140acc3713 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 19 Dec 2005 13:53:09 -0800
Subject: [NETFILTER]: Fix NAT init order

As noticed by Phil Oester, the GRE NAT protocol helper is initialized
before the NAT core, which makes registration fail.

Change the linking order to make NAT be initialized first.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 058c48e258f..d0a447e520a 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -12,6 +12,7 @@ ip_nat_pptp-objs	:= ip_nat_helper_pptp.o ip_nat_proto_gre.o
 
 # connection tracking
 obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
+obj-$(CONFIG_IP_NF_NAT) += ip_nat.o
 
 # conntrack netlink interface
 obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o
@@ -41,7 +42,7 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
 # the three instances of ip_tables
 obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
 obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
-obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o ip_nat.o
+obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o
 obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
 
 # matches
-- 
cgit v1.2.3-70-g09d2


From 31cb5bd4dc89ba14e6347b094e15a2f6778a01fc Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 19 Dec 2005 13:53:26 -0800
Subject: [NETFILTER]: Fix incorrect dependency for IP6_NF_TARGET_NFQUEUE

IP6_NF_TARGET_NFQUEUE depends on IP6_NF_IPTABLES, not IP_NF_IPTABLES.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 060d6120241..04912f9b35c 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -211,7 +211,7 @@ config IP6_NF_TARGET_REJECT
 
 config IP6_NF_TARGET_NFQUEUE
 	tristate "NFQUEUE Target Support"
-	depends on IP_NF_IPTABLES
+	depends on IP6_NF_IPTABLES
 	help
 	  This Target replaced the old obsolete QUEUE target.
 
-- 
cgit v1.2.3-70-g09d2


From b03664869aa6f84c3c98a06ac9d6905b195909bc Mon Sep 17 00:00:00 2001
From: Bart De Schuymer <bdschuym@pandora.be>
Date: Mon, 19 Dec 2005 14:00:08 -0800
Subject: [BRIDGE-NF]: Fix bridge-nf ipv6 length check

A typo caused some bridged IPv6 packets to get dropped randomly,
as reported by Sebastien Chaumontet. The patch below fixes this
(using skb->nh.raw instead of raw) and also makes the jumbo packet
length checking up-to-date with the code in
net/ipv6/exthdrs.c::ipv6_hop_jumbo.

Signed-off-by: Bart De Schuymer <bdschuym@pandora.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netfilter.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index d8e36b77512..43a0b35dfe6 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -295,7 +295,7 @@ static int check_hbh_len(struct sk_buff *skb)
 	len -= 2;
 
 	while (len > 0) {
-		int optlen = raw[off+1]+2;
+		int optlen = skb->nh.raw[off+1]+2;
 
 		switch (skb->nh.raw[off]) {
 		case IPV6_TLV_PAD0:
@@ -308,18 +308,15 @@ static int check_hbh_len(struct sk_buff *skb)
 		case IPV6_TLV_JUMBO:
 			if (skb->nh.raw[off+1] != 4 || (off&3) != 2)
 				goto bad;
-
 			pkt_len = ntohl(*(u32*)(skb->nh.raw+off+2));
-
+			if (pkt_len <= IPV6_MAXPLEN ||
+			    skb->nh.ipv6h->payload_len)
+				goto bad;
 			if (pkt_len > skb->len - sizeof(struct ipv6hdr))
 				goto bad;
-			if (pkt_len + sizeof(struct ipv6hdr) < skb->len) {
-				if (__pskb_trim(skb,
-				    pkt_len + sizeof(struct ipv6hdr)))
-					goto bad;
-				if (skb->ip_summed == CHECKSUM_HW)
-					skb->ip_summed = CHECKSUM_NONE;
-			}
+			if (pskb_trim_rcsum(skb,
+			    pkt_len+sizeof(struct ipv6hdr)))
+				goto bad;
 			break;
 		default:
 			if (optlen > len)
-- 
cgit v1.2.3-70-g09d2


From 3dd4bc68fac5df16b6d3ed6ed3c29cf05f29a47e Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Mon, 19 Dec 2005 14:02:45 -0800
Subject: [IPV6]: Fix route lifetime.

The route expiration time is stored in rt6i_expires in jiffies.
The argument of ip6_route_add() for adding a route is neither the
expiration time in jiffies nor the expiration time in clock_t, but
the lifetime (i.e. the time left before expiration) in clock_t.

Because of the confusion, we sometimes saw several strange errors
(FAILs) in TAHI IPv6 Ready Logo Phase-2 Self Test.
The symptoms were analyzed by Mitsuru Chinen <CHINEN@jp.ibm.com>.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 16 ++++++++++++----
 net/ipv6/route.c    |  2 +-
 2 files changed, 13 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 73a23b4130a..4ea8cf7c0cc 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1596,9 +1596,17 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 	   not good.
 	 */
 	if (valid_lft >= 0x7FFFFFFF/HZ)
-		rt_expires = 0;
+		rt_expires = 0x7FFFFFFF - (0x7FFFFFFF % HZ);
 	else
-		rt_expires = jiffies + valid_lft * HZ;
+		rt_expires = valid_lft * HZ;
+
+	/*
+	 * We convert this (in jiffies) to clock_t later.
+	 * Avoid arithmetic overflow there as well.
+	 * Overflow can happen only if HZ < USER_HZ.
+	 */
+	if (HZ < USER_HZ && rt_expires > 0x7FFFFFFF / USER_HZ)
+		rt_expires = 0x7FFFFFFF / USER_HZ;
 
 	if (pinfo->onlink) {
 		struct rt6_info *rt;
@@ -1610,12 +1618,12 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 					ip6_del_rt(rt, NULL, NULL, NULL);
 					rt = NULL;
 				} else {
-					rt->rt6i_expires = rt_expires;
+					rt->rt6i_expires = jiffies + rt_expires;
 				}
 			}
 		} else if (valid_lft) {
 			addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
-					      dev, rt_expires, RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT);
+					      dev, jiffies_to_clock_t(rt_expires), RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT);
 		}
 		if (rt)
 			dst_release(&rt->u.dst);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a7a537b5059..7c68bfbee36 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -829,7 +829,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 	}
 
 	rt->u.dst.obsolete = -1;
-	rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
+	rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
 	if (nlh && (r = NLMSG_DATA(nlh))) {
 		rt->rt6i_protocol = r->rtm_protocol;
 	} else {
-- 
cgit v1.2.3-70-g09d2


From 9e999993c71e1506378d26d81f842277aff8a250 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 19 Dec 2005 14:03:46 -0800
Subject: [XFRM]: Handle DCCP in xfrm{4,6}_decode_session

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_policy.c | 1 +
 net/ipv6/xfrm6_policy.c | 1 +
 2 files changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index b2b60f3e9cd..42196ba3b0b 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -182,6 +182,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl)
 		case IPPROTO_UDP:
 		case IPPROTO_TCP:
 		case IPPROTO_SCTP:
+		case IPPROTO_DCCP:
 			if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
 				u16 *ports = (u16 *)xprth;
 
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index cf1d91e74c8..69bd957380e 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -214,6 +214,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
 		case IPPROTO_UDP:
 		case IPPROTO_TCP:
 		case IPPROTO_SCTP:
+		case IPPROTO_DCCP:
 			if (pskb_may_pull(skb, skb->nh.raw + offset + 4 - skb->data)) {
 				u16 *ports = (u16 *)exthdr;
 
-- 
cgit v1.2.3-70-g09d2


From 399c180ac5f0cb66ef9479358e0b8b6bafcbeafe Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Mon, 19 Dec 2005 14:23:23 -0800
Subject: [IPSEC]: Perform SA switchover immediately.

When we insert a new xfrm_state which potentially
subsumes an existing one, make sure all cached
bundles are flushed so that the new SA is used
immediately.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h     |  1 +
 net/xfrm/xfrm_policy.c | 19 ++++++++++++++-----
 net/xfrm/xfrm_state.c  |  5 +++++
 3 files changed, 20 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 5beae1ccd57..1cdb8791213 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -890,6 +890,7 @@ struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
 extern void xfrm_policy_flush(void);
 extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol);
 extern int xfrm_flush_bundles(void);
+extern void xfrm_flush_all_bundles(void);
 extern int xfrm_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl, int family);
 extern void xfrm_init_pmtu(struct dst_entry *dst);
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 0db9e57013f..54a4be6a7d2 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1014,13 +1014,12 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 }
 EXPORT_SYMBOL(__xfrm_route_forward);
 
-/* Optimize later using cookies and generation ids. */
-
 static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
 {
-	if (!stale_bundle(dst))
-		return dst;
-
+	/* If it is marked obsolete, which is how we even get here,
+	 * then we have purged it from the policy bundle list and we
+	 * did that for a good reason.
+	 */
 	return NULL;
 }
 
@@ -1104,6 +1103,16 @@ int xfrm_flush_bundles(void)
 	return 0;
 }
 
+static int always_true(struct dst_entry *dst)
+{
+	return 1;
+}
+
+void xfrm_flush_all_bundles(void)
+{
+	xfrm_prune_bundles(always_true);
+}
+
 void xfrm_init_pmtu(struct dst_entry *dst)
 {
 	do {
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 7cf48aa6c95..479effc9766 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -431,6 +431,8 @@ void xfrm_state_insert(struct xfrm_state *x)
 	spin_lock_bh(&xfrm_state_lock);
 	__xfrm_state_insert(x);
 	spin_unlock_bh(&xfrm_state_lock);
+
+	xfrm_flush_all_bundles();
 }
 EXPORT_SYMBOL(xfrm_state_insert);
 
@@ -478,6 +480,9 @@ out:
 	spin_unlock_bh(&xfrm_state_lock);
 	xfrm_state_put_afinfo(afinfo);
 
+	if (!err)
+		xfrm_flush_all_bundles();
+
 	if (x1) {
 		xfrm_state_delete(x1);
 		xfrm_state_put(x1);
-- 
cgit v1.2.3-70-g09d2


From 9bffc4ace1ed875667dbe5b29065d96bec558c62 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Mon, 19 Dec 2005 14:24:40 -0800
Subject: [SCTP]: Fix sctp to not return erroneous POLLOUT events.

Make sctp_writeable() use sk_wmem_alloc rather than sk_wmem_queued to
determine the sndbuf space available. It also removes all the modifications
to sk_wmem_queued as it is not currently used in SCTP.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 1f7f244806b..9df888e932c 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -156,10 +156,6 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
 				sizeof(struct sk_buff) +
 				sizeof(struct sctp_chunk);
 
-	sk->sk_wmem_queued += SCTP_DATA_SNDSIZE(chunk) +
-				sizeof(struct sk_buff) +
-				sizeof(struct sctp_chunk);
-
 	atomic_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
 }
 
@@ -4426,7 +4422,7 @@ cleanup:
  * tcp_poll().  Note that, based on these implementations, we don't
  * lock the socket in this function, even though it seems that,
  * ideally, locking or some other mechanisms can be used to ensure
- * the integrity of the counters (sndbuf and wmem_queued) used
+ * the integrity of the counters (sndbuf and wmem_alloc) used
  * in this place.  We assume that we don't need locks either until proven
  * otherwise.
  *
@@ -4833,10 +4829,6 @@ static void sctp_wfree(struct sk_buff *skb)
 				sizeof(struct sk_buff) +
 				sizeof(struct sctp_chunk);
 
-	sk->sk_wmem_queued -= SCTP_DATA_SNDSIZE(chunk) +
-				sizeof(struct sk_buff) +
-				sizeof(struct sctp_chunk);
-
 	atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
 
 	sock_wfree(skb);
@@ -4920,7 +4912,7 @@ void sctp_write_space(struct sock *sk)
 
 /* Is there any sndbuf space available on the socket?
  *
- * Note that wmem_queued is the sum of the send buffers on all of the
+ * Note that sk_wmem_alloc is the sum of the send buffers on all of the
  * associations on the same socket.  For a UDP-style socket with
  * multiple associations, it is possible for it to be "unwriteable"
  * prematurely.  I assume that this is acceptable because
@@ -4933,7 +4925,7 @@ static int sctp_writeable(struct sock *sk)
 {
 	int amt = 0;
 
-	amt = sk->sk_sndbuf - sk->sk_wmem_queued;
+	amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
 	if (amt < 0)
 		amt = 0;
 	return amt;
-- 
cgit v1.2.3-70-g09d2


From b079fa7baa86b47579f3f60f86d03d21c76159b8 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 13 Dec 2005 16:13:52 -0500
Subject: RPC: Do not block on skb allocation

 If we get something like the following,
 [  125.300636]  [<c04086e1>] schedule_timeout+0x54/0xa5
 [  125.305931]  [<c040866e>] io_schedule_timeout+0x29/0x33
 [  125.311495]  [<c02880c4>] blk_congestion_wait+0x70/0x85
 [  125.317058]  [<c014136b>] throttle_vm_writeout+0x69/0x7d
 [  125.322720]  [<c014714d>] shrink_zone+0xe0/0xfa
 [  125.327560]  [<c01471d4>] shrink_caches+0x6d/0x6f
 [  125.332581]  [<c01472a6>] try_to_free_pages+0xd0/0x1b5
 [  125.338056]  [<c013fa4b>] __alloc_pages+0x135/0x2e8
 [  125.343258]  [<c03b74ad>] tcp_sendmsg+0xaa0/0xb78
 [  125.348281]  [<c03d4666>] inet_sendmsg+0x48/0x53
 [  125.353212]  [<c0388716>] sock_sendmsg+0xb8/0xd3
 [  125.358147]  [<c0388773>] kernel_sendmsg+0x42/0x4f
 [  125.363259]  [<c038bc00>] sock_no_sendpage+0x5e/0x77
 [  125.368556]  [<c03ee7af>] xs_tcp_send_request+0x2af/0x375
 then the socket is blocked until memory is reclaimed, and no
 progress can ever be made.

 Try to access the emergency pools by using GFP_ATOMIC.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/xprtsock.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 0a51fd46a84..77e8800d412 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -990,6 +990,7 @@ static void xs_udp_connect_worker(void *args)
 		sk->sk_data_ready = xs_udp_data_ready;
 		sk->sk_write_space = xs_udp_write_space;
 		sk->sk_no_check = UDP_CSUM_NORCV;
+		sk->sk_allocation = GFP_ATOMIC;
 
 		xprt_set_connected(xprt);
 
@@ -1074,6 +1075,7 @@ static void xs_tcp_connect_worker(void *args)
 		sk->sk_data_ready = xs_tcp_data_ready;
 		sk->sk_state_change = xs_tcp_state_change;
 		sk->sk_write_space = xs_tcp_write_space;
+		sk->sk_allocation = GFP_ATOMIC;
 
 		/* socket options */
 		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
-- 
cgit v1.2.3-70-g09d2


From 48e49187753ec3b4fa84a7165c9b7a59f3875b56 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 19 Dec 2005 17:11:22 -0500
Subject: SUNRPC: Fix "EPIPE" error on mount of rpcsec_gss-protected partitions

 gss_create_upcall() should not error just because rpc.gssd closed the
 pipe on its end. Instead, it should requeue the pending requests and then
 retry.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/auth_gss/auth_gss.c | 6 ++++--
 net/sunrpc/rpc_pipe.c          | 4 ++--
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index f44f46f1d8e..8d782282ec1 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -638,7 +638,7 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg)
 				gss_msg);
 		atomic_inc(&gss_msg->count);
 		gss_unhash_msg(gss_msg);
-		if (msg->errno == -ETIMEDOUT || msg->errno == -EPIPE) {
+		if (msg->errno == -ETIMEDOUT) {
 			unsigned long now = jiffies;
 			if (time_after(now, ratelimit)) {
 				printk(KERN_WARNING "RPC: AUTH_GSS upcall timed out.\n"
@@ -786,7 +786,9 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int taskflags)
 	cred->gc_flags = 0;
 	cred->gc_base.cr_ops = &gss_credops;
 	cred->gc_service = gss_auth->service;
-	err = gss_create_upcall(gss_auth, cred);
+	do {
+		err = gss_create_upcall(gss_auth, cred);
+	} while (err == -EAGAIN);
 	if (err < 0)
 		goto out_err;
 
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index c76ea221798..16a2458f38f 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -174,7 +174,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp)
 		goto out;
 	msg = (struct rpc_pipe_msg *)filp->private_data;
 	if (msg != NULL) {
-		msg->errno = -EPIPE;
+		msg->errno = -EAGAIN;
 		list_del_init(&msg->list);
 		rpci->ops->destroy_msg(msg);
 	}
@@ -183,7 +183,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp)
 	if (filp->f_mode & FMODE_READ)
 		rpci->nreaders --;
 	if (!rpci->nreaders)
-		__rpc_purge_upcall(inode, -EPIPE);
+		__rpc_purge_upcall(inode, -EAGAIN);
 	if (rpci->ops->release_pipe)
 		rpci->ops->release_pipe(inode);
 out:
-- 
cgit v1.2.3-70-g09d2


From 58c4fb86eabcbc385d954843a635b7f4327be6b0 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 21 Dec 2005 22:56:42 +0900
Subject: [IPV6]: Flag RTF_ANYCAST for anycast routes.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/linux/ipv6_route.h |  1 +
 net/ipv6/route.c           | 13 +++++++++----
 2 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/linux/ipv6_route.h b/include/linux/ipv6_route.h
index e2f93503801..d7c41d1d706 100644
--- a/include/linux/ipv6_route.h
+++ b/include/linux/ipv6_route.h
@@ -18,6 +18,7 @@
 					   fallback, no routers on link */
 #define RTF_ADDRCONF	0x00040000	/* addrconf route - RA		*/
 #define RTF_PREFIX_RT	0x00080000	/* A prefix only route - RA	*/
+#define RTF_ANYCAST	0x00100000	/* Anycast			*/
 
 #define RTF_NONEXTHOP	0x00200000	/* route with no nexthop	*/
 #define RTF_EXPIRES	0x00400000
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7c68bfbee36..66140f13d11 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -413,11 +413,14 @@ static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
 	rt = ip6_rt_copy(ort);
 
 	if (rt) {
-		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
-
-		if (!(rt->rt6i_flags&RTF_GATEWAY))
+		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
+			if (rt->rt6i_dst.plen != 128 &&
+			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
+				rt->rt6i_flags |= RTF_ANYCAST;
 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
+		}
 
+		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
 		rt->rt6i_dst.plen = 128;
 		rt->rt6i_flags |= RTF_CACHE;
 		rt->u.dst.flags |= DST_HOST;
@@ -1413,7 +1416,9 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 	rt->u.dst.obsolete = -1;
 
 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
-	if (!anycast)
+	if (anycast)
+		rt->rt6i_flags |= RTF_ANYCAST;
+	else
 		rt->rt6i_flags |= RTF_LOCAL;
 	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
 	if (rt->rt6i_nexthop == NULL) {
-- 
cgit v1.2.3-70-g09d2


From 8de3351e6e0a1081fbf6864ae37839e327699a08 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 21 Dec 2005 22:57:06 +0900
Subject: [IPV6]: Try not to send icmp to anycast address.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/icmp.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 34a332225c1..6ec6a2b549b 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -328,8 +328,10 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 		iif = skb->dev->ifindex;
 
 	/*
-	 *	Must not send if we know that source is Anycast also.
-	 *	for now we don't know that.
+	 *	Must not send error if the source does not uniquely
+	 *	identify a single node (RFC2463 Section 2.4).
+	 *	We check unspecified / multicast addresses here,
+	 *	and anycast addresses will be checked later.
 	 */
 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
@@ -373,6 +375,16 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 	err = ip6_dst_lookup(sk, &dst, &fl);
 	if (err)
 		goto out;
+
+	/*
+	 * We won't send icmp if the destination is known
+	 * anycast.
+	 */
+	if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
+		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
+		goto out_dst_release;
+	}
+
 	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
 		goto out;
 
-- 
cgit v1.2.3-70-g09d2


From 3c21edbd113788b110116141c8078623a0900b6a Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 21 Dec 2005 22:57:24 +0900
Subject: [IPV6]: Defer IPv6 device initialization until the link becomes
 ready.

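NETDEV_UP might be sent even if the link attached to the interface was
not ready.  DAD does not make sense in such a case, so we do not start it.
After the interface's link becomes ready (NETDEV_CHANGE with the carrier
up), the device is configured as usual.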

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/net/if_inet6.h |  1 +
 net/ipv6/addrconf.c    | 74 +++++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 65 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index e97a9accb71..d8234f9bd4c 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -24,6 +24,7 @@
 #define IF_RA_MANAGED	0x40
 #define IF_RA_RCVD	0x20
 #define IF_RS_SENT	0x10
+#define IF_READY	0x80000000
 
 /* prefix flags */
 #define IF_PREFIX_ONLINK	0x01
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4ea8cf7c0cc..d012f6ac704 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -388,6 +388,9 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
 		}
 #endif
 
+		if (netif_carrier_ok(dev))
+			ndev->if_flags |= IF_READY;
+
 		write_lock_bh(&addrconf_lock);
 		dev->ip6_ptr = ndev;
 		write_unlock_bh(&addrconf_lock);
@@ -1215,10 +1218,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
 
 /* Gets referenced address, destroys ifaddr */
 
-void addrconf_dad_failure(struct inet6_ifaddr *ifp)
+void addrconf_dad_stop(struct inet6_ifaddr *ifp)
 {
-	if (net_ratelimit())
-		printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name);
 	if (ifp->flags&IFA_F_PERMANENT) {
 		spin_lock_bh(&ifp->lock);
 		addrconf_del_timer(ifp);
@@ -1244,6 +1245,12 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
 		ipv6_del_addr(ifp);
 }
 
+void addrconf_dad_failure(struct inet6_ifaddr *ifp)
+{
+	if (net_ratelimit())
+		printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name);
+	addrconf_dad_stop(ifp);
+}
 
 /* Join to solicited addr multicast group. */
 
@@ -2136,6 +2143,37 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 
 	switch(event) {
 	case NETDEV_UP:
+	case NETDEV_CHANGE:
+		if (event == NETDEV_UP) {
+			if (!netif_carrier_ok(dev)) {
+				/* device is not ready yet. */
+				printk(KERN_INFO
+					"ADDRCONF(NETDEV_UP): %s: "
+					"link is not ready\n",
+					dev->name);
+				break;
+			}
+		} else {
+			if (!netif_carrier_ok(dev)) {
+				/* device is still not ready. */
+				break;
+			}
+
+			if (idev) {
+				if (idev->if_flags & IF_READY) {
+					/* device is already configured. */
+					break;
+				}
+				idev->if_flags |= IF_READY;
+			}
+
+			printk(KERN_INFO
+					"ADDRCONF(NETDEV_CHANGE): %s: "
+					"link becomes ready\n",
+					dev->name);
+
+		}
+
 		switch(dev->type) {
 		case ARPHRD_SIT:
 			addrconf_sit_config(dev);
@@ -2186,8 +2224,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 		 */
 		addrconf_ifdown(dev, event != NETDEV_DOWN);
 		break;
-	case NETDEV_CHANGE:
-		break;
+
 	case NETDEV_CHANGENAME:
 #ifdef CONFIG_SYSCTL
 		if (idev) {
@@ -2268,7 +2305,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 
 	/* Step 3: clear flags for stateless addrconf */
 	if (how != 1)
-		idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD);
+		idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
 
 	/* Step 4: clear address list */
 #ifdef CONFIG_IPV6_PRIVACY
@@ -2377,11 +2414,20 @@ out:
 /*
  *	Duplicate Address Detection
  */
+static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
+{
+	unsigned long rand_num;
+	struct inet6_dev *idev = ifp->idev;
+
+	rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
+	ifp->probes = idev->cnf.dad_transmits;
+	addrconf_mod_timer(ifp, AC_DAD, rand_num);
+}
+
 static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
 {
 	struct inet6_dev *idev = ifp->idev;
 	struct net_device *dev = idev->dev;
-	unsigned long rand_num;
 
 	addrconf_join_solict(dev, &ifp->addr);
 
@@ -2390,7 +2436,6 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
 					flags);
 
 	net_srandom(ifp->addr.s6_addr32[3]);
-	rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
 
 	read_lock_bh(&idev->lock);
 	if (ifp->dead)
@@ -2407,8 +2452,17 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
 		return;
 	}
 
-	ifp->probes = idev->cnf.dad_transmits;
-	addrconf_mod_timer(ifp, AC_DAD, rand_num);
+	if (idev->if_flags & IF_READY)
+		addrconf_dad_kick(ifp);
+	else {
+		/*
+		 * If the defice is not ready:
+		 * - keep it tentative if it is a permanent address.
+		 * - otherwise, kill it.
+		 */
+		in6_ifa_hold(ifp);
+		addrconf_dad_stop(ifp);
+	}
 
 	spin_unlock_bh(&ifp->lock);
 out:
-- 
cgit v1.2.3-70-g09d2


From c5e33bddd3c798f681f8f3027270127be6b61a3b Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 21 Dec 2005 22:57:44 +0900
Subject: [IPV6]: Run DAD when the link becomes ready.

If the link was not available when the interface was created,
run DAD for pending tentative addresses when the link becomes ready.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/addrconf.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d012f6ac704..f6ead6a843e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -137,6 +137,7 @@ static int addrconf_ifdown(struct net_device *dev, int how);
 static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags);
 static void addrconf_dad_timer(unsigned long data);
 static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
+static void addrconf_dad_run(struct inet6_dev *idev);
 static void addrconf_rs_timer(unsigned long data);
 static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
 static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
@@ -418,6 +419,7 @@ static struct inet6_dev * ipv6_find_idev(struct net_device *dev)
 		if ((idev = ipv6_add_dev(dev)) == NULL)
 			return NULL;
 	}
+
 	if (dev->flags&IFF_UP)
 		ipv6_mc_up(idev);
 	return idev;
@@ -2140,6 +2142,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 {
 	struct net_device *dev = (struct net_device *) data;
 	struct inet6_dev *idev = __in6_dev_get(dev);
+	int run_pending = 0;
 
 	switch(event) {
 	case NETDEV_UP:
@@ -2172,6 +2175,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 					"link becomes ready\n",
 					dev->name);
 
+			run_pending = 1;
 		}
 
 		switch(dev->type) {
@@ -2190,6 +2194,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 			break;
 		};
 		if (idev) {
+			if (run_pending)
+				addrconf_dad_run(idev);
+
 			/* If the MTU changed during the interface down, when the
 			   interface up, the changed MTU must be reflected in the
 			   idev as well as routers.
@@ -2546,6 +2553,22 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
 	}
 }
 
+static void addrconf_dad_run(struct inet6_dev *idev) {
+	struct inet6_ifaddr *ifp;
+
+	read_lock_bh(&idev->lock);
+	for (ifp = idev->addr_list; ifp; ifp = ifp->if_next) {
+		spin_lock_bh(&ifp->lock);
+		if (!(ifp->flags & IFA_F_TENTATIVE)) {
+			spin_unlock_bh(&ifp->lock);
+			continue;
+		}
+		spin_unlock_bh(&ifp->lock);
+		addrconf_dad_kick(ifp);
+	}
+	read_unlock_bh(&idev->lock);
+}
+
 #ifdef CONFIG_PROC_FS
 struct if6_iter_state {
 	int bucket;
-- 
cgit v1.2.3-70-g09d2


From 6b3ae80a63e47f6e97d68a1ddd520e3509e62821 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 21 Dec 2005 22:58:01 +0900
Subject: [IPV6]: Don't select a tentative address as a source address.

A tentative address is not considered "assigned to an interface"
in the traditional sense (RFC2462 Section 4).
Don't try to select such an address for the source address.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/addrconf.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f6ead6a843e..fd03c394436 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -908,11 +908,18 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
 
 			score.addr_type = __ipv6_addr_type(&ifa->addr);
 
-			/* Rule 0: Candidate Source Address (section 4)
+			/* Rule 0:
+			 * - Tentative Address (RFC2462 section 5.4)
+			 *  - A tentative address is not considered
+			 *    "assigned to an interface" in the traditional
+			 *    sense.
+			 * - Candidate Source Address (section 4)
 			 *  - In any case, anycast addresses, multicast
 			 *    addresses, and the unspecified address MUST
 			 *    NOT be included in a candidate set.
 			 */
+			if (ifa->flags & IFA_F_TENTATIVE)
+				continue;
 			if (unlikely(score.addr_type == IPV6_ADDR_ANY ||
 				     score.addr_type & IPV6_ADDR_MULTICAST)) {
 				LIMIT_NETDEBUG(KERN_DEBUG
-- 
cgit v1.2.3-70-g09d2


From 0d77d59f6293438f25e0560172699c0d3e4ef5ac Mon Sep 17 00:00:00 2001
From: Mika Kukkonen <mikukkon@iki.fi>
Date: Wed, 21 Dec 2005 18:38:26 -0800
Subject: [NETROM]: Fix three if-statements in nr_state{1,2,3}_machine()

I found these while compiling with extra gcc warnings.  A stray
semicolon after each if-condition makes the following nr_disconnect()
call unconditional; considering the indenting, surely that is not
intentional?

Signed-off-by: Mika Kukkonen <mikukkon@iki.fi>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netrom/nr_in.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
index 004e8599b8f..a7d88b5ad75 100644
--- a/net/netrom/nr_in.c
+++ b/net/netrom/nr_in.c
@@ -99,7 +99,7 @@ static int nr_state1_machine(struct sock *sk, struct sk_buff *skb,
 		break;
 
 	case NR_RESET:
-		if (sysctl_netrom_reset_circuit);
+		if (sysctl_netrom_reset_circuit)
 			nr_disconnect(sk, ECONNRESET);
 		break;
 
@@ -130,7 +130,7 @@ static int nr_state2_machine(struct sock *sk, struct sk_buff *skb,
 		break;
 
 	case NR_RESET:
-		if (sysctl_netrom_reset_circuit);
+		if (sysctl_netrom_reset_circuit)
 			nr_disconnect(sk, ECONNRESET);
 		break;
 
@@ -265,7 +265,7 @@ static int nr_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype
 		break;
 
 	case NR_RESET:
-		if (sysctl_netrom_reset_circuit);
+		if (sysctl_netrom_reset_circuit)
 			nr_disconnect(sk, ECONNRESET);
 		break;
 
-- 
cgit v1.2.3-70-g09d2


From 7eb1b3d372a53fe9220b9e3b579886db0fe2f897 Mon Sep 17 00:00:00 2001
From: Mika Kukkonen <mikukkon@iki.fi>
Date: Wed, 21 Dec 2005 18:39:49 -0800
Subject: [VLAN]: Add two missing checks to vlan_ioctl_handler()

In vlan_ioctl_handler() the code misses a couple of checks for
error return values.

Signed-off-by: Mika Kukkonen <mikukkon@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 91e412b0ab0..67465b65abe 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -753,6 +753,8 @@ static int vlan_ioctl_handler(void __user *arg)
 		break;
 	case GET_VLAN_REALDEV_NAME_CMD:
 		err = vlan_dev_get_realdev_name(args.device1, args.u.device2);
+		if (err)
+			goto out;
 		if (copy_to_user(arg, &args,
 				 sizeof(struct vlan_ioctl_args))) {
 			err = -EFAULT;
@@ -761,6 +763,8 @@ static int vlan_ioctl_handler(void __user *arg)
 
 	case GET_VLAN_VID_CMD:
 		err = vlan_dev_get_vid(args.device1, &vid);
+		if (err)
+			goto out;
 		args.u.VID = vid;
 		if (copy_to_user(arg, &args,
 				 sizeof(struct vlan_ioctl_args))) {
@@ -774,7 +778,7 @@ static int vlan_ioctl_handler(void __user *arg)
 			__FUNCTION__, args.cmd);
 		return -EINVAL;
 	};
-
+out:
 	return err;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 1d1428045c54ef3d172d480806e2066dde0b4b76 Mon Sep 17 00:00:00 2001
From: Kristian Slavov <kristian.slavov@nomadiclab.com>
Date: Wed, 21 Dec 2005 18:47:24 -0800
Subject: [IPV6]: Fix address deletion

If you add more than one IPv6 address belonging to the same prefix and
delete the address that was last added, routing table entry for that
prefix is also deleted.
Tested on 2.6.14.4

To reproduce:
ip addr add 3ffe::1/64 dev eth0
ip addr add 3ffe::2/64 dev eth0
/* wait DAD */
sleep 1
ip addr del 3ffe::2/64 dev eth0
ip -6 route

(the route to 3ffe::/64 is now gone, although 3ffe::1/64 still needs it)

In ipv6_del_addr(), if ifa == ifp, we set ifa->if_next to NULL, and later
assign ifap = &ifa->if_next, effectively terminating the for-loop.
This prevents us from checking whether other valid addresses are still
using the same prefix, and thus results in deletion of the prefix.
This applies only if the first entry in idev->addr_list is the address to
be deleted.

Signed-off-by: Kristian Slavov <kristian.slavov@nomadiclab.com>
Acked-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4ea8cf7c0cc..e717a034c95 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -634,8 +634,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 	}
 #endif
 
-	for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;
-	     ifap = &ifa->if_next) {
+	for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;) {
 		if (ifa == ifp) {
 			*ifap = ifa->if_next;
 			__in6_ifa_put(ifp);
@@ -643,6 +642,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 			if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0)
 				break;
 			deleted = 1;
+			continue;
 		} else if (ifp->flags & IFA_F_PERMANENT) {
 			if (ipv6_prefix_equal(&ifa->addr, &ifp->addr,
 					      ifp->prefix_len)) {
@@ -666,6 +666,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 				}
 			}
 		}
+		ifap = &ifa->if_next;
 	}
 	write_unlock_bh(&idev->lock);
 
-- 
cgit v1.2.3-70-g09d2


From 4c7e6895027362889422e5dc437dc3238b6b4745 Mon Sep 17 00:00:00 2001
From: Ian McDonald <imcdnzl@gmail.com>
Date: Wed, 21 Dec 2005 19:02:39 -0800
Subject: [DCCP]: Comment typo

I hope to actually change this behaviour shortly but this will help
anybody grepping code at present.

Signed-off-by: Ian McDonald <imcdnzl@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ipv4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index ca03521112c..656e13e38cf 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -1251,7 +1251,7 @@ static int dccp_v4_destroy_sock(struct sock *sk)
 	struct dccp_sock *dp = dccp_sk(sk);
 
 	/*
-	 * DCCP doesn't use sk_qrite_queue, just sk_send_head
+	 * DCCP doesn't use sk_write_queue, just sk_send_head
 	 * for retransmissions
 	 */
 	if (sk->sk_send_head != NULL) {
-- 
cgit v1.2.3-70-g09d2


From 9b78a82c1cf19aa813bdaa184fa840a3ba811750 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 22 Dec 2005 07:39:48 -0800
Subject: [IPSEC]: Fix policy updates missed by sockets

The problem is that when new policies are inserted, sockets do not see
the update (but all new route lookups do).

This bug is related to the SA insertion stale route issue solved
recently, and this policy visibility problem can be fixed in a similar
way.

The fix is to flush out the bundles of all policies deeper than the
policy being inserted.  Consider beginning state of "outgoing"
direction policy list:

	policy A --> policy B --> policy C --> policy D

First, realize that inserting a policy into a list only potentially
changes IPSEC routes for that direction.  Therefore we need not bother
considering the policies for other directions.  We need only consider
the existing policies in the list into which we are inserting.

Consider new policy "B'", inserted after B.

	policy A --> policy B --> policy B' --> policy C --> policy D

Two rules:

1) If policy A or policy B matched before the insertion, they
   appear before B' and thus would still match after inserting
   B'

2) Policy C and D, now "shadowed" and after policy B', potentially
   contain stale routes because policy B' might be selected
   instead of them.

Therefore we only need to flush routes associated with policies
appearing after a newly inserted policy, if any.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 54a4be6a7d2..d19e274b9c4 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -346,6 +346,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	struct xfrm_policy *pol, **p;
 	struct xfrm_policy *delpol = NULL;
 	struct xfrm_policy **newpos = NULL;
+	struct dst_entry *gc_list;
 
 	write_lock_bh(&xfrm_policy_lock);
 	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) {
@@ -381,9 +382,36 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 		xfrm_pol_hold(policy);
 	write_unlock_bh(&xfrm_policy_lock);
 
-	if (delpol) {
+	if (delpol)
 		xfrm_policy_kill(delpol);
+
+	read_lock_bh(&xfrm_policy_lock);
+	gc_list = NULL;
+	for (policy = policy->next; policy; policy = policy->next) {
+		struct dst_entry *dst;
+
+		write_lock(&policy->lock);
+		dst = policy->bundles;
+		if (dst) {
+			struct dst_entry *tail = dst;
+			while (tail->next)
+				tail = tail->next;
+			tail->next = gc_list;
+			gc_list = dst;
+
+			policy->bundles = NULL;
+		}
+		write_unlock(&policy->lock);
 	}
+	read_unlock_bh(&xfrm_policy_lock);
+
+	while (gc_list) {
+		struct dst_entry *dst = gc_list;
+
+		gc_list = dst->next;
+		dst_free(dst);
+	}
+
 	return 0;
 }
 EXPORT_SYMBOL(xfrm_policy_insert);
-- 
cgit v1.2.3-70-g09d2


From 3dd3bf83574e38578fc9741c0e23e4fa7f7ff96e Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Fri, 23 Dec 2005 11:23:21 -0800
Subject: [IPV6]: Fix dead lock.

We need to release ifp->lock before we call addrconf_dad_stop(),
which will hold ifp->lock.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 510220f2ae8..d805241e439 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2467,9 +2467,11 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
 		return;
 	}
 
-	if (idev->if_flags & IF_READY)
+	if (idev->if_flags & IF_READY) {
 		addrconf_dad_kick(ifp);
-	else {
+		spin_unlock_bh(&ifp->lock);
+	} else {
+		spin_unlock_bh(&ifp->lock);
 		/*
 		 * If the defice is not ready:
 		 * - keep it tentative if it is a permanent address.
@@ -2478,8 +2480,6 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
 		in6_ifa_hold(ifp);
 		addrconf_dad_stop(ifp);
 	}
-
-	spin_unlock_bh(&ifp->lock);
 out:
 	read_unlock_bh(&idev->lock);
 }
-- 
cgit v1.2.3-70-g09d2


From 291d809ba5c8d4d6d8812e3f185bdf57d539f594 Mon Sep 17 00:00:00 2001
From: Hiroyuki YAMAMORI <h-yamamo@db3.so-net.ne.jp>
Date: Fri, 23 Dec 2005 11:24:05 -0800
Subject: [IPV6]: Fix Temporary Address Generation

From: Hiroyuki YAMAMORI <h-yamamo@db3.so-net.ne.jp>

Since regen_count is stored in the public address, we need to reset it
when we start renewing temporary address.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d805241e439..2a6439e3c91 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2782,6 +2782,9 @@ restart:
 						in6_ifa_hold(ifpub);
 						spin_unlock(&ifp->lock);
 						read_unlock(&addrconf_hash_lock);
+						spin_lock(&ifpub->lock);
+						ifpub->regen_count = 0;
+						spin_unlock(&ifpub->lock);
 						ipv6_create_tempaddr(ifpub, ifp);
 						in6_ifa_put(ifpub);
 						in6_ifa_put(ifp);
-- 
cgit v1.2.3-70-g09d2


From 6f4353d891b5e477528cd0b996e0263fecdf5d5f Mon Sep 17 00:00:00 2001
From: David L Stevens <dlstevens@us.ibm.com>
Date: Mon, 26 Dec 2005 17:03:46 -0800
Subject: [IPV6]: Increase default MLD_MAX_MSF to 64.

The existing default of 10 is just way too low.

Signed-off-by: David L Stevens <dlstevens@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/mcast.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index fd939da090c..057d8619ba1 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -170,7 +170,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
 #define MLDV2_QQIC(value) MLDV2_EXP(0x80, 4, 3, value)
 #define MLDV2_MRC(value) MLDV2_EXP(0x8000, 12, 3, value)
 
-#define IPV6_MLD_MAX_MSF	10
+#define IPV6_MLD_MAX_MSF	64
 
 int sysctl_mld_max_msf = IPV6_MLD_MAX_MSF;
 
-- 
cgit v1.2.3-70-g09d2


From 79cac2a221ce18642550a13bed0f0203514923ea Mon Sep 17 00:00:00 2001
From: David Kimdon <david.kimdon@devicescape.com>
Date: Mon, 26 Dec 2005 17:27:10 -0800
Subject: [BR_NETFILTER]: Fix leak if skb traverses > 1 bridge

Call nf_bridge_put() before allocating a new nf_bridge structure and
potentially overwriting the pointer to a previously allocated one.
This fixes a memory leak which can occur when the bridge topology
allows for an skb to traverse more than one bridge.

Signed-off-by: David Kimdon <david.kimdon@devicescape.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netfilter.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 43a0b35dfe6..23422bd53a5 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -369,6 +369,7 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
 	if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb))
 			goto inhdr_error;
 
+ 	nf_bridge_put(skb->nf_bridge);
 	if ((nf_bridge = nf_bridge_alloc(skb)) == NULL)
 		return NF_DROP;
 	setup_pre_routing(skb);
@@ -452,6 +453,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
 			skb->ip_summed = CHECKSUM_NONE;
 	}
 
+ 	nf_bridge_put(skb->nf_bridge);
 	if ((nf_bridge = nf_bridge_alloc(skb)) == NULL)
 		return NF_DROP;
 	setup_pre_routing(skb);
-- 
cgit v1.2.3-70-g09d2


From 6732badee0dad467fcc9dd0168af8677b2b1bc2f Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 27 Dec 2005 13:35:15 -0800
Subject: [IPV6]: Fix addrconf dead lock.

We need to release idev->lock before we call addrconf_dad_stop().
It calls ipv6_del_addr(), which will hold idev->lock.

Bug spotted by Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 2a6439e3c91..a60585fd85a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2467,11 +2467,9 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
 		return;
 	}
 
-	if (idev->if_flags & IF_READY) {
-		addrconf_dad_kick(ifp);
-		spin_unlock_bh(&ifp->lock);
-	} else {
+	if (!(idev->if_flags & IF_READY)) {
 		spin_unlock_bh(&ifp->lock);
+		read_unlock_bh(&idev->lock);
 		/*
 		 * If the defice is not ready:
 		 * - keep it tentative if it is a permanent address.
@@ -2479,7 +2477,10 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
 		 */
 		in6_ifa_hold(ifp);
 		addrconf_dad_stop(ifp);
+		return;
 	}
+	addrconf_dad_kick(ifp);
+	spin_unlock_bh(&ifp->lock);
 out:
 	read_unlock_bh(&idev->lock);
 }
-- 
cgit v1.2.3-70-g09d2


From 1b93ae64cabe5e28dd5a1f35f96f938ca4f6ae20 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Tue, 27 Dec 2005 13:57:59 -0800
Subject: [NET]: Validate socket filters against BPF_MAXINSNS in one spot.

Currently the checks are scattered all over and this leads
to inconsistencies and even cases where the check is not made.

Based upon a patch from Kris Katterjohn.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp_generic.c | 3 ---
 net/core/filter.c         | 4 ++--
 2 files changed, 2 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index 50430f79f8c..1c6d328165b 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -524,9 +524,6 @@ static int get_filter(void __user *arg, struct sock_filter **p)
 	if (copy_from_user(&uprog, arg, sizeof(uprog)))
 		return -EFAULT;
 
-	if (uprog.len > BPF_MAXINSNS)
-		return -EINVAL;
-
 	if (!uprog.len) {
 		*p = NULL;
 		return 0;
diff --git a/net/core/filter.c b/net/core/filter.c
index 2841bfce29d..3a10e0bc90e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -293,7 +293,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 	struct sock_filter *ftest;
 	int pc;
 
-	if (((unsigned int)flen >= (~0U / sizeof(struct sock_filter))) || flen == 0)
+	if (flen == 0 || flen > BPF_MAXINSNS)
 		return -EINVAL;
 
 	/* check the filter code now */
@@ -360,7 +360,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 	int err;
 
 	/* Make sure new filter is there and in the right amounts. */
-        if (fprog->filter == NULL || fprog->len > BPF_MAXINSNS)
+        if (fprog->filter == NULL)
                 return -EINVAL;
 
 	fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
-- 
cgit v1.2.3-70-g09d2


From 5ab4a6c81eb3dbe32361791d1535f9153f79b0ed Mon Sep 17 00:00:00 2001
From: David L Stevens <dlstevens@us.ibm.com>
Date: Tue, 27 Dec 2005 14:03:00 -0800
Subject: [IPV6] mcast: Fix multiple issues in MLDv2 reports.

The below "jumbo" patch fixes the following problems in MLDv2.

1) Add necessary "ntohs" to recent "pskb_may_pull" check [breaks
        all nonzero source queries on little-endian (!)]

2) Add locking to source filter list [resend of prior patch]

3) fix "mld_marksources()" to
        a) send nothing when all queried sources are excluded
        b) send full exclude report when some queried sources are
                not excluded
        c) don't schedule a timer when there's nothing to report

NOTE: RFC 3810 specifies the source list should be saved and each
  source reported individually as an IS_IN. This is an obvious DOS
  path, requiring the host to store and then multicast as many sources
  as are queried (e.g., millions...). This alternative sends a full,
  relevant report that's limited to the number of sources present on the
  machine.

4) fix "add_grec()" to send empty-source records when it should
        The original check doesn't account for a non-empty source
        list with all sources inactive; the new code keeps that
        short-circuit case, and also generates the group header
        with an empty list if needed.

5) fix mca_crcount decrement to be after add_grec(), which needs
        its original value

These issues (other than item #1 ;-) ) were all found by Yan Zheng,
much thanks!

Signed-off-by: David L Stevens <dlstevens@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/if_inet6.h |   1 +
 net/ipv6/mcast.c       | 140 ++++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 111 insertions(+), 30 deletions(-)

(limited to 'net')

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index d8234f9bd4c..eb8afe3499a 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -83,6 +83,7 @@ struct ipv6_mc_socklist
 	struct in6_addr		addr;
 	int			ifindex;
 	struct ipv6_mc_socklist *next;
+	rwlock_t		sflock;
 	unsigned int		sfmode;		/* MCAST_{INCLUDE,EXCLUDE} */
 	struct ip6_sf_socklist	*sflist;
 };
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 057d8619ba1..f829a4ad3cc 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -224,6 +224,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr)
 
 	mc_lst->ifindex = dev->ifindex;
 	mc_lst->sfmode = MCAST_EXCLUDE;
+	mc_lst->sflock = RW_LOCK_UNLOCKED;
 	mc_lst->sflist = NULL;
 
 	/*
@@ -360,6 +361,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 	struct ip6_sf_socklist *psl;
 	int i, j, rv;
 	int leavegroup = 0;
+	int pmclocked = 0;
 	int err;
 
 	if (pgsr->gsr_group.ss_family != AF_INET6 ||
@@ -403,6 +405,9 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 		pmc->sfmode = omode;
 	}
 
+	write_lock_bh(&pmc->sflock);
+	pmclocked = 1;
+
 	psl = pmc->sflist;
 	if (!add) {
 		if (!psl)
@@ -475,6 +480,8 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 	/* update the interface list */
 	ip6_mc_add_src(idev, group, omode, 1, source, 1);
 done:
+	if (pmclocked)
+		write_unlock_bh(&pmc->sflock);
 	read_unlock_bh(&ipv6_sk_mc_lock);
 	read_unlock_bh(&idev->lock);
 	in6_dev_put(idev);
@@ -510,6 +517,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
 	dev = idev->dev;
 
 	err = 0;
+	read_lock_bh(&ipv6_sk_mc_lock);
+
 	if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) {
 		leavegroup = 1;
 		goto done;
@@ -549,6 +558,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
 		newpsl = NULL;
 		(void) ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0);
 	}
+
+	write_lock_bh(&pmc->sflock);
 	psl = pmc->sflist;
 	if (psl) {
 		(void) ip6_mc_del_src(idev, group, pmc->sfmode,
@@ -558,8 +569,10 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
 		(void) ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
 	pmc->sflist = newpsl;
 	pmc->sfmode = gsf->gf_fmode;
+	write_unlock_bh(&pmc->sflock);
 	err = 0;
 done:
+	read_unlock_bh(&ipv6_sk_mc_lock);
 	read_unlock_bh(&idev->lock);
 	in6_dev_put(idev);
 	dev_put(dev);
@@ -592,6 +605,11 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
 	dev = idev->dev;
 
 	err = -EADDRNOTAVAIL;
+	/*
+	 * changes to the ipv6_mc_list require the socket lock and
+	 * a read lock on ip6_sk_mc_lock. We have the socket lock,
+	 * so reading the list is safe.
+	 */
 
 	for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
 		if (pmc->ifindex != gsf->gf_interface)
@@ -614,6 +632,10 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
 	    copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) {
 		return -EFAULT;
 	}
+	/* changes to psl require the socket lock, a read lock on
+	 * on ipv6_sk_mc_lock and a write lock on pmc->sflock. We
+	 * have the socket lock, so reading here is safe.
+	 */
 	for (i=0; i<copycount; i++) {
 		struct sockaddr_in6 *psin6;
 		struct sockaddr_storage ss;
@@ -650,6 +672,7 @@ int inet6_mc_check(struct sock *sk, struct in6_addr *mc_addr,
 		read_unlock(&ipv6_sk_mc_lock);
 		return 1;
 	}
+	read_lock(&mc->sflock);
 	psl = mc->sflist;
 	if (!psl) {
 		rv = mc->sfmode == MCAST_EXCLUDE;
@@ -665,6 +688,7 @@ int inet6_mc_check(struct sock *sk, struct in6_addr *mc_addr,
 		if (mc->sfmode == MCAST_EXCLUDE && i < psl->sl_count)
 			rv = 0;
 	}
+	read_unlock(&mc->sflock);
 	read_unlock(&ipv6_sk_mc_lock);
 
 	return rv;
@@ -1068,7 +1092,8 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)
 	ma->mca_flags |= MAF_TIMER_RUNNING;
 }
 
-static void mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
+/* mark EXCLUDE-mode sources */
+static int mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
 	struct in6_addr *srcs)
 {
 	struct ip6_sf_list *psf;
@@ -1078,13 +1103,53 @@ static void mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
 	for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
 		if (scount == nsrcs)
 			break;
-		for (i=0; i<nsrcs; i++)
+		for (i=0; i<nsrcs; i++) {
+			/* skip inactive filters */
+			if (pmc->mca_sfcount[MCAST_INCLUDE] ||
+			    pmc->mca_sfcount[MCAST_EXCLUDE] !=
+			    psf->sf_count[MCAST_EXCLUDE])
+				continue;
+			if (ipv6_addr_equal(&srcs[i], &psf->sf_addr)) {
+				scount++;
+				break;
+			}
+		}
+	}
+	pmc->mca_flags &= ~MAF_GSQUERY;
+	if (scount == nsrcs)	/* all sources excluded */
+		return 0;
+	return 1;
+}
+
+static int mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
+	struct in6_addr *srcs)
+{
+	struct ip6_sf_list *psf;
+	int i, scount;
+
+	if (pmc->mca_sfmode == MCAST_EXCLUDE)
+		return mld_xmarksources(pmc, nsrcs, srcs);
+
+	/* mark INCLUDE-mode sources */
+
+	scount = 0;
+	for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
+		if (scount == nsrcs)
+			break;
+		for (i=0; i<nsrcs; i++) {
 			if (ipv6_addr_equal(&srcs[i], &psf->sf_addr)) {
 				psf->sf_gsresp = 1;
 				scount++;
 				break;
 			}
+		}
+	}
+	if (!scount) {
+		pmc->mca_flags &= ~MAF_GSQUERY;
+		return 0;
 	}
+	pmc->mca_flags |= MAF_GSQUERY;
+	return 1;
 }
 
 int igmp6_event_query(struct sk_buff *skb)
@@ -1167,7 +1232,7 @@ int igmp6_event_query(struct sk_buff *skb)
 		/* mark sources to include, if group & source-specific */
 		if (mlh2->nsrcs != 0) {
 			if (!pskb_may_pull(skb, srcs_offset + 
-				mlh2->nsrcs * sizeof(struct in6_addr))) {
+			    ntohs(mlh2->nsrcs) * sizeof(struct in6_addr))) {
 				in6_dev_put(idev);
 				return -EINVAL;
 			}
@@ -1203,10 +1268,9 @@ int igmp6_event_query(struct sk_buff *skb)
 				else
 					ma->mca_flags &= ~MAF_GSQUERY;
 			}
-			if (ma->mca_flags & MAF_GSQUERY)
-				mld_marksources(ma, ntohs(mlh2->nsrcs),
-					mlh2->srcs);
-			igmp6_group_queried(ma, max_delay);
+			if (!(ma->mca_flags & MAF_GSQUERY) ||
+			   mld_marksources(ma, ntohs(mlh2->nsrcs), mlh2->srcs))
+				igmp6_group_queried(ma, max_delay);
 			spin_unlock_bh(&ma->mca_lock);
 			if (group_type != IPV6_ADDR_ANY)
 				break;
@@ -1281,7 +1345,18 @@ static int is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type,
 	case MLD2_MODE_IS_EXCLUDE:
 		if (gdeleted || sdeleted)
 			return 0;
-		return !((pmc->mca_flags & MAF_GSQUERY) && !psf->sf_gsresp);
+		if (!((pmc->mca_flags & MAF_GSQUERY) && !psf->sf_gsresp)) {
+			if (pmc->mca_sfmode == MCAST_INCLUDE)
+				return 1;
+			/* don't include if this source is excluded
+			 * in all filters
+			 */
+			if (psf->sf_count[MCAST_INCLUDE])
+				return 0;
+			return pmc->mca_sfcount[MCAST_EXCLUDE] ==
+				psf->sf_count[MCAST_EXCLUDE];
+		}
+		return 0;
 	case MLD2_CHANGE_TO_INCLUDE:
 		if (gdeleted || sdeleted)
 			return 0;
@@ -1450,7 +1525,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 	struct mld2_report *pmr;
 	struct mld2_grec *pgr = NULL;
 	struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list;
-	int scount, first, isquery, truncate;
+	int scount, stotal, first, isquery, truncate;
 
 	if (pmc->mca_flags & MAF_NOREPORT)
 		return skb;
@@ -1460,25 +1535,13 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 	truncate = type == MLD2_MODE_IS_EXCLUDE ||
 		    type == MLD2_CHANGE_TO_EXCLUDE;
 
+	stotal = scount = 0;
+
 	psf_list = sdeleted ? &pmc->mca_tomb : &pmc->mca_sources;
 
-	if (!*psf_list) {
-		if (type == MLD2_ALLOW_NEW_SOURCES ||
-		    type == MLD2_BLOCK_OLD_SOURCES)
-			return skb;
-		if (pmc->mca_crcount || isquery) {
-			/* make sure we have room for group header and at
-			 * least one source.
-			 */
-			if (skb && AVAILABLE(skb) < sizeof(struct mld2_grec)+
-			    sizeof(struct in6_addr)) {
-				mld_sendpack(skb);
-				skb = NULL; /* add_grhead will get a new one */
-			}
-			skb = add_grhead(skb, pmc, type, &pgr);
-		}
-		return skb;
-	}
+	if (!*psf_list)
+		goto empty_source;
+
 	pmr = skb ? (struct mld2_report *)skb->h.raw : NULL;
 
 	/* EX and TO_EX get a fresh packet, if needed */
@@ -1491,7 +1554,6 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 		}
 	}
 	first = 1;
-	scount = 0;
 	psf_prev = NULL;
 	for (psf=*psf_list; psf; psf=psf_next) {
 		struct in6_addr *psrc;
@@ -1525,7 +1587,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 		}
 		psrc = (struct in6_addr *)skb_put(skb, sizeof(*psrc));
 		*psrc = psf->sf_addr;
-		scount++;
+		scount++; stotal++;
 		if ((type == MLD2_ALLOW_NEW_SOURCES ||
 		     type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount) {
 			psf->sf_crcount--;
@@ -1540,6 +1602,21 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 		}
 		psf_prev = psf;
 	}
+
+empty_source:
+	if (!stotal) {
+		if (type == MLD2_ALLOW_NEW_SOURCES ||
+		    type == MLD2_BLOCK_OLD_SOURCES)
+			return skb;
+		if (pmc->mca_crcount || isquery) {
+			/* make sure we have room for group header */
+			if (skb && AVAILABLE(skb) < sizeof(struct mld2_grec)) {
+				mld_sendpack(skb);
+				skb = NULL; /* add_grhead will get a new one */
+			}
+			skb = add_grhead(skb, pmc, type, &pgr);
+		}
+	}
 	if (pgr)
 		pgr->grec_nsrcs = htons(scount);
 
@@ -1621,11 +1698,11 @@ static void mld_send_cr(struct inet6_dev *idev)
 			skb = add_grec(skb, pmc, dtype, 1, 1);
 		}
 		if (pmc->mca_crcount) {
-			pmc->mca_crcount--;
 			if (pmc->mca_sfmode == MCAST_EXCLUDE) {
 				type = MLD2_CHANGE_TO_INCLUDE;
 				skb = add_grec(skb, pmc, type, 1, 0);
 			}
+			pmc->mca_crcount--;
 			if (pmc->mca_crcount == 0) {
 				mld_clear_zeros(&pmc->mca_tomb);
 				mld_clear_zeros(&pmc->mca_sources);
@@ -1659,12 +1736,12 @@ static void mld_send_cr(struct inet6_dev *idev)
 
 		/* filter mode changes */
 		if (pmc->mca_crcount) {
-			pmc->mca_crcount--;
 			if (pmc->mca_sfmode == MCAST_EXCLUDE)
 				type = MLD2_CHANGE_TO_EXCLUDE;
 			else
 				type = MLD2_CHANGE_TO_INCLUDE;
 			skb = add_grec(skb, pmc, type, 0, 0);
+			pmc->mca_crcount--;
 		}
 		spin_unlock_bh(&pmc->mca_lock);
 	}
@@ -2023,6 +2100,9 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
 {
 	int err;
 
+	/* callers have the socket lock and a write lock on ipv6_sk_mc_lock,
+	 * so no other readers or writers of iml or its sflist
+	 */
 	if (iml->sflist == 0) {
 		/* any-source empty exclude case */
 		return ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0);
-- 
cgit v1.2.3-70-g09d2