From 54db0cc2ba0d38166acc2d6bae21721405305537 Mon Sep 17 00:00:00 2001
From: Gao feng <gaofeng@cn.fujitsu.com>
Date: Fri, 8 Jun 2012 01:21:40 +0000
Subject: inetpeer: add parameter net for inet_getpeer_v4,v6

add struct net as a parameter of inet_getpeer_v[4,6],
use net to replace &init_net.

and modify some places to provide net for inet_getpeer_v[4,6]

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net/ipv6/route.c')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 999a982ad3f..4eca0130cce 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -306,9 +306,10 @@ static u32 rt6_peer_genid(void)
 
 void rt6_bind_peer(struct rt6_info *rt, int create)
 {
+	struct net *net = dev_net(rt->dst.dev);
 	struct inet_peer *peer;
 
-	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
+	peer = inet_getpeer_v6(net, &rt->rt6i_dst.addr, create);
 	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
 		inet_putpeer(peer);
 	else
-- 
cgit v1.2.3-70-g09d2


From fbfe95a42e90b3dd079cc9019ba7d7700feee0f6 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 8 Jun 2012 23:24:18 -0700
Subject: inet: Create and use rt{,6}_get_peer_create().

There's a lot of places that open-code rt{,6}_get_peer() only because
they want to set 'create' to one.  So add an rt{,6}_get_peer_create()
for their sake.

There were also a few spots open-coding plain rt{,6}_get_peer() and
those are transformed here as well.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_route.h | 17 +++++++++++++----
 include/net/route.h     | 14 ++++++++++++--
 net/ipv4/icmp.c         |  5 ++---
 net/ipv4/route.c        | 35 +++++++++--------------------------
 net/ipv4/tcp_ipv4.c     |  4 +---
 net/ipv6/icmp.c         |  6 +++---
 net/ipv6/ip6_output.c   | 11 ++++-------
 net/ipv6/ndisc.c        |  6 +++---
 net/ipv6/route.c        |  5 +----
 net/ipv6/tcp_ipv6.c     |  4 +---
 10 files changed, 49 insertions(+), 58 deletions(-)

(limited to 'net/ipv6/route.c')

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 37c1a1ed82c..73d75028812 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -53,18 +53,27 @@ static inline unsigned int rt6_flags2srcprefs(int flags)
 	return (flags >> 3) & 7;
 }
 
-extern void			rt6_bind_peer(struct rt6_info *rt,
-					      int create);
+extern void rt6_bind_peer(struct rt6_info *rt, int create);
 
-static inline struct inet_peer *rt6_get_peer(struct rt6_info *rt)
+static inline struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create)
 {
 	if (rt->rt6i_peer)
 		return rt->rt6i_peer;
 
-	rt6_bind_peer(rt, 0);
+	rt6_bind_peer(rt, create);
 	return rt->rt6i_peer;
 }
 
+static inline struct inet_peer *rt6_get_peer(struct rt6_info *rt)
+{
+	return __rt6_get_peer(rt, 0);
+}
+
+static inline struct inet_peer *rt6_get_peer_create(struct rt6_info *rt)
+{
+	return __rt6_get_peer(rt, 1);
+}
+
 extern void			ip6_route_input(struct sk_buff *skb);
 
 extern struct dst_entry *	ip6_route_output(struct net *net,
diff --git a/include/net/route.h b/include/net/route.h
index ed2b78e2375..433fc6c1d40 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -296,15 +296,25 @@ static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable
 
 extern void rt_bind_peer(struct rtable *rt, __be32 daddr, int create);
 
-static inline struct inet_peer *rt_get_peer(struct rtable *rt, __be32 daddr)
+static inline struct inet_peer *__rt_get_peer(struct rtable *rt, __be32 daddr, int create)
 {
 	if (rt->peer)
 		return rt->peer;
 
-	rt_bind_peer(rt, daddr, 0);
+	rt_bind_peer(rt, daddr, create);
 	return rt->peer;
 }
 
+static inline struct inet_peer *rt_get_peer(struct rtable *rt, __be32 daddr)
+{
+	return __rt_get_peer(rt, daddr, 0);
+}
+
+static inline struct inet_peer *rt_get_peer_create(struct rtable *rt, __be32 daddr)
+{
+	return __rt_get_peer(rt, daddr, 1);
+}
+
 static inline int inet_iif(const struct sk_buff *skb)
 {
 	return skb_rtable(skb)->rt_iif;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index c75efbdc71c..0c78ef1e5dd 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -253,9 +253,8 @@ static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
 
 	/* Limit if icmp type is enabled in ratemask. */
 	if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) {
-		if (!rt->peer)
-			rt_bind_peer(rt, fl4->daddr, 1);
-		rc = inet_peer_xrlim_allow(rt->peer,
+		struct inet_peer *peer = rt_get_peer_create(rt, fl4->daddr);
+		rc = inet_peer_xrlim_allow(peer,
 					   net->ipv4.sysctl_icmp_ratelimit);
 	}
 out:
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 2c9f73f3b7c..7a4d724765e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -162,10 +162,7 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
 	struct inet_peer *peer;
 	u32 *p = NULL;
 
-	if (!rt->peer)
-		rt_bind_peer(rt, rt->rt_dst, 1);
-
-	peer = rt->peer;
+	peer = rt_get_peer_create(rt, rt->rt_dst);
 	if (peer) {
 		u32 *old_p = __DST_METRICS_PTR(old);
 		unsigned long prev, new;
@@ -1364,14 +1361,13 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
 	struct rtable *rt = (struct rtable *) dst;
 
 	if (rt && !(rt->dst.flags & DST_NOPEER)) {
-		if (rt->peer == NULL)
-			rt_bind_peer(rt, rt->rt_dst, 1);
+		struct inet_peer *peer = rt_get_peer_create(rt, rt->rt_dst);
 
 		/* If peer is attached to destination, it is never detached,
 		   so that we need not to grab a lock to dereference it.
 		 */
-		if (rt->peer) {
-			iph->id = htons(inet_getid(rt->peer, more));
+		if (peer) {
+			iph->id = htons(inet_getid(peer, more));
 			return;
 		}
 	} else if (!rt)
@@ -1481,10 +1477,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 				    rt->rt_gateway != old_gw)
 					continue;
 
-				if (!rt->peer)
-					rt_bind_peer(rt, rt->rt_dst, 1);
-
-				peer = rt->peer;
+				peer = rt_get_peer_create(rt, rt->rt_dst);
 				if (peer) {
 					if (peer->redirect_learned.a4 != new_gw) {
 						peer->redirect_learned.a4 = new_gw;
@@ -1579,9 +1572,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
 	rcu_read_unlock();
 
-	if (!rt->peer)
-		rt_bind_peer(rt, rt->rt_dst, 1);
-	peer = rt->peer;
+	peer = rt_get_peer_create(rt, rt->rt_dst);
 	if (!peer) {
 		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
 		return;
@@ -1646,9 +1637,7 @@ static int ip_error(struct sk_buff *skb)
 		break;
 	}
 
-	if (!rt->peer)
-		rt_bind_peer(rt, rt->rt_dst, 1);
-	peer = rt->peer;
+	peer = rt_get_peer_create(rt, rt->rt_dst);
 
 	send = true;
 	if (peer) {
@@ -1754,9 +1743,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 
 	dst_confirm(dst);
 
-	if (!rt->peer)
-		rt_bind_peer(rt, rt->rt_dst, 1);
-	peer = rt->peer;
+	peer = rt_get_peer_create(rt, rt->rt_dst);
 	if (peer) {
 		unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires);
 
@@ -1782,12 +1769,8 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 static void ipv4_validate_peer(struct rtable *rt)
 {
 	if (rt->rt_peer_genid != rt_peer_genid()) {
-		struct inet_peer *peer;
-
-		if (!rt->peer)
-			rt_bind_peer(rt, rt->rt_dst, 0);
+		struct inet_peer *peer = rt_get_peer(rt, rt->rt_dst);
 
-		peer = rt->peer;
 		if (peer) {
 			check_peer_pmtu(&rt->dst, peer);
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f485b451f92..833e8d96a63 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1832,9 +1832,7 @@ struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
 		peer = inet_getpeer_v4(net, inet->inet_daddr, 1);
 		*release_it = true;
 	} else {
-		if (!rt->peer)
-			rt_bind_peer(rt, inet->inet_daddr, 1);
-		peer = rt->peer;
+		peer = rt_get_peer_create(rt, inet->inet_daddr);
 		*release_it = false;
 	}
 
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 091a2971c7b..ed89bba745a 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -188,14 +188,14 @@ static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
 	} else {
 		struct rt6_info *rt = (struct rt6_info *)dst;
 		int tmo = net->ipv6.sysctl.icmpv6_time;
+		struct inet_peer *peer;
 
 		/* Give more bandwidth to wider prefixes. */
 		if (rt->rt6i_dst.plen < 128)
 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
 
-		if (!rt->rt6i_peer)
-			rt6_bind_peer(rt, 1);
-		res = inet_peer_xrlim_allow(rt->rt6i_peer, tmo);
+		peer = rt6_get_peer_create(rt);
+		res = inet_peer_xrlim_allow(peer, tmo);
 	}
 	dst_release(dst);
 	return res;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 17b8c67998b..62fcf3e48ac 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -463,6 +463,7 @@ int ip6_forward(struct sk_buff *skb)
 	 */
 	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
 		struct in6_addr *target = NULL;
+		struct inet_peer *peer;
 		struct rt6_info *rt;
 
 		/*
@@ -476,13 +477,12 @@ int ip6_forward(struct sk_buff *skb)
 		else
 			target = &hdr->daddr;
 
-		if (!rt->rt6i_peer)
-			rt6_bind_peer(rt, 1);
+		peer = rt6_get_peer_create(rt);
 
 		/* Limit redirects both by destination (here)
 		   and by source (inside ndisc_send_redirect)
 		 */
-		if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
+		if (inet_peer_xrlim_allow(peer, 1*HZ))
 			ndisc_send_redirect(skb, target);
 	} else {
 		int addrtype = ipv6_addr_type(&hdr->saddr);
@@ -602,11 +602,8 @@ void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
 	int old, new;
 
 	if (rt && !(rt->dst.flags & DST_NOPEER)) {
-		struct inet_peer *peer;
+		struct inet_peer *peer = rt6_get_peer_create(rt);
 
-		if (!rt->rt6i_peer)
-			rt6_bind_peer(rt, 1);
-		peer = rt->rt6i_peer;
 		if (peer) {
 			fhdr->identification = htonl(inet_getid(peer, 0));
 			return;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 54f62d3b8dd..69a6330dea9 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1472,6 +1472,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
 	struct net *net = dev_net(dev);
 	struct sock *sk = net->ipv6.ndisc_sk;
 	int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
+	struct inet_peer *peer;
 	struct sk_buff *buff;
 	struct icmp6hdr *icmph;
 	struct in6_addr saddr_buf;
@@ -1518,9 +1519,8 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
 			  "Redirect: destination is not a neighbour\n");
 		goto release;
 	}
-	if (!rt->rt6i_peer)
-		rt6_bind_peer(rt, 1);
-	if (!inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
+	peer = rt6_get_peer_create(rt);
+	if (!inet_peer_xrlim_allow(peer, 1*HZ))
 		goto release;
 
 	if (dev->addr_len) {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4eca0130cce..8a986be4aed 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -99,10 +99,7 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
 	if (!(rt->dst.flags & DST_HOST))
 		return NULL;
 
-	if (!rt->rt6i_peer)
-		rt6_bind_peer(rt, 1);
-
-	peer = rt->rt6i_peer;
+	peer = rt6_get_peer_create(rt);
 	if (peer) {
 		u32 *old_p = __DST_METRICS_PTR(old);
 		unsigned long prev, new;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 1a9cdd09f11..218433cb992 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1744,9 +1744,7 @@ static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it)
 		peer = inet_getpeer_v6(net, &np->daddr, 1);
 		*release_it = true;
 	} else {
-		if (!rt->rt6i_peer)
-			rt6_bind_peer(rt, 1);
-		peer = rt->rt6i_peer;
+		peer = rt6_get_peer_create(rt);
 		*release_it = false;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From c3426b47190d7c6785230c91a706fd42208b4120 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 9 Jun 2012 16:27:05 -0700
Subject: inet: Initialize per-netns inetpeer roots in net/ipv{4,6}/route.c

Instead of net/ipv4/inetpeer.c

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h |  9 +++++++
 net/ipv4/inetpeer.c    | 64 +++++++++++++-------------------------------------
 net/ipv4/route.c       | 25 ++++++++++++++++++++
 net/ipv6/route.c       | 34 ++++++++++++++++++++++++++-
 4 files changed, 83 insertions(+), 49 deletions(-)

(limited to 'net/ipv6/route.c')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 20e67db69ac..5a96c8edc52 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -65,6 +65,14 @@ struct inet_peer {
 	atomic_t		refcnt;
 };
 
+struct inet_peer_base {
+	struct inet_peer __rcu	*root;
+	seqlock_t		lock;
+	int			total;
+};
+
+extern void inet_peer_base_init(struct inet_peer_base *);
+
 void			inet_initpeers(void) __init;
 
 #define INETPEER_METRICS_NEW	(~(u32) 0)
@@ -105,6 +113,7 @@ static inline struct inet_peer *inet_getpeer_v6(struct net *net,
 extern void inet_putpeer(struct inet_peer *p);
 extern bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout);
 
+extern void __inetpeer_invalidate_tree(struct inet_peer_base *);
 extern void inetpeer_invalidate_tree(struct net *net, int family);
 
 /*
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 1c8527349c8..d0d72f89b27 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -82,11 +82,13 @@ static const struct inet_peer peer_fake_node = {
 	.avl_height	= 0
 };
 
-struct inet_peer_base {
-	struct inet_peer __rcu *root;
-	seqlock_t	lock;
-	int		total;
-};
+void inet_peer_base_init(struct inet_peer_base *bp)
+{
+	bp->root = peer_avl_empty_rcu;
+	seqlock_init(&bp->lock);
+	bp->total = 0;
+}
+EXPORT_SYMBOL_GPL(inet_peer_base_init);
 
 #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
 
@@ -141,46 +143,6 @@ static void inetpeer_gc_worker(struct work_struct *work)
 	schedule_delayed_work(&gc_work, gc_delay);
 }
 
-static int __net_init inetpeer_net_init(struct net *net)
-{
-	net->ipv4.peers = kzalloc(sizeof(struct inet_peer_base),
-				  GFP_KERNEL);
-	if (net->ipv4.peers == NULL)
-		return -ENOMEM;
-
-	net->ipv4.peers->root = peer_avl_empty_rcu;
-	seqlock_init(&net->ipv4.peers->lock);
-
-	net->ipv6.peers = kzalloc(sizeof(struct inet_peer_base),
-				  GFP_KERNEL);
-	if (net->ipv6.peers == NULL)
-		goto out_ipv6;
-
-	net->ipv6.peers->root = peer_avl_empty_rcu;
-	seqlock_init(&net->ipv6.peers->lock);
-
-	return 0;
-out_ipv6:
-	kfree(net->ipv4.peers);
-	return -ENOMEM;
-}
-
-static void __net_exit inetpeer_net_exit(struct net *net)
-{
-	inetpeer_invalidate_tree(net, AF_INET);
-	kfree(net->ipv4.peers);
-	net->ipv4.peers = NULL;
-
-	inetpeer_invalidate_tree(net, AF_INET6);
-	kfree(net->ipv6.peers);
-	net->ipv6.peers = NULL;
-}
-
-static struct pernet_operations inetpeer_ops = {
-	.init = inetpeer_net_init,
-	.exit = inetpeer_net_exit,
-};
-
 /* Called from ip_output.c:ip_init  */
 void __init inet_initpeers(void)
 {
@@ -205,7 +167,6 @@ void __init inet_initpeers(void)
 			NULL);
 
 	INIT_DELAYED_WORK_DEFERRABLE(&gc_work, inetpeer_gc_worker);
-	register_pernet_subsys(&inetpeer_ops);
 }
 
 static int addr_compare(const struct inetpeer_addr *a,
@@ -603,10 +564,9 @@ static void inetpeer_inval_rcu(struct rcu_head *head)
 	schedule_delayed_work(&gc_work, gc_delay);
 }
 
-void inetpeer_invalidate_tree(struct net *net, int family)
+void __inetpeer_invalidate_tree(struct inet_peer_base *base)
 {
 	struct inet_peer *old, *new, *prev;
-	struct inet_peer_base *base = family_to_base(net, family);
 
 	write_seqlock_bh(&base->lock);
 
@@ -625,4 +585,12 @@ void inetpeer_invalidate_tree(struct net *net, int family)
 out:
 	write_sequnlock_bh(&base->lock);
 }
+EXPORT_SYMBOL(__inetpeer_invalidate_tree);
+
+void inetpeer_invalidate_tree(struct net *net, int family)
+{
+	struct inet_peer_base *base = family_to_base(net, family);
+
+	__inetpeer_invalidate_tree(base);
+}
 EXPORT_SYMBOL(inetpeer_invalidate_tree);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 7a4d724765e..4cd35c3904d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3385,6 +3385,30 @@ static __net_initdata struct pernet_operations rt_genid_ops = {
 	.init = rt_genid_init,
 };
 
+static int __net_init ipv4_inetpeer_init(struct net *net)
+{
+	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
+
+	if (!bp)
+		return -ENOMEM;
+	inet_peer_base_init(bp);
+	net->ipv4.peers = bp;
+	return 0;
+}
+
+static void __net_exit ipv4_inetpeer_exit(struct net *net)
+{
+	struct inet_peer_base *bp = net->ipv4.peers;
+
+	net->ipv4.peers = NULL;
+	__inetpeer_invalidate_tree(bp);
+	kfree(bp);
+}
+
+static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
+	.init	=	ipv4_inetpeer_init,
+	.exit	=	ipv4_inetpeer_exit,
+};
 
 #ifdef CONFIG_IP_ROUTE_CLASSID
 struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
@@ -3465,6 +3489,7 @@ int __init ip_rt_init(void)
 	register_pernet_subsys(&sysctl_route_ops);
 #endif
 	register_pernet_subsys(&rt_genid_ops);
+	register_pernet_subsys(&ipv4_inetpeer_ops);
 	return rc;
 }
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 8a986be4aed..3e1e4e0da09 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2996,6 +2996,31 @@ static struct pernet_operations ip6_route_net_ops = {
 	.exit = ip6_route_net_exit,
 };
 
+static int __net_init ipv6_inetpeer_init(struct net *net)
+{
+	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
+
+	if (!bp)
+		return -ENOMEM;
+	inet_peer_base_init(bp);
+	net->ipv6.peers = bp;
+	return 0;
+}
+
+static void __net_exit ipv6_inetpeer_exit(struct net *net)
+{
+	struct inet_peer_base *bp = net->ipv6.peers;
+
+	net->ipv6.peers = NULL;
+	__inetpeer_invalidate_tree(bp);
+	kfree(bp);
+}
+
+static __net_initdata struct pernet_operations ipv6_inetpeer_ops = {
+	.init	=	ipv6_inetpeer_init,
+	.exit	=	ipv6_inetpeer_exit,
+};
+
 static struct notifier_block ip6_route_dev_notifier = {
 	.notifier_call = ip6_route_dev_notify,
 	.priority = 0,
@@ -3020,6 +3045,10 @@ int __init ip6_route_init(void)
 	if (ret)
 		goto out_dst_entries;
 
+	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
+	if (ret)
+		goto out_register_subsys;
+
 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
 
 	/* Registering of the loopback is done before this portion of code,
@@ -3035,7 +3064,7 @@ int __init ip6_route_init(void)
   #endif
 	ret = fib6_init();
 	if (ret)
-		goto out_register_subsys;
+		goto out_register_inetpeer;
 
 	ret = xfrm6_init();
 	if (ret)
@@ -3064,6 +3093,8 @@ xfrm6_init:
 	xfrm6_fini();
 out_fib6_init:
 	fib6_gc_cleanup();
+out_register_inetpeer:
+	unregister_pernet_subsys(&ipv6_inetpeer_ops);
 out_register_subsys:
 	unregister_pernet_subsys(&ip6_route_net_ops);
 out_dst_entries:
@@ -3079,6 +3110,7 @@ void ip6_route_cleanup(void)
 	fib6_rules_cleanup();
 	xfrm6_fini();
 	fib6_gc_cleanup();
+	unregister_pernet_subsys(&ipv6_inetpeer_ops);
 	unregister_pernet_subsys(&ip6_route_net_ops);
 	dst_entries_destroy(&ip6_dst_blackhole_ops);
 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
-- 
cgit v1.2.3-70-g09d2


From 56a6b248eb345c1948ee60bf426de1ff7dd81509 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 9 Jun 2012 16:32:41 -0700
Subject: inet: Consolidate inetpeer_invalidate_tree() interfaces.

We only need one interface for this operation, since we always know
which inetpeer root we want to flush.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h |  3 +--
 net/ipv4/inetpeer.c    | 10 +---------
 net/ipv4/route.c       |  4 ++--
 net/ipv6/route.c       |  2 +-
 4 files changed, 5 insertions(+), 14 deletions(-)

(limited to 'net/ipv6/route.c')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 5a96c8edc52..733edc641b7 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -113,8 +113,7 @@ static inline struct inet_peer *inet_getpeer_v6(struct net *net,
 extern void inet_putpeer(struct inet_peer *p);
 extern bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout);
 
-extern void __inetpeer_invalidate_tree(struct inet_peer_base *);
-extern void inetpeer_invalidate_tree(struct net *net, int family);
+extern void inetpeer_invalidate_tree(struct inet_peer_base *);
 
 /*
  * temporary check to make sure we dont access rid, ip_id_count, tcp_ts,
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index d0d72f89b27..9d89a381f0e 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -564,7 +564,7 @@ static void inetpeer_inval_rcu(struct rcu_head *head)
 	schedule_delayed_work(&gc_work, gc_delay);
 }
 
-void __inetpeer_invalidate_tree(struct inet_peer_base *base)
+void inetpeer_invalidate_tree(struct inet_peer_base *base)
 {
 	struct inet_peer *old, *new, *prev;
 
@@ -585,12 +585,4 @@ void __inetpeer_invalidate_tree(struct inet_peer_base *base)
 out:
 	write_sequnlock_bh(&base->lock);
 }
-EXPORT_SYMBOL(__inetpeer_invalidate_tree);
-
-void inetpeer_invalidate_tree(struct net *net, int family)
-{
-	struct inet_peer_base *base = family_to_base(net, family);
-
-	__inetpeer_invalidate_tree(base);
-}
 EXPORT_SYMBOL(inetpeer_invalidate_tree);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 4cd35c3904d..cf78343940d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -935,7 +935,7 @@ static void rt_cache_invalidate(struct net *net)
 
 	get_random_bytes(&shuffle, sizeof(shuffle));
 	atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
-	inetpeer_invalidate_tree(net, AF_INET);
+	inetpeer_invalidate_tree(net->ipv4.peers);
 }
 
 /*
@@ -3401,7 +3401,7 @@ static void __net_exit ipv4_inetpeer_exit(struct net *net)
 	struct inet_peer_base *bp = net->ipv4.peers;
 
 	net->ipv4.peers = NULL;
-	__inetpeer_invalidate_tree(bp);
+	inetpeer_invalidate_tree(bp);
 	kfree(bp);
 }
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 3e1e4e0da09..7f346d7492d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3012,7 +3012,7 @@ static void __net_exit ipv6_inetpeer_exit(struct net *net)
 	struct inet_peer_base *bp = net->ipv6.peers;
 
 	net->ipv6.peers = NULL;
-	__inetpeer_invalidate_tree(bp);
+	inetpeer_invalidate_tree(bp);
 	kfree(bp);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 2b823f72585552ef6fb77d6c081e55e047e879f0 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 9 Jun 2012 19:00:16 -0700
Subject: ipv6: Do not mark ipv6_inetpeer_ops as __net_initdata.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv6/route.c')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7f346d7492d..9586c27e069 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3016,7 +3016,7 @@ static void __net_exit ipv6_inetpeer_exit(struct net *net)
 	kfree(bp);
 }
 
-static __net_initdata struct pernet_operations ipv6_inetpeer_ops = {
+static struct pernet_operations ipv6_inetpeer_ops = {
 	.init	=	ipv6_inetpeer_init,
 	.exit	=	ipv6_inetpeer_exit,
 };
-- 
cgit v1.2.3-70-g09d2


From c0efc887dcadbdbfe171f028acfab9c7c00e9dde Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 9 Jun 2012 19:12:36 -0700
Subject: inet: Pass inetpeer root into inet_getpeer*() interfaces.

Otherwise we reference potentially non-existing members when
ipv6 is disabled.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h | 10 +++++-----
 net/ipv4/inetpeer.c    |  9 +--------
 net/ipv4/ip_fragment.c |  2 +-
 net/ipv4/route.c       |  6 +++---
 net/ipv6/route.c       |  2 +-
 5 files changed, 11 insertions(+), 18 deletions(-)

(limited to 'net/ipv6/route.c')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 733edc641b7..b84b32fd5df 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -83,11 +83,11 @@ static inline bool inet_metrics_new(const struct inet_peer *p)
 }
 
 /* can be called with or without local BH being disabled */
-struct inet_peer *inet_getpeer(struct net *net,
+struct inet_peer *inet_getpeer(struct inet_peer_base *base,
 			       const struct inetpeer_addr *daddr,
 			       int create);
 
-static inline struct inet_peer *inet_getpeer_v4(struct net *net,
+static inline struct inet_peer *inet_getpeer_v4(struct inet_peer_base *base,
 						__be32 v4daddr,
 						int create)
 {
@@ -95,10 +95,10 @@ static inline struct inet_peer *inet_getpeer_v4(struct net *net,
 
 	daddr.addr.a4 = v4daddr;
 	daddr.family = AF_INET;
-	return inet_getpeer(net, &daddr, create);
+	return inet_getpeer(base, &daddr, create);
 }
 
-static inline struct inet_peer *inet_getpeer_v6(struct net *net,
+static inline struct inet_peer *inet_getpeer_v6(struct inet_peer_base *base,
 						const struct in6_addr *v6daddr,
 						int create)
 {
@@ -106,7 +106,7 @@ static inline struct inet_peer *inet_getpeer_v6(struct net *net,
 
 	*(struct in6_addr *)daddr.addr.a6 = *v6daddr;
 	daddr.family = AF_INET6;
-	return inet_getpeer(net, &daddr, create);
+	return inet_getpeer(base, &daddr, create);
 }
 
 /* can be called from BH context or outside */
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 9d89a381f0e..e4cba56a534 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -391,12 +391,6 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base,
 	call_rcu(&p->rcu, inetpeer_free_rcu);
 }
 
-static struct inet_peer_base *family_to_base(struct net *net,
-					     int family)
-{
-	return family == AF_INET ? net->ipv4.peers : net->ipv6.peers;
-}
-
 /* perform garbage collect on all items stacked during a lookup */
 static int inet_peer_gc(struct inet_peer_base *base,
 			struct inet_peer __rcu **stack[PEER_MAXDEPTH],
@@ -434,12 +428,11 @@ static int inet_peer_gc(struct inet_peer_base *base,
 	return cnt;
 }
 
-struct inet_peer *inet_getpeer(struct net *net,
+struct inet_peer *inet_getpeer(struct inet_peer_base *base,
 			       const struct inetpeer_addr *daddr,
 			       int create)
 {
 	struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
-	struct inet_peer_base *base = family_to_base(net, daddr->family);
 	struct inet_peer *p;
 	unsigned int sequence;
 	int invalidated, gccnt = 0;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 22c6bab9717..8d07c973409 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -184,7 +184,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a)
 	qp->daddr = arg->iph->daddr;
 	qp->user = arg->user;
 	qp->peer = sysctl_ipfrag_max_dist ?
-		inet_getpeer_v4(net, arg->iph->saddr, 1) : NULL;
+		inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, 1) : NULL;
 }
 
 static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index cf78343940d..2aa663a6ae9 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1328,7 +1328,7 @@ void rt_bind_peer(struct rtable *rt, __be32 daddr, int create)
 	struct net *net = dev_net(rt->dst.dev);
 	struct inet_peer *peer;
 
-	peer = inet_getpeer_v4(net, daddr, create);
+	peer = inet_getpeer_v4(net->ipv4.peers, daddr, create);
 
 	if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
 		inet_putpeer(peer);
@@ -1684,7 +1684,7 @@ unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph,
 	unsigned short est_mtu = 0;
 	struct inet_peer *peer;
 
-	peer = inet_getpeer_v4(net, iph->daddr, 1);
+	peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
 	if (peer) {
 		unsigned short mtu = new_mtu;
 
@@ -1929,7 +1929,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
 	if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS))
 		create = 1;
 
-	rt->peer = peer = inet_getpeer_v4(net, rt->rt_dst, create);
+	rt->peer = peer = inet_getpeer_v4(net->ipv4.peers, rt->rt_dst, create);
 	if (peer) {
 		rt->rt_peer_genid = rt_peer_genid();
 		if (inet_metrics_new(peer))
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9586c27e069..8fc41d502bb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -306,7 +306,7 @@ void rt6_bind_peer(struct rt6_info *rt, int create)
 	struct net *net = dev_net(rt->dst.dev);
 	struct inet_peer *peer;
 
-	peer = inet_getpeer_v6(net, &rt->rt6i_dst.addr, create);
+	peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, create);
 	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
 		inet_putpeer(peer);
 	else
-- 
cgit v1.2.3-70-g09d2


From 97bab73f987e2781129cd6f4b6379bf44d808cc6 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 9 Jun 2012 22:36:36 -0700
Subject: inet: Hide route peer accesses behind helpers.

We encode the pointer(s) into an unsigned long with one state bit.

The state bit is used so we can store the inetpeer tree root to use
when resolving the peer later.

Later the peer roots will be per-FIB table, and this change works to
facilitate that.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h  | 54 +++++++++++++++++++++++++++++++++++++++++++++++
 include/net/ip6_fib.h   | 32 +++++++++++++++++++++++++++-
 include/net/ip6_route.h |  6 +++---
 include/net/route.h     | 42 +++++++++++++++++++++++++++++++++----
 net/ipv4/route.c        | 56 +++++++++++++++++++++++++++++--------------------
 net/ipv4/xfrm4_policy.c | 10 ++++-----
 net/ipv6/route.c        | 42 +++++++++++++++++++++----------------
 net/ipv6/xfrm6_policy.c | 10 ++++-----
 8 files changed, 193 insertions(+), 59 deletions(-)

(limited to 'net/ipv6/route.c')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index b84b32fd5df..d432489e710 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -71,6 +71,60 @@ struct inet_peer_base {
 	int			total;
 };
 
+#define INETPEER_BASE_BIT	0x1UL
+
+static inline struct inet_peer *inetpeer_ptr(unsigned long val)
+{
+	BUG_ON(val & INETPEER_BASE_BIT);
+	return (struct inet_peer *) val;
+}
+
+static inline struct inet_peer_base *inetpeer_base_ptr(unsigned long val)
+{
+	if (!(val & INETPEER_BASE_BIT))
+		return NULL;
+	val &= ~INETPEER_BASE_BIT;
+	return (struct inet_peer_base *) val;
+}
+
+static inline bool inetpeer_ptr_is_peer(unsigned long val)
+{
+	return !(val & INETPEER_BASE_BIT);
+}
+
+static inline void __inetpeer_ptr_set_peer(unsigned long *val, struct inet_peer *peer)
+{
+	/* This implicitly clears INETPEER_BASE_BIT */
+	*val = (unsigned long) peer;
+}
+
+static inline bool inetpeer_ptr_set_peer(unsigned long *ptr, struct inet_peer *peer)
+{
+	unsigned long val = (unsigned long) peer;
+	unsigned long orig = *ptr;
+
+	if (!(orig & INETPEER_BASE_BIT) || !val ||
+	    cmpxchg(ptr, orig, val) != orig)
+		return false;
+	return true;
+}
+
+static inline void inetpeer_init_ptr(unsigned long *ptr, struct inet_peer_base *base)
+{
+	*ptr = (unsigned long) base | INETPEER_BASE_BIT;
+}
+
+static inline void inetpeer_transfer_peer(unsigned long *to, unsigned long *from)
+{
+	unsigned long val = *from;
+
+	*to = val;
+	if (inetpeer_ptr_is_peer(val)) {
+		struct inet_peer *peer = inetpeer_ptr(val);
+		atomic_inc(&peer->refcnt);
+	}
+}
+
 extern void inet_peer_base_init(struct inet_peer_base *);
 
 void			inet_initpeers(void) __init;
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 0ae759a6c76..3ac5f155c69 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -107,7 +107,7 @@ struct rt6_info {
 	u32				rt6i_peer_genid;
 
 	struct inet6_dev		*rt6i_idev;
-	struct inet_peer		*rt6i_peer;
+	unsigned long			_rt6i_peer;
 
 #ifdef CONFIG_XFRM
 	u32				rt6i_flow_cache_genid;
@@ -118,6 +118,36 @@ struct rt6_info {
 	u8				rt6i_protocol;
 };
 
+static inline struct inet_peer *rt6_peer_ptr(struct rt6_info *rt)
+{
+	return inetpeer_ptr(rt->_rt6i_peer);
+}
+
+static inline bool rt6_has_peer(struct rt6_info *rt)
+{
+	return inetpeer_ptr_is_peer(rt->_rt6i_peer);
+}
+
+static inline void __rt6_set_peer(struct rt6_info *rt, struct inet_peer *peer)
+{
+	__inetpeer_ptr_set_peer(&rt->_rt6i_peer, peer);
+}
+
+static inline bool rt6_set_peer(struct rt6_info *rt, struct inet_peer *peer)
+{
+	return inetpeer_ptr_set_peer(&rt->_rt6i_peer, peer);
+}
+
+static inline void rt6_init_peer(struct rt6_info *rt, struct inet_peer_base *base)
+{
+	inetpeer_init_ptr(&rt->_rt6i_peer, base);
+}
+
+static inline void rt6_transfer_peer(struct rt6_info *rt, struct rt6_info *ort)
+{
+	inetpeer_transfer_peer(&rt->_rt6i_peer, &ort->_rt6i_peer);
+}
+
 static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst)
 {
 	return ((struct rt6_info *)dst)->rt6i_idev;
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 73d75028812..f88a85cf31c 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -57,11 +57,11 @@ extern void rt6_bind_peer(struct rt6_info *rt, int create);
 
 static inline struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create)
 {
-	if (rt->rt6i_peer)
-		return rt->rt6i_peer;
+	if (rt6_has_peer(rt))
+		return rt6_peer_ptr(rt);
 
 	rt6_bind_peer(rt, create);
-	return rt->rt6i_peer;
+	return rt6_peer_ptr(rt);
 }
 
 static inline struct inet_peer *rt6_get_peer(struct rt6_info *rt)
diff --git a/include/net/route.h b/include/net/route.h
index 433fc6c1d40..6340c37677f 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -67,10 +67,44 @@ struct rtable {
 	/* Miscellaneous cached information */
 	__be32			rt_spec_dst; /* RFC1122 specific destination */
 	u32			rt_peer_genid;
-	struct inet_peer	*peer; /* long-living peer info */
+	unsigned long		_peer; /* long-living peer info */
 	struct fib_info		*fi; /* for client ref to shared metrics */
 };
 
+static inline struct inet_peer *rt_peer_ptr(struct rtable *rt)
+{
+	return inetpeer_ptr(rt->_peer);
+}
+
+static inline bool rt_has_peer(struct rtable *rt)
+{
+	return inetpeer_ptr_is_peer(rt->_peer);
+}
+
+static inline void __rt_set_peer(struct rtable *rt, struct inet_peer *peer)
+{
+	__inetpeer_ptr_set_peer(&rt->_peer, peer);
+}
+
+static inline bool rt_set_peer(struct rtable *rt, struct inet_peer *peer)
+{
+	return inetpeer_ptr_set_peer(&rt->_peer, peer);
+}
+
+static inline void rt_init_peer(struct rtable *rt, struct inet_peer_base *base)
+{
+	inetpeer_init_ptr(&rt->_peer, base);
+}
+
+static inline void rt_transfer_peer(struct rtable *rt, struct rtable *ort)
+{
+	rt->_peer = ort->_peer;
+	if (rt_has_peer(ort)) {
+		struct inet_peer *peer = rt_peer_ptr(ort);
+		atomic_inc(&peer->refcnt);
+	}
+}
+
 static inline bool rt_is_input_route(const struct rtable *rt)
 {
 	return rt->rt_route_iif != 0;
@@ -298,11 +332,11 @@ extern void rt_bind_peer(struct rtable *rt, __be32 daddr, int create);
 
 static inline struct inet_peer *__rt_get_peer(struct rtable *rt, __be32 daddr, int create)
 {
-	if (rt->peer)
-		return rt->peer;
+	if (rt_has_peer(rt))
+		return rt_peer_ptr(rt);
 
 	rt_bind_peer(rt, daddr, create);
-	return rt->peer;
+	return rt_peer_ptr(rt);
 }
 
 static inline struct inet_peer *rt_get_peer(struct rtable *rt, __be32 daddr)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 2aa663a6ae9..03e5b614370 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -677,7 +677,7 @@ static inline int rt_fast_clean(struct rtable *rth)
 static inline int rt_valuable(struct rtable *rth)
 {
 	return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
-		(rth->peer && rth->peer->pmtu_expires);
+		(rt_has_peer(rth) && rt_peer_ptr(rth)->pmtu_expires);
 }
 
 static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
@@ -1325,12 +1325,16 @@ static u32 rt_peer_genid(void)
 
 void rt_bind_peer(struct rtable *rt, __be32 daddr, int create)
 {
-	struct net *net = dev_net(rt->dst.dev);
+	struct inet_peer_base *base;
 	struct inet_peer *peer;
 
-	peer = inet_getpeer_v4(net->ipv4.peers, daddr, create);
+	base = inetpeer_base_ptr(rt->_peer);
+	if (!base)
+		return;
+
+	peer = inet_getpeer_v4(base, daddr, create);
 
-	if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
+	if (!rt_set_peer(rt, peer))
 		inet_putpeer(peer);
 	else
 		rt->rt_peer_genid = rt_peer_genid();
@@ -1533,8 +1537,10 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
 						rt_genid(dev_net(dst->dev)));
 			rt_del(hash, rt);
 			ret = NULL;
-		} else if (rt->peer && peer_pmtu_expired(rt->peer)) {
-			dst_metric_set(dst, RTAX_MTU, rt->peer->pmtu_orig);
+		} else if (rt_has_peer(rt)) {
+			struct inet_peer *peer = rt_peer_ptr(rt);
+			if (peer_pmtu_expired(peer))
+				dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig);
 		}
 	}
 	return ret;
@@ -1796,14 +1802,13 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
 static void ipv4_dst_destroy(struct dst_entry *dst)
 {
 	struct rtable *rt = (struct rtable *) dst;
-	struct inet_peer *peer = rt->peer;
 
 	if (rt->fi) {
 		fib_info_put(rt->fi);
 		rt->fi = NULL;
 	}
-	if (peer) {
-		rt->peer = NULL;
+	if (rt_has_peer(rt)) {
+		struct inet_peer *peer = rt_peer_ptr(rt);
 		inet_putpeer(peer);
 	}
 }
@@ -1816,8 +1821,11 @@ static void ipv4_link_failure(struct sk_buff *skb)
 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
 
 	rt = skb_rtable(skb);
-	if (rt && rt->peer && peer_pmtu_cleaned(rt->peer))
-		dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig);
+	if (rt && rt_has_peer(rt)) {
+		struct inet_peer *peer = rt_peer_ptr(rt);
+		if (peer_pmtu_cleaned(peer))
+			dst_metric_set(&rt->dst, RTAX_MTU, peer->pmtu_orig);
+	}
 }
 
 static int ip_rt_bug(struct sk_buff *skb)
@@ -1919,7 +1927,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
 static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
 			    struct fib_info *fi)
 {
-	struct net *net = dev_net(rt->dst.dev);
+	struct inet_peer_base *base;
 	struct inet_peer *peer;
 	int create = 0;
 
@@ -1929,8 +1937,12 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
 	if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS))
 		create = 1;
 
-	rt->peer = peer = inet_getpeer_v4(net->ipv4.peers, rt->rt_dst, create);
+	base = inetpeer_base_ptr(rt->_peer);
+	BUG_ON(!base);
+
+	peer = inet_getpeer_v4(base, rt->rt_dst, create);
 	if (peer) {
+		__rt_set_peer(rt, peer);
 		rt->rt_peer_genid = rt_peer_genid();
 		if (inet_metrics_new(peer))
 			memcpy(peer->metrics, fi->fib_metrics,
@@ -2046,7 +2058,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	rth->rt_gateway	= daddr;
 	rth->rt_spec_dst= spec_dst;
 	rth->rt_peer_genid = 0;
-	rth->peer = NULL;
+	rt_init_peer(rth, dev_net(dev)->ipv4.peers);
 	rth->fi = NULL;
 	if (our) {
 		rth->dst.input= ip_local_deliver;
@@ -2174,7 +2186,7 @@ static int __mkroute_input(struct sk_buff *skb,
 	rth->rt_gateway	= daddr;
 	rth->rt_spec_dst= spec_dst;
 	rth->rt_peer_genid = 0;
-	rth->peer = NULL;
+	rt_init_peer(rth, dev_net(rth->dst.dev)->ipv4.peers);
 	rth->fi = NULL;
 
 	rth->dst.input = ip_forward;
@@ -2357,7 +2369,7 @@ local_input:
 	rth->rt_gateway	= daddr;
 	rth->rt_spec_dst= spec_dst;
 	rth->rt_peer_genid = 0;
-	rth->peer = NULL;
+	rt_init_peer(rth, net->ipv4.peers);
 	rth->fi = NULL;
 	if (res.type == RTN_UNREACHABLE) {
 		rth->dst.input= ip_error;
@@ -2561,7 +2573,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 	rth->rt_gateway = fl4->daddr;
 	rth->rt_spec_dst= fl4->saddr;
 	rth->rt_peer_genid = 0;
-	rth->peer = NULL;
+	rt_init_peer(rth, dev_net(dev_out)->ipv4.peers);
 	rth->fi = NULL;
 
 	RT_CACHE_STAT_INC(out_slow_tot);
@@ -2898,9 +2910,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
 		rt->rt_src = ort->rt_src;
 		rt->rt_gateway = ort->rt_gateway;
 		rt->rt_spec_dst = ort->rt_spec_dst;
-		rt->peer = ort->peer;
-		if (rt->peer)
-			atomic_inc(&rt->peer->refcnt);
+		rt_transfer_peer(rt, ort);
 		rt->fi = ort->fi;
 		if (rt->fi)
 			atomic_inc(&rt->fi->fib_clntref);
@@ -2938,7 +2948,6 @@ static int rt_fill_info(struct net *net,
 	struct rtmsg *r;
 	struct nlmsghdr *nlh;
 	unsigned long expires = 0;
-	const struct inet_peer *peer = rt->peer;
 	u32 id = 0, ts = 0, tsage = 0, error;
 
 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
@@ -2994,8 +3003,9 @@ static int rt_fill_info(struct net *net,
 		goto nla_put_failure;
 
 	error = rt->dst.error;
-	if (peer) {
-		inet_peer_refcheck(rt->peer);
+	if (rt_has_peer(rt)) {
+		const struct inet_peer *peer = rt_peer_ptr(rt);
+		inet_peer_refcheck(peer);
 		id = atomic_read(&peer->ip_id_count) & 0xffff;
 		if (peer->tcp_ts_stamp) {
 			ts = peer->tcp_ts;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 0d3426cb5c4..8855d826855 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -90,9 +90,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	xdst->u.dst.dev = dev;
 	dev_hold(dev);
 
-	xdst->u.rt.peer = rt->peer;
-	if (rt->peer)
-		atomic_inc(&rt->peer->refcnt);
+	rt_transfer_peer(&xdst->u.rt, rt);
 
 	/* Sheit... I remember I did this right. Apparently,
 	 * it was magically lost, so this code needs audit */
@@ -212,8 +210,10 @@ static void xfrm4_dst_destroy(struct dst_entry *dst)
 
 	dst_destroy_metrics_generic(dst);
 
-	if (likely(xdst->u.rt.peer))
-		inet_putpeer(xdst->u.rt.peer);
+	if (rt_has_peer(&xdst->u.rt)) {
+		struct inet_peer *peer = rt_peer_ptr(&xdst->u.rt);
+		inet_putpeer(peer);
+	}
 
 	xfrm_dst_destroy(xdst);
 }
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 8fc41d502bb..17a9b8687f2 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -258,16 +258,18 @@ static struct rt6_info ip6_blk_hole_entry_template = {
 #endif
 
 /* allocate dst with ip6_dst_ops */
-static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
+static inline struct rt6_info *ip6_dst_alloc(struct net *net,
 					     struct net_device *dev,
 					     int flags)
 {
-	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
+	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
+					0, 0, flags);
 
-	if (rt)
+	if (rt) {
 		memset(&rt->rt6i_table, 0,
 		       sizeof(*rt) - sizeof(struct dst_entry));
-
+		rt6_init_peer(rt, net->ipv6.peers);
+	}
 	return rt;
 }
 
@@ -275,7 +277,6 @@ static void ip6_dst_destroy(struct dst_entry *dst)
 {
 	struct rt6_info *rt = (struct rt6_info *)dst;
 	struct inet6_dev *idev = rt->rt6i_idev;
-	struct inet_peer *peer = rt->rt6i_peer;
 
 	if (!(rt->dst.flags & DST_HOST))
 		dst_destroy_metrics_generic(dst);
@@ -288,8 +289,8 @@ static void ip6_dst_destroy(struct dst_entry *dst)
 	if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
 		dst_release(dst->from);
 
-	if (peer) {
-		rt->rt6i_peer = NULL;
+	if (rt6_has_peer(rt)) {
+		struct inet_peer *peer = rt6_peer_ptr(rt);
 		inet_putpeer(peer);
 	}
 }
@@ -303,11 +304,15 @@ static u32 rt6_peer_genid(void)
 
 void rt6_bind_peer(struct rt6_info *rt, int create)
 {
-	struct net *net = dev_net(rt->dst.dev);
+	struct inet_peer_base *base;
 	struct inet_peer *peer;
 
-	peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, create);
-	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
+	base = inetpeer_base_ptr(rt->_rt6i_peer);
+	if (!base)
+		return;
+
+	peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
+	if (!rt6_set_peer(rt, peer))
 		inet_putpeer(peer);
 	else
 		rt->rt6i_peer_genid = rt6_peer_genid();
@@ -950,6 +955,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
 	if (rt) {
 		memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
+		rt6_init_peer(rt, net->ipv6.peers);
 
 		new = &rt->dst;
 
@@ -994,7 +1000,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 
 	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
 		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
-			if (!rt->rt6i_peer)
+			if (!rt6_has_peer(rt))
 				rt6_bind_peer(rt, 0);
 			rt->rt6i_peer_genid = rt6_peer_genid();
 		}
@@ -1108,7 +1114,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 	if (unlikely(!idev))
 		return ERR_PTR(-ENODEV);
 
-	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
+	rt = ip6_dst_alloc(net, dev, 0);
 	if (unlikely(!rt)) {
 		in6_dev_put(idev);
 		dst = ERR_PTR(-ENOMEM);
@@ -1290,7 +1296,7 @@ int ip6_route_add(struct fib6_config *cfg)
 	if (!table)
 		goto out;
 
-	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
+	rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT);
 
 	if (!rt) {
 		err = -ENOMEM;
@@ -1812,8 +1818,7 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
 				    const struct in6_addr *dest)
 {
 	struct net *net = dev_net(ort->dst.dev);
-	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
-					    ort->dst.dev, 0);
+	struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0);
 
 	if (rt) {
 		rt->dst.input = ort->dst.input;
@@ -2097,8 +2102,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 				    bool anycast)
 {
 	struct net *net = dev_net(idev->dev);
-	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
-					    net->loopback_dev, 0);
+	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0);
 	int err;
 
 	if (!rt) {
@@ -2519,7 +2523,9 @@ static int rt6_fill_node(struct net *net,
 	else
 		expires = INT_MAX;
 
-	peer = rt->rt6i_peer;
+	peer = NULL;
+	if (rt6_has_peer(rt))
+		peer = rt6_peer_ptr(rt);
 	ts = tsage = 0;
 	if (peer && peer->tcp_ts_stamp) {
 		ts = peer->tcp_ts;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 8625fba96db..d7494845efb 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -99,9 +99,7 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	if (!xdst->u.rt6.rt6i_idev)
 		return -ENODEV;
 
-	xdst->u.rt6.rt6i_peer = rt->rt6i_peer;
-	if (rt->rt6i_peer)
-		atomic_inc(&rt->rt6i_peer->refcnt);
+	rt6_transfer_peer(&xdst->u.rt6, rt);
 
 	/* Sheit... I remember I did this right. Apparently,
 	 * it was magically lost, so this code needs audit */
@@ -223,8 +221,10 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
 	if (likely(xdst->u.rt6.rt6i_idev))
 		in6_dev_put(xdst->u.rt6.rt6i_idev);
 	dst_destroy_metrics_generic(dst);
-	if (likely(xdst->u.rt6.rt6i_peer))
-		inet_putpeer(xdst->u.rt6.rt6i_peer);
+	if (rt6_has_peer(&xdst->u.rt6)) {
+		struct inet_peer *peer = rt6_peer_ptr(&xdst->u.rt6);
+		inet_putpeer(peer);
+	}
 	xfrm_dst_destroy(xdst);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 8b96d22d7a6ec999ae53ae86d829137503ceda65 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 11 Jun 2012 02:01:56 -0700
Subject: inet: Use FIB table peer roots in routes.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c |  8 ++++++--
 net/ipv6/route.c | 14 ++++++++------
 2 files changed, 14 insertions(+), 8 deletions(-)

(limited to 'net/ipv6/route.c')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 456a9470fb5..4c33ce3000e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2125,7 +2125,7 @@ static int __mkroute_input(struct sk_buff *skb,
 	rth->rt_gateway	= daddr;
 	rth->rt_spec_dst= spec_dst;
 	rth->rt_peer_genid = 0;
-	rt_init_peer(rth, dev_net(rth->dst.dev)->ipv4.peers);
+	rt_init_peer(rth, &res->table->tb_peers);
 	rth->fi = NULL;
 
 	rth->dst.input = ip_forward;
@@ -2512,7 +2512,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 	rth->rt_gateway = fl4->daddr;
 	rth->rt_spec_dst= fl4->saddr;
 	rth->rt_peer_genid = 0;
-	rt_init_peer(rth, dev_net(dev_out)->ipv4.peers);
+	rt_init_peer(rth, (res->table ?
+			   &res->table->tb_peers :
+			   dev_net(dev_out)->ipv4.peers));
 	rth->fi = NULL;
 
 	RT_CACHE_STAT_INC(out_slow_tot);
@@ -2561,6 +2563,7 @@ static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4)
 	int orig_oif;
 
 	res.fi		= NULL;
+	res.table	= NULL;
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	res.r		= NULL;
 #endif
@@ -2666,6 +2669,7 @@ static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4)
 
 	if (fib_lookup(net, fl4, &res)) {
 		res.fi = NULL;
+		res.table = NULL;
 		if (fl4->flowi4_oif) {
 			/* Apparently, routing tables are wrong. Assume,
 			   that the destination is on link.
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 17a9b8687f2..d9ba4808f26 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -260,7 +260,8 @@ static struct rt6_info ip6_blk_hole_entry_template = {
 /* allocate dst with ip6_dst_ops */
 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
 					     struct net_device *dev,
-					     int flags)
+					     int flags,
+					     struct fib6_table *table)
 {
 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
 					0, 0, flags);
@@ -268,7 +269,7 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
 	if (rt) {
 		memset(&rt->rt6i_table, 0,
 		       sizeof(*rt) - sizeof(struct dst_entry));
-		rt6_init_peer(rt, net->ipv6.peers);
+		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
 	}
 	return rt;
 }
@@ -1114,7 +1115,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 	if (unlikely(!idev))
 		return ERR_PTR(-ENODEV);
 
-	rt = ip6_dst_alloc(net, dev, 0);
+	rt = ip6_dst_alloc(net, dev, 0, NULL);
 	if (unlikely(!rt)) {
 		in6_dev_put(idev);
 		dst = ERR_PTR(-ENOMEM);
@@ -1296,7 +1297,7 @@ int ip6_route_add(struct fib6_config *cfg)
 	if (!table)
 		goto out;
 
-	rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT);
+	rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
 
 	if (!rt) {
 		err = -ENOMEM;
@@ -1818,7 +1819,8 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
 				    const struct in6_addr *dest)
 {
 	struct net *net = dev_net(ort->dst.dev);
-	struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0);
+	struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
+					    ort->rt6i_table);
 
 	if (rt) {
 		rt->dst.input = ort->dst.input;
@@ -2102,7 +2104,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 				    bool anycast)
 {
 	struct net *net = dev_net(idev->dev);
-	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0);
+	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
 	int err;
 
 	if (!rt) {
-- 
cgit v1.2.3-70-g09d2


From 7b34ca2ac7063f4ebf07f85fd75253ed84d5c648 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 11 Jun 2012 04:13:57 -0700
Subject: inet: Avoid potential NULL peer dereference.

We handle NULL in rt{,6}_set_peer but then our caller will try to pass
that NULL pointer into inet_putpeer() which isn't ready for it.

Fix this by moving the NULL check one level up, and then remove the
now unnecessary NULL check from inetpeer_ptr_set_peer().

Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h |  2 +-
 net/ipv4/route.c       | 11 ++++++-----
 net/ipv6/route.c       | 10 ++++++----
 3 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'net/ipv6/route.c')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index e15c0862a68..c27c8f10ebd 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -104,7 +104,7 @@ static inline bool inetpeer_ptr_set_peer(unsigned long *ptr, struct inet_peer *p
 	unsigned long val = (unsigned long) peer;
 	unsigned long orig = *ptr;
 
-	if (!(orig & INETPEER_BASE_BIT) || !val ||
+	if (!(orig & INETPEER_BASE_BIT) ||
 	    cmpxchg(ptr, orig, val) != orig)
 		return false;
 	return true;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 4c33ce3000e..842510d5045 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1333,11 +1333,12 @@ void rt_bind_peer(struct rtable *rt, __be32 daddr, int create)
 		return;
 
 	peer = inet_getpeer_v4(base, daddr, create);
-
-	if (!rt_set_peer(rt, peer))
-		inet_putpeer(peer);
-	else
-		rt->rt_peer_genid = rt_peer_genid();
+	if (peer) {
+		if (!rt_set_peer(rt, peer))
+			inet_putpeer(peer);
+		else
+			rt->rt_peer_genid = rt_peer_genid();
+	}
 }
 
 /*
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d9ba4808f26..58a3ec23da2 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -313,10 +313,12 @@ void rt6_bind_peer(struct rt6_info *rt, int create)
 		return;
 
 	peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
-	if (!rt6_set_peer(rt, peer))
-		inet_putpeer(peer);
-	else
-		rt->rt6i_peer_genid = rt6_peer_genid();
+	if (peer) {
+		if (!rt6_set_peer(rt, peer))
+			inet_putpeer(peer);
+		else
+			rt->rt6i_peer_genid = rt6_peer_genid();
+	}
 }
 
 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
-- 
cgit v1.2.3-70-g09d2


From 81aded24675ebda5de8a68843250ad15584ac38a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 15 Jun 2012 14:54:11 -0700
Subject: ipv6: Handle PMTU in ICMP error handlers.

One tricky issue on the ipv6 side vs. ipv4 is that the ICMP callouts
to handle the error pass the 32-bit info cookie in network byte order
whereas ipv4 passes it around in host byte order.

Like the ipv4 side, we have two helper functions.  One for when we
have a socket context and one for when we do not.

ip6ip6 tunnels are not handled here, because they handle PMTU events
by essentially relaying another ICMP packet-too-big message back to
the original sender.

This patch allows us to get rid of rt6_do_pmtu_disc().  It handles all
kinds of situations that simply cannot happen when we do the PMTU
update directly using a fully resolved route.

In fact, the "plen == 128" check in ip6_rt_update_pmtu() can very
likely be removed or changed into a BUG_ON() check.  We should never
have a prefixed ipv6 route when we get there.

Another piece of strange history here is that TCP and DCCP, unlike in
ipv4, never invoke the update_pmtu() method from their ICMP error
handlers.  This is incredibly astonishing since this is the context
where we have the most accurate context in which to make a PMTU
update, namely we have a fully connected socket and associated cached
socket route.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_route.h |   8 +--
 net/dccp/ipv6.c         |   2 +
 net/ipv6/ah6.c          |   3 +-
 net/ipv6/esp6.c         |   2 +
 net/ipv6/icmp.c         |   6 +-
 net/ipv6/ipcomp6.c      |   2 +
 net/ipv6/raw.c          |   5 +-
 net/ipv6/route.c        | 143 +++++++++++-------------------------------------
 net/ipv6/tcp_ipv6.c     |   2 +
 net/ipv6/udp.c          |   3 +
 10 files changed, 54 insertions(+), 122 deletions(-)

(limited to 'net/ipv6/route.c')

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index a2cda240ca9..58cb3fc3487 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -140,10 +140,10 @@ extern void			rt6_redirect(const struct in6_addr *dest,
 					     u8 *lladdr,
 					     int on_link);
 
-extern void			rt6_pmtu_discovery(const struct in6_addr *daddr,
-						   const struct in6_addr *saddr,
-						   struct net_device *dev,
-						   u32 pmtu);
+extern void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
+			    int oif, u32 mark);
+extern void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk,
+			       __be32 mtu);
 
 struct netlink_callback;
 
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index fa9512d86f3..9991be083ad 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -165,6 +165,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		} else
 			dst_hold(dst);
 
+		dst->ops->update_pmtu(dst, ntohl(info));
+
 		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
 			dccp_sync_mss(sk, dst_mtu(dst));
 		} /* else let the usual retransmit timer handle it */
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index f1a4a2c28ed..49d4d26bda8 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -35,6 +35,7 @@
 #include <linux/pfkeyv2.h>
 #include <linux/string.h>
 #include <linux/scatterlist.h>
+#include <net/ip6_route.h>
 #include <net/icmp.h>
 #include <net/ipv6.h>
 #include <net/protocol.h>
@@ -621,7 +622,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 	NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%pI6\n",
 		 ntohl(ah->spi), &iph->daddr);
-
+	ip6_update_pmtu(skb, net, info, 0, 0);
 	xfrm_state_put(x);
 }
 
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index db1521fcda5..89a615ba84f 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -39,6 +39,7 @@
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <net/ip6_route.h>
 #include <net/icmp.h>
 #include <net/ipv6.h>
 #include <net/protocol.h>
@@ -442,6 +443,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		return;
 	pr_debug("pmtu discovery on SA ESP/%08x/%pI6\n",
 		 ntohl(esph->spi), &iph->daddr);
+	ip6_update_pmtu(skb, net, info, 0, 0);
 	xfrm_state_put(x);
 }
 
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index ed89bba745a..5247d5c211f 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -649,7 +649,6 @@ static int icmpv6_rcv(struct sk_buff *skb)
 	struct net_device *dev = skb->dev;
 	struct inet6_dev *idev = __in6_dev_get(dev);
 	const struct in6_addr *saddr, *daddr;
-	const struct ipv6hdr *orig_hdr;
 	struct icmp6hdr *hdr;
 	u8 type;
 
@@ -661,7 +660,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
 				 XFRM_STATE_ICMP))
 			goto drop_no_count;
 
-		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr)))
+		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
 			goto drop_no_count;
 
 		nh = skb_network_offset(skb);
@@ -722,9 +721,6 @@ static int icmpv6_rcv(struct sk_buff *skb)
 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 			goto discard_it;
 		hdr = icmp6_hdr(skb);
-		orig_hdr = (struct ipv6hdr *) (hdr + 1);
-		rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
-				   ntohl(hdr->icmp6_mtu));
 
 		/*
 		 *	Drop through to notify
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 5cb75bfe45b..92832385a8e 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -46,6 +46,7 @@
 #include <linux/list.h>
 #include <linux/vmalloc.h>
 #include <linux/rtnetlink.h>
+#include <net/ip6_route.h>
 #include <net/icmp.h>
 #include <net/ipv6.h>
 #include <net/protocol.h>
@@ -74,6 +75,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 	pr_debug("pmtu discovery on SA IPCOMP/%08x/%pI6\n",
 		 spi, &iph->daddr);
+	ip6_update_pmtu(skb, net, info, 0, 0);
 	xfrm_state_put(x);
 }
 
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 93d69836fde..43b0042f15f 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -328,9 +328,10 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
 		return;
 
 	harderr = icmpv6_err_convert(type, code, &err);
-	if (type == ICMPV6_PKT_TOOBIG)
+	if (type == ICMPV6_PKT_TOOBIG) {
+		ip6_sk_update_pmtu(skb, sk, info);
 		harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
-
+	}
 	if (np->recverr) {
 		u8 *payload = skb->data;
 		if (!inet->hdrincl)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 58a3ec23da2..0d41f68daff 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1049,7 +1049,10 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 {
 	struct rt6_info *rt6 = (struct rt6_info*)dst;
 
+	dst_confirm(dst);
 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
+		struct net *net = dev_net(dst->dev);
+
 		rt6->rt6i_flags |= RTF_MODIFIED;
 		if (mtu < IPV6_MIN_MTU) {
 			u32 features = dst_metric(dst, RTAX_FEATURES);
@@ -1058,9 +1061,39 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 			dst_metric_set(dst, RTAX_FEATURES, features);
 		}
 		dst_metric_set(dst, RTAX_MTU, mtu);
+		rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
 	}
 }
 
+void ip6_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
+		     int oif, __be32 mark)
+{
+	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
+	struct dst_entry *dst;
+	struct flowi6 fl6;
+
+	memset(&fl6, 0, sizeof(fl6));
+	fl6.flowi6_oif = oif;
+	fl6.flowi6_mark = mark;
+	fl6.flowi6_flags = FLOWI_FLAG_PRECOW_METRICS;
+	fl6.daddr = iph->daddr;
+	fl6.saddr = iph->saddr;
+	fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
+
+	dst = ip6_route_output(net, NULL, &fl6);
+	if (!dst->error)
+		ip6_rt_update_pmtu(dst, ntohl(mtu));
+	dst_release(dst);
+}
+EXPORT_SYMBOL_GPL(ip6_update_pmtu);
+
+void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
+{
+	ip6_update_pmtu(skb, sock_net(sk), mtu,
+			sk->sk_bound_dev_if, sk->sk_mark);
+}
+EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
+
 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
 {
 	struct net_device *dev = dst->dev;
@@ -1703,116 +1736,6 @@ out:
 	dst_release(&rt->dst);
 }
 
-/*
- *	Handle ICMP "packet too big" messages
- *	i.e. Path MTU discovery
- */
-
-static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
-			     struct net *net, u32 pmtu, int ifindex)
-{
-	struct rt6_info *rt, *nrt;
-	int allfrag = 0;
-again:
-	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
-	if (!rt)
-		return;
-
-	if (rt6_check_expired(rt)) {
-		ip6_del_rt(rt);
-		goto again;
-	}
-
-	if (pmtu >= dst_mtu(&rt->dst))
-		goto out;
-
-	if (pmtu < IPV6_MIN_MTU) {
-		/*
-		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
-		 * MTU (1280) and a fragment header should always be included
-		 * after a node receiving Too Big message reporting PMTU is
-		 * less than the IPv6 Minimum Link MTU.
-		 */
-		pmtu = IPV6_MIN_MTU;
-		allfrag = 1;
-	}
-
-	/* New mtu received -> path was valid.
-	   They are sent only in response to data packets,
-	   so that this nexthop apparently is reachable. --ANK
-	 */
-	dst_confirm(&rt->dst);
-
-	/* Host route. If it is static, it would be better
-	   not to override it, but add new one, so that
-	   when cache entry will expire old pmtu
-	   would return automatically.
-	 */
-	if (rt->rt6i_flags & RTF_CACHE) {
-		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
-		if (allfrag) {
-			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
-			features |= RTAX_FEATURE_ALLFRAG;
-			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
-		}
-		rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
-		rt->rt6i_flags |= RTF_MODIFIED;
-		goto out;
-	}
-
-	/* Network route.
-	   Two cases are possible:
-	   1. It is connected route. Action: COW
-	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
-	 */
-	if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
-		nrt = rt6_alloc_cow(rt, daddr, saddr);
-	else
-		nrt = rt6_alloc_clone(rt, daddr);
-
-	if (nrt) {
-		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
-		if (allfrag) {
-			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
-			features |= RTAX_FEATURE_ALLFRAG;
-			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
-		}
-
-		/* According to RFC 1981, detecting PMTU increase shouldn't be
-		 * happened within 5 mins, the recommended timer is 10 mins.
-		 * Here this route expiration time is set to ip6_rt_mtu_expires
-		 * which is 10 mins. After 10 mins the decreased pmtu is expired
-		 * and detecting PMTU increase will be automatically happened.
-		 */
-		rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
-		nrt->rt6i_flags |= RTF_DYNAMIC;
-		ip6_ins_rt(nrt);
-	}
-out:
-	dst_release(&rt->dst);
-}
-
-void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
-			struct net_device *dev, u32 pmtu)
-{
-	struct net *net = dev_net(dev);
-
-	/*
-	 * RFC 1981 states that a node "MUST reduce the size of the packets it
-	 * is sending along the path" that caused the Packet Too Big message.
-	 * Since it's not possible in the general case to determine which
-	 * interface was used to send the original packet, we update the MTU
-	 * on the interface that will be used to send future packets. We also
-	 * update the MTU on the interface that received the Packet Too Big in
-	 * case the original packet was forced out that interface with
-	 * SO_BINDTODEVICE or similar. This is the next best thing to the
-	 * correct behaviour, which would be to update the MTU on all
-	 * interfaces.
-	 */
-	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
-	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
-}
-
 /*
  *	Misc support functions
  */
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f91b0bfd12d..26a88623940 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -415,6 +415,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		} else
 			dst_hold(dst);
 
+		dst->ops->update_pmtu(dst, ntohl(info));
+
 		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
 			tcp_sync_mss(sk, dst_mtu(dst));
 			tcp_simple_retransmit(sk);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f05099fc590..051ad481973 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -479,6 +479,9 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (sk == NULL)
 		return;
 
+	if (type == ICMPV6_PKT_TOOBIG)
+		ip6_sk_update_pmtu(skb, sk, info);
+
 	np = inet6_sk(sk);
 
 	if (!icmpv6_err_convert(type, code, &err) && !np->recverr)
-- 
cgit v1.2.3-70-g09d2


From 42ae66c80d94645e8f74080c7f344596d6f19cd5 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 15 Jun 2012 20:01:57 -0700
Subject: ipv6: Fix types of ip6_update_pmtu().

The mtu should be a __be32, not the mark.

Reported-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/ipv6/route.c')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c7ccc36ba63..1c279fe2c9b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1065,8 +1065,8 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 	}
 }
 
-void ip6_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
-		     int oif, __be32 mark)
+void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
+		     int oif, u32 mark)
 {
 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
 	struct dst_entry *dst;
-- 
cgit v1.2.3-70-g09d2


From 4dc27d1cf3b3027b9ce654221c559e88b2f41b33 Mon Sep 17 00:00:00 2001
From: David McCullough <david_mccullough@mcafee.com>
Date: Mon, 25 Jun 2012 15:42:26 +0000
Subject: net/ipv6/route.c: packets originating on device match lo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix to allow IPv6 packets originating locally to match rules with the "iff"
set to "lo".  This allows IPv6 rule matching work the same as it does for
IPv4.  From the iproute2 man page:

   iif NAME
		  select  the incoming device to match.  If the interface is loop‐
		  back, the rule only matches packets originating from this  host.
		  This  means that you may create separate routing tables for for‐
		  warded and local packets and, hence, completely segregate them.

Signed-off-by: David McCullough <david_mccullough@mcafee.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net/ipv6/route.c')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c5bbece7f9f..c518e4ec0ce 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -937,6 +937,8 @@ struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
 {
 	int flags = 0;
 
+	fl6->flowi6_iif = net->loopback_dev->ifindex;
+
 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
 		flags |= RT6_LOOKUP_F_IFACE;
 
-- 
cgit v1.2.3-70-g09d2


From f894cbf847c9bea1955095bf37aca6c050553167 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 2 Jul 2012 21:52:24 -0700
Subject: net: Add optional SKB arg to dst_ops->neigh_lookup().

Causes the handler to use the daddr in the ipv4/ipv6 header when
the route gateway is unspecified (local subnet).

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h         |  8 +++++++-
 include/net/dst_ops.h     |  4 +++-
 net/bridge/br_netfilter.c |  4 +++-
 net/decnet/dn_route.c     |  8 ++++++--
 net/ipv4/route.c          | 14 ++++++++++----
 net/ipv6/route.c          | 14 ++++++++++----
 net/xfrm/xfrm_policy.c    |  6 ++++--
 7 files changed, 43 insertions(+), 15 deletions(-)

(limited to 'net/ipv6/route.c')

diff --git a/include/net/dst.h b/include/net/dst.h
index 84e7a3ff968..295a70547e7 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -420,7 +420,13 @@ static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
 
 static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, const void *daddr)
 {
-	return dst->ops->neigh_lookup(dst, daddr);
+	return dst->ops->neigh_lookup(dst, NULL, daddr);
+}
+
+static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst,
+						     struct sk_buff *skb)
+{
+	return dst->ops->neigh_lookup(dst, skb, NULL);
 }
 
 static inline void dst_link_failure(struct sk_buff *skb)
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index 3682a0a076c..4badc86e45d 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -26,7 +26,9 @@ struct dst_ops {
 	void			(*link_failure)(struct sk_buff *);
 	void			(*update_pmtu)(struct dst_entry *dst, u32 mtu);
 	int			(*local_out)(struct sk_buff *skb);
-	struct neighbour *	(*neigh_lookup)(const struct dst_entry *dst, const void *daddr);
+	struct neighbour *	(*neigh_lookup)(const struct dst_entry *dst,
+						struct sk_buff *skb,
+						const void *daddr);
 
 	struct kmem_cache	*kmem_cachep;
 
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 20fa719889e..4378775432b 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -120,7 +120,9 @@ static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old)
 	return NULL;
 }
 
-static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst, const void *daddr)
+static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst,
+					   struct sk_buff *skb,
+					   const void *daddr)
 {
 	return NULL;
 }
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 2493ed5bfec..60e4c6e1bac 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -117,7 +117,9 @@ static void dn_dst_destroy(struct dst_entry *);
 static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
 static void dn_dst_link_failure(struct sk_buff *);
 static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu);
-static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, const void *daddr);
+static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst,
+					     struct sk_buff *skb,
+					     const void *daddr);
 static int dn_route_input(struct sk_buff *);
 static void dn_run_flush(unsigned long dummy);
 
@@ -828,7 +830,9 @@ static unsigned int dn_dst_mtu(const struct dst_entry *dst)
 	return mtu ? : dst->dev->mtu;
 }
 
-static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, const void *daddr)
+static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst,
+					     struct sk_buff *skb,
+					     const void *daddr)
 {
 	return __neigh_lookup_errno(&dn_neigh_table, daddr, dst->dev);
 }
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index bae36386e72..7453dfcdb43 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -188,7 +188,9 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
 	return p;
 }
 
-static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr);
+static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
+					   struct sk_buff *skb,
+					   const void *daddr);
 
 static struct dst_ops ipv4_dst_ops = {
 	.family =		AF_INET,
@@ -1088,7 +1090,9 @@ static int slow_chain_length(const struct rtable *head)
 	return length >> FRACT_BITS;
 }
 
-static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr)
+static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
+					   struct sk_buff *skb,
+					   const void *daddr)
 {
 	struct net_device *dev = dst->dev;
 	const __be32 *pkey = daddr;
@@ -1098,6 +1102,8 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const vo
 	rt = (const struct rtable *) dst;
 	if (rt->rt_gateway)
 		pkey = (const __be32 *) &rt->rt_gateway;
+	else if (skb)
+		pkey = &ip_hdr(skb)->daddr;
 
 	n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
 	if (n)
@@ -1107,7 +1113,7 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const vo
 
 static int rt_bind_neighbour(struct rtable *rt)
 {
-	struct neighbour *n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway);
+	struct neighbour *n = ipv4_neigh_lookup(&rt->dst, NULL, &rt->rt_gateway);
 	if (IS_ERR(n))
 		return PTR_ERR(n);
 	dst_set_neighbour(&rt->dst, n);
@@ -1388,7 +1394,7 @@ static void check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
 
 	rt->rt_gateway = peer->redirect_learned.a4;
 
-	n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway);
+	n = ipv4_neigh_lookup(&rt->dst, NULL, &rt->rt_gateway);
 	if (IS_ERR(n)) {
 		rt->rt_gateway = orig_gw;
 		return;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c518e4ec0ce..4b581c675bb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -120,21 +120,27 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
 	return p;
 }
 
-static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
+static inline const void *choose_neigh_daddr(struct rt6_info *rt,
+					     struct sk_buff *skb,
+					     const void *daddr)
 {
 	struct in6_addr *p = &rt->rt6i_gateway;
 
 	if (!ipv6_addr_any(p))
 		return (const void *) p;
+	else if (skb)
+		return &ipv6_hdr(skb)->daddr;
 	return daddr;
 }
 
-static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
+static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
+					  struct sk_buff *skb,
+					  const void *daddr)
 {
 	struct rt6_info *rt = (struct rt6_info *) dst;
 	struct neighbour *n;
 
-	daddr = choose_neigh_daddr(rt, daddr);
+	daddr = choose_neigh_daddr(rt, skb, daddr);
 	n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
 	if (n)
 		return n;
@@ -1162,7 +1168,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 	if (neigh)
 		neigh_hold(neigh);
 	else {
-		neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
+		neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
 		if (IS_ERR(neigh)) {
 			in6_dev_put(idev);
 			dst_free(&rt->dst);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index ccfbd328a69..a28a3f972d5 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2404,9 +2404,11 @@ static unsigned int xfrm_mtu(const struct dst_entry *dst)
 	return mtu ? : dst_mtu(dst->path);
 }
 
-static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, const void *daddr)
+static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
+					   struct sk_buff *skb,
+					   const void *daddr)
 {
-	return dst_neigh_lookup(dst->path, daddr);
+	return dst->path->ops->neigh_lookup(dst, skb, daddr);
 }
 
 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
-- 
cgit v1.2.3-70-g09d2


From 1d248b1cf4e09dbec8cef5f7fbeda5874248bd09 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 3 Jul 2012 01:01:51 -0700
Subject: net: Pass neighbours and dest address into NETEVENT_REDIRECT events.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c | 23 +++++++++-------------
 include/net/netevent.h                             |  4 ++++
 net/ipv6/route.c                                   |  7 ++++++-
 3 files changed, 19 insertions(+), 15 deletions(-)

(limited to 'net/ipv6/route.c')

diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c
index 55cf72af69c..633c6029e53 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c
@@ -62,7 +62,8 @@ static const unsigned int MAX_ATIDS = 64 * 1024;
 static const unsigned int ATID_BASE = 0x10000;
 
 static void cxgb_neigh_update(struct neighbour *neigh);
-static void cxgb_redirect(struct dst_entry *old, struct dst_entry *new);
+static void cxgb_redirect(struct dst_entry *old, struct neighbour *old_neigh,
+			  struct dst_entry *new, struct neighbour *new_neigh);
 
 static inline int offload_activated(struct t3cdev *tdev)
 {
@@ -968,8 +969,9 @@ static int nb_callback(struct notifier_block *self, unsigned long event,
 	}
 	case (NETEVENT_REDIRECT):{
 		struct netevent_redirect *nr = ctx;
-		cxgb_redirect(nr->old, nr->new);
-		cxgb_neigh_update(dst_get_neighbour_noref(nr->new));
+		cxgb_redirect(nr->old, nr->old_neigh,
+			      nr->new, nr->new_neigh);
+		cxgb_neigh_update(nr->new_neigh);
 		break;
 	}
 	default:
@@ -1107,10 +1109,10 @@ static void set_l2t_ix(struct t3cdev *tdev, u32 tid, struct l2t_entry *e)
 	tdev->send(tdev, skb);
 }
 
-static void cxgb_redirect(struct dst_entry *old, struct dst_entry *new)
+static void cxgb_redirect(struct dst_entry *old, struct neighbour *old_neigh,
+			  struct dst_entry *new, struct neighbour *new_neigh)
 {
 	struct net_device *olddev, *newdev;
-	struct neighbour *n;
 	struct tid_info *ti;
 	struct t3cdev *tdev;
 	u32 tid;
@@ -1118,15 +1120,8 @@ static void cxgb_redirect(struct dst_entry *old, struct dst_entry *new)
 	struct l2t_entry *e;
 	struct t3c_tid_entry *te;
 
-	n = dst_get_neighbour_noref(old);
-	if (!n)
-		return;
-	olddev = n->dev;
-
-	n = dst_get_neighbour_noref(new);
-	if (!n)
-		return;
-	newdev = n->dev;
+	olddev = old_neigh->dev;
+	newdev = new_neigh->dev;
 
 	if (!is_offloading(olddev))
 		return;
diff --git a/include/net/netevent.h b/include/net/netevent.h
index 086f8a5b59d..3ce4988c9c0 100644
--- a/include/net/netevent.h
+++ b/include/net/netevent.h
@@ -12,10 +12,14 @@
  */
 
 struct dst_entry;
+struct neighbour;
 
 struct netevent_redirect {
 	struct dst_entry *old;
+	struct neighbour *old_neigh;
 	struct dst_entry *new;
+	struct neighbour *new_neigh;
+	const void *daddr;
 };
 
 enum netevent_notif_type {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4b581c675bb..34b29881e22 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1687,6 +1687,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
 	struct rt6_info *rt, *nrt = NULL;
 	struct netevent_redirect netevent;
 	struct net *net = dev_net(neigh->dev);
+	struct neighbour *old_neigh;
 
 	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
 
@@ -1714,7 +1715,8 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
 	dst_confirm(&rt->dst);
 
 	/* Duplicate redirect: silently ignore. */
-	if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
+	old_neigh = dst_get_neighbour_noref_raw(&rt->dst);
+	if (neigh == old_neigh)
 		goto out;
 
 	nrt = ip6_rt_copy(rt, dest);
@@ -1732,7 +1734,10 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
 		goto out;
 
 	netevent.old = &rt->dst;
+	netevent.old_neigh = old_neigh;
 	netevent.new = &nrt->dst;
+	netevent.new_neigh = neigh;
+	netevent.daddr = dest;
 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
 
 	if (rt->rt6i_flags & RTF_CACHE) {
-- 
cgit v1.2.3-70-g09d2


From 97cac0821af4474ec4ba3a9e7a36b98ed9b6db88 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 2 Jul 2012 22:43:47 -0700
Subject: ipv6: Store route neighbour in rt6_info struct.

This makes for a simplified conversion away from dst_get_neighbour*().

All code outside of ipv6 will use neigh lookups via dst_neigh_lookup*().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h   |  2 ++
 net/ipv6/ip6_output.c   |  8 ++++++--
 net/ipv6/route.c        | 42 ++++++++++++++++++++++++++----------------
 net/ipv6/xfrm6_policy.c |  1 +
 4 files changed, 35 insertions(+), 18 deletions(-)

(limited to 'net/ipv6/route.c')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index a192f780765..0fedbd8d747 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -86,6 +86,8 @@ struct fib6_table;
 struct rt6_info {
 	struct dst_entry		dst;
 
+	struct neighbour		*n;
+
 	/*
 	 * Tail elements of dst_entry (__refcnt etc.)
 	 * and these elements (rarely used in hot path) are in
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index c94e4aabe11..6d9c0abc8c2 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -88,6 +88,7 @@ static int ip6_finish_output2(struct sk_buff *skb)
 	struct dst_entry *dst = skb_dst(skb);
 	struct net_device *dev = dst->dev;
 	struct neighbour *neigh;
+	struct rt6_info *rt;
 
 	skb->protocol = htons(ETH_P_IPV6);
 	skb->dev = dev;
@@ -123,7 +124,8 @@ static int ip6_finish_output2(struct sk_buff *skb)
 	}
 
 	rcu_read_lock();
-	neigh = dst_get_neighbour_noref(dst);
+	rt = (struct rt6_info *) dst;
+	neigh = rt->n;
 	if (neigh) {
 		int res = dst_neigh_output(dst, neigh, skb);
 
@@ -944,6 +946,7 @@ static int ip6_dst_lookup_tail(struct sock *sk,
 	struct net *net = sock_net(sk);
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 	struct neighbour *n;
+	struct rt6_info *rt;
 #endif
 	int err;
 
@@ -972,7 +975,8 @@ static int ip6_dst_lookup_tail(struct sock *sk,
 	 * dst entry of the nexthop router
 	 */
 	rcu_read_lock();
-	n = dst_get_neighbour_noref(*dst);
+	rt = (struct rt6_info *) dst;
+	n = rt->n;
 	if (n && !(n->nud_state & NUD_VALID)) {
 		struct inet6_ifaddr *ifp;
 		struct flowi6 fl_gw6;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 34b29881e22..ceff71d24f8 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -155,7 +155,7 @@ static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
 		if (IS_ERR(n))
 			return PTR_ERR(n);
 	}
-	dst_set_neighbour(&rt->dst, n);
+	rt->n = n;
 
 	return 0;
 }
@@ -285,6 +285,9 @@ static void ip6_dst_destroy(struct dst_entry *dst)
 	struct rt6_info *rt = (struct rt6_info *)dst;
 	struct inet6_dev *idev = rt->rt6i_idev;
 
+	if (rt->n)
+		neigh_release(rt->n);
+
 	if (!(rt->dst.flags & DST_HOST))
 		dst_destroy_metrics_generic(dst);
 
@@ -335,12 +338,19 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 	struct net_device *loopback_dev =
 		dev_net(dev)->loopback_dev;
 
-	if (dev != loopback_dev && idev && idev->dev == dev) {
-		struct inet6_dev *loopback_idev =
-			in6_dev_get(loopback_dev);
-		if (loopback_idev) {
-			rt->rt6i_idev = loopback_idev;
-			in6_dev_put(idev);
+	if (dev != loopback_dev) {
+		if (idev && idev->dev == dev) {
+			struct inet6_dev *loopback_idev =
+				in6_dev_get(loopback_dev);
+			if (loopback_idev) {
+				rt->rt6i_idev = loopback_idev;
+				in6_dev_put(idev);
+			}
+		}
+		if (rt->n && rt->n->dev == dev) {
+			rt->n->dev = loopback_dev;
+			dev_hold(loopback_dev);
+			dev_put(dev);
 		}
 	}
 }
@@ -430,7 +440,7 @@ static void rt6_probe(struct rt6_info *rt)
 	 * to no more than one per minute.
 	 */
 	rcu_read_lock();
-	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
+	neigh = rt ? rt->n : NULL;
 	if (!neigh || (neigh->nud_state & NUD_VALID))
 		goto out;
 	read_lock_bh(&neigh->lock);
@@ -477,7 +487,7 @@ static inline int rt6_check_neigh(struct rt6_info *rt)
 	int m;
 
 	rcu_read_lock();
-	neigh = dst_get_neighbour_noref(&rt->dst);
+	neigh = rt->n;
 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
 	    !(rt->rt6i_flags & RTF_GATEWAY))
 		m = 1;
@@ -824,7 +834,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
 
 	if (rt) {
 		rt->rt6i_flags |= RTF_CACHE;
-		dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
+		rt->n = neigh_clone(ort->n);
 	}
 	return rt;
 }
@@ -858,7 +868,7 @@ restart:
 	dst_hold(&rt->dst);
 	read_unlock_bh(&table->tb6_lock);
 
-	if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
+	if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
 		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
 	else if (!(rt->dst.flags & DST_HOST))
 		nrt = rt6_alloc_clone(rt, &fl6->daddr);
@@ -1178,7 +1188,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 
 	rt->dst.flags |= DST_HOST;
 	rt->dst.output  = ip6_output;
-	dst_set_neighbour(&rt->dst, neigh);
+	rt->n = neigh;
 	atomic_set(&rt->dst.__refcnt, 1);
 	rt->rt6i_dst.addr = fl6->daddr;
 	rt->rt6i_dst.plen = 128;
@@ -1715,7 +1725,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
 	dst_confirm(&rt->dst);
 
 	/* Duplicate redirect: silently ignore. */
-	old_neigh = dst_get_neighbour_noref_raw(&rt->dst);
+	old_neigh = rt->n;
 	if (neigh == old_neigh)
 		goto out;
 
@@ -1728,7 +1738,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
 		nrt->rt6i_flags &= ~RTF_GATEWAY;
 
 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
-	dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
+	nrt->n = neigh_clone(neigh);
 
 	if (ip6_ins_rt(nrt))
 		goto out;
@@ -2442,7 +2452,7 @@ static int rt6_fill_node(struct net *net,
 		goto nla_put_failure;
 
 	rcu_read_lock();
-	n = dst_get_neighbour_noref(&rt->dst);
+	n = rt->n;
 	if (n) {
 		if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
 			rcu_read_unlock();
@@ -2666,7 +2676,7 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
 	seq_puts(m, "00000000000000000000000000000000 00 ");
 #endif
 	rcu_read_lock();
-	n = dst_get_neighbour_noref(&rt->dst);
+	n = rt->n;
 	if (n) {
 		seq_printf(m, "%pi6", n->primary_key);
 	} else {
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index d7494845efb..bb02038b822 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -103,6 +103,7 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 
 	/* Sheit... I remember I did this right. Apparently,
 	 * it was magically lost, so this code needs audit */
+	xdst->u.rt6.n = neigh_clone(rt->n);
 	xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST |
 						   RTF_LOCAL);
 	xdst->u.rt6.rt6i_metric = rt->rt6i_metric;
-- 
cgit v1.2.3-70-g09d2


From a2de86f63cfc92f7aaf11e7b9d9f2150946a1622 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Thu, 5 Jul 2012 03:18:28 +0000
Subject: ipv6: Initialize the neighbour pointer of rt6_info on allocation

git commit 97cac082 (ipv6: Store route neighbour in rt6_info struct)
added a neighbour pointer to rt6_info. Currently we don't initialize
this pointer at allocation time. We assume this pointer to be valid
if it is not a null pointer, so initialize it on allocation.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv6/route.c')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ceff71d24f8..6cc6c881f54 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -273,7 +273,7 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
 					0, 0, flags);
 
 	if (rt) {
-		memset(&rt->rt6i_table, 0,
+		memset(&rt->n, 0,
 		       sizeof(*rt) - sizeof(struct dst_entry));
 		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
 	}
-- 
cgit v1.2.3-70-g09d2


From 81166dd6fa8eb780b2132d32fbc77eb6ac04e44e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 10 Jul 2012 03:14:24 -0700
Subject: tcp: Move timestamps from inetpeer to metrics cache.

With help from Lin Ming.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h   |   4 +-
 include/net/tcp.h        |   5 +-
 net/ipv4/inetpeer.c      |   1 -
 net/ipv4/route.c         |   8 +--
 net/ipv4/tcp_ipv4.c      |  30 ++---------
 net/ipv4/tcp_metrics.c   | 136 +++++++++++++++++++++++++++++++++++++++++++++--
 net/ipv4/tcp_minisocks.c |  46 ----------------
 net/ipv6/route.c         |  13 +----
 net/ipv6/tcp_ipv6.c      |  33 ++----------
 9 files changed, 149 insertions(+), 127 deletions(-)

(limited to 'net/ipv6/route.c')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index c27c8f10ebd..1119f6f6cdb 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -46,15 +46,13 @@ struct inet_peer {
 	};
 	/*
 	 * Once inet_peer is queued for deletion (refcnt == -1), following fields
-	 * are not available: rid, ip_id_count, tcp_ts, tcp_ts_stamp
+	 * are not available: rid, ip_id_count
 	 * We can share memory with rcu_head to help keep inet_peer small.
 	 */
 	union {
 		struct {
 			atomic_t			rid;		/* Frag reception counter */
 			atomic_t			ip_id_count;	/* IP ID for the next packet */
-			__u32				tcp_ts;
-			__u32				tcp_ts_stamp;
 		};
 		struct rcu_head         rcu;
 		struct inet_peer	*gc_next;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 0900d63d162..3618fefae04 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -390,7 +390,10 @@ extern void tcp_clear_retrans(struct tcp_sock *tp);
 extern void tcp_update_metrics(struct sock *sk);
 extern void tcp_init_metrics(struct sock *sk);
 extern void tcp_metrics_init(void);
-extern bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst);
+extern bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check);
+extern bool tcp_remember_stamp(struct sock *sk);
+extern bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw);
+extern void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst);
 extern void tcp_disable_fack(struct tcp_sock *tp);
 extern void tcp_close(struct sock *sk, long timeout);
 extern void tcp_init_sock(struct sock *sk);
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index da90a8cab61..f457bcb4135 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -508,7 +508,6 @@ relookup:
 				(daddr->family == AF_INET) ?
 					secure_ip_id(daddr->addr.a4) :
 					secure_ipv6_id(daddr->addr.a6));
-		p->tcp_ts_stamp = 0;
 		p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
 		p->rate_tokens = 0;
 		p->rate_last = 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d02c91177d3..78d81543766 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2846,7 +2846,7 @@ static int rt_fill_info(struct net *net,
 	struct rtmsg *r;
 	struct nlmsghdr *nlh;
 	unsigned long expires = 0;
-	u32 id = 0, ts = 0, tsage = 0, error;
+	u32 id = 0, error;
 
 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
 	if (nlh == NULL)
@@ -2903,10 +2903,6 @@ static int rt_fill_info(struct net *net,
 		const struct inet_peer *peer = rt_peer_ptr(rt);
 		inet_peer_refcheck(peer);
 		id = atomic_read(&peer->ip_id_count) & 0xffff;
-		if (peer->tcp_ts_stamp) {
-			ts = peer->tcp_ts;
-			tsage = get_seconds() - peer->tcp_ts_stamp;
-		}
 		expires = ACCESS_ONCE(peer->pmtu_expires);
 		if (expires) {
 			if (time_before(jiffies, expires))
@@ -2942,7 +2938,7 @@ static int rt_fill_info(struct net *net,
 				goto nla_put_failure;
 	}
 
-	if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage,
+	if (rtnl_put_cacheinfo(skb, &rt->dst, id, 0, 0,
 			       expires, error) < 0)
 		goto nla_put_failure;
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e9312a8f33a..d406bf7f37d 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -209,22 +209,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	}
 
 	if (tcp_death_row.sysctl_tw_recycle &&
-	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
-		struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
-		/*
-		 * VJ's idea. We save last timestamp seen from
-		 * the destination in peer table, when entering state
-		 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
-		 * when trying new connection.
-		 */
-		if (peer) {
-			inet_peer_refcheck(peer);
-			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
-				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
-				tp->rx_opt.ts_recent = peer->tcp_ts;
-			}
-		}
-	}
+	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
+		tcp_fetch_timewait_stamp(sk, &rt->dst);
 
 	inet->inet_dport = usin->sin_port;
 	inet->inet_daddr = daddr;
@@ -1375,7 +1361,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
 		req->cookie_ts = tmp_opt.tstamp_ok;
 	} else if (!isn) {
-		struct inet_peer *peer = NULL;
 		struct flowi4 fl4;
 
 		/* VJ's idea. We save last timestamp seen
@@ -1390,12 +1375,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		if (tmp_opt.saw_tstamp &&
 		    tcp_death_row.sysctl_tw_recycle &&
 		    (dst = inet_csk_route_req(sk, &fl4, req, want_cookie)) != NULL &&
-		    fl4.daddr == saddr &&
-		    (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
-			inet_peer_refcheck(peer);
-			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
-			    (s32)(peer->tcp_ts - req->ts_recent) >
-							TCP_PAWS_WINDOW) {
+		    fl4.daddr == saddr) {
+			if (!tcp_peer_is_proven(req, dst, true)) {
 				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
 				goto drop_and_release;
 			}
@@ -1404,8 +1385,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		else if (!sysctl_tcp_syncookies &&
 			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
 			  (sysctl_max_syn_backlog >> 2)) &&
-			 (!peer || !peer->tcp_ts_stamp) &&
-			 !tcp_peer_is_proven(req, dst)) {
+			 !tcp_peer_is_proven(req, dst, false)) {
 			/* Without syncookies last quarter of
 			 * backlog is filled with destinations,
 			 * proven to be alive.
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 56223bab251..1fd83d3118f 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -34,6 +34,8 @@ struct tcp_metrics_block {
 	struct tcp_metrics_block __rcu	*tcpm_next;
 	struct inetpeer_addr		tcpm_addr;
 	unsigned long			tcpm_stamp;
+	u32				tcpm_ts;
+	u32				tcpm_ts_stamp;
 	u32				tcpm_lock;
 	u32				tcpm_vals[TCP_METRIC_MAX];
 };
@@ -114,6 +116,8 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst)
 	tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
 	tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
 	tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
+	tm->tcpm_ts = 0;
+	tm->tcpm_ts_stamp = 0;
 }
 
 static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
@@ -230,6 +234,45 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
 	return tm;
 }
 
+static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw)
+{
+	struct inet6_timewait_sock *tw6;
+	struct tcp_metrics_block *tm;
+	struct inetpeer_addr addr;
+	unsigned int hash;
+	struct net *net;
+
+	addr.family = tw->tw_family;
+	switch (addr.family) {
+	case AF_INET:
+		addr.addr.a4 = tw->tw_daddr;
+		hash = (__force unsigned int) addr.addr.a4;
+		break;
+	case AF_INET6:
+		tw6 = inet6_twsk((struct sock *)tw);
+		*(struct in6_addr *)addr.addr.a6 = tw6->tw_v6_daddr;
+		hash = ((__force unsigned int) addr.addr.a6[0] ^
+			(__force unsigned int) addr.addr.a6[1] ^
+			(__force unsigned int) addr.addr.a6[2] ^
+			(__force unsigned int) addr.addr.a6[3]);
+		break;
+	default:
+		return NULL;
+	}
+
+	hash ^= (hash >> 24) ^ (hash >> 16) ^ (hash >> 8);
+
+	net = twsk_net(tw);
+	hash &= net->ipv4.tcp_metrics_hash_mask;
+
+	for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
+	     tm = rcu_dereference(tm->tcpm_next)) {
+		if (addr_same(&tm->tcpm_addr, &addr))
+			break;
+	}
+	return tm;
+}
+
 static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
 						 struct dst_entry *dst,
 						 bool create)
@@ -496,7 +539,7 @@ reset:
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
+bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check)
 {
 	struct tcp_metrics_block *tm;
 	bool ret;
@@ -506,16 +549,99 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
 
 	rcu_read_lock();
 	tm = __tcp_get_metrics_req(req, dst);
-	if (tm && tcp_metric_get(tm, TCP_METRIC_RTT))
-		ret = true;
-	else
-		ret = false;
+	if (paws_check) {
+		if (tm &&
+		    (u32)get_seconds() - tm->tcpm_ts_stamp < TCP_PAWS_MSL &&
+		    (s32)(tm->tcpm_ts - req->ts_recent) > TCP_PAWS_WINDOW)
+			ret = false;
+		else
+			ret = true;
+	} else {
+		if (tm && tcp_metric_get(tm, TCP_METRIC_RTT) && tm->tcpm_ts_stamp)
+			ret = true;
+		else
+			ret = false;
+	}
 	rcu_read_unlock();
 
 	return ret;
 }
 EXPORT_SYMBOL_GPL(tcp_peer_is_proven);
 
+void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst)
+{
+	struct tcp_metrics_block *tm;
+
+	rcu_read_lock();
+	tm = tcp_get_metrics(sk, dst, true);
+	if (tm) {
+		struct tcp_sock *tp = tcp_sk(sk);
+
+		if ((u32)get_seconds() - tm->tcpm_ts_stamp <= TCP_PAWS_MSL) {
+			tp->rx_opt.ts_recent_stamp = tm->tcpm_ts_stamp;
+			tp->rx_opt.ts_recent = tm->tcpm_ts;
+		}
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(tcp_fetch_timewait_stamp);
+
+/* VJ's idea. Save last timestamp seen from this destination and hold
+ * it at least for normal timewait interval to use for duplicate
+ * segment detection in subsequent connections, before they enter
+ * synchronized state.
+ */
+bool tcp_remember_stamp(struct sock *sk)
+{
+	struct dst_entry *dst = __sk_dst_get(sk);
+	bool ret = false;
+
+	if (dst) {
+		struct tcp_metrics_block *tm;
+
+		rcu_read_lock();
+		tm = tcp_get_metrics(sk, dst, true);
+		if (tm) {
+			struct tcp_sock *tp = tcp_sk(sk);
+
+			if ((s32)(tm->tcpm_ts - tp->rx_opt.ts_recent) <= 0 ||
+			    ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL &&
+			     tm->tcpm_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
+				tm->tcpm_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
+				tm->tcpm_ts = tp->rx_opt.ts_recent;
+			}
+			ret = true;
+		}
+		rcu_read_unlock();
+	}
+	return ret;
+}
+
+bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw)
+{
+	struct tcp_metrics_block *tm;
+	bool ret = false;
+
+	rcu_read_lock();
+	tm = __tcp_get_metrics_tw(tw);
+	if (tw) {
+		const struct tcp_timewait_sock *tcptw;
+		struct sock *sk = (struct sock *) tw;
+
+		tcptw = tcp_twsk(sk);
+		if ((s32)(tm->tcpm_ts - tcptw->tw_ts_recent) <= 0 ||
+		    ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL &&
+		     tm->tcpm_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
+			tm->tcpm_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
+			tm->tcpm_ts	   = tcptw->tw_ts_recent;
+		}
+		ret = true;
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
 static unsigned long tcpmhash_entries;
 static int __init set_tcpmhash_entries(char *str)
 {
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 72b7c63b1a3..a51aa534dab 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -49,52 +49,6 @@ struct inet_timewait_death_row tcp_death_row = {
 };
 EXPORT_SYMBOL_GPL(tcp_death_row);
 
-/* VJ's idea. Save last timestamp seen from this destination
- * and hold it at least for normal timewait interval to use for duplicate
- * segment detection in subsequent connections, before they enter synchronized
- * state.
- */
-
-static bool tcp_remember_stamp(struct sock *sk)
-{
-	const struct inet_connection_sock *icsk = inet_csk(sk);
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct inet_peer *peer;
-
-	peer = icsk->icsk_af_ops->get_peer(sk);
-	if (peer) {
-		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
-		    ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
-		     peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
-			peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
-			peer->tcp_ts = tp->rx_opt.ts_recent;
-		}
-		return true;
-	}
-
-	return false;
-}
-
-static bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw)
-{
-	const struct tcp_timewait_sock *tcptw;
-	struct sock *sk = (struct sock *) tw;
-	struct inet_peer *peer;
-
-	tcptw = tcp_twsk(sk);
-	peer = tcptw->tw_peer;
-	if (peer) {
-		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
-		    ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
-		     peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
-			peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
-			peer->tcp_ts	   = tcptw->tw_ts_recent;
-		}
-		return true;
-	}
-	return false;
-}
-
 static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
 {
 	if (seq == s_win)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 6cc6c881f54..0c068475378 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2348,13 +2348,11 @@ static int rt6_fill_node(struct net *net,
 			 int iif, int type, u32 pid, u32 seq,
 			 int prefix, int nowait, unsigned int flags)
 {
-	const struct inet_peer *peer;
 	struct rtmsg *rtm;
 	struct nlmsghdr *nlh;
 	long expires;
 	u32 table;
 	struct neighbour *n;
-	u32 ts, tsage;
 
 	if (prefix) {	/* user wants prefix routes only */
 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
@@ -2473,16 +2471,7 @@ static int rt6_fill_node(struct net *net,
 	else
 		expires = INT_MAX;
 
-	peer = NULL;
-	if (rt6_has_peer(rt))
-		peer = rt6_peer_ptr(rt);
-	ts = tsage = 0;
-	if (peer && peer->tcp_ts_stamp) {
-		ts = peer->tcp_ts;
-		tsage = get_seconds() - peer->tcp_ts_stamp;
-	}
-
-	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
+	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
 			       expires, rt->dst.error) < 0)
 		goto nla_put_failure;
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 75d179555c2..9e96b5f21d2 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -277,22 +277,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	rt = (struct rt6_info *) dst;
 	if (tcp_death_row.sysctl_tw_recycle &&
 	    !tp->rx_opt.ts_recent_stamp &&
-	    ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) {
-		struct inet_peer *peer = rt6_get_peer(rt);
-		/*
-		 * VJ's idea. We save last timestamp seen from
-		 * the destination in peer table, when entering state
-		 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
-		 * when trying new connection.
-		 */
-		if (peer) {
-			inet_peer_refcheck(peer);
-			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
-				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
-				tp->rx_opt.ts_recent = peer->tcp_ts;
-			}
-		}
-	}
+	    ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr))
+		tcp_fetch_timewait_stamp(sk, dst);
 
 	icsk->icsk_ext_hdr_len = 0;
 	if (np->opt)
@@ -1134,8 +1120,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 		treq->iif = inet6_iif(skb);
 
 	if (!isn) {
-		struct inet_peer *peer = NULL;
-
 		if (ipv6_opt_accepted(sk, skb) ||
 		    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
 		    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
@@ -1160,14 +1144,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 		 */
 		if (tmp_opt.saw_tstamp &&
 		    tcp_death_row.sysctl_tw_recycle &&
-		    (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL &&
-		    (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL &&
-		    ipv6_addr_equal((struct in6_addr *)peer->daddr.addr.a6,
-				    &treq->rmt_addr)) {
-			inet_peer_refcheck(peer);
-			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
-			    (s32)(peer->tcp_ts - req->ts_recent) >
-							TCP_PAWS_WINDOW) {
+		    (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) {
+			if (!tcp_peer_is_proven(req, dst, true)) {
 				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
 				goto drop_and_release;
 			}
@@ -1176,8 +1154,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 		else if (!sysctl_tcp_syncookies &&
 			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
 			  (sysctl_max_syn_backlog >> 2)) &&
-			 (!peer || !peer->tcp_ts_stamp) &&
-			 !tcp_peer_is_proven(req, dst)) {
+			 !tcp_peer_is_proven(req, dst, false)) {
 			/* Without syncookies last quarter of
 			 * backlog is filled with destinations,
 			 * proven to be alive.
-- 
cgit v1.2.3-70-g09d2


From 3e12939a2a67fbb4cbd962c3b9bc398c73319766 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 10 Jul 2012 04:01:57 -0700
Subject: inet: Kill FLOWI_FLAG_PRECOW_METRICS.

No longer needed.  TCP writes metrics, but now in it's own special
cache that does not dirty the route metrics.  Therefore there is no
longer any reason to pre-cow metrics in this way.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow.h              |  5 ++---
 include/net/inet_sock.h         |  2 --
 include/net/route.h             |  2 --
 net/ipv4/inet_connection_sock.c |  2 +-
 net/ipv4/route.c                | 11 ++---------
 net/ipv6/route.c                |  2 +-
 6 files changed, 6 insertions(+), 18 deletions(-)

(limited to 'net/ipv6/route.c')

diff --git a/include/net/flow.h b/include/net/flow.h
index bd524f59856..ce9cb7656b4 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -20,9 +20,8 @@ struct flowi_common {
 	__u8	flowic_proto;
 	__u8	flowic_flags;
 #define FLOWI_FLAG_ANYSRC		0x01
-#define FLOWI_FLAG_PRECOW_METRICS	0x02
-#define FLOWI_FLAG_CAN_SLEEP		0x04
-#define FLOWI_FLAG_RT_NOCACHE		0x08
+#define FLOWI_FLAG_CAN_SLEEP		0x02
+#define FLOWI_FLAG_RT_NOCACHE		0x04
 	__u32	flowic_secid;
 };
 
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index ae17e1352d7..924d7b98ab6 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -245,8 +245,6 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
 
 	if (inet_sk(sk)->transparent || inet_sk(sk)->hdrincl)
 		flags |= FLOWI_FLAG_ANYSRC;
-	if (sk->sk_protocol == IPPROTO_TCP)
-		flags |= FLOWI_FLAG_PRECOW_METRICS;
 	return flags;
 }
 
diff --git a/include/net/route.h b/include/net/route.h
index 211e2665139..635d7a99d19 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -278,8 +278,6 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32
 
 	if (inet_sk(sk)->transparent)
 		flow_flags |= FLOWI_FLAG_ANYSRC;
-	if (protocol == IPPROTO_TCP)
-		flow_flags |= FLOWI_FLAG_PRECOW_METRICS;
 	if (can_sleep)
 		flow_flags |= FLOWI_FLAG_CAN_SLEEP;
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 034ddbe42ad..76825be3b64 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -375,7 +375,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
 	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct ip_options_rcu *opt = inet_rsk(req)->opt;
 	struct net *net = sock_net(sk);
-	int flags = inet_sk_flowi_flags(sk) & ~FLOWI_FLAG_PRECOW_METRICS;
+	int flags = inet_sk_flowi_flags(sk);
 
 	if (nocache)
 		flags |= FLOWI_FLAG_RT_NOCACHE;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index e376354dcb6..d4834e2914a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1658,7 +1658,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
 	struct rtable *rt;
 
 	flowi4_init_output(&fl4, oif, mark, RT_TOS(iph->tos), RT_SCOPE_UNIVERSE,
-			   protocol, flow_flags | FLOWI_FLAG_PRECOW_METRICS,
+			   protocol, flow_flags,
 			   iph->daddr, iph->saddr, 0, 0);
 	rt = __ip_route_output_key(net, &fl4);
 	if (!IS_ERR(rt)) {
@@ -1836,18 +1836,11 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
 {
 	struct inet_peer_base *base;
 	struct inet_peer *peer;
-	int create = 0;
-
-	/* If a peer entry exists for this destination, we must hook
-	 * it up in order to get at cached metrics.
-	 */
-	if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS))
-		create = 1;
 
 	base = inetpeer_base_ptr(rt->_peer);
 	BUG_ON(!base);
 
-	peer = inet_getpeer_v4(base, rt->rt_dst, create);
+	peer = inet_getpeer_v4(base, rt->rt_dst, 0);
 	if (peer) {
 		__rt_set_peer(rt, peer);
 		rt->rt_peer_genid = rt_peer_genid();
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 0c068475378..b7eb51e1a0e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1093,7 +1093,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
 	memset(&fl6, 0, sizeof(fl6));
 	fl6.flowi6_oif = oif;
 	fl6.flowi6_mark = mark;
-	fl6.flowi6_flags = FLOWI_FLAG_PRECOW_METRICS;
+	fl6.flowi6_flags = 0;
 	fl6.daddr = iph->daddr;
 	fl6.saddr = iph->saddr;
 	fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
-- 
cgit v1.2.3-70-g09d2


From 87a50699cb6d169591cc776fb82683a2c77cecac Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 10 Jul 2012 05:06:14 -0700
Subject: rtnetlink: Remove ts/tsage args to rtnl_put_cacheinfo().

Nobody provides non-zero values any longer.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h | 3 +--
 net/core/rtnetlink.c      | 4 +---
 net/decnet/dn_route.c     | 2 +-
 net/ipv4/route.c          | 3 +--
 net/ipv6/route.c          | 3 +--
 5 files changed, 5 insertions(+), 10 deletions(-)

(limited to 'net/ipv6/route.c')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index ea60b085410..db71c4ad862 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -619,8 +619,7 @@ extern void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid,
 extern void rtnl_set_sk_err(struct net *net, u32 group, int error);
 extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics);
 extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst,
-			      u32 id, u32 ts, u32 tsage, long expires,
-			      u32 error);
+			      u32 id, long expires, u32 error);
 
 extern void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change);
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2b325c340b4..64127eee786 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -615,7 +615,7 @@ nla_put_failure:
 EXPORT_SYMBOL(rtnetlink_put_metrics);
 
 int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
-		       u32 ts, u32 tsage, long expires, u32 error)
+		       long expires, u32 error)
 {
 	struct rta_cacheinfo ci = {
 		.rta_lastuse = jiffies_to_clock_t(jiffies - dst->lastuse),
@@ -623,8 +623,6 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
 		.rta_clntref = atomic_read(&(dst->__refcnt)),
 		.rta_error = error,
 		.rta_id =  id,
-		.rta_ts = ts,
-		.rta_tsage = tsage,
 	};
 
 	if (expires)
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 707027fae8a..b5594cc73ee 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1590,7 +1590,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 		goto errout;
 
 	expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
-	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, expires,
+	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires,
 			       rt->dst.error) < 0)
 		goto errout;
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d4834e2914a..67b08745daf 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2931,8 +2931,7 @@ static int rt_fill_info(struct net *net,
 				goto nla_put_failure;
 	}
 
-	if (rtnl_put_cacheinfo(skb, &rt->dst, id, 0, 0,
-			       expires, error) < 0)
+	if (rtnl_put_cacheinfo(skb, &rt->dst, id, expires, error) < 0)
 		goto nla_put_failure;
 
 	return nlmsg_end(skb, nlh);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index b7eb51e1a0e..563f12c1c99 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2471,8 +2471,7 @@ static int rt6_fill_node(struct net *net,
 	else
 		expires = INT_MAX;
 
-	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
-			       expires, rt->dst.error) < 0)
+	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
 		goto nla_put_failure;
 
 	return nlmsg_end(skb, nlh);
-- 
cgit v1.2.3-70-g09d2


From e8599ff4b1d6b0d61e1074ae4ba9fca8dd0c41d0 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 11 Jul 2012 23:43:53 -0700
Subject: ipv6: Move bulk of redirect handling into rt6_redirect().

This sets things up so that we can have the protocol error handlers
call down into the ipv6 route code for redirects just as ipv4 already
does.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_route.h |  7 +----
 net/ipv6/ndisc.c        | 72 +----------------------------------------------
 net/ipv6/route.c        | 75 +++++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 72 insertions(+), 82 deletions(-)

(limited to 'net/ipv6/route.c')

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 58cb3fc3487..5cedbd7688c 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -133,12 +133,7 @@ extern int			rt6_route_rcv(struct net_device *dev,
 					      u8 *opt, int len,
 					      const struct in6_addr *gwaddr);
 
-extern void			rt6_redirect(const struct in6_addr *dest,
-					     const struct in6_addr *src,
-					     const struct in6_addr *saddr,
-					     struct neighbour *neigh,
-					     u8 *lladdr,
-					     int on_link);
+extern void			rt6_redirect(struct sk_buff *skb);
 
 extern void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
 			    int oif, u32 mark);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index a3189baa9f4..b8d53e186a7 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -143,8 +143,6 @@ struct neigh_table nd_tbl = {
 	.gc_thresh3 =	1024,
 };
 
-#define NDISC_OPT_SPACE(len) (((len)+2+7)&~7)
-
 static inline int ndisc_opt_addr_space(struct net_device *dev)
 {
 	return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type));
@@ -1336,16 +1334,6 @@ out:
 
 static void ndisc_redirect_rcv(struct sk_buff *skb)
 {
-	struct inet6_dev *in6_dev;
-	struct icmp6hdr *icmph;
-	const struct in6_addr *dest;
-	const struct in6_addr *target;	/* new first hop to destination */
-	struct neighbour *neigh;
-	int on_link = 0;
-	struct ndisc_options ndopts;
-	int optlen;
-	u8 *lladdr = NULL;
-
 #ifdef CONFIG_IPV6_NDISC_NODETYPE
 	switch (skb->ndisc_nodetype) {
 	case NDISC_NODETYPE_HOST:
@@ -1362,65 +1350,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
 		return;
 	}
 
-	optlen = skb->tail - skb->transport_header;
-	optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
-
-	if (optlen < 0) {
-		ND_PRINTK(2, warn, "Redirect: packet too short\n");
-		return;
-	}
-
-	icmph = icmp6_hdr(skb);
-	target = (const struct in6_addr *) (icmph + 1);
-	dest = target + 1;
-
-	if (ipv6_addr_is_multicast(dest)) {
-		ND_PRINTK(2, warn,
-			  "Redirect: destination address is multicast\n");
-		return;
-	}
-
-	if (ipv6_addr_equal(dest, target)) {
-		on_link = 1;
-	} else if (ipv6_addr_type(target) !=
-		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
-		ND_PRINTK(2, warn,
-			  "Redirect: target address is not link-local unicast\n");
-		return;
-	}
-
-	in6_dev = __in6_dev_get(skb->dev);
-	if (!in6_dev)
-		return;
-	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
-		return;
-
-	/* RFC2461 8.1:
-	 *	The IP source address of the Redirect MUST be the same as the current
-	 *	first-hop router for the specified ICMP Destination Address.
-	 */
-
-	if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
-		ND_PRINTK(2, warn, "Redirect: invalid ND options\n");
-		return;
-	}
-	if (ndopts.nd_opts_tgt_lladdr) {
-		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
-					     skb->dev);
-		if (!lladdr) {
-			ND_PRINTK(2, warn,
-				  "Redirect: invalid link-layer address length\n");
-			return;
-		}
-	}
-
-	neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
-	if (neigh) {
-		rt6_redirect(dest, &ipv6_hdr(skb)->daddr,
-			     &ipv6_hdr(skb)->saddr, neigh, lladdr,
-			     on_link);
-		neigh_release(neigh);
-	}
+	rt6_redirect(skb);
 }
 
 void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 563f12c1c99..73cf3f78aaa 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1690,14 +1690,78 @@ static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
 						   flags, __ip6_route_redirect);
 }
 
-void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
-		  const struct in6_addr *saddr,
-		  struct neighbour *neigh, u8 *lladdr, int on_link)
+void rt6_redirect(struct sk_buff *skb)
 {
-	struct rt6_info *rt, *nrt = NULL;
+	struct net *net = dev_net(skb->dev);
 	struct netevent_redirect netevent;
-	struct net *net = dev_net(neigh->dev);
+	struct rt6_info *rt, *nrt = NULL;
+	const struct in6_addr *target;
 	struct neighbour *old_neigh;
+	const struct in6_addr *dest;
+	const struct in6_addr *src;
+	const struct in6_addr *saddr;
+	struct ndisc_options ndopts;
+	struct inet6_dev *in6_dev;
+	struct neighbour *neigh;
+	struct icmp6hdr *icmph;
+	int on_link, optlen;
+	u8 *lladdr = NULL;
+
+	optlen = skb->tail - skb->transport_header;
+	optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
+
+	if (optlen < 0) {
+		net_dbg_ratelimited("rt6_redirect: packet too short\n");
+		return;
+	}
+
+	icmph = icmp6_hdr(skb);
+	target = (const struct in6_addr *) (icmph + 1);
+	dest = target + 1;
+
+	if (ipv6_addr_is_multicast(dest)) {
+		net_dbg_ratelimited("rt6_redirect: destination address is multicast\n");
+		return;
+	}
+
+	if (ipv6_addr_equal(dest, target)) {
+		on_link = 1;
+	} else if (ipv6_addr_type(target) !=
+		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
+		net_dbg_ratelimited("rt6_redirect: target address is not link-local unicast\n");
+		return;
+	}
+
+	in6_dev = __in6_dev_get(skb->dev);
+	if (!in6_dev)
+		return;
+	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
+		return;
+
+	/* RFC2461 8.1:
+	 *	The IP source address of the Redirect MUST be the same as the current
+	 *	first-hop router for the specified ICMP Destination Address.
+	 */
+
+	if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
+		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
+		return;
+	}
+	if (ndopts.nd_opts_tgt_lladdr) {
+		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
+					     skb->dev);
+		if (!lladdr) {
+			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
+			return;
+		}
+	}
+
+	neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
+	if (!neigh)
+		return;
+
+	src = &ipv6_hdr(skb)->daddr;
+	saddr = &ipv6_hdr(skb)->saddr;
 
 	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
 
@@ -1756,6 +1820,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
 	}
 
 out:
+	neigh_release(neigh);
 	dst_release(&rt->dst);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 6e157b6ac61aa7758ccd643d4aafdf3cc17b9f04 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 12 Jul 2012 00:05:02 -0700
Subject: ipv6: Pull main logic of rt6_redirect() into rt6_do_redirect().

Hook it into dst_ops->redirect as well.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 80 ++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 49 insertions(+), 31 deletions(-)

(limited to 'net/ipv6/route.c')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 73cf3f78aaa..545b1526c14 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -79,6 +79,7 @@ static int		ip6_pkt_discard(struct sk_buff *skb);
 static int		ip6_pkt_discard_out(struct sk_buff *skb);
 static void		ip6_link_failure(struct sk_buff *skb);
 static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
+static void		rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb);
 
 #ifdef CONFIG_IPV6_ROUTE_INFO
 static struct rt6_info *rt6_add_route_info(struct net *net,
@@ -174,6 +175,7 @@ static struct dst_ops ip6_dst_ops_template = {
 	.negative_advice	=	ip6_negative_advice,
 	.link_failure		=	ip6_link_failure,
 	.update_pmtu		=	ip6_rt_update_pmtu,
+	.redirect		=	rt6_do_redirect,
 	.local_out		=	__ip6_local_out,
 	.neigh_lookup		=	ip6_neigh_lookup,
 };
@@ -1690,28 +1692,26 @@ static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
 						   flags, __ip6_route_redirect);
 }
 
-void rt6_redirect(struct sk_buff *skb)
+static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb)
 {
 	struct net *net = dev_net(skb->dev);
 	struct netevent_redirect netevent;
 	struct rt6_info *rt, *nrt = NULL;
 	const struct in6_addr *target;
-	struct neighbour *old_neigh;
-	const struct in6_addr *dest;
-	const struct in6_addr *src;
-	const struct in6_addr *saddr;
 	struct ndisc_options ndopts;
+	const struct in6_addr *dest;
+	struct neighbour *old_neigh;
 	struct inet6_dev *in6_dev;
 	struct neighbour *neigh;
 	struct icmp6hdr *icmph;
-	int on_link, optlen;
-	u8 *lladdr = NULL;
+	int optlen, on_link;
+	u8 *lladdr;
 
 	optlen = skb->tail - skb->transport_header;
 	optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
 
 	if (optlen < 0) {
-		net_dbg_ratelimited("rt6_redirect: packet too short\n");
+		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
 		return;
 	}
 
@@ -1720,15 +1720,16 @@ void rt6_redirect(struct sk_buff *skb)
 	dest = target + 1;
 
 	if (ipv6_addr_is_multicast(dest)) {
-		net_dbg_ratelimited("rt6_redirect: destination address is multicast\n");
+		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
 		return;
 	}
 
+	on_link = 0;
 	if (ipv6_addr_equal(dest, target)) {
 		on_link = 1;
 	} else if (ipv6_addr_type(target) !=
 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
-		net_dbg_ratelimited("rt6_redirect: target address is not link-local unicast\n");
+		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
 		return;
 	}
 
@@ -1747,6 +1748,8 @@ void rt6_redirect(struct sk_buff *skb)
 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
 		return;
 	}
+
+	lladdr = NULL;
 	if (ndopts.nd_opts_tgt_lladdr) {
 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
 					     skb->dev);
@@ -1756,19 +1759,26 @@ void rt6_redirect(struct sk_buff *skb)
 		}
 	}
 
-	neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
-	if (!neigh)
+	rt = (struct rt6_info *) dst;
+	if (rt == net->ipv6.ip6_null_entry) {
+		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
 		return;
+	}
 
-	src = &ipv6_hdr(skb)->daddr;
-	saddr = &ipv6_hdr(skb)->saddr;
+	/* Redirect received -> path was valid.
+	 * Look, redirects are sent only in response to data packets,
+	 * so that this nexthop apparently is reachable. --ANK
+	 */
+	dst_confirm(&rt->dst);
 
-	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
+	neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
+	if (!neigh)
+		return;
 
-	if (rt == net->ipv6.ip6_null_entry) {
-		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
+	/* Duplicate redirect: silently ignore. */
+	old_neigh = rt->n;
+	if (neigh == old_neigh)
 		goto out;
-	}
 
 	/*
 	 *	We have finally decided to accept it.
@@ -1781,18 +1791,6 @@ void rt6_redirect(struct sk_buff *skb)
 				     NEIGH_UPDATE_F_ISROUTER))
 		     );
 
-	/*
-	 * Redirect received -> path was valid.
-	 * Look, redirects are sent only in response to data packets,
-	 * so that this nexthop apparently is reachable. --ANK
-	 */
-	dst_confirm(&rt->dst);
-
-	/* Duplicate redirect: silently ignore. */
-	old_neigh = rt->n;
-	if (neigh == old_neigh)
-		goto out;
-
 	nrt = ip6_rt_copy(rt, dest);
 	if (!nrt)
 		goto out;
@@ -1815,12 +1813,32 @@ void rt6_redirect(struct sk_buff *skb)
 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
 
 	if (rt->rt6i_flags & RTF_CACHE) {
+		rt = (struct rt6_info *) dst_clone(&rt->dst);
 		ip6_del_rt(rt);
-		return;
 	}
 
 out:
 	neigh_release(neigh);
+}
+
+void rt6_redirect(struct sk_buff *skb)
+{
+	const struct in6_addr *target;
+	const struct in6_addr *dest;
+	const struct in6_addr *src;
+	const struct in6_addr *saddr;
+	struct icmp6hdr *icmph;
+	struct rt6_info *rt;
+
+	icmph = icmp6_hdr(skb);
+	target = (const struct in6_addr *) (icmph + 1);
+	dest = target + 1;
+
+	src = &ipv6_hdr(skb)->daddr;
+	saddr = &ipv6_hdr(skb)->saddr;
+
+	rt = ip6_route_redirect(dest, src, saddr, skb->dev);
+	rt6_do_redirect(&rt->dst, skb);
 	dst_release(&rt->dst);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 3a5ad2ee5e2c5030d8a303d06f9148a2f893a369 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 12 Jul 2012 00:08:07 -0700
Subject: ipv6: Add ip6_redirect() and ip6_sk_redirect() helper functions.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_route.h |  2 ++
 net/ipv6/route.c        | 27 +++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

(limited to 'net/ipv6/route.c')

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 5cedbd7688c..693940565d8 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -139,6 +139,8 @@ extern void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
 			    int oif, u32 mark);
 extern void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk,
 			       __be32 mtu);
+extern void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark);
+extern void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk);
 
 struct netlink_callback;
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 545b1526c14..f52cf83bb1e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1114,6 +1114,33 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
 }
 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
 
+void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
+{
+	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
+	struct dst_entry *dst;
+	struct flowi6 fl6;
+
+	memset(&fl6, 0, sizeof(fl6));
+	fl6.flowi6_oif = oif;
+	fl6.flowi6_mark = mark;
+	fl6.flowi6_flags = 0;
+	fl6.daddr = iph->daddr;
+	fl6.saddr = iph->saddr;
+	fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
+
+	dst = ip6_route_output(net, NULL, &fl6);
+	if (!dst->error)
+		rt6_do_redirect(dst, skb);
+	dst_release(dst);
+}
+EXPORT_SYMBOL_GPL(ip6_redirect);
+
+void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
+{
+	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
+}
+EXPORT_SYMBOL_GPL(ip6_sk_redirect);
+
 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
 {
 	struct net_device *dev = dst->dev;
-- 
cgit v1.2.3-70-g09d2


From b94f1c0904da9b8bf031667afc48080ba7c3e8c9 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 12 Jul 2012 00:33:37 -0700
Subject: ipv6: Use icmpv6_notify() to propagate redirect, instead of
 rt6_redirect().

And delete rt6_redirect(), since it is no longer used.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_route.h |   2 -
 include/net/ipv6.h      |   2 +
 net/ipv6/icmp.c         |   2 +-
 net/ipv6/ndisc.c        |   2 +-
 net/ipv6/route.c        | 107 ------------------------------------------------
 5 files changed, 4 insertions(+), 111 deletions(-)

(limited to 'net/ipv6/route.c')

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 693940565d8..b6b6f7d6f3c 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -133,8 +133,6 @@ extern int			rt6_route_rcv(struct net_device *dev,
 					      u8 *opt, int len,
 					      const struct in6_addr *gwaddr);
 
-extern void			rt6_redirect(struct sk_buff *skb);
-
 extern void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
 			    int oif, u32 mark);
 extern void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk,
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index d4261d4d6c4..f695f39e892 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -251,6 +251,8 @@ static inline void fl6_sock_release(struct ip6_flowlabel *fl)
 		atomic_dec(&fl->users);
 }
 
+extern void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info);
+
 extern int 			ip6_ra_control(struct sock *sk, int sel);
 
 extern int			ipv6_parse_hopopts(struct sk_buff *skb);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index a113f7d7e93..24d69dbca4d 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -598,7 +598,7 @@ out:
 	icmpv6_xmit_unlock(sk);
 }
 
-static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
+void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
 {
 	const struct inet6_protocol *ipprot;
 	int inner_offset;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index b8d53e186a7..ff36194a71a 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1350,7 +1350,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
 		return;
 	}
 
-	rt6_redirect(skb);
+	icmpv6_notify(skb, NDISC_REDIRECT, 0, 0);
 }
 
 void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f52cf83bb1e..7296af144d6 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1633,92 +1633,6 @@ static int ip6_route_del(struct fib6_config *cfg)
 	return err;
 }
 
-/*
- *	Handle redirects
- */
-struct ip6rd_flowi {
-	struct flowi6 fl6;
-	struct in6_addr gateway;
-};
-
-static struct rt6_info *__ip6_route_redirect(struct net *net,
-					     struct fib6_table *table,
-					     struct flowi6 *fl6,
-					     int flags)
-{
-	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
-	struct rt6_info *rt;
-	struct fib6_node *fn;
-
-	/*
-	 * Get the "current" route for this destination and
-	 * check if the redirect has come from approriate router.
-	 *
-	 * RFC 2461 specifies that redirects should only be
-	 * accepted if they come from the nexthop to the target.
-	 * Due to the way the routes are chosen, this notion
-	 * is a bit fuzzy and one might need to check all possible
-	 * routes.
-	 */
-
-	read_lock_bh(&table->tb6_lock);
-	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
-restart:
-	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
-		/*
-		 * Current route is on-link; redirect is always invalid.
-		 *
-		 * Seems, previous statement is not true. It could
-		 * be node, which looks for us as on-link (f.e. proxy ndisc)
-		 * But then router serving it might decide, that we should
-		 * know truth 8)8) --ANK (980726).
-		 */
-		if (rt6_check_expired(rt))
-			continue;
-		if (!(rt->rt6i_flags & RTF_GATEWAY))
-			continue;
-		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
-			continue;
-		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
-			continue;
-		break;
-	}
-
-	if (!rt)
-		rt = net->ipv6.ip6_null_entry;
-	BACKTRACK(net, &fl6->saddr);
-out:
-	dst_hold(&rt->dst);
-
-	read_unlock_bh(&table->tb6_lock);
-
-	return rt;
-};
-
-static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
-					   const struct in6_addr *src,
-					   const struct in6_addr *gateway,
-					   struct net_device *dev)
-{
-	int flags = RT6_LOOKUP_F_HAS_SADDR;
-	struct net *net = dev_net(dev);
-	struct ip6rd_flowi rdfl = {
-		.fl6 = {
-			.flowi6_oif = dev->ifindex,
-			.daddr = *dest,
-			.saddr = *src,
-		},
-	};
-
-	rdfl.gateway = *gateway;
-
-	if (rt6_need_strict(dest))
-		flags |= RT6_LOOKUP_F_IFACE;
-
-	return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
-						   flags, __ip6_route_redirect);
-}
-
 static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb)
 {
 	struct net *net = dev_net(skb->dev);
@@ -1848,27 +1762,6 @@ out:
 	neigh_release(neigh);
 }
 
-void rt6_redirect(struct sk_buff *skb)
-{
-	const struct in6_addr *target;
-	const struct in6_addr *dest;
-	const struct in6_addr *src;
-	const struct in6_addr *saddr;
-	struct icmp6hdr *icmph;
-	struct rt6_info *rt;
-
-	icmph = icmp6_hdr(skb);
-	target = (const struct in6_addr *) (icmph + 1);
-	dest = target + 1;
-
-	src = &ipv6_hdr(skb)->daddr;
-	saddr = &ipv6_hdr(skb)->saddr;
-
-	rt = ip6_route_redirect(dest, src, saddr, skb->dev);
-	rt6_do_redirect(&rt->dst, skb);
-	dst_release(&rt->dst);
-}
-
 /*
  *	Misc support functions
  */
-- 
cgit v1.2.3-70-g09d2


From b587ee3ba21f58b7770a132e6bca5c6658ac5095 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 12 Jul 2012 00:39:24 -0700
Subject: net: Add dummy dst_ops->redirect method where needed.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netfilter.c | 5 +++++
 net/decnet/dn_route.c     | 6 ++++++
 net/ipv4/route.c          | 5 +++++
 net/ipv6/route.c          | 5 +++++
 4 files changed, 21 insertions(+)

(limited to 'net/ipv6/route.c')

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index b98d3d78ca7..81f76c402cf 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -115,6 +115,10 @@ static void fake_update_pmtu(struct dst_entry *dst, u32 mtu)
 {
 }
 
+static void fake_redirect(struct dst_entry *dst, struct sk_buff *skb)
+{
+}
+
 static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old)
 {
 	return NULL;
@@ -136,6 +140,7 @@ static struct dst_ops fake_dst_ops = {
 	.family =		AF_INET,
 	.protocol =		cpu_to_be16(ETH_P_IP),
 	.update_pmtu =		fake_update_pmtu,
+	.redirect =		fake_redirect,
 	.cow_metrics =		fake_cow_metrics,
 	.neigh_lookup =		fake_neigh_lookup,
 	.mtu =			fake_mtu,
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index b5594cc73ee..e9c4e2e864c 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -118,6 +118,7 @@ static void dn_dst_ifdown(struct dst_entry *, struct net_device *dev, int how);
 static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
 static void dn_dst_link_failure(struct sk_buff *);
 static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu);
+static void dn_dst_redirect(struct dst_entry *dst, struct sk_buff *skb);
 static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst,
 					     struct sk_buff *skb,
 					     const void *daddr);
@@ -145,6 +146,7 @@ static struct dst_ops dn_dst_ops = {
 	.negative_advice =	dn_dst_negative_advice,
 	.link_failure =		dn_dst_link_failure,
 	.update_pmtu =		dn_dst_update_pmtu,
+	.redirect =		dn_dst_redirect,
 	.neigh_lookup =		dn_dst_neigh_lookup,
 };
 
@@ -292,6 +294,10 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu)
 	}
 }
 
+static void dn_dst_redirect(struct dst_entry *dst, struct sk_buff *skb)
+{
+}
+
 /*
  * When a route has been marked obsolete. (e.g. routing cache flush)
  */
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index e98207dcd08..23bbe29b3bb 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2591,6 +2591,10 @@ static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
 {
 }
 
+static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sk_buff *skb)
+{
+}
+
 static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
 					  unsigned long old)
 {
@@ -2605,6 +2609,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
 	.mtu			=	ipv4_blackhole_mtu,
 	.default_advmss		=	ipv4_default_advmss,
 	.update_pmtu		=	ipv4_rt_blackhole_update_pmtu,
+	.redirect		=	ipv4_rt_blackhole_redirect,
 	.cow_metrics		=	ipv4_rt_blackhole_cow_metrics,
 	.neigh_lookup		=	ipv4_neigh_lookup,
 };
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7296af144d6..3151aabff5f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -191,6 +191,10 @@ static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
 {
 }
 
+static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sk_buff *skb)
+{
+}
+
 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
 					 unsigned long old)
 {
@@ -205,6 +209,7 @@ static struct dst_ops ip6_dst_blackhole_ops = {
 	.mtu			=	ip6_blackhole_mtu,
 	.default_advmss		=	ip6_default_advmss,
 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
+	.redirect		=	ip6_rt_blackhole_redirect,
 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
 	.neigh_lookup		=	ip6_neigh_lookup,
 };
-- 
cgit v1.2.3-70-g09d2


From 8104891b86b212de77063660c0c062b427526fa6 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Thu, 5 Jul 2012 23:37:09 +0000
Subject: ipv6: Initialize the struct rt6_info behind the dst_enty field

We start initializing the struct rt6_info at the first field
behind the struct dst_enty. This is error prone because it
might leave a new field uninitialized. So start initializing
the struct rt6_info right behind the dst_entry.

Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'net/ipv6/route.c')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 3151aabff5f..2a4c8d48977 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -280,8 +280,9 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
 					0, 0, flags);
 
 	if (rt) {
-		memset(&rt->n, 0,
-		       sizeof(*rt) - sizeof(struct dst_entry));
+		struct dst_entry *dst = &rt->dst;
+
+		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
 	}
 	return rt;
@@ -982,11 +983,11 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
 
 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
 	if (rt) {
-		memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
-		rt6_init_peer(rt, net->ipv6.peers);
-
 		new = &rt->dst;
 
+		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
+		rt6_init_peer(rt, net->ipv6.peers);
+
 		new->__use = 1;
 		new->input = dst_discard;
 		new->output = dst_discard;
-- 
cgit v1.2.3-70-g09d2


From f0396f60d7c165018c9b203fb9b89fb224835578 Mon Sep 17 00:00:00 2001
From: Denis Ovsienko <infrastation@yandex.ru>
Date: Tue, 10 Jul 2012 04:45:50 +0000
Subject: ipv6: fix RTPROT_RA markup of RA routes w/nexthops

Userspace implementations of network routing protocols sometimes need to
tell RA-originated IPv6 routes from other kernel routes to make proper
routing decisions. This makes most sense for RA routes with nexthops,
namely, default routes and Route Information routes.

The intended mean of preserving RA route origin in a netlink message is
through indicating RTPROT_RA as protocol code. Function rt6_fill_node()
tried to do that for default routes, but its test condition was taken
wrong. This change is modeled after the original mailing list posting
by Jeff Haran. It fixes the test condition for default route case and
sets the same behaviour for Route Information case (both types use
nexthops). Handling of the 3rd RA route type, Prefix Information, is
left unchanged, as it stands for interface connected routes (without
nexthops).

Signed-off-by: Denis Ovsienko <infrastation@yandex.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'net/ipv6/route.c')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 2a4c8d48977..412fad809a3 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2399,10 +2399,12 @@ static int rt6_fill_node(struct net *net,
 	rtm->rtm_protocol = rt->rt6i_protocol;
 	if (rt->rt6i_flags & RTF_DYNAMIC)
 		rtm->rtm_protocol = RTPROT_REDIRECT;
-	else if (rt->rt6i_flags & RTF_ADDRCONF)
-		rtm->rtm_protocol = RTPROT_KERNEL;
-	else if (rt->rt6i_flags & RTF_DEFAULT)
-		rtm->rtm_protocol = RTPROT_RA;
+	else if (rt->rt6i_flags & RTF_ADDRCONF) {
+		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
+			rtm->rtm_protocol = RTPROT_RA;
+		else
+			rtm->rtm_protocol = RTPROT_KERNEL;
+	}
 
 	if (rt->rt6i_flags & RTF_CACHE)
 		rtm->rtm_flags |= RTM_F_CLONED;
-- 
cgit v1.2.3-70-g09d2


From 6700c2709c08d74ae2c3c29b84a30da012dbc7f1 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 17 Jul 2012 03:29:28 -0700
Subject: net: Pass optional SKB and SK arguments to
 dst_ops->{update_pmtu,redirect}()

This will be used so that we can compose a full flow key.

Even though we have a route in this context, we need more.  In the
future the routes will be without destination address, source address,
etc. keying.  One ipv4 route will cover entire subnets, etc.

In this environment we have to have a way to possess persistent storage
for redirects and PMTU information.  This persistent storage will exist
in the FIB tables, and that's why we'll need to be able to rebuild a
full lookup flow key here.  Using that flow key will do a fib_lookup()
and create/update the persistent entry.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/ulp/ipoib/ipoib_cm.c |  2 +-
 include/net/dst_ops.h                   |  6 ++++--
 net/bridge/br_netfilter.c               |  6 ++++--
 net/dccp/ipv4.c                         |  2 +-
 net/dccp/ipv6.c                         |  2 +-
 net/decnet/dn_route.c                   | 12 ++++++++----
 net/ipv4/inet_connection_sock.c         |  2 +-
 net/ipv4/ip_gre.c                       |  2 +-
 net/ipv4/ipip.c                         |  2 +-
 net/ipv4/route.c                        | 21 +++++++++++++--------
 net/ipv4/tcp_ipv4.c                     |  2 +-
 net/ipv4/xfrm4_policy.c                 | 10 ++++++----
 net/ipv6/inet6_connection_sock.c        |  2 +-
 net/ipv6/ip6_tunnel.c                   |  6 +++---
 net/ipv6/route.c                        | 21 +++++++++++++--------
 net/ipv6/sit.c                          |  2 +-
 net/ipv6/tcp_ipv6.c                     |  2 +-
 net/ipv6/xfrm6_policy.c                 | 10 ++++++----
 net/netfilter/ipvs/ip_vs_xmit.c         |  4 ++--
 net/sctp/input.c                        |  2 +-
 net/sctp/transport.c                    |  2 +-
 21 files changed, 71 insertions(+), 49 deletions(-)

(limited to 'net/ipv6/route.c')

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 014504d8e43..1ca732201f3 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1397,7 +1397,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
 	int e = skb_queue_empty(&priv->cm.skb_queue);
 
 	if (skb_dst(skb))
-		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
 
 	skb_queue_tail(&priv->cm.skb_queue, skb);
 	if (e)
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index 085931fa7ce..d079fc61c12 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -24,8 +24,10 @@ struct dst_ops {
 					  struct net_device *dev, int how);
 	struct dst_entry *	(*negative_advice)(struct dst_entry *);
 	void			(*link_failure)(struct sk_buff *);
-	void			(*update_pmtu)(struct dst_entry *dst, u32 mtu);
-	void			(*redirect)(struct dst_entry *dst, struct sk_buff *skb);
+	void			(*update_pmtu)(struct dst_entry *dst, struct sock *sk,
+					       struct sk_buff *skb, u32 mtu);
+	void			(*redirect)(struct dst_entry *dst, struct sock *sk,
+					    struct sk_buff *skb);
 	int			(*local_out)(struct sk_buff *skb);
 	struct neighbour *	(*neigh_lookup)(const struct dst_entry *dst,
 						struct sk_buff *skb,
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 81f76c402cf..68e8f364bbf 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -111,11 +111,13 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb)
 	 pppoe_proto(skb) == htons(PPP_IPV6) && \
 	 brnf_filter_pppoe_tagged)
 
-static void fake_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk,
+			     struct sk_buff *skb, u32 mtu)
 {
 }
 
-static void fake_redirect(struct dst_entry *dst, struct sk_buff *skb)
+static void fake_redirect(struct dst_entry *dst, struct sock *sk,
+			  struct sk_buff *skb)
 {
 }
 
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 683902fcc8e..ab4f44c9bb2 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -193,7 +193,7 @@ static void dccp_do_redirect(struct sk_buff *skb, struct sock *sk)
 	struct dst_entry *dst = __sk_dst_check(sk, 0);
 
 	if (dst)
-		dst->ops->redirect(dst, skb);
+		dst->ops->redirect(dst, sk, skb);
 }
 
 /*
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 3ee0342e1ce..56840b249f3 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -134,7 +134,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
 
 		if (dst)
-			dst->ops->redirect(dst, skb);
+			dst->ops->redirect(dst, sk, skb);
 	}
 
 	if (type == ICMPV6_PKT_TOOBIG) {
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index e9c4e2e864c..47de90d8fe9 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -117,8 +117,10 @@ static void dn_dst_destroy(struct dst_entry *);
 static void dn_dst_ifdown(struct dst_entry *, struct net_device *dev, int how);
 static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
 static void dn_dst_link_failure(struct sk_buff *);
-static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu);
-static void dn_dst_redirect(struct dst_entry *dst, struct sk_buff *skb);
+static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk,
+			       struct sk_buff *skb , u32 mtu);
+static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk,
+			    struct sk_buff *skb);
 static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst,
 					     struct sk_buff *skb,
 					     const void *daddr);
@@ -266,7 +268,8 @@ static int dn_dst_gc(struct dst_ops *ops)
  * We update both the mtu and the advertised mss (i.e. the segment size we
  * advertise to the other end).
  */
-static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk,
+			       struct sk_buff *skb, u32 mtu)
 {
 	struct dn_route *rt = (struct dn_route *) dst;
 	struct neighbour *n = rt->n;
@@ -294,7 +297,8 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu)
 	}
 }
 
-static void dn_dst_redirect(struct dst_entry *dst, struct sk_buff *skb)
+static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk,
+			    struct sk_buff *skb)
 {
 }
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 200d2180937..3ea465286a3 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -840,7 +840,7 @@ struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu)
 		if (!dst)
 			goto out;
 	}
-	dst->ops->update_pmtu(dst, mtu);
+	dst->ops->update_pmtu(dst, sk, NULL, mtu);
 
 	dst = __sk_dst_check(sk, 0);
 	if (!dst)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0c3123566d7..42c44b1403c 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -833,7 +833,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
 
 	if (skb_dst(skb))
-		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
 
 	if (skb->protocol == htons(ETH_P_IP)) {
 		df |= (old_iph->frag_off&htons(IP_DF));
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index c2d0e6d8baa..2c2c35bace7 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -519,7 +519,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 		}
 
 		if (skb_dst(skb))
-			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
 
 		if ((old_iph->frag_off & htons(IP_DF)) &&
 		    mtu < ntohs(old_iph->tot_len)) {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index aad21819316..b35d3bfc66c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -148,8 +148,10 @@ static unsigned int	 ipv4_mtu(const struct dst_entry *dst);
 static void		 ipv4_dst_destroy(struct dst_entry *dst);
 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
 static void		 ipv4_link_failure(struct sk_buff *skb);
-static void		 ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
-static void		 ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb);
+static void		 ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+					   struct sk_buff *skb, u32 mtu);
+static void		 ip_do_redirect(struct dst_entry *dst, struct sock *sk,
+					struct sk_buff *skb);
 static int rt_garbage_collect(struct dst_ops *ops);
 
 static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -1273,7 +1275,7 @@ static void rt_del(unsigned int hash, struct rtable *rt)
 	spin_unlock_bh(rt_hash_lock_addr(hash));
 }
 
-static void ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb)
+static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
 {
 	__be32 new_gw = icmp_hdr(skb)->un.gateway;
 	__be32 old_gw = ip_hdr(skb)->saddr;
@@ -1506,7 +1508,8 @@ out:	kfree_skb(skb);
 	return 0;
 }
 
-static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+			      struct sk_buff *skb, u32 mtu)
 {
 	struct rtable *rt = (struct rtable *) dst;
 
@@ -1531,7 +1534,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
 			   iph->daddr, iph->saddr, 0, 0);
 	rt = __ip_route_output_key(net, &fl4);
 	if (!IS_ERR(rt)) {
-		ip_rt_update_pmtu(&rt->dst, mtu);
+		ip_rt_update_pmtu(&rt->dst, NULL, skb, mtu);
 		ip_rt_put(rt);
 	}
 }
@@ -1559,7 +1562,7 @@ void ipv4_redirect(struct sk_buff *skb, struct net *net,
 			   protocol, flow_flags, iph->daddr, iph->saddr, 0, 0);
 	rt = __ip_route_output_key(net, &fl4);
 	if (!IS_ERR(rt)) {
-		ip_do_redirect(&rt->dst, skb);
+		ip_do_redirect(&rt->dst, NULL, skb);
 		ip_rt_put(rt);
 	}
 }
@@ -2587,11 +2590,13 @@ static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
 	return mtu ? : dst->dev->mtu;
 }
 
-static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
+					  struct sk_buff *skb, u32 mtu)
 {
 }
 
-static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sk_buff *skb)
+static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
+				       struct sk_buff *skb)
 {
 }
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b8e7e059540..d9caf5c07aa 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -319,7 +319,7 @@ static void do_redirect(struct sk_buff *skb, struct sock *sk)
 	struct dst_entry *dst = __sk_dst_check(sk, 0);
 
 	if (dst)
-		dst->ops->redirect(dst, skb);
+		dst->ops->redirect(dst, sk, skb);
 }
 
 /*
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 737131cef37..fcf7678bc00 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -194,20 +194,22 @@ static inline int xfrm4_garbage_collect(struct dst_ops *ops)
 	return (dst_entries_get_slow(ops) > ops->gc_thresh * 2);
 }
 
-static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk,
+			      struct sk_buff *skb, u32 mtu)
 {
 	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
 	struct dst_entry *path = xdst->route;
 
-	path->ops->update_pmtu(path, mtu);
+	path->ops->update_pmtu(path, sk, skb, mtu);
 }
 
-static void xfrm4_redirect(struct dst_entry *dst, struct sk_buff *skb)
+static void xfrm4_redirect(struct dst_entry *dst, struct sock *sk,
+			   struct sk_buff *skb)
 {
 	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
 	struct dst_entry *path = xdst->route;
 
-	path->ops->redirect(path, skb);
+	path->ops->redirect(path, sk, skb);
 }
 
 static void xfrm4_dst_destroy(struct dst_entry *dst)
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 62539a4b2dc..4a0c4d2d8b0 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -269,7 +269,7 @@ struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu)
 
 	if (IS_ERR(dst))
 		return NULL;
-	dst->ops->update_pmtu(dst, mtu);
+	dst->ops->update_pmtu(dst, sk, NULL, mtu);
 
 	return inet6_csk_route_socket(sk);
 }
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 61d10659729..db328466796 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -609,10 +609,10 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		if (rel_info > dst_mtu(skb_dst(skb2)))
 			goto out;
 
-		skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info);
+		skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info);
 	}
 	if (rel_type == ICMP_REDIRECT)
-		skb_dst(skb2)->ops->redirect(skb_dst(skb2), skb2);
+		skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2);
 
 	icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
 
@@ -952,7 +952,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
 	if (mtu < IPV6_MIN_MTU)
 		mtu = IPV6_MIN_MTU;
 	if (skb_dst(skb))
-		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
 	if (skb->len > mtu) {
 		*pmtu = mtu;
 		err = -EMSGSIZE;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 2a4c8d48977..31af1ed6c1d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -78,8 +78,10 @@ static int		 ip6_dst_gc(struct dst_ops *ops);
 static int		ip6_pkt_discard(struct sk_buff *skb);
 static int		ip6_pkt_discard_out(struct sk_buff *skb);
 static void		ip6_link_failure(struct sk_buff *skb);
-static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
-static void		rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb);
+static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+					   struct sk_buff *skb, u32 mtu);
+static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
+					struct sk_buff *skb);
 
 #ifdef CONFIG_IPV6_ROUTE_INFO
 static struct rt6_info *rt6_add_route_info(struct net *net,
@@ -187,11 +189,13 @@ static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
 	return mtu ? : dst->dev->mtu;
 }
 
-static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
+					 struct sk_buff *skb, u32 mtu)
 {
 }
 
-static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sk_buff *skb)
+static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
+				      struct sk_buff *skb)
 {
 }
 
@@ -1071,7 +1075,8 @@ static void ip6_link_failure(struct sk_buff *skb)
 	}
 }
 
-static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+			       struct sk_buff *skb, u32 mtu)
 {
 	struct rt6_info *rt6 = (struct rt6_info*)dst;
 
@@ -1108,7 +1113,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
 
 	dst = ip6_route_output(net, NULL, &fl6);
 	if (!dst->error)
-		ip6_rt_update_pmtu(dst, ntohl(mtu));
+		ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
 	dst_release(dst);
 }
 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
@@ -1136,7 +1141,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
 
 	dst = ip6_route_output(net, NULL, &fl6);
 	if (!dst->error)
-		rt6_do_redirect(dst, skb);
+		rt6_do_redirect(dst, NULL, skb);
 	dst_release(dst);
 }
 EXPORT_SYMBOL_GPL(ip6_redirect);
@@ -1639,7 +1644,7 @@ static int ip6_route_del(struct fib6_config *cfg)
 	return err;
 }
 
-static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb)
+static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
 {
 	struct net *net = dev_net(skb->dev);
 	struct netevent_redirect netevent;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index fbf1622fdee..3bd1bfc01f8 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -807,7 +807,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 		}
 
 		if (tunnel->parms.iph.daddr && skb_dst(skb))
-			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
 
 		if (skb->len > mtu) {
 			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index ecdf241cad0..c9dabdd832d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -367,7 +367,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
 
 		if (dst)
-			dst->ops->redirect(dst,skb);
+			dst->ops->redirect(dst, sk, skb);
 	}
 
 	if (type == ICMPV6_PKT_TOOBIG) {
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index f5a9cb8257b..ef39812107b 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -207,20 +207,22 @@ static inline int xfrm6_garbage_collect(struct dst_ops *ops)
 	return dst_entries_get_fast(ops) > ops->gc_thresh * 2;
 }
 
-static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk,
+			      struct sk_buff *skb, u32 mtu)
 {
 	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
 	struct dst_entry *path = xdst->route;
 
-	path->ops->update_pmtu(path, mtu);
+	path->ops->update_pmtu(path, sk, skb, mtu);
 }
 
-static void xfrm6_redirect(struct dst_entry *dst, struct sk_buff *skb)
+static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk,
+			   struct sk_buff *skb)
 {
 	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
 	struct dst_entry *path = xdst->route;
 
-	path->ops->redirect(path, skb);
+	path->ops->redirect(path, sk, skb);
 }
 
 static void xfrm6_dst_destroy(struct dst_entry *dst)
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 71d6ecb6592..65b616ae171 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -797,7 +797,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_put;
 	}
 	if (skb_dst(skb))
-		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
 
 	df |= (old_iph->frag_off & htons(IP_DF));
 
@@ -913,7 +913,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_put;
 	}
 	if (skb_dst(skb))
-		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
 
 	if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) &&
 	    !skb_is_gso(skb)) {
diff --git a/net/sctp/input.c b/net/sctp/input.c
index a67bc31f49f..c201b26879a 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -432,7 +432,7 @@ void sctp_icmp_redirect(struct sock *sk, struct sctp_transport *t,
 		return;
 	dst = sctp_transport_dst_check(t);
 	if (dst)
-		dst->ops->redirect(dst, skb);
+		dst->ops->redirect(dst, sk, skb);
 }
 
 /*
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index e69e1a2175a..a6b7ee9ce28 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -249,7 +249,7 @@ void sctp_transport_update_pmtu(struct sock *sk, struct sctp_transport *t, u32 p
 		t->af_specific->get_dst(t, &t->saddr, &t->fl, sk);
 
 	if (dst) {
-		dst->ops->update_pmtu(dst, pmtu);
+		dst->ops->update_pmtu(dst, sk, NULL, pmtu);
 
 		dst = sctp_transport_dst_check(t);
 		if (!dst)
-- 
cgit v1.2.3-70-g09d2


From f5b0a8743601a4477419171f5046bd07d1c080a0 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 19 Jul 2012 12:31:33 -0700
Subject: net: Document dst->obsolete better.

Add a big comment explaining how the field works, and use defines
instead of magic constants for the values assigned to it.

Suggested by Joe Perches.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h      | 14 +++++++++++++-
 net/core/dst.c         |  4 ++--
 net/decnet/dn_route.c  |  4 ++--
 net/ipv4/route.c       |  5 +++--
 net/ipv6/route.c       |  4 ++--
 net/sctp/transport.c   |  2 +-
 net/xfrm/xfrm_policy.c | 23 ++++++++++++-----------
 7 files changed, 35 insertions(+), 21 deletions(-)

(limited to 'net/ipv6/route.c')

diff --git a/include/net/dst.h b/include/net/dst.h
index 51610468c63..0df661a0c29 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -65,7 +65,19 @@ struct dst_entry {
 	unsigned short		pending_confirm;
 
 	short			error;
+
+	/* A non-zero value of dst->obsolete forces by-hand validation
+	 * of the route entry.  Positive values are set by the generic
+	 * dst layer to indicate that the entry has been forcefully
+	 * destroyed.
+	 *
+	 * Negative values are used by the implementation layer code to
+	 * force invocation of the dst_ops->check() method.
+	 */
 	short			obsolete;
+#define DST_OBSOLETE_NONE	0
+#define DST_OBSOLETE_DEAD	2
+#define DST_OBSOLETE_FORCE_CHK	-1
 	unsigned short		header_len;	/* more space at head required */
 	unsigned short		trailer_len;	/* space to reserve at tail */
 #ifdef CONFIG_IP_ROUTE_CLASSID
@@ -359,7 +371,7 @@ extern struct dst_entry *dst_destroy(struct dst_entry *dst);
 
 static inline void dst_free(struct dst_entry *dst)
 {
-	if (dst->obsolete > 1)
+	if (dst->obsolete > 0)
 		return;
 	if (!atomic_read(&dst->__refcnt)) {
 		dst = dst_destroy(dst);
diff --git a/net/core/dst.c b/net/core/dst.c
index 07bacff84aa..069d51d2941 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -94,7 +94,7 @@ loop:
 			 * But we do not have state "obsoleted, but
 			 * referenced by parent", so it is right.
 			 */
-			if (dst->obsolete > 1)
+			if (dst->obsolete > 0)
 				continue;
 
 			___dst_free(dst);
@@ -202,7 +202,7 @@ static void ___dst_free(struct dst_entry *dst)
 	 */
 	if (dst->dev == NULL || !(dst->dev->flags&IFF_UP))
 		dst->input = dst->output = dst_discard;
-	dst->obsolete = 2;
+	dst->obsolete = DST_OBSOLETE_DEAD;
 }
 
 void __dst_free(struct dst_entry *dst)
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 47de90d8fe9..23cc11dd4e4 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1176,7 +1176,7 @@ make_route:
 	if (dev_out->flags & IFF_LOOPBACK)
 		flags |= RTCF_LOCAL;
 
-	rt = dst_alloc(&dn_dst_ops, dev_out, 1, 0, DST_HOST);
+	rt = dst_alloc(&dn_dst_ops, dev_out, 1, DST_OBSOLETE_NONE, DST_HOST);
 	if (rt == NULL)
 		goto e_nobufs;
 
@@ -1444,7 +1444,7 @@ static int dn_route_input_slow(struct sk_buff *skb)
 	}
 
 make_route:
-	rt = dst_alloc(&dn_dst_ops, out_dev, 0, 0, DST_HOST);
+	rt = dst_alloc(&dn_dst_ops, out_dev, 0, DST_OBSOLETE_NONE, DST_HOST);
 	if (rt == NULL)
 		goto e_nobufs;
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d1d57963809..50d2498c928 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1221,7 +1221,7 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4,
 static struct rtable *rt_dst_alloc(struct net_device *dev,
 				   bool nopolicy, bool noxfrm)
 {
-	return dst_alloc(&ipv4_dst_ops, dev, 1, -1,
+	return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
 			 DST_HOST | DST_NOCACHE |
 			 (nopolicy ? DST_NOPOLICY : 0) |
 			 (noxfrm ? DST_NOXFRM : 0));
@@ -1969,9 +1969,10 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
 
 struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
 {
-	struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, 0, 0);
 	struct rtable *ort = (struct rtable *) dst_orig;
+	struct rtable *rt;
 
+	rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
 	if (rt) {
 		struct dst_entry *new = &rt->dst;
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 84f6564dd37..cf02cb97bbd 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -281,7 +281,7 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
 					     struct fib6_table *table)
 {
 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
-					0, 0, flags);
+					0, DST_OBSOLETE_NONE, flags);
 
 	if (rt) {
 		struct dst_entry *dst = &rt->dst;
@@ -985,7 +985,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
 	struct dst_entry *new = NULL;
 
-	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
+	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
 	if (rt) {
 		new = &rt->dst;
 
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index a6b7ee9ce28..ec3a12b9b80 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -216,7 +216,7 @@ void sctp_transport_set_owner(struct sctp_transport *transport,
 void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
 {
 	/* If we don't have a fresh route, look one up */
-	if (!transport->dst || transport->dst->obsolete > 1) {
+	if (!transport->dst || transport->dst->obsolete) {
 		dst_release(transport->dst);
 		transport->af_specific->get_dst(transport, &transport->saddr,
 						&transport->fl, sk);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 65bd1ca5151..c5a5165a592 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1350,7 +1350,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
 	default:
 		BUG();
 	}
-	xdst = dst_alloc(dst_ops, NULL, 0, 0, 0);
+	xdst = dst_alloc(dst_ops, NULL, 0, DST_OBSOLETE_NONE, 0);
 
 	if (likely(xdst)) {
 		struct dst_entry *dst = &xdst->u.dst;
@@ -1477,7 +1477,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 		dst1->xfrm = xfrm[i];
 		xdst->xfrm_genid = xfrm[i]->genid;
 
-		dst1->obsolete = -1;
+		dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
 		dst1->flags |= DST_HOST;
 		dst1->lastuse = now;
 
@@ -2219,12 +2219,13 @@ EXPORT_SYMBOL(__xfrm_route_forward);
 static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
 {
 	/* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
-	 * to "-1" to force all XFRM destinations to get validated by
-	 * dst_ops->check on every use.  We do this because when a
-	 * normal route referenced by an XFRM dst is obsoleted we do
-	 * not go looking around for all parent referencing XFRM dsts
-	 * so that we can invalidate them.  It is just too much work.
-	 * Instead we make the checks here on every use.  For example:
+	 * to DST_OBSOLETE_FORCE_CHK to force all XFRM destinations to
+	 * get validated by dst_ops->check on every use.  We do this
+	 * because when a normal route referenced by an XFRM dst is
+	 * obsoleted we do not go looking around for all parent
+	 * referencing XFRM dsts so that we can invalidate them.  It
+	 * is just too much work.  Instead we make the checks here on
+	 * every use.  For example:
 	 *
 	 *	XFRM dst A --> IPv4 dst X
 	 *
@@ -2234,9 +2235,9 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
 	 * stale_bundle() check.
 	 *
 	 * When a policy's bundle is pruned, we dst_free() the XFRM
-	 * dst which causes it's ->obsolete field to be set to a
-	 * positive non-zero integer.  If an XFRM dst has been pruned
-	 * like this, we want to force a new route lookup.
+	 * dst which causes it's ->obsolete field to be set to
+	 * DST_OBSOLETE_DEAD.  If an XFRM dst has been pruned like
+	 * this, we want to force a new route lookup.
 	 */
 	if (dst->obsolete < 0 && !stale_bundle(dst))
 		return dst;
-- 
cgit v1.2.3-70-g09d2