From fbfe95a42e90b3dd079cc9019ba7d7700feee0f6 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 8 Jun 2012 23:24:18 -0700
Subject: inet: Create and use rt{,6}_get_peer_create().

There's a lot of places that open-code rt{,6}_get_peer() only because
they want to set 'create' to one.  So add an rt{,6}_get_peer_create()
for their sake.

There were also a few spots open-coding plain rt{,6}_get_peer() and
those are transformed here as well.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/icmp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net/ipv6/icmp.c')

diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 091a2971c7b..ed89bba745a 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -188,14 +188,14 @@ static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
 	} else {
 		struct rt6_info *rt = (struct rt6_info *)dst;
 		int tmo = net->ipv6.sysctl.icmpv6_time;
+		struct inet_peer *peer;
 
 		/* Give more bandwidth to wider prefixes. */
 		if (rt->rt6i_dst.plen < 128)
 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
 
-		if (!rt->rt6i_peer)
-			rt6_bind_peer(rt, 1);
-		res = inet_peer_xrlim_allow(rt->rt6i_peer, tmo);
+		peer = rt6_get_peer_create(rt);
+		res = inet_peer_xrlim_allow(peer, tmo);
 	}
 	dst_release(dst);
 	return res;
-- 
cgit v1.2.3-70-g09d2


From 81aded24675ebda5de8a68843250ad15584ac38a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 15 Jun 2012 14:54:11 -0700
Subject: ipv6: Handle PMTU in ICMP error handlers.

One tricky issue on the ipv6 side vs. ipv4 is that the ICMP callouts
to handle the error pass the 32-bit info cookie in network byte order
whereas ipv4 passes it around in host byte order.

Like the ipv4 side, we have two helper functions.  One for when we
have a socket context and one for when we do not.

ip6ip6 tunnels are not handled here, because they handle PMTU events
by essentially relaying another ICMP packet-too-big message back to
the original sender.

This patch allows us to get rid of rt6_do_pmtu_disc().  It handles all
kinds of situations that simply cannot happen when we do the PMTU
update directly using a fully resolved route.

In fact, the "plen == 128" check in ip6_rt_update_pmtu() can very
likely be removed or changed into a BUG_ON() check.  We should never
have a prefixed ipv6 route when we get there.

Another piece of strange history here is that TCP and DCCP, unlike in
ipv4, never invoke the update_pmtu() method from their ICMP error
handlers.  This is incredibly astonishing since this is the context
where we have the most accurate context in which to make a PMTU
update, namely we have a fully connected socket and associated cached
socket route.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_route.h |   8 +--
 net/dccp/ipv6.c         |   2 +
 net/ipv6/ah6.c          |   3 +-
 net/ipv6/esp6.c         |   2 +
 net/ipv6/icmp.c         |   6 +-
 net/ipv6/ipcomp6.c      |   2 +
 net/ipv6/raw.c          |   5 +-
 net/ipv6/route.c        | 143 +++++++++++-------------------------------------
 net/ipv6/tcp_ipv6.c     |   2 +
 net/ipv6/udp.c          |   3 +
 10 files changed, 54 insertions(+), 122 deletions(-)

(limited to 'net/ipv6/icmp.c')

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index a2cda240ca9..58cb3fc3487 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -140,10 +140,10 @@ extern void			rt6_redirect(const struct in6_addr *dest,
 					     u8 *lladdr,
 					     int on_link);
 
-extern void			rt6_pmtu_discovery(const struct in6_addr *daddr,
-						   const struct in6_addr *saddr,
-						   struct net_device *dev,
-						   u32 pmtu);
+extern void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
+			    int oif, u32 mark);
+extern void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk,
+			       __be32 mtu);
 
 struct netlink_callback;
 
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index fa9512d86f3..9991be083ad 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -165,6 +165,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		} else
 			dst_hold(dst);
 
+		dst->ops->update_pmtu(dst, ntohl(info));
+
 		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
 			dccp_sync_mss(sk, dst_mtu(dst));
 		} /* else let the usual retransmit timer handle it */
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index f1a4a2c28ed..49d4d26bda8 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -35,6 +35,7 @@
 #include <linux/pfkeyv2.h>
 #include <linux/string.h>
 #include <linux/scatterlist.h>
+#include <net/ip6_route.h>
 #include <net/icmp.h>
 #include <net/ipv6.h>
 #include <net/protocol.h>
@@ -621,7 +622,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 	NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%pI6\n",
 		 ntohl(ah->spi), &iph->daddr);
-
+	ip6_update_pmtu(skb, net, info, 0, 0);
 	xfrm_state_put(x);
 }
 
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index db1521fcda5..89a615ba84f 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -39,6 +39,7 @@
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <net/ip6_route.h>
 #include <net/icmp.h>
 #include <net/ipv6.h>
 #include <net/protocol.h>
@@ -442,6 +443,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		return;
 	pr_debug("pmtu discovery on SA ESP/%08x/%pI6\n",
 		 ntohl(esph->spi), &iph->daddr);
+	ip6_update_pmtu(skb, net, info, 0, 0);
 	xfrm_state_put(x);
 }
 
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index ed89bba745a..5247d5c211f 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -649,7 +649,6 @@ static int icmpv6_rcv(struct sk_buff *skb)
 	struct net_device *dev = skb->dev;
 	struct inet6_dev *idev = __in6_dev_get(dev);
 	const struct in6_addr *saddr, *daddr;
-	const struct ipv6hdr *orig_hdr;
 	struct icmp6hdr *hdr;
 	u8 type;
 
@@ -661,7 +660,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
 				 XFRM_STATE_ICMP))
 			goto drop_no_count;
 
-		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr)))
+		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
 			goto drop_no_count;
 
 		nh = skb_network_offset(skb);
@@ -722,9 +721,6 @@ static int icmpv6_rcv(struct sk_buff *skb)
 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 			goto discard_it;
 		hdr = icmp6_hdr(skb);
-		orig_hdr = (struct ipv6hdr *) (hdr + 1);
-		rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
-				   ntohl(hdr->icmp6_mtu));
 
 		/*
 		 *	Drop through to notify
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 5cb75bfe45b..92832385a8e 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -46,6 +46,7 @@
 #include <linux/list.h>
 #include <linux/vmalloc.h>
 #include <linux/rtnetlink.h>
+#include <net/ip6_route.h>
 #include <net/icmp.h>
 #include <net/ipv6.h>
 #include <net/protocol.h>
@@ -74,6 +75,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 	pr_debug("pmtu discovery on SA IPCOMP/%08x/%pI6\n",
 		 spi, &iph->daddr);
+	ip6_update_pmtu(skb, net, info, 0, 0);
 	xfrm_state_put(x);
 }
 
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 93d69836fde..43b0042f15f 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -328,9 +328,10 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
 		return;
 
 	harderr = icmpv6_err_convert(type, code, &err);
-	if (type == ICMPV6_PKT_TOOBIG)
+	if (type == ICMPV6_PKT_TOOBIG) {
+		ip6_sk_update_pmtu(skb, sk, info);
 		harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
-
+	}
 	if (np->recverr) {
 		u8 *payload = skb->data;
 		if (!inet->hdrincl)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 58a3ec23da2..0d41f68daff 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1049,7 +1049,10 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 {
 	struct rt6_info *rt6 = (struct rt6_info*)dst;
 
+	dst_confirm(dst);
 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
+		struct net *net = dev_net(dst->dev);
+
 		rt6->rt6i_flags |= RTF_MODIFIED;
 		if (mtu < IPV6_MIN_MTU) {
 			u32 features = dst_metric(dst, RTAX_FEATURES);
@@ -1058,9 +1061,39 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 			dst_metric_set(dst, RTAX_FEATURES, features);
 		}
 		dst_metric_set(dst, RTAX_MTU, mtu);
+		rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
 	}
 }
 
+void ip6_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
+		     int oif, __be32 mark)
+{
+	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
+	struct dst_entry *dst;
+	struct flowi6 fl6;
+
+	memset(&fl6, 0, sizeof(fl6));
+	fl6.flowi6_oif = oif;
+	fl6.flowi6_mark = mark;
+	fl6.flowi6_flags = FLOWI_FLAG_PRECOW_METRICS;
+	fl6.daddr = iph->daddr;
+	fl6.saddr = iph->saddr;
+	fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
+
+	dst = ip6_route_output(net, NULL, &fl6);
+	if (!dst->error)
+		ip6_rt_update_pmtu(dst, ntohl(mtu));
+	dst_release(dst);
+}
+EXPORT_SYMBOL_GPL(ip6_update_pmtu);
+
+void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
+{
+	ip6_update_pmtu(skb, sock_net(sk), mtu,
+			sk->sk_bound_dev_if, sk->sk_mark);
+}
+EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
+
 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
 {
 	struct net_device *dev = dst->dev;
@@ -1703,116 +1736,6 @@ out:
 	dst_release(&rt->dst);
 }
 
-/*
- *	Handle ICMP "packet too big" messages
- *	i.e. Path MTU discovery
- */
-
-static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
-			     struct net *net, u32 pmtu, int ifindex)
-{
-	struct rt6_info *rt, *nrt;
-	int allfrag = 0;
-again:
-	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
-	if (!rt)
-		return;
-
-	if (rt6_check_expired(rt)) {
-		ip6_del_rt(rt);
-		goto again;
-	}
-
-	if (pmtu >= dst_mtu(&rt->dst))
-		goto out;
-
-	if (pmtu < IPV6_MIN_MTU) {
-		/*
-		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
-		 * MTU (1280) and a fragment header should always be included
-		 * after a node receiving Too Big message reporting PMTU is
-		 * less than the IPv6 Minimum Link MTU.
-		 */
-		pmtu = IPV6_MIN_MTU;
-		allfrag = 1;
-	}
-
-	/* New mtu received -> path was valid.
-	   They are sent only in response to data packets,
-	   so that this nexthop apparently is reachable. --ANK
-	 */
-	dst_confirm(&rt->dst);
-
-	/* Host route. If it is static, it would be better
-	   not to override it, but add new one, so that
-	   when cache entry will expire old pmtu
-	   would return automatically.
-	 */
-	if (rt->rt6i_flags & RTF_CACHE) {
-		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
-		if (allfrag) {
-			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
-			features |= RTAX_FEATURE_ALLFRAG;
-			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
-		}
-		rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
-		rt->rt6i_flags |= RTF_MODIFIED;
-		goto out;
-	}
-
-	/* Network route.
-	   Two cases are possible:
-	   1. It is connected route. Action: COW
-	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
-	 */
-	if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
-		nrt = rt6_alloc_cow(rt, daddr, saddr);
-	else
-		nrt = rt6_alloc_clone(rt, daddr);
-
-	if (nrt) {
-		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
-		if (allfrag) {
-			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
-			features |= RTAX_FEATURE_ALLFRAG;
-			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
-		}
-
-		/* According to RFC 1981, detecting PMTU increase shouldn't be
-		 * happened within 5 mins, the recommended timer is 10 mins.
-		 * Here this route expiration time is set to ip6_rt_mtu_expires
-		 * which is 10 mins. After 10 mins the decreased pmtu is expired
-		 * and detecting PMTU increase will be automatically happened.
-		 */
-		rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
-		nrt->rt6i_flags |= RTF_DYNAMIC;
-		ip6_ins_rt(nrt);
-	}
-out:
-	dst_release(&rt->dst);
-}
-
-void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
-			struct net_device *dev, u32 pmtu)
-{
-	struct net *net = dev_net(dev);
-
-	/*
-	 * RFC 1981 states that a node "MUST reduce the size of the packets it
-	 * is sending along the path" that caused the Packet Too Big message.
-	 * Since it's not possible in the general case to determine which
-	 * interface was used to send the original packet, we update the MTU
-	 * on the interface that will be used to send future packets. We also
-	 * update the MTU on the interface that received the Packet Too Big in
-	 * case the original packet was forced out that interface with
-	 * SO_BINDTODEVICE or similar. This is the next best thing to the
-	 * correct behaviour, which would be to update the MTU on all
-	 * interfaces.
-	 */
-	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
-	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
-}
-
 /*
  *	Misc support functions
  */
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f91b0bfd12d..26a88623940 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -415,6 +415,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		} else
 			dst_hold(dst);
 
+		dst->ops->update_pmtu(dst, ntohl(info));
+
 		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
 			tcp_sync_mss(sk, dst_mtu(dst));
 			tcp_simple_retransmit(sk);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f05099fc590..051ad481973 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -479,6 +479,9 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (sk == NULL)
 		return;
 
+	if (type == ICMPV6_PKT_TOOBIG)
+		ip6_sk_update_pmtu(skb, sk, info);
+
 	np = inet6_sk(sk);
 
 	if (!icmpv6_err_convert(type, code, &err) && !np->recverr)
-- 
cgit v1.2.3-70-g09d2


From f9242b6b28d61295f2bf7e8adfb1060b382e5381 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 19 Jun 2012 18:56:21 -0700
Subject: inet: Sanitize inet{,6} protocol demux.

Don't pretend that inet_protos[] and inet6_protos[] are hashes, thay
are just a straight arrays.  Remove all unnecessary hash masking.

Document MAX_INET_PROTOS.

Use RAW_HTABLE_SIZE when appropriate.

Reported-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/protocol.h |  7 +++++--
 net/ipv4/af_inet.c     | 26 ++++++++++++--------------
 net/ipv4/icmp.c        |  9 ++++-----
 net/ipv4/ip_input.c    |  5 ++---
 net/ipv4/protocol.c    |  8 +++-----
 net/ipv6/icmp.c        |  7 ++-----
 net/ipv6/ip6_input.c   |  9 +++------
 net/ipv6/protocol.c    |  8 +++-----
 net/ipv6/raw.c         |  4 ++--
 9 files changed, 36 insertions(+), 47 deletions(-)

(limited to 'net/ipv6/icmp.c')

diff --git a/include/net/protocol.h b/include/net/protocol.h
index 875f4895b03..a1b1b530c33 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -29,8 +29,11 @@
 #include <linux/ipv6.h>
 #endif
 
-#define MAX_INET_PROTOS	256		/* Must be a power of 2		*/
-
+/* This is one larger than the largest protocol value that can be
+ * found in an ipv4 or ipv6 header.  Since in both cases the protocol
+ * value is presented in a __u8, this is defined to be 256.
+ */
+#define MAX_INET_PROTOS		256
 
 /* This is used to register protocols. */
 struct net_protocol {
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e4e8e00a2c9..85a3b176313 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -242,20 +242,18 @@ void build_ehash_secret(void)
 }
 EXPORT_SYMBOL(build_ehash_secret);
 
-static inline int inet_netns_ok(struct net *net, int protocol)
+static inline int inet_netns_ok(struct net *net, __u8 protocol)
 {
-	int hash;
 	const struct net_protocol *ipprot;
 
 	if (net_eq(net, &init_net))
 		return 1;
 
-	hash = protocol & (MAX_INET_PROTOS - 1);
-	ipprot = rcu_dereference(inet_protos[hash]);
-
-	if (ipprot == NULL)
+	ipprot = rcu_dereference(inet_protos[protocol]);
+	if (ipprot == NULL) {
 		/* raw IP is OK */
 		return 1;
+	}
 	return ipprot->netns_ok;
 }
 
@@ -1216,8 +1214,8 @@ EXPORT_SYMBOL(inet_sk_rebuild_header);
 
 static int inet_gso_send_check(struct sk_buff *skb)
 {
-	const struct iphdr *iph;
 	const struct net_protocol *ops;
+	const struct iphdr *iph;
 	int proto;
 	int ihl;
 	int err = -EINVAL;
@@ -1236,7 +1234,7 @@ static int inet_gso_send_check(struct sk_buff *skb)
 	__skb_pull(skb, ihl);
 	skb_reset_transport_header(skb);
 	iph = ip_hdr(skb);
-	proto = iph->protocol & (MAX_INET_PROTOS - 1);
+	proto = iph->protocol;
 	err = -EPROTONOSUPPORT;
 
 	rcu_read_lock();
@@ -1253,8 +1251,8 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 	netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
-	struct iphdr *iph;
 	const struct net_protocol *ops;
+	struct iphdr *iph;
 	int proto;
 	int ihl;
 	int id;
@@ -1286,7 +1284,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 	skb_reset_transport_header(skb);
 	iph = ip_hdr(skb);
 	id = ntohs(iph->id);
-	proto = iph->protocol & (MAX_INET_PROTOS - 1);
+	proto = iph->protocol;
 	segs = ERR_PTR(-EPROTONOSUPPORT);
 
 	rcu_read_lock();
@@ -1340,7 +1338,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
 			goto out;
 	}
 
-	proto = iph->protocol & (MAX_INET_PROTOS - 1);
+	proto = iph->protocol;
 
 	rcu_read_lock();
 	ops = rcu_dereference(inet_protos[proto]);
@@ -1398,11 +1396,11 @@ out:
 
 static int inet_gro_complete(struct sk_buff *skb)
 {
-	const struct net_protocol *ops;
+	__be16 newlen = htons(skb->len - skb_network_offset(skb));
 	struct iphdr *iph = ip_hdr(skb);
-	int proto = iph->protocol & (MAX_INET_PROTOS - 1);
+	const struct net_protocol *ops;
+	int proto = iph->protocol;
 	int err = -ENOSYS;
-	__be16 newlen = htons(skb->len - skb_network_offset(skb));
 
 	csum_replace2(&iph->check, iph->tot_len, newlen);
 	iph->tot_len = newlen;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index e1caa1abe5d..49a74cc79dc 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -637,12 +637,12 @@ EXPORT_SYMBOL(icmp_send);
 
 static void icmp_unreach(struct sk_buff *skb)
 {
+	const struct net_protocol *ipprot;
 	const struct iphdr *iph;
 	struct icmphdr *icmph;
-	int hash, protocol;
-	const struct net_protocol *ipprot;
-	u32 info = 0;
 	struct net *net;
+	u32 info = 0;
+	int protocol;
 
 	net = dev_net(skb_dst(skb)->dev);
 
@@ -731,9 +731,8 @@ static void icmp_unreach(struct sk_buff *skb)
 	 */
 	raw_icmp_error(skb, protocol, info);
 
-	hash = protocol & (MAX_INET_PROTOS - 1);
 	rcu_read_lock();
-	ipprot = rcu_dereference(inet_protos[hash]);
+	ipprot = rcu_dereference(inet_protos[protocol]);
 	if (ipprot && ipprot->err_handler)
 		ipprot->err_handler(skb, info);
 	rcu_read_unlock();
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 8590144ca33..c4fe1d27113 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -198,14 +198,13 @@ static int ip_local_deliver_finish(struct sk_buff *skb)
 	rcu_read_lock();
 	{
 		int protocol = ip_hdr(skb)->protocol;
-		int hash, raw;
 		const struct net_protocol *ipprot;
+		int raw;
 
 	resubmit:
 		raw = raw_local_deliver(skb, protocol);
 
-		hash = protocol & (MAX_INET_PROTOS - 1);
-		ipprot = rcu_dereference(inet_protos[hash]);
+		ipprot = rcu_dereference(inet_protos[protocol]);
 		if (ipprot != NULL) {
 			int ret;
 
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 9ae5c01cd0b..8918eff1426 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -36,9 +36,7 @@ const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
 
 int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
 {
-	int hash = protocol & (MAX_INET_PROTOS - 1);
-
-	return !cmpxchg((const struct net_protocol **)&inet_protos[hash],
+	return !cmpxchg((const struct net_protocol **)&inet_protos[protocol],
 			NULL, prot) ? 0 : -1;
 }
 EXPORT_SYMBOL(inet_add_protocol);
@@ -49,9 +47,9 @@ EXPORT_SYMBOL(inet_add_protocol);
 
 int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol)
 {
-	int ret, hash = protocol & (MAX_INET_PROTOS - 1);
+	int ret;
 
-	ret = (cmpxchg((const struct net_protocol **)&inet_protos[hash],
+	ret = (cmpxchg((const struct net_protocol **)&inet_protos[protocol],
 		       prot, NULL) == prot) ? 0 : -1;
 
 	synchronize_net();
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 5247d5c211f..c7da1422cbd 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -600,9 +600,8 @@ static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
 {
 	const struct inet6_protocol *ipprot;
 	int inner_offset;
-	int hash;
-	u8 nexthdr;
 	__be16 frag_off;
+	u8 nexthdr;
 
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 		return;
@@ -629,10 +628,8 @@ static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
 	   --ANK (980726)
 	 */
 
-	hash = nexthdr & (MAX_INET_PROTOS - 1);
-
 	rcu_read_lock();
-	ipprot = rcu_dereference(inet6_protos[hash]);
+	ipprot = rcu_dereference(inet6_protos[nexthdr]);
 	if (ipprot && ipprot->err_handler)
 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
 	rcu_read_unlock();
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 21a15dfe4a9..5ab923e51af 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -168,13 +168,12 @@ drop:
 
 static int ip6_input_finish(struct sk_buff *skb)
 {
+	struct net *net = dev_net(skb_dst(skb)->dev);
 	const struct inet6_protocol *ipprot;
+	struct inet6_dev *idev;
 	unsigned int nhoff;
 	int nexthdr;
 	bool raw;
-	u8 hash;
-	struct inet6_dev *idev;
-	struct net *net = dev_net(skb_dst(skb)->dev);
 
 	/*
 	 *	Parse extension headers
@@ -189,9 +188,7 @@ resubmit:
 	nexthdr = skb_network_header(skb)[nhoff];
 
 	raw = raw6_local_deliver(skb, nexthdr);
-
-	hash = nexthdr & (MAX_INET_PROTOS - 1);
-	if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) {
+	if ((ipprot = rcu_dereference(inet6_protos[nexthdr])) != NULL) {
 		int ret;
 
 		if (ipprot->flags & INET6_PROTO_FINAL) {
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index 9a7978fdc02..053082dfc93 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -29,9 +29,7 @@ const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly;
 
 int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol)
 {
-	int hash = protocol & (MAX_INET_PROTOS - 1);
-
-	return !cmpxchg((const struct inet6_protocol **)&inet6_protos[hash],
+	return !cmpxchg((const struct inet6_protocol **)&inet6_protos[protocol],
 			NULL, prot) ? 0 : -1;
 }
 EXPORT_SYMBOL(inet6_add_protocol);
@@ -42,9 +40,9 @@ EXPORT_SYMBOL(inet6_add_protocol);
 
 int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol)
 {
-	int ret, hash = protocol & (MAX_INET_PROTOS - 1);
+	int ret;
 
-	ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[hash],
+	ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[protocol],
 		       prot, NULL) == prot) ? 0 : -1;
 
 	synchronize_net();
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 43b0042f15f..b5c1dcb2773 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -165,7 +165,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 	saddr = &ipv6_hdr(skb)->saddr;
 	daddr = saddr + 1;
 
-	hash = nexthdr & (MAX_INET_PROTOS - 1);
+	hash = nexthdr & (RAW_HTABLE_SIZE - 1);
 
 	read_lock(&raw_v6_hashinfo.lock);
 	sk = sk_head(&raw_v6_hashinfo.ht[hash]);
@@ -229,7 +229,7 @@ bool raw6_local_deliver(struct sk_buff *skb, int nexthdr)
 {
 	struct sock *raw_sk;
 
-	raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (MAX_INET_PROTOS - 1)]);
+	raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (RAW_HTABLE_SIZE - 1)]);
 	if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
 		raw_sk = NULL;
 
-- 
cgit v1.2.3-70-g09d2


From 1d861aa4b3fb08822055345f480850205ffe6170 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 10 Jul 2012 03:58:16 -0700
Subject: inet: Minimize use of cached route inetpeer.

Only use it in the absolutely required cases:

1) COW'ing metrics

2) ipv4 PMTU

3) ipv4 redirects

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/icmp.c       |  3 ++-
 net/ipv4/route.c      | 32 ++++++++++++++++----------------
 net/ipv6/icmp.c       |  4 +++-
 net/ipv6/ip6_output.c | 10 ++++++++--
 net/ipv6/ndisc.c      |  8 ++++++--
 5 files changed, 35 insertions(+), 22 deletions(-)

(limited to 'net/ipv6/icmp.c')

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4bce5a2830a..4a049449305 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -254,9 +254,10 @@ static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
 
 	/* Limit if icmp type is enabled in ratemask. */
 	if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) {
-		struct inet_peer *peer = rt_get_peer_create(rt, fl4->daddr);
+		struct inet_peer *peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, 1);
 		rc = inet_peer_xrlim_allow(peer,
 					   net->ipv4.sysctl_icmp_ratelimit);
+		inet_putpeer(peer);
 	}
 out:
 	return rc;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 78d81543766..e376354dcb6 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1289,20 +1289,15 @@ static void ip_select_fb_ident(struct iphdr *iph)
 
 void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
 {
-	struct rtable *rt = (struct rtable *) dst;
-
-	if (rt && !(rt->dst.flags & DST_NOPEER)) {
-		struct inet_peer *peer = rt_get_peer_create(rt, rt->rt_dst);
+	struct net *net = dev_net(dst->dev);
+	struct inet_peer *peer;
 
-		/* If peer is attached to destination, it is never detached,
-		   so that we need not to grab a lock to dereference it.
-		 */
-		if (peer) {
-			iph->id = htons(inet_getid(peer, more));
-			return;
-		}
-	} else if (!rt)
-		pr_debug("rt_bind_peer(0) @%p\n", __builtin_return_address(0));
+	peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
+	if (peer) {
+		iph->id = htons(inet_getid(peer, more));
+		inet_putpeer(peer);
+		return;
+	}
 
 	ip_select_fb_ident(iph);
 }
@@ -1492,6 +1487,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 	struct rtable *rt = skb_rtable(skb);
 	struct in_device *in_dev;
 	struct inet_peer *peer;
+	struct net *net;
 	int log_martians;
 
 	rcu_read_lock();
@@ -1503,7 +1499,8 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
 	rcu_read_unlock();
 
-	peer = rt_get_peer_create(rt, rt->rt_dst);
+	net = dev_net(rt->dst.dev);
+	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
 	if (!peer) {
 		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
 		return;
@@ -1520,7 +1517,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 	 */
 	if (peer->rate_tokens >= ip_rt_redirect_number) {
 		peer->rate_last = jiffies;
-		return;
+		goto out_put_peer;
 	}
 
 	/* Check for load limit; set rate_last to the latest sent
@@ -1541,6 +1538,8 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 					     &rt->rt_dst, &rt->rt_gateway);
 #endif
 	}
+out_put_peer:
+	inet_putpeer(peer);
 }
 
 static int ip_error(struct sk_buff *skb)
@@ -1583,7 +1582,7 @@ static int ip_error(struct sk_buff *skb)
 		break;
 	}
 
-	peer = rt_get_peer_create(rt, rt->rt_dst);
+	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
 
 	send = true;
 	if (peer) {
@@ -1596,6 +1595,7 @@ static int ip_error(struct sk_buff *skb)
 			peer->rate_tokens -= ip_rt_error_cost;
 		else
 			send = false;
+		inet_putpeer(peer);
 	}
 	if (send)
 		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index c7da1422cbd..a113f7d7e93 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -194,8 +194,10 @@ static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
 		if (rt->rt6i_dst.plen < 128)
 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
 
-		peer = rt6_get_peer_create(rt);
+		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
 		res = inet_peer_xrlim_allow(peer, tmo);
+		if (peer)
+			inet_putpeer(peer);
 	}
 	dst_release(dst);
 	return res;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index c6af5963a20..5b2d63ed793 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -466,13 +466,15 @@ int ip6_forward(struct sk_buff *skb)
 		else
 			target = &hdr->daddr;
 
-		peer = rt6_get_peer_create(rt);
+		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
 
 		/* Limit redirects both by destination (here)
 		   and by source (inside ndisc_send_redirect)
 		 */
 		if (inet_peer_xrlim_allow(peer, 1*HZ))
 			ndisc_send_redirect(skb, target);
+		if (peer)
+			inet_putpeer(peer);
 	} else {
 		int addrtype = ipv6_addr_type(&hdr->saddr);
 
@@ -592,10 +594,14 @@ void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
 	int old, new;
 
 	if (rt && !(rt->dst.flags & DST_NOPEER)) {
-		struct inet_peer *peer = rt6_get_peer_create(rt);
+		struct inet_peer *peer;
+		struct net *net;
 
+		net = dev_net(rt->dst.dev);
+		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
 		if (peer) {
 			fhdr->identification = htonl(inet_getid(peer, 0));
+			inet_putpeer(peer);
 			return;
 		}
 	}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 69a6330dea9..0fddd571400 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1486,6 +1486,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
 	int rd_len;
 	int err;
 	u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
+	bool ret;
 
 	if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
 		ND_PRINTK(2, warn, "Redirect: no link-local address on %s\n",
@@ -1519,8 +1520,11 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
 			  "Redirect: destination is not a neighbour\n");
 		goto release;
 	}
-	peer = rt6_get_peer_create(rt);
-	if (!inet_peer_xrlim_allow(peer, 1*HZ))
+	peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
+	ret = inet_peer_xrlim_allow(peer, 1*HZ);
+	if (peer)
+		inet_putpeer(peer);
+	if (!ret)
 		goto release;
 
 	if (dev->addr_len) {
-- 
cgit v1.2.3-70-g09d2


From b94f1c0904da9b8bf031667afc48080ba7c3e8c9 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 12 Jul 2012 00:33:37 -0700
Subject: ipv6: Use icmpv6_notify() to propagate redirect, instead of
 rt6_redirect().

And delete rt6_redirect(), since it is no longer used.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_route.h |   2 -
 include/net/ipv6.h      |   2 +
 net/ipv6/icmp.c         |   2 +-
 net/ipv6/ndisc.c        |   2 +-
 net/ipv6/route.c        | 107 ------------------------------------------------
 5 files changed, 4 insertions(+), 111 deletions(-)

(limited to 'net/ipv6/icmp.c')

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 693940565d8..b6b6f7d6f3c 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -133,8 +133,6 @@ extern int			rt6_route_rcv(struct net_device *dev,
 					      u8 *opt, int len,
 					      const struct in6_addr *gwaddr);
 
-extern void			rt6_redirect(struct sk_buff *skb);
-
 extern void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
 			    int oif, u32 mark);
 extern void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk,
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index d4261d4d6c4..f695f39e892 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -251,6 +251,8 @@ static inline void fl6_sock_release(struct ip6_flowlabel *fl)
 		atomic_dec(&fl->users);
 }
 
+extern void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info);
+
 extern int 			ip6_ra_control(struct sock *sk, int sel);
 
 extern int			ipv6_parse_hopopts(struct sk_buff *skb);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index a113f7d7e93..24d69dbca4d 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -598,7 +598,7 @@ out:
 	icmpv6_xmit_unlock(sk);
 }
 
-static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
+void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
 {
 	const struct inet6_protocol *ipprot;
 	int inner_offset;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index b8d53e186a7..ff36194a71a 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1350,7 +1350,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
 		return;
 	}
 
-	rt6_redirect(skb);
+	icmpv6_notify(skb, NDISC_REDIRECT, 0, 0);
 }
 
 void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f52cf83bb1e..7296af144d6 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1633,92 +1633,6 @@ static int ip6_route_del(struct fib6_config *cfg)
 	return err;
 }
 
-/*
- *	Handle redirects
- */
-struct ip6rd_flowi {
-	struct flowi6 fl6;
-	struct in6_addr gateway;
-};
-
-static struct rt6_info *__ip6_route_redirect(struct net *net,
-					     struct fib6_table *table,
-					     struct flowi6 *fl6,
-					     int flags)
-{
-	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
-	struct rt6_info *rt;
-	struct fib6_node *fn;
-
-	/*
-	 * Get the "current" route for this destination and
-	 * check if the redirect has come from approriate router.
-	 *
-	 * RFC 2461 specifies that redirects should only be
-	 * accepted if they come from the nexthop to the target.
-	 * Due to the way the routes are chosen, this notion
-	 * is a bit fuzzy and one might need to check all possible
-	 * routes.
-	 */
-
-	read_lock_bh(&table->tb6_lock);
-	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
-restart:
-	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
-		/*
-		 * Current route is on-link; redirect is always invalid.
-		 *
-		 * Seems, previous statement is not true. It could
-		 * be node, which looks for us as on-link (f.e. proxy ndisc)
-		 * But then router serving it might decide, that we should
-		 * know truth 8)8) --ANK (980726).
-		 */
-		if (rt6_check_expired(rt))
-			continue;
-		if (!(rt->rt6i_flags & RTF_GATEWAY))
-			continue;
-		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
-			continue;
-		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
-			continue;
-		break;
-	}
-
-	if (!rt)
-		rt = net->ipv6.ip6_null_entry;
-	BACKTRACK(net, &fl6->saddr);
-out:
-	dst_hold(&rt->dst);
-
-	read_unlock_bh(&table->tb6_lock);
-
-	return rt;
-};
-
-static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
-					   const struct in6_addr *src,
-					   const struct in6_addr *gateway,
-					   struct net_device *dev)
-{
-	int flags = RT6_LOOKUP_F_HAS_SADDR;
-	struct net *net = dev_net(dev);
-	struct ip6rd_flowi rdfl = {
-		.fl6 = {
-			.flowi6_oif = dev->ifindex,
-			.daddr = *dest,
-			.saddr = *src,
-		},
-	};
-
-	rdfl.gateway = *gateway;
-
-	if (rt6_need_strict(dest))
-		flags |= RT6_LOOKUP_F_IFACE;
-
-	return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
-						   flags, __ip6_route_redirect);
-}
-
 static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb)
 {
 	struct net *net = dev_net(skb->dev);
@@ -1848,27 +1762,6 @@ out:
 	neigh_release(neigh);
 }
 
-void rt6_redirect(struct sk_buff *skb)
-{
-	const struct in6_addr *target;
-	const struct in6_addr *dest;
-	const struct in6_addr *src;
-	const struct in6_addr *saddr;
-	struct icmp6hdr *icmph;
-	struct rt6_info *rt;
-
-	icmph = icmp6_hdr(skb);
-	target = (const struct in6_addr *) (icmph + 1);
-	dest = target + 1;
-
-	src = &ipv6_hdr(skb)->daddr;
-	saddr = &ipv6_hdr(skb)->saddr;
-
-	rt = ip6_route_redirect(dest, src, saddr, skb->dev);
-	rt6_do_redirect(&rt->dst, skb);
-	dst_release(&rt->dst);
-}
-
 /*
  *	Misc support functions
  */
-- 
cgit v1.2.3-70-g09d2