From fbfe95a42e90b3dd079cc9019ba7d7700feee0f6 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 8 Jun 2012 23:24:18 -0700
Subject: inet: Create and use rt{,6}_get_peer_create().

There's a lot of places that open-code rt{,6}_get_peer() only because
they want to set 'create' to one.  So add an rt{,6}_get_peer_create()
for their sake.

There were also a few spots open-coding plain rt{,6}_get_peer() and
those are transformed here as well.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/icmp.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net/ipv4/icmp.c')

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index c75efbdc71c..0c78ef1e5dd 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -253,9 +253,8 @@ static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
 
 	/* Limit if icmp type is enabled in ratemask. */
 	if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) {
-		if (!rt->peer)
-			rt_bind_peer(rt, fl4->daddr, 1);
-		rc = inet_peer_xrlim_allow(rt->peer,
+		struct inet_peer *peer = rt_get_peer_create(rt, fl4->daddr);
+		rc = inet_peer_xrlim_allow(peer,
 					   net->ipv4.sysctl_icmp_ratelimit);
 	}
 out:
-- 
cgit v1.2.3-70-g09d2


From 46517008e1168dc926cf2c47d529efc07eca85c0 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 10 Jun 2012 00:04:12 -0700
Subject: ipv4: Kill ip_rt_frag_needed().

There is zero point to this function.

It's only real substance is to perform an extremely outdated BSD4.2
ICMP check, which we can safely remove.  If you really have a MTU
limited link being routed by a BSD4.2 derived system, here's a nickel
go buy yourself a real router.

The other actions of ip_rt_frag_needed(), checking and conditionally
updating the peer, are done by the per-protocol handlers of the ICMP
event.

TCP, UDP, et al. have a handler which will receive this event and
transmit it back into the associated route via dst_ops->update_pmtu().

This simplification is important, because it eliminates the one place
where we do not have a proper route context in which to make an
inetpeer lookup.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/route.h  |  2 --
 net/ipv4/icmp.c      |  4 +---
 net/ipv4/route.c     | 61 ----------------------------------------------------
 net/rxrpc/ar-error.c |  4 ----
 4 files changed, 1 insertion(+), 70 deletions(-)

(limited to 'net/ipv4/icmp.c')

diff --git a/include/net/route.h b/include/net/route.h
index 6340c37677f..cc693a5bb20 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -215,8 +215,6 @@ static inline int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 s
 	return ip_route_input_common(skb, dst, src, tos, devin, true);
 }
 
-extern unsigned short	ip_rt_frag_needed(struct net *net, const struct iphdr *iph,
-					  unsigned short new_mtu, struct net_device *dev);
 extern void		ip_rt_send_redirect(struct sk_buff *skb);
 
 extern unsigned int		inet_addr_type(struct net *net, __be32 addr);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 0c78ef1e5dd..e1caa1abe5d 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -673,9 +673,7 @@ static void icmp_unreach(struct sk_buff *skb)
 				LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: fragmentation needed and DF set\n"),
 					       &iph->daddr);
 			} else {
-				info = ip_rt_frag_needed(net, iph,
-							 ntohs(icmph->un.frag.mtu),
-							 skb->dev);
+				info = ntohs(icmph->un.frag.mtu);
 				if (!info)
 					goto out;
 			}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 03e5b614370..4f5834c4a66 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1664,67 +1664,6 @@ out:	kfree_skb(skb);
 	return 0;
 }
 
-/*
- *	The last two values are not from the RFC but
- *	are needed for AMPRnet AX.25 paths.
- */
-
-static const unsigned short mtu_plateau[] =
-{32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 };
-
-static inline unsigned short guess_mtu(unsigned short old_mtu)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(mtu_plateau); i++)
-		if (old_mtu > mtu_plateau[i])
-			return mtu_plateau[i];
-	return 68;
-}
-
-unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph,
-				 unsigned short new_mtu,
-				 struct net_device *dev)
-{
-	unsigned short old_mtu = ntohs(iph->tot_len);
-	unsigned short est_mtu = 0;
-	struct inet_peer *peer;
-
-	peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
-	if (peer) {
-		unsigned short mtu = new_mtu;
-
-		if (new_mtu < 68 || new_mtu >= old_mtu) {
-			/* BSD 4.2 derived systems incorrectly adjust
-			 * tot_len by the IP header length, and report
-			 * a zero MTU in the ICMP message.
-			 */
-			if (mtu == 0 &&
-			    old_mtu >= 68 + (iph->ihl << 2))
-				old_mtu -= iph->ihl << 2;
-			mtu = guess_mtu(old_mtu);
-		}
-
-		if (mtu < ip_rt_min_pmtu)
-			mtu = ip_rt_min_pmtu;
-		if (!peer->pmtu_expires || mtu < peer->pmtu_learned) {
-			unsigned long pmtu_expires;
-
-			pmtu_expires = jiffies + ip_rt_mtu_expires;
-			if (!pmtu_expires)
-				pmtu_expires = 1UL;
-
-			est_mtu = mtu;
-			peer->pmtu_learned = mtu;
-			peer->pmtu_expires = pmtu_expires;
-			atomic_inc(&__rt_peer_genid);
-		}
-
-		inet_putpeer(peer);
-	}
-	return est_mtu ? : new_mtu;
-}
-
 static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer)
 {
 	unsigned long expires = ACCESS_ONCE(peer->pmtu_expires);
diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c
index 5d6b572a670..a9206087b4d 100644
--- a/net/rxrpc/ar-error.c
+++ b/net/rxrpc/ar-error.c
@@ -81,10 +81,6 @@ void rxrpc_UDP_error_report(struct sock *sk)
 			_net("I/F MTU %u", mtu);
 		}
 
-		/* ip_rt_frag_needed() may have eaten the info */
-		if (mtu == 0)
-			mtu = ntohs(icmp_hdr(skb)->un.frag.mtu);
-
 		if (mtu == 0) {
 			/* they didn't give us a size, estimate one */
 			if (mtu > 1500) {
-- 
cgit v1.2.3-70-g09d2


From f9242b6b28d61295f2bf7e8adfb1060b382e5381 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 19 Jun 2012 18:56:21 -0700
Subject: inet: Sanitize inet{,6} protocol demux.

Don't pretend that inet_protos[] and inet6_protos[] are hashes, thay
are just a straight arrays.  Remove all unnecessary hash masking.

Document MAX_INET_PROTOS.

Use RAW_HTABLE_SIZE when appropriate.

Reported-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/protocol.h |  7 +++++--
 net/ipv4/af_inet.c     | 26 ++++++++++++--------------
 net/ipv4/icmp.c        |  9 ++++-----
 net/ipv4/ip_input.c    |  5 ++---
 net/ipv4/protocol.c    |  8 +++-----
 net/ipv6/icmp.c        |  7 ++-----
 net/ipv6/ip6_input.c   |  9 +++------
 net/ipv6/protocol.c    |  8 +++-----
 net/ipv6/raw.c         |  4 ++--
 9 files changed, 36 insertions(+), 47 deletions(-)

(limited to 'net/ipv4/icmp.c')

diff --git a/include/net/protocol.h b/include/net/protocol.h
index 875f4895b03..a1b1b530c33 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -29,8 +29,11 @@
 #include <linux/ipv6.h>
 #endif
 
-#define MAX_INET_PROTOS	256		/* Must be a power of 2		*/
-
+/* This is one larger than the largest protocol value that can be
+ * found in an ipv4 or ipv6 header.  Since in both cases the protocol
+ * value is presented in a __u8, this is defined to be 256.
+ */
+#define MAX_INET_PROTOS		256
 
 /* This is used to register protocols. */
 struct net_protocol {
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e4e8e00a2c9..85a3b176313 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -242,20 +242,18 @@ void build_ehash_secret(void)
 }
 EXPORT_SYMBOL(build_ehash_secret);
 
-static inline int inet_netns_ok(struct net *net, int protocol)
+static inline int inet_netns_ok(struct net *net, __u8 protocol)
 {
-	int hash;
 	const struct net_protocol *ipprot;
 
 	if (net_eq(net, &init_net))
 		return 1;
 
-	hash = protocol & (MAX_INET_PROTOS - 1);
-	ipprot = rcu_dereference(inet_protos[hash]);
-
-	if (ipprot == NULL)
+	ipprot = rcu_dereference(inet_protos[protocol]);
+	if (ipprot == NULL) {
 		/* raw IP is OK */
 		return 1;
+	}
 	return ipprot->netns_ok;
 }
 
@@ -1216,8 +1214,8 @@ EXPORT_SYMBOL(inet_sk_rebuild_header);
 
 static int inet_gso_send_check(struct sk_buff *skb)
 {
-	const struct iphdr *iph;
 	const struct net_protocol *ops;
+	const struct iphdr *iph;
 	int proto;
 	int ihl;
 	int err = -EINVAL;
@@ -1236,7 +1234,7 @@ static int inet_gso_send_check(struct sk_buff *skb)
 	__skb_pull(skb, ihl);
 	skb_reset_transport_header(skb);
 	iph = ip_hdr(skb);
-	proto = iph->protocol & (MAX_INET_PROTOS - 1);
+	proto = iph->protocol;
 	err = -EPROTONOSUPPORT;
 
 	rcu_read_lock();
@@ -1253,8 +1251,8 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 	netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
-	struct iphdr *iph;
 	const struct net_protocol *ops;
+	struct iphdr *iph;
 	int proto;
 	int ihl;
 	int id;
@@ -1286,7 +1284,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 	skb_reset_transport_header(skb);
 	iph = ip_hdr(skb);
 	id = ntohs(iph->id);
-	proto = iph->protocol & (MAX_INET_PROTOS - 1);
+	proto = iph->protocol;
 	segs = ERR_PTR(-EPROTONOSUPPORT);
 
 	rcu_read_lock();
@@ -1340,7 +1338,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
 			goto out;
 	}
 
-	proto = iph->protocol & (MAX_INET_PROTOS - 1);
+	proto = iph->protocol;
 
 	rcu_read_lock();
 	ops = rcu_dereference(inet_protos[proto]);
@@ -1398,11 +1396,11 @@ out:
 
 static int inet_gro_complete(struct sk_buff *skb)
 {
-	const struct net_protocol *ops;
+	__be16 newlen = htons(skb->len - skb_network_offset(skb));
 	struct iphdr *iph = ip_hdr(skb);
-	int proto = iph->protocol & (MAX_INET_PROTOS - 1);
+	const struct net_protocol *ops;
+	int proto = iph->protocol;
 	int err = -ENOSYS;
-	__be16 newlen = htons(skb->len - skb_network_offset(skb));
 
 	csum_replace2(&iph->check, iph->tot_len, newlen);
 	iph->tot_len = newlen;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index e1caa1abe5d..49a74cc79dc 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -637,12 +637,12 @@ EXPORT_SYMBOL(icmp_send);
 
 static void icmp_unreach(struct sk_buff *skb)
 {
+	const struct net_protocol *ipprot;
 	const struct iphdr *iph;
 	struct icmphdr *icmph;
-	int hash, protocol;
-	const struct net_protocol *ipprot;
-	u32 info = 0;
 	struct net *net;
+	u32 info = 0;
+	int protocol;
 
 	net = dev_net(skb_dst(skb)->dev);
 
@@ -731,9 +731,8 @@ static void icmp_unreach(struct sk_buff *skb)
 	 */
 	raw_icmp_error(skb, protocol, info);
 
-	hash = protocol & (MAX_INET_PROTOS - 1);
 	rcu_read_lock();
-	ipprot = rcu_dereference(inet_protos[hash]);
+	ipprot = rcu_dereference(inet_protos[protocol]);
 	if (ipprot && ipprot->err_handler)
 		ipprot->err_handler(skb, info);
 	rcu_read_unlock();
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 8590144ca33..c4fe1d27113 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -198,14 +198,13 @@ static int ip_local_deliver_finish(struct sk_buff *skb)
 	rcu_read_lock();
 	{
 		int protocol = ip_hdr(skb)->protocol;
-		int hash, raw;
 		const struct net_protocol *ipprot;
+		int raw;
 
 	resubmit:
 		raw = raw_local_deliver(skb, protocol);
 
-		hash = protocol & (MAX_INET_PROTOS - 1);
-		ipprot = rcu_dereference(inet_protos[hash]);
+		ipprot = rcu_dereference(inet_protos[protocol]);
 		if (ipprot != NULL) {
 			int ret;
 
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 9ae5c01cd0b..8918eff1426 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -36,9 +36,7 @@ const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
 
 int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
 {
-	int hash = protocol & (MAX_INET_PROTOS - 1);
-
-	return !cmpxchg((const struct net_protocol **)&inet_protos[hash],
+	return !cmpxchg((const struct net_protocol **)&inet_protos[protocol],
 			NULL, prot) ? 0 : -1;
 }
 EXPORT_SYMBOL(inet_add_protocol);
@@ -49,9 +47,9 @@ EXPORT_SYMBOL(inet_add_protocol);
 
 int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol)
 {
-	int ret, hash = protocol & (MAX_INET_PROTOS - 1);
+	int ret;
 
-	ret = (cmpxchg((const struct net_protocol **)&inet_protos[hash],
+	ret = (cmpxchg((const struct net_protocol **)&inet_protos[protocol],
 		       prot, NULL) == prot) ? 0 : -1;
 
 	synchronize_net();
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 5247d5c211f..c7da1422cbd 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -600,9 +600,8 @@ static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
 {
 	const struct inet6_protocol *ipprot;
 	int inner_offset;
-	int hash;
-	u8 nexthdr;
 	__be16 frag_off;
+	u8 nexthdr;
 
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 		return;
@@ -629,10 +628,8 @@ static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
 	   --ANK (980726)
 	 */
 
-	hash = nexthdr & (MAX_INET_PROTOS - 1);
-
 	rcu_read_lock();
-	ipprot = rcu_dereference(inet6_protos[hash]);
+	ipprot = rcu_dereference(inet6_protos[nexthdr]);
 	if (ipprot && ipprot->err_handler)
 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
 	rcu_read_unlock();
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 21a15dfe4a9..5ab923e51af 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -168,13 +168,12 @@ drop:
 
 static int ip6_input_finish(struct sk_buff *skb)
 {
+	struct net *net = dev_net(skb_dst(skb)->dev);
 	const struct inet6_protocol *ipprot;
+	struct inet6_dev *idev;
 	unsigned int nhoff;
 	int nexthdr;
 	bool raw;
-	u8 hash;
-	struct inet6_dev *idev;
-	struct net *net = dev_net(skb_dst(skb)->dev);
 
 	/*
 	 *	Parse extension headers
@@ -189,9 +188,7 @@ resubmit:
 	nexthdr = skb_network_header(skb)[nhoff];
 
 	raw = raw6_local_deliver(skb, nexthdr);
-
-	hash = nexthdr & (MAX_INET_PROTOS - 1);
-	if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) {
+	if ((ipprot = rcu_dereference(inet6_protos[nexthdr])) != NULL) {
 		int ret;
 
 		if (ipprot->flags & INET6_PROTO_FINAL) {
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index 9a7978fdc02..053082dfc93 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -29,9 +29,7 @@ const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly;
 
 int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol)
 {
-	int hash = protocol & (MAX_INET_PROTOS - 1);
-
-	return !cmpxchg((const struct inet6_protocol **)&inet6_protos[hash],
+	return !cmpxchg((const struct inet6_protocol **)&inet6_protos[protocol],
 			NULL, prot) ? 0 : -1;
 }
 EXPORT_SYMBOL(inet6_add_protocol);
@@ -42,9 +40,9 @@ EXPORT_SYMBOL(inet6_add_protocol);
 
 int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol)
 {
-	int ret, hash = protocol & (MAX_INET_PROTOS - 1);
+	int ret;
 
-	ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[hash],
+	ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[protocol],
 		       prot, NULL) == prot) ? 0 : -1;
 
 	synchronize_net();
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 43b0042f15f..b5c1dcb2773 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -165,7 +165,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 	saddr = &ipv6_hdr(skb)->saddr;
 	daddr = saddr + 1;
 
-	hash = nexthdr & (MAX_INET_PROTOS - 1);
+	hash = nexthdr & (RAW_HTABLE_SIZE - 1);
 
 	read_lock(&raw_v6_hashinfo.lock);
 	sk = sk_head(&raw_v6_hashinfo.ht[hash]);
@@ -229,7 +229,7 @@ bool raw6_local_deliver(struct sk_buff *skb, int nexthdr)
 {
 	struct sock *raw_sk;
 
-	raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (MAX_INET_PROTOS - 1)]);
+	raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (RAW_HTABLE_SIZE - 1)]);
 	if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
 		raw_sk = NULL;
 
-- 
cgit v1.2.3-70-g09d2


From 35ebf65e851c6d9731abc6362b189858eb59f4d3 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 28 Jun 2012 03:59:11 -0700
Subject: ipv4: Create and use fib_compute_spec_dst() helper.

The specific destination is the host we direct unicast replies to.
Usually this is the original packet source address, but if we are
responding to a multicast or broadcast packet we have to use something
different.

Specifically we must use the source address we would use if we were to
send a packet to the unicast source of the original packet.

The routing cache precomputes this value, but we want to remove that
precomputation because it creates a hard dependency on the expensive
rpfilter source address validation which we'd like to make cheaper.

There are only three places where this matters:

1) ICMP replies.

2) pktinfo CMSG

3) IP options

Now there will be no real users of rt->rt_spec_dst and we can simply
remove it altogether.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h    |  1 +
 net/ipv4/fib_frontend.c | 29 +++++++++++++++++++++++++++++
 net/ipv4/icmp.c         |  6 ++++--
 net/ipv4/ip_options.c   | 22 +++++++++++-----------
 net/ipv4/ip_sockglue.c  |  7 ++++---
 5 files changed, 49 insertions(+), 16 deletions(-)

(limited to 'net/ipv4/icmp.c')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 4b347c0ca09..1687b3de54f 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -230,6 +230,7 @@ extern struct fib_table *fib_get_table(struct net *net, u32 id);
 /* Exported by fib_frontend.c */
 extern const struct nla_policy rtm_ipv4_policy[];
 extern void		ip_fib_init(void);
+extern __be32 fib_compute_spec_dst(struct sk_buff *skb);
 extern int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 			       u8 tos, int oif, struct net_device *dev,
 			       __be32 *spec_dst, u32 *itag);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 3854411fa37..451939b60c5 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -180,6 +180,35 @@ unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
 }
 EXPORT_SYMBOL(inet_dev_addr_type);
 
+__be32 fib_compute_spec_dst(struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+	struct in_device *in_dev;
+	struct fib_result res;
+	struct flowi4 fl4;
+	struct net *net;
+
+	if (skb->pkt_type != PACKET_BROADCAST &&
+	    skb->pkt_type != PACKET_MULTICAST)
+		return ip_hdr(skb)->daddr;
+
+	in_dev = __in_dev_get_rcu(dev);
+	BUG_ON(!in_dev);
+	fl4.flowi4_oif = 0;
+	fl4.flowi4_iif = 0;
+	fl4.daddr = ip_hdr(skb)->saddr;
+	fl4.saddr = ip_hdr(skb)->daddr;
+	fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
+	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
+	fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0;
+
+	net = dev_net(dev);
+	if (!fib_lookup(net, &fl4, &res))
+		return FIB_RES_PREFSRC(net, res);
+	else
+		return inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
+}
+
 /* Given (packet source, input interface) and optional (dst, oif, tos):
  * - (main) check, that source is valid i.e. not broadcast or our local
  *   address.
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 49a74cc79dc..4bce5a2830a 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -95,6 +95,7 @@
 #include <net/checksum.h>
 #include <net/xfrm.h>
 #include <net/inet_common.h>
+#include <net/ip_fib.h>
 
 /*
  *	Build xmit assembly blocks
@@ -333,7 +334,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 	struct flowi4 fl4;
 	struct sock *sk;
 	struct inet_sock *inet;
-	__be32 daddr;
+	__be32 daddr, saddr;
 
 	if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
 		return;
@@ -347,6 +348,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 
 	inet->tos = ip_hdr(skb)->tos;
 	daddr = ipc.addr = ip_hdr(skb)->saddr;
+	saddr = fib_compute_spec_dst(skb);
 	ipc.opt = NULL;
 	ipc.tx_flags = 0;
 	if (icmp_param->replyopts.opt.opt.optlen) {
@@ -356,7 +358,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 	}
 	memset(&fl4, 0, sizeof(fl4));
 	fl4.daddr = daddr;
-	fl4.saddr = rt->rt_spec_dst;
+	fl4.saddr = saddr;
 	fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
 	fl4.flowi4_proto = IPPROTO_ICMP;
 	security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 708b99494e2..766dfe56885 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -27,6 +27,7 @@
 #include <net/icmp.h>
 #include <net/route.h>
 #include <net/cipso_ipv4.h>
+#include <net/ip_fib.h>
 
 /*
  * Write options to IP header, record destination address to
@@ -104,7 +105,7 @@ int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb)
 	sptr = skb_network_header(skb);
 	dptr = dopt->__data;
 
-	daddr = skb_rtable(skb)->rt_spec_dst;
+	daddr = fib_compute_spec_dst(skb);
 
 	if (sopt->rr) {
 		optlen  = sptr[sopt->rr+1];
@@ -250,15 +251,14 @@ void ip_options_fragment(struct sk_buff *skb)
 int ip_options_compile(struct net *net,
 		       struct ip_options *opt, struct sk_buff *skb)
 {
-	int l;
-	unsigned char *iph;
-	unsigned char *optptr;
-	int optlen;
+	__be32 spec_dst = (__force __be32) 0;
 	unsigned char *pp_ptr = NULL;
-	struct rtable *rt = NULL;
+	unsigned char *optptr;
+	unsigned char *iph;
+	int optlen, l;
 
 	if (skb != NULL) {
-		rt = skb_rtable(skb);
+		spec_dst = fib_compute_spec_dst(skb);
 		optptr = (unsigned char *)&(ip_hdr(skb)[1]);
 	} else
 		optptr = opt->__data;
@@ -330,8 +330,8 @@ int ip_options_compile(struct net *net,
 					pp_ptr = optptr + 2;
 					goto error;
 				}
-				if (rt) {
-					memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4);
+				if (skb) {
+					memcpy(&optptr[optptr[2]-1], &spec_dst, 4);
 					opt->is_changed = 1;
 				}
 				optptr[2] += 4;
@@ -372,8 +372,8 @@ int ip_options_compile(struct net *net,
 						goto error;
 					}
 					opt->ts = optptr - iph;
-					if (rt)  {
-						memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4);
+					if (skb)  {
+						memcpy(&optptr[optptr[2]-1], &spec_dst, 4);
 						timeptr = &optptr[optptr[2]+3];
 					}
 					opt->ts_needaddr = 1;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 0d11f234d61..de29f46f68b 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -40,6 +40,7 @@
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/transp_v6.h>
 #endif
+#include <net/ip_fib.h>
 
 #include <linux/errqueue.h>
 #include <asm/uaccess.h>
@@ -1019,8 +1020,8 @@ e_inval:
  * @sk: socket
  * @skb: buffer
  *
- * To support IP_CMSG_PKTINFO option, we store rt_iif and rt_spec_dst
- * in skb->cb[] before dst drop.
+ * To support IP_CMSG_PKTINFO option, we store rt_iif and specific
+ * destination in skb->cb[] before dst drop.
  * This way, receiver doesnt make cache line misses to read rtable.
  */
 void ipv4_pktinfo_prepare(struct sk_buff *skb)
@@ -1030,7 +1031,7 @@ void ipv4_pktinfo_prepare(struct sk_buff *skb)
 
 	if (rt) {
 		pktinfo->ipi_ifindex = rt->rt_iif;
-		pktinfo->ipi_spec_dst.s_addr = rt->rt_spec_dst;
+		pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
 	} else {
 		pktinfo->ipi_ifindex = 0;
 		pktinfo->ipi_spec_dst.s_addr = 0;
-- 
cgit v1.2.3-70-g09d2


From 1d861aa4b3fb08822055345f480850205ffe6170 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 10 Jul 2012 03:58:16 -0700
Subject: inet: Minimize use of cached route inetpeer.

Only use it in the absolutely required cases:

1) COW'ing metrics

2) ipv4 PMTU

3) ipv4 redirects

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/icmp.c       |  3 ++-
 net/ipv4/route.c      | 32 ++++++++++++++++----------------
 net/ipv6/icmp.c       |  4 +++-
 net/ipv6/ip6_output.c | 10 ++++++++--
 net/ipv6/ndisc.c      |  8 ++++++--
 5 files changed, 35 insertions(+), 22 deletions(-)

(limited to 'net/ipv4/icmp.c')

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4bce5a2830a..4a049449305 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -254,9 +254,10 @@ static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
 
 	/* Limit if icmp type is enabled in ratemask. */
 	if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) {
-		struct inet_peer *peer = rt_get_peer_create(rt, fl4->daddr);
+		struct inet_peer *peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, 1);
 		rc = inet_peer_xrlim_allow(peer,
 					   net->ipv4.sysctl_icmp_ratelimit);
+		inet_putpeer(peer);
 	}
 out:
 	return rc;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 78d81543766..e376354dcb6 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1289,20 +1289,15 @@ static void ip_select_fb_ident(struct iphdr *iph)
 
 void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
 {
-	struct rtable *rt = (struct rtable *) dst;
-
-	if (rt && !(rt->dst.flags & DST_NOPEER)) {
-		struct inet_peer *peer = rt_get_peer_create(rt, rt->rt_dst);
+	struct net *net = dev_net(dst->dev);
+	struct inet_peer *peer;
 
-		/* If peer is attached to destination, it is never detached,
-		   so that we need not to grab a lock to dereference it.
-		 */
-		if (peer) {
-			iph->id = htons(inet_getid(peer, more));
-			return;
-		}
-	} else if (!rt)
-		pr_debug("rt_bind_peer(0) @%p\n", __builtin_return_address(0));
+	peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
+	if (peer) {
+		iph->id = htons(inet_getid(peer, more));
+		inet_putpeer(peer);
+		return;
+	}
 
 	ip_select_fb_ident(iph);
 }
@@ -1492,6 +1487,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 	struct rtable *rt = skb_rtable(skb);
 	struct in_device *in_dev;
 	struct inet_peer *peer;
+	struct net *net;
 	int log_martians;
 
 	rcu_read_lock();
@@ -1503,7 +1499,8 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
 	rcu_read_unlock();
 
-	peer = rt_get_peer_create(rt, rt->rt_dst);
+	net = dev_net(rt->dst.dev);
+	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
 	if (!peer) {
 		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
 		return;
@@ -1520,7 +1517,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 	 */
 	if (peer->rate_tokens >= ip_rt_redirect_number) {
 		peer->rate_last = jiffies;
-		return;
+		goto out_put_peer;
 	}
 
 	/* Check for load limit; set rate_last to the latest sent
@@ -1541,6 +1538,8 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 					     &rt->rt_dst, &rt->rt_gateway);
 #endif
 	}
+out_put_peer:
+	inet_putpeer(peer);
 }
 
 static int ip_error(struct sk_buff *skb)
@@ -1583,7 +1582,7 @@ static int ip_error(struct sk_buff *skb)
 		break;
 	}
 
-	peer = rt_get_peer_create(rt, rt->rt_dst);
+	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
 
 	send = true;
 	if (peer) {
@@ -1596,6 +1595,7 @@ static int ip_error(struct sk_buff *skb)
 			peer->rate_tokens -= ip_rt_error_cost;
 		else
 			send = false;
+		inet_putpeer(peer);
 	}
 	if (send)
 		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index c7da1422cbd..a113f7d7e93 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -194,8 +194,10 @@ static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
 		if (rt->rt6i_dst.plen < 128)
 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
 
-		peer = rt6_get_peer_create(rt);
+		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
 		res = inet_peer_xrlim_allow(peer, tmo);
+		if (peer)
+			inet_putpeer(peer);
 	}
 	dst_release(dst);
 	return res;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index c6af5963a20..5b2d63ed793 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -466,13 +466,15 @@ int ip6_forward(struct sk_buff *skb)
 		else
 			target = &hdr->daddr;
 
-		peer = rt6_get_peer_create(rt);
+		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
 
 		/* Limit redirects both by destination (here)
 		   and by source (inside ndisc_send_redirect)
 		 */
 		if (inet_peer_xrlim_allow(peer, 1*HZ))
 			ndisc_send_redirect(skb, target);
+		if (peer)
+			inet_putpeer(peer);
 	} else {
 		int addrtype = ipv6_addr_type(&hdr->saddr);
 
@@ -592,10 +594,14 @@ void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
 	int old, new;
 
 	if (rt && !(rt->dst.flags & DST_NOPEER)) {
-		struct inet_peer *peer = rt6_get_peer_create(rt);
+		struct inet_peer *peer;
+		struct net *net;
 
+		net = dev_net(rt->dst.dev);
+		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
 		if (peer) {
 			fhdr->identification = htonl(inet_getid(peer, 0));
+			inet_putpeer(peer);
 			return;
 		}
 	}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 69a6330dea9..0fddd571400 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1486,6 +1486,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
 	int rd_len;
 	int err;
 	u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
+	bool ret;
 
 	if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
 		ND_PRINTK(2, warn, "Redirect: no link-local address on %s\n",
@@ -1519,8 +1520,11 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
 			  "Redirect: destination is not a neighbour\n");
 		goto release;
 	}
-	peer = rt6_get_peer_create(rt);
-	if (!inet_peer_xrlim_allow(peer, 1*HZ))
+	peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
+	ret = inet_peer_xrlim_allow(peer, 1*HZ);
+	if (peer)
+		inet_putpeer(peer);
+	if (!ret)
 		goto release;
 
 	if (dev->addr_len) {
-- 
cgit v1.2.3-70-g09d2


From 1de9243bbfc451962ab716a3f7a7fda26d91c359 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 11 Jul 2012 18:32:17 -0700
Subject: ipv4: Pull icmp socket delivery out into a helper function.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/icmp.c | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

(limited to 'net/ipv4/icmp.c')

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4a049449305..18e39d1895d 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -634,18 +634,31 @@ out:;
 EXPORT_SYMBOL(icmp_send);
 
 
+static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
+{
+	const struct iphdr *iph = (const struct iphdr *) skb->data;
+	const struct net_protocol *ipprot;
+	int protocol = iph->protocol;
+
+	raw_icmp_error(skb, protocol, info);
+
+	rcu_read_lock();
+	ipprot = rcu_dereference(inet_protos[protocol]);
+	if (ipprot && ipprot->err_handler)
+		ipprot->err_handler(skb, info);
+	rcu_read_unlock();
+}
+
 /*
  *	Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, and ICMP_QUENCH.
  */
 
 static void icmp_unreach(struct sk_buff *skb)
 {
-	const struct net_protocol *ipprot;
 	const struct iphdr *iph;
 	struct icmphdr *icmph;
 	struct net *net;
 	u32 info = 0;
-	int protocol;
 
 	net = dev_net(skb_dst(skb)->dev);
 
@@ -726,19 +739,7 @@ static void icmp_unreach(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, iph->ihl * 4 + 8))
 		goto out;
 
-	iph = (const struct iphdr *)skb->data;
-	protocol = iph->protocol;
-
-	/*
-	 *	Deliver ICMP message to raw sockets. Pretty useless feature?
-	 */
-	raw_icmp_error(skb, protocol, info);
-
-	rcu_read_lock();
-	ipprot = rcu_dereference(inet_protos[protocol]);
-	if (ipprot && ipprot->err_handler)
-		ipprot->err_handler(skb, info);
-	rcu_read_unlock();
+	icmp_socket_deliver(skb, info);
 
 out:
 	return;
-- 
cgit v1.2.3-70-g09d2


From d3351b75a7169337877fe6f6f2c019154b6ec1ea Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 11 Jul 2012 18:35:12 -0700
Subject: ipv4: Deliver ICMP redirects to sockets too.

And thus, we can remove the ping_err() hack.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/icmp.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'net/ipv4/icmp.c')

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 18e39d1895d..588514627aa 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -782,13 +782,7 @@ static void icmp_redirect(struct sk_buff *skb)
 		break;
 	}
 
-	/* Ping wants to see redirects.
-         * Let's pretend they are errors of sorts... */
-	if (iph->protocol == IPPROTO_ICMP &&
-	    iph->ihl >= 5 &&
-	    pskb_may_pull(skb, (iph->ihl<<2)+8)) {
-		ping_err(skb, icmp_hdr(skb)->un.gateway);
-	}
+	icmp_socket_deliver(skb, icmp_hdr(skb)->un.gateway);
 
 out:
 	return;
-- 
cgit v1.2.3-70-g09d2


From 94206125c4aac32e43c25bfe1b827e7ab993b7dc Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 11 Jul 2012 20:38:08 -0700
Subject: ipv4: Rearrange arguments to ip_rt_redirect()

Pass in the SKB rather than just the IP addresses, so that policy
and other aspects can reside in ip_rt_redirect() rather then
icmp_redirect().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/route.h |  3 +--
 net/ipv4/icmp.c     | 36 ++++++------------------------------
 net/ipv4/route.c    | 23 +++++++++++++++++++----
 3 files changed, 26 insertions(+), 36 deletions(-)

(limited to 'net/ipv4/icmp.c')

diff --git a/include/net/route.h b/include/net/route.h
index 52362368af0..b1402787aec 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -108,8 +108,7 @@ extern struct ip_rt_acct __percpu *ip_rt_acct;
 
 struct in_device;
 extern int		ip_rt_init(void);
-extern void		ip_rt_redirect(__be32 old_gw, __be32 dst, __be32 new_gw,
-				       __be32 src, struct net_device *dev);
+extern void		ip_rt_redirect(struct sk_buff *skb, __be32 new_gw);
 extern void		rt_cache_flush(struct net *net, int how);
 extern void		rt_cache_flush_batch(struct net *net);
 extern struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 588514627aa..70a793559bb 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -755,40 +755,16 @@ out_err:
 
 static void icmp_redirect(struct sk_buff *skb)
 {
-	const struct iphdr *iph;
-
-	if (skb->len < sizeof(struct iphdr))
-		goto out_err;
+	if (skb->len < sizeof(struct iphdr)) {
+		ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS);
+		return;
+	}
 
-	/*
-	 *	Get the copied header of the packet that caused the redirect
-	 */
 	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
-		goto out;
-
-	iph = (const struct iphdr *)skb->data;
-
-	switch (icmp_hdr(skb)->code & 7) {
-	case ICMP_REDIR_NET:
-	case ICMP_REDIR_NETTOS:
-		/*
-		 * As per RFC recommendations now handle it as a host redirect.
-		 */
-	case ICMP_REDIR_HOST:
-	case ICMP_REDIR_HOSTTOS:
-		ip_rt_redirect(ip_hdr(skb)->saddr, iph->daddr,
-			       icmp_hdr(skb)->un.gateway,
-			       iph->saddr, skb->dev);
-		break;
-	}
+		return;
 
+	ip_rt_redirect(skb, icmp_hdr(skb)->un.gateway);
 	icmp_socket_deliver(skb, icmp_hdr(skb)->un.gateway);
-
-out:
-	return;
-out_err:
-	ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS);
-	goto out;
 }
 
 /*
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a4de87f44c3..f8921b448c3 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1292,18 +1292,33 @@ static void ip_do_redirect(struct rtable *rt, __be32 old_gw, __be32 new_gw)
 }
 
 /* called in rcu_read_lock() section */
-void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
-		    __be32 saddr, struct net_device *dev)
+void ip_rt_redirect(struct sk_buff *skb, __be32 new_gw)
 {
-	int s, i;
+	const struct iphdr *iph = (const struct iphdr *) skb->data;
+	__be32 old_gw = ip_hdr(skb)->saddr;
+	__be32 daddr = iph->daddr;
+	__be32 saddr = iph->saddr;
+	struct net_device *dev = skb->dev;
 	struct in_device *in_dev = __in_dev_get_rcu(dev);
-	__be32 skeys[2] = { saddr, 0 };
 	int    ikeys[2] = { dev->ifindex, 0 };
+	__be32 skeys[2] = { saddr, 0 };
 	struct net *net;
+	int s, i;
 
 	if (!in_dev)
 		return;
 
+	switch (icmp_hdr(skb)->code & 7) {
+	case ICMP_REDIR_NET:
+	case ICMP_REDIR_NETTOS:
+	case ICMP_REDIR_HOST:
+	case ICMP_REDIR_HOSTTOS:
+		break;
+
+	default:
+		return;
+	}
+
 	net = dev_net(dev);
 	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
 	    ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
-- 
cgit v1.2.3-70-g09d2


From 1f42539d257af671d56d4bdbcf13aef31abff6ef Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 11 Jul 2012 21:30:08 -0700
Subject: ipv4: Kill ip_rt_redirect().

No longer needed, as the protocol handlers now all properly
propagate the redirect back into the routing code.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/route.h |  1 -
 net/ipv4/icmp.c     |  1 -
 net/ipv4/route.c    | 44 --------------------------------------------
 3 files changed, 46 deletions(-)

(limited to 'net/ipv4/icmp.c')

diff --git a/include/net/route.h b/include/net/route.h
index 6ab93eeb866..ace3cb44251 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -108,7 +108,6 @@ extern struct ip_rt_acct __percpu *ip_rt_acct;
 
 struct in_device;
 extern int		ip_rt_init(void);
-extern void		ip_rt_redirect(struct sk_buff *skb, __be32 new_gw);
 extern void		rt_cache_flush(struct net *net, int how);
 extern void		rt_cache_flush_batch(struct net *net);
 extern struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 70a793559bb..d01aeb4d492 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -763,7 +763,6 @@ static void icmp_redirect(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
 		return;
 
-	ip_rt_redirect(skb, icmp_hdr(skb)->un.gateway);
 	icmp_socket_deliver(skb, icmp_hdr(skb)->un.gateway);
 }
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index aabece6b729..e98207dcd08 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1345,50 +1345,6 @@ reject_redirect:
 	;
 }
 
-/* called in rcu_read_lock() section */
-void ip_rt_redirect(struct sk_buff *skb, __be32 new_gw)
-{
-	const struct iphdr *iph = (const struct iphdr *) skb->data;
-	__be32 daddr = iph->daddr;
-	__be32 saddr = iph->saddr;
-	struct net_device *dev = skb->dev;
-	int    ikeys[2] = { dev->ifindex, 0 };
-	__be32 skeys[2] = { saddr, 0 };
-	struct net *net;
-	int s, i;
-
-	net = dev_net(dev);
-	for (s = 0; s < 2; s++) {
-		for (i = 0; i < 2; i++) {
-			unsigned int hash;
-			struct rtable __rcu **rthp;
-			struct rtable *rt;
-
-			hash = rt_hash(daddr, skeys[s], ikeys[i], rt_genid(net));
-
-			rthp = &rt_hash_table[hash].chain;
-
-			while ((rt = rcu_dereference(*rthp)) != NULL) {
-				rthp = &rt->dst.rt_next;
-
-				if (rt->rt_key_dst != daddr ||
-				    rt->rt_key_src != skeys[s] ||
-				    rt->rt_oif != ikeys[i] ||
-				    rt_is_input_route(rt) ||
-				    rt_is_expired(rt) ||
-				    !net_eq(dev_net(rt->dst.dev), net) ||
-				    rt->dst.error ||
-				    rt->dst.dev != dev)
-					continue;
-
-				ip_do_redirect(&rt->dst, skb);
-			}
-		}
-	}
-	return;
-
-}
-
 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
 {
 	struct rtable *rt = (struct rtable *)dst;
-- 
cgit v1.2.3-70-g09d2


From f0a70e902f483295a8b6d74ef4393bc577b703d7 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 12 Jul 2012 08:06:04 -0700
Subject: ipv4: Put proper checks into icmp_socket_deliver().

All handler->err() routines expect that we've done a pskb_may_pull()
test to make sure that IP header length + 8 bytes can be safely
pulled.

Reported-by: Hiroaki SHIMODA <shimoda.hiroaki@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/icmp.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'net/ipv4/icmp.c')

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index d01aeb4d492..ea3a996de95 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -640,6 +640,12 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
 	const struct net_protocol *ipprot;
 	int protocol = iph->protocol;
 
+	/* Checkin full IP header plus 8 bytes of protocol to
+	 * avoid additional coding at protocol handlers.
+	 */
+	if (!pskb_may_pull(skb, iph->ihl * 4 + 8))
+		return;
+
 	raw_icmp_error(skb, protocol, info);
 
 	rcu_read_lock();
@@ -733,12 +739,6 @@ static void icmp_unreach(struct sk_buff *skb)
 		goto out;
 	}
 
-	/* Checkin full IP header plus 8 bytes of protocol to
-	 * avoid additional coding at protocol handlers.
-	 */
-	if (!pskb_may_pull(skb, iph->ihl * 4 + 8))
-		goto out;
-
 	icmp_socket_deliver(skb, info);
 
 out:
-- 
cgit v1.2.3-70-g09d2


From 838942a594017817d33b2d914152305054e255af Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 23 Jul 2012 13:20:26 -0700
Subject: ipv4: Really ignore ICMP address requests/replies.

Alexey removed kernel side support for requests, and the
only thing we do for replies is log a message if something
doesn't look right.

As Alexey's comment indicates, this belongs in userspace (if
anywhere), and thus we can safely just get rid of this code.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/icmp.c | 84 ++-------------------------------------------------------
 1 file changed, 2 insertions(+), 82 deletions(-)

(limited to 'net/ipv4/icmp.c')

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index ea3a996de95..f2a06beffbd 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -837,86 +837,6 @@ out_err:
 	goto out;
 }
 
-
-/*
- *	Handle ICMP_ADDRESS_MASK requests.  (RFC950)
- *
- * RFC1122 (3.2.2.9).  A host MUST only send replies to
- * ADDRESS_MASK requests if it's been configured as an address mask
- * agent.  Receiving a request doesn't constitute implicit permission to
- * act as one. Of course, implementing this correctly requires (SHOULD)
- * a way to turn the functionality on and off.  Another one for sysctl(),
- * I guess. -- MS
- *
- * RFC1812 (4.3.3.9).	A router MUST implement it.
- *			A router SHOULD have switch turning it on/off.
- *		      	This switch MUST be ON by default.
- *
- * Gratuitous replies, zero-source replies are not implemented,
- * that complies with RFC. DO NOT implement them!!! All the idea
- * of broadcast addrmask replies as specified in RFC950 is broken.
- * The problem is that it is not uncommon to have several prefixes
- * on one physical interface. Moreover, addrmask agent can even be
- * not aware of existing another prefixes.
- * If source is zero, addrmask agent cannot choose correct prefix.
- * Gratuitous mask announcements suffer from the same problem.
- * RFC1812 explains it, but still allows to use ADDRMASK,
- * that is pretty silly. --ANK
- *
- * All these rules are so bizarre, that I removed kernel addrmask
- * support at all. It is wrong, it is obsolete, nobody uses it in
- * any case. --ANK
- *
- * Furthermore you can do it with a usermode address agent program
- * anyway...
- */
-
-static void icmp_address(struct sk_buff *skb)
-{
-#if 0
-	net_dbg_ratelimited("a guy asks for address mask. Who is it?\n");
-#endif
-}
-
-/*
- * RFC1812 (4.3.3.9).	A router SHOULD listen all replies, and complain
- *			loudly if an inconsistency is found.
- * called with rcu_read_lock()
- */
-
-static void icmp_address_reply(struct sk_buff *skb)
-{
-	struct rtable *rt = skb_rtable(skb);
-	struct net_device *dev = skb->dev;
-	struct in_device *in_dev;
-	struct in_ifaddr *ifa;
-
-	if (skb->len < 4 || !(rt->rt_flags&RTCF_DIRECTSRC))
-		return;
-
-	in_dev = __in_dev_get_rcu(dev);
-	if (!in_dev)
-		return;
-
-	if (in_dev->ifa_list &&
-	    IN_DEV_LOG_MARTIANS(in_dev) &&
-	    IN_DEV_FORWARD(in_dev)) {
-		__be32 _mask, *mp;
-
-		mp = skb_header_pointer(skb, 0, sizeof(_mask), &_mask);
-		BUG_ON(mp == NULL);
-		for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
-			if (*mp == ifa->ifa_mask &&
-			    inet_ifa_match(ip_hdr(skb)->saddr, ifa))
-				break;
-		}
-		if (!ifa)
-			net_info_ratelimited("Wrong address mask %pI4 from %s/%pI4\n",
-					     mp,
-					     dev->name, &ip_hdr(skb)->saddr);
-	}
-}
-
 static void icmp_discard(struct sk_buff *skb)
 {
 }
@@ -1080,10 +1000,10 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
 		.handler = icmp_discard,
 	},
 	[ICMP_ADDRESS] = {
-		.handler = icmp_address,
+		.handler = icmp_discard,
 	},
 	[ICMP_ADDRESSREPLY] = {
-		.handler = icmp_address_reply,
+		.handler = icmp_discard,
 	},
 };
 
-- 
cgit v1.2.3-70-g09d2


From 92101b3b2e3178087127709a556b091dae314e9e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 23 Jul 2012 16:29:00 -0700
Subject: ipv4: Prepare for change of rt->rt_iif encoding.

Use inet_iif() consistently, and for TCP record the input interface of
cached RX dst in inet sock.

rt->rt_iif is going to be encoded differently, so that we can
legitimately cache input routes in the FIB info more aggressively.

When the input interface is "use SKB device index" the rt->rt_iif will
be set to zero.

This forces us to move the TCP RX dst cache installation into the ipv4
specific code, and as well it should since doing the route caching for
ipv6 is pointless at the moment since it is not inspected in the ipv6
input paths yet.

Also, remove the unlikely on dst->obsolete, all ipv4 dsts have
obsolete set to a non-zero value to force invocation of the check
callback.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h |  1 +
 net/dccp/ipv4.c         |  2 +-
 net/ipv4/icmp.c         |  2 +-
 net/ipv4/ip_sockglue.c  |  5 ++---
 net/ipv4/route.c        |  2 +-
 net/ipv4/tcp_input.c    | 12 ------------
 net/ipv4/tcp_ipv4.c     | 24 ++++++++++++++++++------
 net/sched/cls_route.c   |  2 +-
 net/sched/em_meta.c     |  2 +-
 net/sctp/protocol.c     |  2 +-
 10 files changed, 27 insertions(+), 27 deletions(-)

(limited to 'net/ipv4/icmp.c')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 924d7b98ab6..613cfa40167 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -172,6 +172,7 @@ struct inet_sock {
 	int			uc_index;
 	int			mc_index;
 	__be32			mc_addr;
+	int			rx_dst_ifindex;
 	struct ip_mc_socklist __rcu	*mc_list;
 	struct inet_cork_full	cork;
 };
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 25428d0c50c..176ecdba4a2 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -481,7 +481,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
 	struct rtable *rt;
 	const struct iphdr *iph = ip_hdr(skb);
 	struct flowi4 fl4 = {
-		.flowi4_oif = skb_rtable(skb)->rt_iif,
+		.flowi4_oif = inet_iif(skb),
 		.daddr = iph->saddr,
 		.saddr = iph->daddr,
 		.flowi4_tos = RT_CONN_FLAGS(sk),
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index f2a06beffbd..f2eccd53174 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -571,7 +571,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 		rcu_read_lock();
 		if (rt_is_input_route(rt) &&
 		    net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)
-			dev = dev_get_by_index_rcu(net, rt->rt_iif);
+			dev = dev_get_by_index_rcu(net, inet_iif(skb_in));
 
 		if (dev)
 			saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index de29f46f68b..5eea4a81104 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1027,10 +1027,9 @@ e_inval:
 void ipv4_pktinfo_prepare(struct sk_buff *skb)
 {
 	struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
-	const struct rtable *rt = skb_rtable(skb);
 
-	if (rt) {
-		pktinfo->ipi_ifindex = rt->rt_iif;
+	if (skb_rtable(skb)) {
+		pktinfo->ipi_ifindex = inet_iif(skb);
 		pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
 	} else {
 		pktinfo->ipi_ifindex = 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 34017be87c8..f6be7811939 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -848,7 +848,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 		if (log_martians &&
 		    peer->rate_tokens == ip_rt_redirect_number)
 			net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
-					     &ip_hdr(skb)->saddr, rt->rt_iif,
+					     &ip_hdr(skb)->saddr, inet_iif(skb),
 					     &ip_hdr(skb)->daddr, &rt->rt_gateway);
 #endif
 	}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 21d7f8f3a7a..3e07a64ca44 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5391,18 +5391,6 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (sk->sk_rx_dst) {
-		struct dst_entry *dst = sk->sk_rx_dst;
-		if (unlikely(dst->obsolete)) {
-			if (dst->ops->check(dst, 0) == NULL) {
-				dst_release(dst);
-				sk->sk_rx_dst = NULL;
-			}
-		}
-	}
-	if (unlikely(sk->sk_rx_dst == NULL))
-		sk->sk_rx_dst = dst_clone(skb_dst(skb));
-
 	/*
 	 *	Header prediction.
 	 *	The code loosely follows the one in the famous
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index bc5432e3c77..3e30548ac32 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1618,6 +1618,20 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 
 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
 		sock_rps_save_rxhash(sk, skb);
+		if (sk->sk_rx_dst) {
+			struct dst_entry *dst = sk->sk_rx_dst;
+			if (dst->ops->check(dst, 0) == NULL) {
+				dst_release(dst);
+				sk->sk_rx_dst = NULL;
+			}
+		}
+		if (unlikely(sk->sk_rx_dst == NULL)) {
+			struct inet_sock *icsk = inet_sk(sk);
+			struct rtable *rt = skb_rtable(skb);
+
+			sk->sk_rx_dst = dst_clone(&rt->dst);
+			icsk->rx_dst_ifindex = inet_iif(skb);
+		}
 		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
 			rsk = sk;
 			goto reset;
@@ -1700,14 +1714,12 @@ void tcp_v4_early_demux(struct sk_buff *skb)
 		skb->destructor = sock_edemux;
 		if (sk->sk_state != TCP_TIME_WAIT) {
 			struct dst_entry *dst = sk->sk_rx_dst;
+			struct inet_sock *icsk = inet_sk(sk);
 			if (dst)
 				dst = dst_check(dst, 0);
-			if (dst) {
-				struct rtable *rt = (struct rtable *) dst;
-
-				if (rt->rt_iif == dev->ifindex)
-					skb_dst_set_noref(skb, dst);
-			}
+			if (dst &&
+			    icsk->rx_dst_ifindex == dev->ifindex)
+				skb_dst_set_noref(skb, dst);
 		}
 	}
 }
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 36fec422740..44f405cb9aa 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -143,7 +143,7 @@ static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	if (head == NULL)
 		goto old_method;
 
-	iif = ((struct rtable *)dst)->rt_iif;
+	iif = inet_iif(skb);
 
 	h = route4_fastmap_hash(id, iif);
 	if (id == head->fastmap[h].id &&
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 4790c696cbc..4ab6e332557 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -264,7 +264,7 @@ META_COLLECTOR(int_rtiif)
 	if (unlikely(skb_rtable(skb) == NULL))
 		*err = -1;
 	else
-		dst->value = skb_rtable(skb)->rt_iif;
+		dst->value = inet_iif(skb);
 }
 
 /**************************************************************************
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 9c90811d113..1f89c4e6964 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -568,7 +568,7 @@ static void sctp_v4_get_saddr(struct sctp_sock *sk,
 /* What interface did this skb arrive on? */
 static int sctp_v4_skb_iif(const struct sk_buff *skb)
 {
-	return skb_rtable(skb)->rt_iif;
+	return inet_iif(skb);
 }
 
 /* Was this packet marked by Explicit Congestion Notification? */
-- 
cgit v1.2.3-70-g09d2