summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2012-07-12 03:49:19 -0700
committerDavid S. Miller <davem@davemloft.net>2012-07-12 03:49:19 -0700
commit3ec5a261aef55a32664bffd335e5c32aeadf3215 (patch)
treea2ddd1e4c432d7300396814320608ee946030fc1 /net/ipv4
parent46d3ceabd8d98ed0ad10f20c595ca784e34786c5 (diff)
parent1ed5c48f231cd00eac0b3d2350ac61e3c825063e (diff)
Merge branch 'redirect_via_sock'
As described in my patch series from the other day, we need to rearrange redirect handling so that the local initiators of packets (sockets, tunnels, xfrms, etc.) that implement the protocols compute the route and pass this down into the ipv4/ipv6 routing code. These changes here do so by implementing a new dst_ops->redirect method. No more do we have this funny code that tries several different sets of routing keys to try and figure out which route the redirect should actually be applied to. No more do we have the problem wherein TOS rewriting causes problems for us. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/ah4.c18
-rw-r--r--net/ipv4/esp4.c18
-rw-r--r--net/ipv4/icmp.c74
-rw-r--r--net/ipv4/ip_gre.c9
-rw-r--r--net/ipv4/ipcomp.c18
-rw-r--r--net/ipv4/ipip.c9
-rw-r--r--net/ipv4/ping.c1
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/route.c115
-rw-r--r--net/ipv4/tcp_ipv4.c11
-rw-r--r--net/ipv4/udp.c3
-rw-r--r--net/ipv4/xfrm4_policy.c9
12 files changed, 174 insertions, 113 deletions
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 916d5ecaf6c..a0d8392491c 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -398,17 +398,25 @@ static void ah4_err(struct sk_buff *skb, u32 info)
struct ip_auth_hdr *ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2));
struct xfrm_state *x;
- if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
- icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
+ switch (icmp_hdr(skb)->type) {
+ case ICMP_DEST_UNREACH:
+ if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
+ return;
+ case ICMP_REDIRECT:
+ break;
+ default:
return;
+ }
x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
ah->spi, IPPROTO_AH, AF_INET);
if (!x)
return;
- pr_debug("pmtu discovery on SA AH/%08x/%08x\n",
- ntohl(ah->spi), ntohl(iph->daddr));
- ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0);
+
+ if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
+ ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0);
+ else
+ ipv4_redirect(skb, net, 0, 0, IPPROTO_AH, 0);
xfrm_state_put(x);
}
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 7b95b49a36c..b61e9deb7c7 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -484,17 +484,25 @@ static void esp4_err(struct sk_buff *skb, u32 info)
struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
struct xfrm_state *x;
- if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
- icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
+ switch (icmp_hdr(skb)->type) {
+ case ICMP_DEST_UNREACH:
+ if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
+ return;
+ case ICMP_REDIRECT:
+ break;
+ default:
return;
+ }
x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
esph->spi, IPPROTO_ESP, AF_INET);
if (!x)
return;
- NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n",
- ntohl(esph->spi), ntohl(iph->daddr));
- ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0);
+
+ if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
+ ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0);
+ else
+ ipv4_redirect(skb, net, 0, 0, IPPROTO_ESP, 0);
xfrm_state_put(x);
}
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4a049449305..d01aeb4d492 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -634,18 +634,31 @@ out:;
EXPORT_SYMBOL(icmp_send);
+static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
+{
+ const struct iphdr *iph = (const struct iphdr *) skb->data;
+ const struct net_protocol *ipprot;
+ int protocol = iph->protocol;
+
+ raw_icmp_error(skb, protocol, info);
+
+ rcu_read_lock();
+ ipprot = rcu_dereference(inet_protos[protocol]);
+ if (ipprot && ipprot->err_handler)
+ ipprot->err_handler(skb, info);
+ rcu_read_unlock();
+}
+
/*
* Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, and ICMP_QUENCH.
*/
static void icmp_unreach(struct sk_buff *skb)
{
- const struct net_protocol *ipprot;
const struct iphdr *iph;
struct icmphdr *icmph;
struct net *net;
u32 info = 0;
- int protocol;
net = dev_net(skb_dst(skb)->dev);
@@ -726,19 +739,7 @@ static void icmp_unreach(struct sk_buff *skb)
if (!pskb_may_pull(skb, iph->ihl * 4 + 8))
goto out;
- iph = (const struct iphdr *)skb->data;
- protocol = iph->protocol;
-
- /*
- * Deliver ICMP message to raw sockets. Pretty useless feature?
- */
- raw_icmp_error(skb, protocol, info);
-
- rcu_read_lock();
- ipprot = rcu_dereference(inet_protos[protocol]);
- if (ipprot && ipprot->err_handler)
- ipprot->err_handler(skb, info);
- rcu_read_unlock();
+ icmp_socket_deliver(skb, info);
out:
return;
@@ -754,46 +755,15 @@ out_err:
static void icmp_redirect(struct sk_buff *skb)
{
- const struct iphdr *iph;
-
- if (skb->len < sizeof(struct iphdr))
- goto out_err;
-
- /*
- * Get the copied header of the packet that caused the redirect
- */
- if (!pskb_may_pull(skb, sizeof(struct iphdr)))
- goto out;
-
- iph = (const struct iphdr *)skb->data;
-
- switch (icmp_hdr(skb)->code & 7) {
- case ICMP_REDIR_NET:
- case ICMP_REDIR_NETTOS:
- /*
- * As per RFC recommendations now handle it as a host redirect.
- */
- case ICMP_REDIR_HOST:
- case ICMP_REDIR_HOSTTOS:
- ip_rt_redirect(ip_hdr(skb)->saddr, iph->daddr,
- icmp_hdr(skb)->un.gateway,
- iph->saddr, skb->dev);
- break;
+ if (skb->len < sizeof(struct iphdr)) {
+ ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS);
+ return;
}
- /* Ping wants to see redirects.
- * Let's pretend they are errors of sorts... */
- if (iph->protocol == IPPROTO_ICMP &&
- iph->ihl >= 5 &&
- pskb_may_pull(skb, (iph->ihl<<2)+8)) {
- ping_err(skb, icmp_hdr(skb)->un.gateway);
- }
+ if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ return;
-out:
- return;
-out_err:
- ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS);
- goto out;
+ icmp_socket_deliver(skb, icmp_hdr(skb)->un.gateway);
}
/*
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 594cec35ac4..0c3123566d7 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -528,6 +528,9 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
if (code != ICMP_EXC_TTL)
return;
break;
+
+ case ICMP_REDIRECT:
+ break;
}
rcu_read_lock();
@@ -543,7 +546,11 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
t->parms.link, 0, IPPROTO_GRE, 0);
goto out;
}
-
+ if (type == ICMP_REDIRECT) {
+ ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
+ IPPROTO_GRE, 0);
+ goto out;
+ }
if (t->parms.iph.daddr == 0 ||
ipv4_is_multicast(t->parms.iph.daddr))
goto out;
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index b91375482d8..d3ab47e19a8 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -31,18 +31,26 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
struct xfrm_state *x;
- if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
- icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
+ switch (icmp_hdr(skb)->type) {
+ case ICMP_DEST_UNREACH:
+ if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
+ return;
+ case ICMP_REDIRECT:
+ break;
+ default:
return;
+ }
spi = htonl(ntohs(ipch->cpi));
x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
spi, IPPROTO_COMP, AF_INET);
if (!x)
return;
- NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%pI4\n",
- spi, &iph->daddr);
- ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0);
+
+ if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
+ ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0);
+ else
+ ipv4_redirect(skb, net, 0, 0, IPPROTO_COMP, 0);
xfrm_state_put(x);
}
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 715338a1b20..c2d0e6d8baa 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -360,6 +360,8 @@ static int ipip_err(struct sk_buff *skb, u32 info)
if (code != ICMP_EXC_TTL)
return 0;
break;
+ case ICMP_REDIRECT:
+ break;
}
err = -ENOENT;
@@ -376,6 +378,13 @@ static int ipip_err(struct sk_buff *skb, u32 info)
goto out;
}
+ if (type == ICMP_REDIRECT) {
+ ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
+ IPPROTO_IPIP, 0);
+ err = 0;
+ goto out;
+ }
+
if (t->parms.iph.daddr == 0)
goto out;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 340fcf29a96..6232d476f37 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -387,6 +387,7 @@ void ping_err(struct sk_buff *skb, u32 info)
break;
case ICMP_REDIRECT:
/* See ICMP_SOURCE_QUENCH */
+ ipv4_sk_redirect(skb, sk);
err = EREMOTEIO;
break;
}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 659ddfb1094..ff0f071969e 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -218,6 +218,8 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
ipv4_sk_update_pmtu(skb, sk, info);
+ else if (type == ICMP_REDIRECT)
+ ipv4_sk_redirect(skb, sk);
/* Report error on raw socket, if:
1. User requested ip_recverr.
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 95bfa1ba5b2..23bbe29b3bb 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -149,6 +149,7 @@ static void ipv4_dst_destroy(struct dst_entry *dst);
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void ipv4_link_failure(struct sk_buff *skb);
static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
+static void ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb);
static int rt_garbage_collect(struct dst_ops *ops);
static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -179,6 +180,7 @@ static struct dst_ops ipv4_dst_ops = {
.negative_advice = ipv4_negative_advice,
.link_failure = ipv4_link_failure,
.update_pmtu = ip_rt_update_pmtu,
+ .redirect = ip_do_redirect,
.local_out = __ip_local_out,
.neigh_lookup = ipv4_neigh_lookup,
};
@@ -1271,16 +1273,35 @@ static void rt_del(unsigned int hash, struct rtable *rt)
spin_unlock_bh(rt_hash_lock_addr(hash));
}
-/* called in rcu_read_lock() section */
-void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
- __be32 saddr, struct net_device *dev)
+static void ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb)
{
- int s, i;
- struct in_device *in_dev = __in_dev_get_rcu(dev);
- __be32 skeys[2] = { saddr, 0 };
- int ikeys[2] = { dev->ifindex, 0 };
+ const struct iphdr *iph = (const struct iphdr *) skb->data;
+ __be32 new_gw = icmp_hdr(skb)->un.gateway;
+ __be32 old_gw = ip_hdr(skb)->saddr;
+ struct net_device *dev = skb->dev;
+ __be32 daddr = iph->daddr;
+ __be32 saddr = iph->saddr;
+ struct in_device *in_dev;
+ struct neighbour *n;
+ struct rtable *rt;
struct net *net;
+ switch (icmp_hdr(skb)->code & 7) {
+ case ICMP_REDIR_NET:
+ case ICMP_REDIR_NETTOS:
+ case ICMP_REDIR_HOST:
+ case ICMP_REDIR_HOSTTOS:
+ break;
+
+ default:
+ return;
+ }
+
+ rt = (struct rtable *) dst;
+ if (rt->rt_gateway != old_gw)
+ return;
+
+ in_dev = __in_dev_get_rcu(dev);
if (!in_dev)
return;
@@ -1300,45 +1321,16 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
goto reject_redirect;
}
- for (s = 0; s < 2; s++) {
- for (i = 0; i < 2; i++) {
- unsigned int hash;
- struct rtable __rcu **rthp;
- struct rtable *rt;
-
- hash = rt_hash(daddr, skeys[s], ikeys[i], rt_genid(net));
-
- rthp = &rt_hash_table[hash].chain;
-
- while ((rt = rcu_dereference(*rthp)) != NULL) {
- struct neighbour *n;
-
- rthp = &rt->dst.rt_next;
-
- if (rt->rt_key_dst != daddr ||
- rt->rt_key_src != skeys[s] ||
- rt->rt_oif != ikeys[i] ||
- rt_is_input_route(rt) ||
- rt_is_expired(rt) ||
- !net_eq(dev_net(rt->dst.dev), net) ||
- rt->dst.error ||
- rt->dst.dev != dev ||
- rt->rt_gateway != old_gw)
- continue;
-
- n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
- if (n) {
- if (!(n->nud_state & NUD_VALID)) {
- neigh_event_send(n, NULL);
- } else {
- rt->rt_gateway = new_gw;
- rt->rt_flags |= RTCF_REDIRECTED;
- call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
- }
- neigh_release(n);
- }
- }
+ n = ipv4_neigh_lookup(dst, NULL, &new_gw);
+ if (n) {
+ if (!(n->nud_state & NUD_VALID)) {
+ neigh_event_send(n, NULL);
+ } else {
+ rt->rt_gateway = new_gw;
+ rt->rt_flags |= RTCF_REDIRECTED;
+ call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
}
+ neigh_release(n);
}
return;
@@ -1554,6 +1546,34 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
+void ipv4_redirect(struct sk_buff *skb, struct net *net,
+ int oif, u32 mark, u8 protocol, int flow_flags)
+{
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
+ struct flowi4 fl4;
+ struct rtable *rt;
+
+ flowi4_init_output(&fl4, oif, mark, RT_TOS(iph->tos), RT_SCOPE_UNIVERSE,
+ protocol, flow_flags, iph->daddr, iph->saddr, 0, 0);
+ rt = __ip_route_output_key(net, &fl4);
+ if (!IS_ERR(rt)) {
+ ip_do_redirect(&rt->dst, skb);
+ ip_rt_put(rt);
+ }
+}
+EXPORT_SYMBOL_GPL(ipv4_redirect);
+
+void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
+{
+ const struct inet_sock *inet = inet_sk(sk);
+
+ return ipv4_redirect(skb, sock_net(sk), sk->sk_bound_dev_if,
+ sk->sk_mark,
+ inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+ inet_sk_flowi_flags(sk));
+}
+EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
+
static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
{
struct rtable *rt = (struct rtable *) dst;
@@ -2571,6 +2591,10 @@ static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
{
}
+static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sk_buff *skb)
+{
+}
+
static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
unsigned long old)
{
@@ -2585,6 +2609,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
.mtu = ipv4_blackhole_mtu,
.default_advmss = ipv4_default_advmss,
.update_pmtu = ipv4_rt_blackhole_update_pmtu,
+ .redirect = ipv4_rt_blackhole_redirect,
.cow_metrics = ipv4_rt_blackhole_cow_metrics,
.neigh_lookup = ipv4_neigh_lookup,
};
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 01545a3fc0f..7a0062cb4ed 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -321,6 +321,14 @@ static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
} /* else let the usual retransmit timer handle it */
}
+static void do_redirect(struct sk_buff *skb, struct sock *sk)
+{
+ struct dst_entry *dst = __sk_dst_check(sk, 0);
+
+ if (dst)
+ dst->ops->redirect(dst, skb);
+}
+
/*
* This routine is called by the ICMP module when it gets some
* sort of error condition. If err < 0 then the socket should
@@ -394,6 +402,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
}
switch (type) {
+ case ICMP_REDIRECT:
+ do_redirect(icmp_skb, sk);
+ goto out;
case ICMP_SOURCE_QUENCH:
/* Just silently ignore these. */
goto out;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ee37d47d472..b4c3582a991 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -630,6 +630,9 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
err = icmp_err_convert[code].errno;
}
break;
+ case ICMP_REDIRECT:
+ ipv4_sk_redirect(skb, sk);
+ break;
}
/*
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 87d3fcc302d..737131cef37 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -202,6 +202,14 @@ static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu)
path->ops->update_pmtu(path, mtu);
}
+static void xfrm4_redirect(struct dst_entry *dst, struct sk_buff *skb)
+{
+ struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+ struct dst_entry *path = xdst->route;
+
+ path->ops->redirect(path, skb);
+}
+
static void xfrm4_dst_destroy(struct dst_entry *dst)
{
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
@@ -225,6 +233,7 @@ static struct dst_ops xfrm4_dst_ops = {
.protocol = cpu_to_be16(ETH_P_IP),
.gc = xfrm4_garbage_collect,
.update_pmtu = xfrm4_update_pmtu,
+ .redirect = xfrm4_redirect,
.cow_metrics = dst_cow_metrics_generic,
.destroy = xfrm4_dst_destroy,
.ifdown = xfrm4_dst_ifdown,