From fbfe95a42e90b3dd079cc9019ba7d7700feee0f6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 8 Jun 2012 23:24:18 -0700 Subject: inet: Create and use rt{,6}_get_peer_create(). There's a lot of places that open-code rt{,6}_get_peer() only because they want to set 'create' to one. So add an rt{,6}_get_peer_create() for their sake. There were also a few spots open-coding plain rt{,6}_get_peer() and those are transformed here as well. Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv6/icmp.c') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 091a2971c7b..ed89bba745a 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -188,14 +188,14 @@ static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type, } else { struct rt6_info *rt = (struct rt6_info *)dst; int tmo = net->ipv6.sysctl.icmpv6_time; + struct inet_peer *peer; /* Give more bandwidth to wider prefixes. */ if (rt->rt6i_dst.plen < 128) tmo >>= ((128 - rt->rt6i_dst.plen)>>5); - if (!rt->rt6i_peer) - rt6_bind_peer(rt, 1); - res = inet_peer_xrlim_allow(rt->rt6i_peer, tmo); + peer = rt6_get_peer_create(rt); + res = inet_peer_xrlim_allow(peer, tmo); } dst_release(dst); return res; -- cgit v1.2.3-70-g09d2 From 81aded24675ebda5de8a68843250ad15584ac38a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 15 Jun 2012 14:54:11 -0700 Subject: ipv6: Handle PMTU in ICMP error handlers. One tricky issue on the ipv6 side vs. ipv4 is that the ICMP callouts to handle the error pass the 32-bit info cookie in network byte order whereas ipv4 passes it around in host byte order. Like the ipv4 side, we have two helper functions. One for when we have a socket context and one for when we do not. ip6ip6 tunnels are not handled here, because they handle PMTU events by essentially relaying another ICMP packet-too-big message back to the original sender. This patch allows us to get rid of rt6_do_pmtu_disc(). It handles all kinds of situations that simply cannot happen when we do the PMTU update directly using a fully resolved route. In fact, the "plen == 128" check in ip6_rt_update_pmtu() can very likely be removed or changed into a BUG_ON() check. We should never have a prefixed ipv6 route when we get there. Another piece of strange history here is that TCP and DCCP, unlike in ipv4, never invoke the update_pmtu() method from their ICMP error handlers. This is incredibly astonishing since this is the context where we have the most accurate context in which to make a PMTU update, namely we have a fully connected socket and associated cached socket route. Signed-off-by: David S. Miller --- include/net/ip6_route.h | 8 +-- net/dccp/ipv6.c | 2 + net/ipv6/ah6.c | 3 +- net/ipv6/esp6.c | 2 + net/ipv6/icmp.c | 6 +- net/ipv6/ipcomp6.c | 2 + net/ipv6/raw.c | 5 +- net/ipv6/route.c | 143 +++++++++++------------------------------------- net/ipv6/tcp_ipv6.c | 2 + net/ipv6/udp.c | 3 + 10 files changed, 54 insertions(+), 122 deletions(-) (limited to 'net/ipv6/icmp.c') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index a2cda240ca9..58cb3fc3487 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -140,10 +140,10 @@ extern void rt6_redirect(const struct in6_addr *dest, u8 *lladdr, int on_link); -extern void rt6_pmtu_discovery(const struct in6_addr *daddr, - const struct in6_addr *saddr, - struct net_device *dev, - u32 pmtu); +extern void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, + int oif, u32 mark); +extern void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, + __be32 mtu); struct netlink_callback; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index fa9512d86f3..9991be083ad 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -165,6 +165,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } else dst_hold(dst); + dst->ops->update_pmtu(dst, ntohl(info)); + if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { dccp_sync_mss(sk, dst_mtu(dst)); } /* else let the usual retransmit timer handle it */ diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index f1a4a2c28ed..49d4d26bda8 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -621,7 +622,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%pI6\n", ntohl(ah->spi), &iph->daddr); - + ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index db1521fcda5..89a615ba84f 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -442,6 +443,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return; pr_debug("pmtu discovery on SA ESP/%08x/%pI6\n", ntohl(esph->spi), &iph->daddr); + ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index ed89bba745a..5247d5c211f 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -649,7 +649,6 @@ static int icmpv6_rcv(struct sk_buff *skb) struct net_device *dev = skb->dev; struct inet6_dev *idev = __in6_dev_get(dev); const struct in6_addr *saddr, *daddr; - const struct ipv6hdr *orig_hdr; struct icmp6hdr *hdr; u8 type; @@ -661,7 +660,7 @@ static int icmpv6_rcv(struct sk_buff *skb) XFRM_STATE_ICMP)) goto drop_no_count; - if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr))) + if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr))) goto drop_no_count; nh = skb_network_offset(skb); @@ -722,9 +721,6 @@ static int icmpv6_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto discard_it; hdr = icmp6_hdr(skb); - orig_hdr = (struct ipv6hdr *) (hdr + 1); - rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev, - ntohl(hdr->icmp6_mtu)); /* * Drop through to notify diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 5cb75bfe45b..92832385a8e 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -74,6 +75,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, pr_debug("pmtu discovery on SA IPCOMP/%08x/%pI6\n", spi, &iph->daddr); + ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 93d69836fde..43b0042f15f 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -328,9 +328,10 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, return; harderr = icmpv6_err_convert(type, code, &err); - if (type == ICMPV6_PKT_TOOBIG) + if (type == ICMPV6_PKT_TOOBIG) { + ip6_sk_update_pmtu(skb, sk, info); harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); - + } if (np->recverr) { u8 *payload = skb->data; if (!inet->hdrincl) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 58a3ec23da2..0d41f68daff 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1049,7 +1049,10 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) { struct rt6_info *rt6 = (struct rt6_info*)dst; + dst_confirm(dst); if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { + struct net *net = dev_net(dst->dev); + rt6->rt6i_flags |= RTF_MODIFIED; if (mtu < IPV6_MIN_MTU) { u32 features = dst_metric(dst, RTAX_FEATURES); @@ -1058,9 +1061,39 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) dst_metric_set(dst, RTAX_FEATURES, features); } dst_metric_set(dst, RTAX_MTU, mtu); + rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires); } } +void ip6_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, + int oif, __be32 mark) +{ + const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; + struct dst_entry *dst; + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_oif = oif; + fl6.flowi6_mark = mark; + fl6.flowi6_flags = FLOWI_FLAG_PRECOW_METRICS; + fl6.daddr = iph->daddr; + fl6.saddr = iph->saddr; + fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; + + dst = ip6_route_output(net, NULL, &fl6); + if (!dst->error) + ip6_rt_update_pmtu(dst, ntohl(mtu)); + dst_release(dst); +} +EXPORT_SYMBOL_GPL(ip6_update_pmtu); + +void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) +{ + ip6_update_pmtu(skb, sock_net(sk), mtu, + sk->sk_bound_dev_if, sk->sk_mark); +} +EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); + static unsigned int ip6_default_advmss(const struct dst_entry *dst) { struct net_device *dev = dst->dev; @@ -1703,116 +1736,6 @@ out: dst_release(&rt->dst); } -/* - * Handle ICMP "packet too big" messages - * i.e. Path MTU discovery - */ - -static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr, - struct net *net, u32 pmtu, int ifindex) -{ - struct rt6_info *rt, *nrt; - int allfrag = 0; -again: - rt = rt6_lookup(net, daddr, saddr, ifindex, 0); - if (!rt) - return; - - if (rt6_check_expired(rt)) { - ip6_del_rt(rt); - goto again; - } - - if (pmtu >= dst_mtu(&rt->dst)) - goto out; - - if (pmtu < IPV6_MIN_MTU) { - /* - * According to RFC2460, PMTU is set to the IPv6 Minimum Link - * MTU (1280) and a fragment header should always be included - * after a node receiving Too Big message reporting PMTU is - * less than the IPv6 Minimum Link MTU. - */ - pmtu = IPV6_MIN_MTU; - allfrag = 1; - } - - /* New mtu received -> path was valid. - They are sent only in response to data packets, - so that this nexthop apparently is reachable. --ANK - */ - dst_confirm(&rt->dst); - - /* Host route. If it is static, it would be better - not to override it, but add new one, so that - when cache entry will expire old pmtu - would return automatically. - */ - if (rt->rt6i_flags & RTF_CACHE) { - dst_metric_set(&rt->dst, RTAX_MTU, pmtu); - if (allfrag) { - u32 features = dst_metric(&rt->dst, RTAX_FEATURES); - features |= RTAX_FEATURE_ALLFRAG; - dst_metric_set(&rt->dst, RTAX_FEATURES, features); - } - rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires); - rt->rt6i_flags |= RTF_MODIFIED; - goto out; - } - - /* Network route. - Two cases are possible: - 1. It is connected route. Action: COW - 2. It is gatewayed route or NONEXTHOP route. Action: clone it. - */ - if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) - nrt = rt6_alloc_cow(rt, daddr, saddr); - else - nrt = rt6_alloc_clone(rt, daddr); - - if (nrt) { - dst_metric_set(&nrt->dst, RTAX_MTU, pmtu); - if (allfrag) { - u32 features = dst_metric(&nrt->dst, RTAX_FEATURES); - features |= RTAX_FEATURE_ALLFRAG; - dst_metric_set(&nrt->dst, RTAX_FEATURES, features); - } - - /* According to RFC 1981, detecting PMTU increase shouldn't be - * happened within 5 mins, the recommended timer is 10 mins. - * Here this route expiration time is set to ip6_rt_mtu_expires - * which is 10 mins. After 10 mins the decreased pmtu is expired - * and detecting PMTU increase will be automatically happened. - */ - rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires); - nrt->rt6i_flags |= RTF_DYNAMIC; - ip6_ins_rt(nrt); - } -out: - dst_release(&rt->dst); -} - -void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr, - struct net_device *dev, u32 pmtu) -{ - struct net *net = dev_net(dev); - - /* - * RFC 1981 states that a node "MUST reduce the size of the packets it - * is sending along the path" that caused the Packet Too Big message. - * Since it's not possible in the general case to determine which - * interface was used to send the original packet, we update the MTU - * on the interface that will be used to send future packets. We also - * update the MTU on the interface that received the Packet Too Big in - * case the original packet was forced out that interface with - * SO_BINDTODEVICE or similar. This is the next best thing to the - * correct behaviour, which would be to update the MTU on all - * interfaces. - */ - rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0); - rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex); -} - /* * Misc support functions */ diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f91b0bfd12d..26a88623940 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -415,6 +415,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } else dst_hold(dst); + dst->ops->update_pmtu(dst, ntohl(info)); + if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { tcp_sync_mss(sk, dst_mtu(dst)); tcp_simple_retransmit(sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index f05099fc590..051ad481973 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -479,6 +479,9 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk == NULL) return; + if (type == ICMPV6_PKT_TOOBIG) + ip6_sk_update_pmtu(skb, sk, info); + np = inet6_sk(sk); if (!icmpv6_err_convert(type, code, &err) && !np->recverr) -- cgit v1.2.3-70-g09d2 From f9242b6b28d61295f2bf7e8adfb1060b382e5381 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 19 Jun 2012 18:56:21 -0700 Subject: inet: Sanitize inet{,6} protocol demux. Don't pretend that inet_protos[] and inet6_protos[] are hashes, thay are just a straight arrays. Remove all unnecessary hash masking. Document MAX_INET_PROTOS. Use RAW_HTABLE_SIZE when appropriate. Reported-by: Ben Hutchings Signed-off-by: David S. Miller --- include/net/protocol.h | 7 +++++-- net/ipv4/af_inet.c | 26 ++++++++++++-------------- net/ipv4/icmp.c | 9 ++++----- net/ipv4/ip_input.c | 5 ++--- net/ipv4/protocol.c | 8 +++----- net/ipv6/icmp.c | 7 ++----- net/ipv6/ip6_input.c | 9 +++------ net/ipv6/protocol.c | 8 +++----- net/ipv6/raw.c | 4 ++-- 9 files changed, 36 insertions(+), 47 deletions(-) (limited to 'net/ipv6/icmp.c') diff --git a/include/net/protocol.h b/include/net/protocol.h index 875f4895b03..a1b1b530c33 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -29,8 +29,11 @@ #include #endif -#define MAX_INET_PROTOS 256 /* Must be a power of 2 */ - +/* This is one larger than the largest protocol value that can be + * found in an ipv4 or ipv6 header. Since in both cases the protocol + * value is presented in a __u8, this is defined to be 256. + */ +#define MAX_INET_PROTOS 256 /* This is used to register protocols. */ struct net_protocol { diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e4e8e00a2c9..85a3b176313 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -242,20 +242,18 @@ void build_ehash_secret(void) } EXPORT_SYMBOL(build_ehash_secret); -static inline int inet_netns_ok(struct net *net, int protocol) +static inline int inet_netns_ok(struct net *net, __u8 protocol) { - int hash; const struct net_protocol *ipprot; if (net_eq(net, &init_net)) return 1; - hash = protocol & (MAX_INET_PROTOS - 1); - ipprot = rcu_dereference(inet_protos[hash]); - - if (ipprot == NULL) + ipprot = rcu_dereference(inet_protos[protocol]); + if (ipprot == NULL) { /* raw IP is OK */ return 1; + } return ipprot->netns_ok; } @@ -1216,8 +1214,8 @@ EXPORT_SYMBOL(inet_sk_rebuild_header); static int inet_gso_send_check(struct sk_buff *skb) { - const struct iphdr *iph; const struct net_protocol *ops; + const struct iphdr *iph; int proto; int ihl; int err = -EINVAL; @@ -1236,7 +1234,7 @@ static int inet_gso_send_check(struct sk_buff *skb) __skb_pull(skb, ihl); skb_reset_transport_header(skb); iph = ip_hdr(skb); - proto = iph->protocol & (MAX_INET_PROTOS - 1); + proto = iph->protocol; err = -EPROTONOSUPPORT; rcu_read_lock(); @@ -1253,8 +1251,8 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); - struct iphdr *iph; const struct net_protocol *ops; + struct iphdr *iph; int proto; int ihl; int id; @@ -1286,7 +1284,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, skb_reset_transport_header(skb); iph = ip_hdr(skb); id = ntohs(iph->id); - proto = iph->protocol & (MAX_INET_PROTOS - 1); + proto = iph->protocol; segs = ERR_PTR(-EPROTONOSUPPORT); rcu_read_lock(); @@ -1340,7 +1338,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, goto out; } - proto = iph->protocol & (MAX_INET_PROTOS - 1); + proto = iph->protocol; rcu_read_lock(); ops = rcu_dereference(inet_protos[proto]); @@ -1398,11 +1396,11 @@ out: static int inet_gro_complete(struct sk_buff *skb) { - const struct net_protocol *ops; + __be16 newlen = htons(skb->len - skb_network_offset(skb)); struct iphdr *iph = ip_hdr(skb); - int proto = iph->protocol & (MAX_INET_PROTOS - 1); + const struct net_protocol *ops; + int proto = iph->protocol; int err = -ENOSYS; - __be16 newlen = htons(skb->len - skb_network_offset(skb)); csum_replace2(&iph->check, iph->tot_len, newlen); iph->tot_len = newlen; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index e1caa1abe5d..49a74cc79dc 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -637,12 +637,12 @@ EXPORT_SYMBOL(icmp_send); static void icmp_unreach(struct sk_buff *skb) { + const struct net_protocol *ipprot; const struct iphdr *iph; struct icmphdr *icmph; - int hash, protocol; - const struct net_protocol *ipprot; - u32 info = 0; struct net *net; + u32 info = 0; + int protocol; net = dev_net(skb_dst(skb)->dev); @@ -731,9 +731,8 @@ static void icmp_unreach(struct sk_buff *skb) */ raw_icmp_error(skb, protocol, info); - hash = protocol & (MAX_INET_PROTOS - 1); rcu_read_lock(); - ipprot = rcu_dereference(inet_protos[hash]); + ipprot = rcu_dereference(inet_protos[protocol]); if (ipprot && ipprot->err_handler) ipprot->err_handler(skb, info); rcu_read_unlock(); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 8590144ca33..c4fe1d27113 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -198,14 +198,13 @@ static int ip_local_deliver_finish(struct sk_buff *skb) rcu_read_lock(); { int protocol = ip_hdr(skb)->protocol; - int hash, raw; const struct net_protocol *ipprot; + int raw; resubmit: raw = raw_local_deliver(skb, protocol); - hash = protocol & (MAX_INET_PROTOS - 1); - ipprot = rcu_dereference(inet_protos[hash]); + ipprot = rcu_dereference(inet_protos[protocol]); if (ipprot != NULL) { int ret; diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 9ae5c01cd0b..8918eff1426 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -36,9 +36,7 @@ const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly; int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) { - int hash = protocol & (MAX_INET_PROTOS - 1); - - return !cmpxchg((const struct net_protocol **)&inet_protos[hash], + return !cmpxchg((const struct net_protocol **)&inet_protos[protocol], NULL, prot) ? 0 : -1; } EXPORT_SYMBOL(inet_add_protocol); @@ -49,9 +47,9 @@ EXPORT_SYMBOL(inet_add_protocol); int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol) { - int ret, hash = protocol & (MAX_INET_PROTOS - 1); + int ret; - ret = (cmpxchg((const struct net_protocol **)&inet_protos[hash], + ret = (cmpxchg((const struct net_protocol **)&inet_protos[protocol], prot, NULL) == prot) ? 0 : -1; synchronize_net(); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 5247d5c211f..c7da1422cbd 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -600,9 +600,8 @@ static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) { const struct inet6_protocol *ipprot; int inner_offset; - int hash; - u8 nexthdr; __be16 frag_off; + u8 nexthdr; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) return; @@ -629,10 +628,8 @@ static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) --ANK (980726) */ - hash = nexthdr & (MAX_INET_PROTOS - 1); - rcu_read_lock(); - ipprot = rcu_dereference(inet6_protos[hash]); + ipprot = rcu_dereference(inet6_protos[nexthdr]); if (ipprot && ipprot->err_handler) ipprot->err_handler(skb, NULL, type, code, inner_offset, info); rcu_read_unlock(); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 21a15dfe4a9..5ab923e51af 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -168,13 +168,12 @@ drop: static int ip6_input_finish(struct sk_buff *skb) { + struct net *net = dev_net(skb_dst(skb)->dev); const struct inet6_protocol *ipprot; + struct inet6_dev *idev; unsigned int nhoff; int nexthdr; bool raw; - u8 hash; - struct inet6_dev *idev; - struct net *net = dev_net(skb_dst(skb)->dev); /* * Parse extension headers @@ -189,9 +188,7 @@ resubmit: nexthdr = skb_network_header(skb)[nhoff]; raw = raw6_local_deliver(skb, nexthdr); - - hash = nexthdr & (MAX_INET_PROTOS - 1); - if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) { + if ((ipprot = rcu_dereference(inet6_protos[nexthdr])) != NULL) { int ret; if (ipprot->flags & INET6_PROTO_FINAL) { diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c index 9a7978fdc02..053082dfc93 100644 --- a/net/ipv6/protocol.c +++ b/net/ipv6/protocol.c @@ -29,9 +29,7 @@ const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly; int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol) { - int hash = protocol & (MAX_INET_PROTOS - 1); - - return !cmpxchg((const struct inet6_protocol **)&inet6_protos[hash], + return !cmpxchg((const struct inet6_protocol **)&inet6_protos[protocol], NULL, prot) ? 0 : -1; } EXPORT_SYMBOL(inet6_add_protocol); @@ -42,9 +40,9 @@ EXPORT_SYMBOL(inet6_add_protocol); int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol) { - int ret, hash = protocol & (MAX_INET_PROTOS - 1); + int ret; - ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[hash], + ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[protocol], prot, NULL) == prot) ? 0 : -1; synchronize_net(); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 43b0042f15f..b5c1dcb2773 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -165,7 +165,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) saddr = &ipv6_hdr(skb)->saddr; daddr = saddr + 1; - hash = nexthdr & (MAX_INET_PROTOS - 1); + hash = nexthdr & (RAW_HTABLE_SIZE - 1); read_lock(&raw_v6_hashinfo.lock); sk = sk_head(&raw_v6_hashinfo.ht[hash]); @@ -229,7 +229,7 @@ bool raw6_local_deliver(struct sk_buff *skb, int nexthdr) { struct sock *raw_sk; - raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (MAX_INET_PROTOS - 1)]); + raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (RAW_HTABLE_SIZE - 1)]); if (raw_sk && !ipv6_raw_deliver(skb, nexthdr)) raw_sk = NULL; -- cgit v1.2.3-70-g09d2 From 1d861aa4b3fb08822055345f480850205ffe6170 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 10 Jul 2012 03:58:16 -0700 Subject: inet: Minimize use of cached route inetpeer. Only use it in the absolutely required cases: 1) COW'ing metrics 2) ipv4 PMTU 3) ipv4 redirects Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 3 ++- net/ipv4/route.c | 32 ++++++++++++++++---------------- net/ipv6/icmp.c | 4 +++- net/ipv6/ip6_output.c | 10 ++++++++-- net/ipv6/ndisc.c | 8 ++++++-- 5 files changed, 35 insertions(+), 22 deletions(-) (limited to 'net/ipv6/icmp.c') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 4bce5a2830a..4a049449305 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -254,9 +254,10 @@ static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, /* Limit if icmp type is enabled in ratemask. */ if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) { - struct inet_peer *peer = rt_get_peer_create(rt, fl4->daddr); + struct inet_peer *peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, 1); rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit); + inet_putpeer(peer); } out: return rc; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 78d81543766..e376354dcb6 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1289,20 +1289,15 @@ static void ip_select_fb_ident(struct iphdr *iph) void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) { - struct rtable *rt = (struct rtable *) dst; - - if (rt && !(rt->dst.flags & DST_NOPEER)) { - struct inet_peer *peer = rt_get_peer_create(rt, rt->rt_dst); + struct net *net = dev_net(dst->dev); + struct inet_peer *peer; - /* If peer is attached to destination, it is never detached, - so that we need not to grab a lock to dereference it. - */ - if (peer) { - iph->id = htons(inet_getid(peer, more)); - return; - } - } else if (!rt) - pr_debug("rt_bind_peer(0) @%p\n", __builtin_return_address(0)); + peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1); + if (peer) { + iph->id = htons(inet_getid(peer, more)); + inet_putpeer(peer); + return; + } ip_select_fb_ident(iph); } @@ -1492,6 +1487,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) struct rtable *rt = skb_rtable(skb); struct in_device *in_dev; struct inet_peer *peer; + struct net *net; int log_martians; rcu_read_lock(); @@ -1503,7 +1499,8 @@ void ip_rt_send_redirect(struct sk_buff *skb) log_martians = IN_DEV_LOG_MARTIANS(in_dev); rcu_read_unlock(); - peer = rt_get_peer_create(rt, rt->rt_dst); + net = dev_net(rt->dst.dev); + peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1); if (!peer) { icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); return; @@ -1520,7 +1517,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) */ if (peer->rate_tokens >= ip_rt_redirect_number) { peer->rate_last = jiffies; - return; + goto out_put_peer; } /* Check for load limit; set rate_last to the latest sent @@ -1541,6 +1538,8 @@ void ip_rt_send_redirect(struct sk_buff *skb) &rt->rt_dst, &rt->rt_gateway); #endif } +out_put_peer: + inet_putpeer(peer); } static int ip_error(struct sk_buff *skb) @@ -1583,7 +1582,7 @@ static int ip_error(struct sk_buff *skb) break; } - peer = rt_get_peer_create(rt, rt->rt_dst); + peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1); send = true; if (peer) { @@ -1596,6 +1595,7 @@ static int ip_error(struct sk_buff *skb) peer->rate_tokens -= ip_rt_error_cost; else send = false; + inet_putpeer(peer); } if (send) icmp_send(skb, ICMP_DEST_UNREACH, code, 0); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index c7da1422cbd..a113f7d7e93 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -194,8 +194,10 @@ static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type, if (rt->rt6i_dst.plen < 128) tmo >>= ((128 - rt->rt6i_dst.plen)>>5); - peer = rt6_get_peer_create(rt); + peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); res = inet_peer_xrlim_allow(peer, tmo); + if (peer) + inet_putpeer(peer); } dst_release(dst); return res; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index c6af5963a20..5b2d63ed793 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -466,13 +466,15 @@ int ip6_forward(struct sk_buff *skb) else target = &hdr->daddr; - peer = rt6_get_peer_create(rt); + peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); /* Limit redirects both by destination (here) and by source (inside ndisc_send_redirect) */ if (inet_peer_xrlim_allow(peer, 1*HZ)) ndisc_send_redirect(skb, target); + if (peer) + inet_putpeer(peer); } else { int addrtype = ipv6_addr_type(&hdr->saddr); @@ -592,10 +594,14 @@ void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) int old, new; if (rt && !(rt->dst.flags & DST_NOPEER)) { - struct inet_peer *peer = rt6_get_peer_create(rt); + struct inet_peer *peer; + struct net *net; + net = dev_net(rt->dst.dev); + peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); if (peer) { fhdr->identification = htonl(inet_getid(peer, 0)); + inet_putpeer(peer); return; } } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 69a6330dea9..0fddd571400 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1486,6 +1486,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) int rd_len; int err; u8 ha_buf[MAX_ADDR_LEN], *ha = NULL; + bool ret; if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) { ND_PRINTK(2, warn, "Redirect: no link-local address on %s\n", @@ -1519,8 +1520,11 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) "Redirect: destination is not a neighbour\n"); goto release; } - peer = rt6_get_peer_create(rt); - if (!inet_peer_xrlim_allow(peer, 1*HZ)) + peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); + ret = inet_peer_xrlim_allow(peer, 1*HZ); + if (peer) + inet_putpeer(peer); + if (!ret) goto release; if (dev->addr_len) { -- cgit v1.2.3-70-g09d2 From b94f1c0904da9b8bf031667afc48080ba7c3e8c9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 Jul 2012 00:33:37 -0700 Subject: ipv6: Use icmpv6_notify() to propagate redirect, instead of rt6_redirect(). And delete rt6_redirect(), since it is no longer used. Signed-off-by: David S. Miller --- include/net/ip6_route.h | 2 - include/net/ipv6.h | 2 + net/ipv6/icmp.c | 2 +- net/ipv6/ndisc.c | 2 +- net/ipv6/route.c | 107 ------------------------------------------------ 5 files changed, 4 insertions(+), 111 deletions(-) (limited to 'net/ipv6/icmp.c') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 693940565d8..b6b6f7d6f3c 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -133,8 +133,6 @@ extern int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, const struct in6_addr *gwaddr); -extern void rt6_redirect(struct sk_buff *skb); - extern void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif, u32 mark); extern void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, diff --git a/include/net/ipv6.h b/include/net/ipv6.h index d4261d4d6c4..f695f39e892 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -251,6 +251,8 @@ static inline void fl6_sock_release(struct ip6_flowlabel *fl) atomic_dec(&fl->users); } +extern void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info); + extern int ip6_ra_control(struct sock *sk, int sel); extern int ipv6_parse_hopopts(struct sk_buff *skb); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index a113f7d7e93..24d69dbca4d 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -598,7 +598,7 @@ out: icmpv6_xmit_unlock(sk); } -static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) +void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) { const struct inet6_protocol *ipprot; int inner_offset; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index b8d53e186a7..ff36194a71a 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1350,7 +1350,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) return; } - rt6_redirect(skb); + icmpv6_notify(skb, NDISC_REDIRECT, 0, 0); } void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f52cf83bb1e..7296af144d6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1633,92 +1633,6 @@ static int ip6_route_del(struct fib6_config *cfg) return err; } -/* - * Handle redirects - */ -struct ip6rd_flowi { - struct flowi6 fl6; - struct in6_addr gateway; -}; - -static struct rt6_info *__ip6_route_redirect(struct net *net, - struct fib6_table *table, - struct flowi6 *fl6, - int flags) -{ - struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; - struct rt6_info *rt; - struct fib6_node *fn; - - /* - * Get the "current" route for this destination and - * check if the redirect has come from approriate router. - * - * RFC 2461 specifies that redirects should only be - * accepted if they come from the nexthop to the target. - * Due to the way the routes are chosen, this notion - * is a bit fuzzy and one might need to check all possible - * routes. - */ - - read_lock_bh(&table->tb6_lock); - fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); -restart: - for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { - /* - * Current route is on-link; redirect is always invalid. - * - * Seems, previous statement is not true. It could - * be node, which looks for us as on-link (f.e. proxy ndisc) - * But then router serving it might decide, that we should - * know truth 8)8) --ANK (980726). - */ - if (rt6_check_expired(rt)) - continue; - if (!(rt->rt6i_flags & RTF_GATEWAY)) - continue; - if (fl6->flowi6_oif != rt->dst.dev->ifindex) - continue; - if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) - continue; - break; - } - - if (!rt) - rt = net->ipv6.ip6_null_entry; - BACKTRACK(net, &fl6->saddr); -out: - dst_hold(&rt->dst); - - read_unlock_bh(&table->tb6_lock); - - return rt; -}; - -static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest, - const struct in6_addr *src, - const struct in6_addr *gateway, - struct net_device *dev) -{ - int flags = RT6_LOOKUP_F_HAS_SADDR; - struct net *net = dev_net(dev); - struct ip6rd_flowi rdfl = { - .fl6 = { - .flowi6_oif = dev->ifindex, - .daddr = *dest, - .saddr = *src, - }, - }; - - rdfl.gateway = *gateway; - - if (rt6_need_strict(dest)) - flags |= RT6_LOOKUP_F_IFACE; - - return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6, - flags, __ip6_route_redirect); -} - static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb) { struct net *net = dev_net(skb->dev); @@ -1848,27 +1762,6 @@ out: neigh_release(neigh); } -void rt6_redirect(struct sk_buff *skb) -{ - const struct in6_addr *target; - const struct in6_addr *dest; - const struct in6_addr *src; - const struct in6_addr *saddr; - struct icmp6hdr *icmph; - struct rt6_info *rt; - - icmph = icmp6_hdr(skb); - target = (const struct in6_addr *) (icmph + 1); - dest = target + 1; - - src = &ipv6_hdr(skb)->daddr; - saddr = &ipv6_hdr(skb)->saddr; - - rt = ip6_route_redirect(dest, src, saddr, skb->dev); - rt6_do_redirect(&rt->dst, skb); - dst_release(&rt->dst); -} - /* * Misc support functions */ -- cgit v1.2.3-70-g09d2