diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-24 10:01:50 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-24 10:01:50 -0700 |
commit | 3c4cfadef6a1665d9cd02a543782d03d3e6740c6 (patch) | |
tree | 3df72faaacd494d5ac8c9668df4f529b1b5e4457 /net/ipv6/ip6_tunnel.c | |
parent | e017507f37d5cb8b541df165a824958bc333bec3 (diff) | |
parent | 320f5ea0cedc08ef65d67e056bcb9d181386ef2c (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking changes from David S Miller:
1) Remove the ipv4 routing cache. Now lookups go directly into the FIB
trie and use prebuilt routes cached there.
No more garbage collection, no more rDOS attacks on the routing
cache. Instead we now get predictable and consistent performance,
no matter what the pattern of traffic we service.
This has been almost 2 years in the making. Special thanks to
Julian Anastasov, Eric Dumazet, Steffen Klassert, and others who
have helped along the way.
I'm sure that with a change of this magnitude there will be some
kind of fallout, but such things ought the be simple to fix at this
point. Luckily I'm not European so I'll be around all of August to
fix things :-)
The major stages of this work here are each fronted by a forced
merge commit whose commit message contains a top-level description
of the motivations and implementation issues.
2) Pre-demux of established ipv4 TCP sockets, saves a route demux on
input.
3) TCP SYN/ACK performance tweaks from Eric Dumazet.
4) Add namespace support for netfilter L4 conntrack helpers, from Gao
Feng.
5) Add config mechanism for Energy Efficient Ethernet to ethtool, from
Yuval Mintz.
6) Remove quadratic behavior from /proc/net/unix, from Eric Dumazet.
7) Support for connection tracker helpers in userspace, from Pablo
Neira Ayuso.
8) Allow userspace driven TX load balancing functions in TEAM driver,
from Jiri Pirko.
9) Kill off NLMSG_PUT and RTA_PUT macros, more gross stuff with
embedded gotos.
10) TCP Small Queues, essentially minimize the amount of TCP data queued
up in the packet scheduler layer. Whereas the existing BQL (Byte
Queue Limits) limits the pkt_sched --> netdevice queuing levels,
this controls the TCP --> pkt_sched queueing levels.
From Eric Dumazet.
11) Reduce the number of get_page/put_page ops done on SKB fragments,
from Alexander Duyck.
12) Implement protection against blind resets in TCP (RFC 5961), from
Eric Dumazet.
13) Support the client side of TCP Fast Open, basically the ability to
send data in the SYN exchange, from Yuchung Cheng.
Basically, the sender queues up data with a sendmsg() call using
MSG_FASTOPEN, then they do the connect() which emits the queued up
fastopen data.
14) Avoid all the problems we get into in TCP when timers or PMTU events
hit a locked socket. The TCP Small Queues changes added a
tcp_release_cb() that allows us to queue work up to the
release_sock() caller, and that's what we use here too. From Eric
Dumazet.
15) Zero copy on TX support for TUN driver, from Michael S. Tsirkin.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1870 commits)
genetlink: define lockdep_genl_is_held() when CONFIG_LOCKDEP
r8169: revert "add byte queue limit support".
ipv4: Change rt->rt_iif encoding.
net: Make skb->skb_iif always track skb->dev
ipv4: Prepare for change of rt->rt_iif encoding.
ipv4: Remove all RTCF_DIRECTSRC handliing.
ipv4: Really ignore ICMP address requests/replies.
decnet: Don't set RTCF_DIRECTSRC.
net/ipv4/ip_vti.c: Fix __rcu warnings detected by sparse.
ipv4: Remove redundant assignment
rds: set correct msg_namelen
openvswitch: potential NULL deref in sample()
tcp: dont drop MTU reduction indications
bnx2x: Add new 57840 device IDs
tcp: avoid oops in tcp_metrics and reset tcpm_stamp
niu: Change niu_rbr_fill() to use unlikely() to check niu_rbr_add_page() return value
niu: Fix to check for dma mapping errors.
net: Fix references to out-of-scope variables in put_cmsg_compat()
net: ethernet: davinci_emac: add pm_runtime support
net: ethernet: davinci_emac: Remove unnecessary #include
...
Diffstat (limited to 'net/ipv6/ip6_tunnel.c')
-rw-r--r-- | net/ipv6/ip6_tunnel.c | 96 |
1 files changed, 58 insertions, 38 deletions
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index c9015fad8d6..9a1d5fe6aef 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -40,6 +40,7 @@ #include <linux/rtnetlink.h> #include <linux/netfilter_ipv6.h> #include <linux/slab.h> +#include <linux/hash.h> #include <asm/uaccess.h> #include <linux/atomic.h> @@ -70,11 +71,15 @@ MODULE_ALIAS_NETDEV("ip6tnl0"); #define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) #define IPV6_TCLASS_SHIFT 20 -#define HASH_SIZE 32 +#define HASH_SIZE_SHIFT 5 +#define HASH_SIZE (1 << HASH_SIZE_SHIFT) -#define HASH(addr) ((__force u32)((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^ \ - (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \ - (HASH_SIZE - 1)) +static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) +{ + u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2); + + return hash_32(hash, HASH_SIZE_SHIFT); +} static int ip6_tnl_dev_init(struct net_device *dev); static void ip6_tnl_dev_setup(struct net_device *dev); @@ -166,12 +171,11 @@ static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) static struct ip6_tnl * ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local) { - unsigned int h0 = HASH(remote); - unsigned int h1 = HASH(local); + unsigned int hash = HASH(remote, local); struct ip6_tnl *t; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); - for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[h0 ^ h1]) { + for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (ipv6_addr_equal(local, &t->parms.laddr) && ipv6_addr_equal(remote, &t->parms.raddr) && (t->dev->flags & IFF_UP)) @@ -205,7 +209,7 @@ ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct ip6_tnl_parm *p) if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) { prio = 1; - h = HASH(remote) ^ HASH(local); + h = HASH(remote, local); } return &ip6n->tnls[prio][h]; } @@ -252,7 +256,7 @@ static void ip6_dev_free(struct net_device *dev) } /** - * ip6_tnl_create() - create a new tunnel + * ip6_tnl_create - create a new tunnel * @p: tunnel parameters * @pt: pointer to new tunnel * @@ -550,6 +554,9 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, rel_type = ICMP_DEST_UNREACH; rel_code = ICMP_FRAG_NEEDED; break; + case NDISC_REDIRECT: + rel_type = ICMP_REDIRECT; + rel_code = ICMP_REDIR_HOST; default: return 0; } @@ -606,8 +613,10 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (rel_info > dst_mtu(skb_dst(skb2))) goto out; - skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info); + skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info); } + if (rel_type == ICMP_REDIRECT) + skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2); icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); @@ -684,24 +693,50 @@ static void ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, IP6_ECN_set_ce(ipv6_hdr(skb)); } +static __u32 ip6_tnl_get_cap(struct ip6_tnl *t, + const struct in6_addr *laddr, + const struct in6_addr *raddr) +{ + struct ip6_tnl_parm *p = &t->parms; + int ltype = ipv6_addr_type(laddr); + int rtype = ipv6_addr_type(raddr); + __u32 flags = 0; + + if (ltype == IPV6_ADDR_ANY || rtype == IPV6_ADDR_ANY) { + flags = IP6_TNL_F_CAP_PER_PACKET; + } else if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && + rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && + !((ltype|rtype) & IPV6_ADDR_LOOPBACK) && + (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) { + if (ltype&IPV6_ADDR_UNICAST) + flags |= IP6_TNL_F_CAP_XMIT; + if (rtype&IPV6_ADDR_UNICAST) + flags |= IP6_TNL_F_CAP_RCV; + } + return flags; +} + /* called with rcu_read_lock() */ -static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t) +static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t, + const struct in6_addr *laddr, + const struct in6_addr *raddr) { struct ip6_tnl_parm *p = &t->parms; int ret = 0; struct net *net = dev_net(t->dev); - if (p->flags & IP6_TNL_F_CAP_RCV) { + if ((p->flags & IP6_TNL_F_CAP_RCV) || + ((p->flags & IP6_TNL_F_CAP_PER_PACKET) && + (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_RCV))) { struct net_device *ldev = NULL; if (p->link) ldev = dev_get_by_index_rcu(net, p->link); - if ((ipv6_addr_is_multicast(&p->laddr) || - likely(ipv6_chk_addr(net, &p->laddr, ldev, 0))) && - likely(!ipv6_chk_addr(net, &p->raddr, NULL, 0))) + if ((ipv6_addr_is_multicast(laddr) || + likely(ipv6_chk_addr(net, laddr, ldev, 0))) && + likely(!ipv6_chk_addr(net, raddr, NULL, 0))) ret = 1; - } return ret; } @@ -740,7 +775,7 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, goto discard; } - if (!ip6_tnl_rcv_ctl(t)) { + if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) { t->dev->stats.rx_dropped++; rcu_read_unlock(); goto discard; @@ -921,7 +956,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if (skb->len > mtu) { *pmtu = mtu; err = -EMSGSIZE; @@ -1114,25 +1149,6 @@ tx_err: return NETDEV_TX_OK; } -static void ip6_tnl_set_cap(struct ip6_tnl *t) -{ - struct ip6_tnl_parm *p = &t->parms; - int ltype = ipv6_addr_type(&p->laddr); - int rtype = ipv6_addr_type(&p->raddr); - - p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV); - - if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && - rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && - !((ltype|rtype) & IPV6_ADDR_LOOPBACK) && - (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) { - if (ltype&IPV6_ADDR_UNICAST) - p->flags |= IP6_TNL_F_CAP_XMIT; - if (rtype&IPV6_ADDR_UNICAST) - p->flags |= IP6_TNL_F_CAP_RCV; - } -} - static void ip6_tnl_link_config(struct ip6_tnl *t) { struct net_device *dev = t->dev; @@ -1153,7 +1169,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL)) fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo; - ip6_tnl_set_cap(t); + p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET); + p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr); if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV) dev->flags |= IFF_POINTOPOINT; @@ -1438,6 +1455,9 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) t->parms.proto = IPPROTO_IPV6; dev_hold(dev); + + ip6_tnl_link_config(t); + rcu_assign_pointer(ip6n->tnls_wc[0], t); return 0; } |