From bf3d6a8f791b2a81279b9ce3201b4970f6fbe51a Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 27 May 2013 23:48:15 +0000 Subject: iptunnel: specify protocol outside IP header Before this patch, ip_tunnel_xmit() was using the field protocol from the IP header passed into argument. There is no functional change, this patch prepares the support of IPv4 over IPv4 for module sit. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net/ip_tunnels.h') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 4b6f0b28f41..40b4dfce01f 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -101,7 +101,7 @@ int __net_init ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, void __net_exit ip_tunnel_delete_net(struct ip_tunnel_net *itn); void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, - const struct iphdr *tnl_params); + const struct iphdr *tnl_params, const u8 protocol); int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); -- cgit v1.2.3-70-g09d2 From 0e6fbc5b6c6218987c93b8c7ca60cf786062899d Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 17 Jun 2013 17:49:56 -0700 Subject: ip_tunnels: extend iptunnel_xmit() Refactor various ip tunnels xmit functions and extend iptunnel_xmit() so that there is more code sharing. Signed-off-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- drivers/net/vxlan.c | 32 +++++------------ include/net/ip_tunnels.h | 26 ++++++++------ net/ipv4/Makefile | 2 +- net/ipv4/ip_tunnel.c | 38 +++++--------------- net/ipv4/ip_tunnel_core.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/sit.c | 39 ++++++--------------- 6 files changed, 131 insertions(+), 94 deletions(-) create mode 100644 net/ipv4/ip_tunnel_core.c (limited to 'include/net/ip_tunnels.h') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index f6dce13c8f8..284c6c00c35 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1021,7 +1021,6 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, struct vxlan_dev *vxlan = netdev_priv(dev); struct rtable *rt; const struct iphdr *old_iph; - struct iphdr *iph; struct vxlanhdr *vxh; struct udphdr *uh; struct flowi4 fl4; @@ -1030,6 +1029,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, u32 vni; __be16 df = 0; __u8 tos, ttl; + int err; dst_port = rdst->remote_port ? 
rdst->remote_port : vxlan->dst_port; vni = rdst->remote_vni; @@ -1097,13 +1097,6 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, vxlan_encap_bypass(skb, vxlan, dst_vxlan); return NETDEV_TX_OK; } - - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | - IPSKB_REROUTED); - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); vxh->vx_flags = htonl(VXLAN_FLAGS); vxh->vx_vni = htonl(vni << 8); @@ -1118,27 +1111,18 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, uh->len = htons(skb->len); uh->check = 0; - __skb_push(skb, sizeof(*iph)); - skb_reset_network_header(skb); - iph = ip_hdr(skb); - iph->version = 4; - iph->ihl = sizeof(struct iphdr) >> 2; - iph->frag_off = df; - iph->protocol = IPPROTO_UDP; - iph->tos = ip_tunnel_ecn_encap(tos, old_iph, skb); - iph->daddr = dst; - iph->saddr = fl4.saddr; - iph->ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); - tunnel_ip_select_ident(skb, old_iph, &rt->dst); - - nf_reset(skb); - vxlan_set_owner(dev, skb); if (handle_offloads(skb)) goto drop; - iptunnel_xmit(skb, dev); + tos = ip_tunnel_ecn_encap(tos, old_iph, skb); + ttl = ttl ? 
: ip4_dst_hoplimit(&rt->dst); + + err = iptunnel_xmit(dev_net(dev), rt, skb, fl4.saddr, dst, + IPPROTO_UDP, tos, ttl, df); + iptunnel_xmit_stats(err, &dev->stats, dev->tstats); + return NETDEV_TX_OK; drop: diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 1be442f8940..b84f1ab09d7 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -155,23 +155,27 @@ static inline void tunnel_ip_select_ident(struct sk_buff *skb, (skb_shinfo(skb)->gso_segs ?: 1) - 1); } -static inline void iptunnel_xmit(struct sk_buff *skb, struct net_device *dev) +int iptunnel_xmit(struct net *net, struct rtable *rt, + struct sk_buff *skb, + __be32 src, __be32 dst, __u8 proto, + __u8 tos, __u8 ttl, __be16 df); + +static inline void iptunnel_xmit_stats(int err, + struct net_device_stats *err_stats, + struct pcpu_tstats __percpu *stats) { - int err; - int pkt_len = skb->len - skb_transport_offset(skb); - struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); + if (err > 0) { + struct pcpu_tstats *tstats = this_cpu_ptr(stats); - nf_reset(skb); - - err = ip_local_out(skb); - if (likely(net_xmit_eval(err) == 0)) { u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += pkt_len; + tstats->tx_bytes += err; tstats->tx_packets++; u64_stats_update_end(&tstats->syncp); + } else if (err < 0) { + err_stats->tx_errors++; + err_stats->tx_aborted_errors++; } else { - dev->stats.tx_errors++; - dev->stats.tx_aborted_errors++; + err_stats->tx_dropped++; } } #endif /* __NET_IP_TUNNELS_H */ diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 7fcf8101d85..86ded0bac9c 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -11,7 +11,7 @@ obj-y := route.o inetpeer.o protocol.o \ tcp_offload.o datagram.o raw.o udp.o udplite.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o \ - inet_fragment.o ping.o + inet_fragment.o ping.o ip_tunnel_core.o obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o obj-$(CONFIG_SYSCTL) += 
sysctl_net_ipv4.o diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index e189db409b0..a06a2ed4959 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -491,19 +491,17 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, { struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *inner_iph; - struct iphdr *iph; struct flowi4 fl4; u8 tos, ttl; __be16 df; struct rtable *rt; /* Route to the other host */ - struct net_device *tdev; /* Device to other host */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst; int mtu; + int err; inner_iph = (const struct iphdr *)skb_inner_network_header(skb); - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); dst = tnl_params->daddr; if (dst == 0) { /* NBMA tunnel */ @@ -571,14 +569,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, dev->stats.tx_carrier_errors++; goto tx_error; } - tdev = rt->dst.dev; - - if (tdev == dev) { + if (rt->dst.dev == dev) { ip_rt_put(rt); dev->stats.collisions++; goto tx_error; } - df = tnl_params->frag_off; if (df) @@ -596,6 +591,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, if (!skb_is_gso(skb) && (inner_iph->frag_off&htons(IP_DF)) && mtu < ntohs(inner_iph->tot_len)) { + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); ip_rt_put(rt); goto tx_error; @@ -646,8 +642,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ttl = ip4_dst_hoplimit(&rt->dst); } - max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr) - + rt->dst.header_len; + max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) + + rt->dst.header_len; if (max_headroom > dev->needed_headroom) { dev->needed_headroom = max_headroom; if (skb_cow_head(skb, dev->needed_headroom)) { @@ -657,27 +653,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } } - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - - /* Push down and install the IP 
header. */ - skb_push(skb, sizeof(struct iphdr)); - skb_reset_network_header(skb); - - iph = ip_hdr(skb); - inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + err = iptunnel_xmit(dev_net(dev), rt, skb, + fl4.saddr, fl4.daddr, protocol, + ip_tunnel_ecn_encap(tos, inner_iph, skb), ttl, df); + iptunnel_xmit_stats(err, &dev->stats, dev->tstats); - iph->version = 4; - iph->ihl = sizeof(struct iphdr) >> 2; - iph->frag_off = df; - iph->protocol = protocol; - iph->tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); - iph->daddr = fl4.daddr; - iph->saddr = fl4.saddr; - iph->ttl = ttl; - tunnel_ip_select_ident(skb, inner_iph, &rt->dst); - - iptunnel_xmit(skb, dev); return; #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c new file mode 100644 index 00000000000..927687e83f1 --- /dev/null +++ b/net/ipv4/ip_tunnel_core.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2013 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int iptunnel_xmit(struct net *net, struct rtable *rt, + struct sk_buff *skb, + __be32 src, __be32 dst, __u8 proto, + __u8 tos, __u8 ttl, __be16 df) +{ + int pkt_len = skb->len; + struct iphdr *iph; + int err; + + nf_reset(skb); + secpath_reset(skb); + skb->rxhash = 0; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + + /* Push down and install the IP header. */ + __skb_push(skb, sizeof(struct iphdr)); + skb_reset_network_header(skb); + + iph = ip_hdr(skb); + + iph->version = 4; + iph->ihl = sizeof(struct iphdr) >> 2; + iph->frag_off = df; + iph->protocol = proto; + iph->tos = tos; + iph->daddr = dst; + iph->saddr = src; + iph->ttl = ttl; + tunnel_ip_select_ident(skb, + (const struct iphdr *)skb_inner_network_header(skb), + &rt->dst); + + err = ip_local_out(skb); + if (unlikely(net_xmit_eval(err))) + pkt_len = 0; + return pkt_len; +} +EXPORT_SYMBOL_GPL(iptunnel_xmit); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 6b9c1f128ea..76bb8de435b 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -723,13 +723,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, __be16 df = tiph->frag_off; struct rtable *rt; /* Route to the other host */ struct net_device *tdev; /* Device to other host */ - struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst = tiph->daddr; struct flowi4 fl4; int mtu; const struct in6_addr *addr6; int 
addr_type; + u8 ttl; + int err; if (skb->protocol != htons(ETH_P_IPV6)) goto tx_error; @@ -872,34 +873,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, skb = new_skb; iph6 = ipv6_hdr(skb); } - - skb->transport_header = skb->network_header; - skb_push(skb, sizeof(struct iphdr)); - skb_reset_network_header(skb); - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags = 0; - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - - /* - * Push down and install the IPIP header. - */ - - iph = ip_hdr(skb); - iph->version = 4; - iph->ihl = sizeof(struct iphdr)>>2; - iph->frag_off = df; - iph->protocol = IPPROTO_IPV6; - iph->tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); - iph->daddr = fl4.daddr; - iph->saddr = fl4.saddr; - - if ((iph->ttl = tiph->ttl) == 0) - iph->ttl = iph6->hop_limit; - - skb->ip_summed = CHECKSUM_NONE; - ip_select_ident(iph, skb_dst(skb), NULL); - iptunnel_xmit(skb, dev); + ttl = tiph->ttl; + if (ttl == 0) + ttl = iph6->hop_limit; + tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); + + err = iptunnel_xmit(dev_net(dev), rt, skb, fl4.saddr, fl4.daddr, + IPPROTO_IPV6, tos, ttl, df); + iptunnel_xmit_stats(err, &dev->stats, dev->tstats); return NETDEV_TX_OK; tx_error_icmp: -- cgit v1.2.3-70-g09d2 From 3d7b46cd20e300bd6989fb1f43d46f1b9645816e Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 17 Jun 2013 17:50:02 -0700 Subject: ip_tunnel: push generic protocol handling to ip_tunnel module. Process skb tunnel header before sending packet to protocol handler. this allows code sharing between gre and ovs gre modules. Signed-off-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- include/net/ip_tunnels.h | 1 + net/ipv4/gre.c | 3 ++- net/ipv4/ip_tunnel.c | 30 ++++++------------------------ net/ipv4/ip_tunnel_core.c | 34 ++++++++++++++++++++++++++++++++++ net/ipv4/ipip.c | 6 +++++- net/ipv6/sit.c | 7 ++++++- 6 files changed, 54 insertions(+), 27 deletions(-) (limited to 'include/net/ip_tunnels.h') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index b84f1ab09d7..32e130b560f 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -155,6 +155,7 @@ static inline void tunnel_ip_select_ident(struct sk_buff *skb, (skb_shinfo(skb)->gso_segs ?: 1) - 1); } +int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto); int iptunnel_xmit(struct net *net, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 proto, diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index 5ecc9c49b4d..ba4803e609b 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -201,7 +201,8 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, return -EINVAL; } } - return 0; + + return iptunnel_pull_header(skb, hdr_len, tpi->proto); } static int gre_cisco_rcv(struct sk_buff *skb) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index a06a2ed4959..bd227e5ea9d 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -408,13 +408,6 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, const struct iphdr *iph = ip_hdr(skb); int err; - secpath_reset(skb); - - skb->protocol = tpi->proto; - - skb->mac_header = skb->network_header; - __pskb_pull(skb, tunnel->hlen); - skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen); #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { /* Looped back packet, drop it! */ @@ -442,23 +435,6 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, tunnel->i_seqno = ntohl(tpi->seq) + 1; } - /* Warning: All skb pointers will be invalidated! 
*/ - if (tunnel->dev->type == ARPHRD_ETHER) { - if (!pskb_may_pull(skb, ETH_HLEN)) { - tunnel->dev->stats.rx_length_errors++; - tunnel->dev->stats.rx_errors++; - goto drop; - } - - iph = ip_hdr(skb); - skb->protocol = eth_type_trans(skb, tunnel->dev); - skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); - } - - skb->pkt_type = PACKET_HOST; - __skb_tunnel_rx(skb, tunnel->dev); - - skb_reset_network_header(skb); err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { if (log_ecn_error) @@ -477,6 +453,12 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, tstats->rx_bytes += skb->len; u64_stats_update_end(&tstats->syncp); + if (tunnel->dev->type == ARPHRD_ETHER) { + skb->protocol = eth_type_trans(skb, tunnel->dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + } else { + skb->dev = tunnel->dev; + } gro_cells_receive(&tunnel->gro_cells, skb); return 0; diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 927687e83f1..7167b08977d 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -86,3 +86,37 @@ int iptunnel_xmit(struct net *net, struct rtable *rt, return pkt_len; } EXPORT_SYMBOL_GPL(iptunnel_xmit); + +int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) +{ + if (unlikely(!pskb_may_pull(skb, hdr_len))) + return -ENOMEM; + + skb_pull_rcsum(skb, hdr_len); + + if (inner_proto == htons(ETH_P_TEB)) { + struct ethhdr *eh = (struct ethhdr *)skb->data; + + if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) + return -ENOMEM; + + if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN)) + skb->protocol = eh->h_proto; + else + skb->protocol = htons(ETH_P_802_2); + + } else { + skb->protocol = inner_proto; + } + + nf_reset(skb); + secpath_reset(skb); + if (!skb->l4_rxhash) + skb->rxhash = 0; + skb_dst_drop(skb); + skb->vlan_tci = 0; + skb_set_queue_mapping(skb, 0); + skb->pkt_type = PACKET_HOST; + return 0; +} +EXPORT_SYMBOL_GPL(iptunnel_pull_header); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 
9df7ecd393f..e6905fbda2a 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -188,8 +188,12 @@ static int ipip_rcv(struct sk_buff *skb) struct net *net = dev_net(skb->dev); struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); struct ip_tunnel *tunnel; - const struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph; + if (iptunnel_pull_header(skb, 0, tpi.proto)) + goto drop; + + iph = ip_hdr(skb); tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, iph->saddr, iph->daddr, 0); if (tunnel) { diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 76bb8de435b..6cee844678e 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -640,9 +640,14 @@ static const struct tnl_ptk_info tpi = { static int ipip_rcv(struct sk_buff *skb) { - const struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph; struct ip_tunnel *tunnel; + if (iptunnel_pull_header(skb, 0, tpi.proto)) + goto drop; + + iph = ip_hdr(skb); + tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr); if (tunnel != NULL) { -- cgit v1.2.3-70-g09d2 From 9a628224a61bbcd2b50b3ec96e661fbbb49b619a Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 17 Jun 2013 17:50:07 -0700 Subject: ip_tunnel: Add dont fragment flag. This flag will be used by ovs tunneling. Signed-off-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- include/net/ip_tunnels.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net/ip_tunnels.h') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 32e130b560f..10bbb4273f7 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -73,6 +73,7 @@ struct ip_tunnel { #define TUNNEL_REC __cpu_to_be16(0x20) #define TUNNEL_VERSION __cpu_to_be16(0x40) #define TUNNEL_NO_KEY __cpu_to_be16(0x80) +#define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100) struct tnl_ptk_info { __be16 flags; -- cgit v1.2.3-70-g09d2 From 5243b6ac9ed1310f2329b3d0a830a55589e518ea Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 21 Jun 2013 16:17:11 -0700 Subject: ip_tunnel: Protect tunnel functions with CONFIG_INET guard. Tunnel constants can be used in generic code but in these cases the inline functions in ip_tunnels.h cause compilation problems if CONFIG_INET is not set. CC: Pravin Shelar Reported-by: Randy Dunlap Signed-off-by: Jesse Gross Acked-by: Randy Dunlap Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net/ip_tunnels.h') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 10bbb4273f7..b0d982471a5 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -93,6 +93,8 @@ struct ip_tunnel_net { struct net_device *fb_tunnel_dev; }; +#ifdef CONFIG_INET + int ip_tunnel_init(struct net_device *dev); void ip_tunnel_uninit(struct net_device *dev); void ip_tunnel_dellink(struct net_device *dev, struct list_head *head); @@ -180,4 +182,7 @@ static inline void iptunnel_xmit_stats(int err, err_stats->tx_dropped++; } } + +#endif /* CONFIG_INET */ + #endif /* __NET_IP_TUNNELS_H */ -- cgit v1.2.3-70-g09d2 From 5e6700b3bf98fe98d630bf9c939ad4c85ce95592 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 26 Jun 2013 16:11:28 +0200 Subject: sit: add support of x-netns This patch allows to switch the netns when packet is encapsulated or decapsulated. 
In other words, the encapsulated packet is received in one netns, where the lookup is done to find the tunnel. Once the tunnel is found, the packet is decapsulated and injected into the corresponding interface, which belongs to another netns. When one of the two netns is removed, the tunnel is destroyed. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 1 + net/ipv4/ip_tunnel.c | 10 +++++++++- net/ipv6/sit.c | 42 ++++++++++++++++++++++++++++++++---------- 3 files changed, 42 insertions(+), 11 deletions(-) (limited to 'include/net/ip_tunnels.h') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index b0d982471a5..781b3cf86a2 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -42,6 +42,7 @@ struct ip_tunnel { struct ip_tunnel __rcu *next; struct hlist_node hash_node; struct net_device *dev; + struct net *net; /* netns for packet i/o */ int err_count; /* Number of arrived ICMP errors */ unsigned long err_time; /* Time when the last ICMP error diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 3b00d81c8f1..394cebc96d2 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -304,6 +304,7 @@ static struct net_device *__ip_tunnel_create(struct net *net, tunnel = netdev_priv(dev); tunnel->parms = *parms; + tunnel->net = net; err = register_netdevice(dev); if (err) @@ -453,6 +454,9 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, tstats->rx_bytes += skb->len; u64_stats_update_end(&tstats->syncp); + if (tunnel->net != dev_net(tunnel->dev)) + skb_scrub_packet(skb); + if (tunnel->dev->type == ARPHRD_ETHER) { skb->protocol = eth_type_trans(skb, tunnel->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); @@ -541,7 +545,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); } - rt = ip_route_output_tunnel(dev_net(dev), &fl4, + rt = ip_route_output_tunnel(tunnel->net, &fl4, protocol, dst, 
tnl_params->saddr, tunnel->parms.o_key, @@ -602,6 +606,9 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } #endif + if (tunnel->net != dev_net(dev)) + skb_scrub_packet(skb); + if (tunnel->err_count > 0) { if (time_before(jiffies, tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { @@ -888,6 +895,7 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], if (ip_tunnel_find(itn, p, dev->type)) return -EEXIST; + nt->net = net; nt->parms = *p; err = register_netdevice(dev); if (err) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index f639866b3dc..97a0bfe2c29 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -466,14 +466,14 @@ isatap_chksrc(struct sk_buff *skb, const struct iphdr *iph, struct ip_tunnel *t) static void ipip6_tunnel_uninit(struct net_device *dev) { - struct net *net = dev_net(dev); - struct sit_net *sitn = net_generic(net, sit_net_id); + struct ip_tunnel *tunnel = netdev_priv(dev); + struct sit_net *sitn = net_generic(tunnel->net, sit_net_id); if (dev == sitn->fb_tunnel_dev) { RCU_INIT_POINTER(sitn->tunnels_wc[0], NULL); } else { - ipip6_tunnel_unlink(sitn, netdev_priv(dev)); - ipip6_tunnel_del_prl(netdev_priv(dev), NULL); + ipip6_tunnel_unlink(sitn, tunnel); + ipip6_tunnel_del_prl(tunnel, NULL); } dev_put(dev); } @@ -621,6 +621,8 @@ static int ipip6_rcv(struct sk_buff *skb) tstats->rx_packets++; tstats->rx_bytes += skb->len; + if (tunnel->net != dev_net(tunnel->dev)) + skb_scrub_packet(skb); netif_rx(skb); return 0; @@ -803,7 +805,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, goto tx_error; } - rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, + rt = ip_route_output_ports(tunnel->net, &fl4, NULL, dst, tiph->saddr, 0, 0, IPPROTO_IPV6, RT_TOS(tos), @@ -858,6 +860,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, tunnel->err_count = 0; } + if (tunnel->net != dev_net(dev)) + skb_scrub_packet(skb); + /* * Okay, now see if we can stuff it in the buffer as-is. 
*/ @@ -944,7 +949,8 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) iph = &tunnel->parms.iph; if (iph->daddr) { - struct rtable *rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, + struct rtable *rt = ip_route_output_ports(tunnel->net, &fl4, + NULL, iph->daddr, iph->saddr, 0, 0, IPPROTO_IPV6, @@ -959,7 +965,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) } if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); + tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link); if (tdev) { dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); @@ -972,7 +978,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) { - struct net *net = dev_net(t->dev); + struct net *net = t->net; struct sit_net *sitn = net_generic(net, sit_net_id); ipip6_tunnel_unlink(sitn, t); @@ -1248,7 +1254,6 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; dev->iflink = 0; dev->addr_len = 4; - dev->features |= NETIF_F_NETNS_LOCAL; dev->features |= NETIF_F_LLTX; } @@ -1257,6 +1262,7 @@ static int ipip6_tunnel_init(struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); tunnel->dev = dev; + tunnel->net = dev_net(dev); memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); @@ -1277,6 +1283,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) struct sit_net *sitn = net_generic(net, sit_net_id); tunnel->dev = dev; + tunnel->net = dev_net(dev); strcpy(tunnel->parms.name, dev->name); iph->version = 4; @@ -1564,8 +1571,14 @@ static struct xfrm_tunnel ipip_handler __read_mostly = { static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head) { + struct net *net = dev_net(sitn->fb_tunnel_dev); + struct net_device *dev, *aux; int prio; + for_each_netdev_safe(net, dev, aux) + if 
(dev->rtnl_link_ops == &sit_link_ops) + unregister_netdevice_queue(dev, head); + for (prio = 1; prio < 4; prio++) { int h; for (h = 0; h < HASH_SIZE; h++) { @@ -1573,7 +1586,12 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea t = rtnl_dereference(sitn->tunnels[prio][h]); while (t != NULL) { - unregister_netdevice_queue(t->dev, head); + /* If dev is in the same netns, it has already + * been added to the list by the previous loop. + */ + if (dev_net(t->dev) != net) + unregister_netdevice_queue(t->dev, + head); t = rtnl_dereference(t->next); } } @@ -1598,6 +1616,10 @@ static int __net_init sit_init_net(struct net *net) goto err_alloc_dev; } dev_net_set(sitn->fb_tunnel_dev, net); + /* FB netdevice is special: we have one, and only one per netns. + * Allowing to move it to another netns is clearly unsafe. + */ + sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; err = ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); if (err) -- cgit v1.2.3-70-g09d2