sit: add support of x-netns

This patch allows to switch the netns when packet is encapsulated or decapsulated. In other word, the encapsulated packet is received in a netns, where the lookup is done to find the tunnel. Once the tunnel is found, the packet is decapsulated and injecting into the corresponding interface which stands to another netns. When one of the two netns is removed, the tunnel is destroyed. Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com> Signed-off-by: David S. Miller <davem@davemloft.net>
author: Nicolas Dichtel <nicolas.dichtel@6wind.com> 2013-06-26 16:11:28 +0200
committer: David S. Miller <davem@davemloft.net> 2013-06-27 22:30:47 -0700
commit: 5e6700b3bf98fe98d630bf9c939ad4c85ce95592 (patch)
tree: ce408f081a9dacceb782e138ca0cbec379f4e8a4
parent: 621e84d6f373dcb273ebfd772638b8e7dc3c2c48 (diff)
3 files changed, 42 insertions, 11 deletions
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index b0d982471a5..781b3cf86a2 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -42,6 +42,7 @@ struct ip_tunnel {
 	struct ip_tunnel __rcu	*next;
 	struct hlist_node hash_node;
 	struct net_device	*dev;
+	struct net		*net;	/* netns for packet i/o */
 
 	int		err_count;	/* Number of arrived ICMP errors */
 	unsigned long	err_time;	/* Time when the last ICMP error
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 3b00d81c8f1..394cebc96d2 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -304,6 +304,7 @@ static struct net_device *__ip_tunnel_create(struct net *net,
 
 	tunnel = netdev_priv(dev);
 	tunnel->parms = *parms;
+	tunnel->net = net;
 
 	err = register_netdevice(dev);
 	if (err)
@@ -453,6 +454,9 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
 	tstats->rx_bytes += skb->len;
 	u64_stats_update_end(&tstats->syncp);
 
+	if (tunnel->net != dev_net(tunnel->dev))
+		skb_scrub_packet(skb);
+
 	if (tunnel->dev->type == ARPHRD_ETHER) {
 		skb->protocol = eth_type_trans(skb, tunnel->dev);
 		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
@@ -541,7 +545,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
 	}
 
-	rt = ip_route_output_tunnel(dev_net(dev), &fl4,
+	rt = ip_route_output_tunnel(tunnel->net, &fl4,
 				    protocol,
 				    dst, tnl_params->saddr,
 				    tunnel->parms.o_key,
@@ -602,6 +606,9 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 	}
 #endif
 
+	if (tunnel->net != dev_net(dev))
+		skb_scrub_packet(skb);
+
 	if (tunnel->err_count > 0) {
 		if (time_before(jiffies,
 				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
@@ -888,6 +895,7 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
 	if (ip_tunnel_find(itn, p, dev->type))
 		return -EEXIST;
 
+	nt->net = net;
 	nt->parms = *p;
 	err = register_netdevice(dev);
 	if (err)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index f639866b3dc..97a0bfe2c29 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -466,14 +466,14 @@ isatap_chksrc(struct sk_buff *skb, const struct iphdr *iph, struct ip_tunnel *t)
 
 static void ipip6_tunnel_uninit(struct net_device *dev)
 {
-	struct net *net = dev_net(dev);
-	struct sit_net *sitn = net_generic(net, sit_net_id);
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	struct sit_net *sitn = net_generic(tunnel->net, sit_net_id);
 
 	if (dev == sitn->fb_tunnel_dev) {
 		RCU_INIT_POINTER(sitn->tunnels_wc[0], NULL);
 	} else {
-		ipip6_tunnel_unlink(sitn, netdev_priv(dev));
-		ipip6_tunnel_del_prl(netdev_priv(dev), NULL);
+		ipip6_tunnel_unlink(sitn, tunnel);
+		ipip6_tunnel_del_prl(tunnel, NULL);
 	}
 	dev_put(dev);
 }
@@ -621,6 +621,8 @@ static int ipip6_rcv(struct sk_buff *skb)
 		tstats->rx_packets++;
 		tstats->rx_bytes += skb->len;
 
+		if (tunnel->net != dev_net(tunnel->dev))
+			skb_scrub_packet(skb);
 		netif_rx(skb);
 
 		return 0;
@@ -803,7 +805,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 			goto tx_error;
 	}
 
-	rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
+	rt = ip_route_output_ports(tunnel->net, &fl4, NULL,
 				   dst, tiph->saddr,
 				   0, 0,
 				   IPPROTO_IPV6, RT_TOS(tos),
@@ -858,6 +860,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 			tunnel->err_count = 0;
 	}
 
+	if (tunnel->net != dev_net(dev))
+		skb_scrub_packet(skb);
+
 	/*
 	 * Okay, now see if we can stuff it in the buffer as-is.
 	 */
@@ -944,7 +949,8 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
 	iph = &tunnel->parms.iph;
 
 	if (iph->daddr) {
-		struct rtable *rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
+		struct rtable *rt = ip_route_output_ports(tunnel->net, &fl4,
+							  NULL,
 							  iph->daddr, iph->saddr,
 							  0, 0,
 							  IPPROTO_IPV6,
@@ -959,7 +965,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
 	}
 
 	if (!tdev && tunnel->parms.link)
-		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
+		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
 
 	if (tdev) {
 		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
@@ -972,7 +978,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
 
 static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
 {
-	struct net *net = dev_net(t->dev);
+	struct net *net = t->net;
 	struct sit_net *sitn = net_generic(net, sit_net_id);
 
 	ipip6_tunnel_unlink(sitn, t);
@@ -1248,7 +1254,6 @@ static void ipip6_tunnel_setup(struct net_device *dev)
 	dev->priv_flags	       &= ~IFF_XMIT_DST_RELEASE;
 	dev->iflink		= 0;
 	dev->addr_len		= 4;
-	dev->features		|= NETIF_F_NETNS_LOCAL;
 	dev->features		|= NETIF_F_LLTX;
 }
 
@@ -1257,6 +1262,7 @@ static int ipip6_tunnel_init(struct net_device *dev)
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 
 	tunnel->dev = dev;
+	tunnel->net = dev_net(dev);
 
 	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
@@ -1277,6 +1283,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
 	struct sit_net *sitn = net_generic(net, sit_net_id);
 
 	tunnel->dev = dev;
+	tunnel->net = dev_net(dev);
 	strcpy(tunnel->parms.name, dev->name);
 
 	iph->version		= 4;
@@ -1564,8 +1571,14 @@ static struct xfrm_tunnel ipip_handler __read_mostly = {
 
 static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head)
 {
+	struct net *net = dev_net(sitn->fb_tunnel_dev);
+	struct net_device *dev, *aux;
 	int prio;
 
+	for_each_netdev_safe(net, dev, aux)
+		if (dev->rtnl_link_ops == &sit_link_ops)
+			unregister_netdevice_queue(dev, head);
+
 	for (prio = 1; prio < 4; prio++) {
 		int h;
 		for (h = 0; h < HASH_SIZE; h++) {
@@ -1573,7 +1586,12 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea
 
 			t = rtnl_dereference(sitn->tunnels[prio][h]);
 			while (t != NULL) {
-				unregister_netdevice_queue(t->dev, head);
+				/* If dev is in the same netns, it has already
+				 * been added to the list by the previous loop.
+				 */
+				if (dev_net(t->dev) != net)
+					unregister_netdevice_queue(t->dev,
+								   head);
 				t = rtnl_dereference(t->next);
 			}
 		}
@@ -1598,6 +1616,10 @@ static int __net_init sit_init_net(struct net *net)
 		goto err_alloc_dev;
 	}
 	dev_net_set(sitn->fb_tunnel_dev, net);
+	/* FB netdevice is special: we have one, and only one per netns.
+	 * Allowing to move it to another netns is clearly unsafe.
+	 */
+	sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
 
 	err = ipip6_fb_tunnel_init(sitn->fb_tunnel_dev);
 	if (err)
author	Nicolas Dichtel <nicolas.dichtel@6wind.com>	2013-06-26 16:11:28 +0200
committer	David S. Miller <davem@davemloft.net>	2013-06-27 22:30:47 -0700
commit	5e6700b3bf98fe98d630bf9c939ad4c85ce95592 (patch)
tree	ce408f081a9dacceb782e138ca0cbec379f4e8a4
parent	621e84d6f373dcb273ebfd772638b8e7dc3c2c48 (diff)