summaryrefslogtreecommitdiffstats
path: root/drivers/net/vxlan.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/vxlan.c')
-rw-r--r--drivers/net/vxlan.c187
1 files changed, 109 insertions, 78 deletions
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 4dbb2ed85b9..1610d51dbb5 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -127,6 +127,7 @@ struct vxlan_dev {
struct list_head next; /* vxlan's per namespace list */
struct vxlan_sock *vn_sock; /* listening socket */
struct net_device *dev;
+ struct net *net; /* netns for packet i/o */
struct vxlan_rdst default_dst; /* default destination */
union vxlan_addr saddr; /* source address */
__be16 dst_port;
@@ -134,7 +135,7 @@ struct vxlan_dev {
__u16 port_max;
__u8 tos; /* TOS override */
__u8 ttl;
- u32 flags; /* VXLAN_F_* below */
+ u32 flags; /* VXLAN_F_* in vxlan.h */
struct work_struct sock_work;
struct work_struct igmp_join;
@@ -149,13 +150,6 @@ struct vxlan_dev {
struct hlist_head fdb_head[FDB_HASH_SIZE];
};
-#define VXLAN_F_LEARN 0x01
-#define VXLAN_F_PROXY 0x02
-#define VXLAN_F_RSC 0x04
-#define VXLAN_F_L2MISS 0x08
-#define VXLAN_F_L3MISS 0x10
-#define VXLAN_F_IPV6 0x20 /* internal flag */
-
/* salt for hash table */
static u32 vxlan_salt __read_mostly;
static struct workqueue_struct *vxlan_wq;
@@ -571,6 +565,7 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff
goto out;
}
skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
+ skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));
off_eth = skb_gro_offset(skb);
hlen = off_eth + sizeof(*eh);
@@ -605,6 +600,7 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff
}
skb_gro_pull(skb, sizeof(*eh)); /* pull inner eth header */
+ skb_gro_postpull_rcsum(skb, eh, sizeof(*eh));
pp = ptype->callbacks.gro_receive(head, skb);
out_unlock:
@@ -1203,6 +1199,7 @@ static void vxlan_rcv(struct vxlan_sock *vs,
remote_ip = &vxlan->default_dst.remote_ip;
skb_reset_mac_header(skb);
+ skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev)));
skb->protocol = eth_type_trans(skb, vxlan->dev);
/* Ignore packet loops (and multicast echo) */
@@ -1599,18 +1596,11 @@ __be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(vxlan_src_port);
-static int handle_offloads(struct sk_buff *skb)
+static inline struct sk_buff *vxlan_handle_offloads(struct sk_buff *skb,
+ bool udp_csum)
{
- if (skb_is_gso(skb)) {
- int err = skb_unclone(skb, GFP_ATOMIC);
- if (unlikely(err))
- return err;
-
- skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
- } else if (skb->ip_summed != CHECKSUM_PARTIAL)
- skb->ip_summed = CHECKSUM_NONE;
-
- return 0;
+ int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+ return iptunnel_handle_offloads(skb, udp_csum, type);
}
#if IS_ENABLED(CONFIG_IPV6)
@@ -1618,7 +1608,8 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
struct dst_entry *dst, struct sk_buff *skb,
struct net_device *dev, struct in6_addr *saddr,
struct in6_addr *daddr, __u8 prio, __u8 ttl,
- __be16 src_port, __be16 dst_port, __be32 vni)
+ __be16 src_port, __be16 dst_port, __be32 vni,
+ bool xnet)
{
struct ipv6hdr *ip6h;
struct vxlanhdr *vxh;
@@ -1626,12 +1617,11 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
int min_headroom;
int err;
- if (!skb->encapsulation) {
- skb_reset_inner_headers(skb);
- skb->encapsulation = 1;
- }
+ skb = vxlan_handle_offloads(skb, !udp_get_no_check6_tx(vs->sock->sk));
+ if (IS_ERR(skb))
+ return -EINVAL;
- skb_scrub_packet(skb, false);
+ skb_scrub_packet(skb, xnet);
min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
+ VXLAN_HLEN + sizeof(struct ipv6hdr)
@@ -1663,27 +1653,14 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
uh->source = src_port;
uh->len = htons(skb->len);
- uh->check = 0;
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
IPSKB_REROUTED);
skb_dst_set(skb, dst);
- if (!skb_is_gso(skb) && !(dst->dev->features & NETIF_F_IPV6_CSUM)) {
- __wsum csum = skb_checksum(skb, 0, skb->len, 0);
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- uh->check = csum_ipv6_magic(saddr, daddr, skb->len,
- IPPROTO_UDP, csum);
- if (uh->check == 0)
- uh->check = CSUM_MANGLED_0;
- } else {
- skb->ip_summed = CHECKSUM_PARTIAL;
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct udphdr, check);
- uh->check = ~csum_ipv6_magic(saddr, daddr,
- skb->len, IPPROTO_UDP, 0);
- }
+ udp6_set_csum(udp_get_no_check6_tx(vs->sock->sk), skb,
+ saddr, daddr, skb->len);
__skb_push(skb, sizeof(*ip6h));
skb_reset_network_header(skb);
@@ -1699,10 +1676,6 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
ip6h->daddr = *daddr;
ip6h->saddr = *saddr;
- err = handle_offloads(skb);
- if (err)
- return err;
-
ip6tunnel_xmit(skb, dev);
return 0;
}
@@ -1711,17 +1684,16 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
int vxlan_xmit_skb(struct vxlan_sock *vs,
struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
- __be16 src_port, __be16 dst_port, __be32 vni)
+ __be16 src_port, __be16 dst_port, __be32 vni, bool xnet)
{
struct vxlanhdr *vxh;
struct udphdr *uh;
int min_headroom;
int err;
- if (!skb->encapsulation) {
- skb_reset_inner_headers(skb);
- skb->encapsulation = 1;
- }
+ skb = vxlan_handle_offloads(skb, !vs->sock->sk->sk_no_check_tx);
+ if (IS_ERR(skb))
+ return -EINVAL;
min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+ VXLAN_HLEN + sizeof(struct iphdr)
@@ -1753,14 +1725,12 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
uh->source = src_port;
uh->len = htons(skb->len);
- uh->check = 0;
- err = handle_offloads(skb);
- if (err)
- return err;
+ udp_set_csum(vs->sock->sk->sk_no_check_tx, skb,
+ src, dst, skb->len);
return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP,
- tos, ttl, df, false);
+ tos, ttl, df, xnet);
}
EXPORT_SYMBOL_GPL(vxlan_xmit_skb);
@@ -1853,7 +1823,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
fl4.daddr = dst->sin.sin_addr.s_addr;
fl4.saddr = vxlan->saddr.sin.sin_addr.s_addr;
- rt = ip_route_output_key(dev_net(dev), &fl4);
+ rt = ip_route_output_key(vxlan->net, &fl4);
if (IS_ERR(rt)) {
netdev_dbg(dev, "no route to %pI4\n",
&dst->sin.sin_addr.s_addr);
@@ -1874,7 +1844,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
struct vxlan_dev *dst_vxlan;
ip_rt_put(rt);
- dst_vxlan = vxlan_find_vni(dev_net(dev), vni, dst_port);
+ dst_vxlan = vxlan_find_vni(vxlan->net, vni, dst_port);
if (!dst_vxlan)
goto tx_error;
vxlan_encap_bypass(skb, vxlan, dst_vxlan);
@@ -1887,7 +1857,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb,
fl4.saddr, dst->sin.sin_addr.s_addr,
tos, ttl, df, src_port, dst_port,
- htonl(vni << 8));
+ htonl(vni << 8),
+ !net_eq(vxlan->net, dev_net(vxlan->dev)));
if (err < 0)
goto rt_tx_error;
@@ -1927,7 +1898,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
struct vxlan_dev *dst_vxlan;
dst_release(ndst);
- dst_vxlan = vxlan_find_vni(dev_net(dev), vni, dst_port);
+ dst_vxlan = vxlan_find_vni(vxlan->net, vni, dst_port);
if (!dst_vxlan)
goto tx_error;
vxlan_encap_bypass(skb, vxlan, dst_vxlan);
@@ -1938,7 +1909,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
err = vxlan6_xmit_skb(vxlan->vn_sock, ndst, skb,
dev, &fl6.saddr, &fl6.daddr, 0, ttl,
- src_port, dst_port, htonl(vni << 8));
+ src_port, dst_port, htonl(vni << 8),
+ !net_eq(vxlan->net, dev_net(vxlan->dev)));
#endif
}
@@ -2082,7 +2054,7 @@ static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan)
static int vxlan_init(struct net_device *dev)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
- struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
+ struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
struct vxlan_sock *vs;
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
@@ -2090,7 +2062,7 @@ static int vxlan_init(struct net_device *dev)
return -ENOMEM;
spin_lock(&vn->sock_lock);
- vs = vxlan_find_sock(dev_net(dev), vxlan->dst_port);
+ vs = vxlan_find_sock(vxlan->net, vxlan->dst_port);
if (vs) {
/* If we have a socket with same port already, reuse it */
atomic_inc(&vs->refcnt);
@@ -2172,8 +2144,8 @@ static void vxlan_flush(struct vxlan_dev *vxlan)
/* Cleanup timer and forwarding table on shutdown */
static int vxlan_stop(struct net_device *dev)
{
- struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
struct vxlan_sock *vs = vxlan->vn_sock;
if (vs && vxlan_addr_multicast(&vxlan->default_dst.remote_ip) &&
@@ -2202,7 +2174,7 @@ static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
struct net_device *lowerdev;
int max_mtu;
- lowerdev = __dev_get_by_index(dev_net(dev), dst->remote_ifindex);
+ lowerdev = __dev_get_by_index(vxlan->net, dst->remote_ifindex);
if (lowerdev == NULL)
return eth_change_mtu(dev, new_mtu);
@@ -2285,7 +2257,6 @@ static void vxlan_setup(struct net_device *dev)
dev->tx_queue_len = 0;
dev->features |= NETIF_F_LLTX;
- dev->features |= NETIF_F_NETNS_LOCAL;
dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
dev->features |= NETIF_F_RXCSUM;
dev->features |= NETIF_F_GSO_SOFTWARE;
@@ -2401,7 +2372,7 @@ static void vxlan_del_work(struct work_struct *work)
* could be used for both IPv4 and IPv6 communications, but
* users may set bindv6only=1.
*/
-static struct socket *create_v6_sock(struct net *net, __be16 port)
+static struct socket *create_v6_sock(struct net *net, __be16 port, u32 flags)
{
struct sock *sk;
struct socket *sock;
@@ -2438,18 +2409,25 @@ static struct socket *create_v6_sock(struct net *net, __be16 port)
/* Disable multicast loopback */
inet_sk(sk)->mc_loop = 0;
+
+ if (flags & VXLAN_F_UDP_ZERO_CSUM6_TX)
+ udp_set_no_check6_tx(sk, true);
+
+ if (flags & VXLAN_F_UDP_ZERO_CSUM6_RX)
+ udp_set_no_check6_rx(sk, true);
+
return sock;
}
#else
-static struct socket *create_v6_sock(struct net *net, __be16 port)
+static struct socket *create_v6_sock(struct net *net, __be16 port, u32 flags)
{
return ERR_PTR(-EPFNOSUPPORT);
}
#endif
-static struct socket *create_v4_sock(struct net *net, __be16 port)
+static struct socket *create_v4_sock(struct net *net, __be16 port, u32 flags)
{
struct sock *sk;
struct socket *sock;
@@ -2482,18 +2460,24 @@ static struct socket *create_v4_sock(struct net *net, __be16 port)
/* Disable multicast loopback */
inet_sk(sk)->mc_loop = 0;
+
+ if (!(flags & VXLAN_F_UDP_CSUM))
+ sock->sk->sk_no_check_tx = 1;
+
return sock;
}
/* Create new listen socket if needed */
static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
- vxlan_rcv_t *rcv, void *data, bool ipv6)
+ vxlan_rcv_t *rcv, void *data,
+ u32 flags)
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_sock *vs;
struct socket *sock;
struct sock *sk;
unsigned int h;
+ bool ipv6 = !!(flags & VXLAN_F_IPV6);
vs = kzalloc(sizeof(*vs), GFP_KERNEL);
if (!vs)
@@ -2505,9 +2489,9 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
INIT_WORK(&vs->del_work, vxlan_del_work);
if (ipv6)
- sock = create_v6_sock(net, port);
+ sock = create_v6_sock(net, port, flags);
else
- sock = create_v4_sock(net, port);
+ sock = create_v4_sock(net, port, flags);
if (IS_ERR(sock)) {
kfree(vs);
return ERR_CAST(sock);
@@ -2545,12 +2529,12 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
vxlan_rcv_t *rcv, void *data,
- bool no_share, bool ipv6)
+ bool no_share, u32 flags)
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_sock *vs;
- vs = vxlan_socket_create(net, port, rcv, data, ipv6);
+ vs = vxlan_socket_create(net, port, rcv, data, flags);
if (!IS_ERR(vs))
return vs;
@@ -2578,12 +2562,12 @@ EXPORT_SYMBOL_GPL(vxlan_sock_add);
static void vxlan_sock_work(struct work_struct *work)
{
struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, sock_work);
- struct net *net = dev_net(vxlan->dev);
+ struct net *net = vxlan->net;
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
__be16 port = vxlan->dst_port;
struct vxlan_sock *nvs;
- nvs = vxlan_sock_add(net, port, vxlan_rcv, NULL, false, vxlan->flags & VXLAN_F_IPV6);
+ nvs = vxlan_sock_add(net, port, vxlan_rcv, NULL, false, vxlan->flags);
spin_lock(&vn->sock_lock);
if (!IS_ERR(nvs))
vxlan_vs_add_dev(nvs, vxlan);
@@ -2605,6 +2589,8 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
if (!data[IFLA_VXLAN_ID])
return -EINVAL;
+ vxlan->net = dev_net(dev);
+
vni = nla_get_u32(data[IFLA_VXLAN_ID]);
dst->remote_vni = vni;
@@ -2705,12 +2691,23 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
if (data[IFLA_VXLAN_PORT])
vxlan->dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]);
+ if (data[IFLA_VXLAN_UDP_CSUM] && nla_get_u8(data[IFLA_VXLAN_UDP_CSUM]))
+ vxlan->flags |= VXLAN_F_UDP_CSUM;
+
+ if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX] &&
+ nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]))
+ vxlan->flags |= VXLAN_F_UDP_ZERO_CSUM6_TX;
+
+ if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX] &&
+ nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]))
+ vxlan->flags |= VXLAN_F_UDP_ZERO_CSUM6_RX;
+
if (vxlan_find_vni(net, vni, vxlan->dst_port)) {
pr_info("duplicate VNI %u\n", vni);
return -EEXIST;
}
- SET_ETHTOOL_OPS(dev, &vxlan_ethtool_ops);
+ dev->ethtool_ops = &vxlan_ethtool_ops;
/* create an fdb entry for a valid default destination */
if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) {
@@ -2739,8 +2736,8 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
static void vxlan_dellink(struct net_device *dev, struct list_head *head)
{
- struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
spin_lock(&vn->sock_lock);
if (!hlist_unhashed(&vxlan->hlist))
@@ -2768,7 +2765,10 @@ static size_t vxlan_get_size(const struct net_device *dev)
nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_AGEING */
nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LIMIT */
nla_total_size(sizeof(struct ifla_vxlan_port_range)) +
- nla_total_size(sizeof(__be16))+ /* IFLA_VXLAN_PORT */
+ nla_total_size(sizeof(__be16)) + /* IFLA_VXLAN_PORT */
+ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_CSUM */
+ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_TX */
+ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_RX */
0;
}
@@ -2828,7 +2828,13 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
!!(vxlan->flags & VXLAN_F_L3MISS)) ||
nla_put_u32(skb, IFLA_VXLAN_AGEING, vxlan->age_interval) ||
nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->addrmax) ||
- nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->dst_port))
+ nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->dst_port) ||
+ nla_put_u8(skb, IFLA_VXLAN_UDP_CSUM,
+ !!(vxlan->flags & VXLAN_F_UDP_CSUM)) ||
+ nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
+ !!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) ||
+ nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
+ !!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_RX)))
goto nla_put_failure;
if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
@@ -2905,8 +2911,33 @@ static __net_init int vxlan_init_net(struct net *net)
return 0;
}
+static void __net_exit vxlan_exit_net(struct net *net)
+{
+ struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+ struct vxlan_dev *vxlan, *next;
+ struct net_device *dev, *aux;
+ LIST_HEAD(list);
+
+ rtnl_lock();
+ for_each_netdev_safe(net, dev, aux)
+ if (dev->rtnl_link_ops == &vxlan_link_ops)
+ unregister_netdevice_queue(dev, &list);
+
+ list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) {
+ /* If vxlan->dev is in the same netns, it has already been added
+ * to the list by the previous loop.
+ */
+ if (!net_eq(dev_net(vxlan->dev), net))
+ unregister_netdevice_queue(dev, &list);
+ }
+
+ unregister_netdevice_many(&list);
+ rtnl_unlock();
+}
+
static struct pernet_operations vxlan_net_ops = {
.init = vxlan_init_net,
+ .exit = vxlan_exit_net,
.id = &vxlan_net_id,
.size = sizeof(struct vxlan_net),
};