diff options
Diffstat (limited to 'drivers/net/tun.c')
-rw-r--r-- | drivers/net/tun.c | 84 |
1 files changed, 57 insertions, 27 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index af372d0957f..b6f45c5d84d 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -109,11 +109,11 @@ struct tap_filter { unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN]; }; -/* 1024 is probably a high enough limit: modern hypervisors seem to support on - * the order of 100-200 CPUs so this leaves us some breathing space if we want - * to match a queue per guest CPU. - */ -#define MAX_TAP_QUEUES 1024 +/* DEFAULT_MAX_NUM_RSS_QUEUES were choosed to let the rx/tx queues allocated for + * the netdevice to be fit in one page. So we can make sure the success of + * memory allocation. TODO: increase the limit. */ +#define MAX_TAP_QUEUES DEFAULT_MAX_NUM_RSS_QUEUES +#define MAX_TAP_FLOWS 4096 #define TUN_FLOW_EXPIRE (3 * HZ) @@ -185,6 +185,8 @@ struct tun_struct { unsigned long ageing_time; unsigned int numdisabled; struct list_head disabled; + void *security; + u32 flow_count; }; static inline u32 tun_hashfn(u32 rxhash) @@ -218,6 +220,7 @@ static struct tun_flow_entry *tun_flow_create(struct tun_struct *tun, e->queue_index = queue_index; e->tun = tun; hlist_add_head_rcu(&e->hash_link, head); + ++tun->flow_count; } return e; } @@ -228,6 +231,7 @@ static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e) e->rxhash, e->queue_index); hlist_del_rcu(&e->hash_link); kfree_rcu(e, rcu); + --tun->flow_count; } static void tun_flow_flush(struct tun_struct *tun) @@ -294,11 +298,12 @@ static void tun_flow_cleanup(unsigned long data) } static void tun_flow_update(struct tun_struct *tun, u32 rxhash, - u16 queue_index) + struct tun_file *tfile) { struct hlist_head *head; struct tun_flow_entry *e; unsigned long delay = tun->ageing_time; + u16 queue_index = tfile->queue_index; if (!rxhash) return; @@ -307,7 +312,9 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash, rcu_read_lock(); - if (tun->numqueues == 1) + /* We may get a very small possibility of OOO during switching, not + * worth to optimize.*/ + if (tun->numqueues == 1 || tfile->detached) goto unlock; e = tun_flow_find(head, rxhash); @@ -317,7 +324,8 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash, e->updated = jiffies; } else { spin_lock_bh(&tun->lock); - if (!tun_flow_find(head, rxhash)) + if (!tun_flow_find(head, rxhash) && + tun->flow_count < MAX_TAP_FLOWS) tun_flow_create(tun, head, rxhash, queue_index); if (!timer_pending(&tun->flow_gc_timer)) @@ -406,21 +414,21 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun = rtnl_dereference(tfile->tun); - if (tun) { + if (tun && !tfile->detached) { u16 index = tfile->queue_index; BUG_ON(index >= tun->numqueues); dev = tun->dev; rcu_assign_pointer(tun->tfiles[index], tun->tfiles[tun->numqueues - 1]); - rcu_assign_pointer(tfile->tun, NULL); ntfile = rtnl_dereference(tun->tfiles[index]); ntfile->queue_index = index; --tun->numqueues; - if (clean) + if (clean) { + rcu_assign_pointer(tfile->tun, NULL); sock_put(&tfile->sk); - else + } else tun_disable_queue(tun, tfile); synchronize_net(); @@ -434,10 +442,13 @@ static void __tun_detach(struct tun_file *tfile, bool clean) } if (clean) { - if (tun && tun->numqueues == 0 && tun->numdisabled == 0 && - !(tun->flags & TUN_PERSIST)) - if (tun->dev->reg_state == NETREG_REGISTERED) + if (tun && tun->numqueues == 0 && tun->numdisabled == 0) { + netif_carrier_off(tun->dev); + + if (!(tun->flags & TUN_PERSIST) && + tun->dev->reg_state == NETREG_REGISTERED) unregister_netdevice(tun->dev); + } BUG_ON(!test_bit(SOCK_EXTERNALLY_ALLOCATED, &tfile->socket.flags)); @@ -465,6 +476,10 @@ static void tun_detach_all(struct net_device *dev) rcu_assign_pointer(tfile->tun, NULL); --tun->numqueues; } + list_for_each_entry(tfile, &tun->disabled, next) { + wake_up_all(&tfile->wq.wait); + rcu_assign_pointer(tfile->tun, NULL); + } BUG_ON(tun->numqueues != 0); synchronize_net(); @@ -490,8 +505,12 @@ static int tun_attach(struct tun_struct *tun, struct file *file) struct tun_file *tfile = file->private_data; int err; + err = security_tun_dev_attach(tfile->socket.sk, tun->security); + if (err < 0) + goto out; + err = -EINVAL; - if (rtnl_dereference(tfile->tun)) + if (rtnl_dereference(tfile->tun) && !tfile->detached) goto out; err = -EBUSY; @@ -1181,6 +1200,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (zerocopy) { skb_shinfo(skb)->destructor_arg = msg_control; skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; + skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; } skb_reset_network_header(skb); @@ -1190,7 +1210,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, tun->dev->stats.rx_packets++; tun->dev->stats.rx_bytes += len; - tun_flow_update(tun, rxhash, tfile->queue_index); + tun_flow_update(tun, rxhash, tfile); return total_len; } @@ -1373,6 +1393,7 @@ static void tun_free_netdev(struct net_device *dev) BUG_ON(!(list_empty(&tun->disabled))); tun_flow_uninit(tun); + security_tun_dev_free_security(tun->security); free_netdev(dev); } @@ -1562,7 +1583,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) if (tun_not_capable(tun)) return -EPERM; - err = security_tun_dev_attach(tfile->socket.sk); + err = security_tun_dev_open(tun->security); if (err < 0) return err; @@ -1577,6 +1598,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) else { char *name; unsigned long flags = 0; + int queues = ifr->ifr_flags & IFF_MULTI_QUEUE ? + MAX_TAP_QUEUES : 1; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -1600,8 +1623,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) name = ifr->ifr_name; dev = alloc_netdev_mqs(sizeof(struct tun_struct), name, - tun_setup, - MAX_TAP_QUEUES, MAX_TAP_QUEUES); + tun_setup, queues, queues); + if (!dev) return -ENOMEM; @@ -1619,7 +1642,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) spin_lock_init(&tun->lock); - security_tun_dev_post_create(&tfile->sk); + err = security_tun_dev_alloc_security(&tun->security); + if (err < 0) + goto err_free_dev; tun_net_init(dev); @@ -1644,10 +1669,10 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) device_create_file(&tun->dev->dev, &dev_attr_owner) || device_create_file(&tun->dev->dev, &dev_attr_group)) pr_err("Failed to create tun sysfs files\n"); - - netif_carrier_on(tun->dev); } + netif_carrier_on(tun->dev); + tun_debug(KERN_INFO, tun, "tun_set_iff\n"); if (ifr->ifr_flags & IFF_NO_PI) @@ -1789,19 +1814,24 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr) if (ifr->ifr_flags & IFF_ATTACH_QUEUE) { tun = tfile->detached; - if (!tun) + if (!tun) { ret = -EINVAL; - else - ret = tun_attach(tun, file); + goto unlock; + } + ret = security_tun_dev_attach_queue(tun->security); + if (ret < 0) + goto unlock; + ret = tun_attach(tun, file); } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) { tun = rtnl_dereference(tfile->tun); - if (!tun || !(tun->flags & TUN_TAP_MQ)) + if (!tun || !(tun->flags & TUN_TAP_MQ) || tfile->detached) ret = -EINVAL; else __tun_detach(tfile, false); } else ret = -EINVAL; +unlock: rtnl_unlock(); return ret; } |