diff options
Diffstat (limited to 'drivers/net/tun.c')
-rw-r--r-- | drivers/net/tun.c | 125 |
1 files changed, 77 insertions, 48 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index fbd106edbe5..2917a86f4c4 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -109,11 +109,11 @@ struct tap_filter { unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN]; }; -/* 1024 is probably a high enough limit: modern hypervisors seem to support on - * the order of 100-200 CPUs so this leaves us some breathing space if we want - * to match a queue per guest CPU. - */ -#define MAX_TAP_QUEUES 1024 +/* DEFAULT_MAX_NUM_RSS_QUEUES were choosed to let the rx/tx queues allocated for + * the netdevice to be fit in one page. So we can make sure the success of + * memory allocation. TODO: increase the limit. */ +#define MAX_TAP_QUEUES DEFAULT_MAX_NUM_RSS_QUEUES +#define MAX_TAP_FLOWS 4096 #define TUN_FLOW_EXPIRE (3 * HZ) @@ -185,6 +185,8 @@ struct tun_struct { unsigned long ageing_time; unsigned int numdisabled; struct list_head disabled; + void *security; + u32 flow_count; }; static inline u32 tun_hashfn(u32 rxhash) @@ -218,6 +220,7 @@ static struct tun_flow_entry *tun_flow_create(struct tun_struct *tun, e->queue_index = queue_index; e->tun = tun; hlist_add_head_rcu(&e->hash_link, head); + ++tun->flow_count; } return e; } @@ -228,6 +231,7 @@ static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e) e->rxhash, e->queue_index); hlist_del_rcu(&e->hash_link); kfree_rcu(e, rcu); + --tun->flow_count; } static void tun_flow_flush(struct tun_struct *tun) @@ -294,11 +298,12 @@ static void tun_flow_cleanup(unsigned long data) } static void tun_flow_update(struct tun_struct *tun, u32 rxhash, - u16 queue_index) + struct tun_file *tfile) { struct hlist_head *head; struct tun_flow_entry *e; unsigned long delay = tun->ageing_time; + u16 queue_index = tfile->queue_index; if (!rxhash) return; @@ -307,7 +312,9 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash, rcu_read_lock(); - if (tun->numqueues == 1) + /* We may get a very small possibility of OOO during switching, not + * worth to optimize.*/ + if (tun->numqueues == 1 || tfile->detached) goto unlock; e = tun_flow_find(head, rxhash); @@ -317,7 +324,8 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash, e->updated = jiffies; } else { spin_lock_bh(&tun->lock); - if (!tun_flow_find(head, rxhash)) + if (!tun_flow_find(head, rxhash) && + tun->flow_count < MAX_TAP_FLOWS) tun_flow_create(tun, head, rxhash, queue_index); if (!timer_pending(&tun->flow_gc_timer)) @@ -404,24 +412,23 @@ static void __tun_detach(struct tun_file *tfile, bool clean) struct tun_struct *tun; struct net_device *dev; - tun = rcu_dereference_protected(tfile->tun, - lockdep_rtnl_is_held()); - if (tun) { + tun = rtnl_dereference(tfile->tun); + + if (tun && !tfile->detached) { u16 index = tfile->queue_index; BUG_ON(index >= tun->numqueues); dev = tun->dev; rcu_assign_pointer(tun->tfiles[index], tun->tfiles[tun->numqueues - 1]); - rcu_assign_pointer(tfile->tun, NULL); - ntfile = rcu_dereference_protected(tun->tfiles[index], - lockdep_rtnl_is_held()); + ntfile = rtnl_dereference(tun->tfiles[index]); ntfile->queue_index = index; --tun->numqueues; - if (clean) + if (clean) { + rcu_assign_pointer(tfile->tun, NULL); sock_put(&tfile->sk); - else + } else tun_disable_queue(tun, tfile); synchronize_net(); @@ -429,14 +436,19 @@ static void __tun_detach(struct tun_file *tfile, bool clean) /* Drop read queue */ skb_queue_purge(&tfile->sk.sk_receive_queue); tun_set_real_num_queues(tun); - } else if (tfile->detached && clean) + } else if (tfile->detached && clean) { tun = tun_enable_queue(tfile); + sock_put(&tfile->sk); + } if (clean) { - if (tun && tun->numqueues == 0 && tun->numdisabled == 0 && - !(tun->flags & TUN_PERSIST)) - if (tun->dev->reg_state == NETREG_REGISTERED) + if (tun && tun->numqueues == 0 && tun->numdisabled == 0) { + netif_carrier_off(tun->dev); + + if (!(tun->flags & TUN_PERSIST) && + tun->dev->reg_state == NETREG_REGISTERED) unregister_netdevice(tun->dev); + } BUG_ON(!test_bit(SOCK_EXTERNALLY_ALLOCATED, &tfile->socket.flags)); @@ -458,19 +470,21 @@ static void tun_detach_all(struct net_device *dev) int i, n = tun->numqueues; for (i = 0; i < n; i++) { - tfile = rcu_dereference_protected(tun->tfiles[i], - lockdep_rtnl_is_held()); + tfile = rtnl_dereference(tun->tfiles[i]); BUG_ON(!tfile); wake_up_all(&tfile->wq.wait); rcu_assign_pointer(tfile->tun, NULL); --tun->numqueues; } + list_for_each_entry(tfile, &tun->disabled, next) { + wake_up_all(&tfile->wq.wait); + rcu_assign_pointer(tfile->tun, NULL); + } BUG_ON(tun->numqueues != 0); synchronize_net(); for (i = 0; i < n; i++) { - tfile = rcu_dereference_protected(tun->tfiles[i], - lockdep_rtnl_is_held()); + tfile = rtnl_dereference(tun->tfiles[i]); /* Drop read queue */ skb_queue_purge(&tfile->sk.sk_receive_queue); sock_put(&tfile->sk); @@ -481,6 +495,9 @@ static void tun_detach_all(struct net_device *dev) sock_put(&tfile->sk); } BUG_ON(tun->numdisabled != 0); + + if (tun->flags & TUN_PERSIST) + module_put(THIS_MODULE); } static int tun_attach(struct tun_struct *tun, struct file *file) @@ -488,8 +505,12 @@ static int tun_attach(struct tun_struct *tun, struct file *file) struct tun_file *tfile = file->private_data; int err; + err = security_tun_dev_attach(tfile->socket.sk, tun->security); + if (err < 0) + goto out; + err = -EINVAL; - if (rcu_dereference_protected(tfile->tun, lockdep_rtnl_is_held())) + if (rtnl_dereference(tfile->tun) && !tfile->detached) goto out; err = -EBUSY; @@ -1188,7 +1209,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, tun->dev->stats.rx_packets++; tun->dev->stats.rx_bytes += len; - tun_flow_update(tun, rxhash, tfile->queue_index); + tun_flow_update(tun, rxhash, tfile); return total_len; } @@ -1371,6 +1392,7 @@ static void tun_free_netdev(struct net_device *dev) BUG_ON(!(list_empty(&tun->disabled))); tun_flow_uninit(tun); + security_tun_dev_free_security(tun->security); free_netdev(dev); } @@ -1544,6 +1566,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) struct net_device *dev; int err; + if (tfile->detached) + return -EINVAL; + dev = __dev_get_by_name(net, ifr->ifr_name); if (dev) { if (ifr->ifr_flags & IFF_TUN_EXCL) @@ -1557,7 +1582,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) if (tun_not_capable(tun)) return -EPERM; - err = security_tun_dev_attach(tfile->socket.sk); + err = security_tun_dev_open(tun->security); if (err < 0) return err; @@ -1572,6 +1597,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) else { char *name; unsigned long flags = 0; + int queues = ifr->ifr_flags & IFF_MULTI_QUEUE ? + MAX_TAP_QUEUES : 1; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -1595,8 +1622,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) name = ifr->ifr_name; dev = alloc_netdev_mqs(sizeof(struct tun_struct), name, - tun_setup, - MAX_TAP_QUEUES, MAX_TAP_QUEUES); + tun_setup, queues, queues); + if (!dev) return -ENOMEM; @@ -1614,7 +1641,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) spin_lock_init(&tun->lock); - security_tun_dev_post_create(&tfile->sk); + err = security_tun_dev_alloc_security(&tun->security); + if (err < 0) + goto err_free_dev; tun_net_init(dev); @@ -1639,10 +1668,10 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) device_create_file(&tun->dev->dev, &dev_attr_owner) || device_create_file(&tun->dev->dev, &dev_attr_group)) pr_err("Failed to create tun sysfs files\n"); - - netif_carrier_on(tun->dev); } + netif_carrier_on(tun->dev); + tun_debug(KERN_INFO, tun, "tun_set_iff\n"); if (ifr->ifr_flags & IFF_NO_PI) @@ -1738,8 +1767,7 @@ static void tun_detach_filter(struct tun_struct *tun, int n) struct tun_file *tfile; for (i = 0; i < n; i++) { - tfile = rcu_dereference_protected(tun->tfiles[i], - lockdep_rtnl_is_held()); + tfile = rtnl_dereference(tun->tfiles[i]); sk_detach_filter(tfile->socket.sk); } @@ -1752,8 +1780,7 @@ static int tun_attach_filter(struct tun_struct *tun) struct tun_file *tfile; for (i = 0; i < tun->numqueues; i++) { - tfile = rcu_dereference_protected(tun->tfiles[i], - lockdep_rtnl_is_held()); + tfile = rtnl_dereference(tun->tfiles[i]); ret = sk_attach_filter(&tun->fprog, tfile->socket.sk); if (ret) { tun_detach_filter(tun, i); @@ -1771,8 +1798,7 @@ static void tun_set_sndbuf(struct tun_struct *tun) int i; for (i = 0; i < tun->numqueues; i++) { - tfile = rcu_dereference_protected(tun->tfiles[i], - lockdep_rtnl_is_held()); + tfile = rtnl_dereference(tun->tfiles[i]); tfile->socket.sk->sk_sndbuf = tun->sndbuf; } } @@ -1787,22 +1813,24 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr) if (ifr->ifr_flags & IFF_ATTACH_QUEUE) { tun = tfile->detached; - if (!tun) + if (!tun) { ret = -EINVAL; - else if (tun_not_capable(tun)) - ret = -EPERM; - else - ret = tun_attach(tun, file); + goto unlock; + } + ret = security_tun_dev_attach_queue(tun->security); + if (ret < 0) + goto unlock; + ret = tun_attach(tun, file); } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) { - tun = rcu_dereference_protected(tfile->tun, - lockdep_rtnl_is_held()); - if (!tun || !(tun->flags & TUN_TAP_MQ)) + tun = rtnl_dereference(tfile->tun); + if (!tun || !(tun->flags & TUN_TAP_MQ) || tfile->detached) ret = -EINVAL; else __tun_detach(tfile, false); } else ret = -EINVAL; +unlock: rtnl_unlock(); return ret; } @@ -1880,10 +1908,11 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, /* Disable/Enable persist mode. Keep an extra reference to the * module to prevent the module being unprobed. */ - if (arg) { + if (arg && !(tun->flags & TUN_PERSIST)) { tun->flags |= TUN_PERSIST; __module_get(THIS_MODULE); - } else { + } + if (!arg && (tun->flags & TUN_PERSIST)) { tun->flags &= ~TUN_PERSIST; module_put(THIS_MODULE); } |