diff options
Diffstat (limited to 'drivers/net/tun.c')
-rw-r--r-- | drivers/net/tun.c | 371 |
1 files changed, 228 insertions, 143 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index d7b81e4fdd5..e9bcbdfe015 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -63,6 +63,7 @@ #include <linux/virtio_net.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <net/rtnetlink.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -87,14 +88,19 @@ struct tap_filter { unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN]; }; +struct tun_file { + atomic_t count; + struct tun_struct *tun; + struct net *net; + wait_queue_head_t read_wait; +}; + struct tun_struct { - struct list_head list; + struct tun_file *tfile; unsigned int flags; - int attached; uid_t owner; gid_t group; - wait_queue_head_t read_wait; struct sk_buff_head readq; struct net_device *dev; @@ -107,6 +113,88 @@ struct tun_struct { #endif }; +static int tun_attach(struct tun_struct *tun, struct file *file) +{ + struct tun_file *tfile = file->private_data; + const struct cred *cred = current_cred(); + int err; + + ASSERT_RTNL(); + + /* Check permissions */ + if (((tun->owner != -1 && cred->euid != tun->owner) || + (tun->group != -1 && cred->egid != tun->group)) && + !capable(CAP_NET_ADMIN)) + return -EPERM; + + netif_tx_lock_bh(tun->dev); + + err = -EINVAL; + if (tfile->tun) + goto out; + + err = -EBUSY; + if (tun->tfile) + goto out; + + err = 0; + tfile->tun = tun; + tun->tfile = tfile; + dev_hold(tun->dev); + atomic_inc(&tfile->count); + +out: + netif_tx_unlock_bh(tun->dev); + return err; +} + +static void __tun_detach(struct tun_struct *tun) +{ + struct tun_file *tfile = tun->tfile; + + /* Detach from net device */ + netif_tx_lock_bh(tun->dev); + tfile->tun = NULL; + tun->tfile = NULL; + netif_tx_unlock_bh(tun->dev); + + /* Drop read queue */ + skb_queue_purge(&tun->readq); + + /* Drop the extra count on the net device */ + dev_put(tun->dev); +} + +static void tun_detach(struct tun_struct *tun) +{ + rtnl_lock(); + __tun_detach(tun); + rtnl_unlock(); +} + +static struct tun_struct *__tun_get(struct tun_file *tfile) +{ + struct tun_struct *tun = NULL; + + if (atomic_inc_not_zero(&tfile->count)) + tun = tfile->tun; + + return tun; +} + +static struct tun_struct *tun_get(struct file *file) +{ + return __tun_get(file->private_data); +} + +static void tun_put(struct tun_struct *tun) +{ + struct tun_file *tfile = tun->tfile; + + if (atomic_dec_and_test(&tfile->count)) + tun_detach(tfile->tun); +} + /* TAP filterting */ static void addr_hash_set(u32 *mask, const u8 *addr) { @@ -213,13 +301,23 @@ static int check_filter(struct tap_filter *filter, const struct sk_buff *skb) /* Network device part of the driver */ -static int tun_net_id; -struct tun_net { - struct list_head dev_list; -}; - static const struct ethtool_ops tun_ethtool_ops; +/* Net device detach from fd. */ +static void tun_net_uninit(struct net_device *dev) +{ + struct tun_struct *tun = netdev_priv(dev); + struct tun_file *tfile = tun->tfile; + + /* Inform the methods they need to stop using the dev. + */ + if (tfile) { + wake_up_all(&tfile->read_wait); + if (atomic_dec_and_test(&tfile->count)) + __tun_detach(tun); + } +} + /* Net device open. */ static int tun_net_open(struct net_device *dev) { @@ -242,7 +340,7 @@ static int tun_net_xmit(struct sk_buff *skb, struct net_device *dev) DBG(KERN_INFO "%s: tun_net_xmit %d\n", tun->dev->name, skb->len); /* Drop packet if interface is not attached */ - if (!tun->attached) + if (!tun->tfile) goto drop; /* Drop if the filter does not like it. @@ -274,7 +372,7 @@ static int tun_net_xmit(struct sk_buff *skb, struct net_device *dev) /* Notify and wake up reader process */ if (tun->flags & TUN_FASYNC) kill_fasync(&tun->fasync, SIGIO, POLL_IN); - wake_up_interruptible(&tun->read_wait); + wake_up_interruptible(&tun->tfile->read_wait); return 0; drop: @@ -306,6 +404,7 @@ tun_net_change_mtu(struct net_device *dev, int new_mtu) } static const struct net_device_ops tun_netdev_ops = { + .ndo_uninit = tun_net_uninit, .ndo_open = tun_net_open, .ndo_stop = tun_net_close, .ndo_start_xmit = tun_net_xmit, @@ -313,6 +412,7 @@ static const struct net_device_ops tun_netdev_ops = { }; static const struct net_device_ops tap_netdev_ops = { + .ndo_uninit = tun_net_uninit, .ndo_open = tun_net_open, .ndo_stop = tun_net_close, .ndo_start_xmit = tun_net_xmit, @@ -359,19 +459,24 @@ static void tun_net_init(struct net_device *dev) /* Poll */ static unsigned int tun_chr_poll(struct file *file, poll_table * wait) { - struct tun_struct *tun = file->private_data; + struct tun_file *tfile = file->private_data; + struct tun_struct *tun = __tun_get(tfile); unsigned int mask = POLLOUT | POLLWRNORM; if (!tun) - return -EBADFD; + return POLLERR; DBG(KERN_INFO "%s: tun_chr_poll\n", tun->dev->name); - poll_wait(file, &tun->read_wait, wait); + poll_wait(file, &tfile->read_wait, wait); if (!skb_queue_empty(&tun->readq)) mask |= POLLIN | POLLRDNORM; + if (tun->dev->reg_state != NETREG_REGISTERED) + mask = POLLERR; + + tun_put(tun); return mask; } @@ -556,14 +661,18 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv, static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv, unsigned long count, loff_t pos) { - struct tun_struct *tun = iocb->ki_filp->private_data; + struct tun_struct *tun = tun_get(iocb->ki_filp); + ssize_t result; if (!tun) return -EBADFD; DBG(KERN_INFO "%s: tun_chr_write %ld\n", tun->dev->name, count); - return tun_get_user(tun, (struct iovec *) iv, iov_length(iv, count)); + result = tun_get_user(tun, (struct iovec *) iv, iov_length(iv, count)); + + tun_put(tun); + return result; } /* Put packet to the user space buffer */ @@ -636,7 +745,8 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, unsigned long count, loff_t pos) { struct file *file = iocb->ki_filp; - struct tun_struct *tun = file->private_data; + struct tun_file *tfile = file->private_data; + struct tun_struct *tun = __tun_get(tfile); DECLARE_WAITQUEUE(wait, current); struct sk_buff *skb; ssize_t len, ret = 0; @@ -647,10 +757,12 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name); len = iov_length(iv, count); - if (len < 0) - return -EINVAL; + if (len < 0) { + ret = -EINVAL; + goto out; + } - add_wait_queue(&tun->read_wait, &wait); + add_wait_queue(&tfile->read_wait, &wait); while (len) { current->state = TASK_INTERRUPTIBLE; @@ -664,6 +776,10 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, ret = -ERESTARTSYS; break; } + if (tun->dev->reg_state != NETREG_REGISTERED) { + ret = -EIO; + break; + } /* Nothing to read, let's sleep */ schedule(); @@ -677,8 +793,10 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, } current->state = TASK_RUNNING; - remove_wait_queue(&tun->read_wait, &wait); + remove_wait_queue(&tfile->read_wait, &wait); +out: + tun_put(tun); return ret; } @@ -687,54 +805,49 @@ static void tun_setup(struct net_device *dev) struct tun_struct *tun = netdev_priv(dev); skb_queue_head_init(&tun->readq); - init_waitqueue_head(&tun->read_wait); tun->owner = -1; tun->group = -1; dev->ethtool_ops = &tun_ethtool_ops; dev->destructor = free_netdev; - dev->features |= NETIF_F_NETNS_LOCAL; } -static struct tun_struct *tun_get_by_name(struct tun_net *tn, const char *name) +/* Trivial set of netlink ops to allow deleting tun or tap + * device with netlink. + */ +static int tun_validate(struct nlattr *tb[], struct nlattr *data[]) { - struct tun_struct *tun; + return -EINVAL; +} - ASSERT_RTNL(); - list_for_each_entry(tun, &tn->dev_list, list) { - if (!strncmp(tun->dev->name, name, IFNAMSIZ)) - return tun; - } +static struct rtnl_link_ops tun_link_ops __read_mostly = { + .kind = DRV_NAME, + .priv_size = sizeof(struct tun_struct), + .setup = tun_setup, + .validate = tun_validate, +}; - return NULL; -} static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) { - struct tun_net *tn; struct tun_struct *tun; struct net_device *dev; - const struct cred *cred = current_cred(); int err; - tn = net_generic(net, tun_net_id); - tun = tun_get_by_name(tn, ifr->ifr_name); - if (tun) { - if (tun->attached) - return -EBUSY; - - /* Check permissions */ - if (((tun->owner != -1 && - cred->euid != tun->owner) || - (tun->group != -1 && - cred->egid != tun->group)) && - !capable(CAP_NET_ADMIN)) { - return -EPERM; - } + dev = __dev_get_by_name(net, ifr->ifr_name); + if (dev) { + if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops) + tun = netdev_priv(dev); + else if ((ifr->ifr_flags & IFF_TAP) && dev->netdev_ops == &tap_netdev_ops) + tun = netdev_priv(dev); + else + return -EINVAL; + + err = tun_attach(tun, file); + if (err < 0) + return err; } - else if (__dev_get_by_name(net, ifr->ifr_name)) - return -EINVAL; else { char *name; unsigned long flags = 0; @@ -765,6 +878,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) return -ENOMEM; dev_net_set(dev, net); + dev->rtnl_link_ops = &tun_link_ops; tun = netdev_priv(dev); tun->dev = dev; @@ -783,7 +897,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) if (err < 0) goto err_free_dev; - list_add(&tun->list, &tn->dev_list); + err = tun_attach(tun, file); + if (err < 0) + goto err_free_dev; } DBG(KERN_INFO "%s: tun_set_iff\n", tun->dev->name); @@ -803,10 +919,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) else tun->flags &= ~TUN_VNET_HDR; - file->private_data = tun; - tun->attached = 1; - get_net(dev_net(tun->dev)); - /* Make sure persistent devices do not get stuck in * xoff state. */ @@ -824,7 +936,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) static int tun_get_iff(struct net *net, struct file *file, struct ifreq *ifr) { - struct tun_struct *tun = file->private_data; + struct tun_struct *tun = tun_get(file); if (!tun) return -EBADFD; @@ -849,6 +961,7 @@ static int tun_get_iff(struct net *net, struct file *file, struct ifreq *ifr) if (tun->flags & TUN_VNET_HDR) ifr->ifr_flags |= IFF_VNET_HDR; + tun_put(tun); return 0; } @@ -895,7 +1008,8 @@ static int set_offload(struct net_device *dev, unsigned long arg) static int tun_chr_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { - struct tun_struct *tun = file->private_data; + struct tun_file *tfile = file->private_data; + struct tun_struct *tun; void __user* argp = (void __user*)arg; struct ifreq ifr; int ret; @@ -904,13 +1018,23 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file, if (copy_from_user(&ifr, argp, sizeof ifr)) return -EFAULT; + if (cmd == TUNGETFEATURES) { + /* Currently this just means: "what IFF flags are valid?". + * This is needed because we never checked for invalid flags on + * TUNSETIFF. */ + return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE | + IFF_VNET_HDR, + (unsigned int __user*)argp); + } + + tun = __tun_get(tfile); if (cmd == TUNSETIFF && !tun) { int err; ifr.ifr_name[IFNAMSIZ-1] = '\0'; rtnl_lock(); - err = tun_set_iff(current->nsproxy->net_ns, file, &ifr); + err = tun_set_iff(tfile->net, file, &ifr); rtnl_unlock(); if (err) @@ -921,28 +1045,21 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file, return 0; } - if (cmd == TUNGETFEATURES) { - /* Currently this just means: "what IFF flags are valid?". - * This is needed because we never checked for invalid flags on - * TUNSETIFF. */ - return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE | - IFF_VNET_HDR, - (unsigned int __user*)argp); - } if (!tun) return -EBADFD; DBG(KERN_INFO "%s: tun_chr_ioctl cmd %d\n", tun->dev->name, cmd); + ret = 0; switch (cmd) { case TUNGETIFF: ret = tun_get_iff(current->nsproxy->net_ns, file, &ifr); if (ret) - return ret; + break; if (copy_to_user(argp, &ifr, sizeof(ifr))) - return -EFAULT; + ret = -EFAULT; break; case TUNSETNOCSUM: @@ -994,7 +1111,7 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file, ret = 0; } rtnl_unlock(); - return ret; + break; #ifdef TUN_DEBUG case TUNSETDEBUG: @@ -1005,24 +1122,25 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file, rtnl_lock(); ret = set_offload(tun->dev, arg); rtnl_unlock(); - return ret; + break; case TUNSETTXFILTER: /* Can be set only for TAPs */ + ret = -EINVAL; if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV) - return -EINVAL; + break; rtnl_lock(); ret = update_filter(&tun->txflt, (void __user *)arg); rtnl_unlock(); - return ret; + break; case SIOCGIFHWADDR: /* Get hw addres */ memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN); ifr.ifr_hwaddr.sa_family = tun->dev->type; if (copy_to_user(argp, &ifr, sizeof ifr)) - return -EFAULT; - return 0; + ret = -EFAULT; + break; case SIOCSIFHWADDR: /* Set hw address */ @@ -1032,18 +1150,19 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file, rtnl_lock(); ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr); rtnl_unlock(); - return ret; - + break; default: - return -EINVAL; + ret = -EINVAL; + break; }; - return 0; + tun_put(tun); + return ret; } static int tun_chr_fasync(int fd, struct file *file, int on) { - struct tun_struct *tun = file->private_data; + struct tun_struct *tun = tun_get(file); int ret; if (!tun) @@ -1065,42 +1184,48 @@ static int tun_chr_fasync(int fd, struct file *file, int on) ret = 0; out: unlock_kernel(); + tun_put(tun); return ret; } static int tun_chr_open(struct inode *inode, struct file * file) { + struct tun_file *tfile; cycle_kernel_lock(); DBG1(KERN_INFO "tunX: tun_chr_open\n"); - file->private_data = NULL; + + tfile = kmalloc(sizeof(*tfile), GFP_KERNEL); + if (!tfile) + return -ENOMEM; + atomic_set(&tfile->count, 0); + tfile->tun = NULL; + tfile->net = get_net(current->nsproxy->net_ns); + init_waitqueue_head(&tfile->read_wait); + file->private_data = tfile; return 0; } static int tun_chr_close(struct inode *inode, struct file *file) { - struct tun_struct *tun = file->private_data; + struct tun_file *tfile = file->private_data; + struct tun_struct *tun = __tun_get(tfile); - if (!tun) - return 0; - - DBG(KERN_INFO "%s: tun_chr_close\n", tun->dev->name); - rtnl_lock(); + if (tun) { + DBG(KERN_INFO "%s: tun_chr_close\n", tun->dev->name); - /* Detach from net device */ - file->private_data = NULL; - tun->attached = 0; - put_net(dev_net(tun->dev)); + rtnl_lock(); + __tun_detach(tun); - /* Drop read queue */ - skb_queue_purge(&tun->readq); + /* If desireable, unregister the netdevice. */ + if (!(tun->flags & TUN_PERSIST)) + unregister_netdevice(tun->dev); - if (!(tun->flags & TUN_PERSIST)) { - list_del(&tun->list); - unregister_netdevice(tun->dev); + rtnl_unlock(); } - rtnl_unlock(); + put_net(tfile->net); + kfree(tfile); return 0; } @@ -1181,7 +1306,7 @@ static void tun_set_msglevel(struct net_device *dev, u32 value) static u32 tun_get_link(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); - return tun->attached; + return !!tun->tfile; } static u32 tun_get_rx_csum(struct net_device *dev) @@ -1210,45 +1335,6 @@ static const struct ethtool_ops tun_ethtool_ops = { .set_rx_csum = tun_set_rx_csum }; -static int tun_init_net(struct net *net) -{ - struct tun_net *tn; - - tn = kmalloc(sizeof(*tn), GFP_KERNEL); - if (tn == NULL) - return -ENOMEM; - - INIT_LIST_HEAD(&tn->dev_list); - - if (net_assign_generic(net, tun_net_id, tn)) { - kfree(tn); - return -ENOMEM; - } - - return 0; -} - -static void tun_exit_net(struct net *net) -{ - struct tun_net *tn; - struct tun_struct *tun, *nxt; - - tn = net_generic(net, tun_net_id); - - rtnl_lock(); - list_for_each_entry_safe(tun, nxt, &tn->dev_list, list) { - DBG(KERN_INFO "%s cleaned up\n", tun->dev->name); - unregister_netdevice(tun->dev); - } - rtnl_unlock(); - - kfree(tn); -} - -static struct pernet_operations tun_net_ops = { - .init = tun_init_net, - .exit = tun_exit_net, -}; static int __init tun_init(void) { @@ -1257,10 +1343,10 @@ static int __init tun_init(void) printk(KERN_INFO "tun: %s, %s\n", DRV_DESCRIPTION, DRV_VERSION); printk(KERN_INFO "tun: %s\n", DRV_COPYRIGHT); - ret = register_pernet_gen_device(&tun_net_id, &tun_net_ops); + ret = rtnl_link_register(&tun_link_ops); if (ret) { - printk(KERN_ERR "tun: Can't register pernet ops\n"); - goto err_pernet; + printk(KERN_ERR "tun: Can't register link_ops\n"); + goto err_linkops; } ret = misc_register(&tun_miscdev); @@ -1268,18 +1354,17 @@ static int __init tun_init(void) printk(KERN_ERR "tun: Can't register misc device %d\n", TUN_MINOR); goto err_misc; } - return 0; - + return 0; err_misc: - unregister_pernet_gen_device(tun_net_id, &tun_net_ops); -err_pernet: + rtnl_link_unregister(&tun_link_ops); +err_linkops: return ret; } static void tun_cleanup(void) { misc_deregister(&tun_miscdev); - unregister_pernet_gen_device(tun_net_id, &tun_net_ops); + rtnl_link_unregister(&tun_link_ops); } module_init(tun_init); |