diff options
author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2007-10-12 21:27:47 -0400 |
---|---|---|
committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2007-10-12 21:27:47 -0400 |
commit | b981d8b3f5e008ff10d993be633ad00564fc22cd (patch) | |
tree | e292dc07b22308912cf6a58354a608b9e5e8e1fd /net/core | |
parent | b11d2127c4893a7315d1e16273bc8560049fa3ca (diff) | |
parent | 2b9e0aae1d50e880c58d46788e5e3ebd89d75d62 (diff) |
Merge master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6
Conflicts:
drivers/macintosh/adbhid.c
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/Makefile | 4 | ||||
-rw-r--r-- | net/core/datagram.c | 3 | ||||
-rw-r--r-- | net/core/dev.c | 1007 | ||||
-rw-r--r-- | net/core/dev_mcast.c | 72 | ||||
-rw-r--r-- | net/core/dst.c | 202 | ||||
-rw-r--r-- | net/core/ethtool.c | 412 | ||||
-rw-r--r-- | net/core/fib_rules.c | 30 | ||||
-rw-r--r-- | net/core/flow.c | 2 | ||||
-rw-r--r-- | net/core/gen_estimator.c | 81 | ||||
-rw-r--r-- | net/core/neighbour.c | 102 | ||||
-rw-r--r-- | net/core/net-sysfs.c | 42 | ||||
-rw-r--r-- | net/core/net_namespace.c | 317 | ||||
-rw-r--r-- | net/core/netpoll.c | 87 | ||||
-rw-r--r-- | net/core/pktgen.c | 279 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 183 | ||||
-rw-r--r-- | net/core/scm.c | 9 | ||||
-rw-r--r-- | net/core/skbuff.c | 22 | ||||
-rw-r--r-- | net/core/sock.c | 154 | ||||
-rw-r--r-- | net/core/utils.c | 1 |
19 files changed, 1985 insertions, 1024 deletions
diff --git a/net/core/Makefile b/net/core/Makefile index 4751613e1b5..b1332f6d004 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -3,7 +3,7 @@ # obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ - gen_stats.o gen_estimator.o + gen_stats.o gen_estimator.o net_namespace.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o @@ -11,7 +11,7 @@ obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o obj-$(CONFIG_XFRM) += flow.o -obj-$(CONFIG_SYSFS) += net-sysfs.o +obj-y += net-sysfs.o obj-$(CONFIG_NET_PKTGEN) += pktgen.o obj-$(CONFIG_NETPOLL) += netpoll.o obj-$(CONFIG_NET_DMA) += user_dma.o diff --git a/net/core/datagram.c b/net/core/datagram.c index cb056f47612..029b93e246b 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -450,6 +450,9 @@ int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, __wsum csum; int chunk = skb->len - hlen; + if (!chunk) + return 0; + /* Skip filled elements. * Pretty silly, look at memcpy_toiovec, though 8) */ diff --git a/net/core/dev.c b/net/core/dev.c index 13a0d9f6da5..1e169a541ce 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -92,6 +92,7 @@ #include <linux/etherdevice.h> #include <linux/notifier.h> #include <linux/skbuff.h> +#include <net/net_namespace.h> #include <net/sock.h> #include <linux/rtnetlink.h> #include <linux/proc_fs.h> @@ -189,25 +190,50 @@ static struct net_dma net_dma = { * unregister_netdevice(), which must be called with the rtnl * semaphore held. */ -LIST_HEAD(dev_base_head); DEFINE_RWLOCK(dev_base_lock); -EXPORT_SYMBOL(dev_base_head); EXPORT_SYMBOL(dev_base_lock); #define NETDEV_HASHBITS 8 -static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS]; -static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS]; +#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) -static inline struct hlist_head *dev_name_hash(const char *name) +static inline struct hlist_head *dev_name_hash(struct net *net, const char *name) { unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); - return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)]; + return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)]; } -static inline struct hlist_head *dev_index_hash(int ifindex) +static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) { - return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)]; + return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)]; +} + +/* Device list insertion */ +static int list_netdevice(struct net_device *dev) +{ + struct net *net = dev->nd_net; + + ASSERT_RTNL(); + + write_lock_bh(&dev_base_lock); + list_add_tail(&dev->dev_list, &net->dev_base_head); + hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); + hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); + write_unlock_bh(&dev_base_lock); + return 0; +} + +/* Device list removal */ +static void unlist_netdevice(struct net_device *dev) +{ + ASSERT_RTNL(); + + /* Unlink dev from the device chain */ + write_lock_bh(&dev_base_lock); + list_del(&dev->dev_list); + hlist_del(&dev->name_hlist); + hlist_del(&dev->index_hlist); + write_unlock_bh(&dev_base_lock); } /* @@ -220,17 +246,12 @@ static RAW_NOTIFIER_HEAD(netdev_chain); * Device drivers call our routines to queue packets here. We empty the * queue in the local softnet handler. */ -DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL }; -#ifdef CONFIG_SYSFS -extern int netdev_sysfs_init(void); -extern int netdev_register_sysfs(struct net_device *); -extern void netdev_unregister_sysfs(struct net_device *); -#else -#define netdev_sysfs_init() (0) -#define netdev_register_sysfs(dev) (0) -#define netdev_unregister_sysfs(dev) do { } while(0) -#endif +DEFINE_PER_CPU(struct softnet_data, softnet_data); + +extern int netdev_kobject_init(void); +extern int netdev_register_kobject(struct net_device *); +extern void netdev_unregister_kobject(struct net_device *); #ifdef CONFIG_DEBUG_LOCK_ALLOC /* @@ -490,7 +511,7 @@ unsigned long netdev_boot_base(const char *prefix, int unit) * If device already registered then return base of 1 * to indicate not to probe for this interface */ - if (__dev_get_by_name(name)) + if (__dev_get_by_name(&init_net, name)) return 1; for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) @@ -545,11 +566,11 @@ __setup("netdev=", netdev_boot_setup); * careful with locks. */ -struct net_device *__dev_get_by_name(const char *name) +struct net_device *__dev_get_by_name(struct net *net, const char *name) { struct hlist_node *p; - hlist_for_each(p, dev_name_hash(name)) { + hlist_for_each(p, dev_name_hash(net, name)) { struct net_device *dev = hlist_entry(p, struct net_device, name_hlist); if (!strncmp(dev->name, name, IFNAMSIZ)) @@ -569,12 +590,12 @@ struct net_device *__dev_get_by_name(const char *name) * matching device is found. */ -struct net_device *dev_get_by_name(const char *name) +struct net_device *dev_get_by_name(struct net *net, const char *name) { struct net_device *dev; read_lock(&dev_base_lock); - dev = __dev_get_by_name(name); + dev = __dev_get_by_name(net, name); if (dev) dev_hold(dev); read_unlock(&dev_base_lock); @@ -592,11 +613,11 @@ struct net_device *dev_get_by_name(const char *name) * or @dev_base_lock. */ -struct net_device *__dev_get_by_index(int ifindex) +struct net_device *__dev_get_by_index(struct net *net, int ifindex) { struct hlist_node *p; - hlist_for_each(p, dev_index_hash(ifindex)) { + hlist_for_each(p, dev_index_hash(net, ifindex)) { struct net_device *dev = hlist_entry(p, struct net_device, index_hlist); if (dev->ifindex == ifindex) @@ -616,12 +637,12 @@ struct net_device *__dev_get_by_index(int ifindex) * dev_put to indicate they have finished with it. */ -struct net_device *dev_get_by_index(int ifindex) +struct net_device *dev_get_by_index(struct net *net, int ifindex) { struct net_device *dev; read_lock(&dev_base_lock); - dev = __dev_get_by_index(ifindex); + dev = __dev_get_by_index(net, ifindex); if (dev) dev_hold(dev); read_unlock(&dev_base_lock); @@ -642,13 +663,13 @@ struct net_device *dev_get_by_index(int ifindex) * If the API was consistent this would be __dev_get_by_hwaddr */ -struct net_device *dev_getbyhwaddr(unsigned short type, char *ha) +struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha) { struct net_device *dev; ASSERT_RTNL(); - for_each_netdev(dev) + for_each_netdev(&init_net, dev) if (dev->type == type && !memcmp(dev->dev_addr, ha, dev->addr_len)) return dev; @@ -658,12 +679,12 @@ struct net_device *dev_getbyhwaddr(unsigned short type, char *ha) EXPORT_SYMBOL(dev_getbyhwaddr); -struct net_device *__dev_getfirstbyhwtype(unsigned short type) +struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type) { struct net_device *dev; ASSERT_RTNL(); - for_each_netdev(dev) + for_each_netdev(net, dev) if (dev->type == type) return dev; @@ -672,12 +693,12 @@ struct net_device *__dev_getfirstbyhwtype(unsigned short type) EXPORT_SYMBOL(__dev_getfirstbyhwtype); -struct net_device *dev_getfirstbyhwtype(unsigned short type) +struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) { struct net_device *dev; rtnl_lock(); - dev = __dev_getfirstbyhwtype(type); + dev = __dev_getfirstbyhwtype(net, type); if (dev) dev_hold(dev); rtnl_unlock(); @@ -697,13 +718,13 @@ EXPORT_SYMBOL(dev_getfirstbyhwtype); * dev_put to indicate they have finished with it. */ -struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask) +struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask) { struct net_device *dev, *ret; ret = NULL; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(net, dev) { if (((dev->flags ^ if_flags) & mask) == 0) { dev_hold(dev); ret = dev; @@ -740,9 +761,10 @@ int dev_valid_name(const char *name) } /** - * dev_alloc_name - allocate a name for a device - * @dev: device + * __dev_alloc_name - allocate a name for a device + * @net: network namespace to allocate the device name in * @name: name format string + * @buf: scratch buffer and result name string * * Passed a format string - eg "lt%d" it will try and find a suitable * id. It scans list of devices to build up a free map, then chooses @@ -753,13 +775,12 @@ int dev_valid_name(const char *name) * Returns the number of the unit assigned or a negative errno code. */ -int dev_alloc_name(struct net_device *dev, const char *name) +static int __dev_alloc_name(struct net *net, const char *name, char *buf) { int i = 0; - char buf[IFNAMSIZ]; const char *p; const int max_netdevices = 8*PAGE_SIZE; - long *inuse; + unsigned long *inuse; struct net_device *d; p = strnchr(name, IFNAMSIZ-1, '%'); @@ -773,18 +794,18 @@ int dev_alloc_name(struct net_device *dev, const char *name) return -EINVAL; /* Use one page as a bit array of possible slots */ - inuse = (long *) get_zeroed_page(GFP_ATOMIC); + inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC); if (!inuse) return -ENOMEM; - for_each_netdev(d) { + for_each_netdev(net, d) { if (!sscanf(d->name, name, &i)) continue; if (i < 0 || i >= max_netdevices) continue; /* avoid cases where sscanf is not exact inverse of printf */ - snprintf(buf, sizeof(buf), name, i); + snprintf(buf, IFNAMSIZ, name, i); if (!strncmp(buf, d->name, IFNAMSIZ)) set_bit(i, inuse); } @@ -793,11 +814,9 @@ int dev_alloc_name(struct net_device *dev, const char *name) free_page((unsigned long) inuse); } - snprintf(buf, sizeof(buf), name, i); - if (!__dev_get_by_name(buf)) { - strlcpy(dev->name, buf, IFNAMSIZ); + snprintf(buf, IFNAMSIZ, name, i); + if (!__dev_get_by_name(net, buf)) return i; - } /* It is possible to run out of possible slots * when the name is long and there isn't enough space left @@ -806,6 +825,34 @@ int dev_alloc_name(struct net_device *dev, const char *name) return -ENFILE; } +/** + * dev_alloc_name - allocate a name for a device + * @dev: device + * @name: name format string + * + * Passed a format string - eg "lt%d" it will try and find a suitable + * id. It scans list of devices to build up a free map, then chooses + * the first empty slot. The caller must hold the dev_base or rtnl lock + * while allocating the name and adding the device in order to avoid + * duplicates. + * Limited to bits_per_byte * page size devices (ie 32K on most platforms). + * Returns the number of the unit assigned or a negative errno code. + */ + +int dev_alloc_name(struct net_device *dev, const char *name) +{ + char buf[IFNAMSIZ]; + struct net *net; + int ret; + + BUG_ON(!dev->nd_net); + net = dev->nd_net; + ret = __dev_alloc_name(net, name, buf); + if (ret >= 0) + strlcpy(dev->name, buf, IFNAMSIZ); + return ret; +} + /** * dev_change_name - change name of a device @@ -817,31 +864,56 @@ int dev_alloc_name(struct net_device *dev, const char *name) */ int dev_change_name(struct net_device *dev, char *newname) { + char oldname[IFNAMSIZ]; int err = 0; + int ret; + struct net *net; ASSERT_RTNL(); + BUG_ON(!dev->nd_net); + net = dev->nd_net; if (dev->flags & IFF_UP) return -EBUSY; if (!dev_valid_name(newname)) return -EINVAL; + memcpy(oldname, dev->name, IFNAMSIZ); + if (strchr(newname, '%')) { err = dev_alloc_name(dev, newname); if (err < 0) return err; strcpy(newname, dev->name); } - else if (__dev_get_by_name(newname)) + else if (__dev_get_by_name(net, newname)) return -EEXIST; else strlcpy(dev->name, newname, IFNAMSIZ); +rollback: device_rename(&dev->dev, dev->name); + + write_lock_bh(&dev_base_lock); hlist_del(&dev->name_hlist); - hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name)); - raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev); + hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); + write_unlock_bh(&dev_base_lock); + + ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev); + ret = notifier_to_errno(ret); + + if (ret) { + if (err) { + printk(KERN_ERR + "%s: name change rollback failed: %d.\n", + dev->name, ret); + } else { + err = ret; + memcpy(dev->name, oldname, IFNAMSIZ); + goto rollback; + } + } return err; } @@ -854,7 +926,7 @@ int dev_change_name(struct net_device *dev, char *newname) */ void netdev_features_change(struct net_device *dev) { - raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev); + call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev); } EXPORT_SYMBOL(netdev_features_change); @@ -869,8 +941,7 @@ EXPORT_SYMBOL(netdev_features_change); void netdev_state_change(struct net_device *dev) { if (dev->flags & IFF_UP) { - raw_notifier_call_chain(&netdev_chain, - NETDEV_CHANGE, dev); + call_netdevice_notifiers(NETDEV_CHANGE, dev); rtmsg_ifinfo(RTM_NEWLINK, dev, 0); } } @@ -884,26 +955,18 @@ void netdev_state_change(struct net_device *dev) * available in this kernel then it becomes a nop. */ -void dev_load(const char *name) +void dev_load(struct net *net, const char *name) { struct net_device *dev; read_lock(&dev_base_lock); - dev = __dev_get_by_name(name); + dev = __dev_get_by_name(net, name); read_unlock(&dev_base_lock); if (!dev && capable(CAP_SYS_MODULE)) request_module("%s", name); } -static int default_rebuild_header(struct sk_buff *skb) -{ - printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", - skb->dev ? skb->dev->name : "NULL!!!"); - kfree_skb(skb); - return 1; -} - /** * dev_open - prepare an interface for use. * @dev: device to open @@ -966,7 +1029,7 @@ int dev_open(struct net_device *dev) /* * ... and announce new interface. */ - raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev); + call_netdevice_notifiers(NETDEV_UP, dev); } return ret; } @@ -982,6 +1045,8 @@ int dev_open(struct net_device *dev) */ int dev_close(struct net_device *dev) { + might_sleep(); + if (!(dev->flags & IFF_UP)) return 0; @@ -989,23 +1054,19 @@ int dev_close(struct net_device *dev) * Tell people we are going down, so that they can * prepare to death, when device is still operating. */ - raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev); + call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); dev_deactivate(dev); clear_bit(__LINK_STATE_START, &dev->state); /* Synchronize to scheduled poll. We cannot touch poll list, - * it can be even on different cpu. So just clear netif_running(), - * and wait when poll really will happen. Actually, the best place - * for this is inside dev->stop() after device stopped its irq - * engine, but this requires more changes in devices. */ - + * it can be even on different cpu. So just clear netif_running(). + * + * dev->stop() will invoke napi_disable() on all of it's + * napi_struct instances on this device. + */ smp_mb__after_clear_bit(); /* Commit netif_running(). */ - while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) { - /* No hurry. */ - msleep(1); - } /* * Call the device specific close. This cannot fail. @@ -1026,12 +1087,14 @@ int dev_close(struct net_device *dev) /* * Tell people we are down */ - raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev); + call_netdevice_notifiers(NETDEV_DOWN, dev); return 0; } +static int dev_boot_phase = 1; + /* * Device change register/unregister. These are not inline or static * as we export them to the world. @@ -1054,20 +1117,49 @@ int dev_close(struct net_device *dev) int register_netdevice_notifier(struct notifier_block *nb) { struct net_device *dev; + struct net_device *last; + struct net *net; int err; rtnl_lock(); err = raw_notifier_chain_register(&netdev_chain, nb); - if (!err) { - for_each_netdev(dev) { - nb->notifier_call(nb, NETDEV_REGISTER, dev); + if (err) + goto unlock; + if (dev_boot_phase) + goto unlock; + for_each_net(net) { + for_each_netdev(net, dev) { + err = nb->notifier_call(nb, NETDEV_REGISTER, dev); + err = notifier_to_errno(err); + if (err) + goto rollback; + + if (!(dev->flags & IFF_UP)) + continue; - if (dev->flags & IFF_UP) - nb->notifier_call(nb, NETDEV_UP, dev); + nb->notifier_call(nb, NETDEV_UP, dev); } } + +unlock: rtnl_unlock(); return err; + +rollback: + last = dev; + for_each_net(net) { + for_each_netdev(net, dev) { + if (dev == last) + break; + + if (dev->flags & IFF_UP) { + nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); + nb->notifier_call(nb, NETDEV_DOWN, dev); + } + nb->notifier_call(nb, NETDEV_UNREGISTER, dev); + } + } + goto unlock; } /** @@ -1099,9 +1191,9 @@ int unregister_netdevice_notifier(struct notifier_block *nb) * are as for raw_notifier_call_chain(). */ -int call_netdevice_notifiers(unsigned long val, void *v) +int call_netdevice_notifiers(unsigned long val, struct net_device *dev) { - return raw_notifier_call_chain(&netdev_chain, val, v); + return raw_notifier_call_chain(&netdev_chain, val, dev); } /* When > 0 there are consumers of rx skb time stamps */ @@ -1188,21 +1280,21 @@ void __netif_schedule(struct net_device *dev) } EXPORT_SYMBOL(__netif_schedule); -void __netif_rx_schedule(struct net_device *dev) +void dev_kfree_skb_irq(struct sk_buff *skb) { - unsigned long flags; + if (atomic_dec_and_test(&skb->users)) { + struct softnet_data *sd; + unsigned long flags; - local_irq_save(flags); - dev_hold(dev); - list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list); - if (dev->quota < 0) - dev->quota += dev->weight; - else - dev->quota = dev->weight; - __raise_softirq_irqoff(NET_RX_SOFTIRQ); - local_irq_restore(flags); + local_irq_save(flags); + sd = &__get_cpu_var(softnet_data); + skb->next = sd->completion_queue; + sd->completion_queue = skb; + raise_softirq_irqoff(NET_TX_SOFTIRQ); + local_irq_restore(flags); + } } -EXPORT_SYMBOL(__netif_rx_schedule); +EXPORT_SYMBOL(dev_kfree_skb_irq); void dev_kfree_skb_any(struct sk_buff *skb) { @@ -1214,7 +1306,12 @@ void dev_kfree_skb_any(struct sk_buff *skb) EXPORT_SYMBOL(dev_kfree_skb_any); -/* Hot-plugging. */ +/** + * netif_device_detach - mark device as removed + * @dev: network device + * + * Mark device as removed from system and therefore no longer available. + */ void netif_device_detach(struct net_device *dev) { if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && @@ -1224,6 +1321,12 @@ void netif_device_detach(struct net_device *dev) } EXPORT_SYMBOL(netif_device_detach); +/** + * netif_device_attach - mark device as attached + * @dev: network device + * + * Mark device as attached from system and restart if needed. + */ void netif_device_attach(struct net_device *dev) { if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && @@ -1456,18 +1559,6 @@ out_kfree_skb: return 0; } -#define HARD_TX_LOCK(dev, cpu) { \ - if ((dev->features & NETIF_F_LLTX) == 0) { \ - netif_tx_lock(dev); \ - } \ -} - -#define HARD_TX_UNLOCK(dev) { \ - if ((dev->features & NETIF_F_LLTX) == 0) { \ - netif_tx_unlock(dev); \ - } \ -} - /** * dev_queue_xmit - transmit a buffer * @skb: buffer to transmit @@ -1685,7 +1776,7 @@ enqueue: return NET_RX_SUCCESS; } - netif_rx_schedule(&queue->backlog_dev); + napi_schedule(&queue->backlog); goto enqueue; } @@ -1726,6 +1817,7 @@ static inline struct net_device *skb_bond(struct sk_buff *skb) return dev; } + static void net_tx_action(struct softirq_action *h) { struct softnet_data *sd = &__get_cpu_var(softnet_data); @@ -1882,7 +1974,7 @@ int netif_receive_skb(struct sk_buff *skb) __be16 type; /* if we've gotten here through NAPI, check netpoll */ - if (skb->dev->poll && netpoll_rx(skb)) + if (netpoll_receive_skb(skb)) return NET_RX_DROP; if (!skb->tstamp.tv64) @@ -1972,22 +2064,25 @@ out: return ret; } -static int process_backlog(struct net_device *backlog_dev, int *budget) +static int process_backlog(struct napi_struct *napi, int quota) { int work = 0; - int quota = min(backlog_dev->quota, *budget); struct softnet_data *queue = &__get_cpu_var(softnet_data); unsigned long start_time = jiffies; - backlog_dev->weight = weight_p; - for (;;) { + napi->weight = weight_p; + do { struct sk_buff *skb; struct net_device *dev; local_irq_disable(); skb = __skb_dequeue(&queue->input_pkt_queue); - if (!skb) - goto job_done; + if (!skb) { + __napi_complete(napi); + local_irq_enable(); + break; + } + local_irq_enable(); dev = skb->dev; @@ -1995,67 +2090,86 @@ static int process_backlog(struct net_device *backlog_dev, int *budget) netif_receive_skb(skb); dev_put(dev); + } while (++work < quota && jiffies == start_time); - work++; - - if (work >= quota || jiffies - start_time > 1) - break; - - } - - backlog_dev->quota -= work; - *budget -= work; - return -1; - -job_done: - backlog_dev->quota -= work; - *budget -= work; + return work; +} - list_del(&backlog_dev->poll_list); - smp_mb__before_clear_bit(); - netif_poll_enable(backlog_dev); +/** + * __napi_schedule - schedule for receive + * @napi: entry to schedule + * + * The entry's receive function will be scheduled to run + */ +void fastcall __napi_schedule(struct napi_struct *n) +{ + unsigned long flags; - local_irq_enable(); - return 0; + local_irq_save(flags); + list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list); + __raise_softirq_irqoff(NET_RX_SOFTIRQ); + local_irq_restore(flags); } +EXPORT_SYMBOL(__napi_schedule); + static void net_rx_action(struct softirq_action *h) { - struct softnet_data *queue = &__get_cpu_var(softnet_data); + struct list_head *list = &__get_cpu_var(softnet_data).poll_list; unsigned long start_time = jiffies; int budget = netdev_budget; void *have; local_irq_disable(); - while (!list_empty(&queue->poll_list)) { - struct net_device *dev; + while (!list_empty(list)) { + struct napi_struct *n; + int work, weight; - if (budget <= 0 || jiffies - start_time > 1) + /* If softirq window is exhuasted then punt. + * + * Note that this is a slight policy change from the + * previous NAPI code, which would allow up to 2 + * jiffies to pass before breaking out. The test + * used to be "jiffies - start_time > 1". + */ + if (unlikely(budget <= 0 || jiffies != start_time)) goto softnet_break; local_irq_enable(); - dev = list_entry(queue->poll_list.next, - struct net_device, poll_list); - have = netpoll_poll_lock(dev); + /* Even though interrupts have been re-enabled, this + * access is safe because interrupts can only add new + * entries to the tail of this list, and only ->poll() + * calls can remove this head entry from the list. + */ + n = list_entry(list->next, struct napi_struct, poll_list); - if (dev->quota <= 0 || dev->poll(dev, &budget)) { - netpoll_poll_unlock(have); - local_irq_disable(); - list_move_tail(&dev->poll_list, &queue->poll_list); - if (dev->quota < 0) - dev->quota += dev->weight; - else - dev->quota = dev->weight; - } else { - netpoll_poll_unlock(have); - dev_put(dev); - local_irq_disable(); - } + have = netpoll_poll_lock(n); + + weight = n->weight; + + work = n->poll(n, weight); + + WARN_ON_ONCE(work > weight); + + budget -= work; + + local_irq_disable(); + + /* Drivers must not modify the NAPI state if they + * consume the entire weight. In such cases this code + * still "owns" the NAPI instance and therefore can + * move the instance around on the list at-will. + */ + if (unlikely(work == weight)) + list_move_tail(&n->poll_list, list); + + netpoll_poll_unlock(have); } out: local_irq_enable(); + #ifdef CONFIG_NET_DMA /* * There may not be any more sk_buffs coming right now, so push @@ -2070,6 +2184,7 @@ out: } } #endif + return; softnet_break: @@ -2109,7 +2224,7 @@ int register_gifconf(unsigned int family, gifconf_func_t * gifconf) * match. --pb */ -static int dev_ifname(struct ifreq __user *arg) +static int dev_ifname(struct net *net, struct ifreq __user *arg) { struct net_device *dev; struct ifreq ifr; @@ -2122,7 +2237,7 @@ static int dev_ifname(struct ifreq __user *arg) return -EFAULT; read_lock(&dev_base_lock); - dev = __dev_get_by_index(ifr.ifr_ifindex); + dev = __dev_get_by_index(net, ifr.ifr_ifindex); if (!dev) { read_unlock(&dev_base_lock); return -ENODEV; @@ -2142,7 +2257,7 @@ static int dev_ifname(struct ifreq __user *arg) * Thus we will need a 'compatibility mode'. */ -static int dev_ifconf(char __user *arg) +static int dev_ifconf(struct net *net, char __user *arg) { struct ifconf ifc; struct net_device *dev; @@ -2166,7 +2281,7 @@ static int dev_ifconf(char __user *arg) */ total = 0; - for_each_netdev(dev) { + for_each_netdev(net, dev) { for (i = 0; i < NPROTO; i++) { if (gifconf_list[i]) { int done; @@ -2200,6 +2315,7 @@ static int dev_ifconf(char __user *arg) */ void *dev_seq_start(struct seq_file *seq, loff_t *pos) { + struct net *net = seq->private; loff_t off; struct net_device *dev; @@ -2208,7 +2324,7 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos) return SEQ_START_TOKEN; off = 1; - for_each_netdev(dev) + for_each_netdev(net, dev) if (off++ == *pos) return dev; @@ -2217,9 +2333,10 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos) void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct net *net = seq->private; ++*pos; return v == SEQ_START_TOKEN ? - first_net_device() : next_net_device((struct net_device *)v); + first_net_device(net) : next_net_device((struct net_device *)v); } void dev_seq_stop(struct seq_file *seq, void *v) @@ -2315,7 +2432,26 @@ static const struct seq_operations dev_seq_ops = { static int dev_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &dev_seq_ops); + struct seq_file *seq; + int res; + res = seq_open(file, &dev_seq_ops); + if (!res) { + seq = file->private_data; + seq->private = get_proc_net(inode); + if (!seq->private) { + seq_release(inode, file); + res = -ENXIO; + } + } + return res; +} + +static int dev_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct net *net = seq->private; + put_net(net); + return seq_release(inode, file); } static const struct file_operations dev_seq_fops = { @@ -2323,7 +2459,7 @@ static const struct file_operations dev_seq_fops = { .open = dev_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = dev_seq_release, }; static const struct seq_operations softnet_seq_ops = { @@ -2475,30 +2611,49 @@ static const struct file_operations ptype_seq_fops = { }; -static int __init dev_proc_init(void) +static int __net_init dev_proc_net_init(struct net *net) { int rc = -ENOMEM; - if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops)) + if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops)) goto out; - if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops)) + if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops)) goto out_dev; - if (!proc_net_fops_create("ptype", S_IRUGO, &ptype_seq_fops)) - goto out_dev2; - - if (wext_proc_init()) + if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops)) goto out_softnet; + + if (wext_proc_init(net)) + goto out_ptype; rc = 0; out: return rc; +out_ptype: + proc_net_remove(net, "ptype"); out_softnet: - proc_net_remove("ptype"); -out_dev2: - proc_net_remove("softnet_stat"); + proc_net_remove(net, "softnet_stat"); out_dev: - proc_net_remove("dev"); + proc_net_remove(net, "dev"); goto out; } + +static void __net_exit dev_proc_net_exit(struct net *net) +{ + wext_proc_exit(net); + + proc_net_remove(net, "ptype"); + proc_net_remove(net, "softnet_stat"); + proc_net_remove(net, "dev"); +} + +static struct pernet_operations __net_initdata dev_proc_ops = { + .init = dev_proc_net_init, + .exit = dev_proc_net_exit, +}; + +static int __init dev_proc_init(void) +{ + return register_pernet_subsys(&dev_proc_ops); +} #else #define dev_proc_init() 0 #endif /* CONFIG_PROC_FS */ @@ -2629,7 +2784,7 @@ void __dev_set_rx_mode(struct net_device *dev) return; if (!netif_device_present(dev)) - return; + return; if (dev->set_rx_mode) dev->set_rx_mode(dev); @@ -2715,26 +2870,14 @@ int __dev_addr_add(struct dev_addr_list **list, int *count, return 0; } -void __dev_addr_discard(struct dev_addr_list **list) -{ - struct dev_addr_list *tmp; - - while (*list != NULL) { - tmp = *list; - *list = tmp->next; - if (tmp->da_users > tmp->da_gusers) - printk("__dev_addr_discard: address leakage! " - "da_users=%d\n", tmp->da_users); - kfree(tmp); - } -} - /** * dev_unicast_delete - Release secondary unicast address. * @dev: device + * @addr: address to delete + * @alen: length of @addr * * Release reference to a secondary unicast address and remove it - * from the device if the reference count drop to zero. + * from the device if the reference count drops to zero. * * The caller must hold the rtnl_mutex. */ @@ -2756,6 +2899,8 @@ EXPORT_SYMBOL(dev_unicast_delete); /** * dev_unicast_add - add a secondary unicast address * @dev: device + * @addr: address to delete + * @alen: length of @addr * * Add a secondary unicast address to the device or increase * the reference count if it already exists. @@ -2777,11 +2922,30 @@ int dev_unicast_add(struct net_device *dev, void *addr, int alen) } EXPORT_SYMBOL(dev_unicast_add); -static void dev_unicast_discard(struct net_device *dev) +static void __dev_addr_discard(struct dev_addr_list **list) +{ + struct dev_addr_list *tmp; + + while (*list != NULL) { + tmp = *list; + *list = tmp->next; + if (tmp->da_users > tmp->da_gusers) + printk("__dev_addr_discard: address leakage! " + "da_users=%d\n", tmp->da_users); + kfree(tmp); + } +} + +static void dev_addr_discard(struct net_device *dev) { netif_tx_lock_bh(dev); + __dev_addr_discard(&dev->uc_list); dev->uc_count = 0; + + __dev_addr_discard(&dev->mc_list); + dev->mc_count = 0; + netif_tx_unlock_bh(dev); } @@ -2852,8 +3016,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags) if (dev->flags & IFF_UP && ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) - raw_notifier_call_chain(&netdev_chain, - NETDEV_CHANGE, dev); + call_netdevice_notifiers(NETDEV_CHANGE, dev); if ((flags ^ dev->gflags) & IFF_PROMISC) { int inc = (flags & IFF_PROMISC) ? +1 : -1; @@ -2899,8 +3062,7 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) else dev->mtu = new_mtu; if (!err && dev->flags & IFF_UP) - raw_notifier_call_chain(&netdev_chain, - NETDEV_CHANGEMTU, dev); + call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); return err; } @@ -2916,18 +3078,17 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) return -ENODEV; err = dev->set_mac_address(dev, sa); if (!err) - raw_notifier_call_chain(&netdev_chain, - NETDEV_CHANGEADDR, dev); + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return err; } /* - * Perform the SIOCxIFxxx calls. + * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock) */ -static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) +static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd) { int err; - struct net_device *dev = __dev_get_by_name(ifr->ifr_name); + struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); if (!dev) return -ENODEV; @@ -2937,25 +3098,15 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) ifr->ifr_flags = dev_get_flags(dev); return 0; - case SIOCSIFFLAGS: /* Set interface flags */ - return dev_change_flags(dev, ifr->ifr_flags); - case SIOCGIFMETRIC: /* Get the metric on the interface (currently unused) */ ifr->ifr_metric = 0; return 0; - case SIOCSIFMETRIC: /* Set the metric on the interface - (currently unused) */ - return -EOPNOTSUPP; - case SIOCGIFMTU: /* Get the MTU of a device */ ifr->ifr_mtu = dev->mtu; return 0; - case SIOCSIFMTU: /* Set the MTU of a device */ - return dev_set_mtu(dev, ifr->ifr_mtu); - case SIOCGIFHWADDR: if (!dev->addr_len) memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); @@ -2965,17 +3116,9 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) ifr->ifr_hwaddr.sa_family = dev->type; return 0; - case SIOCSIFHWADDR: - return dev_set_mac_address(dev, &ifr->ifr_hwaddr); - - case SIOCSIFHWBROADCAST: - if (ifr->ifr_hwaddr.sa_family != dev->type) - return -EINVAL; - memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, - min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); - raw_notifier_call_chain(&netdev_chain, - NETDEV_CHANGEADDR, dev); - return 0; + case SIOCGIFSLAVE: + err = -EINVAL; + break; case SIOCGIFMAP: ifr->ifr_map.mem_start = dev->mem_start; @@ -2986,6 +3129,59 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) ifr->ifr_map.port = dev->if_port; return 0; + case SIOCGIFINDEX: + ifr->ifr_ifindex = dev->ifindex; + return 0; + + case SIOCGIFTXQLEN: + ifr->ifr_qlen = dev->tx_queue_len; + return 0; + + default: + /* dev_ioctl() should ensure this case + * is never reached + */ + WARN_ON(1); + err = -EINVAL; + break; + + } + return err; +} + +/* + * Perform the SIOCxIFxxx calls, inside rtnl_lock() + */ +static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) +{ + int err; + struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); + + if (!dev) + return -ENODEV; + + switch (cmd) { + case SIOCSIFFLAGS: /* Set interface flags */ + return dev_change_flags(dev, ifr->ifr_flags); + + case SIOCSIFMETRIC: /* Set the metric on the interface + (currently unused) */ + return -EOPNOTSUPP; + + case SIOCSIFMTU: /* Set the MTU of a device */ + return dev_set_mtu(dev, ifr->ifr_mtu); + + case SIOCSIFHWADDR: + return dev_set_mac_address(dev, &ifr->ifr_hwaddr); + + case SIOCSIFHWBROADCAST: + if (ifr->ifr_hwaddr.sa_family != dev->type) + return -EINVAL; + memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, + min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + return 0; + case SIOCSIFMAP: if (dev->set_config) { if (!netif_device_present(dev)) @@ -3012,14 +3208,6 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data, dev->addr_len, 1); - case SIOCGIFINDEX: - ifr->ifr_ifindex = dev->ifindex; - return 0; - - case SIOCGIFTXQLEN: - ifr->ifr_qlen = dev->tx_queue_len; - return 0; - case SIOCSIFTXQLEN: if (ifr->ifr_qlen < 0) return -EINVAL; @@ -3080,7 +3268,7 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) * positive or a negative errno code on error. */ -int dev_ioctl(unsigned int cmd, void __user *arg) +int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) { struct ifreq ifr; int ret; @@ -3093,12 +3281,12 @@ int dev_ioctl(unsigned int cmd, void __user *arg) if (cmd == SIOCGIFCONF) { rtnl_lock(); - ret = dev_ifconf((char __user *) arg); + ret = dev_ifconf(net, (char __user *) arg); rtnl_unlock(); return ret; } if (cmd == SIOCGIFNAME) - return dev_ifname((struct ifreq __user *)arg); + return dev_ifname(net, (struct ifreq __user *)arg); if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; @@ -3128,9 +3316,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg) case SIOCGIFMAP: case SIOCGIFINDEX: case SIOCGIFTXQLEN: - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); read_lock(&dev_base_lock); - ret = dev_ifsioc(&ifr, cmd); + ret = dev_ifsioc_locked(net, &ifr, cmd); read_unlock(&dev_base_lock); if (!ret) { if (colon) @@ -3142,9 +3330,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg) return ret; case SIOCETHTOOL: - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ethtool(&ifr); + ret = dev_ethtool(net, &ifr); rtnl_unlock(); if (!ret) { if (colon) @@ -3166,9 +3354,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg) case SIOCSIFNAME: if (!capable(CAP_NET_ADMIN)) return -EPERM; - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ifsioc(&ifr, cmd); + ret = dev_ifsioc(net, &ifr, cmd); rtnl_unlock(); if (!ret) { if (colon) @@ -3207,9 +3395,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg) /* fall through */ case SIOCBONDSLAVEINFOQUERY: case SIOCBONDINFOQUERY: - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ifsioc(&ifr, cmd); + ret = dev_ifsioc(net, &ifr, cmd); rtnl_unlock(); return ret; @@ -3229,9 +3417,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg) if (cmd == SIOCWANDEV || (cmd >= SIOCDEVPRIVATE && cmd <= SIOCDEVPRIVATE + 15)) { - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ifsioc(&ifr, cmd); + ret = dev_ifsioc(net, &ifr, cmd); rtnl_unlock(); if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) @@ -3240,7 +3428,7 @@ int dev_ioctl(unsigned int cmd, void __user *arg) } /* Take care of Wireless Extensions */ if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) - return wext_handle_ioctl(&ifr, cmd, arg); + return wext_handle_ioctl(net, &ifr, cmd, arg); return -EINVAL; } } @@ -3253,19 +3441,17 @@ int dev_ioctl(unsigned int cmd, void __user *arg) * number. The caller must hold the rtnl semaphore or the * dev_base_lock to be sure it remains unique. */ -static int dev_new_index(void) +static int dev_new_index(struct net *net) { static int ifindex; for (;;) { if (++ifindex <= 0) ifindex = 1; - if (!__dev_get_by_index(ifindex)) + if (!__dev_get_by_index(net, ifindex)) return ifindex; } } -static int dev_boot_phase = 1; - /* Delayed registration/unregisteration */ static DEFINE_SPINLOCK(net_todo_list_lock); static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list); @@ -3299,6 +3485,7 @@ int register_netdevice(struct net_device *dev) struct hlist_head *head; struct hlist_node *p; int ret; + struct net *net; BUG_ON(dev_boot_phase); ASSERT_RTNL(); @@ -3307,6 +3494,8 @@ int register_netdevice(struct net_device *dev) /* When net_device's are persistent, this will be fatal. */ BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); + BUG_ON(!dev->nd_net); + net = dev->nd_net; spin_lock_init(&dev->queue_lock); spin_lock_init(&dev->_xmit_lock); @@ -3328,21 +3517,21 @@ int register_netdevice(struct net_device *dev) if (!dev_valid_name(dev->name)) { ret = -EINVAL; - goto out; + goto err_uninit; } - dev->ifindex = dev_new_index(); + dev->ifindex = dev_new_index(net); if (dev->iflink == -1) dev->iflink = dev->ifindex; /* Check for existence of name */ - head = dev_name_hash(dev->name); + head = dev_name_hash(net, dev->name); hlist_for_each(p, head) { struct net_device *d = hlist_entry(p, struct net_device, name_hlist); if (!strncmp(d->name, dev->name, IFNAMSIZ)) { ret = -EEXIST; - goto out; + goto err_uninit; } } @@ -3392,17 +3581,9 @@ int register_netdevice(struct net_device *dev) } } - /* - * nil rebuild_header routine, - * that should be never called and used as just bug trap. - */ - - if (!dev->rebuild_header) - dev->rebuild_header = default_rebuild_header; - - ret = netdev_register_sysfs(dev); + ret = netdev_register_kobject(dev); if (ret) - goto out; + goto err_uninit; dev->reg_state = NETREG_REGISTERED; /* @@ -3413,20 +3594,22 @@ int register_netdevice(struct net_device *dev) set_bit(__LINK_STATE_PRESENT, &dev->state); dev_init_scheduler(dev); - write_lock_bh(&dev_base_lock); - list_add_tail(&dev->dev_list, &dev_base_head); - hlist_add_head(&dev->name_hlist, head); - hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex)); dev_hold(dev); - write_unlock_bh(&dev_base_lock); + list_netdevice(dev); /* Notify protocols, that a new device appeared. */ - raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); - - ret = 0; + ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); + ret = notifier_to_errno(ret); + if (ret) + unregister_netdevice(dev); out: return ret; + +err_uninit: + if (dev->uninit) + dev->uninit(dev); + goto out; } /** @@ -3486,8 +3669,7 @@ static void netdev_wait_allrefs(struct net_device *dev) rtnl_lock(); /* Rebroadcast unregister notification */ - raw_notifier_call_chain(&netdev_chain, - NETDEV_UNREGISTER, dev); + call_netdevice_notifiers(NETDEV_UNREGISTER, dev); if (test_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { @@ -3619,7 +3801,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, /* ensure 32-byte alignment of both the device and private area */ alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST + - (sizeof(struct net_device_subqueue) * queue_count)) & + (sizeof(struct net_device_subqueue) * (queue_count - 1))) & ~NETDEV_ALIGN_CONST; alloc_size += sizeof_priv + NETDEV_ALIGN_CONST; @@ -3632,18 +3814,20 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, dev = (struct net_device *) (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); dev->padded = (char *)dev - (char *)p; + dev->nd_net = &init_net; if (sizeof_priv) { dev->priv = ((char *)dev + ((sizeof(struct net_device) + (sizeof(struct net_device_subqueue) * - queue_count) + NETDEV_ALIGN_CONST) + (queue_count - 1)) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST)); } dev->egress_subqueue_count = queue_count; dev->get_stats = internal_stats; + netpoll_netdev_init(dev); setup(dev); strcpy(dev->name, name); return dev; @@ -3660,7 +3844,6 @@ EXPORT_SYMBOL(alloc_netdev_mq); */ void free_netdev(struct net_device *dev) { -#ifdef CONFIG_SYSFS /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { kfree((char *)dev - dev->padded); @@ -3672,9 +3855,6 @@ void free_netdev(struct net_device *dev) /* will free via device release */ put_device(&dev->dev); -#else - kfree((char *)dev - dev->padded); -#endif } /* Synchronize with packet receive processing. */ @@ -3713,15 +3893,10 @@ void unregister_netdevice(struct net_device *dev) BUG_ON(dev->reg_state != NETREG_REGISTERED); /* If device is running, close it first. */ - if (dev->flags & IFF_UP) - dev_close(dev); + dev_close(dev); /* And unlink it from device chain. */ - write_lock_bh(&dev_base_lock); - list_del(&dev->dev_list); - hlist_del(&dev->name_hlist); - hlist_del(&dev->index_hlist); - write_unlock_bh(&dev_base_lock); + unlist_netdevice(dev); dev->reg_state = NETREG_UNREGISTERING; @@ -3734,13 +3909,12 @@ void unregister_netdevice(struct net_device *dev) /* Notify protocols, that we are about to destroy this device. They should clean all the things. */ - raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); + call_netdevice_notifiers(NETDEV_UNREGISTER, dev); /* * Flush the unicast and multicast chains */ - dev_unicast_discard(dev); - dev_mc_discard(dev); + dev_addr_discard(dev); if (dev->uninit) dev->uninit(dev); @@ -3748,8 +3922,8 @@ void unregister_netdevice(struct net_device *dev) /* Notifier chain MUST detach us from master device. */ BUG_TRAP(!dev->master); - /* Remove entries from sysfs */ - netdev_unregister_sysfs(dev); + /* Remove entries from kobject tree */ + netdev_unregister_kobject(dev); /* Finish processing unregister after unlock */ net_set_todo(dev); @@ -3780,6 +3954,121 @@ void unregister_netdev(struct net_device *dev) EXPORT_SYMBOL(unregister_netdev); +/** + * dev_change_net_namespace - move device to different nethost namespace + * @dev: device + * @net: network namespace + * @pat: If not NULL name pattern to try if the current device name + * is already taken in the destination network namespace. + * + * This function shuts down a device interface and moves it + * to a new network namespace. On success 0 is returned, on + * a failure a netagive errno code is returned. + * + * Callers must hold the rtnl semaphore. + */ + +int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) +{ + char buf[IFNAMSIZ]; + const char *destname; + int err; + + ASSERT_RTNL(); + + /* Don't allow namespace local devices to be moved. */ + err = -EINVAL; + if (dev->features & NETIF_F_NETNS_LOCAL) + goto out; + + /* Ensure the device has been registrered */ + err = -EINVAL; + if (dev->reg_state != NETREG_REGISTERED) + goto out; + + /* Get out if there is nothing todo */ + err = 0; + if (dev->nd_net == net) + goto out; + + /* Pick the destination device name, and ensure + * we can use it in the destination network namespace. + */ + err = -EEXIST; + destname = dev->name; + if (__dev_get_by_name(net, destname)) { + /* We get here if we can't use the current device name */ + if (!pat) + goto out; + if (!dev_valid_name(pat)) + goto out; + if (strchr(pat, '%')) { + if (__dev_alloc_name(net, pat, buf) < 0) + goto out; + destname = buf; + } else + destname = pat; + if (__dev_get_by_name(net, destname)) + goto out; + } + + /* + * And now a mini version of register_netdevice unregister_netdevice. + */ + + /* If device is running close it first. */ + dev_close(dev); + + /* And unlink it from device chain */ + err = -ENODEV; + unlist_netdevice(dev); + + synchronize_net(); + + /* Shutdown queueing discipline. */ + dev_shutdown(dev); + + /* Notify protocols, that we are about to destroy + this device. They should clean all the things. + */ + call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + + /* + * Flush the unicast and multicast chains + */ + dev_addr_discard(dev); + + /* Actually switch the network namespace */ + dev->nd_net = net; + + /* Assign the new device name */ + if (destname != dev->name) + strcpy(dev->name, destname); + + /* If there is an ifindex conflict assign a new one */ + if (__dev_get_by_index(net, dev->ifindex)) { + int iflink = (dev->iflink == dev->ifindex); + dev->ifindex = dev_new_index(net); + if (iflink) + dev->iflink = dev->ifindex; + } + + /* Fixup kobjects */ + err = device_rename(&dev->dev, dev->name); + WARN_ON(err); + + /* Add the device back in the hashes */ + list_netdevice(dev); + + /* Notify protocols, that a new device appeared. */ + call_netdevice_notifiers(NETDEV_REGISTER, dev); + + synchronize_net(); + err = 0; +out: + return err; +} + static int dev_cpu_callback(struct notifier_block *nfb, unsigned long action, void *ocpu) @@ -3826,9 +4115,11 @@ static int dev_cpu_callback(struct notifier_block *nfb, #ifdef CONFIG_NET_DMA /** - * net_dma_rebalance - - * This is called when the number of channels allocated to the net_dma_client - * changes. The net_dma_client tries to have one DMA channel per CPU. + * net_dma_rebalance - try to maintain one DMA channel per CPU + * @net_dma: DMA client and associated data (lock, channels, channel_mask) + * + * This is called when the number of channels allocated to the net_dma client + * changes. The net_dma client tries to have one DMA channel per CPU. */ static void net_dma_rebalance(struct net_dma *net_dma) @@ -3865,7 +4156,7 @@ static void net_dma_rebalance(struct net_dma *net_dma) * netdev_dma_event - event callback for the net_dma_client * @client: should always be net_dma_client * @chan: DMA channel for the event - * @event: event type + * @state: DMA state to be handled */ static enum dma_state_client netdev_dma_event(struct dma_client *client, struct dma_chan *chan, @@ -3932,6 +4223,121 @@ static int __init netdev_dma_register(void) static int __init netdev_dma_register(void) { return -ENODEV; } #endif /* CONFIG_NET_DMA */ +/** + * netdev_compute_feature - compute conjunction of two feature sets + * @all: first feature set + * @one: second feature set + * + * Computes a new feature set after adding a device with feature set + * @one to the master device with current feature set @all. Returns + * the new feature set. + */ +int netdev_compute_features(unsigned long all, unsigned long one) +{ + /* if device needs checksumming, downgrade to hw checksumming */ + if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) + all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM; + + /* if device can't do all checksum, downgrade to ipv4/ipv6 */ + if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM)) + all ^= NETIF_F_HW_CSUM + | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + + if (one & NETIF_F_GSO) + one |= NETIF_F_GSO_SOFTWARE; + one |= NETIF_F_GSO; + + /* If even one device supports robust GSO, enable it for all. */ + if (one & NETIF_F_GSO_ROBUST) + all |= NETIF_F_GSO_ROBUST; + + all &= one | NETIF_F_LLTX; + + if (!(all & NETIF_F_ALL_CSUM)) + all &= ~NETIF_F_SG; + if (!(all & NETIF_F_SG)) + all &= ~NETIF_F_GSO_MASK; + + return all; +} +EXPORT_SYMBOL(netdev_compute_features); + +static struct hlist_head *netdev_create_hash(void) +{ + int i; + struct hlist_head *hash; + + hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL); + if (hash != NULL) + for (i = 0; i < NETDEV_HASHENTRIES; i++) + INIT_HLIST_HEAD(&hash[i]); + + return hash; +} + +/* Initialize per network namespace state */ +static int __net_init netdev_init(struct net *net) +{ + INIT_LIST_HEAD(&net->dev_base_head); + rwlock_init(&dev_base_lock); + + net->dev_name_head = netdev_create_hash(); + if (net->dev_name_head == NULL) + goto err_name; + + net->dev_index_head = netdev_create_hash(); + if (net->dev_index_head == NULL) + goto err_idx; + + return 0; + +err_idx: + kfree(net->dev_name_head); +err_name: + return -ENOMEM; +} + +static void __net_exit netdev_exit(struct net *net) +{ + kfree(net->dev_name_head); + kfree(net->dev_index_head); +} + +static struct pernet_operations __net_initdata netdev_net_ops = { + .init = netdev_init, + .exit = netdev_exit, +}; + +static void __net_exit default_device_exit(struct net *net) +{ + struct net_device *dev, *next; + /* + * Push all migratable of the network devices back to the + * initial network namespace + */ + rtnl_lock(); + for_each_netdev_safe(net, dev, next) { + int err; + + /* Ignore unmoveable devices (i.e. loopback) */ + if (dev->features & NETIF_F_NETNS_LOCAL) + continue; + + /* Push remaing network devices to init_net */ + err = dev_change_net_namespace(dev, &init_net, "dev%d"); + if (err) { + printk(KERN_WARNING "%s: failed to move %s to init_net: %d\n", + __func__, dev->name, err); + unregister_netdevice(dev); + } + } + rtnl_unlock(); +} + +static struct pernet_operations __net_initdata default_device_ops = { + .exit = default_device_exit, +}; + /* * Initialize the DEV module. At boot time this walks the device list and * unhooks any devices that fail to initialise (normally hardware not @@ -3952,18 +4358,18 @@ static int __init net_dev_init(void) if (dev_proc_init()) goto out; - if (netdev_sysfs_init()) + if (netdev_kobject_init()) goto out; INIT_LIST_HEAD(&ptype_all); for (i = 0; i < 16; i++) INIT_LIST_HEAD(&ptype_base[i]); - for (i = 0; i < ARRAY_SIZE(dev_name_head); i++) - INIT_HLIST_HEAD(&dev_name_head[i]); + if (register_pernet_subsys(&netdev_net_ops)) + goto out; - for (i = 0; i < ARRAY_SIZE(dev_index_head); i++) - INIT_HLIST_HEAD(&dev_index_head[i]); + if (register_pernet_device(&default_device_ops)) + goto out; /* * Initialise the packet receive queues. @@ -3976,10 +4382,9 @@ static int __init net_dev_init(void) skb_queue_head_init(&queue->input_pkt_queue); queue->completion_queue = NULL; INIT_LIST_HEAD(&queue->poll_list); - set_bit(__LINK_STATE_START, &queue->backlog_dev.state); - queue->backlog_dev.weight = weight_p; - queue->backlog_dev.poll = process_backlog; - atomic_set(&queue->backlog_dev.refcnt, 1); + + queue->backlog.poll = process_backlog; + queue->backlog.weight = weight_p; } netdev_dma_register(); diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index 235a2a8a0d0..15241cf48af 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -41,6 +41,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/init.h> +#include <net/net_namespace.h> #include <net/ip.h> #include <net/route.h> #include <linux/skbuff.h> @@ -116,11 +117,13 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) */ int dev_mc_sync(struct net_device *to, struct net_device *from) { - struct dev_addr_list *da; + struct dev_addr_list *da, *next; int err = 0; netif_tx_lock_bh(to); - for (da = from->mc_list; da != NULL; da = da->next) { + da = from->mc_list; + while (da != NULL) { + next = da->next; if (!da->da_synced) { err = __dev_addr_add(&to->mc_list, &to->mc_count, da->da_addr, da->da_addrlen, 0); @@ -134,6 +137,7 @@ int dev_mc_sync(struct net_device *to, struct net_device *from) __dev_addr_delete(&from->mc_list, &from->mc_count, da->da_addr, da->da_addrlen, 0); } + da = next; } if (!err) __dev_set_rx_mode(to); @@ -156,12 +160,14 @@ EXPORT_SYMBOL(dev_mc_sync); */ void dev_mc_unsync(struct net_device *to, struct net_device *from) { - struct dev_addr_list *da; + struct dev_addr_list *da, *next; netif_tx_lock_bh(from); netif_tx_lock_bh(to); - for (da = from->mc_list; da != NULL; da = da->next) { + da = from->mc_list; + while (da != NULL) { + next = da->next; if (!da->da_synced) continue; __dev_addr_delete(&to->mc_list, &to->mc_count, @@ -169,6 +175,7 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from) da->da_synced = 0; __dev_addr_delete(&from->mc_list, &from->mc_count, da->da_addr, da->da_addrlen, 0); + da = next; } __dev_set_rx_mode(to); @@ -177,26 +184,15 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from) } EXPORT_SYMBOL(dev_mc_unsync); -/* - * Discard multicast list when a device is downed - */ - -void dev_mc_discard(struct net_device *dev) -{ - netif_tx_lock_bh(dev); - __dev_addr_discard(&dev->mc_list); - dev->mc_count = 0; - netif_tx_unlock_bh(dev); -} - #ifdef CONFIG_PROC_FS static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos) { + struct net *net = seq->private; struct net_device *dev; loff_t off = 0; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(net, dev) { if (off++ == *pos) return dev; } @@ -245,7 +241,26 @@ static const struct seq_operations dev_mc_seq_ops = { static int dev_mc_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &dev_mc_seq_ops); + struct seq_file *seq; + int res; + res = seq_open(file, &dev_mc_seq_ops); + if (!res) { + seq = file->private_data; + seq->private = get_proc_net(inode); + if (!seq->private) { + seq_release(inode, file); + res = -ENXIO; + } + } + return res; +} + +static int dev_mc_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct net *net = seq->private; + put_net(net); + return seq_release(inode, file); } static const struct file_operations dev_mc_seq_fops = { @@ -253,14 +268,31 @@ static const struct file_operations dev_mc_seq_fops = { .open = dev_mc_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = dev_mc_seq_release, }; #endif +static int __net_init dev_mc_net_init(struct net *net) +{ + if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops)) + return -ENOMEM; + return 0; +} + +static void __net_exit dev_mc_net_exit(struct net *net) +{ + proc_net_remove(net, "dev_mcast"); +} + +static struct pernet_operations __net_initdata dev_mc_net_ops = { + .init = dev_mc_net_init, + .exit = dev_mc_net_exit, +}; + void __init dev_mcast_init(void) { - proc_net_fops_create("dev_mcast", 0, &dev_mc_seq_fops); + register_pernet_subsys(&dev_mc_net_ops); } EXPORT_SYMBOL(dev_mc_add); diff --git a/net/core/dst.c b/net/core/dst.c index c6a05879d58..16958e64e57 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -9,59 +9,84 @@ #include <linux/errno.h> #include <linux/init.h> #include <linux/kernel.h> +#include <linux/workqueue.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/string.h> #include <linux/types.h> +#include <net/net_namespace.h> +#include <net/net_namespace.h> #include <net/dst.h> -/* Locking strategy: - * 1) Garbage collection state of dead destination cache - * entries is protected by dst_lock. - * 2) GC is run only from BH context, and is the only remover - * of entries. - * 3) Entries are added to the garbage list from both BH - * and non-BH context, so local BH disabling is needed. - * 4) All operations modify state, so a spinlock is used. +/* + * Theory of operations: + * 1) We use a list, protected by a spinlock, to add + * new entries from both BH and non-BH context. + * 2) In order to keep spinlock held for a small delay, + * we use a second list where are stored long lived + * entries, that are handled by the garbage collect thread + * fired by a workqueue. + * 3) This list is guarded by a mutex, + * so that the gc_task and dst_dev_event() can be synchronized. */ -static struct dst_entry *dst_garbage_list; #if RT_CACHE_DEBUG >= 2 static atomic_t dst_total = ATOMIC_INIT(0); #endif -static DEFINE_SPINLOCK(dst_lock); -static unsigned long dst_gc_timer_expires; -static unsigned long dst_gc_timer_inc = DST_GC_MAX; -static void dst_run_gc(unsigned long); +/* + * We want to keep lock & list close together + * to dirty as few cache lines as possible in __dst_free(). + * As this is not a very strong hint, we dont force an alignment on SMP. + */ +static struct { + spinlock_t lock; + struct dst_entry *list; + unsigned long timer_inc; + unsigned long timer_expires; +} dst_garbage = { + .lock = __SPIN_LOCK_UNLOCKED(dst_garbage.lock), + .timer_inc = DST_GC_MAX, +}; +static void dst_gc_task(struct work_struct *work); static void ___dst_free(struct dst_entry * dst); -static DEFINE_TIMER(dst_gc_timer, dst_run_gc, DST_GC_MIN, 0); +static DECLARE_DELAYED_WORK(dst_gc_work, dst_gc_task); -static void dst_run_gc(unsigned long dummy) +static DEFINE_MUTEX(dst_gc_mutex); +/* + * long lived entries are maintained in this list, guarded by dst_gc_mutex + */ +static struct dst_entry *dst_busy_list; + +static void dst_gc_task(struct work_struct *work) { int delayed = 0; - int work_performed; - struct dst_entry * dst, **dstp; + int work_performed = 0; + unsigned long expires = ~0L; + struct dst_entry *dst, *next, head; + struct dst_entry *last = &head; +#if RT_CACHE_DEBUG >= 2 + ktime_t time_start = ktime_get(); + struct timespec elapsed; +#endif - if (!spin_trylock(&dst_lock)) { - mod_timer(&dst_gc_timer, jiffies + HZ/10); - return; - } + mutex_lock(&dst_gc_mutex); + next = dst_busy_list; - del_timer(&dst_gc_timer); - dstp = &dst_garbage_list; - work_performed = 0; - while ((dst = *dstp) != NULL) { - if (atomic_read(&dst->__refcnt)) { - dstp = &dst->next; +loop: + while ((dst = next) != NULL) { + next = dst->next; + prefetch(&next->next); + if (likely(atomic_read(&dst->__refcnt))) { + last->next = dst; + last = dst; delayed++; continue; } - *dstp = dst->next; - work_performed = 1; + work_performed++; dst = dst_destroy(dst); if (dst) { @@ -77,38 +102,56 @@ static void dst_run_gc(unsigned long dummy) continue; ___dst_free(dst); - dst->next = *dstp; - *dstp = dst; - dstp = &dst->next; + dst->next = next; + next = dst; } } - if (!dst_garbage_list) { - dst_gc_timer_inc = DST_GC_MAX; - goto out; + + spin_lock_bh(&dst_garbage.lock); + next = dst_garbage.list; + if (next) { + dst_garbage.list = NULL; + spin_unlock_bh(&dst_garbage.lock); + goto loop; } - if (!work_performed) { - if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX) - dst_gc_timer_expires = DST_GC_MAX; - dst_gc_timer_inc += DST_GC_INC; - } else { - dst_gc_timer_inc = DST_GC_INC; - dst_gc_timer_expires = DST_GC_MIN; + last->next = NULL; + dst_busy_list = head.next; + if (!dst_busy_list) + dst_garbage.timer_inc = DST_GC_MAX; + else { + /* + * if we freed less than 1/10 of delayed entries, + * we can sleep longer. + */ + if (work_performed <= delayed/10) { + dst_garbage.timer_expires += dst_garbage.timer_inc; + if (dst_garbage.timer_expires > DST_GC_MAX) + dst_garbage.timer_expires = DST_GC_MAX; + dst_garbage.timer_inc += DST_GC_INC; + } else { + dst_garbage.timer_inc = DST_GC_INC; + dst_garbage.timer_expires = DST_GC_MIN; + } + expires = dst_garbage.timer_expires; + /* + * if the next desired timer is more than 4 seconds in the future + * then round the timer to whole seconds + */ + if (expires > 4*HZ) + expires = round_jiffies_relative(expires); + schedule_delayed_work(&dst_gc_work, expires); } + + spin_unlock_bh(&dst_garbage.lock); + mutex_unlock(&dst_gc_mutex); #if RT_CACHE_DEBUG >= 2 - printk("dst_total: %d/%d %ld\n", - atomic_read(&dst_total), delayed, dst_gc_timer_expires); + elapsed = ktime_to_timespec(ktime_sub(ktime_get(), time_start)); + printk(KERN_DEBUG "dst_total: %d delayed: %d work_perf: %d" + " expires: %lu elapsed: %lu us\n", + atomic_read(&dst_total), delayed, work_performed, + expires, + elapsed.tv_sec * USEC_PER_SEC + elapsed.tv_nsec / NSEC_PER_USEC); #endif - /* if the next desired timer is more than 4 seconds in the future - * then round the timer to whole seconds - */ - if (dst_gc_timer_expires > 4*HZ) - mod_timer(&dst_gc_timer, - round_jiffies(jiffies + dst_gc_timer_expires)); - else - mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires); - -out: - spin_unlock(&dst_lock); } static int dst_discard(struct sk_buff *skb) @@ -153,16 +196,16 @@ static void ___dst_free(struct dst_entry * dst) void __dst_free(struct dst_entry * dst) { - spin_lock_bh(&dst_lock); + spin_lock_bh(&dst_garbage.lock); ___dst_free(dst); - dst->next = dst_garbage_list; - dst_garbage_list = dst; - if (dst_gc_timer_inc > DST_GC_INC) { - dst_gc_timer_inc = DST_GC_INC; - dst_gc_timer_expires = DST_GC_MIN; - mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires); + dst->next = dst_garbage.list; + dst_garbage.list = dst; + if (dst_garbage.timer_inc > DST_GC_INC) { + dst_garbage.timer_inc = DST_GC_INC; + dst_garbage.timer_expires = DST_GC_MIN; + schedule_delayed_work(&dst_gc_work, dst_garbage.timer_expires); } - spin_unlock_bh(&dst_lock); + spin_unlock_bh(&dst_garbage.lock); } struct dst_entry *dst_destroy(struct dst_entry * dst) @@ -236,13 +279,13 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev, if (!unregister) { dst->input = dst->output = dst_discard; } else { - dst->dev = &loopback_dev; - dev_hold(&loopback_dev); + dst->dev = init_net.loopback_dev; + dev_hold(dst->dev); dev_put(dev); if (dst->neighbour && dst->neighbour->dev == dev) { - dst->neighbour->dev = &loopback_dev; + dst->neighbour->dev = init_net.loopback_dev; dev_put(dev); - dev_hold(&loopback_dev); + dev_hold(dst->neighbour->dev); } } } @@ -250,16 +293,33 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev, static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct dst_entry *dst; + struct dst_entry *dst, *last = NULL; + + if (dev->nd_net != &init_net) + return NOTIFY_DONE; switch (event) { case NETDEV_UNREGISTER: case NETDEV_DOWN: - spin_lock_bh(&dst_lock); - for (dst = dst_garbage_list; dst; dst = dst->next) { + mutex_lock(&dst_gc_mutex); + for (dst = dst_busy_list; dst; dst = dst->next) { + last = dst; + dst_ifdown(dst, dev, event != NETDEV_DOWN); + } + + spin_lock_bh(&dst_garbage.lock); + dst = dst_garbage.list; + dst_garbage.list = NULL; + spin_unlock_bh(&dst_garbage.lock); + + if (last) + last->next = dst; + else + dst_busy_list = dst; + for (; dst; dst = dst->next) { dst_ifdown(dst, dev, event != NETDEV_DOWN); } - spin_unlock_bh(&dst_lock); + mutex_unlock(&dst_gc_mutex); break; } return NOTIFY_DONE; diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 0b531e98ec3..1163eb2256d 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -3,10 +3,12 @@ * Copyright (c) 2003 Matthew Wilcox <matthew@wil.cx> * * This file is where we call all the ethtool_ops commands to get - * the information ethtool needs. We fall back to calling do_ioctl() - * for drivers which haven't been converted to ethtool_ops yet. + * the information ethtool needs. * - * It's GPL, stupid. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. */ #include <linux/module.h> @@ -93,18 +95,6 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data) return 0; } -int ethtool_op_get_perm_addr(struct net_device *dev, struct ethtool_perm_addr *addr, u8 *data) -{ - unsigned char len = dev->addr_len; - if ( addr->size < len ) - return -ETOOSMALL; - - addr->size = len; - memcpy(data, dev->perm_addr, len); - return 0; -} - - u32 ethtool_op_get_ufo(struct net_device *dev) { return (dev->features & NETIF_F_UFO) != 0; @@ -119,6 +109,32 @@ int ethtool_op_set_ufo(struct net_device *dev, u32 data) return 0; } +/* the following list of flags are the same as their associated + * NETIF_F_xxx values in include/linux/netdevice.h + */ +static const u32 flags_dup_features = + ETH_FLAG_LRO; + +u32 ethtool_op_get_flags(struct net_device *dev) +{ + /* in the future, this function will probably contain additional + * handling for flags which are not so easily handled + * by a simple masking operation + */ + + return dev->features & flags_dup_features; +} + +int ethtool_op_set_flags(struct net_device *dev, u32 data) +{ + if (data & ETH_FLAG_LRO) + dev->features |= NETIF_F_LRO; + else + dev->features &= ~NETIF_F_LRO; + + return 0; +} + /* Handlers for each ethtool command */ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) @@ -163,10 +179,26 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) info.cmd = ETHTOOL_GDRVINFO; ops->get_drvinfo(dev, &info); - if (ops->self_test_count) - info.testinfo_len = ops->self_test_count(dev); - if (ops->get_stats_count) - info.n_stats = ops->get_stats_count(dev); + if (ops->get_sset_count) { + int rc; + + rc = ops->get_sset_count(dev, ETH_SS_TEST); + if (rc >= 0) + info.testinfo_len = rc; + rc = ops->get_sset_count(dev, ETH_SS_STATS); + if (rc >= 0) + info.n_stats = rc; + rc = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS); + if (rc >= 0) + info.n_priv_flags = rc; + } else { + /* code path for obsolete hooks */ + + if (ops->self_test_count) + info.testinfo_len = ops->self_test_count(dev); + if (ops->get_stats_count) + info.n_stats = ops->get_stats_count(dev); + } if (ops->get_regs_len) info.regdump_len = ops->get_regs_len(dev); if (ops->get_eeprom_len) @@ -240,34 +272,6 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr) return dev->ethtool_ops->set_wol(dev, &wol); } -static int ethtool_get_msglevel(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GMSGLVL }; - - if (!dev->ethtool_ops->get_msglevel) - return -EOPNOTSUPP; - - edata.data = dev->ethtool_ops->get_msglevel(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_msglevel(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata; - - if (!dev->ethtool_ops->set_msglevel) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - dev->ethtool_ops->set_msglevel(dev, edata.data); - return 0; -} - static int ethtool_nway_reset(struct net_device *dev) { if (!dev->ethtool_ops->nway_reset) @@ -276,20 +280,6 @@ static int ethtool_nway_reset(struct net_device *dev) return dev->ethtool_ops->nway_reset(dev); } -static int ethtool_get_link(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GLINK }; - - if (!dev->ethtool_ops->get_link) - return -EOPNOTSUPP; - - edata.data = dev->ethtool_ops->get_link(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr) { struct ethtool_eeprom eeprom; @@ -457,48 +447,6 @@ static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr) return dev->ethtool_ops->set_pauseparam(dev, &pauseparam); } -static int ethtool_get_rx_csum(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GRXCSUM }; - - if (!dev->ethtool_ops->get_rx_csum) - return -EOPNOTSUPP; - - edata.data = dev->ethtool_ops->get_rx_csum(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata; - - if (!dev->ethtool_ops->set_rx_csum) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - dev->ethtool_ops->set_rx_csum(dev, edata.data); - return 0; -} - -static int ethtool_get_tx_csum(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GTXCSUM }; - - if (!dev->ethtool_ops->get_tx_csum) - return -EOPNOTSUPP; - - edata.data = dev->ethtool_ops->get_tx_csum(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - static int __ethtool_set_sg(struct net_device *dev, u32 data) { int err; @@ -537,20 +485,6 @@ static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr) return dev->ethtool_ops->set_tx_csum(dev, edata.data); } -static int ethtool_get_sg(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GSG }; - - if (!dev->ethtool_ops->get_sg) - return -EOPNOTSUPP; - - edata.data = dev->ethtool_ops->get_sg(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - static int ethtool_set_sg(struct net_device *dev, char __user *useraddr) { struct ethtool_value edata; @@ -568,20 +502,6 @@ static int ethtool_set_sg(struct net_device *dev, char __user *useraddr) return __ethtool_set_sg(dev, edata.data); } -static int ethtool_get_tso(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GTSO }; - - if (!dev->ethtool_ops->get_tso) - return -EOPNOTSUPP; - - edata.data = dev->ethtool_ops->get_tso(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - static int ethtool_set_tso(struct net_device *dev, char __user *useraddr) { struct ethtool_value edata; @@ -598,18 +518,6 @@ static int ethtool_set_tso(struct net_device *dev, char __user *useraddr) return dev->ethtool_ops->set_tso(dev, edata.data); } -static int ethtool_get_ufo(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GUFO }; - - if (!dev->ethtool_ops->get_ufo) - return -EOPNOTSUPP; - edata.data = dev->ethtool_ops->get_ufo(dev); - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr) { struct ethtool_value edata; @@ -653,16 +561,27 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr) struct ethtool_test test; const struct ethtool_ops *ops = dev->ethtool_ops; u64 *data; - int ret; + int ret, test_len; - if (!ops->self_test || !ops->self_test_count) + if (!ops->self_test) + return -EOPNOTSUPP; + if (!ops->get_sset_count && !ops->self_test_count) return -EOPNOTSUPP; + if (ops->get_sset_count) + test_len = ops->get_sset_count(dev, ETH_SS_TEST); + else + /* code path for obsolete hook */ + test_len = ops->self_test_count(dev); + if (test_len < 0) + return test_len; + WARN_ON(test_len == 0); + if (copy_from_user(&test, useraddr, sizeof(test))) return -EFAULT; - test.len = ops->self_test_count(dev); - data = kmalloc(test.len * sizeof(u64), GFP_USER); + test.len = test_len; + data = kmalloc(test_len * sizeof(u64), GFP_USER); if (!data) return -ENOMEM; @@ -694,19 +613,29 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) return -EFAULT; - switch (gstrings.string_set) { - case ETH_SS_TEST: - if (!ops->self_test_count) - return -EOPNOTSUPP; - gstrings.len = ops->self_test_count(dev); - break; - case ETH_SS_STATS: - if (!ops->get_stats_count) - return -EOPNOTSUPP; - gstrings.len = ops->get_stats_count(dev); - break; - default: - return -EINVAL; + if (ops->get_sset_count) { + ret = ops->get_sset_count(dev, gstrings.string_set); + if (ret < 0) + return ret; + + gstrings.len = ret; + } else { + /* code path for obsolete hooks */ + + switch (gstrings.string_set) { + case ETH_SS_TEST: + if (!ops->self_test_count) + return -EOPNOTSUPP; + gstrings.len = ops->self_test_count(dev); + break; + case ETH_SS_STATS: + if (!ops->get_stats_count) + return -EOPNOTSUPP; + gstrings.len = ops->get_stats_count(dev); + break; + default: + return -EINVAL; + } } data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); @@ -746,16 +675,27 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) struct ethtool_stats stats; const struct ethtool_ops *ops = dev->ethtool_ops; u64 *data; - int ret; + int ret, n_stats; - if (!ops->get_ethtool_stats || !ops->get_stats_count) + if (!ops->get_ethtool_stats) return -EOPNOTSUPP; + if (!ops->get_sset_count && !ops->get_stats_count) + return -EOPNOTSUPP; + + if (ops->get_sset_count) + n_stats = ops->get_sset_count(dev, ETH_SS_STATS); + else + /* code path for obsolete hook */ + n_stats = ops->get_stats_count(dev); + if (n_stats < 0) + return n_stats; + WARN_ON(n_stats == 0); if (copy_from_user(&stats, useraddr, sizeof(stats))) return -EFAULT; - stats.n_stats = ops->get_stats_count(dev); - data = kmalloc(stats.n_stats * sizeof(u64), GFP_USER); + stats.n_stats = n_stats; + data = kmalloc(n_stats * sizeof(u64), GFP_USER); if (!data) return -ENOMEM; @@ -777,41 +717,71 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr) { struct ethtool_perm_addr epaddr; - u8 *data; - int ret; - if (!dev->ethtool_ops->get_perm_addr) + if (copy_from_user(&epaddr, useraddr, sizeof(epaddr))) + return -EFAULT; + + if (epaddr.size < dev->addr_len) + return -ETOOSMALL; + epaddr.size = dev->addr_len; + + if (copy_to_user(useraddr, &epaddr, sizeof(epaddr))) + return -EFAULT; + useraddr += sizeof(epaddr); + if (copy_to_user(useraddr, dev->perm_addr, epaddr.size)) + return -EFAULT; + return 0; +} + +static int ethtool_get_value(struct net_device *dev, char __user *useraddr, + u32 cmd, u32 (*actor)(struct net_device *)) +{ + struct ethtool_value edata = { cmd }; + + if (!actor) return -EOPNOTSUPP; - if (copy_from_user(&epaddr,useraddr,sizeof(epaddr))) + edata.data = actor(dev); + + if (copy_to_user(useraddr, &edata, sizeof(edata))) return -EFAULT; + return 0; +} - data = kmalloc(epaddr.size, GFP_USER); - if (!data) - return -ENOMEM; +static int ethtool_set_value_void(struct net_device *dev, char __user *useraddr, + void (*actor)(struct net_device *, u32)) +{ + struct ethtool_value edata; - ret = dev->ethtool_ops->get_perm_addr(dev,&epaddr,data); - if (ret) - return ret; + if (!actor) + return -EOPNOTSUPP; - ret = -EFAULT; - if (copy_to_user(useraddr, &epaddr, sizeof(epaddr))) - goto out; - useraddr += sizeof(epaddr); - if (copy_to_user(useraddr, data, epaddr.size)) - goto out; - ret = 0; + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; - out: - kfree(data); - return ret; + actor(dev, edata.data); + return 0; +} + +static int ethtool_set_value(struct net_device *dev, char __user *useraddr, + int (*actor)(struct net_device *, u32)) +{ + struct ethtool_value edata; + + if (!actor) + return -EOPNOTSUPP; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + + return actor(dev, edata.data); } /* The main entry point in this file. Called from net/core/dev.c */ -int dev_ethtool(struct ifreq *ifr) +int dev_ethtool(struct net *net, struct ifreq *ifr) { - struct net_device *dev = __dev_get_by_name(ifr->ifr_name); + struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); void __user *useraddr = ifr->ifr_data; u32 ethcmd; int rc; @@ -821,7 +791,7 @@ int dev_ethtool(struct ifreq *ifr) return -ENODEV; if (!dev->ethtool_ops) - goto ioctl; + return -EOPNOTSUPP; if (copy_from_user(ðcmd, useraddr, sizeof (ethcmd))) return -EFAULT; @@ -841,6 +811,8 @@ int dev_ethtool(struct ifreq *ifr) case ETHTOOL_GPERMADDR: case ETHTOOL_GUFO: case ETHTOOL_GGSO: + case ETHTOOL_GFLAGS: + case ETHTOOL_GPFLAGS: break; default: if (!capable(CAP_NET_ADMIN)) @@ -873,16 +845,19 @@ int dev_ethtool(struct ifreq *ifr) rc = ethtool_set_wol(dev, useraddr); break; case ETHTOOL_GMSGLVL: - rc = ethtool_get_msglevel(dev, useraddr); + rc = ethtool_get_value(dev, useraddr, ethcmd, + dev->ethtool_ops->get_msglevel); break; case ETHTOOL_SMSGLVL: - rc = ethtool_set_msglevel(dev, useraddr); + rc = ethtool_set_value_void(dev, useraddr, + dev->ethtool_ops->set_msglevel); break; case ETHTOOL_NWAY_RST: rc = ethtool_nway_reset(dev); break; case ETHTOOL_GLINK: - rc = ethtool_get_link(dev, useraddr); + rc = ethtool_get_value(dev, useraddr, ethcmd, + dev->ethtool_ops->get_link); break; case ETHTOOL_GEEPROM: rc = ethtool_get_eeprom(dev, useraddr); @@ -909,25 +884,36 @@ int dev_ethtool(struct ifreq *ifr) rc = ethtool_set_pauseparam(dev, useraddr); break; case ETHTOOL_GRXCSUM: - rc = ethtool_get_rx_csum(dev, useraddr); + rc = ethtool_get_value(dev, useraddr, ethcmd, + dev->ethtool_ops->get_rx_csum); break; case ETHTOOL_SRXCSUM: - rc = ethtool_set_rx_csum(dev, useraddr); + rc = ethtool_set_value(dev, useraddr, + dev->ethtool_ops->set_rx_csum); break; case ETHTOOL_GTXCSUM: - rc = ethtool_get_tx_csum(dev, useraddr); + rc = ethtool_get_value(dev, useraddr, ethcmd, + (dev->ethtool_ops->get_tx_csum ? + dev->ethtool_ops->get_tx_csum : + ethtool_op_get_tx_csum)); break; case ETHTOOL_STXCSUM: rc = ethtool_set_tx_csum(dev, useraddr); break; case ETHTOOL_GSG: - rc = ethtool_get_sg(dev, useraddr); + rc = ethtool_get_value(dev, useraddr, ethcmd, + (dev->ethtool_ops->get_sg ? + dev->ethtool_ops->get_sg : + ethtool_op_get_sg)); break; case ETHTOOL_SSG: rc = ethtool_set_sg(dev, useraddr); break; case ETHTOOL_GTSO: - rc = ethtool_get_tso(dev, useraddr); + rc = ethtool_get_value(dev, useraddr, ethcmd, + (dev->ethtool_ops->get_tso ? + dev->ethtool_ops->get_tso : + ethtool_op_get_tso)); break; case ETHTOOL_STSO: rc = ethtool_set_tso(dev, useraddr); @@ -948,7 +934,10 @@ int dev_ethtool(struct ifreq *ifr) rc = ethtool_get_perm_addr(dev, useraddr); break; case ETHTOOL_GUFO: - rc = ethtool_get_ufo(dev, useraddr); + rc = ethtool_get_value(dev, useraddr, ethcmd, + (dev->ethtool_ops->get_ufo ? + dev->ethtool_ops->get_ufo : + ethtool_op_get_ufo)); break; case ETHTOOL_SUFO: rc = ethtool_set_ufo(dev, useraddr); @@ -959,8 +948,24 @@ int dev_ethtool(struct ifreq *ifr) case ETHTOOL_SGSO: rc = ethtool_set_gso(dev, useraddr); break; + case ETHTOOL_GFLAGS: + rc = ethtool_get_value(dev, useraddr, ethcmd, + dev->ethtool_ops->get_flags); + break; + case ETHTOOL_SFLAGS: + rc = ethtool_set_value(dev, useraddr, + dev->ethtool_ops->set_flags); + break; + case ETHTOOL_GPFLAGS: + rc = ethtool_get_value(dev, useraddr, ethcmd, + dev->ethtool_ops->get_priv_flags); + break; + case ETHTOOL_SPFLAGS: + rc = ethtool_set_value(dev, useraddr, + dev->ethtool_ops->set_priv_flags); + break; default: - rc = -EOPNOTSUPP; + rc = -EOPNOTSUPP; } if (dev->ethtool_ops->complete) @@ -970,20 +975,9 @@ int dev_ethtool(struct ifreq *ifr) netdev_features_change(dev); return rc; - - ioctl: - /* Keep existing behaviour for the moment. */ - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if (dev->do_ioctl) - return dev->do_ioctl(dev, ifr, SIOCETHTOOL); - return -EOPNOTSUPP; } -EXPORT_SYMBOL(dev_ethtool); EXPORT_SYMBOL(ethtool_op_get_link); -EXPORT_SYMBOL_GPL(ethtool_op_get_perm_addr); EXPORT_SYMBOL(ethtool_op_get_sg); EXPORT_SYMBOL(ethtool_op_get_tso); EXPORT_SYMBOL(ethtool_op_get_tx_csum); @@ -994,3 +988,5 @@ EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum); EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum); EXPORT_SYMBOL(ethtool_op_set_ufo); EXPORT_SYMBOL(ethtool_op_get_ufo); +EXPORT_SYMBOL(ethtool_op_set_flags); +EXPORT_SYMBOL(ethtool_op_get_flags); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 8c5474e1668..13de6f53f09 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -11,6 +11,8 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/list.h> +#include <net/net_namespace.h> +#include <net/sock.h> #include <net/fib_rules.h> static LIST_HEAD(rules_ops); @@ -82,7 +84,7 @@ static void cleanup_ops(struct fib_rules_ops *ops) { struct fib_rule *rule, *tmp; - list_for_each_entry_safe(rule, tmp, ops->rules_list, list) { + list_for_each_entry_safe(rule, tmp, &ops->rules_list, list) { list_del_rcu(&rule->list); fib_rule_put(rule); } @@ -137,7 +139,7 @@ int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl, rcu_read_lock(); - list_for_each_entry_rcu(rule, ops->rules_list, list) { + list_for_each_entry_rcu(rule, &ops->rules_list, list) { jumped: if (!fib_rule_match(rule, ops, fl, flags)) continue; @@ -197,6 +199,7 @@ errout: static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct fib_rule_hdr *frh = nlmsg_data(nlh); struct fib_rules_ops *ops = NULL; struct fib_rule *rule, *r, *last = NULL; @@ -234,7 +237,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) rule->ifindex = -1; nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ); - dev = __dev_get_by_name(rule->ifname); + dev = __dev_get_by_name(net, rule->ifname); if (dev) rule->ifindex = dev->ifindex; } @@ -268,7 +271,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (rule->target <= rule->pref) goto errout_free; - list_for_each_entry(r, ops->rules_list, list) { + list_for_each_entry(r, &ops->rules_list, list) { if (r->pref == rule->target) { rule->ctarget = r; break; @@ -284,7 +287,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (err < 0) goto errout_free; - list_for_each_entry(r, ops->rules_list, list) { + list_for_each_entry(r, &ops->rules_list, list) { if (r->pref > rule->pref) break; last = r; @@ -297,7 +300,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) * There are unresolved goto rules in the list, check if * any of them are pointing to this new rule. */ - list_for_each_entry(r, ops->rules_list, list) { + list_for_each_entry(r, &ops->rules_list, list) { if (r->action == FR_ACT_GOTO && r->target == rule->pref) { BUG_ON(r->ctarget != NULL); @@ -317,7 +320,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (last) list_add_rcu(&rule->list, &last->list); else - list_add_rcu(&rule->list, ops->rules_list); + list_add_rcu(&rule->list, &ops->rules_list); notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid); flush_route_cache(ops); @@ -356,7 +359,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (err < 0) goto errout; - list_for_each_entry(rule, ops->rules_list, list) { + list_for_each_entry(rule, &ops->rules_list, list) { if (frh->action && (frh->action != rule->action)) continue; @@ -399,7 +402,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) * actually been added. */ if (ops->nr_goto_rules > 0) { - list_for_each_entry(tmp, ops->rules_list, list) { + list_for_each_entry(tmp, &ops->rules_list, list) { if (tmp->ctarget == rule) { rcu_assign_pointer(tmp->ctarget, NULL); ops->unresolved_rules++; @@ -495,7 +498,7 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb, int idx = 0; struct fib_rule *rule; - list_for_each_entry(rule, ops->rules_list, list) { + list_for_each_entry(rule, &ops->rules_list, list) { if (idx < cb->args[1]) goto skip; @@ -596,18 +599,21 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, struct net_device *dev = ptr; struct fib_rules_ops *ops; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + ASSERT_RTNL(); rcu_read_lock(); switch (event) { case NETDEV_REGISTER: list_for_each_entry(ops, &rules_ops, list) - attach_rules(ops->rules_list, dev); + attach_rules(&ops->rules_list, dev); break; case NETDEV_UNREGISTER: list_for_each_entry(ops, &rules_ops, list) - detach_rules(ops->rules_list, dev); + detach_rules(&ops->rules_list, dev); break; } diff --git a/net/core/flow.c b/net/core/flow.c index 051430545a0..0ab5234b17d 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -350,7 +350,7 @@ static int __init flow_cache_init(void) flow_cachep = kmem_cache_create("flow_cache", sizeof(struct flow_cache_entry), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + NULL); flow_hash_shift = 10; flow_lwm = 2 * flow_hash_size; flow_hwm = 4 * flow_hash_size; diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index cc84d8d8a3c..590a767b029 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -79,27 +79,27 @@ struct gen_estimator { - struct gen_estimator *next; + struct list_head list; struct gnet_stats_basic *bstats; struct gnet_stats_rate_est *rate_est; spinlock_t *stats_lock; - unsigned interval; int ewma_log; u64 last_bytes; u32 last_packets; u32 avpps; u32 avbps; + struct rcu_head e_rcu; }; struct gen_estimator_head { struct timer_list timer; - struct gen_estimator *list; + struct list_head list; }; static struct gen_estimator_head elist[EST_MAX_INTERVAL+1]; -/* Estimator array lock */ +/* Protects against NULL dereference */ static DEFINE_RWLOCK(est_lock); static void est_timer(unsigned long arg) @@ -107,13 +107,17 @@ static void est_timer(unsigned long arg) int idx = (int)arg; struct gen_estimator *e; - read_lock(&est_lock); - for (e = elist[idx].list; e; e = e->next) { + rcu_read_lock(); + list_for_each_entry_rcu(e, &elist[idx].list, list) { u64 nbytes; u32 npackets; u32 rate; spin_lock(e->stats_lock); + read_lock(&est_lock); + if (e->bstats == NULL) + goto skip; + nbytes = e->bstats->bytes; npackets = e->bstats->packets; rate = (nbytes - e->last_bytes)<<(7 - idx); @@ -125,12 +129,14 @@ static void est_timer(unsigned long arg) e->last_packets = npackets; e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log; e->rate_est->pps = (e->avpps+0x1FF)>>10; +skip: + read_unlock(&est_lock); spin_unlock(e->stats_lock); } - if (elist[idx].list != NULL) + if (!list_empty(&elist[idx].list)) mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4)); - read_unlock(&est_lock); + rcu_read_unlock(); } /** @@ -147,12 +153,17 @@ static void est_timer(unsigned long arg) * &rate_est with the statistics lock grabed during this period. * * Returns 0 on success or a negative error code. + * + * NOTE: Called under rtnl_mutex */ int gen_new_estimator(struct gnet_stats_basic *bstats, - struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct rtattr *opt) + struct gnet_stats_rate_est *rate_est, + spinlock_t *stats_lock, + struct rtattr *opt) { struct gen_estimator *est; struct gnet_estimator *parm = RTA_DATA(opt); + int idx; if (RTA_PAYLOAD(opt) < sizeof(*parm)) return -EINVAL; @@ -164,7 +175,7 @@ int gen_new_estimator(struct gnet_stats_basic *bstats, if (est == NULL) return -ENOBUFS; - est->interval = parm->interval + 2; + idx = parm->interval + 2; est->bstats = bstats; est->rate_est = rate_est; est->stats_lock = stats_lock; @@ -174,20 +185,25 @@ int gen_new_estimator(struct gnet_stats_basic *bstats, est->last_packets = bstats->packets; est->avpps = rate_est->pps<<10; - est->next = elist[est->interval].list; - if (est->next == NULL) { - init_timer(&elist[est->interval].timer); - elist[est->interval].timer.data = est->interval; - elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4); - elist[est->interval].timer.function = est_timer; - add_timer(&elist[est->interval].timer); + if (!elist[idx].timer.function) { + INIT_LIST_HEAD(&elist[idx].list); + setup_timer(&elist[idx].timer, est_timer, idx); } - write_lock_bh(&est_lock); - elist[est->interval].list = est; - write_unlock_bh(&est_lock); + + if (list_empty(&elist[idx].list)) + mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4)); + + list_add_rcu(&est->list, &elist[idx].list); return 0; } +static void __gen_kill_estimator(struct rcu_head *head) +{ + struct gen_estimator *e = container_of(head, + struct gen_estimator, e_rcu); + kfree(e); +} + /** * gen_kill_estimator - remove a rate estimator * @bstats: basic statistics @@ -195,31 +211,32 @@ int gen_new_estimator(struct gnet_stats_basic *bstats, * * Removes the rate estimator specified by &bstats and &rate_est * and deletes the timer. + * + * NOTE: Called under rtnl_mutex */ void gen_kill_estimator(struct gnet_stats_basic *bstats, struct gnet_stats_rate_est *rate_est) { int idx; - struct gen_estimator *est, **pest; + struct gen_estimator *e, *n; for (idx=0; idx <= EST_MAX_INTERVAL; idx++) { - int killed = 0; - pest = &elist[idx].list; - while ((est=*pest) != NULL) { - if (est->rate_est != rate_est || est->bstats != bstats) { - pest = &est->next; + + /* Skip non initialized indexes */ + if (!elist[idx].timer.function) + continue; + + list_for_each_entry_safe(e, n, &elist[idx].list, list) { + if (e->rate_est != rate_est || e->bstats != bstats) continue; - } write_lock_bh(&est_lock); - *pest = est->next; + e->bstats = NULL; write_unlock_bh(&est_lock); - kfree(est); - killed++; + list_del_rcu(&e->list); + call_rcu(&e->e_rcu, __gen_kill_estimator); } - if (killed && elist[idx].list == NULL) - del_timer(&elist[idx].timer); } } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 9df26a07f06..c52df858d0b 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -25,6 +25,7 @@ #include <linux/sysctl.h> #endif #include <linux/times.h> +#include <net/net_namespace.h> #include <net/neighbour.h> #include <net/dst.h> #include <net/sock.h> @@ -33,6 +34,7 @@ #include <linux/rtnetlink.h> #include <linux/random.h> #include <linux/string.h> +#include <linux/log2.h> #define NEIGH_DEBUG 1 @@ -54,9 +56,8 @@ #define PNEIGH_HASHMASK 0xF static void neigh_timer_handler(unsigned long arg); -#ifdef CONFIG_ARPD -static void neigh_app_notify(struct neighbour *n); -#endif +static void __neigh_notify(struct neighbour *n, int type, int flags); +static void neigh_update_notify(struct neighbour *neigh); static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev); void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev); @@ -104,6 +105,15 @@ static int neigh_blackhole(struct sk_buff *skb) return -ENETDOWN; } +static void neigh_cleanup_and_release(struct neighbour *neigh) +{ + if (neigh->parms->neigh_cleanup) + neigh->parms->neigh_cleanup(neigh); + + __neigh_notify(neigh, RTM_DELNEIGH, 0); + neigh_release(neigh); +} + /* * It is random distribution in the interval (1/2)*base...(3/2)*base. * It corresponds to default IPv6 settings and is not overridable, @@ -140,9 +150,7 @@ static int neigh_forced_gc(struct neigh_table *tbl) n->dead = 1; shrunk = 1; write_unlock(&n->lock); - if (n->parms->neigh_cleanup) - n->parms->neigh_cleanup(n); - neigh_release(n); + neigh_cleanup_and_release(n); continue; } write_unlock(&n->lock); @@ -213,9 +221,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) NEIGH_PRINTK2("neigh %p is stray.\n", n); } write_unlock(&n->lock); - if (n->parms->neigh_cleanup) - n->parms->neigh_cleanup(n); - neigh_release(n); + neigh_cleanup_and_release(n); } } } @@ -311,7 +317,7 @@ static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries) NEIGH_CACHE_STAT_INC(tbl, hash_grows); - BUG_ON(new_entries & (new_entries - 1)); + BUG_ON(!is_power_of_2(new_entries)); new_hash = neigh_hash_alloc(new_entries); if (!new_hash) return; @@ -676,9 +682,7 @@ static void neigh_periodic_timer(unsigned long arg) *np = n->next; n->dead = 1; write_unlock(&n->lock); - if (n->parms->neigh_cleanup) - n->parms->neigh_cleanup(n); - neigh_release(n); + neigh_cleanup_and_release(n); continue; } write_unlock(&n->lock); @@ -827,13 +831,10 @@ static void neigh_timer_handler(unsigned long arg) out: write_unlock(&neigh->lock); } + if (notify) - call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); + neigh_update_notify(neigh); -#ifdef CONFIG_ARPD - if (notify && neigh->parms->app_probes) - neigh_app_notify(neigh); -#endif neigh_release(neigh); } @@ -896,8 +897,8 @@ out_unlock_bh: static void neigh_update_hhs(struct neighbour *neigh) { struct hh_cache *hh; - void (*update)(struct hh_cache*, struct net_device*, unsigned char *) = - neigh->dev->header_cache_update; + void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) + = neigh->dev->header_ops->cache_update; if (update) { for (hh = neigh->hh; hh; hh = hh->hh_next) { @@ -1062,11 +1063,8 @@ out: write_unlock_bh(&neigh->lock); if (notify) - call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); -#ifdef CONFIG_ARPD - if (notify && neigh->parms->app_probes) - neigh_app_notify(neigh); -#endif + neigh_update_notify(neigh); + return err; } @@ -1097,7 +1095,8 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, hh->hh_type = protocol; atomic_set(&hh->hh_refcnt, 0); hh->hh_next = NULL; - if (dev->hard_header_cache(n, hh)) { + + if (dev->header_ops->cache(n, hh)) { kfree(hh); hh = NULL; } else { @@ -1127,10 +1126,9 @@ int neigh_compat_output(struct sk_buff *skb) __skb_pull(skb, skb_network_offset(skb)); - if (dev->hard_header && - dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, - skb->len) < 0 && - dev->rebuild_header(skb)) + if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, + skb->len) < 0 && + dev->header_ops->rebuild(skb)) return 0; return dev_queue_xmit(skb); @@ -1152,17 +1150,17 @@ int neigh_resolve_output(struct sk_buff *skb) if (!neigh_event_send(neigh, skb)) { int err; struct net_device *dev = neigh->dev; - if (dev->hard_header_cache && !dst->hh) { + if (dev->header_ops->cache && !dst->hh) { write_lock_bh(&neigh->lock); if (!dst->hh) neigh_hh_init(neigh, dst, dst->ops->protocol); - err = dev->hard_header(skb, dev, ntohs(skb->protocol), - neigh->ha, NULL, skb->len); + err = dev_hard_header(skb, dev, ntohs(skb->protocol), + neigh->ha, NULL, skb->len); write_unlock_bh(&neigh->lock); } else { read_lock_bh(&neigh->lock); - err = dev->hard_header(skb, dev, ntohs(skb->protocol), - neigh->ha, NULL, skb->len); + err = dev_hard_header(skb, dev, ntohs(skb->protocol), + neigh->ha, NULL, skb->len); read_unlock_bh(&neigh->lock); } if (err >= 0) @@ -1193,8 +1191,8 @@ int neigh_connected_output(struct sk_buff *skb) __skb_pull(skb, skb_network_offset(skb)); read_lock_bh(&neigh->lock); - err = dev->hard_header(skb, dev, ntohs(skb->protocol), - neigh->ha, NULL, skb->len); + err = dev_hard_header(skb, dev, ntohs(skb->protocol), + neigh->ha, NULL, skb->len); read_unlock_bh(&neigh->lock); if (err >= 0) err = neigh->ops->queue_xmit(skb); @@ -1347,13 +1345,13 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) tbl->kmem_cachep = kmem_cache_create(tbl->id, tbl->entry_size, 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + NULL); tbl->stats = alloc_percpu(struct neigh_statistics); if (!tbl->stats) panic("cannot create neighbour cache statistics"); #ifdef CONFIG_PROC_FS - tbl->pde = create_proc_entry(tbl->id, 0, proc_net_stat); + tbl->pde = create_proc_entry(tbl->id, 0, init_net.proc_net_stat); if (!tbl->pde) panic("cannot create neighbour proc dir entry"); tbl->pde->proc_fops = &neigh_stat_seq_fops; @@ -1443,6 +1441,7 @@ int neigh_table_clear(struct neigh_table *tbl) static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ndmsg *ndm; struct nlattr *dst_attr; struct neigh_table *tbl; @@ -1458,7 +1457,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex) { - dev = dev_get_by_index(ndm->ndm_ifindex); + dev = dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { err = -ENODEV; goto out; @@ -1508,6 +1507,7 @@ out: static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ndmsg *ndm; struct nlattr *tb[NDA_MAX+1]; struct neigh_table *tbl; @@ -1524,7 +1524,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex) { - dev = dev_get_by_index(ndm->ndm_ifindex); + dev = dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { err = -ENODEV; goto out; @@ -1999,6 +1999,11 @@ nla_put_failure: return -EMSGSIZE; } +static void neigh_update_notify(struct neighbour *neigh) +{ + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); + __neigh_notify(neigh, RTM_NEWNEIGH, 0); +} static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb) @@ -2094,11 +2099,8 @@ void __neigh_for_each_release(struct neigh_table *tbl, } else np = &n->next; write_unlock(&n->lock); - if (release) { - if (n->parms->neigh_cleanup) - n->parms->neigh_cleanup(n); - neigh_release(n); - } + if (release) + neigh_cleanup_and_release(n); } } } @@ -2421,7 +2423,6 @@ static const struct file_operations neigh_stat_seq_fops = { #endif /* CONFIG_PROC_FS */ -#ifdef CONFIG_ARPD static inline size_t neigh_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ndmsg)) @@ -2453,16 +2454,11 @@ errout: rtnl_set_sk_err(RTNLGRP_NEIGH, err); } +#ifdef CONFIG_ARPD void neigh_app_ns(struct neighbour *n) { __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST); } - -static void neigh_app_notify(struct neighbour *n) -{ - __neigh_notify(n, RTM_NEWNEIGH, 0); -} - #endif /* CONFIG_ARPD */ #ifdef CONFIG_SYSCTL diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 5c19b0646d7..6628e457ddc 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -18,6 +18,7 @@ #include <linux/wireless.h> #include <net/iw_handler.h> +#ifdef CONFIG_SYSFS static const char fmt_hex[] = "%#x\n"; static const char fmt_long_hex[] = "%#lx\n"; static const char fmt_dec[] = "%d\n"; @@ -216,20 +217,6 @@ static ssize_t store_tx_queue_len(struct device *dev, return netdev_store(dev, attr, buf, len, change_tx_queue_len); } -NETDEVICE_SHOW(weight, fmt_dec); - -static int change_weight(struct net_device *net, unsigned long new_weight) -{ - net->weight = new_weight; - return 0; -} - -static ssize_t store_weight(struct device *dev, struct device_attribute *attr, - const char *buf, size_t len) -{ - return netdev_store(dev, attr, buf, len, change_weight); -} - static struct device_attribute net_class_attributes[] = { __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), __ATTR(iflink, S_IRUGO, show_iflink, NULL), @@ -246,7 +233,6 @@ static struct device_attribute net_class_attributes[] = { __ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags), __ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, store_tx_queue_len), - __ATTR(weight, S_IRUGO | S_IWUSR, show_weight, store_weight), {} }; @@ -407,29 +393,25 @@ static struct attribute_group wireless_group = { }; #endif +#endif /* CONFIG_SYSFS */ + #ifdef CONFIG_HOTPLUG -static int netdev_uevent(struct device *d, char **envp, - int num_envp, char *buf, int size) +static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) { struct net_device *dev = to_net_dev(d); - int retval, len = 0, i = 0; + int retval; /* pass interface to uevent. */ - retval = add_uevent_var(envp, num_envp, &i, - buf, size, &len, - "INTERFACE=%s", dev->name); + retval = add_uevent_var(env, "INTERFACE=%s", dev->name); if (retval) goto exit; /* pass ifindex to uevent. * ifindex is useful as it won't change (interface name may change) * and is what RtNetlink uses natively. */ - retval = add_uevent_var(envp, num_envp, &i, - buf, size, &len, - "IFINDEX=%d", dev->ifindex); + retval = add_uevent_var(env, "IFINDEX=%d", dev->ifindex); exit: - envp[i] = NULL; return retval; } #endif @@ -450,7 +432,9 @@ static void netdev_release(struct device *d) static struct class net_class = { .name = "net", .dev_release = netdev_release, +#ifdef CONFIG_SYSFS .dev_attrs = net_class_attributes, +#endif /* CONFIG_SYSFS */ #ifdef CONFIG_HOTPLUG .dev_uevent = netdev_uevent, #endif @@ -459,7 +443,7 @@ static struct class net_class = { /* Delete sysfs entries but hold kobject reference until after all * netdev references are gone. */ -void netdev_unregister_sysfs(struct net_device * net) +void netdev_unregister_kobject(struct net_device * net) { struct device *dev = &(net->dev); @@ -468,7 +452,7 @@ void netdev_unregister_sysfs(struct net_device * net) } /* Create sysfs entries for network device. */ -int netdev_register_sysfs(struct net_device *net) +int netdev_register_kobject(struct net_device *net) { struct device *dev = &(net->dev); struct attribute_group **groups = net->sysfs_groups; @@ -481,6 +465,7 @@ int netdev_register_sysfs(struct net_device *net) BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ); strlcpy(dev->bus_id, net->name, BUS_ID_SIZE); +#ifdef CONFIG_SYSFS if (net->get_stats) *groups++ = &netstat_group; @@ -488,11 +473,12 @@ int netdev_register_sysfs(struct net_device *net) if (net->wireless_handlers && net->wireless_handlers->get_wireless_stats) *groups++ = &wireless_group; #endif +#endif /* CONFIG_SYSFS */ return device_add(dev); } -int netdev_sysfs_init(void) +int netdev_kobject_init(void) { return class_register(&net_class); } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c new file mode 100644 index 00000000000..6f71db8c442 --- /dev/null +++ b/net/core/net_namespace.c @@ -0,0 +1,317 @@ +#include <linux/workqueue.h> +#include <linux/rtnetlink.h> +#include <linux/cache.h> +#include <linux/slab.h> +#include <linux/list.h> +#include <linux/delay.h> +#include <linux/sched.h> +#include <net/net_namespace.h> + +/* + * Our network namespace constructor/destructor lists + */ + +static LIST_HEAD(pernet_list); +static struct list_head *first_device = &pernet_list; +static DEFINE_MUTEX(net_mutex); + +LIST_HEAD(net_namespace_list); + +static struct kmem_cache *net_cachep; + +struct net init_net; +EXPORT_SYMBOL_GPL(init_net); + +static struct net *net_alloc(void) +{ + return kmem_cache_zalloc(net_cachep, GFP_KERNEL); +} + +static void net_free(struct net *net) +{ + if (!net) + return; + + if (unlikely(atomic_read(&net->use_count) != 0)) { + printk(KERN_EMERG "network namespace not free! Usage: %d\n", + atomic_read(&net->use_count)); + return; + } + + kmem_cache_free(net_cachep, net); +} + +static void cleanup_net(struct work_struct *work) +{ + struct pernet_operations *ops; + struct net *net; + + net = container_of(work, struct net, work); + + mutex_lock(&net_mutex); + + /* Don't let anyone else find us. */ + rtnl_lock(); + list_del(&net->list); + rtnl_unlock(); + + /* Run all of the network namespace exit methods */ + list_for_each_entry_reverse(ops, &pernet_list, list) { + if (ops->exit) + ops->exit(net); + } + + mutex_unlock(&net_mutex); + + /* Ensure there are no outstanding rcu callbacks using this + * network namespace. + */ + rcu_barrier(); + + /* Finally it is safe to free my network namespace structure */ + net_free(net); +} + + +void __put_net(struct net *net) +{ + /* Cleanup the network namespace in process context */ + INIT_WORK(&net->work, cleanup_net); + schedule_work(&net->work); +} +EXPORT_SYMBOL_GPL(__put_net); + +/* + * setup_net runs the initializers for the network namespace object. + */ +static int setup_net(struct net *net) +{ + /* Must be called with net_mutex held */ + struct pernet_operations *ops; + int error; + + atomic_set(&net->count, 1); + atomic_set(&net->use_count, 0); + + error = 0; + list_for_each_entry(ops, &pernet_list, list) { + if (ops->init) { + error = ops->init(net); + if (error < 0) + goto out_undo; + } + } +out: + return error; + +out_undo: + /* Walk through the list backwards calling the exit functions + * for the pernet modules whose init functions did not fail. + */ + list_for_each_entry_continue_reverse(ops, &pernet_list, list) { + if (ops->exit) + ops->exit(net); + } + goto out; +} + +struct net *copy_net_ns(unsigned long flags, struct net *old_net) +{ + struct net *new_net = NULL; + int err; + + get_net(old_net); + + if (!(flags & CLONE_NEWNET)) + return old_net; + +#ifndef CONFIG_NET_NS + return ERR_PTR(-EINVAL); +#endif + + err = -ENOMEM; + new_net = net_alloc(); + if (!new_net) + goto out; + + mutex_lock(&net_mutex); + err = setup_net(new_net); + if (err) + goto out_unlock; + + rtnl_lock(); + list_add_tail(&new_net->list, &net_namespace_list); + rtnl_unlock(); + + +out_unlock: + mutex_unlock(&net_mutex); +out: + put_net(old_net); + if (err) { + net_free(new_net); + new_net = ERR_PTR(err); + } + return new_net; +} + +static int __init net_ns_init(void) +{ + int err; + + printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net)); + net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), + SMP_CACHE_BYTES, + SLAB_PANIC, NULL); + mutex_lock(&net_mutex); + err = setup_net(&init_net); + + rtnl_lock(); + list_add_tail(&init_net.list, &net_namespace_list); + rtnl_unlock(); + + mutex_unlock(&net_mutex); + if (err) + panic("Could not setup the initial network namespace"); + + return 0; +} + +pure_initcall(net_ns_init); + +static int register_pernet_operations(struct list_head *list, + struct pernet_operations *ops) +{ + struct net *net, *undo_net; + int error; + + error = 0; + list_add_tail(&ops->list, list); + for_each_net(net) { + if (ops->init) { + error = ops->init(net); + if (error) + goto out_undo; + } + } +out: + return error; + +out_undo: + /* If I have an error cleanup all namespaces I initialized */ + list_del(&ops->list); + for_each_net(undo_net) { + if (undo_net == net) + goto undone; + if (ops->exit) + ops->exit(undo_net); + } +undone: + goto out; +} + +static void unregister_pernet_operations(struct pernet_operations *ops) +{ + struct net *net; + + list_del(&ops->list); + for_each_net(net) + if (ops->exit) + ops->exit(net); +} + +/** + * register_pernet_subsys - register a network namespace subsystem + * @ops: pernet operations structure for the subsystem + * + * Register a subsystem which has init and exit functions + * that are called when network namespaces are created and + * destroyed respectively. + * + * When registered all network namespace init functions are + * called for every existing network namespace. Allowing kernel + * modules to have a race free view of the set of network namespaces. + * + * When a new network namespace is created all of the init + * methods are called in the order in which they were registered. + * + * When a network namespace is destroyed all of the exit methods + * are called in the reverse of the order with which they were + * registered. + */ +int register_pernet_subsys(struct pernet_operations *ops) +{ + int error; + mutex_lock(&net_mutex); + error = register_pernet_operations(first_device, ops); + mutex_unlock(&net_mutex); + return error; +} +EXPORT_SYMBOL_GPL(register_pernet_subsys); + +/** + * unregister_pernet_subsys - unregister a network namespace subsystem + * @ops: pernet operations structure to manipulate + * + * Remove the pernet operations structure from the list to be + * used when network namespaces are created or destoryed. In + * addition run the exit method for all existing network + * namespaces. + */ +void unregister_pernet_subsys(struct pernet_operations *module) +{ + mutex_lock(&net_mutex); + unregister_pernet_operations(module); + mutex_unlock(&net_mutex); +} +EXPORT_SYMBOL_GPL(unregister_pernet_subsys); + +/** + * register_pernet_device - register a network namespace device + * @ops: pernet operations structure for the subsystem + * + * Register a device which has init and exit functions + * that are called when network namespaces are created and + * destroyed respectively. + * + * When registered all network namespace init functions are + * called for every existing network namespace. Allowing kernel + * modules to have a race free view of the set of network namespaces. + * + * When a new network namespace is created all of the init + * methods are called in the order in which they were registered. + * + * When a network namespace is destroyed all of the exit methods + * are called in the reverse of the order with which they were + * registered. + */ +int register_pernet_device(struct pernet_operations *ops) +{ + int error; + mutex_lock(&net_mutex); + error = register_pernet_operations(&pernet_list, ops); + if (!error && (first_device == &pernet_list)) + first_device = &ops->list; + mutex_unlock(&net_mutex); + return error; +} +EXPORT_SYMBOL_GPL(register_pernet_device); + +/** + * unregister_pernet_device - unregister a network namespace netdevice + * @ops: pernet operations structure to manipulate + * + * Remove the pernet operations structure from the list to be + * used when network namespaces are created or destoryed. In + * addition run the exit method for all existing network + * namespaces. + */ +void unregister_pernet_device(struct pernet_operations *ops) +{ + mutex_lock(&net_mutex); + if (&ops->list == first_device) + first_device = first_device->next; + unregister_pernet_operations(ops); + mutex_unlock(&net_mutex); +} +EXPORT_SYMBOL_GPL(unregister_pernet_device); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index de1b26aa572..95daba62496 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -119,19 +119,22 @@ static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh, static void poll_napi(struct netpoll *np) { struct netpoll_info *npinfo = np->dev->npinfo; + struct napi_struct *napi; int budget = 16; - if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) && - npinfo->poll_owner != smp_processor_id() && - spin_trylock(&npinfo->poll_lock)) { - npinfo->rx_flags |= NETPOLL_RX_DROP; - atomic_inc(&trapped); + list_for_each_entry(napi, &np->dev->napi_list, dev_list) { + if (test_bit(NAPI_STATE_SCHED, &napi->state) && + napi->poll_owner != smp_processor_id() && + spin_trylock(&napi->poll_lock)) { + npinfo->rx_flags |= NETPOLL_RX_DROP; + atomic_inc(&trapped); - np->dev->poll(np->dev, &budget); + napi->poll(napi, budget); - atomic_dec(&trapped); - npinfo->rx_flags &= ~NETPOLL_RX_DROP; - spin_unlock(&npinfo->poll_lock); + atomic_dec(&trapped); + npinfo->rx_flags &= ~NETPOLL_RX_DROP; + spin_unlock(&napi->poll_lock); + } } } @@ -157,7 +160,7 @@ void netpoll_poll(struct netpoll *np) /* Process pending work on NIC */ np->dev->poll_controller(np->dev); - if (np->dev->poll) + if (!list_empty(&np->dev->napi_list)) poll_napi(np); service_arp_queue(np->dev->npinfo); @@ -233,6 +236,17 @@ repeat: return skb; } +static int netpoll_owner_active(struct net_device *dev) +{ + struct napi_struct *napi; + + list_for_each_entry(napi, &dev->napi_list, dev_list) { + if (napi->poll_owner == smp_processor_id()) + return 1; + } + return 0; +} + static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { int status = NETDEV_TX_BUSY; @@ -246,8 +260,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) } /* don't get messages out of order, and no recursion */ - if (skb_queue_len(&npinfo->txq) == 0 && - npinfo->poll_owner != smp_processor_id()) { + if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) { unsigned long flags; local_irq_save(flags); @@ -402,11 +415,9 @@ static void arp_reply(struct sk_buff *skb) send_skb->protocol = htons(ETH_P_ARP); /* Fill the device header for the ARP frame */ - - if (np->dev->hard_header && - np->dev->hard_header(send_skb, skb->dev, ptype, - sha, np->local_mac, - send_skb->len) < 0) { + if (dev_hard_header(send_skb, skb->dev, ptype, + sha, np->local_mac, + send_skb->len) < 0) { kfree_skb(send_skb); return; } @@ -519,6 +530,23 @@ out: return 0; } +void netpoll_print_options(struct netpoll *np) +{ + DECLARE_MAC_BUF(mac); + printk(KERN_INFO "%s: local port %d\n", + np->name, np->local_port); + printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n", + np->name, HIPQUAD(np->local_ip)); + printk(KERN_INFO "%s: interface %s\n", + np->name, np->dev_name); + printk(KERN_INFO "%s: remote port %d\n", + np->name, np->remote_port); + printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n", + np->name, HIPQUAD(np->remote_ip)); + printk(KERN_INFO "%s: remote ethernet address %s\n", + np->name, print_mac(mac, np->remote_mac)); +} + int netpoll_parse_options(struct netpoll *np, char *opt) { char *cur=opt, *delim; @@ -531,7 +559,6 @@ int netpoll_parse_options(struct netpoll *np, char *opt) cur = delim; } cur++; - printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port); if (*cur != '/') { if ((delim = strchr(cur, '/')) == NULL) @@ -539,9 +566,6 @@ int netpoll_parse_options(struct netpoll *np, char *opt) *delim = 0; np->local_ip = ntohl(in_aton(cur)); cur = delim; - - printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n", - np->name, HIPQUAD(np->local_ip)); } cur++; @@ -555,8 +579,6 @@ int netpoll_parse_options(struct netpoll *np, char *opt) } cur++; - printk(KERN_INFO "%s: interface %s\n", np->name, np->dev_name); - if (*cur != '@') { /* dst port */ if ((delim = strchr(cur, '@')) == NULL) @@ -566,7 +588,6 @@ int netpoll_parse_options(struct netpoll *np, char *opt) cur = delim; } cur++; - printk(KERN_INFO "%s: remote port %d\n", np->name, np->remote_port); /* dst ip */ if ((delim = strchr(cur, '/')) == NULL) @@ -575,9 +596,6 @@ int netpoll_parse_options(struct netpoll *np, char *opt) np->remote_ip = ntohl(in_aton(cur)); cur = delim + 1; - printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n", - np->name, HIPQUAD(np->remote_ip)); - if (*cur != 0) { /* MAC address */ if ((delim = strchr(cur, ':')) == NULL) @@ -608,15 +626,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt) np->remote_mac[5] = simple_strtol(cur, NULL, 16); } - printk(KERN_INFO "%s: remote ethernet address " - "%02x:%02x:%02x:%02x:%02x:%02x\n", - np->name, - np->remote_mac[0], - np->remote_mac[1], - np->remote_mac[2], - np->remote_mac[3], - np->remote_mac[4], - np->remote_mac[5]); + netpoll_print_options(np); return 0; @@ -635,7 +645,7 @@ int netpoll_setup(struct netpoll *np) int err; if (np->dev_name) - ndev = dev_get_by_name(np->dev_name); + ndev = dev_get_by_name(&init_net, np->dev_name); if (!ndev) { printk(KERN_ERR "%s: %s doesn't exist, aborting.\n", np->name, np->dev_name); @@ -652,8 +662,6 @@ int netpoll_setup(struct netpoll *np) npinfo->rx_flags = 0; npinfo->rx_np = NULL; - spin_lock_init(&npinfo->poll_lock); - npinfo->poll_owner = -1; spin_lock_init(&npinfo->rx_lock); skb_queue_head_init(&npinfo->arp_tx); @@ -820,6 +828,7 @@ void netpoll_set_trap(int trap) EXPORT_SYMBOL(netpoll_set_trap); EXPORT_SYMBOL(netpoll_trap); +EXPORT_SYMBOL(netpoll_print_options); EXPORT_SYMBOL(netpoll_parse_options); EXPORT_SYMBOL(netpoll_setup); EXPORT_SYMBOL(netpoll_cleanup); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index bca787fdbc5..2100c734b10 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -111,6 +111,9 @@ * * 802.1Q/Q-in-Q support by Francesco Fondelli (FF) <francesco.fondelli@gmail.com> * + * Fixed src_mac command to set source mac of packet to value specified in + * command by Adit Ranadive <adit.262@gmail.com> + * */ #include <linux/sys.h> #include <linux/types.h> @@ -149,6 +152,7 @@ #include <linux/wait.h> #include <linux/etherdevice.h> #include <linux/kthread.h> +#include <net/net_namespace.h> #include <net/checksum.h> #include <net/ipv6.h> #include <net/addrconf.h> @@ -164,7 +168,7 @@ #include <asm/div64.h> /* do_div */ #include <asm/timex.h> -#define VERSION "pktgen v2.68: Packet Generator for packet performance testing.\n" +#define VERSION "pktgen v2.69: Packet Generator for packet performance testing.\n" /* The buckets are exponential in 'width' */ #define LAT_BUCKETS_MAX 32 @@ -186,6 +190,7 @@ #define F_SVID_RND (1<<10) /* Random SVLAN ID */ #define F_FLOW_SEQ (1<<11) /* Sequential flows */ #define F_IPSEC_ON (1<<12) /* ipsec on for flows */ +#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */ /* Thread control flag bits */ #define T_TERMINATE (1<<0) @@ -328,6 +333,7 @@ struct pktgen_dev { __be32 cur_daddr; __u16 cur_udp_dst; __u16 cur_udp_src; + __u16 cur_queue_map; __u32 cur_pkt_size; __u8 hh[14]; @@ -355,6 +361,10 @@ struct pktgen_dev { unsigned lflow; /* Flow length (config) */ unsigned nflows; /* accumulated flows (stats) */ unsigned curfl; /* current sequenced flow (state)*/ + + u16 queue_map_min; + u16 queue_map_max; + #ifdef CONFIG_XFRM __u8 ipsmode; /* IPSEC mode (config) */ __u8 ipsproto; /* IPSEC type (config) */ @@ -375,12 +385,10 @@ struct pktgen_thread { struct list_head th_list; struct task_struct *tsk; char result[512]; - u32 max_before_softirq; /* We'll call do_softirq to prevent starvation. */ /* Field for thread to receive "posted" events terminate, stop ifs etc. */ u32 control; - int pid; int cpu; wait_queue_head_t queue; @@ -567,7 +575,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user * buf, pktgen_run_all_threads(); else - printk("pktgen: Unknown command: %s\n", data); + printk(KERN_WARNING "pktgen: Unknown command: %s\n", data); err = count; @@ -591,11 +599,11 @@ static const struct file_operations pktgen_fops = { static int pktgen_if_show(struct seq_file *seq, void *v) { - int i; struct pktgen_dev *pkt_dev = seq->private; __u64 sa; __u64 stopped; __u64 now = getCurUs(); + DECLARE_MAC_BUF(mac); seq_printf(seq, "Params: count %llu min_pkt_size: %u max_pkt_size: %u\n", @@ -611,6 +619,11 @@ static int pktgen_if_show(struct seq_file *seq, void *v) seq_printf(seq, " flows: %u flowlen: %u\n", pkt_dev->cflows, pkt_dev->lflow); + seq_printf(seq, + " queue_map_min: %u queue_map_max: %u\n", + pkt_dev->queue_map_min, + pkt_dev->queue_map_max); + if (pkt_dev->flags & F_IPV6) { char b1[128], b2[128], b3[128]; fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr); @@ -635,19 +648,12 @@ static int pktgen_if_show(struct seq_file *seq, void *v) seq_puts(seq, " src_mac: "); - if (is_zero_ether_addr(pkt_dev->src_mac)) - for (i = 0; i < 6; i++) - seq_printf(seq, "%02X%s", pkt_dev->odev->dev_addr[i], - i == 5 ? " " : ":"); - else - for (i = 0; i < 6; i++) - seq_printf(seq, "%02X%s", pkt_dev->src_mac[i], - i == 5 ? " " : ":"); + seq_printf(seq, "%s ", + print_mac(mac, is_zero_ether_addr(pkt_dev->src_mac) ? + pkt_dev->odev->dev_addr : pkt_dev->src_mac)); seq_printf(seq, "dst_mac: "); - for (i = 0; i < 6; i++) - seq_printf(seq, "%02X%s", pkt_dev->dst_mac[i], - i == 5 ? "\n" : ":"); + seq_printf(seq, "%s\n", print_mac(mac, pkt_dev->dst_mac)); seq_printf(seq, " udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n", @@ -707,6 +713,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v) if (pkt_dev->flags & F_MPLS_RND) seq_printf(seq, "MPLS_RND "); + if (pkt_dev->flags & F_QUEUE_MAP_RND) + seq_printf(seq, "QUEUE_MAP_RND "); + if (pkt_dev->cflows) { if (pkt_dev->flags & F_FLOW_SEQ) seq_printf(seq, "FLOW_SEQ "); /*in sequence flows*/ @@ -762,6 +771,8 @@ static int pktgen_if_show(struct seq_file *seq, void *v) seq_printf(seq, " cur_udp_dst: %d cur_udp_src: %d\n", pkt_dev->cur_udp_dst, pkt_dev->cur_udp_src); + seq_printf(seq, " cur_queue_map: %u\n", pkt_dev->cur_queue_map); + seq_printf(seq, " flows: %u\n", pkt_dev->nflows); if (pkt_dev->result[0]) @@ -908,14 +919,14 @@ static ssize_t pktgen_if_write(struct file *file, pg_result = &(pkt_dev->result[0]); if (count < 1) { - printk("pktgen: wrong command format\n"); + printk(KERN_WARNING "pktgen: wrong command format\n"); return -EINVAL; } max = count - i; tmp = count_trail_chars(&user_buffer[i], max); if (tmp < 0) { - printk("pktgen: illegal format\n"); + printk(KERN_WARNING "pktgen: illegal format\n"); return tmp; } i += tmp; @@ -943,7 +954,7 @@ static ssize_t pktgen_if_write(struct file *file, if (copy_from_user(tb, user_buffer, count)) return -EFAULT; tb[count] = 0; - printk("pktgen: %s,%lu buffer -:%s:-\n", name, + printk(KERN_DEBUG "pktgen: %s,%lu buffer -:%s:-\n", name, (unsigned long)count, tb); } @@ -1213,6 +1224,11 @@ static ssize_t pktgen_if_write(struct file *file, else if (strcmp(f, "FLOW_SEQ") == 0) pkt_dev->flags |= F_FLOW_SEQ; + else if (strcmp(f, "QUEUE_MAP_RND") == 0) + pkt_dev->flags |= F_QUEUE_MAP_RND; + + else if (strcmp(f, "!QUEUE_MAP_RND") == 0) + pkt_dev->flags &= ~F_QUEUE_MAP_RND; #ifdef CONFIG_XFRM else if (strcmp(f, "IPSEC") == 0) pkt_dev->flags |= F_IPSEC_ON; @@ -1248,7 +1264,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->cur_daddr = pkt_dev->daddr_min; } if (debug) - printk("pktgen: dst_min set to: %s\n", + printk(KERN_DEBUG "pktgen: dst_min set to: %s\n", pkt_dev->dst_min); i += len; sprintf(pg_result, "OK: dst_min=%s", pkt_dev->dst_min); @@ -1271,7 +1287,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->cur_daddr = pkt_dev->daddr_max; } if (debug) - printk("pktgen: dst_max set to: %s\n", + printk(KERN_DEBUG "pktgen: dst_max set to: %s\n", pkt_dev->dst_max); i += len; sprintf(pg_result, "OK: dst_max=%s", pkt_dev->dst_max); @@ -1294,7 +1310,7 @@ static ssize_t pktgen_if_write(struct file *file, ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->in6_daddr); if (debug) - printk("pktgen: dst6 set to: %s\n", buf); + printk(KERN_DEBUG "pktgen: dst6 set to: %s\n", buf); i += len; sprintf(pg_result, "OK: dst6=%s", buf); @@ -1317,7 +1333,7 @@ static ssize_t pktgen_if_write(struct file *file, ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->min_in6_daddr); if (debug) - printk("pktgen: dst6_min set to: %s\n", buf); + printk(KERN_DEBUG "pktgen: dst6_min set to: %s\n", buf); i += len; sprintf(pg_result, "OK: dst6_min=%s", buf); @@ -1338,7 +1354,7 @@ static ssize_t pktgen_if_write(struct file *file, fmt_ip6(buf, pkt_dev->max_in6_daddr.s6_addr); if (debug) - printk("pktgen: dst6_max set to: %s\n", buf); + printk(KERN_DEBUG "pktgen: dst6_max set to: %s\n", buf); i += len; sprintf(pg_result, "OK: dst6_max=%s", buf); @@ -1361,7 +1377,7 @@ static ssize_t pktgen_if_write(struct file *file, ipv6_addr_copy(&pkt_dev->cur_in6_saddr, &pkt_dev->in6_saddr); if (debug) - printk("pktgen: src6 set to: %s\n", buf); + printk(KERN_DEBUG "pktgen: src6 set to: %s\n", buf); i += len; sprintf(pg_result, "OK: src6=%s", buf); @@ -1382,7 +1398,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->cur_saddr = pkt_dev->saddr_min; } if (debug) - printk("pktgen: src_min set to: %s\n", + printk(KERN_DEBUG "pktgen: src_min set to: %s\n", pkt_dev->src_min); i += len; sprintf(pg_result, "OK: src_min=%s", pkt_dev->src_min); @@ -1403,7 +1419,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->cur_saddr = pkt_dev->saddr_max; } if (debug) - printk("pktgen: src_max set to: %s\n", + printk(KERN_DEBUG "pktgen: src_max set to: %s\n", pkt_dev->src_max); i += len; sprintf(pg_result, "OK: src_max=%s", pkt_dev->src_max); @@ -1452,8 +1468,11 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "src_mac")) { char *v = valstr; + unsigned char old_smac[ETH_ALEN]; unsigned char *m = pkt_dev->src_mac; + memcpy(old_smac, pkt_dev->src_mac, ETH_ALEN); + len = strn_len(&user_buffer[i], sizeof(valstr) - 1); if (len < 0) { return len; @@ -1482,6 +1501,10 @@ static ssize_t pktgen_if_write(struct file *file, } } + /* Set up Src MAC */ + if (compare_ether_addr(old_smac, pkt_dev->src_mac)) + memcpy(&(pkt_dev->hh[6]), pkt_dev->src_mac, ETH_ALEN); + sprintf(pg_result, "OK: srcmac"); return count; } @@ -1517,23 +1540,47 @@ static ssize_t pktgen_if_write(struct file *file, return count; } + if (!strcmp(name, "queue_map_min")) { + len = num_arg(&user_buffer[i], 5, &value); + if (len < 0) { + return len; + } + i += len; + pkt_dev->queue_map_min = value; + sprintf(pg_result, "OK: queue_map_min=%u", pkt_dev->queue_map_min); + return count; + } + + if (!strcmp(name, "queue_map_max")) { + len = num_arg(&user_buffer[i], 5, &value); + if (len < 0) { + return len; + } + i += len; + pkt_dev->queue_map_max = value; + sprintf(pg_result, "OK: queue_map_max=%u", pkt_dev->queue_map_max); + return count; + } + if (!strcmp(name, "mpls")) { - unsigned n, offset; + unsigned n, cnt; + len = get_labels(&user_buffer[i], pkt_dev); - if (len < 0) { return len; } + if (len < 0) + return len; i += len; - offset = sprintf(pg_result, "OK: mpls="); + cnt = sprintf(pg_result, "OK: mpls="); for (n = 0; n < pkt_dev->nr_labels; n++) - offset += sprintf(pg_result + offset, - "%08x%s", ntohl(pkt_dev->labels[n]), - n == pkt_dev->nr_labels-1 ? "" : ","); + cnt += sprintf(pg_result + cnt, + "%08x%s", ntohl(pkt_dev->labels[n]), + n == pkt_dev->nr_labels-1 ? "" : ","); if (pkt_dev->nr_labels && pkt_dev->vlan_id != 0xffff) { pkt_dev->vlan_id = 0xffff; /* turn off VLAN/SVLAN */ pkt_dev->svlan_id = 0xffff; if (debug) - printk("pktgen: VLAN/SVLAN auto turned off\n"); + printk(KERN_DEBUG "pktgen: VLAN/SVLAN auto turned off\n"); } return count; } @@ -1548,10 +1595,10 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->vlan_id = value; /* turn on VLAN */ if (debug) - printk("pktgen: VLAN turned on\n"); + printk(KERN_DEBUG "pktgen: VLAN turned on\n"); if (debug && pkt_dev->nr_labels) - printk("pktgen: MPLS auto turned off\n"); + printk(KERN_DEBUG "pktgen: MPLS auto turned off\n"); pkt_dev->nr_labels = 0; /* turn off MPLS */ sprintf(pg_result, "OK: vlan_id=%u", pkt_dev->vlan_id); @@ -1560,7 +1607,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->svlan_id = 0xffff; if (debug) - printk("pktgen: VLAN/SVLAN turned off\n"); + printk(KERN_DEBUG "pktgen: VLAN/SVLAN turned off\n"); } return count; } @@ -1605,10 +1652,10 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->svlan_id = value; /* turn on SVLAN */ if (debug) - printk("pktgen: SVLAN turned on\n"); + printk(KERN_DEBUG "pktgen: SVLAN turned on\n"); if (debug && pkt_dev->nr_labels) - printk("pktgen: MPLS auto turned off\n"); + printk(KERN_DEBUG "pktgen: MPLS auto turned off\n"); pkt_dev->nr_labels = 0; /* turn off MPLS */ sprintf(pg_result, "OK: svlan_id=%u", pkt_dev->svlan_id); @@ -1617,7 +1664,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->svlan_id = 0xffff; if (debug) - printk("pktgen: VLAN/SVLAN turned off\n"); + printk(KERN_DEBUG "pktgen: VLAN/SVLAN turned off\n"); } return count; } @@ -1709,9 +1756,6 @@ static int pktgen_thread_show(struct seq_file *seq, void *v) BUG_ON(!t); - seq_printf(seq, "Name: %s max_before_softirq: %d\n", - t->tsk->comm, t->max_before_softirq); - seq_printf(seq, "Running: "); if_lock(t); @@ -1744,7 +1788,6 @@ static ssize_t pktgen_thread_write(struct file *file, int i = 0, max, len, ret; char name[40]; char *pg_result; - unsigned long value = 0; if (count < 1) { // sprintf(pg_result, "Wrong command format"); @@ -1777,10 +1820,11 @@ static ssize_t pktgen_thread_write(struct file *file, i += len; if (debug) - printk("pktgen: t=%s, count=%lu\n", name, (unsigned long)count); + printk(KERN_DEBUG "pktgen: t=%s, count=%lu\n", + name, (unsigned long)count); if (!t) { - printk("pktgen: ERROR: No thread\n"); + printk(KERN_ERR "pktgen: ERROR: No thread\n"); ret = -EINVAL; goto out; } @@ -1817,12 +1861,8 @@ static ssize_t pktgen_thread_write(struct file *file, } if (!strcmp(name, "max_before_softirq")) { - len = num_arg(&user_buffer[i], 10, &value); - mutex_lock(&pktgen_thread_lock); - t->max_before_softirq = value; - mutex_unlock(&pktgen_thread_lock); + sprintf(pg_result, "OK: Note! max_before_softirq is obsoleted -- Do not use"); ret = count; - sprintf(pg_result, "OK: max_before_softirq=%lu", value); goto out; } @@ -1891,8 +1931,8 @@ static void pktgen_mark_device(const char *ifname) mutex_lock(&pktgen_thread_lock); if (++i >= max_tries) { - printk("pktgen_mark_device: timed out after waiting " - "%d msec for device %s to be removed\n", + printk(KERN_ERR "pktgen_mark_device: timed out after " + "waiting %d msec for device %s to be removed\n", msec_per_try * i, ifname); break; } @@ -1930,6 +1970,9 @@ static int pktgen_device_event(struct notifier_block *unused, { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + /* It is OK that we do not hold the group lock right now, * as we run under the RTNL lock. */ @@ -1960,17 +2003,17 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname) pkt_dev->odev = NULL; } - odev = dev_get_by_name(ifname); + odev = dev_get_by_name(&init_net, ifname); if (!odev) { - printk("pktgen: no such netdevice: \"%s\"\n", ifname); + printk(KERN_ERR "pktgen: no such netdevice: \"%s\"\n", ifname); return -ENODEV; } if (odev->type != ARPHRD_ETHER) { - printk("pktgen: not an ethernet device: \"%s\"\n", ifname); + printk(KERN_ERR "pktgen: not an ethernet device: \"%s\"\n", ifname); err = -EINVAL; } else if (!netif_running(odev)) { - printk("pktgen: device is down: \"%s\"\n", ifname); + printk(KERN_ERR "pktgen: device is down: \"%s\"\n", ifname); err = -ENETDOWN; } else { pkt_dev->odev = odev; @@ -1987,7 +2030,8 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname) static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) { if (!pkt_dev->odev) { - printk("pktgen: ERROR: pkt_dev->odev == NULL in setup_inject.\n"); + printk(KERN_ERR "pktgen: ERROR: pkt_dev->odev == NULL in " + "setup_inject.\n"); sprintf(pkt_dev->result, "ERROR: pkt_dev->odev == NULL in setup_inject.\n"); return; @@ -2049,7 +2093,8 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) } rcu_read_unlock(); if (err) - printk("pktgen: ERROR: IPv6 link address not availble.\n"); + printk(KERN_ERR "pktgen: ERROR: IPv6 link " + "address not availble.\n"); } #endif } else { @@ -2099,7 +2144,6 @@ static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us) if (spin_until_us - now > jiffies_to_usecs(1) + 1) schedule_timeout_interruptible(1); else if (spin_until_us - now > 100) { - do_softirq(); if (!pkt_dev->running) return; if (need_resched()) @@ -2156,8 +2200,7 @@ static inline int f_pick(struct pktgen_dev *pkt_dev) /* If there was already an IPSEC SA, we keep it as is, else * we go look for it ... */ -inline -void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) +static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) { struct xfrm_state *x = pkt_dev->flows[flow].x; if (!x) { @@ -2376,6 +2419,20 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) pkt_dev->cur_pkt_size = t; } + if (pkt_dev->queue_map_min < pkt_dev->queue_map_max) { + __u16 t; + if (pkt_dev->flags & F_QUEUE_MAP_RND) { + t = random32() % + (pkt_dev->queue_map_max - pkt_dev->queue_map_min + 1) + + pkt_dev->queue_map_min; + } else { + t = pkt_dev->cur_queue_map + 1; + if (t > pkt_dev->queue_map_max) + t = pkt_dev->queue_map_min; + } + pkt_dev->cur_queue_map = t; + } + pkt_dev->flows[flow].count++; } @@ -2441,7 +2498,8 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev, if (nhead >0) { ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); if (ret < 0) { - printk("Error expanding ipsec packet %d\n",ret); + printk(KERN_ERR "Error expanding " + "ipsec packet %d\n",ret); return 0; } } @@ -2450,7 +2508,8 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev, skb_pull(skb, ETH_HLEN); ret = pktgen_output_ipsec(skb, pkt_dev); if (ret) { - printk("Error creating ipsec packet %d\n",ret); + printk(KERN_ERR "Error creating ipsec " + "packet %d\n",ret); kfree_skb(skb); return 0; } @@ -2544,6 +2603,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, skb->network_header = skb->tail; skb->transport_header = skb->network_header + sizeof(struct iphdr); skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr)); + skb->queue_mapping = pkt_dev->cur_queue_map; iph = ip_hdr(skb); udph = udp_hdr(skb); @@ -2673,6 +2733,7 @@ static unsigned int scan_ip6(const char *s, char ip[16]) unsigned int prefixlen = 0; unsigned int suffixlen = 0; __be32 tmp; + char *pos; for (i = 0; i < 16; i++) ip[i] = 0; @@ -2687,12 +2748,9 @@ static unsigned int scan_ip6(const char *s, char ip[16]) } s++; } - { - char *tmp; - u = simple_strtoul(s, &tmp, 16); - i = tmp - s; - } + u = simple_strtoul(s, &pos, 16); + i = pos - s; if (!i) return 0; if (prefixlen == 12 && s[i] == '.') { @@ -2720,11 +2778,9 @@ static unsigned int scan_ip6(const char *s, char ip[16]) len++; } else if (suffixlen != 0) break; - { - char *tmp; - u = simple_strtol(s, &tmp, 16); - i = tmp - s; - } + + u = simple_strtol(s, &pos, 16); + i = pos - s; if (!i) { if (*s) len--; @@ -2885,6 +2941,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, skb->network_header = skb->tail; skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr)); + skb->queue_mapping = pkt_dev->cur_queue_map; iph = ipv6_hdr(skb); udph = udp_hdr(skb); @@ -3184,8 +3241,8 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev) int nr_frags = pkt_dev->skb ? skb_shinfo(pkt_dev->skb)->nr_frags : -1; if (!pkt_dev->running) { - printk("pktgen: interface: %s is already stopped\n", - pkt_dev->odev->name); + printk(KERN_WARNING "pktgen: interface: %s is already " + "stopped\n", pkt_dev->odev->name); return -EINVAL; } @@ -3327,8 +3384,9 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) } if ((netif_queue_stopped(odev) || - netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping)) || - need_resched()) { + (pkt_dev->skb && + netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping))) || + need_resched()) { idle_start = getCurUs(); if (!netif_running(odev)) { @@ -3360,7 +3418,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->skb = fill_packet(odev, pkt_dev); if (pkt_dev->skb == NULL) { - printk("pktgen: ERROR: couldn't allocate skb in fill_packet.\n"); + printk(KERN_ERR "pktgen: ERROR: couldn't " + "allocate skb in fill_packet.\n"); schedule(); pkt_dev->clone_count--; /* back out increment, OOM */ goto out; @@ -3450,19 +3509,13 @@ static int pktgen_thread_worker(void *arg) struct pktgen_thread *t = arg; struct pktgen_dev *pkt_dev = NULL; int cpu = t->cpu; - u32 max_before_softirq; - u32 tx_since_softirq = 0; BUG_ON(smp_processor_id() != cpu); init_waitqueue_head(&t->queue); - t->pid = current->pid; - pr_debug("pktgen: starting pktgen/%d: pid=%d\n", cpu, current->pid); - max_before_softirq = t->max_before_softirq; - set_current_state(TASK_INTERRUPTIBLE); set_freezable(); @@ -3481,24 +3534,9 @@ static int pktgen_thread_worker(void *arg) __set_current_state(TASK_RUNNING); - if (pkt_dev) { - + if (pkt_dev) pktgen_xmit(pkt_dev); - /* - * We like to stay RUNNING but must also give - * others fair share. - */ - - tx_since_softirq += pkt_dev->last_ok; - - if (tx_since_softirq > max_before_softirq) { - if (local_softirq_pending()) - do_softirq(); - tx_since_softirq = 0; - } - } - if (t->control & T_STOP) { pktgen_stop(t); t->control &= ~(T_STOP); @@ -3565,7 +3603,8 @@ static int add_dev_to_thread(struct pktgen_thread *t, if_lock(t); if (pkt_dev->pg_thread) { - printk("pktgen: ERROR: already assigned to a thread.\n"); + printk(KERN_ERR "pktgen: ERROR: already assigned " + "to a thread.\n"); rv = -EBUSY; goto out; } @@ -3590,7 +3629,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev = __pktgen_NN_threads(ifname, FIND); if (pkt_dev) { - printk("pktgen: ERROR: interface already used.\n"); + printk(KERN_ERR "pktgen: ERROR: interface already used.\n"); return -EBUSY; } @@ -3632,7 +3671,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev->entry = create_proc_entry(ifname, 0600, pg_proc_dir); if (!pkt_dev->entry) { - printk("pktgen: cannot create %s/%s procfs entry.\n", + printk(KERN_ERR "pktgen: cannot create %s/%s procfs entry.\n", PG_PROC_DIR, ifname); err = -EINVAL; goto out2; @@ -3665,7 +3704,8 @@ static int __init pktgen_create_thread(int cpu) t = kzalloc(sizeof(struct pktgen_thread), GFP_KERNEL); if (!t) { - printk("pktgen: ERROR: out of memory, can't create new thread.\n"); + printk(KERN_ERR "pktgen: ERROR: out of memory, can't " + "create new thread.\n"); return -ENOMEM; } @@ -3678,7 +3718,8 @@ static int __init pktgen_create_thread(int cpu) p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu); if (IS_ERR(p)) { - printk("pktgen: kernel_thread() failed for cpu %d\n", t->cpu); + printk(KERN_ERR "pktgen: kernel_thread() failed " + "for cpu %d\n", t->cpu); list_del(&t->th_list); kfree(t); return PTR_ERR(p); @@ -3688,7 +3729,7 @@ static int __init pktgen_create_thread(int cpu) pe = create_proc_entry(t->tsk->comm, 0600, pg_proc_dir); if (!pe) { - printk("pktgen: cannot create %s/%s procfs entry.\n", + printk(KERN_ERR "pktgen: cannot create %s/%s procfs entry.\n", PG_PROC_DIR, t->tsk->comm); kthread_stop(p); list_del(&t->th_list); @@ -3727,7 +3768,8 @@ static int pktgen_remove_device(struct pktgen_thread *t, pr_debug("pktgen: remove_device pkt_dev=%p\n", pkt_dev); if (pkt_dev->running) { - printk("pktgen:WARNING: trying to remove a running interface, stopping it now.\n"); + printk(KERN_WARNING "pktgen: WARNING: trying to remove a " + "running interface, stopping it now.\n"); pktgen_stop_device(pkt_dev); } @@ -3759,18 +3801,18 @@ static int __init pg_init(void) int cpu; struct proc_dir_entry *pe; - printk(version); + printk(KERN_INFO "%s", version); - pg_proc_dir = proc_mkdir(PG_PROC_DIR, proc_net); + pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net); if (!pg_proc_dir) return -ENODEV; pg_proc_dir->owner = THIS_MODULE; pe = create_proc_entry(PGCTRL, 0600, pg_proc_dir); if (pe == NULL) { - printk("pktgen: ERROR: cannot create %s procfs entry.\n", - PGCTRL); - proc_net_remove(PG_PROC_DIR); + printk(KERN_ERR "pktgen: ERROR: cannot create %s " + "procfs entry.\n", PGCTRL); + proc_net_remove(&init_net, PG_PROC_DIR); return -EINVAL; } @@ -3785,15 +3827,16 @@ static int __init pg_init(void) err = pktgen_create_thread(cpu); if (err) - printk("pktgen: WARNING: Cannot create thread for cpu %d (%d)\n", - cpu, err); + printk(KERN_WARNING "pktgen: WARNING: Cannot create " + "thread for cpu %d (%d)\n", cpu, err); } if (list_empty(&pktgen_threads)) { - printk("pktgen: ERROR: Initialization failed for all threads\n"); + printk(KERN_ERR "pktgen: ERROR: Initialization failed for " + "all threads\n"); unregister_netdevice_notifier(&pktgen_notifier_block); remove_proc_entry(PGCTRL, pg_proc_dir); - proc_net_remove(PG_PROC_DIR); + proc_net_remove(&init_net, PG_PROC_DIR); return -ENODEV; } @@ -3820,7 +3863,7 @@ static void __exit pg_cleanup(void) /* Clean up proc file system */ remove_proc_entry(PGCTRL, pg_proc_dir); - proc_net_remove(PG_PROC_DIR); + proc_net_remove(&init_net, PG_PROC_DIR); } module_init(pg_init); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 864cbdf31ed..1072d16696c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -35,6 +35,7 @@ #include <linux/security.h> #include <linux/mutex.h> #include <linux/if_addr.h> +#include <linux/nsproxy.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -74,8 +75,6 @@ void __rtnl_unlock(void) void rtnl_unlock(void) { mutex_unlock(&rtnl_mutex); - if (rtnl && rtnl->sk_receive_queue.qlen) - rtnl->sk_data_ready(rtnl, 0); netdev_run_todo(); } @@ -98,7 +97,7 @@ int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len) } int __rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr, - struct rtattr *rta, int len) + struct rtattr *rta, int len) { if (RTA_PAYLOAD(rta) < len) return -1; @@ -306,10 +305,13 @@ EXPORT_SYMBOL_GPL(rtnl_link_register); void __rtnl_link_unregister(struct rtnl_link_ops *ops) { struct net_device *dev, *n; + struct net *net; - for_each_netdev_safe(dev, n) { - if (dev->rtnl_link_ops == ops) - ops->dellink(dev); + for_each_net(net) { + for_each_netdev_safe(net, dev, n) { + if (dev->rtnl_link_ops == ops) + ops->dellink(dev); + } } list_del(&ops->list); } @@ -634,7 +636,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name); NLA_PUT_U32(skb, IFLA_TXQLEN, dev->tx_queue_len); - NLA_PUT_U32(skb, IFLA_WEIGHT, dev->weight); NLA_PUT_U8(skb, IFLA_OPERSTATE, netif_running(dev) ? dev->operstate : IF_OPER_DOWN); NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode); @@ -694,12 +695,13 @@ nla_put_failure: static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; int idx; int s_idx = cb->args[0]; struct net_device *dev; idx = 0; - for_each_netdev(dev) { + for_each_netdev(net, dev) { if (idx < s_idx) goto cont; if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, @@ -714,7 +716,7 @@ cont: return skb->len; } -static const struct nla_policy ifla_policy[IFLA_MAX+1] = { +const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 }, [IFLA_ADDRESS] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, [IFLA_BROADCAST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, @@ -724,6 +726,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_WEIGHT] = { .type = NLA_U32 }, [IFLA_OPERSTATE] = { .type = NLA_U8 }, [IFLA_LINKMODE] = { .type = NLA_U8 }, + [IFLA_NET_NS_PID] = { .type = NLA_U32 }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -731,12 +734,45 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { [IFLA_INFO_DATA] = { .type = NLA_NESTED }, }; +static struct net *get_net_ns_by_pid(pid_t pid) +{ + struct task_struct *tsk; + struct net *net; + + /* Lookup the network namespace */ + net = ERR_PTR(-ESRCH); + rcu_read_lock(); + tsk = find_task_by_pid(pid); + if (tsk) { + task_lock(tsk); + if (tsk->nsproxy) + net = get_net(tsk->nsproxy->net_ns); + task_unlock(tsk); + } + rcu_read_unlock(); + return net; +} + static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct nlattr **tb, char *ifname, int modified) { int send_addr_notify = 0; int err; + if (tb[IFLA_NET_NS_PID]) { + struct net *net; + net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID])); + if (IS_ERR(net)) { + err = PTR_ERR(net); + goto errout; + } + err = dev_change_net_namespace(dev, net, ifname); + put_net(net); + if (err) + goto errout; + modified = 1; + } + if (tb[IFLA_MAP]) { struct rtnl_link_ifmap *u_map; struct ifmap k_map; @@ -834,9 +870,6 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, if (tb[IFLA_TXQLEN]) dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); - if (tb[IFLA_WEIGHT]) - dev->weight = nla_get_u32(tb[IFLA_WEIGHT]); - if (tb[IFLA_OPERSTATE]) set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); @@ -862,6 +895,7 @@ errout: static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ifinfomsg *ifm; struct net_device *dev; int err; @@ -880,9 +914,9 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) err = -EINVAL; ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) - dev = dev_get_by_index(ifm->ifi_index); + dev = dev_get_by_index(net, ifm->ifi_index); else if (tb[IFLA_IFNAME]) - dev = dev_get_by_name(ifname); + dev = dev_get_by_name(net, ifname); else goto errout; @@ -908,6 +942,7 @@ errout: static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; const struct rtnl_link_ops *ops; struct net_device *dev; struct ifinfomsg *ifm; @@ -924,9 +959,9 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) - dev = __dev_get_by_index(ifm->ifi_index); + dev = __dev_get_by_index(net, ifm->ifi_index); else if (tb[IFLA_IFNAME]) - dev = __dev_get_by_name(ifname); + dev = __dev_get_by_name(net, ifname); else return -EINVAL; @@ -941,8 +976,52 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return 0; } +struct net_device *rtnl_create_link(struct net *net, char *ifname, + const struct rtnl_link_ops *ops, struct nlattr *tb[]) +{ + int err; + struct net_device *dev; + + err = -ENOMEM; + dev = alloc_netdev(ops->priv_size, ifname, ops->setup); + if (!dev) + goto err; + + if (strchr(dev->name, '%')) { + err = dev_alloc_name(dev, dev->name); + if (err < 0) + goto err_free; + } + + dev->nd_net = net; + dev->rtnl_link_ops = ops; + + if (tb[IFLA_MTU]) + dev->mtu = nla_get_u32(tb[IFLA_MTU]); + if (tb[IFLA_ADDRESS]) + memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]), + nla_len(tb[IFLA_ADDRESS])); + if (tb[IFLA_BROADCAST]) + memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]), + nla_len(tb[IFLA_BROADCAST])); + if (tb[IFLA_TXQLEN]) + dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); + if (tb[IFLA_OPERSTATE]) + set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); + if (tb[IFLA_LINKMODE]) + dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); + + return dev; + +err_free: + free_netdev(dev); +err: + return ERR_PTR(err); +} + static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; const struct rtnl_link_ops *ops; struct net_device *dev; struct ifinfomsg *ifm; @@ -952,7 +1031,9 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct nlattr *linkinfo[IFLA_INFO_MAX+1]; int err; +#ifdef CONFIG_KMOD replay: +#endif err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); if (err < 0) return err; @@ -964,9 +1045,9 @@ replay: ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) - dev = __dev_get_by_index(ifm->ifi_index); + dev = __dev_get_by_index(net, ifm->ifi_index); else if (ifname[0]) - dev = __dev_get_by_name(ifname); + dev = __dev_get_by_name(net, ifname); else dev = NULL; @@ -1051,40 +1132,17 @@ replay: if (!ifname[0]) snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); - dev = alloc_netdev(ops->priv_size, ifname, ops->setup); - if (!dev) - return -ENOMEM; - - if (strchr(dev->name, '%')) { - err = dev_alloc_name(dev, dev->name); - if (err < 0) - goto err_free; - } - dev->rtnl_link_ops = ops; - - if (tb[IFLA_MTU]) - dev->mtu = nla_get_u32(tb[IFLA_MTU]); - if (tb[IFLA_ADDRESS]) - memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]), - nla_len(tb[IFLA_ADDRESS])); - if (tb[IFLA_BROADCAST]) - memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]), - nla_len(tb[IFLA_BROADCAST])); - if (tb[IFLA_TXQLEN]) - dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); - if (tb[IFLA_WEIGHT]) - dev->weight = nla_get_u32(tb[IFLA_WEIGHT]); - if (tb[IFLA_OPERSTATE]) - set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); - if (tb[IFLA_LINKMODE]) - dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); - - if (ops->newlink) + + dev = rtnl_create_link(net, ifname, ops, tb); + + if (IS_ERR(dev)) + err = PTR_ERR(dev); + else if (ops->newlink) err = ops->newlink(dev, tb, data); else err = register_netdevice(dev); -err_free: - if (err < 0) + + if (err < 0 && !IS_ERR(dev)) free_netdev(dev); return err; } @@ -1092,6 +1150,7 @@ err_free: static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ifinfomsg *ifm; struct nlattr *tb[IFLA_MAX+1]; struct net_device *dev = NULL; @@ -1104,7 +1163,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) { - dev = dev_get_by_index(ifm->ifi_index); + dev = dev_get_by_index(net, ifm->ifi_index); if (dev == NULL) return -ENODEV; } else @@ -1253,22 +1312,20 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return doit(skb, nlh, (void *)&rta_buf[0]); } -static void rtnetlink_rcv(struct sock *sk, int len) +static void rtnetlink_rcv(struct sk_buff *skb) { - unsigned int qlen = 0; - - do { - mutex_lock(&rtnl_mutex); - netlink_run_queue(sk, &qlen, &rtnetlink_rcv_msg); - mutex_unlock(&rtnl_mutex); - - netdev_run_todo(); - } while (qlen); + rtnl_lock(); + netlink_rcv_skb(skb, &rtnetlink_rcv_msg); + rtnl_unlock(); } static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; + + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch (event) { case NETDEV_UNREGISTER: rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); @@ -1306,8 +1363,8 @@ void __init rtnetlink_init(void) if (!rta_buf) panic("rtnetlink_init: cannot allocate rta_buf\n"); - rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv, - &rtnl_mutex, THIS_MODULE); + rtnl = netlink_kernel_create(&init_net, NETLINK_ROUTE, RTNLGRP_MAX, + rtnetlink_rcv, &rtnl_mutex, THIS_MODULE); if (rtnl == NULL) panic("rtnetlink_init: cannot initialize rtnetlink\n"); netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); @@ -1333,3 +1390,5 @@ EXPORT_SYMBOL(rtnl_unlock); EXPORT_SYMBOL(rtnl_unicast); EXPORT_SYMBOL(rtnl_notify); EXPORT_SYMBOL(rtnl_set_sk_err); +EXPORT_SYMBOL(rtnl_create_link); +EXPORT_SYMBOL(ifla_policy); diff --git a/net/core/scm.c b/net/core/scm.c index 44c4ec2c876..530bee8d9ed 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -167,7 +167,8 @@ error: int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) { - struct cmsghdr __user *cm = (struct cmsghdr __user *)msg->msg_control; + struct cmsghdr __user *cm + = (__force struct cmsghdr __user *)msg->msg_control; struct cmsghdr cmhdr; int cmlen = CMSG_LEN(len); int err; @@ -202,7 +203,8 @@ out: void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) { - struct cmsghdr __user *cm = (struct cmsghdr __user*)msg->msg_control; + struct cmsghdr __user *cm + = (__force struct cmsghdr __user*)msg->msg_control; int fdmax = 0; int fdnum = scm->fp->count; @@ -222,7 +224,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) if (fdnum < fdmax) fdmax = fdnum; - for (i=0, cmfptr=(int __user *)CMSG_DATA(cm); i<fdmax; i++, cmfptr++) + for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); i<fdmax; + i++, cmfptr++) { int new_fd; err = security_file_receive(fp[i]); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0583e8498f1..944189d9632 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -472,6 +472,9 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #ifdef CONFIG_INET new->sp = secpath_get(old->sp); #endif + new->csum_start = old->csum_start; + new->csum_offset = old->csum_offset; + new->ip_summed = old->ip_summed; new->transport_header = old->transport_header; new->network_header = old->network_header; new->mac_header = old->mac_header; @@ -545,8 +548,6 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) skb_reserve(n, headerlen); /* Set the tail pointer and length */ skb_put(n, skb->len); - n->csum = skb->csum; - n->ip_summed = skb->ip_summed; if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)) BUG(); @@ -589,8 +590,6 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) skb_put(n, skb_headlen(skb)); /* Copy the bytes */ skb_copy_from_linear_data(skb, n->data, n->len); - n->csum = skb->csum; - n->ip_summed = skb->ip_summed; n->truesize += skb->data_len; n->data_len = skb->data_len; @@ -686,6 +685,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->transport_header += off; skb->network_header += off; skb->mac_header += off; + skb->csum_start += off; skb->cloned = 0; skb->hdr_len = 0; skb->nohdr = 0; @@ -734,9 +734,6 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) * * You must pass %GFP_ATOMIC as the allocation priority if this function * is called from an interrupt. - * - * BUG ALERT: ip_summed is not copied. Why does this work? Is it used - * only by netfilter in the cases when checksum is recalculated? --ANK */ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, @@ -749,7 +746,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, gfp_mask); int oldheadroom = skb_headroom(skb); int head_copy_len, head_copy_off; - int off = 0; + int off; if (!n) return NULL; @@ -773,12 +770,13 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, copy_skb_header(n, skb); -#ifdef NET_SKBUFF_DATA_USES_OFFSET off = newheadroom - oldheadroom; -#endif + n->csum_start += off; +#ifdef NET_SKBUFF_DATA_USES_OFFSET n->transport_header += off; n->network_header += off; n->mac_header += off; +#endif return n; } @@ -2021,13 +2019,13 @@ void __init skb_init(void) sizeof(struct sk_buff), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + NULL); skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", (2*sizeof(struct sk_buff)) + sizeof(atomic_t), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + NULL); } /** diff --git a/net/core/sock.c b/net/core/sock.c index 091032a250c..4ed9b507c1e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -119,6 +119,7 @@ #include <linux/netdevice.h> #include <net/protocol.h> #include <linux/skbuff.h> +#include <net/net_namespace.h> #include <net/request_sock.h> #include <net/sock.h> #include <net/xfrm.h> @@ -171,6 +172,20 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , "slock-AF_RXRPC" , "slock-AF_MAX" }; +static const char *af_family_clock_key_strings[AF_MAX+1] = { + "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , + "clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK", + "clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" , + "clock-AF_X25" , "clock-AF_INET6" , "clock-AF_ROSE" , + "clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" , + "clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" , + "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" , + "clock-21" , "clock-AF_SNA" , "clock-AF_IRDA" , + "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" , + "clock-27" , "clock-28" , "clock-29" , + "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , + "clock-AF_RXRPC" , "clock-AF_MAX" +}; #endif /* @@ -217,7 +232,7 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) warned++; printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) " "tries to set negative timeout\n", - current->comm, current->pid); + current->comm, current->pid); return 0; } *timeo_p = MAX_SCHEDULE_TIMEOUT; @@ -348,6 +363,62 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie) } EXPORT_SYMBOL(sk_dst_check); +static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen) +{ + int ret = -ENOPROTOOPT; +#ifdef CONFIG_NETDEVICES + struct net *net = sk->sk_net; + char devname[IFNAMSIZ]; + int index; + + /* Sorry... */ + ret = -EPERM; + if (!capable(CAP_NET_RAW)) + goto out; + + ret = -EINVAL; + if (optlen < 0) + goto out; + + /* Bind this socket to a particular device like "eth0", + * as specified in the passed interface name. If the + * name is "" or the option length is zero the socket + * is not bound. + */ + if (optlen > IFNAMSIZ - 1) + optlen = IFNAMSIZ - 1; + memset(devname, 0, sizeof(devname)); + + ret = -EFAULT; + if (copy_from_user(devname, optval, optlen)) + goto out; + + if (devname[0] == '\0') { + index = 0; + } else { + struct net_device *dev = dev_get_by_name(net, devname); + + ret = -ENODEV; + if (!dev) + goto out; + + index = dev->ifindex; + dev_put(dev); + } + + lock_sock(sk); + sk->sk_bound_dev_if = index; + sk_dst_reset(sk); + release_sock(sk); + + ret = 0; + +out: +#endif + + return ret; +} + /* * This is meant for all protocols to use and covers goings on * at the socket level. Everything here is generic. @@ -376,6 +447,9 @@ int sock_setsockopt(struct socket *sock, int level, int optname, } #endif + if (optname == SO_BINDTODEVICE) + return sock_bindtodevice(sk, optval, optlen); + if (optlen < sizeof(int)) return -EINVAL; @@ -564,54 +638,6 @@ set_rcvbuf: ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen); break; -#ifdef CONFIG_NETDEVICES - case SO_BINDTODEVICE: - { - char devname[IFNAMSIZ]; - - /* Sorry... */ - if (!capable(CAP_NET_RAW)) { - ret = -EPERM; - break; - } - - /* Bind this socket to a particular device like "eth0", - * as specified in the passed interface name. If the - * name is "" or the option length is zero the socket - * is not bound. - */ - - if (!valbool) { - sk->sk_bound_dev_if = 0; - } else { - if (optlen > IFNAMSIZ - 1) - optlen = IFNAMSIZ - 1; - memset(devname, 0, sizeof(devname)); - if (copy_from_user(devname, optval, optlen)) { - ret = -EFAULT; - break; - } - - /* Remove any cached route for this socket. */ - sk_dst_reset(sk); - - if (devname[0] == '\0') { - sk->sk_bound_dev_if = 0; - } else { - struct net_device *dev = dev_get_by_name(devname); - if (!dev) { - ret = -ENODEV; - break; - } - sk->sk_bound_dev_if = dev->ifindex; - dev_put(dev); - } - } - break; - } -#endif - - case SO_ATTACH_FILTER: ret = -EINVAL; if (optlen == sizeof(struct sock_fprog)) { @@ -848,7 +874,7 @@ static inline void sock_lock_init(struct sock *sk) * @prot: struct proto associated with this new sock instance * @zero_it: if we should zero the newly allocated sock */ -struct sock *sk_alloc(int family, gfp_t priority, +struct sock *sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int zero_it) { struct sock *sk = NULL; @@ -869,6 +895,7 @@ struct sock *sk_alloc(int family, gfp_t priority, */ sk->sk_prot = sk->sk_prot_creator = prot; sock_lock_init(sk); + sk->sk_net = get_net(net); } if (security_sk_alloc(sk, family, priority)) @@ -908,6 +935,7 @@ void sk_free(struct sock *sk) __FUNCTION__, atomic_read(&sk->sk_omem_alloc)); security_sk_free(sk); + put_net(sk->sk_net); if (sk->sk_prot_creator->slab != NULL) kmem_cache_free(sk->sk_prot_creator->slab, sk); else @@ -917,7 +945,7 @@ void sk_free(struct sock *sk) struct sock *sk_clone(const struct sock *sk, const gfp_t priority) { - struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0); + struct sock *newsk = sk_alloc(sk->sk_net, sk->sk_family, priority, sk->sk_prot, 0); if (newsk != NULL) { struct sk_filter *filter; @@ -941,8 +969,9 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) rwlock_init(&newsk->sk_dst_lock); rwlock_init(&newsk->sk_callback_lock); - lockdep_set_class(&newsk->sk_callback_lock, - af_callback_keys + newsk->sk_family); + lockdep_set_class_and_name(&newsk->sk_callback_lock, + af_callback_keys + newsk->sk_family, + af_family_clock_key_strings[newsk->sk_family]); newsk->sk_dst_cache = NULL; newsk->sk_wmem_queued = 0; @@ -1530,8 +1559,9 @@ void sock_init_data(struct socket *sock, struct sock *sk) rwlock_init(&sk->sk_dst_lock); rwlock_init(&sk->sk_callback_lock); - lockdep_set_class(&sk->sk_callback_lock, - af_callback_keys + sk->sk_family); + lockdep_set_class_and_name(&sk->sk_callback_lock, + af_callback_keys + sk->sk_family, + af_family_clock_key_strings[sk->sk_family]); sk->sk_state_change = sock_def_wakeup; sk->sk_data_ready = sock_def_readable; @@ -1559,9 +1589,9 @@ void fastcall lock_sock_nested(struct sock *sk, int subclass) { might_sleep(); spin_lock_bh(&sk->sk_lock.slock); - if (sk->sk_lock.owner) + if (sk->sk_lock.owned) __lock_sock(sk); - sk->sk_lock.owner = (void *)1; + sk->sk_lock.owned = 1; spin_unlock(&sk->sk_lock.slock); /* * The sk_lock has mutex_lock() semantics here: @@ -1582,7 +1612,7 @@ void fastcall release_sock(struct sock *sk) spin_lock_bh(&sk->sk_lock.slock); if (sk->sk_backlog.tail) __release_sock(sk); - sk->sk_lock.owner = NULL; + sk->sk_lock.owned = 0; if (waitqueue_active(&sk->sk_lock.wq)) wake_up(&sk->sk_lock.wq); spin_unlock_bh(&sk->sk_lock.slock); @@ -1752,7 +1782,7 @@ int proto_register(struct proto *prot, int alloc_slab) if (alloc_slab) { prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (prot->slab == NULL) { printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n", @@ -1770,7 +1800,7 @@ int proto_register(struct proto *prot, int alloc_slab) sprintf(request_sock_slab_name, mask, prot->name); prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name, prot->rsk_prot->obj_size, 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (prot->rsk_prot->slab == NULL) { printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n", @@ -1792,7 +1822,7 @@ int proto_register(struct proto *prot, int alloc_slab) kmem_cache_create(timewait_sock_slab_name, prot->twsk_prot->twsk_obj_size, 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (prot->twsk_prot->twsk_slab == NULL) goto out_free_timewait_sock_slab_name; } @@ -1947,7 +1977,7 @@ static const struct file_operations proto_seq_fops = { static int __init proto_init(void) { /* register /proc/net/protocols */ - return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0; + return proc_net_fops_create(&init_net, "protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0; } subsys_initcall(proto_init); diff --git a/net/core/utils.c b/net/core/utils.c index 2030bb8c2d3..0bf17da40d5 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -25,6 +25,7 @@ #include <linux/random.h> #include <linux/percpu.h> #include <linux/init.h> +#include <net/sock.h> #include <asm/byteorder.h> #include <asm/system.h> |