summaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
authorDmitry Torokhov <dmitry.torokhov@gmail.com>2007-10-12 21:27:47 -0400
committerDmitry Torokhov <dmitry.torokhov@gmail.com>2007-10-12 21:27:47 -0400
commitb981d8b3f5e008ff10d993be633ad00564fc22cd (patch)
treee292dc07b22308912cf6a58354a608b9e5e8e1fd /net/core
parentb11d2127c4893a7315d1e16273bc8560049fa3ca (diff)
parent2b9e0aae1d50e880c58d46788e5e3ebd89d75d62 (diff)
Merge master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6
Conflicts: drivers/macintosh/adbhid.c
Diffstat (limited to 'net/core')
-rw-r--r--net/core/Makefile4
-rw-r--r--net/core/datagram.c3
-rw-r--r--net/core/dev.c1007
-rw-r--r--net/core/dev_mcast.c72
-rw-r--r--net/core/dst.c202
-rw-r--r--net/core/ethtool.c412
-rw-r--r--net/core/fib_rules.c30
-rw-r--r--net/core/flow.c2
-rw-r--r--net/core/gen_estimator.c81
-rw-r--r--net/core/neighbour.c102
-rw-r--r--net/core/net-sysfs.c42
-rw-r--r--net/core/net_namespace.c317
-rw-r--r--net/core/netpoll.c87
-rw-r--r--net/core/pktgen.c279
-rw-r--r--net/core/rtnetlink.c183
-rw-r--r--net/core/scm.c9
-rw-r--r--net/core/skbuff.c22
-rw-r--r--net/core/sock.c154
-rw-r--r--net/core/utils.c1
19 files changed, 1985 insertions, 1024 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 4751613e1b5..b1332f6d004 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -3,7 +3,7 @@
#
obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
- gen_stats.o gen_estimator.o
+ gen_stats.o gen_estimator.o net_namespace.o
obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
@@ -11,7 +11,7 @@ obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o
obj-$(CONFIG_XFRM) += flow.o
-obj-$(CONFIG_SYSFS) += net-sysfs.o
+obj-y += net-sysfs.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
obj-$(CONFIG_NETPOLL) += netpoll.o
obj-$(CONFIG_NET_DMA) += user_dma.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index cb056f47612..029b93e246b 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -450,6 +450,9 @@ int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
__wsum csum;
int chunk = skb->len - hlen;
+ if (!chunk)
+ return 0;
+
/* Skip filled elements.
* Pretty silly, look at memcpy_toiovec, though 8)
*/
diff --git a/net/core/dev.c b/net/core/dev.c
index 13a0d9f6da5..1e169a541ce 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -92,6 +92,7 @@
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
+#include <net/net_namespace.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
@@ -189,25 +190,50 @@ static struct net_dma net_dma = {
* unregister_netdevice(), which must be called with the rtnl
* semaphore held.
*/
-LIST_HEAD(dev_base_head);
DEFINE_RWLOCK(dev_base_lock);
-EXPORT_SYMBOL(dev_base_head);
EXPORT_SYMBOL(dev_base_lock);
#define NETDEV_HASHBITS 8
-static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
-static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
+#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
-static inline struct hlist_head *dev_name_hash(const char *name)
+static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
- return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
+ return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
}
-static inline struct hlist_head *dev_index_hash(int ifindex)
+static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
{
- return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
+ return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
+}
+
+/* Device list insertion */
+static int list_netdevice(struct net_device *dev)
+{
+ struct net *net = dev->nd_net;
+
+ ASSERT_RTNL();
+
+ write_lock_bh(&dev_base_lock);
+ list_add_tail(&dev->dev_list, &net->dev_base_head);
+ hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
+ hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
+ write_unlock_bh(&dev_base_lock);
+ return 0;
+}
+
+/* Device list removal */
+static void unlist_netdevice(struct net_device *dev)
+{
+ ASSERT_RTNL();
+
+ /* Unlink dev from the device chain */
+ write_lock_bh(&dev_base_lock);
+ list_del(&dev->dev_list);
+ hlist_del(&dev->name_hlist);
+ hlist_del(&dev->index_hlist);
+ write_unlock_bh(&dev_base_lock);
}
/*
@@ -220,17 +246,12 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
* Device drivers call our routines to queue packets here. We empty the
* queue in the local softnet handler.
*/
-DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
-#ifdef CONFIG_SYSFS
-extern int netdev_sysfs_init(void);
-extern int netdev_register_sysfs(struct net_device *);
-extern void netdev_unregister_sysfs(struct net_device *);
-#else
-#define netdev_sysfs_init() (0)
-#define netdev_register_sysfs(dev) (0)
-#define netdev_unregister_sysfs(dev) do { } while(0)
-#endif
+DEFINE_PER_CPU(struct softnet_data, softnet_data);
+
+extern int netdev_kobject_init(void);
+extern int netdev_register_kobject(struct net_device *);
+extern void netdev_unregister_kobject(struct net_device *);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
@@ -490,7 +511,7 @@ unsigned long netdev_boot_base(const char *prefix, int unit)
* If device already registered then return base of 1
* to indicate not to probe for this interface
*/
- if (__dev_get_by_name(name))
+ if (__dev_get_by_name(&init_net, name))
return 1;
for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
@@ -545,11 +566,11 @@ __setup("netdev=", netdev_boot_setup);
* careful with locks.
*/
-struct net_device *__dev_get_by_name(const char *name)
+struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
struct hlist_node *p;
- hlist_for_each(p, dev_name_hash(name)) {
+ hlist_for_each(p, dev_name_hash(net, name)) {
struct net_device *dev
= hlist_entry(p, struct net_device, name_hlist);
if (!strncmp(dev->name, name, IFNAMSIZ))
@@ -569,12 +590,12 @@ struct net_device *__dev_get_by_name(const char *name)
* matching device is found.
*/
-struct net_device *dev_get_by_name(const char *name)
+struct net_device *dev_get_by_name(struct net *net, const char *name)
{
struct net_device *dev;
read_lock(&dev_base_lock);
- dev = __dev_get_by_name(name);
+ dev = __dev_get_by_name(net, name);
if (dev)
dev_hold(dev);
read_unlock(&dev_base_lock);
@@ -592,11 +613,11 @@ struct net_device *dev_get_by_name(const char *name)
* or @dev_base_lock.
*/
-struct net_device *__dev_get_by_index(int ifindex)
+struct net_device *__dev_get_by_index(struct net *net, int ifindex)
{
struct hlist_node *p;
- hlist_for_each(p, dev_index_hash(ifindex)) {
+ hlist_for_each(p, dev_index_hash(net, ifindex)) {
struct net_device *dev
= hlist_entry(p, struct net_device, index_hlist);
if (dev->ifindex == ifindex)
@@ -616,12 +637,12 @@ struct net_device *__dev_get_by_index(int ifindex)
* dev_put to indicate they have finished with it.
*/
-struct net_device *dev_get_by_index(int ifindex)
+struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
struct net_device *dev;
read_lock(&dev_base_lock);
- dev = __dev_get_by_index(ifindex);
+ dev = __dev_get_by_index(net, ifindex);
if (dev)
dev_hold(dev);
read_unlock(&dev_base_lock);
@@ -642,13 +663,13 @@ struct net_device *dev_get_by_index(int ifindex)
* If the API was consistent this would be __dev_get_by_hwaddr
*/
-struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
+struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
{
struct net_device *dev;
ASSERT_RTNL();
- for_each_netdev(dev)
+ for_each_netdev(&init_net, dev)
if (dev->type == type &&
!memcmp(dev->dev_addr, ha, dev->addr_len))
return dev;
@@ -658,12 +679,12 @@ struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
EXPORT_SYMBOL(dev_getbyhwaddr);
-struct net_device *__dev_getfirstbyhwtype(unsigned short type)
+struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
struct net_device *dev;
ASSERT_RTNL();
- for_each_netdev(dev)
+ for_each_netdev(net, dev)
if (dev->type == type)
return dev;
@@ -672,12 +693,12 @@ struct net_device *__dev_getfirstbyhwtype(unsigned short type)
EXPORT_SYMBOL(__dev_getfirstbyhwtype);
-struct net_device *dev_getfirstbyhwtype(unsigned short type)
+struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
struct net_device *dev;
rtnl_lock();
- dev = __dev_getfirstbyhwtype(type);
+ dev = __dev_getfirstbyhwtype(net, type);
if (dev)
dev_hold(dev);
rtnl_unlock();
@@ -697,13 +718,13 @@ EXPORT_SYMBOL(dev_getfirstbyhwtype);
* dev_put to indicate they have finished with it.
*/
-struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
+struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
{
struct net_device *dev, *ret;
ret = NULL;
read_lock(&dev_base_lock);
- for_each_netdev(dev) {
+ for_each_netdev(net, dev) {
if (((dev->flags ^ if_flags) & mask) == 0) {
dev_hold(dev);
ret = dev;
@@ -740,9 +761,10 @@ int dev_valid_name(const char *name)
}
/**
- * dev_alloc_name - allocate a name for a device
- * @dev: device
+ * __dev_alloc_name - allocate a name for a device
+ * @net: network namespace to allocate the device name in
* @name: name format string
+ * @buf: scratch buffer and result name string
*
* Passed a format string - eg "lt%d" it will try and find a suitable
* id. It scans list of devices to build up a free map, then chooses
@@ -753,13 +775,12 @@ int dev_valid_name(const char *name)
* Returns the number of the unit assigned or a negative errno code.
*/
-int dev_alloc_name(struct net_device *dev, const char *name)
+static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
int i = 0;
- char buf[IFNAMSIZ];
const char *p;
const int max_netdevices = 8*PAGE_SIZE;
- long *inuse;
+ unsigned long *inuse;
struct net_device *d;
p = strnchr(name, IFNAMSIZ-1, '%');
@@ -773,18 +794,18 @@ int dev_alloc_name(struct net_device *dev, const char *name)
return -EINVAL;
/* Use one page as a bit array of possible slots */
- inuse = (long *) get_zeroed_page(GFP_ATOMIC);
+ inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
if (!inuse)
return -ENOMEM;
- for_each_netdev(d) {
+ for_each_netdev(net, d) {
if (!sscanf(d->name, name, &i))
continue;
if (i < 0 || i >= max_netdevices)
continue;
/* avoid cases where sscanf is not exact inverse of printf */
- snprintf(buf, sizeof(buf), name, i);
+ snprintf(buf, IFNAMSIZ, name, i);
if (!strncmp(buf, d->name, IFNAMSIZ))
set_bit(i, inuse);
}
@@ -793,11 +814,9 @@ int dev_alloc_name(struct net_device *dev, const char *name)
free_page((unsigned long) inuse);
}
- snprintf(buf, sizeof(buf), name, i);
- if (!__dev_get_by_name(buf)) {
- strlcpy(dev->name, buf, IFNAMSIZ);
+ snprintf(buf, IFNAMSIZ, name, i);
+ if (!__dev_get_by_name(net, buf))
return i;
- }
/* It is possible to run out of possible slots
* when the name is long and there isn't enough space left
@@ -806,6 +825,34 @@ int dev_alloc_name(struct net_device *dev, const char *name)
return -ENFILE;
}
+/**
+ * dev_alloc_name - allocate a name for a device
+ * @dev: device
+ * @name: name format string
+ *
+ * Passed a format string - eg "lt%d" it will try and find a suitable
+ * id. It scans list of devices to build up a free map, then chooses
+ * the first empty slot. The caller must hold the dev_base or rtnl lock
+ * while allocating the name and adding the device in order to avoid
+ * duplicates.
+ * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
+ * Returns the number of the unit assigned or a negative errno code.
+ */
+
+int dev_alloc_name(struct net_device *dev, const char *name)
+{
+ char buf[IFNAMSIZ];
+ struct net *net;
+ int ret;
+
+ BUG_ON(!dev->nd_net);
+ net = dev->nd_net;
+ ret = __dev_alloc_name(net, name, buf);
+ if (ret >= 0)
+ strlcpy(dev->name, buf, IFNAMSIZ);
+ return ret;
+}
+
/**
* dev_change_name - change name of a device
@@ -817,31 +864,56 @@ int dev_alloc_name(struct net_device *dev, const char *name)
*/
int dev_change_name(struct net_device *dev, char *newname)
{
+ char oldname[IFNAMSIZ];
int err = 0;
+ int ret;
+ struct net *net;
ASSERT_RTNL();
+ BUG_ON(!dev->nd_net);
+ net = dev->nd_net;
if (dev->flags & IFF_UP)
return -EBUSY;
if (!dev_valid_name(newname))
return -EINVAL;
+ memcpy(oldname, dev->name, IFNAMSIZ);
+
if (strchr(newname, '%')) {
err = dev_alloc_name(dev, newname);
if (err < 0)
return err;
strcpy(newname, dev->name);
}
- else if (__dev_get_by_name(newname))
+ else if (__dev_get_by_name(net, newname))
return -EEXIST;
else
strlcpy(dev->name, newname, IFNAMSIZ);
+rollback:
device_rename(&dev->dev, dev->name);
+
+ write_lock_bh(&dev_base_lock);
hlist_del(&dev->name_hlist);
- hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
- raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
+ hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
+ write_unlock_bh(&dev_base_lock);
+
+ ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
+ ret = notifier_to_errno(ret);
+
+ if (ret) {
+ if (err) {
+ printk(KERN_ERR
+ "%s: name change rollback failed: %d.\n",
+ dev->name, ret);
+ } else {
+ err = ret;
+ memcpy(dev->name, oldname, IFNAMSIZ);
+ goto rollback;
+ }
+ }
return err;
}
@@ -854,7 +926,7 @@ int dev_change_name(struct net_device *dev, char *newname)
*/
void netdev_features_change(struct net_device *dev)
{
- raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
+ call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);
@@ -869,8 +941,7 @@ EXPORT_SYMBOL(netdev_features_change);
void netdev_state_change(struct net_device *dev)
{
if (dev->flags & IFF_UP) {
- raw_notifier_call_chain(&netdev_chain,
- NETDEV_CHANGE, dev);
+ call_netdevice_notifiers(NETDEV_CHANGE, dev);
rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
}
}
@@ -884,26 +955,18 @@ void netdev_state_change(struct net_device *dev)
* available in this kernel then it becomes a nop.
*/
-void dev_load(const char *name)
+void dev_load(struct net *net, const char *name)
{
struct net_device *dev;
read_lock(&dev_base_lock);
- dev = __dev_get_by_name(name);
+ dev = __dev_get_by_name(net, name);
read_unlock(&dev_base_lock);
if (!dev && capable(CAP_SYS_MODULE))
request_module("%s", name);
}
-static int default_rebuild_header(struct sk_buff *skb)
-{
- printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
- skb->dev ? skb->dev->name : "NULL!!!");
- kfree_skb(skb);
- return 1;
-}
-
/**
* dev_open - prepare an interface for use.
* @dev: device to open
@@ -966,7 +1029,7 @@ int dev_open(struct net_device *dev)
/*
* ... and announce new interface.
*/
- raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
+ call_netdevice_notifiers(NETDEV_UP, dev);
}
return ret;
}
@@ -982,6 +1045,8 @@ int dev_open(struct net_device *dev)
*/
int dev_close(struct net_device *dev)
{
+ might_sleep();
+
if (!(dev->flags & IFF_UP))
return 0;
@@ -989,23 +1054,19 @@ int dev_close(struct net_device *dev)
* Tell people we are going down, so that they can
* prepare to death, when device is still operating.
*/
- raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
+ call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
dev_deactivate(dev);
clear_bit(__LINK_STATE_START, &dev->state);
/* Synchronize to scheduled poll. We cannot touch poll list,
- * it can be even on different cpu. So just clear netif_running(),
- * and wait when poll really will happen. Actually, the best place
- * for this is inside dev->stop() after device stopped its irq
- * engine, but this requires more changes in devices. */
-
+ * it can be even on different cpu. So just clear netif_running().
+ *
+ * dev->stop() will invoke napi_disable() on all of it's
+ * napi_struct instances on this device.
+ */
smp_mb__after_clear_bit(); /* Commit netif_running(). */
- while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
- /* No hurry. */
- msleep(1);
- }
/*
* Call the device specific close. This cannot fail.
@@ -1026,12 +1087,14 @@ int dev_close(struct net_device *dev)
/*
* Tell people we are down
*/
- raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
+ call_netdevice_notifiers(NETDEV_DOWN, dev);
return 0;
}
+static int dev_boot_phase = 1;
+
/*
* Device change register/unregister. These are not inline or static
* as we export them to the world.
@@ -1054,20 +1117,49 @@ int dev_close(struct net_device *dev)
int register_netdevice_notifier(struct notifier_block *nb)
{
struct net_device *dev;
+ struct net_device *last;
+ struct net *net;
int err;
rtnl_lock();
err = raw_notifier_chain_register(&netdev_chain, nb);
- if (!err) {
- for_each_netdev(dev) {
- nb->notifier_call(nb, NETDEV_REGISTER, dev);
+ if (err)
+ goto unlock;
+ if (dev_boot_phase)
+ goto unlock;
+ for_each_net(net) {
+ for_each_netdev(net, dev) {
+ err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
+ err = notifier_to_errno(err);
+ if (err)
+ goto rollback;
+
+ if (!(dev->flags & IFF_UP))
+ continue;
- if (dev->flags & IFF_UP)
- nb->notifier_call(nb, NETDEV_UP, dev);
+ nb->notifier_call(nb, NETDEV_UP, dev);
}
}
+
+unlock:
rtnl_unlock();
return err;
+
+rollback:
+ last = dev;
+ for_each_net(net) {
+ for_each_netdev(net, dev) {
+ if (dev == last)
+ break;
+
+ if (dev->flags & IFF_UP) {
+ nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
+ nb->notifier_call(nb, NETDEV_DOWN, dev);
+ }
+ nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
+ }
+ }
+ goto unlock;
}
/**
@@ -1099,9 +1191,9 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
* are as for raw_notifier_call_chain().
*/
-int call_netdevice_notifiers(unsigned long val, void *v)
+int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
- return raw_notifier_call_chain(&netdev_chain, val, v);
+ return raw_notifier_call_chain(&netdev_chain, val, dev);
}
/* When > 0 there are consumers of rx skb time stamps */
@@ -1188,21 +1280,21 @@ void __netif_schedule(struct net_device *dev)
}
EXPORT_SYMBOL(__netif_schedule);
-void __netif_rx_schedule(struct net_device *dev)
+void dev_kfree_skb_irq(struct sk_buff *skb)
{
- unsigned long flags;
+ if (atomic_dec_and_test(&skb->users)) {
+ struct softnet_data *sd;
+ unsigned long flags;
- local_irq_save(flags);
- dev_hold(dev);
- list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
- if (dev->quota < 0)
- dev->quota += dev->weight;
- else
- dev->quota = dev->weight;
- __raise_softirq_irqoff(NET_RX_SOFTIRQ);
- local_irq_restore(flags);
+ local_irq_save(flags);
+ sd = &__get_cpu_var(softnet_data);
+ skb->next = sd->completion_queue;
+ sd->completion_queue = skb;
+ raise_softirq_irqoff(NET_TX_SOFTIRQ);
+ local_irq_restore(flags);
+ }
}
-EXPORT_SYMBOL(__netif_rx_schedule);
+EXPORT_SYMBOL(dev_kfree_skb_irq);
void dev_kfree_skb_any(struct sk_buff *skb)
{
@@ -1214,7 +1306,12 @@ void dev_kfree_skb_any(struct sk_buff *skb)
EXPORT_SYMBOL(dev_kfree_skb_any);
-/* Hot-plugging. */
+/**
+ * netif_device_detach - mark device as removed
+ * @dev: network device
+ *
+ * Mark device as removed from system and therefore no longer available.
+ */
void netif_device_detach(struct net_device *dev)
{
if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
@@ -1224,6 +1321,12 @@ void netif_device_detach(struct net_device *dev)
}
EXPORT_SYMBOL(netif_device_detach);
+/**
+ * netif_device_attach - mark device as attached
+ * @dev: network device
+ *
+ * Mark device as attached from system and restart if needed.
+ */
void netif_device_attach(struct net_device *dev)
{
if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
@@ -1456,18 +1559,6 @@ out_kfree_skb:
return 0;
}
-#define HARD_TX_LOCK(dev, cpu) { \
- if ((dev->features & NETIF_F_LLTX) == 0) { \
- netif_tx_lock(dev); \
- } \
-}
-
-#define HARD_TX_UNLOCK(dev) { \
- if ((dev->features & NETIF_F_LLTX) == 0) { \
- netif_tx_unlock(dev); \
- } \
-}
-
/**
* dev_queue_xmit - transmit a buffer
* @skb: buffer to transmit
@@ -1685,7 +1776,7 @@ enqueue:
return NET_RX_SUCCESS;
}
- netif_rx_schedule(&queue->backlog_dev);
+ napi_schedule(&queue->backlog);
goto enqueue;
}
@@ -1726,6 +1817,7 @@ static inline struct net_device *skb_bond(struct sk_buff *skb)
return dev;
}
+
static void net_tx_action(struct softirq_action *h)
{
struct softnet_data *sd = &__get_cpu_var(softnet_data);
@@ -1882,7 +1974,7 @@ int netif_receive_skb(struct sk_buff *skb)
__be16 type;
/* if we've gotten here through NAPI, check netpoll */
- if (skb->dev->poll && netpoll_rx(skb))
+ if (netpoll_receive_skb(skb))
return NET_RX_DROP;
if (!skb->tstamp.tv64)
@@ -1972,22 +2064,25 @@ out:
return ret;
}
-static int process_backlog(struct net_device *backlog_dev, int *budget)
+static int process_backlog(struct napi_struct *napi, int quota)
{
int work = 0;
- int quota = min(backlog_dev->quota, *budget);
struct softnet_data *queue = &__get_cpu_var(softnet_data);
unsigned long start_time = jiffies;
- backlog_dev->weight = weight_p;
- for (;;) {
+ napi->weight = weight_p;
+ do {
struct sk_buff *skb;
struct net_device *dev;
local_irq_disable();
skb = __skb_dequeue(&queue->input_pkt_queue);
- if (!skb)
- goto job_done;
+ if (!skb) {
+ __napi_complete(napi);
+ local_irq_enable();
+ break;
+ }
+
local_irq_enable();
dev = skb->dev;
@@ -1995,67 +2090,86 @@ static int process_backlog(struct net_device *backlog_dev, int *budget)
netif_receive_skb(skb);
dev_put(dev);
+ } while (++work < quota && jiffies == start_time);
- work++;
-
- if (work >= quota || jiffies - start_time > 1)
- break;
-
- }
-
- backlog_dev->quota -= work;
- *budget -= work;
- return -1;
-
-job_done:
- backlog_dev->quota -= work;
- *budget -= work;
+ return work;
+}
- list_del(&backlog_dev->poll_list);
- smp_mb__before_clear_bit();
- netif_poll_enable(backlog_dev);
+/**
+ * __napi_schedule - schedule for receive
+ * @napi: entry to schedule
+ *
+ * The entry's receive function will be scheduled to run
+ */
+void fastcall __napi_schedule(struct napi_struct *n)
+{
+ unsigned long flags;
- local_irq_enable();
- return 0;
+ local_irq_save(flags);
+ list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
+ __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ local_irq_restore(flags);
}
+EXPORT_SYMBOL(__napi_schedule);
+
static void net_rx_action(struct softirq_action *h)
{
- struct softnet_data *queue = &__get_cpu_var(softnet_data);
+ struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
unsigned long start_time = jiffies;
int budget = netdev_budget;
void *have;
local_irq_disable();
- while (!list_empty(&queue->poll_list)) {
- struct net_device *dev;
+ while (!list_empty(list)) {
+ struct napi_struct *n;
+ int work, weight;
- if (budget <= 0 || jiffies - start_time > 1)
+ /* If softirq window is exhuasted then punt.
+ *
+ * Note that this is a slight policy change from the
+ * previous NAPI code, which would allow up to 2
+ * jiffies to pass before breaking out. The test
+ * used to be "jiffies - start_time > 1".
+ */
+ if (unlikely(budget <= 0 || jiffies != start_time))
goto softnet_break;
local_irq_enable();
- dev = list_entry(queue->poll_list.next,
- struct net_device, poll_list);
- have = netpoll_poll_lock(dev);
+ /* Even though interrupts have been re-enabled, this
+ * access is safe because interrupts can only add new
+ * entries to the tail of this list, and only ->poll()
+ * calls can remove this head entry from the list.
+ */
+ n = list_entry(list->next, struct napi_struct, poll_list);
- if (dev->quota <= 0 || dev->poll(dev, &budget)) {
- netpoll_poll_unlock(have);
- local_irq_disable();
- list_move_tail(&dev->poll_list, &queue->poll_list);
- if (dev->quota < 0)
- dev->quota += dev->weight;
- else
- dev->quota = dev->weight;
- } else {
- netpoll_poll_unlock(have);
- dev_put(dev);
- local_irq_disable();
- }
+ have = netpoll_poll_lock(n);
+
+ weight = n->weight;
+
+ work = n->poll(n, weight);
+
+ WARN_ON_ONCE(work > weight);
+
+ budget -= work;
+
+ local_irq_disable();
+
+ /* Drivers must not modify the NAPI state if they
+ * consume the entire weight. In such cases this code
+ * still "owns" the NAPI instance and therefore can
+ * move the instance around on the list at-will.
+ */
+ if (unlikely(work == weight))
+ list_move_tail(&n->poll_list, list);
+
+ netpoll_poll_unlock(have);
}
out:
local_irq_enable();
+
#ifdef CONFIG_NET_DMA
/*
* There may not be any more sk_buffs coming right now, so push
@@ -2070,6 +2184,7 @@ out:
}
}
#endif
+
return;
softnet_break:
@@ -2109,7 +2224,7 @@ int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
* match. --pb
*/
-static int dev_ifname(struct ifreq __user *arg)
+static int dev_ifname(struct net *net, struct ifreq __user *arg)
{
struct net_device *dev;
struct ifreq ifr;
@@ -2122,7 +2237,7 @@ static int dev_ifname(struct ifreq __user *arg)
return -EFAULT;
read_lock(&dev_base_lock);
- dev = __dev_get_by_index(ifr.ifr_ifindex);
+ dev = __dev_get_by_index(net, ifr.ifr_ifindex);
if (!dev) {
read_unlock(&dev_base_lock);
return -ENODEV;
@@ -2142,7 +2257,7 @@ static int dev_ifname(struct ifreq __user *arg)
* Thus we will need a 'compatibility mode'.
*/
-static int dev_ifconf(char __user *arg)
+static int dev_ifconf(struct net *net, char __user *arg)
{
struct ifconf ifc;
struct net_device *dev;
@@ -2166,7 +2281,7 @@ static int dev_ifconf(char __user *arg)
*/
total = 0;
- for_each_netdev(dev) {
+ for_each_netdev(net, dev) {
for (i = 0; i < NPROTO; i++) {
if (gifconf_list[i]) {
int done;
@@ -2200,6 +2315,7 @@ static int dev_ifconf(char __user *arg)
*/
void *dev_seq_start(struct seq_file *seq, loff_t *pos)
{
+ struct net *net = seq->private;
loff_t off;
struct net_device *dev;
@@ -2208,7 +2324,7 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos)
return SEQ_START_TOKEN;
off = 1;
- for_each_netdev(dev)
+ for_each_netdev(net, dev)
if (off++ == *pos)
return dev;
@@ -2217,9 +2333,10 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos)
void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
+ struct net *net = seq->private;
++*pos;
return v == SEQ_START_TOKEN ?
- first_net_device() : next_net_device((struct net_device *)v);
+ first_net_device(net) : next_net_device((struct net_device *)v);
}
void dev_seq_stop(struct seq_file *seq, void *v)
@@ -2315,7 +2432,26 @@ static const struct seq_operations dev_seq_ops = {
static int dev_seq_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &dev_seq_ops);
+ struct seq_file *seq;
+ int res;
+ res = seq_open(file, &dev_seq_ops);
+ if (!res) {
+ seq = file->private_data;
+ seq->private = get_proc_net(inode);
+ if (!seq->private) {
+ seq_release(inode, file);
+ res = -ENXIO;
+ }
+ }
+ return res;
+}
+
+static int dev_seq_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq = file->private_data;
+ struct net *net = seq->private;
+ put_net(net);
+ return seq_release(inode, file);
}
static const struct file_operations dev_seq_fops = {
@@ -2323,7 +2459,7 @@ static const struct file_operations dev_seq_fops = {
.open = dev_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = dev_seq_release,
};
static const struct seq_operations softnet_seq_ops = {
@@ -2475,30 +2611,49 @@ static const struct file_operations ptype_seq_fops = {
};
-static int __init dev_proc_init(void)
+static int __net_init dev_proc_net_init(struct net *net)
{
int rc = -ENOMEM;
- if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
+ if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
goto out;
- if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
+ if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
goto out_dev;
- if (!proc_net_fops_create("ptype", S_IRUGO, &ptype_seq_fops))
- goto out_dev2;
-
- if (wext_proc_init())
+ if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
goto out_softnet;
+
+ if (wext_proc_init(net))
+ goto out_ptype;
rc = 0;
out:
return rc;
+out_ptype:
+ proc_net_remove(net, "ptype");
out_softnet:
- proc_net_remove("ptype");
-out_dev2:
- proc_net_remove("softnet_stat");
+ proc_net_remove(net, "softnet_stat");
out_dev:
- proc_net_remove("dev");
+ proc_net_remove(net, "dev");
goto out;
}
+
+static void __net_exit dev_proc_net_exit(struct net *net)
+{
+ wext_proc_exit(net);
+
+ proc_net_remove(net, "ptype");
+ proc_net_remove(net, "softnet_stat");
+ proc_net_remove(net, "dev");
+}
+
+static struct pernet_operations __net_initdata dev_proc_ops = {
+ .init = dev_proc_net_init,
+ .exit = dev_proc_net_exit,
+};
+
+static int __init dev_proc_init(void)
+{
+ return register_pernet_subsys(&dev_proc_ops);
+}
#else
#define dev_proc_init() 0
#endif /* CONFIG_PROC_FS */
@@ -2629,7 +2784,7 @@ void __dev_set_rx_mode(struct net_device *dev)
return;
if (!netif_device_present(dev))
- return;
+ return;
if (dev->set_rx_mode)
dev->set_rx_mode(dev);
@@ -2715,26 +2870,14 @@ int __dev_addr_add(struct dev_addr_list **list, int *count,
return 0;
}
-void __dev_addr_discard(struct dev_addr_list **list)
-{
- struct dev_addr_list *tmp;
-
- while (*list != NULL) {
- tmp = *list;
- *list = tmp->next;
- if (tmp->da_users > tmp->da_gusers)
- printk("__dev_addr_discard: address leakage! "
- "da_users=%d\n", tmp->da_users);
- kfree(tmp);
- }
-}
-
/**
* dev_unicast_delete - Release secondary unicast address.
* @dev: device
+ * @addr: address to delete
+ * @alen: length of @addr
*
* Release reference to a secondary unicast address and remove it
- * from the device if the reference count drop to zero.
+ * from the device if the reference count drops to zero.
*
* The caller must hold the rtnl_mutex.
*/
@@ -2756,6 +2899,8 @@ EXPORT_SYMBOL(dev_unicast_delete);
/**
* dev_unicast_add - add a secondary unicast address
* @dev: device
+ * @addr: address to delete
+ * @alen: length of @addr
*
* Add a secondary unicast address to the device or increase
* the reference count if it already exists.
@@ -2777,11 +2922,30 @@ int dev_unicast_add(struct net_device *dev, void *addr, int alen)
}
EXPORT_SYMBOL(dev_unicast_add);
-static void dev_unicast_discard(struct net_device *dev)
+static void __dev_addr_discard(struct dev_addr_list **list)
+{
+ struct dev_addr_list *tmp;
+
+ while (*list != NULL) {
+ tmp = *list;
+ *list = tmp->next;
+ if (tmp->da_users > tmp->da_gusers)
+ printk("__dev_addr_discard: address leakage! "
+ "da_users=%d\n", tmp->da_users);
+ kfree(tmp);
+ }
+}
+
+static void dev_addr_discard(struct net_device *dev)
{
netif_tx_lock_bh(dev);
+
__dev_addr_discard(&dev->uc_list);
dev->uc_count = 0;
+
+ __dev_addr_discard(&dev->mc_list);
+ dev->mc_count = 0;
+
netif_tx_unlock_bh(dev);
}
@@ -2852,8 +3016,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
if (dev->flags & IFF_UP &&
((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
IFF_VOLATILE)))
- raw_notifier_call_chain(&netdev_chain,
- NETDEV_CHANGE, dev);
+ call_netdevice_notifiers(NETDEV_CHANGE, dev);
if ((flags ^ dev->gflags) & IFF_PROMISC) {
int inc = (flags & IFF_PROMISC) ? +1 : -1;
@@ -2899,8 +3062,7 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
else
dev->mtu = new_mtu;
if (!err && dev->flags & IFF_UP)
- raw_notifier_call_chain(&netdev_chain,
- NETDEV_CHANGEMTU, dev);
+ call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
return err;
}
@@ -2916,18 +3078,17 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
return -ENODEV;
err = dev->set_mac_address(dev, sa);
if (!err)
- raw_notifier_call_chain(&netdev_chain,
- NETDEV_CHANGEADDR, dev);
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
return err;
}
/*
- * Perform the SIOCxIFxxx calls.
+ * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
*/
-static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
+static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
{
int err;
- struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
+ struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
if (!dev)
return -ENODEV;
@@ -2937,25 +3098,15 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
ifr->ifr_flags = dev_get_flags(dev);
return 0;
- case SIOCSIFFLAGS: /* Set interface flags */
- return dev_change_flags(dev, ifr->ifr_flags);
-
case SIOCGIFMETRIC: /* Get the metric on the interface
(currently unused) */
ifr->ifr_metric = 0;
return 0;
- case SIOCSIFMETRIC: /* Set the metric on the interface
- (currently unused) */
- return -EOPNOTSUPP;
-
case SIOCGIFMTU: /* Get the MTU of a device */
ifr->ifr_mtu = dev->mtu;
return 0;
- case SIOCSIFMTU: /* Set the MTU of a device */
- return dev_set_mtu(dev, ifr->ifr_mtu);
-
case SIOCGIFHWADDR:
if (!dev->addr_len)
memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
@@ -2965,17 +3116,9 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
ifr->ifr_hwaddr.sa_family = dev->type;
return 0;
- case SIOCSIFHWADDR:
- return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
-
- case SIOCSIFHWBROADCAST:
- if (ifr->ifr_hwaddr.sa_family != dev->type)
- return -EINVAL;
- memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
- min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
- raw_notifier_call_chain(&netdev_chain,
- NETDEV_CHANGEADDR, dev);
- return 0;
+ case SIOCGIFSLAVE:
+ err = -EINVAL;
+ break;
case SIOCGIFMAP:
ifr->ifr_map.mem_start = dev->mem_start;
@@ -2986,6 +3129,59 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
ifr->ifr_map.port = dev->if_port;
return 0;
+ case SIOCGIFINDEX:
+ ifr->ifr_ifindex = dev->ifindex;
+ return 0;
+
+ case SIOCGIFTXQLEN:
+ ifr->ifr_qlen = dev->tx_queue_len;
+ return 0;
+
+ default:
+ /* dev_ioctl() should ensure this case
+ * is never reached
+ */
+ WARN_ON(1);
+ err = -EINVAL;
+ break;
+
+ }
+ return err;
+}
+
+/*
+ * Perform the SIOCxIFxxx calls, inside rtnl_lock()
+ */
+static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
+{
+ int err;
+ struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
+
+ if (!dev)
+ return -ENODEV;
+
+ switch (cmd) {
+ case SIOCSIFFLAGS: /* Set interface flags */
+ return dev_change_flags(dev, ifr->ifr_flags);
+
+ case SIOCSIFMETRIC: /* Set the metric on the interface
+ (currently unused) */
+ return -EOPNOTSUPP;
+
+ case SIOCSIFMTU: /* Set the MTU of a device */
+ return dev_set_mtu(dev, ifr->ifr_mtu);
+
+ case SIOCSIFHWADDR:
+ return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
+
+ case SIOCSIFHWBROADCAST:
+ if (ifr->ifr_hwaddr.sa_family != dev->type)
+ return -EINVAL;
+ memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
+ min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+ return 0;
+
case SIOCSIFMAP:
if (dev->set_config) {
if (!netif_device_present(dev))
@@ -3012,14 +3208,6 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
dev->addr_len, 1);
- case SIOCGIFINDEX:
- ifr->ifr_ifindex = dev->ifindex;
- return 0;
-
- case SIOCGIFTXQLEN:
- ifr->ifr_qlen = dev->tx_queue_len;
- return 0;
-
case SIOCSIFTXQLEN:
if (ifr->ifr_qlen < 0)
return -EINVAL;
@@ -3080,7 +3268,7 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
* positive or a negative errno code on error.
*/
-int dev_ioctl(unsigned int cmd, void __user *arg)
+int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
struct ifreq ifr;
int ret;
@@ -3093,12 +3281,12 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
if (cmd == SIOCGIFCONF) {
rtnl_lock();
- ret = dev_ifconf((char __user *) arg);
+ ret = dev_ifconf(net, (char __user *) arg);
rtnl_unlock();
return ret;
}
if (cmd == SIOCGIFNAME)
- return dev_ifname((struct ifreq __user *)arg);
+ return dev_ifname(net, (struct ifreq __user *)arg);
if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
return -EFAULT;
@@ -3128,9 +3316,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
case SIOCGIFMAP:
case SIOCGIFINDEX:
case SIOCGIFTXQLEN:
- dev_load(ifr.ifr_name);
+ dev_load(net, ifr.ifr_name);
read_lock(&dev_base_lock);
- ret = dev_ifsioc(&ifr, cmd);
+ ret = dev_ifsioc_locked(net, &ifr, cmd);
read_unlock(&dev_base_lock);
if (!ret) {
if (colon)
@@ -3142,9 +3330,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
return ret;
case SIOCETHTOOL:
- dev_load(ifr.ifr_name);
+ dev_load(net, ifr.ifr_name);
rtnl_lock();
- ret = dev_ethtool(&ifr);
+ ret = dev_ethtool(net, &ifr);
rtnl_unlock();
if (!ret) {
if (colon)
@@ -3166,9 +3354,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
case SIOCSIFNAME:
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- dev_load(ifr.ifr_name);
+ dev_load(net, ifr.ifr_name);
rtnl_lock();
- ret = dev_ifsioc(&ifr, cmd);
+ ret = dev_ifsioc(net, &ifr, cmd);
rtnl_unlock();
if (!ret) {
if (colon)
@@ -3207,9 +3395,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
/* fall through */
case SIOCBONDSLAVEINFOQUERY:
case SIOCBONDINFOQUERY:
- dev_load(ifr.ifr_name);
+ dev_load(net, ifr.ifr_name);
rtnl_lock();
- ret = dev_ifsioc(&ifr, cmd);
+ ret = dev_ifsioc(net, &ifr, cmd);
rtnl_unlock();
return ret;
@@ -3229,9 +3417,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
if (cmd == SIOCWANDEV ||
(cmd >= SIOCDEVPRIVATE &&
cmd <= SIOCDEVPRIVATE + 15)) {
- dev_load(ifr.ifr_name);
+ dev_load(net, ifr.ifr_name);
rtnl_lock();
- ret = dev_ifsioc(&ifr, cmd);
+ ret = dev_ifsioc(net, &ifr, cmd);
rtnl_unlock();
if (!ret && copy_to_user(arg, &ifr,
sizeof(struct ifreq)))
@@ -3240,7 +3428,7 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
}
/* Take care of Wireless Extensions */
if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
- return wext_handle_ioctl(&ifr, cmd, arg);
+ return wext_handle_ioctl(net, &ifr, cmd, arg);
return -EINVAL;
}
}
@@ -3253,19 +3441,17 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
* number. The caller must hold the rtnl semaphore or the
* dev_base_lock to be sure it remains unique.
*/
-static int dev_new_index(void)
+static int dev_new_index(struct net *net)
{
static int ifindex;
for (;;) {
if (++ifindex <= 0)
ifindex = 1;
- if (!__dev_get_by_index(ifindex))
+ if (!__dev_get_by_index(net, ifindex))
return ifindex;
}
}
-static int dev_boot_phase = 1;
-
/* Delayed registration/unregisteration */
static DEFINE_SPINLOCK(net_todo_list_lock);
static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
@@ -3299,6 +3485,7 @@ int register_netdevice(struct net_device *dev)
struct hlist_head *head;
struct hlist_node *p;
int ret;
+ struct net *net;
BUG_ON(dev_boot_phase);
ASSERT_RTNL();
@@ -3307,6 +3494,8 @@ int register_netdevice(struct net_device *dev)
/* When net_device's are persistent, this will be fatal. */
BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
+ BUG_ON(!dev->nd_net);
+ net = dev->nd_net;
spin_lock_init(&dev->queue_lock);
spin_lock_init(&dev->_xmit_lock);
@@ -3328,21 +3517,21 @@ int register_netdevice(struct net_device *dev)
if (!dev_valid_name(dev->name)) {
ret = -EINVAL;
- goto out;
+ goto err_uninit;
}
- dev->ifindex = dev_new_index();
+ dev->ifindex = dev_new_index(net);
if (dev->iflink == -1)
dev->iflink = dev->ifindex;
/* Check for existence of name */
- head = dev_name_hash(dev->name);
+ head = dev_name_hash(net, dev->name);
hlist_for_each(p, head) {
struct net_device *d
= hlist_entry(p, struct net_device, name_hlist);
if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
ret = -EEXIST;
- goto out;
+ goto err_uninit;
}
}
@@ -3392,17 +3581,9 @@ int register_netdevice(struct net_device *dev)
}
}
- /*
- * nil rebuild_header routine,
- * that should be never called and used as just bug trap.
- */
-
- if (!dev->rebuild_header)
- dev->rebuild_header = default_rebuild_header;
-
- ret = netdev_register_sysfs(dev);
+ ret = netdev_register_kobject(dev);
if (ret)
- goto out;
+ goto err_uninit;
dev->reg_state = NETREG_REGISTERED;
/*
@@ -3413,20 +3594,22 @@ int register_netdevice(struct net_device *dev)
set_bit(__LINK_STATE_PRESENT, &dev->state);
dev_init_scheduler(dev);
- write_lock_bh(&dev_base_lock);
- list_add_tail(&dev->dev_list, &dev_base_head);
- hlist_add_head(&dev->name_hlist, head);
- hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
dev_hold(dev);
- write_unlock_bh(&dev_base_lock);
+ list_netdevice(dev);
/* Notify protocols, that a new device appeared. */
- raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
-
- ret = 0;
+ ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
+ ret = notifier_to_errno(ret);
+ if (ret)
+ unregister_netdevice(dev);
out:
return ret;
+
+err_uninit:
+ if (dev->uninit)
+ dev->uninit(dev);
+ goto out;
}
/**
@@ -3486,8 +3669,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
rtnl_lock();
/* Rebroadcast unregister notification */
- raw_notifier_call_chain(&netdev_chain,
- NETDEV_UNREGISTER, dev);
+ call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
&dev->state)) {
@@ -3619,7 +3801,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
/* ensure 32-byte alignment of both the device and private area */
alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST +
- (sizeof(struct net_device_subqueue) * queue_count)) &
+ (sizeof(struct net_device_subqueue) * (queue_count - 1))) &
~NETDEV_ALIGN_CONST;
alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
@@ -3632,18 +3814,20 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
dev = (struct net_device *)
(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
dev->padded = (char *)dev - (char *)p;
+ dev->nd_net = &init_net;
if (sizeof_priv) {
dev->priv = ((char *)dev +
((sizeof(struct net_device) +
(sizeof(struct net_device_subqueue) *
- queue_count) + NETDEV_ALIGN_CONST)
+ (queue_count - 1)) + NETDEV_ALIGN_CONST)
& ~NETDEV_ALIGN_CONST));
}
dev->egress_subqueue_count = queue_count;
dev->get_stats = internal_stats;
+ netpoll_netdev_init(dev);
setup(dev);
strcpy(dev->name, name);
return dev;
@@ -3660,7 +3844,6 @@ EXPORT_SYMBOL(alloc_netdev_mq);
*/
void free_netdev(struct net_device *dev)
{
-#ifdef CONFIG_SYSFS
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
kfree((char *)dev - dev->padded);
@@ -3672,9 +3855,6 @@ void free_netdev(struct net_device *dev)
/* will free via device release */
put_device(&dev->dev);
-#else
- kfree((char *)dev - dev->padded);
-#endif
}
/* Synchronize with packet receive processing. */
@@ -3713,15 +3893,10 @@ void unregister_netdevice(struct net_device *dev)
BUG_ON(dev->reg_state != NETREG_REGISTERED);
/* If device is running, close it first. */
- if (dev->flags & IFF_UP)
- dev_close(dev);
+ dev_close(dev);
/* And unlink it from device chain. */
- write_lock_bh(&dev_base_lock);
- list_del(&dev->dev_list);
- hlist_del(&dev->name_hlist);
- hlist_del(&dev->index_hlist);
- write_unlock_bh(&dev_base_lock);
+ unlist_netdevice(dev);
dev->reg_state = NETREG_UNREGISTERING;
@@ -3734,13 +3909,12 @@ void unregister_netdevice(struct net_device *dev)
/* Notify protocols, that we are about to destroy
this device. They should clean all the things.
*/
- raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
+ call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
/*
* Flush the unicast and multicast chains
*/
- dev_unicast_discard(dev);
- dev_mc_discard(dev);
+ dev_addr_discard(dev);
if (dev->uninit)
dev->uninit(dev);
@@ -3748,8 +3922,8 @@ void unregister_netdevice(struct net_device *dev)
/* Notifier chain MUST detach us from master device. */
BUG_TRAP(!dev->master);
- /* Remove entries from sysfs */
- netdev_unregister_sysfs(dev);
+ /* Remove entries from kobject tree */
+ netdev_unregister_kobject(dev);
/* Finish processing unregister after unlock */
net_set_todo(dev);
@@ -3780,6 +3954,121 @@ void unregister_netdev(struct net_device *dev)
EXPORT_SYMBOL(unregister_netdev);
+/**
+ * dev_change_net_namespace - move device to different nethost namespace
+ * @dev: device
+ * @net: network namespace
+ * @pat: If not NULL name pattern to try if the current device name
+ * is already taken in the destination network namespace.
+ *
+ * This function shuts down a device interface and moves it
+ * to a new network namespace. On success 0 is returned, on
+ * a failure a netagive errno code is returned.
+ *
+ * Callers must hold the rtnl semaphore.
+ */
+
+int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
+{
+ char buf[IFNAMSIZ];
+ const char *destname;
+ int err;
+
+ ASSERT_RTNL();
+
+ /* Don't allow namespace local devices to be moved. */
+ err = -EINVAL;
+ if (dev->features & NETIF_F_NETNS_LOCAL)
+ goto out;
+
+ /* Ensure the device has been registrered */
+ err = -EINVAL;
+ if (dev->reg_state != NETREG_REGISTERED)
+ goto out;
+
+ /* Get out if there is nothing todo */
+ err = 0;
+ if (dev->nd_net == net)
+ goto out;
+
+ /* Pick the destination device name, and ensure
+ * we can use it in the destination network namespace.
+ */
+ err = -EEXIST;
+ destname = dev->name;
+ if (__dev_get_by_name(net, destname)) {
+ /* We get here if we can't use the current device name */
+ if (!pat)
+ goto out;
+ if (!dev_valid_name(pat))
+ goto out;
+ if (strchr(pat, '%')) {
+ if (__dev_alloc_name(net, pat, buf) < 0)
+ goto out;
+ destname = buf;
+ } else
+ destname = pat;
+ if (__dev_get_by_name(net, destname))
+ goto out;
+ }
+
+ /*
+ * And now a mini version of register_netdevice unregister_netdevice.
+ */
+
+ /* If device is running close it first. */
+ dev_close(dev);
+
+ /* And unlink it from device chain */
+ err = -ENODEV;
+ unlist_netdevice(dev);
+
+ synchronize_net();
+
+ /* Shutdown queueing discipline. */
+ dev_shutdown(dev);
+
+ /* Notify protocols, that we are about to destroy
+ this device. They should clean all the things.
+ */
+ call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+
+ /*
+ * Flush the unicast and multicast chains
+ */
+ dev_addr_discard(dev);
+
+ /* Actually switch the network namespace */
+ dev->nd_net = net;
+
+ /* Assign the new device name */
+ if (destname != dev->name)
+ strcpy(dev->name, destname);
+
+ /* If there is an ifindex conflict assign a new one */
+ if (__dev_get_by_index(net, dev->ifindex)) {
+ int iflink = (dev->iflink == dev->ifindex);
+ dev->ifindex = dev_new_index(net);
+ if (iflink)
+ dev->iflink = dev->ifindex;
+ }
+
+ /* Fixup kobjects */
+ err = device_rename(&dev->dev, dev->name);
+ WARN_ON(err);
+
+ /* Add the device back in the hashes */
+ list_netdevice(dev);
+
+ /* Notify protocols, that a new device appeared. */
+ call_netdevice_notifiers(NETDEV_REGISTER, dev);
+
+ synchronize_net();
+ err = 0;
+out:
+ return err;
+}
+
static int dev_cpu_callback(struct notifier_block *nfb,
unsigned long action,
void *ocpu)
@@ -3826,9 +4115,11 @@ static int dev_cpu_callback(struct notifier_block *nfb,
#ifdef CONFIG_NET_DMA
/**
- * net_dma_rebalance -
- * This is called when the number of channels allocated to the net_dma_client
- * changes. The net_dma_client tries to have one DMA channel per CPU.
+ * net_dma_rebalance - try to maintain one DMA channel per CPU
+ * @net_dma: DMA client and associated data (lock, channels, channel_mask)
+ *
+ * This is called when the number of channels allocated to the net_dma client
+ * changes. The net_dma client tries to have one DMA channel per CPU.
*/
static void net_dma_rebalance(struct net_dma *net_dma)
@@ -3865,7 +4156,7 @@ static void net_dma_rebalance(struct net_dma *net_dma)
* netdev_dma_event - event callback for the net_dma_client
* @client: should always be net_dma_client
* @chan: DMA channel for the event
- * @event: event type
+ * @state: DMA state to be handled
*/
static enum dma_state_client
netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
@@ -3932,6 +4223,121 @@ static int __init netdev_dma_register(void)
static int __init netdev_dma_register(void) { return -ENODEV; }
#endif /* CONFIG_NET_DMA */
+/**
+ * netdev_compute_feature - compute conjunction of two feature sets
+ * @all: first feature set
+ * @one: second feature set
+ *
+ * Computes a new feature set after adding a device with feature set
+ * @one to the master device with current feature set @all. Returns
+ * the new feature set.
+ */
+int netdev_compute_features(unsigned long all, unsigned long one)
+{
+ /* if device needs checksumming, downgrade to hw checksumming */
+ if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
+ all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
+
+ /* if device can't do all checksum, downgrade to ipv4/ipv6 */
+ if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM))
+ all ^= NETIF_F_HW_CSUM
+ | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+
+ if (one & NETIF_F_GSO)
+ one |= NETIF_F_GSO_SOFTWARE;
+ one |= NETIF_F_GSO;
+
+ /* If even one device supports robust GSO, enable it for all. */
+ if (one & NETIF_F_GSO_ROBUST)
+ all |= NETIF_F_GSO_ROBUST;
+
+ all &= one | NETIF_F_LLTX;
+
+ if (!(all & NETIF_F_ALL_CSUM))
+ all &= ~NETIF_F_SG;
+ if (!(all & NETIF_F_SG))
+ all &= ~NETIF_F_GSO_MASK;
+
+ return all;
+}
+EXPORT_SYMBOL(netdev_compute_features);
+
+static struct hlist_head *netdev_create_hash(void)
+{
+ int i;
+ struct hlist_head *hash;
+
+ hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
+ if (hash != NULL)
+ for (i = 0; i < NETDEV_HASHENTRIES; i++)
+ INIT_HLIST_HEAD(&hash[i]);
+
+ return hash;
+}
+
+/* Initialize per network namespace state */
+static int __net_init netdev_init(struct net *net)
+{
+ INIT_LIST_HEAD(&net->dev_base_head);
+ rwlock_init(&dev_base_lock);
+
+ net->dev_name_head = netdev_create_hash();
+ if (net->dev_name_head == NULL)
+ goto err_name;
+
+ net->dev_index_head = netdev_create_hash();
+ if (net->dev_index_head == NULL)
+ goto err_idx;
+
+ return 0;
+
+err_idx:
+ kfree(net->dev_name_head);
+err_name:
+ return -ENOMEM;
+}
+
+static void __net_exit netdev_exit(struct net *net)
+{
+ kfree(net->dev_name_head);
+ kfree(net->dev_index_head);
+}
+
+static struct pernet_operations __net_initdata netdev_net_ops = {
+ .init = netdev_init,
+ .exit = netdev_exit,
+};
+
+static void __net_exit default_device_exit(struct net *net)
+{
+ struct net_device *dev, *next;
+ /*
+ * Push all migratable of the network devices back to the
+ * initial network namespace
+ */
+ rtnl_lock();
+ for_each_netdev_safe(net, dev, next) {
+ int err;
+
+ /* Ignore unmoveable devices (i.e. loopback) */
+ if (dev->features & NETIF_F_NETNS_LOCAL)
+ continue;
+
+ /* Push remaing network devices to init_net */
+ err = dev_change_net_namespace(dev, &init_net, "dev%d");
+ if (err) {
+ printk(KERN_WARNING "%s: failed to move %s to init_net: %d\n",
+ __func__, dev->name, err);
+ unregister_netdevice(dev);
+ }
+ }
+ rtnl_unlock();
+}
+
+static struct pernet_operations __net_initdata default_device_ops = {
+ .exit = default_device_exit,
+};
+
/*
* Initialize the DEV module. At boot time this walks the device list and
* unhooks any devices that fail to initialise (normally hardware not
@@ -3952,18 +4358,18 @@ static int __init net_dev_init(void)
if (dev_proc_init())
goto out;
- if (netdev_sysfs_init())
+ if (netdev_kobject_init())
goto out;
INIT_LIST_HEAD(&ptype_all);
for (i = 0; i < 16; i++)
INIT_LIST_HEAD(&ptype_base[i]);
- for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
- INIT_HLIST_HEAD(&dev_name_head[i]);
+ if (register_pernet_subsys(&netdev_net_ops))
+ goto out;
- for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
- INIT_HLIST_HEAD(&dev_index_head[i]);
+ if (register_pernet_device(&default_device_ops))
+ goto out;
/*
* Initialise the packet receive queues.
@@ -3976,10 +4382,9 @@ static int __init net_dev_init(void)
skb_queue_head_init(&queue->input_pkt_queue);
queue->completion_queue = NULL;
INIT_LIST_HEAD(&queue->poll_list);
- set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
- queue->backlog_dev.weight = weight_p;
- queue->backlog_dev.poll = process_backlog;
- atomic_set(&queue->backlog_dev.refcnt, 1);
+
+ queue->backlog.poll = process_backlog;
+ queue->backlog.weight = weight_p;
}
netdev_dma_register();
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index 235a2a8a0d0..15241cf48af 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -41,6 +41,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
+#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
@@ -116,11 +117,13 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
*/
int dev_mc_sync(struct net_device *to, struct net_device *from)
{
- struct dev_addr_list *da;
+ struct dev_addr_list *da, *next;
int err = 0;
netif_tx_lock_bh(to);
- for (da = from->mc_list; da != NULL; da = da->next) {
+ da = from->mc_list;
+ while (da != NULL) {
+ next = da->next;
if (!da->da_synced) {
err = __dev_addr_add(&to->mc_list, &to->mc_count,
da->da_addr, da->da_addrlen, 0);
@@ -134,6 +137,7 @@ int dev_mc_sync(struct net_device *to, struct net_device *from)
__dev_addr_delete(&from->mc_list, &from->mc_count,
da->da_addr, da->da_addrlen, 0);
}
+ da = next;
}
if (!err)
__dev_set_rx_mode(to);
@@ -156,12 +160,14 @@ EXPORT_SYMBOL(dev_mc_sync);
*/
void dev_mc_unsync(struct net_device *to, struct net_device *from)
{
- struct dev_addr_list *da;
+ struct dev_addr_list *da, *next;
netif_tx_lock_bh(from);
netif_tx_lock_bh(to);
- for (da = from->mc_list; da != NULL; da = da->next) {
+ da = from->mc_list;
+ while (da != NULL) {
+ next = da->next;
if (!da->da_synced)
continue;
__dev_addr_delete(&to->mc_list, &to->mc_count,
@@ -169,6 +175,7 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from)
da->da_synced = 0;
__dev_addr_delete(&from->mc_list, &from->mc_count,
da->da_addr, da->da_addrlen, 0);
+ da = next;
}
__dev_set_rx_mode(to);
@@ -177,26 +184,15 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from)
}
EXPORT_SYMBOL(dev_mc_unsync);
-/*
- * Discard multicast list when a device is downed
- */
-
-void dev_mc_discard(struct net_device *dev)
-{
- netif_tx_lock_bh(dev);
- __dev_addr_discard(&dev->mc_list);
- dev->mc_count = 0;
- netif_tx_unlock_bh(dev);
-}
-
#ifdef CONFIG_PROC_FS
static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos)
{
+ struct net *net = seq->private;
struct net_device *dev;
loff_t off = 0;
read_lock(&dev_base_lock);
- for_each_netdev(dev) {
+ for_each_netdev(net, dev) {
if (off++ == *pos)
return dev;
}
@@ -245,7 +241,26 @@ static const struct seq_operations dev_mc_seq_ops = {
static int dev_mc_seq_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &dev_mc_seq_ops);
+ struct seq_file *seq;
+ int res;
+ res = seq_open(file, &dev_mc_seq_ops);
+ if (!res) {
+ seq = file->private_data;
+ seq->private = get_proc_net(inode);
+ if (!seq->private) {
+ seq_release(inode, file);
+ res = -ENXIO;
+ }
+ }
+ return res;
+}
+
+static int dev_mc_seq_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq = file->private_data;
+ struct net *net = seq->private;
+ put_net(net);
+ return seq_release(inode, file);
}
static const struct file_operations dev_mc_seq_fops = {
@@ -253,14 +268,31 @@ static const struct file_operations dev_mc_seq_fops = {
.open = dev_mc_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = dev_mc_seq_release,
};
#endif
+static int __net_init dev_mc_net_init(struct net *net)
+{
+ if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops))
+ return -ENOMEM;
+ return 0;
+}
+
+static void __net_exit dev_mc_net_exit(struct net *net)
+{
+ proc_net_remove(net, "dev_mcast");
+}
+
+static struct pernet_operations __net_initdata dev_mc_net_ops = {
+ .init = dev_mc_net_init,
+ .exit = dev_mc_net_exit,
+};
+
void __init dev_mcast_init(void)
{
- proc_net_fops_create("dev_mcast", 0, &dev_mc_seq_fops);
+ register_pernet_subsys(&dev_mc_net_ops);
}
EXPORT_SYMBOL(dev_mc_add);
diff --git a/net/core/dst.c b/net/core/dst.c
index c6a05879d58..16958e64e57 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -9,59 +9,84 @@
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/workqueue.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/string.h>
#include <linux/types.h>
+#include <net/net_namespace.h>
+#include <net/net_namespace.h>
#include <net/dst.h>
-/* Locking strategy:
- * 1) Garbage collection state of dead destination cache
- * entries is protected by dst_lock.
- * 2) GC is run only from BH context, and is the only remover
- * of entries.
- * 3) Entries are added to the garbage list from both BH
- * and non-BH context, so local BH disabling is needed.
- * 4) All operations modify state, so a spinlock is used.
+/*
+ * Theory of operations:
+ * 1) We use a list, protected by a spinlock, to add
+ * new entries from both BH and non-BH context.
+ * 2) In order to keep spinlock held for a small delay,
+ * we use a second list where are stored long lived
+ * entries, that are handled by the garbage collect thread
+ * fired by a workqueue.
+ * 3) This list is guarded by a mutex,
+ * so that the gc_task and dst_dev_event() can be synchronized.
*/
-static struct dst_entry *dst_garbage_list;
#if RT_CACHE_DEBUG >= 2
static atomic_t dst_total = ATOMIC_INIT(0);
#endif
-static DEFINE_SPINLOCK(dst_lock);
-static unsigned long dst_gc_timer_expires;
-static unsigned long dst_gc_timer_inc = DST_GC_MAX;
-static void dst_run_gc(unsigned long);
+/*
+ * We want to keep lock & list close together
+ * to dirty as few cache lines as possible in __dst_free().
+ * As this is not a very strong hint, we dont force an alignment on SMP.
+ */
+static struct {
+ spinlock_t lock;
+ struct dst_entry *list;
+ unsigned long timer_inc;
+ unsigned long timer_expires;
+} dst_garbage = {
+ .lock = __SPIN_LOCK_UNLOCKED(dst_garbage.lock),
+ .timer_inc = DST_GC_MAX,
+};
+static void dst_gc_task(struct work_struct *work);
static void ___dst_free(struct dst_entry * dst);
-static DEFINE_TIMER(dst_gc_timer, dst_run_gc, DST_GC_MIN, 0);
+static DECLARE_DELAYED_WORK(dst_gc_work, dst_gc_task);
-static void dst_run_gc(unsigned long dummy)
+static DEFINE_MUTEX(dst_gc_mutex);
+/*
+ * long lived entries are maintained in this list, guarded by dst_gc_mutex
+ */
+static struct dst_entry *dst_busy_list;
+
+static void dst_gc_task(struct work_struct *work)
{
int delayed = 0;
- int work_performed;
- struct dst_entry * dst, **dstp;
+ int work_performed = 0;
+ unsigned long expires = ~0L;
+ struct dst_entry *dst, *next, head;
+ struct dst_entry *last = &head;
+#if RT_CACHE_DEBUG >= 2
+ ktime_t time_start = ktime_get();
+ struct timespec elapsed;
+#endif
- if (!spin_trylock(&dst_lock)) {
- mod_timer(&dst_gc_timer, jiffies + HZ/10);
- return;
- }
+ mutex_lock(&dst_gc_mutex);
+ next = dst_busy_list;
- del_timer(&dst_gc_timer);
- dstp = &dst_garbage_list;
- work_performed = 0;
- while ((dst = *dstp) != NULL) {
- if (atomic_read(&dst->__refcnt)) {
- dstp = &dst->next;
+loop:
+ while ((dst = next) != NULL) {
+ next = dst->next;
+ prefetch(&next->next);
+ if (likely(atomic_read(&dst->__refcnt))) {
+ last->next = dst;
+ last = dst;
delayed++;
continue;
}
- *dstp = dst->next;
- work_performed = 1;
+ work_performed++;
dst = dst_destroy(dst);
if (dst) {
@@ -77,38 +102,56 @@ static void dst_run_gc(unsigned long dummy)
continue;
___dst_free(dst);
- dst->next = *dstp;
- *dstp = dst;
- dstp = &dst->next;
+ dst->next = next;
+ next = dst;
}
}
- if (!dst_garbage_list) {
- dst_gc_timer_inc = DST_GC_MAX;
- goto out;
+
+ spin_lock_bh(&dst_garbage.lock);
+ next = dst_garbage.list;
+ if (next) {
+ dst_garbage.list = NULL;
+ spin_unlock_bh(&dst_garbage.lock);
+ goto loop;
}
- if (!work_performed) {
- if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX)
- dst_gc_timer_expires = DST_GC_MAX;
- dst_gc_timer_inc += DST_GC_INC;
- } else {
- dst_gc_timer_inc = DST_GC_INC;
- dst_gc_timer_expires = DST_GC_MIN;
+ last->next = NULL;
+ dst_busy_list = head.next;
+ if (!dst_busy_list)
+ dst_garbage.timer_inc = DST_GC_MAX;
+ else {
+ /*
+ * if we freed less than 1/10 of delayed entries,
+ * we can sleep longer.
+ */
+ if (work_performed <= delayed/10) {
+ dst_garbage.timer_expires += dst_garbage.timer_inc;
+ if (dst_garbage.timer_expires > DST_GC_MAX)
+ dst_garbage.timer_expires = DST_GC_MAX;
+ dst_garbage.timer_inc += DST_GC_INC;
+ } else {
+ dst_garbage.timer_inc = DST_GC_INC;
+ dst_garbage.timer_expires = DST_GC_MIN;
+ }
+ expires = dst_garbage.timer_expires;
+ /*
+ * if the next desired timer is more than 4 seconds in the future
+ * then round the timer to whole seconds
+ */
+ if (expires > 4*HZ)
+ expires = round_jiffies_relative(expires);
+ schedule_delayed_work(&dst_gc_work, expires);
}
+
+ spin_unlock_bh(&dst_garbage.lock);
+ mutex_unlock(&dst_gc_mutex);
#if RT_CACHE_DEBUG >= 2
- printk("dst_total: %d/%d %ld\n",
- atomic_read(&dst_total), delayed, dst_gc_timer_expires);
+ elapsed = ktime_to_timespec(ktime_sub(ktime_get(), time_start));
+ printk(KERN_DEBUG "dst_total: %d delayed: %d work_perf: %d"
+ " expires: %lu elapsed: %lu us\n",
+ atomic_read(&dst_total), delayed, work_performed,
+ expires,
+ elapsed.tv_sec * USEC_PER_SEC + elapsed.tv_nsec / NSEC_PER_USEC);
#endif
- /* if the next desired timer is more than 4 seconds in the future
- * then round the timer to whole seconds
- */
- if (dst_gc_timer_expires > 4*HZ)
- mod_timer(&dst_gc_timer,
- round_jiffies(jiffies + dst_gc_timer_expires));
- else
- mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires);
-
-out:
- spin_unlock(&dst_lock);
}
static int dst_discard(struct sk_buff *skb)
@@ -153,16 +196,16 @@ static void ___dst_free(struct dst_entry * dst)
void __dst_free(struct dst_entry * dst)
{
- spin_lock_bh(&dst_lock);
+ spin_lock_bh(&dst_garbage.lock);
___dst_free(dst);
- dst->next = dst_garbage_list;
- dst_garbage_list = dst;
- if (dst_gc_timer_inc > DST_GC_INC) {
- dst_gc_timer_inc = DST_GC_INC;
- dst_gc_timer_expires = DST_GC_MIN;
- mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires);
+ dst->next = dst_garbage.list;
+ dst_garbage.list = dst;
+ if (dst_garbage.timer_inc > DST_GC_INC) {
+ dst_garbage.timer_inc = DST_GC_INC;
+ dst_garbage.timer_expires = DST_GC_MIN;
+ schedule_delayed_work(&dst_gc_work, dst_garbage.timer_expires);
}
- spin_unlock_bh(&dst_lock);
+ spin_unlock_bh(&dst_garbage.lock);
}
struct dst_entry *dst_destroy(struct dst_entry * dst)
@@ -236,13 +279,13 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
if (!unregister) {
dst->input = dst->output = dst_discard;
} else {
- dst->dev = &loopback_dev;
- dev_hold(&loopback_dev);
+ dst->dev = init_net.loopback_dev;
+ dev_hold(dst->dev);
dev_put(dev);
if (dst->neighbour && dst->neighbour->dev == dev) {
- dst->neighbour->dev = &loopback_dev;
+ dst->neighbour->dev = init_net.loopback_dev;
dev_put(dev);
- dev_hold(&loopback_dev);
+ dev_hold(dst->neighbour->dev);
}
}
}
@@ -250,16 +293,33 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = ptr;
- struct dst_entry *dst;
+ struct dst_entry *dst, *last = NULL;
+
+ if (dev->nd_net != &init_net)
+ return NOTIFY_DONE;
switch (event) {
case NETDEV_UNREGISTER:
case NETDEV_DOWN:
- spin_lock_bh(&dst_lock);
- for (dst = dst_garbage_list; dst; dst = dst->next) {
+ mutex_lock(&dst_gc_mutex);
+ for (dst = dst_busy_list; dst; dst = dst->next) {
+ last = dst;
+ dst_ifdown(dst, dev, event != NETDEV_DOWN);
+ }
+
+ spin_lock_bh(&dst_garbage.lock);
+ dst = dst_garbage.list;
+ dst_garbage.list = NULL;
+ spin_unlock_bh(&dst_garbage.lock);
+
+ if (last)
+ last->next = dst;
+ else
+ dst_busy_list = dst;
+ for (; dst; dst = dst->next) {
dst_ifdown(dst, dev, event != NETDEV_DOWN);
}
- spin_unlock_bh(&dst_lock);
+ mutex_unlock(&dst_gc_mutex);
break;
}
return NOTIFY_DONE;
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 0b531e98ec3..1163eb2256d 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -3,10 +3,12 @@
* Copyright (c) 2003 Matthew Wilcox <matthew@wil.cx>
*
* This file is where we call all the ethtool_ops commands to get
- * the information ethtool needs. We fall back to calling do_ioctl()
- * for drivers which haven't been converted to ethtool_ops yet.
+ * the information ethtool needs.
*
- * It's GPL, stupid.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
*/
#include <linux/module.h>
@@ -93,18 +95,6 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data)
return 0;
}
-int ethtool_op_get_perm_addr(struct net_device *dev, struct ethtool_perm_addr *addr, u8 *data)
-{
- unsigned char len = dev->addr_len;
- if ( addr->size < len )
- return -ETOOSMALL;
-
- addr->size = len;
- memcpy(data, dev->perm_addr, len);
- return 0;
-}
-
-
u32 ethtool_op_get_ufo(struct net_device *dev)
{
return (dev->features & NETIF_F_UFO) != 0;
@@ -119,6 +109,32 @@ int ethtool_op_set_ufo(struct net_device *dev, u32 data)
return 0;
}
+/* the following list of flags are the same as their associated
+ * NETIF_F_xxx values in include/linux/netdevice.h
+ */
+static const u32 flags_dup_features =
+ ETH_FLAG_LRO;
+
+u32 ethtool_op_get_flags(struct net_device *dev)
+{
+ /* in the future, this function will probably contain additional
+ * handling for flags which are not so easily handled
+ * by a simple masking operation
+ */
+
+ return dev->features & flags_dup_features;
+}
+
+int ethtool_op_set_flags(struct net_device *dev, u32 data)
+{
+ if (data & ETH_FLAG_LRO)
+ dev->features |= NETIF_F_LRO;
+ else
+ dev->features &= ~NETIF_F_LRO;
+
+ return 0;
+}
+
/* Handlers for each ethtool command */
static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
@@ -163,10 +179,26 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
info.cmd = ETHTOOL_GDRVINFO;
ops->get_drvinfo(dev, &info);
- if (ops->self_test_count)
- info.testinfo_len = ops->self_test_count(dev);
- if (ops->get_stats_count)
- info.n_stats = ops->get_stats_count(dev);
+ if (ops->get_sset_count) {
+ int rc;
+
+ rc = ops->get_sset_count(dev, ETH_SS_TEST);
+ if (rc >= 0)
+ info.testinfo_len = rc;
+ rc = ops->get_sset_count(dev, ETH_SS_STATS);
+ if (rc >= 0)
+ info.n_stats = rc;
+ rc = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS);
+ if (rc >= 0)
+ info.n_priv_flags = rc;
+ } else {
+ /* code path for obsolete hooks */
+
+ if (ops->self_test_count)
+ info.testinfo_len = ops->self_test_count(dev);
+ if (ops->get_stats_count)
+ info.n_stats = ops->get_stats_count(dev);
+ }
if (ops->get_regs_len)
info.regdump_len = ops->get_regs_len(dev);
if (ops->get_eeprom_len)
@@ -240,34 +272,6 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr)
return dev->ethtool_ops->set_wol(dev, &wol);
}
-static int ethtool_get_msglevel(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_value edata = { ETHTOOL_GMSGLVL };
-
- if (!dev->ethtool_ops->get_msglevel)
- return -EOPNOTSUPP;
-
- edata.data = dev->ethtool_ops->get_msglevel(dev);
-
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_set_msglevel(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_value edata;
-
- if (!dev->ethtool_ops->set_msglevel)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
- return -EFAULT;
-
- dev->ethtool_ops->set_msglevel(dev, edata.data);
- return 0;
-}
-
static int ethtool_nway_reset(struct net_device *dev)
{
if (!dev->ethtool_ops->nway_reset)
@@ -276,20 +280,6 @@ static int ethtool_nway_reset(struct net_device *dev)
return dev->ethtool_ops->nway_reset(dev);
}
-static int ethtool_get_link(struct net_device *dev, void __user *useraddr)
-{
- struct ethtool_value edata = { ETHTOOL_GLINK };
-
- if (!dev->ethtool_ops->get_link)
- return -EOPNOTSUPP;
-
- edata.data = dev->ethtool_ops->get_link(dev);
-
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
- return 0;
-}
-
static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr)
{
struct ethtool_eeprom eeprom;
@@ -457,48 +447,6 @@ static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr)
return dev->ethtool_ops->set_pauseparam(dev, &pauseparam);
}
-static int ethtool_get_rx_csum(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_value edata = { ETHTOOL_GRXCSUM };
-
- if (!dev->ethtool_ops->get_rx_csum)
- return -EOPNOTSUPP;
-
- edata.data = dev->ethtool_ops->get_rx_csum(dev);
-
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_value edata;
-
- if (!dev->ethtool_ops->set_rx_csum)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
- return -EFAULT;
-
- dev->ethtool_ops->set_rx_csum(dev, edata.data);
- return 0;
-}
-
-static int ethtool_get_tx_csum(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_value edata = { ETHTOOL_GTXCSUM };
-
- if (!dev->ethtool_ops->get_tx_csum)
- return -EOPNOTSUPP;
-
- edata.data = dev->ethtool_ops->get_tx_csum(dev);
-
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
- return 0;
-}
-
static int __ethtool_set_sg(struct net_device *dev, u32 data)
{
int err;
@@ -537,20 +485,6 @@ static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr)
return dev->ethtool_ops->set_tx_csum(dev, edata.data);
}
-static int ethtool_get_sg(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_value edata = { ETHTOOL_GSG };
-
- if (!dev->ethtool_ops->get_sg)
- return -EOPNOTSUPP;
-
- edata.data = dev->ethtool_ops->get_sg(dev);
-
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
- return 0;
-}
-
static int ethtool_set_sg(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata;
@@ -568,20 +502,6 @@ static int ethtool_set_sg(struct net_device *dev, char __user *useraddr)
return __ethtool_set_sg(dev, edata.data);
}
-static int ethtool_get_tso(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_value edata = { ETHTOOL_GTSO };
-
- if (!dev->ethtool_ops->get_tso)
- return -EOPNOTSUPP;
-
- edata.data = dev->ethtool_ops->get_tso(dev);
-
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
- return 0;
-}
-
static int ethtool_set_tso(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata;
@@ -598,18 +518,6 @@ static int ethtool_set_tso(struct net_device *dev, char __user *useraddr)
return dev->ethtool_ops->set_tso(dev, edata.data);
}
-static int ethtool_get_ufo(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_value edata = { ETHTOOL_GUFO };
-
- if (!dev->ethtool_ops->get_ufo)
- return -EOPNOTSUPP;
- edata.data = dev->ethtool_ops->get_ufo(dev);
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
- return 0;
-}
-
static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata;
@@ -653,16 +561,27 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
struct ethtool_test test;
const struct ethtool_ops *ops = dev->ethtool_ops;
u64 *data;
- int ret;
+ int ret, test_len;
- if (!ops->self_test || !ops->self_test_count)
+ if (!ops->self_test)
+ return -EOPNOTSUPP;
+ if (!ops->get_sset_count && !ops->self_test_count)
return -EOPNOTSUPP;
+ if (ops->get_sset_count)
+ test_len = ops->get_sset_count(dev, ETH_SS_TEST);
+ else
+ /* code path for obsolete hook */
+ test_len = ops->self_test_count(dev);
+ if (test_len < 0)
+ return test_len;
+ WARN_ON(test_len == 0);
+
if (copy_from_user(&test, useraddr, sizeof(test)))
return -EFAULT;
- test.len = ops->self_test_count(dev);
- data = kmalloc(test.len * sizeof(u64), GFP_USER);
+ test.len = test_len;
+ data = kmalloc(test_len * sizeof(u64), GFP_USER);
if (!data)
return -ENOMEM;
@@ -694,19 +613,29 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
return -EFAULT;
- switch (gstrings.string_set) {
- case ETH_SS_TEST:
- if (!ops->self_test_count)
- return -EOPNOTSUPP;
- gstrings.len = ops->self_test_count(dev);
- break;
- case ETH_SS_STATS:
- if (!ops->get_stats_count)
- return -EOPNOTSUPP;
- gstrings.len = ops->get_stats_count(dev);
- break;
- default:
- return -EINVAL;
+ if (ops->get_sset_count) {
+ ret = ops->get_sset_count(dev, gstrings.string_set);
+ if (ret < 0)
+ return ret;
+
+ gstrings.len = ret;
+ } else {
+ /* code path for obsolete hooks */
+
+ switch (gstrings.string_set) {
+ case ETH_SS_TEST:
+ if (!ops->self_test_count)
+ return -EOPNOTSUPP;
+ gstrings.len = ops->self_test_count(dev);
+ break;
+ case ETH_SS_STATS:
+ if (!ops->get_stats_count)
+ return -EOPNOTSUPP;
+ gstrings.len = ops->get_stats_count(dev);
+ break;
+ default:
+ return -EINVAL;
+ }
}
data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
@@ -746,16 +675,27 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
struct ethtool_stats stats;
const struct ethtool_ops *ops = dev->ethtool_ops;
u64 *data;
- int ret;
+ int ret, n_stats;
- if (!ops->get_ethtool_stats || !ops->get_stats_count)
+ if (!ops->get_ethtool_stats)
return -EOPNOTSUPP;
+ if (!ops->get_sset_count && !ops->get_stats_count)
+ return -EOPNOTSUPP;
+
+ if (ops->get_sset_count)
+ n_stats = ops->get_sset_count(dev, ETH_SS_STATS);
+ else
+ /* code path for obsolete hook */
+ n_stats = ops->get_stats_count(dev);
+ if (n_stats < 0)
+ return n_stats;
+ WARN_ON(n_stats == 0);
if (copy_from_user(&stats, useraddr, sizeof(stats)))
return -EFAULT;
- stats.n_stats = ops->get_stats_count(dev);
- data = kmalloc(stats.n_stats * sizeof(u64), GFP_USER);
+ stats.n_stats = n_stats;
+ data = kmalloc(n_stats * sizeof(u64), GFP_USER);
if (!data)
return -ENOMEM;
@@ -777,41 +717,71 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr)
{
struct ethtool_perm_addr epaddr;
- u8 *data;
- int ret;
- if (!dev->ethtool_ops->get_perm_addr)
+ if (copy_from_user(&epaddr, useraddr, sizeof(epaddr)))
+ return -EFAULT;
+
+ if (epaddr.size < dev->addr_len)
+ return -ETOOSMALL;
+ epaddr.size = dev->addr_len;
+
+ if (copy_to_user(useraddr, &epaddr, sizeof(epaddr)))
+ return -EFAULT;
+ useraddr += sizeof(epaddr);
+ if (copy_to_user(useraddr, dev->perm_addr, epaddr.size))
+ return -EFAULT;
+ return 0;
+}
+
+static int ethtool_get_value(struct net_device *dev, char __user *useraddr,
+ u32 cmd, u32 (*actor)(struct net_device *))
+{
+ struct ethtool_value edata = { cmd };
+
+ if (!actor)
return -EOPNOTSUPP;
- if (copy_from_user(&epaddr,useraddr,sizeof(epaddr)))
+ edata.data = actor(dev);
+
+ if (copy_to_user(useraddr, &edata, sizeof(edata)))
return -EFAULT;
+ return 0;
+}
- data = kmalloc(epaddr.size, GFP_USER);
- if (!data)
- return -ENOMEM;
+static int ethtool_set_value_void(struct net_device *dev, char __user *useraddr,
+ void (*actor)(struct net_device *, u32))
+{
+ struct ethtool_value edata;
- ret = dev->ethtool_ops->get_perm_addr(dev,&epaddr,data);
- if (ret)
- return ret;
+ if (!actor)
+ return -EOPNOTSUPP;
- ret = -EFAULT;
- if (copy_to_user(useraddr, &epaddr, sizeof(epaddr)))
- goto out;
- useraddr += sizeof(epaddr);
- if (copy_to_user(useraddr, data, epaddr.size))
- goto out;
- ret = 0;
+ if (copy_from_user(&edata, useraddr, sizeof(edata)))
+ return -EFAULT;
- out:
- kfree(data);
- return ret;
+ actor(dev, edata.data);
+ return 0;
+}
+
+static int ethtool_set_value(struct net_device *dev, char __user *useraddr,
+ int (*actor)(struct net_device *, u32))
+{
+ struct ethtool_value edata;
+
+ if (!actor)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&edata, useraddr, sizeof(edata)))
+ return -EFAULT;
+
+ return actor(dev, edata.data);
}
/* The main entry point in this file. Called from net/core/dev.c */
-int dev_ethtool(struct ifreq *ifr)
+int dev_ethtool(struct net *net, struct ifreq *ifr)
{
- struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
+ struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
void __user *useraddr = ifr->ifr_data;
u32 ethcmd;
int rc;
@@ -821,7 +791,7 @@ int dev_ethtool(struct ifreq *ifr)
return -ENODEV;
if (!dev->ethtool_ops)
- goto ioctl;
+ return -EOPNOTSUPP;
if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd)))
return -EFAULT;
@@ -841,6 +811,8 @@ int dev_ethtool(struct ifreq *ifr)
case ETHTOOL_GPERMADDR:
case ETHTOOL_GUFO:
case ETHTOOL_GGSO:
+ case ETHTOOL_GFLAGS:
+ case ETHTOOL_GPFLAGS:
break;
default:
if (!capable(CAP_NET_ADMIN))
@@ -873,16 +845,19 @@ int dev_ethtool(struct ifreq *ifr)
rc = ethtool_set_wol(dev, useraddr);
break;
case ETHTOOL_GMSGLVL:
- rc = ethtool_get_msglevel(dev, useraddr);
+ rc = ethtool_get_value(dev, useraddr, ethcmd,
+ dev->ethtool_ops->get_msglevel);
break;
case ETHTOOL_SMSGLVL:
- rc = ethtool_set_msglevel(dev, useraddr);
+ rc = ethtool_set_value_void(dev, useraddr,
+ dev->ethtool_ops->set_msglevel);
break;
case ETHTOOL_NWAY_RST:
rc = ethtool_nway_reset(dev);
break;
case ETHTOOL_GLINK:
- rc = ethtool_get_link(dev, useraddr);
+ rc = ethtool_get_value(dev, useraddr, ethcmd,
+ dev->ethtool_ops->get_link);
break;
case ETHTOOL_GEEPROM:
rc = ethtool_get_eeprom(dev, useraddr);
@@ -909,25 +884,36 @@ int dev_ethtool(struct ifreq *ifr)
rc = ethtool_set_pauseparam(dev, useraddr);
break;
case ETHTOOL_GRXCSUM:
- rc = ethtool_get_rx_csum(dev, useraddr);
+ rc = ethtool_get_value(dev, useraddr, ethcmd,
+ dev->ethtool_ops->get_rx_csum);
break;
case ETHTOOL_SRXCSUM:
- rc = ethtool_set_rx_csum(dev, useraddr);
+ rc = ethtool_set_value(dev, useraddr,
+ dev->ethtool_ops->set_rx_csum);
break;
case ETHTOOL_GTXCSUM:
- rc = ethtool_get_tx_csum(dev, useraddr);
+ rc = ethtool_get_value(dev, useraddr, ethcmd,
+ (dev->ethtool_ops->get_tx_csum ?
+ dev->ethtool_ops->get_tx_csum :
+ ethtool_op_get_tx_csum));
break;
case ETHTOOL_STXCSUM:
rc = ethtool_set_tx_csum(dev, useraddr);
break;
case ETHTOOL_GSG:
- rc = ethtool_get_sg(dev, useraddr);
+ rc = ethtool_get_value(dev, useraddr, ethcmd,
+ (dev->ethtool_ops->get_sg ?
+ dev->ethtool_ops->get_sg :
+ ethtool_op_get_sg));
break;
case ETHTOOL_SSG:
rc = ethtool_set_sg(dev, useraddr);
break;
case ETHTOOL_GTSO:
- rc = ethtool_get_tso(dev, useraddr);
+ rc = ethtool_get_value(dev, useraddr, ethcmd,
+ (dev->ethtool_ops->get_tso ?
+ dev->ethtool_ops->get_tso :
+ ethtool_op_get_tso));
break;
case ETHTOOL_STSO:
rc = ethtool_set_tso(dev, useraddr);
@@ -948,7 +934,10 @@ int dev_ethtool(struct ifreq *ifr)
rc = ethtool_get_perm_addr(dev, useraddr);
break;
case ETHTOOL_GUFO:
- rc = ethtool_get_ufo(dev, useraddr);
+ rc = ethtool_get_value(dev, useraddr, ethcmd,
+ (dev->ethtool_ops->get_ufo ?
+ dev->ethtool_ops->get_ufo :
+ ethtool_op_get_ufo));
break;
case ETHTOOL_SUFO:
rc = ethtool_set_ufo(dev, useraddr);
@@ -959,8 +948,24 @@ int dev_ethtool(struct ifreq *ifr)
case ETHTOOL_SGSO:
rc = ethtool_set_gso(dev, useraddr);
break;
+ case ETHTOOL_GFLAGS:
+ rc = ethtool_get_value(dev, useraddr, ethcmd,
+ dev->ethtool_ops->get_flags);
+ break;
+ case ETHTOOL_SFLAGS:
+ rc = ethtool_set_value(dev, useraddr,
+ dev->ethtool_ops->set_flags);
+ break;
+ case ETHTOOL_GPFLAGS:
+ rc = ethtool_get_value(dev, useraddr, ethcmd,
+ dev->ethtool_ops->get_priv_flags);
+ break;
+ case ETHTOOL_SPFLAGS:
+ rc = ethtool_set_value(dev, useraddr,
+ dev->ethtool_ops->set_priv_flags);
+ break;
default:
- rc = -EOPNOTSUPP;
+ rc = -EOPNOTSUPP;
}
if (dev->ethtool_ops->complete)
@@ -970,20 +975,9 @@ int dev_ethtool(struct ifreq *ifr)
netdev_features_change(dev);
return rc;
-
- ioctl:
- /* Keep existing behaviour for the moment. */
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
-
- if (dev->do_ioctl)
- return dev->do_ioctl(dev, ifr, SIOCETHTOOL);
- return -EOPNOTSUPP;
}
-EXPORT_SYMBOL(dev_ethtool);
EXPORT_SYMBOL(ethtool_op_get_link);
-EXPORT_SYMBOL_GPL(ethtool_op_get_perm_addr);
EXPORT_SYMBOL(ethtool_op_get_sg);
EXPORT_SYMBOL(ethtool_op_get_tso);
EXPORT_SYMBOL(ethtool_op_get_tx_csum);
@@ -994,3 +988,5 @@ EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum);
EXPORT_SYMBOL(ethtool_op_set_ufo);
EXPORT_SYMBOL(ethtool_op_get_ufo);
+EXPORT_SYMBOL(ethtool_op_set_flags);
+EXPORT_SYMBOL(ethtool_op_get_flags);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 8c5474e1668..13de6f53f09 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -11,6 +11,8 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/list.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
#include <net/fib_rules.h>
static LIST_HEAD(rules_ops);
@@ -82,7 +84,7 @@ static void cleanup_ops(struct fib_rules_ops *ops)
{
struct fib_rule *rule, *tmp;
- list_for_each_entry_safe(rule, tmp, ops->rules_list, list) {
+ list_for_each_entry_safe(rule, tmp, &ops->rules_list, list) {
list_del_rcu(&rule->list);
fib_rule_put(rule);
}
@@ -137,7 +139,7 @@ int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
rcu_read_lock();
- list_for_each_entry_rcu(rule, ops->rules_list, list) {
+ list_for_each_entry_rcu(rule, &ops->rules_list, list) {
jumped:
if (!fib_rule_match(rule, ops, fl, flags))
continue;
@@ -197,6 +199,7 @@ errout:
static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
+ struct net *net = skb->sk->sk_net;
struct fib_rule_hdr *frh = nlmsg_data(nlh);
struct fib_rules_ops *ops = NULL;
struct fib_rule *rule, *r, *last = NULL;
@@ -234,7 +237,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
rule->ifindex = -1;
nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ);
- dev = __dev_get_by_name(rule->ifname);
+ dev = __dev_get_by_name(net, rule->ifname);
if (dev)
rule->ifindex = dev->ifindex;
}
@@ -268,7 +271,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (rule->target <= rule->pref)
goto errout_free;
- list_for_each_entry(r, ops->rules_list, list) {
+ list_for_each_entry(r, &ops->rules_list, list) {
if (r->pref == rule->target) {
rule->ctarget = r;
break;
@@ -284,7 +287,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (err < 0)
goto errout_free;
- list_for_each_entry(r, ops->rules_list, list) {
+ list_for_each_entry(r, &ops->rules_list, list) {
if (r->pref > rule->pref)
break;
last = r;
@@ -297,7 +300,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
* There are unresolved goto rules in the list, check if
* any of them are pointing to this new rule.
*/
- list_for_each_entry(r, ops->rules_list, list) {
+ list_for_each_entry(r, &ops->rules_list, list) {
if (r->action == FR_ACT_GOTO &&
r->target == rule->pref) {
BUG_ON(r->ctarget != NULL);
@@ -317,7 +320,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (last)
list_add_rcu(&rule->list, &last->list);
else
- list_add_rcu(&rule->list, ops->rules_list);
+ list_add_rcu(&rule->list, &ops->rules_list);
notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
flush_route_cache(ops);
@@ -356,7 +359,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (err < 0)
goto errout;
- list_for_each_entry(rule, ops->rules_list, list) {
+ list_for_each_entry(rule, &ops->rules_list, list) {
if (frh->action && (frh->action != rule->action))
continue;
@@ -399,7 +402,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
* actually been added.
*/
if (ops->nr_goto_rules > 0) {
- list_for_each_entry(tmp, ops->rules_list, list) {
+ list_for_each_entry(tmp, &ops->rules_list, list) {
if (tmp->ctarget == rule) {
rcu_assign_pointer(tmp->ctarget, NULL);
ops->unresolved_rules++;
@@ -495,7 +498,7 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
int idx = 0;
struct fib_rule *rule;
- list_for_each_entry(rule, ops->rules_list, list) {
+ list_for_each_entry(rule, &ops->rules_list, list) {
if (idx < cb->args[1])
goto skip;
@@ -596,18 +599,21 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
struct net_device *dev = ptr;
struct fib_rules_ops *ops;
+ if (dev->nd_net != &init_net)
+ return NOTIFY_DONE;
+
ASSERT_RTNL();
rcu_read_lock();
switch (event) {
case NETDEV_REGISTER:
list_for_each_entry(ops, &rules_ops, list)
- attach_rules(ops->rules_list, dev);
+ attach_rules(&ops->rules_list, dev);
break;
case NETDEV_UNREGISTER:
list_for_each_entry(ops, &rules_ops, list)
- detach_rules(ops->rules_list, dev);
+ detach_rules(&ops->rules_list, dev);
break;
}
diff --git a/net/core/flow.c b/net/core/flow.c
index 051430545a0..0ab5234b17d 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -350,7 +350,7 @@ static int __init flow_cache_init(void)
flow_cachep = kmem_cache_create("flow_cache",
sizeof(struct flow_cache_entry),
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
- NULL, NULL);
+ NULL);
flow_hash_shift = 10;
flow_lwm = 2 * flow_hash_size;
flow_hwm = 4 * flow_hash_size;
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index cc84d8d8a3c..590a767b029 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -79,27 +79,27 @@
struct gen_estimator
{
- struct gen_estimator *next;
+ struct list_head list;
struct gnet_stats_basic *bstats;
struct gnet_stats_rate_est *rate_est;
spinlock_t *stats_lock;
- unsigned interval;
int ewma_log;
u64 last_bytes;
u32 last_packets;
u32 avpps;
u32 avbps;
+ struct rcu_head e_rcu;
};
struct gen_estimator_head
{
struct timer_list timer;
- struct gen_estimator *list;
+ struct list_head list;
};
static struct gen_estimator_head elist[EST_MAX_INTERVAL+1];
-/* Estimator array lock */
+/* Protects against NULL dereference */
static DEFINE_RWLOCK(est_lock);
static void est_timer(unsigned long arg)
@@ -107,13 +107,17 @@ static void est_timer(unsigned long arg)
int idx = (int)arg;
struct gen_estimator *e;
- read_lock(&est_lock);
- for (e = elist[idx].list; e; e = e->next) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(e, &elist[idx].list, list) {
u64 nbytes;
u32 npackets;
u32 rate;
spin_lock(e->stats_lock);
+ read_lock(&est_lock);
+ if (e->bstats == NULL)
+ goto skip;
+
nbytes = e->bstats->bytes;
npackets = e->bstats->packets;
rate = (nbytes - e->last_bytes)<<(7 - idx);
@@ -125,12 +129,14 @@ static void est_timer(unsigned long arg)
e->last_packets = npackets;
e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log;
e->rate_est->pps = (e->avpps+0x1FF)>>10;
+skip:
+ read_unlock(&est_lock);
spin_unlock(e->stats_lock);
}
- if (elist[idx].list != NULL)
+ if (!list_empty(&elist[idx].list))
mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
- read_unlock(&est_lock);
+ rcu_read_unlock();
}
/**
@@ -147,12 +153,17 @@ static void est_timer(unsigned long arg)
* &rate_est with the statistics lock grabed during this period.
*
* Returns 0 on success or a negative error code.
+ *
+ * NOTE: Called under rtnl_mutex
*/
int gen_new_estimator(struct gnet_stats_basic *bstats,
- struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct rtattr *opt)
+ struct gnet_stats_rate_est *rate_est,
+ spinlock_t *stats_lock,
+ struct rtattr *opt)
{
struct gen_estimator *est;
struct gnet_estimator *parm = RTA_DATA(opt);
+ int idx;
if (RTA_PAYLOAD(opt) < sizeof(*parm))
return -EINVAL;
@@ -164,7 +175,7 @@ int gen_new_estimator(struct gnet_stats_basic *bstats,
if (est == NULL)
return -ENOBUFS;
- est->interval = parm->interval + 2;
+ idx = parm->interval + 2;
est->bstats = bstats;
est->rate_est = rate_est;
est->stats_lock = stats_lock;
@@ -174,20 +185,25 @@ int gen_new_estimator(struct gnet_stats_basic *bstats,
est->last_packets = bstats->packets;
est->avpps = rate_est->pps<<10;
- est->next = elist[est->interval].list;
- if (est->next == NULL) {
- init_timer(&elist[est->interval].timer);
- elist[est->interval].timer.data = est->interval;
- elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4);
- elist[est->interval].timer.function = est_timer;
- add_timer(&elist[est->interval].timer);
+ if (!elist[idx].timer.function) {
+ INIT_LIST_HEAD(&elist[idx].list);
+ setup_timer(&elist[idx].timer, est_timer, idx);
}
- write_lock_bh(&est_lock);
- elist[est->interval].list = est;
- write_unlock_bh(&est_lock);
+
+ if (list_empty(&elist[idx].list))
+ mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
+
+ list_add_rcu(&est->list, &elist[idx].list);
return 0;
}
+static void __gen_kill_estimator(struct rcu_head *head)
+{
+ struct gen_estimator *e = container_of(head,
+ struct gen_estimator, e_rcu);
+ kfree(e);
+}
+
/**
* gen_kill_estimator - remove a rate estimator
* @bstats: basic statistics
@@ -195,31 +211,32 @@ int gen_new_estimator(struct gnet_stats_basic *bstats,
*
* Removes the rate estimator specified by &bstats and &rate_est
* and deletes the timer.
+ *
+ * NOTE: Called under rtnl_mutex
*/
void gen_kill_estimator(struct gnet_stats_basic *bstats,
struct gnet_stats_rate_est *rate_est)
{
int idx;
- struct gen_estimator *est, **pest;
+ struct gen_estimator *e, *n;
for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
- int killed = 0;
- pest = &elist[idx].list;
- while ((est=*pest) != NULL) {
- if (est->rate_est != rate_est || est->bstats != bstats) {
- pest = &est->next;
+
+ /* Skip non initialized indexes */
+ if (!elist[idx].timer.function)
+ continue;
+
+ list_for_each_entry_safe(e, n, &elist[idx].list, list) {
+ if (e->rate_est != rate_est || e->bstats != bstats)
continue;
- }
write_lock_bh(&est_lock);
- *pest = est->next;
+ e->bstats = NULL;
write_unlock_bh(&est_lock);
- kfree(est);
- killed++;
+ list_del_rcu(&e->list);
+ call_rcu(&e->e_rcu, __gen_kill_estimator);
}
- if (killed && elist[idx].list == NULL)
- del_timer(&elist[idx].timer);
}
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 9df26a07f06..c52df858d0b 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -25,6 +25,7 @@
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
+#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/sock.h>
@@ -33,6 +34,7 @@
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
+#include <linux/log2.h>
#define NEIGH_DEBUG 1
@@ -54,9 +56,8 @@
#define PNEIGH_HASHMASK 0xF
static void neigh_timer_handler(unsigned long arg);
-#ifdef CONFIG_ARPD
-static void neigh_app_notify(struct neighbour *n);
-#endif
+static void __neigh_notify(struct neighbour *n, int type, int flags);
+static void neigh_update_notify(struct neighbour *neigh);
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);
@@ -104,6 +105,15 @@ static int neigh_blackhole(struct sk_buff *skb)
return -ENETDOWN;
}
+static void neigh_cleanup_and_release(struct neighbour *neigh)
+{
+ if (neigh->parms->neigh_cleanup)
+ neigh->parms->neigh_cleanup(neigh);
+
+ __neigh_notify(neigh, RTM_DELNEIGH, 0);
+ neigh_release(neigh);
+}
+
/*
* It is random distribution in the interval (1/2)*base...(3/2)*base.
* It corresponds to default IPv6 settings and is not overridable,
@@ -140,9 +150,7 @@ static int neigh_forced_gc(struct neigh_table *tbl)
n->dead = 1;
shrunk = 1;
write_unlock(&n->lock);
- if (n->parms->neigh_cleanup)
- n->parms->neigh_cleanup(n);
- neigh_release(n);
+ neigh_cleanup_and_release(n);
continue;
}
write_unlock(&n->lock);
@@ -213,9 +221,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
NEIGH_PRINTK2("neigh %p is stray.\n", n);
}
write_unlock(&n->lock);
- if (n->parms->neigh_cleanup)
- n->parms->neigh_cleanup(n);
- neigh_release(n);
+ neigh_cleanup_and_release(n);
}
}
}
@@ -311,7 +317,7 @@ static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries)
NEIGH_CACHE_STAT_INC(tbl, hash_grows);
- BUG_ON(new_entries & (new_entries - 1));
+ BUG_ON(!is_power_of_2(new_entries));
new_hash = neigh_hash_alloc(new_entries);
if (!new_hash)
return;
@@ -676,9 +682,7 @@ static void neigh_periodic_timer(unsigned long arg)
*np = n->next;
n->dead = 1;
write_unlock(&n->lock);
- if (n->parms->neigh_cleanup)
- n->parms->neigh_cleanup(n);
- neigh_release(n);
+ neigh_cleanup_and_release(n);
continue;
}
write_unlock(&n->lock);
@@ -827,13 +831,10 @@ static void neigh_timer_handler(unsigned long arg)
out:
write_unlock(&neigh->lock);
}
+
if (notify)
- call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
+ neigh_update_notify(neigh);
-#ifdef CONFIG_ARPD
- if (notify && neigh->parms->app_probes)
- neigh_app_notify(neigh);
-#endif
neigh_release(neigh);
}
@@ -896,8 +897,8 @@ out_unlock_bh:
static void neigh_update_hhs(struct neighbour *neigh)
{
struct hh_cache *hh;
- void (*update)(struct hh_cache*, struct net_device*, unsigned char *) =
- neigh->dev->header_cache_update;
+ void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
+ = neigh->dev->header_ops->cache_update;
if (update) {
for (hh = neigh->hh; hh; hh = hh->hh_next) {
@@ -1062,11 +1063,8 @@ out:
write_unlock_bh(&neigh->lock);
if (notify)
- call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
-#ifdef CONFIG_ARPD
- if (notify && neigh->parms->app_probes)
- neigh_app_notify(neigh);
-#endif
+ neigh_update_notify(neigh);
+
return err;
}
@@ -1097,7 +1095,8 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
hh->hh_type = protocol;
atomic_set(&hh->hh_refcnt, 0);
hh->hh_next = NULL;
- if (dev->hard_header_cache(n, hh)) {
+
+ if (dev->header_ops->cache(n, hh)) {
kfree(hh);
hh = NULL;
} else {
@@ -1127,10 +1126,9 @@ int neigh_compat_output(struct sk_buff *skb)
__skb_pull(skb, skb_network_offset(skb));
- if (dev->hard_header &&
- dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
- skb->len) < 0 &&
- dev->rebuild_header(skb))
+ if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
+ skb->len) < 0 &&
+ dev->header_ops->rebuild(skb))
return 0;
return dev_queue_xmit(skb);
@@ -1152,17 +1150,17 @@ int neigh_resolve_output(struct sk_buff *skb)
if (!neigh_event_send(neigh, skb)) {
int err;
struct net_device *dev = neigh->dev;
- if (dev->hard_header_cache && !dst->hh) {
+ if (dev->header_ops->cache && !dst->hh) {
write_lock_bh(&neigh->lock);
if (!dst->hh)
neigh_hh_init(neigh, dst, dst->ops->protocol);
- err = dev->hard_header(skb, dev, ntohs(skb->protocol),
- neigh->ha, NULL, skb->len);
+ err = dev_hard_header(skb, dev, ntohs(skb->protocol),
+ neigh->ha, NULL, skb->len);
write_unlock_bh(&neigh->lock);
} else {
read_lock_bh(&neigh->lock);
- err = dev->hard_header(skb, dev, ntohs(skb->protocol),
- neigh->ha, NULL, skb->len);
+ err = dev_hard_header(skb, dev, ntohs(skb->protocol),
+ neigh->ha, NULL, skb->len);
read_unlock_bh(&neigh->lock);
}
if (err >= 0)
@@ -1193,8 +1191,8 @@ int neigh_connected_output(struct sk_buff *skb)
__skb_pull(skb, skb_network_offset(skb));
read_lock_bh(&neigh->lock);
- err = dev->hard_header(skb, dev, ntohs(skb->protocol),
- neigh->ha, NULL, skb->len);
+ err = dev_hard_header(skb, dev, ntohs(skb->protocol),
+ neigh->ha, NULL, skb->len);
read_unlock_bh(&neigh->lock);
if (err >= 0)
err = neigh->ops->queue_xmit(skb);
@@ -1347,13 +1345,13 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
tbl->kmem_cachep =
kmem_cache_create(tbl->id, tbl->entry_size, 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
- NULL, NULL);
+ NULL);
tbl->stats = alloc_percpu(struct neigh_statistics);
if (!tbl->stats)
panic("cannot create neighbour cache statistics");
#ifdef CONFIG_PROC_FS
- tbl->pde = create_proc_entry(tbl->id, 0, proc_net_stat);
+ tbl->pde = create_proc_entry(tbl->id, 0, init_net.proc_net_stat);
if (!tbl->pde)
panic("cannot create neighbour proc dir entry");
tbl->pde->proc_fops = &neigh_stat_seq_fops;
@@ -1443,6 +1441,7 @@ int neigh_table_clear(struct neigh_table *tbl)
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
+ struct net *net = skb->sk->sk_net;
struct ndmsg *ndm;
struct nlattr *dst_attr;
struct neigh_table *tbl;
@@ -1458,7 +1457,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
ndm = nlmsg_data(nlh);
if (ndm->ndm_ifindex) {
- dev = dev_get_by_index(ndm->ndm_ifindex);
+ dev = dev_get_by_index(net, ndm->ndm_ifindex);
if (dev == NULL) {
err = -ENODEV;
goto out;
@@ -1508,6 +1507,7 @@ out:
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
+ struct net *net = skb->sk->sk_net;
struct ndmsg *ndm;
struct nlattr *tb[NDA_MAX+1];
struct neigh_table *tbl;
@@ -1524,7 +1524,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
ndm = nlmsg_data(nlh);
if (ndm->ndm_ifindex) {
- dev = dev_get_by_index(ndm->ndm_ifindex);
+ dev = dev_get_by_index(net, ndm->ndm_ifindex);
if (dev == NULL) {
err = -ENODEV;
goto out;
@@ -1999,6 +1999,11 @@ nla_put_failure:
return -EMSGSIZE;
}
+static void neigh_update_notify(struct neighbour *neigh)
+{
+ call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
+ __neigh_notify(neigh, RTM_NEWNEIGH, 0);
+}
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
struct netlink_callback *cb)
@@ -2094,11 +2099,8 @@ void __neigh_for_each_release(struct neigh_table *tbl,
} else
np = &n->next;
write_unlock(&n->lock);
- if (release) {
- if (n->parms->neigh_cleanup)
- n->parms->neigh_cleanup(n);
- neigh_release(n);
- }
+ if (release)
+ neigh_cleanup_and_release(n);
}
}
}
@@ -2421,7 +2423,6 @@ static const struct file_operations neigh_stat_seq_fops = {
#endif /* CONFIG_PROC_FS */
-#ifdef CONFIG_ARPD
static inline size_t neigh_nlmsg_size(void)
{
return NLMSG_ALIGN(sizeof(struct ndmsg))
@@ -2453,16 +2454,11 @@ errout:
rtnl_set_sk_err(RTNLGRP_NEIGH, err);
}
+#ifdef CONFIG_ARPD
void neigh_app_ns(struct neighbour *n)
{
__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
-
-static void neigh_app_notify(struct neighbour *n)
-{
- __neigh_notify(n, RTM_NEWNEIGH, 0);
-}
-
#endif /* CONFIG_ARPD */
#ifdef CONFIG_SYSCTL
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 5c19b0646d7..6628e457ddc 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -18,6 +18,7 @@
#include <linux/wireless.h>
#include <net/iw_handler.h>
+#ifdef CONFIG_SYSFS
static const char fmt_hex[] = "%#x\n";
static const char fmt_long_hex[] = "%#lx\n";
static const char fmt_dec[] = "%d\n";
@@ -216,20 +217,6 @@ static ssize_t store_tx_queue_len(struct device *dev,
return netdev_store(dev, attr, buf, len, change_tx_queue_len);
}
-NETDEVICE_SHOW(weight, fmt_dec);
-
-static int change_weight(struct net_device *net, unsigned long new_weight)
-{
- net->weight = new_weight;
- return 0;
-}
-
-static ssize_t store_weight(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t len)
-{
- return netdev_store(dev, attr, buf, len, change_weight);
-}
-
static struct device_attribute net_class_attributes[] = {
__ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
__ATTR(iflink, S_IRUGO, show_iflink, NULL),
@@ -246,7 +233,6 @@ static struct device_attribute net_class_attributes[] = {
__ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags),
__ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len,
store_tx_queue_len),
- __ATTR(weight, S_IRUGO | S_IWUSR, show_weight, store_weight),
{}
};
@@ -407,29 +393,25 @@ static struct attribute_group wireless_group = {
};
#endif
+#endif /* CONFIG_SYSFS */
+
#ifdef CONFIG_HOTPLUG
-static int netdev_uevent(struct device *d, char **envp,
- int num_envp, char *buf, int size)
+static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
{
struct net_device *dev = to_net_dev(d);
- int retval, len = 0, i = 0;
+ int retval;
/* pass interface to uevent. */
- retval = add_uevent_var(envp, num_envp, &i,
- buf, size, &len,
- "INTERFACE=%s", dev->name);
+ retval = add_uevent_var(env, "INTERFACE=%s", dev->name);
if (retval)
goto exit;
/* pass ifindex to uevent.
* ifindex is useful as it won't change (interface name may change)
* and is what RtNetlink uses natively. */
- retval = add_uevent_var(envp, num_envp, &i,
- buf, size, &len,
- "IFINDEX=%d", dev->ifindex);
+ retval = add_uevent_var(env, "IFINDEX=%d", dev->ifindex);
exit:
- envp[i] = NULL;
return retval;
}
#endif
@@ -450,7 +432,9 @@ static void netdev_release(struct device *d)
static struct class net_class = {
.name = "net",
.dev_release = netdev_release,
+#ifdef CONFIG_SYSFS
.dev_attrs = net_class_attributes,
+#endif /* CONFIG_SYSFS */
#ifdef CONFIG_HOTPLUG
.dev_uevent = netdev_uevent,
#endif
@@ -459,7 +443,7 @@ static struct class net_class = {
/* Delete sysfs entries but hold kobject reference until after all
* netdev references are gone.
*/
-void netdev_unregister_sysfs(struct net_device * net)
+void netdev_unregister_kobject(struct net_device * net)
{
struct device *dev = &(net->dev);
@@ -468,7 +452,7 @@ void netdev_unregister_sysfs(struct net_device * net)
}
/* Create sysfs entries for network device. */
-int netdev_register_sysfs(struct net_device *net)
+int netdev_register_kobject(struct net_device *net)
{
struct device *dev = &(net->dev);
struct attribute_group **groups = net->sysfs_groups;
@@ -481,6 +465,7 @@ int netdev_register_sysfs(struct net_device *net)
BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ);
strlcpy(dev->bus_id, net->name, BUS_ID_SIZE);
+#ifdef CONFIG_SYSFS
if (net->get_stats)
*groups++ = &netstat_group;
@@ -488,11 +473,12 @@ int netdev_register_sysfs(struct net_device *net)
if (net->wireless_handlers && net->wireless_handlers->get_wireless_stats)
*groups++ = &wireless_group;
#endif
+#endif /* CONFIG_SYSFS */
return device_add(dev);
}
-int netdev_sysfs_init(void)
+int netdev_kobject_init(void)
{
return class_register(&net_class);
}
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
new file mode 100644
index 00000000000..6f71db8c442
--- /dev/null
+++ b/net/core/net_namespace.c
@@ -0,0 +1,317 @@
+#include <linux/workqueue.h>
+#include <linux/rtnetlink.h>
+#include <linux/cache.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <net/net_namespace.h>
+
+/*
+ * Our network namespace constructor/destructor lists
+ */
+
+static LIST_HEAD(pernet_list);
+static struct list_head *first_device = &pernet_list;
+static DEFINE_MUTEX(net_mutex);
+
+LIST_HEAD(net_namespace_list);
+
+static struct kmem_cache *net_cachep;
+
+struct net init_net;
+EXPORT_SYMBOL_GPL(init_net);
+
+static struct net *net_alloc(void)
+{
+ return kmem_cache_zalloc(net_cachep, GFP_KERNEL);
+}
+
+static void net_free(struct net *net)
+{
+ if (!net)
+ return;
+
+ if (unlikely(atomic_read(&net->use_count) != 0)) {
+ printk(KERN_EMERG "network namespace not free! Usage: %d\n",
+ atomic_read(&net->use_count));
+ return;
+ }
+
+ kmem_cache_free(net_cachep, net);
+}
+
+static void cleanup_net(struct work_struct *work)
+{
+ struct pernet_operations *ops;
+ struct net *net;
+
+ net = container_of(work, struct net, work);
+
+ mutex_lock(&net_mutex);
+
+ /* Don't let anyone else find us. */
+ rtnl_lock();
+ list_del(&net->list);
+ rtnl_unlock();
+
+ /* Run all of the network namespace exit methods */
+ list_for_each_entry_reverse(ops, &pernet_list, list) {
+ if (ops->exit)
+ ops->exit(net);
+ }
+
+ mutex_unlock(&net_mutex);
+
+ /* Ensure there are no outstanding rcu callbacks using this
+ * network namespace.
+ */
+ rcu_barrier();
+
+ /* Finally it is safe to free my network namespace structure */
+ net_free(net);
+}
+
+
+void __put_net(struct net *net)
+{
+ /* Cleanup the network namespace in process context */
+ INIT_WORK(&net->work, cleanup_net);
+ schedule_work(&net->work);
+}
+EXPORT_SYMBOL_GPL(__put_net);
+
+/*
+ * setup_net runs the initializers for the network namespace object.
+ */
+static int setup_net(struct net *net)
+{
+ /* Must be called with net_mutex held */
+ struct pernet_operations *ops;
+ int error;
+
+ atomic_set(&net->count, 1);
+ atomic_set(&net->use_count, 0);
+
+ error = 0;
+ list_for_each_entry(ops, &pernet_list, list) {
+ if (ops->init) {
+ error = ops->init(net);
+ if (error < 0)
+ goto out_undo;
+ }
+ }
+out:
+ return error;
+
+out_undo:
+ /* Walk through the list backwards calling the exit functions
+ * for the pernet modules whose init functions did not fail.
+ */
+ list_for_each_entry_continue_reverse(ops, &pernet_list, list) {
+ if (ops->exit)
+ ops->exit(net);
+ }
+ goto out;
+}
+
+struct net *copy_net_ns(unsigned long flags, struct net *old_net)
+{
+ struct net *new_net = NULL;
+ int err;
+
+ get_net(old_net);
+
+ if (!(flags & CLONE_NEWNET))
+ return old_net;
+
+#ifndef CONFIG_NET_NS
+ return ERR_PTR(-EINVAL);
+#endif
+
+ err = -ENOMEM;
+ new_net = net_alloc();
+ if (!new_net)
+ goto out;
+
+ mutex_lock(&net_mutex);
+ err = setup_net(new_net);
+ if (err)
+ goto out_unlock;
+
+ rtnl_lock();
+ list_add_tail(&new_net->list, &net_namespace_list);
+ rtnl_unlock();
+
+
+out_unlock:
+ mutex_unlock(&net_mutex);
+out:
+ put_net(old_net);
+ if (err) {
+ net_free(new_net);
+ new_net = ERR_PTR(err);
+ }
+ return new_net;
+}
+
+static int __init net_ns_init(void)
+{
+ int err;
+
+ printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net));
+ net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
+ SMP_CACHE_BYTES,
+ SLAB_PANIC, NULL);
+ mutex_lock(&net_mutex);
+ err = setup_net(&init_net);
+
+ rtnl_lock();
+ list_add_tail(&init_net.list, &net_namespace_list);
+ rtnl_unlock();
+
+ mutex_unlock(&net_mutex);
+ if (err)
+ panic("Could not setup the initial network namespace");
+
+ return 0;
+}
+
+pure_initcall(net_ns_init);
+
+static int register_pernet_operations(struct list_head *list,
+ struct pernet_operations *ops)
+{
+ struct net *net, *undo_net;
+ int error;
+
+ error = 0;
+ list_add_tail(&ops->list, list);
+ for_each_net(net) {
+ if (ops->init) {
+ error = ops->init(net);
+ if (error)
+ goto out_undo;
+ }
+ }
+out:
+ return error;
+
+out_undo:
+ /* If I have an error cleanup all namespaces I initialized */
+ list_del(&ops->list);
+ for_each_net(undo_net) {
+ if (undo_net == net)
+ goto undone;
+ if (ops->exit)
+ ops->exit(undo_net);
+ }
+undone:
+ goto out;
+}
+
+static void unregister_pernet_operations(struct pernet_operations *ops)
+{
+ struct net *net;
+
+ list_del(&ops->list);
+ for_each_net(net)
+ if (ops->exit)
+ ops->exit(net);
+}
+
+/**
+ * register_pernet_subsys - register a network namespace subsystem
+ * @ops: pernet operations structure for the subsystem
+ *
+ * Register a subsystem which has init and exit functions
+ * that are called when network namespaces are created and
+ * destroyed respectively.
+ *
+ * When registered all network namespace init functions are
+ * called for every existing network namespace. Allowing kernel
+ * modules to have a race free view of the set of network namespaces.
+ *
+ * When a new network namespace is created all of the init
+ * methods are called in the order in which they were registered.
+ *
+ * When a network namespace is destroyed all of the exit methods
+ * are called in the reverse of the order with which they were
+ * registered.
+ */
+int register_pernet_subsys(struct pernet_operations *ops)
+{
+ int error;
+ mutex_lock(&net_mutex);
+ error = register_pernet_operations(first_device, ops);
+ mutex_unlock(&net_mutex);
+ return error;
+}
+EXPORT_SYMBOL_GPL(register_pernet_subsys);
+
+/**
+ * unregister_pernet_subsys - unregister a network namespace subsystem
+ * @ops: pernet operations structure to manipulate
+ *
+ * Remove the pernet operations structure from the list to be
+ * used when network namespaces are created or destoryed. In
+ * addition run the exit method for all existing network
+ * namespaces.
+ */
+void unregister_pernet_subsys(struct pernet_operations *module)
+{
+ mutex_lock(&net_mutex);
+ unregister_pernet_operations(module);
+ mutex_unlock(&net_mutex);
+}
+EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
+
+/**
+ * register_pernet_device - register a network namespace device
+ * @ops: pernet operations structure for the subsystem
+ *
+ * Register a device which has init and exit functions
+ * that are called when network namespaces are created and
+ * destroyed respectively.
+ *
+ * When registered all network namespace init functions are
+ * called for every existing network namespace. Allowing kernel
+ * modules to have a race free view of the set of network namespaces.
+ *
+ * When a new network namespace is created all of the init
+ * methods are called in the order in which they were registered.
+ *
+ * When a network namespace is destroyed all of the exit methods
+ * are called in the reverse of the order with which they were
+ * registered.
+ */
+int register_pernet_device(struct pernet_operations *ops)
+{
+ int error;
+ mutex_lock(&net_mutex);
+ error = register_pernet_operations(&pernet_list, ops);
+ if (!error && (first_device == &pernet_list))
+ first_device = &ops->list;
+ mutex_unlock(&net_mutex);
+ return error;
+}
+EXPORT_SYMBOL_GPL(register_pernet_device);
+
+/**
+ * unregister_pernet_device - unregister a network namespace netdevice
+ * @ops: pernet operations structure to manipulate
+ *
+ * Remove the pernet operations structure from the list to be
+ * used when network namespaces are created or destoryed. In
+ * addition run the exit method for all existing network
+ * namespaces.
+ */
+void unregister_pernet_device(struct pernet_operations *ops)
+{
+ mutex_lock(&net_mutex);
+ if (&ops->list == first_device)
+ first_device = first_device->next;
+ unregister_pernet_operations(ops);
+ mutex_unlock(&net_mutex);
+}
+EXPORT_SYMBOL_GPL(unregister_pernet_device);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index de1b26aa572..95daba62496 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -119,19 +119,22 @@ static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
static void poll_napi(struct netpoll *np)
{
struct netpoll_info *npinfo = np->dev->npinfo;
+ struct napi_struct *napi;
int budget = 16;
- if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) &&
- npinfo->poll_owner != smp_processor_id() &&
- spin_trylock(&npinfo->poll_lock)) {
- npinfo->rx_flags |= NETPOLL_RX_DROP;
- atomic_inc(&trapped);
+ list_for_each_entry(napi, &np->dev->napi_list, dev_list) {
+ if (test_bit(NAPI_STATE_SCHED, &napi->state) &&
+ napi->poll_owner != smp_processor_id() &&
+ spin_trylock(&napi->poll_lock)) {
+ npinfo->rx_flags |= NETPOLL_RX_DROP;
+ atomic_inc(&trapped);
- np->dev->poll(np->dev, &budget);
+ napi->poll(napi, budget);
- atomic_dec(&trapped);
- npinfo->rx_flags &= ~NETPOLL_RX_DROP;
- spin_unlock(&npinfo->poll_lock);
+ atomic_dec(&trapped);
+ npinfo->rx_flags &= ~NETPOLL_RX_DROP;
+ spin_unlock(&napi->poll_lock);
+ }
}
}
@@ -157,7 +160,7 @@ void netpoll_poll(struct netpoll *np)
/* Process pending work on NIC */
np->dev->poll_controller(np->dev);
- if (np->dev->poll)
+ if (!list_empty(&np->dev->napi_list))
poll_napi(np);
service_arp_queue(np->dev->npinfo);
@@ -233,6 +236,17 @@ repeat:
return skb;
}
+static int netpoll_owner_active(struct net_device *dev)
+{
+ struct napi_struct *napi;
+
+ list_for_each_entry(napi, &dev->napi_list, dev_list) {
+ if (napi->poll_owner == smp_processor_id())
+ return 1;
+ }
+ return 0;
+}
+
static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
{
int status = NETDEV_TX_BUSY;
@@ -246,8 +260,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
}
/* don't get messages out of order, and no recursion */
- if (skb_queue_len(&npinfo->txq) == 0 &&
- npinfo->poll_owner != smp_processor_id()) {
+ if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
unsigned long flags;
local_irq_save(flags);
@@ -402,11 +415,9 @@ static void arp_reply(struct sk_buff *skb)
send_skb->protocol = htons(ETH_P_ARP);
/* Fill the device header for the ARP frame */
-
- if (np->dev->hard_header &&
- np->dev->hard_header(send_skb, skb->dev, ptype,
- sha, np->local_mac,
- send_skb->len) < 0) {
+ if (dev_hard_header(send_skb, skb->dev, ptype,
+ sha, np->local_mac,
+ send_skb->len) < 0) {
kfree_skb(send_skb);
return;
}
@@ -519,6 +530,23 @@ out:
return 0;
}
+void netpoll_print_options(struct netpoll *np)
+{
+ DECLARE_MAC_BUF(mac);
+ printk(KERN_INFO "%s: local port %d\n",
+ np->name, np->local_port);
+ printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
+ np->name, HIPQUAD(np->local_ip));
+ printk(KERN_INFO "%s: interface %s\n",
+ np->name, np->dev_name);
+ printk(KERN_INFO "%s: remote port %d\n",
+ np->name, np->remote_port);
+ printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n",
+ np->name, HIPQUAD(np->remote_ip));
+ printk(KERN_INFO "%s: remote ethernet address %s\n",
+ np->name, print_mac(mac, np->remote_mac));
+}
+
int netpoll_parse_options(struct netpoll *np, char *opt)
{
char *cur=opt, *delim;
@@ -531,7 +559,6 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
cur = delim;
}
cur++;
- printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port);
if (*cur != '/') {
if ((delim = strchr(cur, '/')) == NULL)
@@ -539,9 +566,6 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
*delim = 0;
np->local_ip = ntohl(in_aton(cur));
cur = delim;
-
- printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
- np->name, HIPQUAD(np->local_ip));
}
cur++;
@@ -555,8 +579,6 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
}
cur++;
- printk(KERN_INFO "%s: interface %s\n", np->name, np->dev_name);
-
if (*cur != '@') {
/* dst port */
if ((delim = strchr(cur, '@')) == NULL)
@@ -566,7 +588,6 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
cur = delim;
}
cur++;
- printk(KERN_INFO "%s: remote port %d\n", np->name, np->remote_port);
/* dst ip */
if ((delim = strchr(cur, '/')) == NULL)
@@ -575,9 +596,6 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
np->remote_ip = ntohl(in_aton(cur));
cur = delim + 1;
- printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n",
- np->name, HIPQUAD(np->remote_ip));
-
if (*cur != 0) {
/* MAC address */
if ((delim = strchr(cur, ':')) == NULL)
@@ -608,15 +626,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
np->remote_mac[5] = simple_strtol(cur, NULL, 16);
}
- printk(KERN_INFO "%s: remote ethernet address "
- "%02x:%02x:%02x:%02x:%02x:%02x\n",
- np->name,
- np->remote_mac[0],
- np->remote_mac[1],
- np->remote_mac[2],
- np->remote_mac[3],
- np->remote_mac[4],
- np->remote_mac[5]);
+ netpoll_print_options(np);
return 0;
@@ -635,7 +645,7 @@ int netpoll_setup(struct netpoll *np)
int err;
if (np->dev_name)
- ndev = dev_get_by_name(np->dev_name);
+ ndev = dev_get_by_name(&init_net, np->dev_name);
if (!ndev) {
printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
np->name, np->dev_name);
@@ -652,8 +662,6 @@ int netpoll_setup(struct netpoll *np)
npinfo->rx_flags = 0;
npinfo->rx_np = NULL;
- spin_lock_init(&npinfo->poll_lock);
- npinfo->poll_owner = -1;
spin_lock_init(&npinfo->rx_lock);
skb_queue_head_init(&npinfo->arp_tx);
@@ -820,6 +828,7 @@ void netpoll_set_trap(int trap)
EXPORT_SYMBOL(netpoll_set_trap);
EXPORT_SYMBOL(netpoll_trap);
+EXPORT_SYMBOL(netpoll_print_options);
EXPORT_SYMBOL(netpoll_parse_options);
EXPORT_SYMBOL(netpoll_setup);
EXPORT_SYMBOL(netpoll_cleanup);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index bca787fdbc5..2100c734b10 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -111,6 +111,9 @@
*
* 802.1Q/Q-in-Q support by Francesco Fondelli (FF) <francesco.fondelli@gmail.com>
*
+ * Fixed src_mac command to set source mac of packet to value specified in
+ * command by Adit Ranadive <adit.262@gmail.com>
+ *
*/
#include <linux/sys.h>
#include <linux/types.h>
@@ -149,6 +152,7 @@
#include <linux/wait.h>
#include <linux/etherdevice.h>
#include <linux/kthread.h>
+#include <net/net_namespace.h>
#include <net/checksum.h>
#include <net/ipv6.h>
#include <net/addrconf.h>
@@ -164,7 +168,7 @@
#include <asm/div64.h> /* do_div */
#include <asm/timex.h>
-#define VERSION "pktgen v2.68: Packet Generator for packet performance testing.\n"
+#define VERSION "pktgen v2.69: Packet Generator for packet performance testing.\n"
/* The buckets are exponential in 'width' */
#define LAT_BUCKETS_MAX 32
@@ -186,6 +190,7 @@
#define F_SVID_RND (1<<10) /* Random SVLAN ID */
#define F_FLOW_SEQ (1<<11) /* Sequential flows */
#define F_IPSEC_ON (1<<12) /* ipsec on for flows */
+#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */
/* Thread control flag bits */
#define T_TERMINATE (1<<0)
@@ -328,6 +333,7 @@ struct pktgen_dev {
__be32 cur_daddr;
__u16 cur_udp_dst;
__u16 cur_udp_src;
+ __u16 cur_queue_map;
__u32 cur_pkt_size;
__u8 hh[14];
@@ -355,6 +361,10 @@ struct pktgen_dev {
unsigned lflow; /* Flow length (config) */
unsigned nflows; /* accumulated flows (stats) */
unsigned curfl; /* current sequenced flow (state)*/
+
+ u16 queue_map_min;
+ u16 queue_map_max;
+
#ifdef CONFIG_XFRM
__u8 ipsmode; /* IPSEC mode (config) */
__u8 ipsproto; /* IPSEC type (config) */
@@ -375,12 +385,10 @@ struct pktgen_thread {
struct list_head th_list;
struct task_struct *tsk;
char result[512];
- u32 max_before_softirq; /* We'll call do_softirq to prevent starvation. */
/* Field for thread to receive "posted" events terminate, stop ifs etc. */
u32 control;
- int pid;
int cpu;
wait_queue_head_t queue;
@@ -567,7 +575,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user * buf,
pktgen_run_all_threads();
else
- printk("pktgen: Unknown command: %s\n", data);
+ printk(KERN_WARNING "pktgen: Unknown command: %s\n", data);
err = count;
@@ -591,11 +599,11 @@ static const struct file_operations pktgen_fops = {
static int pktgen_if_show(struct seq_file *seq, void *v)
{
- int i;
struct pktgen_dev *pkt_dev = seq->private;
__u64 sa;
__u64 stopped;
__u64 now = getCurUs();
+ DECLARE_MAC_BUF(mac);
seq_printf(seq,
"Params: count %llu min_pkt_size: %u max_pkt_size: %u\n",
@@ -611,6 +619,11 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
seq_printf(seq, " flows: %u flowlen: %u\n", pkt_dev->cflows,
pkt_dev->lflow);
+ seq_printf(seq,
+ " queue_map_min: %u queue_map_max: %u\n",
+ pkt_dev->queue_map_min,
+ pkt_dev->queue_map_max);
+
if (pkt_dev->flags & F_IPV6) {
char b1[128], b2[128], b3[128];
fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr);
@@ -635,19 +648,12 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
seq_puts(seq, " src_mac: ");
- if (is_zero_ether_addr(pkt_dev->src_mac))
- for (i = 0; i < 6; i++)
- seq_printf(seq, "%02X%s", pkt_dev->odev->dev_addr[i],
- i == 5 ? " " : ":");
- else
- for (i = 0; i < 6; i++)
- seq_printf(seq, "%02X%s", pkt_dev->src_mac[i],
- i == 5 ? " " : ":");
+ seq_printf(seq, "%s ",
+ print_mac(mac, is_zero_ether_addr(pkt_dev->src_mac) ?
+ pkt_dev->odev->dev_addr : pkt_dev->src_mac));
seq_printf(seq, "dst_mac: ");
- for (i = 0; i < 6; i++)
- seq_printf(seq, "%02X%s", pkt_dev->dst_mac[i],
- i == 5 ? "\n" : ":");
+ seq_printf(seq, "%s\n", print_mac(mac, pkt_dev->dst_mac));
seq_printf(seq,
" udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n",
@@ -707,6 +713,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
if (pkt_dev->flags & F_MPLS_RND)
seq_printf(seq, "MPLS_RND ");
+ if (pkt_dev->flags & F_QUEUE_MAP_RND)
+ seq_printf(seq, "QUEUE_MAP_RND ");
+
if (pkt_dev->cflows) {
if (pkt_dev->flags & F_FLOW_SEQ)
seq_printf(seq, "FLOW_SEQ "); /*in sequence flows*/
@@ -762,6 +771,8 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
seq_printf(seq, " cur_udp_dst: %d cur_udp_src: %d\n",
pkt_dev->cur_udp_dst, pkt_dev->cur_udp_src);
+ seq_printf(seq, " cur_queue_map: %u\n", pkt_dev->cur_queue_map);
+
seq_printf(seq, " flows: %u\n", pkt_dev->nflows);
if (pkt_dev->result[0])
@@ -908,14 +919,14 @@ static ssize_t pktgen_if_write(struct file *file,
pg_result = &(pkt_dev->result[0]);
if (count < 1) {
- printk("pktgen: wrong command format\n");
+ printk(KERN_WARNING "pktgen: wrong command format\n");
return -EINVAL;
}
max = count - i;
tmp = count_trail_chars(&user_buffer[i], max);
if (tmp < 0) {
- printk("pktgen: illegal format\n");
+ printk(KERN_WARNING "pktgen: illegal format\n");
return tmp;
}
i += tmp;
@@ -943,7 +954,7 @@ static ssize_t pktgen_if_write(struct file *file,
if (copy_from_user(tb, user_buffer, count))
return -EFAULT;
tb[count] = 0;
- printk("pktgen: %s,%lu buffer -:%s:-\n", name,
+ printk(KERN_DEBUG "pktgen: %s,%lu buffer -:%s:-\n", name,
(unsigned long)count, tb);
}
@@ -1213,6 +1224,11 @@ static ssize_t pktgen_if_write(struct file *file,
else if (strcmp(f, "FLOW_SEQ") == 0)
pkt_dev->flags |= F_FLOW_SEQ;
+ else if (strcmp(f, "QUEUE_MAP_RND") == 0)
+ pkt_dev->flags |= F_QUEUE_MAP_RND;
+
+ else if (strcmp(f, "!QUEUE_MAP_RND") == 0)
+ pkt_dev->flags &= ~F_QUEUE_MAP_RND;
#ifdef CONFIG_XFRM
else if (strcmp(f, "IPSEC") == 0)
pkt_dev->flags |= F_IPSEC_ON;
@@ -1248,7 +1264,7 @@ static ssize_t pktgen_if_write(struct file *file,
pkt_dev->cur_daddr = pkt_dev->daddr_min;
}
if (debug)
- printk("pktgen: dst_min set to: %s\n",
+ printk(KERN_DEBUG "pktgen: dst_min set to: %s\n",
pkt_dev->dst_min);
i += len;
sprintf(pg_result, "OK: dst_min=%s", pkt_dev->dst_min);
@@ -1271,7 +1287,7 @@ static ssize_t pktgen_if_write(struct file *file,
pkt_dev->cur_daddr = pkt_dev->daddr_max;
}
if (debug)
- printk("pktgen: dst_max set to: %s\n",
+ printk(KERN_DEBUG "pktgen: dst_max set to: %s\n",
pkt_dev->dst_max);
i += len;
sprintf(pg_result, "OK: dst_max=%s", pkt_dev->dst_max);
@@ -1294,7 +1310,7 @@ static ssize_t pktgen_if_write(struct file *file,
ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->in6_daddr);
if (debug)
- printk("pktgen: dst6 set to: %s\n", buf);
+ printk(KERN_DEBUG "pktgen: dst6 set to: %s\n", buf);
i += len;
sprintf(pg_result, "OK: dst6=%s", buf);
@@ -1317,7 +1333,7 @@ static ssize_t pktgen_if_write(struct file *file,
ipv6_addr_copy(&pkt_dev->cur_in6_daddr,
&pkt_dev->min_in6_daddr);
if (debug)
- printk("pktgen: dst6_min set to: %s\n", buf);
+ printk(KERN_DEBUG "pktgen: dst6_min set to: %s\n", buf);
i += len;
sprintf(pg_result, "OK: dst6_min=%s", buf);
@@ -1338,7 +1354,7 @@ static ssize_t pktgen_if_write(struct file *file,
fmt_ip6(buf, pkt_dev->max_in6_daddr.s6_addr);
if (debug)
- printk("pktgen: dst6_max set to: %s\n", buf);
+ printk(KERN_DEBUG "pktgen: dst6_max set to: %s\n", buf);
i += len;
sprintf(pg_result, "OK: dst6_max=%s", buf);
@@ -1361,7 +1377,7 @@ static ssize_t pktgen_if_write(struct file *file,
ipv6_addr_copy(&pkt_dev->cur_in6_saddr, &pkt_dev->in6_saddr);
if (debug)
- printk("pktgen: src6 set to: %s\n", buf);
+ printk(KERN_DEBUG "pktgen: src6 set to: %s\n", buf);
i += len;
sprintf(pg_result, "OK: src6=%s", buf);
@@ -1382,7 +1398,7 @@ static ssize_t pktgen_if_write(struct file *file,
pkt_dev->cur_saddr = pkt_dev->saddr_min;
}
if (debug)
- printk("pktgen: src_min set to: %s\n",
+ printk(KERN_DEBUG "pktgen: src_min set to: %s\n",
pkt_dev->src_min);
i += len;
sprintf(pg_result, "OK: src_min=%s", pkt_dev->src_min);
@@ -1403,7 +1419,7 @@ static ssize_t pktgen_if_write(struct file *file,
pkt_dev->cur_saddr = pkt_dev->saddr_max;
}
if (debug)
- printk("pktgen: src_max set to: %s\n",
+ printk(KERN_DEBUG "pktgen: src_max set to: %s\n",
pkt_dev->src_max);
i += len;
sprintf(pg_result, "OK: src_max=%s", pkt_dev->src_max);
@@ -1452,8 +1468,11 @@ static ssize_t pktgen_if_write(struct file *file,
}
if (!strcmp(name, "src_mac")) {
char *v = valstr;
+ unsigned char old_smac[ETH_ALEN];
unsigned char *m = pkt_dev->src_mac;
+ memcpy(old_smac, pkt_dev->src_mac, ETH_ALEN);
+
len = strn_len(&user_buffer[i], sizeof(valstr) - 1);
if (len < 0) {
return len;
@@ -1482,6 +1501,10 @@ static ssize_t pktgen_if_write(struct file *file,
}
}
+ /* Set up Src MAC */
+ if (compare_ether_addr(old_smac, pkt_dev->src_mac))
+ memcpy(&(pkt_dev->hh[6]), pkt_dev->src_mac, ETH_ALEN);
+
sprintf(pg_result, "OK: srcmac");
return count;
}
@@ -1517,23 +1540,47 @@ static ssize_t pktgen_if_write(struct file *file,
return count;
}
+ if (!strcmp(name, "queue_map_min")) {
+ len = num_arg(&user_buffer[i], 5, &value);
+ if (len < 0) {
+ return len;
+ }
+ i += len;
+ pkt_dev->queue_map_min = value;
+ sprintf(pg_result, "OK: queue_map_min=%u", pkt_dev->queue_map_min);
+ return count;
+ }
+
+ if (!strcmp(name, "queue_map_max")) {
+ len = num_arg(&user_buffer[i], 5, &value);
+ if (len < 0) {
+ return len;
+ }
+ i += len;
+ pkt_dev->queue_map_max = value;
+ sprintf(pg_result, "OK: queue_map_max=%u", pkt_dev->queue_map_max);
+ return count;
+ }
+
if (!strcmp(name, "mpls")) {
- unsigned n, offset;
+ unsigned n, cnt;
+
len = get_labels(&user_buffer[i], pkt_dev);
- if (len < 0) { return len; }
+ if (len < 0)
+ return len;
i += len;
- offset = sprintf(pg_result, "OK: mpls=");
+ cnt = sprintf(pg_result, "OK: mpls=");
for (n = 0; n < pkt_dev->nr_labels; n++)
- offset += sprintf(pg_result + offset,
- "%08x%s", ntohl(pkt_dev->labels[n]),
- n == pkt_dev->nr_labels-1 ? "" : ",");
+ cnt += sprintf(pg_result + cnt,
+ "%08x%s", ntohl(pkt_dev->labels[n]),
+ n == pkt_dev->nr_labels-1 ? "" : ",");
if (pkt_dev->nr_labels && pkt_dev->vlan_id != 0xffff) {
pkt_dev->vlan_id = 0xffff; /* turn off VLAN/SVLAN */
pkt_dev->svlan_id = 0xffff;
if (debug)
- printk("pktgen: VLAN/SVLAN auto turned off\n");
+ printk(KERN_DEBUG "pktgen: VLAN/SVLAN auto turned off\n");
}
return count;
}
@@ -1548,10 +1595,10 @@ static ssize_t pktgen_if_write(struct file *file,
pkt_dev->vlan_id = value; /* turn on VLAN */
if (debug)
- printk("pktgen: VLAN turned on\n");
+ printk(KERN_DEBUG "pktgen: VLAN turned on\n");
if (debug && pkt_dev->nr_labels)
- printk("pktgen: MPLS auto turned off\n");
+ printk(KERN_DEBUG "pktgen: MPLS auto turned off\n");
pkt_dev->nr_labels = 0; /* turn off MPLS */
sprintf(pg_result, "OK: vlan_id=%u", pkt_dev->vlan_id);
@@ -1560,7 +1607,7 @@ static ssize_t pktgen_if_write(struct file *file,
pkt_dev->svlan_id = 0xffff;
if (debug)
- printk("pktgen: VLAN/SVLAN turned off\n");
+ printk(KERN_DEBUG "pktgen: VLAN/SVLAN turned off\n");
}
return count;
}
@@ -1605,10 +1652,10 @@ static ssize_t pktgen_if_write(struct file *file,
pkt_dev->svlan_id = value; /* turn on SVLAN */
if (debug)
- printk("pktgen: SVLAN turned on\n");
+ printk(KERN_DEBUG "pktgen: SVLAN turned on\n");
if (debug && pkt_dev->nr_labels)
- printk("pktgen: MPLS auto turned off\n");
+ printk(KERN_DEBUG "pktgen: MPLS auto turned off\n");
pkt_dev->nr_labels = 0; /* turn off MPLS */
sprintf(pg_result, "OK: svlan_id=%u", pkt_dev->svlan_id);
@@ -1617,7 +1664,7 @@ static ssize_t pktgen_if_write(struct file *file,
pkt_dev->svlan_id = 0xffff;
if (debug)
- printk("pktgen: VLAN/SVLAN turned off\n");
+ printk(KERN_DEBUG "pktgen: VLAN/SVLAN turned off\n");
}
return count;
}
@@ -1709,9 +1756,6 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
BUG_ON(!t);
- seq_printf(seq, "Name: %s max_before_softirq: %d\n",
- t->tsk->comm, t->max_before_softirq);
-
seq_printf(seq, "Running: ");
if_lock(t);
@@ -1744,7 +1788,6 @@ static ssize_t pktgen_thread_write(struct file *file,
int i = 0, max, len, ret;
char name[40];
char *pg_result;
- unsigned long value = 0;
if (count < 1) {
// sprintf(pg_result, "Wrong command format");
@@ -1777,10 +1820,11 @@ static ssize_t pktgen_thread_write(struct file *file,
i += len;
if (debug)
- printk("pktgen: t=%s, count=%lu\n", name, (unsigned long)count);
+ printk(KERN_DEBUG "pktgen: t=%s, count=%lu\n",
+ name, (unsigned long)count);
if (!t) {
- printk("pktgen: ERROR: No thread\n");
+ printk(KERN_ERR "pktgen: ERROR: No thread\n");
ret = -EINVAL;
goto out;
}
@@ -1817,12 +1861,8 @@ static ssize_t pktgen_thread_write(struct file *file,
}
if (!strcmp(name, "max_before_softirq")) {
- len = num_arg(&user_buffer[i], 10, &value);
- mutex_lock(&pktgen_thread_lock);
- t->max_before_softirq = value;
- mutex_unlock(&pktgen_thread_lock);
+ sprintf(pg_result, "OK: Note! max_before_softirq is obsoleted -- Do not use");
ret = count;
- sprintf(pg_result, "OK: max_before_softirq=%lu", value);
goto out;
}
@@ -1891,8 +1931,8 @@ static void pktgen_mark_device(const char *ifname)
mutex_lock(&pktgen_thread_lock);
if (++i >= max_tries) {
- printk("pktgen_mark_device: timed out after waiting "
- "%d msec for device %s to be removed\n",
+ printk(KERN_ERR "pktgen_mark_device: timed out after "
+ "waiting %d msec for device %s to be removed\n",
msec_per_try * i, ifname);
break;
}
@@ -1930,6 +1970,9 @@ static int pktgen_device_event(struct notifier_block *unused,
{
struct net_device *dev = ptr;
+ if (dev->nd_net != &init_net)
+ return NOTIFY_DONE;
+
/* It is OK that we do not hold the group lock right now,
* as we run under the RTNL lock.
*/
@@ -1960,17 +2003,17 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname)
pkt_dev->odev = NULL;
}
- odev = dev_get_by_name(ifname);
+ odev = dev_get_by_name(&init_net, ifname);
if (!odev) {
- printk("pktgen: no such netdevice: \"%s\"\n", ifname);
+ printk(KERN_ERR "pktgen: no such netdevice: \"%s\"\n", ifname);
return -ENODEV;
}
if (odev->type != ARPHRD_ETHER) {
- printk("pktgen: not an ethernet device: \"%s\"\n", ifname);
+ printk(KERN_ERR "pktgen: not an ethernet device: \"%s\"\n", ifname);
err = -EINVAL;
} else if (!netif_running(odev)) {
- printk("pktgen: device is down: \"%s\"\n", ifname);
+ printk(KERN_ERR "pktgen: device is down: \"%s\"\n", ifname);
err = -ENETDOWN;
} else {
pkt_dev->odev = odev;
@@ -1987,7 +2030,8 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname)
static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
{
if (!pkt_dev->odev) {
- printk("pktgen: ERROR: pkt_dev->odev == NULL in setup_inject.\n");
+ printk(KERN_ERR "pktgen: ERROR: pkt_dev->odev == NULL in "
+ "setup_inject.\n");
sprintf(pkt_dev->result,
"ERROR: pkt_dev->odev == NULL in setup_inject.\n");
return;
@@ -2049,7 +2093,8 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
}
rcu_read_unlock();
if (err)
- printk("pktgen: ERROR: IPv6 link address not availble.\n");
+ printk(KERN_ERR "pktgen: ERROR: IPv6 link "
+ "address not availble.\n");
}
#endif
} else {
@@ -2099,7 +2144,6 @@ static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us)
if (spin_until_us - now > jiffies_to_usecs(1) + 1)
schedule_timeout_interruptible(1);
else if (spin_until_us - now > 100) {
- do_softirq();
if (!pkt_dev->running)
return;
if (need_resched())
@@ -2156,8 +2200,7 @@ static inline int f_pick(struct pktgen_dev *pkt_dev)
/* If there was already an IPSEC SA, we keep it as is, else
* we go look for it ...
*/
-inline
-void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
+static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
{
struct xfrm_state *x = pkt_dev->flows[flow].x;
if (!x) {
@@ -2376,6 +2419,20 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
pkt_dev->cur_pkt_size = t;
}
+ if (pkt_dev->queue_map_min < pkt_dev->queue_map_max) {
+ __u16 t;
+ if (pkt_dev->flags & F_QUEUE_MAP_RND) {
+ t = random32() %
+ (pkt_dev->queue_map_max - pkt_dev->queue_map_min + 1)
+ + pkt_dev->queue_map_min;
+ } else {
+ t = pkt_dev->cur_queue_map + 1;
+ if (t > pkt_dev->queue_map_max)
+ t = pkt_dev->queue_map_min;
+ }
+ pkt_dev->cur_queue_map = t;
+ }
+
pkt_dev->flows[flow].count++;
}
@@ -2441,7 +2498,8 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev,
if (nhead >0) {
ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
if (ret < 0) {
- printk("Error expanding ipsec packet %d\n",ret);
+ printk(KERN_ERR "Error expanding "
+ "ipsec packet %d\n",ret);
return 0;
}
}
@@ -2450,7 +2508,8 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev,
skb_pull(skb, ETH_HLEN);
ret = pktgen_output_ipsec(skb, pkt_dev);
if (ret) {
- printk("Error creating ipsec packet %d\n",ret);
+ printk(KERN_ERR "Error creating ipsec "
+ "packet %d\n",ret);
kfree_skb(skb);
return 0;
}
@@ -2544,6 +2603,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
skb->network_header = skb->tail;
skb->transport_header = skb->network_header + sizeof(struct iphdr);
skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr));
+ skb->queue_mapping = pkt_dev->cur_queue_map;
iph = ip_hdr(skb);
udph = udp_hdr(skb);
@@ -2673,6 +2733,7 @@ static unsigned int scan_ip6(const char *s, char ip[16])
unsigned int prefixlen = 0;
unsigned int suffixlen = 0;
__be32 tmp;
+ char *pos;
for (i = 0; i < 16; i++)
ip[i] = 0;
@@ -2687,12 +2748,9 @@ static unsigned int scan_ip6(const char *s, char ip[16])
}
s++;
}
- {
- char *tmp;
- u = simple_strtoul(s, &tmp, 16);
- i = tmp - s;
- }
+ u = simple_strtoul(s, &pos, 16);
+ i = pos - s;
if (!i)
return 0;
if (prefixlen == 12 && s[i] == '.') {
@@ -2720,11 +2778,9 @@ static unsigned int scan_ip6(const char *s, char ip[16])
len++;
} else if (suffixlen != 0)
break;
- {
- char *tmp;
- u = simple_strtol(s, &tmp, 16);
- i = tmp - s;
- }
+
+ u = simple_strtol(s, &pos, 16);
+ i = pos - s;
if (!i) {
if (*s)
len--;
@@ -2885,6 +2941,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
skb->network_header = skb->tail;
skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr));
+ skb->queue_mapping = pkt_dev->cur_queue_map;
iph = ipv6_hdr(skb);
udph = udp_hdr(skb);
@@ -3184,8 +3241,8 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev)
int nr_frags = pkt_dev->skb ? skb_shinfo(pkt_dev->skb)->nr_frags : -1;
if (!pkt_dev->running) {
- printk("pktgen: interface: %s is already stopped\n",
- pkt_dev->odev->name);
+ printk(KERN_WARNING "pktgen: interface: %s is already "
+ "stopped\n", pkt_dev->odev->name);
return -EINVAL;
}
@@ -3327,8 +3384,9 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
}
if ((netif_queue_stopped(odev) ||
- netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping)) ||
- need_resched()) {
+ (pkt_dev->skb &&
+ netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping))) ||
+ need_resched()) {
idle_start = getCurUs();
if (!netif_running(odev)) {
@@ -3360,7 +3418,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
pkt_dev->skb = fill_packet(odev, pkt_dev);
if (pkt_dev->skb == NULL) {
- printk("pktgen: ERROR: couldn't allocate skb in fill_packet.\n");
+ printk(KERN_ERR "pktgen: ERROR: couldn't "
+ "allocate skb in fill_packet.\n");
schedule();
pkt_dev->clone_count--; /* back out increment, OOM */
goto out;
@@ -3450,19 +3509,13 @@ static int pktgen_thread_worker(void *arg)
struct pktgen_thread *t = arg;
struct pktgen_dev *pkt_dev = NULL;
int cpu = t->cpu;
- u32 max_before_softirq;
- u32 tx_since_softirq = 0;
BUG_ON(smp_processor_id() != cpu);
init_waitqueue_head(&t->queue);
- t->pid = current->pid;
-
pr_debug("pktgen: starting pktgen/%d: pid=%d\n", cpu, current->pid);
- max_before_softirq = t->max_before_softirq;
-
set_current_state(TASK_INTERRUPTIBLE);
set_freezable();
@@ -3481,24 +3534,9 @@ static int pktgen_thread_worker(void *arg)
__set_current_state(TASK_RUNNING);
- if (pkt_dev) {
-
+ if (pkt_dev)
pktgen_xmit(pkt_dev);
- /*
- * We like to stay RUNNING but must also give
- * others fair share.
- */
-
- tx_since_softirq += pkt_dev->last_ok;
-
- if (tx_since_softirq > max_before_softirq) {
- if (local_softirq_pending())
- do_softirq();
- tx_since_softirq = 0;
- }
- }
-
if (t->control & T_STOP) {
pktgen_stop(t);
t->control &= ~(T_STOP);
@@ -3565,7 +3603,8 @@ static int add_dev_to_thread(struct pktgen_thread *t,
if_lock(t);
if (pkt_dev->pg_thread) {
- printk("pktgen: ERROR: already assigned to a thread.\n");
+ printk(KERN_ERR "pktgen: ERROR: already assigned "
+ "to a thread.\n");
rv = -EBUSY;
goto out;
}
@@ -3590,7 +3629,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
pkt_dev = __pktgen_NN_threads(ifname, FIND);
if (pkt_dev) {
- printk("pktgen: ERROR: interface already used.\n");
+ printk(KERN_ERR "pktgen: ERROR: interface already used.\n");
return -EBUSY;
}
@@ -3632,7 +3671,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
pkt_dev->entry = create_proc_entry(ifname, 0600, pg_proc_dir);
if (!pkt_dev->entry) {
- printk("pktgen: cannot create %s/%s procfs entry.\n",
+ printk(KERN_ERR "pktgen: cannot create %s/%s procfs entry.\n",
PG_PROC_DIR, ifname);
err = -EINVAL;
goto out2;
@@ -3665,7 +3704,8 @@ static int __init pktgen_create_thread(int cpu)
t = kzalloc(sizeof(struct pktgen_thread), GFP_KERNEL);
if (!t) {
- printk("pktgen: ERROR: out of memory, can't create new thread.\n");
+ printk(KERN_ERR "pktgen: ERROR: out of memory, can't "
+ "create new thread.\n");
return -ENOMEM;
}
@@ -3678,7 +3718,8 @@ static int __init pktgen_create_thread(int cpu)
p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu);
if (IS_ERR(p)) {
- printk("pktgen: kernel_thread() failed for cpu %d\n", t->cpu);
+ printk(KERN_ERR "pktgen: kernel_thread() failed "
+ "for cpu %d\n", t->cpu);
list_del(&t->th_list);
kfree(t);
return PTR_ERR(p);
@@ -3688,7 +3729,7 @@ static int __init pktgen_create_thread(int cpu)
pe = create_proc_entry(t->tsk->comm, 0600, pg_proc_dir);
if (!pe) {
- printk("pktgen: cannot create %s/%s procfs entry.\n",
+ printk(KERN_ERR "pktgen: cannot create %s/%s procfs entry.\n",
PG_PROC_DIR, t->tsk->comm);
kthread_stop(p);
list_del(&t->th_list);
@@ -3727,7 +3768,8 @@ static int pktgen_remove_device(struct pktgen_thread *t,
pr_debug("pktgen: remove_device pkt_dev=%p\n", pkt_dev);
if (pkt_dev->running) {
- printk("pktgen:WARNING: trying to remove a running interface, stopping it now.\n");
+ printk(KERN_WARNING "pktgen: WARNING: trying to remove a "
+ "running interface, stopping it now.\n");
pktgen_stop_device(pkt_dev);
}
@@ -3759,18 +3801,18 @@ static int __init pg_init(void)
int cpu;
struct proc_dir_entry *pe;
- printk(version);
+ printk(KERN_INFO "%s", version);
- pg_proc_dir = proc_mkdir(PG_PROC_DIR, proc_net);
+ pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net);
if (!pg_proc_dir)
return -ENODEV;
pg_proc_dir->owner = THIS_MODULE;
pe = create_proc_entry(PGCTRL, 0600, pg_proc_dir);
if (pe == NULL) {
- printk("pktgen: ERROR: cannot create %s procfs entry.\n",
- PGCTRL);
- proc_net_remove(PG_PROC_DIR);
+ printk(KERN_ERR "pktgen: ERROR: cannot create %s "
+ "procfs entry.\n", PGCTRL);
+ proc_net_remove(&init_net, PG_PROC_DIR);
return -EINVAL;
}
@@ -3785,15 +3827,16 @@ static int __init pg_init(void)
err = pktgen_create_thread(cpu);
if (err)
- printk("pktgen: WARNING: Cannot create thread for cpu %d (%d)\n",
- cpu, err);
+ printk(KERN_WARNING "pktgen: WARNING: Cannot create "
+ "thread for cpu %d (%d)\n", cpu, err);
}
if (list_empty(&pktgen_threads)) {
- printk("pktgen: ERROR: Initialization failed for all threads\n");
+ printk(KERN_ERR "pktgen: ERROR: Initialization failed for "
+ "all threads\n");
unregister_netdevice_notifier(&pktgen_notifier_block);
remove_proc_entry(PGCTRL, pg_proc_dir);
- proc_net_remove(PG_PROC_DIR);
+ proc_net_remove(&init_net, PG_PROC_DIR);
return -ENODEV;
}
@@ -3820,7 +3863,7 @@ static void __exit pg_cleanup(void)
/* Clean up proc file system */
remove_proc_entry(PGCTRL, pg_proc_dir);
- proc_net_remove(PG_PROC_DIR);
+ proc_net_remove(&init_net, PG_PROC_DIR);
}
module_init(pg_init);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 864cbdf31ed..1072d16696c 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -35,6 +35,7 @@
#include <linux/security.h>
#include <linux/mutex.h>
#include <linux/if_addr.h>
+#include <linux/nsproxy.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -74,8 +75,6 @@ void __rtnl_unlock(void)
void rtnl_unlock(void)
{
mutex_unlock(&rtnl_mutex);
- if (rtnl && rtnl->sk_receive_queue.qlen)
- rtnl->sk_data_ready(rtnl, 0);
netdev_run_todo();
}
@@ -98,7 +97,7 @@ int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len)
}
int __rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr,
- struct rtattr *rta, int len)
+ struct rtattr *rta, int len)
{
if (RTA_PAYLOAD(rta) < len)
return -1;
@@ -306,10 +305,13 @@ EXPORT_SYMBOL_GPL(rtnl_link_register);
void __rtnl_link_unregister(struct rtnl_link_ops *ops)
{
struct net_device *dev, *n;
+ struct net *net;
- for_each_netdev_safe(dev, n) {
- if (dev->rtnl_link_ops == ops)
- ops->dellink(dev);
+ for_each_net(net) {
+ for_each_netdev_safe(net, dev, n) {
+ if (dev->rtnl_link_ops == ops)
+ ops->dellink(dev);
+ }
}
list_del(&ops->list);
}
@@ -634,7 +636,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name);
NLA_PUT_U32(skb, IFLA_TXQLEN, dev->tx_queue_len);
- NLA_PUT_U32(skb, IFLA_WEIGHT, dev->weight);
NLA_PUT_U8(skb, IFLA_OPERSTATE,
netif_running(dev) ? dev->operstate : IF_OPER_DOWN);
NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode);
@@ -694,12 +695,13 @@ nla_put_failure:
static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct net *net = skb->sk->sk_net;
int idx;
int s_idx = cb->args[0];
struct net_device *dev;
idx = 0;
- for_each_netdev(dev) {
+ for_each_netdev(net, dev) {
if (idx < s_idx)
goto cont;
if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
@@ -714,7 +716,7 @@ cont:
return skb->len;
}
-static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
+const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 },
[IFLA_ADDRESS] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
[IFLA_BROADCAST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
@@ -724,6 +726,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_WEIGHT] = { .type = NLA_U32 },
[IFLA_OPERSTATE] = { .type = NLA_U8 },
[IFLA_LINKMODE] = { .type = NLA_U8 },
+ [IFLA_NET_NS_PID] = { .type = NLA_U32 },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -731,12 +734,45 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
[IFLA_INFO_DATA] = { .type = NLA_NESTED },
};
+static struct net *get_net_ns_by_pid(pid_t pid)
+{
+ struct task_struct *tsk;
+ struct net *net;
+
+ /* Lookup the network namespace */
+ net = ERR_PTR(-ESRCH);
+ rcu_read_lock();
+ tsk = find_task_by_pid(pid);
+ if (tsk) {
+ task_lock(tsk);
+ if (tsk->nsproxy)
+ net = get_net(tsk->nsproxy->net_ns);
+ task_unlock(tsk);
+ }
+ rcu_read_unlock();
+ return net;
+}
+
static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
struct nlattr **tb, char *ifname, int modified)
{
int send_addr_notify = 0;
int err;
+ if (tb[IFLA_NET_NS_PID]) {
+ struct net *net;
+ net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID]));
+ if (IS_ERR(net)) {
+ err = PTR_ERR(net);
+ goto errout;
+ }
+ err = dev_change_net_namespace(dev, net, ifname);
+ put_net(net);
+ if (err)
+ goto errout;
+ modified = 1;
+ }
+
if (tb[IFLA_MAP]) {
struct rtnl_link_ifmap *u_map;
struct ifmap k_map;
@@ -834,9 +870,6 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
if (tb[IFLA_TXQLEN])
dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
- if (tb[IFLA_WEIGHT])
- dev->weight = nla_get_u32(tb[IFLA_WEIGHT]);
-
if (tb[IFLA_OPERSTATE])
set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
@@ -862,6 +895,7 @@ errout:
static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
+ struct net *net = skb->sk->sk_net;
struct ifinfomsg *ifm;
struct net_device *dev;
int err;
@@ -880,9 +914,9 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
err = -EINVAL;
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0)
- dev = dev_get_by_index(ifm->ifi_index);
+ dev = dev_get_by_index(net, ifm->ifi_index);
else if (tb[IFLA_IFNAME])
- dev = dev_get_by_name(ifname);
+ dev = dev_get_by_name(net, ifname);
else
goto errout;
@@ -908,6 +942,7 @@ errout:
static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
+ struct net *net = skb->sk->sk_net;
const struct rtnl_link_ops *ops;
struct net_device *dev;
struct ifinfomsg *ifm;
@@ -924,9 +959,9 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0)
- dev = __dev_get_by_index(ifm->ifi_index);
+ dev = __dev_get_by_index(net, ifm->ifi_index);
else if (tb[IFLA_IFNAME])
- dev = __dev_get_by_name(ifname);
+ dev = __dev_get_by_name(net, ifname);
else
return -EINVAL;
@@ -941,8 +976,52 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
return 0;
}
+struct net_device *rtnl_create_link(struct net *net, char *ifname,
+ const struct rtnl_link_ops *ops, struct nlattr *tb[])
+{
+ int err;
+ struct net_device *dev;
+
+ err = -ENOMEM;
+ dev = alloc_netdev(ops->priv_size, ifname, ops->setup);
+ if (!dev)
+ goto err;
+
+ if (strchr(dev->name, '%')) {
+ err = dev_alloc_name(dev, dev->name);
+ if (err < 0)
+ goto err_free;
+ }
+
+ dev->nd_net = net;
+ dev->rtnl_link_ops = ops;
+
+ if (tb[IFLA_MTU])
+ dev->mtu = nla_get_u32(tb[IFLA_MTU]);
+ if (tb[IFLA_ADDRESS])
+ memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]),
+ nla_len(tb[IFLA_ADDRESS]));
+ if (tb[IFLA_BROADCAST])
+ memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]),
+ nla_len(tb[IFLA_BROADCAST]));
+ if (tb[IFLA_TXQLEN])
+ dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
+ if (tb[IFLA_OPERSTATE])
+ set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
+ if (tb[IFLA_LINKMODE])
+ dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
+
+ return dev;
+
+err_free:
+ free_netdev(dev);
+err:
+ return ERR_PTR(err);
+}
+
static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
+ struct net *net = skb->sk->sk_net;
const struct rtnl_link_ops *ops;
struct net_device *dev;
struct ifinfomsg *ifm;
@@ -952,7 +1031,9 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
struct nlattr *linkinfo[IFLA_INFO_MAX+1];
int err;
+#ifdef CONFIG_KMOD
replay:
+#endif
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
if (err < 0)
return err;
@@ -964,9 +1045,9 @@ replay:
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0)
- dev = __dev_get_by_index(ifm->ifi_index);
+ dev = __dev_get_by_index(net, ifm->ifi_index);
else if (ifname[0])
- dev = __dev_get_by_name(ifname);
+ dev = __dev_get_by_name(net, ifname);
else
dev = NULL;
@@ -1051,40 +1132,17 @@ replay:
if (!ifname[0])
snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
- dev = alloc_netdev(ops->priv_size, ifname, ops->setup);
- if (!dev)
- return -ENOMEM;
-
- if (strchr(dev->name, '%')) {
- err = dev_alloc_name(dev, dev->name);
- if (err < 0)
- goto err_free;
- }
- dev->rtnl_link_ops = ops;
-
- if (tb[IFLA_MTU])
- dev->mtu = nla_get_u32(tb[IFLA_MTU]);
- if (tb[IFLA_ADDRESS])
- memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]),
- nla_len(tb[IFLA_ADDRESS]));
- if (tb[IFLA_BROADCAST])
- memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]),
- nla_len(tb[IFLA_BROADCAST]));
- if (tb[IFLA_TXQLEN])
- dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
- if (tb[IFLA_WEIGHT])
- dev->weight = nla_get_u32(tb[IFLA_WEIGHT]);
- if (tb[IFLA_OPERSTATE])
- set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
- if (tb[IFLA_LINKMODE])
- dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
-
- if (ops->newlink)
+
+ dev = rtnl_create_link(net, ifname, ops, tb);
+
+ if (IS_ERR(dev))
+ err = PTR_ERR(dev);
+ else if (ops->newlink)
err = ops->newlink(dev, tb, data);
else
err = register_netdevice(dev);
-err_free:
- if (err < 0)
+
+ if (err < 0 && !IS_ERR(dev))
free_netdev(dev);
return err;
}
@@ -1092,6 +1150,7 @@ err_free:
static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
+ struct net *net = skb->sk->sk_net;
struct ifinfomsg *ifm;
struct nlattr *tb[IFLA_MAX+1];
struct net_device *dev = NULL;
@@ -1104,7 +1163,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0) {
- dev = dev_get_by_index(ifm->ifi_index);
+ dev = dev_get_by_index(net, ifm->ifi_index);
if (dev == NULL)
return -ENODEV;
} else
@@ -1253,22 +1312,20 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
return doit(skb, nlh, (void *)&rta_buf[0]);
}
-static void rtnetlink_rcv(struct sock *sk, int len)
+static void rtnetlink_rcv(struct sk_buff *skb)
{
- unsigned int qlen = 0;
-
- do {
- mutex_lock(&rtnl_mutex);
- netlink_run_queue(sk, &qlen, &rtnetlink_rcv_msg);
- mutex_unlock(&rtnl_mutex);
-
- netdev_run_todo();
- } while (qlen);
+ rtnl_lock();
+ netlink_rcv_skb(skb, &rtnetlink_rcv_msg);
+ rtnl_unlock();
}
static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = ptr;
+
+ if (dev->nd_net != &init_net)
+ return NOTIFY_DONE;
+
switch (event) {
case NETDEV_UNREGISTER:
rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
@@ -1306,8 +1363,8 @@ void __init rtnetlink_init(void)
if (!rta_buf)
panic("rtnetlink_init: cannot allocate rta_buf\n");
- rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv,
- &rtnl_mutex, THIS_MODULE);
+ rtnl = netlink_kernel_create(&init_net, NETLINK_ROUTE, RTNLGRP_MAX,
+ rtnetlink_rcv, &rtnl_mutex, THIS_MODULE);
if (rtnl == NULL)
panic("rtnetlink_init: cannot initialize rtnetlink\n");
netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
@@ -1333,3 +1390,5 @@ EXPORT_SYMBOL(rtnl_unlock);
EXPORT_SYMBOL(rtnl_unicast);
EXPORT_SYMBOL(rtnl_notify);
EXPORT_SYMBOL(rtnl_set_sk_err);
+EXPORT_SYMBOL(rtnl_create_link);
+EXPORT_SYMBOL(ifla_policy);
diff --git a/net/core/scm.c b/net/core/scm.c
index 44c4ec2c876..530bee8d9ed 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -167,7 +167,8 @@ error:
int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
{
- struct cmsghdr __user *cm = (struct cmsghdr __user *)msg->msg_control;
+ struct cmsghdr __user *cm
+ = (__force struct cmsghdr __user *)msg->msg_control;
struct cmsghdr cmhdr;
int cmlen = CMSG_LEN(len);
int err;
@@ -202,7 +203,8 @@ out:
void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
{
- struct cmsghdr __user *cm = (struct cmsghdr __user*)msg->msg_control;
+ struct cmsghdr __user *cm
+ = (__force struct cmsghdr __user*)msg->msg_control;
int fdmax = 0;
int fdnum = scm->fp->count;
@@ -222,7 +224,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
if (fdnum < fdmax)
fdmax = fdnum;
- for (i=0, cmfptr=(int __user *)CMSG_DATA(cm); i<fdmax; i++, cmfptr++)
+ for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); i<fdmax;
+ i++, cmfptr++)
{
int new_fd;
err = security_file_receive(fp[i]);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 0583e8498f1..944189d9632 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -472,6 +472,9 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
#ifdef CONFIG_INET
new->sp = secpath_get(old->sp);
#endif
+ new->csum_start = old->csum_start;
+ new->csum_offset = old->csum_offset;
+ new->ip_summed = old->ip_summed;
new->transport_header = old->transport_header;
new->network_header = old->network_header;
new->mac_header = old->mac_header;
@@ -545,8 +548,6 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
skb_reserve(n, headerlen);
/* Set the tail pointer and length */
skb_put(n, skb->len);
- n->csum = skb->csum;
- n->ip_summed = skb->ip_summed;
if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
BUG();
@@ -589,8 +590,6 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
skb_put(n, skb_headlen(skb));
/* Copy the bytes */
skb_copy_from_linear_data(skb, n->data, n->len);
- n->csum = skb->csum;
- n->ip_summed = skb->ip_summed;
n->truesize += skb->data_len;
n->data_len = skb->data_len;
@@ -686,6 +685,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
skb->transport_header += off;
skb->network_header += off;
skb->mac_header += off;
+ skb->csum_start += off;
skb->cloned = 0;
skb->hdr_len = 0;
skb->nohdr = 0;
@@ -734,9 +734,6 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
*
* You must pass %GFP_ATOMIC as the allocation priority if this function
* is called from an interrupt.
- *
- * BUG ALERT: ip_summed is not copied. Why does this work? Is it used
- * only by netfilter in the cases when checksum is recalculated? --ANK
*/
struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
int newheadroom, int newtailroom,
@@ -749,7 +746,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
gfp_mask);
int oldheadroom = skb_headroom(skb);
int head_copy_len, head_copy_off;
- int off = 0;
+ int off;
if (!n)
return NULL;
@@ -773,12 +770,13 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
copy_skb_header(n, skb);
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
off = newheadroom - oldheadroom;
-#endif
+ n->csum_start += off;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
n->transport_header += off;
n->network_header += off;
n->mac_header += off;
+#endif
return n;
}
@@ -2021,13 +2019,13 @@ void __init skb_init(void)
sizeof(struct sk_buff),
0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
- NULL, NULL);
+ NULL);
skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
(2*sizeof(struct sk_buff)) +
sizeof(atomic_t),
0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
- NULL, NULL);
+ NULL);
}
/**
diff --git a/net/core/sock.c b/net/core/sock.c
index 091032a250c..4ed9b507c1e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -119,6 +119,7 @@
#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
+#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>
@@ -171,6 +172,20 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = {
"slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
"slock-AF_RXRPC" , "slock-AF_MAX"
};
+static const char *af_family_clock_key_strings[AF_MAX+1] = {
+ "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
+ "clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK",
+ "clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" ,
+ "clock-AF_X25" , "clock-AF_INET6" , "clock-AF_ROSE" ,
+ "clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" ,
+ "clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" ,
+ "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" ,
+ "clock-21" , "clock-AF_SNA" , "clock-AF_IRDA" ,
+ "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" ,
+ "clock-27" , "clock-28" , "clock-29" ,
+ "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
+ "clock-AF_RXRPC" , "clock-AF_MAX"
+};
#endif
/*
@@ -217,7 +232,7 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
warned++;
printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
"tries to set negative timeout\n",
- current->comm, current->pid);
+ current->comm, current->pid);
return 0;
}
*timeo_p = MAX_SCHEDULE_TIMEOUT;
@@ -348,6 +363,62 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
}
EXPORT_SYMBOL(sk_dst_check);
+static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
+{
+ int ret = -ENOPROTOOPT;
+#ifdef CONFIG_NETDEVICES
+ struct net *net = sk->sk_net;
+ char devname[IFNAMSIZ];
+ int index;
+
+ /* Sorry... */
+ ret = -EPERM;
+ if (!capable(CAP_NET_RAW))
+ goto out;
+
+ ret = -EINVAL;
+ if (optlen < 0)
+ goto out;
+
+ /* Bind this socket to a particular device like "eth0",
+ * as specified in the passed interface name. If the
+ * name is "" or the option length is zero the socket
+ * is not bound.
+ */
+ if (optlen > IFNAMSIZ - 1)
+ optlen = IFNAMSIZ - 1;
+ memset(devname, 0, sizeof(devname));
+
+ ret = -EFAULT;
+ if (copy_from_user(devname, optval, optlen))
+ goto out;
+
+ if (devname[0] == '\0') {
+ index = 0;
+ } else {
+ struct net_device *dev = dev_get_by_name(net, devname);
+
+ ret = -ENODEV;
+ if (!dev)
+ goto out;
+
+ index = dev->ifindex;
+ dev_put(dev);
+ }
+
+ lock_sock(sk);
+ sk->sk_bound_dev_if = index;
+ sk_dst_reset(sk);
+ release_sock(sk);
+
+ ret = 0;
+
+out:
+#endif
+
+ return ret;
+}
+
/*
* This is meant for all protocols to use and covers goings on
* at the socket level. Everything here is generic.
@@ -376,6 +447,9 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
}
#endif
+ if (optname == SO_BINDTODEVICE)
+ return sock_bindtodevice(sk, optval, optlen);
+
if (optlen < sizeof(int))
return -EINVAL;
@@ -564,54 +638,6 @@ set_rcvbuf:
ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
break;
-#ifdef CONFIG_NETDEVICES
- case SO_BINDTODEVICE:
- {
- char devname[IFNAMSIZ];
-
- /* Sorry... */
- if (!capable(CAP_NET_RAW)) {
- ret = -EPERM;
- break;
- }
-
- /* Bind this socket to a particular device like "eth0",
- * as specified in the passed interface name. If the
- * name is "" or the option length is zero the socket
- * is not bound.
- */
-
- if (!valbool) {
- sk->sk_bound_dev_if = 0;
- } else {
- if (optlen > IFNAMSIZ - 1)
- optlen = IFNAMSIZ - 1;
- memset(devname, 0, sizeof(devname));
- if (copy_from_user(devname, optval, optlen)) {
- ret = -EFAULT;
- break;
- }
-
- /* Remove any cached route for this socket. */
- sk_dst_reset(sk);
-
- if (devname[0] == '\0') {
- sk->sk_bound_dev_if = 0;
- } else {
- struct net_device *dev = dev_get_by_name(devname);
- if (!dev) {
- ret = -ENODEV;
- break;
- }
- sk->sk_bound_dev_if = dev->ifindex;
- dev_put(dev);
- }
- }
- break;
- }
-#endif
-
-
case SO_ATTACH_FILTER:
ret = -EINVAL;
if (optlen == sizeof(struct sock_fprog)) {
@@ -848,7 +874,7 @@ static inline void sock_lock_init(struct sock *sk)
* @prot: struct proto associated with this new sock instance
* @zero_it: if we should zero the newly allocated sock
*/
-struct sock *sk_alloc(int family, gfp_t priority,
+struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
struct proto *prot, int zero_it)
{
struct sock *sk = NULL;
@@ -869,6 +895,7 @@ struct sock *sk_alloc(int family, gfp_t priority,
*/
sk->sk_prot = sk->sk_prot_creator = prot;
sock_lock_init(sk);
+ sk->sk_net = get_net(net);
}
if (security_sk_alloc(sk, family, priority))
@@ -908,6 +935,7 @@ void sk_free(struct sock *sk)
__FUNCTION__, atomic_read(&sk->sk_omem_alloc));
security_sk_free(sk);
+ put_net(sk->sk_net);
if (sk->sk_prot_creator->slab != NULL)
kmem_cache_free(sk->sk_prot_creator->slab, sk);
else
@@ -917,7 +945,7 @@ void sk_free(struct sock *sk)
struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
{
- struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);
+ struct sock *newsk = sk_alloc(sk->sk_net, sk->sk_family, priority, sk->sk_prot, 0);
if (newsk != NULL) {
struct sk_filter *filter;
@@ -941,8 +969,9 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
rwlock_init(&newsk->sk_dst_lock);
rwlock_init(&newsk->sk_callback_lock);
- lockdep_set_class(&newsk->sk_callback_lock,
- af_callback_keys + newsk->sk_family);
+ lockdep_set_class_and_name(&newsk->sk_callback_lock,
+ af_callback_keys + newsk->sk_family,
+ af_family_clock_key_strings[newsk->sk_family]);
newsk->sk_dst_cache = NULL;
newsk->sk_wmem_queued = 0;
@@ -1530,8 +1559,9 @@ void sock_init_data(struct socket *sock, struct sock *sk)
rwlock_init(&sk->sk_dst_lock);
rwlock_init(&sk->sk_callback_lock);
- lockdep_set_class(&sk->sk_callback_lock,
- af_callback_keys + sk->sk_family);
+ lockdep_set_class_and_name(&sk->sk_callback_lock,
+ af_callback_keys + sk->sk_family,
+ af_family_clock_key_strings[sk->sk_family]);
sk->sk_state_change = sock_def_wakeup;
sk->sk_data_ready = sock_def_readable;
@@ -1559,9 +1589,9 @@ void fastcall lock_sock_nested(struct sock *sk, int subclass)
{
might_sleep();
spin_lock_bh(&sk->sk_lock.slock);
- if (sk->sk_lock.owner)
+ if (sk->sk_lock.owned)
__lock_sock(sk);
- sk->sk_lock.owner = (void *)1;
+ sk->sk_lock.owned = 1;
spin_unlock(&sk->sk_lock.slock);
/*
* The sk_lock has mutex_lock() semantics here:
@@ -1582,7 +1612,7 @@ void fastcall release_sock(struct sock *sk)
spin_lock_bh(&sk->sk_lock.slock);
if (sk->sk_backlog.tail)
__release_sock(sk);
- sk->sk_lock.owner = NULL;
+ sk->sk_lock.owned = 0;
if (waitqueue_active(&sk->sk_lock.wq))
wake_up(&sk->sk_lock.wq);
spin_unlock_bh(&sk->sk_lock.slock);
@@ -1752,7 +1782,7 @@ int proto_register(struct proto *prot, int alloc_slab)
if (alloc_slab) {
prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
- SLAB_HWCACHE_ALIGN, NULL, NULL);
+ SLAB_HWCACHE_ALIGN, NULL);
if (prot->slab == NULL) {
printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
@@ -1770,7 +1800,7 @@ int proto_register(struct proto *prot, int alloc_slab)
sprintf(request_sock_slab_name, mask, prot->name);
prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
prot->rsk_prot->obj_size, 0,
- SLAB_HWCACHE_ALIGN, NULL, NULL);
+ SLAB_HWCACHE_ALIGN, NULL);
if (prot->rsk_prot->slab == NULL) {
printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
@@ -1792,7 +1822,7 @@ int proto_register(struct proto *prot, int alloc_slab)
kmem_cache_create(timewait_sock_slab_name,
prot->twsk_prot->twsk_obj_size,
0, SLAB_HWCACHE_ALIGN,
- NULL, NULL);
+ NULL);
if (prot->twsk_prot->twsk_slab == NULL)
goto out_free_timewait_sock_slab_name;
}
@@ -1947,7 +1977,7 @@ static const struct file_operations proto_seq_fops = {
static int __init proto_init(void)
{
/* register /proc/net/protocols */
- return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
+ return proc_net_fops_create(&init_net, "protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
}
subsys_initcall(proto_init);
diff --git a/net/core/utils.c b/net/core/utils.c
index 2030bb8c2d3..0bf17da40d5 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -25,6 +25,7 @@
#include <linux/random.h>
#include <linux/percpu.h>
#include <linux/init.h>
+#include <net/sock.h>
#include <asm/byteorder.h>
#include <asm/system.h>