From 3891845e1ef6e6807075d4241966b26f6ecb0a5c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 27 Oct 2008 17:51:47 -0700 Subject: netns: Coexist with the sysfs limitations v2 To make testing of the network namespace simpler allow the network namespace code and the sysfs code to be compiled and run at the same time. To do this only virtual devices are allowed in the additional network namespaces and those virtual devices are not placed in the kobject tree. Since virtual devices don't actually do anything interesting hardware wise that needs device management there should be no loss in keeping them out of the kobject tree and by implication sysfs. The gain in ease of testing and code coverage should be significant. Changelog: v2: As pointed out by Benjamin Thery it only makes sense to call device_rename in the initial network namespace for now. Signed-off-by: Eric W. Biederman Acked-by: Benjamin Thery Tested-by: Serge Hallyn Acked-by: Serge Hallyn Acked-by: Daniel Lezcano Signed-off-by: David S. Miller --- net/core/dev.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index d9038e328cc..3a2b8be9e67 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -924,10 +924,15 @@ int dev_change_name(struct net_device *dev, const char *newname) strlcpy(dev->name, newname, IFNAMSIZ); rollback: - ret = device_rename(&dev->dev, dev->name); - if (ret) { - memcpy(dev->name, oldname, IFNAMSIZ); - return ret; + /* For now only devices in the initial network namespace + * are in sysfs. + */ + if (net == &init_net) { + ret = device_rename(&dev->dev, dev->name); + if (ret) { + memcpy(dev->name, oldname, IFNAMSIZ); + return ret; + } } write_lock_bh(&dev_base_lock); @@ -4460,6 +4465,15 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char if (dev->features & NETIF_F_NETNS_LOCAL) goto out; +#ifdef CONFIG_SYSFS + /* Don't allow real devices to be moved when sysfs + * is enabled. + */ + err = -EINVAL; + if (dev->dev.parent) + goto out; +#endif + /* Ensure the device has been registrered */ err = -EINVAL; if (dev->reg_state != NETREG_REGISTERED) @@ -4517,6 +4531,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char */ dev_addr_discard(dev); + netdev_unregister_kobject(dev); + /* Actually switch the network namespace */ dev_net_set(dev, net); @@ -4533,7 +4549,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char } /* Fixup kobjects */ - netdev_unregister_kobject(dev); err = netdev_register_kobject(dev); WARN_ON(err); -- cgit v1.2.3-70-g09d2 From 24f8b2385e03a4f4c8dac513d03b5eaa475822b9 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 3 Nov 2008 17:14:38 -0800 Subject: net: increase receive packet quantum This patch gets about 1.25% back on tbench regression. My change to NAPI for multiqueue support changed the time limit on network receive processing. Under sustained loads like tbench, this can cause the receiver to reschedule prematurely. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/dev.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 3a2b8be9e67..8f9d3b38a44 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2373,7 +2373,7 @@ EXPORT_SYMBOL(__napi_schedule); static void net_rx_action(struct softirq_action *h) { struct list_head *list = &__get_cpu_var(softnet_data).poll_list; - unsigned long start_time = jiffies; + unsigned long time_limit = jiffies + 2; int budget = netdev_budget; void *have; @@ -2384,13 +2384,10 @@ static void net_rx_action(struct softirq_action *h) int work, weight; /* If softirq window is exhuasted then punt. - * - * Note that this is a slight policy change from the - * previous NAPI code, which would allow up to 2 - * jiffies to pass before breaking out. The test - * used to be "jiffies - start_time > 1". + * Allow this to run for 2 jiffies since which will allow + * an average latency of 1.5/HZ. */ - if (unlikely(budget <= 0 || jiffies != start_time)) + if (unlikely(budget <= 0 || time_after(jiffies, time_limit))) goto softnet_break; local_irq_enable(); -- cgit v1.2.3-70-g09d2 From d0c082cea6dfb9b674b4f6e1e84025662dbd24e8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 5 Nov 2008 15:59:38 -0800 Subject: netns: Delete virtual interfaces during namespace cleanup When physical devices are inside of network namespace and that network namespace terminates we can not make them go away. We have to keep them and moving them to the initial network namespace is the best we can do. For virtual devices left in a network namespace that is exiting we have no need to preserve them and we now have the infrastructure that allows us to delete them. So delete virtual devices when we exit a network namespace. Keeping the necessary user space clean up after a network namespace exits much more tractable. Acked-by: Daniel Lezcano Acked-by: Pavel Emelyanov Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/core/dev.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 8f9d3b38a44..9475f3e624a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4852,6 +4852,12 @@ static void __net_exit default_device_exit(struct net *net) if (dev->features & NETIF_F_NETNS_LOCAL) continue; + /* Delete virtual devices */ + if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) { + dev->rtnl_link_ops->dellink(dev); + continue; + } + /* Push remaing network devices to init_net */ snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); err = dev_change_net_namespace(dev, &init_net, fb_name); -- cgit v1.2.3-70-g09d2 From ae33bc40c0d96d02f51a996482ea7e41c5152695 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 5 Nov 2008 16:00:02 -0800 Subject: net: Guaranetee the proper ordering of the loopback device. I was recently hunting a bug that occurred in network namespace cleanup. In looking at the code it became apparrent that we have and will continue to have cases where if we have anything going on in a network namespace there will be assumptions that the loopback device is present. Things like sending igmp unsubscribe messages when we bring down network devices invokes the routing code which assumes that at least the loopback driver is present. Therefore to avoid magic initcall ordering hackery that is hard to follow and hard to get right insert a call to register the loopback device directly from net_dev_init(). This guarantes that the loopback device is the first device registered and the last network device to go away. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- drivers/net/loopback.c | 13 ++----------- include/linux/netdevice.h | 1 + net/core/dev.c | 12 ++++++++++++ 3 files changed, 15 insertions(+), 11 deletions(-) (limited to 'net/core/dev.c') diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 91d08585a6d..c4516b580ba 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -204,17 +204,8 @@ static __net_exit void loopback_net_exit(struct net *net) unregister_netdev(dev); } -static struct pernet_operations __net_initdata loopback_net_ops = { +/* Registered in net/core/dev.c */ +struct pernet_operations __net_initdata loopback_net_ops = { .init = loopback_net_init, .exit = loopback_net_exit, }; - -static int __init loopback_init(void) -{ - return register_pernet_device(&loopback_net_ops); -} - -/* Loopback is special. It should be initialized before any other network - * device and network subsystem. - */ -fs_initcall(loopback_init); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f1b0dbe5846..12d7f4469dc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1766,6 +1766,7 @@ static inline int skb_bond_should_drop(struct sk_buff *skb) return 0; } +extern struct pernet_operations __net_initdata loopback_net_ops; #endif /* __KERNEL__ */ #endif /* _LINUX_DEV_H */ diff --git a/net/core/dev.c b/net/core/dev.c index 9475f3e624a..811507c3980 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4904,6 +4904,18 @@ static int __init net_dev_init(void) if (register_pernet_subsys(&netdev_net_ops)) goto out; + /* The loopback device is special if any other network devices + * is present in a network namespace the loopback device must + * be present. Since we now dynamically allocate and free the + * loopback device ensure this invariant is maintained by + * keeping the loopback device as the first device on the + * list of network devices. Ensuring the loopback devices + * is the first device that appears and the last network device + * that disappears. + */ + if (register_pernet_device(&loopback_net_ops)) + goto out; + if (register_pernet_device(&default_device_ops)) goto out; -- cgit v1.2.3-70-g09d2 From 0a36b345ab99d6b3c96999e7e3b79bd243cf9bf7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 5 Nov 2008 16:00:24 -0800 Subject: net: Don't leak packets when a netns is going down I have been tracking for a while a case where when the network namespace exits the cleanup gets stck in an endless precessess of: unregister_netdevice: waiting for lo to become free. Usage count = 3 unregister_netdevice: waiting for lo to become free. Usage count = 3 unregister_netdevice: waiting for lo to become free. Usage count = 3 unregister_netdevice: waiting for lo to become free. Usage count = 3 unregister_netdevice: waiting for lo to become free. Usage count = 3 unregister_netdevice: waiting for lo to become free. Usage count = 3 unregister_netdevice: waiting for lo to become free. Usage count = 3 It turns out that if you listen on a multicast address an unsubscribe packet is sent when the network device goes down. If you shutdown the network namespace without carefully cleaning up this can trigger the unsubscribe packet to be sent over the loopback interface while the network namespace is going down. All of which is fine except when we drop the packet and forget to free it leaking the skb and the dst entry attached to. As it turns out the dst entry hold a reference to the idev which holds the dev and keeps everything from being cleaned up. Yuck! By fixing my earlier thinko and add the needed kfree_skb and everything cleans up beautifully. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/core/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 811507c3980..a0c60607f1a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2253,8 +2253,10 @@ int netif_receive_skb(struct sk_buff *skb) rcu_read_lock(); /* Don't receive packets in an exiting network namespace */ - if (!net_alive(dev_net(skb->dev))) + if (!net_alive(dev_net(skb->dev))) { + kfree_skb(skb); goto out; + } #ifdef CONFIG_NET_CLS_ACT if (skb->tc_verd & TC_NCLS) { -- cgit v1.2.3-70-g09d2 From 3d8160b1493bcadca74fbb635d79b3928b8999cf Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 7 Nov 2008 22:52:14 -0800 Subject: Revert "net: Guaranetee the proper ordering of the loopback device." This reverts commit ae33bc40c0d96d02f51a996482ea7e41c5152695. --- drivers/net/loopback.c | 13 +++++++++++-- include/linux/netdevice.h | 1 - net/core/dev.c | 12 ------------ 3 files changed, 11 insertions(+), 15 deletions(-) (limited to 'net/core/dev.c') diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index c4516b580ba..91d08585a6d 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -204,8 +204,17 @@ static __net_exit void loopback_net_exit(struct net *net) unregister_netdev(dev); } -/* Registered in net/core/dev.c */ -struct pernet_operations __net_initdata loopback_net_ops = { +static struct pernet_operations __net_initdata loopback_net_ops = { .init = loopback_net_init, .exit = loopback_net_exit, }; + +static int __init loopback_init(void) +{ + return register_pernet_device(&loopback_net_ops); +} + +/* Loopback is special. It should be initialized before any other network + * device and network subsystem. + */ +fs_initcall(loopback_init); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 12d7f4469dc..f1b0dbe5846 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1766,7 +1766,6 @@ static inline int skb_bond_should_drop(struct sk_buff *skb) return 0; } -extern struct pernet_operations __net_initdata loopback_net_ops; #endif /* __KERNEL__ */ #endif /* _LINUX_DEV_H */ diff --git a/net/core/dev.c b/net/core/dev.c index e0dc67a789b..2306d56fbb5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4909,18 +4909,6 @@ static int __init net_dev_init(void) if (register_pernet_subsys(&netdev_net_ops)) goto out; - /* The loopback device is special if any other network devices - * is present in a network namespace the loopback device must - * be present. Since we now dynamically allocate and free the - * loopback device ensure this invariant is maintained by - * keeping the loopback device as the first device on the - * list of network devices. Ensuring the loopback devices - * is the first device that appears and the last network device - * that disappears. - */ - if (register_pernet_device(&loopback_net_ops)) - goto out; - if (register_pernet_device(&default_device_ops)) goto out; -- cgit v1.2.3-70-g09d2 From 505d4f73dda9e20d59da05008f1f5eb432613e71 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 7 Nov 2008 22:54:20 -0800 Subject: net: Guaranetee the proper ordering of the loopback device. v2 I was recently hunting a bug that occurred in network namespace cleanup. In looking at the code it became apparrent that we have and will continue to have cases where if we have anything going on in a network namespace there will be assumptions that the loopback device is present. Things like sending igmp unsubscribe messages when we bring down network devices invokes the routing code which assumes that at least the loopback driver is present. Therefore to avoid magic initcall ordering hackery that is hard to follow and hard to get right insert a call to register the loopback device directly from net_dev_init(). This guarantes that the loopback device is the first device registered and the last network device to go away. But do it carefully so we register the loopback device after we clear dev_boot_phase. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- drivers/net/loopback.c | 13 ++----------- include/linux/netdevice.h | 1 + net/core/dev.c | 22 +++++++++++++++++----- 3 files changed, 20 insertions(+), 16 deletions(-) (limited to 'net/core/dev.c') diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 91d08585a6d..c4516b580ba 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -204,17 +204,8 @@ static __net_exit void loopback_net_exit(struct net *net) unregister_netdev(dev); } -static struct pernet_operations __net_initdata loopback_net_ops = { +/* Registered in net/core/dev.c */ +struct pernet_operations __net_initdata loopback_net_ops = { .init = loopback_net_init, .exit = loopback_net_exit, }; - -static int __init loopback_init(void) -{ - return register_pernet_device(&loopback_net_ops); -} - -/* Loopback is special. It should be initialized before any other network - * device and network subsystem. - */ -fs_initcall(loopback_init); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f1b0dbe5846..12d7f4469dc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1766,6 +1766,7 @@ static inline int skb_bond_should_drop(struct sk_buff *skb) return 0; } +extern struct pernet_operations __net_initdata loopback_net_ops; #endif /* __KERNEL__ */ #endif /* _LINUX_DEV_H */ diff --git a/net/core/dev.c b/net/core/dev.c index 2306d56fbb5..31568b2068a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4909,9 +4909,6 @@ static int __init net_dev_init(void) if (register_pernet_subsys(&netdev_net_ops)) goto out; - if (register_pernet_device(&default_device_ops)) - goto out; - /* * Initialise the packet receive queues. */ @@ -4928,10 +4925,25 @@ static int __init net_dev_init(void) queue->backlog.weight = weight_p; } - netdev_dma_register(); - dev_boot_phase = 0; + /* The loopback device is special if any other network devices + * is present in a network namespace the loopback device must + * be present. Since we now dynamically allocate and free the + * loopback device ensure this invariant is maintained by + * keeping the loopback device as the first device on the + * list of network devices. Ensuring the loopback devices + * is the first device that appears and the last network device + * that disappears. + */ + if (register_pernet_device(&loopback_net_ops)) + goto out; + + if (register_pernet_device(&default_device_ops)) + goto out; + + netdev_dma_register(); + open_softirq(NET_TX_SOFTIRQ, net_tx_action); open_softirq(NET_RX_SOFTIRQ, net_rx_action); -- cgit v1.2.3-70-g09d2 From 8192b0c482d7078fcdcb4854341b977426f6f09b Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:10 +1100 Subject: CRED: Wrap task credential accesses in the networking subsystem Wrap access to task credentials so that they can be separated more easily from the task_struct during the introduction of COW creds. Change most current->(|e|s|fs)[ug]id to current_(|e|s|fs)[ug]id(). Change some task->e?[ug]id to task_e?[ug]id(). In some places it makes more sense to use RCU directly rather than a convenient wrapper; these will be addressed by later patches. Signed-off-by: David Howells Reviewed-by: James Morris Acked-by: Serge Hallyn Cc: netdev@vger.kernel.org Signed-off-by: James Morris --- include/net/scm.h | 4 ++-- net/core/dev.c | 8 ++++++-- net/core/scm.c | 8 ++++---- net/socket.c | 4 ++-- 4 files changed, 14 insertions(+), 10 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/net/scm.h b/include/net/scm.h index 06df126103c..f160116db54 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -54,8 +54,8 @@ static __inline__ int scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm) { struct task_struct *p = current; - scm->creds.uid = p->uid; - scm->creds.gid = p->gid; + scm->creds.uid = current_uid(); + scm->creds.gid = current_gid(); scm->creds.pid = task_tgid_vnr(p); scm->fp = NULL; scm->seq = 0; diff --git a/net/core/dev.c b/net/core/dev.c index d9038e328cc..262df226b3c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2958,6 +2958,8 @@ static void dev_change_rx_flags(struct net_device *dev, int flags) static int __dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; + uid_t uid; + gid_t gid; ASSERT_RTNL(); @@ -2982,15 +2984,17 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc) printk(KERN_INFO "device %s %s promiscuous mode\n", dev->name, (dev->flags & IFF_PROMISC) ? "entered" : "left"); - if (audit_enabled) + if (audit_enabled) { + current_uid_gid(&uid, &gid); audit_log(current->audit_context, GFP_ATOMIC, AUDIT_ANOM_PROMISCUOUS, "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u", dev->name, (dev->flags & IFF_PROMISC), (old_flags & IFF_PROMISC), audit_get_loginuid(current), - current->uid, current->gid, + uid, gid, audit_get_sessionid(current)); + } dev_change_rx_flags(dev, IFF_PROMISC); } diff --git a/net/core/scm.c b/net/core/scm.c index 10f5c65f6a4..4681d8f9b45 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -45,10 +45,10 @@ static __inline__ int scm_check_creds(struct ucred *creds) { if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) && - ((creds->uid == current->uid || creds->uid == current->euid || - creds->uid == current->suid) || capable(CAP_SETUID)) && - ((creds->gid == current->gid || creds->gid == current->egid || - creds->gid == current->sgid) || capable(CAP_SETGID))) { + ((creds->uid == current_uid() || creds->uid == current_euid() || + creds->uid == current_suid()) || capable(CAP_SETUID)) && + ((creds->gid == current_gid() || creds->gid == current_egid() || + creds->gid == current_sgid()) || capable(CAP_SETGID))) { return 0; } return -EPERM; diff --git a/net/socket.c b/net/socket.c index 57550c3bcab..62c7729527f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -491,8 +491,8 @@ static struct socket *sock_alloc(void) sock = SOCKET_I(inode); inode->i_mode = S_IFSOCK | S_IRWXUGO; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); get_cpu_var(sockets_in_use)++; put_cpu_var(sockets_in_use); -- cgit v1.2.3-70-g09d2 From 908cd2dabbfbbefb02f6b908a1188a62e685136a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 16 Nov 2008 19:50:35 -0800 Subject: net: use %pF for /proc/net/ptype Technically, patch changes format for modules, but I think nobody cares. -86dd :ipv6:ipv6_rcv+0x0 +86dd ipv6_rcv+0x0/0x400 [ipv6] Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/core/dev.c | 32 ++------------------------------ 1 file changed, 2 insertions(+), 30 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 31568b2068a..e08c0fcd603 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -108,7 +108,6 @@ #include #include #include -#include #include #include #include @@ -2801,31 +2800,6 @@ static void ptype_seq_stop(struct seq_file *seq, void *v) rcu_read_unlock(); } -static void ptype_seq_decode(struct seq_file *seq, void *sym) -{ -#ifdef CONFIG_KALLSYMS - unsigned long offset = 0, symsize; - const char *symname; - char *modname; - char namebuf[128]; - - symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset, - &modname, namebuf); - - if (symname) { - char *delim = ":"; - - if (!modname) - modname = delim = ""; - seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim, - symname, offset); - return; - } -#endif - - seq_printf(seq, "[%p]", sym); -} - static int ptype_seq_show(struct seq_file *seq, void *v) { struct packet_type *pt = v; @@ -2838,10 +2812,8 @@ static int ptype_seq_show(struct seq_file *seq, void *v) else seq_printf(seq, "%04x", ntohs(pt->type)); - seq_printf(seq, " %-8s ", - pt->dev ? pt->dev->name : ""); - ptype_seq_decode(seq, pt->func); - seq_putc(seq, '\n'); + seq_printf(seq, " %-8s %pF\n", + pt->dev ? pt->dev->name : "", pt->func); } return 0; -- cgit v1.2.3-70-g09d2 From d314774cf2cd5dfeb39a00d37deee65d4c627927 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 19 Nov 2008 21:32:24 -0800 Subject: netdev: network device operations infrastructure This patch changes the network device internal API to move adminstrative operations out of the network device structure and into a separate structure. This patch involves some hackery to maintain compatablity between the new and old model, so all 300+ drivers don't have to be changed at once. For drivers that aren't converted yet, the netdevice_ops virt function list still resides in the net_device structure. For old protocols, the new net_device_ops are copied out to the old net_device pointers. After the transistion is completed the nag message can be changed to an WARN_ON, and the compatiablity code can be made configurable. Some function pointers aren't moved: * destructor can't be in net_device_ops because it may need to be referenced after the module is unloaded. * neighbor setup is manipulated in a couple of places that need special consideration * hard_start_xmit is in the fast path for transmit. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 232 +++++++++++++++++++++++++++++++++------------- net/Kconfig | 3 + net/core/dev.c | 109 +++++++++++++++------- net/core/netpoll.c | 7 +- net/core/rtnetlink.c | 9 +- net/sched/sch_generic.c | 4 +- 6 files changed, 259 insertions(+), 105 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 12d7f4469dc..9060f5f3517 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -451,6 +451,131 @@ struct netdev_queue { struct Qdisc *qdisc_sleeping; } ____cacheline_aligned_in_smp; + +/* + * This structure defines the management hooks for network devices. + * The following hooks can bed defined and are optonal (can be null) + * unless otherwise noted. + * + * int (*ndo_init)(struct net_device *dev); + * This function is called once when network device is registered. + * The network device can use this to any late stage initializaton + * or semantic validattion. It can fail with an error code which will + * be propogated back to register_netdev + * + * void (*ndo_uninit)(struct net_device *dev); + * This function is called when device is unregistered or when registration + * fails. It is not called if init fails. + * + * int (*ndo_open)(struct net_device *dev); + * This function is called when network device transistions to the up + * state. + * + * int (*ndo_stop)(struct net_device *dev); + * This function is called when network device transistions to the down + * state. + * + * void (*ndo_change_rx_flags)(struct net_device *dev, int flags); + * This function is called to allow device receiver to make + * changes to configuration when multicast or promiscious is enabled. + * + * void (*ndo_set_rx_mode)(struct net_device *dev); + * This function is called device changes address list filtering. + * + * void (*ndo_set_multicast_list)(struct net_device *dev); + * This function is called when the multicast address list changes. + * + * int (*ndo_set_mac_address)(struct net_device *dev, void *addr); + * This function is called when the Media Access Control address + * needs to be changed. If not this interface is not defined, the + * mac address can not be changed. + * + * int (*ndo_validate_addr)(struct net_device *dev); + * Test if Media Access Control address is valid for the device. + * + * int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); + * Called when a user request an ioctl which can't be handled by + * the generic interface code. If not defined ioctl's return + * not supported error code. + * + * int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); + * Used to set network devices bus interface parameters. This interface + * is retained for legacy reason, new devices should use the bus + * interface (PCI) for low level management. + * + * int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); + * Called when a user wants to change the Maximum Transfer Unit + * of a device. If not defined, any request to change MTU will + * will return an error. + * + * void (*ndo_tx_timeout) (struct net_device *dev); + * Callback uses when the transmitter has not made any progress + * for dev->watchdog ticks. + * + * struct net_device_stats* (*get_stats)(struct net_device *dev); + * Called when a user wants to get the network device usage + * statistics. If not defined, the counters in dev->stats will + * be used. + * + * void (*ndo_vlan_rx_register)(struct net_device *dev, struct vlan_group *grp); + * If device support VLAN receive accleration + * (ie. dev->features & NETIF_F_HW_VLAN_RX), then this function is called + * when vlan groups for the device changes. Note: grp is NULL + * if no vlan's groups are being used. + * + * void (*ndo_vlan_rx_add_vid)(struct net_device *dev, unsigned short vid); + * If device support VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER) + * this function is called when a VLAN id is registered. + * + * void (*ndo_vlan_rx_kill_vid)(struct net_device *dev, unsigned short vid); + * If device support VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER) + * this function is called when a VLAN id is unregistered. + * + * void (*ndo_poll_controller)(struct net_device *dev); + */ +struct net_device_ops { + int (*ndo_init)(struct net_device *dev); + void (*ndo_uninit)(struct net_device *dev); + int (*ndo_open)(struct net_device *dev); + int (*ndo_stop)(struct net_device *dev); +#define HAVE_CHANGE_RX_FLAGS + void (*ndo_change_rx_flags)(struct net_device *dev, + int flags); +#define HAVE_SET_RX_MODE + void (*ndo_set_rx_mode)(struct net_device *dev); +#define HAVE_MULTICAST + void (*ndo_set_multicast_list)(struct net_device *dev); +#define HAVE_SET_MAC_ADDR + int (*ndo_set_mac_address)(struct net_device *dev, + void *addr); +#define HAVE_VALIDATE_ADDR + int (*ndo_validate_addr)(struct net_device *dev); +#define HAVE_PRIVATE_IOCTL + int (*ndo_do_ioctl)(struct net_device *dev, + struct ifreq *ifr, int cmd); +#define HAVE_SET_CONFIG + int (*ndo_set_config)(struct net_device *dev, + struct ifmap *map); +#define HAVE_CHANGE_MTU + int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); + +#define HAVE_TX_TIMEOUT + void (*ndo_tx_timeout) (struct net_device *dev); + + struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); + + void (*ndo_vlan_rx_register)(struct net_device *dev, + struct vlan_group *grp); + void (*ndo_vlan_rx_add_vid)(struct net_device *dev, + unsigned short vid); + void (*ndo_vlan_rx_kill_vid)(struct net_device *dev, + unsigned short vid); +#ifdef CONFIG_NET_POLL_CONTROLLER +#define HAVE_NETDEV_POLL + void (*ndo_poll_controller)(struct net_device *dev); +#endif +}; + /* * The DEVICE structure. * Actually, this whole structure is a big mistake. It mixes I/O @@ -498,11 +623,6 @@ struct net_device #ifdef CONFIG_NETPOLL struct list_head napi_list; #endif - - /* The device initialization function. Called only once. */ - int (*init)(struct net_device *dev); - - /* ------- Fields preinitialized in Space.c finish here ------- */ /* Net device features */ unsigned long features; @@ -546,15 +666,13 @@ struct net_device * for all in netdev_increment_features. */ #define NETIF_F_ONE_FOR_ALL (NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \ - NETIF_F_SG | NETIF_F_HIGHDMA | \ + NETIF_F_SG | NETIF_F_HIGHDMA | \ NETIF_F_FRAGLIST) /* Interface index. Unique device identifier */ int ifindex; int iflink; - - struct net_device_stats* (*get_stats)(struct net_device *dev); struct net_device_stats stats; #ifdef CONFIG_WIRELESS_EXT @@ -564,18 +682,13 @@ struct net_device /* Instance data managed by the core of Wireless Extensions. */ struct iw_public_data * wireless_data; #endif + /* Management operations */ + const struct net_device_ops *netdev_ops; const struct ethtool_ops *ethtool_ops; /* Hardware header description */ const struct header_ops *header_ops; - /* - * This marks the end of the "visible" part of the structure. All - * fields hereafter are internal to the system, and may change at - * will (read: may be cleaned up at will). - */ - - unsigned int flags; /* interface flags (a la BSD) */ unsigned short gflags; unsigned short priv_flags; /* Like 'flags' but invisible to userspace. */ @@ -634,7 +747,7 @@ struct net_device unsigned long last_rx; /* Time of last Rx */ /* Interface address info used in eth_type_trans() */ unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address, (before bcast - because most packets are unicast) */ + because most packets are unicast) */ unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ @@ -648,6 +761,10 @@ struct net_device /* Number of TX queues currently active in device */ unsigned int real_num_tx_queues; + /* Map buffer to appropriate transmit queue */ + u16 (*select_queue)(struct net_device *dev, + struct sk_buff *skb); + unsigned long tx_queue_len; /* Max frames per queue allowed */ spinlock_t tx_global_lock; /* @@ -662,9 +779,6 @@ struct net_device int watchdog_timeo; /* used by dev_watchdog() */ struct timer_list watchdog_timer; -/* - * refcnt is a very hot point, so align it on SMP - */ /* Number of references to this device */ atomic_t refcnt ____cacheline_aligned_in_smp; @@ -683,56 +797,14 @@ struct net_device NETREG_RELEASED, /* called free_netdev */ } reg_state; - /* Called after device is detached from network. */ - void (*uninit)(struct net_device *dev); - /* Called after last user reference disappears. */ - void (*destructor)(struct net_device *dev); + /* Called from unregister, can be used to call free_netdev */ + void (*destructor)(struct net_device *dev); - /* Pointers to interface service routines. */ - int (*open)(struct net_device *dev); - int (*stop)(struct net_device *dev); -#define HAVE_NETDEV_POLL -#define HAVE_CHANGE_RX_FLAGS - void (*change_rx_flags)(struct net_device *dev, - int flags); -#define HAVE_SET_RX_MODE - void (*set_rx_mode)(struct net_device *dev); -#define HAVE_MULTICAST - void (*set_multicast_list)(struct net_device *dev); -#define HAVE_SET_MAC_ADDR - int (*set_mac_address)(struct net_device *dev, - void *addr); -#define HAVE_VALIDATE_ADDR - int (*validate_addr)(struct net_device *dev); -#define HAVE_PRIVATE_IOCTL - int (*do_ioctl)(struct net_device *dev, - struct ifreq *ifr, int cmd); -#define HAVE_SET_CONFIG - int (*set_config)(struct net_device *dev, - struct ifmap *map); -#define HAVE_CHANGE_MTU - int (*change_mtu)(struct net_device *dev, int new_mtu); + int (*neigh_setup)(struct net_device *dev, struct neigh_parms *); -#define HAVE_TX_TIMEOUT - void (*tx_timeout) (struct net_device *dev); - - void (*vlan_rx_register)(struct net_device *dev, - struct vlan_group *grp); - void (*vlan_rx_add_vid)(struct net_device *dev, - unsigned short vid); - void (*vlan_rx_kill_vid)(struct net_device *dev, - unsigned short vid); - - int (*neigh_setup)(struct net_device *dev, struct neigh_parms *); #ifdef CONFIG_NETPOLL struct netpoll_info *npinfo; #endif -#ifdef CONFIG_NET_POLL_CONTROLLER - void (*poll_controller)(struct net_device *dev); -#endif - - u16 (*select_queue)(struct net_device *dev, - struct sk_buff *skb); #ifdef CONFIG_NET_NS /* Network namespace this network device is inside */ @@ -763,6 +835,38 @@ struct net_device /* for setting kernel sock attribute on TCP connection setup */ #define GSO_MAX_SIZE 65536 unsigned int gso_max_size; + +#ifdef CONFIG_COMPAT_NET_DEV_OPS + struct { + int (*init)(struct net_device *dev); + void (*uninit)(struct net_device *dev); + int (*open)(struct net_device *dev); + int (*stop)(struct net_device *dev); + void (*change_rx_flags)(struct net_device *dev, + int flags); + void (*set_rx_mode)(struct net_device *dev); + void (*set_multicast_list)(struct net_device *dev); + int (*set_mac_address)(struct net_device *dev, + void *addr); + int (*validate_addr)(struct net_device *dev); + int (*do_ioctl)(struct net_device *dev, + struct ifreq *ifr, int cmd); + int (*set_config)(struct net_device *dev, + struct ifmap *map); + int (*change_mtu)(struct net_device *dev, int new_mtu); + void (*tx_timeout) (struct net_device *dev); + struct net_device_stats* (*get_stats)(struct net_device *dev); + void (*vlan_rx_register)(struct net_device *dev, + struct vlan_group *grp); + void (*vlan_rx_add_vid)(struct net_device *dev, + unsigned short vid); + void (*vlan_rx_kill_vid)(struct net_device *dev, + unsigned short vid); +#ifdef CONFIG_NET_POLL_CONTROLLER + void (*poll_controller)(struct net_device *dev); +#endif +#endif + }; }; #define to_net_dev(d) container_of(d, struct net_device, dev) diff --git a/net/Kconfig b/net/Kconfig index 8c3d97ca0d9..4e2e40ba8ba 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -32,6 +32,9 @@ config NET_NS Allow user space to create what appear to be multiple instances of the network stack. +config COMPAT_NET_DEV_OPS + def_bool y + source "net/packet/Kconfig" source "net/unix/Kconfig" source "net/xfrm/Kconfig" diff --git a/net/core/dev.c b/net/core/dev.c index e08c0fcd603..ca14ab407b3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1059,6 +1059,7 @@ void dev_load(struct net *net, const char *name) */ int dev_open(struct net_device *dev) { + const struct net_device_ops *ops = dev->netdev_ops; int ret = 0; ASSERT_RTNL(); @@ -1081,11 +1082,11 @@ int dev_open(struct net_device *dev) */ set_bit(__LINK_STATE_START, &dev->state); - if (dev->validate_addr) - ret = dev->validate_addr(dev); + if (ops->ndo_validate_addr) + ret = ops->ndo_validate_addr(dev); - if (!ret && dev->open) - ret = dev->open(dev); + if (!ret && ops->ndo_open) + ret = ops->ndo_open(dev); /* * If it went open OK then: @@ -1129,6 +1130,7 @@ int dev_open(struct net_device *dev) */ int dev_close(struct net_device *dev) { + const struct net_device_ops *ops = dev->netdev_ops; ASSERT_RTNL(); might_sleep(); @@ -1161,8 +1163,8 @@ int dev_close(struct net_device *dev) * We allow it to be called even after a DETACH hot-plug * event. */ - if (dev->stop) - dev->stop(dev); + if (ops->ndo_stop) + ops->ndo_stop(dev); /* * Device is now down. @@ -2930,8 +2932,10 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) static void dev_change_rx_flags(struct net_device *dev, int flags) { - if (dev->flags & IFF_UP && dev->change_rx_flags) - dev->change_rx_flags(dev, flags); + const struct net_device_ops *ops = dev->netdev_ops; + + if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags) + ops->ndo_change_rx_flags(dev, flags); } static int __dev_set_promiscuity(struct net_device *dev, int inc) @@ -3051,6 +3055,8 @@ int dev_set_allmulti(struct net_device *dev, int inc) */ void __dev_set_rx_mode(struct net_device *dev) { + const struct net_device_ops *ops = dev->netdev_ops; + /* dev_open will call this function so the list will stay sane. */ if (!(dev->flags&IFF_UP)) return; @@ -3058,8 +3064,8 @@ void __dev_set_rx_mode(struct net_device *dev) if (!netif_device_present(dev)) return; - if (dev->set_rx_mode) - dev->set_rx_mode(dev); + if (ops->ndo_set_rx_mode) + ops->ndo_set_rx_mode(dev); else { /* Unicast addresses changes may only happen under the rtnl, * therefore calling __dev_set_promiscuity here is safe. @@ -3072,8 +3078,8 @@ void __dev_set_rx_mode(struct net_device *dev) dev->uc_promisc = 0; } - if (dev->set_multicast_list) - dev->set_multicast_list(dev); + if (ops->ndo_set_multicast_list) + ops->ndo_set_multicast_list(dev); } } @@ -3432,6 +3438,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags) */ int dev_set_mtu(struct net_device *dev, int new_mtu) { + const struct net_device_ops *ops = dev->netdev_ops; int err; if (new_mtu == dev->mtu) @@ -3445,10 +3452,11 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) return -ENODEV; err = 0; - if (dev->change_mtu) - err = dev->change_mtu(dev, new_mtu); + if (ops->ndo_change_mtu) + err = ops->ndo_change_mtu(dev, new_mtu); else dev->mtu = new_mtu; + if (!err && dev->flags & IFF_UP) call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); return err; @@ -3463,15 +3471,16 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) */ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) { + const struct net_device_ops *ops = dev->netdev_ops; int err; - if (!dev->set_mac_address) + if (!ops->ndo_set_mac_address) return -EOPNOTSUPP; if (sa->sa_family != dev->type) return -EINVAL; if (!netif_device_present(dev)) return -ENODEV; - err = dev->set_mac_address(dev, sa); + err = ops->ndo_set_mac_address(dev, sa); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return err; @@ -3551,6 +3560,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) { int err; struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); + const struct net_device_ops *ops = dev->netdev_ops; if (!dev) return -ENODEV; @@ -3578,15 +3588,15 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) return 0; case SIOCSIFMAP: - if (dev->set_config) { + if (ops->ndo_set_config) { if (!netif_device_present(dev)) return -ENODEV; - return dev->set_config(dev, &ifr->ifr_map); + return ops->ndo_set_config(dev, &ifr->ifr_map); } return -EOPNOTSUPP; case SIOCADDMULTI: - if ((!dev->set_multicast_list && !dev->set_rx_mode) || + if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) @@ -3595,7 +3605,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) dev->addr_len, 1); case SIOCDELMULTI: - if ((!dev->set_multicast_list && !dev->set_rx_mode) || + if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) @@ -3633,10 +3643,9 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) cmd == SIOCBRDELIF || cmd == SIOCWANDEV) { err = -EOPNOTSUPP; - if (dev->do_ioctl) { + if (ops->ndo_do_ioctl) { if (netif_device_present(dev)) - err = dev->do_ioctl(dev, ifr, - cmd); + err = ops->ndo_do_ioctl(dev, ifr, cmd); else err = -ENODEV; } @@ -3897,8 +3906,8 @@ static void rollback_registered(struct net_device *dev) */ dev_addr_discard(dev); - if (dev->uninit) - dev->uninit(dev); + if (dev->netdev_ops->ndo_uninit) + dev->netdev_ops->ndo_uninit(dev); /* Notifier chain MUST detach us from master device. */ WARN_ON(dev->master); @@ -3988,7 +3997,7 @@ int register_netdevice(struct net_device *dev) struct hlist_head *head; struct hlist_node *p; int ret; - struct net *net; + struct net *net = dev_net(dev); BUG_ON(dev_boot_phase); ASSERT_RTNL(); @@ -3997,8 +4006,7 @@ int register_netdevice(struct net_device *dev) /* When net_device's are persistent, this will be fatal. */ BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); - BUG_ON(!dev_net(dev)); - net = dev_net(dev); + BUG_ON(!net); spin_lock_init(&dev->addr_list_lock); netdev_set_addr_lockdep_class(dev); @@ -4006,9 +4014,46 @@ int register_netdevice(struct net_device *dev) dev->iflink = -1; +#ifdef CONFIG_COMPAT_NET_DEV_OPS + /* Netdevice_ops API compatiability support. + * This is temporary until all network devices are converted. + */ + if (dev->netdev_ops) { + const struct net_device_ops *ops = dev->netdev_ops; + + dev->init = ops->ndo_init; + dev->uninit = ops->ndo_uninit; + dev->open = ops->ndo_open; + dev->change_rx_flags = ops->ndo_change_rx_flags; + dev->set_rx_mode = ops->ndo_set_rx_mode; + dev->set_multicast_list = ops->ndo_set_multicast_list; + dev->set_mac_address = ops->ndo_set_mac_address; + dev->validate_addr = ops->ndo_validate_addr; + dev->do_ioctl = ops->ndo_do_ioctl; + dev->set_config = ops->ndo_set_config; + dev->change_mtu = ops->ndo_change_mtu; + dev->tx_timeout = ops->ndo_tx_timeout; + dev->get_stats = ops->ndo_get_stats; + dev->vlan_rx_register = ops->ndo_vlan_rx_register; + dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid; + dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid; +#ifdef CONFIG_NET_POLL_CONTROLLER + dev->poll_controller = ops->ndo_poll_controller; +#endif + } else { + char drivername[64]; + pr_info("%s (%s): not using net_device_ops yet\n", + dev->name, netdev_drivername(dev, drivername, 64)); + + /* This works only because net_device_ops and the + compatiablity structure are the same. */ + dev->netdev_ops = (void *) &(dev->init); + } +#endif + /* Init, if this function is available */ - if (dev->init) { - ret = dev->init(dev); + if (dev->netdev_ops->ndo_init) { + ret = dev->netdev_ops->ndo_init(dev); if (ret) { if (ret > 0) ret = -EIO; @@ -4086,8 +4131,8 @@ out: return ret; err_uninit: - if (dev->uninit) - dev->uninit(dev); + if (dev->netdev_ops->ndo_uninit) + dev->netdev_ops->ndo_uninit(dev); goto out; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index fc4e28e23b8..630df603444 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -172,12 +172,13 @@ static void service_arp_queue(struct netpoll_info *npi) void netpoll_poll(struct netpoll *np) { struct net_device *dev = np->dev; + const struct net_device_ops *ops = dev->netdev_ops; - if (!dev || !netif_running(dev) || !dev->poll_controller) + if (!dev || !netif_running(dev) || !ops->ndo_poll_controller) return; /* Process pending work on NIC */ - dev->poll_controller(dev); + ops->ndo_poll_controller(dev); poll_napi(dev); @@ -694,7 +695,7 @@ int netpoll_setup(struct netpoll *np) atomic_inc(&npinfo->refcnt); } - if (!ndev->poll_controller) { + if (!ndev->netdev_ops->ndo_poll_controller) { printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", np->name, np->dev_name); err = -ENOTSUPP; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4dfb6b4d455..6f8e0778e56 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -762,6 +762,7 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct nlattr **tb, char *ifname, int modified) { + const struct net_device_ops *ops = dev->netdev_ops; int send_addr_notify = 0; int err; @@ -783,7 +784,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct rtnl_link_ifmap *u_map; struct ifmap k_map; - if (!dev->set_config) { + if (!ops->ndo_set_config) { err = -EOPNOTSUPP; goto errout; } @@ -801,7 +802,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, k_map.dma = (unsigned char) u_map->dma; k_map.port = (unsigned char) u_map->port; - err = dev->set_config(dev, &k_map); + err = ops->ndo_set_config(dev, &k_map); if (err < 0) goto errout; @@ -812,7 +813,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct sockaddr *sa; int len; - if (!dev->set_mac_address) { + if (!ops->ndo_set_mac_address) { err = -EOPNOTSUPP; goto errout; } @@ -831,7 +832,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, sa->sa_family = dev->type; memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), dev->addr_len); - err = dev->set_mac_address(dev, sa); + err = ops->ndo_set_mac_address(dev, sa); kfree(sa); if (err) goto errout; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 80c8f3dbbea..95ab55c064f 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -224,7 +224,7 @@ static void dev_watchdog(unsigned long arg) char drivername[64]; WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n", dev->name, netdev_drivername(dev, drivername, 64)); - dev->tx_timeout(dev); + dev->netdev_ops->ndo_tx_timeout(dev); } if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + @@ -239,7 +239,7 @@ static void dev_watchdog(unsigned long arg) void __netdev_watchdog_up(struct net_device *dev) { - if (dev->tx_timeout) { + if (dev->netdev_ops->ndo_tx_timeout) { if (dev->watchdog_timeo <= 0) dev->watchdog_timeo = 5*HZ; if (!mod_timer(&dev->watchdog_timer, -- cgit v1.2.3-70-g09d2 From eeda3fd64f75bcbfaa70ce946513abaf3f23b8e0 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 19 Nov 2008 21:40:23 -0800 Subject: netdev: introduce dev_get_stats() In order for the network device ops get_stats call to be immutable, the handling of the default internal network device stats block has to be changed. Add a new helper function which replaces the old use of internal_get_stats. Note: change return code to make it clear that the caller should not go changing the returned statistics. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- arch/s390/appldata/appldata_net_sum.c | 4 ++-- drivers/net/bonding/bond_main.c | 5 +++-- drivers/net/sfc/ethtool.c | 2 +- drivers/parisc/led.c | 4 ++-- include/linux/netdevice.h | 4 +++- net/core/dev.c | 23 ++++++++++++++++++----- net/core/net-sysfs.c | 3 +-- net/core/rtnetlink.c | 6 +++--- 8 files changed, 33 insertions(+), 18 deletions(-) (limited to 'net/core/dev.c') diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c index 3b746556e1a..fa741f84c5b 100644 --- a/arch/s390/appldata/appldata_net_sum.c +++ b/arch/s390/appldata/appldata_net_sum.c @@ -67,7 +67,6 @@ static void appldata_get_net_sum_data(void *data) int i; struct appldata_net_sum_data *net_data; struct net_device *dev; - struct net_device_stats *stats; unsigned long rx_packets, tx_packets, rx_bytes, tx_bytes, rx_errors, tx_errors, rx_dropped, tx_dropped, collisions; @@ -86,7 +85,8 @@ static void appldata_get_net_sum_data(void *data) collisions = 0; read_lock(&dev_base_lock); for_each_netdev(&init_net, dev) { - stats = dev->get_stats(dev); + const struct net_device_stats *stats = dev_get_stats(dev); + rx_packets += stats->rx_packets; tx_packets += stats->tx_packets; rx_bytes += stats->rx_bytes; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index a08ea480805..db5f5c24a25 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3899,7 +3899,7 @@ static int bond_close(struct net_device *bond_dev) static struct net_device_stats *bond_get_stats(struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); - struct net_device_stats *stats = &(bond->stats), *sstats; + struct net_device_stats *stats = &bond->stats; struct net_device_stats local_stats; struct slave *slave; int i; @@ -3909,7 +3909,8 @@ static struct net_device_stats *bond_get_stats(struct net_device *bond_dev) read_lock_bh(&bond->lock); bond_for_each_slave(bond, slave, i) { - sstats = slave->dev->get_stats(slave->dev); + const struct net_device_stats *sstats = dev_get_stats(slave->dev); + local_stats.rx_packets += sstats->rx_packets; local_stats.rx_bytes += sstats->rx_bytes; local_stats.rx_errors += sstats->rx_errors; diff --git a/drivers/net/sfc/ethtool.c b/drivers/net/sfc/ethtool.c index abd8fcd6e62..cd92c4d8dbc 100644 --- a/drivers/net/sfc/ethtool.c +++ b/drivers/net/sfc/ethtool.c @@ -426,7 +426,7 @@ static void efx_ethtool_get_stats(struct net_device *net_dev, EFX_BUG_ON_PARANOID(stats->n_stats != EFX_ETHTOOL_NUM_STATS); /* Update MAC and NIC statistics */ - net_dev->get_stats(net_dev); + dev_get_stats(net_dev); /* Fill detailed statistics buffer */ for (i = 0; i < EFX_ETHTOOL_NUM_STATS; i++) { diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c index f9b12664f9f..454b6532e40 100644 --- a/drivers/parisc/led.c +++ b/drivers/parisc/led.c @@ -360,13 +360,13 @@ static __inline__ int led_get_net_activity(void) read_lock(&dev_base_lock); rcu_read_lock(); for_each_netdev(&init_net, dev) { - struct net_device_stats *stats; + const struct net_device_stats *stats; struct in_device *in_dev = __in_dev_get_rcu(dev); if (!in_dev || !in_dev->ifa_list) continue; if (ipv4_is_loopback(in_dev->ifa_list->ifa_local)) continue; - stats = dev->get_stats(dev); + stats = dev_get_stats(dev); rx_total += stats->rx_packets; tx_total += stats->tx_packets; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9060f5f3517..981a089d514 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -864,9 +864,9 @@ struct net_device unsigned short vid); #ifdef CONFIG_NET_POLL_CONTROLLER void (*poll_controller)(struct net_device *dev); -#endif #endif }; +#endif }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -1780,6 +1780,8 @@ extern void netdev_features_change(struct net_device *dev); /* Load a device via the kmod */ extern void dev_load(struct net *net, const char *name); extern void dev_mcast_init(void); +extern const struct net_device_stats *dev_get_stats(struct net_device *dev); + extern int netdev_max_backlog; extern int weight_p; extern int netdev_set_master(struct net_device *dev, struct net_device *master); diff --git a/net/core/dev.c b/net/core/dev.c index ca14ab407b3..8843f4e3f5e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2620,7 +2620,7 @@ void dev_seq_stop(struct seq_file *seq, void *v) static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) { - struct net_device_stats *stats = dev->get_stats(dev); + const struct net_device_stats *stats = dev_get_stats(dev); seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", @@ -4288,10 +4288,24 @@ void netdev_run_todo(void) } } -static struct net_device_stats *internal_stats(struct net_device *dev) -{ - return &dev->stats; +/** + * dev_get_stats - get network device statistics + * @dev: device to get statistics from + * + * Get network statistics from device. The device driver may provide + * its own method by setting dev->netdev_ops->get_stats; otherwise + * the internal statistics structure is used. + */ +const struct net_device_stats *dev_get_stats(struct net_device *dev) + { + const struct net_device_ops *ops = dev->netdev_ops; + + if (ops->ndo_get_stats) + return ops->ndo_get_stats(dev); + else + return &dev->stats; } +EXPORT_SYMBOL(dev_get_stats); static void netdev_init_one_queue(struct net_device *dev, struct netdev_queue *queue, @@ -4370,7 +4384,6 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, netdev_init_queues(dev); - dev->get_stats = internal_stats; netpoll_netdev_init(dev); setup(dev); strcpy(dev->name, name); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 146dcfeb060..afd42d71732 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -270,7 +270,6 @@ static ssize_t netstat_show(const struct device *d, unsigned long offset) { struct net_device *dev = to_net_dev(d); - struct net_device_stats *stats; ssize_t ret = -EINVAL; WARN_ON(offset > sizeof(struct net_device_stats) || @@ -278,7 +277,7 @@ static ssize_t netstat_show(const struct device *d, read_lock(&dev_base_lock); if (dev_isalive(dev)) { - stats = dev->get_stats(dev); + const struct net_device_stats *stats = dev_get_stats(dev); ret = sprintf(buf, fmt_ulong, *(unsigned long *)(((u8 *) stats) + offset)); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 6f8e0778e56..790dd205bb5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -551,7 +551,7 @@ static void set_operstate(struct net_device *dev, unsigned char transition) } static void copy_rtnl_link_stats(struct rtnl_link_stats *a, - struct net_device_stats *b) + const struct net_device_stats *b) { a->rx_packets = b->rx_packets; a->tx_packets = b->tx_packets; @@ -609,7 +609,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq; struct ifinfomsg *ifm; struct nlmsghdr *nlh; - struct net_device_stats *stats; + const struct net_device_stats *stats; struct nlattr *attr; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); @@ -666,7 +666,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (attr == NULL) goto nla_put_failure; - stats = dev->get_stats(dev); + stats = dev_get_stats(dev); copy_rtnl_link_stats(nla_data(attr), stats); if (dev->rtnl_link_ops) { -- cgit v1.2.3-70-g09d2 From 008298231abbeb91bc7be9e8b078607b816d1a4a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 20 Nov 2008 20:14:53 -0800 Subject: netdev: add more functions to netdevice ops This patch moves neigh_setup and hard_start_xmit into the network device ops structure. For bisection, fix all the previously converted drivers as well. Bonding driver took the biggest hit on this. Added a prefetch of the hard_start_xmit in the fast path to try and reduce any impact this would have. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/8139cp.c | 2 +- drivers/net/8139too.c | 2 +- drivers/net/acenic.c | 4 ++- drivers/net/atl1e/atl1e_main.c | 3 ++- drivers/net/atlx/atl1.c | 4 +-- drivers/net/atlx/atl2.c | 2 +- drivers/net/bonding/bond_main.c | 56 +++++++++++++++++++++++++++++++++-------- drivers/net/chelsio/cxgb2.c | 6 ++--- drivers/net/cxgb3/cxgb3_main.c | 1 - drivers/net/e100.c | 2 +- drivers/net/e1000/e1000_main.c | 2 +- drivers/net/e1000e/netdev.c | 2 +- drivers/net/enic/enic_main.c | 2 +- drivers/net/forcedeth.c | 22 +++++++++++++--- drivers/net/ifb.c | 4 +-- drivers/net/igb/igb_main.c | 2 +- drivers/net/ixgb/ixgb_main.c | 2 +- drivers/net/ixgbe/ixgbe_main.c | 2 +- drivers/net/loopback.c | 2 +- drivers/net/macvlan.c | 4 +-- drivers/net/niu.c | 2 +- drivers/net/ppp_generic.c | 5 ++-- drivers/net/r8169.c | 2 +- drivers/net/skge.c | 2 +- drivers/net/sky2.c | 3 ++- drivers/net/tg3.c | 45 ++++++++++++++++++++++----------- drivers/net/tun.c | 4 +-- drivers/net/veth.c | 2 +- drivers/net/via-velocity.c | 2 +- include/linux/netdevice.h | 39 ++++++++++++++++++---------- net/bridge/br_device.c | 10 ++++---- net/bridge/br_if.c | 2 +- net/core/dev.c | 12 ++++++--- net/core/neighbour.c | 6 ++--- net/core/netpoll.c | 6 +++-- net/core/pktgen.c | 8 +++--- 36 files changed, 183 insertions(+), 93 deletions(-) (limited to 'net/core/dev.c') diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c index 13f75b67872..f6d9d1353dd 100644 --- a/drivers/net/8139cp.c +++ b/drivers/net/8139cp.c @@ -1824,6 +1824,7 @@ static const struct net_device_ops cp_netdev_ops = { .ndo_set_multicast_list = cp_set_rx_mode, .ndo_get_stats = cp_get_stats, .ndo_do_ioctl = cp_ioctl, + .ndo_start_xmit = cp_start_xmit, .ndo_tx_timeout = cp_tx_timeout, #if CP_VLAN_TAG_USED .ndo_vlan_rx_register = cp_vlan_rx_register, @@ -1949,7 +1950,6 @@ static int cp_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); dev->netdev_ops = &cp_netdev_ops; - dev->hard_start_xmit = cp_start_xmit; netif_napi_add(dev, &cp->napi, cp_rx_poll, 16); dev->ethtool_ops = &cp_ethtool_ops; dev->watchdog_timeo = TX_TIMEOUT; diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c index f8866552386..445a479db79 100644 --- a/drivers/net/8139too.c +++ b/drivers/net/8139too.c @@ -921,6 +921,7 @@ static const struct net_device_ops rtl8139_netdev_ops = { .ndo_stop = rtl8139_close, .ndo_get_stats = rtl8139_get_stats, .ndo_validate_addr = eth_validate_addr, + .ndo_start_xmit = rtl8139_start_xmit, .ndo_set_multicast_list = rtl8139_set_rx_mode, .ndo_do_ioctl = netdev_ioctl, .ndo_tx_timeout = rtl8139_tx_timeout, @@ -992,7 +993,6 @@ static int __devinit rtl8139_init_one (struct pci_dev *pdev, dev->netdev_ops = &rtl8139_netdev_ops; dev->ethtool_ops = &rtl8139_ethtool_ops; dev->watchdog_timeo = TX_TIMEOUT; - dev->hard_start_xmit = rtl8139_start_xmit; netif_napi_add(dev, &tp->napi, rtl8139_poll, 64); /* note: the hardware is not capable of sg/csum/highdma, however diff --git a/drivers/net/acenic.c b/drivers/net/acenic.c index 309a90ea921..21d24320210 100644 --- a/drivers/net/acenic.c +++ b/drivers/net/acenic.c @@ -455,10 +455,13 @@ static const struct net_device_ops ace_netdev_ops = { .ndo_stop = ace_close, .ndo_tx_timeout = ace_watchdog, .ndo_get_stats = ace_get_stats, + .ndo_start_xmit = ace_start_xmit, .ndo_set_multicast_list = ace_set_multicast_list, .ndo_set_mac_address = ace_set_mac_addr, .ndo_change_mtu = ace_change_mtu, +#if ACENIC_DO_VLAN .ndo_vlan_rx_register = ace_vlan_rx_register, +#endif }; static int __devinit acenic_probe_one(struct pci_dev *pdev, @@ -489,7 +492,6 @@ static int __devinit acenic_probe_one(struct pci_dev *pdev, dev->watchdog_timeo = 5*HZ; dev->netdev_ops = &ace_netdev_ops; - dev->hard_start_xmit = &ace_start_xmit; SET_ETHTOOL_OPS(dev, &ace_ethtool_ops); /* we only display this string ONCE */ diff --git a/drivers/net/atl1e/atl1e_main.c b/drivers/net/atl1e/atl1e_main.c index a815fffc2a5..98b2a7a466b 100644 --- a/drivers/net/atl1e/atl1e_main.c +++ b/drivers/net/atl1e/atl1e_main.c @@ -2256,6 +2256,7 @@ static void atl1e_shutdown(struct pci_dev *pdev) static const struct net_device_ops atl1e_netdev_ops = { .ndo_open = atl1e_open, .ndo_stop = atl1e_close, + .ndo_start_xmit = atl1e_xmit_frame, .ndo_get_stats = atl1e_get_stats, .ndo_set_multicast_list = atl1e_set_multi, .ndo_validate_addr = eth_validate_addr, @@ -2277,7 +2278,7 @@ static int atl1e_init_netdev(struct net_device *netdev, struct pci_dev *pdev) netdev->irq = pdev->irq; netdev->netdev_ops = &atl1e_netdev_ops; - netdev->hard_start_xmit = atl1e_xmit_frame, + netdev->watchdog_timeo = AT_TX_WATCHDOG; atl1e_set_ethtool_ops(netdev); diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c index 7a0fb04e348..aef7e47fdd2 100644 --- a/drivers/net/atlx/atl1.c +++ b/drivers/net/atlx/atl1.c @@ -2883,12 +2883,13 @@ static void atl1_poll_controller(struct net_device *netdev) static const struct net_device_ops atl1_netdev_ops = { .ndo_open = atl1_open, .ndo_stop = atl1_close, + .ndo_start_xmit = atl1_xmit_frame, .ndo_set_multicast_list = atlx_set_multi, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = atl1_set_mac, .ndo_change_mtu = atl1_change_mtu, .ndo_do_ioctl = atlx_ioctl, - .ndo_tx_timeout = atlx_tx_timeout, + .ndo_tx_timeout = atlx_tx_timeout, .ndo_vlan_rx_register = atlx_vlan_rx_register, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = atl1_poll_controller, @@ -2983,7 +2984,6 @@ static int __devinit atl1_probe(struct pci_dev *pdev, adapter->mii.reg_num_mask = 0x1f; netdev->netdev_ops = &atl1_netdev_ops; - netdev->hard_start_xmit = &atl1_xmit_frame; netdev->watchdog_timeo = 5 * HZ; netdev->ethtool_ops = &atl1_ethtool_ops; diff --git a/drivers/net/atlx/atl2.c b/drivers/net/atlx/atl2.c index b8d585722e1..0326a84503e 100644 --- a/drivers/net/atlx/atl2.c +++ b/drivers/net/atlx/atl2.c @@ -1315,6 +1315,7 @@ static void atl2_poll_controller(struct net_device *netdev) static const struct net_device_ops atl2_netdev_ops = { .ndo_open = atl2_open, .ndo_stop = atl2_close, + .ndo_start_xmit = atl2_xmit_frame, .ndo_set_multicast_list = atl2_set_multi, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = atl2_set_mac, @@ -1400,7 +1401,6 @@ static int __devinit atl2_probe(struct pci_dev *pdev, atl2_setup_pcicmd(pdev); - netdev->hard_start_xmit = &atl2_xmit_frame; netdev->netdev_ops = &atl2_netdev_ops; atl2_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 614656c8187..a339a805273 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1377,14 +1377,12 @@ done: return 0; } - static void bond_setup_by_slave(struct net_device *bond_dev, struct net_device *slave_dev) { struct bonding *bond = netdev_priv(bond_dev); - bond_dev->neigh_setup = slave_dev->neigh_setup; - bond_dev->header_ops = slave_dev->header_ops; + bond_dev->header_ops = slave_dev->header_ops; bond_dev->type = slave_dev->type; bond_dev->hard_header_len = slave_dev->hard_header_len; @@ -4124,6 +4122,20 @@ static void bond_set_multicast_list(struct net_device *bond_dev) read_unlock(&bond->lock); } +static int bond_neigh_setup(struct net_device *dev, struct neigh_parms *parms) +{ + struct bonding *bond = netdev_priv(dev); + struct slave *slave = bond->first_slave; + + if (slave) { + const struct net_device_ops *slave_ops + = slave->dev->netdev_ops; + if (slave_ops->ndo_neigh_setup) + return slave_ops->ndo_neigh_setup(dev, parms); + } + return 0; +} + /* * Change the MTU of all of a master's slaves to match the master */ @@ -4490,6 +4502,35 @@ static void bond_set_xmit_hash_policy(struct bonding *bond) } } +static int bond_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + const struct bonding *bond = netdev_priv(dev); + + switch (bond->params.mode) { + case BOND_MODE_ROUNDROBIN: + return bond_xmit_roundrobin(skb, dev); + case BOND_MODE_ACTIVEBACKUP: + return bond_xmit_activebackup(skb, dev); + case BOND_MODE_XOR: + return bond_xmit_xor(skb, dev); + case BOND_MODE_BROADCAST: + return bond_xmit_broadcast(skb, dev); + case BOND_MODE_8023AD: + return bond_3ad_xmit_xor(skb, dev); + case BOND_MODE_ALB: + case BOND_MODE_TLB: + return bond_alb_xmit(skb, dev); + default: + /* Should never happen, mode already checked */ + printk(KERN_ERR DRV_NAME ": %s: Error: Unknown bonding mode %d\n", + dev->name, bond->params.mode); + WARN_ON_ONCE(1); + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } +} + + /* * set bond mode specific net device operations */ @@ -4499,28 +4540,22 @@ void bond_set_mode_ops(struct bonding *bond, int mode) switch (mode) { case BOND_MODE_ROUNDROBIN: - bond_dev->hard_start_xmit = bond_xmit_roundrobin; break; case BOND_MODE_ACTIVEBACKUP: - bond_dev->hard_start_xmit = bond_xmit_activebackup; break; case BOND_MODE_XOR: - bond_dev->hard_start_xmit = bond_xmit_xor; bond_set_xmit_hash_policy(bond); break; case BOND_MODE_BROADCAST: - bond_dev->hard_start_xmit = bond_xmit_broadcast; break; case BOND_MODE_8023AD: bond_set_master_3ad_flags(bond); - bond_dev->hard_start_xmit = bond_3ad_xmit_xor; bond_set_xmit_hash_policy(bond); break; case BOND_MODE_ALB: bond_set_master_alb_flags(bond); /* FALLTHRU */ case BOND_MODE_TLB: - bond_dev->hard_start_xmit = bond_alb_xmit; break; default: /* Should never happen, mode already checked */ @@ -4553,12 +4588,13 @@ static const struct ethtool_ops bond_ethtool_ops = { static const struct net_device_ops bond_netdev_ops = { .ndo_open = bond_open, .ndo_stop = bond_close, + .ndo_start_xmit = bond_start_xmit, .ndo_get_stats = bond_get_stats, .ndo_do_ioctl = bond_do_ioctl, .ndo_set_multicast_list = bond_set_multicast_list, .ndo_change_mtu = bond_change_mtu, - .ndo_validate_addr = NULL, .ndo_set_mac_address = bond_set_mac_address, + .ndo_neigh_setup = bond_neigh_setup, .ndo_vlan_rx_register = bond_vlan_rx_register, .ndo_vlan_rx_add_vid = bond_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = bond_vlan_rx_kill_vid, diff --git a/drivers/net/chelsio/cxgb2.c b/drivers/net/chelsio/cxgb2.c index 482741797eb..9b6011e7678 100644 --- a/drivers/net/chelsio/cxgb2.c +++ b/drivers/net/chelsio/cxgb2.c @@ -915,7 +915,7 @@ static int t1_set_mac_addr(struct net_device *dev, void *p) } #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) -static void vlan_rx_register(struct net_device *dev, +static void t1_vlan_rx_register(struct net_device *dev, struct vlan_group *grp) { struct adapter *adapter = dev->ml_priv; @@ -1013,6 +1013,7 @@ void t1_fatal_err(struct adapter *adapter) static const struct net_device_ops cxgb_netdev_ops = { .ndo_open = cxgb_open, .ndo_stop = cxgb_close, + .ndo_start_xmit = t1_start_xmit, .ndo_get_stats = t1_get_stats, .ndo_validate_addr = eth_validate_addr, .ndo_set_multicast_list = t1_set_rxmode, @@ -1020,7 +1021,7 @@ static const struct net_device_ops cxgb_netdev_ops = { .ndo_change_mtu = t1_change_mtu, .ndo_set_mac_address = t1_set_mac_addr, #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) - .ndo_vlan_rx_register = vlan_rx_register, + .ndo_vlan_rx_register = t1_vlan_rx_register, #endif #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = t1_netpoll, @@ -1157,7 +1158,6 @@ static int __devinit init_one(struct pci_dev *pdev, } netdev->netdev_ops = &cxgb_netdev_ops; - netdev->hard_start_xmit = t1_start_xmit; netdev->hard_header_len += (adapter->flags & TSO_CAPABLE) ? sizeof(struct cpl_tx_pkt_lso) : sizeof(struct cpl_tx_pkt); diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c index a9479be53ec..cd9fcaca70f 100644 --- a/drivers/net/cxgb3/cxgb3_main.c +++ b/drivers/net/cxgb3/cxgb3_main.c @@ -2955,7 +2955,6 @@ static int __devinit init_one(struct pci_dev *pdev, netdev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; netdev->netdev_ops = &cxgb_netdev_ops; - netdev->hard_start_xmit = t3_eth_xmit; SET_ETHTOOL_OPS(netdev, &cxgb_ethtool_ops); } diff --git a/drivers/net/e100.c b/drivers/net/e100.c index 5894716de19..2001a63794f 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -2615,6 +2615,7 @@ static int e100_close(struct net_device *netdev) static const struct net_device_ops e100_netdev_ops = { .ndo_open = e100_open, .ndo_stop = e100_close, + .ndo_start_xmit = e100_xmit_frame, .ndo_validate_addr = eth_validate_addr, .ndo_set_multicast_list = e100_set_multicast_list, .ndo_set_mac_address = e100_set_mac_address, @@ -2640,7 +2641,6 @@ static int __devinit e100_probe(struct pci_dev *pdev, } netdev->netdev_ops = &e100_netdev_ops; - netdev->hard_start_xmit = e100_xmit_frame; SET_ETHTOOL_OPS(netdev, &e100_ethtool_ops); netdev->watchdog_timeo = E100_WATCHDOG_PERIOD; strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index debbba390d4..5c098c9d584 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -891,6 +891,7 @@ static int e1000_is_need_ioport(struct pci_dev *pdev) static const struct net_device_ops e1000_netdev_ops = { .ndo_open = e1000_open, .ndo_stop = e1000_close, + .ndo_start_xmit = e1000_xmit_frame, .ndo_get_stats = e1000_get_stats, .ndo_set_rx_mode = e1000_set_rx_mode, .ndo_set_mac_address = e1000_set_mac, @@ -1001,7 +1002,6 @@ static int __devinit e1000_probe(struct pci_dev *pdev, } netdev->netdev_ops = &e1000_netdev_ops; - netdev->hard_start_xmit = &e1000_xmit_frame; e1000_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; netif_napi_add(netdev, &adapter->napi, e1000_clean, 64); diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index ced839e4cae..cc0502bbb9f 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -4707,6 +4707,7 @@ static void e1000_eeprom_checks(struct e1000_adapter *adapter) static const struct net_device_ops e1000e_netdev_ops = { .ndo_open = e1000_open, .ndo_stop = e1000_close, + .ndo_start_xmit = e1000_xmit_frame, .ndo_get_stats = e1000_get_stats, .ndo_set_multicast_list = e1000_set_multi, .ndo_set_mac_address = e1000_set_mac, @@ -4822,7 +4823,6 @@ static int __devinit e1000_probe(struct pci_dev *pdev, /* construct the net_device struct */ netdev->netdev_ops = &e1000e_netdev_ops; - netdev->hard_start_xmit = &e1000_xmit_frame; e1000e_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; netif_napi_add(netdev, &adapter->napi, e1000_clean, 64); diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c index 40f8c88b166..1c409df735d 100644 --- a/drivers/net/enic/enic_main.c +++ b/drivers/net/enic/enic_main.c @@ -1593,6 +1593,7 @@ static void enic_iounmap(struct enic *enic) static const struct net_device_ops enic_netdev_ops = { .ndo_open = enic_open, .ndo_stop = enic_stop, + .ndo_start_xmit = enic_hard_start_xmit, .ndo_get_stats = enic_get_stats, .ndo_validate_addr = eth_validate_addr, .ndo_set_multicast_list = enic_set_multicast_list, @@ -1830,7 +1831,6 @@ static int __devinit enic_probe(struct pci_dev *pdev, } netdev->netdev_ops = &enic_netdev_ops; - netdev->hard_start_xmit = enic_hard_start_xmit; netdev->watchdog_timeo = 2 * HZ; netdev->ethtool_ops = &enic_ethtool_ops; diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index dd2e1f670b0..0d7e5750245 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -5412,6 +5412,23 @@ static const struct net_device_ops nv_netdev_ops = { .ndo_open = nv_open, .ndo_stop = nv_close, .ndo_get_stats = nv_get_stats, + .ndo_start_xmit = nv_start_xmit, + .ndo_tx_timeout = nv_tx_timeout, + .ndo_change_mtu = nv_change_mtu, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = nv_set_mac_address, + .ndo_set_multicast_list = nv_set_multicast, + .ndo_vlan_rx_register = nv_vlan_rx_register, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = nv_poll_controller, +#endif +}; + +static const struct net_device_ops nv_netdev_ops_optimized = { + .ndo_open = nv_open, + .ndo_stop = nv_close, + .ndo_get_stats = nv_get_stats, + .ndo_start_xmit = nv_start_xmit_optimized, .ndo_tx_timeout = nv_tx_timeout, .ndo_change_mtu = nv_change_mtu, .ndo_validate_addr = eth_validate_addr, @@ -5592,11 +5609,10 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i goto out_freering; if (!nv_optimized(np)) - dev->hard_start_xmit = nv_start_xmit; + dev->netdev_ops = &nv_netdev_ops; else - dev->hard_start_xmit = nv_start_xmit_optimized; + dev->netdev_ops = &nv_netdev_ops_optimized; - dev->netdev_ops = &nv_netdev_ops; #ifdef CONFIG_FORCEDETH_NAPI netif_napi_add(dev, &np->napi, nv_napi_poll, RX_WORK_PER_LOOP); #endif diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 363a166df8f..60a26300193 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -138,15 +138,15 @@ resched: } static const struct net_device_ops ifb_netdev_ops = { - .ndo_validate_addr = eth_validate_addr, .ndo_open = ifb_open, .ndo_stop = ifb_close, + .ndo_start_xmit = ifb_xmit, + .ndo_validate_addr = eth_validate_addr, }; static void ifb_setup(struct net_device *dev) { /* Initialize the device structure. */ - dev->hard_start_xmit = ifb_xmit; dev->destructor = free_netdev; dev->netdev_ops = &ifb_netdev_ops; diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index ceb0a045879..eca5684d565 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -953,6 +953,7 @@ static int igb_is_need_ioport(struct pci_dev *pdev) static const struct net_device_ops igb_netdev_ops = { .ndo_open = igb_open, .ndo_stop = igb_close, + .ndo_start_xmit = igb_xmit_frame_adv, .ndo_get_stats = igb_get_stats, .ndo_set_multicast_list = igb_set_multi, .ndo_set_mac_address = igb_set_mac, @@ -1080,7 +1081,6 @@ static int __devinit igb_probe(struct pci_dev *pdev, netdev->netdev_ops = &igb_netdev_ops; igb_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; - netdev->hard_start_xmit = &igb_xmit_frame_adv; strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c index 3ca9daa70b3..a04e3892ddf 100644 --- a/drivers/net/ixgb/ixgb_main.c +++ b/drivers/net/ixgb/ixgb_main.c @@ -324,6 +324,7 @@ ixgb_reset(struct ixgb_adapter *adapter) static const struct net_device_ops ixgb_netdev_ops = { .ndo_open = ixgb_open, .ndo_stop = ixgb_close, + .ndo_start_xmit = ixgb_xmit_frame, .ndo_get_stats = ixgb_get_stats, .ndo_set_multicast_list = ixgb_set_multi, .ndo_validate_addr = eth_validate_addr, @@ -414,7 +415,6 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } netdev->netdev_ops = &ixgb_netdev_ops; - netdev->hard_start_xmit = &ixgb_xmit_frame; ixgb_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; netif_napi_add(netdev, &adapter->napi, ixgb_clean, 64); diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 7ad07a00680..40108523377 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -3728,6 +3728,7 @@ static int ixgbe_link_config(struct ixgbe_hw *hw) static const struct net_device_ops ixgbe_netdev_ops = { .ndo_open = ixgbe_open, .ndo_stop = ixgbe_close, + .ndo_start_xmit = ixgbe_xmit_frame, .ndo_get_stats = ixgbe_get_stats, .ndo_set_multicast_list = ixgbe_set_rx_mode, .ndo_validate_addr = eth_validate_addr, @@ -3824,7 +3825,6 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, } netdev->netdev_ops = &ixgbe_netdev_ops; - netdev->hard_start_xmit = &ixgbe_xmit_frame; ixgbe_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; strcpy(netdev->name, pci_name(pdev)); diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 958450124de..b7d438a367f 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -145,6 +145,7 @@ static void loopback_dev_free(struct net_device *dev) static const struct net_device_ops loopback_ops = { .ndo_init = loopback_dev_init, + .ndo_start_xmit= loopback_xmit, .ndo_get_stats = loopback_get_stats, }; @@ -155,7 +156,6 @@ static const struct net_device_ops loopback_ops = { static void loopback_setup(struct net_device *dev) { dev->mtu = (16 * 1024) + 20 + 20 + 12; - dev->hard_start_xmit = loopback_xmit; dev->hard_header_len = ETH_HLEN; /* 14 */ dev->addr_len = ETH_ALEN; /* 6 */ dev->tx_queue_len = 0; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index d00ea444e0a..e8879217a1d 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -140,7 +140,7 @@ static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb) return NULL; } -static int macvlan_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +static int macvlan_start_xmit(struct sk_buff *skb, struct net_device *dev) { const struct macvlan_dev *vlan = netdev_priv(dev); unsigned int len = skb->len; @@ -365,6 +365,7 @@ static const struct net_device_ops macvlan_netdev_ops = { .ndo_init = macvlan_init, .ndo_open = macvlan_open, .ndo_stop = macvlan_stop, + .ndo_start_xmit = macvlan_start_xmit, .ndo_change_mtu = macvlan_change_mtu, .ndo_change_rx_flags = macvlan_change_rx_flags, .ndo_set_mac_address = macvlan_set_mac_address, @@ -377,7 +378,6 @@ static void macvlan_setup(struct net_device *dev) ether_setup(dev); dev->netdev_ops = &macvlan_netdev_ops; - dev->hard_start_xmit = macvlan_hard_start_xmit; dev->destructor = free_netdev; dev->header_ops = &macvlan_hard_header_ops, dev->ethtool_ops = &macvlan_ethtool_ops; diff --git a/drivers/net/niu.c b/drivers/net/niu.c index 318537efd58..a8d10630f80 100644 --- a/drivers/net/niu.c +++ b/drivers/net/niu.c @@ -8892,6 +8892,7 @@ static struct net_device * __devinit niu_alloc_and_init( static const struct net_device_ops niu_netdev_ops = { .ndo_open = niu_open, .ndo_stop = niu_close, + .ndo_start_xmit = niu_start_xmit, .ndo_get_stats = niu_get_stats, .ndo_set_multicast_list = niu_set_rx_mode, .ndo_validate_addr = eth_validate_addr, @@ -8904,7 +8905,6 @@ static const struct net_device_ops niu_netdev_ops = { static void __devinit niu_assign_netdev_ops(struct net_device *dev) { dev->netdev_ops = &niu_netdev_ops; - dev->hard_start_xmit = niu_start_xmit; dev->ethtool_ops = &niu_ethtool_ops; dev->watchdog_timeo = NIU_TX_TIMEOUT; } diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index bad99e8cac3..1b15a088a3b 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -972,7 +972,8 @@ ppp_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } static const struct net_device_ops ppp_netdev_ops = { - .ndo_do_ioctl = ppp_net_ioctl, + .ndo_start_xmit = ppp_start_xmit, + .ndo_do_ioctl = ppp_net_ioctl, }; static void ppp_setup(struct net_device *dev) @@ -2437,8 +2438,6 @@ ppp_create_interface(int unit, int *retp) skb_queue_head_init(&ppp->mrq); #endif /* CONFIG_PPP_MULTILINK */ - dev->hard_start_xmit = ppp_start_xmit; - ret = -EEXIST; mutex_lock(&all_ppp_mutex); if (unit < 0) diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index bac58ca628d..dddf6aeff49 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -1927,6 +1927,7 @@ static const struct net_device_ops rtl8169_netdev_ops = { .ndo_open = rtl8169_open, .ndo_stop = rtl8169_close, .ndo_get_stats = rtl8169_get_stats, + .ndo_start_xmit = rtl8169_start_xmit, .ndo_tx_timeout = rtl8169_tx_timeout, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = rtl8169_change_mtu, @@ -2125,7 +2126,6 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->dev_addr[i] = RTL_R8(MAC0 + i); memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); - dev->hard_start_xmit = rtl8169_start_xmit; SET_ETHTOOL_OPS(dev, &rtl8169_ethtool_ops); dev->watchdog_timeo = RTL8169_TX_TIMEOUT; dev->irq = pdev->irq; diff --git a/drivers/net/skge.c b/drivers/net/skge.c index 93c1b1d9296..f73ee797400 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c @@ -3805,6 +3805,7 @@ static __exit void skge_debug_cleanup(void) static const struct net_device_ops skge_netdev_ops = { .ndo_open = skge_up, .ndo_stop = skge_down, + .ndo_start_xmit = skge_xmit_frame, .ndo_do_ioctl = skge_ioctl, .ndo_get_stats = skge_get_stats, .ndo_tx_timeout = skge_tx_timeout, @@ -3831,7 +3832,6 @@ static struct net_device *skge_devinit(struct skge_hw *hw, int port, } SET_NETDEV_DEV(dev, &hw->pdev->dev); - dev->hard_start_xmit = skge_xmit_frame; dev->netdev_ops = &skge_netdev_ops; dev->ethtool_ops = &skge_ethtool_ops; dev->watchdog_timeo = TX_WATCHDOG; diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index 251505125cb..3668e81e474 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c @@ -4047,6 +4047,7 @@ static const struct net_device_ops sky2_netdev_ops[2] = { { .ndo_open = sky2_up, .ndo_stop = sky2_down, + .ndo_start_xmit = sky2_xmit_frame, .ndo_do_ioctl = sky2_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = sky2_set_mac_address, @@ -4063,6 +4064,7 @@ static const struct net_device_ops sky2_netdev_ops[2] = { { .ndo_open = sky2_up, .ndo_stop = sky2_down, + .ndo_start_xmit = sky2_xmit_frame, .ndo_do_ioctl = sky2_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = sky2_set_mac_address, @@ -4090,7 +4092,6 @@ static __devinit struct net_device *sky2_init_netdev(struct sky2_hw *hw, SET_NETDEV_DEV(dev, &hw->pdev->dev); dev->irq = hw->pdev->irq; - dev->hard_start_xmit = sky2_xmit_frame; SET_ETHTOOL_OPS(dev, &sky2_ethtool_ops); dev->watchdog_timeo = TX_WATCHDOG; dev->netdev_ops = &sky2_netdev_ops[port]; diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 4b97cb60136..9ba18e1bc34 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -12614,19 +12614,6 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) else tp->tg3_flags &= ~TG3_FLAG_POLL_SERDES; - /* All chips before 5787 can get confused if TX buffers - * straddle the 4GB address boundary in some cases. - */ - if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 || - GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787 || - GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5784 || - GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761 || - GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785 || - GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) - tp->dev->hard_start_xmit = tg3_start_xmit; - else - tp->dev->hard_start_xmit = tg3_start_xmit_dma_bug; - tp->rx_offset = 2; if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5701 && (tp->tg3_flags & TG3_FLAG_PCIX_MODE) != 0) @@ -13346,6 +13333,26 @@ static void __devinit tg3_init_coal(struct tg3 *tp) static const struct net_device_ops tg3_netdev_ops = { .ndo_open = tg3_open, .ndo_stop = tg3_close, + .ndo_start_xmit = tg3_start_xmit, + .ndo_get_stats = tg3_get_stats, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_multicast_list = tg3_set_rx_mode, + .ndo_set_mac_address = tg3_set_mac_addr, + .ndo_do_ioctl = tg3_ioctl, + .ndo_tx_timeout = tg3_tx_timeout, + .ndo_change_mtu = tg3_change_mtu, +#if TG3_VLAN_TAG_USED + .ndo_vlan_rx_register = tg3_vlan_rx_register, +#endif +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = tg3_poll_controller, +#endif +}; + +static const struct net_device_ops tg3_netdev_ops_dma_bug = { + .ndo_open = tg3_open, + .ndo_stop = tg3_close, + .ndo_start_xmit = tg3_start_xmit_dma_bug, .ndo_get_stats = tg3_get_stats, .ndo_validate_addr = eth_validate_addr, .ndo_set_multicast_list = tg3_set_rx_mode, @@ -13475,7 +13482,6 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, tp->rx_jumbo_pending = TG3_DEF_RX_JUMBO_RING_PENDING; tp->tx_pending = TG3_DEF_TX_RING_PENDING; - dev->netdev_ops = &tg3_netdev_ops; netif_napi_add(dev, &tp->napi, tg3_poll, 64); dev->ethtool_ops = &tg3_ethtool_ops; dev->watchdog_timeo = TG3_TX_TIMEOUT; @@ -13488,6 +13494,17 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, goto err_out_iounmap; } + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5784 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) + dev->netdev_ops = &tg3_netdev_ops; + else + dev->netdev_ops = &tg3_netdev_ops_dma_bug; + + /* The EPB bridge inside 5714, 5715, and 5780 and any * device behind the EPB cannot support DMA addresses > 40-bit. * On 64-bit systems with IOMMU, use 40-bit dma_mask. diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b4c94144475..fd0b11ea556 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -308,13 +308,14 @@ tun_net_change_mtu(struct net_device *dev, int new_mtu) static const struct net_device_ops tun_netdev_ops = { .ndo_open = tun_net_open, .ndo_stop = tun_net_close, + .ndo_start_xmit = tun_net_xmit, .ndo_change_mtu = tun_net_change_mtu, - }; static const struct net_device_ops tap_netdev_ops = { .ndo_open = tun_net_open, .ndo_stop = tun_net_close, + .ndo_start_xmit = tun_net_xmit, .ndo_change_mtu = tun_net_change_mtu, .ndo_set_multicast_list = tun_net_mclist, .ndo_set_mac_address = eth_mac_addr, @@ -691,7 +692,6 @@ static void tun_setup(struct net_device *dev) tun->owner = -1; tun->group = -1; - dev->hard_start_xmit = tun_net_xmit; dev->ethtool_ops = &tun_ethtool_ops; dev->destructor = free_netdev; dev->features |= NETIF_F_NETNS_LOCAL; diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 4f93a55aaaa..852d0e7c4e6 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -265,6 +265,7 @@ static void veth_dev_free(struct net_device *dev) static const struct net_device_ops veth_netdev_ops = { .ndo_init = veth_dev_init, .ndo_open = veth_open, + .ndo_start_xmit = veth_xmit, .ndo_get_stats = veth_get_stats, }; @@ -273,7 +274,6 @@ static void veth_setup(struct net_device *dev) ether_setup(dev); dev->netdev_ops = &veth_netdev_ops; - dev->hard_start_xmit = veth_xmit; dev->ethtool_ops = &veth_ethtool_ops; dev->features |= NETIF_F_LLTX; dev->destructor = veth_dev_free; diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c index 033e63a6843..58e25d090ae 100644 --- a/drivers/net/via-velocity.c +++ b/drivers/net/via-velocity.c @@ -852,6 +852,7 @@ static int velocity_soft_reset(struct velocity_info *vptr) static const struct net_device_ops velocity_netdev_ops = { .ndo_open = velocity_open, .ndo_stop = velocity_close, + .ndo_start_xmit = velocity_xmit, .ndo_get_stats = velocity_get_stats, .ndo_validate_addr = eth_validate_addr, .ndo_set_multicast_list = velocity_set_multi, @@ -971,7 +972,6 @@ static int __devinit velocity_found1(struct pci_dev *pdev, const struct pci_devi vptr->phy_id = MII_GET_PHY_ID(vptr->mac_regs); dev->irq = pdev->irq; - dev->hard_start_xmit = velocity_xmit; dev->netdev_ops = &velocity_netdev_ops; dev->ethtool_ops = &velocity_ethtool_ops; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 981a089d514..d8fb23679ee 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -454,8 +454,8 @@ struct netdev_queue { /* * This structure defines the management hooks for network devices. - * The following hooks can bed defined and are optonal (can be null) - * unless otherwise noted. + * The following hooks can be defined; unless noted otherwise, they are + * optional and can be filled with a null pointer. * * int (*ndo_init)(struct net_device *dev); * This function is called once when network device is registered. @@ -475,6 +475,15 @@ struct netdev_queue { * This function is called when network device transistions to the down * state. * + * int (*ndo_hard_start_xmit)(struct sk_buff *skb, struct net_device *dev); + * Called when a packet needs to be transmitted. + * Must return NETDEV_TX_OK , NETDEV_TX_BUSY, or NETDEV_TX_LOCKED, + * Required can not be NULL. + * + * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb); + * Called to decide which queue to when device supports multiple + * transmit queues. + * * void (*ndo_change_rx_flags)(struct net_device *dev, int flags); * This function is called to allow device receiver to make * changes to configuration when multicast or promiscious is enabled. @@ -508,7 +517,7 @@ struct netdev_queue { * of a device. If not defined, any request to change MTU will * will return an error. * - * void (*ndo_tx_timeout) (struct net_device *dev); + * void (*ndo_tx_timeout)(struct net_device *dev); * Callback uses when the transmitter has not made any progress * for dev->watchdog ticks. * @@ -538,6 +547,10 @@ struct net_device_ops { void (*ndo_uninit)(struct net_device *dev); int (*ndo_open)(struct net_device *dev); int (*ndo_stop)(struct net_device *dev); + int (*ndo_start_xmit) (struct sk_buff *skb, + struct net_device *dev); + u16 (*ndo_select_queue)(struct net_device *dev, + struct sk_buff *skb); #define HAVE_CHANGE_RX_FLAGS void (*ndo_change_rx_flags)(struct net_device *dev, int flags); @@ -557,8 +570,10 @@ struct net_device_ops { int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); #define HAVE_CHANGE_MTU - int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); - + int (*ndo_change_mtu)(struct net_device *dev, + int new_mtu); + int (*ndo_neigh_setup)(struct net_device *dev, + struct neigh_parms *); #define HAVE_TX_TIMEOUT void (*ndo_tx_timeout) (struct net_device *dev); @@ -761,18 +776,12 @@ struct net_device /* Number of TX queues currently active in device */ unsigned int real_num_tx_queues; - /* Map buffer to appropriate transmit queue */ - u16 (*select_queue)(struct net_device *dev, - struct sk_buff *skb); - unsigned long tx_queue_len; /* Max frames per queue allowed */ spinlock_t tx_global_lock; /* * One part is mostly used on xmit path (device) */ void *priv; /* pointer to private data */ - int (*hard_start_xmit) (struct sk_buff *skb, - struct net_device *dev); /* These may be needed for future network-power-down code. */ unsigned long trans_start; /* Time (in jiffies) of last Tx */ @@ -800,8 +809,6 @@ struct net_device /* Called from unregister, can be used to call free_netdev */ void (*destructor)(struct net_device *dev); - int (*neigh_setup)(struct net_device *dev, struct neigh_parms *); - #ifdef CONFIG_NETPOLL struct netpoll_info *npinfo; #endif @@ -842,6 +849,10 @@ struct net_device void (*uninit)(struct net_device *dev); int (*open)(struct net_device *dev); int (*stop)(struct net_device *dev); + int (*hard_start_xmit) (struct sk_buff *skb, + struct net_device *dev); + u16 (*select_queue)(struct net_device *dev, + struct sk_buff *skb); void (*change_rx_flags)(struct net_device *dev, int flags); void (*set_rx_mode)(struct net_device *dev); @@ -854,6 +865,8 @@ struct net_device int (*set_config)(struct net_device *dev, struct ifmap *map); int (*change_mtu)(struct net_device *dev, int new_mtu); + int (*neigh_setup)(struct net_device *dev, + struct neigh_parms *); void (*tx_timeout) (struct net_device *dev); struct net_device_stats* (*get_stats)(struct net_device *dev); void (*vlan_rx_register)(struct net_device *dev, diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 920ce334839..18538d7460d 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -163,10 +163,11 @@ static const struct ethtool_ops br_ethtool_ops = { static const struct net_device_ops br_netdev_ops = { .ndo_open = br_dev_open, .ndo_stop = br_dev_stop, - .ndo_set_mac_address = br_set_mac_address, - .ndo_set_multicast_list = br_dev_set_multicast_list, - .ndo_change_mtu = br_change_mtu, - .ndo_do_ioctl = br_dev_ioctl, + .ndo_start_xmit = br_dev_xmit, + .ndo_set_mac_address = br_set_mac_address, + .ndo_set_multicast_list = br_dev_set_multicast_list, + .ndo_change_mtu = br_change_mtu, + .ndo_do_ioctl = br_dev_ioctl, }; void br_dev_setup(struct net_device *dev) @@ -175,7 +176,6 @@ void br_dev_setup(struct net_device *dev) ether_setup(dev); dev->netdev_ops = &br_netdev_ops; - dev->hard_start_xmit = br_dev_xmit; dev->destructor = free_netdev; SET_ETHTOOL_OPS(dev, &br_ethtool_ops); dev->tx_queue_len = 0; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index ee3a8dd13f5..727c5c510a6 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -373,7 +373,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER) return -EINVAL; - if (dev->hard_start_xmit == br_dev_xmit) + if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) return -ELOOP; if (dev->br_port != NULL) diff --git a/net/core/dev.c b/net/core/dev.c index 8843f4e3f5e..4615e9a443a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1660,6 +1660,9 @@ static int dev_gso_segment(struct sk_buff *skb) int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { + const struct net_device_ops *ops = dev->netdev_ops; + + prefetch(&dev->netdev_ops->ndo_start_xmit); if (likely(!skb->next)) { if (!list_empty(&ptype_all)) dev_queue_xmit_nit(skb, dev); @@ -1671,7 +1674,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, goto gso; } - return dev->hard_start_xmit(skb, dev); + return ops->ndo_start_xmit(skb, dev); } gso: @@ -1681,7 +1684,7 @@ gso: skb->next = nskb->next; nskb->next = NULL; - rc = dev->hard_start_xmit(nskb, dev); + rc = ops->ndo_start_xmit(nskb, dev); if (unlikely(rc)) { nskb->next = skb->next; skb->next = nskb; @@ -1755,10 +1758,11 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) static struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb) { + const struct net_device_ops *ops = dev->netdev_ops; u16 queue_index = 0; - if (dev->select_queue) - queue_index = dev->select_queue(dev, skb); + if (ops->ndo_select_queue) + queue_index = ops->ndo_select_queue(dev, skb); else if (dev->real_num_tx_queues > 1) queue_index = simple_tx_hash(dev, skb); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index cca6a55909e..9c3717a23cf 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1327,9 +1327,9 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl) { struct neigh_parms *p, *ref; - struct net *net; + struct net *net = dev_net(dev); + const struct net_device_ops *ops = dev->netdev_ops; - net = dev_net(dev); ref = lookup_neigh_params(tbl, net, 0); if (!ref) return NULL; @@ -1341,7 +1341,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, p->reachable_time = neigh_rand_reach_time(p->base_reachable_time); - if (dev->neigh_setup && dev->neigh_setup(dev, p)) { + if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) { kfree(p); return NULL; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 630df603444..96fb0519eb7 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -58,6 +58,7 @@ static void queue_process(struct work_struct *work) while ((skb = skb_dequeue(&npinfo->txq))) { struct net_device *dev = skb->dev; + const struct net_device_ops *ops = dev->netdev_ops; struct netdev_queue *txq; if (!netif_device_present(dev) || !netif_running(dev)) { @@ -71,7 +72,7 @@ static void queue_process(struct work_struct *work) __netif_tx_lock(txq, smp_processor_id()); if (netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq) || - dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { + ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); __netif_tx_unlock(txq); local_irq_restore(flags); @@ -273,6 +274,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) int status = NETDEV_TX_BUSY; unsigned long tries; struct net_device *dev = np->dev; + const struct net_device_ops *ops = dev->netdev_ops; struct netpoll_info *npinfo = np->dev->npinfo; if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { @@ -293,7 +295,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) tries > 0; --tries) { if (__netif_tx_trylock(txq)) { if (!netif_tx_queue_stopped(txq)) - status = dev->hard_start_xmit(skb, dev); + status = ops->ndo_start_xmit(skb, dev); __netif_tx_unlock(txq); if (status == NETDEV_TX_OK) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 4e77914c4d4..15e0c2c7aac 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3352,14 +3352,14 @@ static void pktgen_rem_thread(struct pktgen_thread *t) static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) { - struct net_device *odev = NULL; + struct net_device *odev = pkt_dev->odev; + int (*xmit)(struct sk_buff *, struct net_device *) + = odev->netdev_ops->ndo_start_xmit; struct netdev_queue *txq; __u64 idle_start = 0; u16 queue_map; int ret; - odev = pkt_dev->odev; - if (pkt_dev->delay_us || pkt_dev->delay_ns) { u64 now; @@ -3440,7 +3440,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) atomic_inc(&(pkt_dev->skb->users)); retry_now: - ret = odev->hard_start_xmit(pkt_dev->skb, odev); + ret = (*xmit)(pkt_dev->skb, odev); if (likely(ret == NETDEV_TX_OK)) { pkt_dev->last_ok = 1; pkt_dev->sofar++; -- cgit v1.2.3-70-g09d2 From b74ca3a896b9ab5f952bc440154758e708c48884 Mon Sep 17 00:00:00 2001 From: Wang Chen Date: Mon, 8 Dec 2008 01:14:16 -0800 Subject: netdevice: Kill netdev->priv This is the last shoot of this series. After I removing all directly reference of netdev->priv, I am killing "priv" of "struct net_device" and fixing relative comments/docs. Anyone will not be allowed to reference netdev->priv directly. If you want to reference the memory of private data, use netdev_priv() instead. If the private data is not allocted when alloc_netdev(), use netdev->ml_priv to point that memory after you creating that private data. Signed-off-by: Wang Chen Signed-off-by: David S. Miller --- Documentation/networking/driver.txt | 2 +- Documentation/networking/netdevices.txt | 2 +- drivers/net/3c501.h | 2 +- drivers/net/atp.c | 2 +- drivers/net/eexpress.c | 2 +- drivers/net/forcedeth.c | 4 ++-- drivers/net/lance.c | 2 +- drivers/net/myri_sbus.c | 2 +- drivers/net/pci-skeleton.c | 2 +- drivers/net/sun3_82586.c | 2 +- drivers/net/sunbmac.c | 2 +- drivers/net/tokenring/3c359.c | 5 +++-- drivers/net/via-rhine.c | 9 +++++---- drivers/net/wireless/strip.c | 2 +- include/linux/hdlc.h | 2 +- include/linux/netdevice.h | 1 - net/atm/mpc.c | 4 ++-- net/core/dev.c | 6 ------ 18 files changed, 24 insertions(+), 29 deletions(-) (limited to 'net/core/dev.c') diff --git a/Documentation/networking/driver.txt b/Documentation/networking/driver.txt index ea72d2e66ca..03283daa64f 100644 --- a/Documentation/networking/driver.txt +++ b/Documentation/networking/driver.txt @@ -13,7 +13,7 @@ Transmit path guidelines: static int drv_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) { - struct drv *dp = dev->priv; + struct drv *dp = netdev_priv(dev); lock_tx(dp); ... diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt index d0f71fc7f78..a2ab6a0b116 100644 --- a/Documentation/networking/netdevices.txt +++ b/Documentation/networking/netdevices.txt @@ -18,7 +18,7 @@ There are routines in net_init.c to handle the common cases of alloc_etherdev, alloc_netdev. These reserve extra space for driver private data which gets freed when the network device is freed. If separately allocated data is attached to the network device -(dev->priv) then it is up to the module exit handler to free that. +(netdev_priv(dev)) then it is up to the module exit handler to free that. MTU === diff --git a/drivers/net/3c501.h b/drivers/net/3c501.h index cfec64efff7..f40b0493337 100644 --- a/drivers/net/3c501.h +++ b/drivers/net/3c501.h @@ -23,7 +23,7 @@ static const struct ethtool_ops netdev_ethtool_ops; static int el_debug = EL_DEBUG; /* - * Board-specific info in dev->priv. + * Board-specific info in netdev_priv(dev). */ struct net_local diff --git a/drivers/net/atp.c b/drivers/net/atp.c index 7028b276dfd..1d6b74c5d6c 100644 --- a/drivers/net/atp.c +++ b/drivers/net/atp.c @@ -420,7 +420,7 @@ static unsigned short __init eeprom_op(long ioaddr, u32 cmd) registers that "should" only need to be set once at boot, so that there is non-reboot way to recover if something goes wrong. - This is an attachable device: if there is no dev->priv entry then it wasn't + This is an attachable device: if there is no private entry then it wasn't probed for at boot-time, and we need to probe for it again. */ static int net_open(struct net_device *dev) diff --git a/drivers/net/eexpress.c b/drivers/net/eexpress.c index a125e41240f..9ff3f2f5e38 100644 --- a/drivers/net/eexpress.c +++ b/drivers/net/eexpress.c @@ -1046,7 +1046,7 @@ static void eexp_hw_tx_pio(struct net_device *dev, unsigned short *buf, /* * Sanity check the suspected EtherExpress card * Read hardware address, reset card, size memory and initialize buffer - * memory pointers. These are held in dev->priv, in case someone has more + * memory pointers. These are held in netdev_priv(), in case someone has more * than one card in a machine. */ diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index 12384df8cb2..1f2b24743ee 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -712,12 +712,12 @@ struct nv_skb_map { /* * SMP locking: - * All hardware access under dev->priv->lock, except the performance + * All hardware access under netdev_priv(dev)->lock, except the performance * critical parts: * - rx is (pseudo-) lockless: it relies on the single-threading provided * by the arch code for interrupts. * - tx setup is lockless: it relies on netif_tx_lock. Actual submission - * needs dev->priv->lock :-( + * needs netdev_priv(dev)->lock :-( * - set_multicast_list: preparation lockless, relies on netif_tx_lock. */ diff --git a/drivers/net/lance.c b/drivers/net/lance.c index e81b6113ed9..d7afb938ea6 100644 --- a/drivers/net/lance.c +++ b/drivers/net/lance.c @@ -519,7 +519,7 @@ static int __init lance_probe1(struct net_device *dev, int ioaddr, int irq, int } } - /* We can't allocate dev->priv from alloc_etherdev() because it must + /* We can't allocate private data from alloc_etherdev() because it must a ISA DMA-able region. */ chipname = chip_table[lance_version].name; printk("%s: %s at %#3x, ", dev->name, chipname, ioaddr); diff --git a/drivers/net/myri_sbus.c b/drivers/net/myri_sbus.c index 6833f65f8ae..899ed065a14 100644 --- a/drivers/net/myri_sbus.c +++ b/drivers/net/myri_sbus.c @@ -1091,7 +1091,7 @@ static int __devinit myri_sbus_probe(struct of_device *op, const struct of_devic err_free_irq: free_irq(dev->irq, dev); err: - /* This will also free the co-allocated 'dev->priv' */ + /* This will also free the co-allocated private data*/ free_netdev(dev); return -ENODEV; } diff --git a/drivers/net/pci-skeleton.c b/drivers/net/pci-skeleton.c index b23b5c397b1..c95fd72c3bb 100644 --- a/drivers/net/pci-skeleton.c +++ b/drivers/net/pci-skeleton.c @@ -781,7 +781,7 @@ static int __devinit netdrv_init_one (struct pci_dev *pdev, dev->irq = pdev->irq; dev->base_addr = (unsigned long) ioaddr; - /* dev->priv/tp zeroed and aligned in alloc_etherdev */ + /* netdev_priv()/tp zeroed and aligned in alloc_etherdev */ tp = netdev_priv(dev); /* note: tp->chipset set in netdrv_init_board */ diff --git a/drivers/net/sun3_82586.c b/drivers/net/sun3_82586.c index e8f97d5c9c2..e0d84772771 100644 --- a/drivers/net/sun3_82586.c +++ b/drivers/net/sun3_82586.c @@ -209,7 +209,7 @@ static int sun3_82586_open(struct net_device *dev) static int check586(struct net_device *dev,char *where,unsigned size) { struct priv pb; - struct priv *p = /* (struct priv *) dev->priv*/ &pb; + struct priv *p = &pb; char *iscp_addr; int i; diff --git a/drivers/net/sunbmac.c b/drivers/net/sunbmac.c index 977b3e08bbf..7f69c7f176c 100644 --- a/drivers/net/sunbmac.c +++ b/drivers/net/sunbmac.c @@ -1233,7 +1233,7 @@ fail_and_cleanup: bp->bmac_block, bp->bblock_dvma); - /* This also frees the co-located 'dev->priv' */ + /* This also frees the co-located private data */ free_netdev(dev); return -ENODEV; } diff --git a/drivers/net/tokenring/3c359.c b/drivers/net/tokenring/3c359.c index e7a944657cf..43853e3b210 100644 --- a/drivers/net/tokenring/3c359.c +++ b/drivers/net/tokenring/3c359.c @@ -296,8 +296,9 @@ static int __devinit xl_probe(struct pci_dev *pdev, } ; /* - * Allowing init_trdev to allocate the dev->priv structure will align xl_private - * on a 32 bytes boundary which we need for the rx/tx descriptors + * Allowing init_trdev to allocate the private data will align + * xl_private on a 32 bytes boundary which we need for the rx/tx + * descriptors */ dev = alloc_trdev(sizeof(struct xl_private)) ; diff --git a/drivers/net/via-rhine.c b/drivers/net/via-rhine.c index 93b74b7b707..8d405c83df8 100644 --- a/drivers/net/via-rhine.c +++ b/drivers/net/via-rhine.c @@ -191,12 +191,13 @@ IIId. Synchronization The driver runs as two independent, single-threaded flows of control. One is the send-packet routine, which enforces single-threaded use by the -dev->priv->lock spinlock. The other thread is the interrupt handler, which -is single threaded by the hardware and interrupt handling software. +netdev_priv(dev)->lock spinlock. The other thread is the interrupt handler, +which is single threaded by the hardware and interrupt handling software. The send packet thread has partial control over the Tx ring. It locks the -dev->priv->lock whenever it's queuing a Tx packet. If the next slot in the ring -is not available it stops the transmit queue by calling netif_stop_queue. +netdev_priv(dev)->lock whenever it's queuing a Tx packet. If the next slot in +the ring is not available it stops the transmit queue by +calling netif_stop_queue. The interrupt handler has exclusive control over the Rx ring and records stats from the Tx ring. After reaping the stats, it marks the Tx queue entry as diff --git a/drivers/net/wireless/strip.c b/drivers/net/wireless/strip.c index 692e6c5e009..dd0de3a9ed4 100644 --- a/drivers/net/wireless/strip.c +++ b/drivers/net/wireless/strip.c @@ -2494,7 +2494,7 @@ static void strip_dev_setup(struct net_device *dev) dev->type = ARPHRD_METRICOM; /* dtang */ dev->hard_header_len = sizeof(STRIP_Header); /* - * dev->priv Already holds a pointer to our struct strip + * netdev_priv(dev) Already holds a pointer to our struct strip */ *(MetricomAddress *) & dev->broadcast = broadcast_address; diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h index e960faac609..fd47a151665 100644 --- a/include/linux/hdlc.h +++ b/include/linux/hdlc.h @@ -43,7 +43,7 @@ struct hdlc_proto { }; -/* Pointed to by dev->priv */ +/* Pointed to by netdev_priv(dev) */ typedef struct hdlc_device { /* used by HDLC layer to take control over HDLC device from hw driver*/ int (*attach)(struct net_device *dev, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0df0db068ac..47e73152831 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -785,7 +785,6 @@ struct net_device /* * One part is mostly used on xmit path (device) */ - void *priv; /* pointer to private data */ /* These may be needed for future network-power-down code. */ unsigned long trans_start; /* Time (in jiffies) of last Tx */ diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 12e9ea371db..039d5cc72c3 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -341,8 +341,8 @@ static const char *mpoa_device_type_string(char type) } /* - * lec device calls this via its dev->priv->lane2_ops->associate_indicator() - * when it sees a TLV in LE_ARP packet. + * lec device calls this via its netdev_priv(dev)->lane2_ops + * ->associate_indicator() when it sees a TLV in LE_ARP packet. * We fill in the pointer above when we see a LANE2 lec initializing * See LANE2 spec 3.1.5 * diff --git a/net/core/dev.c b/net/core/dev.c index 4615e9a443a..f54cac76438 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4378,12 +4378,6 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, dev->num_tx_queues = queue_count; dev->real_num_tx_queues = queue_count; - if (sizeof_priv) { - dev->priv = ((char *)dev + - ((sizeof(struct net_device) + NETDEV_ALIGN_CONST) - & ~NETDEV_ALIGN_CONST)); - } - dev->gso_max_size = GSO_MAX_SIZE; netdev_init_queues(dev); -- cgit v1.2.3-70-g09d2 From 1a881f27c50b4fbd6858a8696a189263621136b0 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Dec 2008 23:27:47 -0800 Subject: net: Add frag_list support to GSO This patch allows GSO to handle frag_list in a limited way for the purposes of allowing packets merged by GRO to be refragmented on output. Most hardware won't (and aren't expected to) support handling GRO frag_list packets directly. Therefore we will perform GSO in software for those cases. However, for drivers that can support it (such as virtual NICs) we may not have to segment the packets at all. Whether the added overhead of GRO/GSO is worthwhile for bridges and routers when weighed against the benefit of potentially increasing the MTU within the host is still an open question. However, for the case of host nodes this is undoubtedly a win. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ net/core/dev.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b60c26b7d31..bdf5465deb9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1858,6 +1858,8 @@ static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) { return skb_is_gso(skb) && (!skb_gso_ok(skb, dev->features) || + (skb_shinfo(skb)->frag_list && + !(dev->features & NETIF_F_FRAGLIST)) || unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); } diff --git a/net/core/dev.c b/net/core/dev.c index f54cac76438..e415f0b0d0d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1533,8 +1533,6 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) __be16 type = skb->protocol; int err; - BUG_ON(skb_shinfo(skb)->frag_list); - skb_reset_mac_header(skb); skb->mac_len = skb->network_header - skb->mac_header; __skb_pull(skb, skb->mac_len); -- cgit v1.2.3-70-g09d2 From d565b0a1a9b6ee7dff46e1f68b26b526ac11ae50 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Dec 2008 23:38:52 -0800 Subject: net: Add Generic Receive Offload infrastructure This patch adds the top-level GRO (Generic Receive Offload) infrastructure. This is pretty similar to LRO except that this is protocol-independent. Instead of holding packets in an lro_mgr structure, they're now held in napi_struct. For drivers that intend to use this, they can set the NETIF_F_GRO bit and call napi_gro_receive instead of netif_receive_skb or just call netif_rx. The latter will call napi_receive_skb automatically. When napi_gro_receive is used, the driver must either call napi_complete/napi_rx_complete, or call napi_gro_flush in softirq context if the driver uses the primitives __napi_complete/__napi_rx_complete. Protocols will set the gro_receive and gro_complete function pointers in order to participate in this scheme. In addition to the packet, gro_receive will get a list of currently held packets. Each packet in the list has a same_flow field which is non-zero if it is a potential match for the new packet. For each packet that may match, they also have a flush field which is non-zero if the held packet must not be merged with the new packet. Once gro_receive has determined that the new skb matches a held packet, the held packet may be processed immediately if the new skb cannot be merged with it. In this case gro_receive should return the pointer to the existing skb in gro_list. Otherwise the new skb should be merged into the existing packet and NULL should be returned, unless the new skb makes it impossible for any further merges to be made (e.g., FIN packet) where the merged skb should be returned. Whenever the skb is merged into an existing entry, the gro_receive function should set NAPI_GRO_CB(skb)->same_flow. Note that if an skb merely matches an existing entry but can't be merged with it, then this shouldn't be set. If gro_receive finds it pointless to hold the new skb for future merging, it should set NAPI_GRO_CB(skb)->flush. Held packets will be flushed by napi_gro_flush which is called by napi_complete and napi_rx_complete. Currently held packets are stored in a singly liked list just like LRO. The list is limited to a maximum of 8 entries. In future, this may be expanded to use a hash table to allow more flows to be held for merging. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 80 +++++++------------ include/linux/netpoll.h | 5 -- net/core/dev.c | 193 +++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 219 insertions(+), 59 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bdf5465deb9..58856b6737f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -314,8 +314,9 @@ struct napi_struct { spinlock_t poll_lock; int poll_owner; struct net_device *dev; - struct list_head dev_list; #endif + struct list_head dev_list; + struct sk_buff *gro_list; }; enum @@ -376,22 +377,8 @@ static inline int napi_reschedule(struct napi_struct *napi) * * Mark NAPI processing as complete. */ -static inline void __napi_complete(struct napi_struct *n) -{ - BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); - list_del(&n->poll_list); - smp_mb__before_clear_bit(); - clear_bit(NAPI_STATE_SCHED, &n->state); -} - -static inline void napi_complete(struct napi_struct *n) -{ - unsigned long flags; - - local_irq_save(flags); - __napi_complete(n); - local_irq_restore(flags); -} +extern void __napi_complete(struct napi_struct *n); +extern void napi_complete(struct napi_struct *n); /** * napi_disable - prevent NAPI from scheduling @@ -640,9 +627,7 @@ struct net_device unsigned long state; struct list_head dev_list; -#ifdef CONFIG_NETPOLL struct list_head napi_list; -#endif /* Net device features */ unsigned long features; @@ -661,6 +646,7 @@ struct net_device #define NETIF_F_LLTX 4096 /* LockLess TX - deprecated. Please */ /* do not use LLTX in new drivers */ #define NETIF_F_NETNS_LOCAL 8192 /* Does not change network namespaces */ +#define NETIF_F_GRO 16384 /* Generic receive offload */ #define NETIF_F_LRO 32768 /* large receive offload */ /* Segmentation offload features */ @@ -984,22 +970,8 @@ static inline void *netdev_priv(const struct net_device *dev) * netif_napi_add() must be used to initialize a napi context prior to calling * *any* of the other napi related functions. */ -static inline void netif_napi_add(struct net_device *dev, - struct napi_struct *napi, - int (*poll)(struct napi_struct *, int), - int weight) -{ - INIT_LIST_HEAD(&napi->poll_list); - napi->poll = poll; - napi->weight = weight; -#ifdef CONFIG_NETPOLL - napi->dev = dev; - list_add(&napi->dev_list, &dev->napi_list); - spin_lock_init(&napi->poll_lock); - napi->poll_owner = -1; -#endif - set_bit(NAPI_STATE_SCHED, &napi->state); -} +void netif_napi_add(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), int weight); /** * netif_napi_del - remove a napi context @@ -1007,12 +979,20 @@ static inline void netif_napi_add(struct net_device *dev, * * netif_napi_del() removes a napi context from the network device napi list */ -static inline void netif_napi_del(struct napi_struct *napi) -{ -#ifdef CONFIG_NETPOLL - list_del(&napi->dev_list); -#endif -} +void netif_napi_del(struct napi_struct *napi); + +struct napi_gro_cb { + /* This is non-zero if the packet may be of the same flow. */ + int same_flow; + + /* This is non-zero if the packet cannot be merged with the new skb. */ + int flush; + + /* Number of segments aggregated. */ + int count; +}; + +#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) struct packet_type { __be16 type; /* This is really htons(ether_type). */ @@ -1024,6 +1004,9 @@ struct packet_type { struct sk_buff *(*gso_segment)(struct sk_buff *skb, int features); int (*gso_send_check)(struct sk_buff *skb); + struct sk_buff **(*gro_receive)(struct sk_buff **head, + struct sk_buff *skb); + int (*gro_complete)(struct sk_buff *skb); void *af_packet_priv; struct list_head list; }; @@ -1377,6 +1360,9 @@ extern int netif_rx(struct sk_buff *skb); extern int netif_rx_ni(struct sk_buff *skb); #define HAVE_NETIF_RECEIVE_SKB 1 extern int netif_receive_skb(struct sk_buff *skb); +extern void napi_gro_flush(struct napi_struct *napi); +extern int napi_gro_receive(struct napi_struct *napi, + struct sk_buff *skb); extern void netif_nit_deliver(struct sk_buff *skb); extern int dev_valid_name(const char *name); extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); @@ -1621,17 +1607,7 @@ static inline void __netif_rx_complete(struct net_device *dev, static inline void netif_rx_complete(struct net_device *dev, struct napi_struct *napi) { - unsigned long flags; - - /* - * don't let napi dequeue from the cpu poll list - * just in case its running on a different cpu - */ - if (unlikely(test_bit(NAPI_STATE_NPSVC, &napi->state))) - return; - local_irq_save(flags); - __netif_rx_complete(dev, napi); - local_irq_restore(flags); + napi_complete(napi); } static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index e3d79593fb3..e38d3c9dccd 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -94,11 +94,6 @@ static inline void netpoll_poll_unlock(void *have) rcu_read_unlock(); } -static inline void netpoll_netdev_init(struct net_device *dev) -{ - INIT_LIST_HEAD(&dev->napi_list); -} - #else static inline int netpoll_rx(struct sk_buff *skb) { diff --git a/net/core/dev.c b/net/core/dev.c index e415f0b0d0d..d8d7d1fccde 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -129,6 +129,9 @@ #include "net-sysfs.h" +/* Instead of increasing this, you should create a hash table. */ +#define MAX_GRO_SKBS 8 + /* * The list of packet types we will receive (as opposed to discard) * and the routines to invoke. @@ -2335,6 +2338,122 @@ static void flush_backlog(void *arg) } } +static int napi_gro_complete(struct sk_buff *skb) +{ + struct packet_type *ptype; + __be16 type = skb->protocol; + struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; + int err = -ENOENT; + + if (!skb_shinfo(skb)->frag_list) + goto out; + + rcu_read_lock(); + list_for_each_entry_rcu(ptype, head, list) { + if (ptype->type != type || ptype->dev || !ptype->gro_complete) + continue; + + err = ptype->gro_complete(skb); + break; + } + rcu_read_unlock(); + + if (err) { + WARN_ON(&ptype->list == head); + kfree_skb(skb); + return NET_RX_SUCCESS; + } + +out: + __skb_push(skb, -skb_network_offset(skb)); + return netif_receive_skb(skb); +} + +void napi_gro_flush(struct napi_struct *napi) +{ + struct sk_buff *skb, *next; + + for (skb = napi->gro_list; skb; skb = next) { + next = skb->next; + skb->next = NULL; + napi_gro_complete(skb); + } + + napi->gro_list = NULL; +} +EXPORT_SYMBOL(napi_gro_flush); + +int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +{ + struct sk_buff **pp = NULL; + struct packet_type *ptype; + __be16 type = skb->protocol; + struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; + int count = 0; + int mac_len; + + if (!(skb->dev->features & NETIF_F_GRO)) + goto normal; + + rcu_read_lock(); + list_for_each_entry_rcu(ptype, head, list) { + struct sk_buff *p; + + if (ptype->type != type || ptype->dev || !ptype->gro_receive) + continue; + + skb_reset_network_header(skb); + mac_len = skb->network_header - skb->mac_header; + skb->mac_len = mac_len; + NAPI_GRO_CB(skb)->same_flow = 0; + NAPI_GRO_CB(skb)->flush = 0; + + for (p = napi->gro_list; p; p = p->next) { + count++; + NAPI_GRO_CB(p)->same_flow = + p->mac_len == mac_len && + !memcmp(skb_mac_header(p), skb_mac_header(skb), + mac_len); + NAPI_GRO_CB(p)->flush = 0; + } + + pp = ptype->gro_receive(&napi->gro_list, skb); + break; + } + rcu_read_unlock(); + + if (&ptype->list == head) + goto normal; + + if (pp) { + struct sk_buff *nskb = *pp; + + *pp = nskb->next; + nskb->next = NULL; + napi_gro_complete(nskb); + count--; + } + + if (NAPI_GRO_CB(skb)->same_flow) + goto ok; + + if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) { + __skb_push(skb, -skb_network_offset(skb)); + goto normal; + } + + NAPI_GRO_CB(skb)->count = 1; + skb->next = napi->gro_list; + napi->gro_list = skb; + +ok: + return NET_RX_SUCCESS; + +normal: + return netif_receive_skb(skb); +} +EXPORT_SYMBOL(napi_gro_receive); + static int process_backlog(struct napi_struct *napi, int quota) { int work = 0; @@ -2354,9 +2473,11 @@ static int process_backlog(struct napi_struct *napi, int quota) } local_irq_enable(); - netif_receive_skb(skb); + napi_gro_receive(napi, skb); } while (++work < quota && jiffies == start_time); + napi_gro_flush(napi); + return work; } @@ -2377,6 +2498,68 @@ void __napi_schedule(struct napi_struct *n) } EXPORT_SYMBOL(__napi_schedule); +void __napi_complete(struct napi_struct *n) +{ + BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); + BUG_ON(n->gro_list); + + list_del(&n->poll_list); + smp_mb__before_clear_bit(); + clear_bit(NAPI_STATE_SCHED, &n->state); +} +EXPORT_SYMBOL(__napi_complete); + +void napi_complete(struct napi_struct *n) +{ + unsigned long flags; + + /* + * don't let napi dequeue from the cpu poll list + * just in case its running on a different cpu + */ + if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state))) + return; + + napi_gro_flush(n); + local_irq_save(flags); + __napi_complete(n); + local_irq_restore(flags); +} +EXPORT_SYMBOL(napi_complete); + +void netif_napi_add(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), int weight) +{ + INIT_LIST_HEAD(&napi->poll_list); + napi->gro_list = NULL; + napi->poll = poll; + napi->weight = weight; + list_add(&napi->dev_list, &dev->napi_list); +#ifdef CONFIG_NETPOLL + napi->dev = dev; + spin_lock_init(&napi->poll_lock); + napi->poll_owner = -1; +#endif + set_bit(NAPI_STATE_SCHED, &napi->state); +} +EXPORT_SYMBOL(netif_napi_add); + +void netif_napi_del(struct napi_struct *napi) +{ + struct sk_buff *skb, *next; + + list_del(&napi->dev_list); + + for (skb = napi->gro_list; skb; skb = next) { + next = skb->next; + skb->next = NULL; + kfree_skb(skb); + } + + napi->gro_list = NULL; +} +EXPORT_SYMBOL(netif_napi_del); + static void net_rx_action(struct softirq_action *h) { @@ -4380,7 +4563,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, netdev_init_queues(dev); - netpoll_netdev_init(dev); + INIT_LIST_HEAD(&dev->napi_list); setup(dev); strcpy(dev->name, name); return dev; @@ -4397,10 +4580,15 @@ EXPORT_SYMBOL(alloc_netdev_mq); */ void free_netdev(struct net_device *dev) { + struct napi_struct *p, *n; + release_net(dev_net(dev)); kfree(dev->_tx); + list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) + netif_napi_del(p); + /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { kfree((char *)dev - dev->padded); @@ -4949,6 +5137,7 @@ static int __init net_dev_init(void) queue->backlog.poll = process_backlog; queue->backlog.weight = weight_p; + queue->backlog.gro_list = NULL; } dev_boot_phase = 0; -- cgit v1.2.3-70-g09d2 From 2d91d78b68606ff7ce52ea70e187dee7831aa2f6 Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Wed, 17 Dec 2008 15:47:29 -0800 Subject: Phonet: allocate a non-Ethernet ARP type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also leave some room for more 802.11 types. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/if_arp.h | 2 ++ net/core/dev.c | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 4d3401812e6..11df77ab2db 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -87,6 +87,8 @@ #define ARPHRD_IEEE80211_PRISM 802 /* IEEE 802.11 + Prism2 header */ #define ARPHRD_IEEE80211_RADIOTAP 803 /* IEEE 802.11 + radiotap header */ +#define ARPHRD_PHONET 820 /* PhoNet media type */ + #define ARPHRD_VOID 0xFFFF /* Void type, nothing is known */ #define ARPHRD_NONE 0xFFFE /* zero header length */ diff --git a/net/core/dev.c b/net/core/dev.c index d8d7d1fccde..15aab0c46d6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -283,8 +283,8 @@ static const unsigned short netdev_lock_type[] = ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET, ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211, - ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID, - ARPHRD_NONE}; + ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, + ARPHRD_VOID, ARPHRD_NONE}; static const char *netdev_lock_name[] = {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", @@ -300,8 +300,8 @@ static const char *netdev_lock_name[] = "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET", "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211", - "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID", - "_xmit_NONE"}; + "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", + "_xmit_VOID", "_xmit_NONE"}; static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; -- cgit v1.2.3-70-g09d2 From 57c81fffc863fb4c1804bc963bcbfb82d736c6df Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Wed, 17 Dec 2008 15:47:48 -0800 Subject: Phonet: allocate separate ARP type for GPRS over a Phonet pipe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A separate xmit lock class supports GPRS over a Phonet pipe over a TUN device (type ARPHRD_NONE). Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/if_arp.h | 1 + net/core/dev.c | 4 ++-- net/phonet/pep-gprs.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 11df77ab2db..5ff89809a58 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -88,6 +88,7 @@ #define ARPHRD_IEEE80211_RADIOTAP 803 /* IEEE 802.11 + radiotap header */ #define ARPHRD_PHONET 820 /* PhoNet media type */ +#define ARPHRD_PHONET_PIPE 821 /* PhoNet pipe header */ #define ARPHRD_VOID 0xFFFF /* Void type, nothing is known */ #define ARPHRD_NONE 0xFFFE /* zero header length */ diff --git a/net/core/dev.c b/net/core/dev.c index 15aab0c46d6..048cf119787 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -284,7 +284,7 @@ static const unsigned short netdev_lock_type[] = ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, - ARPHRD_VOID, ARPHRD_NONE}; + ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE}; static const char *netdev_lock_name[] = {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", @@ -301,7 +301,7 @@ static const char *netdev_lock_name[] = "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", - "_xmit_VOID", "_xmit_NONE"}; + "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"}; static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c index 0b640b0fce0..a2873203dff 100644 --- a/net/phonet/pep-gprs.c +++ b/net/phonet/pep-gprs.c @@ -260,7 +260,7 @@ static int gprs_set_mtu(struct net_device *dev, int new_mtu) static void gprs_setup(struct net_device *dev) { dev->features = NETIF_F_FRAGLIST; - dev->type = ARPHRD_NONE; + dev->type = ARPHRD_PHONET_PIPE; dev->flags = IFF_POINTOPOINT | IFF_NOARP; dev->mtu = GPRS_DEFAULT_MTU; dev->hard_header_len = 0; -- cgit v1.2.3-70-g09d2 From 5f2f6da76c429c42d54f73807f00b8fd761a7d68 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 22 Dec 2008 19:35:28 -0800 Subject: net: Fix oops in dev_ifsioc() A command like this: "brctl addif br1 eth1" issued as a user gave me an oops when bridge module wasn't loaded. It's caused by using a dev pointer before checking for NULL. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/core/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 048cf119787..daca72e6b37 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3745,11 +3745,13 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) { int err; struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); - const struct net_device_ops *ops = dev->netdev_ops; + const struct net_device_ops *ops; if (!dev) return -ENODEV; + ops = dev->netdev_ops; + switch (cmd) { case SIOCSIFFLAGS: /* Set interface flags */ return dev_change_flags(dev, ifr->ifr_flags); -- cgit v1.2.3-70-g09d2 From d7b06636be162d3f74c9ce5d6d0d9ea4e5d362c8 Mon Sep 17 00:00:00 2001 From: Peter P Waskiewicz Jr Date: Fri, 26 Dec 2008 01:35:35 -0800 Subject: net: Init NAPI dev_list on napi_del The recent GRO patches introduced the NAPI removal of devices in free_netdev. For drivers that can change the number of queues during driver operation, the NAPI infrastructure doesn't allow the freeing and re-addition of NAPI entities without reloading the driver. This change reinitializes the dev_list in each NAPI struct on delete, instead of just deleting it (and assigning the list pointers to POISON). Drivers that wish to remove/re-add NAPI will need to re-initialize the netdev napi_list after removing all NAPI instances, before re-adding NAPI devices again. Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index daca72e6b37..536a8ac189c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2548,7 +2548,7 @@ void netif_napi_del(struct napi_struct *napi) { struct sk_buff *skb, *next; - list_del(&napi->dev_list); + list_del_init(&napi->dev_list); for (skb = napi->gro_list; skb; skb = next) { next = skb->next; -- cgit v1.2.3-70-g09d2 From 0da2afd59653d2edf5c8e0f09b23f367ab5bc80f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 26 Dec 2008 14:57:42 -0800 Subject: gro: Fix potential use after free The initial skb may have been freed after napi_gro_complete in napi_gro_receive if it was merged into an existing packet. Thus we cannot check same_flow (which indicates whether it was merged) after calling napi_gro_complete. This patch fixes this by saving the same_flow status before the call to napi_gro_complete. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 536a8ac189c..303e984ee6a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2390,6 +2390,7 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) __be16 type = skb->protocol; struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; int count = 0; + int same_flow; int mac_len; if (!(skb->dev->features & NETIF_F_GRO)) @@ -2425,6 +2426,8 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) if (&ptype->list == head) goto normal; + same_flow = NAPI_GRO_CB(skb)->same_flow; + if (pp) { struct sk_buff *nskb = *pp; @@ -2434,7 +2437,7 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) count--; } - if (NAPI_GRO_CB(skb)->same_flow) + if (same_flow) goto ok; if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) { -- cgit v1.2.3-70-g09d2