From c2355e1ab910278a94d487b78590ee3c8eecd08a Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 13 Oct 2010 16:01:49 +0000 Subject: bonding: Fix bonding drivers improper modification of netpoll structure The bonding driver currently modifies the netpoll structure in its xmit path while sending frames from netpoll. This is racy, as other cpus can access the netpoll structure in parallel. Since the bonding driver points np->dev to a slave device, other cpus can inadvertently attempt to send data directly to slave devices, leading to improper locking with the bonding master, lost frames, and deadlocks. This patch fixes that up. This patch also removes the real_dev pointer from the netpoll structure as that data is really only used by bonding in the poll_controller, and we can emulate its behavior by check each slave for IS_UP. Signed-off-by: Neil Horman Signed-off-by: David S. Miller --- net/core/netpoll.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/core/netpoll.c') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 537e01afd81..4e98ffac3af 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -288,11 +288,11 @@ static int netpoll_owner_active(struct net_device *dev) return 0; } -void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) +void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, + struct net_device *dev) { int status = NETDEV_TX_BUSY; unsigned long tries; - struct net_device *dev = np->dev; const struct net_device_ops *ops = dev->netdev_ops; /* It is up to the caller to keep npinfo alive. */ struct netpoll_info *npinfo = np->dev->npinfo; @@ -346,7 +346,7 @@ void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) schedule_delayed_work(&npinfo->tx_work,0); } } -EXPORT_SYMBOL(netpoll_send_skb); +EXPORT_SYMBOL(netpoll_send_skb_on_dev); void netpoll_send_udp(struct netpoll *np, const char *msg, int len) { -- cgit v1.2.3-70-g09d2 From 990c3d6f9c4115347659fc2b163907c8c832ae44 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 13 Oct 2010 16:01:51 +0000 Subject: bonding: Fix napi poll for bonding driver Usually the netpoll path, when preforming a napi poll can get away with just polling all the napi instances of the configured device. Thats not the case for the bonding driver however, as the napi instances which may wind up getting flagged as needing polling after the poll_controller call don't belong to the bonded device, but rather to the slave devices. Fix this by checking the device in question for the IFF_MASTER flag, if set, we know we need to check the full poll list for this cpu, rather than just the devices napi instance list. Signed-off-by: Neil Horman Signed-off-by: David S. Miller --- net/core/netpoll.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'net/core/netpoll.c') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 4e98ffac3af..d79d221fd1f 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -156,8 +156,15 @@ static void poll_napi(struct net_device *dev) { struct napi_struct *napi; int budget = 16; + struct softnet_data *sd = &__get_cpu_var(softnet_data); + struct list_head *nlist; - list_for_each_entry(napi, &dev->napi_list, dev_list) { + if (dev->flags & IFF_MASTER) + nlist = &sd->poll_list; + else + nlist = &dev->napi_list; + + list_for_each_entry(napi, nlist, dev_list) { if (napi->poll_owner != smp_processor_id() && spin_trylock(&napi->poll_lock)) { budget = poll_one_napi(dev->npinfo, napi, budget); -- cgit v1.2.3-70-g09d2 From f13d493d9cf772d510d78ae00bb9f4d680b3170b Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 19 Oct 2010 07:04:26 +0000 Subject: netpoll: Revert napi_poll fix for bonding driver In an erlier patch I modified napi_poll so that devices with IFF_MASTER polled the per_cpu list instead of the device list for napi. I did this because the bonding driver has no napi instances to poll, it instead expects to check the slave devices napi instances, which napi_poll was unaware of. Looking at this more closely however, I now see this isn't strictly needed. As the bond driver poll_controller calls the slaves poll_controller via netpoll_poll_dev, which recursively calls poll_napi on each slave, allowing those napi instances to get serviced. The earlier patch isn't at all harmfull, its just not needed, so lets revert it to make the code cleaner. Sorry for the noise, Signed-off-by: Neil Horman Reviewed-by: WANG Cong Signed-off-by: David S. Miller --- net/core/netpoll.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'net/core/netpoll.c') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index d79d221fd1f..4e98ffac3af 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -156,15 +156,8 @@ static void poll_napi(struct net_device *dev) { struct napi_struct *napi; int budget = 16; - struct softnet_data *sd = &__get_cpu_var(softnet_data); - struct list_head *nlist; - if (dev->flags & IFF_MASTER) - nlist = &sd->poll_list; - else - nlist = &dev->napi_list; - - list_for_each_entry(napi, nlist, dev_list) { + list_for_each_entry(napi, &dev->napi_list, dev_list) { if (napi->poll_owner != smp_processor_id() && spin_trylock(&napi->poll_lock)) { budget = poll_one_napi(dev->npinfo, napi, budget); -- cgit v1.2.3-70-g09d2