From 95afae481414cbdb0567bf82d5e5077c3ac9da20 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Mon, 8 Sep 2014 17:30:41 +0100 Subject: xen: remove DEFINE_XENBUS_DRIVER() macro The DEFINE_XENBUS_DRIVER() macro looks a bit weird and causes sparse errors. Replace the uses with standard structure definitions instead. This is similar to pci and usb device registration. Signed-off-by: David Vrabel --- drivers/net/xen-netfront.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/net/xen-netfront.c') diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index ca82f545ec2..fa671442f42 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -2300,12 +2300,6 @@ static void xennet_sysfs_delif(struct net_device *netdev) #endif /* CONFIG_SYSFS */ -static const struct xenbus_device_id netfront_ids[] = { - { "vif" }, - { "" } -}; - - static int xennet_remove(struct xenbus_device *dev) { struct netfront_info *info = dev_get_drvdata(&dev->dev); @@ -2338,12 +2332,18 @@ static int xennet_remove(struct xenbus_device *dev) return 0; } -static DEFINE_XENBUS_DRIVER(netfront, , +static const struct xenbus_device_id netfront_ids[] = { + { "vif" }, + { "" } +}; + +static struct xenbus_driver netfront_driver = { + .ids = netfront_ids, .probe = netfront_probe, .remove = xennet_remove, .resume = netfront_resume, .otherend_changed = netback_changed, -); +}; static int __init netif_init(void) { -- cgit v1.2.3-70-g09d2 From 04ffcb255f22a2a988ce7393e6e72f6eb3fcb7aa Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 14 Oct 2014 15:19:06 -0700 Subject: net: Add ndo_gso_check Add ndo_gso_check which a device can define to indicate whether is is capable of doing GSO on a packet. This funciton would be called from the stack to determine whether software GSO is needed to be done. A driver should populate this function if it advertises GSO types for which there are combinations that it wouldn't be able to handle. For instance a device that performs UDP tunneling might only implement support for transparent Ethernet bridging type of inner packets or might have limitations on lengths of inner headers. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 2 +- drivers/net/xen-netfront.c | 2 +- include/linux/netdevice.h | 12 +++++++++++- net/core/dev.c | 2 +- 4 files changed, 14 insertions(+), 4 deletions(-) (limited to 'drivers/net/xen-netfront.c') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 0c6adaaf898..65e2892342b 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -298,7 +298,7 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb) */ if (q->flags & IFF_VNET_HDR) features |= vlan->tap_features; - if (netif_needs_gso(skb, features)) { + if (netif_needs_gso(dev, skb, features)) { struct sk_buff *segs = __skb_gso_segment(skb, features, false); if (IS_ERR(segs)) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index ca82f545ec2..3c0b37574d0 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -638,7 +638,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(!netif_carrier_ok(dev) || (slots > 1 && !xennet_can_sg(dev)) || - netif_needs_gso(skb, netif_skb_features(skb)))) { + netif_needs_gso(dev, skb, netif_skb_features(skb)))) { spin_unlock_irqrestore(&queue->tx_lock, flags); goto drop; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 838407aea70..74fd5d37f15 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -998,6 +998,12 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * Callback to use for xmit over the accelerated station. This * is used in place of ndo_start_xmit on accelerated net * devices. + * bool (*ndo_gso_check) (struct sk_buff *skb, + * struct net_device *dev); + * Called by core transmit path to determine if device is capable of + * performing GSO on a packet. The device returns true if it is + * able to GSO the packet, false otherwise. If the return value is + * false the stack will do software GSO. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1147,6 +1153,8 @@ struct net_device_ops { struct net_device *dev, void *priv); int (*ndo_get_lock_subclass)(struct net_device *dev); + bool (*ndo_gso_check) (struct sk_buff *skb, + struct net_device *dev); }; /** @@ -3572,10 +3580,12 @@ static inline bool skb_gso_ok(struct sk_buff *skb, netdev_features_t features) (!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST)); } -static inline bool netif_needs_gso(struct sk_buff *skb, +static inline bool netif_needs_gso(struct net_device *dev, struct sk_buff *skb, netdev_features_t features) { return skb_is_gso(skb) && (!skb_gso_ok(skb, features) || + (dev->netdev_ops->ndo_gso_check && + !dev->netdev_ops->ndo_gso_check(skb, dev)) || unlikely((skb->ip_summed != CHECKSUM_PARTIAL) && (skb->ip_summed != CHECKSUM_UNNECESSARY))); } diff --git a/net/core/dev.c b/net/core/dev.c index 4699dcfdc4a..9f77a78c6b1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2675,7 +2675,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device if (skb->encapsulation) features &= dev->hw_enc_features; - if (netif_needs_gso(skb, features)) { + if (netif_needs_gso(dev, skb, features)) { struct sk_buff *segs; segs = skb_gso_segment(skb, features); -- cgit v1.2.3-70-g09d2 From 1f3c2eba1e2d866ef99bb9b10ade4096e3d7607c Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 22 Oct 2014 11:17:06 +0100 Subject: xen-netfront: always keep the Rx ring full of requests A full Rx ring only requires 1 MiB of memory. This is not enough memory that it is useful to dynamically scale the number of Rx requests in the ring based on traffic rates, because: a) Even the full 1 MiB is a tiny fraction of a typically modern Linux VM (for example, the AWS micro instance still has 1 GiB of memory). b) Netfront would have used up to 1 MiB already even with moderate data rates (there was no adjustment of target based on memory pressure). c) Small VMs are going to typically have one VCPU and hence only one queue. Keeping the ring full of Rx requests handles bursty traffic better than trying to converge on an optimal number of requests to keep filled. On a 4 core host, an iperf -P 64 -t 60 run from dom0 to a 4 VCPU guest improved from 5.1 Gbit/s to 5.6 Gbit/s. Gains with more bursty traffic are expected to be higher. Signed-off-by: David Vrabel Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 253 +++++++++++---------------------------------- 1 file changed, 62 insertions(+), 191 deletions(-) (limited to 'drivers/net/xen-netfront.c') diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index cca871346a0..88a70f5ed59 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -77,7 +77,9 @@ struct netfront_cb { #define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE) #define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE) -#define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE, 256) + +/* Minimum number of Rx slots (includes slot for GSO metadata). */ +#define NET_RX_SLOTS_MIN (XEN_NETIF_NR_SLOTS_MIN + 1) /* Queue name is interface name with "-qNNN" appended */ #define QUEUE_NAME_SIZE (IFNAMSIZ + 6) @@ -137,13 +139,6 @@ struct netfront_queue { struct xen_netif_rx_front_ring rx; int rx_ring_ref; - /* Receive-ring batched refills. */ -#define RX_MIN_TARGET 8 -#define RX_DFL_MIN_TARGET 64 -#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) - unsigned rx_min_target, rx_max_target, rx_target; - struct sk_buff_head rx_batch; - struct timer_list rx_refill_timer; struct sk_buff *rx_skbs[NET_RX_RING_SIZE]; @@ -251,7 +246,7 @@ static void rx_refill_timeout(unsigned long data) static int netfront_tx_slot_available(struct netfront_queue *queue) { return (queue->tx.req_prod_pvt - queue->tx.rsp_cons) < - (TX_MAX_TARGET - MAX_SKB_FRAGS - 2); + (NET_TX_RING_SIZE - MAX_SKB_FRAGS - 2); } static void xennet_maybe_wake_tx(struct netfront_queue *queue) @@ -265,77 +260,55 @@ static void xennet_maybe_wake_tx(struct netfront_queue *queue) netif_tx_wake_queue(netdev_get_tx_queue(dev, queue->id)); } -static void xennet_alloc_rx_buffers(struct netfront_queue *queue) + +static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue) { - unsigned short id; struct sk_buff *skb; struct page *page; - int i, batch_target, notify; - RING_IDX req_prod = queue->rx.req_prod_pvt; - grant_ref_t ref; - unsigned long pfn; - void *vaddr; - struct xen_netif_rx_request *req; - if (unlikely(!netif_carrier_ok(queue->info->netdev))) - return; + skb = __netdev_alloc_skb(queue->info->netdev, + RX_COPY_THRESHOLD + NET_IP_ALIGN, + GFP_ATOMIC | __GFP_NOWARN); + if (unlikely(!skb)) + return NULL; - /* - * Allocate skbuffs greedily, even though we batch updates to the - * receive ring. This creates a less bursty demand on the memory - * allocator, so should reduce the chance of failed allocation requests - * both for ourself and for other kernel subsystems. - */ - batch_target = queue->rx_target - (req_prod - queue->rx.rsp_cons); - for (i = skb_queue_len(&queue->rx_batch); i < batch_target; i++) { - skb = __netdev_alloc_skb(queue->info->netdev, - RX_COPY_THRESHOLD + NET_IP_ALIGN, - GFP_ATOMIC | __GFP_NOWARN); - if (unlikely(!skb)) - goto no_skb; - - /* Align ip header to a 16 bytes boundary */ - skb_reserve(skb, NET_IP_ALIGN); - - page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); - if (!page) { - kfree_skb(skb); -no_skb: - /* Could not allocate any skbuffs. Try again later. */ - mod_timer(&queue->rx_refill_timer, - jiffies + (HZ/10)); - - /* Any skbuffs queued for refill? Force them out. */ - if (i != 0) - goto refill; - break; - } - - skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE); - __skb_queue_tail(&queue->rx_batch, skb); + page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); + if (!page) { + kfree_skb(skb); + return NULL; } + skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE); + + /* Align ip header to a 16 bytes boundary */ + skb_reserve(skb, NET_IP_ALIGN); + skb->dev = queue->info->netdev; + + return skb; +} + - /* Is the batch large enough to be worthwhile? */ - if (i < (queue->rx_target/2)) { - if (req_prod > queue->rx.sring->req_prod) - goto push; +static void xennet_alloc_rx_buffers(struct netfront_queue *queue) +{ + RING_IDX req_prod = queue->rx.req_prod_pvt; + int notify; + + if (unlikely(!netif_carrier_ok(queue->info->netdev))) return; - } - /* Adjust our fill target if we risked running out of buffers. */ - if (((req_prod - queue->rx.sring->rsp_prod) < (queue->rx_target / 4)) && - ((queue->rx_target *= 2) > queue->rx_max_target)) - queue->rx_target = queue->rx_max_target; + for (req_prod = queue->rx.req_prod_pvt; + req_prod - queue->rx.rsp_cons < NET_RX_RING_SIZE; + req_prod++) { + struct sk_buff *skb; + unsigned short id; + grant_ref_t ref; + unsigned long pfn; + struct xen_netif_rx_request *req; - refill: - for (i = 0; ; i++) { - skb = __skb_dequeue(&queue->rx_batch); - if (skb == NULL) + skb = xennet_alloc_one_rx_buffer(queue); + if (!skb) break; - skb->dev = queue->info->netdev; - - id = xennet_rxidx(req_prod + i); + id = xennet_rxidx(req_prod); BUG_ON(queue->rx_skbs[id]); queue->rx_skbs[id] = skb; @@ -345,9 +318,8 @@ no_skb: queue->grant_rx_ref[id] = ref; pfn = page_to_pfn(skb_frag_page(&skb_shinfo(skb)->frags[0])); - vaddr = page_address(skb_frag_page(&skb_shinfo(skb)->frags[0])); - req = RING_GET_REQUEST(&queue->rx, req_prod + i); + req = RING_GET_REQUEST(&queue->rx, req_prod); gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id, pfn_to_mfn(pfn), @@ -357,11 +329,16 @@ no_skb: req->gref = ref; } + queue->rx.req_prod_pvt = req_prod; + + /* Not enough requests? Try again later. */ + if (req_prod - queue->rx.rsp_cons < NET_RX_SLOTS_MIN) { + mod_timer(&queue->rx_refill_timer, jiffies + (HZ/10)); + return; + } + wmb(); /* barrier so backend seens requests */ - /* Above is a suitable barrier to ensure backend will see requests. */ - queue->rx.req_prod_pvt = req_prod + i; - push: RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->rx, notify); if (notify) notify_remote_via_irq(queue->rx_irq); @@ -1070,13 +1047,6 @@ err: work_done -= handle_incoming_queue(queue, &rxq); - /* If we get a callback with very few responses, reduce fill target. */ - /* NB. Note exponential increase, linear decrease. */ - if (((queue->rx.req_prod_pvt - queue->rx.sring->rsp_prod) > - ((3*queue->rx_target) / 4)) && - (--queue->rx_target < queue->rx_min_target)) - queue->rx_target = queue->rx_min_target; - xennet_alloc_rx_buffers(queue); if (work_done < budget) { @@ -1643,11 +1613,6 @@ static int xennet_init_queue(struct netfront_queue *queue) spin_lock_init(&queue->tx_lock); spin_lock_init(&queue->rx_lock); - skb_queue_head_init(&queue->rx_batch); - queue->rx_target = RX_DFL_MIN_TARGET; - queue->rx_min_target = RX_DFL_MIN_TARGET; - queue->rx_max_target = RX_MAX_TARGET; - init_timer(&queue->rx_refill_timer); queue->rx_refill_timer.data = (unsigned long)queue; queue->rx_refill_timer.function = rx_refill_timeout; @@ -1670,7 +1635,7 @@ static int xennet_init_queue(struct netfront_queue *queue) } /* A grant for every tx ring slot */ - if (gnttab_alloc_grant_references(TX_MAX_TARGET, + if (gnttab_alloc_grant_references(NET_TX_RING_SIZE, &queue->gref_tx_head) < 0) { pr_alert("can't alloc tx grant refs\n"); err = -ENOMEM; @@ -1678,7 +1643,7 @@ static int xennet_init_queue(struct netfront_queue *queue) } /* A grant for every rx ring slot */ - if (gnttab_alloc_grant_references(RX_MAX_TARGET, + if (gnttab_alloc_grant_references(NET_RX_RING_SIZE, &queue->gref_rx_head) < 0) { pr_alert("can't alloc rx grant refs\n"); err = -ENOMEM; @@ -2146,83 +2111,18 @@ static const struct ethtool_ops xennet_ethtool_ops = }; #ifdef CONFIG_SYSFS -static ssize_t show_rxbuf_min(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct net_device *netdev = to_net_dev(dev); - struct netfront_info *info = netdev_priv(netdev); - unsigned int num_queues = netdev->real_num_tx_queues; - - if (num_queues) - return sprintf(buf, "%u\n", info->queues[0].rx_min_target); - else - return sprintf(buf, "%u\n", RX_MIN_TARGET); -} - -static ssize_t store_rxbuf_min(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t len) +static ssize_t show_rxbuf(struct device *dev, + struct device_attribute *attr, char *buf) { - struct net_device *netdev = to_net_dev(dev); - struct netfront_info *np = netdev_priv(netdev); - unsigned int num_queues = netdev->real_num_tx_queues; - char *endp; - unsigned long target; - unsigned int i; - struct netfront_queue *queue; - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - target = simple_strtoul(buf, &endp, 0); - if (endp == buf) - return -EBADMSG; - - if (target < RX_MIN_TARGET) - target = RX_MIN_TARGET; - if (target > RX_MAX_TARGET) - target = RX_MAX_TARGET; - - for (i = 0; i < num_queues; ++i) { - queue = &np->queues[i]; - spin_lock_bh(&queue->rx_lock); - if (target > queue->rx_max_target) - queue->rx_max_target = target; - queue->rx_min_target = target; - if (target > queue->rx_target) - queue->rx_target = target; - - xennet_alloc_rx_buffers(queue); - - spin_unlock_bh(&queue->rx_lock); - } - return len; -} - -static ssize_t show_rxbuf_max(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct net_device *netdev = to_net_dev(dev); - struct netfront_info *info = netdev_priv(netdev); - unsigned int num_queues = netdev->real_num_tx_queues; - - if (num_queues) - return sprintf(buf, "%u\n", info->queues[0].rx_max_target); - else - return sprintf(buf, "%u\n", RX_MAX_TARGET); + return sprintf(buf, "%lu\n", NET_RX_RING_SIZE); } -static ssize_t store_rxbuf_max(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t len) +static ssize_t store_rxbuf(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) { - struct net_device *netdev = to_net_dev(dev); - struct netfront_info *np = netdev_priv(netdev); - unsigned int num_queues = netdev->real_num_tx_queues; char *endp; unsigned long target; - unsigned int i = 0; - struct netfront_queue *queue = NULL; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -2231,44 +2131,15 @@ static ssize_t store_rxbuf_max(struct device *dev, if (endp == buf) return -EBADMSG; - if (target < RX_MIN_TARGET) - target = RX_MIN_TARGET; - if (target > RX_MAX_TARGET) - target = RX_MAX_TARGET; - - for (i = 0; i < num_queues; ++i) { - queue = &np->queues[i]; - spin_lock_bh(&queue->rx_lock); - if (target < queue->rx_min_target) - queue->rx_min_target = target; - queue->rx_max_target = target; - if (target < queue->rx_target) - queue->rx_target = target; - - xennet_alloc_rx_buffers(queue); + /* rxbuf_min and rxbuf_max are no longer configurable. */ - spin_unlock_bh(&queue->rx_lock); - } return len; } -static ssize_t show_rxbuf_cur(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct net_device *netdev = to_net_dev(dev); - struct netfront_info *info = netdev_priv(netdev); - unsigned int num_queues = netdev->real_num_tx_queues; - - if (num_queues) - return sprintf(buf, "%u\n", info->queues[0].rx_target); - else - return sprintf(buf, "0\n"); -} - static struct device_attribute xennet_attrs[] = { - __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min), - __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max), - __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL), + __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf, store_rxbuf), + __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf, store_rxbuf), + __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf, NULL), }; static int xennet_sysfs_addif(struct net_device *netdev) -- cgit v1.2.3-70-g09d2 From 8d609725d4357f499e2103e46011308b32f53513 Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Tue, 25 Nov 2014 20:28:24 -0600 Subject: xen-netfront: Remove BUGs on paged skb data which crosses a page boundary These BUGs can be erroneously triggered by frags which refer to tail pages within a compound page. The data in these pages may overrun the hardware page while still being contained within the compound page, but since compound_order() evaluates to 0 for tail pages the assertion fails. The code already iterates through subsequent pages correctly in this scenario, so the BUGs are unnecessary and can be removed. Fixes: f36c374782e4 ("xen/netfront: handle compound page fragments on transmit") Cc: # 3.7+ Signed-off-by: Seth Forshee Reviewed-by: David Vrabel Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/net/xen-netfront.c') diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index cca871346a0..ece8d1804d1 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -496,9 +496,6 @@ static void xennet_make_frags(struct sk_buff *skb, struct netfront_queue *queue, len = skb_frag_size(frag); offset = frag->page_offset; - /* Data must not cross a page boundary. */ - BUG_ON(len + offset > PAGE_SIZE<> PAGE_SHIFT; offset &= ~PAGE_MASK; @@ -506,8 +503,6 @@ static void xennet_make_frags(struct sk_buff *skb, struct netfront_queue *queue, while (len > 0) { unsigned long bytes; - BUG_ON(offset >= PAGE_SIZE); - bytes = PAGE_SIZE - offset; if (bytes > len) bytes = len; -- cgit v1.2.3-70-g09d2 From 11d3d2a16cc1f05c6ece69a4392e99efb85666a6 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Tue, 9 Dec 2014 18:43:28 +0000 Subject: xen-netfront: use correct linear area after linearizing an skb Commit 97a6d1bb2b658ac85ed88205ccd1ab809899884d (xen-netfront: Fix handling packets on compound pages with skb_linearize) attempted to fix a problem where an skb that would have required too many slots would be dropped causing TCP connections to stall. However, it filled in the first slot using the original buffer and not the new one and would use the wrong offset and grant access to the wrong page. Netback would notice the malformed request and stop all traffic on the VIF, reporting: vif vif-3-0 vif3.0: txreq.offset: 85e, size: 4002, end: 6144 vif vif-3-0 vif3.0: fatal error; disabling device Reported-by: Anthony Wright Tested-by: Anthony Wright Signed-off-by: David Vrabel Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net/xen-netfront.c') diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index ece8d1804d1..eeed0ce620f 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -627,6 +627,9 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) slots, skb->len); if (skb_linearize(skb)) goto drop; + data = skb->data; + offset = offset_in_page(data); + len = skb_headlen(skb); } spin_lock_irqsave(&queue->tx_lock, flags); -- cgit v1.2.3-70-g09d2 From 6a6dc08ff6395f58be3ee568cb970ea956f16819 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Tue, 16 Dec 2014 18:59:46 +0000 Subject: xen-netfront: use napi_complete() correctly to prevent Rx stalling After d75b1ade567ffab085e8adbbdacf0092d10cd09c (net: less interrupt masking in NAPI) the napi instance is removed from the per-cpu list prior to calling the n->poll(), and is only requeued if all of the budget was used. This inadvertently broke netfront because netfront does not use NAPI correctly. If netfront had not used all of its budget it would do a final check for any Rx responses and avoid calling napi_complete() if there were more responses. It would still return under budget so it would never be rescheduled. The final check would also not re-enable the Rx interrupt. Additionally, xenvif_poll() would also call napi_complete() /after/ enabling the interrupt. This resulted in a race between the napi_complete() and the napi_schedule() in the interrupt handler. The use of local_irq_save/restore() avoided by race iff the handler is running on the same CPU but not if it was running on a different CPU. Fix both of these by always calling napi_compete() if the budget was not all used, and then calling napi_schedule() if the final checks says there's more work. Signed-off-by: David Vrabel Cc: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'drivers/net/xen-netfront.c') diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 2f0a9ce9ff7..22bcb4e12e2 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -977,7 +977,6 @@ static int xennet_poll(struct napi_struct *napi, int budget) struct sk_buff_head rxq; struct sk_buff_head errq; struct sk_buff_head tmpq; - unsigned long flags; int err; spin_lock(&queue->rx_lock); @@ -1050,15 +1049,11 @@ err: if (work_done < budget) { int more_to_do = 0; - napi_gro_flush(napi, false); - - local_irq_save(flags); + napi_complete(napi); RING_FINAL_CHECK_FOR_RESPONSES(&queue->rx, more_to_do); - if (!more_to_do) - __napi_complete(napi); - - local_irq_restore(flags); + if (more_to_do) + napi_schedule(napi); } spin_unlock(&queue->rx_lock); -- cgit v1.2.3-70-g09d2