Diffstat (limited to 'drivers/net/xen-netback/interface.c')
-rw-r--r-- | drivers/net/xen-netback/interface.c | 145
1 file changed, 136 insertions, 9 deletions
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index b9de31ea7fc..ef05c5c49d4 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -38,6 +38,7 @@
 #include <xen/events.h>
 #include <asm/xen/hypercall.h>
+#include <xen/balloon.h>
 
 #define XENVIF_QUEUE_LENGTH 32
 #define XENVIF_NAPI_WEIGHT  64
 
@@ -62,6 +63,15 @@ static int xenvif_poll(struct napi_struct *napi, int budget)
         struct xenvif *vif = container_of(napi, struct xenvif, napi);
         int work_done;
 
+        /* This vif is rogue, we pretend we've there is nothing to do
+         * for this vif to deschedule it from NAPI. But this interface
+         * will be turned off in thread context later.
+         */
+        if (unlikely(vif->disabled)) {
+                napi_complete(napi);
+                return 0;
+        }
+
         work_done = xenvif_tx_action(vif, budget);
 
         if (work_done < budget) {
@@ -100,7 +110,6 @@ static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
 {
         struct xenvif *vif = dev_id;
 
-        vif->rx_event = true;
         xenvif_kick_thread(vif);
 
         return IRQ_HANDLED;
@@ -114,6 +123,18 @@ static irqreturn_t xenvif_interrupt(int irq, void *dev_id)
         return IRQ_HANDLED;
 }
 
+static void xenvif_wake_queue(unsigned long data)
+{
+        struct xenvif *vif = (struct xenvif *)data;
+
+        if (netif_queue_stopped(vif->dev)) {
+                netdev_err(vif->dev, "draining TX queue\n");
+                vif->rx_queue_purge = true;
+                xenvif_kick_thread(vif);
+                netif_wake_queue(vif->dev);
+        }
+}
+
 static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
         struct xenvif *vif = netdev_priv(dev);
@@ -122,7 +143,9 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
         BUG_ON(skb->dev != dev);
 
         /* Drop the packet if vif is not ready */
-        if (vif->task == NULL || !xenvif_schedulable(vif))
+        if (vif->task == NULL ||
+            vif->dealloc_task == NULL ||
+            !xenvif_schedulable(vif))
                 goto drop;
 
         /* At best we'll need one slot for the header and one for each
@@ -133,16 +156,20 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
         /* If the skb is GSO then we'll also need an extra slot for the
          * metadata.
          */
-        if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 ||
-            skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
+        if (skb_is_gso(skb))
                 min_slots_needed++;
 
         /* If the skb can't possibly fit in the remaining slots
          * then turn off the queue to give the ring a chance to
          * drain.
          */
-        if (!xenvif_rx_ring_slots_available(vif, min_slots_needed))
+        if (!xenvif_rx_ring_slots_available(vif, min_slots_needed)) {
+                vif->wake_queue.function = xenvif_wake_queue;
+                vif->wake_queue.data = (unsigned long)vif;
                 xenvif_stop_queue(vif);
+                mod_timer(&vif->wake_queue,
+                          jiffies + rx_drain_timeout_jiffies);
+        }
 
         skb_queue_tail(&vif->rx_queue, skb);
         xenvif_kick_thread(vif);
@@ -235,6 +262,28 @@ static const struct xenvif_stat {
                 "rx_gso_checksum_fixup",
                 offsetof(struct xenvif, rx_gso_checksum_fixup)
         },
+        /* If (sent != success + fail), there are probably packets never
+         * freed up properly!
+         */
+        {
+                "tx_zerocopy_sent",
+                offsetof(struct xenvif, tx_zerocopy_sent),
+        },
+        {
+                "tx_zerocopy_success",
+                offsetof(struct xenvif, tx_zerocopy_success),
+        },
+        {
+                "tx_zerocopy_fail",
+                offsetof(struct xenvif, tx_zerocopy_fail)
+        },
+        /* Number of packets exceeding MAX_SKB_FRAG slots. You should use
+         * a guest with the same MAX_SKB_FRAG
+         */
+        {
+                "tx_frag_overflow",
+                offsetof(struct xenvif, tx_frag_overflow)
+        },
 };
 
 static int xenvif_get_sset_count(struct net_device *dev, int string_set)
@@ -323,11 +372,15 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
         vif->ip_csum = 1;
         vif->dev = dev;
 
+        vif->disabled = false;
+
         vif->credit_bytes = vif->remaining_credit = ~0UL;
         vif->credit_usec  = 0UL;
         init_timer(&vif->credit_timeout);
         vif->credit_window_start = get_jiffies_64();
 
+        init_timer(&vif->wake_queue);
+
         dev->netdev_ops = &xenvif_netdev_ops;
         dev->hw_features = NETIF_F_SG |
                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
@@ -344,8 +397,26 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
         vif->pending_prod = MAX_PENDING_REQS;
         for (i = 0; i < MAX_PENDING_REQS; i++)
                 vif->pending_ring[i] = i;
-        for (i = 0; i < MAX_PENDING_REQS; i++)
-                vif->mmap_pages[i] = NULL;
+        spin_lock_init(&vif->callback_lock);
+        spin_lock_init(&vif->response_lock);
+        /* If ballooning is disabled, this will consume real memory, so you
+         * better enable it. The long term solution would be to use just a
+         * bunch of valid page descriptors, without dependency on ballooning
+         */
+        err = alloc_xenballooned_pages(MAX_PENDING_REQS,
+                                       vif->mmap_pages,
+                                       false);
+        if (err) {
+                netdev_err(dev, "Could not reserve mmap_pages\n");
+                return ERR_PTR(-ENOMEM);
+        }
+        for (i = 0; i < MAX_PENDING_REQS; i++) {
+                vif->pending_tx_info[i].callback_struct = (struct ubuf_info)
+                        { .callback = xenvif_zerocopy_callback,
+                          .ctx = NULL,
+                          .desc = i };
+                vif->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
+        }
 
         /*
          * Initialise a dummy MAC address. We choose the numerically
@@ -383,12 +454,14 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
 
         BUG_ON(vif->tx_irq);
         BUG_ON(vif->task);
+        BUG_ON(vif->dealloc_task);
 
         err = xenvif_map_frontend_rings(vif, tx_ring_ref, rx_ring_ref);
         if (err < 0)
                 goto err;
 
         init_waitqueue_head(&vif->wq);
+        init_waitqueue_head(&vif->dealloc_wq);
 
         if (tx_evtchn == rx_evtchn) {
                 /* feature-split-event-channels == 0 */
@@ -422,8 +495,8 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
                 disable_irq(vif->rx_irq);
         }
 
-        task = kthread_create(xenvif_kthread,
-                              (void *)vif, "%s", vif->dev->name);
+        task = kthread_create(xenvif_kthread_guest_rx,
+                              (void *)vif, "%s-guest-rx", vif->dev->name);
         if (IS_ERR(task)) {
                 pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
                 err = PTR_ERR(task);
@@ -432,6 +505,16 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
 
         vif->task = task;
 
+        task = kthread_create(xenvif_dealloc_kthread,
+                              (void *)vif, "%s-dealloc", vif->dev->name);
+        if (IS_ERR(task)) {
+                pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
+                err = PTR_ERR(task);
+                goto err_rx_unbind;
+        }
+
+        vif->dealloc_task = task;
+
         rtnl_lock();
         if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN)
                 dev_set_mtu(vif->dev, ETH_DATA_LEN);
@@ -442,6 +525,7 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
         rtnl_unlock();
 
         wake_up_process(vif->task);
+        wake_up_process(vif->dealloc_task);
 
         return 0;
 
@@ -475,10 +559,16 @@ void xenvif_disconnect(struct xenvif *vif)
                 xenvif_carrier_off(vif);
 
         if (vif->task) {
+                del_timer_sync(&vif->wake_queue);
                 kthread_stop(vif->task);
                 vif->task = NULL;
         }
 
+        if (vif->dealloc_task) {
+                kthread_stop(vif->dealloc_task);
+                vif->dealloc_task = NULL;
+        }
+
         if (vif->tx_irq) {
                 if (vif->tx_irq == vif->rx_irq)
                         unbind_from_irqhandler(vif->tx_irq, vif);
@@ -494,6 +584,43 @@ void xenvif_disconnect(struct xenvif *vif)
 
 void xenvif_free(struct xenvif *vif)
 {
+        int i, unmap_timeout = 0;
+        /* Here we want to avoid timeout messages if an skb can be legitimately
+         * stuck somewhere else. Realistically this could be an another vif's
+         * internal or QDisc queue. That another vif also has this
+         * rx_drain_timeout_msecs timeout, but the timer only ditches the
+         * internal queue. After that, the QDisc queue can put in worst case
+         * XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS skbs into that another vif's
+         * internal queue, so we need several rounds of such timeouts until we
+         * can be sure that no another vif should have skb's from us. We are
+         * not sending more skb's, so newly stuck packets are not interesting
+         * for us here.
+         */
+        unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000) *
+                DIV_ROUND_UP(XENVIF_QUEUE_LENGTH, (XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS));
+
+        for (i = 0; i < MAX_PENDING_REQS; ++i) {
+                if (vif->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) {
+                        unmap_timeout++;
+                        schedule_timeout(msecs_to_jiffies(1000));
+                        if (unmap_timeout > worst_case_skb_lifetime &&
+                            net_ratelimit())
+                                netdev_err(vif->dev,
+                                           "Page still granted! Index: %x\n",
+                                           i);
+                        /* If there are still unmapped pages, reset the loop to
+                         * start checking again. We shouldn't exit here until
+                         * dealloc thread and NAPI instance release all the
+                         * pages. If a kernel bug causes the skbs to stall
+                         * somewhere, the interface cannot be brought down
+                         * properly.
+                         */
+                        i = -1;
+                }
+        }
+
+        free_xenballooned_pages(MAX_PENDING_REQS, vif->mmap_pages);
+
         netif_napi_del(&vif->napi);
 
         unregister_netdev(vif->dev);
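
As a rough aid for reviewers, the worst_case_skb_lifetime bound used in xenvif_free() above can be worked through with concrete numbers. The sketch below is not part of the patch: it assumes a 4 KiB page size (so XEN_NETIF_RX_RING_SIZE = 256 and MAX_SKB_FRAGS = 17) and an rx_drain_timeout_msecs of 10000, taken here as an assumed default for the module parameter defined in the companion netback.c change; XENVIF_QUEUE_LENGTH = 32 does come from this file.

/* Standalone sketch (userspace C, not kernel code): reproduces the
 * worst_case_skb_lifetime arithmetic from xenvif_free() with assumed
 * values for the constants involved.
 */
#include <stdio.h>

#define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))

#define XENVIF_QUEUE_LENGTH     32   /* from this file */
#define XEN_NETIF_RX_RING_SIZE  256  /* assumption: 4 KiB ring page */
#define MAX_SKB_FRAGS           17   /* assumption: 64 KiB / 4 KiB + 1 */

int main(void)
{
        unsigned int rx_drain_timeout_msecs = 10000; /* assumed default */

        /* Same formula as the patch: one drain timeout per "round" that
         * skbs may spend queued behind another vif's internal queue.
         */
        unsigned int worst_case_skb_lifetime =
                (rx_drain_timeout_msecs / 1000) *
                DIV_ROUND_UP(XENVIF_QUEUE_LENGTH,
                             (XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS));

        printf("worst_case_skb_lifetime = %u seconds\n",
               worst_case_skb_lifetime); /* prints 30 with these values */
        return 0;
}

Under those assumptions the loop tolerates three 10-second drain rounds, i.e. roughly 30 one-second sleeps per still-granted index, before the rate-limited "Page still granted!" message is emitted.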