summaryrefslogtreecommitdiffstats
path: root/drivers/net/xen-netback
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/xen-netback')
-rw-r--r--drivers/net/xen-netback/common.h36
-rw-r--r--drivers/net/xen-netback/interface.c163
-rw-r--r--drivers/net/xen-netback/netback.c228
-rw-r--r--drivers/net/xen-netback/xenbus.c209
4 files changed, 477 insertions, 159 deletions
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 4dd7c4a1923..d4eb8d2e9cb 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -44,6 +44,7 @@
#include <xen/interface/grant_table.h>
#include <xen/grant_table.h>
#include <xen/xenbus.h>
+#include <linux/debugfs.h>
typedef unsigned int pending_ring_idx_t;
#define INVALID_PENDING_RING_IDX (~0U)
@@ -164,6 +165,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
u16 dealloc_ring[MAX_PENDING_REQS];
struct task_struct *dealloc_task;
wait_queue_head_t dealloc_wq;
+ atomic_t inflight_packets;
/* Use kthread for guest RX */
struct task_struct *task;
@@ -175,9 +177,9 @@ struct xenvif_queue { /* Per-queue data for xenvif */
struct xen_netif_rx_back_ring rx;
struct sk_buff_head rx_queue;
RING_IDX rx_last_skb_slots;
- bool rx_queue_purge;
+ unsigned long status;
- struct timer_list wake_queue;
+ struct timer_list rx_stalled;
struct gnttab_copy grant_copy_op[MAX_GRANT_COPY_OPS];
@@ -197,6 +199,20 @@ struct xenvif_queue { /* Per-queue data for xenvif */
struct xenvif_stats stats;
};
+enum state_bit_shift {
+ /* This bit marks that the vif is connected */
+ VIF_STATUS_CONNECTED,
+ /* This bit signals the RX thread that queuing was stopped (in
+ * start_xmit), and either the timer fired or an RX interrupt came
+ */
+ QUEUE_STATUS_RX_PURGE_EVENT,
+ /* This bit tells the interrupt handler that this queue was the reason
+ * for the carrier off, so it should kick the thread. Only queues which
+ * brought it down can turn on the carrier.
+ */
+ QUEUE_STATUS_RX_STALLED
+};
+
struct xenvif {
/* Unique identifier for this interface. */
domid_t domid;
@@ -219,9 +235,15 @@ struct xenvif {
* frontend is rogue.
*/
bool disabled;
+ unsigned long status;
/* Queues */
struct xenvif_queue *queues;
+ unsigned int num_queues; /* active queues, resource allocated */
+
+#ifdef CONFIG_DEBUG_FS
+ struct dentry *xenvif_dbg_root;
+#endif
/* Miscellaneous private stuff. */
struct net_device *dev;
@@ -296,10 +318,20 @@ static inline pending_ring_idx_t nr_pending_reqs(struct xenvif_queue *queue)
/* Callback from stack when TX packet can be released */
void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success);
+irqreturn_t xenvif_interrupt(int irq, void *dev_id);
+
extern bool separate_tx_rx_irq;
extern unsigned int rx_drain_timeout_msecs;
extern unsigned int rx_drain_timeout_jiffies;
extern unsigned int xenvif_max_queues;
+#ifdef CONFIG_DEBUG_FS
+extern struct dentry *xen_netback_dbg_root;
+#endif
+
+void xenvif_skb_zerocopy_prepare(struct xenvif_queue *queue,
+ struct sk_buff *skb);
+void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue);
+
#endif /* __XEN_NETBACK__COMMON_H__ */
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 852da34b896..e29e15dca86 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -43,6 +43,23 @@
#define XENVIF_QUEUE_LENGTH 32
#define XENVIF_NAPI_WEIGHT 64
+/* This function is used to set SKBTX_DEV_ZEROCOPY as well as
+ * increasing the inflight counter. We need to increase the inflight
+ * counter because core driver calls into xenvif_zerocopy_callback
+ * which calls xenvif_skb_zerocopy_complete.
+ */
+void xenvif_skb_zerocopy_prepare(struct xenvif_queue *queue,
+ struct sk_buff *skb)
+{
+ skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+ atomic_inc(&queue->inflight_packets);
+}
+
+void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue)
+{
+ atomic_dec(&queue->inflight_packets);
+}
+
static inline void xenvif_stop_queue(struct xenvif_queue *queue)
{
struct net_device *dev = queue->vif->dev;
@@ -55,7 +72,8 @@ static inline void xenvif_stop_queue(struct xenvif_queue *queue)
int xenvif_schedulable(struct xenvif *vif)
{
- return netif_running(vif->dev) && netif_carrier_ok(vif->dev);
+ return netif_running(vif->dev) &&
+ test_bit(VIF_STATUS_CONNECTED, &vif->status);
}
static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id)
@@ -96,13 +114,22 @@ int xenvif_poll(struct napi_struct *napi, int budget)
static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
{
struct xenvif_queue *queue = dev_id;
+ struct netdev_queue *net_queue =
+ netdev_get_tx_queue(queue->vif->dev, queue->id);
+ /* QUEUE_STATUS_RX_PURGE_EVENT is only set if either QDisc was off OR
+ * the carrier went down and this queue was previously blocked
+ */
+ if (unlikely(netif_tx_queue_stopped(net_queue) ||
+ (!netif_carrier_ok(queue->vif->dev) &&
+ test_bit(QUEUE_STATUS_RX_STALLED, &queue->status))))
+ set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status);
xenvif_kick_thread(queue);
return IRQ_HANDLED;
}
-static irqreturn_t xenvif_interrupt(int irq, void *dev_id)
+irqreturn_t xenvif_interrupt(int irq, void *dev_id)
{
xenvif_tx_interrupt(irq, dev_id);
xenvif_rx_interrupt(irq, dev_id);
@@ -124,45 +151,22 @@ void xenvif_wake_queue(struct xenvif_queue *queue)
netif_tx_wake_queue(netdev_get_tx_queue(dev, id));
}
-/* Callback to wake the queue and drain it on timeout */
-static void xenvif_wake_queue_callback(unsigned long data)
+/* Callback to wake the queue's thread and turn the carrier off on timeout */
+static void xenvif_rx_stalled(unsigned long data)
{
struct xenvif_queue *queue = (struct xenvif_queue *)data;
if (xenvif_queue_stopped(queue)) {
- netdev_err(queue->vif->dev, "draining TX queue\n");
- queue->rx_queue_purge = true;
+ set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status);
xenvif_kick_thread(queue);
- xenvif_wake_queue(queue);
}
}
-static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb,
- void *accel_priv, select_queue_fallback_t fallback)
-{
- unsigned int num_queues = dev->real_num_tx_queues;
- u32 hash;
- u16 queue_index;
-
- /* First, check if there is only one queue to optimise the
- * single-queue or old frontend scenario.
- */
- if (num_queues == 1) {
- queue_index = 0;
- } else {
- /* Use skb_get_hash to obtain an L4 hash if available */
- hash = skb_get_hash(skb);
- queue_index = hash % num_queues;
- }
-
- return queue_index;
-}
-
static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct xenvif *vif = netdev_priv(dev);
struct xenvif_queue *queue = NULL;
- unsigned int num_queues = dev->real_num_tx_queues;
+ unsigned int num_queues = vif->num_queues;
u16 index;
int min_slots_needed;
@@ -203,11 +207,11 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
* drain.
*/
if (!xenvif_rx_ring_slots_available(queue, min_slots_needed)) {
- queue->wake_queue.function = xenvif_wake_queue_callback;
- queue->wake_queue.data = (unsigned long)queue;
+ queue->rx_stalled.function = xenvif_rx_stalled;
+ queue->rx_stalled.data = (unsigned long)queue;
xenvif_stop_queue(queue);
- mod_timer(&queue->wake_queue,
- jiffies + rx_drain_timeout_jiffies);
+ mod_timer(&queue->rx_stalled,
+ jiffies + rx_drain_timeout_jiffies);
}
skb_queue_tail(&queue->rx_queue, skb);
@@ -225,7 +229,7 @@ static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
{
struct xenvif *vif = netdev_priv(dev);
struct xenvif_queue *queue = NULL;
- unsigned int num_queues = dev->real_num_tx_queues;
+ unsigned int num_queues = vif->num_queues;
unsigned long rx_bytes = 0;
unsigned long rx_packets = 0;
unsigned long tx_bytes = 0;
@@ -256,7 +260,7 @@ out:
static void xenvif_up(struct xenvif *vif)
{
struct xenvif_queue *queue = NULL;
- unsigned int num_queues = vif->dev->real_num_tx_queues;
+ unsigned int num_queues = vif->num_queues;
unsigned int queue_index;
for (queue_index = 0; queue_index < num_queues; ++queue_index) {
@@ -272,7 +276,7 @@ static void xenvif_up(struct xenvif *vif)
static void xenvif_down(struct xenvif *vif)
{
struct xenvif_queue *queue = NULL;
- unsigned int num_queues = vif->dev->real_num_tx_queues;
+ unsigned int num_queues = vif->num_queues;
unsigned int queue_index;
for (queue_index = 0; queue_index < num_queues; ++queue_index) {
@@ -288,7 +292,7 @@ static void xenvif_down(struct xenvif *vif)
static int xenvif_open(struct net_device *dev)
{
struct xenvif *vif = netdev_priv(dev);
- if (netif_carrier_ok(dev))
+ if (test_bit(VIF_STATUS_CONNECTED, &vif->status))
xenvif_up(vif);
netif_tx_start_all_queues(dev);
return 0;
@@ -297,7 +301,7 @@ static int xenvif_open(struct net_device *dev)
static int xenvif_close(struct net_device *dev)
{
struct xenvif *vif = netdev_priv(dev);
- if (netif_carrier_ok(dev))
+ if (test_bit(VIF_STATUS_CONNECTED, &vif->status))
xenvif_down(vif);
netif_tx_stop_all_queues(dev);
return 0;
@@ -379,7 +383,7 @@ static void xenvif_get_ethtool_stats(struct net_device *dev,
struct ethtool_stats *stats, u64 * data)
{
struct xenvif *vif = netdev_priv(dev);
- unsigned int num_queues = dev->real_num_tx_queues;
+ unsigned int num_queues = vif->num_queues;
int i;
unsigned int queue_index;
struct xenvif_stats *vif_stats;
@@ -424,7 +428,6 @@ static const struct net_device_ops xenvif_netdev_ops = {
.ndo_fix_features = xenvif_fix_features,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
- .ndo_select_queue = xenvif_select_queue,
};
struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
@@ -438,10 +441,10 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
/* Allocate a netdev with the max. supported number of queues.
* When the guest selects the desired number, it will be updated
- * via netif_set_real_num_tx_queues().
+ * via netif_set_real_num_*_queues().
*/
- dev = alloc_netdev_mq(sizeof(struct xenvif), name, ether_setup,
- xenvif_max_queues);
+ dev = alloc_netdev_mq(sizeof(struct xenvif), name, NET_NAME_UNKNOWN,
+ ether_setup, xenvif_max_queues);
if (dev == NULL) {
pr_warn("Could not allocate netdev for %s\n", name);
return ERR_PTR(-ENOMEM);
@@ -458,11 +461,9 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
vif->dev = dev;
vif->disabled = false;
- /* Start out with no queues. The call below does not require
- * rtnl_lock() as it happens before register_netdev().
- */
+ /* Start out with no queues. */
vif->queues = NULL;
- netif_set_real_num_tx_queues(dev, 0);
+ vif->num_queues = 0;
dev->netdev_ops = &xenvif_netdev_ops;
dev->hw_features = NETIF_F_SG |
@@ -538,10 +539,7 @@ int xenvif_init_queue(struct xenvif_queue *queue)
queue->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
}
- init_timer(&queue->wake_queue);
-
- netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll,
- XENVIF_NAPI_WEIGHT);
+ init_timer(&queue->rx_stalled);
return 0;
}
@@ -552,6 +550,7 @@ void xenvif_carrier_on(struct xenvif *vif)
if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN)
dev_set_mtu(vif->dev, ETH_DATA_LEN);
netdev_update_features(vif->dev);
+ set_bit(VIF_STATUS_CONNECTED, &vif->status);
netif_carrier_on(vif->dev);
if (netif_running(vif->dev))
xenvif_up(vif);
@@ -575,6 +574,7 @@ int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
init_waitqueue_head(&queue->wq);
init_waitqueue_head(&queue->dealloc_wq);
+ atomic_set(&queue->inflight_packets, 0);
if (tx_evtchn == rx_evtchn) {
/* feature-split-event-channels == 0 */
@@ -629,6 +629,9 @@ int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
wake_up_process(queue->task);
wake_up_process(queue->dealloc_task);
+ netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll,
+ XENVIF_NAPI_WEIGHT);
+
return 0;
err_rx_unbind:
@@ -649,45 +652,29 @@ void xenvif_carrier_off(struct xenvif *vif)
struct net_device *dev = vif->dev;
rtnl_lock();
- netif_carrier_off(dev); /* discard queued packets */
- if (netif_running(dev))
- xenvif_down(vif);
- rtnl_unlock();
-}
-
-static void xenvif_wait_unmap_timeout(struct xenvif_queue *queue,
- unsigned int worst_case_skb_lifetime)
-{
- int i, unmap_timeout = 0;
-
- for (i = 0; i < MAX_PENDING_REQS; ++i) {
- if (queue->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) {
- unmap_timeout++;
- schedule_timeout(msecs_to_jiffies(1000));
- if (unmap_timeout > worst_case_skb_lifetime &&
- net_ratelimit())
- netdev_err(queue->vif->dev,
- "Page still granted! Index: %x\n",
- i);
- i = -1;
- }
+ if (test_and_clear_bit(VIF_STATUS_CONNECTED, &vif->status)) {
+ netif_carrier_off(dev); /* discard queued packets */
+ if (netif_running(dev))
+ xenvif_down(vif);
}
+ rtnl_unlock();
}
void xenvif_disconnect(struct xenvif *vif)
{
struct xenvif_queue *queue = NULL;
- unsigned int num_queues = vif->dev->real_num_tx_queues;
+ unsigned int num_queues = vif->num_queues;
unsigned int queue_index;
- if (netif_carrier_ok(vif->dev))
- xenvif_carrier_off(vif);
+ xenvif_carrier_off(vif);
for (queue_index = 0; queue_index < num_queues; ++queue_index) {
queue = &vif->queues[queue_index];
+ netif_napi_del(&queue->napi);
+
if (queue->task) {
- del_timer_sync(&queue->wake_queue);
+ del_timer_sync(&queue->rx_stalled);
kthread_stop(queue->task);
queue->task = NULL;
}
@@ -718,42 +705,24 @@ void xenvif_disconnect(struct xenvif *vif)
void xenvif_deinit_queue(struct xenvif_queue *queue)
{
free_xenballooned_pages(MAX_PENDING_REQS, queue->mmap_pages);
- netif_napi_del(&queue->napi);
}
void xenvif_free(struct xenvif *vif)
{
struct xenvif_queue *queue = NULL;
- unsigned int num_queues = vif->dev->real_num_tx_queues;
+ unsigned int num_queues = vif->num_queues;
unsigned int queue_index;
- /* Here we want to avoid timeout messages if an skb can be legitimately
- * stuck somewhere else. Realistically this could be an another vif's
- * internal or QDisc queue. That another vif also has this
- * rx_drain_timeout_msecs timeout, but the timer only ditches the
- * internal queue. After that, the QDisc queue can put in worst case
- * XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS skbs into that another vif's
- * internal queue, so we need several rounds of such timeouts until we
- * can be sure that no another vif should have skb's from us. We are
- * not sending more skb's, so newly stuck packets are not interesting
- * for us here.
- */
- unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000) *
- DIV_ROUND_UP(XENVIF_QUEUE_LENGTH, (XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS));
unregister_netdev(vif->dev);
for (queue_index = 0; queue_index < num_queues; ++queue_index) {
queue = &vif->queues[queue_index];
- xenvif_wait_unmap_timeout(queue, worst_case_skb_lifetime);
xenvif_deinit_queue(queue);
}
- /* Free the array of queues. The call below does not require
- * rtnl_lock() because it happens after unregister_netdev().
- */
- netif_set_real_num_tx_queues(vif->dev, 0);
vfree(vif->queues);
vif->queues = NULL;
+ vif->num_queues = 0;
free_netdev(vif->dev);
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 1844a47636b..08f65996534 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1030,14 +1030,21 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue,
{
struct gnttab_map_grant_ref *gop_map = *gopp_map;
u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
+ /* This always points to the shinfo of the skb being checked, which
+ * could be either the first or the one on the frag_list
+ */
struct skb_shared_info *shinfo = skb_shinfo(skb);
+ /* If this is non-NULL, we are currently checking the frag_list skb, and
+ * this points to the shinfo of the first one
+ */
+ struct skb_shared_info *first_shinfo = NULL;
int nr_frags = shinfo->nr_frags;
+ const bool sharedslot = nr_frags &&
+ frag_get_pending_idx(&shinfo->frags[0]) == pending_idx;
int i, err;
- struct sk_buff *first_skb = NULL;
/* Check status of header. */
err = (*gopp_copy)->status;
- (*gopp_copy)++;
if (unlikely(err)) {
if (net_ratelimit())
netdev_dbg(queue->vif->dev,
@@ -1045,8 +1052,12 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue,
(*gopp_copy)->status,
pending_idx,
(*gopp_copy)->source.u.ref);
- xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR);
+ /* The first frag might still have this slot mapped */
+ if (!sharedslot)
+ xenvif_idx_release(queue, pending_idx,
+ XEN_NETIF_RSP_ERROR);
}
+ (*gopp_copy)++;
check_frags:
for (i = 0; i < nr_frags; i++, gop_map++) {
@@ -1062,8 +1073,19 @@ check_frags:
pending_idx,
gop_map->handle);
/* Had a previous error? Invalidate this fragment. */
- if (unlikely(err))
+ if (unlikely(err)) {
xenvif_idx_unmap(queue, pending_idx);
+ /* If the mapping of the first frag was OK, but
+ * the header's copy failed, and they are
+ * sharing a slot, send an error
+ */
+ if (i == 0 && sharedslot)
+ xenvif_idx_release(queue, pending_idx,
+ XEN_NETIF_RSP_ERROR);
+ else
+ xenvif_idx_release(queue, pending_idx,
+ XEN_NETIF_RSP_OKAY);
+ }
continue;
}
@@ -1075,42 +1097,53 @@ check_frags:
gop_map->status,
pending_idx,
gop_map->ref);
+
xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR);
/* Not the first error? Preceding frags already invalidated. */
if (err)
continue;
- /* First error: invalidate preceding fragments. */
+
+ /* First error: if the header haven't shared a slot with the
+ * first frag, release it as well.
+ */
+ if (!sharedslot)
+ xenvif_idx_release(queue,
+ XENVIF_TX_CB(skb)->pending_idx,
+ XEN_NETIF_RSP_OKAY);
+
+ /* Invalidate preceding fragments of this skb. */
for (j = 0; j < i; j++) {
pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
xenvif_idx_unmap(queue, pending_idx);
+ xenvif_idx_release(queue, pending_idx,
+ XEN_NETIF_RSP_OKAY);
+ }
+
+ /* And if we found the error while checking the frag_list, unmap
+ * the first skb's frags
+ */
+ if (first_shinfo) {
+ for (j = 0; j < first_shinfo->nr_frags; j++) {
+ pending_idx = frag_get_pending_idx(&first_shinfo->frags[j]);
+ xenvif_idx_unmap(queue, pending_idx);
+ xenvif_idx_release(queue, pending_idx,
+ XEN_NETIF_RSP_OKAY);
+ }
}
/* Remember the error: invalidate all subsequent fragments. */
err = newerr;
}
- if (skb_has_frag_list(skb)) {
- first_skb = skb;
- skb = shinfo->frag_list;
- shinfo = skb_shinfo(skb);
+ if (skb_has_frag_list(skb) && !first_shinfo) {
+ first_shinfo = skb_shinfo(skb);
+ shinfo = skb_shinfo(skb_shinfo(skb)->frag_list);
nr_frags = shinfo->nr_frags;
goto check_frags;
}
- /* There was a mapping error in the frag_list skb. We have to unmap
- * the first skb's frags
- */
- if (first_skb && err) {
- int j;
- shinfo = skb_shinfo(first_skb);
- for (j = 0; j < shinfo->nr_frags; j++) {
- pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
- xenvif_idx_unmap(queue, pending_idx);
- }
- }
-
*gopp_map = gop_map;
return err;
}
@@ -1492,10 +1525,12 @@ static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *s
/* remove traces of mapped pages and frag_list */
skb_frag_list_init(skb);
uarg = skb_shinfo(skb)->destructor_arg;
+ /* increase inflight counter to offset decrement in callback */
+ atomic_inc(&queue->inflight_packets);
uarg->callback(uarg, true);
skb_shinfo(skb)->destructor_arg = NULL;
- skb_shinfo(nskb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+ xenvif_skb_zerocopy_prepare(queue, nskb);
kfree_skb(nskb);
return 0;
@@ -1518,7 +1553,16 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
/* Check the remap error code. */
if (unlikely(xenvif_tx_check_gop(queue, skb, &gop_map, &gop_copy))) {
+ /* If there was an error, xenvif_tx_check_gop is
+ * expected to release all the frags which were mapped,
+ * so kfree_skb shouldn't do it again
+ */
skb_shinfo(skb)->nr_frags = 0;
+ if (skb_has_frag_list(skb)) {
+ struct sk_buff *nskb =
+ skb_shinfo(skb)->frag_list;
+ skb_shinfo(nskb)->nr_frags = 0;
+ }
kfree_skb(skb);
continue;
}
@@ -1547,7 +1591,7 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
if (net_ratelimit())
netdev_err(queue->vif->dev,
"Not enough memory to consolidate frag_list!\n");
- skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+ xenvif_skb_zerocopy_prepare(queue, skb);
kfree_skb(skb);
continue;
}
@@ -1567,7 +1611,7 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
"Can't setup checksum in net_tx_action\n");
/* We have to set this flag to trigger the callback */
if (skb_shinfo(skb)->destructor_arg)
- skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+ xenvif_skb_zerocopy_prepare(queue, skb);
kfree_skb(skb);
continue;
}
@@ -1599,7 +1643,7 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
* skb. E.g. the __pskb_pull_tail earlier can do such thing.
*/
if (skb_shinfo(skb)->destructor_arg) {
- skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+ xenvif_skb_zerocopy_prepare(queue, skb);
queue->stats.tx_zerocopy_sent++;
}
@@ -1639,6 +1683,7 @@ void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success)
queue->stats.tx_zerocopy_success++;
else
queue->stats.tx_zerocopy_fail++;
+ xenvif_skb_zerocopy_complete(queue);
}
static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
@@ -1822,15 +1867,12 @@ void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
tx_unmap_op.status);
BUG();
}
-
- xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_OKAY);
}
static inline int rx_work_todo(struct xenvif_queue *queue)
{
return (!skb_queue_empty(&queue->rx_queue) &&
- xenvif_rx_ring_slots_available(queue, queue->rx_last_skb_slots)) ||
- queue->rx_queue_purge;
+ xenvif_rx_ring_slots_available(queue, queue->rx_last_skb_slots));
}
static inline int tx_work_todo(struct xenvif_queue *queue)
@@ -1895,6 +1937,75 @@ static void xenvif_start_queue(struct xenvif_queue *queue)
xenvif_wake_queue(queue);
}
+/* Only called from the queue's thread, it handles the situation when the guest
+ * doesn't post enough requests on the receiving ring.
+ * First xenvif_start_xmit disables QDisc and start a timer, and then either the
+ * timer fires, or the guest send an interrupt after posting new request. If it
+ * is the timer, the carrier is turned off here.
+ * */
+static void xenvif_rx_purge_event(struct xenvif_queue *queue)
+{
+ /* Either the last unsuccesful skb or at least 1 slot should fit */
+ int needed = queue->rx_last_skb_slots ?
+ queue->rx_last_skb_slots : 1;
+
+ /* It is assumed that if the guest post new slots after this, the RX
+ * interrupt will set the QUEUE_STATUS_RX_PURGE_EVENT bit and wake up
+ * the thread again
+ */
+ set_bit(QUEUE_STATUS_RX_STALLED, &queue->status);
+ if (!xenvif_rx_ring_slots_available(queue, needed)) {
+ rtnl_lock();
+ if (netif_carrier_ok(queue->vif->dev)) {
+ /* Timer fired and there are still no slots. Turn off
+ * everything except the interrupts
+ */
+ netif_carrier_off(queue->vif->dev);
+ skb_queue_purge(&queue->rx_queue);
+ queue->rx_last_skb_slots = 0;
+ if (net_ratelimit())
+ netdev_err(queue->vif->dev, "Carrier off due to lack of guest response on queue %d\n", queue->id);
+ } else {
+ /* Probably an another queue already turned the carrier
+ * off, make sure nothing is stucked in the internal
+ * queue of this queue
+ */
+ skb_queue_purge(&queue->rx_queue);
+ queue->rx_last_skb_slots = 0;
+ }
+ rtnl_unlock();
+ } else if (!netif_carrier_ok(queue->vif->dev)) {
+ unsigned int num_queues = queue->vif->num_queues;
+ unsigned int i;
+ /* The carrier was down, but an interrupt kicked
+ * the thread again after new requests were
+ * posted
+ */
+ clear_bit(QUEUE_STATUS_RX_STALLED,
+ &queue->status);
+ rtnl_lock();
+ netif_carrier_on(queue->vif->dev);
+ netif_tx_wake_all_queues(queue->vif->dev);
+ rtnl_unlock();
+
+ for (i = 0; i < num_queues; i++) {
+ struct xenvif_queue *temp = &queue->vif->queues[i];
+
+ xenvif_napi_schedule_or_enable_events(temp);
+ }
+ if (net_ratelimit())
+ netdev_err(queue->vif->dev, "Carrier on again\n");
+ } else {
+ /* Queuing were stopped, but the guest posted
+ * new requests and sent an interrupt
+ */
+ clear_bit(QUEUE_STATUS_RX_STALLED,
+ &queue->status);
+ del_timer_sync(&queue->rx_stalled);
+ xenvif_start_queue(queue);
+ }
+}
+
int xenvif_kthread_guest_rx(void *data)
{
struct xenvif_queue *queue = data;
@@ -1904,8 +2015,12 @@ int xenvif_kthread_guest_rx(void *data)
wait_event_interruptible(queue->wq,
rx_work_todo(queue) ||
queue->vif->disabled ||
+ test_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status) ||
kthread_should_stop());
+ if (kthread_should_stop())
+ break;
+
/* This frontend is found to be rogue, disable it in
* kthread context. Currently this is only set when
* netback finds out frontend sends malformed packet,
@@ -1913,26 +2028,29 @@ int xenvif_kthread_guest_rx(void *data)
* context so we defer it here, if this thread is
* associated with queue 0.
*/
- if (unlikely(queue->vif->disabled && netif_carrier_ok(queue->vif->dev) && queue->id == 0))
+ if (unlikely(queue->vif->disabled && queue->id == 0)) {
xenvif_carrier_off(queue->vif);
-
- if (kthread_should_stop())
- break;
-
- if (queue->rx_queue_purge) {
+ } else if (unlikely(queue->vif->disabled)) {
+ /* kthread_stop() would be called upon this thread soon,
+ * be a bit proactive
+ */
skb_queue_purge(&queue->rx_queue);
- queue->rx_queue_purge = false;
+ queue->rx_last_skb_slots = 0;
+ } else if (unlikely(test_and_clear_bit(QUEUE_STATUS_RX_PURGE_EVENT,
+ &queue->status))) {
+ xenvif_rx_purge_event(queue);
+ } else if (!netif_carrier_ok(queue->vif->dev)) {
+ /* Another queue stalled and turned the carrier off, so
+ * purge the internal queue of queues which were not
+ * blocked
+ */
+ skb_queue_purge(&queue->rx_queue);
+ queue->rx_last_skb_slots = 0;
}
if (!skb_queue_empty(&queue->rx_queue))
xenvif_rx_action(queue);
- if (skb_queue_empty(&queue->rx_queue) &&
- xenvif_queue_stopped(queue)) {
- del_timer_sync(&queue->wake_queue);
- xenvif_start_queue(queue);
- }
-
cond_resched();
}
@@ -1943,15 +2061,24 @@ int xenvif_kthread_guest_rx(void *data)
return 0;
}
+static bool xenvif_dealloc_kthread_should_stop(struct xenvif_queue *queue)
+{
+ /* Dealloc thread must remain running until all inflight
+ * packets complete.
+ */
+ return kthread_should_stop() &&
+ !atomic_read(&queue->inflight_packets);
+}
+
int xenvif_dealloc_kthread(void *data)
{
struct xenvif_queue *queue = data;
- while (!kthread_should_stop()) {
+ for (;;) {
wait_event_interruptible(queue->dealloc_wq,
tx_dealloc_work_todo(queue) ||
- kthread_should_stop());
- if (kthread_should_stop())
+ xenvif_dealloc_kthread_should_stop(queue));
+ if (xenvif_dealloc_kthread_should_stop(queue))
break;
xenvif_tx_dealloc_action(queue);
@@ -1987,6 +2114,13 @@ static int __init netback_init(void)
rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs);
+#ifdef CONFIG_DEBUG_FS
+ xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL);
+ if (IS_ERR_OR_NULL(xen_netback_dbg_root))
+ pr_warn("Init of debugfs returned %ld!\n",
+ PTR_ERR(xen_netback_dbg_root));
+#endif /* CONFIG_DEBUG_FS */
+
return 0;
failed_init:
@@ -1997,6 +2131,10 @@ module_init(netback_init);
static void __exit netback_fini(void)
{
+#ifdef CONFIG_DEBUG_FS
+ if (!IS_ERR_OR_NULL(xen_netback_dbg_root))
+ debugfs_remove_recursive(xen_netback_dbg_root);
+#endif /* CONFIG_DEBUG_FS */
xenvif_xenbus_fini();
}
module_exit(netback_fini);
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index 96c63dc2509..9c47b897b6d 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -44,6 +44,177 @@ static void unregister_hotplug_status_watch(struct backend_info *be);
static void set_backend_state(struct backend_info *be,
enum xenbus_state state);
+#ifdef CONFIG_DEBUG_FS
+struct dentry *xen_netback_dbg_root = NULL;
+
+static int xenvif_read_io_ring(struct seq_file *m, void *v)
+{
+ struct xenvif_queue *queue = m->private;
+ struct xen_netif_tx_back_ring *tx_ring = &queue->tx;
+ struct xen_netif_rx_back_ring *rx_ring = &queue->rx;
+
+ if (tx_ring->sring) {
+ struct xen_netif_tx_sring *sring = tx_ring->sring;
+
+ seq_printf(m, "Queue %d\nTX: nr_ents %u\n", queue->id,
+ tx_ring->nr_ents);
+ seq_printf(m, "req prod %u (%d) cons %u (%d) event %u (%d)\n",
+ sring->req_prod,
+ sring->req_prod - sring->rsp_prod,
+ tx_ring->req_cons,
+ tx_ring->req_cons - sring->rsp_prod,
+ sring->req_event,
+ sring->req_event - sring->rsp_prod);
+ seq_printf(m, "rsp prod %u (base) pvt %u (%d) event %u (%d)\n",
+ sring->rsp_prod,
+ tx_ring->rsp_prod_pvt,
+ tx_ring->rsp_prod_pvt - sring->rsp_prod,
+ sring->rsp_event,
+ sring->rsp_event - sring->rsp_prod);
+ seq_printf(m, "pending prod %u pending cons %u nr_pending_reqs %u\n",
+ queue->pending_prod,
+ queue->pending_cons,
+ nr_pending_reqs(queue));
+ seq_printf(m, "dealloc prod %u dealloc cons %u dealloc_queue %u\n\n",
+ queue->dealloc_prod,
+ queue->dealloc_cons,
+ queue->dealloc_prod - queue->dealloc_cons);
+ }
+
+ if (rx_ring->sring) {
+ struct xen_netif_rx_sring *sring = rx_ring->sring;
+
+ seq_printf(m, "RX: nr_ents %u\n", rx_ring->nr_ents);
+ seq_printf(m, "req prod %u (%d) cons %u (%d) event %u (%d)\n",
+ sring->req_prod,
+ sring->req_prod - sring->rsp_prod,
+ rx_ring->req_cons,
+ rx_ring->req_cons - sring->rsp_prod,
+ sring->req_event,
+ sring->req_event - sring->rsp_prod);
+ seq_printf(m, "rsp prod %u (base) pvt %u (%d) event %u (%d)\n\n",
+ sring->rsp_prod,
+ rx_ring->rsp_prod_pvt,
+ rx_ring->rsp_prod_pvt - sring->rsp_prod,
+ sring->rsp_event,
+ sring->rsp_event - sring->rsp_prod);
+ }
+
+ seq_printf(m, "NAPI state: %lx NAPI weight: %d TX queue len %u\n"
+ "Credit timer_pending: %d, credit: %lu, usec: %lu\n"
+ "remaining: %lu, expires: %lu, now: %lu\n",
+ queue->napi.state, queue->napi.weight,
+ skb_queue_len(&queue->tx_queue),
+ timer_pending(&queue->credit_timeout),
+ queue->credit_bytes,
+ queue->credit_usec,
+ queue->remaining_credit,
+ queue->credit_timeout.expires,
+ jiffies);
+
+ return 0;
+}
+
+#define XENVIF_KICK_STR "kick"
+#define BUFFER_SIZE 32
+
+static ssize_t
+xenvif_write_io_ring(struct file *filp, const char __user *buf, size_t count,
+ loff_t *ppos)
+{
+ struct xenvif_queue *queue =
+ ((struct seq_file *)filp->private_data)->private;
+ int len;
+ char write[BUFFER_SIZE];
+
+ /* don't allow partial writes and check the length */
+ if (*ppos != 0)
+ return 0;
+ if (count >= sizeof(write))
+ return -ENOSPC;
+
+ len = simple_write_to_buffer(write,
+ sizeof(write) - 1,
+ ppos,
+ buf,
+ count);
+ if (len < 0)
+ return len;
+
+ write[len] = '\0';
+
+ if (!strncmp(write, XENVIF_KICK_STR, sizeof(XENVIF_KICK_STR) - 1))
+ xenvif_interrupt(0, (void *)queue);
+ else {
+ pr_warn("Unknown command to io_ring_q%d. Available: kick\n",
+ queue->id);
+ count = -EINVAL;
+ }
+ return count;
+}
+
+static int xenvif_dump_open(struct inode *inode, struct file *filp)
+{
+ int ret;
+ void *queue = NULL;
+
+ if (inode->i_private)
+ queue = inode->i_private;
+ ret = single_open(filp, xenvif_read_io_ring, queue);
+ filp->f_mode |= FMODE_PWRITE;
+ return ret;
+}
+
+static const struct file_operations xenvif_dbg_io_ring_ops_fops = {
+ .owner = THIS_MODULE,
+ .open = xenvif_dump_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .write = xenvif_write_io_ring,
+};
+
+static void xenvif_debugfs_addif(struct xenvif *vif)
+{
+ struct dentry *pfile;
+ int i;
+
+ if (IS_ERR_OR_NULL(xen_netback_dbg_root))
+ return;
+
+ vif->xenvif_dbg_root = debugfs_create_dir(vif->dev->name,
+ xen_netback_dbg_root);
+ if (!IS_ERR_OR_NULL(vif->xenvif_dbg_root)) {
+ for (i = 0; i < vif->num_queues; ++i) {
+ char filename[sizeof("io_ring_q") + 4];
+
+ snprintf(filename, sizeof(filename), "io_ring_q%d", i);
+ pfile = debugfs_create_file(filename,
+ S_IRUSR | S_IWUSR,
+ vif->xenvif_dbg_root,
+ &vif->queues[i],
+ &xenvif_dbg_io_ring_ops_fops);
+ if (IS_ERR_OR_NULL(pfile))
+ pr_warn("Creation of io_ring file returned %ld!\n",
+ PTR_ERR(pfile));
+ }
+ } else
+ netdev_warn(vif->dev,
+ "Creation of vif debugfs dir returned %ld!\n",
+ PTR_ERR(vif->xenvif_dbg_root));
+}
+
+static void xenvif_debugfs_delif(struct xenvif *vif)
+{
+ if (IS_ERR_OR_NULL(xen_netback_dbg_root))
+ return;
+
+ if (!IS_ERR_OR_NULL(vif->xenvif_dbg_root))
+ debugfs_remove_recursive(vif->xenvif_dbg_root);
+ vif->xenvif_dbg_root = NULL;
+}
+#endif /* CONFIG_DEBUG_FS */
+
static int netback_remove(struct xenbus_device *dev)
{
struct backend_info *be = dev_get_drvdata(&dev->dev);
@@ -246,8 +417,12 @@ static void backend_create_xenvif(struct backend_info *be)
static void backend_disconnect(struct backend_info *be)
{
- if (be->vif)
+ if (be->vif) {
+#ifdef CONFIG_DEBUG_FS
+ xenvif_debugfs_delif(be->vif);
+#endif /* CONFIG_DEBUG_FS */
xenvif_disconnect(be->vif);
+ }
}
static void backend_connect(struct backend_info *be)
@@ -527,9 +702,7 @@ static void connect(struct backend_info *be)
/* Use the number of queues requested by the frontend */
be->vif->queues = vzalloc(requested_num_queues *
sizeof(struct xenvif_queue));
- rtnl_lock();
- netif_set_real_num_tx_queues(be->vif->dev, requested_num_queues);
- rtnl_unlock();
+ be->vif->num_queues = requested_num_queues;
for (queue_index = 0; queue_index < requested_num_queues; ++queue_index) {
queue = &be->vif->queues[queue_index];
@@ -546,9 +719,7 @@ static void connect(struct backend_info *be)
* earlier queues can be destroyed using the regular
* disconnect logic.
*/
- rtnl_lock();
- netif_set_real_num_tx_queues(be->vif->dev, queue_index);
- rtnl_unlock();
+ be->vif->num_queues = queue_index;
goto err;
}
@@ -561,13 +732,23 @@ static void connect(struct backend_info *be)
* and also clean up any previously initialised queues.
*/
xenvif_deinit_queue(queue);
- rtnl_lock();
- netif_set_real_num_tx_queues(be->vif->dev, queue_index);
- rtnl_unlock();
+ be->vif->num_queues = queue_index;
goto err;
}
}
+#ifdef CONFIG_DEBUG_FS
+ xenvif_debugfs_addif(be->vif);
+#endif /* CONFIG_DEBUG_FS */
+
+ /* Initialisation completed, tell core driver the number of
+ * active queues.
+ */
+ rtnl_lock();
+ netif_set_real_num_tx_queues(be->vif->dev, requested_num_queues);
+ netif_set_real_num_rx_queues(be->vif->dev, requested_num_queues);
+ rtnl_unlock();
+
xenvif_carrier_on(be->vif);
unregister_hotplug_status_watch(be);
@@ -582,13 +763,11 @@ static void connect(struct backend_info *be)
return;
err:
- if (be->vif->dev->real_num_tx_queues > 0)
+ if (be->vif->num_queues > 0)
xenvif_disconnect(be->vif); /* Clean up existing queues */
vfree(be->vif->queues);
be->vif->queues = NULL;
- rtnl_lock();
- netif_set_real_num_tx_queues(be->vif->dev, 0);
- rtnl_unlock();
+ be->vif->num_queues = 0;
return;
}
@@ -596,7 +775,7 @@ err:
static int connect_rings(struct backend_info *be, struct xenvif_queue *queue)
{
struct xenbus_device *dev = be->dev;
- unsigned int num_queues = queue->vif->dev->real_num_tx_queues;
+ unsigned int num_queues = queue->vif->num_queues;
unsigned long tx_ring_ref, rx_ring_ref;
unsigned int tx_evtchn, rx_evtchn;
int err;