Diffstat (limited to 'drivers/net/xen-netback')
-rw-r--r--  drivers/net/xen-netback/common.h    |  44
-rw-r--r--  drivers/net/xen-netback/interface.c | 128
-rw-r--r--  drivers/net/xen-netback/netback.c   | 339
-rw-r--r--  drivers/net/xen-netback/xenbus.c    |  49
4 files changed, 308 insertions, 252 deletions
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index ef3026f46a3..083ecc93fe5 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -165,6 +165,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
u16 dealloc_ring[MAX_PENDING_REQS];
struct task_struct *dealloc_task;
wait_queue_head_t dealloc_wq;
+ atomic_t inflight_packets;
/* Use kthread for guest RX */
struct task_struct *task;
@@ -175,10 +176,11 @@ struct xenvif_queue { /* Per-queue data for xenvif */
char rx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-rx */
struct xen_netif_rx_back_ring rx;
struct sk_buff_head rx_queue;
- RING_IDX rx_last_skb_slots;
- unsigned long status;
- struct timer_list rx_stalled;
+ unsigned int rx_queue_max;
+ unsigned int rx_queue_len;
+ unsigned long last_rx_time;
+ bool stalled;
struct gnttab_copy grant_copy_op[MAX_GRANT_COPY_OPS];
@@ -198,18 +200,14 @@ struct xenvif_queue { /* Per-queue data for xenvif */
struct xenvif_stats stats;
};
+/* Maximum number of Rx slots a to-guest packet may use, including the
+ * slot needed for GSO meta-data.
+ */
+#define XEN_NETBK_RX_SLOTS_MAX (MAX_SKB_FRAGS + 1)
+
enum state_bit_shift {
/* This bit marks that the vif is connected */
VIF_STATUS_CONNECTED,
- /* This bit signals the RX thread that queuing was stopped (in
- * start_xmit), and either the timer fired or an RX interrupt came
- */
- QUEUE_STATUS_RX_PURGE_EVENT,
- /* This bit tells the interrupt handler that this queue was the reason
- * for the carrier off, so it should kick the thread. Only queues which
- * brought it down can turn on the carrier.
- */
- QUEUE_STATUS_RX_STALLED
};
struct xenvif {
@@ -227,9 +225,6 @@ struct xenvif {
u8 ip_csum:1;
u8 ipv6_csum:1;
- /* Internal feature information. */
- u8 can_queue:1; /* can queue packets for receiver? */
-
/* Is this interface disabled? True when backend discovers
* frontend is rogue.
*/
@@ -239,6 +234,9 @@ struct xenvif {
/* Queues */
struct xenvif_queue *queues;
unsigned int num_queues; /* active queues, resource allocated */
+ unsigned int stalled_queues;
+
+ spinlock_t lock;
#ifdef CONFIG_DEBUG_FS
struct dentry *xenvif_dbg_root;
@@ -248,6 +246,14 @@ struct xenvif {
struct net_device *dev;
};
+struct xenvif_rx_cb {
+ unsigned long expires;
+ int meta_slots_used;
+ bool full_coalesce;
+};
+
+#define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb)
+
static inline struct xenbus_device *xenvif_to_xenbus_device(struct xenvif *vif)
{
return to_xenbus_device(vif->dev->dev.parent);
@@ -271,8 +277,6 @@ void xenvif_xenbus_fini(void);
int xenvif_schedulable(struct xenvif *vif);
-int xenvif_must_stop_queue(struct xenvif_queue *queue);
-
int xenvif_queue_stopped(struct xenvif_queue *queue);
void xenvif_wake_queue(struct xenvif_queue *queue);
@@ -295,6 +299,8 @@ void xenvif_kick_thread(struct xenvif_queue *queue);
int xenvif_dealloc_kthread(void *data);
+void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
+
/* Determine whether the needed number of slots (req) are available,
* and set req_event if not.
*/
@@ -329,4 +335,8 @@ extern unsigned int xenvif_max_queues;
extern struct dentry *xen_netback_dbg_root;
#endif
+void xenvif_skb_zerocopy_prepare(struct xenvif_queue *queue,
+ struct sk_buff *skb);
+void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue);
+
#endif /* __XEN_NETBACK__COMMON_H__ */
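
The new struct xenvif_rx_cb piggybacks per-packet state on the 48-byte skb->cb scratch area via the XENVIF_RX_CB() cast. A minimal userspace model of the pattern, assuming only the cb-size invariant from struct sk_buff (the main() and assertion scaffolding are illustrative):

#include <assert.h>
#include <string.h>

struct sk_buff { char cb[48]; };    /* stand-in for the kernel's skb */

struct xenvif_rx_cb {
    unsigned long expires;          /* jiffies deadline for this packet */
    int meta_slots_used;
    int full_coalesce;
};

#define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb)

/* The cast is only safe while the struct fits in cb[]. */
_Static_assert(sizeof(struct xenvif_rx_cb) <= sizeof(((struct sk_buff *)0)->cb),
               "xenvif_rx_cb too large for skb->cb");

int main(void)
{
    struct sk_buff skb;

    memset(&skb, 0, sizeof(skb));
    XENVIF_RX_CB(&skb)->expires = 12345UL;
    assert(XENVIF_RX_CB(&skb)->expires == 12345UL);
    return 0;
}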
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 48a55cda979..895fe84011e 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -43,20 +43,31 @@
#define XENVIF_QUEUE_LENGTH 32
#define XENVIF_NAPI_WEIGHT 64
-static inline void xenvif_stop_queue(struct xenvif_queue *queue)
-{
- struct net_device *dev = queue->vif->dev;
+/* Number of bytes allowed on the internal guest Rx queue. */
+#define XENVIF_RX_QUEUE_BYTES (XEN_NETIF_RX_RING_SIZE/2 * PAGE_SIZE)
- if (!queue->vif->can_queue)
- return;
+/* This function sets SKBTX_DEV_ZEROCOPY and increases the inflight
+ * counter. The counter must be increased because the core network
+ * stack calls into xenvif_zerocopy_callback, which in turn calls
+ * xenvif_skb_zerocopy_complete.
+ */
+void xenvif_skb_zerocopy_prepare(struct xenvif_queue *queue,
+ struct sk_buff *skb)
+{
+ skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+ atomic_inc(&queue->inflight_packets);
+}
- netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
+void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue)
+{
+ atomic_dec(&queue->inflight_packets);
}
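
A sketch of the invariant the prepare/complete pair maintains, modeled with C11 atomics; the may_stop() helper is a stand-in for the driver's xenvif_dealloc_kthread_should_stop() check further down:

#include <assert.h>
#include <stdatomic.h>

static atomic_int inflight_packets;

/* Called for every skb flagged SKBTX_DEV_ZEROCOPY before it is handed
 * to the stack; the zerocopy destructor callback undoes it. */
static void zerocopy_prepare(void)  { atomic_fetch_add(&inflight_packets, 1); }
static void zerocopy_complete(void) { atomic_fetch_sub(&inflight_packets, 1); }

/* The dealloc thread may only exit once no callbacks are outstanding. */
static int may_stop(void) { return atomic_load(&inflight_packets) == 0; }

int main(void)
{
    zerocopy_prepare();
    assert(!may_stop());    /* callback still pending */
    zerocopy_complete();
    assert(may_stop());
    return 0;
}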
int xenvif_schedulable(struct xenvif *vif)
{
return netif_running(vif->dev) &&
- test_bit(VIF_STATUS_CONNECTED, &vif->status);
+ test_bit(VIF_STATUS_CONNECTED, &vif->status) &&
+ !vif->disabled;
}
static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id)
@@ -78,12 +89,8 @@ int xenvif_poll(struct napi_struct *napi, int budget)
/* This vif is rogue, we pretend there is nothing to do
* for this vif so that it is descheduled from NAPI. But this
* interface will be turned off in thread context later.
- * Also, if a guest doesn't post enough slots to receive data on one of
- * its queues, the carrier goes down and NAPI is descheduled here so
- * the guest can't send more packets until it's ready to receive.
*/
- if (unlikely(queue->vif->disabled ||
- !netif_carrier_ok(queue->vif->dev))) {
+ if (unlikely(queue->vif->disabled)) {
napi_complete(napi);
return 0;
}
@@ -101,16 +108,7 @@ int xenvif_poll(struct napi_struct *napi, int budget)
static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
{
struct xenvif_queue *queue = dev_id;
- struct netdev_queue *net_queue =
- netdev_get_tx_queue(queue->vif->dev, queue->id);
- /* QUEUE_STATUS_RX_PURGE_EVENT is only set if either QDisc was off OR
- * the carrier went down and this queue was previously blocked
- */
- if (unlikely(netif_tx_queue_stopped(net_queue) ||
- (!netif_carrier_ok(queue->vif->dev) &&
- test_bit(QUEUE_STATUS_RX_STALLED, &queue->status))))
- set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status);
xenvif_kick_thread(queue);
return IRQ_HANDLED;
@@ -138,24 +136,13 @@ void xenvif_wake_queue(struct xenvif_queue *queue)
netif_tx_wake_queue(netdev_get_tx_queue(dev, id));
}
-/* Callback to wake the queue's thread and turn the carrier off on timeout */
-static void xenvif_rx_stalled(unsigned long data)
-{
- struct xenvif_queue *queue = (struct xenvif_queue *)data;
-
- if (xenvif_queue_stopped(queue)) {
- set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status);
- xenvif_kick_thread(queue);
- }
-}
-
static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct xenvif *vif = netdev_priv(dev);
struct xenvif_queue *queue = NULL;
unsigned int num_queues = vif->num_queues;
u16 index;
- int min_slots_needed;
+ struct xenvif_rx_cb *cb;
BUG_ON(skb->dev != dev);
@@ -178,30 +165,10 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
!xenvif_schedulable(vif))
goto drop;
- /* At best we'll need one slot for the header and one for each
- * frag.
- */
- min_slots_needed = 1 + skb_shinfo(skb)->nr_frags;
-
- /* If the skb is GSO then we'll also need an extra slot for the
- * metadata.
- */
- if (skb_is_gso(skb))
- min_slots_needed++;
-
- /* If the skb can't possibly fit in the remaining slots
- * then turn off the queue to give the ring a chance to
- * drain.
- */
- if (!xenvif_rx_ring_slots_available(queue, min_slots_needed)) {
- queue->rx_stalled.function = xenvif_rx_stalled;
- queue->rx_stalled.data = (unsigned long)queue;
- xenvif_stop_queue(queue);
- mod_timer(&queue->rx_stalled,
- jiffies + rx_drain_timeout_jiffies);
- }
+ cb = XENVIF_RX_CB(skb);
+ cb->expires = jiffies + rx_drain_timeout_jiffies;
- skb_queue_tail(&queue->rx_queue, skb);
+ xenvif_rx_queue_tail(queue, skb);
xenvif_kick_thread(queue);
return NETDEV_TX_OK;
@@ -452,6 +419,8 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
vif->queues = NULL;
vif->num_queues = 0;
+ spin_lock_init(&vif->lock);
+
dev->netdev_ops = &xenvif_netdev_ops;
dev->hw_features = NETIF_F_SG |
NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
@@ -495,6 +464,8 @@ int xenvif_init_queue(struct xenvif_queue *queue)
init_timer(&queue->credit_timeout);
queue->credit_window_start = get_jiffies_64();
+ queue->rx_queue_max = XENVIF_RX_QUEUE_BYTES;
+
skb_queue_head_init(&queue->rx_queue);
skb_queue_head_init(&queue->tx_queue);
@@ -526,11 +497,6 @@ int xenvif_init_queue(struct xenvif_queue *queue)
queue->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
}
- init_timer(&queue->rx_stalled);
-
- netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll,
- XENVIF_NAPI_WEIGHT);
-
return 0;
}
@@ -541,7 +507,6 @@ void xenvif_carrier_on(struct xenvif *vif)
dev_set_mtu(vif->dev, ETH_DATA_LEN);
netdev_update_features(vif->dev);
set_bit(VIF_STATUS_CONNECTED, &vif->status);
- netif_carrier_on(vif->dev);
if (netif_running(vif->dev))
xenvif_up(vif);
rtnl_unlock();
@@ -564,6 +529,10 @@ int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
init_waitqueue_head(&queue->wq);
init_waitqueue_head(&queue->dealloc_wq);
+ atomic_set(&queue->inflight_packets, 0);
+
+ netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll,
+ XENVIF_NAPI_WEIGHT);
if (tx_evtchn == rx_evtchn) {
/* feature-split-event-channels == 0 */
@@ -597,6 +566,8 @@ int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
disable_irq(queue->rx_irq);
}
+ queue->stalled = true;
+
task = kthread_create(xenvif_kthread_guest_rx,
(void *)queue, "%s-guest-rx", queue->name);
if (IS_ERR(task)) {
@@ -646,25 +617,6 @@ void xenvif_carrier_off(struct xenvif *vif)
rtnl_unlock();
}
-static void xenvif_wait_unmap_timeout(struct xenvif_queue *queue,
- unsigned int worst_case_skb_lifetime)
-{
- int i, unmap_timeout = 0;
-
- for (i = 0; i < MAX_PENDING_REQS; ++i) {
- if (queue->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) {
- unmap_timeout++;
- schedule_timeout(msecs_to_jiffies(1000));
- if (unmap_timeout > worst_case_skb_lifetime &&
- net_ratelimit())
- netdev_err(queue->vif->dev,
- "Page still granted! Index: %x\n",
- i);
- i = -1;
- }
- }
-}
-
void xenvif_disconnect(struct xenvif *vif)
{
struct xenvif_queue *queue = NULL;
@@ -676,8 +628,9 @@ void xenvif_disconnect(struct xenvif *vif)
for (queue_index = 0; queue_index < num_queues; ++queue_index) {
queue = &vif->queues[queue_index];
+ netif_napi_del(&queue->napi);
+
if (queue->task) {
- del_timer_sync(&queue->rx_stalled);
kthread_stop(queue->task);
queue->task = NULL;
}
@@ -708,7 +661,6 @@ void xenvif_disconnect(struct xenvif *vif)
void xenvif_deinit_queue(struct xenvif_queue *queue)
{
free_xenballooned_pages(MAX_PENDING_REQS, queue->mmap_pages);
- netif_napi_del(&queue->napi);
}
void xenvif_free(struct xenvif *vif)
@@ -716,21 +668,11 @@ void xenvif_free(struct xenvif *vif)
struct xenvif_queue *queue = NULL;
unsigned int num_queues = vif->num_queues;
unsigned int queue_index;
- /* Here we want to avoid timeout messages if an skb can be legitimately
- * stuck somewhere else. Realistically this could be another vif's
- * internal or QDisc queue. That other vif also has this
- * rx_drain_timeout_msecs timeout, so give it time to drain out.
- * Although if that other guest wakes up just before its timeout happens
- * and takes only one skb from QDisc, it can hold onto other skbs for a
- * longer period.
- */
- unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000);
unregister_netdev(vif->dev);
for (queue_index = 0; queue_index < num_queues; ++queue_index) {
queue = &vif->queues[queue_index];
- xenvif_wait_unmap_timeout(queue, worst_case_skb_lifetime);
xenvif_deinit_queue(queue);
}
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index aa2093325be..6563f0713fc 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -55,13 +55,20 @@
bool separate_tx_rx_irq = 1;
module_param(separate_tx_rx_irq, bool, 0644);
-/* When guest ring is filled up, qdisc queues the packets for us, but we have
- * to timeout them, otherwise other guests' packets can get stuck there
+/* The time that packets can stay on the guest Rx internal queue
+ * before they are dropped.
*/
unsigned int rx_drain_timeout_msecs = 10000;
module_param(rx_drain_timeout_msecs, uint, 0444);
unsigned int rx_drain_timeout_jiffies;
+/* The length of time before the frontend is considered unresponsive
+ * because it isn't providing Rx slots.
+ */
+static unsigned int rx_stall_timeout_msecs = 60000;
+module_param(rx_stall_timeout_msecs, uint, 0444);
+static unsigned int rx_stall_timeout_jiffies;
+
unsigned int xenvif_max_queues;
module_param_named(max_queues, xenvif_max_queues, uint, 0644);
MODULE_PARM_DESC(max_queues,
@@ -83,7 +90,6 @@ static void make_tx_response(struct xenvif_queue *queue,
s8 st);
static inline int tx_work_todo(struct xenvif_queue *queue);
-static inline int rx_work_todo(struct xenvif_queue *queue);
static struct xen_netif_rx_response *make_rx_response(struct xenvif_queue *queue,
u16 id,
@@ -163,6 +169,69 @@ bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue, int needed)
return false;
}
+void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&queue->rx_queue.lock, flags);
+
+ __skb_queue_tail(&queue->rx_queue, skb);
+
+ queue->rx_queue_len += skb->len;
+ if (queue->rx_queue_len > queue->rx_queue_max)
+ netif_tx_stop_queue(netdev_get_tx_queue(queue->vif->dev, queue->id));
+
+ spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
+}
+
+static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue)
+{
+ struct sk_buff *skb;
+
+ spin_lock_irq(&queue->rx_queue.lock);
+
+ skb = __skb_dequeue(&queue->rx_queue);
+ if (skb)
+ queue->rx_queue_len -= skb->len;
+
+ spin_unlock_irq(&queue->rx_queue.lock);
+
+ return skb;
+}
+
+static void xenvif_rx_queue_maybe_wake(struct xenvif_queue *queue)
+{
+ spin_lock_irq(&queue->rx_queue.lock);
+
+ if (queue->rx_queue_len < queue->rx_queue_max)
+ netif_tx_wake_queue(netdev_get_tx_queue(queue->vif->dev, queue->id));
+
+ spin_unlock_irq(&queue->rx_queue.lock);
+}
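
Together, tail/dequeue/maybe_wake implement byte-count backpressure on the internal Rx queue. A self-contained model of the accounting, with locking elided and a bool standing in for netif_tx_stop_queue()/netif_tx_wake_queue():

#include <assert.h>
#include <stdbool.h>

struct rxq {
    unsigned int len, max;  /* queued bytes and the byte cap */
    bool tx_stopped;
};

static void queue_tail(struct rxq *q, unsigned int skb_len)
{
    q->len += skb_len;
    if (q->len > q->max)
        q->tx_stopped = true;   /* netif_tx_stop_queue() */
}

static void dequeue(struct rxq *q, unsigned int skb_len)
{
    q->len -= skb_len;
}

static void maybe_wake(struct rxq *q)
{
    if (q->len < q->max)
        q->tx_stopped = false;  /* netif_tx_wake_queue() */
}

int main(void)
{
    struct rxq q = { 0, 1000, false };

    queue_tail(&q, 1500);
    assert(q.tx_stopped);       /* over the cap: stop the netdev queue */
    dequeue(&q, 1500);
    maybe_wake(&q);
    assert(!q.tx_stopped);      /* drained below the cap: wake it */
    return 0;
}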
+
+static void xenvif_rx_queue_purge(struct xenvif_queue *queue)
+{
+ struct sk_buff *skb;
+ while ((skb = xenvif_rx_dequeue(queue)) != NULL)
+ kfree_skb(skb);
+}
+
+static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue)
+{
+ struct sk_buff *skb;
+
+ for (;;) {
+ skb = skb_peek(&queue->rx_queue);
+ if (!skb)
+ break;
+ if (time_before(jiffies, XENVIF_RX_CB(skb)->expires))
+ break;
+ xenvif_rx_dequeue(queue);
+ kfree_skb(skb);
+ }
+}
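
xenvif_rx_queue_drop_expired() exploits the fact that packets are queued in arrival order, so their deadlines are non-decreasing and the scan can stop at the first unexpired head. A toy model with a wrap-safe time_before() mirroring the kernel macro:

#include <assert.h>
#include <stdlib.h>

struct pkt { unsigned long expires; struct pkt *next; };

static unsigned long jiffies;       /* advanced by the test below */

static int time_before(unsigned long a, unsigned long b)
{
    return (long)(a - b) < 0;       /* wrap-safe comparison */
}

/* Free expired packets from the head; stop at the first live one. */
static struct pkt *drop_expired(struct pkt *head)
{
    while (head && !time_before(jiffies, head->expires)) {
        struct pkt *dead = head;

        head = head->next;
        free(dead);
    }
    return head;
}

int main(void)
{
    struct pkt *b = malloc(sizeof(*b));
    struct pkt *a = malloc(sizeof(*a));

    b->expires = 200; b->next = NULL;
    a->expires = 100; a->next = b;

    jiffies = 150;                  /* a has expired, b is still live */
    a = drop_expired(a);
    assert(a == b && a->expires == 200);
    free(a);
    return 0;
}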
+
/*
* Returns true if we should start a new receive buffer instead of
* adding 'size' bytes to a buffer which currently contains 'offset'
@@ -237,13 +306,6 @@ static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif_queue *queue,
return meta;
}
-struct xenvif_rx_cb {
- int meta_slots_used;
- bool full_coalesce;
-};
-
-#define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb)
-
/*
* Set up the grant operations for this fragment. If it's a flipping
* interface, we also set up the unmap request from here.
@@ -587,12 +649,15 @@ static void xenvif_rx_action(struct xenvif_queue *queue)
skb_queue_head_init(&rxq);
- while ((skb = skb_dequeue(&queue->rx_queue)) != NULL) {
+ while (xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX)
+ && (skb = xenvif_rx_dequeue(queue)) != NULL) {
RING_IDX max_slots_needed;
RING_IDX old_req_cons;
RING_IDX ring_slots_used;
int i;
+ queue->last_rx_time = jiffies;
+
/* We need a cheap worst-case estimate for the number of
* slots we'll use.
*/
@@ -634,15 +699,6 @@ static void xenvif_rx_action(struct xenvif_queue *queue)
skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6))
max_slots_needed++;
- /* If the skb may not fit then bail out now */
- if (!xenvif_rx_ring_slots_available(queue, max_slots_needed)) {
- skb_queue_head(&queue->rx_queue, skb);
- need_to_notify = true;
- queue->rx_last_skb_slots = max_slots_needed;
- break;
- } else
- queue->rx_last_skb_slots = 0;
-
old_req_cons = queue->rx.req_cons;
XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo, queue);
ring_slots_used = queue->rx.req_cons - old_req_cons;
@@ -1525,10 +1581,12 @@ static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *s
/* remove traces of mapped pages and frag_list */
skb_frag_list_init(skb);
uarg = skb_shinfo(skb)->destructor_arg;
+ /* increase inflight counter to offset decrement in callback */
+ atomic_inc(&queue->inflight_packets);
uarg->callback(uarg, true);
skb_shinfo(skb)->destructor_arg = NULL;
- skb_shinfo(nskb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+ xenvif_skb_zerocopy_prepare(queue, nskb);
kfree_skb(nskb);
return 0;
@@ -1589,7 +1647,7 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
if (net_ratelimit())
netdev_err(queue->vif->dev,
"Not enough memory to consolidate frag_list!\n");
- skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+ xenvif_skb_zerocopy_prepare(queue, skb);
kfree_skb(skb);
continue;
}
@@ -1609,7 +1667,7 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
"Can't setup checksum in net_tx_action\n");
/* We have to set this flag to trigger the callback */
if (skb_shinfo(skb)->destructor_arg)
- skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+ xenvif_skb_zerocopy_prepare(queue, skb);
kfree_skb(skb);
continue;
}
@@ -1641,7 +1699,7 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
* skb. E.g. the __pskb_pull_tail earlier can do such a thing.
*/
if (skb_shinfo(skb)->destructor_arg) {
- skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+ xenvif_skb_zerocopy_prepare(queue, skb);
queue->stats.tx_zerocopy_sent++;
}
@@ -1681,6 +1739,7 @@ void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success)
queue->stats.tx_zerocopy_success++;
else
queue->stats.tx_zerocopy_fail++;
+ xenvif_skb_zerocopy_complete(queue);
}
static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
@@ -1866,12 +1925,6 @@ void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
}
}
-static inline int rx_work_todo(struct xenvif_queue *queue)
-{
- return (!skb_queue_empty(&queue->rx_queue) &&
- xenvif_rx_ring_slots_available(queue, queue->rx_last_skb_slots));
-}
-
static inline int tx_work_todo(struct xenvif_queue *queue)
{
if (likely(RING_HAS_UNCONSUMED_REQUESTS(&queue->tx)))
@@ -1928,92 +1981,121 @@ err:
return err;
}
-static void xenvif_start_queue(struct xenvif_queue *queue)
+static void xenvif_queue_carrier_off(struct xenvif_queue *queue)
{
- if (xenvif_schedulable(queue->vif))
- xenvif_wake_queue(queue);
+ struct xenvif *vif = queue->vif;
+
+ queue->stalled = true;
+
+ /* At least one queue has stalled? Disable the carrier. */
+ spin_lock(&vif->lock);
+ if (vif->stalled_queues++ == 0) {
+ netdev_info(vif->dev, "Guest Rx stalled");
+ netif_carrier_off(vif->dev);
+ }
+ spin_unlock(&vif->lock);
}
-/* Only called from the queue's thread, it handles the situation when the guest
- * doesn't post enough requests on the receiving ring.
- * First xenvif_start_xmit disables QDisc and starts a timer, and then either the
- * timer fires, or the guest sends an interrupt after posting new requests. If it
- * is the timer, the carrier is turned off here.
- * */
-static void xenvif_rx_purge_event(struct xenvif_queue *queue)
+static void xenvif_queue_carrier_on(struct xenvif_queue *queue)
{
- /* Either the last unsuccessful skb or at least 1 slot should fit */
- int needed = queue->rx_last_skb_slots ?
- queue->rx_last_skb_slots : 1;
+ struct xenvif *vif = queue->vif;
- /* It is assumed that if the guest posts new slots after this, the RX
- * interrupt will set the QUEUE_STATUS_RX_PURGE_EVENT bit and wake up
- * the thread again
- */
- set_bit(QUEUE_STATUS_RX_STALLED, &queue->status);
- if (!xenvif_rx_ring_slots_available(queue, needed)) {
- rtnl_lock();
- if (netif_carrier_ok(queue->vif->dev)) {
- /* Timer fired and there are still no slots. Turn off
- * everything except the interrupts
- */
- netif_carrier_off(queue->vif->dev);
- skb_queue_purge(&queue->rx_queue);
- queue->rx_last_skb_slots = 0;
- if (net_ratelimit())
- netdev_err(queue->vif->dev, "Carrier off due to lack of guest response on queue %d\n", queue->id);
- } else {
- /* Probably another queue already turned the carrier
- * off; make sure nothing is stuck in the internal
- * queue of this queue
- */
- skb_queue_purge(&queue->rx_queue);
- queue->rx_last_skb_slots = 0;
- }
- rtnl_unlock();
- } else if (!netif_carrier_ok(queue->vif->dev)) {
- unsigned int num_queues = queue->vif->num_queues;
- unsigned int i;
- /* The carrier was down, but an interrupt kicked
- * the thread again after new requests were
- * posted
- */
- clear_bit(QUEUE_STATUS_RX_STALLED,
- &queue->status);
- rtnl_lock();
- netif_carrier_on(queue->vif->dev);
- netif_tx_wake_all_queues(queue->vif->dev);
- rtnl_unlock();
+ queue->last_rx_time = jiffies; /* Reset Rx stall detection. */
+ queue->stalled = false;
- for (i = 0; i < num_queues; i++) {
- struct xenvif_queue *temp = &queue->vif->queues[i];
+ /* All queues are ready? Enable the carrier. */
+ spin_lock(&vif->lock);
+ if (--vif->stalled_queues == 0) {
+ netdev_info(vif->dev, "Guest Rx ready");
+ netif_carrier_on(vif->dev);
+ }
+ spin_unlock(&vif->lock);
+}
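
The stalled_queues counter makes carrier state a conjunction over all queues: connect() below initialises it to the number of queues, so the carrier only comes up once every queue has seen the frontend post Rx slots. A compact model of that accounting (a bool replaces netif_carrier_on()/off()):

#include <assert.h>
#include <stdbool.h>

struct vif { unsigned int stalled_queues; bool carrier; };

static void queue_stall(struct vif *v)
{
    if (v->stalled_queues++ == 0)
        v->carrier = false;     /* netif_carrier_off() */
}

static void queue_ready(struct vif *v)
{
    if (--v->stalled_queues == 0)
        v->carrier = true;      /* netif_carrier_on() */
}

int main(void)
{
    struct vif v = { 2, false };    /* both queues start stalled */

    queue_ready(&v);
    assert(!v.carrier);             /* one queue is still stalled */
    queue_ready(&v);
    assert(v.carrier);              /* all ready: carrier up */
    queue_stall(&v);
    assert(!v.carrier);             /* any stall: carrier down */
    return 0;
}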
- xenvif_napi_schedule_or_enable_events(temp);
- }
- if (net_ratelimit())
- netdev_err(queue->vif->dev, "Carrier on again\n");
- } else {
- /* Queuing was stopped, but the guest posted
- * new requests and sent an interrupt
- */
- clear_bit(QUEUE_STATUS_RX_STALLED,
- &queue->status);
- del_timer_sync(&queue->rx_stalled);
- xenvif_start_queue(queue);
+static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue)
+{
+ RING_IDX prod, cons;
+
+ prod = queue->rx.sring->req_prod;
+ cons = queue->rx.req_cons;
+
+ return !queue->stalled
+ && prod - cons < XEN_NETBK_RX_SLOTS_MAX
+ && time_after(jiffies,
+ queue->last_rx_time + rx_stall_timeout_jiffies);
+}
+
+static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
+{
+ RING_IDX prod, cons;
+
+ prod = queue->rx.sring->req_prod;
+ cons = queue->rx.req_cons;
+
+ return queue->stalled
+ && prod - cons >= XEN_NETBK_RX_SLOTS_MAX;
+}
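
Both predicates compare prod - cons against XEN_NETBK_RX_SLOTS_MAX; with unsigned RING_IDX values the subtraction stays correct even after the producer index wraps. A small demonstration:

#include <assert.h>

typedef unsigned int RING_IDX;

/* Unconsumed request slots, wrap-safe thanks to unsigned arithmetic. */
static RING_IDX slots_available(RING_IDX prod, RING_IDX cons)
{
    return prod - cons;
}

int main(void)
{
    assert(slots_available(10, 4) == 6);
    /* prod wrapped past 0: 0xFFFFFFFE -> 3 means 5 new requests */
    assert(slots_available(3u, 0xFFFFFFFEu) == 5);
    return 0;
}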
+
+static bool xenvif_have_rx_work(struct xenvif_queue *queue)
+{
+ return (!skb_queue_empty(&queue->rx_queue)
+ && xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX))
+ || xenvif_rx_queue_stalled(queue)
+ || xenvif_rx_queue_ready(queue)
+ || kthread_should_stop()
+ || queue->vif->disabled;
+}
+
+static long xenvif_rx_queue_timeout(struct xenvif_queue *queue)
+{
+ struct sk_buff *skb;
+ long timeout;
+
+ skb = skb_peek(&queue->rx_queue);
+ if (!skb)
+ return MAX_SCHEDULE_TIMEOUT;
+
+ timeout = XENVIF_RX_CB(skb)->expires - jiffies;
+ return timeout < 0 ? 0 : timeout;
+}
+
+/* Wait until the guest Rx thread has work.
+ *
+ * The timeout needs to be adjusted based on the current head of the
+ * queue (and not just the head at the beginning). In particular, if
+ * the queue is initially empty an infinite timeout is used and this
+ * needs to be reduced when a skb is queued.
+ *
+ * This cannot be done with wait_event_timeout() because it only
+ * calculates the timeout once.
+ */
+static void xenvif_wait_for_rx_work(struct xenvif_queue *queue)
+{
+ DEFINE_WAIT(wait);
+
+ if (xenvif_have_rx_work(queue))
+ return;
+
+ for (;;) {
+ long ret;
+
+ prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE);
+ if (xenvif_have_rx_work(queue))
+ break;
+ ret = schedule_timeout(xenvif_rx_queue_timeout(queue));
+ if (!ret)
+ break;
}
+ finish_wait(&queue->wq, &wait);
}
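
A userspace analogue of this wait loop, assuming POSIX threads: pthread_cond_timedwait() plays the role of schedule_timeout(), and the deadline is recomputed from the queue head on every pass, which is exactly what wait_event_timeout() cannot do. The predicate and deadline here are placeholders:

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <time.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

static bool have_work(void) { return false; /* placeholder predicate */ }

static struct timespec head_deadline(void)
{
    struct timespec ts;

    clock_gettime(CLOCK_REALTIME, &ts);
    ts.tv_sec += 1;     /* placeholder: expiry of the current queue head */
    return ts;
}

static void wait_for_work(void)
{
    pthread_mutex_lock(&lock);
    while (!have_work()) {
        struct timespec ts = head_deadline();   /* recomputed each pass */

        if (pthread_cond_timedwait(&cond, &lock, &ts) == ETIMEDOUT)
            break;      /* deadline hit: caller drops expired packets */
    }
    pthread_mutex_unlock(&lock);
}

int main(void)
{
    wait_for_work();    /* returns after ~1s via the timeout path */
    return 0;
}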
int xenvif_kthread_guest_rx(void *data)
{
struct xenvif_queue *queue = data;
- struct sk_buff *skb;
+ struct xenvif *vif = queue->vif;
- while (!kthread_should_stop()) {
- wait_event_interruptible(queue->wq,
- rx_work_todo(queue) ||
- queue->vif->disabled ||
- test_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status) ||
- kthread_should_stop());
+ for (;;) {
+ xenvif_wait_for_rx_work(queue);
if (kthread_should_stop())
break;
@@ -2025,42 +2107,60 @@ int xenvif_kthread_guest_rx(void *data)
* context so we defer it here, if this thread is
* associated with queue 0.
*/
- if (unlikely(queue->vif->disabled && queue->id == 0))
- xenvif_carrier_off(queue->vif);
- else if (unlikely(test_and_clear_bit(QUEUE_STATUS_RX_PURGE_EVENT,
- &queue->status))) {
- xenvif_rx_purge_event(queue);
- } else if (!netif_carrier_ok(queue->vif->dev)) {
- /* Another queue stalled and turned the carrier off, so
- * purge the internal queue of queues which were not
- * blocked
- */
- skb_queue_purge(&queue->rx_queue);
- queue->rx_last_skb_slots = 0;
+ if (unlikely(vif->disabled && queue->id == 0)) {
+ xenvif_carrier_off(vif);
+ xenvif_rx_queue_purge(queue);
+ continue;
}
if (!skb_queue_empty(&queue->rx_queue))
xenvif_rx_action(queue);
+ /* If the guest hasn't provided any Rx slots for a
+ * while it's probably not responsive, drop the
+ * carrier so packets are dropped earlier.
+ */
+ if (xenvif_rx_queue_stalled(queue))
+ xenvif_queue_carrier_off(queue);
+ else if (xenvif_rx_queue_ready(queue))
+ xenvif_queue_carrier_on(queue);
+
+ /* Queued packets may have foreign pages from other
+ * domains. These cannot be queued indefinitely as
+ * this would starve guests of grant refs and transmit
+ * slots.
+ */
+ xenvif_rx_queue_drop_expired(queue);
+
+ xenvif_rx_queue_maybe_wake(queue);
+
cond_resched();
}
/* Bin any remaining skbs */
- while ((skb = skb_dequeue(&queue->rx_queue)) != NULL)
- dev_kfree_skb(skb);
+ xenvif_rx_queue_purge(queue);
return 0;
}
+static bool xenvif_dealloc_kthread_should_stop(struct xenvif_queue *queue)
+{
+ /* Dealloc thread must remain running until all inflight
+ * packets complete.
+ */
+ return kthread_should_stop() &&
+ !atomic_read(&queue->inflight_packets);
+}
+
int xenvif_dealloc_kthread(void *data)
{
struct xenvif_queue *queue = data;
- while (!kthread_should_stop()) {
+ for (;;) {
wait_event_interruptible(queue->dealloc_wq,
tx_dealloc_work_todo(queue) ||
- kthread_should_stop());
- if (kthread_should_stop())
+ xenvif_dealloc_kthread_should_stop(queue));
+ if (xenvif_dealloc_kthread_should_stop(queue))
break;
xenvif_tx_dealloc_action(queue);
@@ -2095,6 +2195,7 @@ static int __init netback_init(void)
goto failed_init;
rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs);
+ rx_stall_timeout_jiffies = msecs_to_jiffies(rx_stall_timeout_msecs);
#ifdef CONFIG_DEBUG_FS
xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL);
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index 580517d857b..4e56a27f968 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -52,6 +52,7 @@ static int xenvif_read_io_ring(struct seq_file *m, void *v)
struct xenvif_queue *queue = m->private;
struct xen_netif_tx_back_ring *tx_ring = &queue->tx;
struct xen_netif_rx_back_ring *rx_ring = &queue->rx;
+ struct netdev_queue *dev_queue;
if (tx_ring->sring) {
struct xen_netif_tx_sring *sring = tx_ring->sring;
@@ -112,10 +113,18 @@ static int xenvif_read_io_ring(struct seq_file *m, void *v)
queue->credit_timeout.expires,
jiffies);
+ dev_queue = netdev_get_tx_queue(queue->vif->dev, queue->id);
+
+ seq_printf(m, "\nRx internal queue: len %u max %u pkts %u %s\n",
+ queue->rx_queue_len, queue->rx_queue_max,
+ skb_queue_len(&queue->rx_queue),
+ netif_tx_queue_stopped(dev_queue) ? "stopped" : "running");
+
return 0;
}
#define XENVIF_KICK_STR "kick"
+#define BUFFER_SIZE 32
static ssize_t
xenvif_write_io_ring(struct file *filp, const char __user *buf, size_t count,
@@ -124,22 +133,24 @@ xenvif_write_io_ring(struct file *filp, const char __user *buf, size_t count,
struct xenvif_queue *queue =
((struct seq_file *)filp->private_data)->private;
int len;
- char write[sizeof(XENVIF_KICK_STR)];
+ char write[BUFFER_SIZE];
/* don't allow partial writes and check the length */
if (*ppos != 0)
return 0;
- if (count < sizeof(XENVIF_KICK_STR) - 1)
+ if (count >= sizeof(write))
return -ENOSPC;
len = simple_write_to_buffer(write,
- sizeof(write),
+ sizeof(write) - 1,
ppos,
buf,
count);
if (len < 0)
return len;
+ write[len] = '\0';
+
if (!strncmp(write, XENVIF_KICK_STR, sizeof(XENVIF_KICK_STR) - 1))
xenvif_interrupt(0, (void *)queue);
else {
@@ -171,10 +182,9 @@ static const struct file_operations xenvif_dbg_io_ring_ops_fops = {
.write = xenvif_write_io_ring,
};
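
The buffer fix reserves a byte for the terminator: counts that would fill the buffer are rejected with -ENOSPC, at most sizeof(write) - 1 bytes are copied, and the result is NUL-terminated before strncmp(). A userspace model of the fixed parsing (memcpy stands in for simple_write_to_buffer()):

#include <assert.h>
#include <string.h>

#define BUFFER_SIZE 32

/* Returns 1 for a "kick" command, 0 otherwise, and -1 if the write is
 * too large to NUL-terminate safely (-ENOSPC in the driver). */
static int parse_cmd(const char *buf, size_t count)
{
    char write[BUFFER_SIZE];

    if (count >= sizeof(write))
        return -1;
    memcpy(write, buf, count);
    write[count] = '\0';
    return strncmp(write, "kick", sizeof("kick") - 1) == 0;
}

int main(void)
{
    char big[64] = { 0 };

    assert(parse_cmd("kick", 4) == 1);
    assert(parse_cmd("nope", 4) == 0);
    assert(parse_cmd(big, sizeof(big)) == -1);
    return 0;
}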
-static void xenvif_debugfs_addif(struct xenvif_queue *queue)
+static void xenvif_debugfs_addif(struct xenvif *vif)
{
struct dentry *pfile;
- struct xenvif *vif = queue->vif;
int i;
if (IS_ERR_OR_NULL(xen_netback_dbg_root))
@@ -701,6 +711,7 @@ static void connect(struct backend_info *be)
be->vif->queues = vzalloc(requested_num_queues *
sizeof(struct xenvif_queue));
be->vif->num_queues = requested_num_queues;
+ be->vif->stalled_queues = requested_num_queues;
for (queue_index = 0; queue_index < requested_num_queues; ++queue_index) {
queue = &be->vif->queues[queue_index];
@@ -733,10 +744,11 @@ static void connect(struct backend_info *be)
be->vif->num_queues = queue_index;
goto err;
}
+ }
+
#ifdef CONFIG_DEBUG_FS
- xenvif_debugfs_addif(queue);
+ xenvif_debugfs_addif(be->vif);
#endif /* CONFIG_DEBUG_FS */
- }
/* Initialisation completed, tell core driver the number of
* active queues.
@@ -870,15 +882,10 @@ static int read_xenbus_vif_flags(struct backend_info *be)
if (!rx_copy)
return -EOPNOTSUPP;
- if (vif->dev->tx_queue_len != 0) {
- if (xenbus_scanf(XBT_NIL, dev->otherend,
- "feature-rx-notify", "%d", &val) < 0)
- val = 0;
- if (val)
- vif->can_queue = 1;
- else
- /* Must be non-zero for pfifo_fast to work. */
- vif->dev->tx_queue_len = 1;
+ if (xenbus_scanf(XBT_NIL, dev->otherend,
+ "feature-rx-notify", "%d", &val) < 0 || val == 0) {
+ xenbus_dev_fatal(dev, -EINVAL, "feature-rx-notify is mandatory");
+ return -EINVAL;
}
if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",
@@ -934,22 +941,18 @@ static int read_xenbus_vif_flags(struct backend_info *be)
return 0;
}
-
-/* ** Driver Registration ** */
-
-
static const struct xenbus_device_id netback_ids[] = {
{ "vif" },
{ "" }
};
-
-static DEFINE_XENBUS_DRIVER(netback, ,
+static struct xenbus_driver netback_driver = {
+ .ids = netback_ids,
.probe = netback_probe,
.remove = netback_remove,
.uevent = netback_uevent,
.otherend_changed = frontend_changed,
-);
+};
int xenvif_xenbus_init(void)
{