From 891070003999e7ac8881bba6f8242615382742eb Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Tue, 17 Sep 2013 09:25:23 +0930 Subject: virtio: pm: use CONFIG_PM_SLEEP instead of CONFIG_PM The freeze and restore functions defined in virtio drivers are used for suspend and hibernate, so CONFIG_PM_SLEEP is more appropriate than CONFIG_PM. This patch replace all CONFIG_PM with CONFIG_PM_SLEEP for virtio drivers that implement freeze and restore callbacks. Signed-off-by: Aaron Lu Reviewed-by: Amit Shah Signed-off-by: Rusty Russell --- drivers/net/virtio_net.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index defec2b3c5a..2ad3d96d4a7 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1683,7 +1683,7 @@ static void virtnet_remove(struct virtio_device *vdev) free_netdev(vi->dev); } -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP static int virtnet_freeze(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; @@ -1766,7 +1766,7 @@ static struct virtio_driver virtio_net_driver = { .probe = virtnet_probe, .remove = virtnet_remove, .config_changed = virtnet_config_changed, -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP .freeze = virtnet_freeze, .restore = virtnet_restore, #endif -- cgit v1.2.3-70-g09d2 From 855e0c5288177bcb193f6f6316952d2490478e1c Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 14 Oct 2013 18:11:51 +1030 Subject: virtio: use size-based config accessors. This lets the transport do endian conversion if necessary, and insulates the drivers from the difference. Most drivers can use the simple helpers virtio_cread() and virtio_cwrite(). Signed-off-by: Rusty Russell --- drivers/block/virtio_blk.c | 77 ++++++++++++++++++----------------------- drivers/char/virtio_console.c | 15 +++----- drivers/net/caif/caif_virtio.c | 23 ++++++------ drivers/net/virtio_net.c | 28 +++++++++------ drivers/scsi/virtio_scsi.c | 12 +++---- drivers/virtio/virtio_balloon.c | 10 +++--- net/9p/trans_virtio.c | 9 +++-- 7 files changed, 79 insertions(+), 95 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 89245b5adca..6b66252fc4e 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -456,18 +456,15 @@ static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) { struct virtio_blk *vblk = bd->bd_disk->private_data; - struct virtio_blk_geometry vgeo; - int err; /* see if the host passed in geometry config */ - err = virtio_config_val(vblk->vdev, VIRTIO_BLK_F_GEOMETRY, - offsetof(struct virtio_blk_config, geometry), - &vgeo); - - if (!err) { - geo->heads = vgeo.heads; - geo->sectors = vgeo.sectors; - geo->cylinders = vgeo.cylinders; + if (virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_GEOMETRY)) { + virtio_cread(vblk->vdev, struct virtio_blk_config, + geometry.cylinders, &geo->cylinders); + virtio_cread(vblk->vdev, struct virtio_blk_config, + geometry.heads, &geo->heads); + virtio_cread(vblk->vdev, struct virtio_blk_config, + geometry.sectors, &geo->sectors); } else { /* some standard values, similar to sd */ geo->heads = 1 << 6; @@ -529,8 +526,7 @@ static void virtblk_config_changed_work(struct work_struct *work) goto done; /* Host must always specify the capacity. */ - vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity), - &capacity, sizeof(capacity)); + virtio_cread(vdev, struct virtio_blk_config, capacity, &capacity); /* If capacity is too big, truncate with warning. */ if ((sector_t)capacity != capacity) { @@ -608,9 +604,9 @@ static int virtblk_get_cache_mode(struct virtio_device *vdev) u8 writeback; int err; - err = virtio_config_val(vdev, VIRTIO_BLK_F_CONFIG_WCE, - offsetof(struct virtio_blk_config, wce), - &writeback); + err = virtio_cread_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE, + struct virtio_blk_config, wce, + &writeback); if (err) writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_WCE); @@ -642,7 +638,6 @@ virtblk_cache_type_store(struct device *dev, struct device_attribute *attr, struct virtio_blk *vblk = disk->private_data; struct virtio_device *vdev = vblk->vdev; int i; - u8 writeback; BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE)); for (i = ARRAY_SIZE(virtblk_cache_types); --i >= 0; ) @@ -652,11 +647,7 @@ virtblk_cache_type_store(struct device *dev, struct device_attribute *attr, if (i < 0) return -EINVAL; - writeback = i; - vdev->config->set(vdev, - offsetof(struct virtio_blk_config, wce), - &writeback, sizeof(writeback)); - + virtio_cwrite8(vdev, offsetof(struct virtio_blk_config, wce), i); virtblk_update_cache_mode(vdev); return count; } @@ -699,9 +690,9 @@ static int virtblk_probe(struct virtio_device *vdev) index = err; /* We need to know how many segments before we allocate. */ - err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX, - offsetof(struct virtio_blk_config, seg_max), - &sg_elems); + err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SEG_MAX, + struct virtio_blk_config, seg_max, + &sg_elems); /* We need at least one SG element, whatever they say. */ if (err || !sg_elems) @@ -772,8 +763,7 @@ static int virtblk_probe(struct virtio_device *vdev) set_disk_ro(vblk->disk, 1); /* Host must always specify the capacity. */ - vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity), - &cap, sizeof(cap)); + virtio_cread(vdev, struct virtio_blk_config, capacity, &cap); /* If capacity is too big, truncate with warning. */ if ((sector_t)cap != cap) { @@ -794,46 +784,45 @@ static int virtblk_probe(struct virtio_device *vdev) /* Host can optionally specify maximum segment size and number of * segments. */ - err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX, - offsetof(struct virtio_blk_config, size_max), - &v); + err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SIZE_MAX, + struct virtio_blk_config, size_max, &v); if (!err) blk_queue_max_segment_size(q, v); else blk_queue_max_segment_size(q, -1U); /* Host can optionally specify the block size of the device */ - err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE, - offsetof(struct virtio_blk_config, blk_size), - &blk_size); + err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE, + struct virtio_blk_config, blk_size, + &blk_size); if (!err) blk_queue_logical_block_size(q, blk_size); else blk_size = queue_logical_block_size(q); /* Use topology information if available */ - err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY, - offsetof(struct virtio_blk_config, physical_block_exp), - &physical_block_exp); + err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, + struct virtio_blk_config, physical_block_exp, + &physical_block_exp); if (!err && physical_block_exp) blk_queue_physical_block_size(q, blk_size * (1 << physical_block_exp)); - err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY, - offsetof(struct virtio_blk_config, alignment_offset), - &alignment_offset); + err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, + struct virtio_blk_config, alignment_offset, + &alignment_offset); if (!err && alignment_offset) blk_queue_alignment_offset(q, blk_size * alignment_offset); - err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY, - offsetof(struct virtio_blk_config, min_io_size), - &min_io_size); + err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, + struct virtio_blk_config, min_io_size, + &min_io_size); if (!err && min_io_size) blk_queue_io_min(q, blk_size * min_io_size); - err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY, - offsetof(struct virtio_blk_config, opt_io_size), - &opt_io_size); + err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, + struct virtio_blk_config, opt_io_size, + &opt_io_size); if (!err && opt_io_size) blk_queue_io_opt(q, blk_size * opt_io_size); diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 862fd54a0ff..2a8d9a7a183 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1837,12 +1837,8 @@ static void config_intr(struct virtio_device *vdev) struct port *port; u16 rows, cols; - vdev->config->get(vdev, - offsetof(struct virtio_console_config, cols), - &cols, sizeof(u16)); - vdev->config->get(vdev, - offsetof(struct virtio_console_config, rows), - &rows, sizeof(u16)); + virtio_cread(vdev, struct virtio_console_config, cols, &cols); + virtio_cread(vdev, struct virtio_console_config, rows, &rows); port = find_port_by_id(portdev, 0); set_console_size(port, rows, cols); @@ -2014,10 +2010,9 @@ static int virtcons_probe(struct virtio_device *vdev) /* Don't test MULTIPORT at all if we're rproc: not a valid feature! */ if (!is_rproc_serial(vdev) && - virtio_config_val(vdev, VIRTIO_CONSOLE_F_MULTIPORT, - offsetof(struct virtio_console_config, - max_nr_ports), - &portdev->config.max_nr_ports) == 0) { + virtio_cread_feature(vdev, VIRTIO_CONSOLE_F_MULTIPORT, + struct virtio_console_config, max_nr_ports, + &portdev->config.max_nr_ports) == 0) { multiport = true; } diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c index b9ed1288ce2..985608634f8 100644 --- a/drivers/net/caif/caif_virtio.c +++ b/drivers/net/caif/caif_virtio.c @@ -686,18 +686,19 @@ static int cfv_probe(struct virtio_device *vdev) goto err; /* Get the CAIF configuration from virtio config space, if available */ -#define GET_VIRTIO_CONFIG_OPS(_v, _var, _f) \ - ((_v)->config->get(_v, offsetof(struct virtio_caif_transf_config, _f), \ - &_var, \ - FIELD_SIZEOF(struct virtio_caif_transf_config, _f))) - if (vdev->config->get) { - GET_VIRTIO_CONFIG_OPS(vdev, cfv->tx_hr, headroom); - GET_VIRTIO_CONFIG_OPS(vdev, cfv->rx_hr, headroom); - GET_VIRTIO_CONFIG_OPS(vdev, cfv->tx_tr, tailroom); - GET_VIRTIO_CONFIG_OPS(vdev, cfv->rx_tr, tailroom); - GET_VIRTIO_CONFIG_OPS(vdev, cfv->mtu, mtu); - GET_VIRTIO_CONFIG_OPS(vdev, cfv->mru, mtu); + virtio_cread(vdev, struct virtio_caif_transf_config, headroom, + &cfv->tx_hr); + virtio_cread(vdev, struct virtio_caif_transf_config, headroom, + &cfv->rx_hr); + virtio_cread(vdev, struct virtio_caif_transf_config, tailroom, + &cfv->tx_tr); + virtio_cread(vdev, struct virtio_caif_transf_config, tailroom, + &cfv->rx_tr); + virtio_cread(vdev, struct virtio_caif_transf_config, mtu, + &cfv->mtu); + virtio_cread(vdev, struct virtio_caif_transf_config, mtu, + &cfv->mru); } else { cfv->tx_hr = CFV_DEF_HEADROOM; cfv->rx_hr = CFV_DEF_HEADROOM; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 2ad3d96d4a7..ee022714ead 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -852,8 +852,13 @@ static int virtnet_set_mac_address(struct net_device *dev, void *p) return -EINVAL; } } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { - vdev->config->set(vdev, offsetof(struct virtio_net_config, mac), - addr->sa_data, dev->addr_len); + unsigned int i; + + /* Naturally, this has an atomicity problem. */ + for (i = 0; i < dev->addr_len; i++) + virtio_cwrite8(vdev, + offsetof(struct virtio_net_config, mac) + + i, addr->sa_data[i]); } eth_commit_mac_addr_change(dev, p); @@ -1266,9 +1271,8 @@ static void virtnet_config_changed_work(struct work_struct *work) if (!vi->config_enable) goto done; - if (virtio_config_val(vi->vdev, VIRTIO_NET_F_STATUS, - offsetof(struct virtio_net_config, status), - &v) < 0) + if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS, + struct virtio_net_config, status, &v) < 0) goto done; if (v & VIRTIO_NET_S_ANNOUNCE) { @@ -1490,9 +1494,9 @@ static int virtnet_probe(struct virtio_device *vdev) u16 max_queue_pairs; /* Find if host supports multiqueue virtio_net device */ - err = virtio_config_val(vdev, VIRTIO_NET_F_MQ, - offsetof(struct virtio_net_config, - max_virtqueue_pairs), &max_queue_pairs); + err = virtio_cread_feature(vdev, VIRTIO_NET_F_MQ, + struct virtio_net_config, + max_virtqueue_pairs, &max_queue_pairs); /* We need at least 2 queue's */ if (err || max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || @@ -1544,9 +1548,11 @@ static int virtnet_probe(struct virtio_device *vdev) dev->vlan_features = dev->features; /* Configuration may specify what MAC to use. Otherwise random. */ - if (virtio_config_val_len(vdev, VIRTIO_NET_F_MAC, - offsetof(struct virtio_net_config, mac), - dev->dev_addr, dev->addr_len) < 0) + if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) + virtio_cread_bytes(vdev, + offsetof(struct virtio_net_config, mac), + dev->dev_addr, dev->addr_len); + else eth_hw_addr_random(dev); /* Set up our device-specific information */ diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 2a110391f8c..e6bb2352df4 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -710,19 +710,15 @@ static struct scsi_host_template virtscsi_host_template_multi = { #define virtscsi_config_get(vdev, fld) \ ({ \ typeof(((struct virtio_scsi_config *)0)->fld) __val; \ - vdev->config->get(vdev, \ - offsetof(struct virtio_scsi_config, fld), \ - &__val, sizeof(__val)); \ + virtio_cread(vdev, struct virtio_scsi_config, fld, &__val); \ __val; \ }) #define virtscsi_config_set(vdev, fld, val) \ - (void)({ \ + do { \ typeof(((struct virtio_scsi_config *)0)->fld) __val = (val); \ - vdev->config->set(vdev, \ - offsetof(struct virtio_scsi_config, fld), \ - &__val, sizeof(__val)); \ - }) + virtio_cwrite(vdev, struct virtio_scsi_config, fld, &__val); \ + } while(0) static void __virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity) { diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index d6f68164160..c444654fc33 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -275,9 +275,8 @@ static inline s64 towards_target(struct virtio_balloon *vb) __le32 v; s64 target; - vb->vdev->config->get(vb->vdev, - offsetof(struct virtio_balloon_config, num_pages), - &v, sizeof(v)); + virtio_cread(vb->vdev, struct virtio_balloon_config, num_pages, &v); + target = le32_to_cpu(v); return target - vb->num_pages; } @@ -286,9 +285,8 @@ static void update_balloon_size(struct virtio_balloon *vb) { __le32 actual = cpu_to_le32(vb->num_pages); - vb->vdev->config->set(vb->vdev, - offsetof(struct virtio_balloon_config, actual), - &actual, sizeof(actual)); + virtio_cwrite(vb->vdev, struct virtio_balloon_config, num_pages, + &actual); } static int balloon(void *_vballoon) diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 990afab2be1..9c5a1aa34d1 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -544,9 +544,7 @@ static int p9_virtio_probe(struct virtio_device *vdev) chan->inuse = false; if (virtio_has_feature(vdev, VIRTIO_9P_MOUNT_TAG)) { - vdev->config->get(vdev, - offsetof(struct virtio_9p_config, tag_len), - &tag_len, sizeof(tag_len)); + virtio_cread(vdev, struct virtio_9p_config, tag_len, &tag_len); } else { err = -EINVAL; goto out_free_vq; @@ -556,8 +554,9 @@ static int p9_virtio_probe(struct virtio_device *vdev) err = -ENOMEM; goto out_free_vq; } - vdev->config->get(vdev, offsetof(struct virtio_9p_config, tag), - tag, tag_len); + + virtio_cread_bytes(vdev, offsetof(struct virtio_9p_config, tag), + tag, tag_len); chan->tag = tag; chan->tag_len = tag_len; err = sysfs_create_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); -- cgit v1.2.3-70-g09d2 From 3ab098df35f8b98b6553edc2e40234af512ba877 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 15 Oct 2013 11:18:58 +0800 Subject: virtio-net: don't respond to cpu hotplug notifier if we're not ready We're trying to re-configure the affinity unconditionally in cpu hotplug callback. This may lead the issue during resuming from s3/s4 since - virt queues haven't been allocated at that time. - it's unnecessary since thaw method will re-configure the affinity. Fix this issue by checking the config_enable and do nothing is we're not ready. The bug were introduced by commit 8de4b2f3ae90c8fc0f17eeaab87d5a951b66ee17 (virtio-net: reset virtqueue affinity when doing cpu hotplug). Cc: Rusty Russell Cc: Michael S. Tsirkin Cc: Wanlong Gao Acked-by: Michael S. Tsirkin Reviewed-by: Wanlong Gao Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index defec2b3c5a..c4bc1cc7ef3 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1116,6 +1116,11 @@ static int virtnet_cpu_callback(struct notifier_block *nfb, { struct virtnet_info *vi = container_of(nfb, struct virtnet_info, nb); + mutex_lock(&vi->config_lock); + + if (!vi->config_enable) + goto done; + switch(action & ~CPU_TASKS_FROZEN) { case CPU_ONLINE: case CPU_DOWN_FAILED: @@ -1128,6 +1133,9 @@ static int virtnet_cpu_callback(struct notifier_block *nfb, default: break; } + +done: + mutex_unlock(&vi->config_lock); return NOTIFY_OK; } -- cgit v1.2.3-70-g09d2 From 35ed159bfd96a7547ec277ed8b550c7cbd9841b6 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 15 Oct 2013 11:18:59 +0800 Subject: virtio-net: refill only when device is up during setting queues We used to schedule the refill work unconditionally after changing the number of queues. This may lead an issue if the device is not up. Since we only try to cancel the work in ndo_stop(), this may cause the refill work still work after removing the device. Fix this by only schedule the work when device is up. The bug were introduce by commit 9b9cd8024a2882e896c65222aa421d461354e3f2. (virtio-net: fix the race between channels setting and refill) Cc: Rusty Russell Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index c4bc1cc7ef3..9fbdfcd1e1a 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -938,7 +938,9 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) return -EINVAL; } else { vi->curr_queue_pairs = queue_pairs; - schedule_delayed_work(&vi->refill, 0); + /* virtnet_open() will refill when device is going to up. */ + if (dev->flags & IFF_UP) + schedule_delayed_work(&vi->refill, 0); } return 0; @@ -1741,7 +1743,9 @@ static int virtnet_restore(struct virtio_device *vdev) vi->config_enable = true; mutex_unlock(&vi->config_lock); + rtnl_lock(); virtnet_set_queues(vi, vi->curr_queue_pairs); + rtnl_unlock(); return 0; } -- cgit v1.2.3-70-g09d2 From 67975901183799af8e93ec60e322f9e2a1940b9b Mon Sep 17 00:00:00 2001 From: Heinz Graalfs Date: Tue, 29 Oct 2013 09:40:02 +1030 Subject: virtio_net: verify if virtqueue_kick() succeeded Verify if a host kick succeeded by checking return value of virtqueue_kick(). Signed-off-by: Heinz Graalfs Signed-off-by: Rusty Russell --- drivers/net/virtio_net.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index ee022714ead..ff208d2787d 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -545,7 +545,8 @@ static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp) } while (rq->vq->num_free); if (unlikely(rq->num > rq->max)) rq->max = rq->num; - virtqueue_kick(rq->vq); + if (unlikely(!virtqueue_kick(rq->vq))) + return false; return !oom; } @@ -751,7 +752,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) err = xmit_skb(sq, skb); /* This should not happen! */ - if (unlikely(err)) { + if (unlikely(err) || unlikely(!virtqueue_kick(sq->vq))) { dev->stats.tx_fifo_errors++; if (net_ratelimit()) dev_warn(&dev->dev, @@ -760,7 +761,6 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) kfree_skb(skb); return NETDEV_TX_OK; } - virtqueue_kick(sq->vq); /* Don't wait up for transmitted skbs to be freed. */ skb_orphan(skb); @@ -819,7 +819,8 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, BUG_ON(virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC) < 0); - virtqueue_kick(vi->cvq); + if (unlikely(!virtqueue_kick(vi->cvq))) + return status == VIRTIO_NET_OK; /* Spin for a response, the kick causes an ioport write, trapping * into the hypervisor, so the request should be handled immediately. -- cgit v1.2.3-70-g09d2 From 047b9b94951dba2e93c65a582ae2bce25c960b86 Mon Sep 17 00:00:00 2001 From: Heinz Graalfs Date: Tue, 29 Oct 2013 09:40:47 +1030 Subject: virtio_net: verify if queue is broken after virtqueue_get_buf() If a virtqueue_get_buf() call returns a NULL pointer a possibly endless while loop should be avoided by checking for a broken virtqueue. Signed-off-by: Heinz Graalfs Signed-off-by: Rusty Russell --- drivers/net/virtio_net.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index ff208d2787d..057ea133cd2 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -825,7 +825,8 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, /* Spin for a response, the kick causes an ioport write, trapping * into the hypervisor, so the request should be handled immediately. */ - while (!virtqueue_get_buf(vi->cvq, &tmp)) + while (!virtqueue_get_buf(vi->cvq, &tmp) && + !virtqueue_is_broken(vi->cvq)) cpu_relax(); return status == VIRTIO_NET_OK; -- cgit v1.2.3-70-g09d2 From 2613af0ed18a11d5c566a81f9a6510b73180660a Mon Sep 17 00:00:00 2001 From: Michael Dalton Date: Mon, 28 Oct 2013 15:44:18 -0700 Subject: virtio_net: migrate mergeable rx buffers to page frag allocators The virtio_net driver's mergeable receive buffer allocator uses 4KB packet buffers. For MTU-sized traffic, SKB truesize is > 4KB but only ~1500 bytes of the buffer is used to store packet data, reducing the effective TCP window size substantially. This patch addresses the performance concerns with mergeable receive buffers by allocating MTU-sized packet buffers using page frag allocators. If more than MAX_SKB_FRAGS buffers are needed, the SKB frag_list is used. Signed-off-by: Michael Dalton Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 164 ++++++++++++++++++++++++++++++----------------- 1 file changed, 106 insertions(+), 58 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 9fbdfcd1e1a..113ee93dbb2 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -124,6 +124,11 @@ struct virtnet_info { /* Lock for config space updates */ struct mutex config_lock; + /* Page_frag for GFP_KERNEL packet buffer allocation when we run + * low on memory. + */ + struct page_frag alloc_frag; + /* Does the affinity hint is set for virtqueues? */ bool affinity_hint_set; @@ -217,33 +222,18 @@ static void skb_xmit_done(struct virtqueue *vq) netif_wake_subqueue(vi->dev, vq2txq(vq)); } -static void set_skb_frag(struct sk_buff *skb, struct page *page, - unsigned int offset, unsigned int *len) -{ - int size = min((unsigned)PAGE_SIZE - offset, *len); - int i = skb_shinfo(skb)->nr_frags; - - __skb_fill_page_desc(skb, i, page, offset, size); - - skb->data_len += size; - skb->len += size; - skb->truesize += PAGE_SIZE; - skb_shinfo(skb)->nr_frags++; - skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; - *len -= size; -} - /* Called from bottom half context */ static struct sk_buff *page_to_skb(struct receive_queue *rq, - struct page *page, unsigned int len) + struct page *page, unsigned int offset, + unsigned int len, unsigned int truesize) { struct virtnet_info *vi = rq->vq->vdev->priv; struct sk_buff *skb; struct skb_vnet_hdr *hdr; - unsigned int copy, hdr_len, offset; + unsigned int copy, hdr_len, hdr_padded_len; char *p; - p = page_address(page); + p = page_address(page) + offset; /* copy small packet so we can reuse these pages for small data */ skb = netdev_alloc_skb_ip_align(vi->dev, GOOD_COPY_LEN); @@ -254,16 +244,17 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq, if (vi->mergeable_rx_bufs) { hdr_len = sizeof hdr->mhdr; - offset = hdr_len; + hdr_padded_len = sizeof hdr->mhdr; } else { hdr_len = sizeof hdr->hdr; - offset = sizeof(struct padded_vnet_hdr); + hdr_padded_len = sizeof(struct padded_vnet_hdr); } memcpy(hdr, p, hdr_len); len -= hdr_len; - p += offset; + offset += hdr_padded_len; + p += hdr_padded_len; copy = len; if (copy > skb_tailroom(skb)) @@ -273,6 +264,14 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq, len -= copy; offset += copy; + if (vi->mergeable_rx_bufs) { + if (len) + skb_add_rx_frag(skb, 0, page, offset, len, truesize); + else + put_page(page); + return skb; + } + /* * Verify that we can indeed put this data into a skb. * This is here to handle cases when the device erroneously @@ -284,9 +283,12 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq, dev_kfree_skb(skb); return NULL; } - + BUG_ON(offset >= PAGE_SIZE); while (len) { - set_skb_frag(skb, page, offset, &len); + unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len); + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset, + frag_size, truesize); + len -= frag_size; page = (struct page *)page->private; offset = 0; } @@ -297,33 +299,52 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq, return skb; } -static int receive_mergeable(struct receive_queue *rq, struct sk_buff *skb) +static int receive_mergeable(struct receive_queue *rq, struct sk_buff *head_skb) { - struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb); + struct skb_vnet_hdr *hdr = skb_vnet_hdr(head_skb); + struct sk_buff *curr_skb = head_skb; + char *buf; struct page *page; - int num_buf, i, len; + int num_buf, len; num_buf = hdr->mhdr.num_buffers; while (--num_buf) { - i = skb_shinfo(skb)->nr_frags; - if (i >= MAX_SKB_FRAGS) { - pr_debug("%s: packet too long\n", skb->dev->name); - skb->dev->stats.rx_length_errors++; - return -EINVAL; - } - page = virtqueue_get_buf(rq->vq, &len); - if (!page) { + int num_skb_frags = skb_shinfo(curr_skb)->nr_frags; + buf = virtqueue_get_buf(rq->vq, &len); + if (unlikely(!buf)) { pr_debug("%s: rx error: %d buffers missing\n", - skb->dev->name, hdr->mhdr.num_buffers); - skb->dev->stats.rx_length_errors++; + head_skb->dev->name, hdr->mhdr.num_buffers); + head_skb->dev->stats.rx_length_errors++; return -EINVAL; } - - if (len > PAGE_SIZE) - len = PAGE_SIZE; - - set_skb_frag(skb, page, 0, &len); - + if (unlikely(len > MAX_PACKET_LEN)) { + pr_debug("%s: rx error: merge buffer too long\n", + head_skb->dev->name); + len = MAX_PACKET_LEN; + } + if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) { + struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC); + if (unlikely(!nskb)) { + head_skb->dev->stats.rx_dropped++; + return -ENOMEM; + } + if (curr_skb == head_skb) + skb_shinfo(curr_skb)->frag_list = nskb; + else + curr_skb->next = nskb; + curr_skb = nskb; + head_skb->truesize += nskb->truesize; + num_skb_frags = 0; + } + if (curr_skb != head_skb) { + head_skb->data_len += len; + head_skb->len += len; + head_skb->truesize += MAX_PACKET_LEN; + } + page = virt_to_head_page(buf); + skb_add_rx_frag(curr_skb, num_skb_frags, page, + buf - (char *)page_address(page), len, + MAX_PACKET_LEN); --rq->num; } return 0; @@ -341,8 +362,10 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) { pr_debug("%s: short packet %i\n", dev->name, len); dev->stats.rx_length_errors++; - if (vi->mergeable_rx_bufs || vi->big_packets) + if (vi->big_packets) give_pages(rq, buf); + else if (vi->mergeable_rx_bufs) + put_page(virt_to_head_page(buf)); else dev_kfree_skb(buf); return; @@ -352,19 +375,28 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) skb = buf; len -= sizeof(struct virtio_net_hdr); skb_trim(skb, len); + } else if (vi->mergeable_rx_bufs) { + struct page *page = virt_to_head_page(buf); + skb = page_to_skb(rq, page, + (char *)buf - (char *)page_address(page), + len, MAX_PACKET_LEN); + if (unlikely(!skb)) { + dev->stats.rx_dropped++; + put_page(page); + return; + } + if (receive_mergeable(rq, skb)) { + dev_kfree_skb(skb); + return; + } } else { page = buf; - skb = page_to_skb(rq, page, len); + skb = page_to_skb(rq, page, 0, len, PAGE_SIZE); if (unlikely(!skb)) { dev->stats.rx_dropped++; give_pages(rq, page); return; } - if (vi->mergeable_rx_bufs) - if (receive_mergeable(rq, skb)) { - dev_kfree_skb(skb); - return; - } } hdr = skb_vnet_hdr(skb); @@ -501,18 +533,28 @@ static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp) static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp) { - struct page *page; + struct virtnet_info *vi = rq->vq->vdev->priv; + char *buf = NULL; int err; - page = get_a_page(rq, gfp); - if (!page) + if (gfp & __GFP_WAIT) { + if (skb_page_frag_refill(MAX_PACKET_LEN, &vi->alloc_frag, + gfp)) { + buf = (char *)page_address(vi->alloc_frag.page) + + vi->alloc_frag.offset; + get_page(vi->alloc_frag.page); + vi->alloc_frag.offset += MAX_PACKET_LEN; + } + } else { + buf = netdev_alloc_frag(MAX_PACKET_LEN); + } + if (!buf) return -ENOMEM; - sg_init_one(rq->sg, page_address(page), PAGE_SIZE); - - err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, page, gfp); + sg_init_one(rq->sg, buf, MAX_PACKET_LEN); + err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, buf, gfp); if (err < 0) - give_pages(rq, page); + put_page(virt_to_head_page(buf)); return err; } @@ -1343,8 +1385,10 @@ static void free_unused_bufs(struct virtnet_info *vi) struct virtqueue *vq = vi->rq[i].vq; while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { - if (vi->mergeable_rx_bufs || vi->big_packets) + if (vi->big_packets) give_pages(&vi->rq[i], buf); + else if (vi->mergeable_rx_bufs) + put_page(virt_to_head_page(buf)); else dev_kfree_skb(buf); --vi->rq[i].num; @@ -1650,6 +1694,8 @@ free_recv_bufs: free_vqs: cancel_delayed_work_sync(&vi->refill); virtnet_del_vqs(vi); + if (vi->alloc_frag.page) + put_page(vi->alloc_frag.page); free_index: free_percpu(vi->vq_index); free_stats: @@ -1685,6 +1731,8 @@ static void virtnet_remove(struct virtio_device *vdev) unregister_netdev(vi->dev); remove_vq_common(vi); + if (vi->alloc_frag.page) + put_page(vi->alloc_frag.page); flush_work(&vi->config_work); -- cgit v1.2.3-70-g09d2 From ec9debbd9a88d8ea86c488d6ffcac419ee7d46d9 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 29 Oct 2013 15:11:07 +0800 Subject: virtio-net: correctly handle cpu hotplug notifier during resuming commit 3ab098df35f8b98b6553edc2e40234af512ba877 (virtio-net: don't respond to cpu hotplug notifier if we're not ready) tries to bypass the cpu hotplug notifier by checking the config_enable and does nothing is it was false. So it need to try to hold the config_lock mutex which may happen in atomic environment which leads the following warnings: [ 622.944441] CPU0 attaching NULL sched-domain. [ 622.944446] CPU1 attaching NULL sched-domain. [ 622.944485] CPU0 attaching NULL sched-domain. [ 622.950795] BUG: sleeping function called from invalid context at kernel/mutex.c:616 [ 622.950796] in_atomic(): 1, irqs_disabled(): 1, pid: 10, name: migration/1 [ 622.950796] no locks held by migration/1/10. [ 622.950798] CPU: 1 PID: 10 Comm: migration/1 Not tainted 3.12.0-rc5-wl-01249-gb91e82d #317 [ 622.950799] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 622.950802] 0000000000000000 ffff88001d42dba0 ffffffff81a32f22 ffff88001bfb9c70 [ 622.950803] ffff88001d42dbb0 ffffffff810edb02 ffff88001d42dc38 ffffffff81a396ed [ 622.950805] 0000000000000046 ffff88001d42dbe8 ffffffff810e861d 0000000000000000 [ 622.950805] Call Trace: [ 622.950810] [] dump_stack+0x54/0x74 [ 622.950815] [] __might_sleep+0x112/0x114 [ 622.950817] [] mutex_lock_nested+0x3c/0x3c6 [ 622.950818] [] ? up+0x39/0x3e [ 622.950821] [] ? acpi_os_signal_semaphore+0x21/0x2d [ 622.950824] [] ? acpi_ut_release_mutex+0x5e/0x62 [ 622.950828] [] virtnet_cpu_callback+0x33/0x87 [ 622.950830] [] notifier_call_chain+0x3c/0x5e [ 622.950832] [] __raw_notifier_call_chain+0xe/0x10 [ 622.950835] [] __cpu_notify+0x20/0x37 [ 622.950836] [] cpu_notify+0x13/0x15 [ 622.950838] [] take_cpu_down+0x27/0x3a [ 622.950841] [] stop_machine_cpu_stop+0x93/0xf1 [ 622.950842] [] cpu_stopper_thread+0xa0/0x12f [ 622.950844] [] ? cpu_stopper_thread+0x12f/0x12f [ 622.950847] [] ? lock_release_holdtime.part.7+0xa3/0xa8 [ 622.950848] [] ? cpu_stop_should_run+0x3f/0x47 [ 622.950850] [] smpboot_thread_fn+0x1c5/0x1e3 [ 622.950852] [] ? lg_global_unlock+0x67/0x67 [ 622.950854] [] kthread+0xd8/0xe0 [ 622.950857] [] ? wait_for_common+0x12f/0x164 [ 622.950859] [] ? kthread_create_on_node+0x124/0x124 [ 622.950861] [] ret_from_fork+0x7c/0xb0 [ 622.950862] [] ? kthread_create_on_node+0x124/0x124 [ 622.950876] smpboot: CPU 1 is now offline [ 623.194556] SMP alternatives: lockdep: fixing up alternatives [ 623.194559] smpboot: Booting Node 0 Processor 1 APIC 0x1 ... A correct fix is to unregister the hotcpu notifier during restore and register a new one in resume. Reported-by: Fengguang Wu Tested-by: Fengguang Wu Cc: Wanlong Gao Cc: Rusty Russell Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Reviewed-by: Wanlong Gao Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 9fbdfcd1e1a..bbc9cb84ec1 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1118,11 +1118,6 @@ static int virtnet_cpu_callback(struct notifier_block *nfb, { struct virtnet_info *vi = container_of(nfb, struct virtnet_info, nb); - mutex_lock(&vi->config_lock); - - if (!vi->config_enable) - goto done; - switch(action & ~CPU_TASKS_FROZEN) { case CPU_ONLINE: case CPU_DOWN_FAILED: @@ -1136,8 +1131,6 @@ static int virtnet_cpu_callback(struct notifier_block *nfb, break; } -done: - mutex_unlock(&vi->config_lock); return NOTIFY_OK; } @@ -1699,6 +1692,8 @@ static int virtnet_freeze(struct virtio_device *vdev) struct virtnet_info *vi = vdev->priv; int i; + unregister_hotcpu_notifier(&vi->nb); + /* Prevent config work handler from accessing the device */ mutex_lock(&vi->config_lock); vi->config_enable = false; @@ -1747,6 +1742,10 @@ static int virtnet_restore(struct virtio_device *vdev) virtnet_set_queues(vi, vi->curr_queue_pairs); rtnl_unlock(); + err = register_hotcpu_notifier(&vi->nb); + if (err) + return err; + return 0; } #endif -- cgit v1.2.3-70-g09d2 From ba275241030cfe87b87d6592345c7e7ebd9b6fba Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 1 Nov 2013 14:07:48 +0800 Subject: virtio-net: coalesce rx frags when possible during rx Commit 2613af0ed18a11d5c566a81f9a6510b73180660a (virtio_net: migrate mergeable rx buffers to page frag allocators) try to increase the payload/truesize for MTU-sized traffic. But this will introduce the extra overhead for GSO packets received because of the frag list. This commit tries to reduce this issue by coalesce the possible rx frags when possible during rx. Test result shows the about 15% improvement on full size GSO packet receiving (and even better than before commit 2613af0ed18a11d5c566a81f9a6510b73180660a). Before this commit: ./netperf -H 192.168.100.4 MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.100.4 () port 0 AF_INET : demo Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 10.00 20303.87 After this commit: ./netperf -H 192.168.100.4 MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.100.4 () port 0 AF_INET : demo Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 10.00 23841.26 Cc: Rusty Russell Cc: Michael S. Tsirkin Cc: Michael Dalton Cc: Eric Dumazet Acked-by: Michael S. Tsirkin Acked-by: Eric Dumazet Signed-off-by: Jason Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 656a02e28e2..a7e9ad9b213 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -305,7 +305,7 @@ static int receive_mergeable(struct receive_queue *rq, struct sk_buff *head_skb) struct sk_buff *curr_skb = head_skb; char *buf; struct page *page; - int num_buf, len; + int num_buf, len, offset; num_buf = hdr->mhdr.num_buffers; while (--num_buf) { @@ -342,9 +342,16 @@ static int receive_mergeable(struct receive_queue *rq, struct sk_buff *head_skb) head_skb->truesize += MAX_PACKET_LEN; } page = virt_to_head_page(buf); - skb_add_rx_frag(curr_skb, num_skb_frags, page, - buf - (char *)page_address(page), len, - MAX_PACKET_LEN); + offset = buf - (char *)page_address(page); + if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) { + put_page(page); + skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1, + len, MAX_PACKET_LEN); + } else { + skb_add_rx_frag(curr_skb, num_skb_frags, page, + offset, len, + MAX_PACKET_LEN); + } --rq->num; } return 0; -- cgit v1.2.3-70-g09d2 From 9bb8ca86075f37d3c169b9c46f8e7c6d3165e18f Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 5 Nov 2013 18:19:45 +0800 Subject: virtio-net: switch to use XPS to choose txq We used to use a percpu structure vq_index to record the cpu to queue mapping, this is suboptimal since it duplicates the work of XPS and loses all other XPS functionality such as allowing user to configure their own transmission steering strategy. So this patch switches to use XPS and suggest a default mapping when the number of cpus is equal to the number of queues. With XPS support, there's no need for keeping per-cpu vq_index and .ndo_select_queue(), so they were removed also. Cc: Rusty Russell Cc: Michael S. Tsirkin Acked-by: Rusty Russell Acked-by: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 48 ++---------------------------------------------- 1 file changed, 2 insertions(+), 46 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index a7e9ad9b213..01f4eb5c8b7 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -132,9 +132,6 @@ struct virtnet_info { /* Does the affinity hint is set for virtqueues? */ bool affinity_hint_set; - /* Per-cpu variable to show the mapping from CPU to virtqueue */ - int __percpu *vq_index; - /* CPU hot plug notifier */ struct notifier_block nb; }; @@ -1114,7 +1111,6 @@ static int virtnet_vlan_rx_kill_vid(struct net_device *dev, static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu) { int i; - int cpu; if (vi->affinity_hint_set) { for (i = 0; i < vi->max_queue_pairs; i++) { @@ -1124,16 +1120,6 @@ static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu) vi->affinity_hint_set = false; } - - i = 0; - for_each_online_cpu(cpu) { - if (cpu == hcpu) { - *per_cpu_ptr(vi->vq_index, cpu) = -1; - } else { - *per_cpu_ptr(vi->vq_index, cpu) = - ++i % vi->curr_queue_pairs; - } - } } static void virtnet_set_affinity(struct virtnet_info *vi) @@ -1155,7 +1141,7 @@ static void virtnet_set_affinity(struct virtnet_info *vi) for_each_online_cpu(cpu) { virtqueue_set_affinity(vi->rq[i].vq, cpu); virtqueue_set_affinity(vi->sq[i].vq, cpu); - *per_cpu_ptr(vi->vq_index, cpu) = i; + netif_set_xps_queue(vi->dev, cpumask_of(cpu), i); i++; } @@ -1269,28 +1255,6 @@ static int virtnet_change_mtu(struct net_device *dev, int new_mtu) return 0; } -/* To avoid contending a lock hold by a vcpu who would exit to host, select the - * txq based on the processor id. - */ -static u16 virtnet_select_queue(struct net_device *dev, struct sk_buff *skb) -{ - int txq; - struct virtnet_info *vi = netdev_priv(dev); - - if (skb_rx_queue_recorded(skb)) { - txq = skb_get_rx_queue(skb); - } else { - txq = *__this_cpu_ptr(vi->vq_index); - if (txq == -1) - txq = 0; - } - - while (unlikely(txq >= dev->real_num_tx_queues)) - txq -= dev->real_num_tx_queues; - - return txq; -} - static const struct net_device_ops virtnet_netdev = { .ndo_open = virtnet_open, .ndo_stop = virtnet_close, @@ -1302,7 +1266,6 @@ static const struct net_device_ops virtnet_netdev = { .ndo_get_stats64 = virtnet_stats, .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, - .ndo_select_queue = virtnet_select_queue, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = virtnet_netpoll, #endif @@ -1613,10 +1576,6 @@ static int virtnet_probe(struct virtio_device *vdev) if (vi->stats == NULL) goto free; - vi->vq_index = alloc_percpu(int); - if (vi->vq_index == NULL) - goto free_stats; - mutex_init(&vi->config_lock); vi->config_enable = true; INIT_WORK(&vi->config_work, virtnet_config_changed_work); @@ -1643,7 +1602,7 @@ static int virtnet_probe(struct virtio_device *vdev) /* Allocate/initialize the rx/tx queues, and invoke find_vqs */ err = init_vqs(vi); if (err) - goto free_index; + goto free_stats; netif_set_real_num_tx_queues(dev, 1); netif_set_real_num_rx_queues(dev, 1); @@ -1696,8 +1655,6 @@ free_vqs: virtnet_del_vqs(vi); if (vi->alloc_frag.page) put_page(vi->alloc_frag.page); -free_index: - free_percpu(vi->vq_index); free_stats: free_percpu(vi->stats); free: @@ -1736,7 +1693,6 @@ static void virtnet_remove(struct virtio_device *vdev) flush_work(&vi->config_work); - free_percpu(vi->vq_index); free_percpu(vi->stats); free_netdev(vi->dev); } -- cgit v1.2.3-70-g09d2 From 827da44c61419f29ae3be198c342e2147f1a10cb Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 7 Oct 2013 15:51:58 -0700 Subject: net: Explicitly initialize u64_stats_sync structures for lockdep In order to enable lockdep on seqcount/seqlock structures, we must explicitly initialize any locks. The u64_stats_sync structure, uses a seqcount, and thus we need to introduce a u64_stats_init() function and use it to initialize the structure. This unfortunately adds a lot of fairly trivial initialization code to a number of drivers. But the benefit of ensuring correctness makes this worth while. Because these changes are required for lockdep to be enabled, and the changes are quite trivial, I've not yet split this patch out into 30-some separate patches, as I figured it would be better to get the various maintainers thoughts on how to best merge this change along with the seqcount lockdep enablement. Feedback would be appreciated! Signed-off-by: John Stultz Acked-by: Julian Anastasov Signed-off-by: Peter Zijlstra Cc: Alexey Kuznetsov Cc: "David S. Miller" Cc: Eric Dumazet Cc: Hideaki YOSHIFUJI Cc: James Morris Cc: Jesse Gross Cc: Mathieu Desnoyers Cc: "Michael S. Tsirkin" Cc: Mirko Lindner Cc: Patrick McHardy Cc: Roger Luethi Cc: Rusty Russell Cc: Simon Horman Cc: Stephen Hemminger Cc: Steven Rostedt Cc: Thomas Petazzoni Cc: Wensong Zhang Cc: netdev@vger.kernel.org Link: http://lkml.kernel.org/r/1381186321-4906-2-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- drivers/net/dummy.c | 6 ++++++ drivers/net/ethernet/emulex/benet/be_main.c | 4 ++++ drivers/net/ethernet/intel/igb/igb_main.c | 5 +++++ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 ++++ drivers/net/ethernet/marvell/mvneta.c | 3 +++ drivers/net/ethernet/marvell/sky2.c | 3 +++ drivers/net/ethernet/neterion/vxge/vxge-main.c | 4 ++++ drivers/net/ethernet/nvidia/forcedeth.c | 2 ++ drivers/net/ethernet/realtek/8139too.c | 3 +++ drivers/net/ethernet/tile/tilepro.c | 2 ++ drivers/net/ethernet/via/via-rhine.c | 3 +++ drivers/net/ifb.c | 5 +++++ drivers/net/loopback.c | 6 ++++++ drivers/net/macvlan.c | 7 +++++++ drivers/net/nlmon.c | 8 ++++++++ drivers/net/team/team.c | 6 ++++++ drivers/net/team/team_mode_loadbalance.c | 9 ++++++++- drivers/net/veth.c | 8 ++++++++ drivers/net/virtio_net.c | 8 ++++++++ drivers/net/vxlan.c | 8 ++++++++ drivers/net/xen-netfront.c | 6 ++++++ include/linux/u64_stats_sync.h | 7 +++++++ net/8021q/vlan_dev.c | 9 ++++++++- net/bridge/br_device.c | 7 +++++++ net/ipv4/af_inet.c | 14 ++++++++++++++ net/ipv4/ip_tunnel.c | 8 +++++++- net/ipv6/addrconf.c | 14 ++++++++++++++ net/ipv6/af_inet6.c | 14 ++++++++++++++ net/ipv6/ip6_gre.c | 15 +++++++++++++++ net/ipv6/ip6_tunnel.c | 7 +++++++ net/ipv6/sit.c | 15 +++++++++++++++ net/netfilter/ipvs/ip_vs_ctl.c | 25 ++++++++++++++++++++++--- net/openvswitch/datapath.c | 6 ++++++ net/openvswitch/vport.c | 8 ++++++++ 34 files changed, 253 insertions(+), 6 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index b710c6b2d65..bd8f84b0b89 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -88,10 +88,16 @@ static netdev_tx_t dummy_xmit(struct sk_buff *skb, struct net_device *dev) static int dummy_dev_init(struct net_device *dev) { + int i; dev->dstats = alloc_percpu(struct pcpu_dstats); if (!dev->dstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_dstats *dstats; + dstats = per_cpu_ptr(dev->dstats, i); + u64_stats_init(&dstats->syncp); + } return 0; } diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 2c38cc40211..edd75950855 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2047,6 +2047,9 @@ static int be_tx_qs_create(struct be_adapter *adapter) if (status) return status; + u64_stats_init(&txo->stats.sync); + u64_stats_init(&txo->stats.sync_compl); + /* If num_evt_qs is less than num_tx_qs, then more than * one txq share an eq */ @@ -2108,6 +2111,7 @@ static int be_rx_cqs_create(struct be_adapter *adapter) if (rc) return rc; + u64_stats_init(&rxo->stats.sync); eq = &adapter->eq_obj[i % adapter->num_evt_qs].q; rc = be_cmd_cq_create(adapter, cq, eq, false, 3); if (rc) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 8cf44f2a8cc..b6edb93a8fc 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1223,6 +1223,9 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter, ring->count = adapter->tx_ring_count; ring->queue_index = txr_idx; + u64_stats_init(&ring->tx_syncp); + u64_stats_init(&ring->tx_syncp2); + /* assign ring to adapter */ adapter->tx_ring[txr_idx] = ring; @@ -1256,6 +1259,8 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter, ring->count = adapter->rx_ring_count; ring->queue_index = rxr_idx; + u64_stats_init(&ring->rx_syncp); + /* assign ring to adapter */ adapter->rx_ring[rxr_idx] = ring; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 0ade0cd5ef5..c1750364ddc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -4867,6 +4867,8 @@ int ixgbe_setup_tx_resources(struct ixgbe_ring *tx_ring) if (!tx_ring->tx_buffer_info) goto err; + u64_stats_init(&tx_ring->syncp); + /* round up to nearest 4K */ tx_ring->size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc); tx_ring->size = ALIGN(tx_ring->size, 4096); @@ -4949,6 +4951,8 @@ int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring) if (!rx_ring->rx_buffer_info) goto err; + u64_stats_init(&rx_ring->syncp); + /* Round up to nearest 4K */ rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc); rx_ring->size = ALIGN(rx_ring->size, 4096); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index e35bac7cfdf..cb4635c0cd7 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2792,6 +2792,9 @@ static int mvneta_probe(struct platform_device *pdev) pp = netdev_priv(dev); + u64_stats_init(&pp->tx_stats.syncp); + u64_stats_init(&pp->rx_stats.syncp); + pp->weight = MVNETA_RX_POLL_WEIGHT; pp->phy_node = phy_node; pp->phy_interface = phy_mode; diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index e09a8c6f853..339d841a538 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -4763,6 +4763,9 @@ static struct net_device *sky2_init_netdev(struct sky2_hw *hw, unsigned port, sky2->hw = hw; sky2->msg_enable = netif_msg_init(debug, default_msg); + u64_stats_init(&sky2->tx_stats.syncp); + u64_stats_init(&sky2->rx_stats.syncp); + /* Auto speed and flow control */ sky2->flags = SKY2_FLAG_AUTO_SPEED | SKY2_FLAG_AUTO_PAUSE; if (hw->chip_id != CHIP_ID_YUKON_XL) diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 5a20eaf903d..44626ec1127 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -2072,6 +2072,10 @@ static int vxge_open_vpaths(struct vxgedev *vdev) vdev->config.tx_steering_type; vpath->fifo.ndev = vdev->ndev; vpath->fifo.pdev = vdev->pdev; + + u64_stats_init(&vpath->fifo.stats.syncp); + u64_stats_init(&vpath->ring.stats.syncp); + if (vdev->config.tx_steering_type) vpath->fifo.txq = netdev_get_tx_queue(vdev->ndev, i); diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 098b96dad66..2d045be4b5c 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -5619,6 +5619,8 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) spin_lock_init(&np->lock); spin_lock_init(&np->hwstats_lock); SET_NETDEV_DEV(dev, &pci_dev->dev); + u64_stats_init(&np->swstats_rx_syncp); + u64_stats_init(&np->swstats_tx_syncp); init_timer(&np->oom_kick); np->oom_kick.data = (unsigned long) dev; diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index 3ccedeb8aba..c40e984868a 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -791,6 +791,9 @@ static struct net_device *rtl8139_init_board(struct pci_dev *pdev) pci_set_master (pdev); + u64_stats_init(&tp->rx_stats.syncp); + u64_stats_init(&tp->tx_stats.syncp); + retry: /* PIO bar register comes first. */ bar = !use_io; diff --git a/drivers/net/ethernet/tile/tilepro.c b/drivers/net/ethernet/tile/tilepro.c index 106be47716e..edb2e12a0fe 100644 --- a/drivers/net/ethernet/tile/tilepro.c +++ b/drivers/net/ethernet/tile/tilepro.c @@ -1008,6 +1008,8 @@ static void tile_net_register(void *dev_ptr) info->egress_timer.data = (long)info; info->egress_timer.function = tile_net_handle_egress_timer; + u64_stats_init(&info->stats.syncp); + priv->cpu[my_cpu] = info; /* diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index bdf697b184a..dd99715e664 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -987,6 +987,9 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rp->base = ioaddr; + u64_stats_init(&rp->tx_stats.syncp); + u64_stats_init(&rp->rx_stats.syncp); + /* Get chip registers into a sane state */ rhine_power_init(dev); rhine_hw_init(dev, pioaddr); diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index a3bed28197d..c14d39bf32d 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -265,6 +265,7 @@ MODULE_PARM_DESC(numifbs, "Number of ifb devices"); static int __init ifb_init_one(int index) { struct net_device *dev_ifb; + struct ifb_private *dp; int err; dev_ifb = alloc_netdev(sizeof(struct ifb_private), @@ -273,6 +274,10 @@ static int __init ifb_init_one(int index) if (!dev_ifb) return -ENOMEM; + dp = netdev_priv(dev_ifb); + u64_stats_init(&dp->rsync); + u64_stats_init(&dp->tsync); + dev_ifb->rtnl_link_ops = &ifb_link_ops; err = register_netdevice(dev_ifb); if (err < 0) diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index a17d85a331f..ac24c27b4b2 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -137,10 +137,16 @@ static const struct ethtool_ops loopback_ethtool_ops = { static int loopback_dev_init(struct net_device *dev) { + int i; dev->lstats = alloc_percpu(struct pcpu_lstats); if (!dev->lstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_lstats *lb_stats; + lb_stats = per_cpu_ptr(dev->lstats, i); + u64_stats_init(&lb_stats->syncp); + } return 0; } diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 9bf46bd19b8..0924e51b9ee 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -501,6 +501,7 @@ static int macvlan_init(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); const struct net_device *lowerdev = vlan->lowerdev; + int i; dev->state = (dev->state & ~MACVLAN_STATE_MASK) | (lowerdev->state & MACVLAN_STATE_MASK); @@ -516,6 +517,12 @@ static int macvlan_init(struct net_device *dev) if (!vlan->pcpu_stats) return -ENOMEM; + for_each_possible_cpu(i) { + struct macvlan_pcpu_stats *mvlstats; + mvlstats = per_cpu_ptr(vlan->pcpu_stats, i); + u64_stats_init(&mvlstats->syncp); + } + return 0; } diff --git a/drivers/net/nlmon.c b/drivers/net/nlmon.c index b57ce5f4896..d2bb12bfabd 100644 --- a/drivers/net/nlmon.c +++ b/drivers/net/nlmon.c @@ -47,8 +47,16 @@ static int nlmon_change_mtu(struct net_device *dev, int new_mtu) static int nlmon_dev_init(struct net_device *dev) { + int i; + dev->lstats = alloc_percpu(struct pcpu_lstats); + for_each_possible_cpu(i) { + struct pcpu_lstats *nlmstats; + nlmstats = per_cpu_ptr(dev->lstats, i); + u64_stats_init(&nlmstats->syncp); + } + return dev->lstats == NULL ? -ENOMEM : 0; } diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 50e43e64d51..6574eb8766f 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1540,6 +1540,12 @@ static int team_init(struct net_device *dev) if (!team->pcpu_stats) return -ENOMEM; + for_each_possible_cpu(i) { + struct team_pcpu_stats *team_stats; + team_stats = per_cpu_ptr(team->pcpu_stats, i); + u64_stats_init(&team_stats->syncp); + } + for (i = 0; i < TEAM_PORT_HASHENTRIES; i++) INIT_HLIST_HEAD(&team->en_port_hlist[i]); INIT_LIST_HEAD(&team->port_list); diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index 829a9cd2b4d..d671fc3ac5a 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -570,7 +570,7 @@ static int lb_init(struct team *team) { struct lb_priv *lb_priv = get_lb_priv(team); lb_select_tx_port_func_t *func; - int err; + int i, err; /* set default tx port selector */ func = lb_select_tx_port_get_func("hash"); @@ -588,6 +588,13 @@ static int lb_init(struct team *team) goto err_alloc_pcpu_stats; } + for_each_possible_cpu(i) { + struct lb_pcpu_stats *team_lb_stats; + team_lb_stats = per_cpu_ptr(lb_priv->pcpu_stats, i); + u64_stats_init(&team_lb_stats->syncp); + } + + INIT_DELAYED_WORK(&lb_priv->ex->stats.refresh_dw, lb_stats_refresh); err = team_options_register(team, lb_options, ARRAY_SIZE(lb_options)); diff --git a/drivers/net/veth.c b/drivers/net/veth.c index eee1f19ef1e..46e83e3fe99 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -230,10 +230,18 @@ static int veth_change_mtu(struct net_device *dev, int new_mtu) static int veth_dev_init(struct net_device *dev) { + int i; + dev->vstats = alloc_percpu(struct pcpu_vstats); if (!dev->vstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_vstats *veth_stats; + veth_stats = per_cpu_ptr(dev->vstats, i); + u64_stats_init(&veth_stats->syncp); + } + return 0; } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 9fbdfcd1e1a..ee384f3d612 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1569,6 +1569,14 @@ static int virtnet_probe(struct virtio_device *vdev) if (vi->stats == NULL) goto free; + for_each_possible_cpu(i) { + struct virtnet_stats *virtnet_stats; + virtnet_stats = per_cpu_ptr(vi->stats, i); + u64_stats_init(&virtnet_stats->tx_syncp); + u64_stats_init(&virtnet_stats->rx_syncp); + } + + vi->vq_index = alloc_percpu(int); if (vi->vq_index == NULL) goto free_stats; diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 2ef5b6219f3..01ab64d5f9a 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1884,11 +1884,19 @@ static int vxlan_init(struct net_device *dev) struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); struct vxlan_sock *vs; + int i; dev->tstats = alloc_percpu(struct pcpu_tstats); if (!dev->tstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_tstats *vxlan_stats; + vxlan_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&vxlan_stats->syncp); + } + + spin_lock(&vn->sock_lock); vs = vxlan_find_sock(dev_net(dev), vxlan->dst_port); if (vs) { diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 36808bf2567..54223ac6d8a 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1338,6 +1338,12 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) if (np->stats == NULL) goto exit; + for_each_possible_cpu(i) { + struct netfront_stats *xen_nf_stats; + xen_nf_stats = per_cpu_ptr(np->stats, i); + u64_stats_init(&xen_nf_stats->syncp); + } + /* Initialise tx_skbs as a free chain containing every entry. */ np->tx_skb_freelist = 0; for (i = 0; i < NET_TX_RING_SIZE; i++) { diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index 8da8c4e87da..7bfabd20204 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -67,6 +67,13 @@ struct u64_stats_sync { #endif }; + +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) +# define u64_stats_init(syncp) seqcount_init(syncp.seq) +#else +# define u64_stats_init(syncp) do { } while (0) +#endif + static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 09bf1c38805..4deff3ed1da 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -558,7 +558,7 @@ static const struct net_device_ops vlan_netdev_ops; static int vlan_dev_init(struct net_device *dev) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; - int subclass = 0; + int subclass = 0, i; netif_carrier_off(dev); @@ -612,6 +612,13 @@ static int vlan_dev_init(struct net_device *dev) if (!vlan_dev_priv(dev)->vlan_pcpu_stats) return -ENOMEM; + for_each_possible_cpu(i) { + struct vlan_pcpu_stats *vlan_stat; + vlan_stat = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i); + u64_stats_init(&vlan_stat->syncp); + } + + return 0; } diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index ca04163635d..7893d641775 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -88,11 +88,18 @@ out: static int br_dev_init(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); + int i; br->stats = alloc_percpu(struct br_cpu_netstats); if (!br->stats) return -ENOMEM; + for_each_possible_cpu(i) { + struct br_cpu_netstats *br_dev_stats; + br_dev_stats = per_cpu_ptr(br->stats, i); + u64_stats_init(&br_dev_stats->syncp); + } + return 0; } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index cfeb85cff4f..5f4617e377b 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1518,6 +1518,7 @@ int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align) ptr[0] = __alloc_percpu(mibsize, align); if (!ptr[0]) return -ENOMEM; + #if SNMP_ARRAY_SZ == 2 ptr[1] = __alloc_percpu(mibsize, align); if (!ptr[1]) { @@ -1561,6 +1562,8 @@ static const struct net_protocol icmp_protocol = { static __net_init int ipv4_mib_init_net(struct net *net) { + int i; + if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics, sizeof(struct tcp_mib), __alignof__(struct tcp_mib)) < 0) @@ -1569,6 +1572,17 @@ static __net_init int ipv4_mib_init_net(struct net *net) sizeof(struct ipstats_mib), __alignof__(struct ipstats_mib)) < 0) goto err_ip_mib; + + for_each_possible_cpu(i) { + struct ipstats_mib *af_inet_stats; + af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[0], i); + u64_stats_init(&af_inet_stats->syncp); +#if SNMP_ARRAY_SZ == 2 + af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[1], i); + u64_stats_init(&af_inet_stats->syncp); +#endif + } + if (snmp_mib_init((void __percpu **)net->mib.net_statistics, sizeof(struct linux_mib), __alignof__(struct linux_mib)) < 0) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 63a6d6d6b87..caf01176a5e 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -976,13 +976,19 @@ int ip_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; - int err; + int i, err; dev->destructor = ip_tunnel_dev_free; dev->tstats = alloc_percpu(struct pcpu_tstats); if (!dev->tstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_tstats *ipt_stats; + ipt_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ipt_stats->syncp); + } + err = gro_cells_init(&tunnel->gro_cells, dev); if (err) { free_percpu(dev->tstats); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index cd3fb301da3..d62d589bb62 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -281,10 +281,24 @@ static void addrconf_mod_dad_timer(struct inet6_ifaddr *ifp, static int snmp6_alloc_dev(struct inet6_dev *idev) { + int i; + if (snmp_mib_init((void __percpu **)idev->stats.ipv6, sizeof(struct ipstats_mib), __alignof__(struct ipstats_mib)) < 0) goto err_ip; + + for_each_possible_cpu(i) { + struct ipstats_mib *addrconf_stats; + addrconf_stats = per_cpu_ptr(idev->stats.ipv6[0], i); + u64_stats_init(&addrconf_stats->syncp); +#if SNMP_ARRAY_SZ == 2 + addrconf_stats = per_cpu_ptr(idev->stats.ipv6[1], i); + u64_stats_init(&addrconf_stats->syncp); +#endif + } + + idev->stats.icmpv6dev = kzalloc(sizeof(struct icmpv6_mib_device), GFP_KERNEL); if (!idev->stats.icmpv6dev) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 7c96100b021..a8f8559b3dc 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -719,6 +719,8 @@ static void ipv6_packet_cleanup(void) static int __net_init ipv6_init_mibs(struct net *net) { + int i; + if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6, sizeof(struct udp_mib), __alignof__(struct udp_mib)) < 0) @@ -731,6 +733,18 @@ static int __net_init ipv6_init_mibs(struct net *net) sizeof(struct ipstats_mib), __alignof__(struct ipstats_mib)) < 0) goto err_ip_mib; + + for_each_possible_cpu(i) { + struct ipstats_mib *af_inet6_stats; + af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[0], i); + u64_stats_init(&af_inet6_stats->syncp); +#if SNMP_ARRAY_SZ == 2 + af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[1], i); + u64_stats_init(&af_inet6_stats->syncp); +#endif + } + + if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics, sizeof(struct icmpv6_mib), __alignof__(struct icmpv6_mib)) < 0) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index bf4a9a084de..8acb28621f9 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1252,6 +1252,7 @@ static void ip6gre_tunnel_setup(struct net_device *dev) static int ip6gre_tunnel_init(struct net_device *dev) { struct ip6_tnl *tunnel; + int i; tunnel = netdev_priv(dev); @@ -1269,6 +1270,13 @@ static int ip6gre_tunnel_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_tstats *ip6gre_tunnel_stats; + ip6gre_tunnel_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ip6gre_tunnel_stats->syncp); + } + + return 0; } @@ -1449,6 +1457,7 @@ static void ip6gre_netlink_parms(struct nlattr *data[], static int ip6gre_tap_init(struct net_device *dev) { struct ip6_tnl *tunnel; + int i; tunnel = netdev_priv(dev); @@ -1462,6 +1471,12 @@ static int ip6gre_tap_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_tstats *ip6gre_tap_stats; + ip6gre_tap_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ip6gre_tap_stats->syncp); + } + return 0; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 583b77e2f69..df1fa58528c 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1494,12 +1494,19 @@ static inline int ip6_tnl_dev_init_gen(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); + int i; t->dev = dev; t->net = dev_net(dev); dev->tstats = alloc_percpu(struct pcpu_tstats); if (!dev->tstats) return -ENOMEM; + + for_each_possible_cpu(i) { + struct pcpu_tstats *ip6_tnl_stats; + ip6_tnl_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ip6_tnl_stats->syncp); + } return 0; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 19269453a8e..365dc5473eb 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1310,6 +1310,7 @@ static void ipip6_tunnel_setup(struct net_device *dev) static int ipip6_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); + int i; tunnel->dev = dev; tunnel->net = dev_net(dev); @@ -1322,6 +1323,12 @@ static int ipip6_tunnel_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_tstats *ipip6_tunnel_stats; + ipip6_tunnel_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ipip6_tunnel_stats->syncp); + } + return 0; } @@ -1331,6 +1338,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) struct iphdr *iph = &tunnel->parms.iph; struct net *net = dev_net(dev); struct sit_net *sitn = net_generic(net, sit_net_id); + int i; tunnel->dev = dev; tunnel->net = dev_net(dev); @@ -1344,6 +1352,13 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) dev->tstats = alloc_percpu(struct pcpu_tstats); if (!dev->tstats) return -ENOMEM; + + for_each_possible_cpu(i) { + struct pcpu_tstats *ipip6_fb_stats; + ipip6_fb_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ipip6_fb_stats->syncp); + } + dev_hold(dev); rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); return 0; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index a3df9bddc4f..3825725907f 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -842,7 +842,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, struct ip_vs_dest **dest_p) { struct ip_vs_dest *dest; - unsigned int atype; + unsigned int atype, i; EnterFunction(2); @@ -869,6 +869,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, if (!dest->stats.cpustats) goto err_alloc; + for_each_possible_cpu(i) { + struct ip_vs_cpu_stats *ip_vs_dest_stats; + ip_vs_dest_stats = per_cpu_ptr(dest->stats.cpustats, i); + u64_stats_init(&ip_vs_dest_stats->syncp); + } + dest->af = svc->af; dest->protocol = svc->protocol; dest->vaddr = svc->addr; @@ -1134,7 +1140,7 @@ static int ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, struct ip_vs_service **svc_p) { - int ret = 0; + int ret = 0, i; struct ip_vs_scheduler *sched = NULL; struct ip_vs_pe *pe = NULL; struct ip_vs_service *svc = NULL; @@ -1184,6 +1190,13 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, goto out_err; } + for_each_possible_cpu(i) { + struct ip_vs_cpu_stats *ip_vs_stats; + ip_vs_stats = per_cpu_ptr(svc->stats.cpustats, i); + u64_stats_init(&ip_vs_stats->syncp); + } + + /* I'm the first user of the service */ atomic_set(&svc->refcnt, 0); @@ -3780,7 +3793,7 @@ static struct notifier_block ip_vs_dst_notifier = { int __net_init ip_vs_control_net_init(struct net *net) { - int idx; + int i, idx; struct netns_ipvs *ipvs = net_ipvs(net); /* Initialize rs_table */ @@ -3799,6 +3812,12 @@ int __net_init ip_vs_control_net_init(struct net *net) if (!ipvs->tot_stats.cpustats) return -ENOMEM; + for_each_possible_cpu(i) { + struct ip_vs_cpu_stats *ipvs_tot_stats; + ipvs_tot_stats = per_cpu_ptr(ipvs->tot_stats.cpustats, i); + u64_stats_init(&ipvs_tot_stats->syncp); + } + spin_lock_init(&ipvs->tot_stats.lock); proc_create("ip_vs", 0, net->proc_net, &ip_vs_info_fops); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 2aa13bd7f2b..b92553c0227 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1698,6 +1698,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_destroy_table; } + for_each_possible_cpu(i) { + struct dp_stats_percpu *dpath_stats; + dpath_stats = per_cpu_ptr(dp->stats_percpu, i); + u64_stats_init(&dpath_stats->sync); + } + dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head), GFP_KERNEL); if (!dp->ports) { diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 6f65dbe1381..d830a95f03a 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -118,6 +118,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, { struct vport *vport; size_t alloc_size; + int i; alloc_size = sizeof(struct vport); if (priv_size) { @@ -141,6 +142,13 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, return ERR_PTR(-ENOMEM); } + for_each_possible_cpu(i) { + struct pcpu_tstats *vport_stats; + vport_stats = per_cpu_ptr(vport->percpu_stats, i); + u64_stats_init(&vport_stats->syncp); + } + + spin_lock_init(&vport->stats_lock); return vport; -- cgit v1.2.3-70-g09d2 From 5061de3666382f1f53fab89756792a1b177fd858 Mon Sep 17 00:00:00 2001 From: Michael Dalton Date: Thu, 14 Nov 2013 10:41:04 -0800 Subject: virtio-net: mergeable buffer size should include virtio-net header Commit 2613af0ed18a ("virtio_net: migrate mergeable rx buffers to page frag allocators") changed the mergeable receive buffer size from PAGE_SIZE to MTU-size. However, the merge buffer size does not take into account the size of the virtio-net header. Consequently, packets that are MTU-size will take two buffers intead of one (to store the virtio-net header), substantially decreasing the throughput of MTU-size traffic due to TCP window / SKB truesize effects. This commit changes the mergeable buffer size to include the virtio-net header. The buffer size is cacheline-aligned because skb_page_frag_refill will not automatically align the requested size. Benchmarks taken from an average of 5 netperf 30-second TCP_STREAM runs between two QEMU VMs on a single physical machine. Each VM has two VCPUs and vhost enabled. All VMs and vhost threads run in a single 4 CPU cgroup cpuset, using cgroups to ensure that other processes in the system will not be scheduled on the benchmark CPUs. Transmit offloads and mergeable receive buffers are enabled, but guest_tso4 / guest_csum are explicitly disabled to force MTU-sized packets on the receiver. next-net trunk before 2613af0ed18a (PAGE_SIZE buf): 3861.08Gb/s net-next trunk (MTU 1500- packet uses two buf due to size bug): 4076.62Gb/s net-next trunk (MTU 1480- packet fits in one buf): 6301.34Gb/s net-next trunk w/ size fix (MTU 1500 - packet fits in one buf): 6445.44Gb/s Suggested-by: Eric Northup Signed-off-by: Michael Dalton Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 01f4eb5c8b7..69fb225e59a 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -36,7 +36,10 @@ module_param(csum, bool, 0444); module_param(gso, bool, 0444); /* FIXME: MTU in config. */ -#define MAX_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN) +#define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN) +#define MERGE_BUFFER_LEN (ALIGN(GOOD_PACKET_LEN + \ + sizeof(struct virtio_net_hdr_mrg_rxbuf), \ + L1_CACHE_BYTES)) #define GOOD_COPY_LEN 128 #define VIRTNET_DRIVER_VERSION "1.0.0" @@ -314,10 +317,10 @@ static int receive_mergeable(struct receive_queue *rq, struct sk_buff *head_skb) head_skb->dev->stats.rx_length_errors++; return -EINVAL; } - if (unlikely(len > MAX_PACKET_LEN)) { + if (unlikely(len > MERGE_BUFFER_LEN)) { pr_debug("%s: rx error: merge buffer too long\n", head_skb->dev->name); - len = MAX_PACKET_LEN; + len = MERGE_BUFFER_LEN; } if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) { struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC); @@ -336,18 +339,17 @@ static int receive_mergeable(struct receive_queue *rq, struct sk_buff *head_skb) if (curr_skb != head_skb) { head_skb->data_len += len; head_skb->len += len; - head_skb->truesize += MAX_PACKET_LEN; + head_skb->truesize += MERGE_BUFFER_LEN; } page = virt_to_head_page(buf); offset = buf - (char *)page_address(page); if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) { put_page(page); skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1, - len, MAX_PACKET_LEN); + len, MERGE_BUFFER_LEN); } else { skb_add_rx_frag(curr_skb, num_skb_frags, page, - offset, len, - MAX_PACKET_LEN); + offset, len, MERGE_BUFFER_LEN); } --rq->num; } @@ -383,7 +385,7 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) struct page *page = virt_to_head_page(buf); skb = page_to_skb(rq, page, (char *)buf - (char *)page_address(page), - len, MAX_PACKET_LEN); + len, MERGE_BUFFER_LEN); if (unlikely(!skb)) { dev->stats.rx_dropped++; put_page(page); @@ -471,11 +473,11 @@ static int add_recvbuf_small(struct receive_queue *rq, gfp_t gfp) struct skb_vnet_hdr *hdr; int err; - skb = __netdev_alloc_skb_ip_align(vi->dev, MAX_PACKET_LEN, gfp); + skb = __netdev_alloc_skb_ip_align(vi->dev, GOOD_PACKET_LEN, gfp); if (unlikely(!skb)) return -ENOMEM; - skb_put(skb, MAX_PACKET_LEN); + skb_put(skb, GOOD_PACKET_LEN); hdr = skb_vnet_hdr(skb); sg_set_buf(rq->sg, &hdr->hdr, sizeof hdr->hdr); @@ -542,20 +544,20 @@ static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp) int err; if (gfp & __GFP_WAIT) { - if (skb_page_frag_refill(MAX_PACKET_LEN, &vi->alloc_frag, + if (skb_page_frag_refill(MERGE_BUFFER_LEN, &vi->alloc_frag, gfp)) { buf = (char *)page_address(vi->alloc_frag.page) + vi->alloc_frag.offset; get_page(vi->alloc_frag.page); - vi->alloc_frag.offset += MAX_PACKET_LEN; + vi->alloc_frag.offset += MERGE_BUFFER_LEN; } } else { - buf = netdev_alloc_frag(MAX_PACKET_LEN); + buf = netdev_alloc_frag(MERGE_BUFFER_LEN); } if (!buf) return -ENOMEM; - sg_init_one(rq->sg, buf, MAX_PACKET_LEN); + sg_init_one(rq->sg, buf, MERGE_BUFFER_LEN); err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, buf, gfp); if (err < 0) put_page(virt_to_head_page(buf)); -- cgit v1.2.3-70-g09d2 From 0f13b66b01c6e2ec4913a7812414183844d1cc4f Mon Sep 17 00:00:00 2001 From: Zhi Yong Wu Date: Mon, 18 Nov 2013 21:19:27 +0800 Subject: net, virtio_net: replace the magic value It is more appropriate to use # of queue pairs currently used by the driver instead of a magic value. Signed-off-by: Zhi Yong Wu Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 69fb225e59a..69ad42ba153 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1606,8 +1606,8 @@ static int virtnet_probe(struct virtio_device *vdev) if (err) goto free_stats; - netif_set_real_num_tx_queues(dev, 1); - netif_set_real_num_rx_queues(dev, 1); + netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs); + netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs); err = register_netdev(dev); if (err) { -- cgit v1.2.3-70-g09d2 From 99e872ae1eacb560152c0123cf1cef571569e681 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 29 Nov 2013 10:02:19 +0100 Subject: virtio_net: Fixed a trivial typo (fitler --> filter) "MAC filter" sounds more reasonable than "MAC fitler". Signed-off-by: Thomas Huth Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 7bab4de658a..acda6616997 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1084,7 +1084,7 @@ static void virtnet_set_rx_mode(struct net_device *dev) if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET, sg, NULL)) - dev_warn(&dev->dev, "Failed to set MAC fitler table.\n"); + dev_warn(&dev->dev, "Failed to set MAC filter table.\n"); kfree(buf); } -- cgit v1.2.3-70-g09d2 From 8fc3b9e9a229778e5af3aa453c44f1a3857ba769 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 28 Nov 2013 13:30:55 +0200 Subject: virtio_net: fix error handling for mergeable buffers Eric Dumazet noticed that if we encounter an error when processing a mergeable buffer, we don't dequeue all of the buffers from this packet, the result is almost sure to be loss of networking. Jason Wang noticed that we also leak a page and that we don't decrement the rq buf count, so we won't repost buffers (a resource leak). Fix both issues. Cc: Rusty Russell Cc: Michael Dalton Reported-by: Eric Dumazet Reported-by: Jason Wang Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 82 ++++++++++++++++++++++++++++++------------------ 1 file changed, 51 insertions(+), 31 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index acda6616997..71a2eac7b03 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -299,35 +299,47 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq, return skb; } -static int receive_mergeable(struct receive_queue *rq, struct sk_buff *head_skb) +static struct sk_buff *receive_mergeable(struct net_device *dev, + struct receive_queue *rq, + void *buf, + unsigned int len) { - struct skb_vnet_hdr *hdr = skb_vnet_hdr(head_skb); + struct skb_vnet_hdr *hdr = buf; + int num_buf = hdr->mhdr.num_buffers; + struct page *page = virt_to_head_page(buf); + int offset = buf - page_address(page); + struct sk_buff *head_skb = page_to_skb(rq, page, offset, len, + MERGE_BUFFER_LEN); struct sk_buff *curr_skb = head_skb; - char *buf; - struct page *page; - int num_buf, len, offset; - num_buf = hdr->mhdr.num_buffers; + if (unlikely(!curr_skb)) + goto err_skb; + while (--num_buf) { - int num_skb_frags = skb_shinfo(curr_skb)->nr_frags; + int num_skb_frags; + buf = virtqueue_get_buf(rq->vq, &len); if (unlikely(!buf)) { - pr_debug("%s: rx error: %d buffers missing\n", - head_skb->dev->name, hdr->mhdr.num_buffers); - head_skb->dev->stats.rx_length_errors++; - return -EINVAL; + pr_debug("%s: rx error: %d buffers out of %d missing\n", + dev->name, num_buf, hdr->mhdr.num_buffers); + dev->stats.rx_length_errors++; + goto err_buf; } if (unlikely(len > MERGE_BUFFER_LEN)) { pr_debug("%s: rx error: merge buffer too long\n", - head_skb->dev->name); + dev->name); len = MERGE_BUFFER_LEN; } + + page = virt_to_head_page(buf); + --rq->num; + + num_skb_frags = skb_shinfo(curr_skb)->nr_frags; if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) { struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC); - if (unlikely(!nskb)) { - head_skb->dev->stats.rx_dropped++; - return -ENOMEM; - } + + if (unlikely(!nskb)) + goto err_skb; if (curr_skb == head_skb) skb_shinfo(curr_skb)->frag_list = nskb; else @@ -341,8 +353,7 @@ static int receive_mergeable(struct receive_queue *rq, struct sk_buff *head_skb) head_skb->len += len; head_skb->truesize += MERGE_BUFFER_LEN; } - page = virt_to_head_page(buf); - offset = buf - (char *)page_address(page); + offset = buf - page_address(page); if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) { put_page(page); skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1, @@ -351,9 +362,28 @@ static int receive_mergeable(struct receive_queue *rq, struct sk_buff *head_skb) skb_add_rx_frag(curr_skb, num_skb_frags, page, offset, len, MERGE_BUFFER_LEN); } + } + + return head_skb; + +err_skb: + put_page(page); + while (--num_buf) { + buf = virtqueue_get_buf(rq->vq, &len); + if (unlikely(!buf)) { + pr_debug("%s: rx error: %d buffers missing\n", + dev->name, num_buf); + dev->stats.rx_length_errors++; + break; + } + page = virt_to_head_page(buf); + put_page(page); --rq->num; } - return 0; +err_buf: + dev->stats.rx_dropped++; + dev_kfree_skb(head_skb); + return NULL; } static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) @@ -382,19 +412,9 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) len -= sizeof(struct virtio_net_hdr); skb_trim(skb, len); } else if (vi->mergeable_rx_bufs) { - struct page *page = virt_to_head_page(buf); - skb = page_to_skb(rq, page, - (char *)buf - (char *)page_address(page), - len, MERGE_BUFFER_LEN); - if (unlikely(!skb)) { - dev->stats.rx_dropped++; - put_page(page); + skb = receive_mergeable(dev, rq, buf, len); + if (unlikely(!skb)) return; - } - if (receive_mergeable(rq, skb)) { - dev_kfree_skb(skb); - return; - } } else { page = buf; skb = page_to_skb(rq, page, 0, len, PAGE_SIZE); -- cgit v1.2.3-70-g09d2 From f121159d72091f25afb22007c833e60a6845e912 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 28 Nov 2013 13:30:59 +0200 Subject: virtio_net: make all RX paths handle erors consistently receive mergeable now handles errors internally. Do same for big and small packet paths, otherwise the logic is too hard to follow. Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 54 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 17 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 71a2eac7b03..916241d16c6 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -299,6 +299,35 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq, return skb; } +static struct sk_buff *receive_small(void *buf, unsigned int len) +{ + struct sk_buff * skb = buf; + + len -= sizeof(struct virtio_net_hdr); + skb_trim(skb, len); + + return skb; +} + +static struct sk_buff *receive_big(struct net_device *dev, + struct receive_queue *rq, + void *buf, + unsigned int len) +{ + struct page *page = buf; + struct sk_buff *skb = page_to_skb(rq, page, 0, len, PAGE_SIZE); + + if (unlikely(!skb)) + goto err; + + return skb; + +err: + dev->stats.rx_dropped++; + give_pages(rq, page); + return NULL; +} + static struct sk_buff *receive_mergeable(struct net_device *dev, struct receive_queue *rq, void *buf, @@ -392,7 +421,6 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) struct net_device *dev = vi->dev; struct virtnet_stats *stats = this_cpu_ptr(vi->stats); struct sk_buff *skb; - struct page *page; struct skb_vnet_hdr *hdr; if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) { @@ -407,23 +435,15 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) return; } - if (!vi->mergeable_rx_bufs && !vi->big_packets) { - skb = buf; - len -= sizeof(struct virtio_net_hdr); - skb_trim(skb, len); - } else if (vi->mergeable_rx_bufs) { + if (vi->mergeable_rx_bufs) skb = receive_mergeable(dev, rq, buf, len); - if (unlikely(!skb)) - return; - } else { - page = buf; - skb = page_to_skb(rq, page, 0, len, PAGE_SIZE); - if (unlikely(!skb)) { - dev->stats.rx_dropped++; - give_pages(rq, page); - return; - } - } + else if (vi->big_packets) + skb = receive_big(dev, rq, buf, len); + else + skb = receive_small(buf, len); + + if (unlikely(!skb)) + return; hdr = skb_vnet_hdr(skb); -- cgit v1.2.3-70-g09d2 From fa9fac1725fd38dad414e3729c56c089a7188383 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Thu, 5 Dec 2013 18:36:20 +0400 Subject: virtio-net: determine type of bufs correctly free_unused_bufs must check vi->mergeable_rx_bufs before vi->big_packets, because we use this sequence in other places. Otherwise we allocate buffer of one type, then free it as another type. general protection fault: 0000 [#1] SMP Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: ip6table_filter ip6_tables iptable_filter ip_tables pcspkr virtio_balloon virtio_net(-) i2c_pii CPU: 0 PID: 400 Comm: rmmod Not tainted 3.13.0-rc2+ #170 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: ffff8800b6d2a210 ti: ffff8800aed32000 task.ti: ffff8800aed32000 RIP: 0010:[] [] free_unused_bufs+0xc3/0x190 [virtio_net] RSP: 0018:ffff8800aed33dd8 EFLAGS: 00010202 RAX: ffff8800b1fe2c00 RBX: ffff8800b66a7240 RCX: 6b6b6b6b6b6b6b6b RDX: 6b6b6b6b6b6b6b6b RSI: ffff8800b8419a68 RDI: ffff8800b66a1148 RBP: ffff8800aed33e00 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 R13: ffff8800b66a1148 R14: 0000000000000000 R15: 000077ff80000000 FS: 00007fc4f9c4e740(0000) GS:ffff8800bfa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007f63f432f000 CR3: 00000000b6538000 CR4: 00000000000006f0 Stack: ffff8800b66a7240 ffff8800b66a7380 ffff8800377bd3f0 0000000000000000 00000000023302f0 ffff8800aed33e18 ffffffffa00346e2 ffff8800b66a7240 ffff8800aed33e38 ffffffffa003474d ffff8800377bd388 ffff8800377bd390 Call Trace: [] remove_vq_common+0x22/0x40 [virtio_net] [] virtnet_remove+0x4d/0x90 [virtio_net] [] virtio_dev_remove+0x23/0x80 [] __device_release_driver+0x7f/0xf0 [] driver_detach+0xc0/0xd0 [] bus_remove_driver+0x58/0xd0 [] driver_unregister+0x2c/0x50 [] unregister_virtio_driver+0xe/0x10 [] virtio_net_driver_exit+0x10/0x7be [virtio_net] [] SyS_delete_module+0x172/0x220 [] ? trace_hardirqs_on+0xd/0x10 [] ? __audit_syscall_entry+0x9c/0xf0 [] system_call_fastpath+0x16/0x1b Code: c0 74 55 0f 1f 44 00 00 80 7b 30 00 74 7a 48 8b 50 30 4c 89 e6 48 03 73 20 48 85 d2 0f 84 bb 00 00 00 66 0f RIP [] free_unused_bufs+0xc3/0x190 [virtio_net] RSP ---[ end trace edb570ea923cce9c ]--- Fixes: 2613af0ed18a (virtio_net: migrate mergeable rx buffers to page frag allocators) Cc: Michael Dalton Cc: Rusty Russell Cc: "Michael S. Tsirkin" Signed-off-by: Andrey Vagin Acked-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 916241d16c6..930039a8d0e 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1396,10 +1396,10 @@ static void free_unused_bufs(struct virtnet_info *vi) struct virtqueue *vq = vi->rq[i].vq; while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { - if (vi->big_packets) - give_pages(&vi->rq[i], buf); - else if (vi->mergeable_rx_bufs) + if (vi->mergeable_rx_bufs) put_page(virt_to_head_page(buf)); + else if (vi->big_packets) + give_pages(&vi->rq[i], buf); else dev_kfree_skb(buf); --vi->rq[i].num; -- cgit v1.2.3-70-g09d2 From d4fb84eefe5164f6a6ea51d0a9e26280c661a0dd Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Thu, 5 Dec 2013 18:36:21 +0400 Subject: virtio: delete napi structures from netdev before releasing memory free_netdev calls netif_napi_del too, but it's too late, because napi structures are placed on vi->rq. netif_napi_add() is called from virtnet_alloc_queues. general protection fault: 0000 [#1] SMP Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: ip6table_filter ip6_tables iptable_filter ip_tables virtio_balloon pcspkr virtio_net(-) i2c_pii CPU: 1 PID: 347 Comm: rmmod Not tainted 3.13.0-rc2+ #171 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: ffff8800b779c420 ti: ffff8800379e0000 task.ti: ffff8800379e0000 RIP: 0010:[] [] __list_del_entry+0x29/0xd0 RSP: 0018:ffff8800379e1dd0 EFLAGS: 00010a83 RAX: 6b6b6b6b6b6b6b6b RBX: ffff8800379c2fd0 RCX: dead000000200200 RDX: 6b6b6b6b6b6b6b6b RSI: 0000000000000001 RDI: ffff8800379c2fd0 RBP: ffff8800379e1dd0 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: ffff8800379c2f90 R13: ffff880037839160 R14: 0000000000000000 R15: 00000000013352f0 FS: 00007f1400e34740(0000) GS:ffff8800bfb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007f464124c763 CR3: 00000000b68cf000 CR4: 00000000000006e0 Stack: ffff8800379e1df0 ffffffff8155beab 6b6b6b6b6b6b6b2b ffff8800378391c0 ffff8800379e1e18 ffffffff8156499b ffff880037839be0 ffff880037839d20 ffff88003779d3f0 ffff8800379e1e38 ffffffffa003477c ffff88003779d388 Call Trace: [] netif_napi_del+0x1b/0x80 [] free_netdev+0x8b/0x110 [] virtnet_remove+0x7c/0x90 [virtio_net] [] virtio_dev_remove+0x23/0x80 [] __device_release_driver+0x7f/0xf0 [] driver_detach+0xc0/0xd0 [] bus_remove_driver+0x58/0xd0 [] driver_unregister+0x2c/0x50 [] unregister_virtio_driver+0xe/0x10 [] virtio_net_driver_exit+0x10/0x6ce [virtio_net] [] SyS_delete_module+0x172/0x220 [] ? trace_hardirqs_on+0xd/0x10 [] ? __audit_syscall_entry+0x9c/0xf0 [] system_call_fastpath+0x16/0x1b Code: 00 00 55 48 8b 17 48 b9 00 01 10 00 00 00 ad de 48 8b 47 08 48 89 e5 48 39 ca 74 29 48 b9 00 02 20 00 00 00 RIP [] __list_del_entry+0x29/0xd0 RSP ---[ end trace d5931cd3f87c9763 ]--- Fixes: 986a4f4d452d (virtio_net: multiqueue support) Cc: Rusty Russell Cc: "Michael S. Tsirkin" Signed-off-by: Andrey Vagin Acked-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 930039a8d0e..c2937644342 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1367,6 +1367,11 @@ static void virtnet_config_changed(struct virtio_device *vdev) static void virtnet_free_queues(struct virtnet_info *vi) { + int i; + + for (i = 0; i < vi->max_queue_pairs; i++) + netif_napi_del(&vi->rq[i].napi); + kfree(vi->rq); kfree(vi->sq); } -- cgit v1.2.3-70-g09d2 From 98bfd23cdb30e68e90571d7a2607e9479f8a50ec Mon Sep 17 00:00:00 2001 From: Michael Dalton Date: Thu, 5 Dec 2013 13:14:05 -0800 Subject: virtio-net: free bufs correctly on invalid packet length When a packet with invalid length arrives, ensure that the packet is freed correctly if mergeable packet buffers and big packets (GUEST_TSO4) are both enabled. Signed-off-by: Michael Dalton Acked-by: Jason Wang Acked-by: Andrew Vagin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index c2937644342..d208f860498 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -426,10 +426,10 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) { pr_debug("%s: short packet %i\n", dev->name, len); dev->stats.rx_length_errors++; - if (vi->big_packets) - give_pages(rq, buf); - else if (vi->mergeable_rx_bufs) + if (vi->mergeable_rx_bufs) put_page(virt_to_head_page(buf)); + else if (vi->big_packets) + give_pages(rq, buf); else dev_kfree_skb(buf); return; -- cgit v1.2.3-70-g09d2