From adf8d3ff6e982621c8cc2e8cac8e68d19fba6ac0 Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Fri, 6 Dec 2013 06:28:47 -0800 Subject: drivers/net/*: Fix FSF address in file headers Several files refer to an old address for the Free Software Foundation in the file header comment. Resolve by replacing the address with the URL so that we do not have to keep updating the header comments anytime the address changes. CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: Haiyang Zhang CC: "K. Y. Srinivasan" CC: Paul Mackerras CC: Ian Campbell CC: Wei Liu CC: Rusty Russell CC: "Michael S. Tsirkin" Signed-off-by: Jeff Kirsher Acked-by: Wei Liu Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 916241d16c6..5c24288814d 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -13,8 +13,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ //#define DEBUG #include <linux/netdevice.h> -- cgit v1.2.3-70-g09d2 From d24bae32fa9b541eb87edf1b3439f590c3b2dda5 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 9 Dec 2013 16:17:40 -0800 Subject: virtio_net: remove unused parameter to send_command All the code passes NULL for the last sg list (in). Simplify by just removing it. Signed-off-by: Stephen Hemminger Acked-by: Michael S. Tsirkin Signed-off-by: David S. 
Miller --- drivers/net/virtio_net.c | 34 +++++++++++++--------------------- 1 file changed, 13 insertions(+), 21 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 56c2229d28f..d26f695ee64 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -876,13 +876,12 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) * never fail unless improperly formated. */ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, - struct scatterlist *out, - struct scatterlist *in) + struct scatterlist *out) { struct scatterlist *sgs[4], hdr, stat; struct virtio_net_ctrl_hdr ctrl; virtio_net_ctrl_ack status = ~0; - unsigned out_num = 0, in_num = 0, tmp; + unsigned out_num = 0, tmp; /* Caller should know better */ BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)); @@ -895,16 +894,13 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, if (out) sgs[out_num++] = out; - if (in) - sgs[out_num + in_num++] = in; /* Add return status. 
*/ sg_init_one(&stat, &status, sizeof(status)); - sgs[out_num + in_num++] = &stat; + sgs[out_num] = &stat; - BUG_ON(out_num + in_num > ARRAY_SIZE(sgs)); - BUG_ON(virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC) - < 0); + BUG_ON(out_num + 1 > ARRAY_SIZE(sgs)); + BUG_ON(virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC) < 0); if (unlikely(!virtqueue_kick(vi->cvq))) return status == VIRTIO_NET_OK; @@ -934,8 +930,7 @@ static int virtnet_set_mac_address(struct net_device *dev, void *p) if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { sg_init_one(&sg, addr->sa_data, dev->addr_len); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, - VIRTIO_NET_CTRL_MAC_ADDR_SET, - &sg, NULL)) { + VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { dev_warn(&vdev->dev, "Failed to set mac address by vq command.\n"); return -EINVAL; @@ -1008,7 +1003,7 @@ static void virtnet_ack_link_announce(struct virtnet_info *vi) { rtnl_lock(); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE, - VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL, NULL)) + VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL)) dev_warn(&vi->dev->dev, "Failed to ack link announce.\n"); rtnl_unlock(); } @@ -1026,7 +1021,7 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) sg_init_one(&sg, &s, sizeof(s)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, - VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg, NULL)) { + VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) { dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n", queue_pairs); return -EINVAL; @@ -1076,16 +1071,14 @@ static void virtnet_set_rx_mode(struct net_device *dev) sg_init_one(sg, &promisc, sizeof(promisc)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, - VIRTIO_NET_CTRL_RX_PROMISC, - sg, NULL)) + VIRTIO_NET_CTRL_RX_PROMISC, sg)) dev_warn(&dev->dev, "Failed to %sable promisc mode.\n", promisc ? 
"en" : "dis"); sg_init_one(sg, &allmulti, sizeof(allmulti)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, - VIRTIO_NET_CTRL_RX_ALLMULTI, - sg, NULL)) + VIRTIO_NET_CTRL_RX_ALLMULTI, sg)) dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n", allmulti ? "en" : "dis"); @@ -1121,8 +1114,7 @@ static void virtnet_set_rx_mode(struct net_device *dev) sizeof(mac_data->entries) + (mc_count * ETH_ALEN)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, - VIRTIO_NET_CTRL_MAC_TABLE_SET, - sg, NULL)) + VIRTIO_NET_CTRL_MAC_TABLE_SET, sg)) dev_warn(&dev->dev, "Failed to set MAC filter table.\n"); kfree(buf); @@ -1137,7 +1129,7 @@ static int virtnet_vlan_rx_add_vid(struct net_device *dev, sg_init_one(&sg, &vid, sizeof(vid)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, - VIRTIO_NET_CTRL_VLAN_ADD, &sg, NULL)) + VIRTIO_NET_CTRL_VLAN_ADD, &sg)) dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid); return 0; } @@ -1151,7 +1143,7 @@ static int virtnet_vlan_rx_kill_vid(struct net_device *dev, sg_init_one(&sg, &vid, sizeof(vid)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, - VIRTIO_NET_CTRL_VLAN_DEL, &sg, NULL)) + VIRTIO_NET_CTRL_VLAN_DEL, &sg)) dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid); return 0; } -- cgit v1.2.3-70-g09d2 From 788a8b6dd3a28e02af5581923a14667f550009b5 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 9 Dec 2013 16:18:45 -0800 Subject: virtio_net: spelling fixes Signed-off-by: Stephen Hemminger Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index d26f695ee64..c51a98867a4 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -873,7 +873,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) /* * Send command via the control virtqueue and check status. 
Commands * supported by the hypervisor, as indicated by feature bits, should - * never fail unless improperly formated. + * never fail unless improperly formatted. */ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, struct scatterlist *out) @@ -1061,7 +1061,7 @@ static void virtnet_set_rx_mode(struct net_device *dev) void *buf; int i; - /* We can't dynamicaly set ndo_set_rx_mode, so return gracefully */ + /* We can't dynamically set ndo_set_rx_mode, so return gracefully */ if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX)) return; -- cgit v1.2.3-70-g09d2 From be121f46affce8a2ba284ef0388a8ff6ff85ec1b Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 16 Jan 2014 14:45:24 +0800 Subject: virtio-net: drop rq->max and rq->num It looks like there's no need for those two fields: - Unless there's a failure for the first refill try, rq->max should be always equal to the vring size. - rq->num is only used to determine the condition that we need to do the refill, we could check vq->num_free instead. - rq->num was required to be increased or decreased explicitly after each get/put which results a bad API. So this patch removes them both to make the code simpler. Cc: Rusty Russell Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Acked-by: Rusty Russell Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 7b172408cff..9bd70aa87bf 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -72,9 +72,6 @@ struct receive_queue { struct napi_struct napi; - /* Number of input buffers, and max we've ever had. */ - unsigned int num, max; - /* Chain pages by the private ptr. 
*/ struct page *pages; @@ -360,7 +357,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, } page = virt_to_head_page(buf); - --rq->num; num_skb_frags = skb_shinfo(curr_skb)->nr_frags; if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) { @@ -406,7 +402,6 @@ err_skb: } page = virt_to_head_page(buf); put_page(page); - --rq->num; } err_buf: dev->stats.rx_dropped++; @@ -628,10 +623,7 @@ static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp) oom = err == -ENOMEM; if (err) break; - ++rq->num; } while (rq->vq->num_free); - if (unlikely(rq->num > rq->max)) - rq->max = rq->num; if (unlikely(!virtqueue_kick(rq->vq))) return false; return !oom; @@ -699,11 +691,10 @@ again: while (received < budget && (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) { receive_buf(rq, buf, len); - --rq->num; received++; } - if (rq->num < rq->max / 2) { + if (rq->vq->num_free > virtqueue_get_vring_size(rq->vq) / 2) { if (!try_fill_recv(rq, GFP_ATOMIC)) schedule_delayed_work(&vi->refill, 0); } @@ -1398,9 +1389,7 @@ static void free_unused_bufs(struct virtnet_info *vi) give_pages(&vi->rq[i], buf); else dev_kfree_skb(buf); - --vi->rq[i].num; } - BUG_ON(vi->rq[i].num != 0); } } @@ -1671,7 +1660,8 @@ static int virtnet_probe(struct virtio_device *vdev) try_fill_recv(&vi->rq[i], GFP_KERNEL); /* If we didn't even get one input buffer, we're useless. */ - if (vi->rq[i].num == 0) { + if (vi->rq[i].vq->num_free == + virtqueue_get_vring_size(vi->rq[i].vq)) { free_unused_bufs(vi); err = -ENOMEM; goto free_recv_bufs; -- cgit v1.2.3-70-g09d2 From fb51879dbceab9c40a39018d5322451691909e15 Mon Sep 17 00:00:00 2001 From: Michael Dalton Date: Thu, 16 Jan 2014 22:23:26 -0800 Subject: virtio-net: use per-receive queue page frag alloc for mergeable bufs The virtio-net driver currently uses netdev_alloc_frag() for GFP_ATOMIC mergeable rx buffer allocations. This commit migrates virtio-net to use per-receive queue page frags for GFP_ATOMIC allocation. 
This change unifies mergeable rx buffer memory allocation, which now will use skb_refill_frag() for both atomic and GFP-WAIT buffer allocations. To address fragmentation concerns, if after buffer allocation there is too little space left in the page frag to allocate a subsequent buffer, the remaining space is added to the current allocated buffer so that the remaining space can be used to store packet data. Acked-by: Michael S. Tsirkin Signed-off-by: Michael Dalton Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 69 ++++++++++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 34 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 9bd70aa87bf..5ee71dccd09 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -75,6 +75,9 @@ struct receive_queue { /* Chain pages by the private ptr. */ struct page *pages; + /* Page frag for packet buffer allocation. */ + struct page_frag alloc_frag; + /* RX: fragments + linear part + virtio header */ struct scatterlist sg[MAX_SKB_FRAGS + 2]; @@ -123,11 +126,6 @@ struct virtnet_info { /* Lock for config space updates */ struct mutex config_lock; - /* Page_frag for GFP_KERNEL packet buffer allocation when we run - * low on memory. - */ - struct page_frag alloc_frag; - /* Does the affinity hint is set for virtqueues? 
*/ bool affinity_hint_set; @@ -333,8 +331,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, int num_buf = hdr->mhdr.num_buffers; struct page *page = virt_to_head_page(buf); int offset = buf - page_address(page); - struct sk_buff *head_skb = page_to_skb(rq, page, offset, len, - MERGE_BUFFER_LEN); + unsigned int truesize = max_t(unsigned int, len, MERGE_BUFFER_LEN); + struct sk_buff *head_skb = page_to_skb(rq, page, offset, len, truesize); struct sk_buff *curr_skb = head_skb; if (unlikely(!curr_skb)) @@ -350,11 +348,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, dev->stats.rx_length_errors++; goto err_buf; } - if (unlikely(len > MERGE_BUFFER_LEN)) { - pr_debug("%s: rx error: merge buffer too long\n", - dev->name); - len = MERGE_BUFFER_LEN; - } page = virt_to_head_page(buf); @@ -372,19 +365,20 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, head_skb->truesize += nskb->truesize; num_skb_frags = 0; } + truesize = max_t(unsigned int, len, MERGE_BUFFER_LEN); if (curr_skb != head_skb) { head_skb->data_len += len; head_skb->len += len; - head_skb->truesize += MERGE_BUFFER_LEN; + head_skb->truesize += truesize; } offset = buf - page_address(page); if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) { put_page(page); skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1, - len, MERGE_BUFFER_LEN); + len, truesize); } else { skb_add_rx_frag(curr_skb, num_skb_frags, page, - offset, len, MERGE_BUFFER_LEN); + offset, len, truesize); } } @@ -573,25 +567,24 @@ static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp) static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp) { - struct virtnet_info *vi = rq->vq->vdev->priv; - char *buf = NULL; + struct page_frag *alloc_frag = &rq->alloc_frag; + char *buf; int err; + unsigned int len, hole; - if (gfp & __GFP_WAIT) { - if (skb_page_frag_refill(MERGE_BUFFER_LEN, &vi->alloc_frag, - gfp)) { - buf = (char *)page_address(vi->alloc_frag.page) + - 
vi->alloc_frag.offset; - get_page(vi->alloc_frag.page); - vi->alloc_frag.offset += MERGE_BUFFER_LEN; - } - } else { - buf = netdev_alloc_frag(MERGE_BUFFER_LEN); - } - if (!buf) + if (unlikely(!skb_page_frag_refill(MERGE_BUFFER_LEN, alloc_frag, gfp))) return -ENOMEM; + buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset; + get_page(alloc_frag->page); + len = MERGE_BUFFER_LEN; + alloc_frag->offset += len; + hole = alloc_frag->size - alloc_frag->offset; + if (hole < MERGE_BUFFER_LEN) { + len += hole; + alloc_frag->offset += hole; + } - sg_init_one(rq->sg, buf, MERGE_BUFFER_LEN); + sg_init_one(rq->sg, buf, len); err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, buf, gfp); if (err < 0) put_page(virt_to_head_page(buf)); @@ -612,6 +605,7 @@ static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp) int err; bool oom; + gfp |= __GFP_COLD; do { if (vi->mergeable_rx_bufs) err = add_recvbuf_mergeable(rq, gfp); @@ -1368,6 +1362,14 @@ static void free_receive_bufs(struct virtnet_info *vi) } } +static void free_receive_page_frags(struct virtnet_info *vi) +{ + int i; + for (i = 0; i < vi->max_queue_pairs; i++) + if (vi->rq[i].alloc_frag.page) + put_page(vi->rq[i].alloc_frag.page); +} + static void free_unused_bufs(struct virtnet_info *vi) { void *buf; @@ -1695,9 +1697,8 @@ free_recv_bufs: unregister_netdev(dev); free_vqs: cancel_delayed_work_sync(&vi->refill); + free_receive_page_frags(vi); virtnet_del_vqs(vi); - if (vi->alloc_frag.page) - put_page(vi->alloc_frag.page); free_stats: free_percpu(vi->stats); free: @@ -1714,6 +1715,8 @@ static void remove_vq_common(struct virtnet_info *vi) free_receive_bufs(vi); + free_receive_page_frags(vi); + virtnet_del_vqs(vi); } @@ -1731,8 +1734,6 @@ static void virtnet_remove(struct virtio_device *vdev) unregister_netdev(vi->dev); remove_vq_common(vi); - if (vi->alloc_frag.page) - put_page(vi->alloc_frag.page); flush_work(&vi->config_work); -- cgit v1.2.3-70-g09d2 From ab7db91705e95ed1bba1304388936fccfa58c992 Mon Sep 17 00:00:00 
2001 From: Michael Dalton Date: Thu, 16 Jan 2014 22:23:27 -0800 Subject: virtio-net: auto-tune mergeable rx buffer size for improved performance Commit 2613af0ed18a ("virtio_net: migrate mergeable rx buffers to page frag allocators") changed the mergeable receive buffer size from PAGE_SIZE to MTU-size, introducing a single-stream regression for benchmarks with large average packet size. There is no single optimal buffer size for all workloads. For workloads with packet size <= MTU bytes, MTU + virtio-net header-sized buffers are preferred as larger buffers reduce the TCP window due to SKB truesize. However, single-stream workloads with large average packet sizes have higher throughput if larger (e.g., PAGE_SIZE) buffers are used. This commit auto-tunes the mergeable receiver buffer packet size by choosing the packet buffer size based on an EWMA of the recent packet sizes for the receive queue. Packet buffer sizes range from MTU_SIZE + virtio-net header len to PAGE_SIZE. This improves throughput for large packet workloads, as any workload with average packet size >= PAGE_SIZE will use PAGE_SIZE buffers. These optimizations interact positively with recent commit ba275241030c ("virtio-net: coalesce rx frags when possible during rx"), which coalesces adjacent RX SKB fragments in virtio_net. The coalescing optimizations benefit buffers of any size. Benchmarks taken from an average of 5 netperf 30-second TCP_STREAM runs between two QEMU VMs on a single physical machine. Each VM has two VCPUs with all offloads & vhost enabled. All VMs and vhost threads run in a single 4 CPU cgroup cpuset, using cgroups to ensure that other processes in the system will not be scheduled on the benchmark CPUs. Trunk includes SKB rx frag coalescing. 
net-next w/ virtio_net before 2613af0ed18a (PAGE_SIZE bufs): 14642.85Gb/s net-next (MTU-size bufs): 13170.01Gb/s net-next + auto-tune: 14555.94Gb/s Jason Wang also reported a throughput increase on mlx4 from 22Gb/s using MTU-sized buffers to about 26Gb/s using auto-tuning. Signed-off-by: Michael Dalton Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 100 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 75 insertions(+), 25 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 5ee71dccd09..dacd43b276d 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -26,6 +26,7 @@ #include <linux/if_vlan.h> #include <linux/slab.h> #include <linux/cpu.h> +#include <linux/average.h> static int napi_weight = NAPI_POLL_WEIGHT; module_param(napi_weight, int, 0444); @@ -36,11 +37,18 @@ module_param(gso, bool, 0444); /* FIXME: MTU in config. */ #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN) -#define MERGE_BUFFER_LEN (ALIGN(GOOD_PACKET_LEN + \ - sizeof(struct virtio_net_hdr_mrg_rxbuf), \ - L1_CACHE_BYTES)) #define GOOD_COPY_LEN 128 +/* Weight used for the RX packet size EWMA. The average packet size is used to + * determine the packet buffer size when refilling RX rings. As the entire RX + * ring may be refilled at once, the weight is chosen so that the EWMA will be + * insensitive to short-term, transient changes in packet size. + */ +#define RECEIVE_AVG_WEIGHT 64 + +/* Minimum alignment for mergeable packet buffers. */ +#define MERGEABLE_BUFFER_ALIGN max(L1_CACHE_BYTES, 256) + #define VIRTNET_DRIVER_VERSION "1.0.0" struct virtnet_stats { @@ -75,6 +83,9 @@ struct receive_queue { /* Chain pages by the private ptr. */ struct page *pages; + /* Average packet length for mergeable receive buffers. */ + struct ewma mrg_avg_pkt_len; + /* Page frag for packet buffer allocation. 
*/ struct page_frag alloc_frag; @@ -216,6 +227,24 @@ static void skb_xmit_done(struct virtqueue *vq) netif_wake_subqueue(vi->dev, vq2txq(vq)); } +static unsigned int mergeable_ctx_to_buf_truesize(unsigned long mrg_ctx) +{ + unsigned int truesize = mrg_ctx & (MERGEABLE_BUFFER_ALIGN - 1); + return (truesize + 1) * MERGEABLE_BUFFER_ALIGN; +} + +static void *mergeable_ctx_to_buf_address(unsigned long mrg_ctx) +{ + return (void *)(mrg_ctx & -MERGEABLE_BUFFER_ALIGN); + +} + +static unsigned long mergeable_buf_to_ctx(void *buf, unsigned int truesize) +{ + unsigned int size = truesize / MERGEABLE_BUFFER_ALIGN; + return (unsigned long)buf | (size - 1); +} + /* Called from bottom half context */ static struct sk_buff *page_to_skb(struct receive_queue *rq, struct page *page, unsigned int offset, @@ -324,31 +353,33 @@ err: static struct sk_buff *receive_mergeable(struct net_device *dev, struct receive_queue *rq, - void *buf, + unsigned long ctx, unsigned int len) { + void *buf = mergeable_ctx_to_buf_address(ctx); struct skb_vnet_hdr *hdr = buf; int num_buf = hdr->mhdr.num_buffers; struct page *page = virt_to_head_page(buf); int offset = buf - page_address(page); - unsigned int truesize = max_t(unsigned int, len, MERGE_BUFFER_LEN); + unsigned int truesize = max(len, mergeable_ctx_to_buf_truesize(ctx)); + struct sk_buff *head_skb = page_to_skb(rq, page, offset, len, truesize); struct sk_buff *curr_skb = head_skb; if (unlikely(!curr_skb)) goto err_skb; - while (--num_buf) { int num_skb_frags; - buf = virtqueue_get_buf(rq->vq, &len); - if (unlikely(!buf)) { + ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len); + if (unlikely(!ctx)) { pr_debug("%s: rx error: %d buffers out of %d missing\n", dev->name, num_buf, hdr->mhdr.num_buffers); dev->stats.rx_length_errors++; goto err_buf; } + buf = mergeable_ctx_to_buf_address(ctx); page = virt_to_head_page(buf); num_skb_frags = skb_shinfo(curr_skb)->nr_frags; @@ -365,7 +396,7 @@ static struct sk_buff *receive_mergeable(struct net_device 
*dev, head_skb->truesize += nskb->truesize; num_skb_frags = 0; } - truesize = max_t(unsigned int, len, MERGE_BUFFER_LEN); + truesize = max(len, mergeable_ctx_to_buf_truesize(ctx)); if (curr_skb != head_skb) { head_skb->data_len += len; head_skb->len += len; @@ -382,19 +413,20 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, } } + ewma_add(&rq->mrg_avg_pkt_len, head_skb->len); return head_skb; err_skb: put_page(page); while (--num_buf) { - buf = virtqueue_get_buf(rq->vq, &len); - if (unlikely(!buf)) { + ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len); + if (unlikely(!ctx)) { pr_debug("%s: rx error: %d buffers missing\n", dev->name, num_buf); dev->stats.rx_length_errors++; break; } - page = virt_to_head_page(buf); + page = virt_to_head_page(mergeable_ctx_to_buf_address(ctx)); put_page(page); } err_buf: @@ -414,17 +446,20 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) { pr_debug("%s: short packet %i\n", dev->name, len); dev->stats.rx_length_errors++; - if (vi->mergeable_rx_bufs) - put_page(virt_to_head_page(buf)); - else if (vi->big_packets) + if (vi->mergeable_rx_bufs) { + unsigned long ctx = (unsigned long)buf; + void *base = mergeable_ctx_to_buf_address(ctx); + put_page(virt_to_head_page(base)); + } else if (vi->big_packets) { give_pages(rq, buf); - else + } else { dev_kfree_skb(buf); + } return; } if (vi->mergeable_rx_bufs) - skb = receive_mergeable(dev, rq, buf, len); + skb = receive_mergeable(dev, rq, (unsigned long)buf, len); else if (vi->big_packets) skb = receive_big(dev, rq, buf, len); else @@ -567,25 +602,36 @@ static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp) static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp) { + const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); struct page_frag *alloc_frag = &rq->alloc_frag; char *buf; + unsigned long ctx; int err; unsigned int len, hole; - if 
(unlikely(!skb_page_frag_refill(MERGE_BUFFER_LEN, alloc_frag, gfp))) + len = hdr_len + clamp_t(unsigned int, ewma_read(&rq->mrg_avg_pkt_len), + GOOD_PACKET_LEN, PAGE_SIZE - hdr_len); + len = ALIGN(len, MERGEABLE_BUFFER_ALIGN); + if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp))) return -ENOMEM; + buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset; + ctx = mergeable_buf_to_ctx(buf, len); get_page(alloc_frag->page); - len = MERGE_BUFFER_LEN; alloc_frag->offset += len; hole = alloc_frag->size - alloc_frag->offset; - if (hole < MERGE_BUFFER_LEN) { + if (hole < len) { + /* To avoid internal fragmentation, if there is very likely not + * enough space for another buffer, add the remaining space to + * the current buffer. This extra space is not included in + * the truesize stored in ctx. + */ len += hole; alloc_frag->offset += hole; } sg_init_one(rq->sg, buf, len); - err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, buf, gfp); + err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, (void *)ctx, gfp); if (err < 0) put_page(virt_to_head_page(buf)); @@ -1385,12 +1431,15 @@ static void free_unused_bufs(struct virtnet_info *vi) struct virtqueue *vq = vi->rq[i].vq; while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { - if (vi->mergeable_rx_bufs) - put_page(virt_to_head_page(buf)); - else if (vi->big_packets) + if (vi->mergeable_rx_bufs) { + unsigned long ctx = (unsigned long)buf; + void *base = mergeable_ctx_to_buf_address(ctx); + put_page(virt_to_head_page(base)); + } else if (vi->big_packets) { give_pages(&vi->rq[i], buf); - else + } else { dev_kfree_skb(buf); + } } } } @@ -1498,6 +1547,7 @@ static int virtnet_alloc_queues(struct virtnet_info *vi) napi_weight); sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); + ewma_init(&vi->rq[i].mrg_avg_pkt_len, 1, RECEIVE_AVG_WEIGHT); sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg)); } -- cgit v1.2.3-70-g09d2 From fbf28d78f54016faa7f0b68cf632ac739f2204f7 Mon Sep 17 00:00:00 2001 From: Michael Dalton Date: Thu, 16 
Jan 2014 22:23:30 -0800 Subject: virtio-net: initial rx sysfs support, export mergeable rx buffer size Add initial support for per-rx queue sysfs attributes to virtio-net. If mergeable packet buffers are enabled, adds a read-only mergeable packet buffer size sysfs attribute for each RX queue. Suggested-by: Michael S. Tsirkin Acked-by: Michael S. Tsirkin Signed-off-by: Michael Dalton Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 46 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index dacd43b276d..d75f8edf4fb 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -600,18 +600,25 @@ static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp) return err; } -static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp) +static unsigned int get_mergeable_buf_len(struct ewma *avg_pkt_len) { const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); + unsigned int len; + + len = hdr_len + clamp_t(unsigned int, ewma_read(avg_pkt_len), + GOOD_PACKET_LEN, PAGE_SIZE - hdr_len); + return ALIGN(len, MERGEABLE_BUFFER_ALIGN); +} + +static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp) +{ struct page_frag *alloc_frag = &rq->alloc_frag; char *buf; unsigned long ctx; int err; unsigned int len, hole; - len = hdr_len + clamp_t(unsigned int, ewma_read(&rq->mrg_avg_pkt_len), - GOOD_PACKET_LEN, PAGE_SIZE - hdr_len); - len = ALIGN(len, MERGEABLE_BUFFER_ALIGN); + len = get_mergeable_buf_len(&rq->mrg_avg_pkt_len); if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp))) return -ENOMEM; @@ -1584,6 +1591,33 @@ err: return ret; } +#ifdef CONFIG_SYSFS +static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue, + struct rx_queue_attribute *attribute, char *buf) +{ + struct virtnet_info *vi = netdev_priv(queue->dev); + unsigned int queue_index = 
get_netdev_rx_queue_index(queue); + struct ewma *avg; + + BUG_ON(queue_index >= vi->max_queue_pairs); + avg = &vi->rq[queue_index].mrg_avg_pkt_len; + return sprintf(buf, "%u\n", get_mergeable_buf_len(avg)); +} + +static struct rx_queue_attribute mergeable_rx_buffer_size_attribute = + __ATTR_RO(mergeable_rx_buffer_size); + +static struct attribute *virtio_net_mrg_rx_attrs[] = { + &mergeable_rx_buffer_size_attribute.attr, + NULL +}; + +static const struct attribute_group virtio_net_mrg_rx_group = { + .name = "virtio_net", + .attrs = virtio_net_mrg_rx_attrs +}; +#endif + static int virtnet_probe(struct virtio_device *vdev) { int i, err; @@ -1698,6 +1732,10 @@ static int virtnet_probe(struct virtio_device *vdev) if (err) goto free_stats; +#ifdef CONFIG_SYSFS + if (vi->mergeable_rx_bufs) + dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group; +#endif netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs); netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs); -- cgit v1.2.3-70-g09d2