From 5b54dac856cb5bd6f33f4159012773e4a33704f7 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Mon, 21 Apr 2014 10:20:28 -0700 Subject: hyperv: Add support for virtual Receive Side Scaling (vRSS) This feature allows multiple channels to be used by each virtual NIC. It is available on Hyper-V host 2012 R2. Signed-off-by: Haiyang Zhang Reviewed-by: K. Y. Srinivasan Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 110 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 109 insertions(+), 1 deletion(-) (limited to 'drivers/net/hyperv/hyperv_net.h') diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index d18f711d0b0..57eb3f906d6 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -28,6 +28,96 @@ #include #include +/* RSS related */ +#define OID_GEN_RECEIVE_SCALE_CAPABILITIES 0x00010203 /* query only */ +#define OID_GEN_RECEIVE_SCALE_PARAMETERS 0x00010204 /* query and set */ + +#define NDIS_OBJECT_TYPE_RSS_CAPABILITIES 0x88 +#define NDIS_OBJECT_TYPE_RSS_PARAMETERS 0x89 + +#define NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2 2 +#define NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2 2 + +struct ndis_obj_header { + u8 type; + u8 rev; + u16 size; +} __packed; + +/* ndis_recv_scale_cap/cap_flag */ +#define NDIS_RSS_CAPS_MESSAGE_SIGNALED_INTERRUPTS 0x01000000 +#define NDIS_RSS_CAPS_CLASSIFICATION_AT_ISR 0x02000000 +#define NDIS_RSS_CAPS_CLASSIFICATION_AT_DPC 0x04000000 +#define NDIS_RSS_CAPS_USING_MSI_X 0x08000000 +#define NDIS_RSS_CAPS_RSS_AVAILABLE_ON_PORTS 0x10000000 +#define NDIS_RSS_CAPS_SUPPORTS_MSI_X 0x20000000 +#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV4 0x00000100 +#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6 0x00000200 +#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6_EX 0x00000400 + +struct ndis_recv_scale_cap { /* NDIS_RECEIVE_SCALE_CAPABILITIES */ + struct ndis_obj_header hdr; + u32 cap_flag; + u32 num_int_msg; + u32 num_recv_que; + u16 num_indirect_tabent; +} __packed; + + +/* ndis_recv_scale_param flags */ +#define NDIS_RSS_PARAM_FLAG_BASE_CPU_UNCHANGED 0x0001 +#define NDIS_RSS_PARAM_FLAG_HASH_INFO_UNCHANGED 0x0002 +#define NDIS_RSS_PARAM_FLAG_ITABLE_UNCHANGED 0x0004 +#define NDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED 0x0008 +#define NDIS_RSS_PARAM_FLAG_DISABLE_RSS 0x0010 + +/* Hash info bits */ +#define NDIS_HASH_FUNC_TOEPLITZ 0x00000001 +#define NDIS_HASH_IPV4 0x00000100 +#define NDIS_HASH_TCP_IPV4 0x00000200 +#define NDIS_HASH_IPV6 0x00000400 +#define NDIS_HASH_IPV6_EX 0x00000800 +#define NDIS_HASH_TCP_IPV6 0x00001000 +#define NDIS_HASH_TCP_IPV6_EX 0x00002000 + +#define NDIS_RSS_INDIRECTION_TABLE_MAX_SIZE_REVISION_2 (128 * 4) +#define NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 40 + +#define ITAB_NUM 128 +#define HASH_KEYLEN NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 +extern u8 netvsc_hash_key[]; + +struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */ + struct ndis_obj_header hdr; + + /* Qualifies the rest of the information */ + u16 flag; + + /* The base CPU number to do receive processing. not used */ + u16 base_cpu_number; + + /* This describes the hash function and type being enabled */ + u32 hashinfo; + + /* The size of indirection table array */ + u16 indirect_tabsize; + + /* The offset of the indirection table from the beginning of this + * structure + */ + u32 indirect_taboffset; + + /* The size of the hash secret key */ + u16 hashkey_size; + + /* The offset of the secret key from the beginning of this structure */ + u32 kashkey_offset; + + u32 processor_masks_offset; + u32 num_processor_masks; + u32 processor_masks_entry_size; +}; + /* Fwd declaration */ struct hv_netvsc_packet; struct ndis_tcp_ip_checksum_info; @@ -39,6 +129,8 @@ struct xferpage_packet { /* # of netvsc packets this xfer packet contains */ u32 count; + + struct vmbus_channel *channel; }; /* @@ -54,6 +146,9 @@ struct hv_netvsc_packet { bool is_data_pkt; u16 vlan_tci; + u16 q_idx; + struct vmbus_channel *channel; + /* * Valid only for receives when we break a xfer page packet * into multiple netvsc packets @@ -120,6 +215,7 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj, int netvsc_recv_callback(struct hv_device *device_obj, struct hv_netvsc_packet *packet, struct ndis_tcp_ip_checksum_info *csum_info); +void netvsc_channel_cb(void *context); int rndis_filter_open(struct hv_device *dev); int rndis_filter_close(struct hv_device *dev); int rndis_filter_device_add(struct hv_device *dev, @@ -522,6 +618,8 @@ struct nvsp_message { #define NETVSC_PACKET_SIZE 2048 +#define VRSS_SEND_TAB_SIZE 16 + /* Per netvsc channel-specific */ struct netvsc_device { struct hv_device *dev; @@ -555,10 +653,20 @@ struct netvsc_device { struct net_device *ndev; + struct vmbus_channel *chn_table[NR_CPUS]; + u32 send_table[VRSS_SEND_TAB_SIZE]; + u32 num_chn; + atomic_t queue_sends[NR_CPUS]; + /* Holds rndis device info */ void *extension; - /* The recive buffer for this device */ + + int ring_size; + + /* The primary channel callback buffer */ unsigned char cb_buffer[NETVSC_PACKET_SIZE]; + /* The sub channel callback buffer */ + unsigned char *sub_cb_buf; }; /* NdisInitialize message */ -- cgit v1.2.3-70-g09d2 From 4baab26129e0540746744232022110dbe9e011e7 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Mon, 21 Apr 2014 14:54:43 -0700 Subject: hyperv: Remove recv_pkt_list and lock Removed recv_pkt_list and lock, and updated related code, so that the locking overhead is reduced especially when multiple channels are in use. The recv_pkt_list isn't actually necessary because the packets are processed sequentially in each channel. It has been replaced by a local variable, and the related lock for this list is also removed. The is_data_pkt field is not used in receive path, so its assignment is cleaned up. Signed-off-by: Haiyang Zhang Reviewed-by: K. Y. Srinivasan Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 33 -------- drivers/net/hyperv/netvsc.c | 174 +++----------------------------------- drivers/net/hyperv/netvsc_drv.c | 2 +- drivers/net/hyperv/rndis_filter.c | 2 - 4 files changed, 13 insertions(+), 198 deletions(-) (limited to 'drivers/net/hyperv/hyperv_net.h') diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 57eb3f906d6..a1af0f7711e 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -119,27 +119,14 @@ struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */ }; /* Fwd declaration */ -struct hv_netvsc_packet; struct ndis_tcp_ip_checksum_info; -/* Represent the xfer page packet which contains 1 or more netvsc packet */ -struct xferpage_packet { - struct list_head list_ent; - u32 status; - - /* # of netvsc packets this xfer packet contains */ - u32 count; - - struct vmbus_channel *channel; -}; - /* * Represent netvsc packet which contains 1 RNDIS and 1 ethernet frame * within the RNDIS */ struct hv_netvsc_packet { /* Bookkeeping stuff */ - struct list_head list_ent; u32 status; struct hv_device *device; @@ -149,18 +136,7 @@ struct hv_netvsc_packet { u16 q_idx; struct vmbus_channel *channel; - /* - * Valid only for receives when we break a xfer page packet - * into multiple netvsc packets - */ - struct xferpage_packet *xfer_page_pkt; - union { - struct { - u64 recv_completion_tid; - void *recv_completion_ctx; - void (*recv_completion)(void *context); - } recv; struct { u64 send_completion_tid; void *send_completion_ctx; @@ -613,9 +589,6 @@ struct nvsp_message { #define NETVSC_RECEIVE_BUFFER_ID 0xcafe -/* Preallocated receive packets */ -#define NETVSC_RECEIVE_PACKETLIST_COUNT 256 - #define NETVSC_PACKET_SIZE 2048 #define VRSS_SEND_TAB_SIZE 16 @@ -630,12 +603,6 @@ struct netvsc_device { wait_queue_head_t wait_drain; bool start_remove; bool destroy; - /* - * List of free preallocated hv_netvsc_packet to represent receive - * packet - */ - struct list_head recv_pkt_list; - spinlock_t recv_pkt_list_lock; /* Receive buffer allocated by us but manages by NetVSP */ void *recv_buf; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index e7e77f12bc3..b10334773b3 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -387,7 +387,6 @@ static void netvsc_disconnect_vsp(struct netvsc_device *net_device) int netvsc_device_remove(struct hv_device *device) { struct netvsc_device *net_device; - struct hv_netvsc_packet *netvsc_packet, *pos; unsigned long flags; net_device = hv_get_drvdata(device); @@ -416,12 +415,6 @@ int netvsc_device_remove(struct hv_device *device) vmbus_close(device->channel); /* Release all resources */ - list_for_each_entry_safe(netvsc_packet, pos, - &net_device->recv_pkt_list, list_ent) { - list_del(&netvsc_packet->list_ent); - kfree(netvsc_packet); - } - if (net_device->sub_cb_buf) vfree(net_device->sub_cb_buf); @@ -641,62 +634,6 @@ retry_send_cmplt: } } -/* Send a receive completion packet to RNDIS device (ie NetVsp) */ -static void netvsc_receive_completion(void *context) -{ - struct hv_netvsc_packet *packet = context; - struct hv_device *device = packet->device; - struct vmbus_channel *channel; - struct netvsc_device *net_device; - u64 transaction_id = 0; - bool fsend_receive_comp = false; - unsigned long flags; - struct net_device *ndev; - u32 status = NVSP_STAT_NONE; - - /* - * Even though it seems logical to do a GetOutboundNetDevice() here to - * send out receive completion, we are using GetInboundNetDevice() - * since we may have disable outbound traffic already. - */ - net_device = get_inbound_net_device(device); - if (!net_device) - return; - ndev = net_device->ndev; - - /* Overloading use of the lock. */ - spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags); - - if (packet->status != NVSP_STAT_SUCCESS) - packet->xfer_page_pkt->status = NVSP_STAT_FAIL; - - packet->xfer_page_pkt->count--; - - /* - * Last one in the line that represent 1 xfer page packet. - * Return the xfer page packet itself to the freelist - */ - if (packet->xfer_page_pkt->count == 0) { - fsend_receive_comp = true; - channel = packet->xfer_page_pkt->channel; - transaction_id = packet->completion.recv.recv_completion_tid; - status = packet->xfer_page_pkt->status; - list_add_tail(&packet->xfer_page_pkt->list_ent, - &net_device->recv_pkt_list); - - } - - /* Put the packet back */ - list_add_tail(&packet->list_ent, &net_device->recv_pkt_list); - spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, flags); - - /* Send a receive completion for the xfer page packet */ - if (fsend_receive_comp) - netvsc_send_recv_completion(device, channel, net_device, - transaction_id, status); - -} - static void netvsc_receive(struct netvsc_device *net_device, struct vmbus_channel *channel, struct hv_device *device, @@ -704,16 +641,13 @@ static void netvsc_receive(struct netvsc_device *net_device, { struct vmtransfer_page_packet_header *vmxferpage_packet; struct nvsp_message *nvsp_packet; - struct hv_netvsc_packet *netvsc_packet = NULL; - /* struct netvsc_driver *netvscDriver; */ - struct xferpage_packet *xferpage_packet = NULL; + struct hv_netvsc_packet nv_pkt; + struct hv_netvsc_packet *netvsc_packet = &nv_pkt; + u32 status = NVSP_STAT_SUCCESS; int i; int count = 0; - unsigned long flags; struct net_device *ndev; - LIST_HEAD(listHead); - ndev = net_device->ndev; /* @@ -746,78 +680,14 @@ static void netvsc_receive(struct netvsc_device *net_device, return; } - /* - * Grab free packets (range count + 1) to represent this xfer - * page packet. +1 to represent the xfer page packet itself. - * We grab it here so that we know exactly how many we can - * fulfil - */ - spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags); - while (!list_empty(&net_device->recv_pkt_list)) { - list_move_tail(net_device->recv_pkt_list.next, &listHead); - if (++count == vmxferpage_packet->range_cnt + 1) - break; - } - spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, flags); - - /* - * We need at least 2 netvsc pkts (1 to represent the xfer - * page and at least 1 for the range) i.e. we can handled - * some of the xfer page packet ranges... - */ - if (count < 2) { - netdev_err(ndev, "Got only %d netvsc pkt...needed " - "%d pkts. Dropping this xfer page packet completely!\n", - count, vmxferpage_packet->range_cnt + 1); - - /* Return it to the freelist */ - spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags); - for (i = count; i != 0; i--) { - list_move_tail(listHead.next, - &net_device->recv_pkt_list); - } - spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, - flags); - - netvsc_send_recv_completion(device, channel, net_device, - vmxferpage_packet->d.trans_id, - NVSP_STAT_FAIL); - - return; - } - - /* Remove the 1st packet to represent the xfer page packet itself */ - xferpage_packet = (struct xferpage_packet *)listHead.next; - list_del(&xferpage_packet->list_ent); - xferpage_packet->status = NVSP_STAT_SUCCESS; - xferpage_packet->channel = channel; - - /* This is how much we can satisfy */ - xferpage_packet->count = count - 1; - - if (xferpage_packet->count != vmxferpage_packet->range_cnt) { - netdev_err(ndev, "Needed %d netvsc pkts to satisfy " - "this xfer page...got %d\n", - vmxferpage_packet->range_cnt, xferpage_packet->count); - } + count = vmxferpage_packet->range_cnt; + netvsc_packet->device = device; + netvsc_packet->channel = channel; /* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */ - for (i = 0; i < (count - 1); i++) { - netvsc_packet = (struct hv_netvsc_packet *)listHead.next; - list_del(&netvsc_packet->list_ent); - + for (i = 0; i < count; i++) { /* Initialize the netvsc packet */ netvsc_packet->status = NVSP_STAT_SUCCESS; - netvsc_packet->xfer_page_pkt = xferpage_packet; - netvsc_packet->completion.recv.recv_completion = - netvsc_receive_completion; - netvsc_packet->completion.recv.recv_completion_ctx = - netvsc_packet; - netvsc_packet->device = device; - /* Save this so that we can send it back */ - netvsc_packet->completion.recv.recv_completion_tid = - vmxferpage_packet->d.trans_id; - netvsc_packet->data = (void *)((unsigned long)net_device-> recv_buf + vmxferpage_packet->ranges[i].byte_offset); netvsc_packet->total_data_buflen = @@ -826,10 +696,12 @@ static void netvsc_receive(struct netvsc_device *net_device, /* Pass it to the upper layer */ rndis_filter_receive(device, netvsc_packet); - netvsc_receive_completion(netvsc_packet-> - completion.recv.recv_completion_ctx); + if (netvsc_packet->status != NVSP_STAT_SUCCESS) + status = NVSP_STAT_FAIL; } + netvsc_send_recv_completion(device, channel, net_device, + vmxferpage_packet->d.trans_id, status); } @@ -956,11 +828,9 @@ void netvsc_channel_cb(void *context) int netvsc_device_add(struct hv_device *device, void *additional_info) { int ret = 0; - int i; int ring_size = ((struct netvsc_device_info *)additional_info)->ring_size; struct netvsc_device *net_device; - struct hv_netvsc_packet *packet, *pos; struct net_device *ndev; net_device = alloc_net_device(device); @@ -981,18 +851,6 @@ int netvsc_device_add(struct hv_device *device, void *additional_info) ndev = net_device->ndev; /* Initialize the NetVSC channel extension */ - spin_lock_init(&net_device->recv_pkt_list_lock); - - INIT_LIST_HEAD(&net_device->recv_pkt_list); - - for (i = 0; i < NETVSC_RECEIVE_PACKETLIST_COUNT; i++) { - packet = kzalloc(sizeof(struct hv_netvsc_packet), GFP_KERNEL); - if (!packet) - break; - - list_add_tail(&packet->list_ent, - &net_device->recv_pkt_list); - } init_completion(&net_device->channel_init_wait); set_per_channel_state(device->channel, net_device->cb_buffer); @@ -1028,16 +886,8 @@ close: cleanup: - if (net_device) { - list_for_each_entry_safe(packet, pos, - &net_device->recv_pkt_list, - list_ent) { - list_del(&packet->list_ent); - kfree(packet); - } - + if (net_device) kfree(net_device); - } return ret; } diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 093cf3fc46b..8f6d53a2ed9 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -638,7 +638,7 @@ int netvsc_recv_callback(struct hv_device *device_obj, __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), packet->vlan_tci); - skb_record_rx_queue(skb, packet->xfer_page_pkt->channel-> + skb_record_rx_queue(skb, packet->channel-> offermsg.offer.sub_channel_index % net->real_num_rx_queues); diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index d92cfbe4341..48f5a0fbd67 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -401,8 +401,6 @@ static void rndis_filter_receive_data(struct rndis_device *dev, pkt->total_data_buflen = rndis_pkt->data_len; pkt->data = (void *)((unsigned long)pkt->data + data_offset); - pkt->is_data_pkt = true; - vlan = rndis_get_ppi(rndis_pkt, IEEE_8021Q_INFO); if (vlan) { pkt->vlan_tci = VLAN_TAG_PRESENT | vlan->vlanid | -- cgit v1.2.3-70-g09d2 From 893f66277799cd46bdf97429cc5d16a815a51273 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Mon, 21 Apr 2014 14:54:44 -0700 Subject: hyperv: Simplify the send_completion variables The union contains only one member now, so we use the variables in it directly. Signed-off-by: Haiyang Zhang Reviewed-by: K. Y. Srinivasan Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 10 +++------- drivers/net/hyperv/netvsc.c | 7 +++---- drivers/net/hyperv/netvsc_drv.c | 8 ++++---- drivers/net/hyperv/rndis_filter.c | 2 +- 4 files changed, 11 insertions(+), 16 deletions(-) (limited to 'drivers/net/hyperv/hyperv_net.h') diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index a1af0f7711e..d1f7826aa75 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -136,13 +136,9 @@ struct hv_netvsc_packet { u16 q_idx; struct vmbus_channel *channel; - union { - struct { - u64 send_completion_tid; - void *send_completion_ctx; - void (*send_completion)(void *context); - } send; - } completion; + u64 send_completion_tid; + void *send_completion_ctx; + void (*send_completion)(void *context); /* This points to the memory after page_buf */ struct rndis_message *rndis_msg; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index b10334773b3..bbee4463503 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -479,9 +479,8 @@ static void netvsc_send_completion(struct netvsc_device *net_device, if (nvsc_packet) { q_idx = nvsc_packet->q_idx; channel = nvsc_packet->channel; - nvsc_packet->completion.send.send_completion( - nvsc_packet->completion.send. - send_completion_ctx); + nvsc_packet->send_completion(nvsc_packet-> + send_completion_ctx); } num_outstanding_sends = @@ -534,7 +533,7 @@ int netvsc_send(struct hv_device *device, 0xFFFFFFFF; sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0; - if (packet->completion.send.send_completion) + if (packet->send_completion) req_id = (ulong)packet; else req_id = 0; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 8f6d53a2ed9..c76b66515e9 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -235,7 +235,7 @@ static void netvsc_xmit_completion(void *context) { struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context; struct sk_buff *skb = (struct sk_buff *) - (unsigned long)packet->completion.send.send_completion_tid; + (unsigned long)packet->send_completion_tid; kfree(packet); @@ -425,9 +425,9 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) (num_data_pgs * sizeof(struct hv_page_buffer))); /* Set the completion routine */ - packet->completion.send.send_completion = netvsc_xmit_completion; - packet->completion.send.send_completion_ctx = packet; - packet->completion.send.send_completion_tid = (unsigned long)skb; + packet->send_completion = netvsc_xmit_completion; + packet->send_completion_ctx = packet; + packet->send_completion_tid = (unsigned long)skb; isvlan = packet->vlan_tci & VLAN_TAG_PRESENT; diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 48f5a0fbd67..99c527adae5 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -236,7 +236,7 @@ static int rndis_filter_send_request(struct rndis_device *dev, packet->page_buf[0].len; } - packet->completion.send.send_completion = NULL; + packet->send_completion = NULL; ret = netvsc_send(dev->net_dev->dev, packet); return ret; -- cgit v1.2.3-70-g09d2 From c25aaf814a63f9d9c4e45416f13d70ef0aa0be2e Mon Sep 17 00:00:00 2001 From: KY Srinivasan Date: Wed, 30 Apr 2014 10:14:31 -0700 Subject: hyperv: Enable sendbuf mechanism on the send path We send packets using a copy-free mechanism (this is the Guest to Host transport via VMBUS). While this is obviously optimal for large packets, it may not be optimal for small packets. Hyper-V host supports a second mechanism for sending packets that is "copy based". We implement that mechanism in this patch. In this version of the patch I have addressed a comment from David Miller. With this patch (and all of the other offload and VRSS patches), we are now able to almost saturate a 10G interface between Linux VMs on Hyper-V on different hosts - close to 9 Gbps as measured via iperf. Signed-off-by: K. Y. Srinivasan Reviewed-by: Haiyang Zhang Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 14 +++ drivers/net/hyperv/netvsc.c | 226 ++++++++++++++++++++++++++++++++++++++-- drivers/net/hyperv/netvsc_drv.c | 3 +- 3 files changed, 234 insertions(+), 9 deletions(-) (limited to 'drivers/net/hyperv/hyperv_net.h') diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index d1f7826aa75..4b7df5a5c96 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -140,6 +140,8 @@ struct hv_netvsc_packet { void *send_completion_ctx; void (*send_completion)(void *context); + u32 send_buf_index; + /* This points to the memory after page_buf */ struct rndis_message *rndis_msg; @@ -582,6 +584,9 @@ struct nvsp_message { #define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */ #define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */ +#define NETVSC_SEND_BUFFER_SIZE (1024 * 1024) /* 1MB */ +#define NETVSC_INVALID_INDEX -1 + #define NETVSC_RECEIVE_BUFFER_ID 0xcafe @@ -607,6 +612,15 @@ struct netvsc_device { u32 recv_section_cnt; struct nvsp_1_receive_buffer_section *recv_section; + /* Send buffer allocated by us */ + void *send_buf; + u32 send_buf_size; + u32 send_buf_gpadl_handle; + u32 send_section_cnt; + u32 send_section_size; + unsigned long *send_section_map; + int map_words; + /* Used for NetVSP initialization protocol */ struct completion channel_init_wait; struct nvsp_message channel_init_pkt; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index bbee4463503..c041f63a6d3 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "hyperv_net.h" @@ -80,7 +81,7 @@ get_in_err: } -static int netvsc_destroy_recv_buf(struct netvsc_device *net_device) +static int netvsc_destroy_buf(struct netvsc_device *net_device) { struct nvsp_message *revoke_packet; int ret = 0; @@ -146,10 +147,62 @@ static int netvsc_destroy_recv_buf(struct netvsc_device *net_device) net_device->recv_section = NULL; } + /* Deal with the send buffer we may have setup. + * If we got a send section size, it means we received a + * SendsendBufferComplete msg (ie sent + * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need + * to send a revoke msg here + */ + if (net_device->send_section_size) { + /* Send the revoke receive buffer */ + revoke_packet = &net_device->revoke_packet; + memset(revoke_packet, 0, sizeof(struct nvsp_message)); + + revoke_packet->hdr.msg_type = + NVSP_MSG1_TYPE_REVOKE_SEND_BUF; + revoke_packet->msg.v1_msg.revoke_recv_buf.id = 0; + + ret = vmbus_sendpacket(net_device->dev->channel, + revoke_packet, + sizeof(struct nvsp_message), + (unsigned long)revoke_packet, + VM_PKT_DATA_INBAND, 0); + /* If we failed here, we might as well return and + * have a leak rather than continue and a bugchk + */ + if (ret != 0) { + netdev_err(ndev, "unable to send " + "revoke send buffer to netvsp\n"); + return ret; + } + } + /* Teardown the gpadl on the vsp end */ + if (net_device->send_buf_gpadl_handle) { + ret = vmbus_teardown_gpadl(net_device->dev->channel, + net_device->send_buf_gpadl_handle); + + /* If we failed here, we might as well return and have a leak + * rather than continue and a bugchk + */ + if (ret != 0) { + netdev_err(ndev, + "unable to teardown send buffer's gpadl\n"); + return ret; + } + net_device->recv_buf_gpadl_handle = 0; + } + if (net_device->send_buf) { + /* Free up the receive buffer */ + free_pages((unsigned long)net_device->send_buf, + get_order(net_device->send_buf_size)); + net_device->send_buf = NULL; + } + kfree(net_device->send_section_map); + return ret; } -static int netvsc_init_recv_buf(struct hv_device *device) +static int netvsc_init_buf(struct hv_device *device) { int ret = 0; int t; @@ -248,10 +301,90 @@ static int netvsc_init_recv_buf(struct hv_device *device) goto cleanup; } + /* Now setup the send buffer. + */ + net_device->send_buf = + (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, + get_order(net_device->send_buf_size)); + if (!net_device->send_buf) { + netdev_err(ndev, "unable to allocate send " + "buffer of size %d\n", net_device->send_buf_size); + ret = -ENOMEM; + goto cleanup; + } + + /* Establish the gpadl handle for this buffer on this + * channel. Note: This call uses the vmbus connection rather + * than the channel to establish the gpadl handle. + */ + ret = vmbus_establish_gpadl(device->channel, net_device->send_buf, + net_device->send_buf_size, + &net_device->send_buf_gpadl_handle); + if (ret != 0) { + netdev_err(ndev, + "unable to establish send buffer's gpadl\n"); + goto cleanup; + } + + /* Notify the NetVsp of the gpadl handle */ + init_packet = &net_device->channel_init_pkt; + memset(init_packet, 0, sizeof(struct nvsp_message)); + init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF; + init_packet->msg.v1_msg.send_recv_buf.gpadl_handle = + net_device->send_buf_gpadl_handle; + init_packet->msg.v1_msg.send_recv_buf.id = 0; + + /* Send the gpadl notification request */ + ret = vmbus_sendpacket(device->channel, init_packet, + sizeof(struct nvsp_message), + (unsigned long)init_packet, + VM_PKT_DATA_INBAND, + VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + if (ret != 0) { + netdev_err(ndev, + "unable to send send buffer's gpadl to netvsp\n"); + goto cleanup; + } + + t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ); + BUG_ON(t == 0); + + /* Check the response */ + if (init_packet->msg.v1_msg. + send_send_buf_complete.status != NVSP_STAT_SUCCESS) { + netdev_err(ndev, "Unable to complete send buffer " + "initialization with NetVsp - status %d\n", + init_packet->msg.v1_msg. + send_recv_buf_complete.status); + ret = -EINVAL; + goto cleanup; + } + + /* Parse the response */ + net_device->send_section_size = init_packet->msg. + v1_msg.send_send_buf_complete.section_size; + + /* Section count is simply the size divided by the section size. + */ + net_device->send_section_cnt = + net_device->send_buf_size/net_device->send_section_size; + + dev_info(&device->device, "Send section size: %d, Section count:%d\n", + net_device->send_section_size, net_device->send_section_cnt); + + /* Setup state for managing the send buffer. */ + net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt, + BITS_PER_LONG); + + net_device->send_section_map = + kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL); + if (net_device->send_section_map == NULL) + goto cleanup; + goto exit; cleanup: - netvsc_destroy_recv_buf(net_device); + netvsc_destroy_buf(net_device); exit: return ret; @@ -369,8 +502,9 @@ static int netvsc_connect_vsp(struct hv_device *device) net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY; else net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE; + net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE; - ret = netvsc_init_recv_buf(device); + ret = netvsc_init_buf(device); cleanup: return ret; @@ -378,7 +512,7 @@ cleanup: static void netvsc_disconnect_vsp(struct netvsc_device *net_device) { - netvsc_destroy_recv_buf(net_device); + netvsc_destroy_buf(net_device); } /* @@ -440,6 +574,12 @@ static inline u32 hv_ringbuf_avail_percent( return avail_write * 100 / ring_info->ring_datasize; } +static inline void netvsc_free_send_slot(struct netvsc_device *net_device, + u32 index) +{ + sync_change_bit(index, net_device->send_section_map); +} + static void netvsc_send_completion(struct netvsc_device *net_device, struct hv_device *device, struct vmpacket_descriptor *packet) @@ -447,6 +587,7 @@ static void netvsc_send_completion(struct netvsc_device *net_device, struct nvsp_message *nvsp_packet; struct hv_netvsc_packet *nvsc_packet; struct net_device *ndev; + u32 send_index; ndev = net_device->ndev; @@ -477,6 +618,9 @@ static void netvsc_send_completion(struct netvsc_device *net_device, /* Notify the layer above us */ if (nvsc_packet) { + send_index = nvsc_packet->send_buf_index; + if (send_index != NETVSC_INVALID_INDEX) + netvsc_free_send_slot(net_device, send_index); q_idx = nvsc_packet->q_idx; channel = nvsc_packet->channel; nvsc_packet->send_completion(nvsc_packet-> @@ -504,6 +648,52 @@ static void netvsc_send_completion(struct netvsc_device *net_device, } +static u32 netvsc_get_next_send_section(struct netvsc_device *net_device) +{ + unsigned long index; + u32 max_words = net_device->map_words; + unsigned long *map_addr = (unsigned long *)net_device->send_section_map; + u32 section_cnt = net_device->send_section_cnt; + int ret_val = NETVSC_INVALID_INDEX; + int i; + int prev_val; + + for (i = 0; i < max_words; i++) { + if (!~(map_addr[i])) + continue; + index = ffz(map_addr[i]); + prev_val = sync_test_and_set_bit(index, &map_addr[i]); + if (prev_val) + continue; + if ((index + (i * BITS_PER_LONG)) >= section_cnt) + break; + ret_val = (index + (i * BITS_PER_LONG)); + break; + } + return ret_val; +} + +u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device, + unsigned int section_index, + struct hv_netvsc_packet *packet) +{ + char *start = net_device->send_buf; + char *dest = (start + (section_index * net_device->send_section_size)); + int i; + u32 msg_size = 0; + + for (i = 0; i < packet->page_buf_cnt; i++) { + char *src = phys_to_virt(packet->page_buf[i].pfn << PAGE_SHIFT); + u32 offset = packet->page_buf[i].offset; + u32 len = packet->page_buf[i].len; + + memcpy(dest, (src + offset), len); + msg_size += len; + dest += len; + } + return msg_size; +} + int netvsc_send(struct hv_device *device, struct hv_netvsc_packet *packet) { @@ -513,6 +703,10 @@ int netvsc_send(struct hv_device *device, struct net_device *ndev; struct vmbus_channel *out_channel = NULL; u64 req_id; + unsigned int section_index = NETVSC_INVALID_INDEX; + u32 msg_size = 0; + struct sk_buff *skb; + net_device = get_outbound_net_device(device); if (!net_device) @@ -528,10 +722,26 @@ int netvsc_send(struct hv_device *device, sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 1; } - /* Not using send buffer section */ + /* Attempt to send via sendbuf */ + if (packet->total_data_buflen < net_device->send_section_size) { + section_index = netvsc_get_next_send_section(net_device); + if (section_index != NETVSC_INVALID_INDEX) { + msg_size = netvsc_copy_to_send_buf(net_device, + section_index, + packet); + skb = (struct sk_buff *) + (unsigned long)packet->send_completion_tid; + if (skb) + dev_kfree_skb_any(skb); + packet->page_buf_cnt = 0; + } + } + packet->send_buf_index = section_index; + + sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_index = - 0xFFFFFFFF; - sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0; + section_index; + sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = msg_size; if (packet->send_completion) req_id = (ulong)packet; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index c76b66515e9..939e3af60ec 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -236,10 +236,11 @@ static void netvsc_xmit_completion(void *context) struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context; struct sk_buff *skb = (struct sk_buff *) (unsigned long)packet->send_completion_tid; + u32 index = packet->send_buf_index; kfree(packet); - if (skb) + if (skb && (index == NETVSC_INVALID_INDEX)) dev_kfree_skb_any(skb); } -- cgit v1.2.3-70-g09d2 From 307f099520b66504cf6c5638f3f404c48b9fb45b Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Wed, 21 May 2014 12:55:39 -0700 Subject: hyperv: Add hash value into RNDIS Per-packet info It passes the hash value as the RNDIS Per-packet info to the Hyper-V host, so that the send completion notices can be spread across multiple channels. MS-TFS: 140273 Signed-off-by: Haiyang Zhang Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 4 ++++ drivers/net/hyperv/netvsc_drv.c | 18 ++++++++++++++---- 2 files changed, 18 insertions(+), 4 deletions(-) (limited to 'drivers/net/hyperv/hyperv_net.h') diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 4b7df5a5c96..6cc37c15e0b 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -791,6 +791,7 @@ enum ndis_per_pkt_info_type { IEEE_8021Q_INFO, ORIGINAL_PKTINFO, PACKET_CANCEL_ID, + NBL_HASH_VALUE = PACKET_CANCEL_ID, ORIGINAL_NET_BUFLIST, CACHED_NET_BUFLIST, SHORT_PKT_PADINFO, @@ -937,6 +938,9 @@ struct ndis_tcp_lso_info { #define NDIS_LSO_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \ sizeof(struct ndis_tcp_lso_info)) +#define NDIS_HASH_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \ + sizeof(u32)) + /* Format of Information buffer passed in a SetRequest for the OID */ /* OID_GEN_RNDIS_CONFIG_PARAMETER. */ struct rndis_config_parameter_info { diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 2e967a7bdb3..4fd71b75e66 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -224,9 +224,11 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, if (nvsc_dev == NULL || ndev->real_num_tx_queues <= 1) return 0; - if (netvsc_set_hash(&hash, skb)) + if (netvsc_set_hash(&hash, skb)) { q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] % ndev->real_num_tx_queues; + skb_set_hash(skb, hash, PKT_HASH_TYPE_L3); + } return q_idx; } @@ -384,6 +386,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) struct ndis_tcp_lso_info *lso_info; int hdr_offset; u32 net_trans_info; + u32 hash; /* We will atmost need two pages to describe the rndis @@ -402,9 +405,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) packet = kzalloc(sizeof(struct hv_netvsc_packet) + (num_data_pgs * sizeof(struct hv_page_buffer)) + sizeof(struct rndis_message) + - NDIS_VLAN_PPI_SIZE + - NDIS_CSUM_PPI_SIZE + - NDIS_LSO_PPI_SIZE, GFP_ATOMIC); + NDIS_VLAN_PPI_SIZE + NDIS_CSUM_PPI_SIZE + + NDIS_LSO_PPI_SIZE + NDIS_HASH_PPI_SIZE, GFP_ATOMIC); if (!packet) { /* out of memory, drop packet */ netdev_err(net, "unable to allocate hv_netvsc_packet\n"); @@ -443,6 +445,14 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet); + hash = skb_get_hash_raw(skb); + if (hash != 0 && net->real_num_tx_queues > 1) { + rndis_msg_size += NDIS_HASH_PPI_SIZE; + ppi = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE, + NBL_HASH_VALUE); + *(u32 *)((void *)ppi + ppi->ppi_offset) = hash; + } + if (isvlan) { struct ndis_pkt_8021q_info *vlan; -- cgit v1.2.3-70-g09d2