From ae5e8127b712313ec1b99356019ce9226fea8b88 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@canonical.com>
Date: Mon, 25 Nov 2013 16:52:34 +0000
Subject: xen-netback: include definition of csum_ipv6_magic

We are now using csum_ipv6_magic, include the appropriate header.
Avoids the following error:

    drivers/net/xen-netback/netback.c:1313:4: error: implicit declaration of function 'csum_ipv6_magic' [-Werror=implicit-function-declaration]
        tcph->check = ~csum_ipv6_magic(&ipv6h->saddr,

Signed-off-by: Andy Whitcroft <apw@canonical.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netback/netback.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers/net/xen-netback/netback.c')

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 919b6509455..64f0e0d18b8 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -39,6 +39,7 @@
 #include <linux/udp.h>
 
 #include <net/tcp.h>
+#include <net/ip6_checksum.h>
 
 #include <xen/xen.h>
 #include <xen/events.h>
-- 
cgit v1.2.3-70-g09d2


From 1431fb31ecbaba1b5718006128f0f2ed0b94e1c3 Mon Sep 17 00:00:00 2001
From: Paul Durrant <Paul.Durrant@citrix.com>
Date: Tue, 3 Dec 2013 17:39:29 +0000
Subject: xen-netback: fix fragment detection in checksum setup

The code to detect fragments in checksum_setup() was missing for IPv4 and
too eager for IPv6. (It transpires that Windows seems to send IPv6 packets
with a fragment header even if they are not a fragment - i.e. offset is zero,
and M bit is not set).

This patch also incorporates a fix to callers of maybe_pull_tail() where
skb->network_header was being erroneously added to the length argument.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Signed-off-by: Zoltan Kiss <zoltan.kiss@citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: David Vrabel <david.vrabel@citrix.com>
cc: David Miller <davem@davemloft.net>
Acked-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netback/netback.c | 236 ++++++++++++++++++++++----------------
 include/linux/ipv6.h              |   1 +
 include/net/ipv6.h                |   3 +-
 3 files changed, 140 insertions(+), 100 deletions(-)

(limited to 'drivers/net/xen-netback/netback.c')

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 64f0e0d18b8..acf13920e6d 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1149,49 +1149,72 @@ static int xenvif_set_skb_gso(struct xenvif *vif,
 	return 0;
 }
 
-static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len)
+static inline int maybe_pull_tail(struct sk_buff *skb, unsigned int len,
+				  unsigned int max)
 {
-	if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) {
-		/* If we need to pullup then pullup to the max, so we
-		 * won't need to do it again.
-		 */
-		int target = min_t(int, skb->len, MAX_TCP_HEADER);
-		__pskb_pull_tail(skb, target - skb_headlen(skb));
-	}
+	if (skb_headlen(skb) >= len)
+		return 0;
+
+	/* If we need to pullup then pullup to the max, so we
+	 * won't need to do it again.
+	 */
+	if (max > skb->len)
+		max = skb->len;
+
+	if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL)
+		return -ENOMEM;
+
+	if (skb_headlen(skb) < len)
+		return -EPROTO;
+
+	return 0;
 }
 
+/* This value should be large enough to cover a tagged ethernet header plus
+ * maximally sized IP and TCP or UDP headers.
+ */
+#define MAX_IP_HDR_LEN 128
+
 static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb,
 			     int recalculate_partial_csum)
 {
-	struct iphdr *iph = (void *)skb->data;
-	unsigned int header_size;
 	unsigned int off;
-	int err = -EPROTO;
+	bool fragment;
+	int err;
+
+	fragment = false;
+
+	err = maybe_pull_tail(skb,
+			      sizeof(struct iphdr),
+			      MAX_IP_HDR_LEN);
+	if (err < 0)
+		goto out;
 
-	off = sizeof(struct iphdr);
+	if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF))
+		fragment = true;
 
-	header_size = skb->network_header + off + MAX_IPOPTLEN;
-	maybe_pull_tail(skb, header_size);
+	off = ip_hdrlen(skb);
 
-	off = iph->ihl * 4;
+	err = -EPROTO;
 
-	switch (iph->protocol) {
+	switch (ip_hdr(skb)->protocol) {
 	case IPPROTO_TCP:
 		if (!skb_partial_csum_set(skb, off,
 					  offsetof(struct tcphdr, check)))
 			goto out;
 
 		if (recalculate_partial_csum) {
-			struct tcphdr *tcph = tcp_hdr(skb);
-
-			header_size = skb->network_header +
-				off +
-				sizeof(struct tcphdr);
-			maybe_pull_tail(skb, header_size);
-
-			tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-							 skb->len - off,
-							 IPPROTO_TCP, 0);
+			err = maybe_pull_tail(skb,
+					      off + sizeof(struct tcphdr),
+					      MAX_IP_HDR_LEN);
+			if (err < 0)
+				goto out;
+
+			tcp_hdr(skb)->check =
+				~csum_tcpudp_magic(ip_hdr(skb)->saddr,
+						   ip_hdr(skb)->daddr,
+						   skb->len - off,
+						   IPPROTO_TCP, 0);
 		}
 		break;
 	case IPPROTO_UDP:
@@ -1200,24 +1223,20 @@ static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb,
 			goto out;
 
 		if (recalculate_partial_csum) {
-			struct udphdr *udph = udp_hdr(skb);
-
-			header_size = skb->network_header +
-				off +
-				sizeof(struct udphdr);
-			maybe_pull_tail(skb, header_size);
-
-			udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-							 skb->len - off,
-							 IPPROTO_UDP, 0);
+			err = maybe_pull_tail(skb,
+					      off + sizeof(struct udphdr),
+					      MAX_IP_HDR_LEN);
+			if (err < 0)
+				goto out;
+
+			udp_hdr(skb)->check =
+				~csum_tcpudp_magic(ip_hdr(skb)->saddr,
+						   ip_hdr(skb)->daddr,
+						   skb->len - off,
+						   IPPROTO_UDP, 0);
 		}
 		break;
 	default:
-		if (net_ratelimit())
-			netdev_err(vif->dev,
-				   "Attempting to checksum a non-TCP/UDP packet, "
-				   "dropping a protocol %d packet\n",
-				   iph->protocol);
 		goto out;
 	}
 
@@ -1227,75 +1246,99 @@ out:
 	return err;
 }
 
+/* This value should be large enough to cover a tagged ethernet header plus
+ * an IPv6 header, all options, and a maximal TCP or UDP header.
+ */
+#define MAX_IPV6_HDR_LEN 256
+
+#define OPT_HDR(type, skb, off) \
+	(type *)(skb_network_header(skb) + (off))
+
 static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb,
 			       int recalculate_partial_csum)
 {
-	int err = -EPROTO;
-	struct ipv6hdr *ipv6h = (void *)skb->data;
+	int err;
 	u8 nexthdr;
-	unsigned int header_size;
 	unsigned int off;
+	unsigned int len;
 	bool fragment;
 	bool done;
 
+	fragment = false;
 	done = false;
 
 	off = sizeof(struct ipv6hdr);
 
-	header_size = skb->network_header + off;
-	maybe_pull_tail(skb, header_size);
+	err = maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN);
+	if (err < 0)
+		goto out;
 
-	nexthdr = ipv6h->nexthdr;
+	nexthdr = ipv6_hdr(skb)->nexthdr;
 
-	while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) &&
-	       !done) {
+	len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
+	while (off <= len && !done) {
 		switch (nexthdr) {
 		case IPPROTO_DSTOPTS:
 		case IPPROTO_HOPOPTS:
 		case IPPROTO_ROUTING: {
-			struct ipv6_opt_hdr *hp = (void *)(skb->data + off);
+			struct ipv6_opt_hdr *hp;
 
-			header_size = skb->network_header +
-				off +
-				sizeof(struct ipv6_opt_hdr);
-			maybe_pull_tail(skb, header_size);
+			err = maybe_pull_tail(skb,
+					      off +
+					      sizeof(struct ipv6_opt_hdr),
+					      MAX_IPV6_HDR_LEN);
+			if (err < 0)
+				goto out;
 
+			hp = OPT_HDR(struct ipv6_opt_hdr, skb, off);
 			nexthdr = hp->nexthdr;
 			off += ipv6_optlen(hp);
 			break;
 		}
 		case IPPROTO_AH: {
-			struct ip_auth_hdr *hp = (void *)(skb->data + off);
+			struct ip_auth_hdr *hp;
+
+			err = maybe_pull_tail(skb,
+					      off +
+					      sizeof(struct ip_auth_hdr),
+					      MAX_IPV6_HDR_LEN);
+			if (err < 0)
+				goto out;
+
+			hp = OPT_HDR(struct ip_auth_hdr, skb, off);
+			nexthdr = hp->nexthdr;
+			off += ipv6_authlen(hp);
+			break;
+		}
+		case IPPROTO_FRAGMENT: {
+			struct frag_hdr *hp;
 
-			header_size = skb->network_header +
-				off +
-				sizeof(struct ip_auth_hdr);
-			maybe_pull_tail(skb, header_size);
+			err = maybe_pull_tail(skb,
+					      off +
+					      sizeof(struct frag_hdr),
+					      MAX_IPV6_HDR_LEN);
+			if (err < 0)
+				goto out;
+
+			hp = OPT_HDR(struct frag_hdr, skb, off);
+
+			if (hp->frag_off & htons(IP6_OFFSET | IP6_MF))
+				fragment = true;
 
 			nexthdr = hp->nexthdr;
-			off += (hp->hdrlen+2)<<2;
+			off += sizeof(struct frag_hdr);
 			break;
 		}
-		case IPPROTO_FRAGMENT:
-			fragment = true;
-			/* fall through */
 		default:
 			done = true;
 			break;
 		}
 	}
 
-	if (!done) {
-		if (net_ratelimit())
-			netdev_err(vif->dev, "Failed to parse packet header\n");
-		goto out;
-	}
+	err = -EPROTO;
 
-	if (fragment) {
-		if (net_ratelimit())
-			netdev_err(vif->dev, "Packet is a fragment!\n");
+	if (!done || fragment)
 		goto out;
-	}
 
 	switch (nexthdr) {
 	case IPPROTO_TCP:
@@ -1304,17 +1347,17 @@ static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb,
 			goto out;
 
 		if (recalculate_partial_csum) {
-			struct tcphdr *tcph = tcp_hdr(skb);
-
-			header_size = skb->network_header +
-				off +
-				sizeof(struct tcphdr);
-			maybe_pull_tail(skb, header_size);
-
-			tcph->check = ~csum_ipv6_magic(&ipv6h->saddr,
-						       &ipv6h->daddr,
-						       skb->len - off,
-						       IPPROTO_TCP, 0);
+			err = maybe_pull_tail(skb,
+					      off + sizeof(struct tcphdr),
+					      MAX_IPV6_HDR_LEN);
+			if (err < 0)
+				goto out;
+
+			tcp_hdr(skb)->check =
+				~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+						 &ipv6_hdr(skb)->daddr,
+						 skb->len - off,
+						 IPPROTO_TCP, 0);
 		}
 		break;
 	case IPPROTO_UDP:
@@ -1323,25 +1366,20 @@ static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb,
 			goto out;
 
 		if (recalculate_partial_csum) {
-			struct udphdr *udph = udp_hdr(skb);
-
-			header_size = skb->network_header +
-				off +
-				sizeof(struct udphdr);
-			maybe_pull_tail(skb, header_size);
-
-			udph->check = ~csum_ipv6_magic(&ipv6h->saddr,
-						       &ipv6h->daddr,
-						       skb->len - off,
-						       IPPROTO_UDP, 0);
+			err = maybe_pull_tail(skb,
+					      off + sizeof(struct udphdr),
+					      MAX_IPV6_HDR_LEN);
+			if (err < 0)
+				goto out;
+
+			udp_hdr(skb)->check =
+				~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+						 &ipv6_hdr(skb)->daddr,
+						 skb->len - off,
+						 IPPROTO_UDP, 0);
 		}
 		break;
 	default:
-		if (net_ratelimit())
-			netdev_err(vif->dev,
-				   "Attempting to checksum a non-TCP/UDP packet, "
-				   "dropping a protocol %d packet\n",
-				   nexthdr);
 		goto out;
 	}
 
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 5d89d1b808a..c56c350324e 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -4,6 +4,7 @@
 #include <uapi/linux/ipv6.h>
 
 #define ipv6_optlen(p)  (((p)->hdrlen+1) << 3)
+#define ipv6_authlen(p) (((p)->hdrlen+2) << 2)
 /*
  * This structure contains configuration options per IPv6 link.
  */
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index eb198acaac1..488316e339a 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -110,7 +110,8 @@ struct frag_hdr {
 	__be32	identification;
 };
 
-#define	IP6_MF	0x0001
+#define	IP6_MF		0x0001
+#define	IP6_OFFSET	0xFFF8
 
 #include <net/sock.h>
 
-- 
cgit v1.2.3-70-g09d2


From d52eb0d46f3606b9de9965cebb2beb2202a0dc62 Mon Sep 17 00:00:00 2001
From: Paul Durrant <Paul.Durrant@citrix.com>
Date: Wed, 11 Dec 2013 16:37:40 +0000
Subject: xen-netback: make sure skb linear area covers checksum field

skb_partial_csum_set requires that the linear area of the skb covers the
checksum field. The checksum setup code in netback was only doing that
pullup in the case when the pseudo header checksum was being recalculated
though. This patch makes that pullup unconditional. (I pullup the whole
transport header just for simplicity; the requirement is only for the check
field but in the case of UDP this is the last field in the header and in the
case of TCP it's the last but one).

The lack of pullup manifested as failures running Microsoft HCK network
tests on a pair of Windows 8 VMs and it has been verified that this patch
fixes the problem.

Suggested-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netback/netback.c | 60 ++++++++++++++++++---------------------
 1 file changed, 28 insertions(+), 32 deletions(-)

(limited to 'drivers/net/xen-netback/netback.c')

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index acf13920e6d..d158fc40cff 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1199,42 +1199,40 @@ static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb,
 
 	switch (ip_hdr(skb)->protocol) {
 	case IPPROTO_TCP:
+		err = maybe_pull_tail(skb,
+				      off + sizeof(struct tcphdr),
+				      MAX_IP_HDR_LEN);
+		if (err < 0)
+			goto out;
+
 		if (!skb_partial_csum_set(skb, off,
 					  offsetof(struct tcphdr, check)))
 			goto out;
 
-		if (recalculate_partial_csum) {
-			err = maybe_pull_tail(skb,
-					      off + sizeof(struct tcphdr),
-					      MAX_IP_HDR_LEN);
-			if (err < 0)
-				goto out;
-
+		if (recalculate_partial_csum)
 			tcp_hdr(skb)->check =
 				~csum_tcpudp_magic(ip_hdr(skb)->saddr,
 						   ip_hdr(skb)->daddr,
 						   skb->len - off,
 						   IPPROTO_TCP, 0);
-		}
 		break;
 	case IPPROTO_UDP:
+		err = maybe_pull_tail(skb,
+				      off + sizeof(struct udphdr),
+				      MAX_IP_HDR_LEN);
+		if (err < 0)
+			goto out;
+
 		if (!skb_partial_csum_set(skb, off,
 					  offsetof(struct udphdr, check)))
 			goto out;
 
-		if (recalculate_partial_csum) {
-			err = maybe_pull_tail(skb,
-					      off + sizeof(struct udphdr),
-					      MAX_IP_HDR_LEN);
-			if (err < 0)
-				goto out;
-
+		if (recalculate_partial_csum)
 			udp_hdr(skb)->check =
 				~csum_tcpudp_magic(ip_hdr(skb)->saddr,
 						   ip_hdr(skb)->daddr,
 						   skb->len - off,
 						   IPPROTO_UDP, 0);
-		}
 		break;
 	default:
 		goto out;
@@ -1342,42 +1340,40 @@ static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb,
 
 	switch (nexthdr) {
 	case IPPROTO_TCP:
+		err = maybe_pull_tail(skb,
+				      off + sizeof(struct tcphdr),
+				      MAX_IPV6_HDR_LEN);
+		if (err < 0)
+			goto out;
+
 		if (!skb_partial_csum_set(skb, off,
 					  offsetof(struct tcphdr, check)))
 			goto out;
 
-		if (recalculate_partial_csum) {
-			err = maybe_pull_tail(skb,
-					      off + sizeof(struct tcphdr),
-					      MAX_IPV6_HDR_LEN);
-			if (err < 0)
-				goto out;
-
+		if (recalculate_partial_csum)
 			tcp_hdr(skb)->check =
 				~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
 						 &ipv6_hdr(skb)->daddr,
 						 skb->len - off,
 						 IPPROTO_TCP, 0);
-		}
 		break;
 	case IPPROTO_UDP:
+		err = maybe_pull_tail(skb,
+				      off + sizeof(struct udphdr),
+				      MAX_IPV6_HDR_LEN);
+		if (err < 0)
+			goto out;
+
 		if (!skb_partial_csum_set(skb, off,
 					  offsetof(struct udphdr, check)))
 			goto out;
 
-		if (recalculate_partial_csum) {
-			err = maybe_pull_tail(skb,
-					      off + sizeof(struct udphdr),
-					      MAX_IPV6_HDR_LEN);
-			if (err < 0)
-				goto out;
-
+		if (recalculate_partial_csum)
 			udp_hdr(skb)->check =
 				~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
 						 &ipv6_hdr(skb)->daddr,
 						 skb->len - off,
 						 IPPROTO_UDP, 0);
-		}
 		break;
 	default:
 		goto out;
-- 
cgit v1.2.3-70-g09d2


From 10574059ce0451c6572c85329c772aa15085f8eb Mon Sep 17 00:00:00 2001
From: Paul Durrant <Paul.Durrant@citrix.com>
Date: Wed, 11 Dec 2013 10:57:15 +0000
Subject: xen-netback: napi: fix abuse of budget

netback seems to be somewhat confused about the napi budget parameter. The
parameter is supposed to limit the number of skbs processed in each poll,
but netback has this confused with grant operations.

This patch fixes that, properly limiting the work done in each poll. Note
that this limit makes sure we do not process any more data from the shared
ring than we intend to pass back from the poll. This is important to
prevent tx_queue potentially growing without bound.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netback/netback.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'drivers/net/xen-netback/netback.c')

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index d158fc40cff..db79e29b3d0 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1445,14 +1445,15 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
 	return false;
 }
 
-static unsigned xenvif_tx_build_gops(struct xenvif *vif)
+static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
 {
 	struct gnttab_copy *gop = vif->tx_copy_ops, *request_gop;
 	struct sk_buff *skb;
 	int ret;
 
 	while ((nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX
-		< MAX_PENDING_REQS)) {
+		< MAX_PENDING_REQS) &&
+	       (skb_queue_len(&vif->tx_queue) < budget)) {
 		struct xen_netif_tx_request txreq;
 		struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
 		struct page *page;
@@ -1614,14 +1615,13 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif)
 }
 
 
-static int xenvif_tx_submit(struct xenvif *vif, int budget)
+static int xenvif_tx_submit(struct xenvif *vif)
 {
 	struct gnttab_copy *gop = vif->tx_copy_ops;
 	struct sk_buff *skb;
 	int work_done = 0;
 
-	while (work_done < budget &&
-	       (skb = __skb_dequeue(&vif->tx_queue)) != NULL) {
+	while ((skb = __skb_dequeue(&vif->tx_queue)) != NULL) {
 		struct xen_netif_tx_request *txp;
 		u16 pending_idx;
 		unsigned data_len;
@@ -1696,14 +1696,14 @@ int xenvif_tx_action(struct xenvif *vif, int budget)
 	if (unlikely(!tx_work_todo(vif)))
 		return 0;
 
-	nr_gops = xenvif_tx_build_gops(vif);
+	nr_gops = xenvif_tx_build_gops(vif, budget);
 
 	if (nr_gops == 0)
 		return 0;
 
 	gnttab_batch_copy(vif->tx_copy_ops, nr_gops);
 
-	work_done = xenvif_tx_submit(vif, nr_gops);
+	work_done = xenvif_tx_submit(vif);
 
 	return work_done;
 }
-- 
cgit v1.2.3-70-g09d2


From d9601a36ffdb5c142697bef1228afb5ba4ee4003 Mon Sep 17 00:00:00 2001
From: Paul Durrant <Paul.Durrant@citrix.com>
Date: Wed, 11 Dec 2013 10:57:16 +0000
Subject: xen-netback: napi: don't prematurely request a tx event

This patch changes the RING_FINAL_CHECK_FOR_REQUESTS in
xenvif_build_tx_gops to a check for RING_HAS_UNCONSUMED_REQUESTS as the
former call has the side effect of advancing the ring event pointer and
therefore inviting another interrupt from the frontend before the napi
poll has actually finished, thereby defeating the point of napi.

The event pointer is updated by RING_FINAL_CHECK_FOR_REQUESTS in
xenvif_poll, the napi poll function, if the work done is less than the
budget i.e. when actually transitioning back to interrupt mode.

Reported-by: Malcolm Crossley <malcolm.crossley@citrix.com>
Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netback/netback.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/net/xen-netback/netback.c')

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index db79e29b3d0..33b8aa612d1 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1475,7 +1475,7 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
 			continue;
 		}
 
-		RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, work_to_do);
+		work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&vif->tx);
 		if (!work_to_do)
 			break;
 
-- 
cgit v1.2.3-70-g09d2


From a3314f3d40215349ab2427800c1e10676691389f Mon Sep 17 00:00:00 2001
From: Paul Durrant <Paul.Durrant@citrix.com>
Date: Thu, 12 Dec 2013 14:20:13 +0000
Subject: xen-netback: fix gso_prefix check

There is a mistake in checking the gso_prefix mask when passing large
packets to a guest. The wrong shift is applied to the bit - the raw skb
gso type is used rather then the translated one. This leads to large packets
being handed to the guest without the GSO metadata. This patch fixes the
check.

The mistake manifested as errors whilst running Microsoft HCK large packet
offload tests between a pair of Windows 8 VMs. I have verified this patch
fixes those errors.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netback/netback.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/net/xen-netback/netback.c')

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 33b8aa612d1..e884ee1fe7e 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -452,7 +452,7 @@ static int xenvif_gop_skb(struct sk_buff *skb,
 	}
 
 	/* Set up a GSO prefix descriptor, if necessary */
-	if ((1 << skb_shinfo(skb)->gso_type) & vif->gso_prefix_mask) {
+	if ((1 << gso_type) & vif->gso_prefix_mask) {
 		req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
 		meta = npo->meta + npo->meta_prod++;
 		meta->gso_type = gso_type;
-- 
cgit v1.2.3-70-g09d2


From 7022ef8b2a673db3d12a348331181f579afe9b22 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Date: Mon, 16 Dec 2013 10:45:05 +0800
Subject: xen-netback: fix fragments error handling in checksum_setup_ip()

Fix to return -EPROTO error if fragments detected in checksum_setup_ip().

Fixes: 1431fb31ecba ('xen-netback: fix fragment detection in checksum setup')
Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Reviewed-by: Paul Durrant <paul.durrant@citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netback/netback.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'drivers/net/xen-netback/netback.c')

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index e884ee1fe7e..27bbe58dcbe 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1197,6 +1197,9 @@ static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb,
 
 	err = -EPROTO;
 
+	if (fragment)
+		goto out;
+
 	switch (ip_hdr(skb)->protocol) {
 	case IPPROTO_TCP:
 		err = maybe_pull_tail(skb,
-- 
cgit v1.2.3-70-g09d2


From 0c8d087c04cdcef501064552149289866e53aa6c Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyj.lk@gmail.com>
Date: Tue, 17 Dec 2013 10:42:09 +0800
Subject: xen-netback: fix some error return code

'err' is overwrited to 0 after maybe_pull_tail() call, so the error
code was not set if skb_partial_csum_set() call failed. Fix to return
error -EPROTO from those error handling case instead of 0.

Fixes: d52eb0d46f36 ('xen-netback: make sure skb linear area covers checksum field')
Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Acked-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netback/netback.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'drivers/net/xen-netback/netback.c')

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 27bbe58dcbe..7b4fd93be76 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1209,8 +1209,10 @@ static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb,
 			goto out;
 
 		if (!skb_partial_csum_set(skb, off,
-					  offsetof(struct tcphdr, check)))
+					  offsetof(struct tcphdr, check))) {
+			err = -EPROTO;
 			goto out;
+		}
 
 		if (recalculate_partial_csum)
 			tcp_hdr(skb)->check =
@@ -1227,8 +1229,10 @@ static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb,
 			goto out;
 
 		if (!skb_partial_csum_set(skb, off,
-					  offsetof(struct udphdr, check)))
+					  offsetof(struct udphdr, check))) {
+			err = -EPROTO;
 			goto out;
+		}
 
 		if (recalculate_partial_csum)
 			udp_hdr(skb)->check =
@@ -1350,8 +1354,10 @@ static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb,
 			goto out;
 
 		if (!skb_partial_csum_set(skb, off,
-					  offsetof(struct tcphdr, check)))
+					  offsetof(struct tcphdr, check))) {
+			err = -EPROTO;
 			goto out;
+		}
 
 		if (recalculate_partial_csum)
 			tcp_hdr(skb)->check =
@@ -1368,8 +1374,10 @@ static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb,
 			goto out;
 
 		if (!skb_partial_csum_set(skb, off,
-					  offsetof(struct udphdr, check)))
+					  offsetof(struct udphdr, check))) {
+			err = -EPROTO;
 			goto out;
+		}
 
 		if (recalculate_partial_csum)
 			udp_hdr(skb)->check =
-- 
cgit v1.2.3-70-g09d2


From ac3d5ac277352fe6e27809286768e9f1f8aa388d Mon Sep 17 00:00:00 2001
From: Paul Durrant <Paul.Durrant@citrix.com>
Date: Mon, 23 Dec 2013 09:27:17 +0000
Subject: xen-netback: fix guest-receive-side array sizes

The sizes chosen for the metadata and grant_copy_op arrays on the guest
receive size are wrong;

- The meta array is needlessly twice the ring size, when we only ever
  consume a single array element per RX ring slot
- The grant_copy_op array is way too small. It's sized based on a bogus
  assumption: that at most two copy ops will be used per ring slot. This
  may have been true at some point in the past but it's clear from looking
  at start_new_rx_buffer() that a new ring slot is only consumed if a frag
  would overflow the current slot (plus some other conditions) so the actual
  limit is MAX_SKB_FRAGS grant_copy_ops per ring slot.

This patch fixes those two sizing issues and, because grant_copy_ops grows
so much, it pulls it out into a separate chunk of vmalloc()ed memory.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Acked-by: Wei Liu <wei.liu2@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netback/common.h    | 19 +++++++++++++------
 drivers/net/xen-netback/interface.c | 10 ++++++++++
 drivers/net/xen-netback/netback.c   |  2 +-
 3 files changed, 24 insertions(+), 7 deletions(-)

(limited to 'drivers/net/xen-netback/netback.c')

diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 08ae01b41c8..c47794b9d42 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -101,6 +101,13 @@ struct xenvif_rx_meta {
 
 #define MAX_PENDING_REQS 256
 
+/* It's possible for an skb to have a maximal number of frags
+ * but still be less than MAX_BUFFER_OFFSET in size. Thus the
+ * worst-case number of copy operations is MAX_SKB_FRAGS per
+ * ring slot.
+ */
+#define MAX_GRANT_COPY_OPS (MAX_SKB_FRAGS * XEN_NETIF_RX_RING_SIZE)
+
 struct xenvif {
 	/* Unique identifier for this interface. */
 	domid_t          domid;
@@ -143,13 +150,13 @@ struct xenvif {
 	 */
 	RING_IDX rx_req_cons_peek;
 
-	/* Given MAX_BUFFER_OFFSET of 4096 the worst case is that each
-	 * head/fragment page uses 2 copy operations because it
-	 * straddles two buffers in the frontend.
-	 */
-	struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE];
-	struct xenvif_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE];
+	/* This array is allocated seperately as it is large */
+	struct gnttab_copy *grant_copy_op;
 
+	/* We create one meta structure per ring request we consume, so
+	 * the maximum number is the same as the ring size.
+	 */
+	struct xenvif_rx_meta meta[XEN_NETIF_RX_RING_SIZE];
 
 	u8               fe_dev_addr[6];
 
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 870f1fa5837..34ca4e58a43 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -307,6 +307,15 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
 	SET_NETDEV_DEV(dev, parent);
 
 	vif = netdev_priv(dev);
+
+	vif->grant_copy_op = vmalloc(sizeof(struct gnttab_copy) *
+				     MAX_GRANT_COPY_OPS);
+	if (vif->grant_copy_op == NULL) {
+		pr_warn("Could not allocate grant copy space for %s\n", name);
+		free_netdev(dev);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	vif->domid  = domid;
 	vif->handle = handle;
 	vif->can_sg = 1;
@@ -487,6 +496,7 @@ void xenvif_free(struct xenvif *vif)
 
 	unregister_netdev(vif->dev);
 
+	vfree(vif->grant_copy_op);
 	free_netdev(vif->dev);
 
 	module_put(THIS_MODULE);
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 7b4fd93be76..78425554a53 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -608,7 +608,7 @@ void xenvif_rx_action(struct xenvif *vif)
 	if (!npo.copy_prod)
 		return;
 
-	BUG_ON(npo.copy_prod > ARRAY_SIZE(vif->grant_copy_op));
+	BUG_ON(npo.copy_prod > MAX_GRANT_COPY_OPS);
 	gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod);
 
 	while ((skb = __skb_dequeue(&rxq)) != NULL) {
-- 
cgit v1.2.3-70-g09d2