From 11ef7a8996d5d433c9cd75d80651297eccbf6d42 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Mon, 30 Jun 2014 09:50:40 -0700
Subject: net: Performance fix for process_backlog

In process_backlog the input_pkt_queue is only checked once for new
packets and quota is artificially reduced to reflect precisely the
number of packets on the input_pkt_queue so that the loop exits
appropriately.

This patches changes the behavior to be more straightforward and
less convoluted. Packets are processed until either the quota
is met or there are no more packets to process.

This patch seems to provide a small, but noticeable performance
improvement. The performance improvement is a result of staying
in the process_backlog loop longer which can reduce number of IPI's.

Performance data using super_netperf TCP_RR with 200 flows:

Before fix:

88.06% CPU utilization
125/190/309 90/95/99% latencies
1.46808e+06 tps
1145382 intrs.sec.

With fix:

87.73% CPU utilization
122/183/296 90/95/99% latencies
1.4921e+06 tps
1021674.30 intrs./sec.

Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 30eedf67791..77c19c7bb49 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4227,9 +4227,8 @@ static int process_backlog(struct napi_struct *napi, int quota)
 #endif
 	napi->weight = weight_p;
 	local_irq_disable();
-	while (work < quota) {
+	while (1) {
 		struct sk_buff *skb;
-		unsigned int qlen;
 
 		while ((skb = __skb_dequeue(&sd->process_queue))) {
 			local_irq_enable();
@@ -4243,24 +4242,24 @@ static int process_backlog(struct napi_struct *napi, int quota)
 		}
 
 		rps_lock(sd);
-		qlen = skb_queue_len(&sd->input_pkt_queue);
-		if (qlen)
-			skb_queue_splice_tail_init(&sd->input_pkt_queue,
-						   &sd->process_queue);
-
-		if (qlen < quota - work) {
+		if (skb_queue_empty(&sd->input_pkt_queue)) {
 			/*
 			 * Inline a custom version of __napi_complete().
 			 * only current cpu owns and manipulates this napi,
-			 * and NAPI_STATE_SCHED is the only possible flag set on backlog.
-			 * we can use a plain write instead of clear_bit(),
+			 * and NAPI_STATE_SCHED is the only possible flag set
+			 * on backlog.
+			 * We can use a plain write instead of clear_bit(),
 			 * and we dont need an smp_mb() memory barrier.
 			 */
 			list_del(&napi->poll_list);
 			napi->state = 0;
+			rps_unlock(sd);
 
-			quota = work + qlen;
+			break;
 		}
+
+		skb_queue_splice_tail_init(&sd->input_pkt_queue,
+					   &sd->process_queue);
 		rps_unlock(sd);
 	}
 	local_irq_enable();
-- 
cgit v1.2.3-70-g09d2


From 54951194656e4853e441266fd095f880bc0398f3 Mon Sep 17 00:00:00 2001
From: Loic Prylli <loicp@google.com>
Date: Tue, 1 Jul 2014 21:39:43 -0700
Subject: net: Fix NETDEV_CHANGE notifier usage causing spurious arp flush

A bug was introduced in NETDEV_CHANGE notifier sequence causing the
arp table to be sometimes spuriously cleared (including manual arp
entries marked permanent), upon network link carrier changes.

The changed argument for the notifier was applied only to a single
caller of NETDEV_CHANGE, missing among others netdev_state_change().
So upon net_carrier events induced by the network, which are
triggering a call to netdev_state_change(), arp_netdev_event() would
decide whether to clear or not arp cache based on random/junk stack
values (a kind of read buffer overflow).

Fixes: be9efd365328 ("net: pass changed flags along with NETDEV_CHANGE event")
Fixes: 6c8b4e3ff81b ("arp: flush arp cache on IFF_NOARP change")
Signed-off-by: Loic Prylli <loicp@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 77c19c7bb49..7990984ca36 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -148,6 +148,9 @@ struct list_head ptype_all __read_mostly;	/* Taps */
 static struct list_head offload_base __read_mostly;
 
 static int netif_rx_internal(struct sk_buff *skb);
+static int call_netdevice_notifiers_info(unsigned long val,
+					 struct net_device *dev,
+					 struct netdev_notifier_info *info);
 
 /*
  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
@@ -1207,7 +1210,11 @@ EXPORT_SYMBOL(netdev_features_change);
 void netdev_state_change(struct net_device *dev)
 {
 	if (dev->flags & IFF_UP) {
-		call_netdevice_notifiers(NETDEV_CHANGE, dev);
+		struct netdev_notifier_change_info change_info;
+
+		change_info.flags_changed = 0;
+		call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
+					      &change_info.info);
 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
 	}
 }
-- 
cgit v1.2.3-70-g09d2


From c3caf1192f904de2f1381211f564537235d50de3 Mon Sep 17 00:00:00 2001
From: Jerry Chu <hkchu@google.com>
Date: Mon, 14 Jul 2014 15:54:46 -0700
Subject: net-gre-gro: Fix a bug that breaks the forwarding path

Fixed a bug that was introduced by my GRE-GRO patch
(bf5a755f5e9186406bbf50f4087100af5bd68e40 net-gre-gro: Add GRE
support to the GRO stack) that breaks the forwarding path
because various GSO related fields were not set. The bug will
cause on the egress path either the GSO code to fail, or a
GRE-TSO capable (NETIF_F_GSO_GRE) NICs to choke. The following
fix has been tested for both cases.

Signed-off-by: H.K. Jerry Chu <hkchu@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c           | 2 ++
 net/ipv4/af_inet.c       | 3 +++
 net/ipv4/gre_offload.c   | 3 +++
 net/ipv4/tcp_offload.c   | 2 +-
 net/ipv6/tcpv6_offload.c | 2 +-
 5 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 7990984ca36..367a586d0c8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4096,6 +4096,8 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
 	skb->vlan_tci = 0;
 	skb->dev = napi->dev;
 	skb->skb_iif = 0;
+	skb->encapsulation = 0;
+	skb_shinfo(skb)->gso_type = 0;
 	skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
 
 	napi->skb = skb;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d5e6836cf77..d156b3c5f36 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1429,6 +1429,9 @@ static int inet_gro_complete(struct sk_buff *skb, int nhoff)
 	int proto = iph->protocol;
 	int err = -ENOSYS;
 
+	if (skb->encapsulation)
+		skb_set_inner_network_header(skb, nhoff);
+
 	csum_replace2(&iph->check, iph->tot_len, newlen);
 	iph->tot_len = newlen;
 
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index eb92deb1266..f0bdd47bbbc 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -263,6 +263,9 @@ static int gre_gro_complete(struct sk_buff *skb, int nhoff)
 	int err = -ENOENT;
 	__be16 type;
 
+	skb->encapsulation = 1;
+	skb_shinfo(skb)->gso_type = SKB_GSO_GRE;
+
 	type = greh->protocol;
 	if (greh->flags & GRE_KEY)
 		grehlen += GRE_HEADER_SECTION;
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 4e86c59ec7f..55046ecd083 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -309,7 +309,7 @@ static int tcp4_gro_complete(struct sk_buff *skb, int thoff)
 
 	th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr,
 				  iph->daddr, 0);
-	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+	skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
 
 	return tcp_gro_complete(skb);
 }
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index 8517d3cd1ae..01b0ff9a0c2 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -73,7 +73,7 @@ static int tcp6_gro_complete(struct sk_buff *skb, int thoff)
 
 	th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr,
 				  &iph->daddr, 0);
-	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+	skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
 
 	return tcp_gro_complete(skb);
 }
-- 
cgit v1.2.3-70-g09d2