summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- include/linux/netdevice.h 80
-rw-r--r-- include/linux/netpoll.h 5
-rw-r--r-- net/core/dev.c 193
3 files changed, 219 insertions, 59 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index bdf5465deb9..58856b6737f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -314,8 +314,9 @@ struct napi_struct {
spinlock_t poll_lock;
int poll_owner;
struct net_device *dev;
- struct list_head dev_list;
#endif
+ struct list_head dev_list;
+ struct sk_buff *gro_list;
};
enum
@@ -376,22 +377,8 @@ static inline int napi_reschedule(struct napi_struct *napi)
*
* Mark NAPI processing as complete.
*/
-static inline void __napi_complete(struct napi_struct *n)
-{
- BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
- list_del(&n->poll_list);
- smp_mb__before_clear_bit();
- clear_bit(NAPI_STATE_SCHED, &n->state);
-}
-
-static inline void napi_complete(struct napi_struct *n)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __napi_complete(n);
- local_irq_restore(flags);
-}
+extern void __napi_complete(struct napi_struct *n);
+extern void napi_complete(struct napi_struct *n);
/**
* napi_disable - prevent NAPI from scheduling
@@ -640,9 +627,7 @@ struct net_device
unsigned long state;
struct list_head dev_list;
-#ifdef CONFIG_NETPOLL
struct list_head napi_list;
-#endif
/* Net device features */
unsigned long features;
@@ -661,6 +646,7 @@ struct net_device
#define NETIF_F_LLTX 4096 /* LockLess TX - deprecated. Please */
/* do not use LLTX in new drivers */
#define NETIF_F_NETNS_LOCAL 8192 /* Does not change network namespaces */
+#define NETIF_F_GRO 16384 /* Generic receive offload */
#define NETIF_F_LRO 32768 /* large receive offload */
/* Segmentation offload features */
@@ -984,22 +970,8 @@ static inline void *netdev_priv(const struct net_device *dev)
* netif_napi_add() must be used to initialize a napi context prior to calling
* *any* of the other napi related functions.
*/
-static inline void netif_napi_add(struct net_device *dev,
- struct napi_struct *napi,
- int (*poll)(struct napi_struct *, int),
- int weight)
-{
- INIT_LIST_HEAD(&napi->poll_list);
- napi->poll = poll;
- napi->weight = weight;
-#ifdef CONFIG_NETPOLL
- napi->dev = dev;
- list_add(&napi->dev_list, &dev->napi_list);
- spin_lock_init(&napi->poll_lock);
- napi->poll_owner = -1;
-#endif
- set_bit(NAPI_STATE_SCHED, &napi->state);
-}
+void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
+ int (*poll)(struct napi_struct *, int), int weight);
/**
* netif_napi_del - remove a napi context
@@ -1007,12 +979,20 @@ static inline void netif_napi_add(struct net_device *dev,
*
* netif_napi_del() removes a napi context from the network device napi list
*/
-static inline void netif_napi_del(struct napi_struct *napi)
-{
-#ifdef CONFIG_NETPOLL
- list_del(&napi->dev_list);
-#endif
-}
+void netif_napi_del(struct napi_struct *napi);
+
+/*
+ * Per-skb GRO bookkeeping.  It is not a separate allocation: NAPI_GRO_CB()
+ * below simply reinterprets the skb's cb[] area as this structure.
+ * NOTE(review): this presumably must fit within sizeof(skb->cb) -- confirm.
+ */
+struct napi_gro_cb {
+ /* This is non-zero if the packet may be of the same flow. */
+ int same_flow;
+
+ /* This is non-zero if the packet cannot be merged with the new skb. */
+ int flush;
+
+ /* Number of segments aggregated. */
+ int count;
+};
+
+/* Access the GRO control block stored in skb->cb. */
+#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
struct packet_type {
__be16 type; /* This is really htons(ether_type). */
@@ -1024,6 +1004,9 @@ struct packet_type {
struct sk_buff *(*gso_segment)(struct sk_buff *skb,
int features);
int (*gso_send_check)(struct sk_buff *skb);
+ struct sk_buff **(*gro_receive)(struct sk_buff **head,
+ struct sk_buff *skb);
+ int (*gro_complete)(struct sk_buff *skb);
void *af_packet_priv;
struct list_head list;
};
@@ -1377,6 +1360,9 @@ extern int netif_rx(struct sk_buff *skb);
extern int netif_rx_ni(struct sk_buff *skb);
#define HAVE_NETIF_RECEIVE_SKB 1
extern int netif_receive_skb(struct sk_buff *skb);
+extern void napi_gro_flush(struct napi_struct *napi);
+extern int napi_gro_receive(struct napi_struct *napi,
+ struct sk_buff *skb);
extern void netif_nit_deliver(struct sk_buff *skb);
extern int dev_valid_name(const char *name);
extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *);
@@ -1621,17 +1607,7 @@ static inline void __netif_rx_complete(struct net_device *dev,
static inline void netif_rx_complete(struct net_device *dev,
struct napi_struct *napi)
{
- unsigned long flags;
-
- /*
- * don't let napi dequeue from the cpu poll list
- * just in case its running on a different cpu
- */
- if (unlikely(test_bit(NAPI_STATE_NPSVC, &napi->state)))
- return;
- local_irq_save(flags);
- __netif_rx_complete(dev, napi);
- local_irq_restore(flags);
+ napi_complete(napi);
}
static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index e3d79593fb3..e38d3c9dccd 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -94,11 +94,6 @@ static inline void netpoll_poll_unlock(void *have)
rcu_read_unlock();
}
-static inline void netpoll_netdev_init(struct net_device *dev)
-{
- INIT_LIST_HEAD(&dev->napi_list);
-}
-
#else
static inline int netpoll_rx(struct sk_buff *skb)
{
diff --git a/net/core/dev.c b/net/core/dev.c
index e415f0b0d0d..d8d7d1fccde 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -129,6 +129,9 @@
#include "net-sysfs.h"
+/* Instead of increasing this, you should create a hash table. */
+#define MAX_GRO_SKBS 8
+
/*
* The list of packet types we will receive (as opposed to discard)
* and the routines to invoke.
@@ -2335,6 +2338,122 @@ static void flush_backlog(void *arg)
}
}
+/*
+ * napi_gro_complete - finish a held skb and pass it up the stack
+ * @skb: skb that has already been unlinked from the GRO list
+ *
+ * An skb without a frag_list goes straight to netif_receive_skb().
+ * Otherwise the gro_complete() handler of the packet type matching
+ * skb->protocol is called first.  If no handler is found, or the handler
+ * returns non-zero, the skb is freed.
+ *
+ * Returns NET_RX_SUCCESS when the skb was freed here, otherwise whatever
+ * netif_receive_skb() returns.
+ */
+static int napi_gro_complete(struct sk_buff *skb)
+{
+ struct packet_type *ptype;
+ __be16 type = skb->protocol;
+ struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+ /* Stays -ENOENT if no packet type with a gro_complete() hook matches. */
+ int err = -ENOENT;
+
+ /* NOTE(review): an empty frag_list presumably means nothing was merged
+  * into this skb, so no protocol fix-up is needed -- confirm. */
+ if (!skb_shinfo(skb)->frag_list)
+ goto out;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ptype, head, list) {
+ /* Skip other protocols, device-bound handlers and non-GRO handlers. */
+ if (ptype->type != type || ptype->dev || !ptype->gro_complete)
+ continue;
+
+ err = ptype->gro_complete(skb);
+ break;
+ }
+ rcu_read_unlock();
+
+ if (err) {
+ /* Warn when the loop ran off the end, i.e. no handler was found. */
+ WARN_ON(&ptype->list == head);
+ kfree_skb(skb);
+ return NET_RX_SUCCESS;
+ }
+
+out:
+ /* Pushing by the negated network offset moves skb->data by that offset;
+  * NOTE(review): presumably this puts data back where the driver left it
+  * before the skb is handed to netif_receive_skb() -- confirm. */
+ __skb_push(skb, -skb_network_offset(skb));
+ return netif_receive_skb(skb);
+}
+
+/*
+ * napi_gro_flush - complete every skb currently held for GRO
+ * @napi: NAPI context whose gro_list is to be drained
+ *
+ * Each skb is detached from the list and handed to napi_gro_complete();
+ * the list is empty on return.
+ */
+void napi_gro_flush(struct napi_struct *napi)
+{
+ struct sk_buff *skb, *next;
+
+ for (skb = napi->gro_list; skb; skb = next) {
+ /* Save the link first: the skb is no longer ours after completion. */
+ next = skb->next;
+ skb->next = NULL;
+ napi_gro_complete(skb);
+ }
+
+ napi->gro_list = NULL;
+}
+EXPORT_SYMBOL(napi_gro_flush);
+
+/*
+ * napi_gro_receive - offer a received skb to the protocol's GRO handler
+ * @napi: NAPI context whose gro_list holds the skbs currently being held
+ * @skb: newly received packet
+ *
+ * If the device does not have NETIF_F_GRO set, or no packet type registered
+ * for skb->protocol provides gro_receive(), the skb is passed directly to
+ * netif_receive_skb().  Otherwise the handler is invoked with the held list
+ * and the outcome is read back from NAPI_GRO_CB(skb):
+ *  - same_flow set: nothing more is done with the skb here
+ *    (NOTE(review): presumably the handler merged it into a held skb);
+ *  - flush set, or MAX_GRO_SKBS skbs are already held: the skb is passed
+ *    to netif_receive_skb() unmerged;
+ *  - otherwise the skb is put at the head of napi->gro_list.
+ *
+ * Returns NET_RX_SUCCESS when the skb was merged or held, otherwise the
+ * return value of netif_receive_skb().
+ */
+int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+{
+ struct sk_buff **pp = NULL;
+ struct packet_type *ptype;
+ __be16 type = skb->protocol;
+ struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+ /* Number of skbs found on gro_list (adjusted if one is completed below). */
+ int count = 0;
+ int mac_len;
+
+ if (!(skb->dev->features & NETIF_F_GRO))
+ goto normal;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ptype, head, list) {
+ struct sk_buff *p;
+
+ /* Skip other protocols, device-bound handlers and non-GRO handlers. */
+ if (ptype->type != type || ptype->dev || !ptype->gro_receive)
+ continue;
+
+ skb_reset_network_header(skb);
+ mac_len = skb->network_header - skb->mac_header;
+ skb->mac_len = mac_len;
+ NAPI_GRO_CB(skb)->same_flow = 0;
+ NAPI_GRO_CB(skb)->flush = 0;
+
+ /*
+  * Pre-mark each held skb: it may belong to the same flow only if
+  * its MAC header has the same length and identical bytes.  The
+  * protocol handler can refine these flags further.
+  */
+ for (p = napi->gro_list; p; p = p->next) {
+ count++;
+ NAPI_GRO_CB(p)->same_flow =
+ p->mac_len == mac_len &&
+ !memcmp(skb_mac_header(p), skb_mac_header(skb),
+ mac_len);
+ NAPI_GRO_CB(p)->flush = 0;
+ }
+
+ pp = ptype->gro_receive(&napi->gro_list, skb);
+ break;
+ }
+ rcu_read_unlock();
+
+ /* The loop ran off the end: no GRO-capable handler for this protocol. */
+ if (&ptype->list == head)
+ goto normal;
+
+ /*
+  * A non-NULL return points at the list link of a held skb that the
+  * handler wants completed now: unlink it and pass it up the stack.
+  */
+ if (pp) {
+ struct sk_buff *nskb = *pp;
+
+ *pp = nskb->next;
+ nskb->next = NULL;
+ napi_gro_complete(nskb);
+ count--;
+ }
+
+ if (NAPI_GRO_CB(skb)->same_flow)
+ goto ok;
+
+ if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) {
+ /* NOTE(review): presumably undoes any data-pointer advance made by
+  * the handler so the normal path sees the skb unchanged -- confirm. */
+ __skb_push(skb, -skb_network_offset(skb));
+ goto normal;
+ }
+
+ /* Hold the skb as a new entry at the head of the GRO list. */
+ NAPI_GRO_CB(skb)->count = 1;
+ skb->next = napi->gro_list;
+ napi->gro_list = skb;
+
+ok:
+ return NET_RX_SUCCESS;
+
+normal:
+ return netif_receive_skb(skb);
+}
+EXPORT_SYMBOL(napi_gro_receive);
+
static int process_backlog(struct napi_struct *napi, int quota)
{
int work = 0;
@@ -2354,9 +2473,11 @@ static int process_backlog(struct napi_struct *napi, int quota)
}
local_irq_enable();
- netif_receive_skb(skb);
+ napi_gro_receive(napi, skb);
} while (++work < quota && jiffies == start_time);
+ napi_gro_flush(napi);
+
return work;
}
@@ -2377,6 +2498,68 @@ void __napi_schedule(struct napi_struct *n)
}
EXPORT_SYMBOL(__napi_schedule);
+/*
+ * __napi_complete - take a NAPI context off the poll list
+ * @n: NAPI context
+ *
+ * BUGs if the context is not marked NAPI_STATE_SCHED or if skbs are still
+ * held on its gro_list, so the GRO list has to be flushed before calling
+ * this.  Does no interrupt masking of its own; napi_complete() wraps this
+ * call in local_irq_save()/local_irq_restore().
+ */
+void __napi_complete(struct napi_struct *n)
+{
+ BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
+ BUG_ON(n->gro_list);
+
+ list_del(&n->poll_list);
+ /* Barrier so the list removal is ordered before SCHED is cleared. */
+ smp_mb__before_clear_bit();
+ clear_bit(NAPI_STATE_SCHED, &n->state);
+}
+EXPORT_SYMBOL(__napi_complete);
+
+/*
+ * napi_complete - mark NAPI processing as complete
+ * @n: NAPI context
+ *
+ * Does nothing if NAPI_STATE_NPSVC is set.  Otherwise flushes any skbs held
+ * for GRO and then removes the context from the poll list with local
+ * interrupts disabled.
+ */
+void napi_complete(struct napi_struct *n)
+{
+ unsigned long flags;
+
+ /*
+ * don't let napi dequeue from the cpu poll list
+ * just in case it's running on a different cpu
+ */
+ if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
+ return;
+
+ /* Must come first: __napi_complete() BUGs on a non-empty gro_list. */
+ napi_gro_flush(n);
+ local_irq_save(flags);
+ __napi_complete(n);
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL(napi_complete);
+
+/*
+ * netif_napi_add - initialize a NAPI context and attach it to a device
+ * @dev: network device the context belongs to
+ * @napi: NAPI context to initialize
+ * @poll: polling function
+ * @weight: default weight
+ *
+ * The context is linked into dev->napi_list regardless of CONFIG_NETPOLL;
+ * the back-pointer to @dev and the poll lock/owner are only set up when
+ * CONFIG_NETPOLL is enabled.
+ */
+void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
+ int (*poll)(struct napi_struct *, int), int weight)
+{
+ INIT_LIST_HEAD(&napi->poll_list);
+ /* No skbs are held for GRO yet. */
+ napi->gro_list = NULL;
+ napi->poll = poll;
+ napi->weight = weight;
+ list_add(&napi->dev_list, &dev->napi_list);
+#ifdef CONFIG_NETPOLL
+ napi->dev = dev;
+ spin_lock_init(&napi->poll_lock);
+ napi->poll_owner = -1;
+#endif
+ /* The context starts out with SCHED set.
+  * NOTE(review): presumably it stays unschedulable until the driver
+  * enables it -- confirm against napi_enable(). */
+ set_bit(NAPI_STATE_SCHED, &napi->state);
+}
+EXPORT_SYMBOL(netif_napi_add);
+
+/*
+ * netif_napi_del - remove a NAPI context from its device
+ * @napi: NAPI context
+ *
+ * Unlinks the context from the device's napi list and frees any skbs still
+ * held on its gro_list; those skbs are dropped, not delivered.
+ */
+void netif_napi_del(struct napi_struct *napi)
+{
+ struct sk_buff *skb, *next;
+
+ list_del(&napi->dev_list);
+
+ for (skb = napi->gro_list; skb; skb = next) {
+ /* Save the link before the skb is freed. */
+ next = skb->next;
+ skb->next = NULL;
+ kfree_skb(skb);
+ }
+
+ napi->gro_list = NULL;
+}
+EXPORT_SYMBOL(netif_napi_del);
+
static void net_rx_action(struct softirq_action *h)
{
@@ -4380,7 +4563,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
netdev_init_queues(dev);
- netpoll_netdev_init(dev);
+ INIT_LIST_HEAD(&dev->napi_list);
setup(dev);
strcpy(dev->name, name);
return dev;
@@ -4397,10 +4580,15 @@ EXPORT_SYMBOL(alloc_netdev_mq);
*/
void free_netdev(struct net_device *dev)
{
+ struct napi_struct *p, *n;
+
release_net(dev_net(dev));
kfree(dev->_tx);
+ list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
+ netif_napi_del(p);
+
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
kfree((char *)dev - dev->padded);
@@ -4949,6 +5137,7 @@ static int __init net_dev_init(void)
queue->backlog.poll = process_backlog;
queue->backlog.weight = weight_p;
+ queue->backlog.gro_list = NULL;
}
dev_boot_phase = 0;