summaryrefslogtreecommitdiffstats
path: root/net/core/dev.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--net/core/dev.c458
1 files changed, 265 insertions, 193 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index f411c28d0a6..8f9710c62e2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -371,9 +371,10 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
static inline struct list_head *ptype_head(const struct packet_type *pt)
{
if (pt->type == htons(ETH_P_ALL))
- return &ptype_all;
+ return pt->dev ? &pt->dev->ptype_all : &ptype_all;
else
- return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
+ return pt->dev ? &pt->dev->ptype_specific :
+ &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
}
/**
@@ -1694,6 +1695,7 @@ int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
skb_scrub_packet(skb, true);
skb->protocol = eth_type_trans(skb, dev);
+ skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
return 0;
}
@@ -1733,6 +1735,23 @@ static inline int deliver_skb(struct sk_buff *skb,
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}
+static inline void deliver_ptype_list_skb(struct sk_buff *skb,
+ struct packet_type **pt,
+ struct net_device *dev, __be16 type,
+ struct list_head *ptype_list)
+{
+ struct packet_type *ptype, *pt_prev = *pt;
+
+ list_for_each_entry_rcu(ptype, ptype_list, list) {
+ if (ptype->type != type)
+ continue;
+ if (pt_prev)
+ deliver_skb(skb, pt_prev, dev);
+ pt_prev = ptype;
+ }
+ *pt = pt_prev;
+}
+
static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
{
if (!ptype->af_packet_priv || !skb->sk)
@@ -1756,45 +1775,54 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
struct packet_type *ptype;
struct sk_buff *skb2 = NULL;
struct packet_type *pt_prev = NULL;
+ struct list_head *ptype_list = &ptype_all;
rcu_read_lock();
- list_for_each_entry_rcu(ptype, &ptype_all, list) {
+again:
+ list_for_each_entry_rcu(ptype, ptype_list, list) {
/* Never send packets back to the socket
* they originated from - MvS (miquels@drinkel.ow.org)
*/
- if ((ptype->dev == dev || !ptype->dev) &&
- (!skb_loop_sk(ptype, skb))) {
- if (pt_prev) {
- deliver_skb(skb2, pt_prev, skb->dev);
- pt_prev = ptype;
- continue;
- }
+ if (skb_loop_sk(ptype, skb))
+ continue;
- skb2 = skb_clone(skb, GFP_ATOMIC);
- if (!skb2)
- break;
+ if (pt_prev) {
+ deliver_skb(skb2, pt_prev, skb->dev);
+ pt_prev = ptype;
+ continue;
+ }
- net_timestamp_set(skb2);
+ /* need to clone skb, done only once */
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (!skb2)
+ goto out_unlock;
- /* skb->nh should be correctly
- set by sender, so that the second statement is
- just protection against buggy protocols.
- */
- skb_reset_mac_header(skb2);
-
- if (skb_network_header(skb2) < skb2->data ||
- skb_network_header(skb2) > skb_tail_pointer(skb2)) {
- net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
- ntohs(skb2->protocol),
- dev->name);
- skb_reset_network_header(skb2);
- }
+ net_timestamp_set(skb2);
- skb2->transport_header = skb2->network_header;
- skb2->pkt_type = PACKET_OUTGOING;
- pt_prev = ptype;
+ /* skb->nh should be correctly
+ * set by sender, so that the second statement is
+ * just protection against buggy protocols.
+ */
+ skb_reset_mac_header(skb2);
+
+ if (skb_network_header(skb2) < skb2->data ||
+ skb_network_header(skb2) > skb_tail_pointer(skb2)) {
+ net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
+ ntohs(skb2->protocol),
+ dev->name);
+ skb_reset_network_header(skb2);
}
+
+ skb2->transport_header = skb2->network_header;
+ skb2->pkt_type = PACKET_OUTGOING;
+ pt_prev = ptype;
}
+
+ if (ptype_list == &ptype_all) {
+ ptype_list = &dev->ptype_all;
+ goto again;
+ }
+out_unlock:
if (pt_prev)
pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
rcu_read_unlock();
@@ -2351,7 +2379,6 @@ EXPORT_SYMBOL(skb_checksum_help);
__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
{
- unsigned int vlan_depth = skb->mac_len;
__be16 type = skb->protocol;
/* Tunnel gso handlers can set protocol to ethernet. */
@@ -2365,35 +2392,7 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
type = eth->h_proto;
}
- /* if skb->protocol is 802.1Q/AD then the header should already be
- * present at mac_len - VLAN_HLEN (if mac_len > 0), or at
- * ETH_HLEN otherwise
- */
- if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
- if (vlan_depth) {
- if (WARN_ON(vlan_depth < VLAN_HLEN))
- return 0;
- vlan_depth -= VLAN_HLEN;
- } else {
- vlan_depth = ETH_HLEN;
- }
- do {
- struct vlan_hdr *vh;
-
- if (unlikely(!pskb_may_pull(skb,
- vlan_depth + VLAN_HLEN)))
- return 0;
-
- vh = (struct vlan_hdr *)(skb->data + vlan_depth);
- type = vh->h_vlan_encapsulated_proto;
- vlan_depth += VLAN_HLEN;
- } while (type == htons(ETH_P_8021Q) ||
- type == htons(ETH_P_8021AD));
- }
-
- *depth = vlan_depth;
-
- return type;
+ return __vlan_get_protocol(skb, type, depth);
}
/**
@@ -2522,7 +2521,7 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
/* If MPLS offload request, verify we are testing hardware MPLS features
* instead of standard features for the netdev.
*/
-#ifdef CONFIG_NET_MPLS_GSO
+#if IS_ENABLED(CONFIG_NET_MPLS_GSO)
static netdev_features_t net_mpls_features(struct sk_buff *skb,
netdev_features_t features,
__be16 type)
@@ -2562,7 +2561,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb,
netdev_features_t netif_skb_features(struct sk_buff *skb)
{
- const struct net_device *dev = skb->dev;
+ struct net_device *dev = skb->dev;
netdev_features_t features = dev->features;
u16 gso_segs = skb_shinfo(skb)->gso_segs;
__be16 protocol = skb->protocol;
@@ -2570,11 +2569,21 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs)
features &= ~NETIF_F_GSO_MASK;
- if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
- struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
- protocol = veh->h_vlan_encapsulated_proto;
- } else if (!vlan_tx_tag_present(skb)) {
- return harmonize_features(skb, features);
+ /* If encapsulation offload request, verify we are testing
+ * hardware encapsulation features instead of standard
+ * features for the netdev
+ */
+ if (skb->encapsulation)
+ features &= dev->hw_enc_features;
+
+ if (!skb_vlan_tag_present(skb)) {
+ if (unlikely(protocol == htons(ETH_P_8021Q) ||
+ protocol == htons(ETH_P_8021AD))) {
+ struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+ protocol = veh->h_vlan_encapsulated_proto;
+ } else {
+ goto finalize;
+ }
}
features = netdev_intersect_features(features,
@@ -2591,6 +2600,11 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX);
+finalize:
+ if (dev->netdev_ops->ndo_features_check)
+ features &= dev->netdev_ops->ndo_features_check(skb, dev,
+ features);
+
return harmonize_features(skb, features);
}
EXPORT_SYMBOL(netif_skb_features);
@@ -2601,7 +2615,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev,
unsigned int len;
int rc;
- if (!list_empty(&ptype_all))
+ if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all))
dev_queue_xmit_nit(skb, dev);
len = skb->len;
@@ -2643,7 +2657,7 @@ out:
static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb,
netdev_features_t features)
{
- if (vlan_tx_tag_present(skb) &&
+ if (skb_vlan_tag_present(skb) &&
!vlan_hw_offload_capable(features, skb->vlan_proto))
skb = __vlan_hwaccel_push_inside(skb);
return skb;
@@ -2661,19 +2675,12 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
if (unlikely(!skb))
goto out_null;
- /* If encapsulation offload request, verify we are testing
- * hardware encapsulation features instead of standard
- * features for the netdev
- */
- if (skb->encapsulation)
- features &= dev->hw_enc_features;
-
if (netif_needs_gso(dev, skb, features)) {
struct sk_buff *segs;
segs = skb_gso_segment(skb, features);
if (IS_ERR(segs)) {
- segs = NULL;
+ goto out_kfree_skb;
} else if (segs) {
consume_skb(skb);
skb = segs;
@@ -3023,6 +3030,8 @@ static inline void ____napi_schedule(struct softnet_data *sd,
/* One global table that all flow-based protocols share. */
struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
EXPORT_SYMBOL(rps_sock_flow_table);
+u32 rps_cpu_mask __read_mostly;
+EXPORT_SYMBOL(rps_cpu_mask);
struct static_key rps_needed __read_mostly;
@@ -3079,16 +3088,17 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
struct rps_dev_flow **rflowp)
{
- struct netdev_rx_queue *rxqueue;
- struct rps_map *map;
+ const struct rps_sock_flow_table *sock_flow_table;
+ struct netdev_rx_queue *rxqueue = dev->_rx;
struct rps_dev_flow_table *flow_table;
- struct rps_sock_flow_table *sock_flow_table;
+ struct rps_map *map;
int cpu = -1;
- u16 tcpu;
+ u32 tcpu;
u32 hash;
if (skb_rx_queue_recorded(skb)) {
u16 index = skb_get_rx_queue(skb);
+
if (unlikely(index >= dev->real_num_rx_queues)) {
WARN_ONCE(dev->real_num_rx_queues > 1,
"%s received packet on queue %u, but number "
@@ -3096,39 +3106,40 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
dev->name, index, dev->real_num_rx_queues);
goto done;
}
- rxqueue = dev->_rx + index;
- } else
- rxqueue = dev->_rx;
+ rxqueue += index;
+ }
+ /* Avoid computing hash if RFS/RPS is not active for this rxqueue */
+
+ flow_table = rcu_dereference(rxqueue->rps_flow_table);
map = rcu_dereference(rxqueue->rps_map);
- if (map) {
- if (map->len == 1 &&
- !rcu_access_pointer(rxqueue->rps_flow_table)) {
- tcpu = map->cpus[0];
- if (cpu_online(tcpu))
- cpu = tcpu;
- goto done;
- }
- } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) {
+ if (!flow_table && !map)
goto done;
- }
skb_reset_network_header(skb);
hash = skb_get_hash(skb);
if (!hash)
goto done;
- flow_table = rcu_dereference(rxqueue->rps_flow_table);
sock_flow_table = rcu_dereference(rps_sock_flow_table);
if (flow_table && sock_flow_table) {
- u16 next_cpu;
struct rps_dev_flow *rflow;
+ u32 next_cpu;
+ u32 ident;
+ /* First check into global flow table if there is a match */
+ ident = sock_flow_table->ents[hash & sock_flow_table->mask];
+ if ((ident ^ hash) & ~rps_cpu_mask)
+ goto try_rps;
+
+ next_cpu = ident & rps_cpu_mask;
+
+ /* OK, now we know there is a match,
+ * we can look at the local (per receive queue) flow table
+ */
rflow = &flow_table->flows[hash & flow_table->mask];
tcpu = rflow->cpu;
- next_cpu = sock_flow_table->ents[hash & sock_flow_table->mask];
-
/*
* If the desired CPU (where last recvmsg was done) is
* different from current CPU (one in the rx-queue flow
@@ -3155,6 +3166,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
}
}
+try_rps:
+
if (map) {
tcpu = map->cpus[reciprocal_scale(hash, map->len)];
if (cpu_online(tcpu)) {
@@ -3606,7 +3619,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
struct packet_type *ptype, *pt_prev;
rx_handler_func_t *rx_handler;
struct net_device *orig_dev;
- struct net_device *null_or_dev;
bool deliver_exact = false;
int ret = NET_RX_DROP;
__be16 type;
@@ -3649,11 +3661,15 @@ another_round:
goto skip_taps;
list_for_each_entry_rcu(ptype, &ptype_all, list) {
- if (!ptype->dev || ptype->dev == skb->dev) {
- if (pt_prev)
- ret = deliver_skb(skb, pt_prev, orig_dev);
- pt_prev = ptype;
- }
+ if (pt_prev)
+ ret = deliver_skb(skb, pt_prev, orig_dev);
+ pt_prev = ptype;
+ }
+
+ list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) {
+ if (pt_prev)
+ ret = deliver_skb(skb, pt_prev, orig_dev);
+ pt_prev = ptype;
}
skip_taps:
@@ -3667,7 +3683,7 @@ ncls:
if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
goto drop;
- if (vlan_tx_tag_present(skb)) {
+ if (skb_vlan_tag_present(skb)) {
if (pt_prev) {
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = NULL;
@@ -3699,8 +3715,8 @@ ncls:
}
}
- if (unlikely(vlan_tx_tag_present(skb))) {
- if (vlan_tx_tag_get_id(skb))
+ if (unlikely(skb_vlan_tag_present(skb))) {
+ if (skb_vlan_tag_get_id(skb))
skb->pkt_type = PACKET_OTHERHOST;
/* Note: we might in the future use prio bits
* and set skb->priority like in vlan_do_receive()
@@ -3709,19 +3725,21 @@ ncls:
skb->vlan_tci = 0;
}
+ type = skb->protocol;
+
/* deliver only exact match when indicated */
- null_or_dev = deliver_exact ? skb->dev : NULL;
+ if (likely(!deliver_exact)) {
+ deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
+ &ptype_base[ntohs(type) &
+ PTYPE_HASH_MASK]);
+ }
- type = skb->protocol;
- list_for_each_entry_rcu(ptype,
- &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
- if (ptype->type == type &&
- (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
- ptype->dev == orig_dev)) {
- if (pt_prev)
- ret = deliver_skb(skb, pt_prev, orig_dev);
- pt_prev = ptype;
- }
+ deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
+ &orig_dev->ptype_specific);
+
+ if (unlikely(skb->dev != orig_dev)) {
+ deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
+ &skb->dev->ptype_specific);
}
if (pt_prev) {
@@ -4006,6 +4024,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
NAPI_GRO_CB(skb)->flush = 0;
NAPI_GRO_CB(skb)->free = 0;
NAPI_GRO_CB(skb)->udp_mark = 0;
+ NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
/* Setup for GRO checksum validation */
switch (skb->ip_summed) {
@@ -4557,6 +4576,68 @@ void netif_napi_del(struct napi_struct *napi)
}
EXPORT_SYMBOL(netif_napi_del);
+static int napi_poll(struct napi_struct *n, struct list_head *repoll)
+{
+ void *have;
+ int work, weight;
+
+ list_del_init(&n->poll_list);
+
+ have = netpoll_poll_lock(n);
+
+ weight = n->weight;
+
+ /* This NAPI_STATE_SCHED test is for avoiding a race
+ * with netpoll's poll_napi(). Only the entity which
+ * obtains the lock and sees NAPI_STATE_SCHED set will
+ * actually make the ->poll() call. Therefore we avoid
+ * accidentally calling ->poll() when NAPI is not scheduled.
+ */
+ work = 0;
+ if (test_bit(NAPI_STATE_SCHED, &n->state)) {
+ work = n->poll(n, weight);
+ trace_napi_poll(n);
+ }
+
+ WARN_ON_ONCE(work > weight);
+
+ if (likely(work < weight))
+ goto out_unlock;
+
+ /* Drivers must not modify the NAPI state if they
+ * consume the entire weight. In such cases this code
+ * still "owns" the NAPI instance and therefore can
+ * move the instance around on the list at-will.
+ */
+ if (unlikely(napi_disable_pending(n))) {
+ napi_complete(n);
+ goto out_unlock;
+ }
+
+ if (n->gro_list) {
+ /* flush too old packets
+ * If HZ < 1000, flush all packets.
+ */
+ napi_gro_flush(n, HZ >= 1000);
+ }
+
+ /* Some drivers may have called napi_schedule
+ * prior to exhausting their budget.
+ */
+ if (unlikely(!list_empty(&n->poll_list))) {
+ pr_warn_once("%s: Budget exhausted after napi rescheduled\n",
+ n->dev ? n->dev->name : "backlog");
+ goto out_unlock;
+ }
+
+ list_add_tail(&n->poll_list, repoll);
+
+out_unlock:
+ netpoll_poll_unlock(have);
+
+ return work;
+}
+
static void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
@@ -4564,74 +4645,34 @@ static void net_rx_action(struct softirq_action *h)
int budget = netdev_budget;
LIST_HEAD(list);
LIST_HEAD(repoll);
- void *have;
local_irq_disable();
list_splice_init(&sd->poll_list, &list);
local_irq_enable();
- while (!list_empty(&list)) {
+ for (;;) {
struct napi_struct *n;
- int work, weight;
-
- /* If softirq window is exhausted then punt.
- * Allow this to run for 2 jiffies since which will allow
- * an average latency of 1.5/HZ.
- */
- if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit)))
- goto softnet_break;
-
- n = list_first_entry(&list, struct napi_struct, poll_list);
- list_del_init(&n->poll_list);
-
- have = netpoll_poll_lock(n);
-
- weight = n->weight;
-
- /* This NAPI_STATE_SCHED test is for avoiding a race
- * with netpoll's poll_napi(). Only the entity which
- * obtains the lock and sees NAPI_STATE_SCHED set will
- * actually make the ->poll() call. Therefore we avoid
- * accidentally calling ->poll() when NAPI is not scheduled.
- */
- work = 0;
- if (test_bit(NAPI_STATE_SCHED, &n->state)) {
- work = n->poll(n, weight);
- trace_napi_poll(n);
+ if (list_empty(&list)) {
+ if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
+ return;
+ break;
}
- WARN_ON_ONCE(work > weight);
-
- budget -= work;
+ n = list_first_entry(&list, struct napi_struct, poll_list);
+ budget -= napi_poll(n, &repoll);
- /* Drivers must not modify the NAPI state if they
- * consume the entire weight. In such cases this code
- * still "owns" the NAPI instance and therefore can
- * move the instance around on the list at-will.
+ /* If softirq window is exhausted then punt.
+ * Allow this to run for 2 jiffies since which will allow
+ * an average latency of 1.5/HZ.
*/
- if (unlikely(work == weight)) {
- if (unlikely(napi_disable_pending(n))) {
- napi_complete(n);
- } else {
- if (n->gro_list) {
- /* flush too old packets
- * If HZ < 1000, flush all packets.
- */
- napi_gro_flush(n, HZ >= 1000);
- }
- list_add_tail(&n->poll_list, &repoll);
- }
+ if (unlikely(budget <= 0 ||
+ time_after_eq(jiffies, time_limit))) {
+ sd->time_squeeze++;
+ break;
}
-
- netpoll_poll_unlock(have);
}
- if (!sd_has_rps_ipi_waiting(sd) &&
- list_empty(&list) &&
- list_empty(&repoll))
- return;
-out:
local_irq_disable();
list_splice_tail_init(&sd->poll_list, &list);
@@ -4641,12 +4682,6 @@ out:
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
net_rps_action_and_irq_enable(sd);
-
- return;
-
-softnet_break:
- sd->time_squeeze++;
- goto out;
}
struct netdev_adjacent {
@@ -5298,7 +5333,27 @@ void netdev_upper_dev_unlink(struct net_device *dev,
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
-void netdev_adjacent_add_links(struct net_device *dev)
+/**
+ * netdev_bonding_info_change - Dispatch event about slave change
+ * @dev: device
+ * @bonding_info: info to dispatch
+ *
+ * Send NETDEV_BONDING_INFO to netdev notifiers with info.
+ * The caller must hold the RTNL lock.
+ */
+void netdev_bonding_info_change(struct net_device *dev,
+ struct netdev_bonding_info *bonding_info)
+{
+ struct netdev_notifier_bonding_info info;
+
+ memcpy(&info.bonding_info, bonding_info,
+ sizeof(struct netdev_bonding_info));
+ call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev,
+ &info.info);
+}
+EXPORT_SYMBOL(netdev_bonding_info_change);
+
+static void netdev_adjacent_add_links(struct net_device *dev)
{
struct netdev_adjacent *iter;
@@ -5323,7 +5378,7 @@ void netdev_adjacent_add_links(struct net_device *dev)
}
}
-void netdev_adjacent_del_links(struct net_device *dev)
+static void netdev_adjacent_del_links(struct net_device *dev)
{
struct netdev_adjacent *iter;
@@ -6147,13 +6202,16 @@ static int netif_alloc_rx_queues(struct net_device *dev)
{
unsigned int i, count = dev->num_rx_queues;
struct netdev_rx_queue *rx;
+ size_t sz = count * sizeof(*rx);
BUG_ON(count < 1);
- rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
- if (!rx)
- return -ENOMEM;
-
+ rx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+ if (!rx) {
+ rx = vzalloc(sz);
+ if (!rx)
+ return -ENOMEM;
+ }
dev->_rx = rx;
for (i = 0; i < count; i++)
@@ -6551,6 +6609,8 @@ void netdev_run_todo(void)
/* paranoia */
BUG_ON(netdev_refcnt_read(dev));
+ BUG_ON(!list_empty(&dev->ptype_all));
+ BUG_ON(!list_empty(&dev->ptype_specific));
WARN_ON(rcu_access_pointer(dev->ip_ptr));
WARN_ON(rcu_access_pointer(dev->ip6_ptr));
WARN_ON(dev->dn_ptr);
@@ -6631,7 +6691,7 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
if (!queue)
return NULL;
netdev_init_one_queue(dev, queue, NULL);
- queue->qdisc = &noop_qdisc;
+ RCU_INIT_POINTER(queue->qdisc, &noop_qdisc);
queue->qdisc_sleeping = &noop_qdisc;
rcu_assign_pointer(dev->ingress_queue, queue);
#endif
@@ -6733,6 +6793,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
INIT_LIST_HEAD(&dev->adj_list.lower);
INIT_LIST_HEAD(&dev->all_adj_list.upper);
INIT_LIST_HEAD(&dev->all_adj_list.lower);
+ INIT_LIST_HEAD(&dev->ptype_all);
+ INIT_LIST_HEAD(&dev->ptype_specific);
dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
setup(dev);
@@ -6783,7 +6845,7 @@ void free_netdev(struct net_device *dev)
netif_free_tx_queues(dev);
#ifdef CONFIG_SYSFS
- kfree(dev->_rx);
+ kvfree(dev->_rx);
#endif
kfree(rcu_dereference_protected(dev->ingress_queue, 1));
@@ -7047,10 +7109,20 @@ static int dev_cpu_callback(struct notifier_block *nfb,
oldsd->output_queue = NULL;
oldsd->output_queue_tailp = &oldsd->output_queue;
}
- /* Append NAPI poll list from offline CPU. */
- if (!list_empty(&oldsd->poll_list)) {
- list_splice_init(&oldsd->poll_list, &sd->poll_list);
- raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ /* Append NAPI poll list from offline CPU, with one exception :
+ * process_backlog() must be called by cpu owning percpu backlog.
+ * We properly handle process_queue & input_pkt_queue later.
+ */
+ while (!list_empty(&oldsd->poll_list)) {
+ struct napi_struct *napi = list_first_entry(&oldsd->poll_list,
+ struct napi_struct,
+ poll_list);
+
+ list_del_init(&napi->poll_list);
+ if (napi->poll == process_backlog)
+ napi->state = 0;
+ else
+ ____napi_schedule(sd, napi);
}
raise_softirq_irqoff(NET_TX_SOFTIRQ);
@@ -7058,11 +7130,11 @@ static int dev_cpu_callback(struct notifier_block *nfb,
/* Process offline CPU's input_pkt_queue */
while ((skb = __skb_dequeue(&oldsd->process_queue))) {
- netif_rx_internal(skb);
+ netif_rx_ni(skb);
input_queue_head_incr(oldsd);
}
- while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
- netif_rx_internal(skb);
+ while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
+ netif_rx_ni(skb);
input_queue_head_incr(oldsd);
}