summaryrefslogtreecommitdiffstats
path: root/net/core/dev.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-01-06 17:22:09 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2012-01-06 17:22:09 -0800
commit9753dfe19a85e7e45a34a56f4cb2048bb4f50e27 (patch)
treec017a1b4a70b8447c71b01d8b320e071546b5c9d /net/core/dev.c
parentedf7c8148ec40c0fd27c0ef3f688defcc65e3913 (diff)
parent9f42f126154786e6e76df513004800c8c633f020 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1958 commits) net: pack skb_shared_info more efficiently net_sched: red: split red_parms into parms and vars net_sched: sfq: extend limits cnic: Improve error recovery on bnx2x devices cnic: Re-init dev->stats_addr after chip reset net_sched: Bug in netem reordering bna: fix sparse warnings/errors bna: make ethtool_ops and strings const xgmac: cleanups net: make ethtool_ops const vmxnet3" make ethtool ops const xen-netback: make ops structs const virtio_net: Pass gfp flags when allocating rx buffers. ixgbe: FCoE: Add support for ndo_get_fcoe_hbainfo() call netdev: FCoE: Add new ndo_get_fcoe_hbainfo() call igb: reset PHY after recovering from PHY power down igb: add basic runtime PM support igb: Add support for byte queue limits. e1000: cleanup CE4100 MDIO registers access e1000: unmap ce4100_gbe_mdio_base_virt in e1000_remove ...
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--net/core/dev.c307
1 files changed, 115 insertions, 192 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 5a13edfc9f7..f494675471a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -133,10 +133,9 @@
#include <linux/pci.h>
#include <linux/inetdevice.h>
#include <linux/cpu_rmap.h>
-#include <linux/if_tunnel.h>
-#include <linux/if_pppox.h>
-#include <linux/ppp_defs.h>
#include <linux/net_tstamp.h>
+#include <linux/jump_label.h>
+#include <net/flow_keys.h>
#include "net-sysfs.h"
@@ -1320,8 +1319,6 @@ EXPORT_SYMBOL(dev_close);
*/
void dev_disable_lro(struct net_device *dev)
{
- u32 flags;
-
/*
* If we're trying to disable lro on a vlan device
* use the underlying physical device instead
@@ -1329,15 +1326,9 @@ void dev_disable_lro(struct net_device *dev)
if (is_vlan_dev(dev))
dev = vlan_dev_real_dev(dev);
- if (dev->ethtool_ops && dev->ethtool_ops->get_flags)
- flags = dev->ethtool_ops->get_flags(dev);
- else
- flags = ethtool_op_get_flags(dev);
+ dev->wanted_features &= ~NETIF_F_LRO;
+ netdev_update_features(dev);
- if (!(flags & ETH_FLAG_LRO))
- return;
-
- __ethtool_set_flags(dev, flags & ~ETH_FLAG_LRO);
if (unlikely(dev->features & NETIF_F_LRO))
netdev_WARN(dev, "failed to disable LRO!\n");
}
@@ -1450,34 +1441,55 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
}
EXPORT_SYMBOL(call_netdevice_notifiers);
-/* When > 0 there are consumers of rx skb time stamps */
-static atomic_t netstamp_needed = ATOMIC_INIT(0);
+static struct jump_label_key netstamp_needed __read_mostly;
+#ifdef HAVE_JUMP_LABEL
+/* We are not allowed to call jump_label_dec() from irq context
+ * If net_disable_timestamp() is called from irq context, defer the
+ * jump_label_dec() calls.
+ */
+static atomic_t netstamp_needed_deferred;
+#endif
void net_enable_timestamp(void)
{
- atomic_inc(&netstamp_needed);
+#ifdef HAVE_JUMP_LABEL
+ int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
+
+ if (deferred) {
+ while (--deferred)
+ jump_label_dec(&netstamp_needed);
+ return;
+ }
+#endif
+ WARN_ON(in_interrupt());
+ jump_label_inc(&netstamp_needed);
}
EXPORT_SYMBOL(net_enable_timestamp);
void net_disable_timestamp(void)
{
- atomic_dec(&netstamp_needed);
+#ifdef HAVE_JUMP_LABEL
+ if (in_interrupt()) {
+ atomic_inc(&netstamp_needed_deferred);
+ return;
+ }
+#endif
+ jump_label_dec(&netstamp_needed);
}
EXPORT_SYMBOL(net_disable_timestamp);
static inline void net_timestamp_set(struct sk_buff *skb)
{
- if (atomic_read(&netstamp_needed))
+ skb->tstamp.tv64 = 0;
+ if (static_branch(&netstamp_needed))
__net_timestamp(skb);
- else
- skb->tstamp.tv64 = 0;
}
-static inline void net_timestamp_check(struct sk_buff *skb)
-{
- if (!skb->tstamp.tv64 && atomic_read(&netstamp_needed))
- __net_timestamp(skb);
-}
+#define net_timestamp_check(COND, SKB) \
+ if (static_branch(&netstamp_needed)) { \
+ if ((COND) && !(SKB)->tstamp.tv64) \
+ __net_timestamp(SKB); \
+ } \
static int net_hwtstamp_validate(struct ifreq *ifr)
{
@@ -1924,7 +1936,8 @@ EXPORT_SYMBOL(skb_checksum_help);
* It may return NULL if the skb requires no segmentation. This is
* only possible when GSO is used for verifying header integrity.
*/
-struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features)
+struct sk_buff *skb_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
struct packet_type *ptype;
@@ -1954,9 +1967,9 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features)
if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
dev->ethtool_ops->get_drvinfo(dev, &info);
- WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d ip_summed=%d\n",
- info.driver, dev ? dev->features : 0L,
- skb->sk ? skb->sk->sk_route_caps : 0L,
+ WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d ip_summed=%d\n",
+ info.driver, dev ? &dev->features : NULL,
+ skb->sk ? &skb->sk->sk_route_caps : NULL,
skb->len, skb->data_len, skb->ip_summed);
if (skb_header_cloned(skb) &&
@@ -2065,7 +2078,7 @@ static void dev_gso_skb_destructor(struct sk_buff *skb)
* This function segments the given skb and stores the list of segments
* in skb->next.
*/
-static int dev_gso_segment(struct sk_buff *skb, int features)
+static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
{
struct sk_buff *segs;
@@ -2104,7 +2117,7 @@ static inline void skb_orphan_try(struct sk_buff *skb)
}
}
-static bool can_checksum_protocol(unsigned long features, __be16 protocol)
+static bool can_checksum_protocol(netdev_features_t features, __be16 protocol)
{
return ((features & NETIF_F_GEN_CSUM) ||
((features & NETIF_F_V4_CSUM) &&
@@ -2115,7 +2128,8 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
protocol == htons(ETH_P_FCOE)));
}
-static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features)
+static netdev_features_t harmonize_features(struct sk_buff *skb,
+ __be16 protocol, netdev_features_t features)
{
if (!can_checksum_protocol(features, protocol)) {
features &= ~NETIF_F_ALL_CSUM;
@@ -2127,10 +2141,10 @@ static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features
return features;
}
-u32 netif_skb_features(struct sk_buff *skb)
+netdev_features_t netif_skb_features(struct sk_buff *skb)
{
__be16 protocol = skb->protocol;
- u32 features = skb->dev->features;
+ netdev_features_t features = skb->dev->features;
if (protocol == htons(ETH_P_8021Q)) {
struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
@@ -2176,7 +2190,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
unsigned int skb_len;
if (likely(!skb->next)) {
- u32 features;
+ netdev_features_t features;
/*
* If device doesn't need skb->dst, release it right now while
@@ -2257,7 +2271,7 @@ gso:
return rc;
}
txq_trans_update(txq);
- if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
+ if (unlikely(netif_xmit_stopped(txq) && skb->next))
return NETDEV_TX_BUSY;
} while (skb->next);
@@ -2457,6 +2471,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
return rc;
}
+#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
+static void skb_update_prio(struct sk_buff *skb)
+{
+ struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
+
+ if ((!skb->priority) && (skb->sk) && map)
+ skb->priority = map->priomap[skb->sk->sk_cgrp_prioidx];
+}
+#else
+#define skb_update_prio(skb)
+#endif
+
static DEFINE_PER_CPU(int, xmit_recursion);
#define RECURSION_LIMIT 10
@@ -2497,6 +2523,8 @@ int dev_queue_xmit(struct sk_buff *skb)
*/
rcu_read_lock_bh();
+ skb_update_prio(skb);
+
txq = dev_pick_tx(dev, skb);
q = rcu_dereference_bh(txq->qdisc);
@@ -2531,7 +2559,7 @@ int dev_queue_xmit(struct sk_buff *skb)
HARD_TX_LOCK(dev, txq, cpu);
- if (!netif_tx_queue_stopped(txq)) {
+ if (!netif_xmit_stopped(txq)) {
__this_cpu_inc(xmit_recursion);
rc = dev_hard_start_xmit(skb, dev, txq);
__this_cpu_dec(xmit_recursion);
@@ -2592,123 +2620,28 @@ static inline void ____napi_schedule(struct softnet_data *sd,
*/
void __skb_get_rxhash(struct sk_buff *skb)
{
- int nhoff, hash = 0, poff;
- const struct ipv6hdr *ip6;
- const struct iphdr *ip;
- const struct vlan_hdr *vlan;
- u8 ip_proto;
- u32 addr1, addr2;
- u16 proto;
- union {
- u32 v32;
- u16 v16[2];
- } ports;
-
- nhoff = skb_network_offset(skb);
- proto = skb->protocol;
-
-again:
- switch (proto) {
- case __constant_htons(ETH_P_IP):
-ip:
- if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
- goto done;
-
- ip = (const struct iphdr *) (skb->data + nhoff);
- if (ip_is_fragment(ip))
- ip_proto = 0;
- else
- ip_proto = ip->protocol;
- addr1 = (__force u32) ip->saddr;
- addr2 = (__force u32) ip->daddr;
- nhoff += ip->ihl * 4;
- break;
- case __constant_htons(ETH_P_IPV6):
-ipv6:
- if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
- goto done;
-
- ip6 = (const struct ipv6hdr *) (skb->data + nhoff);
- ip_proto = ip6->nexthdr;
- addr1 = (__force u32) ip6->saddr.s6_addr32[3];
- addr2 = (__force u32) ip6->daddr.s6_addr32[3];
- nhoff += 40;
- break;
- case __constant_htons(ETH_P_8021Q):
- if (!pskb_may_pull(skb, sizeof(*vlan) + nhoff))
- goto done;
- vlan = (const struct vlan_hdr *) (skb->data + nhoff);
- proto = vlan->h_vlan_encapsulated_proto;
- nhoff += sizeof(*vlan);
- goto again;
- case __constant_htons(ETH_P_PPP_SES):
- if (!pskb_may_pull(skb, PPPOE_SES_HLEN + nhoff))
- goto done;
- proto = *((__be16 *) (skb->data + nhoff +
- sizeof(struct pppoe_hdr)));
- nhoff += PPPOE_SES_HLEN;
- switch (proto) {
- case __constant_htons(PPP_IP):
- goto ip;
- case __constant_htons(PPP_IPV6):
- goto ipv6;
- default:
- goto done;
- }
- default:
- goto done;
- }
-
- switch (ip_proto) {
- case IPPROTO_GRE:
- if (pskb_may_pull(skb, nhoff + 16)) {
- u8 *h = skb->data + nhoff;
- __be16 flags = *(__be16 *)h;
+ struct flow_keys keys;
+ u32 hash;
- /*
- * Only look inside GRE if version zero and no
- * routing
- */
- if (!(flags & (GRE_VERSION|GRE_ROUTING))) {
- proto = *(__be16 *)(h + 2);
- nhoff += 4;
- if (flags & GRE_CSUM)
- nhoff += 4;
- if (flags & GRE_KEY)
- nhoff += 4;
- if (flags & GRE_SEQ)
- nhoff += 4;
- goto again;
- }
- }
- break;
- case IPPROTO_IPIP:
- goto again;
- default:
- break;
- }
+ if (!skb_flow_dissect(skb, &keys))
+ return;
- ports.v32 = 0;
- poff = proto_ports_offset(ip_proto);
- if (poff >= 0) {
- nhoff += poff;
- if (pskb_may_pull(skb, nhoff + 4)) {
- ports.v32 = * (__force u32 *) (skb->data + nhoff);
- if (ports.v16[1] < ports.v16[0])
- swap(ports.v16[0], ports.v16[1]);
- skb->l4_rxhash = 1;
- }
+ if (keys.ports) {
+ if ((__force u16)keys.port16[1] < (__force u16)keys.port16[0])
+ swap(keys.port16[0], keys.port16[1]);
+ skb->l4_rxhash = 1;
}
/* get a consistent hash (same value on both flow directions) */
- if (addr2 < addr1)
- swap(addr1, addr2);
+ if ((__force u32)keys.dst < (__force u32)keys.src)
+ swap(keys.dst, keys.src);
- hash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
+ hash = jhash_3words((__force u32)keys.dst,
+ (__force u32)keys.src,
+ (__force u32)keys.ports, hashrnd);
if (!hash)
hash = 1;
-done:
skb->rxhash = hash;
}
EXPORT_SYMBOL(__skb_get_rxhash);
@@ -2719,6 +2652,8 @@ EXPORT_SYMBOL(__skb_get_rxhash);
struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
EXPORT_SYMBOL(rps_sock_flow_table);
+struct jump_label_key rps_needed __read_mostly;
+
static struct rps_dev_flow *
set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
struct rps_dev_flow *rflow, u16 next_cpu)
@@ -2998,12 +2933,11 @@ int netif_rx(struct sk_buff *skb)
if (netpoll_rx(skb))
return NET_RX_DROP;
- if (netdev_tstamp_prequeue)
- net_timestamp_check(skb);
+ net_timestamp_check(netdev_tstamp_prequeue, skb);
trace_netif_rx(skb);
#ifdef CONFIG_RPS
- {
+ if (static_branch(&rps_needed)) {
struct rps_dev_flow voidflow, *rflow = &voidflow;
int cpu;
@@ -3018,14 +2952,13 @@ int netif_rx(struct sk_buff *skb)
rcu_read_unlock();
preempt_enable();
- }
-#else
+ } else
+#endif
{
unsigned int qtail;
ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
put_cpu();
}
-#endif
return ret;
}
EXPORT_SYMBOL(netif_rx);
@@ -3231,8 +3164,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
int ret = NET_RX_DROP;
__be16 type;
- if (!netdev_tstamp_prequeue)
- net_timestamp_check(skb);
+ net_timestamp_check(!netdev_tstamp_prequeue, skb);
trace_netif_receive_skb(skb);
@@ -3363,14 +3295,13 @@ out:
*/
int netif_receive_skb(struct sk_buff *skb)
{
- if (netdev_tstamp_prequeue)
- net_timestamp_check(skb);
+ net_timestamp_check(netdev_tstamp_prequeue, skb);
if (skb_defer_rx_timestamp(skb))
return NET_RX_SUCCESS;
#ifdef CONFIG_RPS
- {
+ if (static_branch(&rps_needed)) {
struct rps_dev_flow voidflow, *rflow = &voidflow;
int cpu, ret;
@@ -3381,16 +3312,12 @@ int netif_receive_skb(struct sk_buff *skb)
if (cpu >= 0) {
ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
rcu_read_unlock();
- } else {
- rcu_read_unlock();
- ret = __netif_receive_skb(skb);
+ return ret;
}
-
- return ret;
+ rcu_read_unlock();
}
-#else
- return __netif_receive_skb(skb);
#endif
+ return __netif_receive_skb(skb);
}
EXPORT_SYMBOL(netif_receive_skb);
@@ -4539,7 +4466,7 @@ static void dev_change_rx_flags(struct net_device *dev, int flags)
static int __dev_set_promiscuity(struct net_device *dev, int inc)
{
- unsigned short old_flags = dev->flags;
+ unsigned int old_flags = dev->flags;
uid_t uid;
gid_t gid;
@@ -4596,7 +4523,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc)
*/
int dev_set_promiscuity(struct net_device *dev, int inc)
{
- unsigned short old_flags = dev->flags;
+ unsigned int old_flags = dev->flags;
int err;
err = __dev_set_promiscuity(dev, inc);
@@ -4623,7 +4550,7 @@ EXPORT_SYMBOL(dev_set_promiscuity);
int dev_set_allmulti(struct net_device *dev, int inc)
{
- unsigned short old_flags = dev->flags;
+ unsigned int old_flags = dev->flags;
ASSERT_RTNL();
@@ -4726,7 +4653,7 @@ EXPORT_SYMBOL(dev_get_flags);
int __dev_change_flags(struct net_device *dev, unsigned int flags)
{
- int old_flags = dev->flags;
+ unsigned int old_flags = dev->flags;
int ret;
ASSERT_RTNL();
@@ -4809,10 +4736,10 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
* Change settings on device based state flags. The flags are
* in the userspace exported format.
*/
-int dev_change_flags(struct net_device *dev, unsigned flags)
+int dev_change_flags(struct net_device *dev, unsigned int flags)
{
- int ret, changes;
- int old_flags = dev->flags;
+ int ret;
+ unsigned int changes, old_flags = dev->flags;
ret = __dev_change_flags(dev, flags);
if (ret < 0)
@@ -5369,7 +5296,8 @@ static void rollback_registered(struct net_device *dev)
list_del(&single);
}
-static u32 netdev_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t netdev_fix_features(struct net_device *dev,
+ netdev_features_t features)
{
/* Fix illegal checksum combinations */
if ((features & NETIF_F_HW_CSUM) &&
@@ -5378,12 +5306,6 @@ static u32 netdev_fix_features(struct net_device *dev, u32 features)
features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
}
- if ((features & NETIF_F_NO_CSUM) &&
- (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
- netdev_warn(dev, "mixed no checksumming and other settings.\n");
- features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
- }
-
/* Fix illegal SG+CSUM combinations. */
if ((features & NETIF_F_SG) &&
!(features & NETIF_F_ALL_CSUM)) {
@@ -5431,7 +5353,7 @@ static u32 netdev_fix_features(struct net_device *dev, u32 features)
int __netdev_update_features(struct net_device *dev)
{
- u32 features;
+ netdev_features_t features;
int err = 0;
ASSERT_RTNL();
@@ -5447,16 +5369,16 @@ int __netdev_update_features(struct net_device *dev)
if (dev->features == features)
return 0;
- netdev_dbg(dev, "Features changed: 0x%08x -> 0x%08x\n",
- dev->features, features);
+ netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
+ &dev->features, &features);
if (dev->netdev_ops->ndo_set_features)
err = dev->netdev_ops->ndo_set_features(dev, features);
if (unlikely(err < 0)) {
netdev_err(dev,
- "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n",
- err, features, dev->features);
+ "set_features() failed (%d); wanted %pNF, left %pNF\n",
+ err, &features, &dev->features);
return -1;
}
@@ -5555,6 +5477,9 @@ static void netdev_init_one_queue(struct net_device *dev,
queue->xmit_lock_owner = -1;
netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
queue->dev = dev;
+#ifdef CONFIG_BQL
+ dql_init(&queue->dql, HZ);
+#endif
}
static int netif_alloc_netdev_queues(struct net_device *dev)
@@ -5640,11 +5565,12 @@ int register_netdevice(struct net_device *dev)
dev->wanted_features = dev->features & dev->hw_features;
/* Turn on no cache copy if HW is doing checksum */
- dev->hw_features |= NETIF_F_NOCACHE_COPY;
- if ((dev->features & NETIF_F_ALL_CSUM) &&
- !(dev->features & NETIF_F_NO_CSUM)) {
- dev->wanted_features |= NETIF_F_NOCACHE_COPY;
- dev->features |= NETIF_F_NOCACHE_COPY;
+ if (!(dev->flags & IFF_LOOPBACK)) {
+ dev->hw_features |= NETIF_F_NOCACHE_COPY;
+ if (dev->features & NETIF_F_ALL_CSUM) {
+ dev->wanted_features |= NETIF_F_NOCACHE_COPY;
+ dev->features |= NETIF_F_NOCACHE_COPY;
+ }
}
/* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
@@ -6380,7 +6306,8 @@ static int dev_cpu_callback(struct notifier_block *nfb,
* @one to the master device with current feature set @all. Will not
* enable anything that is off in @mask. Returns the new feature set.
*/
-u32 netdev_increment_features(u32 all, u32 one, u32 mask)
+netdev_features_t netdev_increment_features(netdev_features_t all,
+ netdev_features_t one, netdev_features_t mask)
{
if (mask & NETIF_F_GEN_CSUM)
mask |= NETIF_F_ALL_CSUM;
@@ -6389,10 +6316,6 @@ u32 netdev_increment_features(u32 all, u32 one, u32 mask)
all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
all &= one | ~NETIF_F_ALL_FOR_ALL;
- /* If device needs checksumming, downgrade to it. */
- if (all & (NETIF_F_ALL_CSUM & ~NETIF_F_NO_CSUM))
- all &= ~NETIF_F_NO_CSUM;
-
/* If one device supports hw checksumming, set for all. */
if (all & NETIF_F_GEN_CSUM)
all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);