From c1e756bfcbcac838a86a23f3e4501b556a961e3c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 5 May 2014 15:00:44 +0200 Subject: Revert "net: core: introduce netif_skb_dev_features" This reverts commit d206940319c41df4299db75ed56142177bb2e5f6, there are no more callers. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/core/dev.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index d2c8a06b3a9..c619b864133 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2418,7 +2418,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault); * 2. No high memory really exists on this machine. */ -static int illegal_highdma(const struct net_device *dev, struct sk_buff *skb) +static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) { #ifdef CONFIG_HIGHMEM int i; @@ -2493,38 +2493,36 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) } static netdev_features_t harmonize_features(struct sk_buff *skb, - const struct net_device *dev, - netdev_features_t features) + netdev_features_t features) { int tmp; if (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, skb_network_protocol(skb, &tmp))) { features &= ~NETIF_F_ALL_CSUM; - } else if (illegal_highdma(dev, skb)) { + } else if (illegal_highdma(skb->dev, skb)) { features &= ~NETIF_F_SG; } return features; } -netdev_features_t netif_skb_dev_features(struct sk_buff *skb, - const struct net_device *dev) +netdev_features_t netif_skb_features(struct sk_buff *skb) { __be16 protocol = skb->protocol; - netdev_features_t features = dev->features; + netdev_features_t features = skb->dev->features; - if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs) + if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) features &= ~NETIF_F_GSO_MASK; if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; } else if (!vlan_tx_tag_present(skb)) { - return harmonize_features(skb, dev, features); + return harmonize_features(skb, features); } - features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | + features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) @@ -2532,9 +2530,9 @@ netdev_features_t netif_skb_dev_features(struct sk_buff *skb, NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; - return harmonize_features(skb, dev, features); + return harmonize_features(skb, features); } -EXPORT_SYMBOL(netif_skb_dev_features); +EXPORT_SYMBOL(netif_skb_features); int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) -- cgit v1.2.3-70-g09d2 From 2176d5d41891753774f648b67470398a5acab584 Mon Sep 17 00:00:00 2001 From: Duan Jiong Date: Fri, 9 May 2014 13:16:48 +0800 Subject: neigh: set nud_state to NUD_INCOMPLETE when probing router reachability Since commit 7e98056964("ipv6: router reachability probing"), a router falls into NUD_FAILED will be probed. Now if function rt6_select() selects a router which neighbour state is NUD_FAILED, and at the same time function rt6_probe() changes the neighbour state to NUD_PROBE, then function dst_neigh_output() can directly send packets, but actually the neighbour still is unreachable. If we set nud_state to NUD_INCOMPLETE instead NUD_PROBE, packets will not be sent out until the neihbour is reachable. In addition, because the route should be probes with a single NS, so we must set neigh->probes to neigh_max_probes(), then the neigh timer timeout and function neigh_timer_handler() will not send other NS Messages. Signed-off-by: Duan Jiong Signed-off-by: David S. Miller --- net/core/neighbour.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 8f8a96ef9f3..32d872eec7f 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1248,8 +1248,8 @@ void __neigh_set_probe_once(struct neighbour *neigh) neigh->updated = jiffies; if (!(neigh->nud_state & NUD_FAILED)) return; - neigh->nud_state = NUD_PROBE; - atomic_set(&neigh->probes, NEIGH_VAR(neigh->parms, UCAST_PROBES)); + neigh->nud_state = NUD_INCOMPLETE; + atomic_set(&neigh->probes, neigh_max_probes(neigh)); neigh_add_timer(neigh, jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME)); } -- cgit v1.2.3-70-g09d2 From 3d4405226d27b3a215e4d03cfa51f536244e5de7 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sun, 11 May 2014 22:59:30 +0200 Subject: net: avoid dependency of net_get_random_once on nop patching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net_get_random_once depends on the static keys infrastructure to patch up the branch to the slow path during boot. This was realized by abusing the static keys api and defining a new initializer to not enable the call site while still indicating that the branch point should get patched up. This was needed to have the fast path considered likely by gcc. The static key initialization during boot up normally walks through all the registered keys and either patches in ideal nops or enables the jump site but omitted that step on x86 if ideal nops where already placed at static_key branch points. Thus net_get_random_once branches not always became active. This patch switches net_get_random_once to the ordinary static_key api and thus places the kernel fast path in the - by gcc considered - unlikely path. Microbenchmarks on Intel and AMD x86-64 showed that the unlikely path actually beats the likely path in terms of cycle cost and that different nop patterns did not make much difference, thus this switch should not be noticeable. Fixes: a48e42920ff38b ("net: introduce new macro net_get_random_once") Reported-by: Tuomas Räsänen Cc: Linus Torvalds Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/net.h | 15 ++++----------- net/core/utils.c | 8 ++++---- 2 files changed, 8 insertions(+), 15 deletions(-) (limited to 'net/core') diff --git a/include/linux/net.h b/include/linux/net.h index 94734a6259a..17d83393afc 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -248,24 +248,17 @@ do { \ bool __net_get_random_once(void *buf, int nbytes, bool *done, struct static_key *done_key); -#ifdef HAVE_JUMP_LABEL -#define ___NET_RANDOM_STATIC_KEY_INIT ((struct static_key) \ - { .enabled = ATOMIC_INIT(0), .entries = (void *)1 }) -#else /* !HAVE_JUMP_LABEL */ -#define ___NET_RANDOM_STATIC_KEY_INIT STATIC_KEY_INIT_FALSE -#endif /* HAVE_JUMP_LABEL */ - #define net_get_random_once(buf, nbytes) \ ({ \ bool ___ret = false; \ static bool ___done = false; \ - static struct static_key ___done_key = \ - ___NET_RANDOM_STATIC_KEY_INIT; \ - if (!static_key_true(&___done_key)) \ + static struct static_key ___once_key = \ + STATIC_KEY_INIT_TRUE; \ + if (static_key_true(&___once_key)) \ ___ret = __net_get_random_once(buf, \ nbytes, \ &___done, \ - &___done_key); \ + &___once_key); \ ___ret; \ }) diff --git a/net/core/utils.c b/net/core/utils.c index 2f737bf90b3..eed34338736 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -348,8 +348,8 @@ static void __net_random_once_deferred(struct work_struct *w) { struct __net_random_once_work *work = container_of(w, struct __net_random_once_work, work); - if (!static_key_enabled(work->key)) - static_key_slow_inc(work->key); + BUG_ON(!static_key_enabled(work->key)); + static_key_slow_dec(work->key); kfree(work); } @@ -367,7 +367,7 @@ static void __net_random_once_disable_jump(struct static_key *key) } bool __net_get_random_once(void *buf, int nbytes, bool *done, - struct static_key *done_key) + struct static_key *once_key) { static DEFINE_SPINLOCK(lock); unsigned long flags; @@ -382,7 +382,7 @@ bool __net_get_random_once(void *buf, int nbytes, bool *done, *done = true; spin_unlock_irqrestore(&lock, flags); - __net_random_once_disable_jump(done_key); + __net_random_once_disable_jump(once_key); return true; } -- cgit v1.2.3-70-g09d2 From 200b916f3575bdf11609cb447661b8d5957b0bbf Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 12 May 2014 15:11:20 -0700 Subject: rtnetlink: wait for unregistering devices in rtnl_link_unregister() From: Cong Wang commit 50624c934db18ab90 (net: Delay default_device_exit_batch until no devices are unregistering) introduced rtnl_lock_unregistering() for default_device_exit_batch(). Same race could happen we when rmmod a driver which calls rtnl_link_unregister() as we call dev->destructor without rtnl lock. For long term, I think we should clean up the mess of netdev_run_todo() and net namespce exit code. Cc: Eric W. Biederman Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 5 +++++ net/core/dev.c | 2 +- net/core/net_namespace.c | 2 +- net/core/rtnetlink.c | 33 ++++++++++++++++++++++++++++++++- 4 files changed, 39 insertions(+), 3 deletions(-) (limited to 'net/core') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 8e3e66ac0a5..953937ea523 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -4,6 +4,7 @@ #include #include +#include #include extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); @@ -22,6 +23,10 @@ extern void rtnl_lock(void); extern void rtnl_unlock(void); extern int rtnl_trylock(void); extern int rtnl_is_locked(void); + +extern wait_queue_head_t netdev_unregistering_wq; +extern struct mutex net_mutex; + #ifdef CONFIG_PROVE_LOCKING extern int lockdep_rtnl_is_held(void); #else diff --git a/net/core/dev.c b/net/core/dev.c index c619b864133..6da649bde4f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5541,7 +5541,7 @@ static int dev_new_index(struct net *net) /* Delayed registration/unregisteration */ static LIST_HEAD(net_todo_list); -static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq); +DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq); static void net_set_todo(struct net_device *dev) { diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 81d3a9a0845..7c8ffd97496 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -24,7 +24,7 @@ static LIST_HEAD(pernet_list); static struct list_head *first_device = &pernet_list; -static DEFINE_MUTEX(net_mutex); +DEFINE_MUTEX(net_mutex); LIST_HEAD(net_namespace_list); EXPORT_SYMBOL_GPL(net_namespace_list); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9837bebf93c..2d8d8fcfa06 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -353,15 +353,46 @@ void __rtnl_link_unregister(struct rtnl_link_ops *ops) } EXPORT_SYMBOL_GPL(__rtnl_link_unregister); +/* Return with the rtnl_lock held when there are no network + * devices unregistering in any network namespace. + */ +static void rtnl_lock_unregistering_all(void) +{ + struct net *net; + bool unregistering; + DEFINE_WAIT(wait); + + for (;;) { + prepare_to_wait(&netdev_unregistering_wq, &wait, + TASK_UNINTERRUPTIBLE); + unregistering = false; + rtnl_lock(); + for_each_net(net) { + if (net->dev_unreg_count > 0) { + unregistering = true; + break; + } + } + if (!unregistering) + break; + __rtnl_unlock(); + schedule(); + } + finish_wait(&netdev_unregistering_wq, &wait); +} + /** * rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. * @ops: struct rtnl_link_ops * to unregister */ void rtnl_link_unregister(struct rtnl_link_ops *ops) { - rtnl_lock(); + /* Close the race with cleanup_net() */ + mutex_lock(&net_mutex); + rtnl_lock_unregistering_all(); __rtnl_link_unregister(ops); rtnl_unlock(); + mutex_unlock(&net_mutex); } EXPORT_SYMBOL_GPL(rtnl_link_unregister); -- cgit v1.2.3-70-g09d2 From 29e98242783ed3ba569797846a606ba66f781625 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 16 May 2014 11:34:37 -0700 Subject: net: gro: make sure skb->cb[] initial content has not to be zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting from linux-3.13, GRO attempts to build full size skbs. Problem is the commit assumed one particular field in skb->cb[] was clean, but it is not the case on some stacked devices. Timo reported a crash in case traffic is decrypted before reaching a GRE device. Fix this by initializing NAPI_GRO_CB(skb)->last at the right place, this also removes one conditional. Thanks a lot to Timo for providing full reports and bisecting this. Fixes: 8a29111c7ca6 ("net: gro: allow to build full sized skb") Bisected-by: Timo Teras Signed-off-by: Eric Dumazet Tested-by: Timo Teräs Signed-off-by: David S. Miller --- net/core/dev.c | 1 + net/core/skbuff.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 6da649bde4f..ed928e84655 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3951,6 +3951,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff } NAPI_GRO_CB(skb)->count = 1; NAPI_GRO_CB(skb)->age = jiffies; + NAPI_GRO_CB(skb)->last = skb; skb_shinfo(skb)->gso_size = skb_gro_len(skb); skb->next = napi->gro_list; napi->gro_list = skb; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1b62343f583..8383b2bddeb 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3076,7 +3076,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) if (unlikely(p->len + len >= 65536)) return -E2BIG; - lp = NAPI_GRO_CB(p)->last ?: p; + lp = NAPI_GRO_CB(p)->last; pinfo = skb_shinfo(lp); if (headlen <= offset) { @@ -3192,7 +3192,7 @@ merge: __skb_pull(skb, offset); - if (!NAPI_GRO_CB(p)->last) + if (NAPI_GRO_CB(p)->last == p) skb_shinfo(p)->frag_list = skb; else NAPI_GRO_CB(p)->last->next = skb; -- cgit v1.2.3-70-g09d2 From 4085ebe8c31face855fd01ee40372cb4aab1df3a Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 16 May 2014 17:04:53 -0400 Subject: net: Find the nesting level of a given device by type. Multiple devices in the kernel can be stacked/nested and they need to know their nesting level for the purposes of lockdep. This patch provides a generic function that determines a nesting level of a particular device by its type (ex: vlan, macvlan, etc). We only care about nesting of the same type of devices. For example: eth0 <- vlan0.10 <- macvlan0 <- vlan1.20 The nesting level of vlan1.20 would be 1, since there is another vlan in the stack under it. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 ++++++++++ net/core/dev.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) (limited to 'net/core') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 20e99efb1ca..fb912e8e5c7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3077,6 +3077,14 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev, priv; \ priv = netdev_lower_get_next_private_rcu(dev, &(iter))) +void *netdev_lower_get_next(struct net_device *dev, + struct list_head **iter); +#define netdev_for_each_lower_dev(dev, ldev, iter) \ + for (iter = &(dev)->adj_list.lower, \ + ldev = netdev_lower_get_next(dev, &(iter)); \ + ldev; \ + ldev = netdev_lower_get_next(dev, &(iter))) + void *netdev_adjacent_get_private(struct list_head *adj_list); void *netdev_lower_get_first_private_rcu(struct net_device *dev); struct net_device *netdev_master_upper_dev_get(struct net_device *dev); @@ -3092,6 +3100,8 @@ void netdev_upper_dev_unlink(struct net_device *dev, void netdev_adjacent_rename_links(struct net_device *dev, char *oldname); void *netdev_lower_dev_get_private(struct net_device *dev, struct net_device *lower_dev); +int dev_get_nest_level(struct net_device *dev, + bool (*type_check)(struct net_device *dev)); int skb_checksum_help(struct sk_buff *skb); struct sk_buff *__skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path); diff --git a/net/core/dev.c b/net/core/dev.c index ed928e84655..6ee3ac25ed7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4622,6 +4622,32 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev, } EXPORT_SYMBOL(netdev_lower_get_next_private_rcu); +/** + * netdev_lower_get_next - Get the next device from the lower neighbour + * list + * @dev: device + * @iter: list_head ** of the current position + * + * Gets the next netdev_adjacent from the dev's lower neighbour + * list, starting from iter position. The caller must hold RTNL lock or + * its own locking that guarantees that the neighbour lower + * list will remain unchainged. + */ +void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter) +{ + struct netdev_adjacent *lower; + + lower = list_entry((*iter)->next, struct netdev_adjacent, list); + + if (&lower->list == &dev->adj_list.lower) + return NULL; + + *iter = &lower->list; + + return lower->dev; +} +EXPORT_SYMBOL(netdev_lower_get_next); + /** * netdev_lower_get_first_private_rcu - Get the first ->private from the * lower neighbour list, RCU @@ -5072,6 +5098,30 @@ void *netdev_lower_dev_get_private(struct net_device *dev, } EXPORT_SYMBOL(netdev_lower_dev_get_private); + +int dev_get_nest_level(struct net_device *dev, + bool (*type_check)(struct net_device *dev)) +{ + struct net_device *lower = NULL; + struct list_head *iter; + int max_nest = -1; + int nest; + + ASSERT_RTNL(); + + netdev_for_each_lower_dev(dev, lower, iter) { + nest = dev_get_nest_level(lower, type_check); + if (max_nest < nest) + max_nest = nest; + } + + if (type_check(dev)) + max_nest++; + + return max_nest; +} +EXPORT_SYMBOL(dev_get_nest_level); + static void dev_change_rx_flags(struct net_device *dev, int flags) { const struct net_device_ops *ops = dev->netdev_ops; -- cgit v1.2.3-70-g09d2 From d38569ab2bba6e6b3233acfc3a84cdbcfbd1f79f Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 16 May 2014 17:04:55 -0400 Subject: vlan: Fix lockdep warning with stacked vlan devices. This reverts commit dc8eaaa006350d24030502a4521542e74b5cb39f. vlan: Fix lockdep warning when vlan dev handle notification Instead we use the new new API to find the lock subclass of our vlan device. This way we can support configurations where vlans are interspersed with other devices: bond -> vlan -> macvlan -> vlan Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 3 ++- net/8021q/vlan.c | 1 + net/8021q/vlan_dev.c | 52 +++++++++---------------------------------------- net/core/dev.c | 1 - 4 files changed, 12 insertions(+), 45 deletions(-) (limited to 'net/core') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 13bbbde00e6..724bde8477b 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -73,7 +73,7 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb) /* found in socket.c */ extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *)); -static inline int is_vlan_dev(struct net_device *dev) +static inline bool is_vlan_dev(struct net_device *dev) { return dev->priv_flags & IFF_802_1Q_VLAN; } @@ -159,6 +159,7 @@ struct vlan_dev_priv { #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *netpoll; #endif + unsigned int nest_level; }; static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 175273f38cb..44ebd5c2cd4 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -169,6 +169,7 @@ int register_vlan_dev(struct net_device *dev) if (err < 0) goto out_uninit_mvrp; + vlan->nest_level = dev_get_nest_level(real_dev, is_vlan_dev) + 1; err = register_netdevice(dev); if (err < 0) goto out_uninit_mvrp; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 733ec283ed1..019efb79708 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -493,48 +493,10 @@ static void vlan_dev_change_rx_flags(struct net_device *dev, int change) } } -static int vlan_calculate_locking_subclass(struct net_device *real_dev) -{ - int subclass = 0; - - while (is_vlan_dev(real_dev)) { - subclass++; - real_dev = vlan_dev_priv(real_dev)->real_dev; - } - - return subclass; -} - -static void vlan_dev_mc_sync(struct net_device *to, struct net_device *from) -{ - int err = 0, subclass; - - subclass = vlan_calculate_locking_subclass(to); - - spin_lock_nested(&to->addr_list_lock, subclass); - err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len); - if (!err) - __dev_set_rx_mode(to); - spin_unlock(&to->addr_list_lock); -} - -static void vlan_dev_uc_sync(struct net_device *to, struct net_device *from) -{ - int err = 0, subclass; - - subclass = vlan_calculate_locking_subclass(to); - - spin_lock_nested(&to->addr_list_lock, subclass); - err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); - if (!err) - __dev_set_rx_mode(to); - spin_unlock(&to->addr_list_lock); -} - static void vlan_dev_set_rx_mode(struct net_device *vlan_dev) { - vlan_dev_mc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); - vlan_dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); + dev_mc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); + dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); } /* @@ -562,6 +524,11 @@ static void vlan_dev_set_lockdep_class(struct net_device *dev, int subclass) netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, &subclass); } +static int vlan_dev_get_lock_subclass(struct net_device *dev) +{ + return vlan_dev_priv(dev)->nest_level; +} + static const struct header_ops vlan_header_ops = { .create = vlan_dev_hard_header, .rebuild = vlan_dev_rebuild_header, @@ -597,7 +564,6 @@ static const struct net_device_ops vlan_netdev_ops; static int vlan_dev_init(struct net_device *dev) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; - int subclass = 0; netif_carrier_off(dev); @@ -646,8 +612,7 @@ static int vlan_dev_init(struct net_device *dev) SET_NETDEV_DEVTYPE(dev, &vlan_type); - subclass = vlan_calculate_locking_subclass(dev); - vlan_dev_set_lockdep_class(dev, subclass); + vlan_dev_set_lockdep_class(dev, vlan_dev_get_lock_subclass(dev)); vlan_dev_priv(dev)->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats); if (!vlan_dev_priv(dev)->vlan_pcpu_stats) @@ -819,6 +784,7 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_netpoll_cleanup = vlan_dev_netpoll_cleanup, #endif .ndo_fix_features = vlan_dev_fix_features, + .ndo_get_lock_subclass = vlan_dev_get_lock_subclass, }; void vlan_setup(struct net_device *dev) diff --git a/net/core/dev.c b/net/core/dev.c index 6ee3ac25ed7..2b872bfbd17 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5287,7 +5287,6 @@ void __dev_set_rx_mode(struct net_device *dev) if (ops->ndo_set_rx_mode) ops->ndo_set_rx_mode(dev); } -EXPORT_SYMBOL(__dev_set_rx_mode); void dev_set_rx_mode(struct net_device *dev) { -- cgit v1.2.3-70-g09d2 From 44a4085538c844e79d6ee6bcf46fabf7c57a9a38 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 16 May 2014 17:20:38 -0400 Subject: bonding: Fix stacked device detection in arp monitoring Prior to commit fbd929f2dce460456807a51e18d623db3db9f077 bonding: support QinQ for bond arp interval the arp monitoring code allowed for proper detection of devices stacked on top of vlans. Since the above commit, the code can still detect a device stacked on top of single vlan, but not a device stacked on top of Q-in-Q configuration. The search will only set the inner vlan tag if the route device is the vlan device. However, this is not always the case, as it is possible to extend the stacked configuration. With this patch it is possible to provision devices on top Q-in-Q vlan configuration that should be used as a source of ARP monitoring information. For example: ip link add link bond0 vlan10 type vlan proto 802.1q id 10 ip link add link vlan10 vlan100 type vlan proto 802.1q id 100 ip link add link vlan100 type macvlan Note: This patch limites the number of stacked VLANs to 2, just like before. The original, however had another issue in that if we had more then 2 levels of VLANs, we would end up generating incorrectly tagged traffic. This is no longer possible. Fixes: fbd929f2dce460456807a51e18d623db3db9f077 (bonding: support QinQ for bond arp interval) CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: Ding Tianhong CC: Patric McHardy Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 134 +++++++++++++++++++--------------------- drivers/net/bonding/bonding.h | 1 + include/linux/if_vlan.h | 6 ++ include/linux/netdevice.h | 9 +++ net/core/dev.c | 26 ++++++++ 5 files changed, 107 insertions(+), 69 deletions(-) (limited to 'net/core') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 69aff72c895..d3a67896d43 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2126,10 +2126,10 @@ static bool bond_has_this_ip(struct bonding *bond, __be32 ip) */ static void bond_arp_send(struct net_device *slave_dev, int arp_op, __be32 dest_ip, __be32 src_ip, - struct bond_vlan_tag *inner, - struct bond_vlan_tag *outer) + struct bond_vlan_tag *tags) { struct sk_buff *skb; + int i; pr_debug("arp %d on slave %s: dst %pI4 src %pI4\n", arp_op, slave_dev->name, &dest_ip, &src_ip); @@ -2141,21 +2141,26 @@ static void bond_arp_send(struct net_device *slave_dev, int arp_op, net_err_ratelimited("ARP packet allocation failed\n"); return; } - if (outer->vlan_id) { - if (inner->vlan_id) { - pr_debug("inner tag: proto %X vid %X\n", - ntohs(inner->vlan_proto), inner->vlan_id); - skb = __vlan_put_tag(skb, inner->vlan_proto, - inner->vlan_id); - if (!skb) { - net_err_ratelimited("failed to insert inner VLAN tag\n"); - return; - } - } - pr_debug("outer reg: proto %X vid %X\n", - ntohs(outer->vlan_proto), outer->vlan_id); - skb = vlan_put_tag(skb, outer->vlan_proto, outer->vlan_id); + /* Go through all the tags backwards and add them to the packet */ + for (i = BOND_MAX_VLAN_ENCAP - 1; i > 0; i--) { + if (!tags[i].vlan_id) + continue; + + pr_debug("inner tag: proto %X vid %X\n", + ntohs(tags[i].vlan_proto), tags[i].vlan_id); + skb = __vlan_put_tag(skb, tags[i].vlan_proto, + tags[i].vlan_id); + if (!skb) { + net_err_ratelimited("failed to insert inner VLAN tag\n"); + return; + } + } + /* Set the outer tag */ + if (tags[0].vlan_id) { + pr_debug("outer tag: proto %X vid %X\n", + ntohs(tags[0].vlan_proto), tags[0].vlan_id); + skb = vlan_put_tag(skb, tags[0].vlan_proto, tags[0].vlan_id); if (!skb) { net_err_ratelimited("failed to insert outer VLAN tag\n"); return; @@ -2164,22 +2169,52 @@ static void bond_arp_send(struct net_device *slave_dev, int arp_op, arp_xmit(skb); } +/* Validate the device path between the @start_dev and the @end_dev. + * The path is valid if the @end_dev is reachable through device + * stacking. + * When the path is validated, collect any vlan information in the + * path. + */ +static bool bond_verify_device_path(struct net_device *start_dev, + struct net_device *end_dev, + struct bond_vlan_tag *tags) +{ + struct net_device *upper; + struct list_head *iter; + int idx; + + if (start_dev == end_dev) + return true; + + netdev_for_each_upper_dev_rcu(start_dev, upper, iter) { + if (bond_verify_device_path(upper, end_dev, tags)) { + if (is_vlan_dev(upper)) { + idx = vlan_get_encap_level(upper); + if (idx >= BOND_MAX_VLAN_ENCAP) + return false; + + tags[idx].vlan_proto = + vlan_dev_vlan_proto(upper); + tags[idx].vlan_id = vlan_dev_vlan_id(upper); + } + return true; + } + } + + return false; +} static void bond_arp_send_all(struct bonding *bond, struct slave *slave) { - struct net_device *upper, *vlan_upper; - struct list_head *iter, *vlan_iter; struct rtable *rt; - struct bond_vlan_tag inner, outer; + struct bond_vlan_tag tags[BOND_MAX_VLAN_ENCAP]; __be32 *targets = bond->params.arp_targets, addr; int i; + bool ret; for (i = 0; i < BOND_MAX_ARP_TARGETS && targets[i]; i++) { pr_debug("basa: target %pI4\n", &targets[i]); - inner.vlan_proto = 0; - inner.vlan_id = 0; - outer.vlan_proto = 0; - outer.vlan_id = 0; + memset(tags, 0, sizeof(tags)); /* Find out through which dev should the packet go */ rt = ip_route_output(dev_net(bond->dev), targets[i], 0, @@ -2192,7 +2227,8 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave) net_warn_ratelimited("%s: no route to arp_ip_target %pI4 and arp_validate is set\n", bond->dev->name, &targets[i]); - bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 0, &inner, &outer); + bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], + 0, tags); continue; } @@ -2201,52 +2237,12 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave) goto found; rcu_read_lock(); - /* first we search only for vlan devices. for every vlan - * found we verify its upper dev list, searching for the - * rt->dst.dev. If found we save the tag of the vlan and - * proceed to send the packet. - */ - netdev_for_each_all_upper_dev_rcu(bond->dev, vlan_upper, - vlan_iter) { - if (!is_vlan_dev(vlan_upper)) - continue; - - if (vlan_upper == rt->dst.dev) { - outer.vlan_proto = vlan_dev_vlan_proto(vlan_upper); - outer.vlan_id = vlan_dev_vlan_id(vlan_upper); - rcu_read_unlock(); - goto found; - } - netdev_for_each_all_upper_dev_rcu(vlan_upper, upper, - iter) { - if (upper == rt->dst.dev) { - /* If the upper dev is a vlan dev too, - * set the vlan tag to inner tag. - */ - if (is_vlan_dev(upper)) { - inner.vlan_proto = vlan_dev_vlan_proto(upper); - inner.vlan_id = vlan_dev_vlan_id(upper); - } - outer.vlan_proto = vlan_dev_vlan_proto(vlan_upper); - outer.vlan_id = vlan_dev_vlan_id(vlan_upper); - rcu_read_unlock(); - goto found; - } - } - } - - /* if the device we're looking for is not on top of any of - * our upper vlans, then just search for any dev that - * matches, and in case it's a vlan - save the id - */ - netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) { - if (upper == rt->dst.dev) { - rcu_read_unlock(); - goto found; - } - } + ret = bond_verify_device_path(bond->dev, rt->dst.dev, tags); rcu_read_unlock(); + if (ret) + goto found; + /* Not our device - skip */ pr_debug("%s: no path to arp_ip_target %pI4 via rt.dev %s\n", bond->dev->name, &targets[i], @@ -2259,7 +2255,7 @@ found: addr = bond_confirm_addr(rt->dst.dev, targets[i], 0); ip_rt_put(rt); bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], - addr, &inner, &outer); + addr, tags); } } diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index b8bdd0acc8f..00bea320e3b 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -36,6 +36,7 @@ #define bond_version DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n" +#define BOND_MAX_VLAN_ENCAP 2 #define BOND_MAX_ARP_TARGETS 16 #define BOND_DEFAULT_MIIMON 100 diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 724bde8477b..c901b13b6f0 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -484,4 +484,10 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, */ skb->protocol = htons(ETH_P_802_2); } + +static inline int vlan_get_encap_level(struct net_device *dev) +{ + BUG_ON(!is_vlan_dev(dev)); + return vlan_dev_priv(dev)->nest_level; +} #endif /* !(_LINUX_IF_VLAN_H_) */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9d4b1f1b6b7..b42d07b0390 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3056,9 +3056,18 @@ extern int weight_p; extern int bpf_jit_enable; bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev); +struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, + struct list_head **iter); struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, struct list_head **iter); +/* iterate through upper list, must be called under RCU read lock */ +#define netdev_for_each_upper_dev_rcu(dev, updev, iter) \ + for (iter = &(dev)->adj_list.upper, \ + updev = netdev_upper_get_next_dev_rcu(dev, &(iter)); \ + updev; \ + updev = netdev_upper_get_next_dev_rcu(dev, &(iter))) + /* iterate through upper list, must be called under RCU read lock */ #define netdev_for_each_all_upper_dev_rcu(dev, updev, iter) \ for (iter = &(dev)->all_adj_list.upper, \ diff --git a/net/core/dev.c b/net/core/dev.c index 2b872bfbd17..9abc503b19b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4541,6 +4541,32 @@ void *netdev_adjacent_get_private(struct list_head *adj_list) } EXPORT_SYMBOL(netdev_adjacent_get_private); +/** + * netdev_upper_get_next_dev_rcu - Get the next dev from upper list + * @dev: device + * @iter: list_head ** of the current position + * + * Gets the next device from the dev's upper list, starting from iter + * position. The caller must hold RCU read lock. + */ +struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, + struct list_head **iter) +{ + struct netdev_adjacent *upper; + + WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held()); + + upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); + + if (&upper->list == &dev->adj_list.upper) + return NULL; + + *iter = &upper->list; + + return upper->dev; +} +EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu); + /** * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list * @dev: device -- cgit v1.2.3-70-g09d2