From c3f26a269c2421f97f10cf8ed05d5099b573af4d Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Thu, 31 Jul 2008 16:58:50 -0700
Subject: netdev: Fix lockdep warnings in multiqueue configurations.

When support for multiple TX queues was added, the netif_tx_lock()
routines were converted to iterate over all TX queues and grab each
queue's spinlock.

This causes heartburn for lockdep and it's not a healthy thing to do
with lots of TX queues anyway.

So modify this to use a top-level lock and a "frozen" state for the
individual TX queues.

Signed-off-by: David S. Miller
---
 include/linux/netdevice.h | 86 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 55 insertions(+), 31 deletions(-)

(limited to 'include/linux/netdevice.h')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b4d056ceab9..ee583f642a9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -440,6 +440,7 @@ static inline void napi_synchronize(const struct napi_struct *n)
 enum netdev_queue_state_t
 {
 	__QUEUE_STATE_XOFF,
+	__QUEUE_STATE_FROZEN,
 };
 
 struct netdev_queue {
@@ -636,7 +637,7 @@ struct net_device
 	unsigned int		real_num_tx_queues;
 
 	unsigned long		tx_queue_len;	/* Max frames per queue allowed */
-
+	spinlock_t		tx_global_lock;
 	/*
 	 * One part is mostly used on xmit path (device)
 	 */
@@ -1099,6 +1100,11 @@ static inline int netif_queue_stopped(const struct net_device *dev)
 	return netif_tx_queue_stopped(netdev_get_tx_queue(dev, 0));
 }
 
+static inline int netif_tx_queue_frozen(const struct netdev_queue *dev_queue)
+{
+	return test_bit(__QUEUE_STATE_FROZEN, &dev_queue->state);
+}
+
 /**
  *	netif_running - test if up
  *	@dev: network device
@@ -1475,6 +1481,26 @@ static inline void __netif_tx_lock_bh(struct netdev_queue *txq)
 	txq->xmit_lock_owner = smp_processor_id();
 }
 
+static inline int __netif_tx_trylock(struct netdev_queue *txq)
+{
+	int ok = spin_trylock(&txq->_xmit_lock);
+	if (likely(ok))
+		txq->xmit_lock_owner = smp_processor_id();
+	return ok;
+}
+
+static inline void __netif_tx_unlock(struct netdev_queue *txq)
+{
+	txq->xmit_lock_owner = -1;
+	spin_unlock(&txq->_xmit_lock);
+}
+
+static inline void __netif_tx_unlock_bh(struct netdev_queue *txq)
+{
+	txq->xmit_lock_owner = -1;
+	spin_unlock_bh(&txq->_xmit_lock);
+}
+
 /**
  *	netif_tx_lock - grab network device transmit lock
  *	@dev: network device
@@ -1484,12 +1510,23 @@ static inline void __netif_tx_lock_bh(struct netdev_queue *txq)
  */
 static inline void netif_tx_lock(struct net_device *dev)
 {
-	int cpu = smp_processor_id();
 	unsigned int i;
+	int cpu;
 
+	spin_lock(&dev->tx_global_lock);
+	cpu = smp_processor_id();
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+
+		/* We are the only thread of execution doing a
+		 * freeze, but we have to grab the _xmit_lock in
+		 * order to synchronize with threads which are in
+		 * the ->hard_start_xmit() handler and already
+		 * checked the frozen bit.
+		 */
 		__netif_tx_lock(txq, cpu);
+		set_bit(__QUEUE_STATE_FROZEN, &txq->state);
+		__netif_tx_unlock(txq);
 	}
 }
 
@@ -1499,40 +1536,22 @@ static inline void netif_tx_lock_bh(struct net_device *dev)
 	netif_tx_lock(dev);
 }
 
-static inline int __netif_tx_trylock(struct netdev_queue *txq)
-{
-	int ok = spin_trylock(&txq->_xmit_lock);
-	if (likely(ok))
-		txq->xmit_lock_owner = smp_processor_id();
-	return ok;
-}
-
-static inline int netif_tx_trylock(struct net_device *dev)
-{
-	return __netif_tx_trylock(netdev_get_tx_queue(dev, 0));
-}
-
-static inline void __netif_tx_unlock(struct netdev_queue *txq)
-{
-	txq->xmit_lock_owner = -1;
-	spin_unlock(&txq->_xmit_lock);
-}
-
-static inline void __netif_tx_unlock_bh(struct netdev_queue *txq)
-{
-	txq->xmit_lock_owner = -1;
-	spin_unlock_bh(&txq->_xmit_lock);
-}
-
 static inline void netif_tx_unlock(struct net_device *dev)
 {
 	unsigned int i;
 
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
-		__netif_tx_unlock(txq);
-	}
 
+		/* No need to grab the _xmit_lock here.  If the
+		 * queue is not stopped for another reason, we
+		 * force a schedule.
+		 */
+		clear_bit(__QUEUE_STATE_FROZEN, &txq->state);
+		if (!test_bit(__QUEUE_STATE_XOFF, &txq->state))
+			__netif_schedule(txq->qdisc);
+	}
+	spin_unlock(&dev->tx_global_lock);
 }
 
 static inline void netif_tx_unlock_bh(struct net_device *dev)
@@ -1556,13 +1575,18 @@ static inline void netif_tx_unlock_bh(struct net_device *dev)
 static inline void netif_tx_disable(struct net_device *dev)
 {
 	unsigned int i;
+	int cpu;
 
-	netif_tx_lock_bh(dev);
+	local_bh_disable();
+	cpu = smp_processor_id();
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+
+		__netif_tx_lock(txq, cpu);
 		netif_tx_stop_queue(txq);
+		__netif_tx_unlock(txq);
 	}
-	netif_tx_unlock_bh(dev);
+	local_bh_enable();
 }
 
 static inline void netif_addr_lock(struct net_device *dev)
--
cgit v1.2.3-70-g09d2
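As an illustration of how the new per-queue frozen state is meant to be consumed
(this sketch is not part of the patch; the mydrv_tx_complete() name and its
calling context are hypothetical), a driver's TX-completion path would avoid
waking a queue that netif_tx_lock() has frozen and leave the reschedule to
netif_tx_unlock():

#include <linux/netdevice.h>

/* Reclaimed some TX descriptors: wake the queue unless netif_tx_lock()
 * currently has it frozen; in that case netif_tx_unlock() will issue the
 * reschedule itself once the freeze is lifted.
 */
static void mydrv_tx_complete(struct net_device *dev, unsigned int idx)
{
	struct netdev_queue *txq = netdev_get_tx_queue(dev, idx);

	__netif_tx_lock(txq, smp_processor_id());
	if (netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
		netif_tx_wake_queue(txq);
	__netif_tx_unlock(txq);
}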
From 378a2f090f7a478704a372a4869b8a9ac206234e Mon Sep 17 00:00:00 2001
From: Jarek Poplawski
Date: Mon, 4 Aug 2008 22:31:03 -0700
Subject: net_sched: Add qdisc __NET_XMIT_STOLEN flag

Patrick McHardy noticed:
"The other problem that affects all qdiscs supporting actions is
TC_ACT_QUEUED/TC_ACT_STOLEN getting mapped to NET_XMIT_SUCCESS even
though the packet is not queued, corrupting upper qdiscs' qlen
counters."

and later explained:
"The reason why it translates it at all seems to be to not increase
the drops counter. Within a single qdisc this could be avoided by
other means easily, upper qdiscs would still increase the counter
when we return anything besides NET_XMIT_SUCCESS though. This means
we need a new NET_XMIT return value to indicate this to the upper
qdiscs. So I'd suggest to introduce NET_XMIT_STOLEN, return that to
upper qdiscs and translate it to NET_XMIT_SUCCESS in dev_queue_xmit,
similar to NET_XMIT_BYPASS."

David Miller noticed:
"Maybe these NET_XMIT_* values being passed around should be a set of
bits. They could be composed of base meanings, combined with specific
attributes.

So you could say "NET_XMIT_DROP | __NET_XMIT_NO_DROP_COUNT"

The attributes get masked out by the top-level ->enqueue() caller,
such that the base meanings are the only thing that make their way
up into the stack.

If it's only about communication within the qdisc tree, let's simply
code it that way."

This patch is trying to realize these ideas.

Signed-off-by: Jarek Poplawski
Signed-off-by: David S. Miller
---
 include/linux/netdevice.h |  1 +
 include/net/sch_generic.h | 14 +++++++++++++-
 net/sched/sch_atm.c       | 12 +++++++-----
 net/sched/sch_cbq.c       | 23 +++++++++++++++--------
 net/sched/sch_dsmark.c    |  8 +++++---
 net/sched/sch_hfsc.c      |  8 +++++---
 net/sched/sch_htb.c       | 18 +++++++++++-------
 net/sched/sch_netem.c     |  3 ++-
 net/sched/sch_prio.c      |  8 +++++---
 net/sched/sch_red.c       |  2 +-
 net/sched/sch_sfq.c       |  2 +-
 net/sched/sch_tbf.c       |  3 ++-
 12 files changed, 68 insertions(+), 34 deletions(-)

(limited to 'include/linux/netdevice.h')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ee583f642a9..abbf5d52ec8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -64,6 +64,7 @@ struct wireless_dev;
 #define NET_XMIT_BYPASS		4	/* packet does not leave via dequeue;
 					   (TC use only - dev_queue_xmit
 					   returns this as NET_XMIT_SUCCESS) */
+#define NET_XMIT_MASK		0xFFFF	/* qdisc flags in net/sch_generic.h */
 
 /* Backlog congestion levels */
 #define NET_RX_SUCCESS		0	/* keep 'em coming, baby */
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index c5bb1306505..f15b045a85e 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -343,6 +343,18 @@ static inline unsigned int qdisc_pkt_len(struct sk_buff *skb)
 	return qdisc_skb_cb(skb)->pkt_len;
 }
 
+#ifdef CONFIG_NET_CLS_ACT
+/* additional qdisc xmit flags */
+enum net_xmit_qdisc_t {
+	__NET_XMIT_STOLEN = 0x00010000,
+};
+
+#define net_xmit_drop_count(e)	((e) & __NET_XMIT_STOLEN ? 0 : 1)
+
+#else
+#define net_xmit_drop_count(e)	(1)
+#endif
+
 static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 #ifdef CONFIG_NET_SCHED
@@ -355,7 +367,7 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch)
 {
 	qdisc_skb_cb(skb)->pkt_len = skb->len;
-	return qdisc_enqueue(skb, sch);
+	return qdisc_enqueue(skb, sch) & NET_XMIT_MASK;
 }
 
 static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 6b517b9dac5..27dd773481b 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -415,7 +415,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
 			kfree_skb(skb);
-			return NET_XMIT_SUCCESS;
+			return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			kfree_skb(skb);
 			goto drop;
@@ -432,9 +432,11 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	ret = qdisc_enqueue(skb, flow->q);
 	if (ret != 0) {
 drop: __maybe_unused
-		sch->qstats.drops++;
-		if (flow)
-			flow->qstats.drops++;
+		if (net_xmit_drop_count(ret)) {
+			sch->qstats.drops++;
+			if (flow)
+				flow->qstats.drops++;
+		}
 		return ret;
 	}
 	sch->bstats.bytes += qdisc_pkt_len(skb);
@@ -530,7 +532,7 @@ static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch)
 	if (!ret) {
 		sch->q.qlen++;
 		sch->qstats.requeues++;
-	} else {
+	} else if (net_xmit_drop_count(ret)) {
 		sch->qstats.drops++;
 		p->link.qstats.drops++;
 	}
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 14954bf4a68..765ae565900 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -256,7 +256,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		switch (result) {
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
-			*qerr = NET_XMIT_SUCCESS;
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return NULL;
 		case TC_ACT_RECLASSIFY:
@@ -397,9 +397,11 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		return ret;
 	}
 
-	sch->qstats.drops++;
-	cbq_mark_toplevel(q, cl);
-	cl->qstats.drops++;
+	if (net_xmit_drop_count(ret)) {
+		sch->qstats.drops++;
+		cbq_mark_toplevel(q, cl);
+		cl->qstats.drops++;
+	}
 	return ret;
 }
 
@@ -430,8 +432,10 @@ cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
 		cbq_activate_class(cl);
 		return 0;
 	}
-	sch->qstats.drops++;
-	cl->qstats.drops++;
+	if (net_xmit_drop_count(ret)) {
+		sch->qstats.drops++;
+		cl->qstats.drops++;
+	}
 	return ret;
 }
 
@@ -664,13 +668,15 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 	q->rx_class = NULL;
 
 	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
+		int ret;
 
 		cbq_mark_toplevel(q, cl);
 
 		q->rx_class = cl;
 		cl->q->__parent = sch;
 
-		if (qdisc_enqueue(skb, cl->q) == 0) {
+		ret = qdisc_enqueue(skb, cl->q);
+		if (ret == NET_XMIT_SUCCESS) {
 			sch->q.qlen++;
 			sch->bstats.packets++;
 			sch->bstats.bytes += qdisc_pkt_len(skb);
@@ -678,7 +684,8 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 			cbq_activate_class(cl);
 			return 0;
 		}
-		sch->qstats.drops++;
+		if (net_xmit_drop_count(ret))
+			sch->qstats.drops++;
 		return 0;
 	}
 
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index a935676987e..7170275d9f9 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -236,7 +236,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
 			kfree_skb(skb);
-			return NET_XMIT_SUCCESS;
+			return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 
 		case TC_ACT_SHOT:
 			goto drop;
@@ -254,7 +254,8 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	err = qdisc_enqueue(skb, p->q);
 	if (err != NET_XMIT_SUCCESS) {
-		sch->qstats.drops++;
+		if (net_xmit_drop_count(err))
+			sch->qstats.drops++;
 		return err;
 	}
 
@@ -321,7 +322,8 @@ static int dsmark_requeue(struct sk_buff *skb, struct Qdisc *sch)
 
 	err = p->q->ops->requeue(skb, p->q);
 	if (err != NET_XMIT_SUCCESS) {
-		sch->qstats.drops++;
+		if (net_xmit_drop_count(err))
+			sch->qstats.drops++;
 		return err;
 	}
 
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 0ae7d19dcba..5cf9ae71611 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1166,7 +1166,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		switch (result) {
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
-			*qerr = NET_XMIT_SUCCESS;
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return NULL;
 		}
@@ -1586,8 +1586,10 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	err = qdisc_enqueue(skb, cl->qdisc);
 	if (unlikely(err != NET_XMIT_SUCCESS)) {
-		cl->qstats.drops++;
-		sch->qstats.drops++;
+		if (net_xmit_drop_count(err)) {
+			cl->qstats.drops++;
+			sch->qstats.drops++;
+		}
 		return err;
 	}
 
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 75a40951c4f..538d79b489a 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -221,7 +221,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
 	switch (result) {
 	case TC_ACT_QUEUED:
 	case TC_ACT_STOLEN:
-		*qerr = NET_XMIT_SUCCESS;
+		*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 	case TC_ACT_SHOT:
 		return NULL;
 	}
@@ -572,9 +572,11 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		kfree_skb(skb);
 		return ret;
 #endif
-	} else if (qdisc_enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
-		sch->qstats.drops++;
-		cl->qstats.drops++;
+	} else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
+		if (net_xmit_drop_count(ret)) {
+			sch->qstats.drops++;
+			cl->qstats.drops++;
+		}
 		return NET_XMIT_DROP;
 	} else {
 		cl->bstats.packets +=
@@ -615,10 +617,12 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
 		kfree_skb(skb);
 		return ret;
 #endif
-	} else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) !=
+	} else if ((ret = cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q)) !=
 		   NET_XMIT_SUCCESS) {
-		sch->qstats.drops++;
-		cl->qstats.drops++;
+		if (net_xmit_drop_count(ret)) {
+			sch->qstats.drops++;
+			cl->qstats.drops++;
+		}
 		return NET_XMIT_DROP;
 	} else
 		htb_activate(q, cl);
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index a5908570067..6cd6f2bc749 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -240,8 +240,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		sch->q.qlen++;
 		sch->bstats.bytes += qdisc_pkt_len(skb);
 		sch->bstats.packets++;
-	} else
+	} else if (net_xmit_drop_count(ret)) {
 		sch->qstats.drops++;
+	}
 
 	pr_debug("netem: enqueue ret %d\n", ret);
 	return ret;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index f849243eb09..adb1a52b77d 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -45,7 +45,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		switch (err) {
 		case TC_ACT_STOLEN:
 		case TC_ACT_QUEUED:
-			*qerr = NET_XMIT_SUCCESS;
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return NULL;
 		}
@@ -88,7 +88,8 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		sch->q.qlen++;
 		return NET_XMIT_SUCCESS;
 	}
-	sch->qstats.drops++;
+	if (net_xmit_drop_count(ret))
+		sch->qstats.drops++;
 	return ret;
 }
 
@@ -114,7 +115,8 @@ prio_requeue(struct sk_buff *skb, struct Qdisc* sch)
 		sch->qstats.requeues++;
 		return 0;
 	}
-	sch->qstats.drops++;
+	if (net_xmit_drop_count(ret))
+		sch->qstats.drops++;
 	return NET_XMIT_DROP;
 }
 
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 3f2d1d7f3bb..5da05839e22 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -97,7 +97,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 		sch->bstats.bytes += qdisc_pkt_len(skb);
 		sch->bstats.packets++;
 		sch->q.qlen++;
-	} else {
+	} else if (net_xmit_drop_count(ret)) {
 		q->stats.pdrop++;
 		sch->qstats.drops++;
 	}
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 8589da66656..3a456e1b829 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -178,7 +178,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 		switch (result) {
 		case TC_ACT_STOLEN:
 		case TC_ACT_QUEUED:
-			*qerr = NET_XMIT_SUCCESS;
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return 0;
 		}
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index b296672f763..7d3b7ff3bf0 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -135,7 +135,8 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 
 	ret = qdisc_enqueue(skb, q->qdisc);
 	if (ret != 0) {
-		sch->qstats.drops++;
+		if (net_xmit_drop_count(ret))
+			sch->qstats.drops++;
 		return ret;
 	}
 
--
cgit v1.2.3-70-g09d2
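As an illustration of the convention this patch establishes (this sketch is not
part of the patch; my_enqueue_to_child() is a hypothetical helper, the real
qdiscs open-code the same pattern in the hunks above), a classful qdisc's
enqueue path now distinguishes stolen packets from real drops:

#include <net/sch_generic.h>

/* Enqueue skb to a child qdisc and account for the outcome.  A packet
 * stolen or queued by an action comes back as
 * NET_XMIT_SUCCESS | __NET_XMIT_STOLEN: it was not dropped, so the drop
 * counters are only bumped when net_xmit_drop_count() says so.
 */
static int my_enqueue_to_child(struct sk_buff *skb, struct Qdisc *sch,
			       struct Qdisc *child)
{
	int ret = qdisc_enqueue(skb, child);

	if (ret != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(ret))
			sch->qstats.drops++;
		return ret;
	}

	sch->q.qlen++;
	sch->bstats.packets++;
	sch->bstats.bytes += qdisc_pkt_len(skb);
	return NET_XMIT_SUCCESS;
}

The __NET_XMIT_STOLEN attribute only travels inside the qdisc tree:
qdisc_enqueue_root() masks it off with NET_XMIT_MASK, so dev_queue_xmit()
and the rest of the stack keep seeing plain NET_XMIT_* codes.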
From cc6533e98a7f3cb7fce9d740da49195c7aa523a4 Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Mon, 4 Aug 2008 23:04:08 -0700
Subject: net: Kill plain NET_XMIT_BYPASS.

dst_input() was doing something completely absurd, looping on
skb->dst->input() if NET_XMIT_BYPASS was seen, but these functions
never return such an error.

And as a result plain ole' NET_XMIT_BYPASS has no more references
and can be completely killed off.

Signed-off-by: David S. Miller
---
 include/linux/netdevice.h |  3 ---
 include/net/dst.h         | 12 +-----------
 2 files changed, 1 insertion(+), 14 deletions(-)

(limited to 'include/linux/netdevice.h')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index abbf5d52ec8..488c56e649b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -61,9 +61,6 @@ struct wireless_dev;
 #define NET_XMIT_DROP		1	/* skb dropped			*/
 #define NET_XMIT_CN		2	/* congestion notification	*/
 #define NET_XMIT_POLICED	3	/* skb is shot by police	*/
-#define NET_XMIT_BYPASS		4	/* packet does not leave via dequeue;
-					   (TC use only - dev_queue_xmit
-					   returns this as NET_XMIT_SUCCESS) */
 #define NET_XMIT_MASK		0xFFFF	/* qdisc flags in net/sch_generic.h */
 
 /* Backlog congestion levels */
 #define NET_RX_SUCCESS		0	/* keep 'em coming, baby */
diff --git a/include/net/dst.h b/include/net/dst.h
index c5c318a628f..8a8b71e5f3f 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -252,17 +252,7 @@ static inline int dst_output(struct sk_buff *skb)
 /* Input packet from network to transport.  */
 static inline int dst_input(struct sk_buff *skb)
 {
-	int err;
-
-	for (;;) {
-		err = skb->dst->input(skb);
-
-		if (likely(err == 0))
-			return err;
-		/* Oh, Jamal... Seems, I will not forgive you this mess. :-) */
-		if (unlikely(err != NET_XMIT_BYPASS))
-			return err;
-	}
+	return skb->dst->input(skb);
 }
 
 static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie)
--
cgit v1.2.3-70-g09d2
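As an illustration (not part of the patch; my_rcv_finish() is a hypothetical
stand-in for a real post-routing step such as ip_rcv_finish()), a caller of
dst_input() now simply propagates the input handler's return value:

#include <linux/skbuff.h>
#include <net/dst.h>

/* After routing has set skb->dst, hand the packet to the dst's input
 * handler (ip_local_deliver, ip_forward, ...).  Those handlers return
 * NET_RX_* style codes and never NET_XMIT_BYPASS, which is why the old
 * retry loop in dst_input() was dead code.
 */
static int my_rcv_finish(struct sk_buff *skb)
{
	return dst_input(skb);
}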