From 8123b421e8ed944671d7241323ed3198cccb4041 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 8 Aug 2008 23:23:39 -0700 Subject: pkt_sched: Fix ingress deletion and filter attachment. Based upon bug reports by Stephen Hemminger. We still had some cases using ->qdisc instead of ->qdisc_sleeping. Also, qdisc_lookup() should return ingress qdiscs. Signed-off-by: David S. Miller --- net/sched/sch_api.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index ba1d121f312..bbf149dd781 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -183,6 +183,21 @@ EXPORT_SYMBOL(unregister_qdisc); (root qdisc, all its children, children of children etc.) */ +struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) +{ + struct Qdisc *q; + + if (!(root->flags & TCQ_F_BUILTIN) && + root->handle == handle) + return root; + + list_for_each_entry(q, &root->list, list) { + if (q->handle == handle) + return q; + } + return NULL; +} + struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { unsigned int i; @@ -191,16 +206,11 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) struct netdev_queue *txq = netdev_get_tx_queue(dev, i); struct Qdisc *q, *txq_root = txq->qdisc_sleeping; - if (!(txq_root->flags & TCQ_F_BUILTIN) && - txq_root->handle == handle) - return txq_root; - - list_for_each_entry(q, &txq_root->list, list) { - if (q->handle == handle) - return q; - } + q = qdisc_match_from_root(txq_root, handle); + if (q) + return q; } - return NULL; + return qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle); } static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) @@ -908,7 +918,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) return -ENOENT; q = qdisc_leaf(p, clid); } else { /* ingress */ - q = dev->rx_queue.qdisc; + q = dev->rx_queue.qdisc_sleeping; } } else { struct netdev_queue *dev_queue; @@ -978,7 +988,7 @@ replay: return -ENOENT; q = qdisc_leaf(p, clid); } else { /*ingress */ - q = dev->rx_queue.qdisc; + q = dev->rx_queue.qdisc_sleeping; } } else { struct netdev_queue *dev_queue; @@ -1529,11 +1539,11 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) t = 0; dev_queue = netdev_get_tx_queue(dev, 0); - if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0) + if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0) goto done; dev_queue = &dev->rx_queue; - if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0) + if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0) goto done; done: -- cgit v1.2.3-70-g09d2 From 1cfa26661a85549063e369e2b40275eeaa7b923c Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 11 Aug 2008 18:11:06 -0700 Subject: pkt_sched: Add BH protection for qdisc_stab_lock. Since qdisc_stab_lock is used in qdisc_put_stab(), which is called in BH context from __qdisc_destroy() RCU callback, softirq safe locking is needed. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/sch_api.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index bbf149dd781..c25465e5607 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -331,7 +331,7 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) if (!s || tsize != s->tsize || (!tab && tsize > 0)) return ERR_PTR(-EINVAL); - spin_lock(&qdisc_stab_lock); + spin_lock_bh(&qdisc_stab_lock); list_for_each_entry(stab, &qdisc_stab_list, list) { if (memcmp(&stab->szopts, s, sizeof(*s))) @@ -339,11 +339,11 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16))) continue; stab->refcnt++; - spin_unlock(&qdisc_stab_lock); + spin_unlock_bh(&qdisc_stab_lock); return stab; } - spin_unlock(&qdisc_stab_lock); + spin_unlock_bh(&qdisc_stab_lock); stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL); if (!stab) @@ -354,9 +354,9 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) if (tsize > 0) memcpy(stab->data, tab, tsize * sizeof(u16)); - spin_lock(&qdisc_stab_lock); + spin_lock_bh(&qdisc_stab_lock); list_add_tail(&stab->list, &qdisc_stab_list); - spin_unlock(&qdisc_stab_lock); + spin_unlock_bh(&qdisc_stab_lock); return stab; } @@ -366,14 +366,14 @@ void qdisc_put_stab(struct qdisc_size_table *tab) if (!tab) return; - spin_lock(&qdisc_stab_lock); + spin_lock_bh(&qdisc_stab_lock); if (--tab->refcnt == 0) { list_del(&tab->list); kfree(tab); } - spin_unlock(&qdisc_stab_lock); + spin_unlock_bh(&qdisc_stab_lock); } EXPORT_SYMBOL(qdisc_put_stab); -- cgit v1.2.3-70-g09d2 From 3a76e3716b4e571f5d91a20b6afb412560599083 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Sun, 17 Aug 2008 22:02:11 -0700 Subject: pkt_sched: Grab correct lock in notify_and_destroy(). From: Jarek Poplawski When we are destroying non-root qdiscs, we need to lock the root of the qdisc tree not the the qdisc itself. Signed-off-by: David S. Miller --- net/sched/sch_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c25465e5607..c8dc72e1210 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -638,9 +638,9 @@ static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid qdisc_notify(skb, n, clid, old, new); if (old) { - spin_lock_bh(&old->q.lock); + sch_tree_lock(old); qdisc_destroy(old); - spin_unlock_bh(&old->q.lock); + sch_tree_unlock(old); } } -- cgit v1.2.3-70-g09d2 From 8608db031b4d2932d645709e2cfe8fbcd91a7305 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 18 Aug 2008 20:51:18 -0700 Subject: pkt_sched: Never schedule non-root qdiscs. Based upon initial discovery and patch by Jarek Poplawski. The qdisc watchdogs can be attached to any qdisc, not just the root, so make sure we schedule the correct one. CBQ has a similar bug. Signed-off-by: David S. Miller --- net/sched/sch_api.c | 2 +- net/sched/sch_cbq.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c8dc72e1210..98c00847a3d 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -426,7 +426,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) wd->qdisc->flags &= ~TCQ_F_THROTTLED; smp_wmb(); - __netif_schedule(wd->qdisc); + __netif_schedule(qdisc_root(wd->qdisc)); return HRTIMER_NORESTART; } diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 4e261ce62f4..47ef492c4ff 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -654,7 +654,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) } sch->flags &= ~TCQ_F_THROTTLED; - __netif_schedule(sch); + __netif_schedule(qdisc_root(sch)); return HRTIMER_NORESTART; } -- cgit v1.2.3-70-g09d2 From 25bfcd5a78a377ea4c54a3c21e44590e2fc478a6 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 18 Aug 2008 20:53:34 -0700 Subject: pkt_sched: Add lockdep annotation for qdisc locks Qdisc locks are initialized in the same function, qdisc_alloc(), so lockdep can't distinguish tx qdisc lock from rx and reports "possible recursive locking detected" when both these locks are taken eg. while using act_mirred with ifb. This looks like a false positive. Anyway, after this patch these locks will be reported more exactly. Reported-by: Denys Fedoryshchenko Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/sch_api.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 98c00847a3d..7d7070b1eeb 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -707,6 +708,10 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, return err; } +/* lockdep annotation is needed for ingress; egress gets it only for name */ +static struct lock_class_key qdisc_tx_lock; +static struct lock_class_key qdisc_rx_lock; + /* Allocate and initialize new qdisc. @@ -767,6 +772,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, if (handle == TC_H_INGRESS) { sch->flags |= TCQ_F_INGRESS; handle = TC_H_MAKE(TC_H_INGRESS, 0); + lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock); } else { if (handle == 0) { handle = qdisc_alloc_handle(dev); @@ -774,6 +780,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, if (handle == 0) goto err_out3; } + lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock); } sch->handle = handle; -- cgit v1.2.3-70-g09d2 From 4d8863a29c4755a0461cd31b6865026187d6c43a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 18 Aug 2008 21:03:15 -0700 Subject: pkt_sched: Don't hold qdisc lock over qdisc_destroy(). Based upon reports by Denys Fedoryshchenko, and feedback and help from Jarek Poplawski and Herbert Xu. We always either: 1) Never made an external reference to this qdisc. or 2) Did a dev_deactivate() which purged all asynchronous references. So do not lock the qdisc when we call qdisc_destroy(), it's illegal anyways as when we drop the lock this is free'd memory. Signed-off-by: David S. Miller --- net/sched/sch_api.c | 13 ++----------- net/sched/sch_generic.c | 6 ------ 2 files changed, 2 insertions(+), 17 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 7d7070b1eeb..d91a2338877 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -638,11 +638,8 @@ static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid if (new || old) qdisc_notify(skb, n, clid, old, new); - if (old) { - sch_tree_lock(old); + if (old) qdisc_destroy(old); - sch_tree_unlock(old); - } } /* Graft qdisc "new" to class "classid" of qdisc "parent" or @@ -1092,16 +1089,10 @@ create_n_graft: graft: if (1) { - spinlock_t *root_lock; - err = qdisc_graft(dev, p, skb, n, clid, q, NULL); if (err) { - if (q) { - root_lock = qdisc_root_lock(q); - spin_lock_bh(root_lock); + if (q) qdisc_destroy(q); - spin_unlock_bh(root_lock); - } return err; } } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 6f96b7bc080..c3ed4d44fc1 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -518,8 +518,6 @@ void qdisc_reset(struct Qdisc *qdisc) } EXPORT_SYMBOL(qdisc_reset); -/* Under qdisc_lock(qdisc) and BH! */ - void qdisc_destroy(struct Qdisc *qdisc) { const struct Qdisc_ops *ops = qdisc->ops; @@ -712,14 +710,10 @@ static void shutdown_scheduler_queue(struct net_device *dev, struct Qdisc *qdisc_default = _qdisc_default; if (qdisc) { - spinlock_t *root_lock = qdisc_lock(qdisc); - dev_queue->qdisc = qdisc_default; dev_queue->qdisc_sleeping = qdisc_default; - spin_lock_bh(root_lock); qdisc_destroy(qdisc); - spin_unlock_bh(root_lock); } } -- cgit v1.2.3-70-g09d2 From e5befbd9525d92bb074b70192eb2c69aae65fc60 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 18 Aug 2008 22:30:01 -0700 Subject: pkt_sched: remove bogus block (cleanup) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ...Last block local var got just deleted. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/sched/sch_api.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index d91a2338877..9372ec41ce8 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1088,14 +1088,13 @@ create_n_graft: } graft: - if (1) { - err = qdisc_graft(dev, p, skb, n, clid, q, NULL); - if (err) { - if (q) - qdisc_destroy(q); - return err; - } + err = qdisc_graft(dev, p, skb, n, clid, q, NULL); + if (err) { + if (q) + qdisc_destroy(q); + return err; } + return 0; } -- cgit v1.2.3-70-g09d2 From f3b9605d744df537dee10fd06630f35a62b343ec Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 18 Aug 2008 22:33:05 -0700 Subject: Revert "pkt_sched: Add BH protection for qdisc_stab_lock." This reverts commit 1cfa26661a85549063e369e2b40275eeaa7b923c. qdisc_destroy() runs fully under RTNL again and not from softint any longer, so this change is no longer needed. Signed-off-by: David S. Miller --- net/sched/sch_api.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 9372ec41ce8..ef0efeca635 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -332,7 +332,7 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) if (!s || tsize != s->tsize || (!tab && tsize > 0)) return ERR_PTR(-EINVAL); - spin_lock_bh(&qdisc_stab_lock); + spin_lock(&qdisc_stab_lock); list_for_each_entry(stab, &qdisc_stab_list, list) { if (memcmp(&stab->szopts, s, sizeof(*s))) @@ -340,11 +340,11 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16))) continue; stab->refcnt++; - spin_unlock_bh(&qdisc_stab_lock); + spin_unlock(&qdisc_stab_lock); return stab; } - spin_unlock_bh(&qdisc_stab_lock); + spin_unlock(&qdisc_stab_lock); stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL); if (!stab) @@ -355,9 +355,9 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) if (tsize > 0) memcpy(stab->data, tab, tsize * sizeof(u16)); - spin_lock_bh(&qdisc_stab_lock); + spin_lock(&qdisc_stab_lock); list_add_tail(&stab->list, &qdisc_stab_list); - spin_unlock_bh(&qdisc_stab_lock); + spin_unlock(&qdisc_stab_lock); return stab; } @@ -367,14 +367,14 @@ void qdisc_put_stab(struct qdisc_size_table *tab) if (!tab) return; - spin_lock_bh(&qdisc_stab_lock); + spin_lock(&qdisc_stab_lock); if (--tab->refcnt == 0) { list_del(&tab->list); kfree(tab); } - spin_unlock_bh(&qdisc_stab_lock); + spin_unlock(&qdisc_stab_lock); } EXPORT_SYMBOL(qdisc_put_stab); -- cgit v1.2.3-70-g09d2 From 2540e0511ea17e25831be543cdf9381e6209950d Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Thu, 21 Aug 2008 05:11:14 -0700 Subject: pkt_sched: Fix qdisc_watchdog() vs. dev_deactivate() race dev_deactivate() can skip rescheduling of a qdisc by qdisc_watchdog() or other timer calling netif_schedule() after dev_queue_deactivate(). We prevent this checking aliveness before scheduling the timer. Since during deactivation the root qdisc is available only as qdisc_sleeping additional accessor qdisc_root_sleeping() is created. With feedback from Herbert Xu Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/net/sch_generic.h | 5 +++++ net/sched/sch_api.c | 4 ++++ net/sched/sch_cbq.c | 4 ++++ 3 files changed, 13 insertions(+) (limited to 'net/sched/sch_api.c') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 84d25f2e618..b1d2cfea89c 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -193,6 +193,11 @@ static inline struct Qdisc *qdisc_root(struct Qdisc *qdisc) return qdisc->dev_queue->qdisc; } +static inline struct Qdisc *qdisc_root_sleeping(struct Qdisc *qdisc) +{ + return qdisc->dev_queue->qdisc_sleeping; +} + /* The qdisc root lock is a mechanism by which to top level * of a qdisc tree can be locked from any qdisc node in the * forest. This allows changing the configuration of some diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index ef0efeca635..45f442d7de4 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -444,6 +444,10 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) { ktime_t time; + if (test_bit(__QDISC_STATE_DEACTIVATED, + &qdisc_root_sleeping(wd->qdisc)->state)) + return; + wd->qdisc->flags |= TCQ_F_THROTTLED; time = ktime_set(0, 0); time = ktime_add_ns(time, PSCHED_US2NS(expires)); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 47ef492c4ff..8fa90d68ec6 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -521,6 +521,10 @@ static void cbq_ovl_delay(struct cbq_class *cl) struct cbq_sched_data *q = qdisc_priv(cl->qdisc); psched_tdiff_t delay = cl->undertime - q->now; + if (test_bit(__QDISC_STATE_DEACTIVATED, + &qdisc_root_sleeping(cl->qdisc)->state)) + return; + if (!cl->delayed) { psched_time_t sched = q->now; ktime_t expires; -- cgit v1.2.3-70-g09d2 From f6e0b239a2657ea8cb67f0d83d0bfdbfd19a481b Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Fri, 22 Aug 2008 03:24:05 -0700 Subject: pkt_sched: Fix qdisc list locking Since some qdiscs call qdisc_tree_decrease_qlen() (so qdisc_lookup()) without rtnl_lock(), adding and deleting from a qdisc list needs additional locking. This patch adds global spinlock qdisc_list_lock and wrapper functions for modifying the list. It is considered as a temporary solution until hfsc_dequeue(), netem_dequeue() and tbf_dequeue() (or qdisc_tree_decrease_qlen()) are redone. With feedback from Herbert Xu and David S. Miller. Signed-off-by: Jarek Poplawski Acked-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 1 + net/sched/sch_api.c | 44 +++++++++++++++++++++++++++++++++++++++----- net/sched/sch_generic.c | 5 ++--- 3 files changed, 42 insertions(+), 8 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 853fe83d9f3..b786a5b0925 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -78,6 +78,7 @@ extern struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, extern int register_qdisc(struct Qdisc_ops *qops); extern int unregister_qdisc(struct Qdisc_ops *qops); +extern void qdisc_list_del(struct Qdisc *q); extern struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle); extern struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle); extern struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 45f442d7de4..e7fb9e0d21b 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -199,19 +199,53 @@ struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) return NULL; } +/* + * This lock is needed until some qdiscs stop calling qdisc_tree_decrease_qlen() + * without rtnl_lock(); currently hfsc_dequeue(), netem_dequeue(), tbf_dequeue() + */ +static DEFINE_SPINLOCK(qdisc_list_lock); + +static void qdisc_list_add(struct Qdisc *q) +{ + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { + spin_lock_bh(&qdisc_list_lock); + list_add_tail(&q->list, &qdisc_root_sleeping(q)->list); + spin_unlock_bh(&qdisc_list_lock); + } +} + +void qdisc_list_del(struct Qdisc *q) +{ + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { + spin_lock_bh(&qdisc_list_lock); + list_del(&q->list); + spin_unlock_bh(&qdisc_list_lock); + } +} +EXPORT_SYMBOL(qdisc_list_del); + struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { unsigned int i; + struct Qdisc *q; + + spin_lock_bh(&qdisc_list_lock); for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - struct Qdisc *q, *txq_root = txq->qdisc_sleeping; + struct Qdisc *txq_root = txq->qdisc_sleeping; q = qdisc_match_from_root(txq_root, handle); if (q) - return q; + goto unlock; } - return qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle); + + q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle); + +unlock: + spin_unlock_bh(&qdisc_list_lock); + + return q; } static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) @@ -810,8 +844,8 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, goto err_out3; } } - if ((parent != TC_H_ROOT) && !(sch->flags & TCQ_F_INGRESS)) - list_add_tail(&sch->list, &dev_queue->qdisc_sleeping->list); + + qdisc_list_add(sch); return sch; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index c3ed4d44fc1..5f0ade7806a 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -526,10 +526,9 @@ void qdisc_destroy(struct Qdisc *qdisc) !atomic_dec_and_test(&qdisc->refcnt)) return; - if (qdisc->parent) - list_del(&qdisc->list); - #ifdef CONFIG_NET_SCHED + qdisc_list_del(qdisc); + qdisc_put_stab(qdisc->stab); #endif gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); -- cgit v1.2.3-70-g09d2 From 666d9bbedfff7c2c37eab92e715641922dee6864 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Wed, 27 Aug 2008 02:12:52 -0700 Subject: pkt_sched: Fix dev_graft_qdisc() locking During dev_graft_qdisc() dev is deactivated, so qdisc_root_lock() returns wrong lock of noop_qdisc instead of qdisc_sleeping. Signed-off-by: Jarek Poplawski Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/sched/sch_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e7fb9e0d21b..341d558b6e3 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -624,7 +624,7 @@ static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, struct Qdisc *oqdisc = dev_queue->qdisc_sleeping; spinlock_t *root_lock; - root_lock = qdisc_root_lock(oqdisc); + root_lock = qdisc_lock(oqdisc); spin_lock_bh(root_lock); /* Prune old scheduler */ -- cgit v1.2.3-70-g09d2 From f7a54c13c7b072d9426bd5cec1cdb8306df5ef55 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Wed, 27 Aug 2008 02:22:07 -0700 Subject: pkt_sched: Use rcu_assign_pointer() to change dev_queue->qdisc These pointers are RCU protected, so proper primitives should be used. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/sch_api.c | 2 +- net/sched/sch_generic.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 341d558b6e3..ad9cda1b8c0 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -635,7 +635,7 @@ static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, if (qdisc == NULL) qdisc = &noop_qdisc; dev_queue->qdisc_sleeping = qdisc; - dev_queue->qdisc = &noop_qdisc; + rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc); spin_unlock_bh(root_lock); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5f0ade7806a..9634091ee2f 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -634,7 +634,7 @@ static void dev_deactivate_queue(struct net_device *dev, if (!(qdisc->flags & TCQ_F_BUILTIN)) set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state); - dev_queue->qdisc = qdisc_default; + rcu_assign_pointer(dev_queue->qdisc, qdisc_default); qdisc_reset(qdisc); spin_unlock_bh(qdisc_lock(qdisc)); @@ -709,7 +709,7 @@ static void shutdown_scheduler_queue(struct net_device *dev, struct Qdisc *qdisc_default = _qdisc_default; if (qdisc) { - dev_queue->qdisc = qdisc_default; + rcu_assign_pointer(dev_queue->qdisc, qdisc_default); dev_queue->qdisc_sleeping = qdisc_default; qdisc_destroy(qdisc); -- cgit v1.2.3-70-g09d2 From f6f9b93f1624206c802ac9162c9302edaf59bfd9 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Wed, 27 Aug 2008 02:25:17 -0700 Subject: pkt_sched: Fix gen_estimator locks While passing a qdisc root lock to gen_new_estimator() and gen_replace_estimator() dev could be deactivated or even before grafting proper root qdisc as qdisc_sleeping (e.g. qdisc_create), so using qdisc_root_lock() is not enough. This patch adds qdisc_root_sleeping_lock() for this, plus additional checks, where necessary. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/net/sch_generic.h | 8 ++++++++ net/sched/sch_api.c | 14 +++++++++++--- net/sched/sch_cbq.c | 4 ++-- net/sched/sch_hfsc.c | 4 ++-- net/sched/sch_htb.c | 4 ++-- 5 files changed, 25 insertions(+), 9 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index b1d2cfea89c..ef8a7e2e12e 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -217,6 +217,14 @@ static inline spinlock_t *qdisc_root_lock(struct Qdisc *qdisc) return qdisc_lock(root); } +static inline spinlock_t *qdisc_root_sleeping_lock(struct Qdisc *qdisc) +{ + struct Qdisc *root = qdisc_root_sleeping(qdisc); + + ASSERT_RTNL(); + return qdisc_lock(root); +} + static inline struct net_device *qdisc_dev(struct Qdisc *qdisc) { return qdisc->dev_queue->dev; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index ad9cda1b8c0..506b709510b 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -830,9 +830,16 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, sch->stab = stab; } if (tca[TCA_RATE]) { + spinlock_t *root_lock; + + if ((sch->parent != TC_H_ROOT) && + !(sch->flags & TCQ_F_INGRESS)) + root_lock = qdisc_root_sleeping_lock(sch); + else + root_lock = qdisc_lock(sch); + err = gen_new_estimator(&sch->bstats, &sch->rate_est, - qdisc_root_lock(sch), - tca[TCA_RATE]); + root_lock, tca[TCA_RATE]); if (err) { /* * Any broken qdiscs that would require @@ -884,7 +891,8 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) if (tca[TCA_RATE]) gen_replace_estimator(&sch->bstats, &sch->rate_est, - qdisc_root_lock(sch), tca[TCA_RATE]); + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); return 0; } diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 8fa90d68ec6..9b720adedea 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1839,7 +1839,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (tca[TCA_RATE]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_lock(sch), + qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); return 0; } @@ -1930,7 +1930,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (tca[TCA_RATE]) gen_new_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_lock(sch), tca[TCA_RATE]); + qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); *arg = (unsigned long)cl; return 0; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index c2b8d9cce3d..c1e77da8cd0 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1045,7 +1045,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_lock(sch), + qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); return 0; } @@ -1104,7 +1104,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) gen_new_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_lock(sch), tca[TCA_RATE]); + qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); *arg = (unsigned long)cl; return 0; } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 0df0df202ed..97d4761cc31 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1372,7 +1372,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, goto failure; gen_new_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_lock(sch), + qdisc_root_sleeping_lock(sch), tca[TCA_RATE] ? : &est.nla); cl->refcnt = 1; cl->children = 0; @@ -1427,7 +1427,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, } else { if (tca[TCA_RATE]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_lock(sch), + qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); sch_tree_lock(sch); } -- cgit v1.2.3-70-g09d2 From 102396ae65108b026e4e1868e30fa013f45a169e Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Fri, 29 Aug 2008 14:21:52 -0700 Subject: pkt_sched: Fix locking of qdisc_root with qdisc_root_sleeping_lock() Use qdisc_root_sleeping_lock() instead of qdisc_root_lock() where appropriate. The only difference is while dev is deactivated, when currently we can use a sleeping qdisc with the lock of noop_qdisc. This shouldn't be dangerous since after deactivation root lock could be used only by gen_estimator code, but looks wrong anyway. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/cls_api.c | 2 +- net/sched/cls_route.c | 2 +- net/sched/sch_api.c | 8 ++++---- net/sched/sch_cbq.c | 2 +- net/sched/sch_htb.c | 4 ++-- net/sched/sch_netem.c | 2 +- net/sched/sch_teql.c | 2 +- 7 files changed, 11 insertions(+), 11 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 5cafdd4c801..8eb79e92e94 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -205,7 +205,7 @@ replay: } } - root_lock = qdisc_root_lock(q); + root_lock = qdisc_root_sleeping_lock(q); if (tp == NULL) { /* Proto-tcf does not exist, create new one */ diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 481260a4f10..e3d8455eebc 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -75,7 +75,7 @@ static __inline__ int route4_fastmap_hash(u32 id, int iif) static inline void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id) { - spinlock_t *root_lock = qdisc_root_lock(q); + spinlock_t *root_lock = qdisc_root_sleeping_lock(q); spin_lock_bh(root_lock); memset(head->fastmap, 0, sizeof(head->fastmap)); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 506b709510b..1122c952aa9 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1169,8 +1169,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, if (q->stab && qdisc_dump_stab(skb, q->stab) < 0) goto nla_put_failure; - if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, qdisc_root_lock(q), &d) < 0) + if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, + qdisc_root_sleeping_lock(q), &d) < 0) goto nla_put_failure; if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0) @@ -1461,8 +1461,8 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0) goto nla_put_failure; - if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, qdisc_root_lock(q), &d) < 0) + if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, + qdisc_root_sleeping_lock(q), &d) < 0) goto nla_put_failure; if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0) diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 9b720adedea..8b06fa90048 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1754,7 +1754,7 @@ static void cbq_put(struct Qdisc *sch, unsigned long arg) if (--cl->refcnt == 0) { #ifdef CONFIG_NET_CLS_ACT - spinlock_t *root_lock = qdisc_root_lock(sch); + spinlock_t *root_lock = qdisc_root_sleeping_lock(sch); struct cbq_sched_data *q = qdisc_priv(sch); spin_lock_bh(root_lock); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 97d4761cc31..d14f02056ae 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1043,7 +1043,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) { - spinlock_t *root_lock = qdisc_root_lock(sch); + spinlock_t *root_lock = qdisc_root_sleeping_lock(sch); struct htb_sched *q = qdisc_priv(sch); struct nlattr *nest; struct tc_htb_glob gopt; @@ -1075,7 +1075,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, struct tcmsg *tcm) { struct htb_class *cl = (struct htb_class *)arg; - spinlock_t *root_lock = qdisc_root_lock(sch); + spinlock_t *root_lock = qdisc_root_sleeping_lock(sch); struct nlattr *nest; struct tc_htb_opt opt; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index fb0294d0b55..3781e55046d 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -341,7 +341,7 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) for (i = 0; i < n; i++) d->table[i] = data[i]; - root_lock = qdisc_root_lock(sch); + root_lock = qdisc_root_sleeping_lock(sch); spin_lock_bh(root_lock); d = xchg(&q->delay_dist, d); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 2c35c678563..d35ef059abb 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -161,7 +161,7 @@ teql_destroy(struct Qdisc* sch) txq = netdev_get_tx_queue(master->dev, 0); master->slaves = NULL; - root_lock = qdisc_root_lock(txq->qdisc); + root_lock = qdisc_root_sleeping_lock(txq->qdisc); spin_lock_bh(root_lock); qdisc_reset(txq->qdisc); spin_unlock_bh(root_lock); -- cgit v1.2.3-70-g09d2