diff options
Diffstat (limited to 'net/sched')
-rw-r--r-- | net/sched/act_api.c | 13 | ||||
-rw-r--r-- | net/sched/cls_api.c | 2 | ||||
-rw-r--r-- | net/sched/sch_api.c | 131 | ||||
-rw-r--r-- | net/sched/sch_cbq.c | 10 | ||||
-rw-r--r-- | net/sched/sch_generic.c | 76 | ||||
-rw-r--r-- | net/sched/sch_hfsc.c | 4 | ||||
-rw-r--r-- | net/sched/sch_htb.c | 11 | ||||
-rw-r--r-- | net/sched/sch_prio.c | 4 | ||||
-rw-r--r-- | net/sched/sch_tbf.c | 11 |
9 files changed, 144 insertions, 118 deletions
diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 26c7e1f9a35..9974b3f04f0 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -751,7 +751,7 @@ static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid) struct nlattr *tb[TCA_ACT_MAX+1]; struct nlattr *kind; struct tc_action *a = create_a(0); - int err = -EINVAL; + int err = -ENOMEM; if (a == NULL) { printk("tca_action_flush: couldnt create tc_action\n"); @@ -762,7 +762,7 @@ static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid) if (!skb) { printk("tca_action_flush: failed skb alloc\n"); kfree(a); - return -ENOBUFS; + return err; } b = skb_tail_pointer(skb); @@ -790,6 +790,8 @@ static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid) err = a->ops->walk(skb, &dcb, RTM_DELACTION, a); if (err < 0) goto nla_put_failure; + if (err == 0) + goto noflush_out; nla_nest_end(skb, nest); @@ -807,6 +809,7 @@ nla_put_failure: nlmsg_failure: module_put(a->ops->owner); err_out: +noflush_out: kfree_skb(skb); kfree(a); return err; @@ -824,8 +827,10 @@ tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event) return ret; if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) { - if (tb[0] != NULL && tb[1] == NULL) - return tca_action_flush(tb[0], n, pid); + if (tb[1] != NULL) + return tca_action_flush(tb[1], n, pid); + else + return -EINVAL; } for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index d2b6f54a626..5cafdd4c801 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -280,7 +280,7 @@ replay: if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) { spin_lock_bh(root_lock); *back = tp->next; - spin_lock_bh(root_lock); + spin_unlock_bh(root_lock); tfilter_notify(skb, n, tp, fh, RTM_DELTFILTER); tcf_destroy(tp); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index ba1d121f312..506b709510b 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -27,6 +27,7 @@ #include <linux/kmod.h> #include <linux/list.h> #include <linux/hrtimer.h> +#include <linux/lockdep.h> #include <net/net_namespace.h> #include <net/sock.h> @@ -183,24 +184,68 @@ EXPORT_SYMBOL(unregister_qdisc); (root qdisc, all its children, children of children etc.) */ +struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) +{ + struct Qdisc *q; + + if (!(root->flags & TCQ_F_BUILTIN) && + root->handle == handle) + return root; + + list_for_each_entry(q, &root->list, list) { + if (q->handle == handle) + return q; + } + return NULL; +} + +/* + * This lock is needed until some qdiscs stop calling qdisc_tree_decrease_qlen() + * without rtnl_lock(); currently hfsc_dequeue(), netem_dequeue(), tbf_dequeue() + */ +static DEFINE_SPINLOCK(qdisc_list_lock); + +static void qdisc_list_add(struct Qdisc *q) +{ + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { + spin_lock_bh(&qdisc_list_lock); + list_add_tail(&q->list, &qdisc_root_sleeping(q)->list); + spin_unlock_bh(&qdisc_list_lock); + } +} + +void qdisc_list_del(struct Qdisc *q) +{ + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { + spin_lock_bh(&qdisc_list_lock); + list_del(&q->list); + spin_unlock_bh(&qdisc_list_lock); + } +} +EXPORT_SYMBOL(qdisc_list_del); + struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { unsigned int i; + struct Qdisc *q; + + spin_lock_bh(&qdisc_list_lock); for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - struct Qdisc *q, *txq_root = txq->qdisc_sleeping; - - if (!(txq_root->flags & TCQ_F_BUILTIN) && - txq_root->handle == handle) - return txq_root; + struct Qdisc *txq_root = txq->qdisc_sleeping; - list_for_each_entry(q, &txq_root->list, list) { - if (q->handle == handle) - return q; - } + q = qdisc_match_from_root(txq_root, handle); + if (q) + goto unlock; } - return NULL; + + q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle); + +unlock: + spin_unlock_bh(&qdisc_list_lock); + + return q; } static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) @@ -416,7 +461,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) wd->qdisc->flags &= ~TCQ_F_THROTTLED; smp_wmb(); - __netif_schedule(wd->qdisc); + __netif_schedule(qdisc_root(wd->qdisc)); return HRTIMER_NORESTART; } @@ -433,6 +478,10 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) { ktime_t time; + if (test_bit(__QDISC_STATE_DEACTIVATED, + &qdisc_root_sleeping(wd->qdisc)->state)) + return; + wd->qdisc->flags |= TCQ_F_THROTTLED; time = ktime_set(0, 0); time = ktime_add_ns(time, PSCHED_US2NS(expires)); @@ -575,7 +624,7 @@ static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, struct Qdisc *oqdisc = dev_queue->qdisc_sleeping; spinlock_t *root_lock; - root_lock = qdisc_root_lock(oqdisc); + root_lock = qdisc_lock(oqdisc); spin_lock_bh(root_lock); /* Prune old scheduler */ @@ -586,7 +635,7 @@ static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, if (qdisc == NULL) qdisc = &noop_qdisc; dev_queue->qdisc_sleeping = qdisc; - dev_queue->qdisc = &noop_qdisc; + rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc); spin_unlock_bh(root_lock); @@ -627,11 +676,8 @@ static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid if (new || old) qdisc_notify(skb, n, clid, old, new); - if (old) { - spin_lock_bh(&old->q.lock); + if (old) qdisc_destroy(old); - spin_unlock_bh(&old->q.lock); - } } /* Graft qdisc "new" to class "classid" of qdisc "parent" or @@ -697,6 +743,10 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, return err; } +/* lockdep annotation is needed for ingress; egress gets it only for name */ +static struct lock_class_key qdisc_tx_lock; +static struct lock_class_key qdisc_rx_lock; + /* Allocate and initialize new qdisc. @@ -757,6 +807,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, if (handle == TC_H_INGRESS) { sch->flags |= TCQ_F_INGRESS; handle = TC_H_MAKE(TC_H_INGRESS, 0); + lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock); } else { if (handle == 0) { handle = qdisc_alloc_handle(dev); @@ -764,6 +815,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, if (handle == 0) goto err_out3; } + lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock); } sch->handle = handle; @@ -778,9 +830,16 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, sch->stab = stab; } if (tca[TCA_RATE]) { + spinlock_t *root_lock; + + if ((sch->parent != TC_H_ROOT) && + !(sch->flags & TCQ_F_INGRESS)) + root_lock = qdisc_root_sleeping_lock(sch); + else + root_lock = qdisc_lock(sch); + err = gen_new_estimator(&sch->bstats, &sch->rate_est, - qdisc_root_lock(sch), - tca[TCA_RATE]); + root_lock, tca[TCA_RATE]); if (err) { /* * Any broken qdiscs that would require @@ -792,8 +851,8 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, goto err_out3; } } - if ((parent != TC_H_ROOT) && !(sch->flags & TCQ_F_INGRESS)) - list_add_tail(&sch->list, &dev_queue->qdisc_sleeping->list); + + qdisc_list_add(sch); return sch; } @@ -832,7 +891,8 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) if (tca[TCA_RATE]) gen_replace_estimator(&sch->bstats, &sch->rate_est, - qdisc_root_lock(sch), tca[TCA_RATE]); + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); return 0; } @@ -908,7 +968,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) return -ENOENT; q = qdisc_leaf(p, clid); } else { /* ingress */ - q = dev->rx_queue.qdisc; + q = dev->rx_queue.qdisc_sleeping; } } else { struct netdev_queue *dev_queue; @@ -978,7 +1038,7 @@ replay: return -ENOENT; q = qdisc_leaf(p, clid); } else { /*ingress */ - q = dev->rx_queue.qdisc; + q = dev->rx_queue.qdisc_sleeping; } } else { struct netdev_queue *dev_queue; @@ -1074,20 +1134,13 @@ create_n_graft: } graft: - if (1) { - spinlock_t *root_lock; - - err = qdisc_graft(dev, p, skb, n, clid, q, NULL); - if (err) { - if (q) { - root_lock = qdisc_root_lock(q); - spin_lock_bh(root_lock); - qdisc_destroy(q); - spin_unlock_bh(root_lock); - } - return err; - } + err = qdisc_graft(dev, p, skb, n, clid, q, NULL); + if (err) { + if (q) + qdisc_destroy(q); + return err; } + return 0; } @@ -1529,11 +1582,11 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) t = 0; dev_queue = netdev_get_tx_queue(dev, 0); - if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0) + if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0) goto done; dev_queue = &dev->rx_queue; - if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0) + if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0) goto done; done: diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 4e261ce62f4..9b720adedea 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -521,6 +521,10 @@ static void cbq_ovl_delay(struct cbq_class *cl) struct cbq_sched_data *q = qdisc_priv(cl->qdisc); psched_tdiff_t delay = cl->undertime - q->now; + if (test_bit(__QDISC_STATE_DEACTIVATED, + &qdisc_root_sleeping(cl->qdisc)->state)) + return; + if (!cl->delayed) { psched_time_t sched = q->now; ktime_t expires; @@ -654,7 +658,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) } sch->flags &= ~TCQ_F_THROTTLED; - __netif_schedule(sch); + __netif_schedule(qdisc_root(sch)); return HRTIMER_NORESTART; } @@ -1835,7 +1839,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (tca[TCA_RATE]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_lock(sch), + qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); return 0; } @@ -1926,7 +1930,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (tca[TCA_RATE]) gen_new_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_lock(sch), tca[TCA_RATE]); + qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); *arg = (unsigned long)cl; return 0; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 7cf83b37459..9634091ee2f 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -518,15 +518,17 @@ void qdisc_reset(struct Qdisc *qdisc) } EXPORT_SYMBOL(qdisc_reset); -/* this is the rcu callback function to clean up a qdisc when there - * are no further references to it */ - -static void __qdisc_destroy(struct rcu_head *head) +void qdisc_destroy(struct Qdisc *qdisc) { - struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu); const struct Qdisc_ops *ops = qdisc->ops; + if (qdisc->flags & TCQ_F_BUILTIN || + !atomic_dec_and_test(&qdisc->refcnt)) + return; + #ifdef CONFIG_NET_SCHED + qdisc_list_del(qdisc); + qdisc_put_stab(qdisc->stab); #endif gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); @@ -542,20 +544,6 @@ static void __qdisc_destroy(struct rcu_head *head) kfree((char *) qdisc - qdisc->padded); } - -/* Under qdisc_lock(qdisc) and BH! */ - -void qdisc_destroy(struct Qdisc *qdisc) -{ - if (qdisc->flags & TCQ_F_BUILTIN || - !atomic_dec_and_test(&qdisc->refcnt)) - return; - - if (qdisc->parent) - list_del(&qdisc->list); - - call_rcu(&qdisc->q_rcu, __qdisc_destroy); -} EXPORT_SYMBOL(qdisc_destroy); static bool dev_all_qdisc_sleeping_noop(struct net_device *dev) @@ -597,6 +585,9 @@ static void transition_one_qdisc(struct net_device *dev, struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping; int *need_watchdog_p = _need_watchdog; + if (!(new_qdisc->flags & TCQ_F_BUILTIN)) + clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state); + rcu_assign_pointer(dev_queue->qdisc, new_qdisc); if (need_watchdog_p && new_qdisc != &noqueue_qdisc) *need_watchdog_p = 1; @@ -640,14 +631,17 @@ static void dev_deactivate_queue(struct net_device *dev, if (qdisc) { spin_lock_bh(qdisc_lock(qdisc)); - dev_queue->qdisc = qdisc_default; + if (!(qdisc->flags & TCQ_F_BUILTIN)) + set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state); + + rcu_assign_pointer(dev_queue->qdisc, qdisc_default); qdisc_reset(qdisc); spin_unlock_bh(qdisc_lock(qdisc)); } } -static bool some_qdisc_is_running(struct net_device *dev, int lock) +static bool some_qdisc_is_busy(struct net_device *dev) { unsigned int i; @@ -658,16 +652,15 @@ static bool some_qdisc_is_running(struct net_device *dev, int lock) int val; dev_queue = netdev_get_tx_queue(dev, i); - q = dev_queue->qdisc; + q = dev_queue->qdisc_sleeping; root_lock = qdisc_lock(q); - if (lock) - spin_lock_bh(root_lock); + spin_lock_bh(root_lock); - val = test_bit(__QDISC_STATE_RUNNING, &q->state); + val = (test_bit(__QDISC_STATE_RUNNING, &q->state) || + test_bit(__QDISC_STATE_SCHED, &q->state)); - if (lock) - spin_unlock_bh(root_lock); + spin_unlock_bh(root_lock); if (val) return true; @@ -677,8 +670,6 @@ static bool some_qdisc_is_running(struct net_device *dev, int lock) void dev_deactivate(struct net_device *dev) { - bool running; - netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc); dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc); @@ -688,25 +679,8 @@ void dev_deactivate(struct net_device *dev) synchronize_rcu(); /* Wait for outstanding qdisc_run calls. */ - do { - while (some_qdisc_is_running(dev, 0)) - yield(); - - /* - * Double-check inside queue lock to ensure that all effects - * of the queue run are visible when we return. - */ - running = some_qdisc_is_running(dev, 1); - - /* - * The running flag should never be set at this point because - * we've already set dev->qdisc to noop_qdisc *inside* the same - * pair of spin locks. That is, if any qdisc_run starts after - * our initial test it should see the noop_qdisc and then - * clear the RUNNING bit before dropping the queue lock. So - * if it is set here then we've found a bug. - */ - } while (WARN_ON_ONCE(running)); + while (some_qdisc_is_busy(dev)) + yield(); } static void dev_init_scheduler_queue(struct net_device *dev, @@ -735,14 +709,10 @@ static void shutdown_scheduler_queue(struct net_device *dev, struct Qdisc *qdisc_default = _qdisc_default; if (qdisc) { - spinlock_t *root_lock = qdisc_lock(qdisc); - - dev_queue->qdisc = qdisc_default; + rcu_assign_pointer(dev_queue->qdisc, qdisc_default); dev_queue->qdisc_sleeping = qdisc_default; - spin_lock_bh(root_lock); qdisc_destroy(qdisc); - spin_unlock_bh(root_lock); } } diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index c2b8d9cce3d..c1e77da8cd0 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1045,7 +1045,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_lock(sch), + qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); return 0; } @@ -1104,7 +1104,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) gen_new_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_lock(sch), tca[TCA_RATE]); + qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); *arg = (unsigned long)cl; return 0; } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index be35422711a..97d4761cc31 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -577,7 +577,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) sch->qstats.drops++; cl->qstats.drops++; } - return NET_XMIT_DROP; + return ret; } else { cl->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1; @@ -623,7 +623,7 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch) sch->qstats.drops++; cl->qstats.drops++; } - return NET_XMIT_DROP; + return ret; } else htb_activate(q, cl); @@ -1279,7 +1279,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) /* delete from hash and active; remainder in destroy_class */ qdisc_class_hash_remove(&q->clhash, &cl->common); - cl->parent->children--; + if (cl->parent) + cl->parent->children--; if (cl->prio_activity) htb_deactivate(q, cl); @@ -1371,7 +1372,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, goto failure; gen_new_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_lock(sch), + qdisc_root_sleeping_lock(sch), tca[TCA_RATE] ? : &est.nla); cl->refcnt = 1; cl->children = 0; @@ -1426,7 +1427,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, } else { if (tca[TCA_RATE]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_lock(sch), + qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); sch_tree_lock(sch); } diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index eac197610ed..a6697c686c7 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -113,11 +113,11 @@ prio_requeue(struct sk_buff *skb, struct Qdisc* sch) if ((ret = qdisc->ops->requeue(skb, qdisc)) == NET_XMIT_SUCCESS) { sch->q.qlen++; sch->qstats.requeues++; - return 0; + return NET_XMIT_SUCCESS; } if (net_xmit_drop_count(ret)) sch->qstats.drops++; - return NET_XMIT_DROP; + return ret; } diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 7d3b7ff3bf0..94c61598b86 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -123,15 +123,8 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) struct tbf_sched_data *q = qdisc_priv(sch); int ret; - if (qdisc_pkt_len(skb) > q->max_size) { - sch->qstats.drops++; -#ifdef CONFIG_NET_CLS_ACT - if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch)) -#endif - kfree_skb(skb); - - return NET_XMIT_DROP; - } + if (qdisc_pkt_len(skb) > q->max_size) + return qdisc_reshape_fail(skb, sch); ret = qdisc_enqueue(skb, q->qdisc); if (ret != 0) { |