From c716a81ab946c68a8d84022ee32eb14674e72650 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 10 Jun 2007 17:31:24 -0700 Subject: [NET_SCHED]: Cleanup readability of qdisc restart Over the years this code has gotten hairier. Resulting in many long discussions over long summer days and patches that get it wrong. This patch helps tame that code so normal people will understand it. Thanks to Thomas Graf, Peter J. waskiewicz Jr, and Patrick McHardy for their valuable reviews. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 199 ++++++++++++++++++++++++++---------------------- 1 file changed, 109 insertions(+), 90 deletions(-) (limited to 'net/sched') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index f4d34480a09..9461e8ae052 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -34,6 +34,9 @@ #include #include +#define SCHED_TX_DROP -2 +#define SCHED_TX_QUEUE -3 + /* Main transmission queue. */ /* Modifications to data participating in scheduling must be protected with @@ -59,7 +62,74 @@ void qdisc_unlock_tree(struct net_device *dev) spin_unlock_bh(&dev->queue_lock); } +static inline int qdisc_qlen(struct Qdisc *q) +{ + BUG_ON((int) q->q.qlen < 0); + return q->q.qlen; +} + +static inline int handle_dev_cpu_collision(struct net_device *dev) +{ + if (unlikely(dev->xmit_lock_owner == smp_processor_id())) { + if (net_ratelimit()) + printk(KERN_WARNING + "Dead loop on netdevice %s, fix it urgently!\n", + dev->name); + return SCHED_TX_DROP; + } + __get_cpu_var(netdev_rx_stat).cpu_collision++; + return SCHED_TX_QUEUE; +} + +static inline int +do_dev_requeue(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q) +{ + + if (unlikely(skb->next)) + dev->gso_skb = skb; + else + q->ops->requeue(skb, q); + /* XXX: Could netif_schedule fail? Or is the fact we are + * requeueing imply the hardware path is closed + * and even if we fail, some interupt will wake us + */ + netif_schedule(dev); + return 0; +} + +static inline struct sk_buff * +try_get_tx_pkt(struct net_device *dev, struct Qdisc *q) +{ + struct sk_buff *skb = dev->gso_skb; + + if (skb) + dev->gso_skb = NULL; + else + skb = q->dequeue(q); + + return skb; +} + +static inline int +tx_islocked(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q) +{ + int ret = handle_dev_cpu_collision(dev); + + if (ret == SCHED_TX_DROP) { + kfree_skb(skb); + return qdisc_qlen(q); + } + + return do_dev_requeue(skb, dev, q); +} + + /* + NOTE: Called under dev->queue_lock with locally disabled BH. + + __LINK_STATE_QDISC_RUNNING guarantees only one CPU + can enter this region at a time. + dev->queue_lock serializes queue accesses for this device AND dev->qdisc pointer itself. @@ -67,116 +137,65 @@ void qdisc_unlock_tree(struct net_device *dev) dev->queue_lock and netif_tx_lock are mutually exclusive, if one is grabbed, another must be free. - */ + Multiple CPUs may contend for the two locks. -/* Kick device. + Note, that this procedure can be called by a watchdog timer + Returns to the caller: Returns: 0 - queue is empty or throttled. >0 - queue is not empty. - NOTE: Called under dev->queue_lock with locally disabled BH. */ static inline int qdisc_restart(struct net_device *dev) { struct Qdisc *q = dev->qdisc; + unsigned lockless = (dev->features & NETIF_F_LLTX); struct sk_buff *skb; + int ret; - /* Dequeue packet */ - if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) { - unsigned nolock = (dev->features & NETIF_F_LLTX); - - dev->gso_skb = NULL; + skb = try_get_tx_pkt(dev, q); + if (skb == NULL) + return 0; - /* - * When the driver has LLTX set it does its own locking - * in start_xmit. No need to add additional overhead by - * locking again. These checks are worth it because - * even uncongested locks can be quite expensive. - * The driver can do trylock like here too, in case - * of lock congestion it should return -1 and the packet - * will be requeued. - */ - if (!nolock) { - if (!netif_tx_trylock(dev)) { - collision: - /* So, someone grabbed the driver. */ - - /* It may be transient configuration error, - when hard_start_xmit() recurses. We detect - it by checking xmit owner and drop the - packet when deadloop is detected. - */ - if (dev->xmit_lock_owner == smp_processor_id()) { - kfree_skb(skb); - if (net_ratelimit()) - printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name); - goto out; - } - __get_cpu_var(netdev_rx_stat).cpu_collision++; - goto requeue; - } - } + /* we have a packet to send */ + if (!lockless) { + if (!netif_tx_trylock(dev)) + return tx_islocked(skb, dev, q); + } + /* all clear .. */ + spin_unlock(&dev->queue_lock); - { - /* And release queue */ - spin_unlock(&dev->queue_lock); - - if (!netif_queue_stopped(dev)) { - int ret; - - ret = dev_hard_start_xmit(skb, dev); - if (ret == NETDEV_TX_OK) { - if (!nolock) { - netif_tx_unlock(dev); - } - spin_lock(&dev->queue_lock); - q = dev->qdisc; - goto out; - } - if (ret == NETDEV_TX_LOCKED && nolock) { - spin_lock(&dev->queue_lock); - q = dev->qdisc; - goto collision; - } - } + ret = NETDEV_TX_BUSY; + if (!netif_queue_stopped(dev)) + /* churn baby churn .. */ + ret = dev_hard_start_xmit(skb, dev); - /* NETDEV_TX_BUSY - we need to requeue */ - /* Release the driver */ - if (!nolock) { - netif_tx_unlock(dev); - } - spin_lock(&dev->queue_lock); - q = dev->qdisc; - } + if (!lockless) + netif_tx_unlock(dev); - /* Device kicked us out :( - This is possible in three cases: - - 0. driver is locked - 1. fastroute is enabled - 2. device cannot determine busy state - before start of transmission (f.e. dialout) - 3. device is buggy (ppp) - */ - -requeue: - if (unlikely(q == &noop_qdisc)) - kfree_skb(skb); - else if (skb->next) - dev->gso_skb = skb; - else - q->ops->requeue(skb, q); - netif_schedule(dev); - } - return 0; + spin_lock(&dev->queue_lock); -out: - BUG_ON((int) q->q.qlen < 0); - return q->q.qlen; + /* we need to refresh q because it may be invalid since + * we dropped dev->queue_lock earlier ... + * So dont try to be clever grasshopper + */ + q = dev->qdisc; + /* most likely result, packet went ok */ + if (ret == NETDEV_TX_OK) + return qdisc_qlen(q); + /* only for lockless drivers .. */ + if (ret == NETDEV_TX_LOCKED && lockless) + return tx_islocked(skb, dev, q); + + if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit())) + printk(KERN_WARNING " BUG %s code %d qlen %d\n",dev->name, ret, q->q.qlen); + + return do_dev_requeue(skb, dev, q); } + void __qdisc_run(struct net_device *dev) { do { -- cgit v1.2.3-70-g09d2 From 6c1361a6f285bf3df4b502651c0dd38d0eedc044 Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Sun, 24 Jun 2007 19:56:09 -0700 Subject: [NET]: qdisc_restart - readability changes plus one bug fix. New changes : - Incorporated Peter Waskiewicz's comments. - Re-added back one warning message (on driver returning wrong value). Previous changes : - Converted to use switch/case code which looks neater. - "if (ret == NETDEV_TX_LOCKED && lockless)" is buggy, and the lockless check should be removed, since driver will return NETDEV_TX_LOCKED only if lockless is true and driver has to do the locking. In the original code as well as the latest code, this code can result in a bug where if LLTX is not set for a driver (lockless == 0) but the driver is written wrongly to do a trylock (despite LLTX being set), the driver returns LOCKED. But since lockless is zero, the packet is requeue'd instead of calling collision code which will issue warning and free up the skb. Instead this skb will be retried with this driver next time, and the same result will ensue. Removing this check will catch these driver bugs instead of hiding the problem. I am keeping this change to readability section since : a. it is confusing to check two things as it is; and b. it is difficult to keep this check in the changed 'switch' code. - Changed some names, like try_get_tx_pkt to dev_dequeue_skb (as that is the work being done and easier to understand) and do_dev_requeue to dev_requeue_skb, merged handle_dev_cpu_collision and tx_islocked to dev_handle_collision (handle_dev_cpu_collision is a small routine with only one caller, so there is no need to have two separate routines which also results in getting rid of two macros, etc. - Removed an XXX comment as it should never fail (I suspect this was related to batch skb WIP, Jamal ?). Converted some functions to original coding style of having the return values and the function name on same line, eg prio2list. Signed-off-by: Krishna Kumar Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 167 +++++++++++++++++++++++++----------------------- 1 file changed, 86 insertions(+), 81 deletions(-) (limited to 'net/sched') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 9461e8ae052..983c32caf71 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -34,9 +34,6 @@ #include #include -#define SCHED_TX_DROP -2 -#define SCHED_TX_QUEUE -3 - /* Main transmission queue. */ /* Modifications to data participating in scheduling must be protected with @@ -68,41 +65,24 @@ static inline int qdisc_qlen(struct Qdisc *q) return q->q.qlen; } -static inline int handle_dev_cpu_collision(struct net_device *dev) -{ - if (unlikely(dev->xmit_lock_owner == smp_processor_id())) { - if (net_ratelimit()) - printk(KERN_WARNING - "Dead loop on netdevice %s, fix it urgently!\n", - dev->name); - return SCHED_TX_DROP; - } - __get_cpu_var(netdev_rx_stat).cpu_collision++; - return SCHED_TX_QUEUE; -} - -static inline int -do_dev_requeue(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q) +static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev, + struct Qdisc *q) { - if (unlikely(skb->next)) dev->gso_skb = skb; else q->ops->requeue(skb, q); - /* XXX: Could netif_schedule fail? Or is the fact we are - * requeueing imply the hardware path is closed - * and even if we fail, some interupt will wake us - */ + netif_schedule(dev); return 0; } -static inline struct sk_buff * -try_get_tx_pkt(struct net_device *dev, struct Qdisc *q) +static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev, + struct Qdisc *q) { - struct sk_buff *skb = dev->gso_skb; + struct sk_buff *skb; - if (skb) + if ((skb = dev->gso_skb)) dev->gso_skb = NULL; else skb = q->dequeue(q); @@ -110,92 +90,117 @@ try_get_tx_pkt(struct net_device *dev, struct Qdisc *q) return skb; } -static inline int -tx_islocked(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q) +static inline int handle_dev_cpu_collision(struct sk_buff *skb, + struct net_device *dev, + struct Qdisc *q) { - int ret = handle_dev_cpu_collision(dev); + int ret; - if (ret == SCHED_TX_DROP) { + if (unlikely(dev->xmit_lock_owner == smp_processor_id())) { + /* + * Same CPU holding the lock. It may be a transient + * configuration error, when hard_start_xmit() recurses. We + * detect it by checking xmit owner and drop the packet when + * deadloop is detected. Return OK to try the next skb. + */ kfree_skb(skb); - return qdisc_qlen(q); + if (net_ratelimit()) + printk(KERN_WARNING "Dead loop on netdevice %s, " + "fix it urgently!\n", dev->name); + ret = qdisc_qlen(q); + } else { + /* + * Another cpu is holding lock, requeue & delay xmits for + * some time. + */ + __get_cpu_var(netdev_rx_stat).cpu_collision++; + ret = dev_requeue_skb(skb, dev, q); } - return do_dev_requeue(skb, dev, q); + return ret; } - /* - NOTE: Called under dev->queue_lock with locally disabled BH. - - __LINK_STATE_QDISC_RUNNING guarantees only one CPU - can enter this region at a time. - - dev->queue_lock serializes queue accesses for this device - AND dev->qdisc pointer itself. - - netif_tx_lock serializes accesses to device driver. - - dev->queue_lock and netif_tx_lock are mutually exclusive, - if one is grabbed, another must be free. - - Multiple CPUs may contend for the two locks. - - Note, that this procedure can be called by a watchdog timer - - Returns to the caller: - Returns: 0 - queue is empty or throttled. - >0 - queue is not empty. - -*/ - + * NOTE: Called under dev->queue_lock with locally disabled BH. + * + * __LINK_STATE_QDISC_RUNNING guarantees only one CPU can process this + * device at a time. dev->queue_lock serializes queue accesses for + * this device AND dev->qdisc pointer itself. + * + * netif_tx_lock serializes accesses to device driver. + * + * dev->queue_lock and netif_tx_lock are mutually exclusive, + * if one is grabbed, another must be free. + * + * Note, that this procedure can be called by a watchdog timer + * + * Returns to the caller: + * 0 - queue is empty or throttled. + * >0 - queue is not empty. + * + */ static inline int qdisc_restart(struct net_device *dev) { struct Qdisc *q = dev->qdisc; - unsigned lockless = (dev->features & NETIF_F_LLTX); struct sk_buff *skb; + unsigned lockless; int ret; - skb = try_get_tx_pkt(dev, q); - if (skb == NULL) + /* Dequeue packet */ + if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL)) return 0; - /* we have a packet to send */ - if (!lockless) { - if (!netif_tx_trylock(dev)) - return tx_islocked(skb, dev, q); + /* + * When the driver has LLTX set, it does its own locking in + * start_xmit. These checks are worth it because even uncongested + * locks can be quite expensive. The driver can do a trylock, as + * is being done here; in case of lock contention it should return + * NETDEV_TX_LOCKED and the packet will be requeued. + */ + lockless = (dev->features & NETIF_F_LLTX); + + if (!lockless && !netif_tx_trylock(dev)) { + /* Another CPU grabbed the driver tx lock */ + return handle_dev_cpu_collision(skb, dev, q); } - /* all clear .. */ + + /* And release queue */ spin_unlock(&dev->queue_lock); ret = NETDEV_TX_BUSY; if (!netif_queue_stopped(dev)) - /* churn baby churn .. */ ret = dev_hard_start_xmit(skb, dev); if (!lockless) netif_tx_unlock(dev); spin_lock(&dev->queue_lock); - - /* we need to refresh q because it may be invalid since - * we dropped dev->queue_lock earlier ... - * So dont try to be clever grasshopper - */ q = dev->qdisc; - /* most likely result, packet went ok */ - if (ret == NETDEV_TX_OK) - return qdisc_qlen(q); - /* only for lockless drivers .. */ - if (ret == NETDEV_TX_LOCKED && lockless) - return tx_islocked(skb, dev, q); - if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit())) - printk(KERN_WARNING " BUG %s code %d qlen %d\n",dev->name, ret, q->q.qlen); + switch (ret) { + case NETDEV_TX_OK: + /* Driver sent out skb successfully */ + ret = qdisc_qlen(q); + break; + + case NETDEV_TX_LOCKED: + /* Driver try lock failed */ + ret = handle_dev_cpu_collision(skb, dev, q); + break; + + default: + /* Driver returned NETDEV_TX_BUSY - requeue skb */ + if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit())) + printk(KERN_WARNING "BUG %s code %d qlen %d\n", + dev->name, ret, q->q.qlen); + + ret = dev_requeue_skb(skb, dev, q); + break; + } - return do_dev_requeue(skb, dev, q); + return ret; } - void __qdisc_run(struct net_device *dev) { do { -- cgit v1.2.3-70-g09d2 From e50c41b53d7aa48152dd9c633b04fc7abd536f1f Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Sun, 24 Jun 2007 19:57:27 -0700 Subject: [NET]: qdisc_restart - couple of optimizations. Changes : - netif_queue_stopped need not be called inside qdisc_restart as it has been called already in qdisc_run() before the first skb is sent, and in __qdisc_run() after each intermediate skb is sent (note : we are the only sender, so the queue cannot get stopped while the tx lock was got in the ~LLTX case). - BUG_ON((int) q->q.qlen < 0) was a relic from old times when -1 meant more packets are available, and __qdisc_run used to loop when qdisc_restart() returned -1. During those days, it was necessary to make sure that qlen is never less than zero, since __qdisc_run would get into an infinite loop if no packets are on the queue and this bug in qdisc was there (and worse - no more skbs could ever get queue'd as we hold the queue lock too). With Herbert's recent change to return values, this check is not required. Hopefully Herbert can validate this change. If at all this is required, it should be added to skb_dequeue (in failure case), and not to qdisc_qlen. Signed-off-by: Krishna Kumar Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net/sched') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 983c32caf71..2488dbb17b6 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -61,7 +61,6 @@ void qdisc_unlock_tree(struct net_device *dev) static inline int qdisc_qlen(struct Qdisc *q) { - BUG_ON((int) q->q.qlen < 0); return q->q.qlen; } @@ -167,9 +166,7 @@ static inline int qdisc_restart(struct net_device *dev) /* And release queue */ spin_unlock(&dev->queue_lock); - ret = NETDEV_TX_BUSY; - if (!netif_queue_stopped(dev)) - ret = dev_hard_start_xmit(skb, dev); + ret = dev_hard_start_xmit(skb, dev); if (!lockless) netif_tx_unlock(dev); -- cgit v1.2.3-70-g09d2 From f25f4e44808f0f6c9875d94ef1c41ef86c288eb2 Mon Sep 17 00:00:00 2001 From: Peter P Waskiewicz Jr Date: Fri, 6 Jul 2007 13:36:20 -0700 Subject: [CORE] Stack changes to add multiqueue hardware support API Add the multiqueue hardware device support API to the core network stack. Allow drivers to allocate multiple queues and manage them at the netdev level if they choose to do so. Added a new field to sk_buff, namely queue_mapping, for drivers to know which tx_ring to select based on OS classification of the flow. Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- drivers/net/Kconfig | 8 +++++ include/linux/etherdevice.h | 3 +- include/linux/netdevice.h | 80 ++++++++++++++++++++++++++++++++++++++++++--- include/linux/skbuff.h | 25 ++++++++++++-- net/core/dev.c | 36 ++++++++++++++------ net/core/netpoll.c | 8 +++-- net/core/pktgen.c | 10 ++++-- net/core/skbuff.c | 3 ++ net/ethernet/eth.c | 9 ++--- net/sched/sch_teql.c | 6 +++- 10 files changed, 158 insertions(+), 30 deletions(-) (limited to 'net/sched') diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index c251cca295c..d4e39ff1545 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -25,6 +25,14 @@ menuconfig NETDEVICES # that for each of the symbols. if NETDEVICES +config NETDEVICES_MULTIQUEUE + bool "Netdevice multiple hardware queue support" + ---help--- + Say Y here if you want to allow the network stack to use multiple + hardware TX queues on an ethernet device. + + Most people will say N here. + config IFB tristate "Intermediate Functional Block support" depends on NET_CLS_ACT diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index f48eb89efd0..6cdb97365e4 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -39,7 +39,8 @@ extern void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev extern int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh); -extern struct net_device *alloc_etherdev(int sizeof_priv); +extern struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count); +#define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1) /** * is_zero_ether_addr - Determine if give Ethernet address is all zeros. diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2c0cc19edfb..9817821729c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -108,6 +108,14 @@ struct wireless_dev; #define MAX_HEADER (LL_MAX_HEADER + 48) #endif +struct net_device_subqueue +{ + /* Give a control state for each queue. This struct may contain + * per-queue locks in the future. + */ + unsigned long state; +}; + /* * Network device statistics. Akin to the 2.0 ether stats but * with byte counters. @@ -331,6 +339,7 @@ struct net_device #define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ #define NETIF_F_GSO 2048 /* Enable software GSO. */ #define NETIF_F_LLTX 4096 /* LockLess TX */ +#define NETIF_F_MULTI_QUEUE 16384 /* Has multiple TX/RX queues */ /* Segmentation offload features */ #define NETIF_F_GSO_SHIFT 16 @@ -557,6 +566,10 @@ struct net_device /* rtnetlink link ops */ const struct rtnl_link_ops *rtnl_link_ops; + + /* The TX queue control structures */ + unsigned int egress_subqueue_count; + struct net_device_subqueue egress_subqueue[0]; }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -565,9 +578,7 @@ struct net_device static inline void *netdev_priv(const struct net_device *dev) { - return (char *)dev + ((sizeof(struct net_device) - + NETDEV_ALIGN_CONST) - & ~NETDEV_ALIGN_CONST); + return dev->priv; } #define SET_MODULE_OWNER(dev) do { } while (0) @@ -719,6 +730,62 @@ static inline int netif_running(const struct net_device *dev) return test_bit(__LINK_STATE_START, &dev->state); } +/* + * Routines to manage the subqueues on a device. We only need start + * stop, and a check if it's stopped. All other device management is + * done at the overall netdevice level. + * Also test the device if we're multiqueue. + */ +static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index) +{ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + clear_bit(__LINK_STATE_XOFF, &dev->egress_subqueue[queue_index].state); +#endif +} + +static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index) +{ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE +#ifdef CONFIG_NETPOLL_TRAP + if (netpoll_trap()) + return; +#endif + set_bit(__LINK_STATE_XOFF, &dev->egress_subqueue[queue_index].state); +#endif +} + +static inline int netif_subqueue_stopped(const struct net_device *dev, + u16 queue_index) +{ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + return test_bit(__LINK_STATE_XOFF, + &dev->egress_subqueue[queue_index].state); +#else + return 0; +#endif +} + +static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) +{ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE +#ifdef CONFIG_NETPOLL_TRAP + if (netpoll_trap()) + return; +#endif + if (test_and_clear_bit(__LINK_STATE_XOFF, + &dev->egress_subqueue[queue_index].state)) + __netif_schedule(dev); +#endif +} + +static inline int netif_is_multiqueue(const struct net_device *dev) +{ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + return (!!(NETIF_F_MULTI_QUEUE & dev->features)); +#else + return 0; +#endif +} /* Use this variant when it is known for sure that it * is executing from interrupt context. @@ -1009,8 +1076,11 @@ static inline void netif_tx_disable(struct net_device *dev) extern void ether_setup(struct net_device *dev); /* Support for loadable net-drivers */ -extern struct net_device *alloc_netdev(int sizeof_priv, const char *name, - void (*setup)(struct net_device *)); +extern struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, + void (*setup)(struct net_device *), + unsigned int queue_count); +#define alloc_netdev(sizeof_priv, name, setup) \ + alloc_netdev_mq(sizeof_priv, name, setup, 1) extern int register_netdev(struct net_device *dev); extern void unregister_netdev(struct net_device *dev); /* Functions used for secondary unicast and multicast support */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 881fe80f01d..2d6a14f5f2f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -196,7 +196,6 @@ typedef unsigned char *sk_buff_data_t; * @sk: Socket we are owned by * @tstamp: Time we arrived * @dev: Device we arrived on/are leaving by - * @iif: ifindex of device we arrived on * @transport_header: Transport layer header * @network_header: Network layer header * @mac_header: Link layer header @@ -231,6 +230,8 @@ typedef unsigned char *sk_buff_data_t; * @nfctinfo: Relationship of this skb to the connection * @nfct_reasm: netfilter conntrack re-assembly pointer * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c + * @iif: ifindex of device we arrived on + * @queue_mapping: Queue mapping for multiqueue devices * @tc_index: Traffic control index * @tc_verd: traffic control verdict * @dma_cookie: a cookie to one of several possible DMA operations @@ -246,8 +247,6 @@ struct sk_buff { struct sock *sk; ktime_t tstamp; struct net_device *dev; - int iif; - /* 4 byte hole on 64 bit*/ struct dst_entry *dst; struct sec_path *sp; @@ -290,12 +289,18 @@ struct sk_buff { #ifdef CONFIG_BRIDGE_NETFILTER struct nf_bridge_info *nf_bridge; #endif + + int iif; + __u16 queue_mapping; + #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ #ifdef CONFIG_NET_CLS_ACT __u16 tc_verd; /* traffic control verdict */ #endif #endif + /* 2 byte hole */ + #ifdef CONFIG_NET_DMA dma_cookie_t dma_cookie; #endif @@ -1725,6 +1730,20 @@ static inline void skb_init_secmark(struct sk_buff *skb) { } #endif +static inline void skb_set_queue_mapping(struct sk_buff *skb, u16 queue_mapping) +{ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + skb->queue_mapping = queue_mapping; +#endif +} + +static inline void skb_copy_queue_mapping(struct sk_buff *to, const struct sk_buff *from) +{ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + to->queue_mapping = from->queue_mapping; +#endif +} + static inline int skb_is_gso(const struct sk_buff *skb) { return skb_shinfo(skb)->gso_size; diff --git a/net/core/dev.c b/net/core/dev.c index 6dce9d2d46f..7ddf66d0ad5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1429,7 +1429,9 @@ gso: skb->next = nskb; return rc; } - if (unlikely(netif_queue_stopped(dev) && skb->next)) + if (unlikely((netif_queue_stopped(dev) || + netif_subqueue_stopped(dev, skb->queue_mapping)) && + skb->next)) return NETDEV_TX_BUSY; } while (skb->next); @@ -1547,6 +1549,8 @@ gso: spin_lock(&dev->queue_lock); q = dev->qdisc; if (q->enqueue) { + /* reset queue_mapping to zero */ + skb->queue_mapping = 0; rc = q->enqueue(skb, q); qdisc_run(dev); spin_unlock(&dev->queue_lock); @@ -1576,7 +1580,8 @@ gso: HARD_TX_LOCK(dev, cpu); - if (!netif_queue_stopped(dev)) { + if (!netif_queue_stopped(dev) && + !netif_subqueue_stopped(dev, skb->queue_mapping)) { rc = 0; if (!dev_hard_start_xmit(skb, dev)) { HARD_TX_UNLOCK(dev); @@ -3539,16 +3544,18 @@ static struct net_device_stats *internal_stats(struct net_device *dev) } /** - * alloc_netdev - allocate network device + * alloc_netdev_mq - allocate network device * @sizeof_priv: size of private data to allocate space for * @name: device name format string * @setup: callback to initialize device + * @queue_count: the number of subqueues to allocate * * Allocates a struct net_device with private data area for driver use - * and performs basic initialization. + * and performs basic initialization. Also allocates subquue structs + * for each queue on the device at the end of the netdevice. */ -struct net_device *alloc_netdev(int sizeof_priv, const char *name, - void (*setup)(struct net_device *)) +struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, + void (*setup)(struct net_device *), unsigned int queue_count) { void *p; struct net_device *dev; @@ -3557,7 +3564,9 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name, BUG_ON(strlen(name) >= sizeof(dev->name)); /* ensure 32-byte alignment of both the device and private area */ - alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; + alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST + + (sizeof(struct net_device_subqueue) * queue_count)) & + ~NETDEV_ALIGN_CONST; alloc_size += sizeof_priv + NETDEV_ALIGN_CONST; p = kzalloc(alloc_size, GFP_KERNEL); @@ -3570,15 +3579,22 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name, (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); dev->padded = (char *)dev - (char *)p; - if (sizeof_priv) - dev->priv = netdev_priv(dev); + if (sizeof_priv) { + dev->priv = ((char *)dev + + ((sizeof(struct net_device) + + (sizeof(struct net_device_subqueue) * + queue_count) + NETDEV_ALIGN_CONST) + & ~NETDEV_ALIGN_CONST)); + } + + dev->egress_subqueue_count = queue_count; dev->get_stats = internal_stats; setup(dev); strcpy(dev->name, name); return dev; } -EXPORT_SYMBOL(alloc_netdev); +EXPORT_SYMBOL(alloc_netdev_mq); /** * free_netdev - free network device diff --git a/net/core/netpoll.c b/net/core/netpoll.c index a0efdd7a6b3..4b06d193637 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -66,8 +66,9 @@ static void queue_process(struct work_struct *work) local_irq_save(flags); netif_tx_lock(dev); - if (netif_queue_stopped(dev) || - dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { + if ((netif_queue_stopped(dev) || + netif_subqueue_stopped(dev, skb->queue_mapping)) || + dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); netif_tx_unlock(dev); local_irq_restore(flags); @@ -254,7 +255,8 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; tries > 0; --tries) { if (netif_tx_trylock(dev)) { - if (!netif_queue_stopped(dev)) + if (!netif_queue_stopped(dev) && + !netif_subqueue_stopped(dev, skb->queue_mapping)) status = dev->hard_start_xmit(skb, dev); netif_tx_unlock(dev); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 9cd3a1cb60e..dffe067e7a7 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3139,7 +3139,9 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) } } - if (netif_queue_stopped(odev) || need_resched()) { + if ((netif_queue_stopped(odev) || + netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping)) || + need_resched()) { idle_start = getCurUs(); if (!netif_running(odev)) { @@ -3154,7 +3156,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->idle_acc += getCurUs() - idle_start; - if (netif_queue_stopped(odev)) { + if (netif_queue_stopped(odev) || + netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping)) { pkt_dev->next_tx_us = getCurUs(); /* TODO */ pkt_dev->next_tx_ns = 0; goto out; /* Try the next interface */ @@ -3181,7 +3184,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) } netif_tx_lock_bh(odev); - if (!netif_queue_stopped(odev)) { + if (!netif_queue_stopped(odev) && + !netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping)) { atomic_inc(&(pkt_dev->skb->users)); retry_now: diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c989c3a0f90..6a41b96b3d3 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -419,6 +419,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) n->nohdr = 0; C(pkt_type); C(ip_summed); + skb_copy_queue_mapping(n, skb); C(priority); #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) C(ipvs_property); @@ -460,6 +461,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #endif new->sk = NULL; new->dev = old->dev; + skb_copy_queue_mapping(new, old); new->priority = old->priority; new->protocol = old->protocol; new->dst = dst_clone(old->dst); @@ -1932,6 +1934,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) tail = nskb; nskb->dev = skb->dev; + skb_copy_queue_mapping(nskb, skb); nskb->priority = skb->priority; nskb->protocol = skb->protocol; nskb->dst = dst_clone(skb->dst); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 0ac2524f3b6..1387e5411f7 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -316,9 +316,10 @@ void ether_setup(struct net_device *dev) EXPORT_SYMBOL(ether_setup); /** - * alloc_etherdev - Allocates and sets up an Ethernet device + * alloc_etherdev_mq - Allocates and sets up an Ethernet device * @sizeof_priv: Size of additional driver-private structure to be allocated * for this Ethernet device + * @queue_count: The number of queues this device has. * * Fill in the fields of the device structure with Ethernet-generic * values. Basically does everything except registering the device. @@ -328,8 +329,8 @@ EXPORT_SYMBOL(ether_setup); * this private data area. */ -struct net_device *alloc_etherdev(int sizeof_priv) +struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count) { - return alloc_netdev(sizeof_priv, "eth%d", ether_setup); + return alloc_netdev_mq(sizeof_priv, "eth%d", ether_setup, queue_count); } -EXPORT_SYMBOL(alloc_etherdev); +EXPORT_SYMBOL(alloc_etherdev_mq); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index f05ad9a30b4..dfe7e452098 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -277,6 +277,7 @@ static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev) int busy; int nores; int len = skb->len; + int subq = skb->queue_mapping; struct sk_buff *skb_res = NULL; start = master->slaves; @@ -293,7 +294,9 @@ restart: if (slave->qdisc_sleeping != q) continue; - if (netif_queue_stopped(slave) || ! netif_running(slave)) { + if (netif_queue_stopped(slave) || + netif_subqueue_stopped(slave, subq) || + !netif_running(slave)) { busy = 1; continue; } @@ -302,6 +305,7 @@ restart: case 0: if (netif_tx_trylock(slave)) { if (!netif_queue_stopped(slave) && + !netif_subqueue_stopped(slave, subq) && slave->hard_start_xmit(skb, slave) == 0) { netif_tx_unlock(slave); master->slaves = NEXT_SLAVE(q); -- cgit v1.2.3-70-g09d2 From d62733c8e437fdb58325617c4b3331769ba82d70 Mon Sep 17 00:00:00 2001 From: Peter P Waskiewicz Jr Date: Thu, 28 Jun 2007 21:04:31 -0700 Subject: [SCHED]: Qdisc changes and sch_rr added for multiqueue Add the new sch_rr qdisc for multiqueue network device support. Allow sch_prio and sch_rr to be compiled with or without multiqueue hardware support. sch_rr is part of sch_prio, and is referenced from MODULE_ALIAS. This was done since sch_prio and sch_rr only differ in their dequeue routine. Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 9 ++++ net/sched/Kconfig | 11 ++++ net/sched/sch_prio.c | 129 ++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 134 insertions(+), 15 deletions(-) (limited to 'net/sched') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index d10f3533850..268c51599eb 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -101,6 +101,15 @@ struct tc_prio_qopt __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */ }; +enum +{ + TCA_PRIO_UNSPEC, + TCA_PRIO_MQ, + __TCA_PRIO_MAX +}; + +#define TCA_PRIO_MAX (__TCA_PRIO_MAX - 1) + /* TBF section */ struct tc_tbf_qopt diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 475df8449be..f3217942ca8 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -111,6 +111,17 @@ config NET_SCH_PRIO To compile this code as a module, choose M here: the module will be called sch_prio. +config NET_SCH_RR + tristate "Multi Band Round Robin Queuing (RR)" + select NET_SCH_PRIO + ---help--- + Say Y here if you want to use an n-band round robin packet + scheduler. + + The module uses sch_prio for its framework and is aliased as + sch_rr, so it will load sch_prio, although it is referred + to using sch_rr. + config NET_SCH_RED tristate "Random Early Detection (RED)" ---help--- diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 6d7542c26e4..40452204628 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -40,9 +40,11 @@ struct prio_sched_data { int bands; + int curband; /* for round-robin */ struct tcf_proto *filter_list; u8 prio2band[TC_PRIO_MAX+1]; struct Qdisc *queues[TCQ_PRIO_BANDS]; + int mq; }; @@ -70,14 +72,17 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) #endif if (TC_H_MAJ(band)) band = 0; - return q->queues[q->prio2band[band&TC_PRIO_MAX]]; + band = q->prio2band[band&TC_PRIO_MAX]; + goto out; } band = res.classid; } band = TC_H_MIN(band) - 1; if (band >= q->bands) - return q->queues[q->prio2band[0]]; - + band = q->prio2band[0]; +out: + if (q->mq) + skb_set_queue_mapping(skb, band); return q->queues[band]; } @@ -144,17 +149,58 @@ prio_dequeue(struct Qdisc* sch) struct Qdisc *qdisc; for (prio = 0; prio < q->bands; prio++) { - qdisc = q->queues[prio]; - skb = qdisc->dequeue(qdisc); - if (skb) { - sch->q.qlen--; - return skb; + /* Check if the target subqueue is available before + * pulling an skb. This way we avoid excessive requeues + * for slower queues. + */ + if (!netif_subqueue_stopped(sch->dev, (q->mq ? prio : 0))) { + qdisc = q->queues[prio]; + skb = qdisc->dequeue(qdisc); + if (skb) { + sch->q.qlen--; + return skb; + } } } return NULL; } +static struct sk_buff *rr_dequeue(struct Qdisc* sch) +{ + struct sk_buff *skb; + struct prio_sched_data *q = qdisc_priv(sch); + struct Qdisc *qdisc; + int bandcount; + + /* Only take one pass through the queues. If nothing is available, + * return nothing. + */ + for (bandcount = 0; bandcount < q->bands; bandcount++) { + /* Check if the target subqueue is available before + * pulling an skb. This way we avoid excessive requeues + * for slower queues. If the queue is stopped, try the + * next queue. + */ + if (!netif_subqueue_stopped(sch->dev, + (q->mq ? q->curband : 0))) { + qdisc = q->queues[q->curband]; + skb = qdisc->dequeue(qdisc); + if (skb) { + sch->q.qlen--; + q->curband++; + if (q->curband >= q->bands) + q->curband = 0; + return skb; + } + } + q->curband++; + if (q->curband >= q->bands) + q->curband = 0; + } + return NULL; +} + static unsigned int prio_drop(struct Qdisc* sch) { struct prio_sched_data *q = qdisc_priv(sch); @@ -198,21 +244,41 @@ prio_destroy(struct Qdisc* sch) static int prio_tune(struct Qdisc *sch, struct rtattr *opt) { struct prio_sched_data *q = qdisc_priv(sch); - struct tc_prio_qopt *qopt = RTA_DATA(opt); + struct tc_prio_qopt *qopt; + struct rtattr *tb[TCA_PRIO_MAX]; int i; - if (opt->rta_len < RTA_LENGTH(sizeof(*qopt))) + if (rtattr_parse_nested_compat(tb, TCA_PRIO_MAX, opt, qopt, + sizeof(*qopt))) return -EINVAL; - if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2) + q->bands = qopt->bands; + /* If we're multiqueue, make sure the number of incoming bands + * matches the number of queues on the device we're associating with. + * If the number of bands requested is zero, then set q->bands to + * dev->egress_subqueue_count. + */ + q->mq = RTA_GET_FLAG(tb[TCA_PRIO_MQ - 1]); + if (q->mq) { + if (sch->handle != TC_H_ROOT) + return -EINVAL; + if (netif_is_multiqueue(sch->dev)) { + if (q->bands == 0) + q->bands = sch->dev->egress_subqueue_count; + else if (q->bands != sch->dev->egress_subqueue_count) + return -EINVAL; + } else + return -EOPNOTSUPP; + } + + if (q->bands > TCQ_PRIO_BANDS || q->bands < 2) return -EINVAL; for (i=0; i<=TC_PRIO_MAX; i++) { - if (qopt->priomap[i] >= qopt->bands) + if (qopt->priomap[i] >= q->bands) return -EINVAL; } sch_tree_lock(sch); - q->bands = qopt->bands; memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); for (i=q->bands; ibands; memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1); - RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); + + nest = RTA_NEST_COMPAT(skb, TCA_OPTIONS, sizeof(opt), &opt); + if (q->mq) + RTA_PUT_FLAG(skb, TCA_PRIO_MQ); + RTA_NEST_COMPAT_END(skb, nest); + return skb->len; rtattr_failure: @@ -443,17 +515,44 @@ static struct Qdisc_ops prio_qdisc_ops = { .owner = THIS_MODULE, }; +static struct Qdisc_ops rr_qdisc_ops = { + .next = NULL, + .cl_ops = &prio_class_ops, + .id = "rr", + .priv_size = sizeof(struct prio_sched_data), + .enqueue = prio_enqueue, + .dequeue = rr_dequeue, + .requeue = prio_requeue, + .drop = prio_drop, + .init = prio_init, + .reset = prio_reset, + .destroy = prio_destroy, + .change = prio_tune, + .dump = prio_dump, + .owner = THIS_MODULE, +}; + static int __init prio_module_init(void) { - return register_qdisc(&prio_qdisc_ops); + int err; + + err = register_qdisc(&prio_qdisc_ops); + if (err < 0) + return err; + err = register_qdisc(&rr_qdisc_ops); + if (err < 0) + unregister_qdisc(&prio_qdisc_ops); + return err; } static void __exit prio_module_exit(void) { unregister_qdisc(&prio_qdisc_ops); + unregister_qdisc(&rr_qdisc_ops); } module_init(prio_module_init) module_exit(prio_module_exit) MODULE_LICENSE("GPL"); +MODULE_ALIAS("sch_rr"); -- cgit v1.2.3-70-g09d2 From 876d48aabf30e4981653f1a0a7ae1e262b8c8b6f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 2 Jul 2007 22:46:07 -0700 Subject: [NET_SCHED]: Remove CONFIG_NET_ESTIMATOR option The generic estimator is always built in anways and all the config options does is prevent including a minimal amount of code for setting it up. Additionally the option is already automatically selected for most cases. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/Kconfig | 12 ------------ net/sched/act_api.c | 6 ------ net/sched/act_police.c | 18 ------------------ net/sched/sch_api.c | 6 ------ net/sched/sch_cbq.c | 8 -------- net/sched/sch_generic.c | 2 -- net/sched/sch_hfsc.c | 8 -------- 7 files changed, 60 deletions(-) (limited to 'net/sched') diff --git a/net/sched/Kconfig b/net/sched/Kconfig index f3217942ca8..b4662888bdb 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -286,7 +286,6 @@ config CLS_U32_MARK config NET_CLS_RSVP tristate "IPv4 Resource Reservation Protocol (RSVP)" select NET_CLS - select NET_ESTIMATOR ---help--- The Resource Reservation Protocol (RSVP) permits end systems to request a minimum and maximum data flow rate for a connection; this @@ -301,7 +300,6 @@ config NET_CLS_RSVP config NET_CLS_RSVP6 tristate "IPv6 Resource Reservation Protocol (RSVP6)" select NET_CLS - select NET_ESTIMATOR ---help--- The Resource Reservation Protocol (RSVP) permits end systems to request a minimum and maximum data flow rate for a connection; this @@ -393,7 +391,6 @@ config NET_EMATCH_TEXT config NET_CLS_ACT bool "Actions" - select NET_ESTIMATOR ---help--- Say Y here if you want to use traffic control actions. Actions get attached to classifiers and are invoked after a successful @@ -476,7 +473,6 @@ config NET_ACT_SIMP config NET_CLS_POLICE bool "Traffic Policing (obsolete)" depends on NET_CLS_ACT!=y - select NET_ESTIMATOR ---help--- Say Y here if you want to do traffic policing, i.e. strict bandwidth limiting. This option is obsoleted by the traffic @@ -491,14 +487,6 @@ config NET_CLS_IND classification based on the incoming device. This option is likely to disappear in favour of the metadata ematch. -config NET_ESTIMATOR - bool "Rate estimator" - ---help--- - Say Y here to allow using rate estimators to estimate the current - rate-of-flow for network devices, queues, etc. This module is - automatically selected if needed but can be selected manually for - statistical purposes. - endif # NET_SCHED endmenu diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 711dd26c95c..72bb9bd1a22 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -42,10 +42,8 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) write_lock_bh(hinfo->lock); *p1p = p->tcfc_next; write_unlock_bh(hinfo->lock); -#ifdef CONFIG_NET_ESTIMATOR gen_kill_estimator(&p->tcfc_bstats, &p->tcfc_rate_est); -#endif kfree(p); return; } @@ -236,11 +234,9 @@ struct tcf_common *tcf_hash_create(u32 index, struct rtattr *est, struct tc_acti p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo); p->tcfc_tm.install = jiffies; p->tcfc_tm.lastuse = jiffies; -#ifdef CONFIG_NET_ESTIMATOR if (est) gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est, p->tcfc_stats_lock, est); -#endif a->priv = (void *) p; return p; } @@ -614,9 +610,7 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, goto errout; if (gnet_stats_copy_basic(&d, &h->tcf_bstats) < 0 || -#ifdef CONFIG_NET_ESTIMATOR gnet_stats_copy_rate_est(&d, &h->tcf_rate_est) < 0 || -#endif gnet_stats_copy_queue(&d, &h->tcf_qstats) < 0) goto errout; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 616f465f407..580698db578 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -118,10 +118,8 @@ void tcf_police_destroy(struct tcf_police *p) write_lock_bh(&police_lock); *p1p = p->tcf_next; write_unlock_bh(&police_lock); -#ifdef CONFIG_NET_ESTIMATOR gen_kill_estimator(&p->tcf_bstats, &p->tcf_rate_est); -#endif if (p->tcfp_R_tab) qdisc_put_rtab(p->tcfp_R_tab); if (p->tcfp_P_tab) @@ -227,7 +225,6 @@ override: police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); police->tcf_action = parm->action; -#ifdef CONFIG_NET_ESTIMATOR if (tb[TCA_POLICE_AVRATE-1]) police->tcfp_ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); @@ -235,7 +232,6 @@ override: gen_replace_estimator(&police->tcf_bstats, &police->tcf_rate_est, police->tcf_stats_lock, est); -#endif spin_unlock_bh(&police->tcf_lock); if (ret != ACT_P_CREATED) @@ -281,14 +277,12 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, police->tcf_bstats.bytes += skb->len; police->tcf_bstats.packets++; -#ifdef CONFIG_NET_ESTIMATOR if (police->tcfp_ewma_rate && police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { police->tcf_qstats.overlimits++; spin_unlock(&police->tcf_lock); return police->tcf_action; } -#endif if (skb->len <= police->tcfp_mtu) { if (police->tcfp_R_tab == NULL) { @@ -348,10 +342,8 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) if (police->tcfp_result) RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &police->tcfp_result); -#ifdef CONFIG_NET_ESTIMATOR if (police->tcfp_ewma_rate) RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate); -#endif return skb->len; rtattr_failure: @@ -477,14 +469,12 @@ struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est) goto failure; police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); } -#ifdef CONFIG_NET_ESTIMATOR if (tb[TCA_POLICE_AVRATE-1]) { if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32)) goto failure; police->tcfp_ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); } -#endif police->tcfp_toks = police->tcfp_burst = parm->burst; police->tcfp_mtu = parm->mtu; if (police->tcfp_mtu == 0) { @@ -498,11 +488,9 @@ struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est) police->tcf_index = parm->index ? parm->index : tcf_police_new_index(); police->tcf_action = parm->action; -#ifdef CONFIG_NET_ESTIMATOR if (est) gen_new_estimator(&police->tcf_bstats, &police->tcf_rate_est, police->tcf_stats_lock, est); -#endif h = tcf_hash(police->tcf_index, POL_TAB_MASK); write_lock_bh(&police_lock); police->tcf_next = tcf_police_ht[h]; @@ -528,14 +516,12 @@ int tcf_police(struct sk_buff *skb, struct tcf_police *police) police->tcf_bstats.bytes += skb->len; police->tcf_bstats.packets++; -#ifdef CONFIG_NET_ESTIMATOR if (police->tcfp_ewma_rate && police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { police->tcf_qstats.overlimits++; spin_unlock(&police->tcf_lock); return police->tcf_action; } -#endif if (skb->len <= police->tcfp_mtu) { if (police->tcfp_R_tab == NULL) { spin_unlock(&police->tcf_lock); @@ -591,10 +577,8 @@ int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police) if (police->tcfp_result) RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &police->tcfp_result); -#ifdef CONFIG_NET_ESTIMATOR if (police->tcfp_ewma_rate) RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate); -#endif return skb->len; rtattr_failure: @@ -612,9 +596,7 @@ int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *police) goto errout; if (gnet_stats_copy_basic(&d, &police->tcf_bstats) < 0 || -#ifdef CONFIG_NET_ESTIMATOR gnet_stats_copy_rate_est(&d, &police->tcf_rate_est) < 0 || -#endif gnet_stats_copy_queue(&d, &police->tcf_qstats) < 0) goto errout; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index bec600af03c..0f9e1c71746 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -515,7 +515,6 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) sch->handle = handle; if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) { -#ifdef CONFIG_NET_ESTIMATOR if (tca[TCA_RATE-1]) { err = gen_new_estimator(&sch->bstats, &sch->rate_est, sch->stats_lock, @@ -531,7 +530,6 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) goto err_out3; } } -#endif qdisc_lock_tree(dev); list_add_tail(&sch->list, &dev->qdisc_list); qdisc_unlock_tree(dev); @@ -559,11 +557,9 @@ static int qdisc_change(struct Qdisc *sch, struct rtattr **tca) if (err) return err; } -#ifdef CONFIG_NET_ESTIMATOR if (tca[TCA_RATE-1]) gen_replace_estimator(&sch->bstats, &sch->rate_est, sch->stats_lock, tca[TCA_RATE-1]); -#endif return 0; } @@ -839,9 +835,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, goto rtattr_failure; if (gnet_stats_copy_basic(&d, &q->bstats) < 0 || -#ifdef CONFIG_NET_ESTIMATOR gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 || -#endif gnet_stats_copy_queue(&d, &q->qstats) < 0) goto rtattr_failure; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index ee2d5967d10..bf1ea9e75cd 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1653,9 +1653,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, cl->xstats.undertime = cl->undertime - q->now; if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || -#ifdef CONFIG_NET_ESTIMATOR gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || -#endif gnet_stats_copy_queue(d, &cl->qstats) < 0) return -1; @@ -1726,9 +1724,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) tcf_destroy_chain(cl->filter_list); qdisc_destroy(cl->q); qdisc_put_rtab(cl->R_tab); -#ifdef CONFIG_NET_ESTIMATOR gen_kill_estimator(&cl->bstats, &cl->rate_est); -#endif if (cl != &q->link) kfree(cl); } @@ -1873,11 +1869,9 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t sch_tree_unlock(sch); -#ifdef CONFIG_NET_ESTIMATOR if (tca[TCA_RATE-1]) gen_replace_estimator(&cl->bstats, &cl->rate_est, cl->stats_lock, tca[TCA_RATE-1]); -#endif return 0; } @@ -1963,11 +1957,9 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1])); sch_tree_unlock(sch); -#ifdef CONFIG_NET_ESTIMATOR if (tca[TCA_RATE-1]) gen_new_estimator(&cl->bstats, &cl->rate_est, cl->stats_lock, tca[TCA_RATE-1]); -#endif *arg = (unsigned long)cl; return 0; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 2488dbb17b6..e525fd723c1 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -514,9 +514,7 @@ void qdisc_destroy(struct Qdisc *qdisc) return; list_del(&qdisc->list); -#ifdef CONFIG_NET_ESTIMATOR gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); -#endif if (ops->reset) ops->reset(qdisc); if (ops->destroy) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 9d124c4ee3a..7ccdf63a0cb 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1054,11 +1054,9 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, } sch_tree_unlock(sch); -#ifdef CONFIG_NET_ESTIMATOR if (tca[TCA_RATE-1]) gen_replace_estimator(&cl->bstats, &cl->rate_est, cl->stats_lock, tca[TCA_RATE-1]); -#endif return 0; } @@ -1112,11 +1110,9 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->cl_pcvtoff = parent->cl_cvtoff; sch_tree_unlock(sch); -#ifdef CONFIG_NET_ESTIMATOR if (tca[TCA_RATE-1]) gen_new_estimator(&cl->bstats, &cl->rate_est, cl->stats_lock, tca[TCA_RATE-1]); -#endif *arg = (unsigned long)cl; return 0; } @@ -1128,9 +1124,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl) tcf_destroy_chain(cl->filter_list); qdisc_destroy(cl->qdisc); -#ifdef CONFIG_NET_ESTIMATOR gen_kill_estimator(&cl->bstats, &cl->rate_est); -#endif if (cl != &q->root) kfree(cl); } @@ -1384,9 +1378,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg, xstats.rtwork = cl->cl_cumul; if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || -#ifdef CONFIG_NET_ESTIMATOR gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || -#endif gnet_stats_copy_queue(d, &cl->qstats) < 0) return -1; -- cgit v1.2.3-70-g09d2 From 4bdf39911e7a887c4499161422423cbaf16684e8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 2 Jul 2007 22:47:37 -0700 Subject: [NET_SCHED]: Remove unnecessary stats_lock pointers Remove stats_lock pointers from qdisc-internal structures, in all cases it points to dev->queue_lock. The only case where it is necessary is for top-level qdiscs, where it might also point to dev->ingress_lock in case of the ingress qdisc. Also remove it from actions completely, it always points to the actions internal lock. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/act_api.h | 2 -- net/sched/act_api.c | 7 +++---- net/sched/act_police.c | 8 +++----- net/sched/sch_atm.c | 1 - net/sched/sch_cbq.c | 8 +++----- net/sched/sch_hfsc.c | 10 +++------- 6 files changed, 12 insertions(+), 24 deletions(-) (limited to 'net/sched') diff --git a/include/net/act_api.h b/include/net/act_api.h index 8b06c2f3657..2f0273feabd 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -19,7 +19,6 @@ struct tcf_common { struct gnet_stats_basic tcfc_bstats; struct gnet_stats_queue tcfc_qstats; struct gnet_stats_rate_est tcfc_rate_est; - spinlock_t *tcfc_stats_lock; spinlock_t tcfc_lock; }; #define tcf_next common.tcfc_next @@ -32,7 +31,6 @@ struct tcf_common { #define tcf_bstats common.tcfc_bstats #define tcf_qstats common.tcfc_qstats #define tcf_rate_est common.tcfc_rate_est -#define tcf_stats_lock common.tcfc_stats_lock #define tcf_lock common.tcfc_lock struct tcf_police { diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 72bb9bd1a22..32cc191d9f9 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -230,13 +230,12 @@ struct tcf_common *tcf_hash_create(u32 index, struct rtattr *est, struct tc_acti p->tcfc_bindcnt = 1; spin_lock_init(&p->tcfc_lock); - p->tcfc_stats_lock = &p->tcfc_lock; p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo); p->tcfc_tm.install = jiffies; p->tcfc_tm.lastuse = jiffies; if (est) gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est, - p->tcfc_stats_lock, est); + &p->tcfc_lock, est); a->priv = (void *) p; return p; } @@ -595,12 +594,12 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, if (compat_mode) { if (a->type == TCA_OLD_COMPAT) err = gnet_stats_start_copy_compat(skb, 0, - TCA_STATS, TCA_XSTATS, h->tcf_stats_lock, &d); + TCA_STATS, TCA_XSTATS, &h->tcf_lock, &d); else return 0; } else err = gnet_stats_start_copy(skb, TCA_ACT_STATS, - h->tcf_stats_lock, &d); + &h->tcf_lock, &d); if (err < 0) goto errout; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 580698db578..3e8716dd738 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -183,7 +183,6 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est, ret = ACT_P_CREATED; police->tcf_refcnt = 1; spin_lock_init(&police->tcf_lock); - police->tcf_stats_lock = &police->tcf_lock; if (bind) police->tcf_bindcnt = 1; override: @@ -231,7 +230,7 @@ override: if (est) gen_replace_estimator(&police->tcf_bstats, &police->tcf_rate_est, - police->tcf_stats_lock, est); + &police->tcf_lock, est); spin_unlock_bh(&police->tcf_lock); if (ret != ACT_P_CREATED) @@ -450,7 +449,6 @@ struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est) police->tcf_refcnt = 1; spin_lock_init(&police->tcf_lock); - police->tcf_stats_lock = &police->tcf_lock; if (parm->rate.rate) { police->tcfp_R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); @@ -490,7 +488,7 @@ struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est) police->tcf_action = parm->action; if (est) gen_new_estimator(&police->tcf_bstats, &police->tcf_rate_est, - police->tcf_stats_lock, est); + &police->tcf_lock, est); h = tcf_hash(police->tcf_index, POL_TAB_MASK); write_lock_bh(&police_lock); police->tcf_next = tcf_police_ht[h]; @@ -591,7 +589,7 @@ int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *police) struct gnet_dump d; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, police->tcf_stats_lock, + TCA_XSTATS, &police->tcf_lock, &d) < 0) goto errout; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index d1c383fca82..16fe802a66f 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -71,7 +71,6 @@ struct atm_flow_data { int ref; /* reference count */ struct gnet_stats_basic bstats; struct gnet_stats_queue qstats; - spinlock_t *stats_lock; struct atm_flow_data *next; struct atm_flow_data *excess; /* flow for excess traffic; NULL to set CLP instead */ diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index bf1ea9e75cd..b093d8fce78 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -148,7 +148,6 @@ struct cbq_class struct gnet_stats_basic bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; - spinlock_t *stats_lock; struct tc_cbq_xstats xstats; struct tcf_proto *filter_list; @@ -1442,7 +1441,6 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt) q->link.ewma_log = TC_CBQ_DEF_EWMA; q->link.avpkt = q->link.allot/2; q->link.minidle = -0x7FFFFFFF; - q->link.stats_lock = &sch->dev->queue_lock; qdisc_watchdog_init(&q->watchdog, sch); hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); @@ -1871,7 +1869,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t if (tca[TCA_RATE-1]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - cl->stats_lock, tca[TCA_RATE-1]); + &sch->dev->queue_lock, + tca[TCA_RATE-1]); return 0; } @@ -1929,7 +1928,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t cl->allot = parent->allot; cl->quantum = cl->allot; cl->weight = cl->R_tab->rate.rate; - cl->stats_lock = &sch->dev->queue_lock; sch_tree_lock(sch); cbq_link_class(cl); @@ -1959,7 +1957,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t if (tca[TCA_RATE-1]) gen_new_estimator(&cl->bstats, &cl->rate_est, - cl->stats_lock, tca[TCA_RATE-1]); + &sch->dev->queue_lock, tca[TCA_RATE-1]); *arg = (unsigned long)cl; return 0; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 7ccdf63a0cb..7130a2441b0 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -122,7 +122,6 @@ struct hfsc_class struct gnet_stats_basic bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; - spinlock_t *stats_lock; unsigned int level; /* class level in hierarchy */ struct tcf_proto *filter_list; /* filter list */ unsigned int filter_cnt; /* filter count */ @@ -1056,7 +1055,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE-1]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - cl->stats_lock, tca[TCA_RATE-1]); + &sch->dev->queue_lock, + tca[TCA_RATE-1]); return 0; } @@ -1096,7 +1096,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid); if (cl->qdisc == NULL) cl->qdisc = &noop_qdisc; - cl->stats_lock = &sch->dev->queue_lock; INIT_LIST_HEAD(&cl->children); cl->vt_tree = RB_ROOT; cl->cf_tree = RB_ROOT; @@ -1112,7 +1111,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE-1]) gen_new_estimator(&cl->bstats, &cl->rate_est, - cl->stats_lock, tca[TCA_RATE-1]); + &sch->dev->queue_lock, tca[TCA_RATE-1]); *arg = (unsigned long)cl; return 0; } @@ -1440,8 +1439,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt) return -EINVAL; qopt = RTA_DATA(opt); - sch->stats_lock = &sch->dev->queue_lock; - q->defcls = qopt->defcls; for (i = 0; i < HFSC_HSIZE; i++) INIT_LIST_HEAD(&q->clhash[i]); @@ -1456,7 +1453,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt) sch->handle); if (q->root.qdisc == NULL) q->root.qdisc = &noop_qdisc; - q->root.stats_lock = &sch->dev->queue_lock; INIT_LIST_HEAD(&q->root.children); q->root.vt_tree = RB_ROOT; q->root.cf_tree = RB_ROOT; -- cgit v1.2.3-70-g09d2 From ee39e10c27ca5293c72addb95bff864095e19904 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 2 Jul 2007 22:48:13 -0700 Subject: [NET_SCHED]: sch_htb: use generic estimator Use the generic estimator instead of reimplementing (parts of) it. For compatibility always create a default estimator for new classes. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 85 +++++++++++++++-------------------------------------- 1 file changed, 24 insertions(+), 61 deletions(-) (limited to 'net/sched') diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 035788c5b7f..26f81b848bf 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -69,8 +69,6 @@ */ #define HTB_HSIZE 16 /* classid hash size */ -#define HTB_EWMAC 2 /* rate average over HTB_EWMAC*HTB_HSIZE sec */ -#define HTB_RATECM 1 /* whether to use rate computer */ #define HTB_HYSTERESIS 1 /* whether to use mode hysteresis for speedup */ #define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */ @@ -95,12 +93,6 @@ struct htb_class { struct tc_htb_xstats xstats; /* our special stats */ int refcnt; /* usage count of this class */ -#ifdef HTB_RATECM - /* rate measurement counters */ - unsigned long rate_bytes, sum_bytes; - unsigned long rate_packets, sum_packets; -#endif - /* topology */ int level; /* our level (see above) */ struct htb_class *parent; /* parent class */ @@ -194,10 +186,6 @@ struct htb_sched { int rate2quantum; /* quant = rate / rate2quantum */ psched_time_t now; /* cached dequeue time */ struct qdisc_watchdog watchdog; -#ifdef HTB_RATECM - struct timer_list rttim; /* rate computer timer */ - int recmp_bucket; /* which hash bucket to recompute next */ -#endif /* non shaped skbs; let them go directly thru */ struct sk_buff_head direct_queue; @@ -677,34 +665,6 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_SUCCESS; } -#ifdef HTB_RATECM -#define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0 -static void htb_rate_timer(unsigned long arg) -{ - struct Qdisc *sch = (struct Qdisc *)arg; - struct htb_sched *q = qdisc_priv(sch); - struct hlist_node *p; - struct htb_class *cl; - - - /* lock queue so that we can muck with it */ - spin_lock_bh(&sch->dev->queue_lock); - - q->rttim.expires = jiffies + HZ; - add_timer(&q->rttim); - - /* scan and recompute one bucket at time */ - if (++q->recmp_bucket >= HTB_HSIZE) - q->recmp_bucket = 0; - - hlist_for_each_entry(cl,p, q->hash + q->recmp_bucket, hlist) { - RT_GEN(cl->sum_bytes, cl->rate_bytes); - RT_GEN(cl->sum_packets, cl->rate_packets); - } - spin_unlock_bh(&sch->dev->queue_lock); -} -#endif - /** * htb_charge_class - charges amount "bytes" to leaf and ancestors * @@ -750,11 +710,6 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, if (cl->cmode != HTB_CAN_SEND) htb_add_to_wait_tree(q, cl, diff); } -#ifdef HTB_RATECM - /* update rate counters */ - cl->sum_bytes += bytes; - cl->sum_packets++; -#endif /* update byte stats except for leaves which are already updated */ if (cl->level) { @@ -1095,13 +1050,6 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt) if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ q->direct_qlen = 2; -#ifdef HTB_RATECM - init_timer(&q->rttim); - q->rttim.function = htb_rate_timer; - q->rttim.data = (unsigned long)sch; - q->rttim.expires = jiffies + HZ; - add_timer(&q->rttim); -#endif if ((q->rate2quantum = gopt->rate2quantum) < 1) q->rate2quantum = 1; q->defcls = gopt->defcls; @@ -1175,11 +1123,6 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) { struct htb_class *cl = (struct htb_class *)arg; -#ifdef HTB_RATECM - cl->rate_est.bps = cl->rate_bytes / (HTB_EWMAC * HTB_HSIZE); - cl->rate_est.pps = cl->rate_packets / (HTB_EWMAC * HTB_HSIZE); -#endif - if (!cl->level && cl->un.leaf.q) cl->qstats.qlen = cl->un.leaf.q->q.qlen; cl->xstats.tokens = cl->tokens; @@ -1277,6 +1220,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) BUG_TRAP(cl->un.leaf.q); qdisc_destroy(cl->un.leaf.q); } + gen_kill_estimator(&cl->bstats, &cl->rate_est); qdisc_put_rtab(cl->rate); qdisc_put_rtab(cl->ceil); @@ -1305,9 +1249,6 @@ static void htb_destroy(struct Qdisc *sch) struct htb_sched *q = qdisc_priv(sch); qdisc_watchdog_cancel(&q->watchdog); -#ifdef HTB_RATECM - del_timer_sync(&q->rttim); -#endif /* This line used to be after htb_destroy_class call below and surprisingly it worked in 2.4. But it must precede it because filter need its target class alive to be able to call @@ -1403,6 +1344,20 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!cl) { /* new class */ struct Qdisc *new_q; int prio; + struct { + struct rtattr rta; + struct gnet_estimator opt; + } est = { + .rta = { + .rta_len = RTA_LENGTH(sizeof(est.opt)), + .rta_type = TCA_RATE, + }, + .opt = { + /* 4s interval, 16s averaging constant */ + .interval = 2, + .ewma_log = 2, + }, + }; /* check for valid classid */ if (!classid || TC_H_MAJ(classid ^ sch->handle) @@ -1418,6 +1373,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL) goto failure; + gen_new_estimator(&cl->bstats, &cl->rate_est, + &sch->dev->queue_lock, + tca[TCA_RATE-1] ? : &est.rta); cl->refcnt = 1; INIT_LIST_HEAD(&cl->sibling); INIT_HLIST_NODE(&cl->hlist); @@ -1469,8 +1427,13 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, hlist_add_head(&cl->hlist, q->hash + htb_hash(classid)); list_add_tail(&cl->sibling, parent ? &parent->children : &q->root); - } else + } else { + if (tca[TCA_RATE-1]) + gen_replace_estimator(&cl->bstats, &cl->rate_est, + &sch->dev->queue_lock, + tca[TCA_RATE-1]); sch_tree_lock(sch); + } /* it used to be a nasty bug here, we have to check that node is really leaf before changing cl->un.leaf ! */ -- cgit v1.2.3-70-g09d2 From 0ba48053831d5b89ee2afaefaae1c06eae80cb05 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 2 Jul 2007 22:49:07 -0700 Subject: [NET_SCHED]: Remove unnecessary includes Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/act_api.c | 10 ---------- net/sched/act_gact.c | 11 ----------- net/sched/act_ipt.c | 12 ------------ net/sched/act_mirred.c | 12 ------------ net/sched/act_pedit.c | 11 ----------- net/sched/act_police.c | 10 ---------- net/sched/act_simple.c | 1 - net/sched/cls_api.c | 10 ---------- net/sched/cls_basic.c | 1 - net/sched/cls_fw.c | 19 +------------------ net/sched/cls_route.c | 20 +++----------------- net/sched/cls_rsvp.c | 17 +---------------- net/sched/cls_rsvp6.c | 16 ---------------- net/sched/cls_tcindex.c | 3 --- net/sched/cls_u32.c | 18 +----------------- net/sched/ematch.c | 2 -- net/sched/sch_api.c | 12 ------------ net/sched/sch_atm.c | 3 --- net/sched/sch_blackhole.c | 1 - net/sched/sch_cbq.c | 18 +----------------- net/sched/sch_dsmark.c | 1 - net/sched/sch_fifo.c | 1 - net/sched/sch_generic.c | 8 -------- net/sched/sch_gred.c | 1 - net/sched/sch_hfsc.c | 3 --- net/sched/sch_htb.c | 18 +----------------- net/sched/sch_ingress.c | 9 +-------- net/sched/sch_netem.c | 2 -- net/sched/sch_prio.c | 16 ---------------- net/sched/sch_red.c | 1 - net/sched/sch_sfq.c | 18 ++---------------- net/sched/sch_tbf.c | 19 +------------------ net/sched/sch_teql.c | 18 ++---------------- 33 files changed, 14 insertions(+), 308 deletions(-) (limited to 'net/sched') diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 32cc191d9f9..feef366cad5 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -11,23 +11,13 @@ * */ -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include -#include #include #include #include -#include #include #include #include diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 7517f379154..a9631e426d9 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -10,26 +10,15 @@ * */ -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include -#include #include #include #include #include -#include #include -#include #include #include #include diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 00b05f422d4..6b407ece953 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -11,27 +11,15 @@ * Copyright: Jamal Hadi Salim (2002-4) */ -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include -#include #include #include #include #include -#include -#include #include -#include #include #include #include diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index de21c92faaa..579578944ae 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -12,31 +12,19 @@ * */ -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include -#include #include #include #include #include -#include #include -#include #include #include #include -#include #include #define MIRRED_TAB_MASK 7 diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 6f8684b5617..b46fab5fb32 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -9,26 +9,15 @@ * Authors: Jamal Hadi Salim (2002-4) */ -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include -#include #include #include #include #include -#include #include -#include #include #include #include diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 3e8716dd738..d2040389087 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -10,25 +10,15 @@ * J Hadi Salim (action changes) */ -#include -#include -#include #include #include #include #include -#include -#include -#include -#include #include -#include -#include #include #include #include #include -#include #include #include diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 36e1edad599..fb84ef33d14 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index ebf94edf047..36b72aab1bd 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -14,26 +14,16 @@ * */ -#include -#include -#include #include #include #include #include -#include -#include -#include -#include #include -#include -#include #include #include #include #include #include -#include #include #include diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index c885412d79d..8dbcf2771a4 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index bbec4a0d4dc..8adbd6a37d1 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -19,29 +19,12 @@ */ #include -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include +#include #include #include diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index cc941d0ee3a..0a8409c1d28 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -10,28 +10,14 @@ */ #include -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include +#include +#include +#include #include #include diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c index 0a683c07c64..cbb5e0d600f 100644 --- a/net/sched/cls_rsvp.c +++ b/net/sched/cls_rsvp.c @@ -10,27 +10,12 @@ */ #include -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include +#include #include #include #include diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c index 93b6abed57d..dd08aea2aee 100644 --- a/net/sched/cls_rsvp6.c +++ b/net/sched/cls_rsvp6.c @@ -10,28 +10,12 @@ */ #include -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include #include -#include #include -#include #include #include #include diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 47ac0c55642..2314820a080 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -9,12 +9,9 @@ #include #include #include -#include -#include #include #include #include -#include /* diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index c7a347bd6d7..77961e2314d 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -30,30 +30,14 @@ * nfmark match added by Catalin(ux aka Dino) BOIE */ -#include -#include -#include #include #include #include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include #include -#include +#include #include #include diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 63146d339d8..24837391640 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -84,9 +84,7 @@ #include #include #include -#include #include -#include #include #include #include diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 0f9e1c71746..d92ea26982c 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -19,30 +19,18 @@ #include #include #include -#include -#include -#include -#include #include -#include -#include #include #include #include #include #include #include -#include #include #include -#include #include -#include -#include -#include - static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid, struct Qdisc *old, struct Qdisc *new); static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 16fe802a66f..54b92d22796 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -8,15 +8,12 @@ #include #include #include -#include #include #include -#include #include #include /* for fput */ #include #include -#include extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */ diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c index cb0c456aa34..f914fc43a12 100644 --- a/net/sched/sch_blackhole.c +++ b/net/sched/sch_blackhole.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index b093d8fce78..b184c354514 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -11,28 +11,12 @@ */ #include -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include +#include #include diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 3c6fd181263..4d2c233a861 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -9,7 +9,6 @@ #include #include #include -#include /* for pkt_sched */ #include #include #include diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index c2689f4ba8d..c264308f17c 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index e525fd723c1..c81649cf0b9 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -11,27 +11,19 @@ * - Ingress support */ -#include -#include #include #include #include #include #include #include -#include -#include -#include -#include #include -#include #include #include #include #include #include #include -#include #include /* Main transmission queue. */ diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index fa1b4fe7a5f..3cc6dda02e2 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 7130a2441b0..874452c41a0 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -53,7 +53,6 @@ #include #include #include -#include #include #include #include @@ -62,13 +61,11 @@ #include #include #include -#include #include #include #include #include #include -#include #include /* diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 26f81b848bf..c031486b675 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -28,32 +28,16 @@ * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $ */ #include -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include #include #include #include +#include #include -#include #include -#include /* HTB algorithm. Author: devik@cdi.cz diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index f8b9f1cdf73..cd0aab6a2a7 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -9,21 +9,14 @@ #include #include +#include #include -#include #include #include #include #include -#include #include #include -#include -#include -#include -#include -#include -#include #undef DEBUG_INGRESS diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 5d9d8bc9cc3..9e5e87e81f0 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -14,11 +14,9 @@ */ #include -#include #include #include #include -#include #include #include diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 40452204628..2d8c08493d6 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -12,28 +12,12 @@ */ #include -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include #include #include -#include #include diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 00db53eb815..9b95fefb70f 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 96dfdf78d32..95795730985 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -10,31 +10,17 @@ */ #include -#include -#include -#include #include #include #include #include -#include -#include -#include #include #include -#include -#include -#include -#include -#include -#include #include -#include -#include #include -#include #include -#include +#include +#include #include diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 53862953baa..22e431dace5 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -13,29 +13,12 @@ */ #include -#include -#include -#include #include #include -#include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include +#include #include diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index dfe7e452098..0968184ea6b 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -9,30 +9,17 @@ */ #include -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include #include -#include -#include #include -#include -#include #include -#include -#include #include #include -#include +#include +#include #include /* @@ -225,7 +212,6 @@ static int teql_qdisc_init(struct Qdisc *sch, struct rtattr *opt) return 0; } -/* "teql*" netdevice routines */ static int __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev) -- cgit v1.2.3-70-g09d2 From c9726d6890f7f3a892c879e067c3ed839f61e745 Mon Sep 17 00:00:00 2001 From: Ranjit Manomohan Date: Tue, 10 Jul 2007 22:43:16 -0700 Subject: [NET_SCHED]: Make HTB scheduler work with TSO. Currently the HTB scheduler does not correctly account for TSO packets which causes large inaccuracies in the bandwidth control when using TSO. This patch allows the HTB scheduler to work with TSO enabled devices. Signed-off-by: Ranjit Manomohan Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'net/sched') diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index c031486b675..b417a95df32 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -129,15 +129,12 @@ struct htb_class { /* of un.leaf originals should be done. */ }; -/* TODO: maybe compute rate when size is too large .. or drop ? */ static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate, int size) { int slot = size >> rate->rate.cell_log; - if (slot > 255) { - cl->xstats.giants++; - slot = 255; - } + if (slot > 255) + return (rate->data[255]*(slot >> 8) + rate->data[slot & 0xFF]); return rate->data[slot]; } @@ -606,13 +603,14 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) cl->qstats.drops++; return NET_XMIT_DROP; } else { - cl->bstats.packets++; + cl->bstats.packets += + skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1; cl->bstats.bytes += skb->len; htb_activate(q, cl); } sch->q.qlen++; - sch->bstats.packets++; + sch->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1; sch->bstats.bytes += skb->len; return NET_XMIT_SUCCESS; } @@ -661,8 +659,9 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch) * In such case we remove class from event queue first. */ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, - int level, int bytes) + int level, struct sk_buff *skb) { + int bytes = skb->len; long toks, diff; enum htb_cmode old_mode; @@ -698,7 +697,8 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, /* update byte stats except for leaves which are already updated */ if (cl->level) { cl->bstats.bytes += bytes; - cl->bstats.packets++; + cl->bstats.packets += skb_is_gso(skb)? + skb_shinfo(skb)->gso_segs:1; } cl = cl->parent; } @@ -882,7 +882,7 @@ next: gives us slightly better performance */ if (!cl->un.leaf.q->q.qlen) htb_deactivate(q, cl); - htb_charge_class(q, cl, level, skb->len); + htb_charge_class(q, cl, level, skb); } return skb; } -- cgit v1.2.3-70-g09d2 From db3d99c090e0cdb34b1274767e062bfddbb384bc Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 11 Jul 2007 19:46:26 -0700 Subject: [NET_SCHED]: ematch: module autoloading Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/pkt_cls.h | 17 +++++++---------- include/net/pkt_cls.h | 2 ++ net/sched/em_cmp.c | 1 + net/sched/em_meta.c | 2 ++ net/sched/em_nbyte.c | 2 ++ net/sched/em_text.c | 2 ++ net/sched/em_u32.c | 2 ++ net/sched/ematch.c | 13 +++++++++++++ 8 files changed, 31 insertions(+), 10 deletions(-) (limited to 'net/sched') diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h index c3f01b3085a..30b8571e6b3 100644 --- a/include/linux/pkt_cls.h +++ b/include/linux/pkt_cls.h @@ -403,16 +403,13 @@ enum * 1..32767 Reserved for ematches inside kernel tree * 32768..65535 Free to use, not reliable */ -enum -{ - TCF_EM_CONTAINER, - TCF_EM_CMP, - TCF_EM_NBYTE, - TCF_EM_U32, - TCF_EM_META, - TCF_EM_TEXT, - __TCF_EM_MAX -}; +#define TCF_EM_CONTAINER 0 +#define TCF_EM_CMP 1 +#define TCF_EM_NBYTE 2 +#define TCF_EM_U32 3 +#define TCF_EM_META 4 +#define TCF_EM_TEXT 5 +#define TCF_EM_MAX 5 enum { diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 4129df70807..6c29920cbe2 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -306,6 +306,8 @@ static inline int tcf_em_tree_match(struct sk_buff *skb, return 1; } +#define MODULE_ALIAS_TCF_EMATCH(kind) MODULE_ALIAS("ematch-kind-" __stringify(kind)) + #else /* CONFIG_NET_EMATCH */ struct tcf_ematch_tree diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c index 8d6dacd8190..cc49c932641 100644 --- a/net/sched/em_cmp.c +++ b/net/sched/em_cmp.c @@ -98,3 +98,4 @@ MODULE_LICENSE("GPL"); module_init(init_em_cmp); module_exit(exit_em_cmp); +MODULE_ALIAS_TCF_EMATCH(TCF_EM_CMP); diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 60acf8cdb27..650f09c8bd6 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -848,3 +848,5 @@ MODULE_LICENSE("GPL"); module_init(init_em_meta); module_exit(exit_em_meta); + +MODULE_ALIAS_TCF_EMATCH(TCF_EM_META); diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c index b4b36efce29..370a1b2ea31 100644 --- a/net/sched/em_nbyte.c +++ b/net/sched/em_nbyte.c @@ -76,3 +76,5 @@ MODULE_LICENSE("GPL"); module_init(init_em_nbyte); module_exit(exit_em_nbyte); + +MODULE_ALIAS_TCF_EMATCH(TCF_EM_NBYTE); diff --git a/net/sched/em_text.c b/net/sched/em_text.c index e8f46169449..d5cd86efb7d 100644 --- a/net/sched/em_text.c +++ b/net/sched/em_text.c @@ -150,3 +150,5 @@ MODULE_LICENSE("GPL"); module_init(init_em_text); module_exit(exit_em_text); + +MODULE_ALIAS_TCF_EMATCH(TCF_EM_TEXT); diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c index 0a2a7fe08de..112796e4a7c 100644 --- a/net/sched/em_u32.c +++ b/net/sched/em_u32.c @@ -60,3 +60,5 @@ MODULE_LICENSE("GPL"); module_init(init_em_u32); module_exit(exit_em_u32); + +MODULE_ALIAS_TCF_EMATCH(TCF_EM_U32); diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 24837391640..f3a104e323b 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -222,6 +222,19 @@ static int tcf_em_validate(struct tcf_proto *tp, if (em->ops == NULL) { err = -ENOENT; +#ifdef CONFIG_KMOD + __rtnl_unlock(); + request_module("ematch-kind-%u", em_hdr->kind); + rtnl_lock(); + em->ops = tcf_em_lookup(em_hdr->kind); + if (em->ops) { + /* We dropped the RTNL mutex in order to + * perform the module load. Tell the caller + * to replay the request. */ + module_put(em->ops->owner); + err = -EAGAIN; + } +#endif goto errout; } -- cgit v1.2.3-70-g09d2