diff options
Diffstat (limited to 'net/sched')
-rw-r--r-- | net/sched/Kconfig | 2 | ||||
-rw-r--r-- | net/sched/act_api.c | 13 | ||||
-rw-r--r-- | net/sched/cls_api.c | 5 | ||||
-rw-r--r-- | net/sched/cls_basic.c | 3 | ||||
-rw-r--r-- | net/sched/em_meta.c | 295 | ||||
-rw-r--r-- | net/sched/sch_api.c | 10 | ||||
-rw-r--r-- | net/sched/sch_dsmark.c | 373 | ||||
-rw-r--r-- | net/sched/sch_fifo.c | 152 | ||||
-rw-r--r-- | net/sched/sch_generic.c | 84 | ||||
-rw-r--r-- | net/sched/sch_netem.c | 209 |
10 files changed, 646 insertions, 500 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig index b0941186f86..b22c9beb604 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -405,7 +405,7 @@ config NET_EMATCH_STACK ---help--- Size of the local stack variable used while evaluating the tree of ematches. Limits the depth of the tree, i.e. the number of - encapsulated precedences. Every level requires 4 bytes of addtional + encapsulated precedences. Every level requires 4 bytes of additional stack space. config NET_EMATCH_CMP diff --git a/net/sched/act_api.c b/net/sched/act_api.c index cafcb084098..9594206e603 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -428,15 +428,15 @@ errout: static int tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq, - unsigned flags, int event, int bind, int ref) + u16 flags, int event, int bind, int ref) { struct tcamsg *t; struct nlmsghdr *nlh; unsigned char *b = skb->tail; struct rtattr *x; - nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*t)); - nlh->nlmsg_flags = flags; + nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags); + t = NLMSG_DATA(nlh); t->tca_family = AF_UNSPEC; @@ -669,7 +669,7 @@ err: } static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event, - unsigned flags) + u16 flags) { struct tcamsg *t; struct nlmsghdr *nlh; @@ -684,8 +684,7 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event, b = (unsigned char *)skb->tail; - nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*t)); - nlh->nlmsg_flags = flags; + nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags); t = NLMSG_DATA(nlh); t->tca_family = AF_UNSPEC; @@ -881,7 +880,7 @@ static int __init tc_action_init(void) link_p[RTM_GETACTION-RTM_BASE].dumpit = tc_dump_action; } - printk("TC classifier action (bugs to netdev@oss.sgi.com cc " + printk("TC classifier action (bugs to netdev@vger.kernel.org cc " "hadi@cyberus.ca)\n"); return 0; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 56e66c3fe0f..1616bf5c962 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -322,14 +322,13 @@ errout: static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh, - u32 pid, u32 seq, unsigned flags, int event) + u32 pid, u32 seq, u16 flags, int event) { struct tcmsg *tcm; struct nlmsghdr *nlh; unsigned char *b = skb->tail; - nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm)); - nlh->nlmsg_flags = flags; + nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); tcm = NLMSG_DATA(nlh); tcm->tcm_family = AF_UNSPEC; tcm->tcm_ifindex = tp->q->dev->ifindex; diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 0d2d4415f33..dfb300bb6ba 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -261,6 +261,9 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh, rta = (struct rtattr *) b; RTA_PUT(skb, TCA_OPTIONS, 0, NULL); + if (f->res.classid) + RTA_PUT(skb, TCA_BASIC_CLASSID, sizeof(u32), &f->res.classid); + if (tcf_exts_dump(skb, &f->exts, &basic_ext_map) < 0 || tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0) goto rtattr_failure; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index f1eeaf65cee..48bb23c2a35 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -32,7 +32,7 @@ * +-----------+ +-----------+ * | | * ---> meta_ops[INT][INDEV](...) | - * | | + * | | * ----------- | * V V * +-----------+ +-----------+ @@ -70,6 +70,7 @@ #include <net/dst.h> #include <net/route.h> #include <net/pkt_cls.h> +#include <net/sock.h> struct meta_obj { @@ -284,6 +285,214 @@ META_COLLECTOR(int_rtiif) } /************************************************************************** + * Socket Attributes + **************************************************************************/ + +#define SKIP_NONLOCAL(skb) \ + if (unlikely(skb->sk == NULL)) { \ + *err = -1; \ + return; \ + } + +META_COLLECTOR(int_sk_family) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_family; +} + +META_COLLECTOR(int_sk_state) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_state; +} + +META_COLLECTOR(int_sk_reuse) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_reuse; +} + +META_COLLECTOR(int_sk_bound_if) +{ + SKIP_NONLOCAL(skb); + /* No error if bound_dev_if is 0, legal userspace check */ + dst->value = skb->sk->sk_bound_dev_if; +} + +META_COLLECTOR(var_sk_bound_if) +{ + SKIP_NONLOCAL(skb); + + if (skb->sk->sk_bound_dev_if == 0) { + dst->value = (unsigned long) "any"; + dst->len = 3; + } else { + struct net_device *dev; + + dev = dev_get_by_index(skb->sk->sk_bound_dev_if); + *err = var_dev(dev, dst); + if (dev) + dev_put(dev); + } +} + +META_COLLECTOR(int_sk_refcnt) +{ + SKIP_NONLOCAL(skb); + dst->value = atomic_read(&skb->sk->sk_refcnt); +} + +META_COLLECTOR(int_sk_rcvbuf) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_rcvbuf; +} + +META_COLLECTOR(int_sk_shutdown) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_shutdown; +} + +META_COLLECTOR(int_sk_proto) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_protocol; +} + +META_COLLECTOR(int_sk_type) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_type; +} + +META_COLLECTOR(int_sk_rmem_alloc) +{ + SKIP_NONLOCAL(skb); + dst->value = atomic_read(&skb->sk->sk_rmem_alloc); +} + +META_COLLECTOR(int_sk_wmem_alloc) +{ + SKIP_NONLOCAL(skb); + dst->value = atomic_read(&skb->sk->sk_wmem_alloc); +} + +META_COLLECTOR(int_sk_omem_alloc) +{ + SKIP_NONLOCAL(skb); + dst->value = atomic_read(&skb->sk->sk_omem_alloc); +} + +META_COLLECTOR(int_sk_rcv_qlen) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_receive_queue.qlen; +} + +META_COLLECTOR(int_sk_snd_qlen) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_write_queue.qlen; +} + +META_COLLECTOR(int_sk_wmem_queued) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_wmem_queued; +} + +META_COLLECTOR(int_sk_fwd_alloc) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_forward_alloc; +} + +META_COLLECTOR(int_sk_sndbuf) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_sndbuf; +} + +META_COLLECTOR(int_sk_alloc) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_allocation; +} + +META_COLLECTOR(int_sk_route_caps) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_route_caps; +} + +META_COLLECTOR(int_sk_hashent) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_hashent; +} + +META_COLLECTOR(int_sk_lingertime) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_lingertime / HZ; +} + +META_COLLECTOR(int_sk_err_qlen) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_error_queue.qlen; +} + +META_COLLECTOR(int_sk_ack_bl) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_ack_backlog; +} + +META_COLLECTOR(int_sk_max_ack_bl) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_max_ack_backlog; +} + +META_COLLECTOR(int_sk_prio) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_priority; +} + +META_COLLECTOR(int_sk_rcvlowat) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_rcvlowat; +} + +META_COLLECTOR(int_sk_rcvtimeo) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_rcvtimeo / HZ; +} + +META_COLLECTOR(int_sk_sndtimeo) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_sndtimeo / HZ; +} + +META_COLLECTOR(int_sk_sendmsg_off) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_sndmsg_off; +} + +META_COLLECTOR(int_sk_write_pend) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_write_pending; +} + +/************************************************************************** * Meta value collectors assignment table **************************************************************************/ @@ -293,41 +502,75 @@ struct meta_ops struct meta_value *, struct meta_obj *, int *); }; +#define META_ID(name) TCF_META_ID_##name +#define META_FUNC(name) { .get = meta_##name } + /* Meta value operations table listing all meta value collectors and * assigns them to a type and meta id. */ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = { [TCF_META_TYPE_VAR] = { - [TCF_META_ID_DEV] = { .get = meta_var_dev }, - [TCF_META_ID_INDEV] = { .get = meta_var_indev }, - [TCF_META_ID_REALDEV] = { .get = meta_var_realdev } + [META_ID(DEV)] = META_FUNC(var_dev), + [META_ID(INDEV)] = META_FUNC(var_indev), + [META_ID(REALDEV)] = META_FUNC(var_realdev), + [META_ID(SK_BOUND_IF)] = META_FUNC(var_sk_bound_if), }, [TCF_META_TYPE_INT] = { - [TCF_META_ID_RANDOM] = { .get = meta_int_random }, - [TCF_META_ID_LOADAVG_0] = { .get = meta_int_loadavg_0 }, - [TCF_META_ID_LOADAVG_1] = { .get = meta_int_loadavg_1 }, - [TCF_META_ID_LOADAVG_2] = { .get = meta_int_loadavg_2 }, - [TCF_META_ID_DEV] = { .get = meta_int_dev }, - [TCF_META_ID_INDEV] = { .get = meta_int_indev }, - [TCF_META_ID_REALDEV] = { .get = meta_int_realdev }, - [TCF_META_ID_PRIORITY] = { .get = meta_int_priority }, - [TCF_META_ID_PROTOCOL] = { .get = meta_int_protocol }, - [TCF_META_ID_SECURITY] = { .get = meta_int_security }, - [TCF_META_ID_PKTTYPE] = { .get = meta_int_pkttype }, - [TCF_META_ID_PKTLEN] = { .get = meta_int_pktlen }, - [TCF_META_ID_DATALEN] = { .get = meta_int_datalen }, - [TCF_META_ID_MACLEN] = { .get = meta_int_maclen }, + [META_ID(RANDOM)] = META_FUNC(int_random), + [META_ID(LOADAVG_0)] = META_FUNC(int_loadavg_0), + [META_ID(LOADAVG_1)] = META_FUNC(int_loadavg_1), + [META_ID(LOADAVG_2)] = META_FUNC(int_loadavg_2), + [META_ID(DEV)] = META_FUNC(int_dev), + [META_ID(INDEV)] = META_FUNC(int_indev), + [META_ID(REALDEV)] = META_FUNC(int_realdev), + [META_ID(PRIORITY)] = META_FUNC(int_priority), + [META_ID(PROTOCOL)] = META_FUNC(int_protocol), + [META_ID(SECURITY)] = META_FUNC(int_security), + [META_ID(PKTTYPE)] = META_FUNC(int_pkttype), + [META_ID(PKTLEN)] = META_FUNC(int_pktlen), + [META_ID(DATALEN)] = META_FUNC(int_datalen), + [META_ID(MACLEN)] = META_FUNC(int_maclen), #ifdef CONFIG_NETFILTER - [TCF_META_ID_NFMARK] = { .get = meta_int_nfmark }, + [META_ID(NFMARK)] = META_FUNC(int_nfmark), #endif - [TCF_META_ID_TCINDEX] = { .get = meta_int_tcindex }, + [META_ID(TCINDEX)] = META_FUNC(int_tcindex), #ifdef CONFIG_NET_CLS_ACT - [TCF_META_ID_TCVERDICT] = { .get = meta_int_tcverd }, - [TCF_META_ID_TCCLASSID] = { .get = meta_int_tcclassid }, + [META_ID(TCVERDICT)] = META_FUNC(int_tcverd), + [META_ID(TCCLASSID)] = META_FUNC(int_tcclassid), #endif #ifdef CONFIG_NET_CLS_ROUTE - [TCF_META_ID_RTCLASSID] = { .get = meta_int_rtclassid }, + [META_ID(RTCLASSID)] = META_FUNC(int_rtclassid), #endif - [TCF_META_ID_RTIIF] = { .get = meta_int_rtiif } + [META_ID(RTIIF)] = META_FUNC(int_rtiif), + [META_ID(SK_FAMILY)] = META_FUNC(int_sk_family), + [META_ID(SK_STATE)] = META_FUNC(int_sk_state), + [META_ID(SK_REUSE)] = META_FUNC(int_sk_reuse), + [META_ID(SK_BOUND_IF)] = META_FUNC(int_sk_bound_if), + [META_ID(SK_REFCNT)] = META_FUNC(int_sk_refcnt), + [META_ID(SK_RCVBUF)] = META_FUNC(int_sk_rcvbuf), + [META_ID(SK_SNDBUF)] = META_FUNC(int_sk_sndbuf), + [META_ID(SK_SHUTDOWN)] = META_FUNC(int_sk_shutdown), + [META_ID(SK_PROTO)] = META_FUNC(int_sk_proto), + [META_ID(SK_TYPE)] = META_FUNC(int_sk_type), + [META_ID(SK_RMEM_ALLOC)] = META_FUNC(int_sk_rmem_alloc), + [META_ID(SK_WMEM_ALLOC)] = META_FUNC(int_sk_wmem_alloc), + [META_ID(SK_OMEM_ALLOC)] = META_FUNC(int_sk_omem_alloc), + [META_ID(SK_WMEM_QUEUED)] = META_FUNC(int_sk_wmem_queued), + [META_ID(SK_RCV_QLEN)] = META_FUNC(int_sk_rcv_qlen), + [META_ID(SK_SND_QLEN)] = META_FUNC(int_sk_snd_qlen), + [META_ID(SK_ERR_QLEN)] = META_FUNC(int_sk_err_qlen), + [META_ID(SK_FORWARD_ALLOCS)] = META_FUNC(int_sk_fwd_alloc), + [META_ID(SK_ALLOCS)] = META_FUNC(int_sk_alloc), + [META_ID(SK_ROUTE_CAPS)] = META_FUNC(int_sk_route_caps), + [META_ID(SK_HASHENT)] = META_FUNC(int_sk_hashent), + [META_ID(SK_LINGERTIME)] = META_FUNC(int_sk_lingertime), + [META_ID(SK_ACK_BACKLOG)] = META_FUNC(int_sk_ack_bl), + [META_ID(SK_MAX_ACK_BACKLOG)] = META_FUNC(int_sk_max_ack_bl), + [META_ID(SK_PRIO)] = META_FUNC(int_sk_prio), + [META_ID(SK_RCVLOWAT)] = META_FUNC(int_sk_rcvlowat), + [META_ID(SK_RCVTIMEO)] = META_FUNC(int_sk_rcvtimeo), + [META_ID(SK_SNDTIMEO)] = META_FUNC(int_sk_sndtimeo), + [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off), + [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend), } }; @@ -396,9 +639,9 @@ static int meta_int_compare(struct meta_obj *a, struct meta_obj *b) /* Let gcc optimize it, the unlikely is not really based on * some numbers but jump free code for mismatches seems * more logical. */ - if (unlikely(a == b)) + if (unlikely(a->value == b->value)) return 0; - else if (a < b) + else if (a->value < b->value) return -1; else return 1; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 07977f8f267..97c1c75d5c7 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -760,15 +760,14 @@ graft: } static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, - u32 pid, u32 seq, unsigned flags, int event) + u32 pid, u32 seq, u16 flags, int event) { struct tcmsg *tcm; struct nlmsghdr *nlh; unsigned char *b = skb->tail; struct gnet_dump d; - nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm)); - nlh->nlmsg_flags = flags; + nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); tcm = NLMSG_DATA(nlh); tcm->tcm_family = AF_UNSPEC; tcm->tcm_ifindex = q->dev->ifindex; @@ -997,7 +996,7 @@ out: static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, unsigned long cl, - u32 pid, u32 seq, unsigned flags, int event) + u32 pid, u32 seq, u16 flags, int event) { struct tcmsg *tcm; struct nlmsghdr *nlh; @@ -1005,8 +1004,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, struct gnet_dump d; struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; - nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm)); - nlh->nlmsg_flags = flags; + nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); tcm = NLMSG_DATA(nlh); tcm->tcm_family = AF_UNSPEC; tcm->tcm_ifindex = q->dev->ifindex; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 8a3db9d95ba..13e0e7b3856 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -18,7 +18,7 @@ #include <asm/byteorder.h> -#if 1 /* control */ +#if 0 /* control */ #define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) #else #define DPRINTK(format,args...) @@ -31,7 +31,7 @@ #endif -#define PRIV(sch) qdisc_priv(sch) +#define PRIV(sch) ((struct dsmark_qdisc_data *) qdisc_priv(sch)) /* @@ -55,145 +55,163 @@ struct dsmark_qdisc_data { struct Qdisc *q; struct tcf_proto *filter_list; - __u8 *mask; /* "owns" the array */ - __u8 *value; - __u16 indices; - __u32 default_index; /* index range is 0...0xffff */ + u8 *mask; /* "owns" the array */ + u8 *value; + u16 indices; + u32 default_index; /* index range is 0...0xffff */ int set_tc_index; }; +static inline int dsmark_valid_indices(u16 indices) +{ + while (indices != 1) { + if (indices & 1) + return 0; + indices >>= 1; + } + + return 1; +} -/* ------------------------- Class/flow operations ------------------------- */ +static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index) +{ + return (index <= p->indices && index > 0); +} +/* ------------------------- Class/flow operations ------------------------- */ -static int dsmark_graft(struct Qdisc *sch,unsigned long arg, - struct Qdisc *new,struct Qdisc **old) +static int dsmark_graft(struct Qdisc *sch, unsigned long arg, + struct Qdisc *new, struct Qdisc **old) { struct dsmark_qdisc_data *p = PRIV(sch); - DPRINTK("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",sch,p,new, - old); - if (!new) - new = &noop_qdisc; + DPRINTK("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n", + sch, p, new, old); + + if (new == NULL) { + new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + if (new == NULL) + new = &noop_qdisc; + } + sch_tree_lock(sch); - *old = xchg(&p->q,new); - if (*old) - qdisc_reset(*old); + *old = xchg(&p->q, new); + qdisc_reset(*old); sch->q.qlen = 0; - sch_tree_unlock(sch); /* @@@ move up ? */ + sch_tree_unlock(sch); + return 0; } - static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg) { - struct dsmark_qdisc_data *p = PRIV(sch); - - return p->q; + return PRIV(sch)->q; } - -static unsigned long dsmark_get(struct Qdisc *sch,u32 classid) +static unsigned long dsmark_get(struct Qdisc *sch, u32 classid) { - struct dsmark_qdisc_data *p __attribute__((unused)) = PRIV(sch); + DPRINTK("dsmark_get(sch %p,[qdisc %p],classid %x)\n", + sch, PRIV(sch), classid); - DPRINTK("dsmark_get(sch %p,[qdisc %p],classid %x)\n",sch,p,classid); - return TC_H_MIN(classid)+1; + return TC_H_MIN(classid) + 1; } - static unsigned long dsmark_bind_filter(struct Qdisc *sch, - unsigned long parent, u32 classid) + unsigned long parent, u32 classid) { - return dsmark_get(sch,classid); + return dsmark_get(sch, classid); } - static void dsmark_put(struct Qdisc *sch, unsigned long cl) { } - static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, - struct rtattr **tca, unsigned long *arg) + struct rtattr **tca, unsigned long *arg) { struct dsmark_qdisc_data *p = PRIV(sch); struct rtattr *opt = tca[TCA_OPTIONS-1]; struct rtattr *tb[TCA_DSMARK_MAX]; + int err = -EINVAL; + u8 mask = 0; DPRINTK("dsmark_change(sch %p,[qdisc %p],classid %x,parent %x)," - "arg 0x%lx\n",sch,p,classid,parent,*arg); - if (*arg > p->indices) - return -ENOENT; - if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt)) - return -EINVAL; - if (tb[TCA_DSMARK_MASK-1]) { - if (!RTA_PAYLOAD(tb[TCA_DSMARK_MASK-1])) - return -EINVAL; - p->mask[*arg-1] = *(__u8 *) RTA_DATA(tb[TCA_DSMARK_MASK-1]); - } - if (tb[TCA_DSMARK_VALUE-1]) { - if (!RTA_PAYLOAD(tb[TCA_DSMARK_VALUE-1])) - return -EINVAL; - p->value[*arg-1] = *(__u8 *) RTA_DATA(tb[TCA_DSMARK_VALUE-1]); + "arg 0x%lx\n", sch, p, classid, parent, *arg); + + if (!dsmark_valid_index(p, *arg)) { + err = -ENOENT; + goto rtattr_failure; } - return 0; -} + if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt)) + goto rtattr_failure; + + if (tb[TCA_DSMARK_MASK-1]) + mask = RTA_GET_U8(tb[TCA_DSMARK_MASK-1]); + + if (tb[TCA_DSMARK_VALUE-1]) + p->value[*arg-1] = RTA_GET_U8(tb[TCA_DSMARK_VALUE-1]); + + if (tb[TCA_DSMARK_MASK-1]) + p->mask[*arg-1] = mask; -static int dsmark_delete(struct Qdisc *sch,unsigned long arg) + err = 0; + +rtattr_failure: + return err; +} + +static int dsmark_delete(struct Qdisc *sch, unsigned long arg) { struct dsmark_qdisc_data *p = PRIV(sch); - if (!arg || arg > p->indices) + if (!dsmark_valid_index(p, arg)) return -EINVAL; + p->mask[arg-1] = 0xff; p->value[arg-1] = 0; + return 0; } - static void dsmark_walk(struct Qdisc *sch,struct qdisc_walker *walker) { struct dsmark_qdisc_data *p = PRIV(sch); int i; - DPRINTK("dsmark_walk(sch %p,[qdisc %p],walker %p)\n",sch,p,walker); + DPRINTK("dsmark_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker); + if (walker->stop) return; + for (i = 0; i < p->indices; i++) { if (p->mask[i] == 0xff && !p->value[i]) - continue; + goto ignore; if (walker->count >= walker->skip) { if (walker->fn(sch, i+1, walker) < 0) { walker->stop = 1; break; } } - walker->count++; +ignore: + walker->count++; } } - static struct tcf_proto **dsmark_find_tcf(struct Qdisc *sch,unsigned long cl) { - struct dsmark_qdisc_data *p = PRIV(sch); - - return &p->filter_list; + return &PRIV(sch)->filter_list; } - /* --------------------------- Qdisc operations ---------------------------- */ - static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch) { struct dsmark_qdisc_data *p = PRIV(sch); - struct tcf_result res; - int result; - int ret = NET_XMIT_POLICED; + int err; + + D2PRINTK("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); - D2PRINTK("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p); if (p->set_tc_index) { /* FIXME: Safe with non-linear skbs? --RR */ switch (skb->protocol) { @@ -210,17 +228,21 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch) break; }; } - result = TC_POLICE_OK; /* be nice to gcc */ - if (TC_H_MAJ(skb->priority) == sch->handle) { + + if (TC_H_MAJ(skb->priority) == sch->handle) skb->tc_index = TC_H_MIN(skb->priority); - } else { - result = tc_classify(skb,p->filter_list,&res); - D2PRINTK("result %d class 0x%04x\n",result,res.classid); + else { + struct tcf_result res; + int result = tc_classify(skb, p->filter_list, &res); + + D2PRINTK("result %d class 0x%04x\n", result, res.classid); + switch (result) { #ifdef CONFIG_NET_CLS_POLICE case TC_POLICE_SHOT: kfree_skb(skb); - break; + sch->qstats.drops++; + return NET_XMIT_POLICED; #if 0 case TC_POLICE_RECLASSIFY: /* FIXME: what to do here ??? */ @@ -237,43 +259,45 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch) break; }; } - if ( -#ifdef CONFIG_NET_CLS_POLICE - result == TC_POLICE_SHOT || -#endif - ((ret = p->q->enqueue(skb,p->q)) != 0)) { + err = p->q->enqueue(skb,p->q); + if (err != NET_XMIT_SUCCESS) { sch->qstats.drops++; - return ret; + return err; } + sch->bstats.bytes += skb->len; sch->bstats.packets++; sch->q.qlen++; - return ret; -} + return NET_XMIT_SUCCESS; +} static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) { struct dsmark_qdisc_data *p = PRIV(sch); struct sk_buff *skb; - int index; + u32 index; + + D2PRINTK("dsmark_dequeue(sch %p,[qdisc %p])\n", sch, p); - D2PRINTK("dsmark_dequeue(sch %p,[qdisc %p])\n",sch,p); skb = p->q->ops->dequeue(p->q); - if (!skb) + if (skb == NULL) return NULL; + sch->q.qlen--; - index = skb->tc_index & (p->indices-1); - D2PRINTK("index %d->%d\n",skb->tc_index,index); + + index = skb->tc_index & (p->indices - 1); + D2PRINTK("index %d->%d\n", skb->tc_index, index); + switch (skb->protocol) { case __constant_htons(ETH_P_IP): - ipv4_change_dsfield(skb->nh.iph, - p->mask[index],p->value[index]); + ipv4_change_dsfield(skb->nh.iph, p->mask[index], + p->value[index]); break; case __constant_htons(ETH_P_IPV6): - ipv6_change_dsfield(skb->nh.ipv6h, - p->mask[index],p->value[index]); + ipv6_change_dsfield(skb->nh.ipv6h, p->mask[index], + p->value[index]); break; default: /* @@ -287,152 +311,162 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) htons(skb->protocol)); break; }; + return skb; } - static int dsmark_requeue(struct sk_buff *skb,struct Qdisc *sch) { - int ret; struct dsmark_qdisc_data *p = PRIV(sch); + int err; - D2PRINTK("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p); - if ((ret = p->q->ops->requeue(skb, p->q)) == 0) { - sch->q.qlen++; - sch->qstats.requeues++; - return 0; + D2PRINTK("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); + + err = p->q->ops->requeue(skb, p->q); + if (err != NET_XMIT_SUCCESS) { + sch->qstats.drops++; + return err; } - sch->qstats.drops++; - return ret; -} + sch->q.qlen++; + sch->qstats.requeues++; + + return NET_XMIT_SUCCESS; +} static unsigned int dsmark_drop(struct Qdisc *sch) { struct dsmark_qdisc_data *p = PRIV(sch); unsigned int len; - DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n",sch,p); - if (!p->q->ops->drop) - return 0; - if (!(len = p->q->ops->drop(p->q))) + DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n", sch, p); + + if (p->q->ops->drop == NULL) return 0; - sch->q.qlen--; + + len = p->q->ops->drop(p->q); + if (len) + sch->q.qlen--; + return len; } - -static int dsmark_init(struct Qdisc *sch,struct rtattr *opt) +static int dsmark_init(struct Qdisc *sch, struct rtattr *opt) { struct dsmark_qdisc_data *p = PRIV(sch); struct rtattr *tb[TCA_DSMARK_MAX]; - __u16 tmp; - - DPRINTK("dsmark_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt); - if (!opt || - rtattr_parse(tb,TCA_DSMARK_MAX,RTA_DATA(opt),RTA_PAYLOAD(opt)) < 0 || - !tb[TCA_DSMARK_INDICES-1] || - RTA_PAYLOAD(tb[TCA_DSMARK_INDICES-1]) < sizeof(__u16)) - return -EINVAL; - p->indices = *(__u16 *) RTA_DATA(tb[TCA_DSMARK_INDICES-1]); - if (!p->indices) - return -EINVAL; - for (tmp = p->indices; tmp != 1; tmp >>= 1) { - if (tmp & 1) - return -EINVAL; - } - p->default_index = NO_DEFAULT_INDEX; - if (tb[TCA_DSMARK_DEFAULT_INDEX-1]) { - if (RTA_PAYLOAD(tb[TCA_DSMARK_DEFAULT_INDEX-1]) < sizeof(__u16)) - return -EINVAL; - p->default_index = - *(__u16 *) RTA_DATA(tb[TCA_DSMARK_DEFAULT_INDEX-1]); + int err = -EINVAL; + u32 default_index = NO_DEFAULT_INDEX; + u16 indices; + u8 *mask; + + DPRINTK("dsmark_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); + + if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt) < 0) + goto errout; + + indices = RTA_GET_U16(tb[TCA_DSMARK_INDICES-1]); + if (!indices || !dsmark_valid_indices(indices)) + goto errout; + + if (tb[TCA_DSMARK_DEFAULT_INDEX-1]) + default_index = RTA_GET_U16(tb[TCA_DSMARK_DEFAULT_INDEX-1]); + + mask = kmalloc(indices * 2, GFP_KERNEL); + if (mask == NULL) { + err = -ENOMEM; + goto errout; } - p->set_tc_index = !!tb[TCA_DSMARK_SET_TC_INDEX-1]; - p->mask = kmalloc(p->indices*2,GFP_KERNEL); - if (!p->mask) - return -ENOMEM; - p->value = p->mask+p->indices; - memset(p->mask,0xff,p->indices); - memset(p->value,0,p->indices); - if (!(p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops))) + + p->mask = mask; + memset(p->mask, 0xff, indices); + + p->value = p->mask + indices; + memset(p->value, 0, indices); + + p->indices = indices; + p->default_index = default_index; + p->set_tc_index = RTA_GET_FLAG(tb[TCA_DSMARK_SET_TC_INDEX-1]); + + p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + if (p->q == NULL) p->q = &noop_qdisc; - DPRINTK("dsmark_init: qdisc %p\n",&p->q); - return 0; -} + DPRINTK("dsmark_init: qdisc %p\n", p->q); + + err = 0; +errout: +rtattr_failure: + return err; +} static void dsmark_reset(struct Qdisc *sch) { struct dsmark_qdisc_data *p = PRIV(sch); - DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n",sch,p); + DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n", sch, p); qdisc_reset(p->q); sch->q.qlen = 0; } - static void dsmark_destroy(struct Qdisc *sch) { struct dsmark_qdisc_data *p = PRIV(sch); struct tcf_proto *tp; - DPRINTK("dsmark_destroy(sch %p,[qdisc %p])\n",sch,p); + DPRINTK("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p); + while (p->filter_list) { tp = p->filter_list; p->filter_list = tp->next; tcf_destroy(tp); } + qdisc_destroy(p->q); kfree(p->mask); } - static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, - struct sk_buff *skb, struct tcmsg *tcm) + struct sk_buff *skb, struct tcmsg *tcm) { struct dsmark_qdisc_data *p = PRIV(sch); - unsigned char *b = skb->tail; - struct rtattr *rta; + struct rtattr *opts = NULL; + + DPRINTK("dsmark_dump_class(sch %p,[qdisc %p],class %ld\n", sch, p, cl); - DPRINTK("dsmark_dump_class(sch %p,[qdisc %p],class %ld\n",sch,p,cl); - if (!cl || cl > p->indices) + if (!dsmark_valid_index(p, cl)) return -EINVAL; - tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle),cl-1); - rta = (struct rtattr *) b; - RTA_PUT(skb,TCA_OPTIONS,0,NULL); - RTA_PUT(skb,TCA_DSMARK_MASK,1,&p->mask[cl-1]); - RTA_PUT(skb,TCA_DSMARK_VALUE,1,&p->value[cl-1]); - rta->rta_len = skb->tail-b; - return skb->len; + + tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl-1); + + opts = RTA_NEST(skb, TCA_OPTIONS); + RTA_PUT_U8(skb,TCA_DSMARK_MASK, p->mask[cl-1]); + RTA_PUT_U8(skb,TCA_DSMARK_VALUE, p->value[cl-1]); + + return RTA_NEST_END(skb, opts); rtattr_failure: - skb_trim(skb,b-skb->data); - return -1; + return RTA_NEST_CANCEL(skb, opts); } static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb) { struct dsmark_qdisc_data *p = PRIV(sch); - unsigned char *b = skb->tail; - struct rtattr *rta; + struct rtattr *opts = NULL; - rta = (struct rtattr *) b; - RTA_PUT(skb,TCA_OPTIONS,0,NULL); - RTA_PUT(skb,TCA_DSMARK_INDICES,sizeof(__u16),&p->indices); - if (p->default_index != NO_DEFAULT_INDEX) { - __u16 tmp = p->default_index; + opts = RTA_NEST(skb, TCA_OPTIONS); + RTA_PUT_U16(skb, TCA_DSMARK_INDICES, p->indices); + + if (p->default_index != NO_DEFAULT_INDEX) + RTA_PUT_U16(skb, TCA_DSMARK_DEFAULT_INDEX, p->default_index); - RTA_PUT(skb,TCA_DSMARK_DEFAULT_INDEX, sizeof(__u16), &tmp); - } if (p->set_tc_index) - RTA_PUT(skb, TCA_DSMARK_SET_TC_INDEX, 0, NULL); - rta->rta_len = skb->tail-b; - return skb->len; + RTA_PUT_FLAG(skb, TCA_DSMARK_SET_TC_INDEX); + + return RTA_NEST_END(skb, opts); rtattr_failure: - skb_trim(skb,b-skb->data); - return -1; + return RTA_NEST_CANCEL(skb, opts); } static struct Qdisc_class_ops dsmark_class_ops = { @@ -470,10 +504,13 @@ static int __init dsmark_module_init(void) { return register_qdisc(&dsmark_qdisc_ops); } + static void __exit dsmark_module_exit(void) { unregister_qdisc(&dsmark_qdisc_ops); } + module_init(dsmark_module_init) module_exit(dsmark_module_exit) + MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 4888305c96d..033083bf0e7 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -11,131 +11,38 @@ #include <linux/config.h> #include <linux/module.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/if_ether.h> -#include <linux/inet.h> #include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/notifier.h> -#include <net/ip.h> -#include <net/route.h> #include <linux/skbuff.h> -#include <net/sock.h> #include <net/pkt_sched.h> /* 1 band FIFO pseudo-"scheduler" */ struct fifo_sched_data { - unsigned limit; + u32 limit; }; -static int -bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) +static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) { struct fifo_sched_data *q = qdisc_priv(sch); - if (sch->qstats.backlog + skb->len <= q->limit) { - __skb_queue_tail(&sch->q, skb); - sch->qstats.backlog += skb->len; - sch->bstats.bytes += skb->len; - sch->bstats.packets++; - return 0; - } - sch->qstats.drops++; -#ifdef CONFIG_NET_CLS_POLICE - if (sch->reshape_fail==NULL || sch->reshape_fail(skb, sch)) -#endif - kfree_skb(skb); - return NET_XMIT_DROP; -} - -static int -bfifo_requeue(struct sk_buff *skb, struct Qdisc* sch) -{ - __skb_queue_head(&sch->q, skb); - sch->qstats.backlog += skb->len; - sch->qstats.requeues++; - return 0; -} - -static struct sk_buff * -bfifo_dequeue(struct Qdisc* sch) -{ - struct sk_buff *skb; + if (likely(sch->qstats.backlog + skb->len <= q->limit)) + return qdisc_enqueue_tail(skb, sch); - skb = __skb_dequeue(&sch->q); - if (skb) - sch->qstats.backlog -= skb->len; - return skb; + return qdisc_reshape_fail(skb, sch); } -static unsigned int -fifo_drop(struct Qdisc* sch) -{ - struct sk_buff *skb; - - skb = __skb_dequeue_tail(&sch->q); - if (skb) { - unsigned int len = skb->len; - sch->qstats.backlog -= len; - kfree_skb(skb); - return len; - } - return 0; -} - -static void -fifo_reset(struct Qdisc* sch) -{ - skb_queue_purge(&sch->q); - sch->qstats.backlog = 0; -} - -static int -pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) +static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) { struct fifo_sched_data *q = qdisc_priv(sch); - if (sch->q.qlen < q->limit) { - __skb_queue_tail(&sch->q, skb); - sch->bstats.bytes += skb->len; - sch->bstats.packets++; - return 0; - } - sch->qstats.drops++; -#ifdef CONFIG_NET_CLS_POLICE - if (sch->reshape_fail==NULL || sch->reshape_fail(skb, sch)) -#endif - kfree_skb(skb); - return NET_XMIT_DROP; -} - -static int -pfifo_requeue(struct sk_buff *skb, struct Qdisc* sch) -{ - __skb_queue_head(&sch->q, skb); - sch->qstats.requeues++; - return 0; -} - + if (likely(skb_queue_len(&sch->q) < q->limit)) + return qdisc_enqueue_tail(skb, sch); -static struct sk_buff * -pfifo_dequeue(struct Qdisc* sch) -{ - return __skb_dequeue(&sch->q); + return qdisc_reshape_fail(skb, sch); } static int fifo_init(struct Qdisc *sch, struct rtattr *opt) @@ -143,66 +50,59 @@ static int fifo_init(struct Qdisc *sch, struct rtattr *opt) struct fifo_sched_data *q = qdisc_priv(sch); if (opt == NULL) { - unsigned int limit = sch->dev->tx_queue_len ? : 1; + u32 limit = sch->dev->tx_queue_len ? : 1; if (sch->ops == &bfifo_qdisc_ops) - q->limit = limit*sch->dev->mtu; - else - q->limit = limit; + limit *= sch->dev->mtu; + + q->limit = limit; } else { struct tc_fifo_qopt *ctl = RTA_DATA(opt); - if (opt->rta_len < RTA_LENGTH(sizeof(*ctl))) + + if (RTA_PAYLOAD(opt) < sizeof(*ctl)) return -EINVAL; + q->limit = ctl->limit; } + return 0; } static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb) { struct fifo_sched_data *q = qdisc_priv(sch); - unsigned char *b = skb->tail; - struct tc_fifo_qopt opt; + struct tc_fifo_qopt opt = { .limit = q->limit }; - opt.limit = q->limit; RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); - return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); return -1; } struct Qdisc_ops pfifo_qdisc_ops = { - .next = NULL, - .cl_ops = NULL, .id = "pfifo", .priv_size = sizeof(struct fifo_sched_data), .enqueue = pfifo_enqueue, - .dequeue = pfifo_dequeue, - .requeue = pfifo_requeue, - .drop = fifo_drop, + .dequeue = qdisc_dequeue_head, + .requeue = qdisc_requeue, + .drop = qdisc_queue_drop, .init = fifo_init, - .reset = fifo_reset, - .destroy = NULL, + .reset = qdisc_reset_queue, .change = fifo_init, .dump = fifo_dump, .owner = THIS_MODULE, }; struct Qdisc_ops bfifo_qdisc_ops = { - .next = NULL, - .cl_ops = NULL, .id = "bfifo", .priv_size = sizeof(struct fifo_sched_data), .enqueue = bfifo_enqueue, - .dequeue = bfifo_dequeue, - .requeue = bfifo_requeue, - .drop = fifo_drop, + .dequeue = qdisc_dequeue_head, + .requeue = qdisc_requeue, + .drop = qdisc_queue_drop, .init = fifo_init, - .reset = fifo_reset, - .destroy = NULL, + .reset = qdisc_reset_queue, .change = fifo_init, .dump = fifo_dump, .owner = THIS_MODULE, diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 87e48a4e105..7683b34dc6a 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -243,31 +243,27 @@ static void dev_watchdog_down(struct net_device *dev) cheaper. */ -static int -noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc) +static int noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc) { kfree_skb(skb); return NET_XMIT_CN; } -static struct sk_buff * -noop_dequeue(struct Qdisc * qdisc) +static struct sk_buff *noop_dequeue(struct Qdisc * qdisc) { return NULL; } -static int -noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc) +static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc) { if (net_ratelimit()) - printk(KERN_DEBUG "%s deferred output. It is buggy.\n", skb->dev->name); + printk(KERN_DEBUG "%s deferred output. It is buggy.\n", + skb->dev->name); kfree_skb(skb); return NET_XMIT_CN; } struct Qdisc_ops noop_qdisc_ops = { - .next = NULL, - .cl_ops = NULL, .id = "noop", .priv_size = 0, .enqueue = noop_enqueue, @@ -285,8 +281,6 @@ struct Qdisc noop_qdisc = { }; static struct Qdisc_ops noqueue_qdisc_ops = { - .next = NULL, - .cl_ops = NULL, .id = "noqueue", .priv_size = 0, .enqueue = noop_enqueue, @@ -311,97 +305,87 @@ static const u8 prio2band[TC_PRIO_MAX+1] = generic prio+fifo combination. */ -static int -pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) +#define PFIFO_FAST_BANDS 3 + +static inline struct sk_buff_head *prio2list(struct sk_buff *skb, + struct Qdisc *qdisc) { struct sk_buff_head *list = qdisc_priv(qdisc); + return list + prio2band[skb->priority & TC_PRIO_MAX]; +} - list += prio2band[skb->priority&TC_PRIO_MAX]; +static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) +{ + struct sk_buff_head *list = prio2list(skb, qdisc); - if (list->qlen < qdisc->dev->tx_queue_len) { - __skb_queue_tail(list, skb); + if (skb_queue_len(list) < qdisc->dev->tx_queue_len) { qdisc->q.qlen++; - qdisc->bstats.bytes += skb->len; - qdisc->bstats.packets++; - return 0; + return __qdisc_enqueue_tail(skb, qdisc, list); } - qdisc->qstats.drops++; - kfree_skb(skb); - return NET_XMIT_DROP; + + return qdisc_drop(skb, qdisc); } -static struct sk_buff * -pfifo_fast_dequeue(struct Qdisc* qdisc) +static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc) { int prio; struct sk_buff_head *list = qdisc_priv(qdisc); - struct sk_buff *skb; - for (prio = 0; prio < 3; prio++, list++) { - skb = __skb_dequeue(list); + for (prio = 0; prio < PFIFO_FAST_BANDS; prio++, list++) { + struct sk_buff *skb = __qdisc_dequeue_head(qdisc, list); if (skb) { qdisc->q.qlen--; return skb; } } + return NULL; } -static int -pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc) +static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc) { - struct sk_buff_head *list = qdisc_priv(qdisc); - - list += prio2band[skb->priority&TC_PRIO_MAX]; - - __skb_queue_head(list, skb); qdisc->q.qlen++; - qdisc->qstats.requeues++; - return 0; + return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc)); } -static void -pfifo_fast_reset(struct Qdisc* qdisc) +static void pfifo_fast_reset(struct Qdisc* qdisc) { int prio; struct sk_buff_head *list = qdisc_priv(qdisc); - for (prio=0; prio < 3; prio++) - skb_queue_purge(list+prio); + for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) + __qdisc_reset_queue(qdisc, list + prio); + + qdisc->qstats.backlog = 0; qdisc->q.qlen = 0; } static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb) { - unsigned char *b = skb->tail; - struct tc_prio_qopt opt; + struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS }; - opt.bands = 3; memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1); RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); return -1; } static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt) { - int i; + int prio; struct sk_buff_head *list = qdisc_priv(qdisc); - for (i=0; i<3; i++) - skb_queue_head_init(list+i); + for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) + skb_queue_head_init(list + prio); return 0; } static struct Qdisc_ops pfifo_fast_ops = { - .next = NULL, - .cl_ops = NULL, .id = "pfifo_fast", - .priv_size = 3 * sizeof(struct sk_buff_head), + .priv_size = PFIFO_FAST_BANDS * sizeof(struct sk_buff_head), .enqueue = pfifo_fast_enqueue, .dequeue = pfifo_fast_dequeue, .requeue = pfifo_fast_requeue, diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index e0c9fbe73b1..bb9bf8d5003 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -53,7 +53,6 @@ struct netem_sched_data { struct Qdisc *qdisc; - struct sk_buff_head delayed; struct timer_list timer; u32 latency; @@ -63,11 +62,12 @@ struct netem_sched_data { u32 gap; u32 jitter; u32 duplicate; + u32 reorder; struct crndstate { unsigned long last; unsigned long rho; - } delay_cor, loss_cor, dup_cor; + } delay_cor, loss_cor, dup_cor, reorder_cor; struct disttable { u32 size; @@ -137,122 +137,68 @@ static long tabledist(unsigned long mu, long sigma, return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu; } -/* Put skb in the private delayed queue. */ -static int netem_delay(struct Qdisc *sch, struct sk_buff *skb) -{ - struct netem_sched_data *q = qdisc_priv(sch); - psched_tdiff_t td; - psched_time_t now; - - PSCHED_GET_TIME(now); - td = tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist); - - /* Always queue at tail to keep packets in order */ - if (likely(q->delayed.qlen < q->limit)) { - struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb; - - PSCHED_TADD2(now, td, cb->time_to_send); - - pr_debug("netem_delay: skb=%p now=%llu tosend=%llu\n", skb, - now, cb->time_to_send); - - __skb_queue_tail(&q->delayed, skb); - return NET_XMIT_SUCCESS; - } - - pr_debug("netem_delay: queue over limit %d\n", q->limit); - sch->qstats.overlimits++; - kfree_skb(skb); - return NET_XMIT_DROP; -} - /* - * Move a packet that is ready to send from the delay holding - * list to the underlying qdisc. + * Insert one skb into qdisc. + * Note: parent depends on return value to account for queue length. + * NET_XMIT_DROP: queue length didn't change. + * NET_XMIT_SUCCESS: one skb was queued. */ -static int netem_run(struct Qdisc *sch) -{ - struct netem_sched_data *q = qdisc_priv(sch); - struct sk_buff *skb; - psched_time_t now; - - PSCHED_GET_TIME(now); - - skb = skb_peek(&q->delayed); - if (skb) { - const struct netem_skb_cb *cb - = (const struct netem_skb_cb *)skb->cb; - long delay - = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now)); - pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay); - - /* if more time remaining? */ - if (delay > 0) { - mod_timer(&q->timer, jiffies + delay); - return 1; - } - - __skb_unlink(skb, &q->delayed); - - if (q->qdisc->enqueue(skb, q->qdisc)) { - sch->q.qlen--; - sch->qstats.drops++; - } - } - - return 0; -} - static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); + struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb; + struct sk_buff *skb2; int ret; + int count = 1; pr_debug("netem_enqueue skb=%p\n", skb); + /* Random duplication */ + if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) + ++count; + /* Random packet drop 0 => none, ~0 => all */ - if (q->loss && q->loss >= get_crandom(&q->loss_cor)) { - pr_debug("netem_enqueue: random loss\n"); + if (q->loss && q->loss >= get_crandom(&q->loss_cor)) + --count; + + if (count == 0) { sch->qstats.drops++; kfree_skb(skb); - return 0; /* lie about loss so TCP doesn't know */ + return NET_XMIT_DROP; } - /* Random duplication */ - if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) { - struct sk_buff *skb2; - - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 && netem_delay(sch, skb2) == NET_XMIT_SUCCESS) { - struct Qdisc *qp; - - /* Since one packet can generate two packets in the - * queue, the parent's qlen accounting gets confused, - * so fix it. - */ - qp = qdisc_lookup(sch->dev, TC_H_MAJ(sch->parent)); - if (qp) - qp->q.qlen++; - - sch->q.qlen++; - sch->bstats.bytes += skb2->len; - sch->bstats.packets++; - } else - sch->qstats.drops++; + /* + * If we need to duplicate packet, then re-insert at top of the + * qdisc tree, since parent queuer expects that only one + * skb will be queued. + */ + if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { + struct Qdisc *rootq = sch->dev->qdisc; + u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ + q->duplicate = 0; + + rootq->enqueue(skb2, rootq); + q->duplicate = dupsave; } - /* If doing simple delay then gap == 0 so all packets - * go into the delayed holding queue - * otherwise if doing out of order only "1 out of gap" - * packets will be delayed. - */ - if (q->counter < q->gap) { + if (q->gap == 0 /* not doing reordering */ + || q->counter < q->gap /* inside last reordering gap */ + || q->reorder < get_crandom(&q->reorder_cor)) { + psched_time_t now; + PSCHED_GET_TIME(now); + PSCHED_TADD2(now, tabledist(q->latency, q->jitter, + &q->delay_cor, q->delay_dist), + cb->time_to_send); ++q->counter; ret = q->qdisc->enqueue(skb, q->qdisc); } else { + /* + * Do re-ordering by putting one out of N packets at the front + * of the queue. + */ + PSCHED_GET_TIME(cb->time_to_send); q->counter = 0; - ret = netem_delay(sch, skb); - netem_run(sch); + ret = q->qdisc->ops->requeue(skb, q->qdisc); } if (likely(ret == NET_XMIT_SUCCESS)) { @@ -296,22 +242,33 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; - int pending; - - pending = netem_run(sch); skb = q->qdisc->dequeue(q->qdisc); if (skb) { - pr_debug("netem_dequeue: return skb=%p\n", skb); - sch->q.qlen--; - sch->flags &= ~TCQ_F_THROTTLED; - } - else if (pending) { - pr_debug("netem_dequeue: throttling\n"); + const struct netem_skb_cb *cb + = (const struct netem_skb_cb *)skb->cb; + psched_time_t now; + long delay; + + /* if more time remaining? */ + PSCHED_GET_TIME(now); + delay = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now)); + pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay); + if (delay <= 0) { + pr_debug("netem_dequeue: return skb=%p\n", skb); + sch->q.qlen--; + sch->flags &= ~TCQ_F_THROTTLED; + return skb; + } + + mod_timer(&q->timer, jiffies + delay); sch->flags |= TCQ_F_THROTTLED; - } - return skb; + if (q->qdisc->ops->requeue(skb, q->qdisc) != 0) + sch->qstats.drops++; + } + + return NULL; } static void netem_watchdog(unsigned long arg) @@ -328,8 +285,6 @@ static void netem_reset(struct Qdisc *sch) struct netem_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); - skb_queue_purge(&q->delayed); - sch->q.qlen = 0; sch->flags &= ~TCQ_F_THROTTLED; del_timer_sync(&q->timer); @@ -397,6 +352,19 @@ static int get_correlation(struct Qdisc *sch, const struct rtattr *attr) return 0; } +static int get_reorder(struct Qdisc *sch, const struct rtattr *attr) +{ + struct netem_sched_data *q = qdisc_priv(sch); + const struct tc_netem_reorder *r = RTA_DATA(attr); + + if (RTA_PAYLOAD(attr) != sizeof(*r)) + return -EINVAL; + + q->reorder = r->probability; + init_crandom(&q->reorder_cor, r->correlation); + return 0; +} + static int netem_change(struct Qdisc *sch, struct rtattr *opt) { struct netem_sched_data *q = qdisc_priv(sch); @@ -417,9 +385,15 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt) q->jitter = qopt->jitter; q->limit = qopt->limit; q->gap = qopt->gap; + q->counter = 0; q->loss = qopt->loss; q->duplicate = qopt->duplicate; + /* for compatiablity with earlier versions. + * if gap is set, need to assume 100% probablity + */ + q->reorder = ~0; + /* Handle nested options after initial queue options. * Should have put all options in nested format but too late now. */ @@ -441,6 +415,11 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt) if (ret) return ret; } + if (tb[TCA_NETEM_REORDER-1]) { + ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]); + if (ret) + return ret; + } } @@ -455,11 +434,9 @@ static int netem_init(struct Qdisc *sch, struct rtattr *opt) if (!opt) return -EINVAL; - skb_queue_head_init(&q->delayed); init_timer(&q->timer); q->timer.function = netem_watchdog; q->timer.data = (unsigned long) sch; - q->counter = 0; q->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); if (!q->qdisc) { @@ -491,6 +468,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) struct rtattr *rta = (struct rtattr *) b; struct tc_netem_qopt qopt; struct tc_netem_corr cor; + struct tc_netem_reorder reorder; qopt.latency = q->latency; qopt.jitter = q->jitter; @@ -504,6 +482,11 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) cor.loss_corr = q->loss_cor.rho; cor.dup_corr = q->dup_cor.rho; RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor); + + reorder.probability = q->reorder; + reorder.correlation = q->reorder_cor.rho; + RTA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder); + rta->rta_len = skb->tail - b; return skb->len; |