summary | refs | log | tree | commit | diff | stats
path: root/net/sched
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched')
-rw-r--r-- net/sched/Kconfig 2
-rw-r--r-- net/sched/act_api.c 13
-rw-r--r-- net/sched/cls_api.c 5
-rw-r--r-- net/sched/cls_basic.c 3
-rw-r--r-- net/sched/em_meta.c 295
-rw-r--r-- net/sched/sch_api.c 10
-rw-r--r-- net/sched/sch_dsmark.c 373
-rw-r--r-- net/sched/sch_fifo.c 152
-rw-r--r-- net/sched/sch_generic.c 84
-rw-r--r-- net/sched/sch_netem.c 209
10 files changed, 646 insertions, 500 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index b0941186f86..b22c9beb604 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -405,7 +405,7 @@ config NET_EMATCH_STACK
---help---
Size of the local stack variable used while evaluating the tree of
ematches. Limits the depth of the tree, i.e. the number of
- encapsulated precedences. Every level requires 4 bytes of addtional
+ encapsulated precedences. Every level requires 4 bytes of additional
stack space.
config NET_EMATCH_CMP
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index cafcb084098..9594206e603 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -428,15 +428,15 @@ errout:
static int
tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
- unsigned flags, int event, int bind, int ref)
+ u16 flags, int event, int bind, int ref)
{
struct tcamsg *t;
struct nlmsghdr *nlh;
unsigned char *b = skb->tail;
struct rtattr *x;
- nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*t));
- nlh->nlmsg_flags = flags;
+ nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
+
t = NLMSG_DATA(nlh);
t->tca_family = AF_UNSPEC;
@@ -669,7 +669,7 @@ err:
}
static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
- unsigned flags)
+ u16 flags)
{
struct tcamsg *t;
struct nlmsghdr *nlh;
@@ -684,8 +684,7 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
b = (unsigned char *)skb->tail;
- nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*t));
- nlh->nlmsg_flags = flags;
+ nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
t = NLMSG_DATA(nlh);
t->tca_family = AF_UNSPEC;
@@ -881,7 +880,7 @@ static int __init tc_action_init(void)
link_p[RTM_GETACTION-RTM_BASE].dumpit = tc_dump_action;
}
- printk("TC classifier action (bugs to netdev@oss.sgi.com cc "
+ printk("TC classifier action (bugs to netdev@vger.kernel.org cc "
"hadi@cyberus.ca)\n");
return 0;
}
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 56e66c3fe0f..1616bf5c962 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -322,14 +322,13 @@ errout:
static int
tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh,
- u32 pid, u32 seq, unsigned flags, int event)
+ u32 pid, u32 seq, u16 flags, int event)
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
unsigned char *b = skb->tail;
- nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
- nlh->nlmsg_flags = flags;
+ nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
tcm = NLMSG_DATA(nlh);
tcm->tcm_family = AF_UNSPEC;
tcm->tcm_ifindex = tp->q->dev->ifindex;
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 0d2d4415f33..dfb300bb6ba 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -261,6 +261,9 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
rta = (struct rtattr *) b;
RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+ if (f->res.classid)
+ RTA_PUT(skb, TCA_BASIC_CLASSID, sizeof(u32), &f->res.classid);
+
if (tcf_exts_dump(skb, &f->exts, &basic_ext_map) < 0 ||
tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
goto rtattr_failure;
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index f1eeaf65cee..48bb23c2a35 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -32,7 +32,7 @@
* +-----------+ +-----------+
* | |
* ---> meta_ops[INT][INDEV](...) |
- * | |
+ * | |
* ----------- |
* V V
* +-----------+ +-----------+
@@ -70,6 +70,7 @@
#include <net/dst.h>
#include <net/route.h>
#include <net/pkt_cls.h>
+#include <net/sock.h>
struct meta_obj
{
@@ -284,6 +285,214 @@ META_COLLECTOR(int_rtiif)
}
/**************************************************************************
+ * Socket Attributes
+ **************************************************************************/
+
+#define SKIP_NONLOCAL(skb) \
+ if (unlikely(skb->sk == NULL)) { \
+ *err = -1; \
+ return; \
+ }
+
+META_COLLECTOR(int_sk_family)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_family;
+}
+
+META_COLLECTOR(int_sk_state)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_state;
+}
+
+META_COLLECTOR(int_sk_reuse)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_reuse;
+}
+
+META_COLLECTOR(int_sk_bound_if)
+{
+ SKIP_NONLOCAL(skb);
+ /* No error if bound_dev_if is 0, legal userspace check */
+ dst->value = skb->sk->sk_bound_dev_if;
+}
+
+META_COLLECTOR(var_sk_bound_if)
+{
+ SKIP_NONLOCAL(skb);
+
+ if (skb->sk->sk_bound_dev_if == 0) {
+ dst->value = (unsigned long) "any";
+ dst->len = 3;
+ } else {
+ struct net_device *dev;
+
+ dev = dev_get_by_index(skb->sk->sk_bound_dev_if);
+ *err = var_dev(dev, dst);
+ if (dev)
+ dev_put(dev);
+ }
+}
+
+META_COLLECTOR(int_sk_refcnt)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = atomic_read(&skb->sk->sk_refcnt);
+}
+
+META_COLLECTOR(int_sk_rcvbuf)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_rcvbuf;
+}
+
+META_COLLECTOR(int_sk_shutdown)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_shutdown;
+}
+
+META_COLLECTOR(int_sk_proto)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_protocol;
+}
+
+META_COLLECTOR(int_sk_type)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_type;
+}
+
+META_COLLECTOR(int_sk_rmem_alloc)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = atomic_read(&skb->sk->sk_rmem_alloc);
+}
+
+META_COLLECTOR(int_sk_wmem_alloc)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = atomic_read(&skb->sk->sk_wmem_alloc);
+}
+
+META_COLLECTOR(int_sk_omem_alloc)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = atomic_read(&skb->sk->sk_omem_alloc);
+}
+
+META_COLLECTOR(int_sk_rcv_qlen)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_receive_queue.qlen;
+}
+
+META_COLLECTOR(int_sk_snd_qlen)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_write_queue.qlen;
+}
+
+META_COLLECTOR(int_sk_wmem_queued)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_wmem_queued;
+}
+
+META_COLLECTOR(int_sk_fwd_alloc)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_forward_alloc;
+}
+
+META_COLLECTOR(int_sk_sndbuf)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_sndbuf;
+}
+
+META_COLLECTOR(int_sk_alloc)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_allocation;
+}
+
+META_COLLECTOR(int_sk_route_caps)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_route_caps;
+}
+
+META_COLLECTOR(int_sk_hashent)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_hashent;
+}
+
+META_COLLECTOR(int_sk_lingertime)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_lingertime / HZ;
+}
+
+META_COLLECTOR(int_sk_err_qlen)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_error_queue.qlen;
+}
+
+META_COLLECTOR(int_sk_ack_bl)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_ack_backlog;
+}
+
+META_COLLECTOR(int_sk_max_ack_bl)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_max_ack_backlog;
+}
+
+META_COLLECTOR(int_sk_prio)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_priority;
+}
+
+META_COLLECTOR(int_sk_rcvlowat)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_rcvlowat;
+}
+
+META_COLLECTOR(int_sk_rcvtimeo)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_rcvtimeo / HZ;
+}
+
+META_COLLECTOR(int_sk_sndtimeo)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_sndtimeo / HZ;
+}
+
+META_COLLECTOR(int_sk_sendmsg_off)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_sndmsg_off;
+}
+
+META_COLLECTOR(int_sk_write_pend)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_write_pending;
+}
+
+/**************************************************************************
* Meta value collectors assignment table
**************************************************************************/
@@ -293,41 +502,75 @@ struct meta_ops
struct meta_value *, struct meta_obj *, int *);
};
+#define META_ID(name) TCF_META_ID_##name
+#define META_FUNC(name) { .get = meta_##name }
+
/* Meta value operations table listing all meta value collectors and
* assigns them to a type and meta id. */
static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
[TCF_META_TYPE_VAR] = {
- [TCF_META_ID_DEV] = { .get = meta_var_dev },
- [TCF_META_ID_INDEV] = { .get = meta_var_indev },
- [TCF_META_ID_REALDEV] = { .get = meta_var_realdev }
+ [META_ID(DEV)] = META_FUNC(var_dev),
+ [META_ID(INDEV)] = META_FUNC(var_indev),
+ [META_ID(REALDEV)] = META_FUNC(var_realdev),
+ [META_ID(SK_BOUND_IF)] = META_FUNC(var_sk_bound_if),
},
[TCF_META_TYPE_INT] = {
- [TCF_META_ID_RANDOM] = { .get = meta_int_random },
- [TCF_META_ID_LOADAVG_0] = { .get = meta_int_loadavg_0 },
- [TCF_META_ID_LOADAVG_1] = { .get = meta_int_loadavg_1 },
- [TCF_META_ID_LOADAVG_2] = { .get = meta_int_loadavg_2 },
- [TCF_META_ID_DEV] = { .get = meta_int_dev },
- [TCF_META_ID_INDEV] = { .get = meta_int_indev },
- [TCF_META_ID_REALDEV] = { .get = meta_int_realdev },
- [TCF_META_ID_PRIORITY] = { .get = meta_int_priority },
- [TCF_META_ID_PROTOCOL] = { .get = meta_int_protocol },
- [TCF_META_ID_SECURITY] = { .get = meta_int_security },
- [TCF_META_ID_PKTTYPE] = { .get = meta_int_pkttype },
- [TCF_META_ID_PKTLEN] = { .get = meta_int_pktlen },
- [TCF_META_ID_DATALEN] = { .get = meta_int_datalen },
- [TCF_META_ID_MACLEN] = { .get = meta_int_maclen },
+ [META_ID(RANDOM)] = META_FUNC(int_random),
+ [META_ID(LOADAVG_0)] = META_FUNC(int_loadavg_0),
+ [META_ID(LOADAVG_1)] = META_FUNC(int_loadavg_1),
+ [META_ID(LOADAVG_2)] = META_FUNC(int_loadavg_2),
+ [META_ID(DEV)] = META_FUNC(int_dev),
+ [META_ID(INDEV)] = META_FUNC(int_indev),
+ [META_ID(REALDEV)] = META_FUNC(int_realdev),
+ [META_ID(PRIORITY)] = META_FUNC(int_priority),
+ [META_ID(PROTOCOL)] = META_FUNC(int_protocol),
+ [META_ID(SECURITY)] = META_FUNC(int_security),
+ [META_ID(PKTTYPE)] = META_FUNC(int_pkttype),
+ [META_ID(PKTLEN)] = META_FUNC(int_pktlen),
+ [META_ID(DATALEN)] = META_FUNC(int_datalen),
+ [META_ID(MACLEN)] = META_FUNC(int_maclen),
#ifdef CONFIG_NETFILTER
- [TCF_META_ID_NFMARK] = { .get = meta_int_nfmark },
+ [META_ID(NFMARK)] = META_FUNC(int_nfmark),
#endif
- [TCF_META_ID_TCINDEX] = { .get = meta_int_tcindex },
+ [META_ID(TCINDEX)] = META_FUNC(int_tcindex),
#ifdef CONFIG_NET_CLS_ACT
- [TCF_META_ID_TCVERDICT] = { .get = meta_int_tcverd },
- [TCF_META_ID_TCCLASSID] = { .get = meta_int_tcclassid },
+ [META_ID(TCVERDICT)] = META_FUNC(int_tcverd),
+ [META_ID(TCCLASSID)] = META_FUNC(int_tcclassid),
#endif
#ifdef CONFIG_NET_CLS_ROUTE
- [TCF_META_ID_RTCLASSID] = { .get = meta_int_rtclassid },
+ [META_ID(RTCLASSID)] = META_FUNC(int_rtclassid),
#endif
- [TCF_META_ID_RTIIF] = { .get = meta_int_rtiif }
+ [META_ID(RTIIF)] = META_FUNC(int_rtiif),
+ [META_ID(SK_FAMILY)] = META_FUNC(int_sk_family),
+ [META_ID(SK_STATE)] = META_FUNC(int_sk_state),
+ [META_ID(SK_REUSE)] = META_FUNC(int_sk_reuse),
+ [META_ID(SK_BOUND_IF)] = META_FUNC(int_sk_bound_if),
+ [META_ID(SK_REFCNT)] = META_FUNC(int_sk_refcnt),
+ [META_ID(SK_RCVBUF)] = META_FUNC(int_sk_rcvbuf),
+ [META_ID(SK_SNDBUF)] = META_FUNC(int_sk_sndbuf),
+ [META_ID(SK_SHUTDOWN)] = META_FUNC(int_sk_shutdown),
+ [META_ID(SK_PROTO)] = META_FUNC(int_sk_proto),
+ [META_ID(SK_TYPE)] = META_FUNC(int_sk_type),
+ [META_ID(SK_RMEM_ALLOC)] = META_FUNC(int_sk_rmem_alloc),
+ [META_ID(SK_WMEM_ALLOC)] = META_FUNC(int_sk_wmem_alloc),
+ [META_ID(SK_OMEM_ALLOC)] = META_FUNC(int_sk_omem_alloc),
+ [META_ID(SK_WMEM_QUEUED)] = META_FUNC(int_sk_wmem_queued),
+ [META_ID(SK_RCV_QLEN)] = META_FUNC(int_sk_rcv_qlen),
+ [META_ID(SK_SND_QLEN)] = META_FUNC(int_sk_snd_qlen),
+ [META_ID(SK_ERR_QLEN)] = META_FUNC(int_sk_err_qlen),
+ [META_ID(SK_FORWARD_ALLOCS)] = META_FUNC(int_sk_fwd_alloc),
+ [META_ID(SK_ALLOCS)] = META_FUNC(int_sk_alloc),
+ [META_ID(SK_ROUTE_CAPS)] = META_FUNC(int_sk_route_caps),
+ [META_ID(SK_HASHENT)] = META_FUNC(int_sk_hashent),
+ [META_ID(SK_LINGERTIME)] = META_FUNC(int_sk_lingertime),
+ [META_ID(SK_ACK_BACKLOG)] = META_FUNC(int_sk_ack_bl),
+ [META_ID(SK_MAX_ACK_BACKLOG)] = META_FUNC(int_sk_max_ack_bl),
+ [META_ID(SK_PRIO)] = META_FUNC(int_sk_prio),
+ [META_ID(SK_RCVLOWAT)] = META_FUNC(int_sk_rcvlowat),
+ [META_ID(SK_RCVTIMEO)] = META_FUNC(int_sk_rcvtimeo),
+ [META_ID(SK_SNDTIMEO)] = META_FUNC(int_sk_sndtimeo),
+ [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off),
+ [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend),
}
};
@@ -396,9 +639,9 @@ static int meta_int_compare(struct meta_obj *a, struct meta_obj *b)
/* Let gcc optimize it, the unlikely is not really based on
* some numbers but jump free code for mismatches seems
* more logical. */
- if (unlikely(a == b))
+ if (unlikely(a->value == b->value))
return 0;
- else if (a < b)
+ else if (a->value < b->value)
return -1;
else
return 1;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 07977f8f267..97c1c75d5c7 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -760,15 +760,14 @@ graft:
}
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
- u32 pid, u32 seq, unsigned flags, int event)
+ u32 pid, u32 seq, u16 flags, int event)
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
unsigned char *b = skb->tail;
struct gnet_dump d;
- nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
- nlh->nlmsg_flags = flags;
+ nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
tcm = NLMSG_DATA(nlh);
tcm->tcm_family = AF_UNSPEC;
tcm->tcm_ifindex = q->dev->ifindex;
@@ -997,7 +996,7 @@ out:
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
unsigned long cl,
- u32 pid, u32 seq, unsigned flags, int event)
+ u32 pid, u32 seq, u16 flags, int event)
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
@@ -1005,8 +1004,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
struct gnet_dump d;
struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
- nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
- nlh->nlmsg_flags = flags;
+ nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
tcm = NLMSG_DATA(nlh);
tcm->tcm_family = AF_UNSPEC;
tcm->tcm_ifindex = q->dev->ifindex;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 8a3db9d95ba..13e0e7b3856 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -18,7 +18,7 @@
#include <asm/byteorder.h>
-#if 1 /* control */
+#if 0 /* control */
#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
#else
#define DPRINTK(format,args...)
@@ -31,7 +31,7 @@
#endif
-#define PRIV(sch) qdisc_priv(sch)
+#define PRIV(sch) ((struct dsmark_qdisc_data *) qdisc_priv(sch))
/*
@@ -55,145 +55,163 @@
struct dsmark_qdisc_data {
struct Qdisc *q;
struct tcf_proto *filter_list;
- __u8 *mask; /* "owns" the array */
- __u8 *value;
- __u16 indices;
- __u32 default_index; /* index range is 0...0xffff */
+ u8 *mask; /* "owns" the array */
+ u8 *value;
+ u16 indices;
+ u32 default_index; /* index range is 0...0xffff */
int set_tc_index;
};
+static inline int dsmark_valid_indices(u16 indices)
+{
+ while (indices != 1) {
+ if (indices & 1)
+ return 0;
+ indices >>= 1;
+ }
+
+ return 1;
+}
-/* ------------------------- Class/flow operations ------------------------- */
+static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index)
+{
+ return (index <= p->indices && index > 0);
+}
+/* ------------------------- Class/flow operations ------------------------- */
-static int dsmark_graft(struct Qdisc *sch,unsigned long arg,
- struct Qdisc *new,struct Qdisc **old)
+static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
+ struct Qdisc *new, struct Qdisc **old)
{
struct dsmark_qdisc_data *p = PRIV(sch);
- DPRINTK("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",sch,p,new,
- old);
- if (!new)
- new = &noop_qdisc;
+ DPRINTK("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",
+ sch, p, new, old);
+
+ if (new == NULL) {
+ new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
+ if (new == NULL)
+ new = &noop_qdisc;
+ }
+
sch_tree_lock(sch);
- *old = xchg(&p->q,new);
- if (*old)
- qdisc_reset(*old);
+ *old = xchg(&p->q, new);
+ qdisc_reset(*old);
sch->q.qlen = 0;
- sch_tree_unlock(sch); /* @@@ move up ? */
+ sch_tree_unlock(sch);
+
return 0;
}
-
static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg)
{
- struct dsmark_qdisc_data *p = PRIV(sch);
-
- return p->q;
+ return PRIV(sch)->q;
}
-
-static unsigned long dsmark_get(struct Qdisc *sch,u32 classid)
+static unsigned long dsmark_get(struct Qdisc *sch, u32 classid)
{
- struct dsmark_qdisc_data *p __attribute__((unused)) = PRIV(sch);
+ DPRINTK("dsmark_get(sch %p,[qdisc %p],classid %x)\n",
+ sch, PRIV(sch), classid);
- DPRINTK("dsmark_get(sch %p,[qdisc %p],classid %x)\n",sch,p,classid);
- return TC_H_MIN(classid)+1;
+ return TC_H_MIN(classid) + 1;
}
-
static unsigned long dsmark_bind_filter(struct Qdisc *sch,
- unsigned long parent, u32 classid)
+ unsigned long parent, u32 classid)
{
- return dsmark_get(sch,classid);
+ return dsmark_get(sch, classid);
}
-
static void dsmark_put(struct Qdisc *sch, unsigned long cl)
{
}
-
static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
- struct rtattr **tca, unsigned long *arg)
+ struct rtattr **tca, unsigned long *arg)
{
struct dsmark_qdisc_data *p = PRIV(sch);
struct rtattr *opt = tca[TCA_OPTIONS-1];
struct rtattr *tb[TCA_DSMARK_MAX];
+ int err = -EINVAL;
+ u8 mask = 0;
DPRINTK("dsmark_change(sch %p,[qdisc %p],classid %x,parent %x),"
- "arg 0x%lx\n",sch,p,classid,parent,*arg);
- if (*arg > p->indices)
- return -ENOENT;
- if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt))
- return -EINVAL;
- if (tb[TCA_DSMARK_MASK-1]) {
- if (!RTA_PAYLOAD(tb[TCA_DSMARK_MASK-1]))
- return -EINVAL;
- p->mask[*arg-1] = *(__u8 *) RTA_DATA(tb[TCA_DSMARK_MASK-1]);
- }
- if (tb[TCA_DSMARK_VALUE-1]) {
- if (!RTA_PAYLOAD(tb[TCA_DSMARK_VALUE-1]))
- return -EINVAL;
- p->value[*arg-1] = *(__u8 *) RTA_DATA(tb[TCA_DSMARK_VALUE-1]);
+ "arg 0x%lx\n", sch, p, classid, parent, *arg);
+
+ if (!dsmark_valid_index(p, *arg)) {
+ err = -ENOENT;
+ goto rtattr_failure;
}
- return 0;
-}
+ if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt))
+ goto rtattr_failure;
+
+ if (tb[TCA_DSMARK_MASK-1])
+ mask = RTA_GET_U8(tb[TCA_DSMARK_MASK-1]);
+
+ if (tb[TCA_DSMARK_VALUE-1])
+ p->value[*arg-1] = RTA_GET_U8(tb[TCA_DSMARK_VALUE-1]);
+
+ if (tb[TCA_DSMARK_MASK-1])
+ p->mask[*arg-1] = mask;
-static int dsmark_delete(struct Qdisc *sch,unsigned long arg)
+ err = 0;
+
+rtattr_failure:
+ return err;
+}
+
+static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
{
struct dsmark_qdisc_data *p = PRIV(sch);
- if (!arg || arg > p->indices)
+ if (!dsmark_valid_index(p, arg))
return -EINVAL;
+
p->mask[arg-1] = 0xff;
p->value[arg-1] = 0;
+
return 0;
}
-
static void dsmark_walk(struct Qdisc *sch,struct qdisc_walker *walker)
{
struct dsmark_qdisc_data *p = PRIV(sch);
int i;
- DPRINTK("dsmark_walk(sch %p,[qdisc %p],walker %p)\n",sch,p,walker);
+ DPRINTK("dsmark_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
+
if (walker->stop)
return;
+
for (i = 0; i < p->indices; i++) {
if (p->mask[i] == 0xff && !p->value[i])
- continue;
+ goto ignore;
if (walker->count >= walker->skip) {
if (walker->fn(sch, i+1, walker) < 0) {
walker->stop = 1;
break;
}
}
- walker->count++;
+ignore:
+ walker->count++;
}
}
-
static struct tcf_proto **dsmark_find_tcf(struct Qdisc *sch,unsigned long cl)
{
- struct dsmark_qdisc_data *p = PRIV(sch);
-
- return &p->filter_list;
+ return &PRIV(sch)->filter_list;
}
-
/* --------------------------- Qdisc operations ---------------------------- */
-
static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
{
struct dsmark_qdisc_data *p = PRIV(sch);
- struct tcf_result res;
- int result;
- int ret = NET_XMIT_POLICED;
+ int err;
+
+ D2PRINTK("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
- D2PRINTK("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p);
if (p->set_tc_index) {
/* FIXME: Safe with non-linear skbs? --RR */
switch (skb->protocol) {
@@ -210,17 +228,21 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
break;
};
}
- result = TC_POLICE_OK; /* be nice to gcc */
- if (TC_H_MAJ(skb->priority) == sch->handle) {
+
+ if (TC_H_MAJ(skb->priority) == sch->handle)
skb->tc_index = TC_H_MIN(skb->priority);
- } else {
- result = tc_classify(skb,p->filter_list,&res);
- D2PRINTK("result %d class 0x%04x\n",result,res.classid);
+ else {
+ struct tcf_result res;
+ int result = tc_classify(skb, p->filter_list, &res);
+
+ D2PRINTK("result %d class 0x%04x\n", result, res.classid);
+
switch (result) {
#ifdef CONFIG_NET_CLS_POLICE
case TC_POLICE_SHOT:
kfree_skb(skb);
- break;
+ sch->qstats.drops++;
+ return NET_XMIT_POLICED;
#if 0
case TC_POLICE_RECLASSIFY:
/* FIXME: what to do here ??? */
@@ -237,43 +259,45 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
break;
};
}
- if (
-#ifdef CONFIG_NET_CLS_POLICE
- result == TC_POLICE_SHOT ||
-#endif
- ((ret = p->q->enqueue(skb,p->q)) != 0)) {
+ err = p->q->enqueue(skb,p->q);
+ if (err != NET_XMIT_SUCCESS) {
sch->qstats.drops++;
- return ret;
+ return err;
}
+
sch->bstats.bytes += skb->len;
sch->bstats.packets++;
sch->q.qlen++;
- return ret;
-}
+ return NET_XMIT_SUCCESS;
+}
static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
{
struct dsmark_qdisc_data *p = PRIV(sch);
struct sk_buff *skb;
- int index;
+ u32 index;
+
+ D2PRINTK("dsmark_dequeue(sch %p,[qdisc %p])\n", sch, p);
- D2PRINTK("dsmark_dequeue(sch %p,[qdisc %p])\n",sch,p);
skb = p->q->ops->dequeue(p->q);
- if (!skb)
+ if (skb == NULL)
return NULL;
+
sch->q.qlen--;
- index = skb->tc_index & (p->indices-1);
- D2PRINTK("index %d->%d\n",skb->tc_index,index);
+
+ index = skb->tc_index & (p->indices - 1);
+ D2PRINTK("index %d->%d\n", skb->tc_index, index);
+
switch (skb->protocol) {
case __constant_htons(ETH_P_IP):
- ipv4_change_dsfield(skb->nh.iph,
- p->mask[index],p->value[index]);
+ ipv4_change_dsfield(skb->nh.iph, p->mask[index],
+ p->value[index]);
break;
case __constant_htons(ETH_P_IPV6):
- ipv6_change_dsfield(skb->nh.ipv6h,
- p->mask[index],p->value[index]);
+ ipv6_change_dsfield(skb->nh.ipv6h, p->mask[index],
+ p->value[index]);
break;
default:
/*
@@ -287,152 +311,162 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
htons(skb->protocol));
break;
};
+
return skb;
}
-
static int dsmark_requeue(struct sk_buff *skb,struct Qdisc *sch)
{
- int ret;
struct dsmark_qdisc_data *p = PRIV(sch);
+ int err;
- D2PRINTK("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p);
- if ((ret = p->q->ops->requeue(skb, p->q)) == 0) {
- sch->q.qlen++;
- sch->qstats.requeues++;
- return 0;
+ D2PRINTK("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
+
+ err = p->q->ops->requeue(skb, p->q);
+ if (err != NET_XMIT_SUCCESS) {
+ sch->qstats.drops++;
+ return err;
}
- sch->qstats.drops++;
- return ret;
-}
+ sch->q.qlen++;
+ sch->qstats.requeues++;
+
+ return NET_XMIT_SUCCESS;
+}
static unsigned int dsmark_drop(struct Qdisc *sch)
{
struct dsmark_qdisc_data *p = PRIV(sch);
unsigned int len;
- DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n",sch,p);
- if (!p->q->ops->drop)
- return 0;
- if (!(len = p->q->ops->drop(p->q)))
+ DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n", sch, p);
+
+ if (p->q->ops->drop == NULL)
return 0;
- sch->q.qlen--;
+
+ len = p->q->ops->drop(p->q);
+ if (len)
+ sch->q.qlen--;
+
return len;
}
-
-static int dsmark_init(struct Qdisc *sch,struct rtattr *opt)
+static int dsmark_init(struct Qdisc *sch, struct rtattr *opt)
{
struct dsmark_qdisc_data *p = PRIV(sch);
struct rtattr *tb[TCA_DSMARK_MAX];
- __u16 tmp;
-
- DPRINTK("dsmark_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt);
- if (!opt ||
- rtattr_parse(tb,TCA_DSMARK_MAX,RTA_DATA(opt),RTA_PAYLOAD(opt)) < 0 ||
- !tb[TCA_DSMARK_INDICES-1] ||
- RTA_PAYLOAD(tb[TCA_DSMARK_INDICES-1]) < sizeof(__u16))
- return -EINVAL;
- p->indices = *(__u16 *) RTA_DATA(tb[TCA_DSMARK_INDICES-1]);
- if (!p->indices)
- return -EINVAL;
- for (tmp = p->indices; tmp != 1; tmp >>= 1) {
- if (tmp & 1)
- return -EINVAL;
- }
- p->default_index = NO_DEFAULT_INDEX;
- if (tb[TCA_DSMARK_DEFAULT_INDEX-1]) {
- if (RTA_PAYLOAD(tb[TCA_DSMARK_DEFAULT_INDEX-1]) < sizeof(__u16))
- return -EINVAL;
- p->default_index =
- *(__u16 *) RTA_DATA(tb[TCA_DSMARK_DEFAULT_INDEX-1]);
+ int err = -EINVAL;
+ u32 default_index = NO_DEFAULT_INDEX;
+ u16 indices;
+ u8 *mask;
+
+ DPRINTK("dsmark_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
+
+ if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt) < 0)
+ goto errout;
+
+ indices = RTA_GET_U16(tb[TCA_DSMARK_INDICES-1]);
+ if (!indices || !dsmark_valid_indices(indices))
+ goto errout;
+
+ if (tb[TCA_DSMARK_DEFAULT_INDEX-1])
+ default_index = RTA_GET_U16(tb[TCA_DSMARK_DEFAULT_INDEX-1]);
+
+ mask = kmalloc(indices * 2, GFP_KERNEL);
+ if (mask == NULL) {
+ err = -ENOMEM;
+ goto errout;
}
- p->set_tc_index = !!tb[TCA_DSMARK_SET_TC_INDEX-1];
- p->mask = kmalloc(p->indices*2,GFP_KERNEL);
- if (!p->mask)
- return -ENOMEM;
- p->value = p->mask+p->indices;
- memset(p->mask,0xff,p->indices);
- memset(p->value,0,p->indices);
- if (!(p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
+
+ p->mask = mask;
+ memset(p->mask, 0xff, indices);
+
+ p->value = p->mask + indices;
+ memset(p->value, 0, indices);
+
+ p->indices = indices;
+ p->default_index = default_index;
+ p->set_tc_index = RTA_GET_FLAG(tb[TCA_DSMARK_SET_TC_INDEX-1]);
+
+ p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
+ if (p->q == NULL)
p->q = &noop_qdisc;
- DPRINTK("dsmark_init: qdisc %p\n",&p->q);
- return 0;
-}
+ DPRINTK("dsmark_init: qdisc %p\n", p->q);
+
+ err = 0;
+errout:
+rtattr_failure:
+ return err;
+}
static void dsmark_reset(struct Qdisc *sch)
{
struct dsmark_qdisc_data *p = PRIV(sch);
- DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n",sch,p);
+ DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n", sch, p);
qdisc_reset(p->q);
sch->q.qlen = 0;
}
-
static void dsmark_destroy(struct Qdisc *sch)
{
struct dsmark_qdisc_data *p = PRIV(sch);
struct tcf_proto *tp;
- DPRINTK("dsmark_destroy(sch %p,[qdisc %p])\n",sch,p);
+ DPRINTK("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p);
+
while (p->filter_list) {
tp = p->filter_list;
p->filter_list = tp->next;
tcf_destroy(tp);
}
+
qdisc_destroy(p->q);
kfree(p->mask);
}
-
static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
- struct sk_buff *skb, struct tcmsg *tcm)
+ struct sk_buff *skb, struct tcmsg *tcm)
{
struct dsmark_qdisc_data *p = PRIV(sch);
- unsigned char *b = skb->tail;
- struct rtattr *rta;
+ struct rtattr *opts = NULL;
+
+ DPRINTK("dsmark_dump_class(sch %p,[qdisc %p],class %ld\n", sch, p, cl);
- DPRINTK("dsmark_dump_class(sch %p,[qdisc %p],class %ld\n",sch,p,cl);
- if (!cl || cl > p->indices)
+ if (!dsmark_valid_index(p, cl))
return -EINVAL;
- tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle),cl-1);
- rta = (struct rtattr *) b;
- RTA_PUT(skb,TCA_OPTIONS,0,NULL);
- RTA_PUT(skb,TCA_DSMARK_MASK,1,&p->mask[cl-1]);
- RTA_PUT(skb,TCA_DSMARK_VALUE,1,&p->value[cl-1]);
- rta->rta_len = skb->tail-b;
- return skb->len;
+
+ tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl-1);
+
+ opts = RTA_NEST(skb, TCA_OPTIONS);
+ RTA_PUT_U8(skb,TCA_DSMARK_MASK, p->mask[cl-1]);
+ RTA_PUT_U8(skb,TCA_DSMARK_VALUE, p->value[cl-1]);
+
+ return RTA_NEST_END(skb, opts);
rtattr_failure:
- skb_trim(skb,b-skb->data);
- return -1;
+ return RTA_NEST_CANCEL(skb, opts);
}
static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct dsmark_qdisc_data *p = PRIV(sch);
- unsigned char *b = skb->tail;
- struct rtattr *rta;
+ struct rtattr *opts = NULL;
- rta = (struct rtattr *) b;
- RTA_PUT(skb,TCA_OPTIONS,0,NULL);
- RTA_PUT(skb,TCA_DSMARK_INDICES,sizeof(__u16),&p->indices);
- if (p->default_index != NO_DEFAULT_INDEX) {
- __u16 tmp = p->default_index;
+ opts = RTA_NEST(skb, TCA_OPTIONS);
+ RTA_PUT_U16(skb, TCA_DSMARK_INDICES, p->indices);
+
+ if (p->default_index != NO_DEFAULT_INDEX)
+ RTA_PUT_U16(skb, TCA_DSMARK_DEFAULT_INDEX, p->default_index);
- RTA_PUT(skb,TCA_DSMARK_DEFAULT_INDEX, sizeof(__u16), &tmp);
- }
if (p->set_tc_index)
- RTA_PUT(skb, TCA_DSMARK_SET_TC_INDEX, 0, NULL);
- rta->rta_len = skb->tail-b;
- return skb->len;
+ RTA_PUT_FLAG(skb, TCA_DSMARK_SET_TC_INDEX);
+
+ return RTA_NEST_END(skb, opts);
rtattr_failure:
- skb_trim(skb,b-skb->data);
- return -1;
+ return RTA_NEST_CANCEL(skb, opts);
}
static struct Qdisc_class_ops dsmark_class_ops = {
@@ -470,10 +504,13 @@ static int __init dsmark_module_init(void)
{
return register_qdisc(&dsmark_qdisc_ops);
}
+
static void __exit dsmark_module_exit(void)
{
unregister_qdisc(&dsmark_qdisc_ops);
}
+
module_init(dsmark_module_init)
module_exit(dsmark_module_exit)
+
MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 4888305c96d..033083bf0e7 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -11,131 +11,38 @@
#include <linux/config.h>
#include <linux/module.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/if_ether.h>
-#include <linux/inet.h>
#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/notifier.h>
-#include <net/ip.h>
-#include <net/route.h>
#include <linux/skbuff.h>
-#include <net/sock.h>
#include <net/pkt_sched.h>
/* 1 band FIFO pseudo-"scheduler" */
struct fifo_sched_data
{
- unsigned limit;
+ u32 limit;
};
-static int
-bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
{
struct fifo_sched_data *q = qdisc_priv(sch);
- if (sch->qstats.backlog + skb->len <= q->limit) {
- __skb_queue_tail(&sch->q, skb);
- sch->qstats.backlog += skb->len;
- sch->bstats.bytes += skb->len;
- sch->bstats.packets++;
- return 0;
- }
- sch->qstats.drops++;
-#ifdef CONFIG_NET_CLS_POLICE
- if (sch->reshape_fail==NULL || sch->reshape_fail(skb, sch))
-#endif
- kfree_skb(skb);
- return NET_XMIT_DROP;
-}
-
-static int
-bfifo_requeue(struct sk_buff *skb, struct Qdisc* sch)
-{
- __skb_queue_head(&sch->q, skb);
- sch->qstats.backlog += skb->len;
- sch->qstats.requeues++;
- return 0;
-}
-
-static struct sk_buff *
-bfifo_dequeue(struct Qdisc* sch)
-{
- struct sk_buff *skb;
+ if (likely(sch->qstats.backlog + skb->len <= q->limit))
+ return qdisc_enqueue_tail(skb, sch);
- skb = __skb_dequeue(&sch->q);
- if (skb)
- sch->qstats.backlog -= skb->len;
- return skb;
+ return qdisc_reshape_fail(skb, sch);
}
-static unsigned int
-fifo_drop(struct Qdisc* sch)
-{
- struct sk_buff *skb;
-
- skb = __skb_dequeue_tail(&sch->q);
- if (skb) {
- unsigned int len = skb->len;
- sch->qstats.backlog -= len;
- kfree_skb(skb);
- return len;
- }
- return 0;
-}
-
-static void
-fifo_reset(struct Qdisc* sch)
-{
- skb_queue_purge(&sch->q);
- sch->qstats.backlog = 0;
-}
-
-static int
-pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
{
struct fifo_sched_data *q = qdisc_priv(sch);
- if (sch->q.qlen < q->limit) {
- __skb_queue_tail(&sch->q, skb);
- sch->bstats.bytes += skb->len;
- sch->bstats.packets++;
- return 0;
- }
- sch->qstats.drops++;
-#ifdef CONFIG_NET_CLS_POLICE
- if (sch->reshape_fail==NULL || sch->reshape_fail(skb, sch))
-#endif
- kfree_skb(skb);
- return NET_XMIT_DROP;
-}
-
-static int
-pfifo_requeue(struct sk_buff *skb, struct Qdisc* sch)
-{
- __skb_queue_head(&sch->q, skb);
- sch->qstats.requeues++;
- return 0;
-}
-
+ if (likely(skb_queue_len(&sch->q) < q->limit))
+ return qdisc_enqueue_tail(skb, sch);
-static struct sk_buff *
-pfifo_dequeue(struct Qdisc* sch)
-{
- return __skb_dequeue(&sch->q);
+ return qdisc_reshape_fail(skb, sch);
}
static int fifo_init(struct Qdisc *sch, struct rtattr *opt)
@@ -143,66 +50,59 @@ static int fifo_init(struct Qdisc *sch, struct rtattr *opt)
struct fifo_sched_data *q = qdisc_priv(sch);
if (opt == NULL) {
- unsigned int limit = sch->dev->tx_queue_len ? : 1;
+ u32 limit = sch->dev->tx_queue_len ? : 1;
if (sch->ops == &bfifo_qdisc_ops)
- q->limit = limit*sch->dev->mtu;
- else
- q->limit = limit;
+ limit *= sch->dev->mtu;
+
+ q->limit = limit;
} else {
struct tc_fifo_qopt *ctl = RTA_DATA(opt);
- if (opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
+
+ if (RTA_PAYLOAD(opt) < sizeof(*ctl))
return -EINVAL;
+
q->limit = ctl->limit;
}
+
return 0;
}
static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct fifo_sched_data *q = qdisc_priv(sch);
- unsigned char *b = skb->tail;
- struct tc_fifo_qopt opt;
+ struct tc_fifo_qopt opt = { .limit = q->limit };
- opt.limit = q->limit;
RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
-
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
return -1;
}
struct Qdisc_ops pfifo_qdisc_ops = {
- .next = NULL,
- .cl_ops = NULL,
.id = "pfifo",
.priv_size = sizeof(struct fifo_sched_data),
.enqueue = pfifo_enqueue,
- .dequeue = pfifo_dequeue,
- .requeue = pfifo_requeue,
- .drop = fifo_drop,
+ .dequeue = qdisc_dequeue_head,
+ .requeue = qdisc_requeue,
+ .drop = qdisc_queue_drop,
.init = fifo_init,
- .reset = fifo_reset,
- .destroy = NULL,
+ .reset = qdisc_reset_queue,
.change = fifo_init,
.dump = fifo_dump,
.owner = THIS_MODULE,
};
struct Qdisc_ops bfifo_qdisc_ops = {
- .next = NULL,
- .cl_ops = NULL,
.id = "bfifo",
.priv_size = sizeof(struct fifo_sched_data),
.enqueue = bfifo_enqueue,
- .dequeue = bfifo_dequeue,
- .requeue = bfifo_requeue,
- .drop = fifo_drop,
+ .dequeue = qdisc_dequeue_head,
+ .requeue = qdisc_requeue,
+ .drop = qdisc_queue_drop,
.init = fifo_init,
- .reset = fifo_reset,
- .destroy = NULL,
+ .reset = qdisc_reset_queue,
.change = fifo_init,
.dump = fifo_dump,
.owner = THIS_MODULE,
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 87e48a4e105..7683b34dc6a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -243,31 +243,27 @@ static void dev_watchdog_down(struct net_device *dev)
cheaper.
*/
-static int
-noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
+static int noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
{
kfree_skb(skb);
return NET_XMIT_CN;
}
-static struct sk_buff *
-noop_dequeue(struct Qdisc * qdisc)
+static struct sk_buff *noop_dequeue(struct Qdisc * qdisc)
{
return NULL;
}
-static int
-noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
+static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
{
if (net_ratelimit())
- printk(KERN_DEBUG "%s deferred output. It is buggy.\n", skb->dev->name);
+ printk(KERN_DEBUG "%s deferred output. It is buggy.\n",
+ skb->dev->name);
kfree_skb(skb);
return NET_XMIT_CN;
}
struct Qdisc_ops noop_qdisc_ops = {
- .next = NULL,
- .cl_ops = NULL,
.id = "noop",
.priv_size = 0,
.enqueue = noop_enqueue,
@@ -285,8 +281,6 @@ struct Qdisc noop_qdisc = {
};
static struct Qdisc_ops noqueue_qdisc_ops = {
- .next = NULL,
- .cl_ops = NULL,
.id = "noqueue",
.priv_size = 0,
.enqueue = noop_enqueue,
@@ -311,97 +305,87 @@ static const u8 prio2band[TC_PRIO_MAX+1] =
generic prio+fifo combination.
*/
-static int
-pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
+#define PFIFO_FAST_BANDS 3
+
+static inline struct sk_buff_head *prio2list(struct sk_buff *skb,
+ struct Qdisc *qdisc)
{
struct sk_buff_head *list = qdisc_priv(qdisc);
+ return list + prio2band[skb->priority & TC_PRIO_MAX];
+}
- list += prio2band[skb->priority&TC_PRIO_MAX];
+static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
+{
+ struct sk_buff_head *list = prio2list(skb, qdisc);
- if (list->qlen < qdisc->dev->tx_queue_len) {
- __skb_queue_tail(list, skb);
+ if (skb_queue_len(list) < qdisc->dev->tx_queue_len) {
qdisc->q.qlen++;
- qdisc->bstats.bytes += skb->len;
- qdisc->bstats.packets++;
- return 0;
+ return __qdisc_enqueue_tail(skb, qdisc, list);
}
- qdisc->qstats.drops++;
- kfree_skb(skb);
- return NET_XMIT_DROP;
+
+ return qdisc_drop(skb, qdisc);
}
-static struct sk_buff *
-pfifo_fast_dequeue(struct Qdisc* qdisc)
+static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
{
int prio;
struct sk_buff_head *list = qdisc_priv(qdisc);
- struct sk_buff *skb;
- for (prio = 0; prio < 3; prio++, list++) {
- skb = __skb_dequeue(list);
+ for (prio = 0; prio < PFIFO_FAST_BANDS; prio++, list++) {
+ struct sk_buff *skb = __qdisc_dequeue_head(qdisc, list);
if (skb) {
qdisc->q.qlen--;
return skb;
}
}
+
return NULL;
}
-static int
-pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
+static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
{
- struct sk_buff_head *list = qdisc_priv(qdisc);
-
- list += prio2band[skb->priority&TC_PRIO_MAX];
-
- __skb_queue_head(list, skb);
qdisc->q.qlen++;
- qdisc->qstats.requeues++;
- return 0;
+ return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc));
}
-static void
-pfifo_fast_reset(struct Qdisc* qdisc)
+static void pfifo_fast_reset(struct Qdisc* qdisc)
{
int prio;
struct sk_buff_head *list = qdisc_priv(qdisc);
- for (prio=0; prio < 3; prio++)
- skb_queue_purge(list+prio);
+ for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
+ __qdisc_reset_queue(qdisc, list + prio);
+
+ qdisc->qstats.backlog = 0;
qdisc->q.qlen = 0;
}
static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
{
- unsigned char *b = skb->tail;
- struct tc_prio_qopt opt;
+ struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
- opt.bands = 3;
memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
return -1;
}
static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt)
{
- int i;
+ int prio;
struct sk_buff_head *list = qdisc_priv(qdisc);
- for (i=0; i<3; i++)
- skb_queue_head_init(list+i);
+ for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
+ skb_queue_head_init(list + prio);
return 0;
}
static struct Qdisc_ops pfifo_fast_ops = {
- .next = NULL,
- .cl_ops = NULL,
.id = "pfifo_fast",
- .priv_size = 3 * sizeof(struct sk_buff_head),
+ .priv_size = PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
.enqueue = pfifo_fast_enqueue,
.dequeue = pfifo_fast_dequeue,
.requeue = pfifo_fast_requeue,
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index e0c9fbe73b1..bb9bf8d5003 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -53,7 +53,6 @@
struct netem_sched_data {
struct Qdisc *qdisc;
- struct sk_buff_head delayed;
struct timer_list timer;
u32 latency;
@@ -63,11 +62,12 @@ struct netem_sched_data {
u32 gap;
u32 jitter;
u32 duplicate;
+ u32 reorder;
struct crndstate {
unsigned long last;
unsigned long rho;
- } delay_cor, loss_cor, dup_cor;
+ } delay_cor, loss_cor, dup_cor, reorder_cor;
struct disttable {
u32 size;
@@ -137,122 +137,68 @@ static long tabledist(unsigned long mu, long sigma,
return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}
-/* Put skb in the private delayed queue. */
-static int netem_delay(struct Qdisc *sch, struct sk_buff *skb)
-{
- struct netem_sched_data *q = qdisc_priv(sch);
- psched_tdiff_t td;
- psched_time_t now;
-
- PSCHED_GET_TIME(now);
- td = tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist);
-
- /* Always queue at tail to keep packets in order */
- if (likely(q->delayed.qlen < q->limit)) {
- struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
-
- PSCHED_TADD2(now, td, cb->time_to_send);
-
- pr_debug("netem_delay: skb=%p now=%llu tosend=%llu\n", skb,
- now, cb->time_to_send);
-
- __skb_queue_tail(&q->delayed, skb);
- return NET_XMIT_SUCCESS;
- }
-
- pr_debug("netem_delay: queue over limit %d\n", q->limit);
- sch->qstats.overlimits++;
- kfree_skb(skb);
- return NET_XMIT_DROP;
-}
-
/*
- * Move a packet that is ready to send from the delay holding
- * list to the underlying qdisc.
+ * Insert one skb into qdisc.
+ * Note: parent depends on return value to account for queue length.
+ * NET_XMIT_DROP: queue length didn't change.
+ * NET_XMIT_SUCCESS: one skb was queued.
*/
-static int netem_run(struct Qdisc *sch)
-{
- struct netem_sched_data *q = qdisc_priv(sch);
- struct sk_buff *skb;
- psched_time_t now;
-
- PSCHED_GET_TIME(now);
-
- skb = skb_peek(&q->delayed);
- if (skb) {
- const struct netem_skb_cb *cb
- = (const struct netem_skb_cb *)skb->cb;
- long delay
- = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));
- pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay);
-
- /* if more time remaining? */
- if (delay > 0) {
- mod_timer(&q->timer, jiffies + delay);
- return 1;
- }
-
- __skb_unlink(skb, &q->delayed);
-
- if (q->qdisc->enqueue(skb, q->qdisc)) {
- sch->q.qlen--;
- sch->qstats.drops++;
- }
- }
-
- return 0;
-}
-
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
+ struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
+ struct sk_buff *skb2;
int ret;
+ int count = 1;
pr_debug("netem_enqueue skb=%p\n", skb);
+ /* Random duplication */
+ if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
+ ++count;
+
/* Random packet drop 0 => none, ~0 => all */
- if (q->loss && q->loss >= get_crandom(&q->loss_cor)) {
- pr_debug("netem_enqueue: random loss\n");
+ if (q->loss && q->loss >= get_crandom(&q->loss_cor))
+ --count;
+
+ if (count == 0) {
sch->qstats.drops++;
kfree_skb(skb);
- return 0; /* lie about loss so TCP doesn't know */
+ return NET_XMIT_DROP;
}
- /* Random duplication */
- if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) {
- struct sk_buff *skb2;
-
- skb2 = skb_clone(skb, GFP_ATOMIC);
- if (skb2 && netem_delay(sch, skb2) == NET_XMIT_SUCCESS) {
- struct Qdisc *qp;
-
- /* Since one packet can generate two packets in the
- * queue, the parent's qlen accounting gets confused,
- * so fix it.
- */
- qp = qdisc_lookup(sch->dev, TC_H_MAJ(sch->parent));
- if (qp)
- qp->q.qlen++;
-
- sch->q.qlen++;
- sch->bstats.bytes += skb2->len;
- sch->bstats.packets++;
- } else
- sch->qstats.drops++;
+ /*
+ * If we need to duplicate packet, then re-insert at top of the
+ * qdisc tree, since parent queuer expects that only one
+ * skb will be queued.
+ */
+ if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
+ struct Qdisc *rootq = sch->dev->qdisc;
+ u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
+ q->duplicate = 0;
+
+ rootq->enqueue(skb2, rootq);
+ q->duplicate = dupsave;
}
- /* If doing simple delay then gap == 0 so all packets
- * go into the delayed holding queue
- * otherwise if doing out of order only "1 out of gap"
- * packets will be delayed.
- */
- if (q->counter < q->gap) {
+ if (q->gap == 0 /* not doing reordering */
+ || q->counter < q->gap /* inside last reordering gap */
+ || q->reorder < get_crandom(&q->reorder_cor)) {
+ psched_time_t now;
+ PSCHED_GET_TIME(now);
+ PSCHED_TADD2(now, tabledist(q->latency, q->jitter,
+ &q->delay_cor, q->delay_dist),
+ cb->time_to_send);
++q->counter;
ret = q->qdisc->enqueue(skb, q->qdisc);
} else {
+ /*
+ * Do re-ordering by putting one out of N packets at the front
+ * of the queue.
+ */
+ PSCHED_GET_TIME(cb->time_to_send);
q->counter = 0;
- ret = netem_delay(sch, skb);
- netem_run(sch);
+ ret = q->qdisc->ops->requeue(skb, q->qdisc);
}
if (likely(ret == NET_XMIT_SUCCESS)) {
@@ -296,22 +242,33 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
struct sk_buff *skb;
- int pending;
-
- pending = netem_run(sch);
skb = q->qdisc->dequeue(q->qdisc);
if (skb) {
- pr_debug("netem_dequeue: return skb=%p\n", skb);
- sch->q.qlen--;
- sch->flags &= ~TCQ_F_THROTTLED;
- }
- else if (pending) {
- pr_debug("netem_dequeue: throttling\n");
+ const struct netem_skb_cb *cb
+ = (const struct netem_skb_cb *)skb->cb;
+ psched_time_t now;
+ long delay;
+
+ /* if more time remaining? */
+ PSCHED_GET_TIME(now);
+ delay = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));
+ pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay);
+ if (delay <= 0) {
+ pr_debug("netem_dequeue: return skb=%p\n", skb);
+ sch->q.qlen--;
+ sch->flags &= ~TCQ_F_THROTTLED;
+ return skb;
+ }
+
+ mod_timer(&q->timer, jiffies + delay);
sch->flags |= TCQ_F_THROTTLED;
- }
- return skb;
+ if (q->qdisc->ops->requeue(skb, q->qdisc) != 0)
+ sch->qstats.drops++;
+ }
+
+ return NULL;
}
static void netem_watchdog(unsigned long arg)
@@ -328,8 +285,6 @@ static void netem_reset(struct Qdisc *sch)
struct netem_sched_data *q = qdisc_priv(sch);
qdisc_reset(q->qdisc);
- skb_queue_purge(&q->delayed);
-
sch->q.qlen = 0;
sch->flags &= ~TCQ_F_THROTTLED;
del_timer_sync(&q->timer);
@@ -397,6 +352,19 @@ static int get_correlation(struct Qdisc *sch, const struct rtattr *attr)
return 0;
}
+static int get_reorder(struct Qdisc *sch, const struct rtattr *attr)
+{
+ struct netem_sched_data *q = qdisc_priv(sch);
+ const struct tc_netem_reorder *r = RTA_DATA(attr);
+
+ if (RTA_PAYLOAD(attr) != sizeof(*r))
+ return -EINVAL;
+
+ q->reorder = r->probability;
+ init_crandom(&q->reorder_cor, r->correlation);
+ return 0;
+}
+
static int netem_change(struct Qdisc *sch, struct rtattr *opt)
{
struct netem_sched_data *q = qdisc_priv(sch);
@@ -417,9 +385,15 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt)
q->jitter = qopt->jitter;
q->limit = qopt->limit;
q->gap = qopt->gap;
+ q->counter = 0;
q->loss = qopt->loss;
q->duplicate = qopt->duplicate;
+ /* for compatibility with earlier versions.
+ * if gap is set, need to assume 100% probability
+ */
+ q->reorder = ~0;
+
/* Handle nested options after initial queue options.
* Should have put all options in nested format but too late now.
*/
@@ -441,6 +415,11 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt)
if (ret)
return ret;
}
+ if (tb[TCA_NETEM_REORDER-1]) {
+ ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]);
+ if (ret)
+ return ret;
+ }
}
@@ -455,11 +434,9 @@ static int netem_init(struct Qdisc *sch, struct rtattr *opt)
if (!opt)
return -EINVAL;
- skb_queue_head_init(&q->delayed);
init_timer(&q->timer);
q->timer.function = netem_watchdog;
q->timer.data = (unsigned long) sch;
- q->counter = 0;
q->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
if (!q->qdisc) {
@@ -491,6 +468,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
struct rtattr *rta = (struct rtattr *) b;
struct tc_netem_qopt qopt;
struct tc_netem_corr cor;
+ struct tc_netem_reorder reorder;
qopt.latency = q->latency;
qopt.jitter = q->jitter;
@@ -504,6 +482,11 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
cor.loss_corr = q->loss_cor.rho;
cor.dup_corr = q->dup_cor.rho;
RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);
+
+ reorder.probability = q->reorder;
+ reorder.correlation = q->reorder_cor.rho;
+ RTA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);
+
rta->rta_len = skb->tail - b;
return skb->len;