summary | refs | log | tree | commit | diff | stats
path: root/net/sched
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched')
-rw-r--r-- net/sched/Kconfig 10
-rw-r--r-- net/sched/Makefile 1
-rw-r--r-- net/sched/act_api.c 26
-rw-r--r-- net/sched/act_csum.c 12
-rw-r--r-- net/sched/act_gact.c 9
-rw-r--r-- net/sched/act_ipt.c 12
-rw-r--r-- net/sched/act_mirred.c 2
-rw-r--r-- net/sched/act_nat.c 12
-rw-r--r-- net/sched/act_pedit.c 10
-rw-r--r-- net/sched/act_police.c 9
-rw-r--r-- net/sched/act_simple.c 10
-rw-r--r-- net/sched/act_skbedit.c 8
-rw-r--r-- net/sched/cls_basic.c 2
-rw-r--r-- net/sched/cls_bpf.c 385
-rw-r--r-- net/sched/cls_cgroup.c 4
-rw-r--r-- net/sched/em_ipset.c 7
-rw-r--r-- net/sched/em_meta.c 4
-rw-r--r-- net/sched/sch_api.c 3
-rw-r--r-- net/sched/sch_fq.c 41
-rw-r--r-- net/sched/sch_generic.c 11
-rw-r--r-- net/sched/sch_htb.c 25
-rw-r--r-- net/sched/sch_netem.c 8
-rw-r--r-- net/sched/sch_tbf.c 149
23 files changed, 621 insertions, 139 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index c03a32a0418..ad1f1d81920 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -443,6 +443,16 @@ config NET_CLS_CGROUP
To compile this code as a module, choose M here: the
module will be called cls_cgroup.
+config NET_CLS_BPF
+ tristate "BPF-based classifier"
+ select NET_CLS
+ ---help---
+ If you say Y here, you will be able to classify packets based on
+ programmable BPF (JIT'ed) filters as an alternative to ematches.
+
+ To compile this code as a module, choose M here: the module will
+ be called cls_bpf.
+
config NET_EMATCH
bool "Extended Matches"
select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index e5f9abe9a5d..35fa47a494a 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -50,6 +50,7 @@ obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o
obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o
obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o
obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o
+obj-$(CONFIG_NET_CLS_BPF) += cls_bpf.o
obj-$(CONFIG_NET_EMATCH) += ematch.o
obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o
obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index fd7072827a4..69cb848e834 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -270,6 +270,16 @@ int tcf_register_action(struct tc_action_ops *act)
{
struct tc_action_ops *a, **ap;
+ /* Must supply act, dump, cleanup and init */
+ if (!act->act || !act->dump || !act->cleanup || !act->init)
+ return -EINVAL;
+
+ /* Supply defaults */
+ if (!act->lookup)
+ act->lookup = tcf_hash_search;
+ if (!act->walk)
+ act->walk = tcf_generic_walker;
+
write_lock(&act_mod_lock);
for (ap = &act_base; (a = *ap) != NULL; ap = &a->next) {
if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) {
@@ -381,7 +391,7 @@ int tcf_action_exec(struct sk_buff *skb, const struct tc_action *act,
}
while ((a = act) != NULL) {
repeat:
- if (a->ops && a->ops->act) {
+ if (a->ops) {
ret = a->ops->act(skb, a, res);
if (TC_MUNGED & skb->tc_verd) {
/* copied already, allow trampling */
@@ -405,7 +415,7 @@ void tcf_action_destroy(struct tc_action *act, int bind)
struct tc_action *a;
for (a = act; a; a = act) {
- if (a->ops && a->ops->cleanup) {
+ if (a->ops) {
if (a->ops->cleanup(a, bind) == ACT_P_DELETED)
module_put(a->ops->owner);
act = act->next;
@@ -424,7 +434,7 @@ tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
int err = -EINVAL;
- if (a->ops == NULL || a->ops->dump == NULL)
+ if (a->ops == NULL)
return err;
return a->ops->dump(skb, a, bind, ref);
}
@@ -436,7 +446,7 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
- if (a->ops == NULL || a->ops->dump == NULL)
+ if (a->ops == NULL)
return err;
if (nla_put_string(skb, TCA_KIND, a->ops->kind))
@@ -723,8 +733,6 @@ tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid)
a->ops = tc_lookup_action(tb[TCA_ACT_KIND]);
if (a->ops == NULL)
goto err_free;
- if (a->ops->lookup == NULL)
- goto err_mod;
err = -ENOENT;
if (a->ops->lookup(a, index) == 0)
goto err_mod;
@@ -1084,12 +1092,6 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
memset(&a, 0, sizeof(struct tc_action));
a.ops = a_o;
- if (a_o->walk == NULL) {
- WARN(1, "tc_dump_action: %s !capable of dumping table\n",
- a_o->kind);
- goto out_module_put;
- }
-
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
cb->nlh->nlmsg_type, sizeof(*t), 0);
if (!nlh)
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 3a4c0caa1f7..11fe1a41643 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -77,16 +77,16 @@ static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est,
&csum_idx_gen, &csum_hash_info);
if (IS_ERR(pc))
return PTR_ERR(pc);
- p = to_tcf_csum(pc);
ret = ACT_P_CREATED;
} else {
- p = to_tcf_csum(pc);
- if (!ovr) {
- tcf_hash_release(pc, bind, &csum_hash_info);
+ if (bind)/* dont override defaults */
+ return 0;
+ tcf_hash_release(pc, bind, &csum_hash_info);
+ if (!ovr)
return -EEXIST;
- }
}
+ p = to_tcf_csum(pc);
spin_lock_bh(&p->tcf_lock);
p->tcf_action = parm->action;
p->update_flags = parm->update_flags;
@@ -585,9 +585,7 @@ static struct tc_action_ops act_csum_ops = {
.act = tcf_csum,
.dump = tcf_csum_dump,
.cleanup = tcf_csum_cleanup,
- .lookup = tcf_hash_search,
.init = tcf_csum_init,
- .walk = tcf_generic_walker
};
MODULE_DESCRIPTION("Checksum updating actions");
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index fd2b3cff5fa..eb9ba60ebab 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -102,10 +102,11 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
return PTR_ERR(pc);
ret = ACT_P_CREATED;
} else {
- if (!ovr) {
- tcf_hash_release(pc, bind, &gact_hash_info);
+ if (bind)/* dont override defaults */
+ return 0;
+ tcf_hash_release(pc, bind, &gact_hash_info);
+ if (!ovr)
return -EEXIST;
- }
}
gact = to_gact(pc);
@@ -206,9 +207,7 @@ static struct tc_action_ops act_gact_ops = {
.act = tcf_gact,
.dump = tcf_gact_dump,
.cleanup = tcf_gact_cleanup,
- .lookup = tcf_hash_search,
.init = tcf_gact_init,
- .walk = tcf_generic_walker
};
MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 60d88b6b956..dcbfe8ce04a 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -141,10 +141,12 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est,
return PTR_ERR(pc);
ret = ACT_P_CREATED;
} else {
- if (!ovr) {
- tcf_ipt_release(to_ipt(pc), bind);
+ if (bind)/* dont override defaults */
+ return 0;
+ tcf_ipt_release(to_ipt(pc), bind);
+
+ if (!ovr)
return -EEXIST;
- }
}
ipt = to_ipt(pc);
@@ -298,9 +300,7 @@ static struct tc_action_ops act_ipt_ops = {
.act = tcf_ipt,
.dump = tcf_ipt_dump,
.cleanup = tcf_ipt_cleanup,
- .lookup = tcf_hash_search,
.init = tcf_ipt_init,
- .walk = tcf_generic_walker
};
static struct tc_action_ops act_xt_ops = {
@@ -312,9 +312,7 @@ static struct tc_action_ops act_xt_ops = {
.act = tcf_ipt,
.dump = tcf_ipt_dump,
.cleanup = tcf_ipt_cleanup,
- .lookup = tcf_hash_search,
.init = tcf_ipt_init,
- .walk = tcf_generic_walker
};
MODULE_AUTHOR("Jamal Hadi Salim(2002-13)");
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 977c10e0631..252378121ce 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -271,9 +271,7 @@ static struct tc_action_ops act_mirred_ops = {
.act = tcf_mirred,
.dump = tcf_mirred_dump,
.cleanup = tcf_mirred_cleanup,
- .lookup = tcf_hash_search,
.init = tcf_mirred_init,
- .walk = tcf_generic_walker
};
MODULE_AUTHOR("Jamal Hadi Salim(2002)");
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 876f0ef2969..76869538d02 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -70,15 +70,15 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
&nat_idx_gen, &nat_hash_info);
if (IS_ERR(pc))
return PTR_ERR(pc);
- p = to_tcf_nat(pc);
ret = ACT_P_CREATED;
} else {
- p = to_tcf_nat(pc);
- if (!ovr) {
- tcf_hash_release(pc, bind, &nat_hash_info);
+ if (bind)
+ return 0;
+ tcf_hash_release(pc, bind, &nat_hash_info);
+ if (!ovr)
return -EEXIST;
- }
}
+ p = to_tcf_nat(pc);
spin_lock_bh(&p->tcf_lock);
p->old_addr = parm->old_addr;
@@ -308,9 +308,7 @@ static struct tc_action_ops act_nat_ops = {
.act = tcf_nat,
.dump = tcf_nat_dump,
.cleanup = tcf_nat_cleanup,
- .lookup = tcf_hash_search,
.init = tcf_nat_init,
- .walk = tcf_generic_walker
};
MODULE_DESCRIPTION("Stateless NAT actions");
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 7ed78c9e505..7aa2dcd989f 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -84,10 +84,12 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
ret = ACT_P_CREATED;
} else {
p = to_pedit(pc);
- if (!ovr) {
- tcf_hash_release(pc, bind, &pedit_hash_info);
+ tcf_hash_release(pc, bind, &pedit_hash_info);
+ if (bind)
+ return 0;
+ if (!ovr)
return -EEXIST;
- }
+
if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) {
keys = kmalloc(ksize, GFP_KERNEL);
if (keys == NULL)
@@ -243,9 +245,7 @@ static struct tc_action_ops act_pedit_ops = {
.act = tcf_pedit,
.dump = tcf_pedit_dump,
.cleanup = tcf_pedit_cleanup,
- .lookup = tcf_hash_search,
.init = tcf_pedit_init,
- .walk = tcf_generic_walker
};
MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 189e3c5b3d0..ef246d87e68 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -177,10 +177,12 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla,
if (bind) {
police->tcf_bindcnt += 1;
police->tcf_refcnt += 1;
+ return 0;
}
if (ovr)
goto override;
- return ret;
+ /* not replacing */
+ return -EEXIST;
}
}
@@ -231,14 +233,14 @@ override:
}
if (R_tab) {
police->rate_present = true;
- psched_ratecfg_precompute(&police->rate, &R_tab->rate);
+ psched_ratecfg_precompute(&police->rate, &R_tab->rate, 0);
qdisc_put_rtab(R_tab);
} else {
police->rate_present = false;
}
if (P_tab) {
police->peak_present = true;
- psched_ratecfg_precompute(&police->peak, &P_tab->rate);
+ psched_ratecfg_precompute(&police->peak, &P_tab->rate, 0);
qdisc_put_rtab(P_tab);
} else {
police->peak_present = false;
@@ -407,7 +409,6 @@ static struct tc_action_ops act_police_ops = {
.act = tcf_act_police,
.dump = tcf_act_police_dump,
.cleanup = tcf_act_police_cleanup,
- .lookup = tcf_hash_search,
.init = tcf_act_police_locate,
.walk = tcf_act_police_walker
};
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 7725eb4ab75..f7b45ab8538 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -142,10 +142,13 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
ret = ACT_P_CREATED;
} else {
d = to_defact(pc);
- if (!ovr) {
- tcf_simp_release(d, bind);
+
+ if (bind)
+ return 0;
+ tcf_simp_release(d, bind);
+ if (!ovr)
return -EEXIST;
- }
+
reset_policy(d, defdata, parm);
}
@@ -201,7 +204,6 @@ static struct tc_action_ops act_simp_ops = {
.dump = tcf_simp_dump,
.cleanup = tcf_simp_cleanup,
.init = tcf_simp_init,
- .walk = tcf_generic_walker,
};
MODULE_AUTHOR("Jamal Hadi Salim(2005)");
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index cb4221171f9..8fe9d25c300 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -120,10 +120,11 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
ret = ACT_P_CREATED;
} else {
d = to_skbedit(pc);
- if (!ovr) {
- tcf_hash_release(pc, bind, &skbedit_hash_info);
+ if (bind)
+ return 0;
+ tcf_hash_release(pc, bind, &skbedit_hash_info);
+ if (!ovr)
return -EEXIST;
- }
}
spin_lock_bh(&d->tcf_lock);
@@ -203,7 +204,6 @@ static struct tc_action_ops act_skbedit_ops = {
.dump = tcf_skbedit_dump,
.cleanup = tcf_skbedit_cleanup,
.init = tcf_skbedit_init,
- .walk = tcf_generic_walker,
};
MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>");
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index d76a35d0dc8..636d9131d87 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -137,7 +137,7 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
struct nlattr **tb,
struct nlattr *est)
{
- int err = -EINVAL;
+ int err;
struct tcf_exts e;
struct tcf_ematch_tree t;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
new file mode 100644
index 00000000000..1002a822628
--- /dev/null
+++ b/net/sched/cls_bpf.c
@@ -0,0 +1,385 @@
+/*
+ * Berkeley Packet Filter based traffic classifier
+ *
+ * Might be used to classify traffic through flexible, user-defined and
+ * possibly JIT-ed BPF filters for traffic control as an alternative to
+ * ematches.
+ *
+ * (C) 2013 Daniel Borkmann <dborkman@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/filter.h>
+#include <net/rtnetlink.h>
+#include <net/pkt_cls.h>
+#include <net/sock.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
+MODULE_DESCRIPTION("TC BPF based classifier");
+
+struct cls_bpf_head { /* per-tcf_proto state, stored in tp->root by cls_bpf_init() */
+ struct list_head plist; /* list of struct cls_bpf_prog, linked through ->link */
+ u32 hgen; /* handle counter advanced by cls_bpf_grab_new_handle() */
+};
+
+struct cls_bpf_prog { /* one BPF filter instance attached to a cls_bpf_head */
+ struct sk_filter *filter; /* filter built by sk_unattached_filter_create(); run in cls_bpf_classify() */
+ struct sock_filter *bpf_ops; /* kmalloc'ed copy of the raw instructions, echoed back by cls_bpf_dump() */
+ struct tcf_exts exts; /* extensions executed via tcf_exts_exec() on a match */
+ struct tcf_result res; /* result copied to the caller on a match; res.classid comes from TCA_BPF_CLASSID */
+ struct list_head link; /* membership in cls_bpf_head::plist */
+ u32 handle; /* identifier looked up by cls_bpf_get(); 0 is rejected in cls_bpf_change() */
+ u16 bpf_len; /* number of struct sock_filter entries in bpf_ops */
+};
+
+static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { /* validation policy used by nla_parse_nested() in cls_bpf_change() */
+ [TCA_BPF_CLASSID] = { .type = NLA_U32 },
+ [TCA_BPF_OPS_LEN] = { .type = NLA_U16 }, /* instruction count, not a byte count */
+ [TCA_BPF_OPS] = { .type = NLA_BINARY,
+ .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, /* size of a BPF_MAXINSNS-instruction program in bytes */
+};
+
+static const struct tcf_ext_map bpf_ext_map = { /* passed to tcf_exts_validate() and tcf_exts_dump*() in this file */
+ .action = TCA_BPF_ACT,
+ .police = TCA_BPF_POLICE,
+};
+
+/*
+ * Run every attached BPF program against @skb, in list order, until one
+ * of them matches and its extensions accept the packet.
+ *
+ * A filter result of 0 means "no match".  Any other result selects the
+ * program: @res is seeded from the program's stored result and, unless
+ * the filter returned -1, the class id is overridden with the filter's
+ * return value.  A negative return from tcf_exts_exec() makes us fall
+ * through to the next program.
+ *
+ * Returns the tcf_exts_exec() verdict of the first accepted match, or -1
+ * when no program matched.
+ */
+static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+			    struct tcf_result *res)
+{
+	struct cls_bpf_head *head = tp->root;
+	struct cls_bpf_prog *cur;
+
+	list_for_each_entry(cur, &head->plist, link) {
+		int verdict = SK_RUN_FILTER(cur->filter, skb);
+		int act_ret;
+
+		if (verdict != 0) {
+			*res = cur->res;
+			if (verdict != -1)
+				res->classid = verdict;
+
+			act_ret = tcf_exts_exec(skb, &cur->exts, res);
+			if (act_ret >= 0)
+				return act_ret;
+		}
+	}
+
+	return -1;
+}
+
+/*
+ * Allocate the per-classifier head with an empty program list and hang
+ * it off @tp->root.  Returns 0 on success, -ENOBUFS if the allocation
+ * fails.
+ */
+static int cls_bpf_init(struct tcf_proto *tp)
+{
+	struct cls_bpf_head *root = kzalloc(sizeof(*root), GFP_KERNEL);
+
+	if (!root)
+		return -ENOBUFS;
+
+	INIT_LIST_HEAD(&root->plist);
+	tp->root = root;
+
+	return 0;
+}
+
+static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog) /* release everything @prog owns, then @prog itself; does not unlink it from the list */
+{
+ tcf_unbind_filter(tp, &prog->res);
+ tcf_exts_destroy(tp, &prog->exts);
+
+ sk_unattached_filter_destroy(prog->filter);
+
+ kfree(prog->bpf_ops); /* must precede kfree(prog): the pointer lives inside prog */
+ kfree(prog);
+}
+
+/*
+ * Remove the program identified by @arg (a struct cls_bpf_prog pointer
+ * cast to unsigned long) from this classifier and free it.
+ *
+ * The pointer is only acted upon if it is actually found on the list;
+ * the unlink itself happens under the tree lock.  Returns 0 on success
+ * or -ENOENT if @arg is not one of our programs.
+ */
+static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
+{
+	struct cls_bpf_head *head = tp->root;
+	struct cls_bpf_prog *victim = (struct cls_bpf_prog *) arg;
+	struct cls_bpf_prog *cur;
+
+	list_for_each_entry(cur, &head->plist, link) {
+		if (cur != victim)
+			continue;
+
+		tcf_tree_lock(tp);
+		list_del(&cur->link);
+		tcf_tree_unlock(tp);
+
+		cls_bpf_delete_prog(tp, cur);
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * Tear down the whole classifier: unlink and free every program, then
+ * free the head itself.
+ */
+static void cls_bpf_destroy(struct tcf_proto *tp)
+{
+	struct cls_bpf_head *root = tp->root;
+	struct cls_bpf_prog *cur, *next;
+
+	/* _safe variant: each entry is freed while we iterate */
+	list_for_each_entry_safe(cur, next, &root->plist, link) {
+		list_del(&cur->link);
+		cls_bpf_delete_prog(tp, cur);
+	}
+
+	kfree(root);
+}
+
+/*
+ * Look up the program with the given @handle.
+ *
+ * Returns the matching struct cls_bpf_prog pointer cast to unsigned
+ * long, or 0 if there is no head yet or no program carries that handle.
+ */
+static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
+{
+	struct cls_bpf_head *head = tp->root;
+	struct cls_bpf_prog *cur;
+
+	if (!head)
+		return 0UL;
+
+	list_for_each_entry(cur, &head->plist, link)
+		if (cur->handle == handle)
+			return (unsigned long) cur;
+
+	return 0UL;
+}
+
+static void cls_bpf_put(struct tcf_proto *tp, unsigned long f) /* intentionally empty: cls_bpf_get() takes no reference, so there is nothing to drop */
+{
+}
+
+/*
+ * Install (or replace) the BPF program, class id and extensions of @prog
+ * from the already-parsed TCA_BPF_* attributes in @tb.
+ *
+ * TCA_BPF_OPS_LEN, TCA_BPF_OPS and TCA_BPF_CLASSID are all mandatory.
+ * The instruction count must be in 1..BPF_MAXINSNS and the byte length
+ * of TCA_BPF_OPS must match it exactly; the netlink policy only bounds
+ * the attribute from above, so without this check a short attribute
+ * would make the memcpy() below read past the end of the payload.
+ *
+ * The new filter is fully built before anything in @prog is touched, and
+ * the pointers are swapped under the tree lock; the previous filter and
+ * instruction copy (if any) are released afterwards.
+ *
+ * Returns 0 on success or a negative errno, in which case @prog is left
+ * unmodified.
+ */
+static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
+				   struct cls_bpf_prog *prog,
+				   unsigned long base, struct nlattr **tb,
+				   struct nlattr *est)
+{
+	struct sock_filter *bpf_ops, *bpf_old;
+	struct tcf_exts exts;
+	struct sock_fprog tmp;
+	struct sk_filter *fp, *fp_old;
+	u16 bpf_size, bpf_len;
+	u32 classid;
+	int ret;
+
+	if (!tb[TCA_BPF_OPS_LEN] || !tb[TCA_BPF_OPS] || !tb[TCA_BPF_CLASSID])
+		return -EINVAL;
+
+	ret = tcf_exts_validate(net, tp, tb, est, &exts, &bpf_ext_map);
+	if (ret < 0)
+		return ret;
+
+	classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
+	bpf_len = nla_get_u16(tb[TCA_BPF_OPS_LEN]);
+	if (bpf_len > BPF_MAXINSNS || bpf_len == 0) {
+		ret = -EINVAL;
+		goto errout;
+	}
+
+	bpf_size = bpf_len * sizeof(*bpf_ops);
+
+	/* The declared instruction count must agree with the payload size. */
+	if (bpf_size != nla_len(tb[TCA_BPF_OPS])) {
+		ret = -EINVAL;
+		goto errout;
+	}
+
+	bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
+	if (bpf_ops == NULL) {
+		ret = -ENOMEM;
+		goto errout;
+	}
+
+	memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size);
+
+	tmp.len = bpf_len;
+	tmp.filter = (struct sock_filter __user *) bpf_ops;
+
+	ret = sk_unattached_filter_create(&fp, &tmp);
+	if (ret)
+		goto errout_free;
+
+	/* Swap in the new program; remember the old one for release below. */
+	tcf_tree_lock(tp);
+	fp_old = prog->filter;
+	bpf_old = prog->bpf_ops;
+
+	prog->bpf_len = bpf_len;
+	prog->bpf_ops = bpf_ops;
+	prog->filter = fp;
+	prog->res.classid = classid;
+	tcf_tree_unlock(tp);
+
+	tcf_bind_filter(tp, &prog->res, base);
+	tcf_exts_change(tp, &prog->exts, &exts);
+
+	/* fp_old is NULL for a freshly allocated prog */
+	if (fp_old)
+		sk_unattached_filter_destroy(fp_old);
+	kfree(bpf_old);
+
+	return 0;
+
+errout_free:
+	kfree(bpf_ops);
+errout:
+	tcf_exts_destroy(tp, &exts);
+	return ret;
+}
+
+/*
+ * Pick an unused handle for a new program.
+ *
+ * head->hgen is advanced (wrapping from 0x7FFFFFFF back to 1) until
+ * cls_bpf_get() reports that no program uses the candidate, or until the
+ * probe budget @i is exhausted.
+ *
+ * Returns the free handle that was found, i.e. head->hgen -- not the
+ * remaining probe budget -- or 0 when no free handle exists.  The caller
+ * treats 0 as failure.
+ */
+static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
+				   struct cls_bpf_head *head)
+{
+	unsigned int i = 0x80000000;
+
+	do {
+		if (++head->hgen == 0x7FFFFFFF)
+			head->hgen = 1;
+	} while (--i > 0 && cls_bpf_get(tp, head->hgen));
+
+	if (i == 0) {
+		pr_err("Insufficient number of handles\n");
+		return 0;
+	}
+
+	return head->hgen;
+}
+
+/*
+ * Create a new program or update an existing one from a netlink request.
+ *
+ * If *@arg already points at a program, it is updated in place; a
+ * non-zero @handle must then equal the program's own handle.  Otherwise
+ * a new program is allocated, given @handle (or an auto-generated one
+ * when @handle is 0), configured, linked into the list under the tree
+ * lock and reported back through *@arg.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
+			  struct tcf_proto *tp, unsigned long base,
+			  u32 handle, struct nlattr **tca,
+			  unsigned long *arg)
+{
+	struct cls_bpf_head *head = tp->root;
+	struct cls_bpf_prog *prog = (struct cls_bpf_prog *) *arg;
+	struct nlattr *tb[TCA_BPF_MAX + 1];
+	int ret;
+
+	if (!tca[TCA_OPTIONS])
+		return -EINVAL;
+
+	ret = nla_parse_nested(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], bpf_policy);
+	if (ret < 0)
+		return ret;
+
+	if (prog) {
+		/* Updating an existing program in place. */
+		if (handle && prog->handle != handle)
+			return -EINVAL;
+
+		return cls_bpf_modify_existing(net, tp, prog, base, tb,
+					       tca[TCA_RATE]);
+	}
+
+	prog = kzalloc(sizeof(*prog), GFP_KERNEL);
+	if (!prog)
+		return -ENOBUFS;
+
+	prog->handle = handle ? handle : cls_bpf_grab_new_handle(tp, head);
+	if (!prog->handle) {
+		ret = -EINVAL;
+		goto errout;
+	}
+
+	ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE]);
+	if (ret < 0)
+		goto errout;
+
+	tcf_tree_lock(tp);
+	list_add(&prog->link, &head->plist);
+	tcf_tree_unlock(tp);
+
+	*arg = (unsigned long) prog;
+
+	return 0;
+
+errout:
+	if (*arg == 0UL && prog)
+		kfree(prog);
+
+	return ret;
+}
+
+/*
+ * Serialise the program identified by @fh into @skb.
+ *
+ * Emits the handle in @tm, then a TCA_OPTIONS nest holding the class id,
+ * the instruction count, the raw instructions and the extensions,
+ * followed by the extension statistics outside the nest.
+ *
+ * Returns skb->len on success (also when @fh is 0, in which case nothing
+ * is written) or -1 after cancelling the nest if the message ran out of
+ * room.
+ */
+static int cls_bpf_dump(struct tcf_proto *tp, unsigned long fh,
+			struct sk_buff *skb, struct tcmsg *tm)
+{
+	struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh;
+	struct nlattr *opts, *insns;
+
+	if (!prog)
+		return skb->len;
+
+	tm->tcm_handle = prog->handle;
+
+	opts = nla_nest_start(skb, TCA_OPTIONS);
+	if (!opts)
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid) ||
+	    nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_len))
+		goto nla_put_failure;
+
+	/* Reserve room for the instructions, then copy them in. */
+	insns = nla_reserve(skb, TCA_BPF_OPS,
+			    prog->bpf_len * sizeof(struct sock_filter));
+	if (!insns)
+		goto nla_put_failure;
+	memcpy(nla_data(insns), prog->bpf_ops, nla_len(insns));
+
+	if (tcf_exts_dump(skb, &prog->exts, &bpf_ext_map) < 0)
+		goto nla_put_failure;
+	nla_nest_end(skb, opts);
+
+	if (tcf_exts_dump_stats(skb, &prog->exts, &bpf_ext_map) < 0)
+		goto nla_put_failure;
+
+	return skb->len;
+
+nla_put_failure:
+	nla_nest_cancel(skb, opts);
+	return -1;
+}
+
+/*
+ * Iterate over all programs for a tcf_walker.
+ *
+ * The first arg->skip entries are counted but not visited.  For every
+ * later entry arg->fn() is invoked; a negative return sets arg->stop and
+ * ends the walk without counting that entry.
+ */
+static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+	struct cls_bpf_head *head = tp->root;
+	struct cls_bpf_prog *cur;
+
+	list_for_each_entry(cur, &head->plist, link) {
+		if (arg->count >= arg->skip &&
+		    arg->fn(tp, (unsigned long) cur, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+
+		arg->count++;
+	}
+}
+
+static struct tcf_proto_ops cls_bpf_ops __read_mostly = { /* classifier operations table; registered/unregistered by the module init/exit hooks below */
+ .kind = "bpf",
+ .owner = THIS_MODULE,
+ .classify = cls_bpf_classify,
+ .init = cls_bpf_init,
+ .destroy = cls_bpf_destroy,
+ .get = cls_bpf_get,
+ .put = cls_bpf_put,
+ .change = cls_bpf_change,
+ .delete = cls_bpf_delete,
+ .walk = cls_bpf_walk,
+ .dump = cls_bpf_dump,
+};
+
+static int __init cls_bpf_init_mod(void) /* module load hook: registers cls_bpf_ops and returns the registration result */
+{
+ return register_tcf_proto_ops(&cls_bpf_ops);
+}
+
+static void __exit cls_bpf_exit_mod(void) /* module unload hook: unregisters cls_bpf_ops */
+{
+ unregister_tcf_proto_ops(&cls_bpf_ops);
+}
+
+module_init(cls_bpf_init_mod);
+module_exit(cls_bpf_exit_mod);
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 867b4a3e398..16006c92c3f 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -72,11 +72,11 @@ static void cgrp_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
{
struct task_struct *p;
- void *v;
+ struct cgroup_cls_state *cs = css_cls_state(css);
+ void *v = (void *)(unsigned long)cs->classid;
cgroup_taskset_for_each(p, css, tset) {
task_lock(p);
- v = (void *)(unsigned long)task_cls_classid(p);
iterate_fd(p->files, 0, update_classid, v);
task_unlock(p);
}
diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c
index 938b7cbf562..527aeb7a3ff 100644
--- a/net/sched/em_ipset.c
+++ b/net/sched/em_ipset.c
@@ -24,11 +24,12 @@ static int em_ipset_change(struct tcf_proto *tp, void *data, int data_len,
{
struct xt_set_info *set = data;
ip_set_id_t index;
+ struct net *net = dev_net(qdisc_dev(tp->q));
if (data_len != sizeof(*set))
return -EINVAL;
- index = ip_set_nfnl_get_byindex(set->index);
+ index = ip_set_nfnl_get_byindex(net, set->index);
if (index == IPSET_INVALID_ID)
return -ENOENT;
@@ -37,7 +38,7 @@ static int em_ipset_change(struct tcf_proto *tp, void *data, int data_len,
if (em->data)
return 0;
- ip_set_nfnl_put(index);
+ ip_set_nfnl_put(net, index);
return -ENOMEM;
}
@@ -45,7 +46,7 @@ static void em_ipset_destroy(struct tcf_proto *p, struct tcf_ematch *em)
{
const struct xt_set_info *set = (const void *) em->data;
if (set) {
- ip_set_nfnl_put(set->index);
+ ip_set_nfnl_put(dev_net(qdisc_dev(p->q)), set->index);
kfree((void *) em->data);
}
}
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 7c3de6ffa51..e5cef956722 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -793,8 +793,10 @@ static int em_meta_change(struct tcf_proto *tp, void *data, int len,
goto errout;
meta = kzalloc(sizeof(*meta), GFP_KERNEL);
- if (meta == NULL)
+ if (meta == NULL) {
+ err = -ENOMEM;
goto errout;
+ }
memcpy(&meta->lvalue.hdr, &hdr->left, sizeof(hdr->left));
memcpy(&meta->rvalue.hdr, &hdr->right, sizeof(hdr->right));
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 2adda7fa2d3..cd81505662b 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -737,9 +737,11 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
const struct Qdisc_class_ops *cops;
unsigned long cl;
u32 parentid;
+ int drops;
if (n == 0)
return;
+ drops = max_t(int, n, 0);
while ((parentid = sch->parent)) {
if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
return;
@@ -756,6 +758,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
cops->put(sch, cl);
}
sch->q.qlen -= n;
+ sch->qstats.drops += drops;
}
}
EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index a9dfdda9ed1..95d84396190 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -88,7 +88,7 @@ struct fq_sched_data {
struct fq_flow internal; /* for non classified or high prio packets */
u32 quantum;
u32 initial_quantum;
- u32 flow_default_rate;/* rate per flow : bytes per second */
+ u32 flow_refill_delay;
u32 flow_max_rate; /* optional max rate per flow */
u32 flow_plimit; /* max packets per flow */
struct rb_root *fq_root;
@@ -115,6 +115,7 @@ static struct fq_flow detached, throttled;
static void fq_flow_set_detached(struct fq_flow *f)
{
f->next = &detached;
+ f->age = jiffies;
}
static bool fq_flow_is_detached(const struct fq_flow *f)
@@ -209,21 +210,15 @@ static void fq_gc(struct fq_sched_data *q,
}
}
-static const u8 prio2band[TC_PRIO_MAX + 1] = {
- 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
-};
-
static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
{
struct rb_node **p, *parent;
struct sock *sk = skb->sk;
struct rb_root *root;
struct fq_flow *f;
- int band;
/* warning: no starvation prevention... */
- band = prio2band[skb->priority & TC_PRIO_MAX];
- if (unlikely(band == 0))
+ if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL))
return &q->internal;
if (unlikely(!sk)) {
@@ -255,6 +250,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
f->socket_hash != sk->sk_hash)) {
f->credit = q->initial_quantum;
f->socket_hash = sk->sk_hash;
+ f->time_next_packet = 0ULL;
}
return f;
}
@@ -372,17 +368,20 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
f->qlen++;
- flow_queue_add(f, skb);
if (skb_is_retransmit(skb))
q->stat_tcp_retrans++;
sch->qstats.backlog += qdisc_pkt_len(skb);
if (fq_flow_is_detached(f)) {
fq_flow_add_tail(&q->new_flows, f);
- if (q->quantum > f->credit)
- f->credit = q->quantum;
+ if (time_after(jiffies, f->age + q->flow_refill_delay))
+ f->credit = max_t(u32, f->credit, q->quantum);
q->inactive_flows--;
qdisc_unthrottled(sch);
}
+
+ /* Note: this overwrites f->age */
+ flow_queue_add(f, skb);
+
if (unlikely(f == &q->internal)) {
q->stat_internal_packets++;
qdisc_unthrottled(sch);
@@ -460,7 +459,6 @@ begin:
fq_flow_add_tail(&q->old_flows, f);
} else {
fq_flow_set_detached(f);
- f->age = jiffies;
q->inactive_flows++;
}
goto begin;
@@ -614,6 +612,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
[TCA_FQ_FLOW_DEFAULT_RATE] = { .type = NLA_U32 },
[TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 },
[TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 },
+ [TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 },
};
static int fq_change(struct Qdisc *sch, struct nlattr *opt)
@@ -655,7 +654,8 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]);
if (tb[TCA_FQ_FLOW_DEFAULT_RATE])
- q->flow_default_rate = nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]);
+ pr_warn_ratelimited("sch_fq: defrate %u ignored.\n",
+ nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]));
if (tb[TCA_FQ_FLOW_MAX_RATE])
q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
@@ -669,6 +669,12 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
err = -EINVAL;
}
+ if (tb[TCA_FQ_FLOW_REFILL_DELAY]) {
+ u32 usecs_delay = nla_get_u32(tb[TCA_FQ_FLOW_REFILL_DELAY]) ;
+
+ q->flow_refill_delay = usecs_to_jiffies(usecs_delay);
+ }
+
if (!err)
err = fq_resize(q, fq_log);
@@ -704,7 +710,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt)
q->flow_plimit = 100;
q->quantum = 2 * psched_mtu(qdisc_dev(sch));
q->initial_quantum = 10 * psched_mtu(qdisc_dev(sch));
- q->flow_default_rate = 0;
+ q->flow_refill_delay = msecs_to_jiffies(40);
q->flow_max_rate = ~0U;
q->rate_enable = 1;
q->new_flows.first = NULL;
@@ -731,15 +737,16 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
if (opts == NULL)
goto nla_put_failure;
- /* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore,
- * do not bother giving its value
- */
+ /* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore */
+
if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) ||
nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) ||
nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) ||
nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) ||
nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) ||
nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) ||
+ nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY,
+ jiffies_to_usecs(q->flow_refill_delay)) ||
nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
goto nla_put_failure;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index a74e278654a..7fc899a943a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -829,7 +829,7 @@ void dev_deactivate_many(struct list_head *head)
struct net_device *dev;
bool sync_needed = false;
- list_for_each_entry(dev, head, unreg_list) {
+ list_for_each_entry(dev, head, close_list) {
netdev_for_each_tx_queue(dev, dev_deactivate_queue,
&noop_qdisc);
if (dev_ingress_queue(dev))
@@ -848,7 +848,7 @@ void dev_deactivate_many(struct list_head *head)
synchronize_net();
/* Wait for outstanding qdisc_run calls. */
- list_for_each_entry(dev, head, unreg_list)
+ list_for_each_entry(dev, head, close_list)
while (some_qdisc_is_busy(dev))
yield();
}
@@ -857,7 +857,7 @@ void dev_deactivate(struct net_device *dev)
{
LIST_HEAD(single);
- list_add(&dev->unreg_list, &single);
+ list_add(&dev->close_list, &single);
dev_deactivate_many(&single);
list_del(&single);
}
@@ -910,11 +910,12 @@ void dev_shutdown(struct net_device *dev)
}
void psched_ratecfg_precompute(struct psched_ratecfg *r,
- const struct tc_ratespec *conf)
+ const struct tc_ratespec *conf,
+ u64 rate64)
{
memset(r, 0, sizeof(*r));
r->overhead = conf->overhead;
- r->rate_bytes_ps = conf->rate;
+ r->rate_bytes_ps = max_t(u64, conf->rate, rate64);
r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
r->mult = 1;
/*
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 863846cc551..717b2108f85 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -997,6 +997,8 @@ static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
[TCA_HTB_CTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
[TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
[TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
+ [TCA_HTB_RATE64] = { .type = NLA_U64 },
+ [TCA_HTB_CEIL64] = { .type = NLA_U64 },
};
static void htb_work_func(struct work_struct *work)
@@ -1114,6 +1116,12 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
opt.level = cl->level;
if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
+ if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
+ nla_put_u64(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps))
+ goto nla_put_failure;
+ if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
+ nla_put_u64(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps))
+ goto nla_put_failure;
nla_nest_end(skb, nest);
spin_unlock_bh(root_lock);
@@ -1332,6 +1340,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
struct nlattr *tb[TCA_HTB_MAX + 1];
struct tc_htb_opt *hopt;
+ u64 rate64, ceil64;
/* extract all subattrs from opt attr */
if (!opt)
@@ -1468,11 +1477,22 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
sch_tree_lock(sch);
}
+ rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
+
+ ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
+
+ psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
+ psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
+
/* it used to be a nasty bug here, we have to check that node
* is really leaf before changing cl->un.leaf !
*/
if (!cl->level) {
- cl->quantum = hopt->rate.rate / q->rate2quantum;
+ u64 quantum = cl->rate.rate_bytes_ps;
+
+ do_div(quantum, q->rate2quantum);
+ cl->quantum = min_t(u64, quantum, INT_MAX);
+
if (!hopt->quantum && cl->quantum < 1000) {
pr_warning(
"HTB: quantum of class %X is small. Consider r2q change.\n",
@@ -1491,9 +1511,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
cl->prio = TC_HTB_NUMPRIO - 1;
}
- psched_ratecfg_precompute(&cl->rate, &hopt->rate);
- psched_ratecfg_precompute(&cl->ceil, &hopt->ceil);
-
cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index b87e83d0747..bccd52b36e9 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -215,10 +215,10 @@ static bool loss_4state(struct netem_sched_data *q)
if (rnd < clg->a4) {
clg->state = 4;
return true;
- } else if (clg->a4 < rnd && rnd < clg->a1) {
+ } else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) {
clg->state = 3;
return true;
- } else if (clg->a1 < rnd)
+ } else if (clg->a1 + clg->a4 < rnd)
clg->state = 1;
break;
@@ -235,7 +235,6 @@ static bool loss_4state(struct netem_sched_data *q)
clg->state = 2;
else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
clg->state = 1;
- return true;
} else if (clg->a2 + clg->a3 < rnd) {
clg->state = 3;
return true;
@@ -269,10 +268,11 @@ static bool loss_gilb_ell(struct netem_sched_data *q)
clg->state = 2;
if (net_random() < clg->a4)
return true;
+ break;
case 2:
if (net_random() < clg->a2)
clg->state = 1;
- if (clg->a3 > net_random())
+ if (net_random() > clg->a3)
return true;
}
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 1aaf1b6e51a..887e672f9d7 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -21,6 +21,7 @@
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
+#include <net/tcp.h>
/* Simple Token Bucket Filter.
@@ -117,6 +118,48 @@ struct tbf_sched_data {
};
+/* Time to Length: convert a time in ns into a length in bytes, i.e.
+ * determine how many bytes can be sent at rate @r in the given time.
+ */
+static u64 psched_ns_t2l(const struct psched_ratecfg *r,
+ u64 time_in_ns)
+{
+ /* The formula is:
+ * len = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC
+ */
+ u64 len = time_in_ns * r->rate_bytes_ps; /* NOTE(review): product may wrap in u64 - confirm */
+
+ do_div(len, NSEC_PER_SEC);
+
+ if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) {
+ do_div(len, 53); /* presumably whole 53-byte ATM cells - confirm */
+ len = len * 48; /* presumably 48 payload bytes per cell - confirm */
+ }
+
+ if (len > r->overhead) /* subtract r->overhead, clamping the result at 0 */
+ len -= r->overhead;
+ else
+ len = 0;
+
+ return len;
+}
+
+/*
+ * Return the length of each individual segment of a GSO packet,
+ * including all headers (MAC, IP, TCP/UDP).
+ */
+static unsigned int skb_gso_seglen(const struct sk_buff *skb)
+{
+ unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb); /* MAC header up to transport header */
+ const struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
+ hdr_len += tcp_hdrlen(skb); /* TCP GSO: add the TCP header length */
+ else
+ hdr_len += sizeof(struct udphdr); /* every other GSO type is sized as UDP */
+ return hdr_len + shinfo->gso_size; /* all headers plus per-segment gso_size */
+}
+
/* GSO packet is too big, segment it so that tbf can transmit
* each segment in time
*/
@@ -136,12 +179,8 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
while (segs) {
nskb = segs->next;
segs->next = NULL;
- if (likely(segs->len <= q->max_size)) {
- qdisc_skb_cb(segs)->pkt_len = segs->len;
- ret = qdisc_enqueue(segs, q->qdisc);
- } else {
- ret = qdisc_reshape_fail(skb, sch);
- }
+ qdisc_skb_cb(segs)->pkt_len = segs->len;
+ ret = qdisc_enqueue(segs, q->qdisc);
if (ret != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(ret))
sch->qstats.drops++;
@@ -163,7 +202,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
int ret;
if (qdisc_pkt_len(skb) > q->max_size) {
- if (skb_is_gso(skb))
+ if (skb_is_gso(skb) && skb_gso_seglen(skb) <= q->max_size)
return tbf_segment(skb, sch);
return qdisc_reshape_fail(skb, sch);
}
@@ -266,20 +305,24 @@ static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
[TCA_TBF_PARMS] = { .len = sizeof(struct tc_tbf_qopt) },
[TCA_TBF_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
[TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
+ [TCA_TBF_RATE64] = { .type = NLA_U64 },
+ [TCA_TBF_PRATE64] = { .type = NLA_U64 },
};
static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
{
int err;
struct tbf_sched_data *q = qdisc_priv(sch);
- struct nlattr *tb[TCA_TBF_PTAB + 1];
+ struct nlattr *tb[TCA_TBF_MAX + 1];
struct tc_tbf_qopt *qopt;
- struct qdisc_rate_table *rtab = NULL;
- struct qdisc_rate_table *ptab = NULL;
struct Qdisc *child = NULL;
- int max_size, n;
+ struct psched_ratecfg rate;
+ struct psched_ratecfg peak;
+ u64 max_size;
+ s64 buffer, mtu;
+ u64 rate64 = 0, prate64 = 0;
- err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
+ err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy);
if (err < 0)
return err;
@@ -288,33 +331,13 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
goto done;
qopt = nla_data(tb[TCA_TBF_PARMS]);
- rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
- if (rtab == NULL)
- goto done;
+ if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
+ qdisc_put_rtab(qdisc_get_rtab(&qopt->rate,
+ tb[TCA_TBF_RTAB]));
- if (qopt->peakrate.rate) {
- if (qopt->peakrate.rate > qopt->rate.rate)
- ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]);
- if (ptab == NULL)
- goto done;
- }
-
- for (n = 0; n < 256; n++)
- if (rtab->data[n] > qopt->buffer)
- break;
- max_size = (n << qopt->rate.cell_log) - 1;
- if (ptab) {
- int size;
-
- for (n = 0; n < 256; n++)
- if (ptab->data[n] > qopt->mtu)
- break;
- size = (n << qopt->peakrate.cell_log) - 1;
- if (size < max_size)
- max_size = size;
- }
- if (max_size < 0)
- goto done;
+ if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE)
+ qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate,
+ tb[TCA_TBF_PTAB]));
if (q->qdisc != &noop_qdisc) {
err = fifo_set_limit(q->qdisc, qopt->limit);
@@ -328,6 +351,39 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
}
}
+ buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U);
+ mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U);
+
+ if (tb[TCA_TBF_RATE64])
+ rate64 = nla_get_u64(tb[TCA_TBF_RATE64]);
+ psched_ratecfg_precompute(&rate, &qopt->rate, rate64);
+
+ max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U);
+
+ if (qopt->peakrate.rate) {
+ if (tb[TCA_TBF_PRATE64])
+ prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]);
+ psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64);
+ if (peak.rate_bytes_ps <= rate.rate_bytes_ps) {
+ pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equals to rate %llu !\n",
+ peak.rate_bytes_ps, rate.rate_bytes_ps);
+ err = -EINVAL;
+ goto done;
+ }
+
+ max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu));
+ }
+
+ if (max_size < psched_mtu(qdisc_dev(sch)))
+ pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n",
+ max_size, qdisc_dev(sch)->name,
+ psched_mtu(qdisc_dev(sch)));
+
+ if (!max_size) {
+ err = -EINVAL;
+ goto done;
+ }
+
sch_tree_lock(sch);
if (child) {
qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
@@ -341,9 +397,9 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
q->tokens = q->buffer;
q->ptokens = q->mtu;
- psched_ratecfg_precompute(&q->rate, &rtab->rate);
- if (ptab) {
- psched_ratecfg_precompute(&q->peak, &ptab->rate);
+ memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg));
+ if (qopt->peakrate.rate) {
+ memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg));
q->peak_present = true;
} else {
q->peak_present = false;
@@ -352,10 +408,6 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
sch_tree_unlock(sch);
err = 0;
done:
- if (rtab)
- qdisc_put_rtab(rtab);
- if (ptab)
- qdisc_put_rtab(ptab);
return err;
}
@@ -402,6 +454,13 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
opt.buffer = PSCHED_NS2TICKS(q->buffer);
if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
+ if (q->rate.rate_bytes_ps >= (1ULL << 32) &&
+ nla_put_u64(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps))
+ goto nla_put_failure;
+ if (q->peak_present &&
+ q->peak.rate_bytes_ps >= (1ULL << 32) &&
+ nla_put_u64(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps))
+ goto nla_put_failure;
nla_nest_end(skb, nest);
return skb->len;