summaryrefslogtreecommitdiffstats
path: root/net/sched
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/Kconfig20
-rw-r--r--net/sched/Makefile2
-rw-r--r--net/sched/act_api.c59
-rw-r--r--net/sched/act_gact.c14
-rw-r--r--net/sched/act_ipt.c7
-rw-r--r--net/sched/act_mirred.c11
-rw-r--r--net/sched/act_pedit.c5
-rw-r--r--net/sched/act_simple.c5
-rw-r--r--net/sched/cls_api.c12
-rw-r--r--net/sched/cls_route.c2
-rw-r--r--net/sched/em_canid.c240
-rw-r--r--net/sched/em_ipset.c135
-rw-r--r--net/sched/em_meta.c2
-rw-r--r--net/sched/sch_api.c24
-rw-r--r--net/sched/sch_netem.c9
-rw-r--r--net/sched/sch_qfq.c95
-rw-r--r--net/sched/sch_teql.c47
17 files changed, 580 insertions, 109 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index e7a8976bf25..62fb51face8 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -507,6 +507,26 @@ config NET_EMATCH_TEXT
To compile this code as a module, choose M here: the
module will be called em_text.
+config NET_EMATCH_CANID
+ tristate "CAN Identifier"
+ depends on NET_EMATCH && CAN
+ ---help---
+ Say Y here if you want to be able to classify CAN frames based
+ on CAN Identifier.
+
+ To compile this code as a module, choose M here: the
+ module will be called em_canid.
+
+config NET_EMATCH_IPSET
+ tristate "IPset"
+ depends on NET_EMATCH && IP_SET
+ ---help---
+ Say Y here if you want to be able to classify packets based on
+ ipset membership.
+
+ To compile this code as a module, choose M here: the
+ module will be called em_ipset.
+
config NET_CLS_ACT
bool "Actions"
---help---
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 5940a1992f0..978cbf004e8 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -55,3 +55,5 @@ obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o
obj-$(CONFIG_NET_EMATCH_U32) += em_u32.o
obj-$(CONFIG_NET_EMATCH_META) += em_meta.o
obj-$(CONFIG_NET_EMATCH_TEXT) += em_text.o
+obj-$(CONFIG_NET_EMATCH_CANID) += em_canid.o
+obj-$(CONFIG_NET_EMATCH_IPSET) += em_ipset.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 5cfb160df06..e3d2c78cb52 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -652,27 +652,27 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
- nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
-
- t = NLMSG_DATA(nlh);
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(*t), flags);
+ if (!nlh)
+ goto out_nlmsg_trim;
+ t = nlmsg_data(nlh);
t->tca_family = AF_UNSPEC;
t->tca__pad1 = 0;
t->tca__pad2 = 0;
nest = nla_nest_start(skb, TCA_ACT_TAB);
if (nest == NULL)
- goto nla_put_failure;
+ goto out_nlmsg_trim;
if (tcf_action_dump(skb, a, bind, ref) < 0)
- goto nla_put_failure;
+ goto out_nlmsg_trim;
nla_nest_end(skb, nest);
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
-nla_put_failure:
-nlmsg_failure:
+out_nlmsg_trim:
nlmsg_trim(skb, b);
return -1;
}
@@ -799,19 +799,21 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
if (a->ops == NULL)
goto err_out;
- nlh = NLMSG_PUT(skb, pid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t));
- t = NLMSG_DATA(nlh);
+ nlh = nlmsg_put(skb, pid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0);
+ if (!nlh)
+ goto out_module_put;
+ t = nlmsg_data(nlh);
t->tca_family = AF_UNSPEC;
t->tca__pad1 = 0;
t->tca__pad2 = 0;
nest = nla_nest_start(skb, TCA_ACT_TAB);
if (nest == NULL)
- goto nla_put_failure;
+ goto out_module_put;
err = a->ops->walk(skb, &dcb, RTM_DELACTION, a);
if (err < 0)
- goto nla_put_failure;
+ goto out_module_put;
if (err == 0)
goto noflush_out;
@@ -828,8 +830,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
return err;
-nla_put_failure:
-nlmsg_failure:
+out_module_put:
module_put(a->ops->owner);
err_out:
noflush_out:
@@ -919,18 +920,20 @@ static int tcf_add_notify(struct net *net, struct tc_action *a,
b = skb_tail_pointer(skb);
- nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
- t = NLMSG_DATA(nlh);
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(*t), flags);
+ if (!nlh)
+ goto out_kfree_skb;
+ t = nlmsg_data(nlh);
t->tca_family = AF_UNSPEC;
t->tca__pad1 = 0;
t->tca__pad2 = 0;
nest = nla_nest_start(skb, TCA_ACT_TAB);
if (nest == NULL)
- goto nla_put_failure;
+ goto out_kfree_skb;
if (tcf_action_dump(skb, a, 0, 0) < 0)
- goto nla_put_failure;
+ goto out_kfree_skb;
nla_nest_end(skb, nest);
@@ -942,8 +945,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a,
err = 0;
return err;
-nla_put_failure:
-nlmsg_failure:
+out_kfree_skb:
kfree_skb(skb);
return -1;
}
@@ -1062,7 +1064,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
struct tc_action_ops *a_o;
struct tc_action a;
int ret = 0;
- struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh);
+ struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
struct nlattr *kind = find_dump_kind(cb->nlh);
if (kind == NULL) {
@@ -1080,23 +1082,25 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
if (a_o->walk == NULL) {
WARN(1, "tc_dump_action: %s !capable of dumping table\n",
a_o->kind);
- goto nla_put_failure;
+ goto out_module_put;
}
- nlh = NLMSG_PUT(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
- cb->nlh->nlmsg_type, sizeof(*t));
- t = NLMSG_DATA(nlh);
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+ cb->nlh->nlmsg_type, sizeof(*t), 0);
+ if (!nlh)
+ goto out_module_put;
+ t = nlmsg_data(nlh);
t->tca_family = AF_UNSPEC;
t->tca__pad1 = 0;
t->tca__pad2 = 0;
nest = nla_nest_start(skb, TCA_ACT_TAB);
if (nest == NULL)
- goto nla_put_failure;
+ goto out_module_put;
ret = a_o->walk(skb, cb, RTM_GETACTION, &a);
if (ret < 0)
- goto nla_put_failure;
+ goto out_module_put;
if (ret > 0) {
nla_nest_end(skb, nest);
@@ -1110,8 +1114,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
module_put(a_o->owner);
return skb->len;
-nla_put_failure:
-nlmsg_failure:
+out_module_put:
module_put(a_o->owner);
nlmsg_trim(skb, b);
return skb->len;
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index f10fb825644..05d60859d8e 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -67,6 +67,9 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
struct tcf_common *pc;
int ret = 0;
int err;
+#ifdef CONFIG_GACT_PROB
+ struct tc_gact_p *p_parm = NULL;
+#endif
if (nla == NULL)
return -EINVAL;
@@ -82,6 +85,12 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
#ifndef CONFIG_GACT_PROB
if (tb[TCA_GACT_PROB] != NULL)
return -EOPNOTSUPP;
+#else
+ if (tb[TCA_GACT_PROB]) {
+ p_parm = nla_data(tb[TCA_GACT_PROB]);
+ if (p_parm->ptype >= MAX_RAND)
+ return -EINVAL;
+ }
#endif
pc = tcf_hash_check(parm->index, a, bind, &gact_hash_info);
@@ -103,8 +112,7 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
spin_lock_bh(&gact->tcf_lock);
gact->tcf_action = parm->action;
#ifdef CONFIG_GACT_PROB
- if (tb[TCA_GACT_PROB] != NULL) {
- struct tc_gact_p *p_parm = nla_data(tb[TCA_GACT_PROB]);
+ if (p_parm) {
gact->tcfg_paction = p_parm->paction;
gact->tcfg_pval = p_parm->pval;
gact->tcfg_ptype = p_parm->ptype;
@@ -133,7 +141,7 @@ static int tcf_gact(struct sk_buff *skb, const struct tc_action *a,
spin_lock(&gact->tcf_lock);
#ifdef CONFIG_GACT_PROB
- if (gact->tcfg_ptype && gact_rand[gact->tcfg_ptype] != NULL)
+ if (gact->tcfg_ptype)
action = gact_rand[gact->tcfg_ptype](gact);
else
action = gact->tcf_action;
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 60e281ad0f0..58fb3c7aab9 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -185,7 +185,12 @@ err3:
err2:
kfree(tname);
err1:
- kfree(pc);
+ if (ret == ACT_P_CREATED) {
+ if (est)
+ gen_kill_estimator(&pc->tcfc_bstats,
+ &pc->tcfc_rate_est);
+ kfree_rcu(pc, tcfc_rcu);
+ }
return err;
}
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index fe81cc18e9e..9c0fd0c7881 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -200,13 +200,12 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
out:
if (err) {
m->tcf_qstats.overlimits++;
- /* should we be asking for packet to be dropped?
- * may make sense for redirect case only
- */
- retval = TC_ACT_SHOT;
- } else {
+ if (m->tcfm_eaction != TCA_EGRESS_MIRROR)
+ retval = TC_ACT_SHOT;
+ else
+ retval = m->tcf_action;
+ } else
retval = m->tcf_action;
- }
spin_unlock(&m->tcf_lock);
return retval;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 26aa2f6ce25..45c53ab067a 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -74,7 +74,10 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est,
p = to_pedit(pc);
keys = kmalloc(ksize, GFP_KERNEL);
if (keys == NULL) {
- kfree(pc);
+ if (est)
+ gen_kill_estimator(&pc->tcfc_bstats,
+ &pc->tcfc_rate_est);
+ kfree_rcu(pc, tcfc_rcu);
return -ENOMEM;
}
ret = ACT_P_CREATED;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 3922f2a2821..3714f60f0b3 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -131,7 +131,10 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
d = to_defact(pc);
ret = alloc_defdata(d, defdata);
if (ret < 0) {
- kfree(pc);
+ if (est)
+ gen_kill_estimator(&pc->tcfc_bstats,
+ &pc->tcfc_rate_est);
+ kfree_rcu(pc, tcfc_rcu);
return ret;
}
d->tcf_action = parm->action;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index f452f696b4b..6dd1131f2ec 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -140,7 +140,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
int tp_created = 0;
replay:
- t = NLMSG_DATA(n);
+ t = nlmsg_data(n);
protocol = TC_H_MIN(t->tcm_info);
prio = TC_H_MAJ(t->tcm_info);
nprio = prio;
@@ -349,8 +349,10 @@ static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp,
struct nlmsghdr *nlh;
unsigned char *b = skb_tail_pointer(skb);
- nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
- tcm = NLMSG_DATA(nlh);
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags);
+ if (!nlh)
+ goto out_nlmsg_trim;
+ tcm = nlmsg_data(nlh);
tcm->tcm_family = AF_UNSPEC;
tcm->tcm__pad1 = 0;
tcm->tcm__pad2 = 0;
@@ -368,7 +370,7 @@ static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp,
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
-nlmsg_failure:
+out_nlmsg_trim:
nla_put_failure:
nlmsg_trim(skb, b);
return -1;
@@ -418,7 +420,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
struct net_device *dev;
struct Qdisc *q;
struct tcf_proto *tp, **chain;
- struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh);
+ struct tcmsg *tcm = nlmsg_data(cb->nlh);
unsigned long cl = 0;
const struct Qdisc_class_ops *cops;
struct tcf_dump_args arg;
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 36fec422740..44f405cb9aa 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -143,7 +143,7 @@ static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp,
if (head == NULL)
goto old_method;
- iif = ((struct rtable *)dst)->rt_iif;
+ iif = inet_iif(skb);
h = route4_fastmap_hash(id, iif);
if (id == head->fastmap[h].id &&
diff --git a/net/sched/em_canid.c b/net/sched/em_canid.c
new file mode 100644
index 00000000000..bfd34e4c1af
--- /dev/null
+++ b/net/sched/em_canid.c
@@ -0,0 +1,240 @@
+/*
+ * em_canid.c Ematch rule to match CAN frames according to their CAN IDs
+ *
+ * This program is free software; you can distribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Idea: Oliver Hartkopp <oliver.hartkopp@volkswagen.de>
+ * Copyright: (c) 2011 Czech Technical University in Prague
+ * (c) 2011 Volkswagen Group Research
+ * Authors: Michal Sojka <sojkam1@fel.cvut.cz>
+ * Pavel Pisa <pisa@cmp.felk.cvut.cz>
+ * Rostislav Lisovy <lisovy@gmail.cz>
+ * Funded by: Volkswagen Group Research
+ */
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <net/pkt_cls.h>
+#include <linux/can.h>
+
+#define EM_CAN_RULES_MAX 500
+
+struct canid_match {
+ /* For each SFF CAN ID (11 bit) there is one record in this bitfield */
+ DECLARE_BITMAP(match_sff, (1 << CAN_SFF_ID_BITS));
+
+ int rules_count;
+ int sff_rules_count;
+ int eff_rules_count;
+
+ /*
+ * Raw rules copied from netlink message; Used for sending
+ * information to userspace (when 'tc filter show' is invoked)
+ * AND when matching EFF frames
+ */
+ struct can_filter rules_raw[];
+};
+
+/**
+ * em_canid_get_id() - Extracts Can ID out of the sk_buff structure.
+ */
+static canid_t em_canid_get_id(struct sk_buff *skb)
+{
+ /* CAN ID is stored within the data field */
+ struct can_frame *cf = (struct can_frame *)skb->data;
+
+ return cf->can_id;
+}
+
+static void em_canid_sff_match_add(struct canid_match *cm, u32 can_id,
+ u32 can_mask)
+{
+ int i;
+
+ /*
+ * Limit can_mask and can_id to SFF range to
+ * protect against write after end of array
+ */
+ can_mask &= CAN_SFF_MASK;
+ can_id &= can_mask;
+
+ /* Single frame */
+ if (can_mask == CAN_SFF_MASK) {
+ set_bit(can_id, cm->match_sff);
+ return;
+ }
+
+ /* All frames */
+ if (can_mask == 0) {
+ bitmap_fill(cm->match_sff, (1 << CAN_SFF_ID_BITS));
+ return;
+ }
+
+ /*
+ * Individual frame filter.
+ * Add record (set bit to 1) for each ID that
+ * conforms particular rule
+ */
+ for (i = 0; i < (1 << CAN_SFF_ID_BITS); i++) {
+ if ((i & can_mask) == can_id)
+ set_bit(i, cm->match_sff);
+ }
+}
+
+static inline struct canid_match *em_canid_priv(struct tcf_ematch *m)
+{
+ return (struct canid_match *)m->data;
+}
+
+static int em_canid_match(struct sk_buff *skb, struct tcf_ematch *m,
+ struct tcf_pkt_info *info)
+{
+ struct canid_match *cm = em_canid_priv(m);
+ canid_t can_id;
+ int match = 0;
+ int i;
+ const struct can_filter *lp;
+
+ can_id = em_canid_get_id(skb);
+
+ if (can_id & CAN_EFF_FLAG) {
+ for (i = 0, lp = cm->rules_raw;
+ i < cm->eff_rules_count; i++, lp++) {
+ if (!(((lp->can_id ^ can_id) & lp->can_mask))) {
+ match = 1;
+ break;
+ }
+ }
+ } else { /* SFF */
+ can_id &= CAN_SFF_MASK;
+ match = (test_bit(can_id, cm->match_sff) ? 1 : 0);
+ }
+
+ return match;
+}
+
+static int em_canid_change(struct tcf_proto *tp, void *data, int len,
+ struct tcf_ematch *m)
+{
+ struct can_filter *conf = data; /* Array with rules */
+ struct canid_match *cm;
+ struct canid_match *cm_old = (struct canid_match *)m->data;
+ int i;
+
+ if (!len)
+ return -EINVAL;
+
+ if (len % sizeof(struct can_filter))
+ return -EINVAL;
+
+ if (len > sizeof(struct can_filter) * EM_CAN_RULES_MAX)
+ return -EINVAL;
+
+ cm = kzalloc(sizeof(struct canid_match) + len, GFP_KERNEL);
+ if (!cm)
+ return -ENOMEM;
+
+ cm->rules_count = len / sizeof(struct can_filter);
+
+ /*
+ * We need two for() loops for copying rules into two contiguous
+ * areas in rules_raw to process all eff rules with a simple loop.
+ * NB: The configuration interface supports sff and eff rules.
+ * We do not support filters here that match for the same can_id
+ * provided in a SFF and EFF frame (e.g. 0x123 / 0x80000123).
+ * For this (unusual case) two filters have to be specified. The
+ * SFF/EFF separation is done with the CAN_EFF_FLAG in the can_id.
+ */
+
+ /* Fill rules_raw with EFF rules first */
+ for (i = 0; i < cm->rules_count; i++) {
+ if (conf[i].can_id & CAN_EFF_FLAG) {
+ memcpy(cm->rules_raw + cm->eff_rules_count,
+ &conf[i],
+ sizeof(struct can_filter));
+
+ cm->eff_rules_count++;
+ }
+ }
+
+ /* append SFF frame rules */
+ for (i = 0; i < cm->rules_count; i++) {
+ if (!(conf[i].can_id & CAN_EFF_FLAG)) {
+ memcpy(cm->rules_raw
+ + cm->eff_rules_count
+ + cm->sff_rules_count,
+ &conf[i], sizeof(struct can_filter));
+
+ cm->sff_rules_count++;
+
+ em_canid_sff_match_add(cm,
+ conf[i].can_id, conf[i].can_mask);
+ }
+ }
+
+ m->datalen = sizeof(struct canid_match) + len;
+ m->data = (unsigned long)cm;
+
+ if (cm_old != NULL) {
+ pr_err("canid: Configuring an existing ematch!\n");
+ kfree(cm_old);
+ }
+
+ return 0;
+}
+
+static void em_canid_destroy(struct tcf_proto *tp, struct tcf_ematch *m)
+{
+ struct canid_match *cm = em_canid_priv(m);
+
+ kfree(cm);
+}
+
+static int em_canid_dump(struct sk_buff *skb, struct tcf_ematch *m)
+{
+ struct canid_match *cm = em_canid_priv(m);
+
+ /*
+ * When configuring this ematch 'rules_count' is set not to exceed
+ * 'rules_raw' array size
+ */
+ if (nla_put_nohdr(skb, sizeof(struct can_filter) * cm->rules_count,
+ &cm->rules_raw) < 0)
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static struct tcf_ematch_ops em_canid_ops = {
+ .kind = TCF_EM_CANID,
+ .change = em_canid_change,
+ .match = em_canid_match,
+ .destroy = em_canid_destroy,
+ .dump = em_canid_dump,
+ .owner = THIS_MODULE,
+ .link = LIST_HEAD_INIT(em_canid_ops.link)
+};
+
+static int __init init_em_canid(void)
+{
+ return tcf_em_register(&em_canid_ops);
+}
+
+static void __exit exit_em_canid(void)
+{
+ tcf_em_unregister(&em_canid_ops);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_em_canid);
+module_exit(exit_em_canid);
+
+MODULE_ALIAS_TCF_EMATCH(TCF_EM_CANID);
diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c
new file mode 100644
index 00000000000..3130320997e
--- /dev/null
+++ b/net/sched/em_ipset.c
@@ -0,0 +1,135 @@
+/*
+ * net/sched/em_ipset.c ipset ematch
+ *
+ * Copyright (c) 2012 Florian Westphal <fw@strlen.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/gfp.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter/xt_set.h>
+#include <linux/ipv6.h>
+#include <net/ip.h>
+#include <net/pkt_cls.h>
+
+static int em_ipset_change(struct tcf_proto *tp, void *data, int data_len,
+ struct tcf_ematch *em)
+{
+ struct xt_set_info *set = data;
+ ip_set_id_t index;
+
+ if (data_len != sizeof(*set))
+ return -EINVAL;
+
+ index = ip_set_nfnl_get_byindex(set->index);
+ if (index == IPSET_INVALID_ID)
+ return -ENOENT;
+
+ em->datalen = sizeof(*set);
+ em->data = (unsigned long)kmemdup(data, em->datalen, GFP_KERNEL);
+ if (em->data)
+ return 0;
+
+ ip_set_nfnl_put(index);
+ return -ENOMEM;
+}
+
+static void em_ipset_destroy(struct tcf_proto *p, struct tcf_ematch *em)
+{
+ const struct xt_set_info *set = (const void *) em->data;
+ if (set) {
+ ip_set_nfnl_put(set->index);
+ kfree((void *) em->data);
+ }
+}
+
+static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
+ struct tcf_pkt_info *info)
+{
+ struct ip_set_adt_opt opt;
+ struct xt_action_param acpar;
+ const struct xt_set_info *set = (const void *) em->data;
+ struct net_device *dev, *indev = NULL;
+ int ret, network_offset;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ acpar.family = NFPROTO_IPV4;
+ if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
+ return 0;
+ acpar.thoff = ip_hdrlen(skb);
+ break;
+ case htons(ETH_P_IPV6):
+ acpar.family = NFPROTO_IPV6;
+ if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
+ return 0;
+ /* doesn't call ipv6_find_hdr() because ipset doesn't use thoff, yet */
+ acpar.thoff = sizeof(struct ipv6hdr);
+ break;
+ default:
+ return 0;
+ }
+
+ acpar.hooknum = 0;
+
+ opt.family = acpar.family;
+ opt.dim = set->dim;
+ opt.flags = set->flags;
+ opt.cmdflags = 0;
+ opt.timeout = ~0u;
+
+ network_offset = skb_network_offset(skb);
+ skb_pull(skb, network_offset);
+
+ dev = skb->dev;
+
+ rcu_read_lock();
+
+ if (dev && skb->skb_iif)
+ indev = dev_get_by_index_rcu(dev_net(dev), skb->skb_iif);
+
+ acpar.in = indev ? indev : dev;
+ acpar.out = dev;
+
+ ret = ip_set_test(set->index, skb, &acpar, &opt);
+
+ rcu_read_unlock();
+
+ skb_push(skb, network_offset);
+ return ret;
+}
+
+static struct tcf_ematch_ops em_ipset_ops = {
+ .kind = TCF_EM_IPSET,
+ .change = em_ipset_change,
+ .destroy = em_ipset_destroy,
+ .match = em_ipset_match,
+ .owner = THIS_MODULE,
+ .link = LIST_HEAD_INIT(em_ipset_ops.link)
+};
+
+static int __init init_em_ipset(void)
+{
+ return tcf_em_register(&em_ipset_ops);
+}
+
+static void __exit exit_em_ipset(void)
+{
+ tcf_em_unregister(&em_ipset_ops);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_DESCRIPTION("TC extended match for IP sets");
+
+module_init(init_em_ipset);
+module_exit(exit_em_ipset);
+
+MODULE_ALIAS_TCF_EMATCH(TCF_EM_IPSET);
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 4790c696cbc..4ab6e332557 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -264,7 +264,7 @@ META_COLLECTOR(int_rtiif)
if (unlikely(skb_rtable(skb) == NULL))
*err = -1;
else
- dst->value = skb_rtable(skb)->rt_iif;
+ dst->value = inet_iif(skb);
}
/**************************************************************************
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 085ce53d570..a08b4ab3e42 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -973,7 +973,7 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
struct net *net = sock_net(skb->sk);
- struct tcmsg *tcm = NLMSG_DATA(n);
+ struct tcmsg *tcm = nlmsg_data(n);
struct nlattr *tca[TCA_MAX + 1];
struct net_device *dev;
u32 clid = tcm->tcm_parent;
@@ -1046,7 +1046,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
replay:
/* Reinit, just in case something touches this. */
- tcm = NLMSG_DATA(n);
+ tcm = nlmsg_data(n);
clid = tcm->tcm_parent;
q = p = NULL;
@@ -1193,8 +1193,10 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
struct gnet_dump d;
struct qdisc_size_table *stab;
- nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
- tcm = NLMSG_DATA(nlh);
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags);
+ if (!nlh)
+ goto out_nlmsg_trim;
+ tcm = nlmsg_data(nlh);
tcm->tcm_family = AF_UNSPEC;
tcm->tcm__pad1 = 0;
tcm->tcm__pad2 = 0;
@@ -1230,7 +1232,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
-nlmsg_failure:
+out_nlmsg_trim:
nla_put_failure:
nlmsg_trim(skb, b);
return -1;
@@ -1366,7 +1368,7 @@ done:
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
struct net *net = sock_net(skb->sk);
- struct tcmsg *tcm = NLMSG_DATA(n);
+ struct tcmsg *tcm = nlmsg_data(n);
struct nlattr *tca[TCA_MAX + 1];
struct net_device *dev;
struct Qdisc *q = NULL;
@@ -1498,8 +1500,10 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
struct gnet_dump d;
const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
- nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
- tcm = NLMSG_DATA(nlh);
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags);
+ if (!nlh)
+ goto out_nlmsg_trim;
+ tcm = nlmsg_data(nlh);
tcm->tcm_family = AF_UNSPEC;
tcm->tcm__pad1 = 0;
tcm->tcm__pad2 = 0;
@@ -1525,7 +1529,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
-nlmsg_failure:
+out_nlmsg_trim:
nla_put_failure:
nlmsg_trim(skb, b);
return -1;
@@ -1616,7 +1620,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh);
+ struct tcmsg *tcm = nlmsg_data(cb->nlh);
struct net *net = sock_net(skb->sk);
struct netdev_queue *dev_queue;
struct net_device *dev;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index c412ad0d030..298c0ddfb57 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -380,7 +380,14 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
}
- skb_orphan(skb);
+ /* If a delay is expected, orphan the skb. (orphaning usually takes
+ * place at TX completion time, so _before_ the link transit delay)
+ * Ideally, this orphaning should be done after the rate limiting
+ * module, because this breaks TCP Small Queue, and other mechanisms
+ * based on socket sk_wmem_alloc.
+ */
+ if (q->latency || q->jitter)
+ skb_orphan(skb);
/*
* If we need to duplicate packet, then re-insert at top of the
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 9af01f3df18..e4723d31fdd 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -203,6 +203,34 @@ out:
return index;
}
+/* Length of the next packet (0 if the queue is empty). */
+static unsigned int qdisc_peek_len(struct Qdisc *sch)
+{
+ struct sk_buff *skb;
+
+ skb = sch->ops->peek(sch);
+ return skb ? qdisc_pkt_len(skb) : 0;
+}
+
+static void qfq_deactivate_class(struct qfq_sched *, struct qfq_class *);
+static void qfq_activate_class(struct qfq_sched *q, struct qfq_class *cl,
+ unsigned int len);
+
+static void qfq_update_class_params(struct qfq_sched *q, struct qfq_class *cl,
+ u32 lmax, u32 inv_w, int delta_w)
+{
+ int i;
+
+ /* update qfq-specific data */
+ cl->lmax = lmax;
+ cl->inv_w = inv_w;
+ i = qfq_calc_index(cl->inv_w, cl->lmax);
+
+ cl->grp = &q->groups[i];
+
+ q->wsum += delta_w;
+}
+
static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
struct nlattr **tca, unsigned long *arg)
{
@@ -250,6 +278,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
lmax = 1UL << QFQ_MTU_SHIFT;
if (cl != NULL) {
+ bool need_reactivation = false;
+
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
qdisc_root_sleeping_lock(sch),
@@ -258,12 +288,29 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
return err;
}
- if (inv_w != cl->inv_w) {
- sch_tree_lock(sch);
- q->wsum += delta_w;
- cl->inv_w = inv_w;
- sch_tree_unlock(sch);
+ if (lmax == cl->lmax && inv_w == cl->inv_w)
+ return 0; /* nothing to update */
+
+ i = qfq_calc_index(inv_w, lmax);
+ sch_tree_lock(sch);
+ if (&q->groups[i] != cl->grp && cl->qdisc->q.qlen > 0) {
+ /*
+ * shift cl->F back, to not charge the
+ * class for the not-yet-served head
+ * packet
+ */
+ cl->F = cl->S;
+ /* remove class from its slot in the old group */
+ qfq_deactivate_class(q, cl);
+ need_reactivation = true;
}
+
+ qfq_update_class_params(q, cl, lmax, inv_w, delta_w);
+
+ if (need_reactivation) /* activate in new group */
+ qfq_activate_class(q, cl, qdisc_peek_len(cl->qdisc));
+ sch_tree_unlock(sch);
+
return 0;
}
@@ -273,11 +320,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
cl->refcnt = 1;
cl->common.classid = classid;
- cl->lmax = lmax;
- cl->inv_w = inv_w;
- i = qfq_calc_index(cl->inv_w, cl->lmax);
- cl->grp = &q->groups[i];
+ qfq_update_class_params(q, cl, lmax, inv_w, delta_w);
cl->qdisc = qdisc_create_dflt(sch->dev_queue,
&pfifo_qdisc_ops, classid);
@@ -294,7 +338,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
return err;
}
}
- q->wsum += weight;
sch_tree_lock(sch);
qdisc_class_hash_insert(&q->clhash, &cl->common);
@@ -711,15 +754,6 @@ static void qfq_update_eligible(struct qfq_sched *q, u64 old_V)
}
}
-/* What is length of next packet in queue (0 if queue is empty) */
-static unsigned int qdisc_peek_len(struct Qdisc *sch)
-{
- struct sk_buff *skb;
-
- skb = sch->ops->peek(sch);
- return skb ? qdisc_pkt_len(skb) : 0;
-}
-
/*
* Updates the class, returns true if also the group needs to be updated.
*/
@@ -843,11 +877,8 @@ static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl)
static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct qfq_sched *q = qdisc_priv(sch);
- struct qfq_group *grp;
struct qfq_class *cl;
int err;
- u64 roundedS;
- int s;
cl = qfq_classify(skb, sch, &err);
if (cl == NULL) {
@@ -876,11 +907,25 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return err;
/* If reach this point, queue q was idle */
- grp = cl->grp;
+ qfq_activate_class(q, cl, qdisc_pkt_len(skb));
+
+ return err;
+}
+
+/*
+ * Handle class switch from idle to backlogged.
+ */
+static void qfq_activate_class(struct qfq_sched *q, struct qfq_class *cl,
+ unsigned int pkt_len)
+{
+ struct qfq_group *grp = cl->grp;
+ u64 roundedS;
+ int s;
+
qfq_update_start(q, cl);
/* compute new finish time and rounded start. */
- cl->F = cl->S + (u64)qdisc_pkt_len(skb) * cl->inv_w;
+ cl->F = cl->S + (u64)pkt_len * cl->inv_w;
roundedS = qfq_round_down(cl->S, grp->slot_shift);
/*
@@ -917,8 +962,6 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
skip_update:
qfq_slot_insert(grp, cl, roundedS);
-
- return err;
}
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index ca0c29695d5..47416716294 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -67,7 +67,6 @@ struct teql_master {
struct teql_sched_data {
struct Qdisc *next;
struct teql_master *m;
- struct neighbour *ncache;
struct sk_buff_head q;
};
@@ -134,7 +133,6 @@ teql_reset(struct Qdisc *sch)
skb_queue_purge(&dat->q);
sch->q.qlen = 0;
- teql_neigh_release(xchg(&dat->ncache, NULL));
}
static void
@@ -166,7 +164,6 @@ teql_destroy(struct Qdisc *sch)
}
}
skb_queue_purge(&dat->q);
- teql_neigh_release(xchg(&dat->ncache, NULL));
break;
}
@@ -225,21 +222,25 @@ static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
struct net_device *dev, struct netdev_queue *txq,
- struct neighbour *mn)
+ struct dst_entry *dst)
{
- struct teql_sched_data *q = qdisc_priv(txq->qdisc);
- struct neighbour *n = q->ncache;
+ struct neighbour *n;
+ int err = 0;
- if (mn->tbl == NULL)
- return -EINVAL;
- if (n && n->tbl == mn->tbl &&
- memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
- atomic_inc(&n->refcnt);
- } else {
- n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
- if (IS_ERR(n))
- return PTR_ERR(n);
+ n = dst_neigh_lookup_skb(dst, skb);
+ if (!n)
+ return -ENOENT;
+
+ if (dst->dev != dev) {
+ struct neighbour *mn;
+
+ mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
+ neigh_release(n);
+ if (IS_ERR(mn))
+ return PTR_ERR(mn);
+ n = mn;
}
+
if (neigh_event_send(n, skb_res) == 0) {
int err;
char haddr[MAX_ADDR_LEN];
@@ -248,15 +249,13 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
NULL, skb->len);
- if (err < 0) {
- neigh_release(n);
- return -EINVAL;
- }
- teql_neigh_release(xchg(&q->ncache, n));
- return 0;
+ if (err < 0)
+ err = -EINVAL;
+ } else {
+ err = (skb_res == NULL) ? -EAGAIN : 1;
}
neigh_release(n);
- return (skb_res == NULL) ? -EAGAIN : 1;
+ return err;
}
static inline int teql_resolve(struct sk_buff *skb,
@@ -265,7 +264,6 @@ static inline int teql_resolve(struct sk_buff *skb,
struct netdev_queue *txq)
{
struct dst_entry *dst = skb_dst(skb);
- struct neighbour *mn;
int res;
if (txq->qdisc == &noop_qdisc)
@@ -275,8 +273,7 @@ static inline int teql_resolve(struct sk_buff *skb,
return 0;
rcu_read_lock();
- mn = dst_get_neighbour_noref(dst);
- res = mn ? __teql_resolve(skb, skb_res, dev, txq, mn) : 0;
+ res = __teql_resolve(skb, skb_res, dev, txq, dst);
rcu_read_unlock();
return res;