diff options
Diffstat (limited to 'net/sched')
-rw-r--r-- | net/sched/Kconfig | 20 | ||||
-rw-r--r-- | net/sched/Makefile | 2 | ||||
-rw-r--r-- | net/sched/act_api.c | 59 | ||||
-rw-r--r-- | net/sched/act_gact.c | 14 | ||||
-rw-r--r-- | net/sched/act_ipt.c | 7 | ||||
-rw-r--r-- | net/sched/act_mirred.c | 11 | ||||
-rw-r--r-- | net/sched/act_pedit.c | 5 | ||||
-rw-r--r-- | net/sched/act_simple.c | 5 | ||||
-rw-r--r-- | net/sched/cls_api.c | 12 | ||||
-rw-r--r-- | net/sched/cls_route.c | 2 | ||||
-rw-r--r-- | net/sched/em_canid.c | 240 | ||||
-rw-r--r-- | net/sched/em_ipset.c | 135 | ||||
-rw-r--r-- | net/sched/em_meta.c | 2 | ||||
-rw-r--r-- | net/sched/sch_api.c | 24 | ||||
-rw-r--r-- | net/sched/sch_netem.c | 9 | ||||
-rw-r--r-- | net/sched/sch_qfq.c | 95 | ||||
-rw-r--r-- | net/sched/sch_teql.c | 47 |
17 files changed, 580 insertions, 109 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig index e7a8976bf25..62fb51face8 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -507,6 +507,26 @@ config NET_EMATCH_TEXT To compile this code as a module, choose M here: the module will be called em_text. +config NET_EMATCH_CANID + tristate "CAN Identifier" + depends on NET_EMATCH && CAN + ---help--- + Say Y here if you want to be able to classify CAN frames based + on CAN Identifier. + + To compile this code as a module, choose M here: the + module will be called em_canid. + +config NET_EMATCH_IPSET + tristate "IPset" + depends on NET_EMATCH && IP_SET + ---help--- + Say Y here if you want to be able to classify packets based on + ipset membership. + + To compile this code as a module, choose M here: the + module will be called em_ipset. + config NET_CLS_ACT bool "Actions" ---help--- diff --git a/net/sched/Makefile b/net/sched/Makefile index 5940a1992f0..978cbf004e8 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -55,3 +55,5 @@ obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o obj-$(CONFIG_NET_EMATCH_U32) += em_u32.o obj-$(CONFIG_NET_EMATCH_META) += em_meta.o obj-$(CONFIG_NET_EMATCH_TEXT) += em_text.o +obj-$(CONFIG_NET_EMATCH_CANID) += em_canid.o +obj-$(CONFIG_NET_EMATCH_IPSET) += em_ipset.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 5cfb160df06..e3d2c78cb52 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -652,27 +652,27 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq, unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags); - - t = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*t), flags); + if (!nlh) + goto out_nlmsg_trim; + t = nlmsg_data(nlh); t->tca_family = AF_UNSPEC; t->tca__pad1 = 0; t->tca__pad2 = 0; nest = nla_nest_start(skb, TCA_ACT_TAB); if (nest == NULL) - goto nla_put_failure; + goto out_nlmsg_trim; if (tcf_action_dump(skb, a, bind, ref) < 0) - goto nla_put_failure; + goto out_nlmsg_trim; nla_nest_end(skb, nest); nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; -nla_put_failure: -nlmsg_failure: +out_nlmsg_trim: nlmsg_trim(skb, b); return -1; } @@ -799,19 +799,21 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, if (a->ops == NULL) goto err_out; - nlh = NLMSG_PUT(skb, pid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t)); - t = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, pid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0); + if (!nlh) + goto out_module_put; + t = nlmsg_data(nlh); t->tca_family = AF_UNSPEC; t->tca__pad1 = 0; t->tca__pad2 = 0; nest = nla_nest_start(skb, TCA_ACT_TAB); if (nest == NULL) - goto nla_put_failure; + goto out_module_put; err = a->ops->walk(skb, &dcb, RTM_DELACTION, a); if (err < 0) - goto nla_put_failure; + goto out_module_put; if (err == 0) goto noflush_out; @@ -828,8 +830,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, return err; -nla_put_failure: -nlmsg_failure: +out_module_put: module_put(a->ops->owner); err_out: noflush_out: @@ -919,18 +920,20 @@ static int tcf_add_notify(struct net *net, struct tc_action *a, b = skb_tail_pointer(skb); - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags); - t = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*t), flags); + if (!nlh) + goto out_kfree_skb; + t = nlmsg_data(nlh); t->tca_family = AF_UNSPEC; t->tca__pad1 = 0; t->tca__pad2 = 0; nest = nla_nest_start(skb, TCA_ACT_TAB); if (nest == NULL) - goto nla_put_failure; + goto out_kfree_skb; if (tcf_action_dump(skb, a, 0, 0) < 0) - goto nla_put_failure; + goto out_kfree_skb; nla_nest_end(skb, nest); @@ -942,8 +945,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a, err = 0; return err; -nla_put_failure: -nlmsg_failure: +out_kfree_skb: kfree_skb(skb); return -1; } @@ -1062,7 +1064,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) struct tc_action_ops *a_o; struct tc_action a; int ret = 0; - struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh); + struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh); struct nlattr *kind = find_dump_kind(cb->nlh); if (kind == NULL) { @@ -1080,23 +1082,25 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) if (a_o->walk == NULL) { WARN(1, "tc_dump_action: %s !capable of dumping table\n", a_o->kind); - goto nla_put_failure; + goto out_module_put; } - nlh = NLMSG_PUT(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, - cb->nlh->nlmsg_type, sizeof(*t)); - t = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + cb->nlh->nlmsg_type, sizeof(*t), 0); + if (!nlh) + goto out_module_put; + t = nlmsg_data(nlh); t->tca_family = AF_UNSPEC; t->tca__pad1 = 0; t->tca__pad2 = 0; nest = nla_nest_start(skb, TCA_ACT_TAB); if (nest == NULL) - goto nla_put_failure; + goto out_module_put; ret = a_o->walk(skb, cb, RTM_GETACTION, &a); if (ret < 0) - goto nla_put_failure; + goto out_module_put; if (ret > 0) { nla_nest_end(skb, nest); @@ -1110,8 +1114,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) module_put(a_o->owner); return skb->len; -nla_put_failure: -nlmsg_failure: +out_module_put: module_put(a_o->owner); nlmsg_trim(skb, b); return skb->len; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index f10fb825644..05d60859d8e 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -67,6 +67,9 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est, struct tcf_common *pc; int ret = 0; int err; +#ifdef CONFIG_GACT_PROB + struct tc_gact_p *p_parm = NULL; +#endif if (nla == NULL) return -EINVAL; @@ -82,6 +85,12 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est, #ifndef CONFIG_GACT_PROB if (tb[TCA_GACT_PROB] != NULL) return -EOPNOTSUPP; +#else + if (tb[TCA_GACT_PROB]) { + p_parm = nla_data(tb[TCA_GACT_PROB]); + if (p_parm->ptype >= MAX_RAND) + return -EINVAL; + } #endif pc = tcf_hash_check(parm->index, a, bind, &gact_hash_info); @@ -103,8 +112,7 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est, spin_lock_bh(&gact->tcf_lock); gact->tcf_action = parm->action; #ifdef CONFIG_GACT_PROB - if (tb[TCA_GACT_PROB] != NULL) { - struct tc_gact_p *p_parm = nla_data(tb[TCA_GACT_PROB]); + if (p_parm) { gact->tcfg_paction = p_parm->paction; gact->tcfg_pval = p_parm->pval; gact->tcfg_ptype = p_parm->ptype; @@ -133,7 +141,7 @@ static int tcf_gact(struct sk_buff *skb, const struct tc_action *a, spin_lock(&gact->tcf_lock); #ifdef CONFIG_GACT_PROB - if (gact->tcfg_ptype && gact_rand[gact->tcfg_ptype] != NULL) + if (gact->tcfg_ptype) action = gact_rand[gact->tcfg_ptype](gact); else action = gact->tcf_action; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 60e281ad0f0..58fb3c7aab9 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -185,7 +185,12 @@ err3: err2: kfree(tname); err1: - kfree(pc); + if (ret == ACT_P_CREATED) { + if (est) + gen_kill_estimator(&pc->tcfc_bstats, + &pc->tcfc_rate_est); + kfree_rcu(pc, tcfc_rcu); + } return err; } diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index fe81cc18e9e..9c0fd0c7881 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -200,13 +200,12 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, out: if (err) { m->tcf_qstats.overlimits++; - /* should we be asking for packet to be dropped? - * may make sense for redirect case only - */ - retval = TC_ACT_SHOT; - } else { + if (m->tcfm_eaction != TCA_EGRESS_MIRROR) + retval = TC_ACT_SHOT; + else + retval = m->tcf_action; + } else retval = m->tcf_action; - } spin_unlock(&m->tcf_lock); return retval; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 26aa2f6ce25..45c53ab067a 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -74,7 +74,10 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est, p = to_pedit(pc); keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) { - kfree(pc); + if (est) + gen_kill_estimator(&pc->tcfc_bstats, + &pc->tcfc_rate_est); + kfree_rcu(pc, tcfc_rcu); return -ENOMEM; } ret = ACT_P_CREATED; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 3922f2a2821..3714f60f0b3 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -131,7 +131,10 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est, d = to_defact(pc); ret = alloc_defdata(d, defdata); if (ret < 0) { - kfree(pc); + if (est) + gen_kill_estimator(&pc->tcfc_bstats, + &pc->tcfc_rate_est); + kfree_rcu(pc, tcfc_rcu); return ret; } d->tcf_action = parm->action; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index f452f696b4b..6dd1131f2ec 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -140,7 +140,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) int tp_created = 0; replay: - t = NLMSG_DATA(n); + t = nlmsg_data(n); protocol = TC_H_MIN(t->tcm_info); prio = TC_H_MAJ(t->tcm_info); nprio = prio; @@ -349,8 +349,10 @@ static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); - tcm = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags); + if (!nlh) + goto out_nlmsg_trim; + tcm = nlmsg_data(nlh); tcm->tcm_family = AF_UNSPEC; tcm->tcm__pad1 = 0; tcm->tcm__pad2 = 0; @@ -368,7 +370,7 @@ static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; -nlmsg_failure: +out_nlmsg_trim: nla_put_failure: nlmsg_trim(skb, b); return -1; @@ -418,7 +420,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; struct Qdisc *q; struct tcf_proto *tp, **chain; - struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh); + struct tcmsg *tcm = nlmsg_data(cb->nlh); unsigned long cl = 0; const struct Qdisc_class_ops *cops; struct tcf_dump_args arg; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 36fec422740..44f405cb9aa 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -143,7 +143,7 @@ static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp, if (head == NULL) goto old_method; - iif = ((struct rtable *)dst)->rt_iif; + iif = inet_iif(skb); h = route4_fastmap_hash(id, iif); if (id == head->fastmap[h].id && diff --git a/net/sched/em_canid.c b/net/sched/em_canid.c new file mode 100644 index 00000000000..bfd34e4c1af --- /dev/null +++ b/net/sched/em_canid.c @@ -0,0 +1,240 @@ +/* + * em_canid.c Ematch rule to match CAN frames according to their CAN IDs + * + * This program is free software; you can distribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Idea: Oliver Hartkopp <oliver.hartkopp@volkswagen.de> + * Copyright: (c) 2011 Czech Technical University in Prague + * (c) 2011 Volkswagen Group Research + * Authors: Michal Sojka <sojkam1@fel.cvut.cz> + * Pavel Pisa <pisa@cmp.felk.cvut.cz> + * Rostislav Lisovy <lisovy@gmail.cz> + * Funded by: Volkswagen Group Research + */ + +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/skbuff.h> +#include <net/pkt_cls.h> +#include <linux/can.h> + +#define EM_CAN_RULES_MAX 500 + +struct canid_match { + /* For each SFF CAN ID (11 bit) there is one record in this bitfield */ + DECLARE_BITMAP(match_sff, (1 << CAN_SFF_ID_BITS)); + + int rules_count; + int sff_rules_count; + int eff_rules_count; + + /* + * Raw rules copied from netlink message; Used for sending + * information to userspace (when 'tc filter show' is invoked) + * AND when matching EFF frames + */ + struct can_filter rules_raw[]; +}; + +/** + * em_canid_get_id() - Extracts Can ID out of the sk_buff structure. + */ +static canid_t em_canid_get_id(struct sk_buff *skb) +{ + /* CAN ID is stored within the data field */ + struct can_frame *cf = (struct can_frame *)skb->data; + + return cf->can_id; +} + +static void em_canid_sff_match_add(struct canid_match *cm, u32 can_id, + u32 can_mask) +{ + int i; + + /* + * Limit can_mask and can_id to SFF range to + * protect against write after end of array + */ + can_mask &= CAN_SFF_MASK; + can_id &= can_mask; + + /* Single frame */ + if (can_mask == CAN_SFF_MASK) { + set_bit(can_id, cm->match_sff); + return; + } + + /* All frames */ + if (can_mask == 0) { + bitmap_fill(cm->match_sff, (1 << CAN_SFF_ID_BITS)); + return; + } + + /* + * Individual frame filter. + * Add record (set bit to 1) for each ID that + * conforms particular rule + */ + for (i = 0; i < (1 << CAN_SFF_ID_BITS); i++) { + if ((i & can_mask) == can_id) + set_bit(i, cm->match_sff); + } +} + +static inline struct canid_match *em_canid_priv(struct tcf_ematch *m) +{ + return (struct canid_match *)m->data; +} + +static int em_canid_match(struct sk_buff *skb, struct tcf_ematch *m, + struct tcf_pkt_info *info) +{ + struct canid_match *cm = em_canid_priv(m); + canid_t can_id; + int match = 0; + int i; + const struct can_filter *lp; + + can_id = em_canid_get_id(skb); + + if (can_id & CAN_EFF_FLAG) { + for (i = 0, lp = cm->rules_raw; + i < cm->eff_rules_count; i++, lp++) { + if (!(((lp->can_id ^ can_id) & lp->can_mask))) { + match = 1; + break; + } + } + } else { /* SFF */ + can_id &= CAN_SFF_MASK; + match = (test_bit(can_id, cm->match_sff) ? 1 : 0); + } + + return match; +} + +static int em_canid_change(struct tcf_proto *tp, void *data, int len, + struct tcf_ematch *m) +{ + struct can_filter *conf = data; /* Array with rules */ + struct canid_match *cm; + struct canid_match *cm_old = (struct canid_match *)m->data; + int i; + + if (!len) + return -EINVAL; + + if (len % sizeof(struct can_filter)) + return -EINVAL; + + if (len > sizeof(struct can_filter) * EM_CAN_RULES_MAX) + return -EINVAL; + + cm = kzalloc(sizeof(struct canid_match) + len, GFP_KERNEL); + if (!cm) + return -ENOMEM; + + cm->rules_count = len / sizeof(struct can_filter); + + /* + * We need two for() loops for copying rules into two contiguous + * areas in rules_raw to process all eff rules with a simple loop. + * NB: The configuration interface supports sff and eff rules. + * We do not support filters here that match for the same can_id + * provided in a SFF and EFF frame (e.g. 0x123 / 0x80000123). + * For this (unusual case) two filters have to be specified. The + * SFF/EFF separation is done with the CAN_EFF_FLAG in the can_id. + */ + + /* Fill rules_raw with EFF rules first */ + for (i = 0; i < cm->rules_count; i++) { + if (conf[i].can_id & CAN_EFF_FLAG) { + memcpy(cm->rules_raw + cm->eff_rules_count, + &conf[i], + sizeof(struct can_filter)); + + cm->eff_rules_count++; + } + } + + /* append SFF frame rules */ + for (i = 0; i < cm->rules_count; i++) { + if (!(conf[i].can_id & CAN_EFF_FLAG)) { + memcpy(cm->rules_raw + + cm->eff_rules_count + + cm->sff_rules_count, + &conf[i], sizeof(struct can_filter)); + + cm->sff_rules_count++; + + em_canid_sff_match_add(cm, + conf[i].can_id, conf[i].can_mask); + } + } + + m->datalen = sizeof(struct canid_match) + len; + m->data = (unsigned long)cm; + + if (cm_old != NULL) { + pr_err("canid: Configuring an existing ematch!\n"); + kfree(cm_old); + } + + return 0; +} + +static void em_canid_destroy(struct tcf_proto *tp, struct tcf_ematch *m) +{ + struct canid_match *cm = em_canid_priv(m); + + kfree(cm); +} + +static int em_canid_dump(struct sk_buff *skb, struct tcf_ematch *m) +{ + struct canid_match *cm = em_canid_priv(m); + + /* + * When configuring this ematch 'rules_count' is set not to exceed + * 'rules_raw' array size + */ + if (nla_put_nohdr(skb, sizeof(struct can_filter) * cm->rules_count, + &cm->rules_raw) < 0) + return -EMSGSIZE; + + return 0; +} + +static struct tcf_ematch_ops em_canid_ops = { + .kind = TCF_EM_CANID, + .change = em_canid_change, + .match = em_canid_match, + .destroy = em_canid_destroy, + .dump = em_canid_dump, + .owner = THIS_MODULE, + .link = LIST_HEAD_INIT(em_canid_ops.link) +}; + +static int __init init_em_canid(void) +{ + return tcf_em_register(&em_canid_ops); +} + +static void __exit exit_em_canid(void) +{ + tcf_em_unregister(&em_canid_ops); +} + +MODULE_LICENSE("GPL"); + +module_init(init_em_canid); +module_exit(exit_em_canid); + +MODULE_ALIAS_TCF_EMATCH(TCF_EM_CANID); diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c new file mode 100644 index 00000000000..3130320997e --- /dev/null +++ b/net/sched/em_ipset.c @@ -0,0 +1,135 @@ +/* + * net/sched/em_ipset.c ipset ematch + * + * Copyright (c) 2012 Florian Westphal <fw@strlen.de> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ + +#include <linux/gfp.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/skbuff.h> +#include <linux/netfilter/xt_set.h> +#include <linux/ipv6.h> +#include <net/ip.h> +#include <net/pkt_cls.h> + +static int em_ipset_change(struct tcf_proto *tp, void *data, int data_len, + struct tcf_ematch *em) +{ + struct xt_set_info *set = data; + ip_set_id_t index; + + if (data_len != sizeof(*set)) + return -EINVAL; + + index = ip_set_nfnl_get_byindex(set->index); + if (index == IPSET_INVALID_ID) + return -ENOENT; + + em->datalen = sizeof(*set); + em->data = (unsigned long)kmemdup(data, em->datalen, GFP_KERNEL); + if (em->data) + return 0; + + ip_set_nfnl_put(index); + return -ENOMEM; +} + +static void em_ipset_destroy(struct tcf_proto *p, struct tcf_ematch *em) +{ + const struct xt_set_info *set = (const void *) em->data; + if (set) { + ip_set_nfnl_put(set->index); + kfree((void *) em->data); + } +} + +static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, + struct tcf_pkt_info *info) +{ + struct ip_set_adt_opt opt; + struct xt_action_param acpar; + const struct xt_set_info *set = (const void *) em->data; + struct net_device *dev, *indev = NULL; + int ret, network_offset; + + switch (skb->protocol) { + case htons(ETH_P_IP): + acpar.family = NFPROTO_IPV4; + if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) + return 0; + acpar.thoff = ip_hdrlen(skb); + break; + case htons(ETH_P_IPV6): + acpar.family = NFPROTO_IPV6; + if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) + return 0; + /* doesn't call ipv6_find_hdr() because ipset doesn't use thoff, yet */ + acpar.thoff = sizeof(struct ipv6hdr); + break; + default: + return 0; + } + + acpar.hooknum = 0; + + opt.family = acpar.family; + opt.dim = set->dim; + opt.flags = set->flags; + opt.cmdflags = 0; + opt.timeout = ~0u; + + network_offset = skb_network_offset(skb); + skb_pull(skb, network_offset); + + dev = skb->dev; + + rcu_read_lock(); + + if (dev && skb->skb_iif) + indev = dev_get_by_index_rcu(dev_net(dev), skb->skb_iif); + + acpar.in = indev ? indev : dev; + acpar.out = dev; + + ret = ip_set_test(set->index, skb, &acpar, &opt); + + rcu_read_unlock(); + + skb_push(skb, network_offset); + return ret; +} + +static struct tcf_ematch_ops em_ipset_ops = { + .kind = TCF_EM_IPSET, + .change = em_ipset_change, + .destroy = em_ipset_destroy, + .match = em_ipset_match, + .owner = THIS_MODULE, + .link = LIST_HEAD_INIT(em_ipset_ops.link) +}; + +static int __init init_em_ipset(void) +{ + return tcf_em_register(&em_ipset_ops); +} + +static void __exit exit_em_ipset(void) +{ + tcf_em_unregister(&em_ipset_ops); +} + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); +MODULE_DESCRIPTION("TC extended match for IP sets"); + +module_init(init_em_ipset); +module_exit(exit_em_ipset); + +MODULE_ALIAS_TCF_EMATCH(TCF_EM_IPSET); diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 4790c696cbc..4ab6e332557 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -264,7 +264,7 @@ META_COLLECTOR(int_rtiif) if (unlikely(skb_rtable(skb) == NULL)) *err = -1; else - dst->value = skb_rtable(skb)->rt_iif; + dst->value = inet_iif(skb); } /************************************************************************** diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 085ce53d570..a08b4ab3e42 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -973,7 +973,7 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { struct net *net = sock_net(skb->sk); - struct tcmsg *tcm = NLMSG_DATA(n); + struct tcmsg *tcm = nlmsg_data(n); struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; u32 clid = tcm->tcm_parent; @@ -1046,7 +1046,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) replay: /* Reinit, just in case something touches this. */ - tcm = NLMSG_DATA(n); + tcm = nlmsg_data(n); clid = tcm->tcm_parent; q = p = NULL; @@ -1193,8 +1193,10 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, struct gnet_dump d; struct qdisc_size_table *stab; - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); - tcm = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags); + if (!nlh) + goto out_nlmsg_trim; + tcm = nlmsg_data(nlh); tcm->tcm_family = AF_UNSPEC; tcm->tcm__pad1 = 0; tcm->tcm__pad2 = 0; @@ -1230,7 +1232,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; -nlmsg_failure: +out_nlmsg_trim: nla_put_failure: nlmsg_trim(skb, b); return -1; @@ -1366,7 +1368,7 @@ done: static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { struct net *net = sock_net(skb->sk); - struct tcmsg *tcm = NLMSG_DATA(n); + struct tcmsg *tcm = nlmsg_data(n); struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; struct Qdisc *q = NULL; @@ -1498,8 +1500,10 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, struct gnet_dump d; const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); - tcm = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags); + if (!nlh) + goto out_nlmsg_trim; + tcm = nlmsg_data(nlh); tcm->tcm_family = AF_UNSPEC; tcm->tcm__pad1 = 0; tcm->tcm__pad2 = 0; @@ -1525,7 +1529,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; -nlmsg_failure: +out_nlmsg_trim: nla_put_failure: nlmsg_trim(skb, b); return -1; @@ -1616,7 +1620,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) { - struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh); + struct tcmsg *tcm = nlmsg_data(cb->nlh); struct net *net = sock_net(skb->sk); struct netdev_queue *dev_queue; struct net_device *dev; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index c412ad0d030..298c0ddfb57 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -380,7 +380,14 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; } - skb_orphan(skb); + /* If a delay is expected, orphan the skb. (orphaning usually takes + * place at TX completion time, so _before_ the link transit delay) + * Ideally, this orphaning should be done after the rate limiting + * module, because this breaks TCP Small Queue, and other mechanisms + * based on socket sk_wmem_alloc. + */ + if (q->latency || q->jitter) + skb_orphan(skb); /* * If we need to duplicate packet, then re-insert at top of the diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 9af01f3df18..e4723d31fdd 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -203,6 +203,34 @@ out: return index; } +/* Length of the next packet (0 if the queue is empty). */ +static unsigned int qdisc_peek_len(struct Qdisc *sch) +{ + struct sk_buff *skb; + + skb = sch->ops->peek(sch); + return skb ? qdisc_pkt_len(skb) : 0; +} + +static void qfq_deactivate_class(struct qfq_sched *, struct qfq_class *); +static void qfq_activate_class(struct qfq_sched *q, struct qfq_class *cl, + unsigned int len); + +static void qfq_update_class_params(struct qfq_sched *q, struct qfq_class *cl, + u32 lmax, u32 inv_w, int delta_w) +{ + int i; + + /* update qfq-specific data */ + cl->lmax = lmax; + cl->inv_w = inv_w; + i = qfq_calc_index(cl->inv_w, cl->lmax); + + cl->grp = &q->groups[i]; + + q->wsum += delta_w; +} + static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca, unsigned long *arg) { @@ -250,6 +278,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, lmax = 1UL << QFQ_MTU_SHIFT; if (cl != NULL) { + bool need_reactivation = false; + if (tca[TCA_RATE]) { err = gen_replace_estimator(&cl->bstats, &cl->rate_est, qdisc_root_sleeping_lock(sch), @@ -258,12 +288,29 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return err; } - if (inv_w != cl->inv_w) { - sch_tree_lock(sch); - q->wsum += delta_w; - cl->inv_w = inv_w; - sch_tree_unlock(sch); + if (lmax == cl->lmax && inv_w == cl->inv_w) + return 0; /* nothing to update */ + + i = qfq_calc_index(inv_w, lmax); + sch_tree_lock(sch); + if (&q->groups[i] != cl->grp && cl->qdisc->q.qlen > 0) { + /* + * shift cl->F back, to not charge the + * class for the not-yet-served head + * packet + */ + cl->F = cl->S; + /* remove class from its slot in the old group */ + qfq_deactivate_class(q, cl); + need_reactivation = true; } + + qfq_update_class_params(q, cl, lmax, inv_w, delta_w); + + if (need_reactivation) /* activate in new group */ + qfq_activate_class(q, cl, qdisc_peek_len(cl->qdisc)); + sch_tree_unlock(sch); + return 0; } @@ -273,11 +320,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->refcnt = 1; cl->common.classid = classid; - cl->lmax = lmax; - cl->inv_w = inv_w; - i = qfq_calc_index(cl->inv_w, cl->lmax); - cl->grp = &q->groups[i]; + qfq_update_class_params(q, cl, lmax, inv_w, delta_w); cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid); @@ -294,7 +338,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return err; } } - q->wsum += weight; sch_tree_lock(sch); qdisc_class_hash_insert(&q->clhash, &cl->common); @@ -711,15 +754,6 @@ static void qfq_update_eligible(struct qfq_sched *q, u64 old_V) } } -/* What is length of next packet in queue (0 if queue is empty) */ -static unsigned int qdisc_peek_len(struct Qdisc *sch) -{ - struct sk_buff *skb; - - skb = sch->ops->peek(sch); - return skb ? qdisc_pkt_len(skb) : 0; -} - /* * Updates the class, returns true if also the group needs to be updated. */ @@ -843,11 +877,8 @@ static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl) static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct qfq_sched *q = qdisc_priv(sch); - struct qfq_group *grp; struct qfq_class *cl; int err; - u64 roundedS; - int s; cl = qfq_classify(skb, sch, &err); if (cl == NULL) { @@ -876,11 +907,25 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) return err; /* If reach this point, queue q was idle */ - grp = cl->grp; + qfq_activate_class(q, cl, qdisc_pkt_len(skb)); + + return err; +} + +/* + * Handle class switch from idle to backlogged. + */ +static void qfq_activate_class(struct qfq_sched *q, struct qfq_class *cl, + unsigned int pkt_len) +{ + struct qfq_group *grp = cl->grp; + u64 roundedS; + int s; + qfq_update_start(q, cl); /* compute new finish time and rounded start. */ - cl->F = cl->S + (u64)qdisc_pkt_len(skb) * cl->inv_w; + cl->F = cl->S + (u64)pkt_len * cl->inv_w; roundedS = qfq_round_down(cl->S, grp->slot_shift); /* @@ -917,8 +962,6 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) skip_update: qfq_slot_insert(grp, cl, roundedS); - - return err; } diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index ca0c29695d5..47416716294 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -67,7 +67,6 @@ struct teql_master { struct teql_sched_data { struct Qdisc *next; struct teql_master *m; - struct neighbour *ncache; struct sk_buff_head q; }; @@ -134,7 +133,6 @@ teql_reset(struct Qdisc *sch) skb_queue_purge(&dat->q); sch->q.qlen = 0; - teql_neigh_release(xchg(&dat->ncache, NULL)); } static void @@ -166,7 +164,6 @@ teql_destroy(struct Qdisc *sch) } } skb_queue_purge(&dat->q); - teql_neigh_release(xchg(&dat->ncache, NULL)); break; } @@ -225,21 +222,25 @@ static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt) static int __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev, struct netdev_queue *txq, - struct neighbour *mn) + struct dst_entry *dst) { - struct teql_sched_data *q = qdisc_priv(txq->qdisc); - struct neighbour *n = q->ncache; + struct neighbour *n; + int err = 0; - if (mn->tbl == NULL) - return -EINVAL; - if (n && n->tbl == mn->tbl && - memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) { - atomic_inc(&n->refcnt); - } else { - n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev); - if (IS_ERR(n)) - return PTR_ERR(n); + n = dst_neigh_lookup_skb(dst, skb); + if (!n) + return -ENOENT; + + if (dst->dev != dev) { + struct neighbour *mn; + + mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev); + neigh_release(n); + if (IS_ERR(mn)) + return PTR_ERR(mn); + n = mn; } + if (neigh_event_send(n, skb_res) == 0) { int err; char haddr[MAX_ADDR_LEN]; @@ -248,15 +249,13 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr, NULL, skb->len); - if (err < 0) { - neigh_release(n); - return -EINVAL; - } - teql_neigh_release(xchg(&q->ncache, n)); - return 0; + if (err < 0) + err = -EINVAL; + } else { + err = (skb_res == NULL) ? -EAGAIN : 1; } neigh_release(n); - return (skb_res == NULL) ? -EAGAIN : 1; + return err; } static inline int teql_resolve(struct sk_buff *skb, @@ -265,7 +264,6 @@ static inline int teql_resolve(struct sk_buff *skb, struct netdev_queue *txq) { struct dst_entry *dst = skb_dst(skb); - struct neighbour *mn; int res; if (txq->qdisc == &noop_qdisc) @@ -275,8 +273,7 @@ static inline int teql_resolve(struct sk_buff *skb, return 0; rcu_read_lock(); - mn = dst_get_neighbour_noref(dst); - res = mn ? __teql_resolve(skb, skb_res, dev, txq, mn) : 0; + res = __teql_resolve(skb, skb_res, dev, txq, dst); rcu_read_unlock(); return res; |