diff options
Diffstat (limited to 'net/sched')
-rw-r--r-- | net/sched/Kconfig | 24 | ||||
-rw-r--r-- | net/sched/Makefile | 2 | ||||
-rw-r--r-- | net/sched/act_bpf.c | 208 | ||||
-rw-r--r-- | net/sched/act_connmark.c | 192 | ||||
-rw-r--r-- | net/sched/act_csum.c | 2 | ||||
-rw-r--r-- | net/sched/cls_basic.c | 7 | ||||
-rw-r--r-- | net/sched/cls_bpf.c | 18 | ||||
-rw-r--r-- | net/sched/cls_flow.c | 8 | ||||
-rw-r--r-- | net/sched/em_ipset.c | 2 | ||||
-rw-r--r-- | net/sched/em_meta.c | 4 | ||||
-rw-r--r-- | net/sched/sch_api.c | 2 | ||||
-rw-r--r-- | net/sched/sch_dsmark.c | 6 | ||||
-rw-r--r-- | net/sched/sch_fq.c | 33 | ||||
-rw-r--r-- | net/sched/sch_teql.c | 11 |
14 files changed, 481 insertions, 38 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig index c54c9d9d1ff..899d0319f2b 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -698,6 +698,30 @@ config NET_ACT_VLAN To compile this code as a module, choose M here: the module will be called act_vlan. +config NET_ACT_BPF + tristate "BPF based action" + depends on NET_CLS_ACT + ---help--- + Say Y here to execute BPF code on packets. The BPF code will decide + if the packet should be dropped or not. + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called act_bpf. + +config NET_ACT_CONNMARK + tristate "Netfilter Connection Mark Retriever" + depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES + depends on NF_CONNTRACK && NF_CONNTRACK_MARK + ---help--- + Say Y here to allow retrieving of conn mark + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called act_connmark. + config NET_CLS_IND bool "Incoming device classification" depends on NET_CLS_U32 || NET_CLS_FW diff --git a/net/sched/Makefile b/net/sched/Makefile index 679f24ae7f9..7ca7f4c1b8c 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -17,6 +17,8 @@ obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o +obj-$(CONFIG_NET_ACT_BPF) += act_bpf.o +obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c new file mode 100644 index 00000000000..82c5d7fc198 --- /dev/null +++ b/net/sched/act_bpf.c @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> +#include <linux/filter.h> +#include <net/netlink.h> +#include <net/pkt_sched.h> + +#include <linux/tc_act/tc_bpf.h> +#include <net/tc_act/tc_bpf.h> + +#define BPF_TAB_MASK 15 + +static int tcf_bpf(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_bpf *b = a->priv; + int action; + int filter_res; + + spin_lock(&b->tcf_lock); + b->tcf_tm.lastuse = jiffies; + bstats_update(&b->tcf_bstats, skb); + action = b->tcf_action; + + filter_res = BPF_PROG_RUN(b->filter, skb); + if (filter_res == 0) { + /* Return code 0 from the BPF program + * is being interpreted as a drop here. + */ + action = TC_ACT_SHOT; + b->tcf_qstats.drops++; + } + + spin_unlock(&b->tcf_lock); + return action; +} + +static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *tp = skb_tail_pointer(skb); + struct tcf_bpf *b = a->priv; + struct tc_act_bpf opt = { + .index = b->tcf_index, + .refcnt = b->tcf_refcnt - ref, + .bindcnt = b->tcf_bindcnt - bind, + .action = b->tcf_action, + }; + struct tcf_t t; + struct nlattr *nla; + + if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + + if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, b->bpf_num_ops)) + goto nla_put_failure; + + nla = nla_reserve(skb, TCA_ACT_BPF_OPS, b->bpf_num_ops * + sizeof(struct sock_filter)); + if (!nla) + goto nla_put_failure; + + memcpy(nla_data(nla), b->bpf_ops, nla_len(nla)); + + t.install = jiffies_to_clock_t(jiffies - b->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - b->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(b->tcf_tm.expires); + if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(t), &t)) + goto nla_put_failure; + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, tp); + return -1; +} + +static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = { + [TCA_ACT_BPF_PARMS] = { .len = sizeof(struct tc_act_bpf) }, + [TCA_ACT_BPF_OPS_LEN] = { .type = NLA_U16 }, + [TCA_ACT_BPF_OPS] = { .type = NLA_BINARY, + .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, +}; + +static int tcf_bpf_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int ovr, int bind) +{ + struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; + struct tc_act_bpf *parm; + struct tcf_bpf *b; + u16 bpf_size, bpf_num_ops; + struct sock_filter *bpf_ops; + struct sock_fprog_kern tmp; + struct bpf_prog *fp; + int ret; + + if (!nla) + return -EINVAL; + + ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy); + if (ret < 0) + return ret; + + if (!tb[TCA_ACT_BPF_PARMS] || + !tb[TCA_ACT_BPF_OPS_LEN] || !tb[TCA_ACT_BPF_OPS]) + return -EINVAL; + parm = nla_data(tb[TCA_ACT_BPF_PARMS]); + + bpf_num_ops = nla_get_u16(tb[TCA_ACT_BPF_OPS_LEN]); + if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) + return -EINVAL; + + bpf_size = bpf_num_ops * sizeof(*bpf_ops); + if (bpf_size != nla_len(tb[TCA_ACT_BPF_OPS])) + return -EINVAL; + + bpf_ops = kzalloc(bpf_size, GFP_KERNEL); + if (!bpf_ops) + return -ENOMEM; + + memcpy(bpf_ops, nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size); + + tmp.len = bpf_num_ops; + tmp.filter = bpf_ops; + + ret = bpf_prog_create(&fp, &tmp); + if (ret) + goto free_bpf_ops; + + if (!tcf_hash_check(parm->index, a, bind)) { + ret = tcf_hash_create(parm->index, est, a, sizeof(*b), bind); + if (ret) + goto destroy_fp; + + ret = ACT_P_CREATED; + } else { + if (bind) + goto destroy_fp; + tcf_hash_release(a, bind); + if (!ovr) { + ret = -EEXIST; + goto destroy_fp; + } + } + + b = to_bpf(a); + spin_lock_bh(&b->tcf_lock); + b->tcf_action = parm->action; + b->bpf_num_ops = bpf_num_ops; + b->bpf_ops = bpf_ops; + b->filter = fp; + spin_unlock_bh(&b->tcf_lock); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(a); + return ret; + +destroy_fp: + bpf_prog_destroy(fp); +free_bpf_ops: + kfree(bpf_ops); + return ret; +} + +static void tcf_bpf_cleanup(struct tc_action *a, int bind) +{ + struct tcf_bpf *b = a->priv; + + bpf_prog_destroy(b->filter); +} + +static struct tc_action_ops act_bpf_ops = { + .kind = "bpf", + .type = TCA_ACT_BPF, + .owner = THIS_MODULE, + .act = tcf_bpf, + .dump = tcf_bpf_dump, + .cleanup = tcf_bpf_cleanup, + .init = tcf_bpf_init, +}; + +static int __init bpf_init_module(void) +{ + return tcf_register_action(&act_bpf_ops, BPF_TAB_MASK); +} + +static void __exit bpf_cleanup_module(void) +{ + tcf_unregister_action(&act_bpf_ops); +} + +module_init(bpf_init_module); +module_exit(bpf_cleanup_module); + +MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>"); +MODULE_DESCRIPTION("TC BPF based action"); +MODULE_LICENSE("GPL v2"); diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c new file mode 100644 index 00000000000..8e472518f9f --- /dev/null +++ b/net/sched/act_connmark.c @@ -0,0 +1,192 @@ +/* + * net/sched/act_connmark.c netfilter connmark retriever action + * skb mark is over-written + * + * Copyright (c) 2011 Felix Fietkau <nbd@openwrt.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. +*/ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> +#include <linux/pkt_cls.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <net/netlink.h> +#include <net/pkt_sched.h> +#include <net/act_api.h> +#include <uapi/linux/tc_act/tc_connmark.h> +#include <net/tc_act/tc_connmark.h> + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_zones.h> + +#define CONNMARK_TAB_MASK 3 + +static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + const struct nf_conntrack_tuple_hash *thash; + struct nf_conntrack_tuple tuple; + enum ip_conntrack_info ctinfo; + struct tcf_connmark_info *ca = a->priv; + struct nf_conn *c; + int proto; + + spin_lock(&ca->tcf_lock); + ca->tcf_tm.lastuse = jiffies; + bstats_update(&ca->tcf_bstats, skb); + + if (skb->protocol == htons(ETH_P_IP)) { + if (skb->len < sizeof(struct iphdr)) + goto out; + + proto = NFPROTO_IPV4; + } else if (skb->protocol == htons(ETH_P_IPV6)) { + if (skb->len < sizeof(struct ipv6hdr)) + goto out; + + proto = NFPROTO_IPV6; + } else { + goto out; + } + + c = nf_ct_get(skb, &ctinfo); + if (c) { + skb->mark = c->mark; + /* using overlimits stats to count how many packets marked */ + ca->tcf_qstats.overlimits++; + nf_ct_put(c); + goto out; + } + + if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), + proto, &tuple)) + goto out; + + thash = nf_conntrack_find_get(dev_net(skb->dev), ca->zone, &tuple); + if (!thash) + goto out; + + c = nf_ct_tuplehash_to_ctrack(thash); + /* using overlimits stats to count how many packets marked */ + ca->tcf_qstats.overlimits++; + skb->mark = c->mark; + nf_ct_put(c); + +out: + skb->nfct = NULL; + spin_unlock(&ca->tcf_lock); + return ca->tcf_action; +} + +static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = { + [TCA_CONNMARK_PARMS] = { .len = sizeof(struct tc_connmark) }, +}; + +static int tcf_connmark_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int ovr, int bind) +{ + struct nlattr *tb[TCA_CONNMARK_MAX + 1]; + struct tcf_connmark_info *ci; + struct tc_connmark *parm; + int ret = 0; + + if (!nla) + return -EINVAL; + + ret = nla_parse_nested(tb, TCA_CONNMARK_MAX, nla, connmark_policy); + if (ret < 0) + return ret; + + parm = nla_data(tb[TCA_CONNMARK_PARMS]); + + if (!tcf_hash_check(parm->index, a, bind)) { + ret = tcf_hash_create(parm->index, est, a, sizeof(*ci), bind); + if (ret) + return ret; + + ci = to_connmark(a); + ci->tcf_action = parm->action; + ci->zone = parm->zone; + + tcf_hash_insert(a); + ret = ACT_P_CREATED; + } else { + ci = to_connmark(a); + if (bind) + return 0; + tcf_hash_release(a, bind); + if (!ovr) + return -EEXIST; + /* replacing action and zone */ + ci->tcf_action = parm->action; + ci->zone = parm->zone; + } + + return ret; +} + +static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_connmark_info *ci = a->priv; + + struct tc_connmark opt = { + .index = ci->tcf_index, + .refcnt = ci->tcf_refcnt - ref, + .bindcnt = ci->tcf_bindcnt - bind, + .action = ci->tcf_action, + .zone = ci->zone, + }; + struct tcf_t t; + + if (nla_put(skb, TCA_CONNMARK_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + + t.install = jiffies_to_clock_t(jiffies - ci->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - ci->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(ci->tcf_tm.expires); + if (nla_put(skb, TCA_CONNMARK_TM, sizeof(t), &t)) + goto nla_put_failure; + + return skb->len; +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static struct tc_action_ops act_connmark_ops = { + .kind = "connmark", + .type = TCA_ACT_CONNMARK, + .owner = THIS_MODULE, + .act = tcf_connmark, + .dump = tcf_connmark_dump, + .init = tcf_connmark_init, +}; + +static int __init connmark_init_module(void) +{ + return tcf_register_action(&act_connmark_ops, CONNMARK_TAB_MASK); +} + +static void __exit connmark_cleanup_module(void) +{ + tcf_unregister_action(&act_connmark_ops); +} + +module_init(connmark_init_module); +module_exit(connmark_cleanup_module); +MODULE_AUTHOR("Felix Fietkau <nbd@openwrt.org>"); +MODULE_DESCRIPTION("Connection tracking mark restoring"); +MODULE_LICENSE("GPL"); + diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index edbf40dac70..4cd5cf1aedf 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -509,7 +509,7 @@ static int tcf_csum(struct sk_buff *skb, if (unlikely(action == TC_ACT_SHOT)) goto drop; - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case cpu_to_be16(ETH_P_IP): if (!tcf_csum_ipv4(skb, update_flags)) goto drop; diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 5aed341406c..fc399db86f1 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -65,9 +65,12 @@ static unsigned long basic_get(struct tcf_proto *tp, u32 handle) if (head == NULL) return 0UL; - list_for_each_entry(f, &head->flist, link) - if (f->handle == handle) + list_for_each_entry(f, &head->flist, link) { + if (f->handle == handle) { l = (unsigned long) f; + break; + } + } return l; } diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index f59adf8a4cd..5f3ee9e4b5b 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -37,7 +37,7 @@ struct cls_bpf_prog { struct tcf_result res; struct list_head link; u32 handle; - u16 bpf_len; + u16 bpf_num_ops; struct tcf_proto *tp; struct rcu_head rcu; }; @@ -160,7 +160,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, struct tcf_exts exts; struct sock_fprog_kern tmp; struct bpf_prog *fp; - u16 bpf_size, bpf_len; + u16 bpf_size, bpf_num_ops; u32 classid; int ret; @@ -173,13 +173,13 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, return ret; classid = nla_get_u32(tb[TCA_BPF_CLASSID]); - bpf_len = nla_get_u16(tb[TCA_BPF_OPS_LEN]); - if (bpf_len > BPF_MAXINSNS || bpf_len == 0) { + bpf_num_ops = nla_get_u16(tb[TCA_BPF_OPS_LEN]); + if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) { ret = -EINVAL; goto errout; } - bpf_size = bpf_len * sizeof(*bpf_ops); + bpf_size = bpf_num_ops * sizeof(*bpf_ops); if (bpf_size != nla_len(tb[TCA_BPF_OPS])) { ret = -EINVAL; goto errout; @@ -193,14 +193,14 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size); - tmp.len = bpf_len; + tmp.len = bpf_num_ops; tmp.filter = bpf_ops; ret = bpf_prog_create(&fp, &tmp); if (ret) goto errout_free; - prog->bpf_len = bpf_len; + prog->bpf_num_ops = bpf_num_ops; prog->bpf_ops = bpf_ops; prog->filter = fp; prog->res.classid = classid; @@ -314,10 +314,10 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid)) goto nla_put_failure; - if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_len)) + if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_num_ops)) goto nla_put_failure; - nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_len * + nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_num_ops * sizeof(struct sock_filter)); if (nla == NULL) goto nla_put_failure; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 15d68f24a52..461410394d0 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -77,7 +77,7 @@ static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow) { if (flow->dst) return ntohl(flow->dst); - return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol; + return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); } static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow) @@ -98,7 +98,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys if (flow->ports) return ntohs(flow->port16[1]); - return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol; + return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); } static u32 flow_get_iif(const struct sk_buff *skb) @@ -144,7 +144,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb) static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow) { - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, src.u3.ip)); case htons(ETH_P_IPV6): @@ -156,7 +156,7 @@ fallback: static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow) { - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, dst.u3.ip)); case htons(ETH_P_IPV6): diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c index 5b4a4efe468..a3d79c8bf3b 100644 --- a/net/sched/em_ipset.c +++ b/net/sched/em_ipset.c @@ -59,7 +59,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, struct net_device *dev, *indev = NULL; int ret, network_offset; - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): acpar.family = NFPROTO_IPV4; if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index c8f8c399b99..b5294ce20cd 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -176,7 +176,7 @@ META_COLLECTOR(int_vlan_tag) { unsigned short tag; - tag = vlan_tx_tag_get(skb); + tag = skb_vlan_tag_get(skb); if (!tag && __vlan_get_tag(skb, &tag)) *err = -1; else @@ -197,7 +197,7 @@ META_COLLECTOR(int_priority) META_COLLECTOR(int_protocol) { /* Let userspace take care of the byte ordering */ - dst->value = skb->protocol; + dst->value = tc_skb_protocol(skb); } META_COLLECTOR(int_pkttype) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 76f402e05bd..243b7d169d6 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1807,7 +1807,7 @@ done: int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { - __be16 protocol = skb->protocol; + __be16 protocol = tc_skb_protocol(skb); int err; for (; tp; tp = rcu_dereference_bh(tp->next)) { diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 227114f27f9..66700a6116a 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -203,7 +203,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p); if (p->set_tc_index) { - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): if (skb_cow_head(skb, sizeof(struct iphdr))) goto drop; @@ -289,7 +289,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) index = skb->tc_index & (p->indices - 1); pr_debug("index %d->%d\n", skb->tc_index, index); - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): ipv4_change_dsfield(ip_hdr(skb), p->mask[index], p->value[index]); @@ -306,7 +306,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) */ if (p->mask[index] != 0xff || p->value[index]) pr_warn("%s: unsupported protocol %d\n", - __func__, ntohs(skb->protocol)); + __func__, ntohs(tc_skb_protocol(skb))); break; } diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 333cd94ba38..dfcea20e317 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -1,7 +1,7 @@ /* * net/sched/sch_fq.c Fair Queue Packet Scheduler (per flow pacing) * - * Copyright (C) 2013 Eric Dumazet <edumazet@google.com> + * Copyright (C) 2013-2015 Eric Dumazet <edumazet@google.com> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -52,6 +52,7 @@ #include <net/pkt_sched.h> #include <net/sock.h> #include <net/tcp_states.h> +#include <net/tcp.h> /* * Per flow structure, dynamically allocated @@ -92,6 +93,7 @@ struct fq_sched_data { u32 flow_refill_delay; u32 flow_max_rate; /* optional max rate per flow */ u32 flow_plimit; /* max packets per flow */ + u32 orphan_mask; /* mask for orphaned skb */ struct rb_root *fq_root; u8 rate_enable; u8 fq_trees_log; @@ -222,11 +224,20 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL)) return &q->internal; - if (unlikely(!sk)) { + /* SYNACK messages are attached to a listener socket. + * 1) They are not part of a 'flow' yet + * 2) We do not want to rate limit them (eg SYNFLOOD attack), + * especially if the listener set SO_MAX_PACING_RATE + * 3) We pretend they are orphaned + */ + if (!sk || sk->sk_state == TCP_LISTEN) { + unsigned long hash = skb_get_hash(skb) & q->orphan_mask; + /* By forcing low order bit to 1, we make sure to not * collide with a local flow (socket pointers are word aligned) */ - sk = (struct sock *)(skb_get_hash(skb) | 1L); + sk = (struct sock *)((hash << 1) | 1UL); + skb_orphan(skb); } root = &q->fq_root[hash_32((u32)(long)sk, q->fq_trees_log)]; @@ -445,7 +456,9 @@ begin: goto begin; } - if (unlikely(f->head && now < f->time_next_packet)) { + skb = f->head; + if (unlikely(skb && now < f->time_next_packet && + !skb_is_tcp_pure_ack(skb))) { head->first = f->next; fq_flow_set_throttled(q, f); goto begin; @@ -464,14 +477,17 @@ begin: goto begin; } prefetch(&skb->end); - f->time_next_packet = now; f->credit -= qdisc_pkt_len(skb); if (f->credit > 0 || !q->rate_enable) goto out; + /* Do not pace locally generated ack packets */ + if (skb_is_tcp_pure_ack(skb)) + goto out; + rate = q->flow_max_rate; - if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) + if (skb->sk) rate = min(skb->sk->sk_pacing_rate, rate); if (rate != ~0U) { @@ -704,6 +720,9 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) q->flow_refill_delay = usecs_to_jiffies(usecs_delay); } + if (tb[TCA_FQ_ORPHAN_MASK]) + q->orphan_mask = nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]); + if (!err) { sch_tree_unlock(sch); err = fq_resize(sch, fq_log); @@ -749,6 +768,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt) q->delayed = RB_ROOT; q->fq_root = NULL; q->fq_trees_log = ilog2(1024); + q->orphan_mask = 1024 - 1; qdisc_watchdog_init(&q->watchdog, sch); if (opt) @@ -778,6 +798,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) || nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY, jiffies_to_usecs(q->flow_refill_delay)) || + nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) || nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log)) goto nla_put_failure; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 6ada42396a2..e02687185a5 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -122,13 +122,6 @@ teql_peek(struct Qdisc *sch) return NULL; } -static inline void -teql_neigh_release(struct neighbour *n) -{ - if (n) - neigh_release(n); -} - static void teql_reset(struct Qdisc *sch) { @@ -249,8 +242,8 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, char haddr[MAX_ADDR_LEN]; neigh_ha_snapshot(haddr, n, dev); - err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr, - NULL, skb->len); + err = dev_hard_header(skb, dev, ntohs(tc_skb_protocol(skb)), + haddr, NULL, skb->len); if (err < 0) err = -EINVAL; |