diff options
Diffstat (limited to 'net/sched')
-rw-r--r-- | net/sched/Kconfig | 17 | ||||
-rw-r--r-- | net/sched/Makefile | 1 | ||||
-rw-r--r-- | net/sched/act_api.c | 8 | ||||
-rw-r--r-- | net/sched/act_mirred.c | 3 | ||||
-rw-r--r-- | net/sched/act_nat.c | 322 | ||||
-rw-r--r-- | net/sched/act_police.c | 9 | ||||
-rw-r--r-- | net/sched/cls_api.c | 4 | ||||
-rw-r--r-- | net/sched/cls_u32.c | 4 | ||||
-rw-r--r-- | net/sched/em_meta.c | 2 | ||||
-rw-r--r-- | net/sched/sch_api.c | 35 | ||||
-rw-r--r-- | net/sched/sch_atm.c | 3 | ||||
-rw-r--r-- | net/sched/sch_cbq.c | 4 | ||||
-rw-r--r-- | net/sched/sch_generic.c | 31 | ||||
-rw-r--r-- | net/sched/sch_htb.c | 6 | ||||
-rw-r--r-- | net/sched/sch_ingress.c | 3 | ||||
-rw-r--r-- | net/sched/sch_prio.c | 15 | ||||
-rw-r--r-- | net/sched/sch_sfq.c | 53 | ||||
-rw-r--r-- | net/sched/sch_tbf.c | 4 | ||||
-rw-r--r-- | net/sched/sch_teql.c | 12 |
19 files changed, 446 insertions, 90 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig index d3f7c3f9407..92435a882fa 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -97,7 +97,7 @@ config NET_SCH_ATM select classes of this queuing discipline. Each class maps the flow(s) it is handling to a given virtual circuit. - See the top of <file:net/sched/sch_atm.c>) for more details. + See the top of <file:net/sched/sch_atm.c> for more details. To compile this code as a module, choose M here: the module will be called sch_atm. @@ -137,7 +137,7 @@ config NET_SCH_SFQ tristate "Stochastic Fairness Queueing (SFQ)" ---help--- Say Y here if you want to use the Stochastic Fairness Queueing (SFQ) - packet scheduling algorithm . + packet scheduling algorithm. See the top of <file:net/sched/sch_sfq.c> for more details. @@ -306,7 +306,7 @@ config NET_CLS_RSVP6 is important for real time data such as streaming sound or video. Say Y here if you want to be able to classify outgoing packets based - on their RSVP requests and you are using the IPv6. + on their RSVP requests and you are using the IPv6 protocol. To compile this code as a module, choose M here: the module will be called cls_rsvp6. @@ -447,6 +447,17 @@ config NET_ACT_IPT To compile this code as a module, choose M here: the module will be called ipt. +config NET_ACT_NAT + tristate "Stateless NAT" + depends on NET_CLS_ACT + select NETFILTER + ---help--- + Say Y here to do stateless NAT on IPv4 packets. You should use + netfilter for NAT unless you know what you are doing. + + To compile this code as a module, choose M here: the + module will be called nat. + config NET_ACT_PEDIT tristate "Packet Editing" depends on NET_CLS_ACT diff --git a/net/sched/Makefile b/net/sched/Makefile index b67c36f65cf..81ecbe8e7dc 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_NET_ACT_POLICE) += act_police.o obj-$(CONFIG_NET_ACT_GACT) += act_gact.o obj-$(CONFIG_NET_ACT_MIRRED) += act_mirred.o obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o +obj-$(CONFIG_NET_ACT_NAT) += act_nat.o obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index feef366cad5..72cdb0fade2 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -68,7 +68,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, int err = 0, index = -1,i = 0, s_i = 0, n_i = 0; struct rtattr *r ; - read_lock(hinfo->lock); + read_lock_bh(hinfo->lock); s_i = cb->args[0]; @@ -96,7 +96,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, } } done: - read_unlock(hinfo->lock); + read_unlock_bh(hinfo->lock); if (n_i) cb->args[0] += n_i; return n_i; @@ -156,13 +156,13 @@ struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo) { struct tcf_common *p; - read_lock(hinfo->lock); + read_lock_bh(hinfo->lock); for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p; p = p->tcfc_next) { if (p->tcfc_index == index) break; } - read_unlock(hinfo->lock); + read_unlock_bh(hinfo->lock); return p; } diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 579578944ae..fd7bca4d5c2 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -20,6 +20,7 @@ #include <linux/rtnetlink.h> #include <linux/module.h> #include <linux/init.h> +#include <net/net_namespace.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <linux/tc_act/tc_mirred.h> @@ -73,7 +74,7 @@ static int tcf_mirred_init(struct rtattr *rta, struct rtattr *est, parm = RTA_DATA(tb[TCA_MIRRED_PARMS-1]); if (parm->ifindex) { - dev = __dev_get_by_index(parm->ifindex); + dev = __dev_get_by_index(&init_net, parm->ifindex); if (dev == NULL) return -ENODEV; switch (dev->type) { diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c new file mode 100644 index 00000000000..c96273bcaf9 --- /dev/null +++ b/net/sched/act_nat.c @@ -0,0 +1,322 @@ +/* + * Stateless NAT actions + * + * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/netfilter.h> +#include <linux/rtnetlink.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/tc_act/tc_nat.h> +#include <net/act_api.h> +#include <net/icmp.h> +#include <net/ip.h> +#include <net/netlink.h> +#include <net/tc_act/tc_nat.h> +#include <net/tcp.h> +#include <net/udp.h> + + +#define NAT_TAB_MASK 15 +static struct tcf_common *tcf_nat_ht[NAT_TAB_MASK + 1]; +static u32 nat_idx_gen; +static DEFINE_RWLOCK(nat_lock); + +static struct tcf_hashinfo nat_hash_info = { + .htab = tcf_nat_ht, + .hmask = NAT_TAB_MASK, + .lock = &nat_lock, +}; + +static int tcf_nat_init(struct rtattr *rta, struct rtattr *est, + struct tc_action *a, int ovr, int bind) +{ + struct rtattr *tb[TCA_NAT_MAX]; + struct tc_nat *parm; + int ret = 0; + struct tcf_nat *p; + struct tcf_common *pc; + + if (rta == NULL || rtattr_parse_nested(tb, TCA_NAT_MAX, rta) < 0) + return -EINVAL; + + if (tb[TCA_NAT_PARMS - 1] == NULL || + RTA_PAYLOAD(tb[TCA_NAT_PARMS - 1]) < sizeof(*parm)) + return -EINVAL; + parm = RTA_DATA(tb[TCA_NAT_PARMS - 1]); + + pc = tcf_hash_check(parm->index, a, bind, &nat_hash_info); + if (!pc) { + pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, + &nat_idx_gen, &nat_hash_info); + if (unlikely(!pc)) + return -ENOMEM; + p = to_tcf_nat(pc); + ret = ACT_P_CREATED; + } else { + p = to_tcf_nat(pc); + if (!ovr) { + tcf_hash_release(pc, bind, &nat_hash_info); + return -EEXIST; + } + } + + spin_lock_bh(&p->tcf_lock); + p->old_addr = parm->old_addr; + p->new_addr = parm->new_addr; + p->mask = parm->mask; + p->flags = parm->flags; + + p->tcf_action = parm->action; + spin_unlock_bh(&p->tcf_lock); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(pc, &nat_hash_info); + + return ret; +} + +static int tcf_nat_cleanup(struct tc_action *a, int bind) +{ + struct tcf_nat *p = a->priv; + + return tcf_hash_release(&p->common, bind, &nat_hash_info); +} + +static int tcf_nat(struct sk_buff *skb, struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_nat *p = a->priv; + struct iphdr *iph; + __be32 old_addr; + __be32 new_addr; + __be32 mask; + __be32 addr; + int egress; + int action; + int ihl; + + spin_lock(&p->tcf_lock); + + p->tcf_tm.lastuse = jiffies; + old_addr = p->old_addr; + new_addr = p->new_addr; + mask = p->mask; + egress = p->flags & TCA_NAT_FLAG_EGRESS; + action = p->tcf_action; + + p->tcf_bstats.bytes += skb->len; + p->tcf_bstats.packets++; + + spin_unlock(&p->tcf_lock); + + if (unlikely(action == TC_ACT_SHOT)) + goto drop; + + if (!pskb_may_pull(skb, sizeof(*iph))) + goto drop; + + iph = ip_hdr(skb); + + if (egress) + addr = iph->saddr; + else + addr = iph->daddr; + + if (!((old_addr ^ addr) & mask)) { + if (skb_cloned(skb) && + !skb_clone_writable(skb, sizeof(*iph)) && + pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + goto drop; + + new_addr &= mask; + new_addr |= addr & ~mask; + + /* Rewrite IP header */ + iph = ip_hdr(skb); + if (egress) + iph->saddr = new_addr; + else + iph->daddr = new_addr; + + nf_csum_replace4(&iph->check, addr, new_addr); + } + + ihl = iph->ihl * 4; + + /* It would be nice to share code with stateful NAT. */ + switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) { + case IPPROTO_TCP: + { + struct tcphdr *tcph; + + if (!pskb_may_pull(skb, ihl + sizeof(*tcph)) || + (skb_cloned(skb) && + !skb_clone_writable(skb, ihl + sizeof(*tcph)) && + pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + goto drop; + + tcph = (void *)(skb_network_header(skb) + ihl); + nf_proto_csum_replace4(&tcph->check, skb, addr, new_addr, 1); + break; + } + case IPPROTO_UDP: + { + struct udphdr *udph; + + if (!pskb_may_pull(skb, ihl + sizeof(*udph)) || + (skb_cloned(skb) && + !skb_clone_writable(skb, ihl + sizeof(*udph)) && + pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + goto drop; + + udph = (void *)(skb_network_header(skb) + ihl); + if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { + nf_proto_csum_replace4(&udph->check, skb, addr, + new_addr, 1); + if (!udph->check) + udph->check = CSUM_MANGLED_0; + } + break; + } + case IPPROTO_ICMP: + { + struct icmphdr *icmph; + + if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph))) + goto drop; + + icmph = (void *)(skb_network_header(skb) + ihl); + + if ((icmph->type != ICMP_DEST_UNREACH) && + (icmph->type != ICMP_TIME_EXCEEDED) && + (icmph->type != ICMP_PARAMETERPROB)) + break; + + iph = (void *)(icmph + 1); + if (egress) + addr = iph->daddr; + else + addr = iph->saddr; + + if ((old_addr ^ addr) & mask) + break; + + if (skb_cloned(skb) && + !skb_clone_writable(skb, + ihl + sizeof(*icmph) + sizeof(*iph)) && + pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + goto drop; + + icmph = (void *)(skb_network_header(skb) + ihl); + iph = (void *)(icmph + 1); + + new_addr &= mask; + new_addr |= addr & ~mask; + + /* XXX Fix up the inner checksums. */ + if (egress) + iph->daddr = new_addr; + else + iph->saddr = new_addr; + + nf_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr, + 1); + break; + } + default: + break; + } + + return action; + +drop: + spin_lock(&p->tcf_lock); + p->tcf_qstats.drops++; + spin_unlock(&p->tcf_lock); + return TC_ACT_SHOT; +} + +static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_nat *p = a->priv; + struct tc_nat *opt; + struct tcf_t t; + int s; + + s = sizeof(*opt); + + /* netlink spinlocks held above us - must use ATOMIC */ + opt = kzalloc(s, GFP_ATOMIC); + if (unlikely(!opt)) + return -ENOBUFS; + + opt->old_addr = p->old_addr; + opt->new_addr = p->new_addr; + opt->mask = p->mask; + opt->flags = p->flags; + + opt->index = p->tcf_index; + opt->action = p->tcf_action; + opt->refcnt = p->tcf_refcnt - ref; + opt->bindcnt = p->tcf_bindcnt - bind; + + RTA_PUT(skb, TCA_NAT_PARMS, s, opt); + t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(p->tcf_tm.expires); + RTA_PUT(skb, TCA_NAT_TM, sizeof(t), &t); + + kfree(opt); + + return skb->len; + +rtattr_failure: + nlmsg_trim(skb, b); + kfree(opt); + return -1; +} + +static struct tc_action_ops act_nat_ops = { + .kind = "nat", + .hinfo = &nat_hash_info, + .type = TCA_ACT_NAT, + .capab = TCA_CAP_NONE, + .owner = THIS_MODULE, + .act = tcf_nat, + .dump = tcf_nat_dump, + .cleanup = tcf_nat_cleanup, + .lookup = tcf_hash_search, + .init = tcf_nat_init, + .walk = tcf_generic_walker +}; + +MODULE_DESCRIPTION("Stateless NAT actions"); +MODULE_LICENSE("GPL"); + +static int __init nat_init_module(void) +{ + return tcf_register_action(&act_nat_ops); +} + +static void __exit nat_cleanup_module(void) +{ + tcf_unregister_action(&act_nat_ops); +} + +module_init(nat_init_module); +module_exit(nat_cleanup_module); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index bf90e60f841..a73e3e6d87e 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -16,14 +16,13 @@ #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> -#include <linux/module.h> #include <linux/rtnetlink.h> #include <linux/init.h> #include <net/act_api.h> #include <net/netlink.h> -#define L2T(p,L) ((p)->tcfp_R_tab->data[(L)>>(p)->tcfp_R_tab->rate.cell_log]) -#define L2T_P(p,L) ((p)->tcfp_P_tab->data[(L)>>(p)->tcfp_P_tab->rate.cell_log]) +#define L2T(p,L) qdisc_l2t((p)->tcfp_R_tab, L) +#define L2T_P(p,L) qdisc_l2t((p)->tcfp_P_tab, L) #define POL_TAB_MASK 15 static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1]; @@ -57,7 +56,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; struct rtattr *r; - read_lock(&police_lock); + read_lock_bh(&police_lock); s_i = cb->args[0]; @@ -86,7 +85,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c } } done: - read_unlock(&police_lock); + read_unlock_bh(&police_lock); if (n_i) cb->args[0] += n_i; return n_i; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 5f0fbca7393..03657976fd5 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -154,7 +154,7 @@ replay: /* Find head of filter chain. */ /* Find link */ - if ((dev = __dev_get_by_index(t->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(&init_net, t->tcm_ifindex)) == NULL) return -ENODEV; /* Find qdisc */ @@ -387,7 +387,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) return skb->len; - if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return skb->len; if (!tcm->tcm_parent) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 8dbe36912ec..9e98c6e567d 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -502,7 +502,7 @@ static int u32_set_parms(struct tcf_proto *tp, unsigned long base, #ifdef CONFIG_NET_CLS_IND if (tb[TCA_U32_INDEV-1]) { - int err = tcf_change_indev(tp, n->indev, tb[TCA_U32_INDEV-1]); + err = tcf_change_indev(tp, n->indev, tb[TCA_U32_INDEV-1]); if (err < 0) goto errout; } @@ -592,7 +592,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, } else handle = gen_new_kid(ht, htid); - if (tb[TCA_U32_SEL-1] == 0 || + if (tb[TCA_U32_SEL-1] == NULL || RTA_PAYLOAD(tb[TCA_U32_SEL-1]) < sizeof(struct tc_u32_sel)) return -EINVAL; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 650f09c8bd6..e9989610712 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -291,7 +291,7 @@ META_COLLECTOR(var_sk_bound_if) } else { struct net_device *dev; - dev = dev_get_by_index(skb->sk->sk_bound_dev_if); + dev = dev_get_by_index(&init_net, skb->sk->sk_bound_dev_if); *err = var_dev(dev, dst); if (dev) dev_put(dev); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 13c09bc32aa..8ae137e3522 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -28,6 +28,7 @@ #include <linux/list.h> #include <linux/hrtimer.h> +#include <net/net_namespace.h> #include <net/netlink.h> #include <net/pkt_sched.h> @@ -380,6 +381,10 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) return; while ((parentid = sch->parent)) { sch = qdisc_lookup(sch->dev, TC_H_MAJ(parentid)); + if (sch == NULL) { + WARN_ON(parentid != TC_H_ROOT); + return; + } cops = sch->ops->cl_ops; if (cops->qlen_notify) { cl = cops->get(sch, parentid); @@ -420,8 +425,6 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, unsigned long cl = cops->get(parent, classid); if (cl) { err = cops->graft(parent, cl, new, old); - if (new) - new->parent = classid; cops->put(parent, cl); } } @@ -436,7 +439,8 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, */ static struct Qdisc * -qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) +qdisc_create(struct net_device *dev, u32 parent, u32 handle, + struct rtattr **tca, int *errp) { int err; struct rtattr *kind = tca[TCA_KIND-1]; @@ -482,6 +486,8 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) goto err_out2; } + sch->parent = parent; + if (handle == TC_H_INGRESS) { sch->flags |= TCQ_F_INGRESS; sch->stats_lock = &dev->ingress_lock; @@ -601,7 +607,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) struct Qdisc *p = NULL; int err; - if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return -ENODEV; if (clid) { @@ -668,7 +674,7 @@ replay: clid = tcm->tcm_parent; q = p = NULL; - if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return -ENODEV; if (clid) { @@ -758,9 +764,11 @@ create_n_graft: if (!(n->nlmsg_flags&NLM_F_CREATE)) return -ENOENT; if (clid == TC_H_INGRESS) - q = qdisc_create(dev, tcm->tcm_parent, tca, &err); + q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_parent, + tca, &err); else - q = qdisc_create(dev, tcm->tcm_handle, tca, &err); + q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_handle, + tca, &err); if (q == NULL) { if (err == -EAGAIN) goto replay; @@ -873,7 +881,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) s_q_idx = q_idx = cb->args[1]; read_lock(&dev_base_lock); idx = 0; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (idx < s_idx) goto cont; if (idx > s_idx) @@ -924,7 +932,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) u32 qid = TC_H_MAJ(clid); int err; - if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return -ENODEV; /* @@ -1107,7 +1115,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) return 0; - if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return 0; s_t = cb->args[0]; @@ -1218,10 +1226,13 @@ EXPORT_SYMBOL(tcf_destroy_chain); #ifdef CONFIG_PROC_FS static int psched_show(struct seq_file *seq, void *v) { + struct timespec ts; + + hrtimer_get_res(CLOCK_MONOTONIC, &ts); seq_printf(seq, "%08x %08x %08x %08x\n", (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1), 1000000, - (u32)NSEC_PER_SEC/(u32)ktime_to_ns(KTIME_MONOTONIC_RES)); + (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts))); return 0; } @@ -1244,7 +1255,7 @@ static int __init pktsched_init(void) { register_qdisc(&pfifo_qdisc_ops); register_qdisc(&bfifo_qdisc_ops); - proc_net_fops_create("psched", 0, &psched_fops); + proc_net_fops_create(&init_net, "psched", 0, &psched_fops); rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL); rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL); diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 417ec8fb7f1..ddc4f2c5437 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -292,13 +292,12 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, } } DPRINTK("atm_tc_change: new id %x\n", classid); - flow = kmalloc(sizeof(struct atm_flow_data) + hdr_len, GFP_KERNEL); + flow = kzalloc(sizeof(struct atm_flow_data) + hdr_len, GFP_KERNEL); DPRINTK("atm_tc_change: flow %p\n", flow); if (!flow) { error = -ENOBUFS; goto err_out; } - memset(flow, 0, sizeof(*flow)); flow->filter_list = NULL; if (!(flow->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid))) flow->q = &noop_qdisc; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index e38c2839b25..4de3744e65c 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -175,7 +175,7 @@ struct cbq_sched_data }; -#define L2T(cl,len) ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log]) +#define L2T(cl,len) qdisc_l2t((cl)->R_tab,len) static __inline__ unsigned cbq_hash(u32 h) @@ -380,7 +380,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct cbq_sched_data *q = qdisc_priv(sch); int len = skb->len; - int ret; + int uninitialized_var(ret); struct cbq_class *cl = cbq_classify(skb, sch, &ret); #ifdef CONFIG_NET_CLS_ACT diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index c81649cf0b9..95ae11956f3 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -134,34 +134,19 @@ static inline int qdisc_restart(struct net_device *dev) { struct Qdisc *q = dev->qdisc; struct sk_buff *skb; - unsigned lockless; int ret; /* Dequeue packet */ if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL)) return 0; - /* - * When the driver has LLTX set, it does its own locking in - * start_xmit. These checks are worth it because even uncongested - * locks can be quite expensive. The driver can do a trylock, as - * is being done here; in case of lock contention it should return - * NETDEV_TX_LOCKED and the packet will be requeued. - */ - lockless = (dev->features & NETIF_F_LLTX); - - if (!lockless && !netif_tx_trylock(dev)) { - /* Another CPU grabbed the driver tx lock */ - return handle_dev_cpu_collision(skb, dev, q); - } /* And release queue */ spin_unlock(&dev->queue_lock); + HARD_TX_LOCK(dev, smp_processor_id()); ret = dev_hard_start_xmit(skb, dev); - - if (!lockless) - netif_tx_unlock(dev); + HARD_TX_UNLOCK(dev); spin_lock(&dev->queue_lock); q = dev->qdisc; @@ -256,6 +241,12 @@ static void dev_watchdog_down(struct net_device *dev) netif_tx_unlock_bh(dev); } +/** + * netif_carrier_on - set carrier + * @dev: network device + * + * Device has detected that carrier. + */ void netif_carrier_on(struct net_device *dev) { if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) @@ -264,6 +255,12 @@ void netif_carrier_on(struct net_device *dev) __netdev_watchdog_up(dev); } +/** + * netif_carrier_off - clear carrier + * @dev: network device + * + * Device has detected loss of carrier. + */ void netif_carrier_off(struct net_device *dev) { if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 246a2f9765f..5e608a64935 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -132,10 +132,8 @@ struct htb_class { static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate, int size) { - int slot = size >> rate->rate.cell_log; - if (slot > 255) - return (rate->data[255]*(slot >> 8) + rate->data[slot & 0xFF]); - return rate->data[slot]; + long result = qdisc_l2t(rate, size); + return result; } struct htb_sched { diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 51f16b0af19..2d32fd27496 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -158,9 +158,8 @@ static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch) break; case TC_ACT_RECLASSIFY: case TC_ACT_OK: - case TC_ACT_UNSPEC: - default: skb->tc_index = TC_H_MIN(res.classid); + default: result = TC_ACT_OK; break; } diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 2d8c08493d6..abd82fc3ec6 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -38,22 +38,21 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) struct prio_sched_data *q = qdisc_priv(sch); u32 band = skb->priority; struct tcf_result res; + int err; *qerr = NET_XMIT_BYPASS; if (TC_H_MAJ(skb->priority) != sch->handle) { + err = tc_classify(skb, q->filter_list, &res); #ifdef CONFIG_NET_CLS_ACT - switch (tc_classify(skb, q->filter_list, &res)) { + switch (err) { case TC_ACT_STOLEN: case TC_ACT_QUEUED: *qerr = NET_XMIT_SUCCESS; case TC_ACT_SHOT: return NULL; } - - if (!q->filter_list ) { -#else - if (!q->filter_list || tc_classify(skb, q->filter_list, &res)) { #endif + if (!q->filter_list || err < 0) { if (TC_H_MAJ(band)) band = 0; band = q->prio2band[band&TC_PRIO_MAX]; @@ -239,11 +238,13 @@ static int prio_tune(struct Qdisc *sch, struct rtattr *opt) /* If we're multiqueue, make sure the number of incoming bands * matches the number of queues on the device we're associating with. * If the number of bands requested is zero, then set q->bands to - * dev->egress_subqueue_count. + * dev->egress_subqueue_count. Also, the root qdisc must be the + * only one that is enabled for multiqueue, since it's the only one + * that interacts with the underlying device. */ q->mq = RTA_GET_FLAG(tb[TCA_PRIO_MQ - 1]); if (q->mq) { - if (sch->handle != TC_H_ROOT) + if (sch->parent != TC_H_ROOT) return -EINVAL; if (netif_is_multiqueue(sch->dev)) { if (q->bands == 0) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 95795730985..b542c875e15 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -19,6 +19,7 @@ #include <linux/init.h> #include <linux/ipv6.h> #include <linux/skbuff.h> +#include <linux/jhash.h> #include <net/ip.h> #include <net/netlink.h> #include <net/pkt_sched.h> @@ -95,7 +96,7 @@ struct sfq_sched_data /* Variables */ struct timer_list perturb_timer; - int perturbation; + u32 perturbation; sfq_index tail; /* Index of current slot in round */ sfq_index max_depth; /* Maximal depth */ @@ -109,12 +110,7 @@ struct sfq_sched_data static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1) { - int pert = q->perturbation; - - /* Have we any rotation primitives? If not, WHY? */ - h ^= (h1<<pert) ^ (h1>>(0x1F - pert)); - h ^= h>>10; - return h & 0x3FF; + return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1); } static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) @@ -256,6 +252,13 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch) q->ht[hash] = x = q->dep[SFQ_DEPTH].next; q->hash[x] = hash; } + /* If selected queue has length q->limit, this means that + * all another queues are empty and that we do simple tail drop, + * i.e. drop _this_ packet. + */ + if (q->qs[x].qlen >= q->limit) + return qdisc_drop(skb, sch); + sch->qstats.backlog += skb->len; __skb_queue_tail(&q->qs[x], skb); sfq_inc(q, x); @@ -270,7 +273,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch) q->tail = x; } } - if (++sch->q.qlen < q->limit-1) { + if (++sch->q.qlen <= q->limit) { sch->bstats.bytes += skb->len; sch->bstats.packets++; return 0; @@ -294,6 +297,19 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc* sch) } sch->qstats.backlog += skb->len; __skb_queue_head(&q->qs[x], skb); + /* If selected queue has length q->limit+1, this means that + * all another queues are empty and we do simple tail drop. + * This packet is still requeued at head of queue, tail packet + * is dropped. + */ + if (q->qs[x].qlen > q->limit) { + skb = q->qs[x].prev; + __skb_unlink(skb, &q->qs[x]); + sch->qstats.drops++; + sch->qstats.backlog -= skb->len; + kfree_skb(skb); + return NET_XMIT_CN; + } sfq_inc(q, x); if (q->qs[x].qlen == 1) { /* The flow is new */ if (q->tail == SFQ_DEPTH) { /* It is the first flow */ @@ -306,7 +322,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc* sch) q->tail = x; } } - if (++sch->q.qlen < q->limit - 1) { + if (++sch->q.qlen <= q->limit) { sch->qstats.requeues++; return 0; } @@ -370,12 +386,10 @@ static void sfq_perturbation(unsigned long arg) struct Qdisc *sch = (struct Qdisc*)arg; struct sfq_sched_data *q = qdisc_priv(sch); - q->perturbation = net_random()&0x1F; + get_random_bytes(&q->perturbation, 4); - if (q->perturb_period) { - q->perturb_timer.expires = jiffies + q->perturb_period; - add_timer(&q->perturb_timer); - } + if (q->perturb_period) + mod_timer(&q->perturb_timer, jiffies + q->perturb_period); } static int sfq_change(struct Qdisc *sch, struct rtattr *opt) @@ -391,17 +405,17 @@ static int sfq_change(struct Qdisc *sch, struct rtattr *opt) q->quantum = ctl->quantum ? : psched_mtu(sch->dev); q->perturb_period = ctl->perturb_period*HZ; if (ctl->limit) - q->limit = min_t(u32, ctl->limit, SFQ_DEPTH); + q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1); qlen = sch->q.qlen; - while (sch->q.qlen >= q->limit-1) + while (sch->q.qlen > q->limit) sfq_drop(sch); qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); del_timer(&q->perturb_timer); if (q->perturb_period) { - q->perturb_timer.expires = jiffies + q->perturb_period; - add_timer(&q->perturb_timer); + mod_timer(&q->perturb_timer, jiffies + q->perturb_period); + get_random_bytes(&q->perturbation, 4); } sch_tree_unlock(sch); return 0; @@ -423,12 +437,13 @@ static int sfq_init(struct Qdisc *sch, struct rtattr *opt) q->dep[i+SFQ_DEPTH].next = i+SFQ_DEPTH; q->dep[i+SFQ_DEPTH].prev = i+SFQ_DEPTH; } - q->limit = SFQ_DEPTH; + q->limit = SFQ_DEPTH - 1; q->max_depth = 0; q->tail = SFQ_DEPTH; if (opt == NULL) { q->quantum = psched_mtu(sch->dev); q->perturb_period = 0; + get_random_bytes(&q->perturbation, 4); } else { int err = sfq_change(sch, opt); if (err) diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 8c2639af4c6..b0d81098b0e 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -115,8 +115,8 @@ struct tbf_sched_data struct qdisc_watchdog watchdog; /* Watchdog timer */ }; -#define L2T(q,L) ((q)->R_tab->data[(L)>>(q)->R_tab->rate.cell_log]) -#define L2T_P(q,L) ((q)->P_tab->data[(L)>>(q)->P_tab->rate.cell_log]) +#define L2T(q,L) qdisc_l2t((q)->R_tab,L) +#define L2T_P(q,L) qdisc_l2t((q)->P_tab,L) static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) { diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 0968184ea6b..be57cf317a7 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -232,9 +232,12 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device * } if (neigh_event_send(n, skb_res) == 0) { int err; + read_lock(&n->lock); - err = dev->hard_header(skb, dev, ntohs(skb->protocol), n->ha, NULL, skb->len); + err = dev_hard_header(skb, dev, ntohs(skb->protocol), + n->ha, NULL, skb->len); read_unlock(&n->lock); + if (err < 0) { neigh_release(n); return -EINVAL; @@ -246,10 +249,10 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device * return (skb_res == NULL) ? -EAGAIN : 1; } -static __inline__ int -teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev) +static inline int teql_resolve(struct sk_buff *skb, + struct sk_buff *skb_res, struct net_device *dev) { - if (dev->hard_header == NULL || + if (dev->header_ops == NULL || skb->dst == NULL || skb->dst->neighbour == NULL) return 0; @@ -432,7 +435,6 @@ static __init void teql_master_setup(struct net_device *dev) dev->tx_queue_len = 100; dev->flags = IFF_NOARP; dev->hard_header_len = LL_MAX_HEADER; - SET_MODULE_OWNER(dev); } static LIST_HEAD(master_dev_list); |