summaryrefslogtreecommitdiffstats
path: root/net/sched
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/Kconfig17
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/act_api.c8
-rw-r--r--net/sched/act_mirred.c3
-rw-r--r--net/sched/act_nat.c322
-rw-r--r--net/sched/act_police.c9
-rw-r--r--net/sched/cls_api.c4
-rw-r--r--net/sched/cls_u32.c4
-rw-r--r--net/sched/em_meta.c2
-rw-r--r--net/sched/sch_api.c35
-rw-r--r--net/sched/sch_atm.c3
-rw-r--r--net/sched/sch_cbq.c4
-rw-r--r--net/sched/sch_generic.c31
-rw-r--r--net/sched/sch_htb.c6
-rw-r--r--net/sched/sch_ingress.c3
-rw-r--r--net/sched/sch_prio.c15
-rw-r--r--net/sched/sch_sfq.c53
-rw-r--r--net/sched/sch_tbf.c4
-rw-r--r--net/sched/sch_teql.c12
19 files changed, 446 insertions, 90 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index d3f7c3f9407..92435a882fa 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -97,7 +97,7 @@ config NET_SCH_ATM
select classes of this queuing discipline. Each class maps
the flow(s) it is handling to a given virtual circuit.
- See the top of <file:net/sched/sch_atm.c>) for more details.
+ See the top of <file:net/sched/sch_atm.c> for more details.
To compile this code as a module, choose M here: the
module will be called sch_atm.
@@ -137,7 +137,7 @@ config NET_SCH_SFQ
tristate "Stochastic Fairness Queueing (SFQ)"
---help---
Say Y here if you want to use the Stochastic Fairness Queueing (SFQ)
- packet scheduling algorithm .
+ packet scheduling algorithm.
See the top of <file:net/sched/sch_sfq.c> for more details.
@@ -306,7 +306,7 @@ config NET_CLS_RSVP6
is important for real time data such as streaming sound or video.
Say Y here if you want to be able to classify outgoing packets based
- on their RSVP requests and you are using the IPv6.
+ on their RSVP requests and you are using the IPv6 protocol.
To compile this code as a module, choose M here: the
module will be called cls_rsvp6.
@@ -447,6 +447,17 @@ config NET_ACT_IPT
To compile this code as a module, choose M here: the
module will be called ipt.
+config NET_ACT_NAT
+ tristate "Stateless NAT"
+ depends on NET_CLS_ACT
+ select NETFILTER
+ ---help---
+ Say Y here to do stateless NAT on IPv4 packets. You should use
+ netfilter for NAT unless you know what you are doing.
+
+ To compile this code as a module, choose M here: the
+ module will be called nat.
+
config NET_ACT_PEDIT
tristate "Packet Editing"
depends on NET_CLS_ACT
diff --git a/net/sched/Makefile b/net/sched/Makefile
index b67c36f65cf..81ecbe8e7dc 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_NET_ACT_POLICE) += act_police.o
obj-$(CONFIG_NET_ACT_GACT) += act_gact.o
obj-$(CONFIG_NET_ACT_MIRRED) += act_mirred.o
obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o
+obj-$(CONFIG_NET_ACT_NAT) += act_nat.o
obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o
obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o
obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index feef366cad5..72cdb0fade2 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -68,7 +68,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
int err = 0, index = -1,i = 0, s_i = 0, n_i = 0;
struct rtattr *r ;
- read_lock(hinfo->lock);
+ read_lock_bh(hinfo->lock);
s_i = cb->args[0];
@@ -96,7 +96,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
}
}
done:
- read_unlock(hinfo->lock);
+ read_unlock_bh(hinfo->lock);
if (n_i)
cb->args[0] += n_i;
return n_i;
@@ -156,13 +156,13 @@ struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo)
{
struct tcf_common *p;
- read_lock(hinfo->lock);
+ read_lock_bh(hinfo->lock);
for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p;
p = p->tcfc_next) {
if (p->tcfc_index == index)
break;
}
- read_unlock(hinfo->lock);
+ read_unlock_bh(hinfo->lock);
return p;
}
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 579578944ae..fd7bca4d5c2 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -20,6 +20,7 @@
#include <linux/rtnetlink.h>
#include <linux/module.h>
#include <linux/init.h>
+#include <net/net_namespace.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <linux/tc_act/tc_mirred.h>
@@ -73,7 +74,7 @@ static int tcf_mirred_init(struct rtattr *rta, struct rtattr *est,
parm = RTA_DATA(tb[TCA_MIRRED_PARMS-1]);
if (parm->ifindex) {
- dev = __dev_get_by_index(parm->ifindex);
+ dev = __dev_get_by_index(&init_net, parm->ifindex);
if (dev == NULL)
return -ENODEV;
switch (dev->type) {
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
new file mode 100644
index 00000000000..c96273bcaf9
--- /dev/null
+++ b/net/sched/act_nat.c
@@ -0,0 +1,322 @@
+/*
+ * Stateless NAT actions
+ *
+ * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/tc_act/tc_nat.h>
+#include <net/act_api.h>
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/netlink.h>
+#include <net/tc_act/tc_nat.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+
+
+#define NAT_TAB_MASK 15
+static struct tcf_common *tcf_nat_ht[NAT_TAB_MASK + 1];
+static u32 nat_idx_gen;
+static DEFINE_RWLOCK(nat_lock);
+
+static struct tcf_hashinfo nat_hash_info = {
+ .htab = tcf_nat_ht,
+ .hmask = NAT_TAB_MASK,
+ .lock = &nat_lock,
+};
+
+static int tcf_nat_init(struct rtattr *rta, struct rtattr *est,
+ struct tc_action *a, int ovr, int bind)
+{
+ struct rtattr *tb[TCA_NAT_MAX];
+ struct tc_nat *parm;
+ int ret = 0;
+ struct tcf_nat *p;
+ struct tcf_common *pc;
+
+ if (rta == NULL || rtattr_parse_nested(tb, TCA_NAT_MAX, rta) < 0)
+ return -EINVAL;
+
+ if (tb[TCA_NAT_PARMS - 1] == NULL ||
+ RTA_PAYLOAD(tb[TCA_NAT_PARMS - 1]) < sizeof(*parm))
+ return -EINVAL;
+ parm = RTA_DATA(tb[TCA_NAT_PARMS - 1]);
+
+ pc = tcf_hash_check(parm->index, a, bind, &nat_hash_info);
+ if (!pc) {
+ pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
+ &nat_idx_gen, &nat_hash_info);
+ if (unlikely(!pc))
+ return -ENOMEM;
+ p = to_tcf_nat(pc);
+ ret = ACT_P_CREATED;
+ } else {
+ p = to_tcf_nat(pc);
+ if (!ovr) {
+ tcf_hash_release(pc, bind, &nat_hash_info);
+ return -EEXIST;
+ }
+ }
+
+ spin_lock_bh(&p->tcf_lock);
+ p->old_addr = parm->old_addr;
+ p->new_addr = parm->new_addr;
+ p->mask = parm->mask;
+ p->flags = parm->flags;
+
+ p->tcf_action = parm->action;
+ spin_unlock_bh(&p->tcf_lock);
+
+ if (ret == ACT_P_CREATED)
+ tcf_hash_insert(pc, &nat_hash_info);
+
+ return ret;
+}
+
+static int tcf_nat_cleanup(struct tc_action *a, int bind)
+{
+ struct tcf_nat *p = a->priv;
+
+ return tcf_hash_release(&p->common, bind, &nat_hash_info);
+}
+
+static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
+ struct tcf_result *res)
+{
+ struct tcf_nat *p = a->priv;
+ struct iphdr *iph;
+ __be32 old_addr;
+ __be32 new_addr;
+ __be32 mask;
+ __be32 addr;
+ int egress;
+ int action;
+ int ihl;
+
+ spin_lock(&p->tcf_lock);
+
+ p->tcf_tm.lastuse = jiffies;
+ old_addr = p->old_addr;
+ new_addr = p->new_addr;
+ mask = p->mask;
+ egress = p->flags & TCA_NAT_FLAG_EGRESS;
+ action = p->tcf_action;
+
+ p->tcf_bstats.bytes += skb->len;
+ p->tcf_bstats.packets++;
+
+ spin_unlock(&p->tcf_lock);
+
+ if (unlikely(action == TC_ACT_SHOT))
+ goto drop;
+
+ if (!pskb_may_pull(skb, sizeof(*iph)))
+ goto drop;
+
+ iph = ip_hdr(skb);
+
+ if (egress)
+ addr = iph->saddr;
+ else
+ addr = iph->daddr;
+
+ if (!((old_addr ^ addr) & mask)) {
+ if (skb_cloned(skb) &&
+ !skb_clone_writable(skb, sizeof(*iph)) &&
+ pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+ goto drop;
+
+ new_addr &= mask;
+ new_addr |= addr & ~mask;
+
+ /* Rewrite IP header */
+ iph = ip_hdr(skb);
+ if (egress)
+ iph->saddr = new_addr;
+ else
+ iph->daddr = new_addr;
+
+ nf_csum_replace4(&iph->check, addr, new_addr);
+ }
+
+ ihl = iph->ihl * 4;
+
+ /* It would be nice to share code with stateful NAT. */
+ switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) {
+ case IPPROTO_TCP:
+ {
+ struct tcphdr *tcph;
+
+ if (!pskb_may_pull(skb, ihl + sizeof(*tcph)) ||
+ (skb_cloned(skb) &&
+ !skb_clone_writable(skb, ihl + sizeof(*tcph)) &&
+ pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+ goto drop;
+
+ tcph = (void *)(skb_network_header(skb) + ihl);
+ nf_proto_csum_replace4(&tcph->check, skb, addr, new_addr, 1);
+ break;
+ }
+ case IPPROTO_UDP:
+ {
+ struct udphdr *udph;
+
+ if (!pskb_may_pull(skb, ihl + sizeof(*udph)) ||
+ (skb_cloned(skb) &&
+ !skb_clone_writable(skb, ihl + sizeof(*udph)) &&
+ pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+ goto drop;
+
+ udph = (void *)(skb_network_header(skb) + ihl);
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ nf_proto_csum_replace4(&udph->check, skb, addr,
+ new_addr, 1);
+ if (!udph->check)
+ udph->check = CSUM_MANGLED_0;
+ }
+ break;
+ }
+ case IPPROTO_ICMP:
+ {
+ struct icmphdr *icmph;
+
+ if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph)))
+ goto drop;
+
+ icmph = (void *)(skb_network_header(skb) + ihl);
+
+ if ((icmph->type != ICMP_DEST_UNREACH) &&
+ (icmph->type != ICMP_TIME_EXCEEDED) &&
+ (icmph->type != ICMP_PARAMETERPROB))
+ break;
+
+ iph = (void *)(icmph + 1);
+ if (egress)
+ addr = iph->daddr;
+ else
+ addr = iph->saddr;
+
+ if ((old_addr ^ addr) & mask)
+ break;
+
+ if (skb_cloned(skb) &&
+ !skb_clone_writable(skb,
+ ihl + sizeof(*icmph) + sizeof(*iph)) &&
+ pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+ goto drop;
+
+ icmph = (void *)(skb_network_header(skb) + ihl);
+ iph = (void *)(icmph + 1);
+
+ new_addr &= mask;
+ new_addr |= addr & ~mask;
+
+ /* XXX Fix up the inner checksums. */
+ if (egress)
+ iph->daddr = new_addr;
+ else
+ iph->saddr = new_addr;
+
+ nf_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr,
+ 1);
+ break;
+ }
+ default:
+ break;
+ }
+
+ return action;
+
+drop:
+ spin_lock(&p->tcf_lock);
+ p->tcf_qstats.drops++;
+ spin_unlock(&p->tcf_lock);
+ return TC_ACT_SHOT;
+}
+
+static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
+ int bind, int ref)
+{
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tcf_nat *p = a->priv;
+ struct tc_nat *opt;
+ struct tcf_t t;
+ int s;
+
+ s = sizeof(*opt);
+
+ /* netlink spinlocks held above us - must use ATOMIC */
+ opt = kzalloc(s, GFP_ATOMIC);
+ if (unlikely(!opt))
+ return -ENOBUFS;
+
+ opt->old_addr = p->old_addr;
+ opt->new_addr = p->new_addr;
+ opt->mask = p->mask;
+ opt->flags = p->flags;
+
+ opt->index = p->tcf_index;
+ opt->action = p->tcf_action;
+ opt->refcnt = p->tcf_refcnt - ref;
+ opt->bindcnt = p->tcf_bindcnt - bind;
+
+ RTA_PUT(skb, TCA_NAT_PARMS, s, opt);
+ t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
+ t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
+ t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
+ RTA_PUT(skb, TCA_NAT_TM, sizeof(t), &t);
+
+ kfree(opt);
+
+ return skb->len;
+
+rtattr_failure:
+ nlmsg_trim(skb, b);
+ kfree(opt);
+ return -1;
+}
+
+static struct tc_action_ops act_nat_ops = {
+ .kind = "nat",
+ .hinfo = &nat_hash_info,
+ .type = TCA_ACT_NAT,
+ .capab = TCA_CAP_NONE,
+ .owner = THIS_MODULE,
+ .act = tcf_nat,
+ .dump = tcf_nat_dump,
+ .cleanup = tcf_nat_cleanup,
+ .lookup = tcf_hash_search,
+ .init = tcf_nat_init,
+ .walk = tcf_generic_walker
+};
+
+MODULE_DESCRIPTION("Stateless NAT actions");
+MODULE_LICENSE("GPL");
+
+static int __init nat_init_module(void)
+{
+ return tcf_register_action(&act_nat_ops);
+}
+
+static void __exit nat_cleanup_module(void)
+{
+ tcf_unregister_action(&act_nat_ops);
+}
+
+module_init(nat_init_module);
+module_exit(nat_cleanup_module);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index bf90e60f841..a73e3e6d87e 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -16,14 +16,13 @@
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
-#include <linux/module.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <net/act_api.h>
#include <net/netlink.h>
-#define L2T(p,L) ((p)->tcfp_R_tab->data[(L)>>(p)->tcfp_R_tab->rate.cell_log])
-#define L2T_P(p,L) ((p)->tcfp_P_tab->data[(L)>>(p)->tcfp_P_tab->rate.cell_log])
+#define L2T(p,L) qdisc_l2t((p)->tcfp_R_tab, L)
+#define L2T_P(p,L) qdisc_l2t((p)->tcfp_P_tab, L)
#define POL_TAB_MASK 15
static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1];
@@ -57,7 +56,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
struct rtattr *r;
- read_lock(&police_lock);
+ read_lock_bh(&police_lock);
s_i = cb->args[0];
@@ -86,7 +85,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
}
}
done:
- read_unlock(&police_lock);
+ read_unlock_bh(&police_lock);
if (n_i)
cb->args[0] += n_i;
return n_i;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 5f0fbca7393..03657976fd5 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -154,7 +154,7 @@ replay:
/* Find head of filter chain. */
/* Find link */
- if ((dev = __dev_get_by_index(t->tcm_ifindex)) == NULL)
+ if ((dev = __dev_get_by_index(&init_net, t->tcm_ifindex)) == NULL)
return -ENODEV;
/* Find qdisc */
@@ -387,7 +387,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
return skb->len;
- if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
+ if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
return skb->len;
if (!tcm->tcm_parent)
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 8dbe36912ec..9e98c6e567d 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -502,7 +502,7 @@ static int u32_set_parms(struct tcf_proto *tp, unsigned long base,
#ifdef CONFIG_NET_CLS_IND
if (tb[TCA_U32_INDEV-1]) {
- int err = tcf_change_indev(tp, n->indev, tb[TCA_U32_INDEV-1]);
+ err = tcf_change_indev(tp, n->indev, tb[TCA_U32_INDEV-1]);
if (err < 0)
goto errout;
}
@@ -592,7 +592,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
} else
handle = gen_new_kid(ht, htid);
- if (tb[TCA_U32_SEL-1] == 0 ||
+ if (tb[TCA_U32_SEL-1] == NULL ||
RTA_PAYLOAD(tb[TCA_U32_SEL-1]) < sizeof(struct tc_u32_sel))
return -EINVAL;
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 650f09c8bd6..e9989610712 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -291,7 +291,7 @@ META_COLLECTOR(var_sk_bound_if)
} else {
struct net_device *dev;
- dev = dev_get_by_index(skb->sk->sk_bound_dev_if);
+ dev = dev_get_by_index(&init_net, skb->sk->sk_bound_dev_if);
*err = var_dev(dev, dst);
if (dev)
dev_put(dev);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 13c09bc32aa..8ae137e3522 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -28,6 +28,7 @@
#include <linux/list.h>
#include <linux/hrtimer.h>
+#include <net/net_namespace.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
@@ -380,6 +381,10 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
return;
while ((parentid = sch->parent)) {
sch = qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
+ if (sch == NULL) {
+ WARN_ON(parentid != TC_H_ROOT);
+ return;
+ }
cops = sch->ops->cl_ops;
if (cops->qlen_notify) {
cl = cops->get(sch, parentid);
@@ -420,8 +425,6 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
unsigned long cl = cops->get(parent, classid);
if (cl) {
err = cops->graft(parent, cl, new, old);
- if (new)
- new->parent = classid;
cops->put(parent, cl);
}
}
@@ -436,7 +439,8 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
*/
static struct Qdisc *
-qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
+qdisc_create(struct net_device *dev, u32 parent, u32 handle,
+ struct rtattr **tca, int *errp)
{
int err;
struct rtattr *kind = tca[TCA_KIND-1];
@@ -482,6 +486,8 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
goto err_out2;
}
+ sch->parent = parent;
+
if (handle == TC_H_INGRESS) {
sch->flags |= TCQ_F_INGRESS;
sch->stats_lock = &dev->ingress_lock;
@@ -601,7 +607,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
struct Qdisc *p = NULL;
int err;
- if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
+ if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
return -ENODEV;
if (clid) {
@@ -668,7 +674,7 @@ replay:
clid = tcm->tcm_parent;
q = p = NULL;
- if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
+ if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
return -ENODEV;
if (clid) {
@@ -758,9 +764,11 @@ create_n_graft:
if (!(n->nlmsg_flags&NLM_F_CREATE))
return -ENOENT;
if (clid == TC_H_INGRESS)
- q = qdisc_create(dev, tcm->tcm_parent, tca, &err);
+ q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_parent,
+ tca, &err);
else
- q = qdisc_create(dev, tcm->tcm_handle, tca, &err);
+ q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_handle,
+ tca, &err);
if (q == NULL) {
if (err == -EAGAIN)
goto replay;
@@ -873,7 +881,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
s_q_idx = q_idx = cb->args[1];
read_lock(&dev_base_lock);
idx = 0;
- for_each_netdev(dev) {
+ for_each_netdev(&init_net, dev) {
if (idx < s_idx)
goto cont;
if (idx > s_idx)
@@ -924,7 +932,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
u32 qid = TC_H_MAJ(clid);
int err;
- if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
+ if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
return -ENODEV;
/*
@@ -1107,7 +1115,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
return 0;
- if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
+ if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
return 0;
s_t = cb->args[0];
@@ -1218,10 +1226,13 @@ EXPORT_SYMBOL(tcf_destroy_chain);
#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
+ struct timespec ts;
+
+ hrtimer_get_res(CLOCK_MONOTONIC, &ts);
seq_printf(seq, "%08x %08x %08x %08x\n",
(u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
1000000,
- (u32)NSEC_PER_SEC/(u32)ktime_to_ns(KTIME_MONOTONIC_RES));
+ (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts)));
return 0;
}
@@ -1244,7 +1255,7 @@ static int __init pktsched_init(void)
{
register_qdisc(&pfifo_qdisc_ops);
register_qdisc(&bfifo_qdisc_ops);
- proc_net_fops_create("psched", 0, &psched_fops);
+ proc_net_fops_create(&init_net, "psched", 0, &psched_fops);
rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 417ec8fb7f1..ddc4f2c5437 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -292,13 +292,12 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
}
}
DPRINTK("atm_tc_change: new id %x\n", classid);
- flow = kmalloc(sizeof(struct atm_flow_data) + hdr_len, GFP_KERNEL);
+ flow = kzalloc(sizeof(struct atm_flow_data) + hdr_len, GFP_KERNEL);
DPRINTK("atm_tc_change: flow %p\n", flow);
if (!flow) {
error = -ENOBUFS;
goto err_out;
}
- memset(flow, 0, sizeof(*flow));
flow->filter_list = NULL;
if (!(flow->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
flow->q = &noop_qdisc;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index e38c2839b25..4de3744e65c 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -175,7 +175,7 @@ struct cbq_sched_data
};
-#define L2T(cl,len) ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
+#define L2T(cl,len) qdisc_l2t((cl)->R_tab,len)
static __inline__ unsigned cbq_hash(u32 h)
@@ -380,7 +380,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct cbq_sched_data *q = qdisc_priv(sch);
int len = skb->len;
- int ret;
+ int uninitialized_var(ret);
struct cbq_class *cl = cbq_classify(skb, sch, &ret);
#ifdef CONFIG_NET_CLS_ACT
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index c81649cf0b9..95ae11956f3 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -134,34 +134,19 @@ static inline int qdisc_restart(struct net_device *dev)
{
struct Qdisc *q = dev->qdisc;
struct sk_buff *skb;
- unsigned lockless;
int ret;
/* Dequeue packet */
if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL))
return 0;
- /*
- * When the driver has LLTX set, it does its own locking in
- * start_xmit. These checks are worth it because even uncongested
- * locks can be quite expensive. The driver can do a trylock, as
- * is being done here; in case of lock contention it should return
- * NETDEV_TX_LOCKED and the packet will be requeued.
- */
- lockless = (dev->features & NETIF_F_LLTX);
-
- if (!lockless && !netif_tx_trylock(dev)) {
- /* Another CPU grabbed the driver tx lock */
- return handle_dev_cpu_collision(skb, dev, q);
- }
/* And release queue */
spin_unlock(&dev->queue_lock);
+ HARD_TX_LOCK(dev, smp_processor_id());
ret = dev_hard_start_xmit(skb, dev);
-
- if (!lockless)
- netif_tx_unlock(dev);
+ HARD_TX_UNLOCK(dev);
spin_lock(&dev->queue_lock);
q = dev->qdisc;
@@ -256,6 +241,12 @@ static void dev_watchdog_down(struct net_device *dev)
netif_tx_unlock_bh(dev);
}
+/**
+ * netif_carrier_on - set carrier
+ * @dev: network device
+ *
+ * Device has detected that carrier.
+ */
void netif_carrier_on(struct net_device *dev)
{
if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state))
@@ -264,6 +255,12 @@ void netif_carrier_on(struct net_device *dev)
__netdev_watchdog_up(dev);
}
+/**
+ * netif_carrier_off - clear carrier
+ * @dev: network device
+ *
+ * Device has detected loss of carrier.
+ */
void netif_carrier_off(struct net_device *dev)
{
if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state))
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 246a2f9765f..5e608a64935 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -132,10 +132,8 @@ struct htb_class {
static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
int size)
{
- int slot = size >> rate->rate.cell_log;
- if (slot > 255)
- return (rate->data[255]*(slot >> 8) + rate->data[slot & 0xFF]);
- return rate->data[slot];
+ long result = qdisc_l2t(rate, size);
+ return result;
}
struct htb_sched {
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 51f16b0af19..2d32fd27496 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -158,9 +158,8 @@ static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch)
break;
case TC_ACT_RECLASSIFY:
case TC_ACT_OK:
- case TC_ACT_UNSPEC:
- default:
skb->tc_index = TC_H_MIN(res.classid);
+ default:
result = TC_ACT_OK;
break;
}
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 2d8c08493d6..abd82fc3ec6 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -38,22 +38,21 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
struct prio_sched_data *q = qdisc_priv(sch);
u32 band = skb->priority;
struct tcf_result res;
+ int err;
*qerr = NET_XMIT_BYPASS;
if (TC_H_MAJ(skb->priority) != sch->handle) {
+ err = tc_classify(skb, q->filter_list, &res);
#ifdef CONFIG_NET_CLS_ACT
- switch (tc_classify(skb, q->filter_list, &res)) {
+ switch (err) {
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
*qerr = NET_XMIT_SUCCESS;
case TC_ACT_SHOT:
return NULL;
}
-
- if (!q->filter_list ) {
-#else
- if (!q->filter_list || tc_classify(skb, q->filter_list, &res)) {
#endif
+ if (!q->filter_list || err < 0) {
if (TC_H_MAJ(band))
band = 0;
band = q->prio2band[band&TC_PRIO_MAX];
@@ -239,11 +238,13 @@ static int prio_tune(struct Qdisc *sch, struct rtattr *opt)
/* If we're multiqueue, make sure the number of incoming bands
* matches the number of queues on the device we're associating with.
* If the number of bands requested is zero, then set q->bands to
- * dev->egress_subqueue_count.
+ * dev->egress_subqueue_count. Also, the root qdisc must be the
+ * only one that is enabled for multiqueue, since it's the only one
+ * that interacts with the underlying device.
*/
q->mq = RTA_GET_FLAG(tb[TCA_PRIO_MQ - 1]);
if (q->mq) {
- if (sch->handle != TC_H_ROOT)
+ if (sch->parent != TC_H_ROOT)
return -EINVAL;
if (netif_is_multiqueue(sch->dev)) {
if (q->bands == 0)
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 95795730985..b542c875e15 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -19,6 +19,7 @@
#include <linux/init.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>
+#include <linux/jhash.h>
#include <net/ip.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
@@ -95,7 +96,7 @@ struct sfq_sched_data
/* Variables */
struct timer_list perturb_timer;
- int perturbation;
+ u32 perturbation;
sfq_index tail; /* Index of current slot in round */
sfq_index max_depth; /* Maximal depth */
@@ -109,12 +110,7 @@ struct sfq_sched_data
static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
{
- int pert = q->perturbation;
-
- /* Have we any rotation primitives? If not, WHY? */
- h ^= (h1<<pert) ^ (h1>>(0x1F - pert));
- h ^= h>>10;
- return h & 0x3FF;
+ return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1);
}
static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
@@ -256,6 +252,13 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
q->hash[x] = hash;
}
+ /* If selected queue has length q->limit, this means that
+ * all another queues are empty and that we do simple tail drop,
+ * i.e. drop _this_ packet.
+ */
+ if (q->qs[x].qlen >= q->limit)
+ return qdisc_drop(skb, sch);
+
sch->qstats.backlog += skb->len;
__skb_queue_tail(&q->qs[x], skb);
sfq_inc(q, x);
@@ -270,7 +273,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
q->tail = x;
}
}
- if (++sch->q.qlen < q->limit-1) {
+ if (++sch->q.qlen <= q->limit) {
sch->bstats.bytes += skb->len;
sch->bstats.packets++;
return 0;
@@ -294,6 +297,19 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
}
sch->qstats.backlog += skb->len;
__skb_queue_head(&q->qs[x], skb);
+ /* If selected queue has length q->limit+1, this means that
+ * all another queues are empty and we do simple tail drop.
+ * This packet is still requeued at head of queue, tail packet
+ * is dropped.
+ */
+ if (q->qs[x].qlen > q->limit) {
+ skb = q->qs[x].prev;
+ __skb_unlink(skb, &q->qs[x]);
+ sch->qstats.drops++;
+ sch->qstats.backlog -= skb->len;
+ kfree_skb(skb);
+ return NET_XMIT_CN;
+ }
sfq_inc(q, x);
if (q->qs[x].qlen == 1) { /* The flow is new */
if (q->tail == SFQ_DEPTH) { /* It is the first flow */
@@ -306,7 +322,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
q->tail = x;
}
}
- if (++sch->q.qlen < q->limit - 1) {
+ if (++sch->q.qlen <= q->limit) {
sch->qstats.requeues++;
return 0;
}
@@ -370,12 +386,10 @@ static void sfq_perturbation(unsigned long arg)
struct Qdisc *sch = (struct Qdisc*)arg;
struct sfq_sched_data *q = qdisc_priv(sch);
- q->perturbation = net_random()&0x1F;
+ get_random_bytes(&q->perturbation, 4);
- if (q->perturb_period) {
- q->perturb_timer.expires = jiffies + q->perturb_period;
- add_timer(&q->perturb_timer);
- }
+ if (q->perturb_period)
+ mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
}
static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
@@ -391,17 +405,17 @@ static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
q->perturb_period = ctl->perturb_period*HZ;
if (ctl->limit)
- q->limit = min_t(u32, ctl->limit, SFQ_DEPTH);
+ q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
qlen = sch->q.qlen;
- while (sch->q.qlen >= q->limit-1)
+ while (sch->q.qlen > q->limit)
sfq_drop(sch);
qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
del_timer(&q->perturb_timer);
if (q->perturb_period) {
- q->perturb_timer.expires = jiffies + q->perturb_period;
- add_timer(&q->perturb_timer);
+ mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
+ get_random_bytes(&q->perturbation, 4);
}
sch_tree_unlock(sch);
return 0;
@@ -423,12 +437,13 @@ static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
q->dep[i+SFQ_DEPTH].next = i+SFQ_DEPTH;
q->dep[i+SFQ_DEPTH].prev = i+SFQ_DEPTH;
}
- q->limit = SFQ_DEPTH;
+ q->limit = SFQ_DEPTH - 1;
q->max_depth = 0;
q->tail = SFQ_DEPTH;
if (opt == NULL) {
q->quantum = psched_mtu(sch->dev);
q->perturb_period = 0;
+ get_random_bytes(&q->perturbation, 4);
} else {
int err = sfq_change(sch, opt);
if (err)
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 8c2639af4c6..b0d81098b0e 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -115,8 +115,8 @@ struct tbf_sched_data
struct qdisc_watchdog watchdog; /* Watchdog timer */
};
-#define L2T(q,L) ((q)->R_tab->data[(L)>>(q)->R_tab->rate.cell_log])
-#define L2T_P(q,L) ((q)->P_tab->data[(L)>>(q)->P_tab->rate.cell_log])
+#define L2T(q,L) qdisc_l2t((q)->R_tab,L)
+#define L2T_P(q,L) qdisc_l2t((q)->P_tab,L)
static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
{
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 0968184ea6b..be57cf317a7 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -232,9 +232,12 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *
}
if (neigh_event_send(n, skb_res) == 0) {
int err;
+
read_lock(&n->lock);
- err = dev->hard_header(skb, dev, ntohs(skb->protocol), n->ha, NULL, skb->len);
+ err = dev_hard_header(skb, dev, ntohs(skb->protocol),
+ n->ha, NULL, skb->len);
read_unlock(&n->lock);
+
if (err < 0) {
neigh_release(n);
return -EINVAL;
@@ -246,10 +249,10 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *
return (skb_res == NULL) ? -EAGAIN : 1;
}
-static __inline__ int
-teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
+static inline int teql_resolve(struct sk_buff *skb,
+ struct sk_buff *skb_res, struct net_device *dev)
{
- if (dev->hard_header == NULL ||
+ if (dev->header_ops == NULL ||
skb->dst == NULL ||
skb->dst->neighbour == NULL)
return 0;
@@ -432,7 +435,6 @@ static __init void teql_master_setup(struct net_device *dev)
dev->tx_queue_len = 100;
dev->flags = IFF_NOARP;
dev->hard_header_len = LL_MAX_HEADER;
- SET_MODULE_OWNER(dev);
}
static LIST_HEAD(master_dev_list);