summaryrefslogtreecommitdiffstats
path: root/net/sched
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/cls_flow.c182
-rw-r--r--net/sched/sch_choke.c121
-rw-r--r--net/sched/sch_generic.c9
-rw-r--r--net/sched/sch_gred.c36
-rw-r--r--net/sched/sch_hfsc.c10
-rw-r--r--net/sched/sch_multiq.c6
-rw-r--r--net/sched/sch_netem.c74
-rw-r--r--net/sched/sch_red.c30
-rw-r--r--net/sched/sch_sfb.c17
-rw-r--r--net/sched/sch_sfq.c136
-rw-r--r--net/sched/sch_teql.c8
11 files changed, 333 insertions, 296 deletions
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 7b582300d05..1d8bd0dbcd1 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -26,6 +26,8 @@
#include <net/pkt_cls.h>
#include <net/ip.h>
#include <net/route.h>
+#include <net/flow_keys.h>
+
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netfilter/nf_conntrack.h>
#endif
@@ -66,134 +68,37 @@ static inline u32 addr_fold(void *addr)
return (a & 0xFFFFFFFF) ^ (BITS_PER_LONG > 32 ? a >> 32 : 0);
}
-static u32 flow_get_src(const struct sk_buff *skb, int nhoff)
+static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow)
{
- __be32 *data = NULL, hdata;
-
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- data = skb_header_pointer(skb,
- nhoff + offsetof(struct iphdr,
- saddr),
- 4, &hdata);
- break;
- case htons(ETH_P_IPV6):
- data = skb_header_pointer(skb,
- nhoff + offsetof(struct ipv6hdr,
- saddr.s6_addr32[3]),
- 4, &hdata);
- break;
- }
-
- if (data)
- return ntohl(*data);
+ if (flow->src)
+ return ntohl(flow->src);
return addr_fold(skb->sk);
}
-static u32 flow_get_dst(const struct sk_buff *skb, int nhoff)
+static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow)
{
- __be32 *data = NULL, hdata;
-
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- data = skb_header_pointer(skb,
- nhoff + offsetof(struct iphdr,
- daddr),
- 4, &hdata);
- break;
- case htons(ETH_P_IPV6):
- data = skb_header_pointer(skb,
- nhoff + offsetof(struct ipv6hdr,
- daddr.s6_addr32[3]),
- 4, &hdata);
- break;
- }
-
- if (data)
- return ntohl(*data);
+ if (flow->dst)
+ return ntohl(flow->dst);
return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol;
}
-static u32 flow_get_proto(const struct sk_buff *skb, int nhoff)
+static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow)
{
- __u8 *data = NULL, hdata;
-
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- data = skb_header_pointer(skb,
- nhoff + offsetof(struct iphdr,
- protocol),
- 1, &hdata);
- break;
- case htons(ETH_P_IPV6):
- data = skb_header_pointer(skb,
- nhoff + offsetof(struct ipv6hdr,
- nexthdr),
- 1, &hdata);
- break;
- }
- if (data)
- return *data;
- return 0;
+ return flow->ip_proto;
}
-/* helper function to get either src or dst port */
-static __be16 *flow_get_proto_common(const struct sk_buff *skb, int nhoff,
- __be16 *_port, int dst)
+static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow)
{
- __be16 *port = NULL;
- int poff;
-
- switch (skb->protocol) {
- case htons(ETH_P_IP): {
- struct iphdr *iph, _iph;
-
- iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
- if (!iph)
- break;
- if (ip_is_fragment(iph))
- break;
- poff = proto_ports_offset(iph->protocol);
- if (poff >= 0)
- port = skb_header_pointer(skb,
- nhoff + iph->ihl * 4 + poff + dst,
- sizeof(*_port), _port);
- break;
- }
- case htons(ETH_P_IPV6): {
- struct ipv6hdr *iph, _iph;
-
- iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
- if (!iph)
- break;
- poff = proto_ports_offset(iph->nexthdr);
- if (poff >= 0)
- port = skb_header_pointer(skb,
- nhoff + sizeof(*iph) + poff + dst,
- sizeof(*_port), _port);
- break;
- }
- }
-
- return port;
-}
-
-static u32 flow_get_proto_src(const struct sk_buff *skb, int nhoff)
-{
- __be16 _port, *port = flow_get_proto_common(skb, nhoff, &_port, 0);
-
- if (port)
- return ntohs(*port);
+ if (flow->ports)
+ return ntohs(flow->port16[0]);
return addr_fold(skb->sk);
}
-static u32 flow_get_proto_dst(const struct sk_buff *skb, int nhoff)
+static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow)
{
- __be16 _port, *port = flow_get_proto_common(skb, nhoff, &_port, 2);
-
- if (port)
- return ntohs(*port);
+ if (flow->ports)
+ return ntohs(flow->port16[1]);
return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol;
}
@@ -239,7 +144,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb)
})
#endif
-static u32 flow_get_nfct_src(const struct sk_buff *skb, int nhoff)
+static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow)
{
switch (skb->protocol) {
case htons(ETH_P_IP):
@@ -248,10 +153,10 @@ static u32 flow_get_nfct_src(const struct sk_buff *skb, int nhoff)
return ntohl(CTTUPLE(skb, src.u3.ip6[3]));
}
fallback:
- return flow_get_src(skb, nhoff);
+ return flow_get_src(skb, flow);
}
-static u32 flow_get_nfct_dst(const struct sk_buff *skb, int nhoff)
+static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow)
{
switch (skb->protocol) {
case htons(ETH_P_IP):
@@ -260,21 +165,21 @@ static u32 flow_get_nfct_dst(const struct sk_buff *skb, int nhoff)
return ntohl(CTTUPLE(skb, dst.u3.ip6[3]));
}
fallback:
- return flow_get_dst(skb, nhoff);
+ return flow_get_dst(skb, flow);
}
-static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, int nhoff)
+static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, const struct flow_keys *flow)
{
return ntohs(CTTUPLE(skb, src.u.all));
fallback:
- return flow_get_proto_src(skb, nhoff);
+ return flow_get_proto_src(skb, flow);
}
-static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, int nhoff)
+static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow)
{
return ntohs(CTTUPLE(skb, dst.u.all));
fallback:
- return flow_get_proto_dst(skb, nhoff);
+ return flow_get_proto_dst(skb, flow);
}
static u32 flow_get_rtclassid(const struct sk_buff *skb)
@@ -314,21 +219,19 @@ static u32 flow_get_rxhash(struct sk_buff *skb)
return skb_get_rxhash(skb);
}
-static u32 flow_key_get(struct sk_buff *skb, int key)
+static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow)
{
- int nhoff = skb_network_offset(skb);
-
switch (key) {
case FLOW_KEY_SRC:
- return flow_get_src(skb, nhoff);
+ return flow_get_src(skb, flow);
case FLOW_KEY_DST:
- return flow_get_dst(skb, nhoff);
+ return flow_get_dst(skb, flow);
case FLOW_KEY_PROTO:
- return flow_get_proto(skb, nhoff);
+ return flow_get_proto(skb, flow);
case FLOW_KEY_PROTO_SRC:
- return flow_get_proto_src(skb, nhoff);
+ return flow_get_proto_src(skb, flow);
case FLOW_KEY_PROTO_DST:
- return flow_get_proto_dst(skb, nhoff);
+ return flow_get_proto_dst(skb, flow);
case FLOW_KEY_IIF:
return flow_get_iif(skb);
case FLOW_KEY_PRIORITY:
@@ -338,13 +241,13 @@ static u32 flow_key_get(struct sk_buff *skb, int key)
case FLOW_KEY_NFCT:
return flow_get_nfct(skb);
case FLOW_KEY_NFCT_SRC:
- return flow_get_nfct_src(skb, nhoff);
+ return flow_get_nfct_src(skb, flow);
case FLOW_KEY_NFCT_DST:
- return flow_get_nfct_dst(skb, nhoff);
+ return flow_get_nfct_dst(skb, flow);
case FLOW_KEY_NFCT_PROTO_SRC:
- return flow_get_nfct_proto_src(skb, nhoff);
+ return flow_get_nfct_proto_src(skb, flow);
case FLOW_KEY_NFCT_PROTO_DST:
- return flow_get_nfct_proto_dst(skb, nhoff);
+ return flow_get_nfct_proto_dst(skb, flow);
case FLOW_KEY_RTCLASSID:
return flow_get_rtclassid(skb);
case FLOW_KEY_SKUID:
@@ -361,6 +264,16 @@ static u32 flow_key_get(struct sk_buff *skb, int key)
}
}
+#define FLOW_KEYS_NEEDED ((1 << FLOW_KEY_SRC) | \
+ (1 << FLOW_KEY_DST) | \
+ (1 << FLOW_KEY_PROTO) | \
+ (1 << FLOW_KEY_PROTO_SRC) | \
+ (1 << FLOW_KEY_PROTO_DST) | \
+ (1 << FLOW_KEY_NFCT_SRC) | \
+ (1 << FLOW_KEY_NFCT_DST) | \
+ (1 << FLOW_KEY_NFCT_PROTO_SRC) | \
+ (1 << FLOW_KEY_NFCT_PROTO_DST))
+
static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
@@ -372,17 +285,20 @@ static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
int r;
list_for_each_entry(f, &head->filters, list) {
- u32 keys[f->nkeys];
+ u32 keys[FLOW_KEY_MAX + 1];
+ struct flow_keys flow_keys;
if (!tcf_em_tree_match(skb, &f->ematches, NULL))
continue;
keymask = f->keymask;
+ if (keymask & FLOW_KEYS_NEEDED)
+ skb_flow_dissect(skb, &flow_keys);
for (n = 0; n < f->nkeys; n++) {
key = ffs(keymask) - 1;
keymask &= ~(1 << key);
- keys[n] = flow_key_get(skb, key);
+ keys[n] = flow_key_get(skb, key, &flow_keys);
}
if (f->mode == FLOW_MODE_HASH)
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 3422b25df9e..bef00acb8bd 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -19,10 +19,7 @@
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
#include <net/red.h>
-#include <linux/ip.h>
-#include <net/ip.h>
-#include <linux/ipv6.h>
-#include <net/ipv6.h>
+#include <net/flow_keys.h>
/*
CHOKe stateless AQM for fair bandwidth allocation
@@ -142,85 +139,10 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
--sch->q.qlen;
}
-/*
- * Compare flow of two packets
- * Returns true only if source and destination address and port match.
- * false for special cases
- */
-static bool choke_match_flow(struct sk_buff *skb1,
- struct sk_buff *skb2)
-{
- int off1, off2, poff;
- const u32 *ports1, *ports2;
- u8 ip_proto;
- __u32 hash1;
-
- if (skb1->protocol != skb2->protocol)
- return false;
-
- /* Use hash value as quick check
- * Assumes that __skb_get_rxhash makes IP header and ports linear
- */
- hash1 = skb_get_rxhash(skb1);
- if (!hash1 || hash1 != skb_get_rxhash(skb2))
- return false;
-
- /* Probably match, but be sure to avoid hash collisions */
- off1 = skb_network_offset(skb1);
- off2 = skb_network_offset(skb2);
-
- switch (skb1->protocol) {
- case __constant_htons(ETH_P_IP): {
- const struct iphdr *ip1, *ip2;
-
- ip1 = (const struct iphdr *) (skb1->data + off1);
- ip2 = (const struct iphdr *) (skb2->data + off2);
-
- ip_proto = ip1->protocol;
- if (ip_proto != ip2->protocol ||
- ip1->saddr != ip2->saddr || ip1->daddr != ip2->daddr)
- return false;
-
- if (ip_is_fragment(ip1) | ip_is_fragment(ip2))
- ip_proto = 0;
- off1 += ip1->ihl * 4;
- off2 += ip2->ihl * 4;
- break;
- }
-
- case __constant_htons(ETH_P_IPV6): {
- const struct ipv6hdr *ip1, *ip2;
-
- ip1 = (const struct ipv6hdr *) (skb1->data + off1);
- ip2 = (const struct ipv6hdr *) (skb2->data + off2);
-
- ip_proto = ip1->nexthdr;
- if (ip_proto != ip2->nexthdr ||
- ipv6_addr_cmp(&ip1->saddr, &ip2->saddr) ||
- ipv6_addr_cmp(&ip1->daddr, &ip2->daddr))
- return false;
- off1 += 40;
- off2 += 40;
- }
-
- default: /* Maybe compare MAC header here? */
- return false;
- }
-
- poff = proto_ports_offset(ip_proto);
- if (poff < 0)
- return true;
-
- off1 += poff;
- off2 += poff;
-
- ports1 = (__force u32 *)(skb1->data + off1);
- ports2 = (__force u32 *)(skb2->data + off2);
- return *ports1 == *ports2;
-}
-
struct choke_skb_cb {
- u16 classid;
+ u16 classid;
+ u8 keys_valid;
+ struct flow_keys keys;
};
static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
@@ -241,6 +163,32 @@ static u16 choke_get_classid(const struct sk_buff *skb)
}
/*
+ * Compare flow of two packets
+ * Returns true only if source and destination address and port match.
+ * false for special cases
+ */
+static bool choke_match_flow(struct sk_buff *skb1,
+ struct sk_buff *skb2)
+{
+ if (skb1->protocol != skb2->protocol)
+ return false;
+
+ if (!choke_skb_cb(skb1)->keys_valid) {
+ choke_skb_cb(skb1)->keys_valid = 1;
+ skb_flow_dissect(skb1, &choke_skb_cb(skb1)->keys);
+ }
+
+ if (!choke_skb_cb(skb2)->keys_valid) {
+ choke_skb_cb(skb2)->keys_valid = 1;
+ skb_flow_dissect(skb2, &choke_skb_cb(skb2)->keys);
+ }
+
+ return !memcmp(&choke_skb_cb(skb1)->keys,
+ &choke_skb_cb(skb2)->keys,
+ sizeof(struct flow_keys));
+}
+
+/*
* Classify flow using either:
* 1. pre-existing classification result in skb
* 2. fast internal classification
@@ -326,6 +274,7 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
goto other_drop; /* Packet was eaten by filter */
}
+ choke_skb_cb(skb)->keys_valid = 0;
/* Compute average queue usage (see RED) */
p->qavg = red_calc_qavg(p, sch->q.qlen);
if (red_is_idling(p))
@@ -445,6 +394,7 @@ static void choke_reset(struct Qdisc *sch)
static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
[TCA_CHOKE_PARMS] = { .len = sizeof(struct tc_red_qopt) },
[TCA_CHOKE_STAB] = { .len = RED_STAB_SIZE },
+ [TCA_CHOKE_MAX_P] = { .type = NLA_U32 },
};
@@ -466,6 +416,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
int err;
struct sk_buff **old = NULL;
unsigned int mask;
+ u32 max_P;
if (opt == NULL)
return -EINVAL;
@@ -478,6 +429,8 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
tb[TCA_CHOKE_STAB] == NULL)
return -EINVAL;
+ max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;
+
ctl = nla_data(tb[TCA_CHOKE_PARMS]);
if (ctl->limit > CHOKE_MAX_QUEUE)
@@ -527,7 +480,8 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
ctl->Plog, ctl->Scell_log,
- nla_data(tb[TCA_CHOKE_STAB]));
+ nla_data(tb[TCA_CHOKE_STAB]),
+ max_P);
if (q->head == q->tail)
red_end_of_idle_period(&q->parms);
@@ -561,6 +515,7 @@ static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
goto nla_put_failure;
NLA_PUT(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt);
+ NLA_PUT_U32(skb, TCA_CHOKE_MAX_P, q->parms.max_P);
return nla_nest_end(skb, opts);
nla_put_failure:
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 69fca279880..67fc573e013 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -60,7 +60,7 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
/* check the reason of requeuing without tx lock first */
txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
- if (!netif_tx_queue_frozen_or_stopped(txq)) {
+ if (!netif_xmit_frozen_or_stopped(txq)) {
q->gso_skb = NULL;
q->q.qlen--;
} else
@@ -121,7 +121,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
spin_unlock(root_lock);
HARD_TX_LOCK(dev, txq, smp_processor_id());
- if (!netif_tx_queue_frozen_or_stopped(txq))
+ if (!netif_xmit_frozen_or_stopped(txq))
ret = dev_hard_start_xmit(skb, dev, txq);
HARD_TX_UNLOCK(dev, txq);
@@ -143,7 +143,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
ret = dev_requeue_skb(skb, q);
}
- if (ret && netif_tx_queue_frozen_or_stopped(txq))
+ if (ret && netif_xmit_frozen_or_stopped(txq))
ret = 0;
return ret;
@@ -242,10 +242,11 @@ static void dev_watchdog(unsigned long arg)
* old device drivers set dev->trans_start
*/
trans_start = txq->trans_start ? : dev->trans_start;
- if (netif_tx_queue_stopped(txq) &&
+ if (netif_xmit_stopped(txq) &&
time_after(jiffies, (trans_start +
dev->watchdog_timeo))) {
some_queue_timedout = 1;
+ txq->trans_timeout++;
break;
}
}
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 6cd8ddfb512..53204de71c3 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -34,7 +34,7 @@ struct gred_sched;
struct gred_sched_data {
u32 limit; /* HARD maximal queue length */
- u32 DP; /* the drop pramaters */
+ u32 DP; /* the drop parameters */
u32 bytesin; /* bytes seen on virtualQ so far*/
u32 packetsin; /* packets seen on virtualQ so far*/
u32 backlog; /* bytes on the virtualQ */
@@ -379,18 +379,20 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
}
static inline int gred_change_vq(struct Qdisc *sch, int dp,
- struct tc_gred_qopt *ctl, int prio, u8 *stab)
+ struct tc_gred_qopt *ctl, int prio,
+ u8 *stab, u32 max_P,
+ struct gred_sched_data **prealloc)
{
struct gred_sched *table = qdisc_priv(sch);
- struct gred_sched_data *q;
+ struct gred_sched_data *q = table->tab[dp];
- if (table->tab[dp] == NULL) {
- table->tab[dp] = kzalloc(sizeof(*q), GFP_ATOMIC);
- if (table->tab[dp] == NULL)
+ if (!q) {
+ table->tab[dp] = q = *prealloc;
+ *prealloc = NULL;
+ if (!q)
return -ENOMEM;
}
- q = table->tab[dp];
q->DP = dp;
q->prio = prio;
q->limit = ctl->limit;
@@ -400,7 +402,7 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
red_set_parms(&q->parms,
ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog,
- ctl->Scell_log, stab);
+ ctl->Scell_log, stab, max_P);
return 0;
}
@@ -409,6 +411,7 @@ static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = {
[TCA_GRED_PARMS] = { .len = sizeof(struct tc_gred_qopt) },
[TCA_GRED_STAB] = { .len = 256 },
[TCA_GRED_DPS] = { .len = sizeof(struct tc_gred_sopt) },
+ [TCA_GRED_MAX_P] = { .type = NLA_U32 },
};
static int gred_change(struct Qdisc *sch, struct nlattr *opt)
@@ -418,6 +421,8 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
struct nlattr *tb[TCA_GRED_MAX + 1];
int err, prio = GRED_DEF_PRIO;
u8 *stab;
+ u32 max_P;
+ struct gred_sched_data *prealloc;
if (opt == NULL)
return -EINVAL;
@@ -433,6 +438,8 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
tb[TCA_GRED_STAB] == NULL)
return -EINVAL;
+ max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0;
+
err = -EINVAL;
ctl = nla_data(tb[TCA_GRED_PARMS]);
stab = nla_data(tb[TCA_GRED_STAB]);
@@ -455,9 +462,10 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
prio = ctl->prio;
}
+ prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL);
sch_tree_lock(sch);
- err = gred_change_vq(sch, ctl->DP, ctl, prio, stab);
+ err = gred_change_vq(sch, ctl->DP, ctl, prio, stab, max_P, &prealloc);
if (err < 0)
goto errout_locked;
@@ -471,6 +479,7 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
errout_locked:
sch_tree_unlock(sch);
+ kfree(prealloc);
errout:
return err;
}
@@ -498,6 +507,7 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
struct gred_sched *table = qdisc_priv(sch);
struct nlattr *parms, *opts = NULL;
int i;
+ u32 max_p[MAX_DPs];
struct tc_gred_sopt sopt = {
.DPs = table->DPs,
.def_DP = table->def,
@@ -509,6 +519,14 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
if (opts == NULL)
goto nla_put_failure;
NLA_PUT(skb, TCA_GRED_DPS, sizeof(sopt), &sopt);
+
+ for (i = 0; i < MAX_DPs; i++) {
+ struct gred_sched_data *q = table->tab[i];
+
+ max_p[i] = q ? q->parms.max_P : 0;
+ }
+ NLA_PUT(skb, TCA_GRED_MAX_P, sizeof(max_p), max_p);
+
parms = nla_nest_start(skb, TCA_GRED_PARMS);
if (parms == NULL)
goto nla_put_failure;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 6488e642565..9bdca2e011e 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1368,6 +1368,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
struct tc_hfsc_stats xstats;
cl->qstats.qlen = cl->qdisc->q.qlen;
+ cl->qstats.backlog = cl->qdisc->qstats.backlog;
xstats.level = cl->level;
xstats.period = cl->cl_vtperiod;
xstats.work = cl->cl_total;
@@ -1561,6 +1562,15 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
struct hfsc_sched *q = qdisc_priv(sch);
unsigned char *b = skb_tail_pointer(skb);
struct tc_hfsc_qopt qopt;
+ struct hfsc_class *cl;
+ struct hlist_node *n;
+ unsigned int i;
+
+ sch->qstats.backlog = 0;
+ for (i = 0; i < q->clhash.hashsize; i++) {
+ hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode)
+ sch->qstats.backlog += cl->qdisc->qstats.backlog;
+ }
qopt.defcls = q->defcls;
NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index edc1950e0e7..49131d7a744 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -107,7 +107,8 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
/* Check that target subqueue is available before
* pulling an skb to avoid head-of-line blocking.
*/
- if (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) {
+ if (!netif_xmit_stopped(
+ netdev_get_tx_queue(qdisc_dev(sch), q->curband))) {
qdisc = q->queues[q->curband];
skb = qdisc->dequeue(qdisc);
if (skb) {
@@ -138,7 +139,8 @@ static struct sk_buff *multiq_peek(struct Qdisc *sch)
/* Check that target subqueue is available before
* pulling an skb to avoid head-of-line blocking.
*/
- if (!__netif_subqueue_stopped(qdisc_dev(sch), curband)) {
+ if (!netif_xmit_stopped(
+ netdev_get_tx_queue(qdisc_dev(sch), curband))) {
qdisc = q->queues[curband];
skb = qdisc->ops->peek(qdisc);
if (skb)
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index eb3b9a86c6e..ffcaa597594 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -22,6 +22,7 @@
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
+#include <linux/reciprocal_div.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
@@ -79,6 +80,11 @@ struct netem_sched_data {
u32 duplicate;
u32 reorder;
u32 corrupt;
+ u32 rate;
+ s32 packet_overhead;
+ u32 cell_size;
+ u32 cell_size_reciprocal;
+ s32 cell_overhead;
struct crndstate {
u32 last;
@@ -298,6 +304,26 @@ static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}
+static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q)
+{
+ u64 ticks;
+
+ len += q->packet_overhead;
+
+ if (q->cell_size) {
+ u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);
+
+ if (len > cells * q->cell_size) /* extra cell needed for remainder */
+ cells++;
+ len = cells * (q->cell_size + q->cell_overhead);
+ }
+
+ ticks = (u64)len * NSEC_PER_SEC;
+
+ do_div(ticks, q->rate);
+ return PSCHED_NS2TICKS(ticks);
+}
+
/*
* Insert one skb into qdisc.
* Note: parent depends on return value to account for queue length.
@@ -371,6 +397,24 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
&q->delay_cor, q->delay_dist);
now = psched_get_time();
+
+ if (q->rate) {
+ struct sk_buff_head *list = &q->qdisc->q;
+
+ delay += packet_len_2_sched_time(skb->len, q);
+
+ if (!skb_queue_empty(list)) {
+ /*
+ * Last packet in queue is reference point (now).
+ * First packet in queue is already in flight,
+ * calculate this time bonus and substract
+ * from delay.
+ */
+ delay -= now - netem_skb_cb(skb_peek(list))->time_to_send;
+ now = netem_skb_cb(skb_peek_tail(list))->time_to_send;
+ }
+ }
+
cb->time_to_send = now + delay;
++q->counter;
ret = qdisc_enqueue(skb, q->qdisc);
@@ -535,6 +579,19 @@ static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
init_crandom(&q->corrupt_cor, r->correlation);
}
+static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
+{
+ struct netem_sched_data *q = qdisc_priv(sch);
+ const struct tc_netem_rate *r = nla_data(attr);
+
+ q->rate = r->rate;
+ q->packet_overhead = r->packet_overhead;
+ q->cell_size = r->cell_size;
+ if (q->cell_size)
+ q->cell_size_reciprocal = reciprocal_value(q->cell_size);
+ q->cell_overhead = r->cell_overhead;
+}
+
static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
{
struct netem_sched_data *q = qdisc_priv(sch);
@@ -548,7 +605,7 @@ static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
case NETEM_LOSS_GI: {
const struct tc_netem_gimodel *gi = nla_data(la);
- if (nla_len(la) != sizeof(struct tc_netem_gimodel)) {
+ if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
pr_info("netem: incorrect gi model size\n");
return -EINVAL;
}
@@ -567,8 +624,8 @@ static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
case NETEM_LOSS_GE: {
const struct tc_netem_gemodel *ge = nla_data(la);
- if (nla_len(la) != sizeof(struct tc_netem_gemodel)) {
- pr_info("netem: incorrect gi model size\n");
+ if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
+ pr_info("netem: incorrect ge model size\n");
return -EINVAL;
}
@@ -594,6 +651,7 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
[TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) },
[TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) },
[TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) },
+ [TCA_NETEM_RATE] = { .len = sizeof(struct tc_netem_rate) },
[TCA_NETEM_LOSS] = { .type = NLA_NESTED },
};
@@ -666,6 +724,9 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
if (tb[TCA_NETEM_CORRUPT])
get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
+ if (tb[TCA_NETEM_RATE])
+ get_rate(sch, tb[TCA_NETEM_RATE]);
+
q->loss_model = CLG_RANDOM;
if (tb[TCA_NETEM_LOSS])
ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);
@@ -846,6 +907,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
struct tc_netem_corr cor;
struct tc_netem_reorder reorder;
struct tc_netem_corrupt corrupt;
+ struct tc_netem_rate rate;
qopt.latency = q->latency;
qopt.jitter = q->jitter;
@@ -868,6 +930,12 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
corrupt.correlation = q->corrupt_cor.rho;
NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
+ rate.rate = q->rate;
+ rate.packet_overhead = q->packet_overhead;
+ rate.cell_size = q->cell_size;
+ rate.cell_overhead = q->cell_overhead;
+ NLA_PUT(skb, TCA_NETEM_RATE, sizeof(rate), &rate);
+
if (dump_loss_model(q, skb) != 0)
goto nla_put_failure;
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index d617161f8dd..ce2256a17d7 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -39,6 +39,7 @@
struct red_sched_data {
u32 limit; /* HARD maximal queue length */
unsigned char flags;
+ struct timer_list adapt_timer;
struct red_parms parms;
struct red_stats stats;
struct Qdisc *qdisc;
@@ -161,12 +162,15 @@ static void red_reset(struct Qdisc *sch)
static void red_destroy(struct Qdisc *sch)
{
struct red_sched_data *q = qdisc_priv(sch);
+
+ del_timer_sync(&q->adapt_timer);
qdisc_destroy(q->qdisc);
}
static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
[TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
[TCA_RED_STAB] = { .len = RED_STAB_SIZE },
+ [TCA_RED_MAX_P] = { .type = NLA_U32 },
};
static int red_change(struct Qdisc *sch, struct nlattr *opt)
@@ -176,6 +180,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
struct tc_red_qopt *ctl;
struct Qdisc *child = NULL;
int err;
+ u32 max_P;
if (opt == NULL)
return -EINVAL;
@@ -188,6 +193,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
tb[TCA_RED_STAB] == NULL)
return -EINVAL;
+ max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
+
ctl = nla_data(tb[TCA_RED_PARMS]);
if (ctl->limit > 0) {
@@ -206,8 +213,13 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
}
red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
- ctl->Plog, ctl->Scell_log,
- nla_data(tb[TCA_RED_STAB]));
+ ctl->Plog, ctl->Scell_log,
+ nla_data(tb[TCA_RED_STAB]),
+ max_P);
+
+ del_timer(&q->adapt_timer);
+ if (ctl->flags & TC_RED_ADAPTATIVE)
+ mod_timer(&q->adapt_timer, jiffies + HZ/2);
if (!q->qdisc->q.qlen)
red_start_of_idle_period(&q->parms);
@@ -216,11 +228,24 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
return 0;
}
+static inline void red_adaptative_timer(unsigned long arg)
+{
+ struct Qdisc *sch = (struct Qdisc *)arg;
+ struct red_sched_data *q = qdisc_priv(sch);
+ spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
+
+ spin_lock(root_lock);
+ red_adaptative_algo(&q->parms);
+ mod_timer(&q->adapt_timer, jiffies + HZ/2);
+ spin_unlock(root_lock);
+}
+
static int red_init(struct Qdisc *sch, struct nlattr *opt)
{
struct red_sched_data *q = qdisc_priv(sch);
q->qdisc = &noop_qdisc;
+ setup_timer(&q->adapt_timer, red_adaptative_timer, (unsigned long)sch);
return red_change(sch, opt);
}
@@ -243,6 +268,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
if (opts == NULL)
goto nla_put_failure;
NLA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
+ NLA_PUT_U32(skb, TCA_RED_MAX_P, q->parms.max_P);
return nla_nest_end(skb, opts);
nla_put_failure:
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index e83c272c032..96e42cae4c7 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -26,6 +26,7 @@
#include <net/ip.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
+#include <net/flow_keys.h>
/*
* SFB uses two B[l][n] : L x N arrays of bins (L levels, N bins per level)
@@ -286,6 +287,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
u32 minqlen = ~0;
u32 r, slot, salt, sfbhash;
int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+ struct flow_keys keys;
if (unlikely(sch->q.qlen >= q->limit)) {
sch->qstats.overlimits++;
@@ -309,13 +311,19 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
/* If using external classifiers, get result and record it. */
if (!sfb_classify(skb, q, &ret, &salt))
goto other_drop;
+ keys.src = salt;
+ keys.dst = 0;
+ keys.ports = 0;
} else {
- salt = skb_get_rxhash(skb);
+ skb_flow_dissect(skb, &keys);
}
slot = q->slot;
- sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
+ sfbhash = jhash_3words((__force u32)keys.dst,
+ (__force u32)keys.src,
+ (__force u32)keys.ports,
+ q->bins[slot].perturbation);
if (!sfbhash)
sfbhash = 1;
sfb_skb_cb(skb)->hashes[slot] = sfbhash;
@@ -347,7 +355,10 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (unlikely(p_min >= SFB_MAX_PROB)) {
/* Inelastic flow */
if (q->double_buffering) {
- sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
+ sfbhash = jhash_3words((__force u32)keys.dst,
+ (__force u32)keys.src,
+ (__force u32)keys.ports,
+ q->bins[slot].perturbation);
if (!sfbhash)
sfbhash = 1;
sfb_skb_cb(skb)->hashes[slot] = sfbhash;
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 4f5510e2bd6..d329a8a7235 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -17,14 +17,13 @@
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/init.h>
-#include <linux/ipv6.h>
#include <linux/skbuff.h>
#include <linux/jhash.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
-#include <net/ip.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/flow_keys.h>
/* Stochastic Fairness Queuing algorithm.
@@ -137,61 +136,31 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index
return &q->dep[val - SFQ_SLOTS];
}
-static unsigned int sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
+/*
+ * In order to be able to quickly rehash our queue when timer changes
+ * q->perturbation, we store flow_keys in skb->cb[]
+ */
+struct sfq_skb_cb {
+ struct flow_keys keys;
+};
+
+static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb)
{
- return jhash_2words(h, h1, q->perturbation) & (q->divisor - 1);
+ BUILD_BUG_ON(sizeof(skb->cb) <
+ sizeof(struct qdisc_skb_cb) + sizeof(struct sfq_skb_cb));
+ return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data;
}
-static unsigned int sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
+static unsigned int sfq_hash(const struct sfq_sched_data *q,
+ const struct sk_buff *skb)
{
- u32 h, h2;
-
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- {
- const struct iphdr *iph;
- int poff;
-
- if (!pskb_network_may_pull(skb, sizeof(*iph)))
- goto err;
- iph = ip_hdr(skb);
- h = (__force u32)iph->daddr;
- h2 = (__force u32)iph->saddr ^ iph->protocol;
- if (ip_is_fragment(iph))
- break;
- poff = proto_ports_offset(iph->protocol);
- if (poff >= 0 &&
- pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
- iph = ip_hdr(skb);
- h2 ^= *(u32 *)((void *)iph + iph->ihl * 4 + poff);
- }
- break;
- }
- case htons(ETH_P_IPV6):
- {
- const struct ipv6hdr *iph;
- int poff;
-
- if (!pskb_network_may_pull(skb, sizeof(*iph)))
- goto err;
- iph = ipv6_hdr(skb);
- h = (__force u32)iph->daddr.s6_addr32[3];
- h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr;
- poff = proto_ports_offset(iph->nexthdr);
- if (poff >= 0 &&
- pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) {
- iph = ipv6_hdr(skb);
- h2 ^= *(u32 *)((void *)iph + sizeof(*iph) + poff);
- }
- break;
- }
- default:
-err:
- h = (unsigned long)skb_dst(skb) ^ (__force u32)skb->protocol;
- h2 = (unsigned long)skb->sk;
- }
+ const struct flow_keys *keys = &sfq_skb_cb(skb)->keys;
+ unsigned int hash;
- return sfq_fold_hash(q, h, h2);
+ hash = jhash_3words((__force u32)keys->dst,
+ (__force u32)keys->src ^ keys->ip_proto,
+ (__force u32)keys->ports, q->perturbation);
+ return hash & (q->divisor - 1);
}
static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
@@ -206,8 +175,10 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
TC_H_MIN(skb->priority) <= q->divisor)
return TC_H_MIN(skb->priority);
- if (!q->filter_list)
+ if (!q->filter_list) {
+ skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys);
return sfq_hash(q, skb) + 1;
+ }
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
result = tc_classify(skb, q->filter_list, &res);
@@ -468,12 +439,71 @@ sfq_reset(struct Qdisc *sch)
kfree_skb(skb);
}
+/*
+ * When q->perturbation is changed, we rehash all queued skbs
+ * to avoid OOO (Out Of Order) effects.
+ * We dont use sfq_dequeue()/sfq_enqueue() because we dont want to change
+ * counters.
+ */
+static void sfq_rehash(struct sfq_sched_data *q)
+{
+ struct sk_buff *skb;
+ int i;
+ struct sfq_slot *slot;
+ struct sk_buff_head list;
+
+ __skb_queue_head_init(&list);
+
+ for (i = 0; i < SFQ_SLOTS; i++) {
+ slot = &q->slots[i];
+ if (!slot->qlen)
+ continue;
+ while (slot->qlen) {
+ skb = slot_dequeue_head(slot);
+ sfq_dec(q, i);
+ __skb_queue_tail(&list, skb);
+ }
+ q->ht[slot->hash] = SFQ_EMPTY_SLOT;
+ }
+ q->tail = NULL;
+
+ while ((skb = __skb_dequeue(&list)) != NULL) {
+ unsigned int hash = sfq_hash(q, skb);
+ sfq_index x = q->ht[hash];
+
+ slot = &q->slots[x];
+ if (x == SFQ_EMPTY_SLOT) {
+ x = q->dep[0].next; /* get a free slot */
+ q->ht[hash] = x;
+ slot = &q->slots[x];
+ slot->hash = hash;
+ }
+ slot_queue_add(slot, skb);
+ sfq_inc(q, x);
+ if (slot->qlen == 1) { /* The flow is new */
+ if (q->tail == NULL) { /* It is the first flow */
+ slot->next = x;
+ } else {
+ slot->next = q->tail->next;
+ q->tail->next = x;
+ }
+ q->tail = slot;
+ slot->allot = q->scaled_quantum;
+ }
+ }
+}
+
static void sfq_perturbation(unsigned long arg)
{
struct Qdisc *sch = (struct Qdisc *)arg;
struct sfq_sched_data *q = qdisc_priv(sch);
+ spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
+ spin_lock(root_lock);
q->perturbation = net_random();
+ if (!q->filter_list && q->tail)
+ sfq_rehash(q);
+ spin_unlock(root_lock);
if (q->perturb_period)
mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 4f4c52c0eeb..45326599fda 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -277,7 +277,7 @@ static inline int teql_resolve(struct sk_buff *skb,
return 0;
rcu_read_lock();
- mn = dst_get_neighbour(dst);
+ mn = dst_get_neighbour_noref(dst);
res = mn ? __teql_resolve(skb, skb_res, dev, txq, mn) : 0;
rcu_read_unlock();
@@ -310,7 +310,7 @@ restart:
if (slave_txq->qdisc_sleeping != q)
continue;
- if (__netif_subqueue_stopped(slave, subq) ||
+ if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
!netif_running(slave)) {
busy = 1;
continue;
@@ -321,7 +321,7 @@ restart:
if (__netif_tx_trylock(slave_txq)) {
unsigned int length = qdisc_pkt_len(skb);
- if (!netif_tx_queue_frozen_or_stopped(slave_txq) &&
+ if (!netif_xmit_frozen_or_stopped(slave_txq) &&
slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
txq_trans_update(slave_txq);
__netif_tx_unlock(slave_txq);
@@ -333,7 +333,7 @@ restart:
}
__netif_tx_unlock(slave_txq);
}
- if (netif_queue_stopped(dev))
+ if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
busy = 1;
break;
case 1: