summaryrefslogtreecommitdiffstats
path: root/net/sched/sch_sfq.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched/sch_sfq.c')
-rw-r--r--net/sched/sch_sfq.c202
1 files changed, 123 insertions, 79 deletions
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 4f5510e2bd6..843018154a5 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -17,14 +17,13 @@
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/init.h>
-#include <linux/ipv6.h>
#include <linux/skbuff.h>
#include <linux/jhash.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
-#include <net/ip.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/flow_keys.h>
/* Stochastic Fairness Queuing algorithm.
@@ -137,61 +136,31 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index
return &q->dep[val - SFQ_SLOTS];
}
-static unsigned int sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
+/*
+ * In order to be able to quickly rehash our queue when timer changes
+ * q->perturbation, we store flow_keys in skb->cb[]
+ */
+struct sfq_skb_cb {
+ struct flow_keys keys;
+};
+
+static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb)
{
- return jhash_2words(h, h1, q->perturbation) & (q->divisor - 1);
+ BUILD_BUG_ON(sizeof(skb->cb) <
+ sizeof(struct qdisc_skb_cb) + sizeof(struct sfq_skb_cb));
+ return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data;
}
-static unsigned int sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
+static unsigned int sfq_hash(const struct sfq_sched_data *q,
+ const struct sk_buff *skb)
{
- u32 h, h2;
-
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- {
- const struct iphdr *iph;
- int poff;
-
- if (!pskb_network_may_pull(skb, sizeof(*iph)))
- goto err;
- iph = ip_hdr(skb);
- h = (__force u32)iph->daddr;
- h2 = (__force u32)iph->saddr ^ iph->protocol;
- if (ip_is_fragment(iph))
- break;
- poff = proto_ports_offset(iph->protocol);
- if (poff >= 0 &&
- pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
- iph = ip_hdr(skb);
- h2 ^= *(u32 *)((void *)iph + iph->ihl * 4 + poff);
- }
- break;
- }
- case htons(ETH_P_IPV6):
- {
- const struct ipv6hdr *iph;
- int poff;
-
- if (!pskb_network_may_pull(skb, sizeof(*iph)))
- goto err;
- iph = ipv6_hdr(skb);
- h = (__force u32)iph->daddr.s6_addr32[3];
- h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr;
- poff = proto_ports_offset(iph->nexthdr);
- if (poff >= 0 &&
- pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) {
- iph = ipv6_hdr(skb);
- h2 ^= *(u32 *)((void *)iph + sizeof(*iph) + poff);
- }
- break;
- }
- default:
-err:
- h = (unsigned long)skb_dst(skb) ^ (__force u32)skb->protocol;
- h2 = (unsigned long)skb->sk;
- }
+ const struct flow_keys *keys = &sfq_skb_cb(skb)->keys;
+ unsigned int hash;
- return sfq_fold_hash(q, h, h2);
+ hash = jhash_3words((__force u32)keys->dst,
+ (__force u32)keys->src ^ keys->ip_proto,
+ (__force u32)keys->ports, q->perturbation);
+ return hash & (q->divisor - 1);
}
static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
@@ -206,8 +175,10 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
TC_H_MIN(skb->priority) <= q->divisor)
return TC_H_MIN(skb->priority);
- if (!q->filter_list)
+ if (!q->filter_list) {
+ skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys);
return sfq_hash(q, skb) + 1;
+ }
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
result = tc_classify(skb, q->filter_list, &res);
@@ -395,11 +366,11 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (slot->qlen == 1) { /* The flow is new */
if (q->tail == NULL) { /* It is the first flow */
slot->next = x;
+ q->tail = slot;
} else {
slot->next = q->tail->next;
q->tail->next = x;
}
- q->tail = slot;
slot->allot = q->scaled_quantum;
}
if (++sch->q.qlen <= q->limit)
@@ -468,12 +439,71 @@ sfq_reset(struct Qdisc *sch)
kfree_skb(skb);
}
+/*
+ * When q->perturbation is changed, we rehash all queued skbs
+ * to avoid OOO (Out Of Order) effects.
+ * We dont use sfq_dequeue()/sfq_enqueue() because we dont want to change
+ * counters.
+ */
+static void sfq_rehash(struct sfq_sched_data *q)
+{
+ struct sk_buff *skb;
+ int i;
+ struct sfq_slot *slot;
+ struct sk_buff_head list;
+
+ __skb_queue_head_init(&list);
+
+ for (i = 0; i < SFQ_SLOTS; i++) {
+ slot = &q->slots[i];
+ if (!slot->qlen)
+ continue;
+ while (slot->qlen) {
+ skb = slot_dequeue_head(slot);
+ sfq_dec(q, i);
+ __skb_queue_tail(&list, skb);
+ }
+ q->ht[slot->hash] = SFQ_EMPTY_SLOT;
+ }
+ q->tail = NULL;
+
+ while ((skb = __skb_dequeue(&list)) != NULL) {
+ unsigned int hash = sfq_hash(q, skb);
+ sfq_index x = q->ht[hash];
+
+ slot = &q->slots[x];
+ if (x == SFQ_EMPTY_SLOT) {
+ x = q->dep[0].next; /* get a free slot */
+ q->ht[hash] = x;
+ slot = &q->slots[x];
+ slot->hash = hash;
+ }
+ slot_queue_add(slot, skb);
+ sfq_inc(q, x);
+ if (slot->qlen == 1) { /* The flow is new */
+ if (q->tail == NULL) { /* It is the first flow */
+ slot->next = x;
+ } else {
+ slot->next = q->tail->next;
+ q->tail->next = x;
+ }
+ q->tail = slot;
+ slot->allot = q->scaled_quantum;
+ }
+ }
+}
+
static void sfq_perturbation(unsigned long arg)
{
struct Qdisc *sch = (struct Qdisc *)arg;
struct sfq_sched_data *q = qdisc_priv(sch);
+ spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
+ spin_lock(root_lock);
q->perturbation = net_random();
+ if (!q->filter_list && q->tail)
+ sfq_rehash(q);
+ spin_unlock(root_lock);
if (q->perturb_period)
mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
@@ -514,10 +544,38 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
return 0;
}
+static void *sfq_alloc(size_t sz)
+{
+ void *ptr = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN);
+
+ if (!ptr)
+ ptr = vmalloc(sz);
+ return ptr;
+}
+
+static void sfq_free(void *addr)
+{
+ if (addr) {
+ if (is_vmalloc_addr(addr))
+ vfree(addr);
+ else
+ kfree(addr);
+ }
+}
+
+static void sfq_destroy(struct Qdisc *sch)
+{
+ struct sfq_sched_data *q = qdisc_priv(sch);
+
+ tcf_destroy_chain(&q->filter_list);
+ q->perturb_period = 0;
+ del_timer_sync(&q->perturb_timer);
+ sfq_free(q->ht);
+}
+
static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
{
struct sfq_sched_data *q = qdisc_priv(sch);
- size_t sz;
int i;
q->perturb_timer.function = sfq_perturbation;
@@ -533,23 +591,22 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
q->cur_depth = 0;
q->tail = NULL;
q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
- if (opt == NULL) {
- q->quantum = psched_mtu(qdisc_dev(sch));
- q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
- q->perturb_period = 0;
- q->perturbation = net_random();
- } else {
+ q->quantum = psched_mtu(qdisc_dev(sch));
+ q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
+ q->perturb_period = 0;
+ q->perturbation = net_random();
+
+ if (opt) {
int err = sfq_change(sch, opt);
if (err)
return err;
}
- sz = sizeof(q->ht[0]) * q->divisor;
- q->ht = kmalloc(sz, GFP_KERNEL);
- if (!q->ht && sz > PAGE_SIZE)
- q->ht = vmalloc(sz);
- if (!q->ht)
+ q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor);
+ if (!q->ht) {
+ sfq_destroy(sch);
return -ENOMEM;
+ }
for (i = 0; i < q->divisor; i++)
q->ht[i] = SFQ_EMPTY_SLOT;
@@ -564,19 +621,6 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
return 0;
}
-static void sfq_destroy(struct Qdisc *sch)
-{
- struct sfq_sched_data *q = qdisc_priv(sch);
-
- tcf_destroy_chain(&q->filter_list);
- q->perturb_period = 0;
- del_timer_sync(&q->perturb_timer);
- if (is_vmalloc_addr(q->ht))
- vfree(q->ht);
- else
- kfree(q->ht);
-}
-
static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct sfq_sched_data *q = qdisc_priv(sch);