summaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
authorJens Axboe <axboe@fb.com>2014-05-07 10:26:44 -0600
committerJens Axboe <axboe@fb.com>2014-05-07 10:26:44 -0600
commit506e931f92defdc60c1dc4aa2ff4a19a5dcd8618 (patch)
tree8c0fdc0c0c4186f927246b5164396da446fbc8e5 /block
parent5cf8c2277576fcc48966b105bb42782d7929fc48 (diff)
blk-mq: add basic round-robin of what CPU to queue workqueue work on
Right now we just pick the first CPU in the mask, but that can easily overload that one. Add some basic batching and round-robin all the entries in the mask instead. Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'block')
-rw-r--r--block/blk-mq.c45
1 files changed, 31 insertions, 14 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 0d379830a27..2410e0cb7ae 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -670,6 +670,30 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
}
}
+/*
+ * It'd be great if the workqueue API had a way to pass
+ * in a mask and had some smarts for more clever placement.
+ * For now we just round-robin here, switching for every
+ * BLK_MQ_CPU_WORK_BATCH queued items.
+ */
+static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
+{
+ int cpu = hctx->next_cpu;
+
+ if (--hctx->next_cpu_batch <= 0) {
+ int next_cpu;
+
+ next_cpu = cpumask_next(hctx->next_cpu, hctx->cpumask);
+ if (next_cpu >= nr_cpu_ids)
+ next_cpu = cpumask_first(hctx->cpumask);
+
+ hctx->next_cpu = next_cpu;
+ hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
+ }
+
+ return cpu;
+}
+
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
{
if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
@@ -682,13 +706,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
else {
unsigned int cpu;
- /*
- * It'd be great if the workqueue API had a way to pass
- * in a mask and had some smarts for more clever placement
- * than the first CPU. Or we could round-robin here. For now,
- * just queue on the first CPU.
- */
- cpu = cpumask_first(hctx->cpumask);
+ cpu = blk_mq_hctx_next_cpu(hctx);
kblockd_schedule_delayed_work_on(cpu, &hctx->run_work, 0);
}
}
@@ -795,13 +813,7 @@ void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
else {
unsigned int cpu;
- /*
- * It'd be great if the workqueue API had a way to pass
- * in a mask and had some smarts for more clever placement
- * than the first CPU. Or we could round-robin here. For now,
- * just queue on the first CPU.
- */
- cpu = cpumask_first(hctx->cpumask);
+ cpu = blk_mq_hctx_next_cpu(hctx);
kblockd_schedule_delayed_work_on(cpu, &hctx->delay_work, tmo);
}
}
@@ -1378,6 +1390,11 @@ static void blk_mq_map_swqueue(struct request_queue *q)
ctx->index_hw = hctx->nr_ctx;
hctx->ctxs[hctx->nr_ctx++] = ctx;
}
+
+ queue_for_each_hw_ctx(q, hctx, i) {
+ hctx->next_cpu = cpumask_first(hctx->cpumask);
+ hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
+ }
}
struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)