summaryrefslogtreecommitdiffstats
path: root/block/blk-mq.c
diff options
context:
space:
mode:
Diffstat (limited to 'block/blk-mq.c')
-rw-r--r--block/blk-mq.c139
1 files changed, 65 insertions, 74 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 0f5879c42dc..ad69ef657e8 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -33,28 +33,6 @@ static LIST_HEAD(all_q_list);
static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx);
-static struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
- unsigned int cpu)
-{
- return per_cpu_ptr(q->queue_ctx, cpu);
-}
-
-/*
- * This assumes per-cpu software queueing queues. They could be per-node
- * as well, for instance. For now this is hardcoded as-is. Note that we don't
- * care about preemption, since we know the ctx's are persistent. This does
- * mean that we can't rely on ctx always matching the currently running CPU.
- */
-static struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
-{
- return __blk_mq_get_ctx(q, get_cpu());
-}
-
-static void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
-{
- put_cpu();
-}
-
/*
* Check if any of the ctx's have pending work in this hardware queue
*/
@@ -104,8 +82,10 @@ static int blk_mq_queue_enter(struct request_queue *q)
__percpu_counter_add(&q->mq_usage_counter, 1, 1000000);
smp_wmb();
- /* we have problems to freeze the queue if it's initializing */
- if (!blk_queue_bypass(q) || !blk_queue_init_done(q))
+
+ /* we have problems freezing the queue if it's initializing */
+ if (!blk_queue_dying(q) &&
+ (!blk_queue_bypass(q) || !blk_queue_init_done(q)))
return 0;
__percpu_counter_add(&q->mq_usage_counter, -1, 1000000);
@@ -129,7 +109,7 @@ static void blk_mq_queue_exit(struct request_queue *q)
__percpu_counter_add(&q->mq_usage_counter, -1, 1000000);
}
-static void __blk_mq_drain_queue(struct request_queue *q)
+void blk_mq_drain_queue(struct request_queue *q)
{
while (true) {
s64 count;
@@ -140,7 +120,7 @@ static void __blk_mq_drain_queue(struct request_queue *q)
if (count == 0)
break;
- blk_mq_run_queues(q, false);
+ blk_mq_start_hw_queues(q);
msleep(10);
}
}
@@ -159,12 +139,7 @@ static void blk_mq_freeze_queue(struct request_queue *q)
spin_unlock_irq(q->queue_lock);
if (drain)
- __blk_mq_drain_queue(q);
-}
-
-void blk_mq_drain_queue(struct request_queue *q)
-{
- __blk_mq_drain_queue(q);
+ blk_mq_drain_queue(q);
}
static void blk_mq_unfreeze_queue(struct request_queue *q)
@@ -205,6 +180,7 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
RB_CLEAR_NODE(&rq->rb_node);
rq->rq_disk = NULL;
rq->part = NULL;
+ rq->start_time = jiffies;
#ifdef CONFIG_BLK_CGROUP
rq->rl = NULL;
set_start_time_ns(rq);
@@ -224,6 +200,8 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
rq->sense = NULL;
INIT_LIST_HEAD(&rq->timeout_list);
+ rq->timeout = 0;
+
rq->end_io = NULL;
rq->end_io_data = NULL;
rq->next_rq = NULL;
@@ -232,24 +210,23 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
}
static struct request *
-__blk_mq_alloc_request(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
- struct blk_mq_ctx *ctx, int rw, gfp_t gfp, bool reserved)
+__blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw)
{
struct request *rq;
unsigned int tag;
- tag = blk_mq_get_tag(hctx, &ctx->last_tag, gfp, reserved);
+ tag = blk_mq_get_tag(data);
if (tag != BLK_MQ_TAG_FAIL) {
- rq = hctx->tags->rqs[tag];
+ rq = data->hctx->tags->rqs[tag];
rq->cmd_flags = 0;
- if (blk_mq_tag_busy(hctx)) {
+ if (blk_mq_tag_busy(data->hctx)) {
rq->cmd_flags = REQ_MQ_INFLIGHT;
- atomic_inc(&hctx->nr_active);
+ atomic_inc(&data->hctx->nr_active);
}
rq->tag = tag;
- blk_mq_rq_ctx_init(q, ctx, rq, rw);
+ blk_mq_rq_ctx_init(data->q, data->ctx, rq, rw);
return rq;
}
@@ -262,22 +239,27 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp,
struct blk_mq_ctx *ctx;
struct blk_mq_hw_ctx *hctx;
struct request *rq;
+ struct blk_mq_alloc_data alloc_data;
if (blk_mq_queue_enter(q))
return NULL;
ctx = blk_mq_get_ctx(q);
hctx = q->mq_ops->map_queue(q, ctx->cpu);
+ blk_mq_set_alloc_data(&alloc_data, q, gfp & ~__GFP_WAIT,
+ reserved, ctx, hctx);
- rq = __blk_mq_alloc_request(q, hctx, ctx, rw, gfp & ~__GFP_WAIT,
- reserved);
+ rq = __blk_mq_alloc_request(&alloc_data, rw);
if (!rq && (gfp & __GFP_WAIT)) {
__blk_mq_run_hw_queue(hctx);
blk_mq_put_ctx(ctx);
ctx = blk_mq_get_ctx(q);
hctx = q->mq_ops->map_queue(q, ctx->cpu);
- rq = __blk_mq_alloc_request(q, hctx, ctx, rw, gfp, reserved);
+ blk_mq_set_alloc_data(&alloc_data, q, gfp, reserved, ctx,
+ hctx);
+ rq = __blk_mq_alloc_request(&alloc_data, rw);
+ ctx = alloc_data.ctx;
}
blk_mq_put_ctx(ctx);
return rq;
@@ -424,16 +406,7 @@ static void blk_mq_start_request(struct request *rq, bool last)
if (unlikely(blk_bidi_rq(rq)))
rq->next_rq->resid_len = blk_rq_bytes(rq->next_rq);
- /*
- * Just mark start time and set the started bit. Due to memory
- * ordering, we know we'll see the correct deadline as long as
- * REQ_ATOMIC_STARTED is seen. Use the default queue timeout,
- * unless one has been set in the request.
- */
- if (!rq->timeout)
- rq->deadline = jiffies + q->rq_timeout;
- else
- rq->deadline = jiffies + rq->timeout;
+ blk_add_timer(rq);
/*
* Mark us as started and clear complete. Complete might have been
@@ -547,15 +520,20 @@ void blk_mq_kick_requeue_list(struct request_queue *q)
}
EXPORT_SYMBOL(blk_mq_kick_requeue_list);
-struct request *blk_mq_tag_to_rq(struct blk_mq_hw_ctx *hctx, unsigned int tag)
+static inline bool is_flush_request(struct request *rq, unsigned int tag)
{
- struct request_queue *q = hctx->queue;
+ return ((rq->cmd_flags & REQ_FLUSH_SEQ) &&
+ rq->q->flush_rq->tag == tag);
+}
+
+struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
+{
+ struct request *rq = tags->rqs[tag];
- if ((q->flush_rq->cmd_flags & REQ_FLUSH_SEQ) &&
- q->flush_rq->tag == tag)
- return q->flush_rq;
+ if (!is_flush_request(rq, tag))
+ return rq;
- return hctx->tags->rqs[tag];
+ return rq->q->flush_rq;
}
EXPORT_SYMBOL(blk_mq_tag_to_rq);
@@ -584,7 +562,7 @@ static void blk_mq_timeout_check(void *__data, unsigned long *free_tags)
if (tag >= hctx->tags->nr_tags)
break;
- rq = blk_mq_tag_to_rq(hctx, tag++);
+ rq = blk_mq_tag_to_rq(hctx->tags, tag++);
if (rq->q != hctx->queue)
continue;
if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
@@ -900,7 +878,7 @@ void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx)
clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
preempt_disable();
- __blk_mq_run_hw_queue(hctx);
+ blk_mq_run_hw_queue(hctx, false);
preempt_enable();
}
EXPORT_SYMBOL(blk_mq_start_hw_queue);
@@ -980,11 +958,6 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
list_add_tail(&rq->queuelist, &ctx->rq_list);
blk_mq_hctx_mark_pending(hctx, ctx);
-
- /*
- * We do this early, to ensure we are on the right CPU.
- */
- blk_add_timer(rq);
}
void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
@@ -1113,10 +1086,8 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio)
{
init_request_from_bio(rq, bio);
- if (blk_do_io_stat(rq)) {
- rq->start_time = jiffies;
+ if (blk_do_io_stat(rq))
blk_account_io_start(rq, 1);
- }
}
static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx,
@@ -1158,6 +1129,7 @@ static struct request *blk_mq_map_request(struct request_queue *q,
struct blk_mq_ctx *ctx;
struct request *rq;
int rw = bio_data_dir(bio);
+ struct blk_mq_alloc_data alloc_data;
if (unlikely(blk_mq_queue_enter(q))) {
bio_endio(bio, -EIO);
@@ -1171,7 +1143,9 @@ static struct request *blk_mq_map_request(struct request_queue *q,
rw |= REQ_SYNC;
trace_block_getrq(q, bio, rw);
- rq = __blk_mq_alloc_request(q, hctx, ctx, rw, GFP_ATOMIC, false);
+ blk_mq_set_alloc_data(&alloc_data, q, GFP_ATOMIC, false, ctx,
+ hctx);
+ rq = __blk_mq_alloc_request(&alloc_data, rw);
if (unlikely(!rq)) {
__blk_mq_run_hw_queue(hctx);
blk_mq_put_ctx(ctx);
@@ -1179,8 +1153,11 @@ static struct request *blk_mq_map_request(struct request_queue *q,
ctx = blk_mq_get_ctx(q);
hctx = q->mq_ops->map_queue(q, ctx->cpu);
- rq = __blk_mq_alloc_request(q, hctx, ctx, rw,
- __GFP_WAIT|GFP_ATOMIC, false);
+ blk_mq_set_alloc_data(&alloc_data, q,
+ __GFP_WAIT|GFP_ATOMIC, false, ctx, hctx);
+ rq = __blk_mq_alloc_request(&alloc_data, rw);
+ ctx = alloc_data.ctx;
+ hctx = alloc_data.hctx;
}
hctx->queued++;
@@ -1223,7 +1200,6 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
blk_mq_bio_to_request(rq, bio);
blk_mq_start_request(rq, true);
- blk_add_timer(rq);
/*
* For OK queue, we are done. For error, kill it. Any other
@@ -1288,6 +1264,8 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio)
return;
rq = blk_mq_map_request(q, bio, &data);
+ if (unlikely(!rq))
+ return;
if (unlikely(is_flush_fua)) {
blk_mq_bio_to_request(rq, bio);
@@ -1562,6 +1540,8 @@ static void blk_mq_exit_hw_queues(struct request_queue *q,
if (i == nr_queue)
break;
+ blk_mq_tag_idle(hctx);
+
if (set->ops->exit_hctx)
set->ops->exit_hctx(hctx, i);
@@ -1779,7 +1759,7 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
{
struct blk_mq_hw_ctx **hctxs;
- struct blk_mq_ctx *ctx;
+ struct blk_mq_ctx __percpu *ctx;
struct request_queue *q;
unsigned int *map;
int i;
@@ -1970,13 +1950,19 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
return NOTIFY_OK;
}
+/*
+ * Alloc a tag set to be associated with one or more request queues.
+ * May fail with EINVAL for various error conditions. May adjust the
+ * requested depth down, if if it too large. In that case, the set
+ * value will be stored in set->queue_depth.
+ */
int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
{
int i;
if (!set->nr_hw_queues)
return -EINVAL;
- if (!set->queue_depth || set->queue_depth > BLK_MQ_MAX_DEPTH)
+ if (!set->queue_depth)
return -EINVAL;
if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN)
return -EINVAL;
@@ -1984,6 +1970,11 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
if (!set->nr_hw_queues || !set->ops->queue_rq || !set->ops->map_queue)
return -EINVAL;
+ if (set->queue_depth > BLK_MQ_MAX_DEPTH) {
+ pr_info("blk-mq: reduced tag depth to %u\n",
+ BLK_MQ_MAX_DEPTH);
+ set->queue_depth = BLK_MQ_MAX_DEPTH;
+ }
set->tags = kmalloc_node(set->nr_hw_queues *
sizeof(struct blk_mq_tags *),