diff options
Diffstat (limited to 'block/blk-mq.c')
-rw-r--r-- | block/blk-mq.c | 139 |
1 files changed, 65 insertions, 74 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c index 0f5879c42dc..ad69ef657e8 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -33,28 +33,6 @@ static LIST_HEAD(all_q_list); static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx); -static struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q, - unsigned int cpu) -{ - return per_cpu_ptr(q->queue_ctx, cpu); -} - -/* - * This assumes per-cpu software queueing queues. They could be per-node - * as well, for instance. For now this is hardcoded as-is. Note that we don't - * care about preemption, since we know the ctx's are persistent. This does - * mean that we can't rely on ctx always matching the currently running CPU. - */ -static struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q) -{ - return __blk_mq_get_ctx(q, get_cpu()); -} - -static void blk_mq_put_ctx(struct blk_mq_ctx *ctx) -{ - put_cpu(); -} - /* * Check if any of the ctx's have pending work in this hardware queue */ @@ -104,8 +82,10 @@ static int blk_mq_queue_enter(struct request_queue *q) __percpu_counter_add(&q->mq_usage_counter, 1, 1000000); smp_wmb(); - /* we have problems to freeze the queue if it's initializing */ - if (!blk_queue_bypass(q) || !blk_queue_init_done(q)) + + /* we have problems freezing the queue if it's initializing */ + if (!blk_queue_dying(q) && + (!blk_queue_bypass(q) || !blk_queue_init_done(q))) return 0; __percpu_counter_add(&q->mq_usage_counter, -1, 1000000); @@ -129,7 +109,7 @@ static void blk_mq_queue_exit(struct request_queue *q) __percpu_counter_add(&q->mq_usage_counter, -1, 1000000); } -static void __blk_mq_drain_queue(struct request_queue *q) +void blk_mq_drain_queue(struct request_queue *q) { while (true) { s64 count; @@ -140,7 +120,7 @@ static void __blk_mq_drain_queue(struct request_queue *q) if (count == 0) break; - blk_mq_run_queues(q, false); + blk_mq_start_hw_queues(q); msleep(10); } } @@ -159,12 +139,7 @@ static void blk_mq_freeze_queue(struct request_queue *q) spin_unlock_irq(q->queue_lock); if 
(drain) - __blk_mq_drain_queue(q); -} - -void blk_mq_drain_queue(struct request_queue *q) -{ - __blk_mq_drain_queue(q); + blk_mq_drain_queue(q); } static void blk_mq_unfreeze_queue(struct request_queue *q) @@ -205,6 +180,7 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, RB_CLEAR_NODE(&rq->rb_node); rq->rq_disk = NULL; rq->part = NULL; + rq->start_time = jiffies; #ifdef CONFIG_BLK_CGROUP rq->rl = NULL; set_start_time_ns(rq); @@ -224,6 +200,8 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, rq->sense = NULL; INIT_LIST_HEAD(&rq->timeout_list); + rq->timeout = 0; + rq->end_io = NULL; rq->end_io_data = NULL; rq->next_rq = NULL; @@ -232,24 +210,23 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, } static struct request * -__blk_mq_alloc_request(struct request_queue *q, struct blk_mq_hw_ctx *hctx, - struct blk_mq_ctx *ctx, int rw, gfp_t gfp, bool reserved) +__blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw) { struct request *rq; unsigned int tag; - tag = blk_mq_get_tag(hctx, &ctx->last_tag, gfp, reserved); + tag = blk_mq_get_tag(data); if (tag != BLK_MQ_TAG_FAIL) { - rq = hctx->tags->rqs[tag]; + rq = data->hctx->tags->rqs[tag]; rq->cmd_flags = 0; - if (blk_mq_tag_busy(hctx)) { + if (blk_mq_tag_busy(data->hctx)) { rq->cmd_flags = REQ_MQ_INFLIGHT; - atomic_inc(&hctx->nr_active); + atomic_inc(&data->hctx->nr_active); } rq->tag = tag; - blk_mq_rq_ctx_init(q, ctx, rq, rw); + blk_mq_rq_ctx_init(data->q, data->ctx, rq, rw); return rq; } @@ -262,22 +239,27 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, struct blk_mq_ctx *ctx; struct blk_mq_hw_ctx *hctx; struct request *rq; + struct blk_mq_alloc_data alloc_data; if (blk_mq_queue_enter(q)) return NULL; ctx = blk_mq_get_ctx(q); hctx = q->mq_ops->map_queue(q, ctx->cpu); + blk_mq_set_alloc_data(&alloc_data, q, gfp & ~__GFP_WAIT, + reserved, ctx, hctx); - rq = __blk_mq_alloc_request(q, 
hctx, ctx, rw, gfp & ~__GFP_WAIT, - reserved); + rq = __blk_mq_alloc_request(&alloc_data, rw); if (!rq && (gfp & __GFP_WAIT)) { __blk_mq_run_hw_queue(hctx); blk_mq_put_ctx(ctx); ctx = blk_mq_get_ctx(q); hctx = q->mq_ops->map_queue(q, ctx->cpu); - rq = __blk_mq_alloc_request(q, hctx, ctx, rw, gfp, reserved); + blk_mq_set_alloc_data(&alloc_data, q, gfp, reserved, ctx, + hctx); + rq = __blk_mq_alloc_request(&alloc_data, rw); + ctx = alloc_data.ctx; } blk_mq_put_ctx(ctx); return rq; @@ -424,16 +406,7 @@ static void blk_mq_start_request(struct request *rq, bool last) if (unlikely(blk_bidi_rq(rq))) rq->next_rq->resid_len = blk_rq_bytes(rq->next_rq); - /* - * Just mark start time and set the started bit. Due to memory - * ordering, we know we'll see the correct deadline as long as - * REQ_ATOMIC_STARTED is seen. Use the default queue timeout, - * unless one has been set in the request. - */ - if (!rq->timeout) - rq->deadline = jiffies + q->rq_timeout; - else - rq->deadline = jiffies + rq->timeout; + blk_add_timer(rq); /* * Mark us as started and clear complete. 
Complete might have been @@ -547,15 +520,20 @@ void blk_mq_kick_requeue_list(struct request_queue *q) } EXPORT_SYMBOL(blk_mq_kick_requeue_list); -struct request *blk_mq_tag_to_rq(struct blk_mq_hw_ctx *hctx, unsigned int tag) +static inline bool is_flush_request(struct request *rq, unsigned int tag) { - struct request_queue *q = hctx->queue; + return ((rq->cmd_flags & REQ_FLUSH_SEQ) && + rq->q->flush_rq->tag == tag); +} + +struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag) +{ + struct request *rq = tags->rqs[tag]; - if ((q->flush_rq->cmd_flags & REQ_FLUSH_SEQ) && - q->flush_rq->tag == tag) - return q->flush_rq; + if (!is_flush_request(rq, tag)) + return rq; - return hctx->tags->rqs[tag]; + return rq->q->flush_rq; } EXPORT_SYMBOL(blk_mq_tag_to_rq); @@ -584,7 +562,7 @@ static void blk_mq_timeout_check(void *__data, unsigned long *free_tags) if (tag >= hctx->tags->nr_tags) break; - rq = blk_mq_tag_to_rq(hctx, tag++); + rq = blk_mq_tag_to_rq(hctx->tags, tag++); if (rq->q != hctx->queue) continue; if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) @@ -900,7 +878,7 @@ void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx) clear_bit(BLK_MQ_S_STOPPED, &hctx->state); preempt_disable(); - __blk_mq_run_hw_queue(hctx); + blk_mq_run_hw_queue(hctx, false); preempt_enable(); } EXPORT_SYMBOL(blk_mq_start_hw_queue); @@ -980,11 +958,6 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, list_add_tail(&rq->queuelist, &ctx->rq_list); blk_mq_hctx_mark_pending(hctx, ctx); - - /* - * We do this early, to ensure we are on the right CPU. 
- */ - blk_add_timer(rq); } void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue, @@ -1113,10 +1086,8 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio) { init_request_from_bio(rq, bio); - if (blk_do_io_stat(rq)) { - rq->start_time = jiffies; + if (blk_do_io_stat(rq)) blk_account_io_start(rq, 1); - } } static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx, @@ -1158,6 +1129,7 @@ static struct request *blk_mq_map_request(struct request_queue *q, struct blk_mq_ctx *ctx; struct request *rq; int rw = bio_data_dir(bio); + struct blk_mq_alloc_data alloc_data; if (unlikely(blk_mq_queue_enter(q))) { bio_endio(bio, -EIO); @@ -1171,7 +1143,9 @@ static struct request *blk_mq_map_request(struct request_queue *q, rw |= REQ_SYNC; trace_block_getrq(q, bio, rw); - rq = __blk_mq_alloc_request(q, hctx, ctx, rw, GFP_ATOMIC, false); + blk_mq_set_alloc_data(&alloc_data, q, GFP_ATOMIC, false, ctx, + hctx); + rq = __blk_mq_alloc_request(&alloc_data, rw); if (unlikely(!rq)) { __blk_mq_run_hw_queue(hctx); blk_mq_put_ctx(ctx); @@ -1179,8 +1153,11 @@ static struct request *blk_mq_map_request(struct request_queue *q, ctx = blk_mq_get_ctx(q); hctx = q->mq_ops->map_queue(q, ctx->cpu); - rq = __blk_mq_alloc_request(q, hctx, ctx, rw, - __GFP_WAIT|GFP_ATOMIC, false); + blk_mq_set_alloc_data(&alloc_data, q, + __GFP_WAIT|GFP_ATOMIC, false, ctx, hctx); + rq = __blk_mq_alloc_request(&alloc_data, rw); + ctx = alloc_data.ctx; + hctx = alloc_data.hctx; } hctx->queued++; @@ -1223,7 +1200,6 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_mq_bio_to_request(rq, bio); blk_mq_start_request(rq, true); - blk_add_timer(rq); /* * For OK queue, we are done. For error, kill it. 
Any other @@ -1288,6 +1264,8 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio) return; rq = blk_mq_map_request(q, bio, &data); + if (unlikely(!rq)) + return; if (unlikely(is_flush_fua)) { blk_mq_bio_to_request(rq, bio); @@ -1562,6 +1540,8 @@ static void blk_mq_exit_hw_queues(struct request_queue *q, if (i == nr_queue) break; + blk_mq_tag_idle(hctx); + if (set->ops->exit_hctx) set->ops->exit_hctx(hctx, i); @@ -1779,7 +1759,7 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set, struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) { struct blk_mq_hw_ctx **hctxs; - struct blk_mq_ctx *ctx; + struct blk_mq_ctx __percpu *ctx; struct request_queue *q; unsigned int *map; int i; @@ -1970,13 +1950,19 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb, return NOTIFY_OK; } +/* + * Alloc a tag set to be associated with one or more request queues. + * May fail with EINVAL for various error conditions. May adjust the + * requested depth down, if it is too large. In that case, the set + * value will be stored in set->queue_depth. + */ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) { int i; if (!set->nr_hw_queues) return -EINVAL; - if (!set->queue_depth || set->queue_depth > BLK_MQ_MAX_DEPTH) + if (!set->queue_depth) return -EINVAL; if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) return -EINVAL; @@ -1984,6 +1970,11 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) if (!set->nr_hw_queues || !set->ops->queue_rq || !set->ops->map_queue) return -EINVAL; + if (set->queue_depth > BLK_MQ_MAX_DEPTH) { + pr_info("blk-mq: reduced tag depth to %u\n", + BLK_MQ_MAX_DEPTH); + set->queue_depth = BLK_MQ_MAX_DEPTH; + } set->tags = kmalloc_node(set->nr_hw_queues * sizeof(struct blk_mq_tags *), |