From f0276924fa35a3607920a58cf5d878212824b951 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 31 Dec 2013 11:38:50 +0800 Subject: blk-mq: Don't reserve a tag for flush request Reserving a tag (request) for flush to avoid deadlock is overkill. A tag is a valuable resource. We can track the number of flush requests and disallow having too many pending flush requests allocated. With this patch, blk_mq_alloc_request_pinned() could do a busy nop (but not a dead loop) if too many pending requests are allocated and a new flush request is allocated. But this should not be a problem, too many pending flush requests are a very rare case. I verified this can fix the deadlock caused by too many pending flush requests. Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/blk-mq.h') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 161b23105b1..1e8f16f65af 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -36,12 +36,15 @@ struct blk_mq_hw_ctx { struct list_head page_list; struct blk_mq_tags *tags; + atomic_t pending_flush; + unsigned long queued; unsigned long run; #define BLK_MQ_MAX_DISPATCH_ORDER 10 unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER]; unsigned int queue_depth; + unsigned int reserved_tags; unsigned int numa_node; unsigned int cmd_size; /* per-request extra data */ -- cgit v1.2.3-70-g09d2 From 72a0a36e2854a6eadb4cf2561858f613f9cd4639 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 7 Feb 2014 10:22:36 -0800 Subject: blk-mq: support at_head inserations for blk_execute_rq This is needed for proper SG_IO operation as well as various uses of blk_execute_rq from the SCSI midlayer. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-exec.c | 2 +- block/blk-mq.c | 17 ++++++++++------- include/linux/blk-mq.h | 3 ++- 3 files changed, 13 insertions(+), 9 deletions(-) (limited to 'include/linux/blk-mq.h') diff --git a/block/blk-exec.c b/block/blk-exec.c index bbfc072a79c..c68613bb4c7 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -65,7 +65,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, * be resued after dying flag is set */ if (q->mq_ops) { - blk_mq_insert_request(q, rq, true); + blk_mq_insert_request(q, rq, at_head, true); return; } diff --git a/block/blk-mq.c b/block/blk-mq.c index 9072d0ab184..c9306e3403f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -714,13 +714,16 @@ static void blk_mq_work_fn(struct work_struct *work) } static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, - struct request *rq) + struct request *rq, bool at_head) { struct blk_mq_ctx *ctx = rq->mq_ctx; trace_block_rq_insert(hctx->queue, rq); - list_add_tail(&rq->queuelist, &ctx->rq_list); + if (at_head) + list_add(&rq->queuelist, &ctx->rq_list); + else + list_add_tail(&rq->queuelist, &ctx->rq_list); blk_mq_hctx_mark_pending(hctx, ctx); /* @@ -730,7 +733,7 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, } void blk_mq_insert_request(struct request_queue *q, struct request *rq, - bool run_queue) + bool at_head, bool run_queue) { struct blk_mq_hw_ctx *hctx; struct blk_mq_ctx *ctx, *current_ctx; @@ -749,7 +752,7 @@ void blk_mq_insert_request(struct request_queue *q, struct request *rq, rq->mq_ctx = ctx; } spin_lock(&ctx->lock); - __blk_mq_insert_request(hctx, rq); + __blk_mq_insert_request(hctx, rq, at_head); spin_unlock(&ctx->lock); blk_mq_put_ctx(current_ctx); @@ -781,7 +784,7 @@ void blk_mq_run_request(struct request *rq, bool run_queue, bool async) /* ctx->cpu might be offline */ spin_lock(&ctx->lock); - __blk_mq_insert_request(hctx, rq); + __blk_mq_insert_request(hctx, rq, 
false); spin_unlock(&ctx->lock); blk_mq_put_ctx(current_ctx); @@ -819,7 +822,7 @@ static void blk_mq_insert_requests(struct request_queue *q, rq = list_first_entry(list, struct request, queuelist); list_del_init(&rq->queuelist); rq->mq_ctx = ctx; - __blk_mq_insert_request(hctx, rq); + __blk_mq_insert_request(hctx, rq, false); } spin_unlock(&ctx->lock); @@ -971,7 +974,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) __blk_mq_free_request(hctx, ctx, rq); else { blk_mq_bio_to_request(rq, bio); - __blk_mq_insert_request(hctx, rq); + __blk_mq_insert_request(hctx, rq, false); } spin_unlock(&ctx->lock); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 1e8f16f65af..b7638be5859 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -122,7 +122,8 @@ void blk_mq_init_commands(struct request_queue *, void (*init)(void *data, struc void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); -void blk_mq_insert_request(struct request_queue *, struct request *, bool); +void blk_mq_insert_request(struct request_queue *, struct request *, + bool, bool); void blk_mq_run_queues(struct request_queue *q, bool async); void blk_mq_free_request(struct request *rq); bool blk_mq_can_queue(struct blk_mq_hw_ctx *); -- cgit v1.2.3-70-g09d2 From 30a91cb4ef385fe1b260df204ef314d86fff2850 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Feb 2014 03:24:38 -0800 Subject: blk-mq: rework I/O completions Rework I/O completions to work more like the old code path. blk_mq_end_io now stays out of the business of deferring completions to other CPUs and calling blk_mark_rq_complete. The latter is very important to allow completing requests that have timed out and thus are already marked completed, the former allows using the IPI callout even for driver specific completions instead of having to reimplement them. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 52 ++++++++++++++++++++++++++++++-------------------- block/blk-mq.h | 3 +-- block/blk-timeout.c | 2 +- include/linux/blk-mq.h | 4 ++++ 4 files changed, 37 insertions(+), 24 deletions(-) (limited to 'include/linux/blk-mq.h') diff --git a/block/blk-mq.c b/block/blk-mq.c index cee96234bf5..14c8f35946e 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -326,7 +326,7 @@ static void blk_mq_bio_endio(struct request *rq, struct bio *bio, int error) bio_endio(bio, error); } -void blk_mq_complete_request(struct request *rq, int error) +void blk_mq_end_io(struct request *rq, int error) { struct bio *bio = rq->bio; unsigned int bytes = 0; @@ -351,46 +351,53 @@ void blk_mq_complete_request(struct request *rq, int error) else blk_mq_free_request(rq); } +EXPORT_SYMBOL(blk_mq_end_io); -void __blk_mq_end_io(struct request *rq, int error) -{ - if (!blk_mark_rq_complete(rq)) - blk_mq_complete_request(rq, error); -} - -static void blk_mq_end_io_remote(void *data) +static void __blk_mq_complete_request_remote(void *data) { struct request *rq = data; - __blk_mq_end_io(rq, rq->errors); + rq->q->softirq_done_fn(rq); } -/* - * End IO on this request on a multiqueue enabled driver. We'll either do - * it directly inline, or punt to a local IPI handler on the matching - * remote CPU. 
- */ -void blk_mq_end_io(struct request *rq, int error) +void __blk_mq_complete_request(struct request *rq) { struct blk_mq_ctx *ctx = rq->mq_ctx; int cpu; - if (!ctx->ipi_redirect) - return __blk_mq_end_io(rq, error); + if (!ctx->ipi_redirect) { + rq->q->softirq_done_fn(rq); + return; + } cpu = get_cpu(); if (cpu != ctx->cpu && cpu_online(ctx->cpu)) { - rq->errors = error; - rq->csd.func = blk_mq_end_io_remote; + rq->csd.func = __blk_mq_complete_request_remote; rq->csd.info = rq; rq->csd.flags = 0; __smp_call_function_single(ctx->cpu, &rq->csd, 0); } else { - __blk_mq_end_io(rq, error); + rq->q->softirq_done_fn(rq); } put_cpu(); } -EXPORT_SYMBOL(blk_mq_end_io); + +/** + * blk_mq_complete_request - end I/O on a request + * @rq: the request being processed + * + * Description: + * Ends all I/O on a request. It does not handle partial completions. + * The actual completion happens out-of-order, through a IPI handler. + **/ +void blk_mq_complete_request(struct request *rq) +{ + if (unlikely(blk_should_fake_timeout(rq->q))) + return; + if (!blk_mark_rq_complete(rq)) + __blk_mq_complete_request(rq); +} +EXPORT_SYMBOL(blk_mq_complete_request); static void blk_mq_start_request(struct request *rq) { @@ -1399,6 +1406,9 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg, if (reg->timeout) blk_queue_rq_timeout(q, reg->timeout); + if (reg->ops->complete) + blk_queue_softirq_done(q, reg->ops->complete); + blk_mq_init_flush(q); blk_mq_init_cpu_queues(q, reg->nr_hw_queues); diff --git a/block/blk-mq.h b/block/blk-mq.h index 5c3917984b0..f29b645f0e1 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -22,8 +22,7 @@ struct blk_mq_ctx { struct kobject kobj; }; -void __blk_mq_end_io(struct request *rq, int error); -void blk_mq_complete_request(struct request *rq, int error); +void __blk_mq_complete_request(struct request *rq); void blk_mq_run_request(struct request *rq, bool run_queue, bool async); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void 
blk_mq_init_flush(struct request_queue *q); diff --git a/block/blk-timeout.c b/block/blk-timeout.c index bba81c9348e..d96f7061c6f 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -91,7 +91,7 @@ static void blk_rq_timed_out(struct request *req) case BLK_EH_HANDLED: /* Can we use req->errors here? */ if (q->mq_ops) - blk_mq_complete_request(req, req->errors); + __blk_mq_complete_request(req); else __blk_complete_request(req); break; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index b7638be5859..468be242db9 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -86,6 +86,8 @@ struct blk_mq_ops { */ rq_timed_out_fn *timeout; + softirq_done_fn *complete; + /* * Override for hctx allocations (should probably go) */ @@ -137,6 +139,8 @@ void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int); void blk_mq_end_io(struct request *rq, int error); +void blk_mq_complete_request(struct request *rq); + void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_stop_hw_queues(struct request_queue *q); -- cgit v1.2.3-70-g09d2 From 18741986a4b1dc4b1f171634c4191abc3b0fa023 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Feb 2014 09:29:00 -0700 Subject: blk-mq: rework flush sequencing logic Switch to using a preallocated flush_rq for blk-mq similar to what's done with the old request path. This allows us to set up the request properly with a tag from the actually allowed range and ->rq_disk as needed by some drivers. To make life easier we also switch to dynamic allocation of ->flush_rq for the old path. 
This effectively reverts most of "blk-mq: fix for flush deadlock" and "blk-mq: Don't reserve a tag for flush request" Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 15 +++++-- block/blk-flush.c | 105 ++++++++++++++++++------------------------------- block/blk-mq.c | 54 +++++++++---------------- block/blk-mq.h | 1 + block/blk-sysfs.c | 2 + include/linux/blk-mq.h | 5 +-- include/linux/blkdev.h | 11 ++---- 7 files changed, 76 insertions(+), 117 deletions(-) (limited to 'include/linux/blk-mq.h') diff --git a/block/blk-core.c b/block/blk-core.c index 06636f3ad42..853f9274920 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -693,11 +693,20 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) if (!uninit_q) return NULL; + uninit_q->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL); + if (!uninit_q->flush_rq) + goto out_cleanup_queue; + q = blk_init_allocated_queue(uninit_q, rfn, lock); if (!q) - blk_cleanup_queue(uninit_q); - + goto out_free_flush_rq; return q; + +out_free_flush_rq: + kfree(uninit_q->flush_rq); +out_cleanup_queue: + blk_cleanup_queue(uninit_q); + return NULL; } EXPORT_SYMBOL(blk_init_queue_node); @@ -1127,7 +1136,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw, struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) { if (q->mq_ops) - return blk_mq_alloc_request(q, rw, gfp_mask, false); + return blk_mq_alloc_request(q, rw, gfp_mask); else return blk_old_get_request(q, rw, gfp_mask); } diff --git a/block/blk-flush.c b/block/blk-flush.c index 9143e85226c..66e2b697f5d 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -130,20 +130,26 @@ static void blk_flush_restore_request(struct request *rq) blk_clear_rq_complete(rq); } -static void mq_flush_data_run(struct work_struct *work) +static void mq_flush_run(struct work_struct *work) { struct request *rq; - rq = container_of(work, struct request, mq_flush_data); + rq = 
container_of(work, struct request, mq_flush_work); memset(&rq->csd, 0, sizeof(rq->csd)); blk_mq_run_request(rq, true, false); } -static void blk_mq_flush_data_insert(struct request *rq) +static bool blk_flush_queue_rq(struct request *rq) { - INIT_WORK(&rq->mq_flush_data, mq_flush_data_run); - kblockd_schedule_work(rq->q, &rq->mq_flush_data); + if (rq->q->mq_ops) { + INIT_WORK(&rq->mq_flush_work, mq_flush_run); + kblockd_schedule_work(rq->q, &rq->mq_flush_work); + return false; + } else { + list_add_tail(&rq->queuelist, &rq->q->queue_head); + return true; + } } /** @@ -187,12 +193,7 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq, case REQ_FSEQ_DATA: list_move_tail(&rq->flush.list, &q->flush_data_in_flight); - if (q->mq_ops) - blk_mq_flush_data_insert(rq); - else { - list_add(&rq->queuelist, &q->queue_head); - queued = true; - } + queued = blk_flush_queue_rq(rq); break; case REQ_FSEQ_DONE: @@ -216,9 +217,6 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq, } kicked = blk_kick_flush(q); - /* blk_mq_run_flush will run queue */ - if (q->mq_ops) - return queued; return kicked | queued; } @@ -230,10 +228,9 @@ static void flush_end_io(struct request *flush_rq, int error) struct request *rq, *n; unsigned long flags = 0; - if (q->mq_ops) { - blk_mq_free_request(flush_rq); + if (q->mq_ops) spin_lock_irqsave(&q->mq_flush_lock, flags); - } + running = &q->flush_queue[q->flush_running_idx]; BUG_ON(q->flush_pending_idx == q->flush_running_idx); @@ -263,48 +260,14 @@ static void flush_end_io(struct request *flush_rq, int error) * kblockd. */ if (queued || q->flush_queue_delayed) { - if (!q->mq_ops) - blk_run_queue_async(q); - else - /* - * This can be optimized to only run queues with requests - * queued if necessary. 
- */ - blk_mq_run_queues(q, true); + WARN_ON(q->mq_ops); + blk_run_queue_async(q); } q->flush_queue_delayed = 0; if (q->mq_ops) spin_unlock_irqrestore(&q->mq_flush_lock, flags); } -static void mq_flush_work(struct work_struct *work) -{ - struct request_queue *q; - struct request *rq; - - q = container_of(work, struct request_queue, mq_flush_work); - - rq = blk_mq_alloc_request(q, WRITE_FLUSH|REQ_FLUSH_SEQ, - __GFP_WAIT|GFP_ATOMIC, false); - rq->cmd_type = REQ_TYPE_FS; - rq->end_io = flush_end_io; - - blk_mq_run_request(rq, true, false); -} - -/* - * We can't directly use q->flush_rq, because it doesn't have tag and is not in - * hctx->rqs[]. so we must allocate a new request, since we can't sleep here, - * so offload the work to workqueue. - * - * Note: we assume a flush request finished in any hardware queue will flush - * the whole disk cache. - */ -static void mq_run_flush(struct request_queue *q) -{ - kblockd_schedule_work(q, &q->mq_flush_work); -} - /** * blk_kick_flush - consider issuing flush request * @q: request_queue being kicked @@ -339,19 +302,31 @@ static bool blk_kick_flush(struct request_queue *q) * different from running_idx, which means flush is in flight. */ q->flush_pending_idx ^= 1; + if (q->mq_ops) { - mq_run_flush(q); - return true; + struct blk_mq_ctx *ctx = first_rq->mq_ctx; + struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu); + + blk_mq_rq_init(hctx, q->flush_rq); + q->flush_rq->mq_ctx = ctx; + + /* + * Reuse the tag value from the fist waiting request, + * with blk-mq the tag is generated during request + * allocation and drivers can rely on it being inside + * the range they asked for. 
+ */ + q->flush_rq->tag = first_rq->tag; + } else { + blk_rq_init(q, q->flush_rq); } - blk_rq_init(q, &q->flush_rq); - q->flush_rq.cmd_type = REQ_TYPE_FS; - q->flush_rq.cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ; - q->flush_rq.rq_disk = first_rq->rq_disk; - q->flush_rq.end_io = flush_end_io; + q->flush_rq->cmd_type = REQ_TYPE_FS; + q->flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ; + q->flush_rq->rq_disk = first_rq->rq_disk; + q->flush_rq->end_io = flush_end_io; - list_add_tail(&q->flush_rq.queuelist, &q->queue_head); - return true; + return blk_flush_queue_rq(q->flush_rq); } static void flush_data_end_io(struct request *rq, int error) @@ -407,11 +382,8 @@ void blk_insert_flush(struct request *rq) /* * @policy now records what operations need to be done. Adjust * REQ_FLUSH and FUA for the driver. - * We keep REQ_FLUSH for mq to track flush requests. For !FUA, - * we never dispatch the request directly. */ - if (rq->cmd_flags & REQ_FUA) - rq->cmd_flags &= ~REQ_FLUSH; + rq->cmd_flags &= ~REQ_FLUSH; if (!(fflags & REQ_FUA)) rq->cmd_flags &= ~REQ_FUA; @@ -560,5 +532,4 @@ EXPORT_SYMBOL(blkdev_issue_flush); void blk_mq_init_flush(struct request_queue *q) { spin_lock_init(&q->mq_flush_lock); - INIT_WORK(&q->mq_flush_work, mq_flush_work); } diff --git a/block/blk-mq.c b/block/blk-mq.c index 14c8f35946e..a59b0565e94 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -194,27 +194,9 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, } static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx, - gfp_t gfp, bool reserved, - int rw) + gfp_t gfp, bool reserved) { - struct request *req; - bool is_flush = false; - /* - * flush need allocate a request, leave at least one request for - * non-flush IO to avoid deadlock - */ - if ((rw & REQ_FLUSH) && !(rw & REQ_FLUSH_SEQ)) { - if (atomic_inc_return(&hctx->pending_flush) >= - hctx->queue_depth - hctx->reserved_tags - 1) { - atomic_dec(&hctx->pending_flush); - return NULL; - } - is_flush = 
true; - } - req = blk_mq_alloc_rq(hctx, gfp, reserved); - if (!req && is_flush) - atomic_dec(&hctx->pending_flush); - return req; + return blk_mq_alloc_rq(hctx, gfp, reserved); } static struct request *blk_mq_alloc_request_pinned(struct request_queue *q, @@ -227,7 +209,7 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q, struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu); - rq = __blk_mq_alloc_request(hctx, gfp & ~__GFP_WAIT, reserved, rw); + rq = __blk_mq_alloc_request(hctx, gfp & ~__GFP_WAIT, reserved); if (rq) { blk_mq_rq_ctx_init(q, ctx, rq, rw); break; @@ -244,15 +226,14 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q, return rq; } -struct request *blk_mq_alloc_request(struct request_queue *q, int rw, - gfp_t gfp, bool reserved) +struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp) { struct request *rq; if (blk_mq_queue_enter(q)) return NULL; - rq = blk_mq_alloc_request_pinned(q, rw, gfp, reserved); + rq = blk_mq_alloc_request_pinned(q, rw, gfp, false); if (rq) blk_mq_put_ctx(rq->mq_ctx); return rq; @@ -276,7 +257,7 @@ EXPORT_SYMBOL(blk_mq_alloc_reserved_request); /* * Re-init and set pdu, if we have it */ -static void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq) +void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq) { blk_rq_init(hctx->queue, rq); @@ -290,9 +271,6 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, const int tag = rq->tag; struct request_queue *q = rq->q; - if ((rq->cmd_flags & REQ_FLUSH) && !(rq->cmd_flags & REQ_FLUSH_SEQ)) - atomic_dec(&hctx->pending_flush); - blk_mq_rq_init(hctx, rq); blk_mq_put_tag(hctx->tags, tag); @@ -946,14 +924,14 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) hctx = q->mq_ops->map_queue(q, ctx->cpu); trace_block_getrq(q, bio, rw); - rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false, bio->bi_rw); + rq = 
__blk_mq_alloc_request(hctx, GFP_ATOMIC, false); if (likely(rq)) - blk_mq_rq_ctx_init(q, ctx, rq, bio->bi_rw); + blk_mq_rq_ctx_init(q, ctx, rq, rw); else { blk_mq_put_ctx(ctx); trace_block_sleeprq(q, bio, rw); - rq = blk_mq_alloc_request_pinned(q, bio->bi_rw, - __GFP_WAIT|GFP_ATOMIC, false); + rq = blk_mq_alloc_request_pinned(q, rw, __GFP_WAIT|GFP_ATOMIC, + false); ctx = rq->mq_ctx; hctx = q->mq_ops->map_queue(q, ctx->cpu); } @@ -1230,9 +1208,7 @@ static int blk_mq_init_hw_queues(struct request_queue *q, hctx->queue_num = i; hctx->flags = reg->flags; hctx->queue_depth = reg->queue_depth; - hctx->reserved_tags = reg->reserved_tags; hctx->cmd_size = reg->cmd_size; - atomic_set(&hctx->pending_flush, 0); blk_mq_init_cpu_notifier(&hctx->cpu_notifier, blk_mq_hctx_notify, hctx); @@ -1412,9 +1388,14 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg, blk_mq_init_flush(q); blk_mq_init_cpu_queues(q, reg->nr_hw_queues); - if (blk_mq_init_hw_queues(q, reg, driver_data)) + q->flush_rq = kzalloc(round_up(sizeof(struct request) + reg->cmd_size, + cache_line_size()), GFP_KERNEL); + if (!q->flush_rq) goto err_hw; + if (blk_mq_init_hw_queues(q, reg, driver_data)) + goto err_flush_rq; + blk_mq_map_swqueue(q); mutex_lock(&all_q_mutex); @@ -1422,6 +1403,9 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg, mutex_unlock(&all_q_mutex); return q; + +err_flush_rq: + kfree(q->flush_rq); err_hw: kfree(q->mq_map); err_map: diff --git a/block/blk-mq.h b/block/blk-mq.h index f29b645f0e1..ed0035cd458 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -28,6 +28,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_init_flush(struct request_queue *q); void blk_mq_drain_queue(struct request_queue *q); void blk_mq_free_queue(struct request_queue *q); +void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq); /* * CPU hotplug helpers diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 8095c4a21fc..7500f876dae 100644 --- 
a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -549,6 +549,8 @@ static void blk_release_queue(struct kobject *kobj) if (q->mq_ops) blk_mq_free_queue(q); + kfree(q->flush_rq); + blk_trace_shutdown(q); bdi_destroy(&q->backing_dev_info); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 468be242db9..18ba8a627f4 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -36,15 +36,12 @@ struct blk_mq_hw_ctx { struct list_head page_list; struct blk_mq_tags *tags; - atomic_t pending_flush; - unsigned long queued; unsigned long run; #define BLK_MQ_MAX_DISPATCH_ORDER 10 unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER]; unsigned int queue_depth; - unsigned int reserved_tags; unsigned int numa_node; unsigned int cmd_size; /* per-request extra data */ @@ -129,7 +126,7 @@ void blk_mq_insert_request(struct request_queue *, struct request *, void blk_mq_run_queues(struct request_queue *q, bool async); void blk_mq_free_request(struct request *rq); bool blk_mq_can_queue(struct blk_mq_hw_ctx *); -struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, bool reserved); +struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp); struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, gfp_t gfp); struct request *blk_mq_rq_from_tag(struct request_queue *q, unsigned int tag); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0375654adb2..b2d25ecbcbc 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -101,7 +101,7 @@ struct request { }; union { struct call_single_data csd; - struct work_struct mq_flush_data; + struct work_struct mq_flush_work; }; struct request_queue *q; @@ -451,13 +451,8 @@ struct request_queue { unsigned long flush_pending_since; struct list_head flush_queue[2]; struct list_head flush_data_in_flight; - union { - struct request flush_rq; - struct { - spinlock_t mq_flush_lock; - struct work_struct mq_flush_work; - }; - }; + struct request 
*flush_rq; + spinlock_t mq_flush_lock; struct mutex sysfs_lock; -- cgit v1.2.3-70-g09d2