From 4d92a9beb39d80a7d8ff7c04ae12a10290105ae5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 29 May 2014 08:09:00 -0600 Subject: block: remove 'magic' from struct blk_plug I don't think we've ever caught any bugs with this, and there's the list poisoning for the plug lists to catch uninitialized cases. So remove the magic member and save 8 bytes in the struct. Signed-off-by: Jens Axboe --- block/blk-core.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index d87be5b4e55..40d654861c3 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2957,8 +2957,6 @@ int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork, } EXPORT_SYMBOL(kblockd_schedule_delayed_work_on); -#define PLUG_MAGIC 0x91827364 - /** * blk_start_plug - initialize blk_plug and track it inside the task_struct * @plug: The &struct blk_plug that needs to be initialized @@ -2977,7 +2975,6 @@ void blk_start_plug(struct blk_plug *plug) { struct task_struct *tsk = current; - plug->magic = PLUG_MAGIC; INIT_LIST_HEAD(&plug->list); INIT_LIST_HEAD(&plug->mq_list); INIT_LIST_HEAD(&plug->cb_list); @@ -3074,8 +3071,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) LIST_HEAD(list); unsigned int depth; - BUG_ON(plug->magic != PLUG_MAGIC); - flush_plug_callbacks(plug, from_schedule); if (!list_empty(&plug->mq_list)) -- cgit v1.2.3-70-g09d2 From 05f1dd5315217398fc8d122bdee80f96a9f21274 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 29 May 2014 09:53:32 -0600 Subject: block: add queue flag for disabling SG merging If devices are not SG starved, we waste a lot of time potentially collapsing SG segments. Enough that 1.5% of the CPU time goes to this, at only 400K IOPS. Add a queue flag, QUEUE_FLAG_NO_SG_MERGE, which just returns the number of vectors in a bio instead of looping over all segments and checking for collapsible ones. 
Add a BLK_MQ_F_SG_MERGE flag so that drivers can opt-in on the sg merging, if they so desire. Signed-off-by: Jens Axboe --- block/blk-merge.c | 28 +++++++++++++++++++++------- block/blk-mq.c | 3 +++ include/linux/blk-mq.h | 1 + include/linux/blkdev.h | 1 + 4 files changed, 26 insertions(+), 7 deletions(-) (limited to 'block') diff --git a/block/blk-merge.c b/block/blk-merge.c index 6c583f9c5b6..b3bf0df0f4c 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -13,7 +13,7 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, struct bio *bio) { struct bio_vec bv, bvprv = { NULL }; - int cluster, high, highprv = 1; + int cluster, high, highprv = 1, no_sg_merge; unsigned int seg_size, nr_phys_segs; struct bio *fbio, *bbio; struct bvec_iter iter; @@ -35,12 +35,21 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, cluster = blk_queue_cluster(q); seg_size = 0; nr_phys_segs = 0; + no_sg_merge = test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags); + high = 0; for_each_bio(bio) { bio_for_each_segment(bv, bio, iter) { + /* + * If SG merging is disabled, each bio vector is + * a segment + */ + if (no_sg_merge) + goto new_segment; + /* * the trick here is making sure that a high page is - * never considered part of another segment, since that - * might change with the bounce page. + * never considered part of another segment, since + * that might change with the bounce page. 
*/ high = page_to_pfn(bv.bv_page) > queue_bounce_pfn(q); if (!high && !highprv && cluster) { @@ -84,11 +93,16 @@ void blk_recalc_rq_segments(struct request *rq) void blk_recount_segments(struct request_queue *q, struct bio *bio) { - struct bio *nxt = bio->bi_next; + if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags)) + bio->bi_phys_segments = bio->bi_vcnt; + else { + struct bio *nxt = bio->bi_next; + + bio->bi_next = NULL; + bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio); + bio->bi_next = nxt; + } - bio->bi_next = NULL; - bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio); - bio->bi_next = nxt; bio->bi_flags |= (1 << BIO_SEG_VALID); } EXPORT_SYMBOL(blk_recount_segments); diff --git a/block/blk-mq.c b/block/blk-mq.c index f27fe44230c..f98d977fd15 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1829,6 +1829,9 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) q->mq_ops = set->ops; q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; + if (!(set->flags & BLK_MQ_F_SG_MERGE)) + q->queue_flags |= 1 << QUEUE_FLAG_NO_SG_MERGE; + q->sg_reserved_size = INT_MAX; INIT_WORK(&q->requeue_work, blk_mq_requeue_work); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 91dfb75ce39..95de239444d 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -129,6 +129,7 @@ enum { BLK_MQ_F_SHOULD_MERGE = 1 << 0, BLK_MQ_F_SHOULD_SORT = 1 << 1, BLK_MQ_F_TAG_SHARED = 1 << 2, + BLK_MQ_F_SG_MERGE = 1 << 3, BLK_MQ_S_STOPPED = 0, BLK_MQ_S_TAG_ACTIVE = 1, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 098304576d5..695b9fd41ef 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -510,6 +510,7 @@ struct request_queue { #define QUEUE_FLAG_SAME_FORCE 18 /* force complete on same CPU */ #define QUEUE_FLAG_DEAD 19 /* queue tear-down finished */ #define QUEUE_FLAG_INIT_DONE 20 /* queue is initialized */ +#define QUEUE_FLAG_NO_SG_MERGE 21 /* don't attempt to merge SG segments*/ #define QUEUE_FLAG_DEFAULT ((1 << 
QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ -- cgit v1.2.3-70-g09d2 From 4b570521be54666e6ad7e5f47af92fd609fbd8b5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 29 May 2014 11:00:11 -0600 Subject: blk-mq: request initialization optimizations We currently clear a lot more than we need to, so make that a bit more clever. Make some of the init dependent on features, like only setting start_time if we are going to use it. Signed-off-by: Jens Axboe --- block/blk-mq.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index f98d977fd15..6160128085f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -199,19 +199,12 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, rq->q = q; rq->mq_ctx = ctx; rq->cmd_flags |= rw_flags; - rq->cmd_type = 0; /* do not touch atomic flags, it needs atomic ops against the timer */ rq->cpu = -1; - rq->__data_len = 0; - rq->__sector = (sector_t) -1; - rq->bio = NULL; - rq->biotail = NULL; INIT_HLIST_NODE(&rq->hash); RB_CLEAR_NODE(&rq->rb_node); - memset(&rq->flush, 0, max(sizeof(rq->flush), sizeof(rq->elv))); rq->rq_disk = NULL; rq->part = NULL; - rq->start_time = jiffies; #ifdef CONFIG_BLK_CGROUP rq->rl = NULL; set_start_time_ns(rq); @@ -221,23 +214,16 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, #if defined(CONFIG_BLK_DEV_INTEGRITY) rq->nr_integrity_segments = 0; #endif - rq->ioprio = 0; rq->special = NULL; /* tag was already set */ rq->errors = 0; - memset(rq->__cmd, 0, sizeof(rq->__cmd)); - rq->cmd = rq->__cmd; - rq->cmd_len = BLK_MAX_CDB; rq->extra_len = 0; rq->sense_len = 0; rq->resid_len = 0; rq->sense = NULL; - rq->deadline = 0; INIT_LIST_HEAD(&rq->timeout_list); - rq->timeout = 0; - rq->retries = 0; rq->end_io = NULL; rq->end_io_data = NULL; rq->next_rq = NULL; @@ -449,8 +435,10 @@ static void blk_mq_start_request(struct request *rq, bool last) * 
complete. So be sure to clear complete again when we start * the request, otherwise we'll ignore the completion event. */ - set_bit(REQ_ATOM_STARTED, &rq->atomic_flags); - clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags); + if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) + set_bit(REQ_ATOM_STARTED, &rq->atomic_flags); + if (test_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags)) + clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags); if (q->dma_drain_size && blk_rq_bytes(rq)) { /* @@ -1112,7 +1100,11 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) static void blk_mq_bio_to_request(struct request *rq, struct bio *bio) { init_request_from_bio(rq, bio); - blk_account_io_start(rq, 1); + + if (blk_do_io_stat(rq)) { + rq->start_time = jiffies; + blk_account_io_start(rq, 1); + } } static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx, -- cgit v1.2.3-70-g09d2 From da52f22fa924b4a21d8e11fbfd3eeebd7a90a366 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 29 May 2014 15:11:30 -0400 Subject: block: remove dead code in scsi_ioctl:blk_verify_command filter gets assigned the address of blk_default_cmd_filter on entry to this function, so the !filter condition can never be true. 
Signed-off-by: Dave Jones Signed-off-by: Jens Axboe --- block/scsi_ioctl.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'block') diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 26487972ac5..9c28a5b3804 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -205,10 +205,6 @@ int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm) if (capable(CAP_SYS_RAWIO)) return 0; - /* if there's no filter set, assume we're filtering everything out */ - if (!filter) - return -EPERM; - /* Anybody who can open the device can do a read-safe command */ if (test_bit(cmd[0], filter->read_ok)) return 0; -- cgit v1.2.3-70-g09d2 From 2230237500821aedfcf2bba2a79d9cbca389233c Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 30 May 2014 08:06:42 -0600 Subject: blk-mq: blk_mq_tag_to_rq should handle flush request A flush request is special: it borrows its tag from the parent request. Hence blk_mq_tag_to_rq needs special handling to return the flush request for that tag. Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- block/blk-flush.c | 4 +++- block/blk-mq.c | 12 +++++++++--- include/linux/blk-mq.h | 2 +- 3 files changed, 13 insertions(+), 5 deletions(-) (limited to 'block') diff --git a/block/blk-flush.c b/block/blk-flush.c index ef608b35d9b..ff87c664b7d 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -223,8 +223,10 @@ static void flush_end_io(struct request *flush_rq, int error) struct request *rq, *n; unsigned long flags = 0; - if (q->mq_ops) + if (q->mq_ops) { spin_lock_irqsave(&q->mq_flush_lock, flags); + q->flush_rq->cmd_flags = 0; + } running = &q->flush_queue[q->flush_running_idx]; BUG_ON(q->flush_pending_idx == q->flush_running_idx); diff --git a/block/blk-mq.c b/block/blk-mq.c index 6160128085f..21f952ab358 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -541,9 +541,15 @@ void blk_mq_kick_requeue_list(struct request_queue *q) } EXPORT_SYMBOL(blk_mq_kick_requeue_list); -struct request *blk_mq_tag_to_rq(struct blk_mq_tags
*tags, unsigned int tag) +struct request *blk_mq_tag_to_rq(struct blk_mq_hw_ctx *hctx, unsigned int tag) { - return tags->rqs[tag]; + struct request_queue *q = hctx->queue; + + if ((q->flush_rq->cmd_flags & REQ_FLUSH_SEQ) && + q->flush_rq->tag == tag) + return q->flush_rq; + + return hctx->tags->rqs[tag]; } EXPORT_SYMBOL(blk_mq_tag_to_rq); @@ -572,7 +578,7 @@ static void blk_mq_timeout_check(void *__data, unsigned long *free_tags) if (tag >= hctx->tags->nr_tags) break; - rq = blk_mq_tag_to_rq(hctx->tags, tag++); + rq = blk_mq_tag_to_rq(hctx, tag++); if (rq->q != hctx->queue) continue; if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 95de239444d..ad3adb73cc7 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -154,7 +154,7 @@ void blk_mq_free_request(struct request *rq); bool blk_mq_can_queue(struct blk_mq_hw_ctx *); struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, bool reserved); -struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); +struct request *blk_mq_tag_to_rq(struct blk_mq_hw_ctx *hctx, unsigned int tag); struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index); struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int); -- cgit v1.2.3-70-g09d2