From c21e6beba8835d09bb80e34961430b13e60381c5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 19 Apr 2011 13:32:46 +0200 Subject: block: get rid of QUEUE_FLAG_REENTER We are currently using this flag to check whether it's safe to call into ->request_fn(). If it is set, we punt to kblockd. But we get a lot of false positives and excessive punts to kblockd, which hurts performance. The only real abuser of this infrastructure is SCSI. So export the async queue run and convert SCSI over to use that. There's room for improvement in that SCSI need not always use the async call, but this fixes our performance issue and they can fix that up in due time. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cbbfd98ad4a..2ad95fa1d13 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -388,20 +388,19 @@ struct request_queue #define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */ #define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */ #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ -#define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ -#define QUEUE_FLAG_ELVSWITCH 7 /* don't use elevator, just do FIFO */ -#define QUEUE_FLAG_BIDI 8 /* queue supports bidi requests */ -#define QUEUE_FLAG_NOMERGES 9 /* disable merge attempts */ -#define QUEUE_FLAG_SAME_COMP 10 /* force complete on same CPU */ -#define QUEUE_FLAG_FAIL_IO 11 /* fake timeout */ -#define QUEUE_FLAG_STACKABLE 12 /* supports request stacking */ -#define QUEUE_FLAG_NONROT 13 /* non-rotational device (SSD) */ +#define QUEUE_FLAG_ELVSWITCH 6 /* don't use elevator, just do FIFO */ +#define QUEUE_FLAG_BIDI 7 /* queue supports bidi requests */ +#define QUEUE_FLAG_NOMERGES 8 /* disable merge attempts */ +#define QUEUE_FLAG_SAME_COMP 9 /* force complete on same CPU */ +#define QUEUE_FLAG_FAIL_IO 10 /* fake timeout */ +#define QUEUE_FLAG_STACKABLE 11 /* supports request stacking */ +#define QUEUE_FLAG_NONROT 12 /* non-rotational device (SSD) */ #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ -#define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ -#define QUEUE_FLAG_DISCARD 16 /* supports DISCARD */ -#define QUEUE_FLAG_NOXMERGES 17 /* No extended merges */ -#define QUEUE_FLAG_ADD_RANDOM 18 /* Contributes to random pool */ -#define QUEUE_FLAG_SECDISCARD 19 /* supports SECDISCARD */ +#define QUEUE_FLAG_IO_STAT 13 /* do IO stats */ +#define QUEUE_FLAG_DISCARD 14 /* supports DISCARD */ +#define QUEUE_FLAG_NOXMERGES 15 /* No extended merges */ +#define QUEUE_FLAG_ADD_RANDOM 16 /* Contributes to random pool */ +#define QUEUE_FLAG_SECDISCARD 17 /* supports SECDISCARD */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ @@ -699,6 +698,7 @@ extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(struct request_queue *q); extern void __blk_run_queue(struct request_queue *q); extern void blk_run_queue(struct request_queue *); +extern void blk_run_queue_async(struct request_queue *q); extern int blk_rq_map_user(struct request_queue *, struct request *, struct rq_map_data *, void __user *, unsigned long, gfp_t); -- cgit v1.2.3-70-g09d2 From f3876930952390a31c3a7fd68dd621464a36eb80 Mon Sep 17 00:00:00 2001 From: "shaohua.li@intel.com" Date: Fri, 6 May 2011 11:34:32 -0600 Subject: block: add a non-queueable flush flag flush request isn't queueable in some drives. Add a flag to let driver notify block layer about this. We can optimize flush performance with the knowledge. Stable: 2.6.39 only Cc: stable@kernel.org Signed-off-by: Shaohua Li Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-settings.c | 6 ++++++ include/linux/blkdev.h | 7 +++++++ 2 files changed, 13 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-settings.c b/block/blk-settings.c index 1fa76929359..cd3c428e194 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -790,6 +790,12 @@ void blk_queue_flush(struct request_queue *q, unsigned int flush) } EXPORT_SYMBOL_GPL(blk_queue_flush); +void blk_queue_flush_queueable(struct request_queue *q, bool queueable) +{ + q->flush_not_queueable = !queueable; +} +EXPORT_SYMBOL_GPL(blk_queue_flush_queueable); + static int __init blk_settings_init(void) { blk_max_low_pfn = max_low_pfn - 1; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cbbfd98ad4a..8bd2a271b2d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -364,6 +364,7 @@ struct request_queue * for flush operations */ unsigned int flush_flags; + unsigned int flush_not_queueable:1; unsigned int flush_pending_idx:1; unsigned int flush_running_idx:1; unsigned long flush_pending_since; @@ -843,6 +844,7 @@ extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush(struct request_queue *q, unsigned int flush); +extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); @@ -1111,6 +1113,11 @@ static inline unsigned int block_size(struct block_device *bdev) return bdev->bd_block_size; } +static inline bool queue_flush_queueable(struct request_queue *q) +{ + return !q->flush_not_queueable; +} + typedef struct {struct page *v;} Sector; unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); -- cgit v1.2.3-70-g09d2 From 3ac0cc4508709d42ec9aa351086c7d38bfc0660c Mon Sep 17 00:00:00 2001 From: "shaohua.li@intel.com" Date: Fri, 6 May 2011 11:34:41 -0600 Subject: block: hold queue if flush is running for non-queueable flush drive In some drives, flush requests are non-queueable. When flush request is running, normal read/write requests can't run. If block layer dispatches such request, driver can't handle it and requeue it. Tejun suggested we can hold the queue when flush is running. This can avoid unnecessary requeue. Also this can improve performance. For example, we have request flush1, write1, flush 2. flush1 is dispatched, then queue is hold, write1 isn't inserted to queue. After flush1 is finished, flush2 will be dispatched. Since disk cache is already clean, flush2 will be finished very soon, so looks like flush2 is folded to flush1. In my test, the queue holding completely solves a regression introduced by commit 53d63e6b0dfb95882ec0219ba6bbd50cde423794: block: make the flush insertion use the tail of the dispatch list It's not a preempt type request, in fact we have to insert it behind requests that do specify INSERT_FRONT. which causes about 20% regression running a sysbench fileio workload. Stable: 2.6.39 only Cc: stable@kernel.org Signed-off-by: Shaohua Li Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-flush.c | 16 +++++++++++----- block/blk.h | 21 ++++++++++++++++++++- include/linux/blkdev.h | 1 + 3 files changed, 32 insertions(+), 6 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-flush.c b/block/blk-flush.c index 6c9b5e189e6..bb21e4c36f7 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -212,13 +212,19 @@ static void flush_end_io(struct request *flush_rq, int error) } /* - * Moving a request silently to empty queue_head may stall the - * queue. Kick the queue in those cases. This function is called - * from request completion path and calling directly into - * request_fn may confuse the driver. Always use kblockd. + * Kick the queue to avoid stall for two cases: + * 1. Moving a request silently to empty queue_head may stall the + * queue. + * 2. When flush request is running in non-queueable queue, the + * queue is hold. Restart the queue after flush request is finished + * to avoid stall. + * This function is called from request completion path and calling + * directly into request_fn may confuse the driver. Always use + * kblockd. */ - if (queued) + if (queued || q->flush_queue_delayed) blk_run_queue_async(q); + q->flush_queue_delayed = 0; } /** diff --git a/block/blk.h b/block/blk.h index c9df8fc3c99..83e4bff3620 100644 --- a/block/blk.h +++ b/block/blk.h @@ -62,7 +62,26 @@ static inline struct request *__elv_next_request(struct request_queue *q) rq = list_entry_rq(q->queue_head.next); return rq; } - + /* + * Flush request is running and flush request isn't queueable + * in the drive, we can hold the queue till flush request is + * finished. Even we don't do this, driver can't dispatch next + * requests and will requeue them. And this can improve + * throughput too. For example, we have request flush1, write1, + * flush 2. flush1 is dispatched, then queue is hold, write1 + * isn't inserted to queue. After flush1 is finished, flush2 + * will be dispatched. Since disk cache is already clean, + * flush2 will be finished very soon, so looks like flush2 is + * folded to flush1. + * Since the queue is hold, a flag is set to indicate the queue + * should be restarted later. Please see flush_end_io() for + * details. + */ + if (q->flush_pending_idx != q->flush_running_idx && + !queue_flush_queueable(q)) { + q->flush_queue_delayed = 1; + return NULL; + } if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) return NULL; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8bd2a271b2d..9f921bf4bf8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -365,6 +365,7 @@ struct request_queue */ unsigned int flush_flags; unsigned int flush_not_queueable:1; + unsigned int flush_queue_delayed:1; unsigned int flush_pending_idx:1; unsigned int flush_running_idx:1; unsigned long flush_pending_since; -- cgit v1.2.3-70-g09d2 From a934a00a69e940b126b9bdbf83e630ef5fe43523 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 18 May 2011 10:37:35 +0200 Subject: block: Fix discard topology stacking and reporting In some cases we would end up stacking discard_zeroes_data incorrectly. Fix this by enabling the feature by default for stacking drivers and clearing it for low-level drivers. Incorporating a device that does not support dzd will then cause the feature to be disabled in the stacking driver. Also ensure that the maximum discard value does not overflow when exported in sysfs and return 0 in the alignment and dzd fields for devices that don't support discard. Reported-by: Lukas Czerner Signed-off-by: Martin K. Petersen Acked-by: Mike Snitzer Cc: stable@kernel.org Signed-off-by: Jens Axboe --- block/blk-settings.c | 3 ++- block/blk-sysfs.c | 3 ++- include/linux/blkdev.h | 7 +++++-- 3 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-settings.c b/block/blk-settings.c index cd3c428e194..fa1eb0449a0 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -120,7 +120,7 @@ void blk_set_default_limits(struct queue_limits *lim) lim->discard_granularity = 0; lim->discard_alignment = 0; lim->discard_misaligned = 0; - lim->discard_zeroes_data = -1; + lim->discard_zeroes_data = 1; lim->logical_block_size = lim->physical_block_size = lim->io_min = 512; lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT); lim->alignment_offset = 0; @@ -166,6 +166,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn) blk_set_default_limits(&q->limits); blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS); + q->limits.discard_zeroes_data = 0; /* * by default assume old behaviour and bounce for any highmem page diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 6d735122bc5..53bd0c77bfd 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -152,7 +152,8 @@ static ssize_t queue_discard_granularity_show(struct request_queue *q, char *pag static ssize_t queue_discard_max_show(struct request_queue *q, char *page) { - return queue_var_show(q->limits.max_discard_sectors << 9, page); + return sprintf(page, "%llu\n", + (unsigned long long)q->limits.max_discard_sectors << 9); } static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9f921bf4bf8..520d8618ed7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -257,7 +257,7 @@ struct queue_limits { unsigned char misaligned; unsigned char discard_misaligned; unsigned char cluster; - signed char discard_zeroes_data; + unsigned char discard_zeroes_data; }; struct request_queue @@ -1069,13 +1069,16 @@ static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector { unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1); + if (!lim->max_discard_sectors) + return 0; + return (lim->discard_granularity + lim->discard_alignment - alignment) & (lim->discard_granularity - 1); } static inline unsigned int queue_discard_zeroes_data(struct request_queue *q) { - if (q->limits.discard_zeroes_data == 1) + if (q->limits.max_discard_sectors && q->limits.discard_zeroes_data == 1) return 1; return 0; -- cgit v1.2.3-70-g09d2 From ea9d6553b3b3044e7374774cc33bb1b2eee19dd3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 31 May 2011 13:45:53 +0200 Subject: block: remove unwanted semicolons Since those defined functions require additional semicolon from the caller, they could cause potential syntax errors when used in if-else statements. Signed-off-by: Namhyung Kim Acked-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ae9091a6848..1a23722e887 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1282,8 +1282,8 @@ queue_max_integrity_segments(struct request_queue *q) #define blk_get_integrity(a) (0) #define blk_integrity_compare(a, b) (0) #define blk_integrity_register(a, b) (0) -#define blk_integrity_unregister(a) do { } while (0); -#define blk_queue_max_integrity_segments(a, b) do { } while (0); +#define blk_integrity_unregister(a) do { } while (0) +#define blk_queue_max_integrity_segments(a, b) do { } while (0) #define queue_max_integrity_segments(a) (0) #define blk_integrity_merge_rq(a, b, c) (0) #define blk_integrity_merge_bio(a, b, c) (0) -- cgit v1.2.3-70-g09d2