From 986fe6c7f50974e871b8ab5a800f5310ea25b361 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 6 Oct 2010 03:10:59 -0500 Subject: [SCSI] Fix regressions in scsi_internal_device_block Deleting a SCSI device on a blocked fc_remote_port (before fast_io_fail_tmo fires) results in a hanging thread: STACK: 0 schedule+1108 [0x5cac48] 1 schedule_timeout+528 [0x5cb7fc] 2 wait_for_common+266 [0x5ca6be] 3 blk_execute_rq+160 [0x354054] 4 scsi_execute+324 [0x3b7ef4] 5 scsi_execute_req+162 [0x3b80ca] 6 sd_sync_cache+138 [0x3cf662] 7 sd_shutdown+138 [0x3cf91a] 8 sd_remove+112 [0x3cfe4c] 9 __device_release_driver+124 [0x3a08b8] 10 device_release_driver+60 [0x3a0a5c] 11 bus_remove_device+266 [0x39fa76] 12 device_del+340 [0x39d818] 13 __scsi_remove_device+204 [0x3bcc48] 14 scsi_remove_device+66 [0x3bcc8e] 15 sysfs_schedule_callback_work+50 [0x260d66] 16 worker_thread+622 [0x162326] 17 kthread+160 [0x1680b0] 18 kernel_thread_starter+6 [0x10aaea] During the delete, the SCSI device is in moved to SDEV_CANCEL. When the FC transport class later calls scsi_target_unblock, this has no effect, since scsi_internal_device_unblock ignores SCSI devics in this state. It looks like all these are regressions caused by: 5c10e63c943b4c67561ddc6bf61e01d4141f881f [SCSI] limit state transitions in scsi_internal_device_unblock Fix by rejecting offline and cancel in the state transition. Signed-off-by: Christof Schmitt [jejb: Original patch by Christof Schmitt, modified by Mike Christie] Cc: Stable Tree Signed-off-by: James Bottomley --- drivers/scsi/scsi_lib.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/scsi/scsi_lib.c') diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 8041fe1ab17..eafeeda6e19 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2438,7 +2438,8 @@ scsi_internal_device_unblock(struct scsi_device *sdev) sdev->sdev_state = SDEV_RUNNING; else if (sdev->sdev_state == SDEV_CREATED_BLOCK) sdev->sdev_state = SDEV_CREATED; - else + else if (sdev->sdev_state != SDEV_CANCEL && + sdev->sdev_state != SDEV_OFFLINE) return -EINVAL; spin_lock_irqsave(q->queue_lock, flags); -- cgit v1.2.3-70-g09d2 From 459dbf72e4d2b4aa13620e6b70d54f098547bf13 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 17 Nov 2010 10:10:57 -0600 Subject: [SCSI] Eliminate error handler overload of the SCSI serial number The error handler is using the test cmd->serial_number == 0 in the abort routines to signal that the command to be aborted has already completed normally. This design was to close a race window in the original error handler where a command could go through the normal completion routines after it timed out but before error handling was started. Mike Anderson pointed out that when we converted our timeout and softirq completions, we picked up atomicity here because the block layer now mediates this with the REQ_ATOM_COMPLETE flag and guarantees that *either* the command times out or our done routine is called, but ensures we can't get both occurring. That makes the serial number zero check redundant and it can be removed. Signed-off-by: James Bottomley --- drivers/scsi/scsi_error.c | 26 ++------------------------ drivers/scsi/scsi_lib.c | 5 ----- 2 files changed, 2 insertions(+), 29 deletions(-) (limited to 'drivers/scsi/scsi_lib.c') diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 824b8fc03ce..30ac116186f 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -615,7 +615,7 @@ static int scsi_try_bus_device_reset(struct scsi_cmnd *scmd) return rtn; } -static int __scsi_try_to_abort_cmd(struct scsi_cmnd *scmd) +static int scsi_try_to_abort_cmd(struct scsi_cmnd *scmd) { if (!scmd->device->host->hostt->eh_abort_handler) return FAILED; @@ -623,31 +623,9 @@ static int __scsi_try_to_abort_cmd(struct scsi_cmnd *scmd) return scmd->device->host->hostt->eh_abort_handler(scmd); } -/** - * scsi_try_to_abort_cmd - Ask host to abort a running command. - * @scmd: SCSI cmd to abort from Lower Level. - * - * Notes: - * This function will not return until the user's completion function - * has been called. there is no timeout on this operation. if the - * author of the low-level driver wishes this operation to be timed, - * they can provide this facility themselves. helper functions in - * scsi_error.c can be supplied to make this easier to do. - */ -static int scsi_try_to_abort_cmd(struct scsi_cmnd *scmd) -{ - /* - * scsi_done was called just after the command timed out and before - * we had a chance to process it. (db) - */ - if (scmd->serial_number == 0) - return SUCCESS; - return __scsi_try_to_abort_cmd(scmd); -} - static void scsi_abort_eh_cmnd(struct scsi_cmnd *scmd) { - if (__scsi_try_to_abort_cmd(scmd) != SUCCESS) + if (scsi_try_to_abort_cmd(scmd) != SUCCESS) if (scsi_try_bus_device_reset(scmd) != SUCCESS) if (scsi_try_target_reset(scmd) != SUCCESS) if (scsi_try_bus_reset(scmd) != SUCCESS) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index eafeeda6e19..5b6bbaea59f 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1403,11 +1403,6 @@ static void scsi_softirq_done(struct request *rq) INIT_LIST_HEAD(&cmd->eh_entry); - /* - * Set the serial numbers back to zero - */ - cmd->serial_number = 0; - atomic_inc(&cmd->device->iodone_cnt); if (cmd->result) atomic_inc(&cmd->device->ioerr_cnt); -- cgit v1.2.3-70-g09d2 From e692cb668fdd5a712c6ed2a2d6f2a36ee83997b4 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 1 Dec 2010 19:41:49 +0100 Subject: block: Deprecate QUEUE_FLAG_CLUSTER and use queue_limits instead When stacking devices, a request_queue is not always available. This forced us to have a no_cluster flag in the queue_limits that could be used as a carrier until the request_queue had been set up for a metadevice. There were several problems with that approach. First of all it was up to the stacking device to remember to set queue flag after stacking had completed. Also, the queue flag and the queue limits had to be kept in sync at all times. We got that wrong, which could lead to us issuing commands that went beyond the max scatterlist limit set by the driver. The proper fix is to avoid having two flags for tracking the same thing. We deprecate QUEUE_FLAG_CLUSTER and use the queue limit directly in the block layer merging functions. The queue_limit 'no_cluster' is turned into 'cluster' to avoid double negatives and to ease stacking. Clustering defaults to being enabled as before. The queue flag logic is removed from the stacking function, and explicitly setting the cluster flag is no longer necessary in DM and MD. Reported-by: Ed Lin Signed-off-by: Martin K. Petersen Acked-by: Mike Snitzer Cc: stable@kernel.org Signed-off-by: Jens Axboe --- block/blk-merge.c | 6 +++--- block/blk-settings.c | 25 ++----------------------- block/blk-sysfs.c | 2 +- drivers/md/dm-table.c | 5 ----- drivers/md/md.c | 3 --- drivers/scsi/scsi_lib.c | 3 +-- include/linux/blkdev.h | 9 ++++++--- 7 files changed, 13 insertions(+), 40 deletions(-) (limited to 'drivers/scsi/scsi_lib.c') diff --git a/block/blk-merge.c b/block/blk-merge.c index 77b7c26df6b..74bc4a768f3 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -21,7 +21,7 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, return 0; fbio = bio; - cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); + cluster = blk_queue_cluster(q); seg_size = 0; nr_phys_segs = 0; for_each_bio(bio) { @@ -87,7 +87,7 @@ EXPORT_SYMBOL(blk_recount_segments); static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, struct bio *nxt) { - if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags)) + if (!blk_queue_cluster(q)) return 0; if (bio->bi_seg_back_size + nxt->bi_seg_front_size > @@ -123,7 +123,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq, int nsegs, cluster; nsegs = 0; - cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); + cluster = blk_queue_cluster(q); /* * for each bio in rq diff --git a/block/blk-settings.c b/block/blk-settings.c index 701859fb964..e55f5fc4ca2 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -126,7 +126,7 @@ void blk_set_default_limits(struct queue_limits *lim) lim->alignment_offset = 0; lim->io_opt = 0; lim->misaligned = 0; - lim->no_cluster = 0; + lim->cluster = 1; } EXPORT_SYMBOL(blk_set_default_limits); @@ -464,15 +464,6 @@ EXPORT_SYMBOL(blk_queue_io_opt); void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) { blk_stack_limits(&t->limits, &b->limits, 0); - - if (!t->queue_lock) - WARN_ON_ONCE(1); - else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) { - unsigned long flags; - spin_lock_irqsave(t->queue_lock, flags); - queue_flag_clear(QUEUE_FLAG_CLUSTER, t); - spin_unlock_irqrestore(t->queue_lock, flags); - } } EXPORT_SYMBOL(blk_queue_stack_limits); @@ -545,7 +536,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->io_min = max(t->io_min, b->io_min); t->io_opt = lcm(t->io_opt, b->io_opt); - t->no_cluster |= b->no_cluster; + t->cluster &= b->cluster; t->discard_zeroes_data &= b->discard_zeroes_data; /* Physical block size a multiple of the logical block size? */ @@ -641,7 +632,6 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, sector_t offset) { struct request_queue *t = disk->queue; - struct request_queue *b = bdev_get_queue(bdev); if (bdev_stack_limits(&t->limits, bdev, offset >> 9) < 0) { char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE]; @@ -652,17 +642,6 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n", top, bottom); } - - if (!t->queue_lock) - WARN_ON_ONCE(1); - else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) { - unsigned long flags; - - spin_lock_irqsave(t->queue_lock, flags); - if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) - queue_flag_clear(QUEUE_FLAG_CLUSTER, t); - spin_unlock_irqrestore(t->queue_lock, flags); - } } EXPORT_SYMBOL(disk_stack_limits); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 013457f47fd..41fb69150b4 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -119,7 +119,7 @@ static ssize_t queue_max_integrity_segments_show(struct request_queue *q, char * static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page) { - if (test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags)) + if (blk_queue_cluster(q)) return queue_var_show(queue_max_segment_size(q), (page)); return queue_var_show(PAGE_CACHE_SIZE, (page)); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 90267f8d64e..e2da1912a2c 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1131,11 +1131,6 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, */ q->limits = *limits; - if (limits->no_cluster) - queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q); - else - queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q); - if (!dm_table_supports_discards(t)) queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); else diff --git a/drivers/md/md.c b/drivers/md/md.c index 84c46a16192..52694d29663 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4296,9 +4296,6 @@ static int md_alloc(dev_t dev, char *name) goto abort; mddev->queue->queuedata = mddev; - /* Can be unlocked because the queue is new: no concurrency */ - queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue); - blk_queue_make_request(mddev->queue, md_make_request); disk = alloc_disk(1 << shift); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index eafeeda6e19..9d7ba07dc5e 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1642,9 +1642,8 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost, blk_queue_max_segment_size(q, dma_get_max_seg_size(dev)); - /* New queue, no concurrency on queue_flags */ if (!shost->use_clustering) - queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q); + q->limits.cluster = 0; /* * set a reasonable default alignment on word boundaries: the diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index aae86fd10c4..95aeeeb49e8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -250,7 +250,7 @@ struct queue_limits { unsigned char misaligned; unsigned char discard_misaligned; - unsigned char no_cluster; + unsigned char cluster; signed char discard_zeroes_data; }; @@ -380,7 +380,6 @@ struct request_queue #endif }; -#define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ #define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */ @@ -403,7 +402,6 @@ struct request_queue #define QUEUE_FLAG_SECDISCARD 19 /* supports SECDISCARD */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ - (1 << QUEUE_FLAG_CLUSTER) | \ (1 << QUEUE_FLAG_STACKABLE) | \ (1 << QUEUE_FLAG_SAME_COMP) | \ (1 << QUEUE_FLAG_ADD_RANDOM)) @@ -510,6 +508,11 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) #define rq_data_dir(rq) ((rq)->cmd_flags & 1) +static inline unsigned int blk_queue_cluster(struct request_queue *q) +{ + return q->limits.cluster; +} + /* * We regard a request as sync, if either a read or a sync write */ -- cgit v1.2.3-70-g09d2