From 6000a368cd8e6da1caf101411bdb494cd6fb8b09 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 19 Aug 2008 18:45:30 -0500 Subject: [SCSI] block: separate failfast into multiple bits. Multipath is best at handling transport errors. If it gets a device error then there is not much the multipath layer can do. It will just access the same device but from a different path. This patch breaks up failfast into device, transport and driver errors. The multipath layers (md and dm multipath) only ask the lower levels to fast fail transport errors. The user of failfast, read ahead, will ask to fast fail on all errors. Note that blk_noretry_request will return true if any failfast bit is set. This allows drivers that do not support the multipath failfast bits to continue to fail on any failfast error like before. Drivers like scsi that are able to fail fast specific errors can check for the specific fail fast type. In the next patch I will convert scsi. Signed-off-by: Mike Christie Cc: Jens Axboe Signed-off-by: James Bottomley --- block/blk-core.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 2d053b58441..9e79a485e4f 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1075,8 +1075,15 @@ void init_request_from_bio(struct request *req, struct bio *bio) /* * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) */ - if (bio_rw_ahead(bio) || bio_failfast(bio)) - req->cmd_flags |= REQ_FAILFAST; + if (bio_rw_ahead(bio)) + req->cmd_flags |= (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | + REQ_FAILFAST_DRIVER); + if (bio_failfast_dev(bio)) + req->cmd_flags |= REQ_FAILFAST_DEV; + if (bio_failfast_transport(bio)) + req->cmd_flags |= REQ_FAILFAST_TRANSPORT; + if (bio_failfast_driver(bio)) + req->cmd_flags |= REQ_FAILFAST_DRIVER; /* * REQ_BARRIER implies no merging, but lets make it explicit -- cgit v1.2.3-70-g09d2 From 1ff9f542e5f87c299226557ce5e67a402ed4b502 Mon Sep 17 
00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 21 Jul 2008 20:03:34 -0700 Subject: device create: block: convert device_create_drvdata to device_create Now that device_create() has been audited, rename things back to the original call to be sane. Signed-off-by: Greg Kroah-Hartman --- block/bsg.c | 3 +-- drivers/block/aoe/aoechr.c | 6 +++--- drivers/block/paride/pg.c | 5 ++--- drivers/block/paride/pt.c | 10 ++++------ drivers/block/pktcdvd.c | 5 ++--- 5 files changed, 12 insertions(+), 17 deletions(-) (limited to 'block') diff --git a/block/bsg.c b/block/bsg.c index 56cb343c76d..034112bfe1f 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -1024,8 +1024,7 @@ int bsg_register_queue(struct request_queue *q, struct device *parent, bcd->release = release; kref_init(&bcd->ref); dev = MKDEV(bsg_major, bcd->minor); - class_dev = device_create_drvdata(bsg_class, parent, dev, NULL, - "%s", devname); + class_dev = device_create(bsg_class, parent, dev, NULL, "%s", devname); if (IS_ERR(class_dev)) { ret = PTR_ERR(class_dev); goto put_dev; diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c index 1f56d2c5b7f..200efc4d2c1 100644 --- a/drivers/block/aoe/aoechr.c +++ b/drivers/block/aoe/aoechr.c @@ -284,9 +284,9 @@ aoechr_init(void) return PTR_ERR(aoe_class); } for (i = 0; i < ARRAY_SIZE(chardevs); ++i) - device_create_drvdata(aoe_class, NULL, - MKDEV(AOE_MAJOR, chardevs[i].minor), - NULL, chardevs[i].name); + device_create(aoe_class, NULL, + MKDEV(AOE_MAJOR, chardevs[i].minor), NULL, + chardevs[i].name); return 0; } diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c index d731ca42f80..9dfa2716300 100644 --- a/drivers/block/paride/pg.c +++ b/drivers/block/paride/pg.c @@ -686,9 +686,8 @@ static int __init pg_init(void) for (unit = 0; unit < PG_UNITS; unit++) { struct pg *dev = &devices[unit]; if (dev->present) - device_create_drvdata(pg_class, NULL, - MKDEV(major, unit), NULL, - "pg%u", unit); + device_create(pg_class, NULL, MKDEV(major, unit), NULL, 
+ "pg%u", unit); } err = 0; goto out; diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c index 673b8b2fd33..5ae229656ea 100644 --- a/drivers/block/paride/pt.c +++ b/drivers/block/paride/pt.c @@ -979,12 +979,10 @@ static int __init pt_init(void) for (unit = 0; unit < PT_UNITS; unit++) if (pt[unit].present) { - device_create_drvdata(pt_class, NULL, - MKDEV(major, unit), NULL, - "pt%d", unit); - device_create_drvdata(pt_class, NULL, - MKDEV(major, unit + 128), NULL, - "pt%dn", unit); + device_create(pt_class, NULL, MKDEV(major, unit), NULL, + "pt%d", unit); + device_create(pt_class, NULL, MKDEV(major, unit + 128), + NULL, "pt%dn", unit); } goto out; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 0e077150568..195ca7c720f 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -302,9 +302,8 @@ static struct kobj_type kobj_pkt_type_wqueue = { static void pkt_sysfs_dev_new(struct pktcdvd_device *pd) { if (class_pktcdvd) { - pd->dev = device_create_drvdata(class_pktcdvd, NULL, - pd->pkt_dev, NULL, - "%s", pd->name); + pd->dev = device_create(class_pktcdvd, NULL, pd->pkt_dev, NULL, + "%s", pd->name); if (IS_ERR(pd->dev)) pd->dev = NULL; } -- cgit v1.2.3-70-g09d2 From 8677142710516d986d932d6f1fba7be8382c1fec Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 13 Oct 2008 14:19:05 +0200 Subject: block: fix nr_phys_segments miscalculation bug This fixes the bug reported by Nikanth Karthikesan : http://lkml.org/lkml/2008/10/2/203 The root cause of the bug is that blk_phys_contig_segment miscalculates q->max_segment_size. blk_phys_contig_segment checks: req->biotail->bi_size + next_req->bio->bi_size > q->max_segment_size But blk_recalc_rq_segments might expect that req->biotail and the previous bio in the req are supposed to be merged into one segment. blk_recalc_rq_segments might also expect that next_req->bio and the next bio in the next_req are supposed to be merged into one segment. 
In such case, we merge two requests that can't be merged here. Later, blk_rq_map_sg gives more segments than it should. We need to keep track of segment size in blk_recalc_rq_segments and use it to see if two requests can be merged. This patch implements it in the similar way that we used to do for hw merging (virtual merging). Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/blk-merge.c | 20 ++++++++++++++++++-- include/linux/bio.h | 7 +++++++ 2 files changed, 25 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/blk-merge.c b/block/blk-merge.c index 908d3e11ac5..8681cd6f991 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -77,12 +77,20 @@ void blk_recalc_rq_segments(struct request *rq) continue; } new_segment: + if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size) + rq->bio->bi_seg_front_size = seg_size; + nr_phys_segs++; bvprv = bv; seg_size = bv->bv_len; highprv = high; } + if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size) + rq->bio->bi_seg_front_size = seg_size; + if (seg_size > rq->biotail->bi_seg_back_size) + rq->biotail->bi_seg_back_size = seg_size; + rq->nr_phys_segments = nr_phys_segs; } @@ -106,7 +114,8 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags)) return 0; - if (bio->bi_size + nxt->bi_size > q->max_segment_size) + if (bio->bi_seg_back_size + nxt->bi_seg_front_size > + q->max_segment_size) return 0; if (!bio_has_data(bio)) @@ -309,6 +318,8 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, struct request *next) { int total_phys_segments; + unsigned int seg_size = + req->biotail->bi_seg_back_size + next->bio->bi_seg_front_size; /* * First check if the either of the requests are re-queued @@ -324,8 +335,13 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, return 0; total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; - if 
(blk_phys_contig_segment(q, req->biotail, next->bio)) + if (blk_phys_contig_segment(q, req->biotail, next->bio)) { + if (req->nr_phys_segments == 1) + req->bio->bi_seg_front_size = seg_size; + if (next->nr_phys_segments == 1) + next->biotail->bi_seg_back_size = seg_size; total_phys_segments--; + } if (total_phys_segments > q->max_phys_segments) return 0; diff --git a/include/linux/bio.h b/include/linux/bio.h index ff5b4cf9e2d..dc3cec386a9 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -79,6 +79,13 @@ struct bio { unsigned int bi_size; /* residual I/O count */ + /* + * To keep track of the max segment size, we account for the + * sizes of the first and last mergeable segments in this bio. + */ + unsigned int bi_seg_front_size; + unsigned int bi_seg_back_size; + unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ unsigned int bi_comp_cpu; /* completion CPU */ -- cgit v1.2.3-70-g09d2 From e6d63840ba55ffd3a79aea6792aac6f29f338083 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 14 Oct 2008 08:49:34 +0200 Subject: block: fix kernel-doc for blk_alloc_devt() No argument 'gfp_mask' for blk_alloc_devt(). Signed-off-by: Li Zefan Signed-off-by: Jens Axboe --- block/genhd.c | 1 - 1 file changed, 1 deletion(-) (limited to 'block') diff --git a/block/genhd.c b/block/genhd.c index 4cd3433c99a..b8defae2ec0 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -358,7 +358,6 @@ static int blk_mangle_minor(int minor) /** * blk_alloc_devt - allocate a dev_t for a partition * @part: partition to allocate dev_t for - * @gfp_mask: memory allocation flag * @devt: out parameter for resulting dev_t * * Allocate a dev_t for block device. -- cgit v1.2.3-70-g09d2 From ee2e992cc28553f6c4dd1ab5483c8733c393626b Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 14 Oct 2008 08:49:56 +0200 Subject: block: simplify string handling in elv_iosched_store() strlcpy() guarantees the dest buffer is NUL-terminated. 
Signed-off-by: Li Zefan Signed-off-by: Jens Axboe --- block/elevator.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'block') diff --git a/block/elevator.c b/block/elevator.c index 04518921db3..9482ffa1aae 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -1166,15 +1166,10 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name, size_t count) { char elevator_name[ELV_NAME_MAX]; - size_t len; struct elevator_type *e; - elevator_name[sizeof(elevator_name) - 1] = '\0'; - strncpy(elevator_name, name, sizeof(elevator_name) - 1); - len = strlen(elevator_name); - - if (len && elevator_name[len - 1] == '\n') - elevator_name[len - 1] = '\0'; + strlcpy(elevator_name, name, sizeof(elevator_name)); + strstrip(elevator_name); e = elevator_get(elevator_name); if (!e) { -- cgit v1.2.3-70-g09d2 From 80a4b58e36b63d7b0b592beb1bd6410aadeeb63c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 14 Oct 2008 09:51:06 +0200 Subject: block: only call ->request_fn when the queue is not stopped Callers should use either blk_run_queue/__blk_run_queue, or blk_start_queueing() to invoke request handling instead of calling ->request_fn() directly as that does not take the queue stopped flag into account. Also add appropriate comments on the above functions to detail their usage. 
Signed-off-by: Jens Axboe --- block/blk-core.c | 19 +++++++++++++++++-- block/elevator.c | 7 +++---- 2 files changed, 20 insertions(+), 6 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 2d053b58441..91532f2d2fa 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -325,6 +325,9 @@ EXPORT_SYMBOL(blk_unplug); static void blk_invoke_request_fn(struct request_queue *q) { + if (unlikely(blk_queue_stopped(q))) + return; + /* * one level of recursion is ok and is much faster than kicking * the unplug handling @@ -399,8 +402,13 @@ void blk_sync_queue(struct request_queue *q) EXPORT_SYMBOL(blk_sync_queue); /** - * blk_run_queue - run a single device queue + * __blk_run_queue - run a single device queue * @q: The queue to run + * + * Description: + * See @blk_run_queue. This variant must be called with the queue lock + * held and interrupts disabled. + * */ void __blk_run_queue(struct request_queue *q) { @@ -418,6 +426,12 @@ EXPORT_SYMBOL(__blk_run_queue); /** * blk_run_queue - run a single device queue * @q: The queue to run + * + * Description: + * Invoke request handling on this queue, if it has pending work to do. + * May be used to restart queueing when a request has completed. Also + * See @blk_start_queueing. + * */ void blk_run_queue(struct request_queue *q) { @@ -884,7 +898,8 @@ EXPORT_SYMBOL(blk_get_request); * * This is basically a helper to remove the need to know whether a queue * is plugged or not if someone just wants to initiate dispatch of requests - * for this queue. + * for this queue. Should be used to start queueing on a device outside + * of ->request_fn() context. Also see @blk_run_queue. * * The queue lock must be held with interrupts disabled. */ diff --git a/block/elevator.c b/block/elevator.c index 9482ffa1aae..59173a69ebd 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -612,7 +612,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) * processing. 
*/ blk_remove_plug(q); - q->request_fn(q); + blk_start_queueing(q); break; case ELEVATOR_INSERT_SORT: @@ -950,7 +950,7 @@ void elv_completed_request(struct request_queue *q, struct request *rq) blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN && blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) { blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0); - q->request_fn(q); + blk_start_queueing(q); } } } @@ -1109,8 +1109,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) elv_drain_elevator(q); while (q->rq.elvpriv) { - blk_remove_plug(q); - q->request_fn(q); + blk_start_queueing(q); spin_unlock_irq(q->queue_lock); msleep(10); spin_lock_irq(q->queue_lock); -- cgit v1.2.3-70-g09d2 From 496aa8a98f5ab22ced46be5dc2087cdf3d029bd7 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 16 Oct 2008 07:46:23 +0200 Subject: block: fix current kernel-doc warnings Fix block kernel-doc warnings: Warning(linux-2.6.27-git4//fs/block_dev.c:1272): No description found for parameter 'path' Warning(linux-2.6.27-git4//block/blk-core.c:1021): No description found for parameter 'cpu' Warning(linux-2.6.27-git4//block/blk-core.c:1021): No description found for parameter 'part' Warning(/var/linsrc/linux-2.6.27-git4//block/genhd.c:544): No description found for parameter 'partno' Signed-off-by: Randy Dunlap Signed-off-by: Jens Axboe --- block/blk-core.c | 5 +++-- block/genhd.c | 2 +- fs/block_dev.c | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 91532f2d2fa..8517264eb71 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1018,8 +1018,9 @@ static void part_round_stats_single(int cpu, struct hd_struct *part, } /** - * part_round_stats() - Round off the performance stats on a struct - * disk_stats. + * part_round_stats() - Round off the performance stats on a struct disk_stats. 
+ * @cpu: cpu number for stats access + * @part: target partition * * The average IO queue length and utilisation statistics are maintained * by observing the current state of the queue length and the amount of diff --git a/block/genhd.c b/block/genhd.c index b8defae2ec0..646e1d2507c 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -534,7 +534,7 @@ void unlink_gendisk(struct gendisk *disk) /** * get_gendisk - get partitioning information for a given device * @devt: device to get partitioning information for - * @part: returned partition index + * @partno: returned partition index * * This function gets the structure containing partitioning * information for the given device @devt. diff --git a/fs/block_dev.c b/fs/block_dev.c index d84f0469a01..218408eed1b 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1262,7 +1262,7 @@ EXPORT_SYMBOL(ioctl_by_bdev); /** * lookup_bdev - lookup a struct block_device by name - * @pathname: special file representing the block device + * @path: special file representing the block device * * Get a reference to the blockdevice at @pathname in the current * namespace if possible and return it. Return ERR_PTR(error) -- cgit v1.2.3-70-g09d2 From 713ada9ba94f2ad874cffd074b83e3dc681ca82f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Oct 2008 13:44:57 +0200 Subject: block: move q->unplug_work initialization modprobe loop; rmmod loop effectively creates a blk_queue and destroys it which results in q->unplug_work being canceled without it ever being initialized. Therefore, move the initialization of q->unplug_work from blk_queue_make_request() to blk_alloc_queue*(). 
Reported-by: Alexey Dobriyan Signed-off-by: Peter Zijlstra Signed-off-by: Jens Axboe --- block/blk-core.c | 1 + block/blk-settings.c | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 8517264eb71..fcbd56dd41f 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -515,6 +515,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) init_timer(&q->unplug_timer); setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); INIT_LIST_HEAD(&q->timeout_list); + INIT_WORK(&q->unplug_work, blk_unplug_work); kobject_init(&q->kobj, &blk_queue_ktype); diff --git a/block/blk-settings.c b/block/blk-settings.c index b21dcdb6415..41392fbe19f 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -141,8 +141,6 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn) if (q->unplug_delay == 0) q->unplug_delay = 1; - INIT_WORK(&q->unplug_work, blk_unplug_work); - q->unplug_timer.function = blk_unplug_timeout; q->unplug_timer.data = (unsigned long)q; -- cgit v1.2.3-70-g09d2 From f73e2d13a16cc88c4faa4729967f92bfeec8a142 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 17 Oct 2008 14:03:08 +0200 Subject: block: remove __generic_unplug_device() from exports The only out-of-core user is IDE, and that should be using blk_start_queueing() instead. 
Signed-off-by: Jens Axboe --- block/blk-core.c | 1 - block/blk.h | 1 + drivers/ide/ide-io.c | 4 ++-- include/linux/blkdev.h | 1 - 4 files changed, 3 insertions(+), 4 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index fcbd56dd41f..81a49600497 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -257,7 +257,6 @@ void __generic_unplug_device(struct request_queue *q) q->request_fn(q); } -EXPORT_SYMBOL(__generic_unplug_device); /** * generic_unplug_device - fire a request queue diff --git a/block/blk.h b/block/blk.h index e5c57976996..d2e49af90db 100644 --- a/block/blk.h +++ b/block/blk.h @@ -20,6 +20,7 @@ void blk_unplug_timeout(unsigned long data); void blk_rq_timed_out_timer(unsigned long data); void blk_delete_timer(struct request *); void blk_add_timer(struct request *); +void __generic_unplug_device(struct request_queue *); /* * Internal atomic flags for request handling diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 77c6eaeacef..7162d67562a 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -1493,8 +1493,8 @@ void ide_do_drive_cmd(ide_drive_t *drive, struct request *rq) spin_lock_irqsave(&ide_lock, flags); hwgroup->rq = NULL; - __elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 1); - __generic_unplug_device(drive->queue); + __elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0); + blk_start_queueing(drive->queue); spin_unlock_irqrestore(&ide_lock, flags); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a92d9e4ea96..8eed8b15f99 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -856,7 +856,6 @@ extern void blk_ordered_complete_seq(struct request_queue *, unsigned, int); extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); extern void blk_dump_rq_flags(struct request *, char *); extern void generic_unplug_device(struct request_queue *); -extern void __generic_unplug_device(struct request_queue *); extern 
long nr_blockdev_pages(void); int blk_get_queue(struct request_queue *); -- cgit v1.2.3-70-g09d2