From eae9acd13a8d14b50c00a961fa959606f34bbd92 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 5 Aug 2008 18:08:25 +0100 Subject: Support 'discard sectors' operation in translation layer support core Signed-off-by: David Woodhouse Signed-off-by: Jens Axboe --- drivers/mtd/mtd_blkdevs.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'drivers') diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 9ff007c4962..681d5aca2af 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -32,6 +32,14 @@ struct mtd_blkcore_priv { spinlock_t queue_lock; }; +static int blktrans_discard_request(struct request_queue *q, + struct request *req) +{ + req->cmd_type = REQ_TYPE_LINUX_BLOCK; + req->cmd[0] = REQ_LB_OP_DISCARD; + return 0; +} + static int do_blktrans_request(struct mtd_blktrans_ops *tr, struct mtd_blktrans_dev *dev, struct request *req) @@ -44,6 +52,10 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr, buf = req->buffer; + if (req->cmd_type == REQ_TYPE_LINUX_BLOCK && + req->cmd[0] == REQ_LB_OP_DISCARD) + return !tr->discard(dev, block, nsect); + if (!blk_fs_request(req)) return 0; @@ -367,6 +379,10 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr) tr->blkcore_priv->rq->queuedata = tr; blk_queue_hardsect_size(tr->blkcore_priv->rq, tr->blksize); + if (tr->discard) + blk_queue_set_discard(tr->blkcore_priv->rq, + blktrans_discard_request); + tr->blkshift = ffs(tr->blksize) - 1; tr->blkcore_priv->thread = kthread_run(mtd_blktrans_thread, tr, -- cgit v1.2.3-70-g09d2 From fdc53971bce56d299cb5f1f06ecbff30b34cbaf2 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 5 Aug 2008 18:08:56 +0100 Subject: Support 'discard sectors' operation. We can benefit from knowing that the file system no longer cares about the contents of certain sectors, by throwing them away immediately and then never having to garbage collect them, and using the extra free space to make our operations more efficient. Do so. Signed-off-by: David Woodhouse Signed-off-by: Jens Axboe --- drivers/mtd/ftl.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'drivers') diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c index f34f20c7891..9bf581c4f74 100644 --- a/drivers/mtd/ftl.c +++ b/drivers/mtd/ftl.c @@ -1005,6 +1005,29 @@ static int ftl_writesect(struct mtd_blktrans_dev *dev, return ftl_write((void *)dev, buf, block, 1); } +static int ftl_discardsect(struct mtd_blktrans_dev *dev, + unsigned long sector, unsigned nr_sects) +{ + partition_t *part = (void *)dev; + uint32_t bsize = 1 << part->header.EraseUnitSize; + + DEBUG(1, "FTL erase sector %ld for %d sectors\n", + sector, nr_sects); + + while (nr_sects) { + uint32_t old_addr = part->VirtualBlockMap[sector]; + if (old_addr != 0xffffffff) { + part->VirtualBlockMap[sector] = 0xffffffff; + part->EUNInfo[old_addr/bsize].Deleted++; + if (set_bam_entry(part, old_addr, 0)) + return -EIO; + } + nr_sects--; + sector++; + } + + return 0; +} /*====================================================================*/ static void ftl_freepart(partition_t *part) @@ -1069,6 +1092,7 @@ static struct mtd_blktrans_ops ftl_tr = { .blksize = SECTOR_SIZE, .readsect = ftl_readsect, .writesect = ftl_writesect, + .discard = ftl_discardsect, .getgeo = ftl_getgeo, .add_mtd = ftl_add_mtd, .remove_dev = ftl_remove_dev, -- cgit v1.2.3-70-g09d2 From 1a8e2bddd5c29008f311613e75925fecbf522c5b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 13 Aug 2008 12:35:09 +0100 Subject: Kill REQ_TYPE_FLUSH It was only used by ps3disk, and it should probably have been REQ_TYPE_LINUX_BLOCK + REQ_LB_OP_FLUSH. Signed-off-by: David Woodhouse Signed-off-by: Jens Axboe --- drivers/block/ps3disk.c | 9 ++++++--- include/linux/blkdev.h | 6 +----- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index d797e209951..4b0d6c7f4c6 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -199,7 +199,8 @@ static void ps3disk_do_request(struct ps3_storage_device *dev, if (blk_fs_request(req)) { if (ps3disk_submit_request_sg(dev, req)) break; - } else if (req->cmd_type == REQ_TYPE_FLUSH) { + } else if (req->cmd_type == REQ_TYPE_LINUX_BLOCK && + req->cmd[0] == REQ_LB_OP_FLUSH) { if (ps3disk_submit_flush_request(dev, req)) break; } else { @@ -257,7 +258,8 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data) return IRQ_HANDLED; } - if (req->cmd_type == REQ_TYPE_FLUSH) { + if (req->cmd_type == REQ_TYPE_LINUX_BLOCK && + req->cmd[0] == REQ_LB_OP_FLUSH) { read = 0; num_sectors = req->hard_cur_sectors; op = "flush"; @@ -405,7 +407,8 @@ static void ps3disk_prepare_flush(struct request_queue *q, struct request *req) dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__); - req->cmd_type = REQ_TYPE_FLUSH; + req->cmd_type = REQ_TYPE_LINUX_BLOCK; + req->cmd[0] = REQ_LB_OP_FLUSH; } static unsigned long ps3disk_mask; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e9eb35c9bf2..f131776f029 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -54,7 +54,6 @@ enum rq_cmd_type_bits { REQ_TYPE_PM_SUSPEND, /* suspend request */ REQ_TYPE_PM_RESUME, /* resume request */ REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ - REQ_TYPE_FLUSH, /* flush request */ REQ_TYPE_SPECIAL, /* driver defined type */ REQ_TYPE_LINUX_BLOCK, /* generic block layer message */ /* @@ -76,11 +75,8 @@ enum rq_cmd_type_bits { * */ enum { - /* - * just examples for now - */ REQ_LB_OP_EJECT = 0x40, /* eject request */ - REQ_LB_OP_FLUSH = 0x41, /* flush device */ + REQ_LB_OP_FLUSH = 0x41, /* flush request */ REQ_LB_OP_DISCARD = 0x42, /* discard sectors */ }; -- cgit v1.2.3-70-g09d2 From 766ca4428d1239a970926856c447310c9c191af2 Mon Sep 17 00:00:00 2001 From: Fernando Luis Vázquez Cao Date: Thu, 14 Aug 2008 09:59:13 +0200 Subject: virtio_blk: use a wrapper function to access io context information of IO requests struct request has an ioprio member but it is never updated because currently bios do not hold io context information. The implication of this is that virtio_blk ends up passing useless information to the backend driver. That said, some IO schedulers such as CFQ do store io context information in struct request, but use private members for that, which means that that information cannot be directly accessed in a IO scheduler-independent way. This patch adds a function to obtain the ioprio of a request. We should avoid accessing ioprio directly and use this function instead, so that its users do not have to care about future changes in block layer structures or what the currently active IO controller is. This patch does not introduce any functional changes but paves the way for future clean-ups and enhancements. Signed-off-by: Fernando Luis Vazquez Cao Acked-by: Rusty Russell Signed-off-by: Jens Axboe --- drivers/block/virtio_blk.c | 4 ++-- include/linux/blkdev.h | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 42251095134..879506a2c23 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -84,11 +84,11 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, if (blk_fs_request(vbr->req)) { vbr->out_hdr.type = 0; vbr->out_hdr.sector = vbr->req->sector; - vbr->out_hdr.ioprio = vbr->req->ioprio; + vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); } else if (blk_pc_request(vbr->req)) { vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD; vbr->out_hdr.sector = 0; - vbr->out_hdr.ioprio = vbr->req->ioprio; + vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); } else { /* We don't put anything else in the queue. */ BUG(); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f131776f029..490ce458b03 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -232,6 +232,11 @@ struct request { struct request *next_rq; }; +static inline unsigned short req_get_ioprio(struct request *req) +{ + return req->ioprio; +} + /* * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME * requests. Some step values could eventually be made generic. -- cgit v1.2.3-70-g09d2 From 5df97b91b5d7ed426034fcc84cb6e7cf682b8838 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 15 Aug 2008 10:20:02 +0200 Subject: drop vmerge accounting Remove hw_segments field from struct bio and struct request. Without virtual merge accounting they have no purpose. Signed-off-by: Mikulas Patocka Signed-off-by: Jens Axboe --- block/blk-core.c | 1 - block/blk-merge.c | 31 ++++--------------------------- block/elevator.c | 2 -- drivers/md/raid1.c | 3 --- drivers/md/raid10.c | 3 --- fs/bio.c | 12 +----------- include/linux/bio.h | 16 +--------------- include/linux/blkdev.h | 7 ------- 8 files changed, 6 insertions(+), 69 deletions(-) (limited to 'drivers') diff --git a/block/blk-core.c b/block/blk-core.c index 1261516dd42..2616cdd049a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2026,7 +2026,6 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, if (bio_has_data(bio)) { rq->nr_phys_segments = bio_phys_segments(q, bio); - rq->nr_hw_segments = bio_hw_segments(q, bio); rq->buffer = bio_data(bio); } rq->current_nr_sectors = bio_cur_sectors(bio); diff --git a/block/blk-merge.c b/block/blk-merge.c index 2c2a2ee716e..d81d91419ff 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -41,12 +41,9 @@ void blk_recalc_rq_sectors(struct request *rq, int nsect) void blk_recalc_rq_segments(struct request *rq) { int nr_phys_segs; - int nr_hw_segs; unsigned int phys_size; - unsigned int hw_size; struct bio_vec *bv, *bvprv = NULL; int seg_size; - int hw_seg_size; int cluster; struct req_iterator iter; int high, highprv = 1; @@ -56,8 +53,8 @@ void blk_recalc_rq_segments(struct request *rq) return; cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); - hw_seg_size = seg_size = 0; - phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0; + seg_size = 0; + phys_size = nr_phys_segs = 0; rq_for_each_segment(bv, rq, iter) { /* * the trick here is making sure that a high page is never @@ -76,30 +73,17 @@ void blk_recalc_rq_segments(struct request *rq) goto new_segment; seg_size += bv->bv_len; - hw_seg_size += bv->bv_len; bvprv = bv; continue; } new_segment: - if (nr_hw_segs == 1 && - hw_seg_size > rq->bio->bi_hw_front_size) - rq->bio->bi_hw_front_size = hw_seg_size; - hw_seg_size = bv->bv_len; - nr_hw_segs++; - nr_phys_segs++; bvprv = bv; seg_size = bv->bv_len; highprv = high; } - if (nr_hw_segs == 1 && - hw_seg_size > rq->bio->bi_hw_front_size) - rq->bio->bi_hw_front_size = hw_seg_size; - if (hw_seg_size > rq->biotail->bi_hw_back_size) - rq->biotail->bi_hw_back_size = hw_seg_size; rq->nr_phys_segments = nr_phys_segs; - rq->nr_hw_segments = nr_hw_segs; } void blk_recount_segments(struct request_queue *q, struct bio *bio) @@ -112,7 +96,6 @@ void blk_recount_segments(struct request_queue *q, struct bio *bio) blk_recalc_rq_segments(&rq); bio->bi_next = nxt; bio->bi_phys_segments = rq.nr_phys_segments; - bio->bi_hw_segments = rq.nr_hw_segments; bio->bi_flags |= (1 << BIO_SEG_VALID); } EXPORT_SYMBOL(blk_recount_segments); @@ -255,10 +238,9 @@ static inline int ll_new_hw_segment(struct request_queue *q, struct request *req, struct bio *bio) { - int nr_hw_segs = bio_hw_segments(q, bio); int nr_phys_segs = bio_phys_segments(q, bio); - if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments + if (req->nr_phys_segments + nr_phys_segs > q->max_hw_segments || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) @@ -270,7 +252,6 @@ static inline int ll_new_hw_segment(struct request_queue *q, * This will form the start of a new hw segment. Bump both * counters. */ - req->nr_hw_segments += nr_hw_segs; req->nr_phys_segments += nr_phys_segs; return 1; } @@ -328,7 +309,6 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, struct request *next) { int total_phys_segments; - int total_hw_segments; /* * First check if the either of the requests are re-queued @@ -350,14 +330,11 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, if (total_phys_segments > q->max_phys_segments) return 0; - total_hw_segments = req->nr_hw_segments + next->nr_hw_segments; - - if (total_hw_segments > q->max_hw_segments) + if (total_phys_segments > q->max_hw_segments) return 0; /* Merge is OK... */ req->nr_phys_segments = total_phys_segments; - req->nr_hw_segments = total_hw_segments; return 1; } diff --git a/block/elevator.c b/block/elevator.c index 4f5127054e3..269615e6dbf 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -790,7 +790,6 @@ struct request *elv_next_request(struct request_queue *q) * device can handle */ rq->nr_phys_segments++; - rq->nr_hw_segments++; } if (!q->prep_rq_fn) @@ -813,7 +812,6 @@ struct request *elv_next_request(struct request_queue *q) * so that we don't add it again */ --rq->nr_phys_segments; - --rq->nr_hw_segments; } rq = NULL; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 03a5ab705c2..28a3869dcfd 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1302,9 +1302,6 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) sbio->bi_size = r1_bio->sectors << 9; sbio->bi_idx = 0; sbio->bi_phys_segments = 0; - sbio->bi_hw_segments = 0; - sbio->bi_hw_front_size = 0; - sbio->bi_hw_back_size = 0; sbio->bi_flags &= ~(BIO_POOL_MASK - 1); sbio->bi_flags |= 1 << BIO_UPTODATE; sbio->bi_next = NULL; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index e34cd0e6247..0f40688503e 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1345,9 +1345,6 @@ static void sync_request_write(mddev_t *mddev, r10bio_t *r10_bio) tbio->bi_size = r10_bio->sectors << 9; tbio->bi_idx = 0; tbio->bi_phys_segments = 0; - tbio->bi_hw_segments = 0; - tbio->bi_hw_front_size = 0; - tbio->bi_hw_back_size = 0; tbio->bi_flags &= ~(BIO_POOL_MASK - 1); tbio->bi_flags |= 1 << BIO_UPTODATE; tbio->bi_next = NULL; diff --git a/fs/bio.c b/fs/bio.c index 4ac7c59d1c6..bee4deca774 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -208,14 +208,6 @@ inline int bio_phys_segments(struct request_queue *q, struct bio *bio) return bio->bi_phys_segments; } -inline int bio_hw_segments(struct request_queue *q, struct bio *bio) -{ - if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) - blk_recount_segments(q, bio); - - return bio->bi_hw_segments; -} - /** * __bio_clone - clone a bio * @bio: destination bio @@ -350,7 +342,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page */ while (bio->bi_phys_segments >= q->max_phys_segments - || bio->bi_hw_segments >= q->max_hw_segments) { + || bio->bi_phys_segments >= q->max_hw_segments) { if (retried_segments) return 0; @@ -399,7 +391,6 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page bio->bi_vcnt++; bio->bi_phys_segments++; - bio->bi_hw_segments++; done: bio->bi_size += len; return len; @@ -1381,7 +1372,6 @@ EXPORT_SYMBOL(bio_init); EXPORT_SYMBOL(__bio_clone); EXPORT_SYMBOL(bio_clone); EXPORT_SYMBOL(bio_phys_segments); -EXPORT_SYMBOL(bio_hw_segments); EXPORT_SYMBOL(bio_add_page); EXPORT_SYMBOL(bio_add_pc_page); EXPORT_SYMBOL(bio_get_nr_vecs); diff --git a/include/linux/bio.h b/include/linux/bio.h index 894d16ce002..dfc3556d311 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -77,21 +77,8 @@ struct bio { */ unsigned short bi_phys_segments; - /* Number of segments after physical and DMA remapping - * hardware coalescing is performed. - */ - unsigned short bi_hw_segments; - unsigned int bi_size; /* residual I/O count */ - /* - * To keep track of the max hw size, we account for the - * sizes of the first and last virtually mergeable segments - * in this bio - */ - unsigned int bi_hw_front_size; - unsigned int bi_hw_back_size; - unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ struct bio_vec *bi_io_vec; /* the actual vec list */ @@ -113,7 +100,7 @@ struct bio { #define BIO_UPTODATE 0 /* ok after I/O completion */ #define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */ #define BIO_EOF 2 /* out-out-bounds error */ -#define BIO_SEG_VALID 3 /* nr_hw_seg valid */ +#define BIO_SEG_VALID 3 /* bi_phys_segments valid */ #define BIO_CLONED 4 /* doesn't own data */ #define BIO_BOUNCED 5 /* bio is a bounce bio */ #define BIO_USER_MAPPED 6 /* contains user pages */ @@ -324,7 +311,6 @@ extern void bio_free(struct bio *, struct bio_set *); extern void bio_endio(struct bio *, int); struct request_queue; extern int bio_phys_segments(struct request_queue *, struct bio *); -extern int bio_hw_segments(struct request_queue *, struct bio *); extern void __bio_clone(struct bio *, struct bio *); extern struct bio *bio_clone(struct bio *, gfp_t); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 490ce458b03..1adb03827bd 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -189,13 +189,6 @@ struct request { */ unsigned short nr_phys_segments; - /* Number of scatter-gather addr+len pairs after - * physical and DMA remapping hardware coalescing is performed. - * This is the number of scatter-gather entries the driver - * will actually have to deal with after DMA mapping is done. - */ - unsigned short nr_hw_segments; - unsigned short ioprio; void *special; -- cgit v1.2.3-70-g09d2 From 960e739d9e9f1c2346d8bdc65299ee2e1ed42218 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 Aug 2008 10:41:18 +0200 Subject: block: raid fixups for removal of bi_hw_segments Signed-off-by: Jens Axboe --- drivers/md/raid1.c | 1 - drivers/md/raid10.c | 1 - drivers/md/raid5.c | 66 +++++++++++++++++++++++++++++++++++++++++------------ 3 files changed, 51 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 28a3869dcfd..0b82030c265 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1787,7 +1787,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i bio->bi_vcnt = 0; bio->bi_idx = 0; bio->bi_phys_segments = 0; - bio->bi_hw_segments = 0; bio->bi_size = 0; bio->bi_end_io = NULL; bio->bi_private = NULL; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 0f40688503e..d3b9aa09628 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1944,7 +1944,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i bio->bi_vcnt = 0; bio->bi_idx = 0; bio->bi_phys_segments = 0; - bio->bi_hw_segments = 0; bio->bi_size = 0; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 224de022e7c..05b22925cce 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -101,6 +101,40 @@ const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); #endif +/* + * We maintain a biased count of active stripes in the bottom 8 bits of + * bi_phys_segments, and a count of processed stripes in the upper 8 bits + */ +static inline int raid5_bi_phys_segments(struct bio *bio) +{ + return bio->bi_phys_segments & 0xff; +} + +static inline int raid5_bi_hw_segments(struct bio *bio) +{ + return (bio->bi_phys_segments >> 8) & 0xff; +} + +static inline int raid5_dec_bi_phys_segments(struct bio *bio) +{ + --bio->bi_phys_segments; + return raid5_bi_phys_segments(bio); +} + +static inline int raid5_dec_bi_hw_segments(struct bio *bio) +{ + unsigned short val = raid5_bi_hw_segments(bio); + + --val; + bio->bi_phys_segments = (val << 8) | raid5_bi_phys_segments(bio); + return val; +} + +static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt) +{ + bio->bi_phys_segments = raid5_bi_phys_segments(bio) || (cnt << 8); +} + static inline int raid6_next_disk(int disk, int raid_disks) { disk++; @@ -507,7 +541,7 @@ static void ops_complete_biofill(void *stripe_head_ref) while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) { rbi2 = r5_next_bio(rbi, dev->sector); - if (--rbi->bi_phys_segments == 0) { + if (!raid5_dec_bi_phys_segments(rbi)) { rbi->bi_next = return_bi; return_bi = rbi; } @@ -1725,7 +1759,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in if (*bip) bi->bi_next = *bip; *bip = bi; - bi->bi_phys_segments ++; + bi->bi_phys_segments++; spin_unlock_irq(&conf->device_lock); spin_unlock(&sh->lock); @@ -1819,7 +1853,7 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh, sh->dev[i].sector + STRIPE_SECTORS) { struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); clear_bit(BIO_UPTODATE, &bi->bi_flags); - if (--bi->bi_phys_segments == 0) { + if (!raid5_dec_bi_phys_segments(bi)) { md_write_end(conf->mddev); bi->bi_next = *return_bi; *return_bi = bi; @@ -1834,7 +1868,7 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh, sh->dev[i].sector + STRIPE_SECTORS) { struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); clear_bit(BIO_UPTODATE, &bi->bi_flags); - if (--bi->bi_phys_segments == 0) { + if (!raid5_dec_bi_phys_segments(bi)) { md_write_end(conf->mddev); bi->bi_next = *return_bi; *return_bi = bi; @@ -1858,7 +1892,7 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh, struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); clear_bit(BIO_UPTODATE, &bi->bi_flags); - if (--bi->bi_phys_segments == 0) { + if (!raid5_dec_bi_phys_segments(bi)) { bi->bi_next = *return_bi; *return_bi = bi; } @@ -2033,7 +2067,7 @@ static void handle_stripe_clean_event(raid5_conf_t *conf, while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) { wbi2 = r5_next_bio(wbi, dev->sector); - if (--wbi->bi_phys_segments == 0) { + if (!raid5_dec_bi_phys_segments(wbi)) { md_write_end(conf->mddev); wbi->bi_next = *return_bi; *return_bi = wbi; @@ -2814,7 +2848,7 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) copy_data(0, rbi, dev->page, dev->sector); rbi2 = r5_next_bio(rbi, dev->sector); spin_lock_irq(&conf->device_lock); - if (--rbi->bi_phys_segments == 0) { + if (!raid5_dec_bi_phys_segments(rbi)) { rbi->bi_next = return_bi; return_bi = rbi; } @@ -3155,8 +3189,11 @@ static struct bio *remove_bio_from_retry(raid5_conf_t *conf) if(bi) { conf->retry_read_aligned_list = bi->bi_next; bi->bi_next = NULL; + /* + * this sets the active strip count to 1 and the processed + * strip count to zero (upper 8 bits) + */ bi->bi_phys_segments = 1; /* biased count of active stripes */ - bi->bi_hw_segments = 0; /* count of processed stripes */ } return bi; @@ -3206,8 +3243,7 @@ static int bio_fits_rdev(struct bio *bi) if ((bi->bi_size>>9) > q->max_sectors) return 0; blk_recount_segments(q, bi); - if (bi->bi_phys_segments > q->max_phys_segments || - bi->bi_hw_segments > q->max_hw_segments) + if (bi->bi_phys_segments > q->max_phys_segments) return 0; if (q->merge_bvec_fn) @@ -3468,7 +3504,7 @@ static int make_request(struct request_queue *q, struct bio * bi) } spin_lock_irq(&conf->device_lock); - remaining = --bi->bi_phys_segments; + remaining = raid5_dec_bi_phys_segments(bi); spin_unlock_irq(&conf->device_lock); if (remaining == 0) { @@ -3752,7 +3788,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) sector += STRIPE_SECTORS, scnt++) { - if (scnt < raid_bio->bi_hw_segments) + if (scnt < raid5_bi_hw_segments(raid_bio)) /* already done this stripe */ continue; @@ -3760,7 +3796,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) if (!sh) { /* failed to get a stripe - must wait */ - raid_bio->bi_hw_segments = scnt; + raid5_set_bi_hw_segments(raid_bio, scnt); conf->retry_read_aligned = raid_bio; return handled; } @@ -3768,7 +3804,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) set_bit(R5_ReadError, &sh->dev[dd_idx].flags); if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) { release_stripe(sh); - raid_bio->bi_hw_segments = scnt; + raid5_set_bi_hw_segments(raid_bio, scnt); conf->retry_read_aligned = raid_bio; return handled; } @@ -3778,7 +3814,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) handled++; } spin_lock_irq(&conf->device_lock); - remaining = --raid_bio->bi_phys_segments; + remaining = raid5_dec_bi_phys_segments(raid_bio); spin_unlock_irq(&conf->device_lock); if (remaining == 0) bio_endio(raid_bio, 0); -- cgit v1.2.3-70-g09d2 From 5b99c2ffa980528a197f26c7d876cceeccce8dd5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 Aug 2008 10:56:11 +0200 Subject: block: make bi_phys_segments an unsigned int instead of short raid5 can overflow with more than 255 stripes, and we can increase it to an int for free on both 32 and 64-bit archs due to the padding. Signed-off-by: Jens Axboe --- drivers/md/raid5.c | 12 ++++++------ include/linux/bio.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 05b22925cce..37e546528f9 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -102,17 +102,17 @@ const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); #endif /* - * We maintain a biased count of active stripes in the bottom 8 bits of - * bi_phys_segments, and a count of processed stripes in the upper 8 bits + * We maintain a biased count of active stripes in the bottom 16 bits of + * bi_phys_segments, and a count of processed stripes in the upper 16 bits */ static inline int raid5_bi_phys_segments(struct bio *bio) { - return bio->bi_phys_segments & 0xff; + return bio->bi_phys_segments & 0xffff; } static inline int raid5_bi_hw_segments(struct bio *bio) { - return (bio->bi_phys_segments >> 8) & 0xff; + return (bio->bi_phys_segments >> 16) & 0xffff; } static inline int raid5_dec_bi_phys_segments(struct bio *bio) @@ -126,13 +126,13 @@ static inline int raid5_dec_bi_hw_segments(struct bio *bio) unsigned short val = raid5_bi_hw_segments(bio); --val; - bio->bi_phys_segments = (val << 8) | raid5_bi_phys_segments(bio); + bio->bi_phys_segments = (val << 16) | raid5_bi_phys_segments(bio); return val; } static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt) { - bio->bi_phys_segments = raid5_bi_phys_segments(bio) || (cnt << 8); + bio->bi_phys_segments = raid5_bi_phys_segments(bio) || (cnt << 16); } static inline int raid6_next_disk(int disk, int raid_disks) diff --git a/include/linux/bio.h b/include/linux/bio.h index dfc3556d311..2c0c09034fd 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -75,7 +75,7 @@ struct bio { /* Number of segments in this BIO after * physical address coalescing is performed. */ - unsigned short bi_phys_segments; + unsigned int bi_phys_segments; unsigned int bi_size; /* residual I/O count */ -- cgit v1.2.3-70-g09d2 From 5a3ceb861663040f9ef0176df4aaa494bba5e352 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:50:19 +0200 Subject: driver-core: use klist for class device list and implement iterator Iterating over entries using callback usually isn't too fun especially when the entry being iterated over can't be manipulated freely. This patch converts class->p->class_devices to klist and implements class device iterator so that the users can freely build their own control structure. The users are also free to call back into class code without worrying about locking. class_for_each_device() and class_find_device() are converted to use the new iterators, so their users don't have to worry about locking anymore either. Note: This depends on klist-dont-iterate-over-deleted-entries patch because class_intf->add/remove_dev() depends on proper synchronization with device removal. Signed-off-by: Tejun Heo Cc: Greg Kroah-Hartman Cc: Jens Axboe Signed-off-by: Jens Axboe --- drivers/base/base.h | 2 +- drivers/base/class.c | 136 +++++++++++++++++++++++++++++++++++++------------ drivers/base/core.c | 6 +-- include/linux/device.h | 14 ++++- 4 files changed, 120 insertions(+), 38 deletions(-) (limited to 'drivers') diff --git a/drivers/base/base.h b/drivers/base/base.h index 31dc0cd84af..0a5f055dffb 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -54,7 +54,7 @@ struct driver_private { */ struct class_private { struct kset class_subsys; - struct list_head class_devices; + struct klist class_devices; struct list_head class_interfaces; struct kset class_dirs; struct mutex class_mutex; diff --git a/drivers/base/class.c b/drivers/base/class.c index cc5e28c8885..eb85e431230 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -135,6 +135,20 @@ static void remove_class_attrs(struct class *cls) } } +static void klist_class_dev_get(struct klist_node *n) +{ + struct device *dev = container_of(n, struct device, knode_class); + + get_device(dev); +} + +static void klist_class_dev_put(struct klist_node *n) +{ + struct device *dev = container_of(n, struct device, knode_class); + + put_device(dev); +} + int __class_register(struct class *cls, struct lock_class_key *key) { struct class_private *cp; @@ -145,7 +159,7 @@ int __class_register(struct class *cls, struct lock_class_key *key) cp = kzalloc(sizeof(*cp), GFP_KERNEL); if (!cp) return -ENOMEM; - INIT_LIST_HEAD(&cp->class_devices); + klist_init(&cp->class_devices, klist_class_dev_get, klist_class_dev_put); INIT_LIST_HEAD(&cp->class_interfaces); kset_init(&cp->class_dirs); __mutex_init(&cp->class_mutex, "struct class mutex", key); @@ -268,6 +282,71 @@ char *make_class_name(const char *name, struct kobject *kobj) } #endif +/** + * class_dev_iter_init - initialize class device iterator + * @iter: class iterator to initialize + * @class: the class we wanna iterate over + * @start: the device to start iterating from, if any + * @type: device_type of the devices to iterate over, NULL for all + * + * Initialize class iterator @iter such that it iterates over devices + * of @class. If @start is set, the list iteration will start there, + * otherwise if it is NULL, the iteration starts at the beginning of + * the list. + */ +void class_dev_iter_init(struct class_dev_iter *iter, struct class *class, + struct device *start, const struct device_type *type) +{ + struct klist_node *start_knode = NULL; + + if (start) + start_knode = &start->knode_class; + klist_iter_init_node(&class->p->class_devices, &iter->ki, start_knode); + iter->type = type; +} +EXPORT_SYMBOL_GPL(class_dev_iter_init); + +/** + * class_dev_iter_next - iterate to the next device + * @iter: class iterator to proceed + * + * Proceed @iter to the next device and return it. Returns NULL if + * iteration is complete. + * + * The returned device is referenced and won't be released till + * iterator is proceed to the next device or exited. The caller is + * free to do whatever it wants to do with the device including + * calling back into class code. + */ +struct device *class_dev_iter_next(struct class_dev_iter *iter) +{ + struct klist_node *knode; + struct device *dev; + + while (1) { + knode = klist_next(&iter->ki); + if (!knode) + return NULL; + dev = container_of(knode, struct device, knode_class); + if (!iter->type || iter->type == dev->type) + return dev; + } +} +EXPORT_SYMBOL_GPL(class_dev_iter_next); + +/** + * class_dev_iter_exit - finish iteration + * @iter: class iterator to finish + * + * Finish an iteration. Always call this function after iteration is + * complete whether the iteration ran till the end or not. + */ +void class_dev_iter_exit(struct class_dev_iter *iter) +{ + klist_iter_exit(&iter->ki); +} +EXPORT_SYMBOL_GPL(class_dev_iter_exit); + /** * class_for_each_device - device iterator * @class: the class we're iterating @@ -283,13 +362,13 @@ char *make_class_name(const char *name, struct kobject *kobj) * We check the return of @fn each time. If it returns anything * other than 0, we break out and return that value. * - * Note, we hold class->class_mutex in this function, so it can not be - * re-acquired in @fn, otherwise it will self-deadlocking. For - * example, calls to add or remove class members would be verboten. + * @fn is allowed to do anything including calling back into class + * code. There's no locking restriction. */ int class_for_each_device(struct class *class, struct device *start, void *data, int (*fn)(struct device *, void *)) { + struct class_dev_iter iter; struct device *dev; int error = 0; @@ -301,20 +380,13 @@ int class_for_each_device(struct class *class, struct device *start, return -EINVAL; } - mutex_lock(&class->p->class_mutex); - list_for_each_entry(dev, &class->p->class_devices, node) { - if (start) { - if (start == dev) - start = NULL; - continue; - } - dev = get_device(dev); + class_dev_iter_init(&iter, class, start, NULL); + while ((dev = class_dev_iter_next(&iter))) { error = fn(dev, data); - put_device(dev); if (error) break; } - mutex_unlock(&class->p->class_mutex); + class_dev_iter_exit(&iter); return error; } @@ -337,16 +409,15 @@ EXPORT_SYMBOL_GPL(class_for_each_device); * * Note, you will need to drop the reference with put_device() after use. * - * We hold class->class_mutex in this function, so it can not be - * re-acquired in @match, otherwise it will self-deadlocking. For - * example, calls to add or remove class members would be verboten. + * @fn is allowed to do anything including calling back into class + * code. There's no locking restriction. */ struct device *class_find_device(struct class *class, struct device *start, void *data, int (*match)(struct device *, void *)) { + struct class_dev_iter iter; struct device *dev; - int found = 0; if (!class) return NULL; @@ -356,29 +427,23 @@ struct device *class_find_device(struct class *class, struct device *start, return NULL; } - mutex_lock(&class->p->class_mutex); - list_for_each_entry(dev, &class->p->class_devices, node) { - if (start) { - if (start == dev) - start = NULL; - continue; - } - dev = get_device(dev); + class_dev_iter_init(&iter, class, start, NULL); + while ((dev = class_dev_iter_next(&iter))) { if (match(dev, data)) { - found = 1; + get_device(dev); break; - } else - put_device(dev); + } } - mutex_unlock(&class->p->class_mutex); + class_dev_iter_exit(&iter); - return found ? dev : NULL; + return dev; } EXPORT_SYMBOL_GPL(class_find_device); int class_interface_register(struct class_interface *class_intf) { struct class *parent; + struct class_dev_iter iter; struct device *dev; if (!class_intf || !class_intf->class) @@ -391,8 +456,10 @@ int class_interface_register(struct class_interface *class_intf) mutex_lock(&parent->p->class_mutex); list_add_tail(&class_intf->node, &parent->p->class_interfaces); if (class_intf->add_dev) { - list_for_each_entry(dev, &parent->p->class_devices, node) + class_dev_iter_init(&iter, parent, NULL, NULL); + while ((dev = class_dev_iter_next(&iter))) class_intf->add_dev(dev, class_intf); + class_dev_iter_exit(&iter); } mutex_unlock(&parent->p->class_mutex); @@ -402,6 +469,7 @@ int class_interface_register(struct class_interface *class_intf) void class_interface_unregister(struct class_interface *class_intf) { struct class *parent = class_intf->class; + struct class_dev_iter iter; struct device *dev; if (!parent) @@ -410,8 +478,10 @@ void class_interface_unregister(struct class_interface *class_intf) mutex_lock(&parent->p->class_mutex); list_del_init(&class_intf->node); if (class_intf->remove_dev) { - list_for_each_entry(dev, &parent->p->class_devices, node) + class_dev_iter_init(&iter, parent, NULL, NULL); + while ((dev = class_dev_iter_next(&iter))) class_intf->remove_dev(dev, class_intf); + class_dev_iter_exit(&iter); } mutex_unlock(&parent->p->class_mutex); diff --git a/drivers/base/core.c b/drivers/base/core.c index d021c98605b..b98cb1416a2 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -536,7 +536,6 @@ void device_initialize(struct device *dev) klist_init(&dev->klist_children, klist_children_get, klist_children_put); INIT_LIST_HEAD(&dev->dma_pools); - INIT_LIST_HEAD(&dev->node); init_MUTEX(&dev->sem); spin_lock_init(&dev->devres_lock); INIT_LIST_HEAD(&dev->devres_head); @@ -916,7 +915,8 @@ int device_add(struct device *dev) if (dev->class) { mutex_lock(&dev->class->p->class_mutex); /* tie the class to the device */ - list_add_tail(&dev->node, &dev->class->p->class_devices); + klist_add_tail(&dev->knode_class, + &dev->class->p->class_devices); /* notify any interfaces that the device is here */ list_for_each_entry(class_intf, @@ -1032,7 +1032,7 @@ void device_del(struct device *dev) if (class_intf->remove_dev) class_intf->remove_dev(dev, class_intf); /* remove the device from the class list */ - list_del_init(&dev->node); + klist_del(&dev->knode_class); mutex_unlock(&dev->class->p->class_mutex); } device_remove_file(dev, &uevent_attr); diff --git a/include/linux/device.h b/include/linux/device.h index 4d8372d135d..246937c9cbc 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -199,6 +199,11 @@ struct class { struct class_private *p; }; +struct class_dev_iter { + struct klist_iter ki; + const struct device_type *type; +}; + extern struct kobject *sysfs_dev_block_kobj; extern struct kobject *sysfs_dev_char_kobj; extern int __must_check __class_register(struct class *class, @@ -213,6 +218,13 @@ extern void class_unregister(struct class *class); __class_register(class, &__key); \ }) +extern void class_dev_iter_init(struct class_dev_iter *iter, + struct class *class, + struct device *start, + const struct device_type *type); +extern struct device *class_dev_iter_next(struct class_dev_iter *iter); +extern void class_dev_iter_exit(struct class_dev_iter *iter); + extern int class_for_each_device(struct class *class, struct device *start, void *data, int (*fn)(struct device *dev, void *data)); @@ -396,7 +408,7 @@ struct device { spinlock_t devres_lock; struct list_head devres_head; - struct list_head node; + struct klist_node knode_class; struct class *class; dev_t devt; /* dev_t, creates the sysfs "dev" */ struct attribute_group **groups; /* optional groups */ -- cgit v1.2.3-70-g09d2 From 310a2c1012934f590192377f65940cad4aa72b15 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:47:17 +0900 Subject: block: misc updates This patch makes the following misc updates in preparation for disk->part dereference fix and extended block devt support. * implment part_to_disk() * fix comment about gendisk->part indexing * rename get_part() to disk_map_sector() * don't use n which is always zero while printing disk information in diskstats_show() Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-core.c | 7 ++++--- block/blk-merge.c | 4 ++-- block/genhd.c | 4 ++-- drivers/block/aoe/aoecmd.c | 2 +- include/linux/genhd.h | 13 ++++++++++--- 5 files changed, 19 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/block/blk-core.c b/block/blk-core.c index 86d22e7d65c..a0dc2e72fcb 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -60,7 +60,7 @@ static void drive_stat_acct(struct request *rq, int new_io) if (!blk_fs_request(rq) || !rq->rq_disk) return; - part = get_part(rq->rq_disk, rq->sector); + part = disk_map_sector(rq->rq_disk, rq->sector); if (!new_io) __all_stat_inc(rq->rq_disk, part, merges[rw], rq->sector); else { @@ -1557,7 +1557,8 @@ static int __end_that_request_first(struct request *req, int error, } if (blk_fs_request(req) && req->rq_disk) { - struct hd_struct *part = get_part(req->rq_disk, req->sector); + struct hd_struct *part = + disk_map_sector(req->rq_disk, req->sector); const int rw = rq_data_dir(req); all_stat_add(req->rq_disk, part, sectors[rw], @@ -1745,7 +1746,7 @@ static void end_that_request_last(struct request *req, int error) if (disk && blk_fs_request(req) && req != &req->q->bar_rq) { unsigned long duration = jiffies - req->start_time; const int rw = rq_data_dir(req); - struct hd_struct *part = get_part(disk, req->sector); + struct hd_struct *part = disk_map_sector(disk, req->sector); __all_stat_inc(disk, part, ios[rw], req->sector); __all_stat_add(disk, part, ticks[rw], duration, req->sector); diff --git a/block/blk-merge.c b/block/blk-merge.c index d81d91419ff..9b17da698d7 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -387,8 +387,8 @@ static int attempt_merge(struct request_queue *q, struct request *req, elv_merge_requests(q, req, next); if (req->rq_disk) { - struct hd_struct *part - = get_part(req->rq_disk, req->sector); + struct hd_struct *part = + disk_map_sector(req->rq_disk, req->sector); disk_round_stats(req->rq_disk); req->rq_disk->in_flight--; if (part) { diff --git a/block/genhd.c b/block/genhd.c index 8b9a9ff1a84..11038fbc75e 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -568,7 +568,7 @@ static int diskstats_show(struct seq_file *s, void *v) { struct gendisk *gp = v; char buf[BDEVNAME_SIZE]; - int n = 0; + int n; /* if (&gp->dev.kobj.entry == block_class.devices.next) @@ -582,7 +582,7 @@ static int diskstats_show(struct seq_file *s, void *v) disk_round_stats(gp); preempt_enable(); seq_printf(s, "%4d %4d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n", - gp->major, n + gp->first_minor, disk_name(gp, n, buf), + gp->major, gp->first_minor, disk_name(gp, 0, buf), disk_stat_read(gp, ios[0]), disk_stat_read(gp, merges[0]), (unsigned long long)disk_stat_read(gp, sectors[0]), jiffies_to_msecs(disk_stat_read(gp, ticks[0])), diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 2f1746295d0..885d1409521 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -757,7 +757,7 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector const int rw = bio_data_dir(bio); struct hd_struct *part; - part = get_part(disk, sector); + part = disk_map_sector(disk, sector); all_stat_inc(disk, part, ios[rw], sector); all_stat_add(disk, part, ticks[rw], duration, sector); all_stat_add(disk, part, sectors[rw], n_sect, sector); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index be4f5e5bfe0..c64e659c984 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -116,7 +116,7 @@ struct gendisk { int minors; /* maximum number of minors, =1 for * disks that can't be partitioned. */ char disk_name[32]; /* name of major driver */ - struct hd_struct **part; /* [indexed by minor] */ + struct hd_struct **part; /* [indexed by minor - 1] */ struct block_device_operations *fops; struct request_queue *queue; void *private_data; @@ -145,14 +145,21 @@ struct gendisk { #endif }; +static inline struct gendisk *part_to_disk(struct hd_struct *part) +{ + if (likely(part)) + return dev_to_disk((part)->dev.parent); + return NULL; +} + /* * Macros to operate on percpu disk statistics: * * The __ variants should only be called in critical sections. The full * variants disable/enable preemption. */ -static inline struct hd_struct *get_part(struct gendisk *gendiskp, - sector_t sector) +static inline struct hd_struct *disk_map_sector(struct gendisk *gendiskp, + sector_t sector) { struct hd_struct *part; int i; -- cgit v1.2.3-70-g09d2 From f331c0296f2a9fee0d396a70598b954062603015 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 3 Sep 2008 09:01:48 +0200 Subject: block: don't depend on consecutive minor space * Implement disk_devt() and part_devt() and use them to directly access devt instead of computing it from ->major and ->first_minor. Note that all references to ->major and ->first_minor outside of block layer is used to determine devt of the disk (the part0) and as ->major and ->first_minor will continue to represent devt for the disk, converting these users aren't strictly necessary. However, convert them for consistency. * Implement disk_max_parts() to avoid directly deferencing genhd->minors. * Update bdget_disk() such that it doesn't assume consecutive minor space. * Move devt computation from register_disk() to add_disk() and make it the only one (all other usages use the initially determined value). These changes clean up the code and will help disk->part dereference fix and extended block device numbers. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/genhd.c | 107 +++++++++++++++++++++++++----------- block/ioctl.c | 6 +- drivers/block/pktcdvd.c | 2 +- drivers/block/ps3disk.c | 2 +- drivers/char/random.c | 6 +- drivers/md/dm-ioctl.c | 4 +- drivers/md/dm-stripe.c | 4 +- drivers/md/dm.c | 7 ++- drivers/memstick/core/mspro_block.c | 2 +- drivers/mmc/card/block.c | 2 +- drivers/s390/block/dasd_proc.c | 3 +- drivers/s390/block/dcssblk.c | 4 +- drivers/scsi/sr.c | 2 +- fs/block_dev.c | 2 +- fs/partitions/check.c | 19 ++++--- include/linux/genhd.h | 27 +++++++-- 16 files changed, 132 insertions(+), 67 deletions(-) (limited to 'drivers') diff --git a/block/genhd.c b/block/genhd.c index dc9ad4c171e..fa32d09fda2 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -186,13 +186,14 @@ void add_disk(struct gendisk *disk) int retval; disk->flags |= GENHD_FL_UP; - blk_register_region(MKDEV(disk->major, disk->first_minor), - disk->minors, NULL, exact_match, exact_lock, disk); + disk->dev.devt = MKDEV(disk->major, disk->first_minor); + blk_register_region(disk_devt(disk), disk->minors, NULL, + exact_match, exact_lock, disk); register_disk(disk); blk_register_queue(disk); bdi = &disk->queue->backing_dev_info; - bdi_register_dev(bdi, MKDEV(disk->major, disk->first_minor)); + bdi_register_dev(bdi, disk_devt(disk)); retval = sysfs_create_link(&disk->dev.kobj, &bdi->dev->kobj, "bdi"); WARN_ON(retval); } @@ -205,8 +206,7 @@ void unlink_gendisk(struct gendisk *disk) sysfs_remove_link(&disk->dev.kobj, "bdi"); bdi_unregister(&disk->queue->backing_dev_info); blk_unregister_queue(disk); - blk_unregister_region(MKDEV(disk->major, disk->first_minor), - disk->minors); + blk_unregister_region(disk_devt(disk), disk->minors); } /** @@ -225,6 +225,38 @@ struct gendisk *get_gendisk(dev_t devt, int *partno) return kobj ? dev_to_disk(dev) : NULL; } +/** + * bdget_disk - do bdget() by gendisk and partition number + * @disk: gendisk of interest + * @partno: partition number + * + * Find partition @partno from @disk, do bdget() on it. + * + * CONTEXT: + * Don't care. + * + * RETURNS: + * Resulting block_device on success, NULL on failure. + */ +extern struct block_device *bdget_disk(struct gendisk *disk, int partno) +{ + dev_t devt = MKDEV(0, 0); + + if (partno == 0) + devt = disk_devt(disk); + else { + struct hd_struct *part = disk->part[partno - 1]; + + if (part && part->nr_sects) + devt = part_devt(part); + } + + if (likely(devt != MKDEV(0, 0))) + return bdget(devt); + return NULL; +} +EXPORT_SYMBOL(bdget_disk); + /* * print a full list of all partitions - intended for places where the root * filesystem can't be mounted and thus to give the victim some idea of what @@ -255,7 +287,7 @@ void __init printk_all_partitions(void) * option takes. */ printk("%02x%02x %10llu %s", - disk->major, disk->first_minor, + MAJOR(disk_devt(disk)), MINOR(disk_devt(disk)), (unsigned long long)get_capacity(disk) >> 1, disk_name(disk, 0, buf)); if (disk->driverfs_dev != NULL && @@ -266,15 +298,15 @@ void __init printk_all_partitions(void) printk(" (driver?)\n"); /* now show the partitions */ - for (n = 0; n < disk->minors - 1; ++n) { - if (disk->part[n] == NULL) - continue; - if (disk->part[n]->nr_sects == 0) + for (n = 0; n < disk_max_parts(disk); ++n) { + struct hd_struct *part = disk->part[n]; + + if (!part || !part->nr_sects) continue; printk(" %02x%02x %10llu %s\n", - disk->major, n + 1 + disk->first_minor, - (unsigned long long)disk->part[n]->nr_sects >> 1, - disk_name(disk, n + 1, buf)); + MAJOR(part_devt(part)), MINOR(part_devt(part)), + (unsigned long long)part->nr_sects >> 1, + disk_name(disk, part->partno, buf)); } } class_dev_iter_exit(&iter); @@ -343,26 +375,27 @@ static int show_partition(struct seq_file *seqf, void *v) char buf[BDEVNAME_SIZE]; /* Don't show non-partitionable removeable devices or empty devices */ - if (!get_capacity(sgp) || - (sgp->minors == 1 && (sgp->flags & GENHD_FL_REMOVABLE))) + if (!get_capacity(sgp) || (!disk_max_parts(sgp) && + (sgp->flags & GENHD_FL_REMOVABLE))) return 0; if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO) return 0; /* show the full disk and all non-0 size partitions of it */ seq_printf(seqf, "%4d %4d %10llu %s\n", - sgp->major, sgp->first_minor, + MAJOR(disk_devt(sgp)), MINOR(disk_devt(sgp)), (unsigned long long)get_capacity(sgp) >> 1, disk_name(sgp, 0, buf)); - for (n = 0; n < sgp->minors - 1; n++) { - if (!sgp->part[n]) + for (n = 0; n < disk_max_parts(sgp); n++) { + struct hd_struct *part = sgp->part[n]; + if (!part) continue; - if (sgp->part[n]->nr_sects == 0) + if (part->nr_sects == 0) continue; seq_printf(seqf, "%4d %4d %10llu %s\n", - sgp->major, n + 1 + sgp->first_minor, - (unsigned long long)sgp->part[n]->nr_sects >> 1 , - disk_name(sgp, n + 1, buf)); + MAJOR(part_devt(part)), MINOR(part_devt(part)), + (unsigned long long)part->nr_sects >> 1, + disk_name(sgp, part->partno, buf)); } return 0; @@ -578,7 +611,8 @@ static int diskstats_show(struct seq_file *seqf, void *v) disk_round_stats(gp); preempt_enable(); seq_printf(seqf, "%4d %4d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n", - gp->major, gp->first_minor, disk_name(gp, 0, buf), + MAJOR(disk_devt(gp)), MINOR(disk_devt(gp)), + disk_name(gp, 0, buf), disk_stat_read(gp, ios[0]), disk_stat_read(gp, merges[0]), (unsigned long long)disk_stat_read(gp, sectors[0]), jiffies_to_msecs(disk_stat_read(gp, ticks[0])), @@ -590,7 +624,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) jiffies_to_msecs(disk_stat_read(gp, time_in_queue))); /* now show all non-0 size partitions of it */ - for (n = 0; n < gp->minors - 1; n++) { + for (n = 0; n < disk_max_parts(gp); n++) { struct hd_struct *hd = gp->part[n]; if (!hd || !hd->nr_sects) @@ -601,8 +635,8 @@ static int diskstats_show(struct seq_file *seqf, void *v) preempt_enable(); seq_printf(seqf, "%4d %4d %s %lu %lu %llu " "%u %lu %lu %llu %u %u %u %u\n", - gp->major, n + gp->first_minor + 1, - disk_name(gp, n + 1, buf), + MAJOR(part_devt(hd)), MINOR(part_devt(hd)), + disk_name(gp, hd->partno, buf), part_stat_read(hd, ios[0]), part_stat_read(hd, merges[0]), (unsigned long long)part_stat_read(hd, sectors[0]), @@ -661,11 +695,22 @@ dev_t blk_lookup_devt(const char *name, int partno) while ((dev = class_dev_iter_next(&iter))) { struct gendisk *disk = dev_to_disk(dev); - if (!strcmp(dev->bus_id, name) && partno < disk->minors) { - devt = MKDEV(MAJOR(dev->devt), - MINOR(dev->devt) + partno); - break; + if (strcmp(dev->bus_id, name)) + continue; + if (partno < 0 || partno > disk_max_parts(disk)) + continue; + + if (partno == 0) + devt = disk_devt(disk); + else { + struct hd_struct *part = disk->part[partno - 1]; + + if (!part || !part->nr_sects) + continue; + + devt = part_devt(part); } + break; } class_dev_iter_exit(&iter); return devt; @@ -755,7 +800,7 @@ void set_disk_ro(struct gendisk *disk, int flag) { int i; disk->policy = flag; - for (i = 0; i < disk->minors - 1; i++) + for (i = 0; i < disk_max_parts(disk); i++) if (disk->part[i]) disk->part[i]->policy = flag; } diff --git a/block/ioctl.c b/block/ioctl.c index d77f5e280a6..403f7d7e0c2 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -29,7 +29,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user if (bdev != bdev->bd_contains) return -EINVAL; partno = p.pno; - if (partno <= 0 || partno >= disk->minors) + if (partno <= 0 || partno > disk_max_parts(disk)) return -EINVAL; switch (a.op) { case BLKPG_ADD_PARTITION: @@ -47,7 +47,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user mutex_lock(&bdev->bd_mutex); /* overlap? */ - for (i = 0; i < disk->minors - 1; i++) { + for (i = 0; i < disk_max_parts(disk); i++) { struct hd_struct *s = disk->part[i]; if (!s) @@ -96,7 +96,7 @@ static int blkdev_reread_part(struct block_device *bdev) struct gendisk *disk = bdev->bd_disk; int res; - if (disk->minors == 1 || bdev != bdev->bd_contains) + if (!disk_max_parts(disk) || bdev != bdev->bd_contains) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EACCES; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 29b7a648cc6..e1a90bbb474 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2911,7 +2911,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) if (!disk->queue) goto out_mem2; - pd->pkt_dev = MKDEV(disk->major, disk->first_minor); + pd->pkt_dev = MKDEV(pktdev_major, idx); ret = pkt_new_dev(pd, dev); if (ret) goto out_new_dev; diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 4b0d6c7f4c6..936466f62af 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -541,7 +541,7 @@ static int ps3disk_remove(struct ps3_system_bus_device *_dev) struct ps3disk_private *priv = dev->sbd.core.driver_data; mutex_lock(&ps3disk_mask_mutex); - __clear_bit(priv->gendisk->first_minor / PS3DISK_MINORS, + __clear_bit(MINOR(disk_devt(priv->gendisk)) / PS3DISK_MINORS, &ps3disk_mask); mutex_unlock(&ps3disk_mask_mutex); del_gendisk(priv->gendisk); diff --git a/drivers/char/random.c b/drivers/char/random.c index 7ce1ac4baa6..6af435b8986 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -661,10 +661,10 @@ void add_disk_randomness(struct gendisk *disk) if (!disk || !disk->random) return; /* first major is 1, so we get >= 0x200 here */ - DEBUG_ENT("disk event %d:%d\n", disk->major, disk->first_minor); + DEBUG_ENT("disk event %d:%d\n", + MAJOR(disk_devt(disk)), MINOR(disk_devt(disk))); - add_timer_randomness(disk->random, - 0x100 + MKDEV(disk->major, disk->first_minor)); + add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); } #endif diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index b262c0042de..c3de311117a 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -426,7 +426,7 @@ static int list_devices(struct dm_ioctl *param, size_t param_size) old_nl->next = (uint32_t) ((void *) nl - (void *) old_nl); disk = dm_disk(hc->md); - nl->dev = huge_encode_dev(MKDEV(disk->major, disk->first_minor)); + nl->dev = huge_encode_dev(disk_devt(disk)); nl->next = 0; strcpy(nl->name, hc->name); @@ -539,7 +539,7 @@ static int __dev_status(struct mapped_device *md, struct dm_ioctl *param) if (dm_suspended(md)) param->flags |= DM_SUSPEND_FLAG; - param->dev = huge_encode_dev(MKDEV(disk->major, disk->first_minor)); + param->dev = huge_encode_dev(disk_devt(disk)); /* * Yes, this will be out of date by the time it gets back diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 4de90ab3968..b745d8ac625 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -284,8 +284,8 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio, memset(major_minor, 0, sizeof(major_minor)); sprintf(major_minor, "%d:%d", - bio->bi_bdev->bd_disk->major, - bio->bi_bdev->bd_disk->first_minor); + MAJOR(disk_devt(bio->bi_bdev->bd_disk)), + MINOR(disk_devt(bio->bi_bdev->bd_disk))); /* * Test to see which stripe drive triggered the event diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ace998ce59f..a78caad2999 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1146,7 +1146,7 @@ static void unlock_fs(struct mapped_device *md); static void free_dev(struct mapped_device *md) { - int minor = md->disk->first_minor; + int minor = MINOR(disk_devt(md->disk)); if (md->suspended_bdev) { unlock_fs(md); @@ -1267,7 +1267,7 @@ static struct mapped_device *dm_find_md(dev_t dev) md = idr_find(&_minor_idr, minor); if (md && (md == MINOR_ALLOCED || - (dm_disk(md)->first_minor != minor) || + (MINOR(disk_devt(dm_disk(md))) != minor) || test_bit(DMF_FREEING, &md->flags))) { md = NULL; goto out; @@ -1318,7 +1318,8 @@ void dm_put(struct mapped_device *md) if (atomic_dec_and_lock(&md->holders, &_minor_lock)) { map = dm_get_table(md); - idr_replace(&_minor_idr, MINOR_ALLOCED, dm_disk(md)->first_minor); + idr_replace(&_minor_idr, MINOR_ALLOCED, + MINOR(disk_devt(dm_disk(md)))); set_bit(DMF_FREEING, &md->flags); spin_unlock(&_minor_lock); if (!dm_suspended(md)) { diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index d2d2318dafa..82bf649ef13 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -197,7 +197,7 @@ static int mspro_block_bd_open(struct inode *inode, struct file *filp) static int mspro_block_disk_release(struct gendisk *disk) { struct mspro_block_data *msb = disk->private_data; - int disk_id = disk->first_minor >> MSPRO_BLOCK_PART_SHIFT; + int disk_id = MINOR(disk_devt(disk)) >> MSPRO_BLOCK_PART_SHIFT; mutex_lock(&mspro_block_disk_lock); diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index ebc8b9d7761..97156b689e8 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -83,7 +83,7 @@ static void mmc_blk_put(struct mmc_blk_data *md) mutex_lock(&open_lock); md->usage--; if (md->usage == 0) { - int devidx = md->disk->first_minor >> MMC_SHIFT; + int devidx = MINOR(disk_devt(md->disk)) >> MMC_SHIFT; __clear_bit(devidx, dev_use); put_disk(md->disk); diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c index 03c0e40a92f..e3b5c4d3036 100644 --- a/drivers/s390/block/dasd_proc.c +++ b/drivers/s390/block/dasd_proc.c @@ -76,7 +76,8 @@ dasd_devices_show(struct seq_file *m, void *v) /* Print kdev. */ if (block->gdp) seq_printf(m, " at (%3d:%6d)", - block->gdp->major, block->gdp->first_minor); + MAJOR(disk_devt(block->gdp)), + MINOR(disk_devt(block->gdp))); else seq_printf(m, " at (???:??????)"); /* Print device name. */ diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 711b3004b3e..9481e4a3f76 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -114,7 +114,7 @@ dcssblk_assign_free_minor(struct dcssblk_dev_info *dev_info) found = 0; // test if minor available list_for_each_entry(entry, &dcssblk_devices, lh) - if (minor == entry->gd->first_minor) + if (minor == MINOR(disk_devt(entry->gd))) found++; if (!found) break; // got unused minor } @@ -397,7 +397,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char goto unload_seg; } sprintf(dev_info->gd->disk_name, "dcssblk%d", - dev_info->gd->first_minor); + MINOR(disk_devt(dev_info->gd))); list_add_tail(&dev_info->lh, &dcssblk_devices); if (!try_module_get(THIS_MODULE)) { diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 27f5bfd1def..8dbe3798d5f 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -878,7 +878,7 @@ static void sr_kref_release(struct kref *kref) struct gendisk *disk = cd->disk; spin_lock(&sr_index_lock); - clear_bit(disk->first_minor, sr_index_bits); + clear_bit(MINOR(disk_devt(disk)), sr_index_bits); spin_unlock(&sr_index_lock); unregister_cdrom(&cd->cdi); diff --git a/fs/block_dev.c b/fs/block_dev.c index de0776cd721..72e0a2887cb 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -892,7 +892,7 @@ int check_disk_change(struct block_device *bdev) if (bdops->revalidate_disk) bdops->revalidate_disk(bdev->bd_disk); - if (bdev->bd_disk->minors > 1) + if (disk_max_parts(bdev->bd_disk)) bdev->bd_invalidated = 1; return 1; } diff --git a/fs/partitions/check.c b/fs/partitions/check.c index b86aab1b0df..e77fa144a07 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -134,7 +134,11 @@ char *disk_name(struct gendisk *hd, int partno, char *buf) const char *bdevname(struct block_device *bdev, char *buf) { - int partno = MINOR(bdev->bd_dev) - bdev->bd_disk->first_minor; + int partno = 0; + + if (bdev->bd_part) + partno = bdev->bd_part->partno; + return disk_name(bdev->bd_disk, partno, buf); } @@ -169,7 +173,7 @@ check_partition(struct gendisk *hd, struct block_device *bdev) if (isdigit(state->name[strlen(state->name)-1])) sprintf(state->name, "p"); - state->limit = hd->minors; + state->limit = disk_max_parts(hd) + 1; i = res = err = 0; while (!res && check_part[i]) { memset(&state->parts, 0, sizeof(state->parts)); @@ -416,7 +420,6 @@ void register_disk(struct gendisk *disk) int err; disk->dev.parent = disk->driverfs_dev; - disk->dev.devt = MKDEV(disk->major, disk->first_minor); strlcpy(disk->dev.bus_id, disk->disk_name, BUS_ID_SIZE); /* ewww... some of these buggers have / in the name... */ @@ -440,7 +443,7 @@ void register_disk(struct gendisk *disk) disk_sysfs_add_subdirs(disk); /* No minors to use for partitions */ - if (disk->minors == 1) + if (!disk_max_parts(disk)) goto exit; /* No such device (e.g., media were just removed) */ @@ -463,8 +466,8 @@ exit: kobject_uevent(&disk->dev.kobj, KOBJ_ADD); /* announce possible partitions */ - for (i = 1; i < disk->minors; i++) { - p = disk->part[i-1]; + for (i = 0; i < disk_max_parts(disk); i++) { + p = disk->part[i]; if (!p || !p->nr_sects) continue; kobject_uevent(&p->dev.kobj, KOBJ_ADD); @@ -482,7 +485,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) if (res) return res; bdev->bd_invalidated = 0; - for (p = 1; p < disk->minors; p++) + for (p = 1; p <= disk_max_parts(disk); p++) delete_partition(disk, p); if (disk->fops->revalidate_disk) disk->fops->revalidate_disk(disk); @@ -545,7 +548,7 @@ void del_gendisk(struct gendisk *disk) int p; /* invalidate stuff */ - for (p = disk->minors - 1; p > 0; p--) { + for (p = disk_max_parts(disk); p > 0; p--) { invalidate_partition(disk, p); delete_partition(disk, p); } diff --git a/include/linux/genhd.h b/include/linux/genhd.h index d1723c0a860..0ff75329199 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -111,10 +111,14 @@ struct hd_struct { #define GENHD_FL_FAIL 64 struct gendisk { + /* major, first_minor and minors are input parameters only, + * don't use directly. Use disk_devt() and disk_max_parts(). + */ int major; /* major number of driver */ int first_minor; int minors; /* maximum number of minors, =1 for * disks that can't be partitioned. */ + char disk_name[32]; /* name of major driver */ struct hd_struct **part; /* [indexed by minor - 1] */ struct block_device_operations *fops; @@ -152,6 +156,21 @@ static inline struct gendisk *part_to_disk(struct hd_struct *part) return NULL; } +static inline int disk_max_parts(struct gendisk *disk) +{ + return disk->minors - 1; +} + +static inline dev_t disk_devt(struct gendisk *disk) +{ + return disk->dev.devt; +} + +static inline dev_t part_devt(struct hd_struct *part) +{ + return part->dev.devt; +} + /* * Macros to operate on percpu disk statistics: * @@ -163,7 +182,7 @@ static inline struct hd_struct *disk_map_sector(struct gendisk *gendiskp, { struct hd_struct *part; int i; - for (i = 0; i < gendiskp->minors - 1; i++) { + for (i = 0; i < disk_max_parts(gendiskp); i++) { part = gendiskp->part[i]; if (part && part->start_sect <= sector && sector < part->start_sect + part->nr_sects) @@ -366,6 +385,7 @@ extern void add_disk(struct gendisk *disk); extern void del_gendisk(struct gendisk *gp); extern void unlink_gendisk(struct gendisk *gp); extern struct gendisk *get_gendisk(dev_t dev, int *partno); +extern struct block_device *bdget_disk(struct gendisk *disk, int partno); extern void set_device_ro(struct block_device *bdev, int flag); extern void set_disk_ro(struct gendisk *disk, int flag); @@ -553,11 +573,6 @@ extern void blk_register_region(dev_t devt, unsigned long range, void *data); extern void blk_unregister_region(dev_t devt, unsigned long range); -static inline struct block_device *bdget_disk(struct gendisk *disk, int partno) -{ - return bdget(MKDEV(disk->major, disk->first_minor) + partno); -} - #else /* CONFIG_BLOCK */ static inline void printk_all_partitions(void) { } -- cgit v1.2.3-70-g09d2 From e71bf0d0ee89e51b92776391c5634938236977d5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 3 Sep 2008 09:03:02 +0200 Subject: block: fix disk->part[] dereferencing race disk->part[] is protected by its matching bdev's lock. However, non-critical accesses like collecting stats and printing out sysfs and proc information used to be performed without any locking. As partitions can come and go dynamically, partitions can go away underneath those non-critical accesses. As some of those accesses are writes, this theoretically can lead to silent corruption. This patch fixes the race by using RCU for the partition array and dev reference counter to hold partitions. * Rename disk->part[] to disk->__part[] to make sure no one outside genhd layer proper accesses it directly. * Use RCU for disk->__part[] dereferencing. * Implement disk_{get|put}_part() which can be used to get and put partitions from gendisk respectively. * Iterators are implemented to help iterate through all partitions safely. * Functions which require RCU readlock are marked with _rcu suffix. * Use disk_put_part() in __blkdev_put() instead of directly putting the contained kobject. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-core.c | 20 ++++- block/blk-merge.c | 9 +- block/genhd.c | 218 ++++++++++++++++++++++++++++++++++++++------- block/ioctl.c | 26 +++--- drivers/block/aoe/aoecmd.c | 6 +- fs/block_dev.c | 15 ++-- fs/partitions/check.c | 70 +++++++++------ include/linux/genhd.h | 53 ++++++++--- 8 files changed, 323 insertions(+), 94 deletions(-) (limited to 'drivers') diff --git a/block/blk-core.c b/block/blk-core.c index a0dc2e72fcb..d6128d9ad60 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -60,7 +60,9 @@ static void drive_stat_acct(struct request *rq, int new_io) if (!blk_fs_request(rq) || !rq->rq_disk) return; - part = disk_map_sector(rq->rq_disk, rq->sector); + rcu_read_lock(); + + part = disk_map_sector_rcu(rq->rq_disk, rq->sector); if (!new_io) __all_stat_inc(rq->rq_disk, part, merges[rw], rq->sector); else { @@ -71,6 +73,8 @@ static void drive_stat_acct(struct request *rq, int new_io) part->in_flight++; } } + + rcu_read_unlock(); } void blk_queue_congestion_threshold(struct request_queue *q) @@ -1557,12 +1561,14 @@ static int __end_that_request_first(struct request *req, int error, } if (blk_fs_request(req) && req->rq_disk) { - struct hd_struct *part = - disk_map_sector(req->rq_disk, req->sector); const int rw = rq_data_dir(req); + struct hd_struct *part; + rcu_read_lock(); + part = disk_map_sector_rcu(req->rq_disk, req->sector); all_stat_add(req->rq_disk, part, sectors[rw], nr_bytes >> 9, req->sector); + rcu_read_unlock(); } total_bytes = bio_nbytes = 0; @@ -1746,7 +1752,11 @@ static void end_that_request_last(struct request *req, int error) if (disk && blk_fs_request(req) && req != &req->q->bar_rq) { unsigned long duration = jiffies - req->start_time; const int rw = rq_data_dir(req); - struct hd_struct *part = disk_map_sector(disk, req->sector); + struct hd_struct *part; + + rcu_read_lock(); + + part = disk_map_sector_rcu(disk, req->sector); __all_stat_inc(disk, part, ios[rw], req->sector); __all_stat_add(disk, part, ticks[rw], duration, req->sector); @@ -1756,6 +1766,8 @@ static void end_that_request_last(struct request *req, int error) part_round_stats(part); part->in_flight--; } + + rcu_read_unlock(); } if (req->end_io) diff --git a/block/blk-merge.c b/block/blk-merge.c index 9b17da698d7..eb2a3ca5830 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -387,14 +387,19 @@ static int attempt_merge(struct request_queue *q, struct request *req, elv_merge_requests(q, req, next); if (req->rq_disk) { - struct hd_struct *part = - disk_map_sector(req->rq_disk, req->sector); + struct hd_struct *part; + + rcu_read_lock(); + + part = disk_map_sector_rcu(req->rq_disk, req->sector); disk_round_stats(req->rq_disk); req->rq_disk->in_flight--; if (part) { part_round_stats(part); part->in_flight--; } + + rcu_read_unlock(); } req->ioprio = ioprio_best(req->ioprio, next->ioprio); diff --git a/block/genhd.c b/block/genhd.c index fa32d09fda2..b431d654394 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -26,6 +26,158 @@ struct kobject *block_depr; static struct device_type disk_type; +/** + * disk_get_part - get partition + * @disk: disk to look partition from + * @partno: partition number + * + * Look for partition @partno from @disk. If found, increment + * reference count and return it. + * + * CONTEXT: + * Don't care. + * + * RETURNS: + * Pointer to the found partition on success, NULL if not found. + */ +struct hd_struct *disk_get_part(struct gendisk *disk, int partno) +{ + struct hd_struct *part; + + if (unlikely(partno < 1 || partno > disk_max_parts(disk))) + return NULL; + rcu_read_lock(); + part = rcu_dereference(disk->__part[partno - 1]); + if (part) + get_device(&part->dev); + rcu_read_unlock(); + + return part; +} +EXPORT_SYMBOL_GPL(disk_get_part); + +/** + * disk_part_iter_init - initialize partition iterator + * @piter: iterator to initialize + * @disk: disk to iterate over + * @flags: DISK_PITER_* flags + * + * Initialize @piter so that it iterates over partitions of @disk. + * + * CONTEXT: + * Don't care. + */ +void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk, + unsigned int flags) +{ + piter->disk = disk; + piter->part = NULL; + + if (flags & DISK_PITER_REVERSE) + piter->idx = disk_max_parts(piter->disk) - 1; + else + piter->idx = 0; + + piter->flags = flags; +} +EXPORT_SYMBOL_GPL(disk_part_iter_init); + +/** + * disk_part_iter_next - proceed iterator to the next partition and return it + * @piter: iterator of interest + * + * Proceed @piter to the next partition and return it. + * + * CONTEXT: + * Don't care. + */ +struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) +{ + int inc, end; + + /* put the last partition */ + disk_put_part(piter->part); + piter->part = NULL; + + rcu_read_lock(); + + /* determine iteration parameters */ + if (piter->flags & DISK_PITER_REVERSE) { + inc = -1; + end = -1; + } else { + inc = 1; + end = disk_max_parts(piter->disk); + } + + /* iterate to the next partition */ + for (; piter->idx != end; piter->idx += inc) { + struct hd_struct *part; + + part = rcu_dereference(piter->disk->__part[piter->idx]); + if (!part) + continue; + if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects) + continue; + + get_device(&part->dev); + piter->part = part; + piter->idx += inc; + break; + } + + rcu_read_unlock(); + + return piter->part; +} +EXPORT_SYMBOL_GPL(disk_part_iter_next); + +/** + * disk_part_iter_exit - finish up partition iteration + * @piter: iter of interest + * + * Called when iteration is over. Cleans up @piter. + * + * CONTEXT: + * Don't care. + */ +void disk_part_iter_exit(struct disk_part_iter *piter) +{ + disk_put_part(piter->part); + piter->part = NULL; +} +EXPORT_SYMBOL_GPL(disk_part_iter_exit); + +/** + * disk_map_sector_rcu - map sector to partition + * @disk: gendisk of interest + * @sector: sector to map + * + * Find out which partition @sector maps to on @disk. This is + * primarily used for stats accounting. + * + * CONTEXT: + * RCU read locked. The returned partition pointer is valid only + * while preemption is disabled. + * + * RETURNS: + * Found partition on success, NULL if there's no matching partition. + */ +struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) +{ + int i; + + for (i = 0; i < disk_max_parts(disk); i++) { + struct hd_struct *part = rcu_dereference(disk->__part[i]); + + if (part && part->start_sect <= sector && + sector < part->start_sect + part->nr_sects) + return part; + } + return NULL; +} +EXPORT_SYMBOL_GPL(disk_map_sector_rcu); + /* * Can be deleted altogether. Later. * @@ -245,10 +397,12 @@ extern struct block_device *bdget_disk(struct gendisk *disk, int partno) if (partno == 0) devt = disk_devt(disk); else { - struct hd_struct *part = disk->part[partno - 1]; + struct hd_struct *part; + part = disk_get_part(disk, partno); if (part && part->nr_sects) devt = part_devt(part); + disk_put_part(part); } if (likely(devt != MKDEV(0, 0))) @@ -270,8 +424,9 @@ void __init printk_all_partitions(void) class_dev_iter_init(&iter, &block_class, NULL, &disk_type); while ((dev = class_dev_iter_next(&iter))) { struct gendisk *disk = dev_to_disk(dev); + struct disk_part_iter piter; + struct hd_struct *part; char buf[BDEVNAME_SIZE]; - int n; /* * Don't show empty devices or things that have been @@ -298,16 +453,13 @@ void __init printk_all_partitions(void) printk(" (driver?)\n"); /* now show the partitions */ - for (n = 0; n < disk_max_parts(disk); ++n) { - struct hd_struct *part = disk->part[n]; - - if (!part || !part->nr_sects) - continue; + disk_part_iter_init(&piter, disk, 0); + while ((part = disk_part_iter_next(&piter))) printk(" %02x%02x %10llu %s\n", MAJOR(part_devt(part)), MINOR(part_devt(part)), (unsigned long long)part->nr_sects >> 1, disk_name(disk, part->partno, buf)); - } + disk_part_iter_exit(&piter); } class_dev_iter_exit(&iter); } @@ -371,7 +523,8 @@ static void *show_partition_start(struct seq_file *seqf, loff_t *pos) static int show_partition(struct seq_file *seqf, void *v) { struct gendisk *sgp = v; - int n; + struct disk_part_iter piter; + struct hd_struct *part; char buf[BDEVNAME_SIZE]; /* Don't show non-partitionable removeable devices or empty devices */ @@ -386,17 +539,14 @@ static int show_partition(struct seq_file *seqf, void *v) MAJOR(disk_devt(sgp)), MINOR(disk_devt(sgp)), (unsigned long long)get_capacity(sgp) >> 1, disk_name(sgp, 0, buf)); - for (n = 0; n < disk_max_parts(sgp); n++) { - struct hd_struct *part = sgp->part[n]; - if (!part) - continue; - if (part->nr_sects == 0) - continue; + + disk_part_iter_init(&piter, sgp, 0); + while ((part = disk_part_iter_next(&piter))) seq_printf(seqf, "%4d %4d %10llu %s\n", MAJOR(part_devt(part)), MINOR(part_devt(part)), (unsigned long long)part->nr_sects >> 1, disk_name(sgp, part->partno, buf)); - } + disk_part_iter_exit(&piter); return 0; } @@ -571,7 +721,7 @@ static void disk_release(struct device *dev) struct gendisk *disk = dev_to_disk(dev); kfree(disk->random); - kfree(disk->part); + kfree(disk->__part); free_disk_stats(disk); kfree(disk); } @@ -596,8 +746,9 @@ static struct device_type disk_type = { static int diskstats_show(struct seq_file *seqf, void *v) { struct gendisk *gp = v; + struct disk_part_iter piter; + struct hd_struct *hd; char buf[BDEVNAME_SIZE]; - int n; /* if (&gp->dev.kobj.entry == block_class.devices.next) @@ -624,12 +775,8 @@ static int diskstats_show(struct seq_file *seqf, void *v) jiffies_to_msecs(disk_stat_read(gp, time_in_queue))); /* now show all non-0 size partitions of it */ - for (n = 0; n < disk_max_parts(gp); n++) { - struct hd_struct *hd = gp->part[n]; - - if (!hd || !hd->nr_sects) - continue; - + disk_part_iter_init(&piter, gp, 0); + while ((hd = disk_part_iter_next(&piter))) { preempt_disable(); part_round_stats(hd); preempt_enable(); @@ -650,6 +797,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) jiffies_to_msecs(part_stat_read(hd, time_in_queue)) ); } + disk_part_iter_exit(&piter); return 0; } @@ -703,12 +851,16 @@ dev_t blk_lookup_devt(const char *name, int partno) if (partno == 0) devt = disk_devt(disk); else { - struct hd_struct *part = disk->part[partno - 1]; + struct hd_struct *part; - if (!part || !part->nr_sects) + part = disk_get_part(disk, partno); + if (!part || !part->nr_sects) { + disk_put_part(part); continue; + } devt = part_devt(part); + disk_put_part(part); } break; } @@ -735,9 +887,9 @@ struct gendisk *alloc_disk_node(int minors, int node_id) } if (minors > 1) { int size = (minors - 1) * sizeof(struct hd_struct *); - disk->part = kmalloc_node(size, + disk->__part = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, node_id); - if (!disk->part) { + if (!disk->__part) { free_disk_stats(disk); kfree(disk); return NULL; @@ -798,10 +950,14 @@ EXPORT_SYMBOL(set_device_ro); void set_disk_ro(struct gendisk *disk, int flag) { - int i; + struct disk_part_iter piter; + struct hd_struct *part; + disk->policy = flag; - for (i = 0; i < disk_max_parts(disk); i++) - if (disk->part[i]) disk->part[i]->policy = flag; + disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); + while ((part = disk_part_iter_next(&piter))) + part->policy = flag; + disk_part_iter_exit(&piter); } EXPORT_SYMBOL(set_disk_ro); diff --git a/block/ioctl.c b/block/ioctl.c index 403f7d7e0c2..a5f672ad55f 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -12,11 +12,12 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user { struct block_device *bdevp; struct gendisk *disk; + struct hd_struct *part; struct blkpg_ioctl_arg a; struct blkpg_partition p; + struct disk_part_iter piter; long long start, length; int partno; - int i; int err; if (!capable(CAP_SYS_ADMIN)) @@ -47,28 +48,33 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user mutex_lock(&bdev->bd_mutex); /* overlap? */ - for (i = 0; i < disk_max_parts(disk); i++) { - struct hd_struct *s = disk->part[i]; - - if (!s) - continue; - if (!(start+length <= s->start_sect || - start >= s->start_sect + s->nr_sects)) { + disk_part_iter_init(&piter, disk, + DISK_PITER_INCL_EMPTY); + while ((part = disk_part_iter_next(&piter))) { + if (!(start + length <= part->start_sect || + start >= part->start_sect + part->nr_sects)) { + disk_part_iter_exit(&piter); mutex_unlock(&bdev->bd_mutex); return -EBUSY; } } + disk_part_iter_exit(&piter); + /* all seems OK */ err = add_partition(disk, partno, start, length, ADDPART_FLAG_NONE); mutex_unlock(&bdev->bd_mutex); return err; case BLKPG_DEL_PARTITION: - if (!disk->part[partno - 1]) + part = disk_get_part(disk, partno); + if (!part) return -ENXIO; - bdevp = bdget_disk(disk, partno); + + bdevp = bdget(part_devt(part)); + disk_put_part(part); if (!bdevp) return -ENOMEM; + mutex_lock(&bdevp->bd_mutex); if (bdevp->bd_openers) { mutex_unlock(&bdevp->bd_mutex); diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 885d1409521..84c03d65dcc 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -757,11 +757,15 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector const int rw = bio_data_dir(bio); struct hd_struct *part; - part = disk_map_sector(disk, sector); + rcu_read_lock(); + + part = disk_map_sector_rcu(disk, sector); all_stat_inc(disk, part, ios[rw], sector); all_stat_add(disk, part, ticks[rw], duration, sector); all_stat_add(disk, part, sectors[rw], n_sect, sector); all_stat_add(disk, part, io_ticks, duration, sector); + + rcu_read_unlock(); } void diff --git a/fs/block_dev.c b/fs/block_dev.c index 72e0a2887cb..2f2873b9a04 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -929,6 +929,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) { struct module *owner = NULL; struct gendisk *disk; + struct hd_struct *part = NULL; int ret; int partno; int perm = 0; @@ -978,7 +979,6 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) if (bdev->bd_invalidated) rescan_partitions(disk, bdev); } else { - struct hd_struct *p; struct block_device *whole; whole = bdget_disk(disk, 0); ret = -ENOMEM; @@ -989,16 +989,16 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) if (ret) goto out_first; bdev->bd_contains = whole; - p = disk->part[partno - 1]; + part = disk_get_part(disk, partno); bdev->bd_inode->i_data.backing_dev_info = whole->bd_inode->i_data.backing_dev_info; - if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) { + if (!(disk->flags & GENHD_FL_UP) || + !part || !part->nr_sects) { ret = -ENXIO; goto out_first; } - kobject_get(&p->dev.kobj); - bdev->bd_part = p; - bd_set_size(bdev, (loff_t) p->nr_sects << 9); + bdev->bd_part = part; + bd_set_size(bdev, (loff_t)part->nr_sects << 9); } } else { put_disk(disk); @@ -1027,6 +1027,7 @@ out_first: __blkdev_put(bdev->bd_contains, 1); bdev->bd_contains = NULL; put_disk(disk); + disk_put_part(part); module_put(owner); out: mutex_unlock(&bdev->bd_mutex); @@ -1119,7 +1120,7 @@ static int __blkdev_put(struct block_device *bdev, int for_part) module_put(owner); if (bdev->bd_contains != bdev) { - kobject_put(&bdev->bd_part->dev.kobj); + disk_put_part(bdev->bd_part); bdev->bd_part = NULL; } bdev->bd_disk = NULL; diff --git a/fs/partitions/check.c b/fs/partitions/check.c index e77fa144a07..96c8bf41e45 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -314,19 +314,29 @@ static inline void disk_sysfs_add_subdirs(struct gendisk *disk) kobject_put(k); } +static void delete_partition_rcu_cb(struct rcu_head *head) +{ + struct hd_struct *part = container_of(head, struct hd_struct, rcu_head); + + part->start_sect = 0; + part->nr_sects = 0; + part_stat_set_all(part, 0); + put_device(&part->dev); +} + void delete_partition(struct gendisk *disk, int partno) { - struct hd_struct *p = disk->part[partno - 1]; + struct hd_struct *part; - if (!p) + part = disk->__part[partno-1]; + if (!part) return; - disk->part[partno - 1] = NULL; - p->start_sect = 0; - p->nr_sects = 0; - part_stat_set_all(p, 0); - kobject_put(p->holder_dir); - device_del(&p->dev); - put_device(&p->dev); + + rcu_assign_pointer(disk->__part[partno-1], NULL); + kobject_put(part->holder_dir); + device_del(&part->dev); + + call_rcu(&part->rcu_head, delete_partition_rcu_cb); } static ssize_t whole_disk_show(struct device *dev, @@ -343,7 +353,7 @@ int add_partition(struct gendisk *disk, int partno, struct hd_struct *p; int err; - if (disk->part[partno - 1]) + if (disk->__part[partno - 1]) return -EBUSY; p = kzalloc(sizeof(*p), GFP_KERNEL); @@ -391,7 +401,8 @@ int add_partition(struct gendisk *disk, int partno, } /* everything is up and running, commence */ - disk->part[partno - 1] = p; + INIT_RCU_HEAD(&p->rcu_head); + rcu_assign_pointer(disk->__part[partno - 1], p); /* suppress uevent if the disk supresses it */ if (!disk->dev.uevent_suppress) @@ -414,9 +425,9 @@ out_put: void register_disk(struct gendisk *disk) { struct block_device *bdev; + struct disk_part_iter piter; + struct hd_struct *part; char *s; - int i; - struct hd_struct *p; int err; disk->dev.parent = disk->driverfs_dev; @@ -466,16 +477,16 @@ exit: kobject_uevent(&disk->dev.kobj, KOBJ_ADD); /* announce possible partitions */ - for (i = 0; i < disk_max_parts(disk); i++) { - p = disk->part[i]; - if (!p || !p->nr_sects) - continue; - kobject_uevent(&p->dev.kobj, KOBJ_ADD); - } + disk_part_iter_init(&piter, disk, 0); + while ((part = disk_part_iter_next(&piter))) + kobject_uevent(&part->dev.kobj, KOBJ_ADD); + disk_part_iter_exit(&piter); } int rescan_partitions(struct gendisk *disk, struct block_device *bdev) { + struct disk_part_iter piter; + struct hd_struct *part; struct parsed_partitions *state; int p, res; @@ -485,8 +496,12 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) if (res) return res; bdev->bd_invalidated = 0; - for (p = 1; p <= disk_max_parts(disk); p++) - delete_partition(disk, p); + + disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); + while ((part = disk_part_iter_next(&piter))) + delete_partition(disk, part->partno); + disk_part_iter_exit(&piter); + if (disk->fops->revalidate_disk) disk->fops->revalidate_disk(disk); if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) @@ -545,13 +560,18 @@ EXPORT_SYMBOL(read_dev_sector); void del_gendisk(struct gendisk *disk) { - int p; + struct disk_part_iter piter; + struct hd_struct *part; /* invalidate stuff */ - for (p = disk_max_parts(disk); p > 0; p--) { - invalidate_partition(disk, p); - delete_partition(disk, p); + disk_part_iter_init(&piter, disk, + DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); + while ((part = disk_part_iter_next(&piter))) { + invalidate_partition(disk, part->partno); + delete_partition(disk, part->partno); } + disk_part_iter_exit(&piter); + invalidate_partition(disk, 0); disk->capacity = 0; disk->flags &= ~GENHD_FL_UP; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 0ff75329199..7fbba19e076 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -11,6 +11,7 @@ #include #include +#include #ifdef CONFIG_BLOCK @@ -100,6 +101,7 @@ struct hd_struct { #else struct disk_stats dkstats; #endif + struct rcu_head rcu_head; }; #define GENHD_FL_REMOVABLE 1 @@ -120,7 +122,14 @@ struct gendisk { * disks that can't be partitioned. */ char disk_name[32]; /* name of major driver */ - struct hd_struct **part; /* [indexed by minor - 1] */ + + /* Array of pointers to partitions indexed by partno - 1. + * Protected with matching bdev lock but stat and other + * non-critical accesses use RCU. Always access through + * helpers. + */ + struct hd_struct **__part; + struct block_device_operations *fops; struct request_queue *queue; void *private_data; @@ -171,25 +180,41 @@ static inline dev_t part_devt(struct hd_struct *part) return part->dev.devt; } +extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno); + +static inline void disk_put_part(struct hd_struct *part) +{ + if (likely(part)) + put_device(&part->dev); +} + +/* + * Smarter partition iterator without context limits. + */ +#define DISK_PITER_REVERSE (1 << 0) /* iterate in the reverse direction */ +#define DISK_PITER_INCL_EMPTY (1 << 1) /* include 0-sized parts */ + +struct disk_part_iter { + struct gendisk *disk; + struct hd_struct *part; + int idx; + unsigned int flags; +}; + +extern void disk_part_iter_init(struct disk_part_iter *piter, + struct gendisk *disk, unsigned int flags); +extern struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter); +extern void disk_part_iter_exit(struct disk_part_iter *piter); + +extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, + sector_t sector); + /* * Macros to operate on percpu disk statistics: * * The __ variants should only be called in critical sections. The full * variants disable/enable preemption. */ -static inline struct hd_struct *disk_map_sector(struct gendisk *gendiskp, - sector_t sector) -{ - struct hd_struct *part; - int i; - for (i = 0; i < disk_max_parts(gendiskp); i++) { - part = gendiskp->part[i]; - if (part && part->start_sect <= sector - && sector < part->start_sect + part->nr_sects) - return part; - } - return NULL; -} #ifdef CONFIG_SMP #define __disk_stat_add(gendiskp, field, addnd) \ -- cgit v1.2.3-70-g09d2 From c9959059161ddd7bf4670cf47367033d6b2f79c4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:47:21 +0900 Subject: block: fix diskstats access There are two variants of stat functions - ones prefixed with double underbars which don't care about preemption and ones without which disable preemption before manipulating per-cpu counters. It's unclear whether the underbarred ones assume that preemtion is disabled on entry as some callers don't do that. This patch unifies diskstats access by implementing disk_stat_lock() and disk_stat_unlock() which take care of both RCU (for partition access) and preemption (for per-cpu counter access). diskstats access should always be enclosed between the two functions. As such, there's no need for the versions which disables preemption. They're removed and double underbars ones are renamed to drop the underbars. As an extra argument is added, there's no danger of using the old version unconverted. disk_stat_lock() uses get_cpu() and returns the cpu index and all diskstat functions which access per-cpu counters now has @cpu argument to help RT. This change adds RCU or preemption operations at some places but also collapses several preemption ops into one at others. Overall, the performance difference should be negligible as all involved ops are very lightweight per-cpu ones. Signed-off-by: Tejun Heo Cc: Peter Zijlstra Signed-off-by: Jens Axboe --- block/blk-core.c | 52 +++++++++-------- block/blk-merge.c | 11 ++-- block/genhd.c | 20 ++++--- drivers/block/aoe/aoecmd.c | 15 ++--- drivers/md/dm.c | 26 +++++---- drivers/md/linear.c | 7 ++- drivers/md/multipath.c | 7 ++- drivers/md/raid0.c | 7 ++- drivers/md/raid1.c | 8 ++- drivers/md/raid10.c | 7 ++- drivers/md/raid5.c | 8 ++- fs/partitions/check.c | 7 ++- include/linux/genhd.h | 139 +++++++++++++++++++-------------------------- 13 files changed, 158 insertions(+), 156 deletions(-) (limited to 'drivers') diff --git a/block/blk-core.c b/block/blk-core.c index d6128d9ad60..e0a5ee36849 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -56,25 +56,26 @@ static void drive_stat_acct(struct request *rq, int new_io) { struct hd_struct *part; int rw = rq_data_dir(rq); + int cpu; if (!blk_fs_request(rq) || !rq->rq_disk) return; - rcu_read_lock(); - + cpu = disk_stat_lock(); part = disk_map_sector_rcu(rq->rq_disk, rq->sector); + if (!new_io) - __all_stat_inc(rq->rq_disk, part, merges[rw], rq->sector); + all_stat_inc(cpu, rq->rq_disk, part, merges[rw], rq->sector); else { - disk_round_stats(rq->rq_disk); + disk_round_stats(cpu, rq->rq_disk); rq->rq_disk->in_flight++; if (part) { - part_round_stats(part); + part_round_stats(cpu, part); part->in_flight++; } } - rcu_read_unlock(); + disk_stat_unlock(); } void blk_queue_congestion_threshold(struct request_queue *q) @@ -997,7 +998,7 @@ static inline void add_request(struct request_queue *q, struct request *req) * /proc/diskstats. This accounts immediately for all queue usage up to * the current jiffies and restarts the counters again. */ -void disk_round_stats(struct gendisk *disk) +void disk_round_stats(int cpu, struct gendisk *disk) { unsigned long now = jiffies; @@ -1005,15 +1006,15 @@ void disk_round_stats(struct gendisk *disk) return; if (disk->in_flight) { - __disk_stat_add(disk, time_in_queue, - disk->in_flight * (now - disk->stamp)); - __disk_stat_add(disk, io_ticks, (now - disk->stamp)); + disk_stat_add(cpu, disk, time_in_queue, + disk->in_flight * (now - disk->stamp)); + disk_stat_add(cpu, disk, io_ticks, (now - disk->stamp)); } disk->stamp = now; } EXPORT_SYMBOL_GPL(disk_round_stats); -void part_round_stats(struct hd_struct *part) +void part_round_stats(int cpu, struct hd_struct *part) { unsigned long now = jiffies; @@ -1021,9 +1022,9 @@ void part_round_stats(struct hd_struct *part) return; if (part->in_flight) { - __part_stat_add(part, time_in_queue, - part->in_flight * (now - part->stamp)); - __part_stat_add(part, io_ticks, (now - part->stamp)); + part_stat_add(cpu, part, time_in_queue, + part->in_flight * (now - part->stamp)); + part_stat_add(cpu, part, io_ticks, (now - part->stamp)); } part->stamp = now; } @@ -1563,12 +1564,13 @@ static int __end_that_request_first(struct request *req, int error, if (blk_fs_request(req) && req->rq_disk) { const int rw = rq_data_dir(req); struct hd_struct *part; + int cpu; - rcu_read_lock(); + cpu = disk_stat_lock(); part = disk_map_sector_rcu(req->rq_disk, req->sector); - all_stat_add(req->rq_disk, part, sectors[rw], - nr_bytes >> 9, req->sector); - rcu_read_unlock(); + all_stat_add(cpu, req->rq_disk, part, sectors[rw], + nr_bytes >> 9, req->sector); + disk_stat_unlock(); } total_bytes = bio_nbytes = 0; @@ -1753,21 +1755,21 @@ static void end_that_request_last(struct request *req, int error) unsigned long duration = jiffies - req->start_time; const int rw = rq_data_dir(req); struct hd_struct *part; + int cpu; - rcu_read_lock(); - + cpu = disk_stat_lock(); part = disk_map_sector_rcu(disk, req->sector); - __all_stat_inc(disk, part, ios[rw], req->sector); - __all_stat_add(disk, part, ticks[rw], duration, req->sector); - disk_round_stats(disk); + all_stat_inc(cpu, disk, part, ios[rw], req->sector); + all_stat_add(cpu, disk, part, ticks[rw], duration, req->sector); + disk_round_stats(cpu, disk); disk->in_flight--; if (part) { - part_round_stats(part); + part_round_stats(cpu, part); part->in_flight--; } - rcu_read_unlock(); + disk_stat_unlock(); } if (req->end_io) diff --git a/block/blk-merge.c b/block/blk-merge.c index eb2a3ca5830..d926a24bf1f 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -388,18 +388,19 @@ static int attempt_merge(struct request_queue *q, struct request *req, if (req->rq_disk) { struct hd_struct *part; + int cpu; - rcu_read_lock(); - + cpu = disk_stat_lock(); part = disk_map_sector_rcu(req->rq_disk, req->sector); - disk_round_stats(req->rq_disk); + + disk_round_stats(cpu, req->rq_disk); req->rq_disk->in_flight--; if (part) { - part_round_stats(part); + part_round_stats(cpu, part); part->in_flight--; } - rcu_read_unlock(); + disk_stat_unlock(); } req->ioprio = ioprio_best(req->ioprio, next->ioprio); diff --git a/block/genhd.c b/block/genhd.c index b431d654394..430626e440f 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -633,10 +633,11 @@ static ssize_t disk_stat_show(struct device *dev, struct device_attribute *attr, char *buf) { struct gendisk *disk = dev_to_disk(dev); + int cpu; - preempt_disable(); - disk_round_stats(disk); - preempt_enable(); + cpu = disk_stat_lock(); + disk_round_stats(cpu, disk); + disk_stat_unlock(); return sprintf(buf, "%8lu %8lu %8llu %8u " "%8lu %8lu %8llu %8u " @@ -749,6 +750,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) struct disk_part_iter piter; struct hd_struct *hd; char buf[BDEVNAME_SIZE]; + int cpu; /* if (&gp->dev.kobj.entry == block_class.devices.next) @@ -758,9 +760,9 @@ static int diskstats_show(struct seq_file *seqf, void *v) "\n\n"); */ - preempt_disable(); - disk_round_stats(gp); - preempt_enable(); + cpu = disk_stat_lock(); + disk_round_stats(cpu, gp); + disk_stat_unlock(); seq_printf(seqf, "%4d %4d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n", MAJOR(disk_devt(gp)), MINOR(disk_devt(gp)), disk_name(gp, 0, buf), @@ -777,9 +779,9 @@ static int diskstats_show(struct seq_file *seqf, void *v) /* now show all non-0 size partitions of it */ disk_part_iter_init(&piter, gp, 0); while ((hd = disk_part_iter_next(&piter))) { - preempt_disable(); - part_round_stats(hd); - preempt_enable(); + cpu = disk_stat_lock(); + part_round_stats(cpu, hd); + disk_stat_unlock(); seq_printf(seqf, "%4d %4d %s %lu %lu %llu " "%u %lu %lu %llu %u %u %u %u\n", MAJOR(part_devt(hd)), MINOR(part_devt(hd)), diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 84c03d65dcc..17eed8c025d 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -756,16 +756,17 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector unsigned long n_sect = bio->bi_size >> 9; const int rw = bio_data_dir(bio); struct hd_struct *part; + int cpu; - rcu_read_lock(); - + cpu = disk_stat_lock(); part = disk_map_sector_rcu(disk, sector); - all_stat_inc(disk, part, ios[rw], sector); - all_stat_add(disk, part, ticks[rw], duration, sector); - all_stat_add(disk, part, sectors[rw], n_sect, sector); - all_stat_add(disk, part, io_ticks, duration, sector); - rcu_read_unlock(); + all_stat_inc(cpu, disk, part, ios[rw], sector); + all_stat_add(cpu, disk, part, ticks[rw], duration, sector); + all_stat_add(cpu, disk, part, sectors[rw], n_sect, sector); + all_stat_add(cpu, disk, part, io_ticks, duration, sector); + + disk_stat_unlock(); } void diff --git a/drivers/md/dm.c b/drivers/md/dm.c index a78caad2999..653624792ea 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -377,12 +377,13 @@ static void free_tio(struct mapped_device *md, struct dm_target_io *tio) static void start_io_acct(struct dm_io *io) { struct mapped_device *md = io->md; + int cpu; io->start_time = jiffies; - preempt_disable(); - disk_round_stats(dm_disk(md)); - preempt_enable(); + cpu = disk_stat_lock(); + disk_round_stats(cpu, dm_disk(md)); + disk_stat_unlock(); dm_disk(md)->in_flight = atomic_inc_return(&md->pending); } @@ -391,15 +392,15 @@ static int end_io_acct(struct dm_io *io) struct mapped_device *md = io->md; struct bio *bio = io->bio; unsigned long duration = jiffies - io->start_time; - int pending; + int pending, cpu; int rw = bio_data_dir(bio); - preempt_disable(); - disk_round_stats(dm_disk(md)); - preempt_enable(); - dm_disk(md)->in_flight = pending = atomic_dec_return(&md->pending); + cpu = disk_stat_lock(); + disk_round_stats(cpu, dm_disk(md)); + disk_stat_add(cpu, dm_disk(md), ticks[rw], duration); + disk_stat_unlock(); - disk_stat_add(dm_disk(md), ticks[rw], duration); + dm_disk(md)->in_flight = pending = atomic_dec_return(&md->pending); return !pending; } @@ -885,6 +886,7 @@ static int dm_request(struct request_queue *q, struct bio *bio) int r = -EIO; int rw = bio_data_dir(bio); struct mapped_device *md = q->queuedata; + int cpu; /* * There is no use in forwarding any barrier request since we can't @@ -897,8 +899,10 @@ static int dm_request(struct request_queue *q, struct bio *bio) down_read(&md->io_lock); - disk_stat_inc(dm_disk(md), ios[rw]); - disk_stat_add(dm_disk(md), sectors[rw], bio_sectors(bio)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, dm_disk(md), ios[rw]); + disk_stat_add(cpu, dm_disk(md), sectors[rw], bio_sectors(bio)); + disk_stat_unlock(); /* * If we're suspended we have to queue diff --git a/drivers/md/linear.c b/drivers/md/linear.c index b1eebf88c20..00cbc8e4729 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -318,14 +318,17 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) mddev_t *mddev = q->queuedata; dev_info_t *tmp_dev; sector_t block; + int cpu; if (unlikely(bio_barrier(bio))) { bio_endio(bio, -EOPNOTSUPP); return 0; } - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, mddev->gendisk, ios[rw]); + disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); + disk_stat_unlock(); tmp_dev = which_dev(mddev, bio->bi_sector); block = bio->bi_sector >> 1; diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index c4779ccba1c..182f5a94cdc 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -147,6 +147,7 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio) struct multipath_bh * mp_bh; struct multipath_info *multipath; const int rw = bio_data_dir(bio); + int cpu; if (unlikely(bio_barrier(bio))) { bio_endio(bio, -EOPNOTSUPP); @@ -158,8 +159,10 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio) mp_bh->master_bio = bio; mp_bh->mddev = mddev; - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, mddev->gendisk, ios[rw]); + disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); + disk_stat_unlock(); mp_bh->path = multipath_map(conf); if (mp_bh->path < 0) { diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 18361063566..e26030fa59a 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -399,14 +399,17 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) sector_t chunk; sector_t block, rsect; const int rw = bio_data_dir(bio); + int cpu; if (unlikely(bio_barrier(bio))) { bio_endio(bio, -EOPNOTSUPP); return 0; } - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, mddev->gendisk, ios[rw]); + disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); + disk_stat_unlock(); chunk_size = mddev->chunk_size >> 10; chunk_sects = mddev->chunk_size >> 9; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 0b82030c265..babb13036f9 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -779,7 +779,7 @@ static int make_request(struct request_queue *q, struct bio * bio) struct page **behind_pages = NULL; const int rw = bio_data_dir(bio); const int do_sync = bio_sync(bio); - int do_barriers; + int cpu, do_barriers; mdk_rdev_t *blocked_rdev; /* @@ -804,8 +804,10 @@ static int make_request(struct request_queue *q, struct bio * bio) bitmap = mddev->bitmap; - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, mddev->gendisk, ios[rw]); + disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); + disk_stat_unlock(); /* * make_request() can abort the operation when READA is being diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index d3b9aa09628..5ec80da0a9d 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -789,6 +789,7 @@ static int make_request(struct request_queue *q, struct bio * bio) mirror_info_t *mirror; r10bio_t *r10_bio; struct bio *read_bio; + int cpu; int i; int chunk_sects = conf->chunk_mask + 1; const int rw = bio_data_dir(bio); @@ -843,8 +844,10 @@ static int make_request(struct request_queue *q, struct bio * bio) */ wait_barrier(conf); - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, mddev->gendisk, ios[rw]); + disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); + disk_stat_unlock(); r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 37e546528f9..5899f211515 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3387,7 +3387,7 @@ static int make_request(struct request_queue *q, struct bio * bi) sector_t logical_sector, last_sector; struct stripe_head *sh; const int rw = bio_data_dir(bi); - int remaining; + int cpu, remaining; if (unlikely(bio_barrier(bi))) { bio_endio(bi, -EOPNOTSUPP); @@ -3396,8 +3396,10 @@ static int make_request(struct request_queue *q, struct bio * bi) md_write_start(mddev, bi); - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bi)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, mddev->gendisk, ios[rw]); + disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bi)); + disk_stat_unlock(); if (rw == READ && mddev->reshape_position == MaxSector && diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 96c8bf41e45..c442f0aadac 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -219,10 +219,11 @@ static ssize_t part_stat_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); + int cpu; - preempt_disable(); - part_round_stats(p); - preempt_enable(); + cpu = disk_stat_lock(); + part_round_stats(cpu, p); + disk_stat_unlock(); return sprintf(buf, "%8lu %8lu %8llu %8u " "%8lu %8lu %8llu %8u " diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 7fbba19e076..ac8a901f200 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -209,16 +209,24 @@ extern void disk_part_iter_exit(struct disk_part_iter *piter); extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector); -/* +/* * Macros to operate on percpu disk statistics: * - * The __ variants should only be called in critical sections. The full - * variants disable/enable preemption. + * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters + * and should be called between disk_stat_lock() and + * disk_stat_unlock(). + * + * part_stat_read() can be called at any time. + * + * part_stat_{add|set_all}() and {init|free}_part_stats are for + * internal use only. */ - #ifdef CONFIG_SMP -#define __disk_stat_add(gendiskp, field, addnd) \ - (per_cpu_ptr(gendiskp->dkstats, smp_processor_id())->field += addnd) +#define disk_stat_lock() ({ rcu_read_lock(); get_cpu(); }) +#define disk_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0) + +#define disk_stat_add(cpu, gendiskp, field, addnd) \ + (per_cpu_ptr(gendiskp->dkstats, cpu)->field += addnd) #define disk_stat_read(gendiskp, field) \ ({ \ @@ -229,7 +237,8 @@ extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, res; \ }) -static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) { +static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) +{ int i; for_each_possible_cpu(i) @@ -237,14 +246,14 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) { sizeof(struct disk_stats)); } -#define __part_stat_add(part, field, addnd) \ - (per_cpu_ptr(part->dkstats, smp_processor_id())->field += addnd) +#define part_stat_add(cpu, part, field, addnd) \ + (per_cpu_ptr(part->dkstats, cpu)->field += addnd) -#define __all_stat_add(gendiskp, part, field, addnd, sector) \ -({ \ - if (part) \ - __part_stat_add(part, field, addnd); \ - __disk_stat_add(gendiskp, field, addnd); \ +#define all_stat_add(cpu, gendiskp, part, field, addnd, sector) \ +({ \ + if (part) \ + part_stat_add(cpu, part, field, addnd); \ + disk_stat_add(cpu, gendiskp, field, addnd); \ }) #define part_stat_read(part, field) \ @@ -264,10 +273,13 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) memset(per_cpu_ptr(part->dkstats, i), value, sizeof(struct disk_stats)); } - + #else /* !CONFIG_SMP */ -#define __disk_stat_add(gendiskp, field, addnd) \ - (gendiskp->dkstats.field += addnd) +#define disk_stat_lock() ({ rcu_read_lock(); 0; }) +#define disk_stat_unlock() rcu_read_unlock() + +#define disk_stat_add(cpu, gendiskp, field, addnd) \ + (gendiskp->dkstats.field += addnd) #define disk_stat_read(gendiskp, field) (gendiskp->dkstats.field) static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) @@ -275,14 +287,14 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) memset(&gendiskp->dkstats, value, sizeof (struct disk_stats)); } -#define __part_stat_add(part, field, addnd) \ +#define part_stat_add(cpu, part, field, addnd) \ (part->dkstats.field += addnd) -#define __all_stat_add(gendiskp, part, field, addnd, sector) \ -({ \ - if (part) \ - part->dkstats.field += addnd; \ - __disk_stat_add(gendiskp, field, addnd); \ +#define all_stat_add(cpu, gendiskp, part, field, addnd, sector) \ +({ \ + if (part) \ + part_stat_add(cpu, part, field, addnd); \ + disk_stat_add(cpu, gendiskp, field, addnd); \ }) #define part_stat_read(part, field) (part->dkstats.field) @@ -294,63 +306,26 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) #endif /* CONFIG_SMP */ -#define disk_stat_add(gendiskp, field, addnd) \ - do { \ - preempt_disable(); \ - __disk_stat_add(gendiskp, field, addnd); \ - preempt_enable(); \ - } while (0) - -#define __disk_stat_dec(gendiskp, field) __disk_stat_add(gendiskp, field, -1) -#define disk_stat_dec(gendiskp, field) disk_stat_add(gendiskp, field, -1) - -#define __disk_stat_inc(gendiskp, field) __disk_stat_add(gendiskp, field, 1) -#define disk_stat_inc(gendiskp, field) disk_stat_add(gendiskp, field, 1) - -#define __disk_stat_sub(gendiskp, field, subnd) \ - __disk_stat_add(gendiskp, field, -subnd) -#define disk_stat_sub(gendiskp, field, subnd) \ - disk_stat_add(gendiskp, field, -subnd) - -#define part_stat_add(gendiskp, field, addnd) \ - do { \ - preempt_disable(); \ - __part_stat_add(gendiskp, field, addnd);\ - preempt_enable(); \ - } while (0) - -#define __part_stat_dec(gendiskp, field) __part_stat_add(gendiskp, field, -1) -#define part_stat_dec(gendiskp, field) part_stat_add(gendiskp, field, -1) - -#define __part_stat_inc(gendiskp, field) __part_stat_add(gendiskp, field, 1) -#define part_stat_inc(gendiskp, field) part_stat_add(gendiskp, field, 1) - -#define __part_stat_sub(gendiskp, field, subnd) \ - __part_stat_add(gendiskp, field, -subnd) -#define part_stat_sub(gendiskp, field, subnd) \ - part_stat_add(gendiskp, field, -subnd) - -#define all_stat_add(gendiskp, part, field, addnd, sector) \ - do { \ - preempt_disable(); \ - __all_stat_add(gendiskp, part, field, addnd, sector); \ - preempt_enable(); \ - } while (0) - -#define __all_stat_dec(gendiskp, field, sector) \ - __all_stat_add(gendiskp, field, -1, sector) -#define all_stat_dec(gendiskp, field, sector) \ - all_stat_add(gendiskp, field, -1, sector) - -#define __all_stat_inc(gendiskp, part, field, sector) \ - __all_stat_add(gendiskp, part, field, 1, sector) -#define all_stat_inc(gendiskp, part, field, sector) \ - all_stat_add(gendiskp, part, field, 1, sector) - -#define __all_stat_sub(gendiskp, part, field, subnd, sector) \ - __all_stat_add(gendiskp, part, field, -subnd, sector) -#define all_stat_sub(gendiskp, part, field, subnd, sector) \ - all_stat_add(gendiskp, part, field, -subnd, sector) +#define disk_stat_dec(cpu, gendiskp, field) \ + disk_stat_add(cpu, gendiskp, field, -1) +#define disk_stat_inc(cpu, gendiskp, field) \ + disk_stat_add(cpu, gendiskp, field, 1) +#define disk_stat_sub(cpu, gendiskp, field, subnd) \ + disk_stat_add(cpu, gendiskp, field, -subnd) + +#define part_stat_dec(cpu, gendiskp, field) \ + part_stat_add(cpu, gendiskp, field, -1) +#define part_stat_inc(cpu, gendiskp, field) \ + part_stat_add(cpu, gendiskp, field, 1) +#define part_stat_sub(cpu, gendiskp, field, subnd) \ + part_stat_add(cpu, gendiskp, field, -subnd) + +#define all_stat_dec(cpu, gendiskp, field, sector) \ + all_stat_add(cpu, gendiskp, field, -1, sector) +#define all_stat_inc(cpu, gendiskp, part, field, sector) \ + all_stat_add(cpu, gendiskp, part, field, 1, sector) +#define all_stat_sub(cpu, gendiskp, part, field, subnd, sector) \ + all_stat_add(cpu, gendiskp, part, field, -subnd, sector) /* Inlines to alloc and free disk stats in struct gendisk */ #ifdef CONFIG_SMP @@ -401,8 +376,8 @@ static inline void free_part_stats(struct hd_struct *part) #endif /* CONFIG_SMP */ /* drivers/block/ll_rw_blk.c */ -extern void disk_round_stats(struct gendisk *disk); -extern void part_round_stats(struct hd_struct *part); +extern void disk_round_stats(int cpu, struct gendisk *disk); +extern void part_round_stats(int cpu, struct hd_struct *part); /* drivers/block/genhd.c */ extern int get_blkdev_list(char *, int); -- cgit v1.2.3-70-g09d2 From f615b48cc7df7cac3865ec76ac1a5bb04d3e07f4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:47:24 +0900 Subject: sd/ide-disk: apply extended minors to sd and ide Update sd and ide-disk such that they can take advantage of extended minors. ide-disk already has 64 minors per device and currently doesn't use extended minors although after this patch it can be turned on by simply tweaking constants. sd only had 16 minors per device causing problems on certain peculiar configurations. This patch lifts the restriction and enables it to use upto 64 minors. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/ide/ide-disk.c | 11 ++++++++--- drivers/scsi/sd.c | 9 +++++++-- 2 files changed, 15 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 07ef88bd109..7a88de9ada2 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -41,6 +41,10 @@ #include #include +#define IDE_DISK_PARTS (1 << PARTN_BITS) +#define IDE_DISK_MINORS IDE_DISK_PARTS +#define IDE_DISK_EXT_MINORS (IDE_DISK_PARTS - IDE_DISK_MINORS) + struct ide_disk_obj { ide_drive_t *drive; ide_driver_t *driver; @@ -1151,8 +1155,8 @@ static int ide_disk_probe(ide_drive_t *drive) if (!idkp) goto failed; - g = alloc_disk_node(1 << PARTN_BITS, - hwif_to_node(drive->hwif)); + g = alloc_disk_ext_node(IDE_DISK_MINORS, IDE_DISK_EXT_MINORS, + hwif_to_node(drive->hwif)); if (!g) goto out_free_idkp; @@ -1178,7 +1182,8 @@ static int ide_disk_probe(ide_drive_t *drive) } else drive->attach = 1; - g->minors = 1 << PARTN_BITS; + g->minors = IDE_DISK_MINORS; + g->ext_minors = IDE_DISK_EXT_MINORS; g->driverfs_dev = &drive->gendev; g->flags = drive->removable ? GENHD_FL_REMOVABLE : 0; set_capacity(g, idedisk_capacity(drive)); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index e5e7d785645..d1bb0e1d2d2 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -86,6 +86,10 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_DISK); MODULE_ALIAS_SCSI_DEVICE(TYPE_MOD); MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC); +#define SD_PARTS 64 +#define SD_MINORS 16 +#define SD_EXT_MINORS (SD_PARTS - SD_MINORS) + static int sd_revalidate_disk(struct gendisk *); static int sd_probe(struct device *); static int sd_remove(struct device *); @@ -1801,7 +1805,7 @@ static int sd_probe(struct device *dev) if (!sdkp) goto out; - gd = alloc_disk(16); + gd = alloc_disk_ext(SD_MINORS, SD_EXT_MINORS); if (!gd) goto out_free; @@ -1845,7 +1849,8 @@ static int sd_probe(struct device *dev) gd->major = sd_major((index & 0xf0) >> 4); gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00); - gd->minors = 16; + gd->minors = SD_MINORS; + gd->ext_minors = SD_EXT_MINORS; gd->fops = &sd_fops; if (index < 26) { -- cgit v1.2.3-70-g09d2 From 870d6656126add8e383645732b03df2b7ccd4f94 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:47:25 +0900 Subject: block: implement CONFIG_DEBUG_BLOCK_EXT_DEVT Extended devt introduces non-contiguos device numbers. This patch implements a debug option which forces most devt allocations to be from the extended area and spreads them out. This is enabled by default if DEBUG_KERNEL is set and achieves... 1. Detects code paths in kernel or userland which expect predetermined consecutive device numbers. 2. When something goes wrong, avoid corruption as adding to the minor of earlier partition won't lead to the wrong but valid device. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/genhd.c | 38 +++++++++++++++++++++++++++++++++++--- drivers/ide/ide-disk.c | 6 ++++++ drivers/scsi/sd.c | 6 ++++++ lib/Kconfig.debug | 16 ++++++++++++++++ 4 files changed, 63 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/block/genhd.c b/block/genhd.c index ee4b13520e5..67e5a59ced2 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -298,6 +298,38 @@ EXPORT_SYMBOL(unregister_blkdev); static struct kobj_map *bdev_map; +/** + * blk_mangle_minor - scatter minor numbers apart + * @minor: minor number to mangle + * + * Scatter consecutively allocated @minor number apart if MANGLE_DEVT + * is enabled. Mangling twice gives the original value. + * + * RETURNS: + * Mangled value. + * + * CONTEXT: + * Don't care. + */ +static int blk_mangle_minor(int minor) +{ +#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT + int i; + + for (i = 0; i < MINORBITS / 2; i++) { + int low = minor & (1 << i); + int high = minor & (1 << (MINORBITS - 1 - i)); + int distance = MINORBITS - 1 - 2 * i; + + minor ^= low | high; /* clear both bits */ + low <<= distance; /* swap the positions */ + high >>= distance; + minor |= low | high; /* and set */ + } +#endif + return minor; +} + /** * blk_alloc_devt - allocate a dev_t for a partition * @part: partition to allocate dev_t for @@ -339,7 +371,7 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt) return -EBUSY; } - *devt = MKDEV(BLOCK_EXT_MAJOR, idx); + *devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx)); return 0; } @@ -361,7 +393,7 @@ void blk_free_devt(dev_t devt) if (MAJOR(devt) == BLOCK_EXT_MAJOR) { mutex_lock(&ext_devt_mutex); - idr_remove(&ext_devt_idr, MINOR(devt)); + idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); mutex_unlock(&ext_devt_mutex); } } @@ -473,7 +505,7 @@ struct gendisk *get_gendisk(dev_t devt, int *partno) struct hd_struct *part; mutex_lock(&ext_devt_mutex); - part = idr_find(&ext_devt_idr, MINOR(devt)); + part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); if (part && get_disk(part_to_disk(part))) { *partno = part->partno; disk = part_to_disk(part); diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 7a88de9ada2..a072df5053a 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -42,7 +42,13 @@ #include #define IDE_DISK_PARTS (1 << PARTN_BITS) + +#if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT) #define IDE_DISK_MINORS IDE_DISK_PARTS +#else +#define IDE_DISK_MINORS 1 +#endif + #define IDE_DISK_EXT_MINORS (IDE_DISK_PARTS - IDE_DISK_MINORS) struct ide_disk_obj { diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index d1bb0e1d2d2..280d231a86e 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -87,7 +87,13 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_MOD); MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC); #define SD_PARTS 64 + +#if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT) #define SD_MINORS 16 +#else +#define SD_MINORS 1 +#endif + #define SD_EXT_MINORS (SD_PARTS - SD_MINORS) static int sd_revalidate_disk(struct gendisk *); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 0b504814e37..5a536f703a8 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -624,6 +624,22 @@ config BACKTRACE_SELF_TEST Say N if you are unsure. +config DEBUG_BLOCK_EXT_DEVT + bool "Force extended block device numbers and spread them" + depends on DEBUG_KERNEL + depends on BLOCK + default y + help + Conventionally, block device numbers are allocated from + predetermined contiguous area. However, extended block area + may introduce non-contiguous block device numbers. This + option forces most block device numbers to be allocated from + the extended space and spreads them to discover kernel or + userland code paths which assume predetermined contiguous + device number allocation. + + Say N if you are unsure. + config LKDTM tristate "Linux Kernel Dump Test Tool Module" depends on DEBUG_KERNEL -- cgit v1.2.3-70-g09d2 From ed9e1982347b36573cd622ee5f4e2a7ccd79b3fd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:05 +0900 Subject: block: implement and use {disk|part}_to_dev() Implement {disk|part}_to_dev() and use them to access generic device instead of directly dereferencing {disk|part}->dev. To make sure no user is left behind, rename generic devices fields to __dev. This is in preparation of unifying partition 0 handling with other partitions. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-integrity.c | 5 +-- block/blk-sysfs.c | 4 +-- block/genhd.c | 27 ++++++++-------- drivers/block/aoe/aoeblk.c | 4 +-- drivers/block/nbd.c | 4 +-- drivers/ide/ide-probe.c | 2 +- drivers/md/dm.c | 4 +-- drivers/md/md.c | 10 +++--- fs/block_dev.c | 4 +-- fs/partitions/check.c | 79 ++++++++++++++++++++++++---------------------- include/linux/genhd.h | 20 ++++++------ 11 files changed, 86 insertions(+), 77 deletions(-) (limited to 'drivers') diff --git a/block/blk-integrity.c b/block/blk-integrity.c index d87606eaca1..69023da6315 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -331,7 +331,8 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template) return -1; if (kobject_init_and_add(&bi->kobj, &integrity_ktype, - &disk->dev.kobj, "%s", "integrity")) { + &disk_to_dev(disk)->kobj, + "%s", "integrity")) { kmem_cache_free(integrity_cachep, bi); return -1; } @@ -375,7 +376,7 @@ void blk_integrity_unregister(struct gendisk *disk) kobject_uevent(&bi->kobj, KOBJ_REMOVE); kobject_del(&bi->kobj); - kobject_put(&disk->dev.kobj); + kobject_put(&disk_to_dev(disk)->kobj); kmem_cache_free(integrity_cachep, bi); } EXPORT_SYMBOL(blk_integrity_unregister); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 304ec73ab82..b9a6ed16664 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -310,7 +310,7 @@ int blk_register_queue(struct gendisk *disk) if (!q->request_fn) return 0; - ret = kobject_add(&q->kobj, kobject_get(&disk->dev.kobj), + ret = kobject_add(&q->kobj, kobject_get(&disk_to_dev(disk)->kobj), "%s", "queue"); if (ret < 0) return ret; @@ -339,6 +339,6 @@ void blk_unregister_queue(struct gendisk *disk) kobject_uevent(&q->kobj, KOBJ_REMOVE); kobject_del(&q->kobj); - kobject_put(&disk->dev.kobj); + kobject_put(&disk_to_dev(disk)->kobj); } } diff --git a/block/genhd.c b/block/genhd.c index 67e5a59ced2..0a2f16bd54b 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -59,7 +59,7 @@ struct hd_struct *disk_get_part(struct gendisk *disk, int partno) rcu_read_lock(); part = rcu_dereference(disk->__part[partno - 1]); if (part) - get_device(&part->dev); + get_device(part_to_dev(part)); rcu_read_unlock(); return part; @@ -130,7 +130,7 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects) continue; - get_device(&part->dev); + get_device(part_to_dev(part)); piter->part = part; piter->idx += inc; break; @@ -435,7 +435,7 @@ static struct kobject *exact_match(dev_t devt, int *partno, void *data) { struct gendisk *p = data; - return &p->dev.kobj; + return &disk_to_dev(p)->kobj; } static int exact_lock(dev_t devt, void *data) @@ -460,7 +460,7 @@ void add_disk(struct gendisk *disk) int retval; disk->flags |= GENHD_FL_UP; - disk->dev.devt = MKDEV(disk->major, disk->first_minor); + disk_to_dev(disk)->devt = MKDEV(disk->major, disk->first_minor); blk_register_region(disk_devt(disk), disk->minors, NULL, exact_match, exact_lock, disk); register_disk(disk); @@ -468,7 +468,8 @@ void add_disk(struct gendisk *disk) bdi = &disk->queue->backing_dev_info; bdi_register_dev(bdi, disk_devt(disk)); - retval = sysfs_create_link(&disk->dev.kobj, &bdi->dev->kobj, "bdi"); + retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, + "bdi"); WARN_ON(retval); } @@ -477,7 +478,7 @@ EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */ void unlink_gendisk(struct gendisk *disk) { - sysfs_remove_link(&disk->dev.kobj, "bdi"); + sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); bdi_unregister(&disk->queue->backing_dev_info); blk_unregister_queue(disk); blk_unregister_region(disk_devt(disk), disk->minors); @@ -903,7 +904,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) int cpu; /* - if (&gp->dev.kobj.entry == block_class.devices.next) + if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next) seq_puts(seqf, "major minor name" " rio rmerge rsect ruse wio wmerge " "wsect wuse running use aveq" @@ -972,7 +973,7 @@ static void media_change_notify_thread(struct work_struct *work) * set enviroment vars to indicate which event this is for * so that user space will know to go check the media status. */ - kobject_uevent_env(&gd->dev.kobj, KOBJ_CHANGE, envp); + kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp); put_device(gd->driverfs_dev); } @@ -1062,9 +1063,9 @@ struct gendisk *alloc_disk_ext_node(int minors, int ext_minors, int node_id) disk->minors = minors; disk->ext_minors = ext_minors; rand_initialize_disk(disk); - disk->dev.class = &block_class; - disk->dev.type = &disk_type; - device_initialize(&disk->dev); + disk_to_dev(disk)->class = &block_class; + disk_to_dev(disk)->type = &disk_type; + device_initialize(disk_to_dev(disk)); INIT_WORK(&disk->async_notify, media_change_notify_thread); } @@ -1086,7 +1087,7 @@ struct kobject *get_disk(struct gendisk *disk) owner = disk->fops->owner; if (owner && !try_module_get(owner)) return NULL; - kobj = kobject_get(&disk->dev.kobj); + kobj = kobject_get(&disk_to_dev(disk)->kobj); if (kobj == NULL) { module_put(owner); return NULL; @@ -1100,7 +1101,7 @@ EXPORT_SYMBOL(get_disk); void put_disk(struct gendisk *disk) { if (disk) - kobject_put(&disk->dev.kobj); + kobject_put(&disk_to_dev(disk)->kobj); } EXPORT_SYMBOL(put_disk); diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 0c39782b266..3edb6cb7d68 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -109,12 +109,12 @@ static const struct attribute_group attr_group = { static int aoedisk_add_sysfs(struct aoedev *d) { - return sysfs_create_group(&d->gd->dev.kobj, &attr_group); + return sysfs_create_group(&disk_to_dev(d->gd)->kobj, &attr_group); } void aoedisk_rm_sysfs(struct aoedev *d) { - sysfs_remove_group(&d->gd->dev.kobj, &attr_group); + sysfs_remove_group(&disk_to_dev(d->gd)->kobj, &attr_group); } static int diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 1778e4a2c67..7b3351260d5 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -403,7 +403,7 @@ static int nbd_do_it(struct nbd_device *lo) BUG_ON(lo->magic != LO_MAGIC); lo->pid = current->pid; - ret = sysfs_create_file(&lo->disk->dev.kobj, &pid_attr.attr); + ret = sysfs_create_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr); if (ret) { printk(KERN_ERR "nbd: sysfs_create_file failed!"); return ret; @@ -412,7 +412,7 @@ static int nbd_do_it(struct nbd_device *lo) while ((req = nbd_read_stat(lo)) != NULL) nbd_end_request(req); - sysfs_remove_file(&lo->disk->dev.kobj, &pid_attr.attr); + sysfs_remove_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr); return 0; } diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index a51a30e9eab..70aa86c8807 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -1188,7 +1188,7 @@ static struct kobject *exact_match(dev_t dev, int *part, void *data) { struct gendisk *p = data; *part &= (1 << PARTN_BITS) - 1; - return &p->dev.kobj; + return &disk_to_dev(p)->kobj; } static int exact_lock(dev_t dev, void *data) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 653624792ea..637806695bb 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1186,7 +1186,7 @@ static void event_callback(void *context) list_splice_init(&md->uevent_list, &uevents); spin_unlock_irqrestore(&md->uevent_lock, flags); - dm_send_uevents(&uevents, &md->disk->dev.kobj); + dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj); atomic_inc(&md->event_nr); wake_up(&md->eventq); @@ -1643,7 +1643,7 @@ out: *---------------------------------------------------------------*/ void dm_kobject_uevent(struct mapped_device *md) { - kobject_uevent(&md->disk->dev.kobj, KOBJ_CHANGE); + kobject_uevent(&disk_to_dev(md->disk)->kobj, KOBJ_CHANGE); } uint32_t dm_next_uevent_seq(struct mapped_device *md) diff --git a/drivers/md/md.c b/drivers/md/md.c index deeac4b4417..96e9fccd2ea 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1465,9 +1465,9 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) goto fail; if (rdev->bdev->bd_part) - ko = &rdev->bdev->bd_part->dev.kobj; + ko = &part_to_dev(rdev->bdev->bd_part)->kobj; else - ko = &rdev->bdev->bd_disk->dev.kobj; + ko = &disk_to_dev(rdev->bdev->bd_disk)->kobj; if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) { kobject_del(&rdev->kobj); goto fail; @@ -3470,8 +3470,8 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) disk->queue = mddev->queue; add_disk(disk); mddev->gendisk = disk; - error = kobject_init_and_add(&mddev->kobj, &md_ktype, &disk->dev.kobj, - "%s", "md"); + error = kobject_init_and_add(&mddev->kobj, &md_ktype, + &disk_to_dev(disk)->kobj, "%s", "md"); mutex_unlock(&disks_mutex); if (error) printk(KERN_WARNING "md: cannot register %s/md - name in use\n", @@ -3761,7 +3761,7 @@ static int do_md_run(mddev_t * mddev) sysfs_notify(&mddev->kobj, NULL, "array_state"); sysfs_notify(&mddev->kobj, NULL, "sync_action"); sysfs_notify(&mddev->kobj, NULL, "degraded"); - kobject_uevent(&mddev->gendisk->dev.kobj, KOBJ_CHANGE); + kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); return 0; } diff --git a/fs/block_dev.c b/fs/block_dev.c index 2f2873b9a04..a02df22f37c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -543,9 +543,9 @@ EXPORT_SYMBOL(bd_release); static struct kobject *bdev_get_kobj(struct block_device *bdev) { if (bdev->bd_contains != bdev) - return kobject_get(&bdev->bd_part->dev.kobj); + return kobject_get(&part_to_dev(bdev->bd_part)->kobj); else - return kobject_get(&bdev->bd_disk->dev.kobj); + return kobject_get(&disk_to_dev(bdev->bd_disk)->kobj); } static struct kobject *bdev_get_holder(struct block_device *bdev) diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 0d4b7f28f13..ac0df3acdcd 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -309,7 +309,7 @@ static inline void disk_sysfs_add_subdirs(struct gendisk *disk) { struct kobject *k; - k = kobject_get(&disk->dev.kobj); + k = kobject_get(&disk_to_dev(disk)->kobj); disk->holder_dir = kobject_create_and_add("holders", k); disk->slave_dir = kobject_create_and_add("slaves", k); kobject_put(k); @@ -322,7 +322,7 @@ static void delete_partition_rcu_cb(struct rcu_head *head) part->start_sect = 0; part->nr_sects = 0; part_stat_set_all(part, 0); - put_device(&part->dev); + put_device(part_to_dev(part)); } void delete_partition(struct gendisk *disk, int partno) @@ -336,7 +336,7 @@ void delete_partition(struct gendisk *disk, int partno) blk_free_devt(part_devt(part)); rcu_assign_pointer(disk->__part[partno-1], NULL); kobject_put(part->holder_dir); - device_del(&part->dev); + device_del(part_to_dev(part)); call_rcu(&part->rcu_head, delete_partition_rcu_cb); } @@ -354,6 +354,9 @@ int add_partition(struct gendisk *disk, int partno, { struct hd_struct *p; dev_t devt = MKDEV(0, 0); + struct device *ddev = disk_to_dev(disk); + struct device *pdev; + const char *dname; int err; if (disk->__part[partno - 1]) @@ -367,42 +370,43 @@ int add_partition(struct gendisk *disk, int partno, err = -ENOMEM; goto out_free; } + pdev = part_to_dev(p); + p->start_sect = start; p->nr_sects = len; p->partno = partno; p->policy = disk->policy; - if (isdigit(disk->dev.bus_id[strlen(disk->dev.bus_id)-1])) - snprintf(p->dev.bus_id, BUS_ID_SIZE, - "%sp%d", disk->dev.bus_id, partno); + dname = dev_name(ddev); + if (isdigit(dname[strlen(dname) - 1])) + snprintf(pdev->bus_id, BUS_ID_SIZE, "%sp%d", dname, partno); else - snprintf(p->dev.bus_id, BUS_ID_SIZE, - "%s%d", disk->dev.bus_id, partno); + snprintf(pdev->bus_id, BUS_ID_SIZE, "%s%d", dname, partno); - device_initialize(&p->dev); - p->dev.class = &block_class; - p->dev.type = &part_type; - p->dev.parent = &disk->dev; + device_initialize(pdev); + pdev->class = &block_class; + pdev->type = &part_type; + pdev->parent = ddev; err = blk_alloc_devt(p, &devt); if (err) - goto out_put; - p->dev.devt = devt; + goto out_free; + pdev->devt = devt; /* delay uevent until 'holders' subdir is created */ - p->dev.uevent_suppress = 1; - err = device_add(&p->dev); + pdev->uevent_suppress = 1; + err = device_add(pdev); if (err) goto out_put; err = -ENOMEM; - p->holder_dir = kobject_create_and_add("holders", &p->dev.kobj); + p->holder_dir = kobject_create_and_add("holders", &pdev->kobj); if (!p->holder_dir) goto out_del; - p->dev.uevent_suppress = 0; + pdev->uevent_suppress = 0; if (flags & ADDPART_FLAG_WHOLEDISK) { - err = device_create_file(&p->dev, &dev_attr_whole_disk); + err = device_create_file(pdev, &dev_attr_whole_disk); if (err) goto out_del; } @@ -412,8 +416,8 @@ int add_partition(struct gendisk *disk, int partno, rcu_assign_pointer(disk->__part[partno - 1], p); /* suppress uevent if the disk supresses it */ - if (!disk->dev.uevent_suppress) - kobject_uevent(&p->dev.kobj, KOBJ_ADD); + if (!ddev->uevent_suppress) + kobject_uevent(&pdev->kobj, KOBJ_ADD); return 0; @@ -422,9 +426,9 @@ out_free: return err; out_del: kobject_put(p->holder_dir); - device_del(&p->dev); + device_del(pdev); out_put: - put_device(&p->dev); + put_device(pdev); blk_free_devt(devt); return err; } @@ -432,30 +436,31 @@ out_put: /* Not exported, helper to add_disk(). */ void register_disk(struct gendisk *disk) { + struct device *ddev = disk_to_dev(disk); struct block_device *bdev; struct disk_part_iter piter; struct hd_struct *part; char *s; int err; - disk->dev.parent = disk->driverfs_dev; + ddev->parent = disk->driverfs_dev; - strlcpy(disk->dev.bus_id, disk->disk_name, BUS_ID_SIZE); + strlcpy(ddev->bus_id, disk->disk_name, BUS_ID_SIZE); /* ewww... some of these buggers have / in the name... */ - s = strchr(disk->dev.bus_id, '/'); + s = strchr(ddev->bus_id, '/'); if (s) *s = '!'; /* delay uevents, until we scanned partition table */ - disk->dev.uevent_suppress = 1; + ddev->uevent_suppress = 1; - if (device_add(&disk->dev)) + if (device_add(ddev)) return; #ifndef CONFIG_SYSFS_DEPRECATED - err = sysfs_create_link(block_depr, &disk->dev.kobj, - kobject_name(&disk->dev.kobj)); + err = sysfs_create_link(block_depr, &ddev->kobj, + kobject_name(&ddev->kobj)); if (err) { - device_del(&disk->dev); + device_del(ddev); return; } #endif @@ -481,13 +486,13 @@ void register_disk(struct gendisk *disk) exit: /* announce disk after possible partitions are created */ - disk->dev.uevent_suppress = 0; - kobject_uevent(&disk->dev.kobj, KOBJ_ADD); + ddev->uevent_suppress = 0; + kobject_uevent(&ddev->kobj, KOBJ_ADD); /* announce possible partitions */ disk_part_iter_init(&piter, disk, 0); while ((part = disk_part_iter_next(&piter))) - kobject_uevent(&part->dev.kobj, KOBJ_ADD); + kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD); disk_part_iter_exit(&piter); } @@ -518,7 +523,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) return -EIO; /* tell userspace that the media / partition table may have changed */ - kobject_uevent(&disk->dev.kobj, KOBJ_CHANGE); + kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); for (p = 1; p < state->limit; p++) { sector_t size = state->parts[p].size; @@ -591,7 +596,7 @@ void del_gendisk(struct gendisk *disk) kobject_put(disk->slave_dir); disk->driverfs_dev = NULL; #ifndef CONFIG_SYSFS_DEPRECATED - sysfs_remove_link(block_depr, disk->dev.bus_id); + sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); #endif - device_del(&disk->dev); + device_del(disk_to_dev(disk)); } diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 6fc53242406..e4e18c509ac 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -15,9 +15,11 @@ #ifdef CONFIG_BLOCK -#define kobj_to_dev(k) container_of(k, struct device, kobj) -#define dev_to_disk(device) container_of(device, struct gendisk, dev) -#define dev_to_part(device) container_of(device, struct hd_struct, dev) +#define kobj_to_dev(k) container_of((k), struct device, kobj) +#define dev_to_disk(device) container_of((device), struct gendisk, __dev) +#define dev_to_part(device) container_of((device), struct hd_struct, __dev) +#define disk_to_dev(disk) (&((disk)->__dev)) +#define part_to_dev(part) (&((part)->__dev)) extern struct device_type part_type; extern struct kobject *block_depr; @@ -88,7 +90,7 @@ struct disk_stats { struct hd_struct { sector_t start_sect; sector_t nr_sects; - struct device dev; + struct device __dev; struct kobject *holder_dir; int policy, partno; #ifdef CONFIG_FAIL_MAKE_REQUEST @@ -139,7 +141,7 @@ struct gendisk { int flags; struct device *driverfs_dev; // FIXME: remove - struct device dev; + struct device __dev; struct kobject *holder_dir; struct kobject *slave_dir; @@ -163,7 +165,7 @@ struct gendisk { static inline struct gendisk *part_to_disk(struct hd_struct *part) { if (likely(part)) - return dev_to_disk((part)->dev.parent); + return dev_to_disk(part_to_dev(part)->parent); return NULL; } @@ -174,12 +176,12 @@ static inline int disk_max_parts(struct gendisk *disk) static inline dev_t disk_devt(struct gendisk *disk) { - return disk->dev.devt; + return disk_to_dev(disk)->devt; } static inline dev_t part_devt(struct hd_struct *part) { - return part->dev.devt; + return part_to_dev(part)->devt; } extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno); @@ -187,7 +189,7 @@ extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno); static inline void disk_put_part(struct hd_struct *part) { if (likely(part)) - put_device(&part->dev); + put_device(part_to_dev(part)); } /* -- cgit v1.2.3-70-g09d2 From 80795aefb76d10c5d698e60c7e7750b5330787da Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:07 +0900 Subject: block: move capacity from disk to part0 Move disk->capacity to part0->nr_sects and convert all users who directly accessed the field to use {get|set}_capacity(). This is done early to allow the __dev field to be moved. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/block/aoe/aoeblk.c | 2 +- drivers/block/aoe/aoecmd.c | 4 ++-- drivers/block/aoe/aoedev.c | 2 +- fs/partitions/check.c | 2 +- include/linux/genhd.h | 5 ++--- 5 files changed, 7 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 3edb6cb7d68..aa69556c348 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -276,7 +276,7 @@ aoeblk_gdalloc(void *vp) gd->first_minor = d->sysminor * AOE_PARTITIONS; gd->fops = &aoe_bdops; gd->private_data = d; - gd->capacity = d->ssize; + set_capacity(gd, d->ssize); snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d", d->aoemajor, d->aoeminor); diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 17eed8c025d..934800f979c 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -645,7 +645,7 @@ aoecmd_sleepwork(struct work_struct *work) unsigned long flags; u64 ssize; - ssize = d->gd->capacity; + ssize = get_capacity(d->gd); bd = bdget_disk(d->gd, 0); if (bd) { @@ -707,7 +707,7 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id) if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE)) return; if (d->gd != NULL) { - d->gd->capacity = ssize; + set_capacity(d->gd, ssize); d->flags |= DEVFL_NEWSIZE; } else d->flags |= DEVFL_GDALLOC; diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index a1d813ab0d6..6a8038d115b 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -91,7 +91,7 @@ aoedev_downdev(struct aoedev *d) } if (d->gd) - d->gd->capacity = 0; + set_capacity(d->gd, 0); d->flags &= ~DEVFL_UP; } diff --git a/fs/partitions/check.c b/fs/partitions/check.c index b60699c271a..902b95f1f9d 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -586,7 +586,7 @@ void del_gendisk(struct gendisk *disk) disk_part_iter_exit(&piter); invalidate_partition(disk, 0); - disk->capacity = 0; + set_capacity(disk, 0); disk->flags &= ~GENHD_FL_UP; unlink_gendisk(disk); disk_stat_set_all(disk, 0); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 9e866a2aee5..1cf828148ec 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -138,7 +138,6 @@ struct gendisk { struct block_device_operations *fops; struct request_queue *queue; void *private_data; - sector_t capacity; int flags; struct device *driverfs_dev; // FIXME: remove @@ -411,11 +410,11 @@ static inline sector_t get_start_sect(struct block_device *bdev) } static inline sector_t get_capacity(struct gendisk *disk) { - return disk->capacity; + return disk->part0.nr_sects; } static inline void set_capacity(struct gendisk *disk, sector_t size) { - disk->capacity = size; + disk->part0.nr_sects = size; } #ifdef CONFIG_SOLARIS_X86_PARTITION -- cgit v1.2.3-70-g09d2 From b7db9956e57c8151b930d5e5fe5c766e6aad3ff7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:10 +0900 Subject: block: move policy from disk to part0 Move disk->policy to part0->policy. Implement and use get_disk_ro(). Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/genhd.c | 16 +++++----------- drivers/ide/ide-cd.c | 2 +- drivers/md/dm-ioctl.c | 2 +- fs/partitions/check.c | 2 +- include/linux/genhd.h | 6 +++++- 5 files changed, 13 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/block/genhd.c b/block/genhd.c index c70db35076a..70358f3c742 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -757,7 +757,7 @@ static ssize_t disk_ro_show(struct device *dev, { struct gendisk *disk = dev_to_disk(dev); - return sprintf(buf, "%d\n", disk->policy ? 1 : 0); + return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0); } static ssize_t disk_capability_show(struct device *dev, @@ -1090,10 +1090,7 @@ EXPORT_SYMBOL(put_disk); void set_device_ro(struct block_device *bdev, int flag) { - if (bdev->bd_contains != bdev) - bdev->bd_part->policy = flag; - else - bdev->bd_disk->policy = flag; + bdev->bd_part->policy = flag; } EXPORT_SYMBOL(set_device_ro); @@ -1103,8 +1100,8 @@ void set_disk_ro(struct gendisk *disk, int flag) struct disk_part_iter piter; struct hd_struct *part; - disk->policy = flag; - disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); + disk_part_iter_init(&piter, disk, + DISK_PITER_INCL_EMPTY | DISK_PITER_INCL_PART0); while ((part = disk_part_iter_next(&piter))) part->policy = flag; disk_part_iter_exit(&piter); @@ -1116,10 +1113,7 @@ int bdev_read_only(struct block_device *bdev) { if (!bdev) return 0; - else if (bdev->bd_contains != bdev) - return bdev->bd_part->policy; - else - return bdev->bd_disk->policy; + return bdev->bd_part->policy; } EXPORT_SYMBOL(bdev_read_only); diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index f16bb466723..03c2cb6a58b 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -1113,7 +1113,7 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq) if (write) { /* disk has become write protected */ - if (cd->disk->policy) { + if (get_disk_ro(cd->disk)) { cdrom_end_request(drive, 0); return ide_stopped; } diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index c3de311117a..5b919159f08 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -548,7 +548,7 @@ static int __dev_status(struct mapped_device *md, struct dm_ioctl *param) */ param->open_count = dm_open_count(md); - if (disk->policy) + if (get_disk_ro(disk)) param->flags |= DM_READONLY_FLAG; param->event_nr = dm_get_event_nr(md); diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 24d2c56d7d2..ace6d03602c 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -375,7 +375,7 @@ int add_partition(struct gendisk *disk, int partno, p->start_sect = start; p->nr_sects = len; p->partno = partno; - p->policy = disk->policy; + p->policy = get_disk_ro(disk); dname = dev_name(ddev); if (isdigit(dname[strlen(dname) - 1])) diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 9cb8380cf0e..4411bdd671d 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -145,7 +145,6 @@ struct gendisk { struct kobject *slave_dir; struct timer_rand_state *random; - int policy; atomic_t sync_io; /* RAID */ unsigned long stamp; @@ -403,6 +402,11 @@ extern struct block_device *bdget_disk(struct gendisk *disk, int partno); extern void set_device_ro(struct block_device *bdev, int flag); extern void set_disk_ro(struct gendisk *disk, int flag); +static inline int get_disk_ro(struct gendisk *disk) +{ + return disk->part0.policy; +} + /* drivers/char/random.c */ extern void add_disk_randomness(struct gendisk *disk); extern void rand_initialize_disk(struct gendisk *disk); -- cgit v1.2.3-70-g09d2 From 0762b8bde9729f10f8e6249809660ff2ec3ad735 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:12 +0900 Subject: block: always set bdev->bd_part Till now, bdev->bd_part is set only if the bdev was for parts other than part0. This patch makes bdev->bd_part always set so that code paths don't have to differenciate common handling. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- drivers/md/md.c | 5 +--- fs/block_dev.c | 67 ++++++++++++++++++++++++--------------------------- fs/partitions/check.c | 7 +----- include/linux/genhd.h | 2 +- 5 files changed, 35 insertions(+), 48 deletions(-) (limited to 'drivers') diff --git a/block/blk-core.c b/block/blk-core.c index e0a5ee36849..a4a7c08d2f2 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1274,7 +1274,7 @@ __setup("fail_make_request=", setup_fail_make_request); static int should_fail_request(struct bio *bio) { if ((bio->bi_bdev->bd_disk->flags & GENHD_FL_FAIL) || - (bio->bi_bdev->bd_part && bio->bi_bdev->bd_part->make_it_fail)) + bio->bi_bdev->bd_part->make_it_fail) return should_fail(&fail_make_request, bio->bi_size); return 0; diff --git a/drivers/md/md.c b/drivers/md/md.c index 96e9fccd2ea..2bd9cf41612 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1464,10 +1464,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b))) goto fail; - if (rdev->bdev->bd_part) - ko = &part_to_dev(rdev->bdev->bd_part)->kobj; - else - ko = &disk_to_dev(rdev->bdev->bd_disk)->kobj; + ko = &part_to_dev(rdev->bdev->bd_part)->kobj; if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) { kobject_del(&rdev->kobj); goto fail; diff --git a/fs/block_dev.c b/fs/block_dev.c index 57d57264285..c3fa19bd64d 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -540,14 +540,6 @@ EXPORT_SYMBOL(bd_release); * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 */ -static struct kobject *bdev_get_kobj(struct block_device *bdev) -{ - if (bdev->bd_contains != bdev) - return kobject_get(&part_to_dev(bdev->bd_part)->kobj); - else - return kobject_get(&disk_to_dev(bdev->bd_disk)->kobj); -} - static int add_symlink(struct kobject *from, struct kobject *to) { if (!from || !to) @@ -596,7 +588,7 @@ static int bd_holder_grab_dirs(struct block_device *bdev, if (!bo->hdev) goto fail_put_sdir; - bo->sdev = bdev_get_kobj(bdev); + bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj); if (!bo->sdev) goto fail_put_hdev; @@ -919,7 +911,6 @@ static int __blkdev_put(struct block_device *bdev, int for_part); static int do_open(struct block_device *bdev, struct file *file, int for_part) { - struct module *owner = NULL; struct gendisk *disk; struct hd_struct *part = NULL; int ret; @@ -941,25 +932,27 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) ret = -ENXIO; file->f_mapping = bdev->bd_inode->i_mapping; + lock_kernel(); + disk = get_gendisk(bdev->bd_dev, &partno); - if (!disk) { - unlock_kernel(); - bdput(bdev); - return ret; - } - owner = disk->fops->owner; + if (!disk) + goto out_unlock_kernel; + part = disk_get_part(disk, partno); + if (!part) + goto out_unlock_kernel; mutex_lock_nested(&bdev->bd_mutex, for_part); if (!bdev->bd_openers) { bdev->bd_disk = disk; + bdev->bd_part = part; bdev->bd_contains = bdev; if (!partno) { struct backing_dev_info *bdi; if (disk->fops->open) { ret = disk->fops->open(bdev->bd_inode, file); if (ret) - goto out_first; + goto out_clear; } if (!bdev->bd_openers) { bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); @@ -975,31 +968,32 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) whole = bdget_disk(disk, 0); ret = -ENOMEM; if (!whole) - goto out_first; + goto out_clear; BUG_ON(for_part); ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1); if (ret) - goto out_first; + goto out_clear; bdev->bd_contains = whole; - part = disk_get_part(disk, partno); bdev->bd_inode->i_data.backing_dev_info = whole->bd_inode->i_data.backing_dev_info; if (!(disk->flags & GENHD_FL_UP) || !part || !part->nr_sects) { ret = -ENXIO; - goto out_first; + goto out_clear; } - bdev->bd_part = part; bd_set_size(bdev, (loff_t)part->nr_sects << 9); } } else { + disk_put_part(part); put_disk(disk); - module_put(owner); + module_put(disk->fops->owner); + part = NULL; + disk = NULL; if (bdev->bd_contains == bdev) { if (bdev->bd_disk->fops->open) { ret = bdev->bd_disk->fops->open(bdev->bd_inode, file); if (ret) - goto out; + goto out_unlock_bdev; } if (bdev->bd_invalidated) rescan_partitions(bdev->bd_disk, bdev); @@ -1012,20 +1006,24 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) unlock_kernel(); return 0; -out_first: + out_clear: bdev->bd_disk = NULL; + bdev->bd_part = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; if (bdev != bdev->bd_contains) __blkdev_put(bdev->bd_contains, 1); bdev->bd_contains = NULL; - put_disk(disk); - disk_put_part(part); - module_put(owner); -out: + out_unlock_bdev: mutex_unlock(&bdev->bd_mutex); + out_unlock_kernel: unlock_kernel(); - if (ret) - bdput(bdev); + + disk_put_part(part); + if (disk) + module_put(disk->fops->owner); + put_disk(disk); + bdput(bdev); + return ret; } @@ -1110,11 +1108,8 @@ static int __blkdev_put(struct block_device *bdev, int for_part) put_disk(disk); module_put(owner); - - if (bdev->bd_contains != bdev) { - disk_put_part(bdev->bd_part); - bdev->bd_part = NULL; - } + disk_put_part(bdev->bd_part); + bdev->bd_part = NULL; bdev->bd_disk = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; if (bdev != bdev->bd_contains) diff --git a/fs/partitions/check.c b/fs/partitions/check.c index f0f604950ff..87298c0fc8c 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -134,12 +134,7 @@ char *disk_name(struct gendisk *hd, int partno, char *buf) const char *bdevname(struct block_device *bdev, char *buf) { - int partno = 0; - - if (bdev->bd_part) - partno = bdev->bd_part->partno; - - return disk_name(bdev->bd_disk, partno, buf); + return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf); } EXPORT_SYMBOL(bdevname); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 2c0e1b597ab..45a3682b5d8 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -412,7 +412,7 @@ extern void rand_initialize_disk(struct gendisk *disk); static inline sector_t get_start_sect(struct block_device *bdev) { - return bdev->bd_contains == bdev ? 0 : bdev->bd_part->start_sect; + return bdev->bd_part->start_sect; } static inline sector_t get_capacity(struct gendisk *disk) { -- cgit v1.2.3-70-g09d2 From 074a7aca7afa6f230104e8e65eba3420263714a5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:14 +0900 Subject: block: move stats from disk to part0 Move stats related fields - stamp, in_flight, dkstats - from disk to part0 and unify stat handling such that... * part_stat_*() now updates part0 together if the specified partition is not part0. ie. part_stat_*() are now essentially all_stat_*(). * {disk|all}_stat_*() are gone. * part_round_stats() is updated similary. It handles part0 stats automatically and disk_round_stats() is killed. * part_{inc|dec}_in_fligh() is implemented which automatically updates part0 stats for parts other than part0. * disk_map_sector_rcu() is updated to return part0 if no part matches. Combined with the above changes, this makes NULL special case handling in callers unnecessary. * Separate stats show code paths for disk are collapsed into part stats show code paths. * Rename disk_stat_lock/unlock() to part_stat_lock/unlock() While at it, reposition stat handling macros a bit and add missing parentheses around macro parameters. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-core.c | 84 ++++++++++-------------- block/blk-merge.c | 12 ++-- block/genhd.c | 97 +++++++-------------------- drivers/block/aoe/aoecmd.c | 12 ++-- drivers/md/dm.c | 27 ++++---- drivers/md/linear.c | 9 +-- drivers/md/md.c | 4 +- drivers/md/multipath.c | 9 +-- drivers/md/raid0.c | 9 +-- drivers/md/raid1.c | 9 +-- drivers/md/raid10.c | 9 +-- drivers/md/raid5.c | 9 +-- fs/partitions/check.c | 12 ++-- include/linux/genhd.h | 159 +++++++++++++-------------------------------- 14 files changed, 165 insertions(+), 296 deletions(-) (limited to 'drivers') diff --git a/block/blk-core.c b/block/blk-core.c index 505ec61067d..98138f00252 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -61,21 +61,17 @@ static void drive_stat_acct(struct request *rq, int new_io) if (!blk_fs_request(rq) || !rq->rq_disk) return; - cpu = disk_stat_lock(); + cpu = part_stat_lock(); part = disk_map_sector_rcu(rq->rq_disk, rq->sector); if (!new_io) - all_stat_inc(cpu, rq->rq_disk, part, merges[rw], rq->sector); + part_stat_inc(cpu, part, merges[rw]); else { - disk_round_stats(cpu, rq->rq_disk); - rq->rq_disk->in_flight++; - if (part) { - part_round_stats(cpu, part); - part->in_flight++; - } + part_round_stats(cpu, part); + part_inc_in_flight(part); } - disk_stat_unlock(); + part_stat_unlock(); } void blk_queue_congestion_threshold(struct request_queue *q) @@ -983,8 +979,22 @@ static inline void add_request(struct request_queue *q, struct request *req) __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); } -/* - * disk_round_stats() - Round off the performance stats on a struct +static void part_round_stats_single(int cpu, struct hd_struct *part, + unsigned long now) +{ + if (now == part->stamp) + return; + + if (part->in_flight) { + __part_stat_add(cpu, part, time_in_queue, + part->in_flight * (now - part->stamp)); + __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); + } + part->stamp = now; +} + +/** + * part_round_stats() - Round off the performance stats on a struct * disk_stats. * * The average IO queue length and utilisation statistics are maintained @@ -998,36 +1008,15 @@ static inline void add_request(struct request_queue *q, struct request *req) * /proc/diskstats. This accounts immediately for all queue usage up to * the current jiffies and restarts the counters again. */ -void disk_round_stats(int cpu, struct gendisk *disk) -{ - unsigned long now = jiffies; - - if (now == disk->stamp) - return; - - if (disk->in_flight) { - disk_stat_add(cpu, disk, time_in_queue, - disk->in_flight * (now - disk->stamp)); - disk_stat_add(cpu, disk, io_ticks, (now - disk->stamp)); - } - disk->stamp = now; -} -EXPORT_SYMBOL_GPL(disk_round_stats); - void part_round_stats(int cpu, struct hd_struct *part) { unsigned long now = jiffies; - if (now == part->stamp) - return; - - if (part->in_flight) { - part_stat_add(cpu, part, time_in_queue, - part->in_flight * (now - part->stamp)); - part_stat_add(cpu, part, io_ticks, (now - part->stamp)); - } - part->stamp = now; + if (part->partno) + part_round_stats_single(cpu, &part_to_disk(part)->part0, now); + part_round_stats_single(cpu, part, now); } +EXPORT_SYMBOL_GPL(part_round_stats); /* * queue lock must be held @@ -1567,11 +1556,10 @@ static int __end_that_request_first(struct request *req, int error, struct hd_struct *part; int cpu; - cpu = disk_stat_lock(); + cpu = part_stat_lock(); part = disk_map_sector_rcu(req->rq_disk, req->sector); - all_stat_add(cpu, req->rq_disk, part, sectors[rw], - nr_bytes >> 9, req->sector); - disk_stat_unlock(); + part_stat_add(cpu, part, sectors[rw], nr_bytes >> 9); + part_stat_unlock(); } total_bytes = bio_nbytes = 0; @@ -1758,19 +1746,15 @@ static void end_that_request_last(struct request *req, int error) struct hd_struct *part; int cpu; - cpu = disk_stat_lock(); + cpu = part_stat_lock(); part = disk_map_sector_rcu(disk, req->sector); - all_stat_inc(cpu, disk, part, ios[rw], req->sector); - all_stat_add(cpu, disk, part, ticks[rw], duration, req->sector); - disk_round_stats(cpu, disk); - disk->in_flight--; - if (part) { - part_round_stats(cpu, part); - part->in_flight--; - } + part_stat_inc(cpu, part, ios[rw]); + part_stat_add(cpu, part, ticks[rw], duration); + part_round_stats(cpu, part); + part_dec_in_flight(part); - disk_stat_unlock(); + part_stat_unlock(); } if (req->end_io) diff --git a/block/blk-merge.c b/block/blk-merge.c index d926a24bf1f..c77196d5589 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -390,17 +390,13 @@ static int attempt_merge(struct request_queue *q, struct request *req, struct hd_struct *part; int cpu; - cpu = disk_stat_lock(); + cpu = part_stat_lock(); part = disk_map_sector_rcu(req->rq_disk, req->sector); - disk_round_stats(cpu, req->rq_disk); - req->rq_disk->in_flight--; - if (part) { - part_round_stats(cpu, part); - part->in_flight--; - } + part_round_stats(cpu, part); + part_dec_in_flight(part); - disk_stat_unlock(); + part_stat_unlock(); } req->ioprio = ioprio_best(req->ioprio, next->ioprio); diff --git a/block/genhd.c b/block/genhd.c index 06a252f2b96..e1cb96fb883 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -176,7 +176,7 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit); * while preemption is disabled. * * RETURNS: - * Found partition on success, NULL if there's no matching partition. + * Found partition on success, part0 is returned if no partition matches */ struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) { @@ -189,7 +189,7 @@ struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) sector < part->start_sect + part->nr_sects) return part; } - return NULL; + return &disk->part0; } EXPORT_SYMBOL_GPL(disk_map_sector_rcu); @@ -580,24 +580,24 @@ void __init printk_all_partitions(void) * numbers in hex - the same format as the root= * option takes. */ - printk("%s %10llu %s", - bdevt_str(disk_devt(disk), devt_buf), - (unsigned long long)get_capacity(disk) >> 1, - disk_name(disk, 0, name_buf)); - if (disk->driverfs_dev != NULL && - disk->driverfs_dev->driver != NULL) - printk(" driver: %s\n", - disk->driverfs_dev->driver->name); - else - printk(" (driver?)\n"); + disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); + while ((part = disk_part_iter_next(&piter))) { + bool is_part0 = part == &disk->part0; - /* now show the partitions */ - disk_part_iter_init(&piter, disk, 0); - while ((part = disk_part_iter_next(&piter))) - printk(" %s %10llu %s\n", + printk("%s%s %10llu %s", is_part0 ? "" : " ", bdevt_str(part_devt(part), devt_buf), (unsigned long long)part->nr_sects >> 1, disk_name(disk, part->partno, name_buf)); + if (is_part0) { + if (disk->driverfs_dev != NULL && + disk->driverfs_dev->driver != NULL) + printk(" driver: %s\n", + disk->driverfs_dev->driver->name); + else + printk(" (driver?)\n"); + } else + printk("\n"); + } disk_part_iter_exit(&piter); } class_dev_iter_exit(&iter); @@ -674,12 +674,7 @@ static int show_partition(struct seq_file *seqf, void *v) return 0; /* show the full disk and all non-0 size partitions of it */ - seq_printf(seqf, "%4d %7d %10llu %s\n", - MAJOR(disk_devt(sgp)), MINOR(disk_devt(sgp)), - (unsigned long long)get_capacity(sgp) >> 1, - disk_name(sgp, 0, buf)); - - disk_part_iter_init(&piter, sgp, 0); + disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0); while ((part = disk_part_iter_next(&piter))) seq_printf(seqf, "%4d %7d %10llu %s\n", MAJOR(part_devt(part)), MINOR(part_devt(part)), @@ -768,40 +763,13 @@ static ssize_t disk_capability_show(struct device *dev, return sprintf(buf, "%x\n", disk->flags); } -static ssize_t disk_stat_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gendisk *disk = dev_to_disk(dev); - int cpu; - - cpu = disk_stat_lock(); - disk_round_stats(cpu, disk); - disk_stat_unlock(); - return sprintf(buf, - "%8lu %8lu %8llu %8u " - "%8lu %8lu %8llu %8u " - "%8u %8u %8u" - "\n", - disk_stat_read(disk, ios[READ]), - disk_stat_read(disk, merges[READ]), - (unsigned long long)disk_stat_read(disk, sectors[READ]), - jiffies_to_msecs(disk_stat_read(disk, ticks[READ])), - disk_stat_read(disk, ios[WRITE]), - disk_stat_read(disk, merges[WRITE]), - (unsigned long long)disk_stat_read(disk, sectors[WRITE]), - jiffies_to_msecs(disk_stat_read(disk, ticks[WRITE])), - disk->in_flight, - jiffies_to_msecs(disk_stat_read(disk, io_ticks)), - jiffies_to_msecs(disk_stat_read(disk, time_in_queue))); -} - static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL); static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL); static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL); static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL); static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL); -static DEVICE_ATTR(stat, S_IRUGO, disk_stat_show, NULL); +static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); #ifdef CONFIG_FAIL_MAKE_REQUEST static struct device_attribute dev_attr_fail = __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); @@ -836,7 +804,7 @@ static void disk_release(struct device *dev) kfree(disk->random); kfree(disk->__part); - free_disk_stats(disk); + free_part_stats(&disk->part0); kfree(disk); } struct class block_class = { @@ -873,28 +841,11 @@ static int diskstats_show(struct seq_file *seqf, void *v) "\n\n"); */ - cpu = disk_stat_lock(); - disk_round_stats(cpu, gp); - disk_stat_unlock(); - seq_printf(seqf, "%4d %7d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n", - MAJOR(disk_devt(gp)), MINOR(disk_devt(gp)), - disk_name(gp, 0, buf), - disk_stat_read(gp, ios[0]), disk_stat_read(gp, merges[0]), - (unsigned long long)disk_stat_read(gp, sectors[0]), - jiffies_to_msecs(disk_stat_read(gp, ticks[0])), - disk_stat_read(gp, ios[1]), disk_stat_read(gp, merges[1]), - (unsigned long long)disk_stat_read(gp, sectors[1]), - jiffies_to_msecs(disk_stat_read(gp, ticks[1])), - gp->in_flight, - jiffies_to_msecs(disk_stat_read(gp, io_ticks)), - jiffies_to_msecs(disk_stat_read(gp, time_in_queue))); - - /* now show all non-0 size partitions of it */ - disk_part_iter_init(&piter, gp, 0); + disk_part_iter_init(&piter, gp, DISK_PITER_INCL_PART0); while ((hd = disk_part_iter_next(&piter))) { - cpu = disk_stat_lock(); + cpu = part_stat_lock(); part_round_stats(cpu, hd); - disk_stat_unlock(); + part_stat_unlock(); seq_printf(seqf, "%4d %7d %s %lu %lu %llu " "%u %lu %lu %llu %u %u %u %u\n", MAJOR(part_devt(hd)), MINOR(part_devt(hd)), @@ -1000,7 +951,7 @@ struct gendisk *alloc_disk_ext_node(int minors, int ext_minors, int node_id) int tot_minors = minors + ext_minors; int size = tot_minors * sizeof(struct hd_struct *); - if (!init_disk_stats(disk)) { + if (!init_part_stats(&disk->part0)) { kfree(disk); return NULL; } @@ -1008,7 +959,7 @@ struct gendisk *alloc_disk_ext_node(int minors, int ext_minors, int node_id) disk->__part = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, node_id); if (!disk->__part) { - free_disk_stats(disk); + free_part_stats(&disk->part0); kfree(disk); return NULL; } diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 934800f979c..961d29a53ca 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -758,15 +758,15 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector struct hd_struct *part; int cpu; - cpu = disk_stat_lock(); + cpu = part_stat_lock(); part = disk_map_sector_rcu(disk, sector); - all_stat_inc(cpu, disk, part, ios[rw], sector); - all_stat_add(cpu, disk, part, ticks[rw], duration, sector); - all_stat_add(cpu, disk, part, sectors[rw], n_sect, sector); - all_stat_add(cpu, disk, part, io_ticks, duration, sector); + part_stat_inc(cpu, part, ios[rw]); + part_stat_add(cpu, part, ticks[rw], duration); + part_stat_add(cpu, part, sectors[rw], n_sect); + part_stat_add(cpu, part, io_ticks, duration); - disk_stat_unlock(); + part_stat_unlock(); } void diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 637806695bb..327de03a5bd 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -381,10 +381,10 @@ static void start_io_acct(struct dm_io *io) io->start_time = jiffies; - cpu = disk_stat_lock(); - disk_round_stats(cpu, dm_disk(md)); - disk_stat_unlock(); - dm_disk(md)->in_flight = atomic_inc_return(&md->pending); + cpu = part_stat_lock(); + part_round_stats(cpu, &dm_disk(md)->part0); + part_stat_unlock(); + dm_disk(md)->part0.in_flight = atomic_inc_return(&md->pending); } static int end_io_acct(struct dm_io *io) @@ -395,12 +395,13 @@ static int end_io_acct(struct dm_io *io) int pending, cpu; int rw = bio_data_dir(bio); - cpu = disk_stat_lock(); - disk_round_stats(cpu, dm_disk(md)); - disk_stat_add(cpu, dm_disk(md), ticks[rw], duration); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_round_stats(cpu, &dm_disk(md)->part0); + part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration); + part_stat_unlock(); - dm_disk(md)->in_flight = pending = atomic_dec_return(&md->pending); + dm_disk(md)->part0.in_flight = pending = + atomic_dec_return(&md->pending); return !pending; } @@ -899,10 +900,10 @@ static int dm_request(struct request_queue *q, struct bio *bio) down_read(&md->io_lock); - cpu = disk_stat_lock(); - disk_stat_inc(cpu, dm_disk(md), ios[rw]); - disk_stat_add(cpu, dm_disk(md), sectors[rw], bio_sectors(bio)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]); + part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio)); + part_stat_unlock(); /* * If we're suspended we have to queue diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 00cbc8e4729..c80ea90593d 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -325,10 +325,11 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) return 0; } - cpu = disk_stat_lock(); - disk_stat_inc(cpu, mddev->gendisk, ios[rw]); - disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bio)); + part_stat_unlock(); tmp_dev = which_dev(mddev, bio->bi_sector); block = bio->bi_sector >> 1; diff --git a/drivers/md/md.c b/drivers/md/md.c index 2bd9cf41612..0a3a4bdcd4a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5546,8 +5546,8 @@ static int is_mddev_idle(mddev_t *mddev) rcu_read_lock(); rdev_for_each_rcu(rdev, mddev) { struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; - curr_events = disk_stat_read(disk, sectors[0]) + - disk_stat_read(disk, sectors[1]) - + curr_events = part_stat_read(&disk->part0, sectors[0]) + + part_stat_read(&disk->part0, sectors[1]) - atomic_read(&disk->sync_io); /* sync IO will cause sync_io to increase before the disk_stats * as sync_io is counted when a request starts, and diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 182f5a94cdc..8bb8794129b 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -159,10 +159,11 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio) mp_bh->master_bio = bio; mp_bh->mddev = mddev; - cpu = disk_stat_lock(); - disk_stat_inc(cpu, mddev->gendisk, ios[rw]); - disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bio)); + part_stat_unlock(); mp_bh->path = multipath_map(conf); if (mp_bh->path < 0) { diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index e26030fa59a..f52f442a735 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -406,10 +406,11 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) return 0; } - cpu = disk_stat_lock(); - disk_stat_inc(cpu, mddev->gendisk, ios[rw]); - disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bio)); + part_stat_unlock(); chunk_size = mddev->chunk_size >> 10; chunk_sects = mddev->chunk_size >> 9; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index babb13036f9..b9764429d85 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -804,10 +804,11 @@ static int make_request(struct request_queue *q, struct bio * bio) bitmap = mddev->bitmap; - cpu = disk_stat_lock(); - disk_stat_inc(cpu, mddev->gendisk, ios[rw]); - disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bio)); + part_stat_unlock(); /* * make_request() can abort the operation when READA is being diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 5ec80da0a9d..5f990133f5e 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -844,10 +844,11 @@ static int make_request(struct request_queue *q, struct bio * bio) */ wait_barrier(conf); - cpu = disk_stat_lock(); - disk_stat_inc(cpu, mddev->gendisk, ios[rw]); - disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bio)); + part_stat_unlock(); r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 5899f211515..ae16794bef2 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3396,10 +3396,11 @@ static int make_request(struct request_queue *q, struct bio * bi) md_write_start(mddev, bi); - cpu = disk_stat_lock(); - disk_stat_inc(cpu, mddev->gendisk, ios[rw]); - disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bi)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bi)); + part_stat_unlock(); if (rw == READ && mddev->reshape_position == MaxSector && diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 60592d9f43b..f517869e8d1 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -210,15 +210,15 @@ ssize_t part_size_show(struct device *dev, return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); } -static ssize_t part_stat_show(struct device *dev, - struct device_attribute *attr, char *buf) +ssize_t part_stat_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); int cpu; - cpu = disk_stat_lock(); + cpu = part_stat_lock(); part_round_stats(cpu, p); - disk_stat_unlock(); + part_stat_unlock(); return sprintf(buf, "%8lu %8lu %8llu %8u " "%8lu %8lu %8llu %8u " @@ -575,8 +575,8 @@ void del_gendisk(struct gendisk *disk) set_capacity(disk, 0); disk->flags &= ~GENHD_FL_UP; unlink_gendisk(disk); - disk_stat_set_all(disk, 0); - disk->stamp = 0; + part_stat_set_all(&disk->part0, 0); + disk->part0.stamp = 0; kobject_put(disk->part0.holder_dir); kobject_put(disk->slave_dir); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 3d15b42dc35..c90e1b4fbe5 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -145,13 +145,6 @@ struct gendisk { struct timer_rand_state *random; atomic_t sync_io; /* RAID */ - unsigned long stamp; - int in_flight; -#ifdef CONFIG_SMP - struct disk_stats *dkstats; -#else - struct disk_stats dkstats; -#endif struct work_struct async_notify; #ifdef CONFIG_BLK_DEV_INTEGRITY struct blk_integrity *integrity; @@ -232,46 +225,18 @@ extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, * internal use only. */ #ifdef CONFIG_SMP -#define disk_stat_lock() ({ rcu_read_lock(); get_cpu(); }) -#define disk_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0) - -#define disk_stat_add(cpu, gendiskp, field, addnd) \ - (per_cpu_ptr(gendiskp->dkstats, cpu)->field += addnd) - -#define disk_stat_read(gendiskp, field) \ -({ \ - typeof(gendiskp->dkstats->field) res = 0; \ - int i; \ - for_each_possible_cpu(i) \ - res += per_cpu_ptr(gendiskp->dkstats, i)->field; \ - res; \ -}) - -static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) -{ - int i; - - for_each_possible_cpu(i) - memset(per_cpu_ptr(gendiskp->dkstats, i), value, - sizeof(struct disk_stats)); -} +#define part_stat_lock() ({ rcu_read_lock(); get_cpu(); }) +#define part_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0) -#define part_stat_add(cpu, part, field, addnd) \ - (per_cpu_ptr(part->dkstats, cpu)->field += addnd) - -#define all_stat_add(cpu, gendiskp, part, field, addnd, sector) \ -({ \ - if (part) \ - part_stat_add(cpu, part, field, addnd); \ - disk_stat_add(cpu, gendiskp, field, addnd); \ -}) +#define __part_stat_add(cpu, part, field, addnd) \ + (per_cpu_ptr((part)->dkstats, (cpu))->field += (addnd)) #define part_stat_read(part, field) \ ({ \ - typeof(part->dkstats->field) res = 0; \ + typeof((part)->dkstats->field) res = 0; \ int i; \ for_each_possible_cpu(i) \ - res += per_cpu_ptr(part->dkstats, i)->field; \ + res += per_cpu_ptr((part)->dkstats, i)->field; \ res; \ }) @@ -284,109 +249,73 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) sizeof(struct disk_stats)); } -#else /* !CONFIG_SMP */ -#define disk_stat_lock() ({ rcu_read_lock(); 0; }) -#define disk_stat_unlock() rcu_read_unlock() - -#define disk_stat_add(cpu, gendiskp, field, addnd) \ - (gendiskp->dkstats.field += addnd) -#define disk_stat_read(gendiskp, field) (gendiskp->dkstats.field) - -static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) +static inline int init_part_stats(struct hd_struct *part) { - memset(&gendiskp->dkstats, value, sizeof (struct disk_stats)); + part->dkstats = alloc_percpu(struct disk_stats); + if (!part->dkstats) + return 0; + return 1; } -#define part_stat_add(cpu, part, field, addnd) \ - (part->dkstats.field += addnd) - -#define all_stat_add(cpu, gendiskp, part, field, addnd, sector) \ -({ \ - if (part) \ - part_stat_add(cpu, part, field, addnd); \ - disk_stat_add(cpu, gendiskp, field, addnd); \ -}) - -#define part_stat_read(part, field) (part->dkstats.field) - -static inline void part_stat_set_all(struct hd_struct *part, int value) +static inline void free_part_stats(struct hd_struct *part) { - memset(&part->dkstats, value, sizeof(struct disk_stats)); + free_percpu(part->dkstats); } -#endif /* CONFIG_SMP */ - -#define disk_stat_dec(cpu, gendiskp, field) \ - disk_stat_add(cpu, gendiskp, field, -1) -#define disk_stat_inc(cpu, gendiskp, field) \ - disk_stat_add(cpu, gendiskp, field, 1) -#define disk_stat_sub(cpu, gendiskp, field, subnd) \ - disk_stat_add(cpu, gendiskp, field, -subnd) - -#define part_stat_dec(cpu, gendiskp, field) \ - part_stat_add(cpu, gendiskp, field, -1) -#define part_stat_inc(cpu, gendiskp, field) \ - part_stat_add(cpu, gendiskp, field, 1) -#define part_stat_sub(cpu, gendiskp, field, subnd) \ - part_stat_add(cpu, gendiskp, field, -subnd) +#else /* !CONFIG_SMP */ +#define part_stat_lock() ({ rcu_read_lock(); 0; }) +#define part_stat_unlock() rcu_read_unlock() -#define all_stat_dec(cpu, gendiskp, field, sector) \ - all_stat_add(cpu, gendiskp, field, -1, sector) -#define all_stat_inc(cpu, gendiskp, part, field, sector) \ - all_stat_add(cpu, gendiskp, part, field, 1, sector) -#define all_stat_sub(cpu, gendiskp, part, field, subnd, sector) \ - all_stat_add(cpu, gendiskp, part, field, -subnd, sector) +#define __part_stat_add(cpu, part, field, addnd) \ + ((part)->dkstats.field += addnd) -/* Inlines to alloc and free disk stats in struct gendisk */ -#ifdef CONFIG_SMP -static inline int init_disk_stats(struct gendisk *disk) -{ - disk->dkstats = alloc_percpu(struct disk_stats); - if (!disk->dkstats) - return 0; - return 1; -} +#define part_stat_read(part, field) ((part)->dkstats.field) -static inline void free_disk_stats(struct gendisk *disk) +static inline void part_stat_set_all(struct hd_struct *part, int value) { - free_percpu(disk->dkstats); + memset(&part->dkstats, value, sizeof(struct disk_stats)); } static inline int init_part_stats(struct hd_struct *part) { - part->dkstats = alloc_percpu(struct disk_stats); - if (!part->dkstats) - return 0; return 1; } static inline void free_part_stats(struct hd_struct *part) { - free_percpu(part->dkstats); } -#else /* CONFIG_SMP */ -static inline int init_disk_stats(struct gendisk *disk) -{ - return 1; -} +#endif /* CONFIG_SMP */ -static inline void free_disk_stats(struct gendisk *disk) -{ -} +#define part_stat_add(cpu, part, field, addnd) do { \ + __part_stat_add((cpu), (part), field, addnd); \ + if ((part)->partno) \ + __part_stat_add((cpu), &part_to_disk((part))->part0, \ + field, addnd); \ +} while (0) -static inline int init_part_stats(struct hd_struct *part) +#define part_stat_dec(cpu, gendiskp, field) \ + part_stat_add(cpu, gendiskp, field, -1) +#define part_stat_inc(cpu, gendiskp, field) \ + part_stat_add(cpu, gendiskp, field, 1) +#define part_stat_sub(cpu, gendiskp, field, subnd) \ + part_stat_add(cpu, gendiskp, field, -subnd) + +static inline void part_inc_in_flight(struct hd_struct *part) { - return 1; + part->in_flight++; + if (part->partno) + part_to_disk(part)->part0.in_flight++; } -static inline void free_part_stats(struct hd_struct *part) +static inline void part_dec_in_flight(struct hd_struct *part) { + part->in_flight--; + if (part->partno) + part_to_disk(part)->part0.in_flight--; } -#endif /* CONFIG_SMP */ /* drivers/block/ll_rw_blk.c */ -extern void disk_round_stats(int cpu, struct gendisk *disk); extern void part_round_stats(int cpu, struct hd_struct *part); /* drivers/block/genhd.c */ @@ -595,6 +524,8 @@ extern void blk_unregister_region(dev_t devt, unsigned long range); extern ssize_t part_size_show(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t part_stat_show(struct device *dev, + struct device_attribute *attr, char *buf); #ifdef CONFIG_FAIL_MAKE_REQUEST extern ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, char *buf); -- cgit v1.2.3-70-g09d2 From 689d6fac40b41c7bf154f362deaf442548e4dc81 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:16 +0900 Subject: block: replace @ext_minors with GENHD_FL_EXT_DEVT With previous changes, it's meaningless to limit the number of partitions. Replace @ext_minors with GENHD_FL_EXT_DEVT such that setting the flag allows the disk to have maximum number of allowed partitions (only limited by the number of entries in parsed_partitions as determined by MAX_PART constant). This kills not-too-pretty alloc_disk_ext[_node]() functions and makes @minors parameter to alloc_disk[_node]() unnecessary. The parameter is left alone to avoid disturbing the users. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/genhd.c | 16 +--------------- drivers/ide/ide-disk.c | 14 +++++--------- drivers/scsi/sd.c | 9 ++------- fs/partitions/check.h | 4 +--- include/linux/genhd.h | 16 ++++++++-------- 5 files changed, 17 insertions(+), 42 deletions(-) (limited to 'drivers') diff --git a/block/genhd.c b/block/genhd.c index c2b14aa69d5..eedab5b4685 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1024,18 +1024,9 @@ struct gendisk *alloc_disk(int minors) { return alloc_disk_node(minors, -1); } +EXPORT_SYMBOL(alloc_disk); struct gendisk *alloc_disk_node(int minors, int node_id) -{ - return alloc_disk_ext_node(minors, 0, node_id); -} - -struct gendisk *alloc_disk_ext(int minors, int ext_minors) -{ - return alloc_disk_ext_node(minors, ext_minors, -1); -} - -struct gendisk *alloc_disk_ext_node(int minors, int ext_minors, int node_id) { struct gendisk *disk; @@ -1054,7 +1045,6 @@ struct gendisk *alloc_disk_ext_node(int minors, int ext_minors, int node_id) disk->part_tbl->part[0] = &disk->part0; disk->minors = minors; - disk->ext_minors = ext_minors; rand_initialize_disk(disk); disk_to_dev(disk)->class = &block_class; disk_to_dev(disk)->type = &disk_type; @@ -1065,11 +1055,7 @@ struct gendisk *alloc_disk_ext_node(int minors, int ext_minors, int node_id) } return disk; } - -EXPORT_SYMBOL(alloc_disk); EXPORT_SYMBOL(alloc_disk_node); -EXPORT_SYMBOL(alloc_disk_ext); -EXPORT_SYMBOL(alloc_disk_ext_node); struct kobject *get_disk(struct gendisk *disk) { diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index a072df5053a..29c8ae75268 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -41,16 +41,12 @@ #include #include -#define IDE_DISK_PARTS (1 << PARTN_BITS) - #if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT) -#define IDE_DISK_MINORS IDE_DISK_PARTS +#define IDE_DISK_MINORS (1 << PARTN_BITS) #else #define IDE_DISK_MINORS 1 #endif -#define IDE_DISK_EXT_MINORS (IDE_DISK_PARTS - IDE_DISK_MINORS) - struct ide_disk_obj { ide_drive_t *drive; ide_driver_t *driver; @@ -1161,8 +1157,7 @@ static int ide_disk_probe(ide_drive_t *drive) if (!idkp) goto failed; - g = alloc_disk_ext_node(IDE_DISK_MINORS, IDE_DISK_EXT_MINORS, - hwif_to_node(drive->hwif)); + g = alloc_disk_node(IDE_DISK_MINORS, hwif_to_node(drive->hwif)); if (!g) goto out_free_idkp; @@ -1189,9 +1184,10 @@ static int ide_disk_probe(ide_drive_t *drive) drive->attach = 1; g->minors = IDE_DISK_MINORS; - g->ext_minors = IDE_DISK_EXT_MINORS; g->driverfs_dev = &drive->gendev; - g->flags = drive->removable ? GENHD_FL_REMOVABLE : 0; + g->flags |= GENHD_FL_EXT_DEVT; + if (drive->removable) + g->flags |= GENHD_FL_REMOVABLE; set_capacity(g, idedisk_capacity(drive)); g->fops = &idedisk_ops; add_disk(g); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 280d231a86e..6598024531d 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -86,16 +86,12 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_DISK); MODULE_ALIAS_SCSI_DEVICE(TYPE_MOD); MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC); -#define SD_PARTS 64 - #if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT) #define SD_MINORS 16 #else #define SD_MINORS 1 #endif -#define SD_EXT_MINORS (SD_PARTS - SD_MINORS) - static int sd_revalidate_disk(struct gendisk *); static int sd_probe(struct device *); static int sd_remove(struct device *); @@ -1811,7 +1807,7 @@ static int sd_probe(struct device *dev) if (!sdkp) goto out; - gd = alloc_disk_ext(SD_MINORS, SD_EXT_MINORS); + gd = alloc_disk(SD_MINORS); if (!gd) goto out_free; @@ -1856,7 +1852,6 @@ static int sd_probe(struct device *dev) gd->major = sd_major((index & 0xf0) >> 4); gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00); gd->minors = SD_MINORS; - gd->ext_minors = SD_EXT_MINORS; gd->fops = &sd_fops; if (index < 26) { @@ -1880,7 +1875,7 @@ static int sd_probe(struct device *dev) blk_queue_prep_rq(sdp->request_queue, sd_prep_fn); gd->driverfs_dev = &sdp->sdev_gendev; - gd->flags = GENHD_FL_DRIVERFS; + gd->flags = GENHD_FL_EXT_DEVT | GENHD_FL_DRIVERFS; if (sdp->removable) gd->flags |= GENHD_FL_REMOVABLE; diff --git a/fs/partitions/check.h b/fs/partitions/check.h index 17ae8ecd9e8..98dbe1a8452 100644 --- a/fs/partitions/check.h +++ b/fs/partitions/check.h @@ -5,15 +5,13 @@ * add_gd_partition adds a partitions details to the devices partition * description. */ -enum { MAX_PART = 256 }; - struct parsed_partitions { char name[BDEVNAME_SIZE]; struct { sector_t from; sector_t size; int flags; - } parts[MAX_PART]; + } parts[DISK_MAX_PARTS]; int next; int limit; }; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index ecf649c3dee..04524c213de 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -58,6 +58,8 @@ enum { UNIXWARE_PARTITION = 0x63, /* Same as GNU_HURD and SCO Unix */ }; +#define DISK_MAX_PARTS 256 + #include #include #include @@ -112,6 +114,7 @@ struct hd_struct { #define GENHD_FL_CD 8 #define GENHD_FL_UP 16 #define GENHD_FL_SUPPRESS_PARTITION_INFO 32 +#define GENHD_FL_EXT_DEVT 64 /* allow extended devt */ #define BLK_SCSI_MAX_CMDS (256) #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) @@ -129,15 +132,13 @@ struct disk_part_tbl { }; struct gendisk { - /* major, first_minor, minors and ext_minors are input - * parameters only, don't use directly. Use disk_devt() and - * disk_max_parts(). + /* major, first_minor and minors are input parameters only, + * don't use directly. Use disk_devt() and disk_max_parts(). */ int major; /* major number of driver */ int first_minor; int minors; /* maximum number of minors, =1 for * disks that can't be partitioned. */ - int ext_minors; /* number of extended dynamic minors */ char disk_name[32]; /* name of major driver */ @@ -180,7 +181,9 @@ static inline struct gendisk *part_to_disk(struct hd_struct *part) static inline int disk_max_parts(struct gendisk *disk) { - return disk->minors + disk->ext_minors; + if (disk->flags & GENHD_FL_EXT_DEVT) + return DISK_MAX_PARTS; + return disk->minors; } static inline bool disk_partitionable(struct gendisk *disk) @@ -527,9 +530,6 @@ extern void printk_all_partitions(void); extern struct gendisk *alloc_disk_node(int minors, int node_id); extern struct gendisk *alloc_disk(int minors); -extern struct gendisk *alloc_disk_ext_node(int minors, int ext_minrs, - int node_id); -extern struct gendisk *alloc_disk_ext(int minors, int ext_minors); extern struct kobject *get_disk(struct gendisk *disk); extern void put_disk(struct gendisk *disk); extern void blk_register_region(dev_t devt, unsigned long range, -- cgit v1.2.3-70-g09d2 From 3e1a7ff8a0a7b948f2684930166954f9e8e776fe Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:17 +0900 Subject: block: allow disk to have extended device number Now that disk and partition handlings are mostly unified, it's easy to allow disk to have extended device number. This patch makes add_disk() use extended device number if disk->minors is zero. Both sd and ide-disk are updated to use this. * sd_format_disk_name() is implemented which can generically determine the drive name. This removes disk number restriction stemming from limited device names. * If sd index goes over SD_MAX_DISKS (which can be increased now BTW), sd simply doesn't initialize minors letting block layer choose extended device number. * If CONFIG_DEBUG_EXT_DEVT is set, both sd and ide-disk always set minors to 0 and use extended device numbers. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/genhd.c | 25 ++++++++++++++++- drivers/ide/ide-disk.c | 2 +- drivers/scsi/sd.c | 74 ++++++++++++++++++++++++++++++++++++-------------- fs/partitions/check.c | 1 + include/linux/genhd.h | 3 +- 5 files changed, 82 insertions(+), 23 deletions(-) (limited to 'drivers') diff --git a/block/genhd.c b/block/genhd.c index eedab5b4685..d9de3e482d1 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -478,14 +478,37 @@ static int exact_lock(dev_t devt, void *data) * * This function registers the partitioning information in @disk * with the kernel. + * + * FIXME: error handling */ void add_disk(struct gendisk *disk) { struct backing_dev_info *bdi; + dev_t devt; int retval; + /* minors == 0 indicates to use ext devt from part0 and should + * be accompanied with EXT_DEVT flag. Make sure all + * parameters make sense. + */ + WARN_ON(disk->minors && !(disk->major || disk->first_minor)); + WARN_ON(!disk->minors && !(disk->flags & GENHD_FL_EXT_DEVT)); + disk->flags |= GENHD_FL_UP; - disk_to_dev(disk)->devt = MKDEV(disk->major, disk->first_minor); + + retval = blk_alloc_devt(&disk->part0, &devt); + if (retval) { + WARN_ON(1); + return; + } + disk_to_dev(disk)->devt = devt; + + /* ->major and ->first_minor aren't supposed to be + * dereferenced from here on, but set them just in case. + */ + disk->major = MAJOR(devt); + disk->first_minor = MINOR(devt); + blk_register_region(disk_devt(disk), disk->minors, NULL, exact_match, exact_lock, disk); register_disk(disk); diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 29c8ae75268..33ea8c04871 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -44,7 +44,7 @@ #if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT) #define IDE_DISK_MINORS (1 << PARTN_BITS) #else -#define IDE_DISK_MINORS 1 +#define IDE_DISK_MINORS 0 #endif struct ide_disk_obj { diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 6598024531d..bcb04b2a767 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -89,7 +89,7 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC); #if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT) #define SD_MINORS 16 #else -#define SD_MINORS 1 +#define SD_MINORS 0 #endif static int sd_revalidate_disk(struct gendisk *); @@ -1769,6 +1769,52 @@ static int sd_revalidate_disk(struct gendisk *disk) return 0; } +/** + * sd_format_disk_name - format disk name + * @prefix: name prefix - ie. "sd" for SCSI disks + * @index: index of the disk to format name for + * @buf: output buffer + * @buflen: length of the output buffer + * + * SCSI disk names starts at sda. The 26th device is sdz and the + * 27th is sdaa. The last one for two lettered suffix is sdzz + * which is followed by sdaaa. + * + * This is basically 26 base counting with one extra 'nil' entry + * at the beggining from the second digit on and can be + * determined using similar method as 26 base conversion with the + * index shifted -1 after each digit is computed. + * + * CONTEXT: + * Don't care. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +static int sd_format_disk_name(char *prefix, int index, char *buf, int buflen) +{ + const int base = 'z' - 'a' + 1; + char *begin = buf + strlen(prefix); + char *end = buf + buflen; + char *p; + int unit; + + p = end - 1; + *p = '\0'; + unit = base; + do { + if (p == begin) + return -EINVAL; + *--p = 'a' + (index % unit); + index = (index / unit) - 1; + } while (index >= 0); + + memmove(begin, p, end - p); + memcpy(buf, prefix, strlen(prefix)); + + return 0; +} + /** * sd_probe - called during driver initialization and whenever a * new scsi device is attached to the system. It is called once @@ -1821,8 +1867,8 @@ static int sd_probe(struct device *dev) if (error) goto out_put; - error = -EBUSY; - if (index >= SD_MAX_DISKS) + error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN); + if (error) goto out_free_index; sdkp->device = sdp; @@ -1849,24 +1895,12 @@ static int sd_probe(struct device *dev) get_device(&sdp->sdev_gendev); - gd->major = sd_major((index & 0xf0) >> 4); - gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00); - gd->minors = SD_MINORS; - gd->fops = &sd_fops; - - if (index < 26) { - sprintf(gd->disk_name, "sd%c", 'a' + index % 26); - } else if (index < (26 + 1) * 26) { - sprintf(gd->disk_name, "sd%c%c", - 'a' + index / 26 - 1,'a' + index % 26); - } else { - const unsigned int m1 = (index / 26 - 1) / 26 - 1; - const unsigned int m2 = (index / 26 - 1) % 26; - const unsigned int m3 = index % 26; - sprintf(gd->disk_name, "sd%c%c%c", - 'a' + m1, 'a' + m2, 'a' + m3); + if (index < SD_MAX_DISKS) { + gd->major = sd_major((index & 0xf0) >> 4); + gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00); + gd->minors = SD_MINORS; } - + gd->fops = &sd_fops; gd->private_data = &sdkp->driver; gd->queue = sdkp->device->request_queue; diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 772b2ed8d23..0e411603fdf 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -593,6 +593,7 @@ void del_gendisk(struct gendisk *disk) disk_part_iter_exit(&piter); invalidate_partition(disk, 0); + blk_free_devt(disk_to_dev(disk)->devt); set_capacity(disk, 0); disk->flags &= ~GENHD_FL_UP; unlink_gendisk(disk); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 04524c213de..206cdf96c3a 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -59,6 +59,7 @@ enum { }; #define DISK_MAX_PARTS 256 +#define DISK_NAME_LEN 32 #include #include @@ -140,7 +141,7 @@ struct gendisk { int minors; /* maximum number of minors, =1 for * disks that can't be partitioned. */ - char disk_name[32]; /* name of major driver */ + char disk_name[DISK_NAME_LEN]; /* name of major driver */ /* Array of pointers to partitions indexed by partno. * Protected with matching bdev lock but stat and other -- cgit v1.2.3-70-g09d2 From a3bce90edd8f6cafe3f63b1a943800792e830178 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 28 Aug 2008 16:17:05 +0900 Subject: block: add gfp_mask argument to blk_rq_map_user and blk_rq_map_user_iov Currently, blk_rq_map_user and blk_rq_map_user_iov always do GFP_KERNEL allocation. This adds gfp_mask argument to blk_rq_map_user and blk_rq_map_user_iov so sg can use it (sg always does GFP_ATOMIC allocation). Signed-off-by: FUJITA Tomonori Signed-off-by: Douglas Gilbert Cc: Mike Christie Cc: James Bottomley Signed-off-by: Jens Axboe --- block/blk-map.c | 20 ++++++++++++-------- block/bsg.c | 5 +++-- block/scsi_ioctl.c | 5 +++-- drivers/cdrom/cdrom.c | 2 +- drivers/scsi/scsi_tgt_lib.c | 2 +- fs/bio.c | 33 +++++++++++++++++++-------------- include/linux/bio.h | 9 +++++---- include/linux/blkdev.h | 5 +++-- 8 files changed, 47 insertions(+), 34 deletions(-) (limited to 'drivers') diff --git a/block/blk-map.c b/block/blk-map.c index ea1bf53929e..ac21b7397e1 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -41,7 +41,8 @@ static int __blk_rq_unmap_user(struct bio *bio) } static int __blk_rq_map_user(struct request_queue *q, struct request *rq, - void __user *ubuf, unsigned int len) + void __user *ubuf, unsigned int len, + gfp_t gfp_mask) { unsigned long uaddr; unsigned int alignment; @@ -57,9 +58,9 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, uaddr = (unsigned long) ubuf; alignment = queue_dma_alignment(q) | q->dma_pad_mask; if (!(uaddr & alignment) && !(len & alignment)) - bio = bio_map_user(q, NULL, uaddr, len, reading); + bio = bio_map_user(q, NULL, uaddr, len, reading, gfp_mask); else - bio = bio_copy_user(q, uaddr, len, reading); + bio = bio_copy_user(q, uaddr, len, reading, gfp_mask); if (IS_ERR(bio)) return PTR_ERR(bio); @@ -90,6 +91,7 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, * @rq: request structure to fill * @ubuf: the user buffer * @len: length of user data + * @gfp_mask: memory allocation flags * * Description: * Data will be mapped directly for zero copy I/O, if possible. Otherwise @@ -105,7 +107,7 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, * unmapping. */ int blk_rq_map_user(struct request_queue *q, struct request *rq, - void __user *ubuf, unsigned long len) + void __user *ubuf, unsigned long len, gfp_t gfp_mask) { unsigned long bytes_read = 0; struct bio *bio = NULL; @@ -132,7 +134,7 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq, if (end - start > BIO_MAX_PAGES) map_len -= PAGE_SIZE; - ret = __blk_rq_map_user(q, rq, ubuf, map_len); + ret = __blk_rq_map_user(q, rq, ubuf, map_len, gfp_mask); if (ret < 0) goto unmap_rq; if (!bio) @@ -160,6 +162,7 @@ EXPORT_SYMBOL(blk_rq_map_user); * @iov: pointer to the iovec * @iov_count: number of elements in the iovec * @len: I/O byte count + * @gfp_mask: memory allocation flags * * Description: * Data will be mapped directly for zero copy I/O, if possible. Otherwise @@ -175,7 +178,8 @@ EXPORT_SYMBOL(blk_rq_map_user); * unmapping. */ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, - struct sg_iovec *iov, int iov_count, unsigned int len) + struct sg_iovec *iov, int iov_count, unsigned int len, + gfp_t gfp_mask) { struct bio *bio; int i, read = rq_data_dir(rq) == READ; @@ -194,9 +198,9 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, } if (unaligned || (q->dma_pad_mask & len)) - bio = bio_copy_user_iov(q, iov, iov_count, read); + bio = bio_copy_user_iov(q, iov, iov_count, read, gfp_mask); else - bio = bio_map_user_iov(q, NULL, iov, iov_count, read); + bio = bio_map_user_iov(q, NULL, iov, iov_count, read, gfp_mask); if (IS_ERR(bio)) return PTR_ERR(bio); diff --git a/block/bsg.c b/block/bsg.c index 0aae8d7ba99..e7a142e9916 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -283,7 +283,8 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, int has_write_perm) next_rq->cmd_type = rq->cmd_type; dxferp = (void*)(unsigned long)hdr->din_xferp; - ret = blk_rq_map_user(q, next_rq, dxferp, hdr->din_xfer_len); + ret = blk_rq_map_user(q, next_rq, dxferp, hdr->din_xfer_len, + GFP_KERNEL); if (ret) goto out; } @@ -298,7 +299,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, int has_write_perm) dxfer_len = 0; if (dxfer_len) { - ret = blk_rq_map_user(q, rq, dxferp, dxfer_len); + ret = blk_rq_map_user(q, rq, dxferp, dxfer_len, GFP_KERNEL); if (ret) goto out; } diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 3aab80a4c48..f49d6a11a69 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -315,10 +315,11 @@ static int sg_io(struct file *file, struct request_queue *q, } ret = blk_rq_map_user_iov(q, rq, iov, hdr->iovec_count, - hdr->dxfer_len); + hdr->dxfer_len, GFP_KERNEL); kfree(iov); } else if (hdr->dxfer_len) - ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len); + ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len, + GFP_KERNEL); if (ret) goto out; diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 74031de517e..e861d24a6d3 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2097,7 +2097,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, len = nr * CD_FRAMESIZE_RAW; - ret = blk_rq_map_user(q, rq, ubuf, len); + ret = blk_rq_map_user(q, rq, ubuf, len, GFP_KERNEL); if (ret) break; diff --git a/drivers/scsi/scsi_tgt_lib.c b/drivers/scsi/scsi_tgt_lib.c index 257e097c39a..2a4fd820d61 100644 --- a/drivers/scsi/scsi_tgt_lib.c +++ b/drivers/scsi/scsi_tgt_lib.c @@ -362,7 +362,7 @@ static int scsi_map_user_pages(struct scsi_tgt_cmd *tcmd, struct scsi_cmnd *cmd, int err; dprintk("%lx %u\n", uaddr, len); - err = blk_rq_map_user(q, rq, (void *)uaddr, len); + err = blk_rq_map_user(q, rq, (void *)uaddr, len, GFP_KERNEL); if (err) { /* * TODO: need to fixup sg_tablesize, max_segment_size, diff --git a/fs/bio.c b/fs/bio.c index 6a637b5c24b..3d2e9ad2472 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -558,13 +558,14 @@ int bio_uncopy_user(struct bio *bio) * @iov: the iovec. * @iov_count: number of elements in the iovec * @write_to_vm: bool indicating writing to pages or not + * @gfp_mask: memory allocation flags * * Prepares and returns a bio for indirect user io, bouncing data * to/from kernel pages as necessary. Must be paired with * call bio_uncopy_user() on io completion. */ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, - int iov_count, int write_to_vm) + int iov_count, int write_to_vm, gfp_t gfp_mask) { struct bio_map_data *bmd; struct bio_vec *bvec; @@ -587,12 +588,12 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, len += iov[i].iov_len; } - bmd = bio_alloc_map_data(nr_pages, iov_count, GFP_KERNEL); + bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask); if (!bmd) return ERR_PTR(-ENOMEM); ret = -ENOMEM; - bio = bio_alloc(GFP_KERNEL, nr_pages); + bio = bio_alloc(gfp_mask, nr_pages); if (!bio) goto out_bmd; @@ -605,7 +606,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, if (bytes > len) bytes = len; - page = alloc_page(q->bounce_gfp | GFP_KERNEL); + page = alloc_page(q->bounce_gfp | gfp_mask); if (!page) { ret = -ENOMEM; break; @@ -647,26 +648,27 @@ out_bmd: * @uaddr: start of user address * @len: length in bytes * @write_to_vm: bool indicating writing to pages or not + * @gfp_mask: memory allocation flags * * Prepares and returns a bio for indirect user io, bouncing data * to/from kernel pages as necessary. Must be paired with * call bio_uncopy_user() on io completion. */ struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr, - unsigned int len, int write_to_vm) + unsigned int len, int write_to_vm, gfp_t gfp_mask) { struct sg_iovec iov; iov.iov_base = (void __user *)uaddr; iov.iov_len = len; - return bio_copy_user_iov(q, &iov, 1, write_to_vm); + return bio_copy_user_iov(q, &iov, 1, write_to_vm, gfp_mask); } static struct bio *__bio_map_user_iov(struct request_queue *q, struct block_device *bdev, struct sg_iovec *iov, int iov_count, - int write_to_vm) + int write_to_vm, gfp_t gfp_mask) { int i, j; int nr_pages = 0; @@ -692,12 +694,12 @@ static struct bio *__bio_map_user_iov(struct request_queue *q, if (!nr_pages) return ERR_PTR(-EINVAL); - bio = bio_alloc(GFP_KERNEL, nr_pages); + bio = bio_alloc(gfp_mask, nr_pages); if (!bio) return ERR_PTR(-ENOMEM); ret = -ENOMEM; - pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); + pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask); if (!pages) goto out; @@ -776,19 +778,21 @@ static struct bio *__bio_map_user_iov(struct request_queue *q, * @uaddr: start of user address * @len: length in bytes * @write_to_vm: bool indicating writing to pages or not + * @gfp_mask: memory allocation flags * * Map the user space address into a bio suitable for io to a block * device. Returns an error pointer in case of error. */ struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, - unsigned long uaddr, unsigned int len, int write_to_vm) + unsigned long uaddr, unsigned int len, int write_to_vm, + gfp_t gfp_mask) { struct sg_iovec iov; iov.iov_base = (void __user *)uaddr; iov.iov_len = len; - return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm); + return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask); } /** @@ -798,18 +802,19 @@ struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, * @iov: the iovec. * @iov_count: number of elements in the iovec * @write_to_vm: bool indicating writing to pages or not + * @gfp_mask: memory allocation flags * * Map the user space address into a bio suitable for io to a block * device. Returns an error pointer in case of error. */ struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev, struct sg_iovec *iov, int iov_count, - int write_to_vm) + int write_to_vm, gfp_t gfp_mask) { struct bio *bio; - bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm); - + bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm, + gfp_mask); if (IS_ERR(bio)) return bio; diff --git a/include/linux/bio.h b/include/linux/bio.h index 13aba20edb2..200b185c3e8 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -325,11 +325,11 @@ extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, unsigned int, unsigned int); extern int bio_get_nr_vecs(struct block_device *); extern struct bio *bio_map_user(struct request_queue *, struct block_device *, - unsigned long, unsigned int, int); + unsigned long, unsigned int, int, gfp_t); struct sg_iovec; extern struct bio *bio_map_user_iov(struct request_queue *, struct block_device *, - struct sg_iovec *, int, int); + struct sg_iovec *, int, int, gfp_t); extern void bio_unmap_user(struct bio *); extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, gfp_t); @@ -337,9 +337,10 @@ extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int, gfp_t, int); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); -extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int); +extern struct bio *bio_copy_user(struct request_queue *, unsigned long, + unsigned int, int, gfp_t); extern struct bio *bio_copy_user_iov(struct request_queue *, struct sg_iovec *, - int, int); + int, int, gfp_t); extern int bio_uncopy_user(struct bio *); void zero_fill_bio(struct bio *bio); extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 12df8efeef1..00e388d0e22 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -710,11 +710,12 @@ extern void __blk_stop_queue(struct request_queue *q); extern void __blk_run_queue(struct request_queue *); extern void blk_run_queue(struct request_queue *); extern void blk_start_queueing(struct request_queue *); -extern int blk_rq_map_user(struct request_queue *, struct request *, void __user *, unsigned long); +extern int blk_rq_map_user(struct request_queue *, struct request *, + void __user *, unsigned long, gfp_t); extern int blk_rq_unmap_user(struct bio *); extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); extern int blk_rq_map_user_iov(struct request_queue *, struct request *, - struct sg_iovec *, int, unsigned int); + struct sg_iovec *, int, unsigned int, gfp_t); extern int blk_execute_rq(struct request_queue *, struct gendisk *, struct request *, int); extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, -- cgit v1.2.3-70-g09d2 From 152e283fdfea0cd11e297d982378b55937842dde Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 28 Aug 2008 16:17:06 +0900 Subject: block: introduce struct rq_map_data to use reserved pages This patch introduces struct rq_map_data to enable bio_copy_use_iov() use reserved pages. Currently, bio_copy_user_iov allocates bounce pages but drivers/scsi/sg.c wants to allocate pages by itself and use them. struct rq_map_data can be used to pass allocated pages to bio_copy_user_iov. The current users of bio_copy_user_iov simply passes NULL (they don't want to use pre-allocated pages). Signed-off-by: FUJITA Tomonori Cc: Jens Axboe Cc: Douglas Gilbert Cc: Mike Christie Cc: James Bottomley Signed-off-by: Jens Axboe --- block/blk-map.c | 26 ++++++++++++-------- block/bsg.c | 7 +++--- block/scsi_ioctl.c | 4 ++-- drivers/cdrom/cdrom.c | 2 +- drivers/scsi/scsi_tgt_lib.c | 2 +- fs/bio.c | 58 ++++++++++++++++++++++++++++++++------------- include/linux/bio.h | 8 ++++--- include/linux/blkdev.h | 12 ++++++++-- 8 files changed, 80 insertions(+), 39 deletions(-) (limited to 'drivers') diff --git a/block/blk-map.c b/block/blk-map.c index ac21b7397e1..dad6a290783 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -41,8 +41,8 @@ static int __blk_rq_unmap_user(struct bio *bio) } static int __blk_rq_map_user(struct request_queue *q, struct request *rq, - void __user *ubuf, unsigned int len, - gfp_t gfp_mask) + struct rq_map_data *map_data, void __user *ubuf, + unsigned int len, gfp_t gfp_mask) { unsigned long uaddr; unsigned int alignment; @@ -57,10 +57,10 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, */ uaddr = (unsigned long) ubuf; alignment = queue_dma_alignment(q) | q->dma_pad_mask; - if (!(uaddr & alignment) && !(len & alignment)) + if (!(uaddr & alignment) && !(len & alignment) && !map_data) bio = bio_map_user(q, NULL, uaddr, len, reading, gfp_mask); else - bio = bio_copy_user(q, uaddr, len, reading, gfp_mask); + bio = bio_copy_user(q, map_data, uaddr, len, reading, gfp_mask); if (IS_ERR(bio)) return PTR_ERR(bio); @@ -89,6 +89,7 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, * blk_rq_map_user - map user data to a request, for REQ_TYPE_BLOCK_PC usage * @q: request queue where request should be inserted * @rq: request structure to fill + * @map_data: pointer to the rq_map_data holding pages (if necessary) * @ubuf: the user buffer * @len: length of user data * @gfp_mask: memory allocation flags @@ -107,7 +108,8 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, * unmapping. */ int blk_rq_map_user(struct request_queue *q, struct request *rq, - void __user *ubuf, unsigned long len, gfp_t gfp_mask) + struct rq_map_data *map_data, void __user *ubuf, + unsigned long len, gfp_t gfp_mask) { unsigned long bytes_read = 0; struct bio *bio = NULL; @@ -134,7 +136,8 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq, if (end - start > BIO_MAX_PAGES) map_len -= PAGE_SIZE; - ret = __blk_rq_map_user(q, rq, ubuf, map_len, gfp_mask); + ret = __blk_rq_map_user(q, rq, map_data, ubuf, map_len, + gfp_mask); if (ret < 0) goto unmap_rq; if (!bio) @@ -159,6 +162,7 @@ EXPORT_SYMBOL(blk_rq_map_user); * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage * @q: request queue where request should be inserted * @rq: request to map data to + * @map_data: pointer to the rq_map_data holding pages (if necessary) * @iov: pointer to the iovec * @iov_count: number of elements in the iovec * @len: I/O byte count @@ -178,8 +182,8 @@ EXPORT_SYMBOL(blk_rq_map_user); * unmapping. */ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, - struct sg_iovec *iov, int iov_count, unsigned int len, - gfp_t gfp_mask) + struct rq_map_data *map_data, struct sg_iovec *iov, + int iov_count, unsigned int len, gfp_t gfp_mask) { struct bio *bio; int i, read = rq_data_dir(rq) == READ; @@ -197,8 +201,9 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, } } - if (unaligned || (q->dma_pad_mask & len)) - bio = bio_copy_user_iov(q, iov, iov_count, read, gfp_mask); + if (unaligned || (q->dma_pad_mask & len) || map_data) + bio = bio_copy_user_iov(q, map_data, iov, iov_count, read, + gfp_mask); else bio = bio_map_user_iov(q, NULL, iov, iov_count, read, gfp_mask); @@ -220,6 +225,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, rq->buffer = rq->data = NULL; return 0; } +EXPORT_SYMBOL(blk_rq_map_user_iov); /** * blk_rq_unmap_user - unmap a request with user data diff --git a/block/bsg.c b/block/bsg.c index e7a142e9916..56cb343c76d 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -283,8 +283,8 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, int has_write_perm) next_rq->cmd_type = rq->cmd_type; dxferp = (void*)(unsigned long)hdr->din_xferp; - ret = blk_rq_map_user(q, next_rq, dxferp, hdr->din_xfer_len, - GFP_KERNEL); + ret = blk_rq_map_user(q, next_rq, NULL, dxferp, + hdr->din_xfer_len, GFP_KERNEL); if (ret) goto out; } @@ -299,7 +299,8 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, int has_write_perm) dxfer_len = 0; if (dxfer_len) { - ret = blk_rq_map_user(q, rq, dxferp, dxfer_len, GFP_KERNEL); + ret = blk_rq_map_user(q, rq, NULL, dxferp, dxfer_len, + GFP_KERNEL); if (ret) goto out; } diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index f49d6a11a69..c34272a348f 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -314,11 +314,11 @@ static int sg_io(struct file *file, struct request_queue *q, goto out; } - ret = blk_rq_map_user_iov(q, rq, iov, hdr->iovec_count, + ret = blk_rq_map_user_iov(q, rq, NULL, iov, hdr->iovec_count, hdr->dxfer_len, GFP_KERNEL); kfree(iov); } else if (hdr->dxfer_len) - ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len, + ret = blk_rq_map_user(q, rq, NULL, hdr->dxferp, hdr->dxfer_len, GFP_KERNEL); if (ret) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index e861d24a6d3..d47f2f80acc 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2097,7 +2097,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, len = nr * CD_FRAMESIZE_RAW; - ret = blk_rq_map_user(q, rq, ubuf, len, GFP_KERNEL); + ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL); if (ret) break; diff --git a/drivers/scsi/scsi_tgt_lib.c b/drivers/scsi/scsi_tgt_lib.c index 2a4fd820d61..3117bb106b5 100644 --- a/drivers/scsi/scsi_tgt_lib.c +++ b/drivers/scsi/scsi_tgt_lib.c @@ -362,7 +362,7 @@ static int scsi_map_user_pages(struct scsi_tgt_cmd *tcmd, struct scsi_cmnd *cmd, int err; dprintk("%lx %u\n", uaddr, len); - err = blk_rq_map_user(q, rq, (void *)uaddr, len, GFP_KERNEL); + err = blk_rq_map_user(q, rq, NULL, (void *)uaddr, len, GFP_KERNEL); if (err) { /* * TODO: need to fixup sg_tablesize, max_segment_size, diff --git a/fs/bio.c b/fs/bio.c index 3d2e9ad2472..a2f072647cd 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -439,16 +439,19 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len, struct bio_map_data { struct bio_vec *iovecs; - int nr_sgvecs; struct sg_iovec *sgvecs; + int nr_sgvecs; + int is_our_pages; }; static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio, - struct sg_iovec *iov, int iov_count) + struct sg_iovec *iov, int iov_count, + int is_our_pages) { memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt); memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count); bmd->nr_sgvecs = iov_count; + bmd->is_our_pages = is_our_pages; bio->bi_private = bmd; } @@ -483,7 +486,8 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count, } static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, - struct sg_iovec *iov, int iov_count, int uncopy) + struct sg_iovec *iov, int iov_count, int uncopy, + int do_free_page) { int ret = 0, i; struct bio_vec *bvec; @@ -526,7 +530,7 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, } } - if (uncopy) + if (do_free_page) __free_page(bvec->bv_page); } @@ -545,7 +549,8 @@ int bio_uncopy_user(struct bio *bio) struct bio_map_data *bmd = bio->bi_private; int ret; - ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, bmd->nr_sgvecs, 1); + ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, bmd->nr_sgvecs, 1, + bmd->is_our_pages); bio_free_map_data(bmd); bio_put(bio); @@ -555,6 +560,7 @@ int bio_uncopy_user(struct bio *bio) /** * bio_copy_user_iov - copy user data to bio * @q: destination block queue + * @map_data: pointer to the rq_map_data holding pages (if necessary) * @iov: the iovec. * @iov_count: number of elements in the iovec * @write_to_vm: bool indicating writing to pages or not @@ -564,8 +570,10 @@ int bio_uncopy_user(struct bio *bio) * to/from kernel pages as necessary. Must be paired with * call bio_uncopy_user() on io completion. */ -struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, - int iov_count, int write_to_vm, gfp_t gfp_mask) +struct bio *bio_copy_user_iov(struct request_queue *q, + struct rq_map_data *map_data, + struct sg_iovec *iov, int iov_count, + int write_to_vm, gfp_t gfp_mask) { struct bio_map_data *bmd; struct bio_vec *bvec; @@ -600,13 +608,26 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, bio->bi_rw |= (!write_to_vm << BIO_RW); ret = 0; + i = 0; while (len) { - unsigned int bytes = PAGE_SIZE; + unsigned int bytes; + + if (map_data) + bytes = 1U << (PAGE_SHIFT + map_data->page_order); + else + bytes = PAGE_SIZE; if (bytes > len) bytes = len; - page = alloc_page(q->bounce_gfp | gfp_mask); + if (map_data) { + if (i == map_data->nr_entries) { + ret = -ENOMEM; + break; + } + page = map_data->pages[i++]; + } else + page = alloc_page(q->bounce_gfp | gfp_mask); if (!page) { ret = -ENOMEM; break; @@ -625,16 +646,17 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, * success */ if (!write_to_vm) { - ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0); + ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 0); if (ret) goto cleanup; } - bio_set_map_data(bmd, bio, iov, iov_count); + bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1); return bio; cleanup: - bio_for_each_segment(bvec, bio, i) - __free_page(bvec->bv_page); + if (!map_data) + bio_for_each_segment(bvec, bio, i) + __free_page(bvec->bv_page); bio_put(bio); out_bmd: @@ -645,6 +667,7 @@ out_bmd: /** * bio_copy_user - copy user data to bio * @q: destination block queue + * @map_data: pointer to the rq_map_data holding pages (if necessary) * @uaddr: start of user address * @len: length in bytes * @write_to_vm: bool indicating writing to pages or not @@ -654,15 +677,16 @@ out_bmd: * to/from kernel pages as necessary. Must be paired with * call bio_uncopy_user() on io completion. */ -struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr, - unsigned int len, int write_to_vm, gfp_t gfp_mask) +struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data, + unsigned long uaddr, unsigned int len, + int write_to_vm, gfp_t gfp_mask) { struct sg_iovec iov; iov.iov_base = (void __user *)uaddr; iov.iov_len = len; - return bio_copy_user_iov(q, &iov, 1, write_to_vm, gfp_mask); + return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask); } static struct bio *__bio_map_user_iov(struct request_queue *q, @@ -1028,7 +1052,7 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, bio->bi_private = bmd; bio->bi_end_io = bio_copy_kern_endio; - bio_set_map_data(bmd, bio, &iov, 1); + bio_set_map_data(bmd, bio, &iov, 1, 1); return bio; cleanup: bio_for_each_segment(bvec, bio, i) diff --git a/include/linux/bio.h b/include/linux/bio.h index 200b185c3e8..bc386cd5e99 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -327,6 +327,7 @@ extern int bio_get_nr_vecs(struct block_device *); extern struct bio *bio_map_user(struct request_queue *, struct block_device *, unsigned long, unsigned int, int, gfp_t); struct sg_iovec; +struct rq_map_data; extern struct bio *bio_map_user_iov(struct request_queue *, struct block_device *, struct sg_iovec *, int, int, gfp_t); @@ -337,9 +338,10 @@ extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int, gfp_t, int); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); -extern struct bio *bio_copy_user(struct request_queue *, unsigned long, - unsigned int, int, gfp_t); -extern struct bio *bio_copy_user_iov(struct request_queue *, struct sg_iovec *, +extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *, + unsigned long, unsigned int, int, gfp_t); +extern struct bio *bio_copy_user_iov(struct request_queue *, + struct rq_map_data *, struct sg_iovec *, int, int, gfp_t); extern int bio_uncopy_user(struct bio *); void zero_fill_bio(struct bio *bio); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 00e388d0e22..358ac423ed2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -642,6 +642,12 @@ static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) } #endif /* CONFIG_MMU */ +struct rq_map_data { + struct page **pages; + int page_order; + int nr_entries; +}; + struct req_iterator { int i; struct bio *bio; @@ -711,11 +717,13 @@ extern void __blk_run_queue(struct request_queue *); extern void blk_run_queue(struct request_queue *); extern void blk_start_queueing(struct request_queue *); extern int blk_rq_map_user(struct request_queue *, struct request *, - void __user *, unsigned long, gfp_t); + struct rq_map_data *, void __user *, unsigned long, + gfp_t); extern int blk_rq_unmap_user(struct bio *); extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); extern int blk_rq_map_user_iov(struct request_queue *, struct request *, - struct sg_iovec *, int, unsigned int, gfp_t); + struct rq_map_data *, struct sg_iovec *, int, + unsigned int, gfp_t); extern int blk_execute_rq(struct request_queue *, struct gendisk *, struct request *, int); extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, -- cgit v1.2.3-70-g09d2 From 10865dfa34e7552c4c64606edcdf1e21a110c985 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 28 Aug 2008 16:17:07 +0900 Subject: sg: convert the non-data path to use the block layer This patch converts the non data path to use the block layer functions (blk_get_request, blk_execute_rq_nowait, etc) instead of uses scsi_execute_async(). Signed-off-by: FUJITA Tomonori Signed-off-by: Douglas Gilbert Cc: Mike Christie Cc: James Bottomley Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 48 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 661f9f21650..487c7776cc4 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -137,6 +137,7 @@ typedef struct sg_request { /* SG_MAX_QUEUE requests outstanding per file */ char orphan; /* 1 -> drop on sight, 0 -> normal */ char sg_io_owned; /* 1 -> packet belongs to SG_IO */ volatile char done; /* 0->before bh, 1->before read, 2->read */ + struct request *rq; } Sg_request; typedef struct sg_fd { /* holds the state of a file descriptor */ @@ -176,7 +177,7 @@ typedef struct sg_device { /* holds the state of each scsi generic device */ static int sg_fasync(int fd, struct file *filp, int mode); /* tasklet or soft irq callback */ static void sg_cmd_done(void *data, char *sense, int result, int resid); -static int sg_start_req(Sg_request * srp); +static int sg_start_req(Sg_request *srp, unsigned char *cmd); static void sg_finish_rem_req(Sg_request * srp); static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size); static int sg_build_sgat(Sg_scatter_hold * schp, const Sg_fd * sfp, @@ -229,6 +230,11 @@ static int sg_allow_access(struct file *filp, unsigned char *cmd) cmd, filp->f_mode & FMODE_WRITE); } +static void sg_rq_end_io(struct request *rq, int uptodate) +{ + sg_cmd_done(rq->end_io_data, rq->sense, rq->errors, rq->data_len); +} + static int sg_open(struct inode *inode, struct file *filp) { @@ -732,7 +738,8 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp, SCSI_LOG_TIMEOUT(4, printk("sg_common_write: scsi opcode=0x%02x, cmd_size=%d\n", (int) cmnd[0], (int) hp->cmd_len)); - if ((k = sg_start_req(srp))) { + k = sg_start_req(srp, cmnd); + if (k) { SCSI_LOG_TIMEOUT(1, printk("sg_common_write: start_req err=%d\n", k)); sg_finish_rem_req(srp); return k; /* probably out of space --> ENOMEM */ @@ -765,6 +772,12 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp, hp->duration = jiffies_to_msecs(jiffies); /* Now send everything of to mid-level. The next time we hear about this packet is when sg_cmd_done() is called (i.e. a callback). */ + if (srp->rq) { + srp->rq->timeout = timeout; + blk_execute_rq_nowait(sdp->device->request_queue, sdp->disk, + srp->rq, 1, sg_rq_end_io); + return 0; + } if (scsi_execute_async(sdp->device, cmnd, hp->cmd_len, data_dir, srp->data.buffer, hp->dxfer_len, srp->data.k_use_sg, timeout, SG_DEFAULT_RETRIES, srp, sg_cmd_done, @@ -1634,8 +1647,32 @@ exit_sg(void) idr_destroy(&sg_index_idr); } -static int -sg_start_req(Sg_request * srp) +static int __sg_start_req(struct sg_request *srp, struct sg_io_hdr *hp, + unsigned char *cmd) +{ + struct sg_fd *sfp = srp->parentfp; + struct request_queue *q = sfp->parentdp->device->request_queue; + struct request *rq; + int rw = hp->dxfer_direction == SG_DXFER_TO_DEV ? WRITE : READ; + + rq = blk_get_request(q, rw, GFP_ATOMIC); + if (!rq) + return -ENOMEM; + + memcpy(rq->cmd, cmd, hp->cmd_len); + + rq->cmd_len = hp->cmd_len; + rq->cmd_type = REQ_TYPE_BLOCK_PC; + + srp->rq = rq; + rq->end_io_data = srp; + rq->sense = srp->sense_b; + rq->retries = SG_DEFAULT_RETRIES; + + return 0; +} + +static int sg_start_req(Sg_request *srp, unsigned char *cmd) { int res; Sg_fd *sfp = srp->parentfp; @@ -1646,8 +1683,10 @@ sg_start_req(Sg_request * srp) Sg_scatter_hold *rsv_schp = &sfp->reserve; SCSI_LOG_TIMEOUT(4, printk("sg_start_req: dxfer_len=%d\n", dxfer_len)); + if ((dxfer_len <= 0) || (dxfer_dir == SG_DXFER_NONE)) - return 0; + return __sg_start_req(srp, hp, cmd); + if (sg_allow_dio && (hp->flags & SG_FLAG_DIRECT_IO) && (dxfer_dir != SG_DXFER_UNKNOWN) && (0 == hp->iovec_count) && (!sfp->parentdp->device->host->unchecked_isa_dma)) { @@ -1678,6 +1717,10 @@ sg_finish_rem_req(Sg_request * srp) sg_unlink_reserve(sfp, srp); else sg_remove_scat(req_schp); + + if (srp->rq) + blk_put_request(srp->rq); + sg_remove_request(sfp, srp); } -- cgit v1.2.3-70-g09d2 From 6e5a30cba5e7c03b2cd564e968f1dd667a0f7c42 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 28 Aug 2008 16:17:08 +0900 Subject: sg: convert the direct IO path to use the block layer This patch converts the direct IO path (SG_FLAG_DIRECT_IO) to use the block layer functions (blk_get_request, blk_execute_rq_nowait, blk_rq_map_user, etc) instead of scsi_execute_async(). Signed-off-by: FUJITA Tomonori Signed-off-by: Douglas Gilbert Cc: Mike Christie Cc: James Bottomley Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 173 +++++++++--------------------------------------------- 1 file changed, 27 insertions(+), 146 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 487c7776cc4..cb6de0752ee 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -138,6 +138,7 @@ typedef struct sg_request { /* SG_MAX_QUEUE requests outstanding per file */ char sg_io_owned; /* 1 -> packet belongs to SG_IO */ volatile char done; /* 0->before bh, 1->before read, 2->read */ struct request *rq; + struct bio *bio; } Sg_request; typedef struct sg_fd { /* holds the state of a file descriptor */ @@ -1679,21 +1680,29 @@ static int sg_start_req(Sg_request *srp, unsigned char *cmd) sg_io_hdr_t *hp = &srp->header; int dxfer_len = (int) hp->dxfer_len; int dxfer_dir = hp->dxfer_direction; + unsigned long uaddr = (unsigned long)hp->dxferp; Sg_scatter_hold *req_schp = &srp->data; Sg_scatter_hold *rsv_schp = &sfp->reserve; + struct request_queue *q = sfp->parentdp->device->request_queue; + unsigned long alignment = queue_dma_alignment(q) | q->dma_pad_mask; SCSI_LOG_TIMEOUT(4, printk("sg_start_req: dxfer_len=%d\n", dxfer_len)); if ((dxfer_len <= 0) || (dxfer_dir == SG_DXFER_NONE)) return __sg_start_req(srp, hp, cmd); +#ifdef SG_ALLOW_DIO_CODE if (sg_allow_dio && (hp->flags & SG_FLAG_DIRECT_IO) && (dxfer_dir != SG_DXFER_UNKNOWN) && (0 == hp->iovec_count) && - (!sfp->parentdp->device->host->unchecked_isa_dma)) { - res = sg_build_direct(srp, sfp, dxfer_len); - if (res <= 0) /* -ve -> error, 0 -> done, 1 -> try indirect */ - return res; + (!sfp->parentdp->device->host->unchecked_isa_dma) && + !(uaddr & alignment) && !(dxfer_len & alignment)) { + res = __sg_start_req(srp, hp, cmd); + if (!res) + res = sg_build_direct(srp, sfp, dxfer_len); + + return res; } +#endif if ((!sg_res_in_use(sfp)) && (dxfer_len <= rsv_schp->bufflen)) sg_link_reserve(sfp, srp, dxfer_len); else { @@ -1718,8 +1727,11 @@ sg_finish_rem_req(Sg_request * srp) else sg_remove_scat(req_schp); - if (srp->rq) + if (srp->rq) { + if (srp->bio) + blk_rq_unmap_user(srp->bio); blk_put_request(srp->rq); + } sg_remove_request(sfp, srp); } @@ -1746,151 +1758,23 @@ sg_build_sgat(Sg_scatter_hold * schp, const Sg_fd * sfp, int tablesize) return tablesize; /* number of scat_gath elements allocated */ } -#ifdef SG_ALLOW_DIO_CODE -/* vvvvvvvv following code borrowed from st driver's direct IO vvvvvvvvv */ - /* TODO: hopefully we can use the generic block layer code */ - -/* Pin down user pages and put them into a scatter gather list. Returns <= 0 if - - mapping of all pages not successful - (i.e., either completely successful or fails) -*/ -static int -st_map_user_pages(struct scatterlist *sgl, const unsigned int max_pages, - unsigned long uaddr, size_t count, int rw) -{ - unsigned long end = (uaddr + count + PAGE_SIZE - 1) >> PAGE_SHIFT; - unsigned long start = uaddr >> PAGE_SHIFT; - const int nr_pages = end - start; - int res, i, j; - struct page **pages; - - /* User attempted Overflow! */ - if ((uaddr + count) < uaddr) - return -EINVAL; - - /* Too big */ - if (nr_pages > max_pages) - return -ENOMEM; - - /* Hmm? */ - if (count == 0) - return 0; - - if ((pages = kmalloc(max_pages * sizeof(*pages), GFP_ATOMIC)) == NULL) - return -ENOMEM; - - /* Try to fault in all of the necessary pages */ - down_read(¤t->mm->mmap_sem); - /* rw==READ means read from drive, write into memory area */ - res = get_user_pages( - current, - current->mm, - uaddr, - nr_pages, - rw == READ, - 0, /* don't force */ - pages, - NULL); - up_read(¤t->mm->mmap_sem); - - /* Errors and no page mapped should return here */ - if (res < nr_pages) - goto out_unmap; - - for (i=0; i < nr_pages; i++) { - /* FIXME: flush superflous for rw==READ, - * probably wrong function for rw==WRITE - */ - flush_dcache_page(pages[i]); - /* ?? Is locking needed? I don't think so */ - /* if (!trylock_page(pages[i])) - goto out_unlock; */ - } - - sg_set_page(sgl, pages[0], 0, uaddr & ~PAGE_MASK); - if (nr_pages > 1) { - sgl[0].length = PAGE_SIZE - sgl[0].offset; - count -= sgl[0].length; - for (i=1; i < nr_pages ; i++) - sg_set_page(&sgl[i], pages[i], count < PAGE_SIZE ? count : PAGE_SIZE, 0); - } - else { - sgl[0].length = count; - } - - kfree(pages); - return nr_pages; - - out_unmap: - if (res > 0) { - for (j=0; j < res; j++) - page_cache_release(pages[j]); - res = 0; - } - kfree(pages); - return res; -} - - -/* And unmap them... */ -static int -st_unmap_user_pages(struct scatterlist *sgl, const unsigned int nr_pages, - int dirtied) -{ - int i; - - for (i=0; i < nr_pages; i++) { - struct page *page = sg_page(&sgl[i]); - - if (dirtied) - SetPageDirty(page); - /* unlock_page(page); */ - /* FIXME: cache flush missing for rw==READ - * FIXME: call the correct reference counting function - */ - page_cache_release(page); - } - - return 0; -} - -/* ^^^^^^^^ above code borrowed from st driver's direct IO ^^^^^^^^^ */ -#endif - - /* Returns: -ve -> error, 0 -> done, 1 -> try indirect */ static int sg_build_direct(Sg_request * srp, Sg_fd * sfp, int dxfer_len) { -#ifdef SG_ALLOW_DIO_CODE sg_io_hdr_t *hp = &srp->header; Sg_scatter_hold *schp = &srp->data; - int sg_tablesize = sfp->parentdp->sg_tablesize; - int mx_sc_elems, res; - struct scsi_device *sdev = sfp->parentdp->device; - - if (((unsigned long)hp->dxferp & - queue_dma_alignment(sdev->request_queue)) != 0) - return 1; + int res; + struct request *rq = srp->rq; + struct request_queue *q = sfp->parentdp->device->request_queue; - mx_sc_elems = sg_build_sgat(schp, sfp, sg_tablesize); - if (mx_sc_elems <= 0) { - return 1; - } - res = st_map_user_pages(schp->buffer, mx_sc_elems, - (unsigned long)hp->dxferp, dxfer_len, - (SG_DXFER_TO_DEV == hp->dxfer_direction) ? 1 : 0); - if (res <= 0) { - sg_remove_scat(schp); - return 1; - } - schp->k_use_sg = res; + res = blk_rq_map_user(q, rq, NULL, hp->dxferp, dxfer_len, GFP_ATOMIC); + if (res) + return res; + srp->bio = rq->bio; schp->dio_in_use = 1; hp->info |= SG_INFO_DIRECT_IO; return 0; -#else - return 1; -#endif } static int @@ -2069,11 +1953,7 @@ sg_remove_scat(Sg_scatter_hold * schp) if (schp->buffer && (schp->sglist_len > 0)) { struct scatterlist *sg = schp->buffer; - if (schp->dio_in_use) { -#ifdef SG_ALLOW_DIO_CODE - st_unmap_user_pages(sg, schp->k_use_sg, TRUE); -#endif - } else { + if (!schp->dio_in_use) { int k; for (k = 0; (k < schp->k_use_sg) && sg_page(sg); @@ -2083,8 +1963,9 @@ sg_remove_scat(Sg_scatter_hold * schp) k, sg_page(sg), sg->length)); sg_page_free(sg_page(sg), sg->length); } + + kfree(schp->buffer); } - kfree(schp->buffer); } memset(schp, 0, sizeof (*schp)); } -- cgit v1.2.3-70-g09d2 From 10db10d144c0248f285242f79daf6b9de6b00a62 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 29 Aug 2008 12:32:18 +0200 Subject: sg: convert the indirect IO path to use the block layer This patch converts the indirect IO path (including mmap IO and old struct sg_header) to use the block layer functions (blk_get_request, blk_execute_rq_nowait, blk_rq_map_user, etc) instead of scsi_execute_async(). [Jens: fixed compile error with SCSI logging enabled] Signed-off-by: FUJITA Tomonori Signed-off-by: Douglas Gilbert Cc: Mike Christie Cc: James Bottomley Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 397 +++++++++++++++--------------------------------------- 1 file changed, 105 insertions(+), 292 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index cb6de0752ee..d6391666502 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -47,7 +47,6 @@ static int sg_version_num = 30534; /* 2 digits for each component */ #include #include #include -#include #include #include @@ -119,7 +118,8 @@ typedef struct sg_scatter_hold { /* holding area for scsi scatter gather info */ unsigned sglist_len; /* size of malloc'd scatter-gather list ++ */ unsigned bufflen; /* Size of (aggregate) data buffer */ unsigned b_malloc_len; /* actual len malloc'ed in buffer */ - struct scatterlist *buffer;/* scatter list */ + struct page **pages; + int page_order; char dio_in_use; /* 0->indirect IO (or mmap), 1->dio */ unsigned char cmd_opcode; /* first byte of command */ } Sg_scatter_hold; @@ -190,8 +190,6 @@ static ssize_t sg_new_write(Sg_fd *sfp, struct file *file, int read_only, Sg_request **o_srp); static int sg_common_write(Sg_fd * sfp, Sg_request * srp, unsigned char *cmnd, int timeout, int blocking); -static int sg_u_iovec(sg_io_hdr_t * hp, int sg_num, int ind, - int wr_xf, int *countp, unsigned char __user **up); static int sg_write_xfer(Sg_request * srp); static int sg_read_xfer(Sg_request * srp); static int sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer); @@ -199,8 +197,6 @@ static void sg_remove_scat(Sg_scatter_hold * schp); static void sg_build_reserve(Sg_fd * sfp, int req_size); static void sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size); static void sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp); -static struct page *sg_page_malloc(int rqSz, int lowDma, int *retSzp); -static void sg_page_free(struct page *page, int size); static Sg_fd *sg_add_sfp(Sg_device * sdp, int dev); static int sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp); static void __sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp); @@ -771,26 +767,11 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp, break; } hp->duration = jiffies_to_msecs(jiffies); -/* Now send everything of to mid-level. The next time we hear about this - packet is when sg_cmd_done() is called (i.e. a callback). */ - if (srp->rq) { - srp->rq->timeout = timeout; - blk_execute_rq_nowait(sdp->device->request_queue, sdp->disk, - srp->rq, 1, sg_rq_end_io); - return 0; - } - if (scsi_execute_async(sdp->device, cmnd, hp->cmd_len, data_dir, srp->data.buffer, - hp->dxfer_len, srp->data.k_use_sg, timeout, - SG_DEFAULT_RETRIES, srp, sg_cmd_done, - GFP_ATOMIC)) { - SCSI_LOG_TIMEOUT(1, printk("sg_common_write: scsi_execute_async failed\n")); - /* - * most likely out of mem, but could also be a bad map - */ - sg_finish_rem_req(srp); - return -ENOMEM; - } else - return 0; + + srp->rq->timeout = timeout; + blk_execute_rq_nowait(sdp->device->request_queue, sdp->disk, + srp->rq, 1, sg_rq_end_io); + return 0; } static int @@ -1206,8 +1187,7 @@ sg_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) Sg_fd *sfp; unsigned long offset, len, sa; Sg_scatter_hold *rsv_schp; - struct scatterlist *sg; - int k; + int k, length; if ((NULL == vma) || (!(sfp = (Sg_fd *) vma->vm_private_data))) return VM_FAULT_SIGBUS; @@ -1217,15 +1197,14 @@ sg_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return VM_FAULT_SIGBUS; SCSI_LOG_TIMEOUT(3, printk("sg_vma_fault: offset=%lu, scatg=%d\n", offset, rsv_schp->k_use_sg)); - sg = rsv_schp->buffer; sa = vma->vm_start; - for (k = 0; (k < rsv_schp->k_use_sg) && (sa < vma->vm_end); - ++k, sg = sg_next(sg)) { + length = 1 << (PAGE_SHIFT + rsv_schp->page_order); + for (k = 0; k < rsv_schp->k_use_sg && sa < vma->vm_end; k++) { len = vma->vm_end - sa; - len = (len < sg->length) ? len : sg->length; + len = (len < length) ? len : length; if (offset < len) { - struct page *page; - page = virt_to_page(page_address(sg_page(sg)) + offset); + struct page *page = nth_page(rsv_schp->pages[k], + offset >> PAGE_SHIFT); get_page(page); /* increment page count */ vmf->page = page; return 0; /* success */ @@ -1247,8 +1226,7 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma) Sg_fd *sfp; unsigned long req_sz, len, sa; Sg_scatter_hold *rsv_schp; - int k; - struct scatterlist *sg; + int k, length; if ((!filp) || (!vma) || (!(sfp = (Sg_fd *) filp->private_data))) return -ENXIO; @@ -1262,11 +1240,10 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma) return -ENOMEM; /* cannot map more than reserved buffer */ sa = vma->vm_start; - sg = rsv_schp->buffer; - for (k = 0; (k < rsv_schp->k_use_sg) && (sa < vma->vm_end); - ++k, sg = sg_next(sg)) { + length = 1 << (PAGE_SHIFT + rsv_schp->page_order); + for (k = 0; k < rsv_schp->k_use_sg && sa < vma->vm_end; k++) { len = vma->vm_end - sa; - len = (len < sg->length) ? len : sg->length; + len = (len < length) ? len : length; sa += len; } @@ -1310,7 +1287,6 @@ sg_cmd_done(void *data, char *sense, int result, int resid) if (0 != result) { struct scsi_sense_hdr sshdr; - memcpy(srp->sense_b, sense, sizeof (srp->sense_b)); srp->header.status = 0xff & result; srp->header.masked_status = status_byte(result); srp->header.msg_status = msg_byte(result); @@ -1685,34 +1661,51 @@ static int sg_start_req(Sg_request *srp, unsigned char *cmd) Sg_scatter_hold *rsv_schp = &sfp->reserve; struct request_queue *q = sfp->parentdp->device->request_queue; unsigned long alignment = queue_dma_alignment(q) | q->dma_pad_mask; + struct rq_map_data map_data; SCSI_LOG_TIMEOUT(4, printk("sg_start_req: dxfer_len=%d\n", dxfer_len)); + res = __sg_start_req(srp, hp, cmd); + if (res) + return res; + if ((dxfer_len <= 0) || (dxfer_dir == SG_DXFER_NONE)) - return __sg_start_req(srp, hp, cmd); + return 0; #ifdef SG_ALLOW_DIO_CODE if (sg_allow_dio && (hp->flags & SG_FLAG_DIRECT_IO) && (dxfer_dir != SG_DXFER_UNKNOWN) && (0 == hp->iovec_count) && (!sfp->parentdp->device->host->unchecked_isa_dma) && - !(uaddr & alignment) && !(dxfer_len & alignment)) { - res = __sg_start_req(srp, hp, cmd); - if (!res) - res = sg_build_direct(srp, sfp, dxfer_len); - - return res; - } + !(uaddr & alignment) && !(dxfer_len & alignment)) + return sg_build_direct(srp, sfp, dxfer_len); #endif if ((!sg_res_in_use(sfp)) && (dxfer_len <= rsv_schp->bufflen)) sg_link_reserve(sfp, srp, dxfer_len); - else { + else res = sg_build_indirect(req_schp, sfp, dxfer_len); - if (res) { - sg_remove_scat(req_schp); - return res; - } + + if (!res) { + struct request *rq = srp->rq; + Sg_scatter_hold *schp = &srp->data; + int iovec_count = (int) hp->iovec_count; + + map_data.pages = schp->pages; + map_data.page_order = schp->page_order; + map_data.nr_entries = schp->k_use_sg; + + if (iovec_count) + res = blk_rq_map_user_iov(q, rq, &map_data, hp->dxferp, + iovec_count, + hp->dxfer_len, GFP_ATOMIC); + else + res = blk_rq_map_user(q, rq, &map_data, hp->dxferp, + hp->dxfer_len, GFP_ATOMIC); + + if (!res) + srp->bio = rq->bio; } - return 0; + + return res; } static void @@ -1730,6 +1723,7 @@ sg_finish_rem_req(Sg_request * srp) if (srp->rq) { if (srp->bio) blk_rq_unmap_user(srp->bio); + blk_put_request(srp->rq); } @@ -1739,21 +1733,12 @@ sg_finish_rem_req(Sg_request * srp) static int sg_build_sgat(Sg_scatter_hold * schp, const Sg_fd * sfp, int tablesize) { - int sg_bufflen = tablesize * sizeof(struct scatterlist); + int sg_bufflen = tablesize * sizeof(struct page *); gfp_t gfp_flags = GFP_ATOMIC | __GFP_NOWARN; - /* - * TODO: test without low_dma, we should not need it since - * the block layer will bounce the buffer for us - * - * XXX(hch): we shouldn't need GFP_DMA for the actual S/G list. - */ - if (sfp->low_dma) - gfp_flags |= GFP_DMA; - schp->buffer = kzalloc(sg_bufflen, gfp_flags); - if (!schp->buffer) + schp->pages = kzalloc(sg_bufflen, gfp_flags); + if (!schp->pages) return -ENOMEM; - sg_init_table(schp->buffer, tablesize); schp->sglist_len = sg_bufflen; return tablesize; /* number of scat_gath elements allocated */ } @@ -1780,11 +1765,10 @@ sg_build_direct(Sg_request * srp, Sg_fd * sfp, int dxfer_len) static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size) { - struct scatterlist *sg; - int ret_sz = 0, k, rem_sz, num, mx_sc_elems; + int ret_sz = 0, i, k, rem_sz, num, mx_sc_elems; int sg_tablesize = sfp->parentdp->sg_tablesize; - int blk_size = buff_size; - struct page *p = NULL; + int blk_size = buff_size, order; + gfp_t gfp_mask = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN; if (blk_size < 0) return -EFAULT; @@ -1808,15 +1792,26 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size) } else scatter_elem_sz_prev = num; } - for (k = 0, sg = schp->buffer, rem_sz = blk_size; - (rem_sz > 0) && (k < mx_sc_elems); - ++k, rem_sz -= ret_sz, sg = sg_next(sg)) { - + + if (sfp->low_dma) + gfp_mask |= GFP_DMA; + + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) + gfp_mask |= __GFP_ZERO; + + order = get_order(num); +retry: + ret_sz = 1 << (PAGE_SHIFT + order); + + for (k = 0, rem_sz = blk_size; rem_sz > 0 && k < mx_sc_elems; + k++, rem_sz -= ret_sz) { + num = (rem_sz > scatter_elem_sz_prev) ? - scatter_elem_sz_prev : rem_sz; - p = sg_page_malloc(num, sfp->low_dma, &ret_sz); - if (!p) - return -ENOMEM; + scatter_elem_sz_prev : rem_sz; + + schp->pages[k] = alloc_pages(gfp_mask, order); + if (!schp->pages[k]) + goto out; if (num == scatter_elem_sz_prev) { if (unlikely(ret_sz > scatter_elem_sz_prev)) { @@ -1824,12 +1819,12 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size) scatter_elem_sz_prev = ret_sz; } } - sg_set_page(sg, p, (ret_sz > num) ? num : ret_sz, 0); SCSI_LOG_TIMEOUT(5, printk("sg_build_indirect: k=%d, num=%d, " "ret_sz=%d\n", k, num, ret_sz)); } /* end of for loop */ + schp->page_order = order; schp->k_use_sg = k; SCSI_LOG_TIMEOUT(5, printk("sg_build_indirect: k_use_sg=%d, " "rem_sz=%d\n", k, rem_sz)); @@ -1837,8 +1832,15 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size) schp->bufflen = blk_size; if (rem_sz > 0) /* must have failed */ return -ENOMEM; - return 0; +out: + for (i = 0; i < k; i++) + __free_pages(schp->pages[k], order); + + if (--order >= 0) + goto retry; + + return -ENOMEM; } static int @@ -1846,13 +1848,8 @@ sg_write_xfer(Sg_request * srp) { sg_io_hdr_t *hp = &srp->header; Sg_scatter_hold *schp = &srp->data; - struct scatterlist *sg = schp->buffer; int num_xfer = 0; - int j, k, onum, usglen, ksglen, res; - int iovec_count = (int) hp->iovec_count; int dxfer_dir = hp->dxfer_direction; - unsigned char *p; - unsigned char __user *up; int new_interface = ('\0' == hp->interface_id) ? 0 : 1; if ((SG_DXFER_UNKNOWN == dxfer_dir) || (SG_DXFER_TO_DEV == dxfer_dir) || @@ -1866,83 +1863,9 @@ sg_write_xfer(Sg_request * srp) && ((SG_FLAG_NO_DXFER | SG_FLAG_MMAP_IO) & hp->flags))) return 0; - SCSI_LOG_TIMEOUT(4, printk("sg_write_xfer: num_xfer=%d, iovec_count=%d, k_use_sg=%d\n", - num_xfer, iovec_count, schp->k_use_sg)); - if (iovec_count) { - onum = iovec_count; - if (!access_ok(VERIFY_READ, hp->dxferp, SZ_SG_IOVEC * onum)) - return -EFAULT; - } else - onum = 1; - - ksglen = sg->length; - p = page_address(sg_page(sg)); - for (j = 0, k = 0; j < onum; ++j) { - res = sg_u_iovec(hp, iovec_count, j, 1, &usglen, &up); - if (res) - return res; - - for (; p; sg = sg_next(sg), ksglen = sg->length, - p = page_address(sg_page(sg))) { - if (usglen <= 0) - break; - if (ksglen > usglen) { - if (usglen >= num_xfer) { - if (__copy_from_user(p, up, num_xfer)) - return -EFAULT; - return 0; - } - if (__copy_from_user(p, up, usglen)) - return -EFAULT; - p += usglen; - ksglen -= usglen; - break; - } else { - if (ksglen >= num_xfer) { - if (__copy_from_user(p, up, num_xfer)) - return -EFAULT; - return 0; - } - if (__copy_from_user(p, up, ksglen)) - return -EFAULT; - up += ksglen; - usglen -= ksglen; - } - ++k; - if (k >= schp->k_use_sg) - return 0; - } - } - - return 0; -} + SCSI_LOG_TIMEOUT(4, printk("sg_write_xfer: num_xfer=%d, k_use_sg=%d\n", + num_xfer, schp->k_use_sg)); -static int -sg_u_iovec(sg_io_hdr_t * hp, int sg_num, int ind, - int wr_xf, int *countp, unsigned char __user **up) -{ - int num_xfer = (int) hp->dxfer_len; - unsigned char __user *p = hp->dxferp; - int count; - - if (0 == sg_num) { - if (wr_xf && ('\0' == hp->interface_id)) - count = (int) hp->flags; /* holds "old" input_size */ - else - count = num_xfer; - } else { - sg_iovec_t iovec; - if (__copy_from_user(&iovec, p + ind*SZ_SG_IOVEC, SZ_SG_IOVEC)) - return -EFAULT; - p = iovec.iov_base; - count = (int) iovec.iov_len; - } - if (!access_ok(wr_xf ? VERIFY_READ : VERIFY_WRITE, p, count)) - return -EFAULT; - if (up) - *up = p; - if (countp) - *countp = count; return 0; } @@ -1950,21 +1873,18 @@ static void sg_remove_scat(Sg_scatter_hold * schp) { SCSI_LOG_TIMEOUT(4, printk("sg_remove_scat: k_use_sg=%d\n", schp->k_use_sg)); - if (schp->buffer && (schp->sglist_len > 0)) { - struct scatterlist *sg = schp->buffer; - + if (schp->pages && schp->sglist_len > 0) { if (!schp->dio_in_use) { int k; - for (k = 0; (k < schp->k_use_sg) && sg_page(sg); - ++k, sg = sg_next(sg)) { + for (k = 0; k < schp->k_use_sg && schp->pages[k]; k++) { SCSI_LOG_TIMEOUT(5, printk( - "sg_remove_scat: k=%d, pg=0x%p, len=%d\n", - k, sg_page(sg), sg->length)); - sg_page_free(sg_page(sg), sg->length); + "sg_remove_scat: k=%d, pg=0x%p\n", + k, schp->pages[k])); + __free_pages(schp->pages[k], schp->page_order); } - kfree(schp->buffer); + kfree(schp->pages); } } memset(schp, 0, sizeof (*schp)); @@ -1975,13 +1895,8 @@ sg_read_xfer(Sg_request * srp) { sg_io_hdr_t *hp = &srp->header; Sg_scatter_hold *schp = &srp->data; - struct scatterlist *sg = schp->buffer; int num_xfer = 0; - int j, k, onum, usglen, ksglen, res; - int iovec_count = (int) hp->iovec_count; int dxfer_dir = hp->dxfer_direction; - unsigned char *p; - unsigned char __user *up; int new_interface = ('\0' == hp->interface_id) ? 0 : 1; if ((SG_DXFER_UNKNOWN == dxfer_dir) || (SG_DXFER_FROM_DEV == dxfer_dir) @@ -1996,53 +1911,7 @@ sg_read_xfer(Sg_request * srp) return 0; SCSI_LOG_TIMEOUT(4, printk("sg_read_xfer: num_xfer=%d, iovec_count=%d, k_use_sg=%d\n", - num_xfer, iovec_count, schp->k_use_sg)); - if (iovec_count) { - onum = iovec_count; - if (!access_ok(VERIFY_READ, hp->dxferp, SZ_SG_IOVEC * onum)) - return -EFAULT; - } else - onum = 1; - - p = page_address(sg_page(sg)); - ksglen = sg->length; - for (j = 0, k = 0; j < onum; ++j) { - res = sg_u_iovec(hp, iovec_count, j, 0, &usglen, &up); - if (res) - return res; - - for (; p; sg = sg_next(sg), ksglen = sg->length, - p = page_address(sg_page(sg))) { - if (usglen <= 0) - break; - if (ksglen > usglen) { - if (usglen >= num_xfer) { - if (__copy_to_user(up, p, num_xfer)) - return -EFAULT; - return 0; - } - if (__copy_to_user(up, p, usglen)) - return -EFAULT; - p += usglen; - ksglen -= usglen; - break; - } else { - if (ksglen >= num_xfer) { - if (__copy_to_user(up, p, num_xfer)) - return -EFAULT; - return 0; - } - if (__copy_to_user(up, p, ksglen)) - return -EFAULT; - up += ksglen; - usglen -= ksglen; - } - ++k; - if (k >= schp->k_use_sg) - return 0; - } - } - + num_xfer, (int)hp->iovec_count, schp->k_use_sg)); return 0; } @@ -2050,7 +1919,6 @@ static int sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer) { Sg_scatter_hold *schp = &srp->data; - struct scatterlist *sg = schp->buffer; int k, num; SCSI_LOG_TIMEOUT(4, printk("sg_read_oxfer: num_read_xfer=%d\n", @@ -2058,15 +1926,18 @@ sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer) if ((!outp) || (num_read_xfer <= 0)) return 0; - for (k = 0; (k < schp->k_use_sg) && sg_page(sg); ++k, sg = sg_next(sg)) { - num = sg->length; + blk_rq_unmap_user(srp->bio); + srp->bio = NULL; + + num = 1 << (PAGE_SHIFT + schp->page_order); + for (k = 0; k < schp->k_use_sg && schp->pages[k]; k++) { if (num > num_read_xfer) { - if (__copy_to_user(outp, page_address(sg_page(sg)), + if (__copy_to_user(outp, page_address(schp->pages[k]), num_read_xfer)) return -EFAULT; break; } else { - if (__copy_to_user(outp, page_address(sg_page(sg)), + if (__copy_to_user(outp, page_address(schp->pages[k]), num)) return -EFAULT; num_read_xfer -= num; @@ -2101,24 +1972,22 @@ sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size) { Sg_scatter_hold *req_schp = &srp->data; Sg_scatter_hold *rsv_schp = &sfp->reserve; - struct scatterlist *sg = rsv_schp->buffer; int k, num, rem; srp->res_used = 1; SCSI_LOG_TIMEOUT(4, printk("sg_link_reserve: size=%d\n", size)); rem = size; - for (k = 0; k < rsv_schp->k_use_sg; ++k, sg = sg_next(sg)) { - num = sg->length; + num = 1 << (PAGE_SHIFT + rsv_schp->page_order); + for (k = 0; k < rsv_schp->k_use_sg; k++) { if (rem <= num) { - sfp->save_scat_len = num; - sg->length = rem; req_schp->k_use_sg = k + 1; req_schp->sglist_len = rsv_schp->sglist_len; - req_schp->buffer = rsv_schp->buffer; + req_schp->pages = rsv_schp->pages; req_schp->bufflen = size; req_schp->b_malloc_len = rsv_schp->b_malloc_len; + req_schp->page_order = rsv_schp->page_order; break; } else rem -= num; @@ -2132,22 +2001,13 @@ static void sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp) { Sg_scatter_hold *req_schp = &srp->data; - Sg_scatter_hold *rsv_schp = &sfp->reserve; SCSI_LOG_TIMEOUT(4, printk("sg_unlink_reserve: req->k_use_sg=%d\n", (int) req_schp->k_use_sg)); - if ((rsv_schp->k_use_sg > 0) && (req_schp->k_use_sg > 0)) { - struct scatterlist *sg = rsv_schp->buffer; - - if (sfp->save_scat_len > 0) - (sg + (req_schp->k_use_sg - 1))->length = - (unsigned) sfp->save_scat_len; - else - SCSI_LOG_TIMEOUT(1, printk ("sg_unlink_reserve: BAD save_scat_len\n")); - } req_schp->k_use_sg = 0; req_schp->bufflen = 0; - req_schp->buffer = NULL; + req_schp->pages = NULL; + req_schp->page_order = 0; req_schp->sglist_len = 0; sfp->save_scat_len = 0; srp->res_used = 0; @@ -2405,53 +2265,6 @@ sg_res_in_use(Sg_fd * sfp) return srp ? 1 : 0; } -/* The size fetched (value output via retSzp) set when non-NULL return */ -static struct page * -sg_page_malloc(int rqSz, int lowDma, int *retSzp) -{ - struct page *resp = NULL; - gfp_t page_mask; - int order, a_size; - int resSz; - - if ((rqSz <= 0) || (NULL == retSzp)) - return resp; - - if (lowDma) - page_mask = GFP_ATOMIC | GFP_DMA | __GFP_COMP | __GFP_NOWARN; - else - page_mask = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN; - - for (order = 0, a_size = PAGE_SIZE; a_size < rqSz; - order++, a_size <<= 1) ; - resSz = a_size; /* rounded up if necessary */ - resp = alloc_pages(page_mask, order); - while ((!resp) && order) { - --order; - a_size >>= 1; /* divide by 2, until PAGE_SIZE */ - resp = alloc_pages(page_mask, order); /* try half */ - resSz = a_size; - } - if (resp) { - if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) - memset(page_address(resp), 0, resSz); - *retSzp = resSz; - } - return resp; -} - -static void -sg_page_free(struct page *page, int size) -{ - int order, a_size; - - if (!page) - return; - for (order = 0, a_size = PAGE_SIZE; a_size < size; - order++, a_size <<= 1) ; - __free_pages(page, order); -} - #ifdef CONFIG_SCSI_PROC_FS static int sg_idr_max_id(int id, void *p, void *data) -- cgit v1.2.3-70-g09d2 From 01cfcddd98f09e05a2e36031654ed46643b76f23 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 28 Aug 2008 15:05:59 +0900 Subject: sg: use blk_rq_aligned helper function Signed-off-by: FUJITA Tomonori Cc: Douglas Gilbert Cc: Jens Axboe Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index d6391666502..ed69292babd 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1656,11 +1656,9 @@ static int sg_start_req(Sg_request *srp, unsigned char *cmd) sg_io_hdr_t *hp = &srp->header; int dxfer_len = (int) hp->dxfer_len; int dxfer_dir = hp->dxfer_direction; - unsigned long uaddr = (unsigned long)hp->dxferp; Sg_scatter_hold *req_schp = &srp->data; Sg_scatter_hold *rsv_schp = &sfp->reserve; struct request_queue *q = sfp->parentdp->device->request_queue; - unsigned long alignment = queue_dma_alignment(q) | q->dma_pad_mask; struct rq_map_data map_data; SCSI_LOG_TIMEOUT(4, printk("sg_start_req: dxfer_len=%d\n", dxfer_len)); @@ -1676,7 +1674,7 @@ static int sg_start_req(Sg_request *srp, unsigned char *cmd) if (sg_allow_dio && (hp->flags & SG_FLAG_DIRECT_IO) && (dxfer_dir != SG_DXFER_UNKNOWN) && (0 == hp->iovec_count) && (!sfp->parentdp->device->host->unchecked_isa_dma) && - !(uaddr & alignment) && !(dxfer_len & alignment)) + blk_rq_aligned(q, hp->dxferp, dxfer_len)) return sg_build_direct(srp, sfp, dxfer_len); #endif if ((!sg_res_in_use(sfp)) && (dxfer_len <= rsv_schp->bufflen)) -- cgit v1.2.3-70-g09d2 From fad7f01e61bf737fe8a3740d803f000db57ecac6 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 2 Sep 2008 16:20:20 +0900 Subject: sg: set dxferp to NULL for READ with the older SG interface With the older SG interface, we don't know a user-space address to trasfer data when executing a SCSI command. So we can't pass a user-space address to blk_rq_map_user. This patch fixes sg to pass a NULL user-space address to blk_rq_map_user so that it just sets up a request and bios with page frames propely without data transfer. Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index ed69292babd..50c07bca727 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -615,7 +615,10 @@ sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos) else hp->dxfer_direction = (mxsize > 0) ? SG_DXFER_FROM_DEV : SG_DXFER_NONE; hp->dxfer_len = mxsize; - hp->dxferp = (char __user *)buf + cmd_size; + if (hp->dxfer_direction == SG_DXFER_TO_DEV) + hp->dxferp = (char __user *)buf + cmd_size; + else + hp->dxferp = NULL; hp->sbp = NULL; hp->timeout = old_hdr.reply_len; /* structure abuse ... */ hp->flags = input_size; /* structure abuse ... */ -- cgit v1.2.3-70-g09d2 From f98a8cae12f2b2a8f9bfd7a53c990a1a405e880e Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Thu, 4 Sep 2008 14:27:35 -0600 Subject: SCSI sd driver calls revalidate_disk wrapper. Modify the SCSI disk driver to call the revalidate_disk() wrapper. This allows us to do some housekeeping such as accounting for a disk being resized online. The wrapper will call sd_revalidate_disk() at the appropriate time. Signed-off-by: Andrew Patterson Signed-off-by: Jens Axboe --- drivers/scsi/sd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index bcb04b2a767..cb115d1bf22 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -165,7 +165,7 @@ sd_store_cache_type(struct device *dev, struct device_attribute *attr, sd_print_sense_hdr(sdkp, &sshdr); return -EINVAL; } - sd_revalidate_disk(sdkp->disk); + revalidate_disk(sdkp->disk); return count; } @@ -916,7 +916,7 @@ static void sd_rescan(struct device *dev) struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev); if (sdkp) { - sd_revalidate_disk(sdkp->disk); + revalidate_disk(sdkp->disk); scsi_disk_put(sdkp); } } -- cgit v1.2.3-70-g09d2 From 242f9dcb8ba6f68fcd217a119a7648a4f69290e9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 14 Sep 2008 05:55:09 -0700 Subject: block: unify request timeout handling Right now SCSI and others do their own command timeout handling. Move those bits to the block layer. Instead of having a timer per command, we try to be a bit more clever and simply have one per-queue. This avoids the overhead of having to tear down and setup a timer for each command, so it will result in a lot less timer fiddling. Signed-off-by: Mike Anderson Signed-off-by: Jens Axboe --- block/Makefile | 4 +- block/blk-core.c | 7 ++ block/blk-settings.c | 12 +++ block/blk-softirq.c | 30 ++++--- block/blk-timeout.c | 155 +++++++++++++++++++++++++++++++++++ block/blk.h | 24 ++++++ block/elevator.c | 8 ++ drivers/ata/libata-eh.c | 13 +-- drivers/ata/libata.h | 2 +- drivers/scsi/aacraid/aachba.c | 2 +- drivers/scsi/gdth.c | 60 +++++++++----- drivers/scsi/gdth.h | 2 +- drivers/scsi/gdth_proc.c | 66 --------------- drivers/scsi/gdth_proc.h | 3 - drivers/scsi/ibmvscsi/ibmvscsi.c | 2 +- drivers/scsi/ide-scsi.c | 2 +- drivers/scsi/ipr.c | 3 +- drivers/scsi/ips.c | 2 +- drivers/scsi/libiscsi.c | 17 ++-- drivers/scsi/libsas/sas_ata.c | 2 +- drivers/scsi/libsas/sas_internal.h | 2 +- drivers/scsi/libsas/sas_scsi_host.c | 30 +++---- drivers/scsi/megaraid/megaraid_sas.c | 6 +- drivers/scsi/ncr53c8xx.c | 4 +- drivers/scsi/qla1280.c | 4 +- drivers/scsi/qla4xxx/ql4_os.c | 4 +- drivers/scsi/scsi.c | 92 ++++----------------- drivers/scsi/scsi_error.c | 90 +++----------------- drivers/scsi/scsi_lib.c | 17 +++- drivers/scsi/scsi_priv.h | 7 +- drivers/scsi/scsi_sysfs.c | 7 +- drivers/scsi/scsi_transport_fc.c | 6 +- drivers/scsi/sd.c | 9 +- drivers/scsi/sr.c | 5 +- drivers/scsi/sym53c8xx_2/sym_glue.c | 4 +- include/linux/blkdev.h | 20 +++++ include/scsi/scsi_cmnd.h | 3 - include/scsi/scsi_host.h | 9 +- include/scsi/scsi_transport.h | 3 +- 39 files changed, 399 insertions(+), 339 deletions(-) create mode 100644 block/blk-timeout.c (limited to 'drivers') diff --git a/block/Makefile b/block/Makefile index 0da976ce67d..bfe73049f93 100644 --- a/block/Makefile +++ b/block/Makefile @@ -4,8 +4,8 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ - blk-exec.o blk-merge.o blk-softirq.o ioctl.o genhd.o \ - scsi_ioctl.o cmd-filter.o + blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ + ioctl.o genhd.o scsi_ioctl.o cmd-filter.o obj-$(CONFIG_BLK_DEV_BSG) += bsg.o obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o diff --git a/block/blk-core.c b/block/blk-core.c index f25eb9786d9..d768a8ddc17 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -110,6 +110,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq) memset(rq, 0, sizeof(*rq)); INIT_LIST_HEAD(&rq->queuelist); + INIT_LIST_HEAD(&rq->timeout_list); rq->cpu = -1; rq->q = q; rq->sector = rq->hard_sector = (sector_t) -1; @@ -490,6 +491,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) } init_timer(&q->unplug_timer); + setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); + INIT_LIST_HEAD(&q->timeout_list); kobject_init(&q->kobj, &blk_queue_ktype); @@ -897,6 +900,8 @@ EXPORT_SYMBOL(blk_start_queueing); */ void blk_requeue_request(struct request_queue *q, struct request *rq) { + blk_delete_timer(rq); + blk_clear_rq_complete(rq); blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); if (blk_rq_tagged(rq)) @@ -1650,6 +1655,8 @@ static void end_that_request_last(struct request *req, int error) { struct gendisk *disk = req->rq_disk; + blk_delete_timer(req); + if (blk_rq_tagged(req)) blk_queue_end_tag(req->q, req); diff --git a/block/blk-settings.c b/block/blk-settings.c index d70692badcd..1d0330d0b40 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -77,6 +77,18 @@ void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn) } EXPORT_SYMBOL(blk_queue_softirq_done); +void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout) +{ + q->rq_timeout = timeout; +} +EXPORT_SYMBOL_GPL(blk_queue_rq_timeout); + +void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn) +{ + q->rq_timed_out_fn = fn; +} +EXPORT_SYMBOL_GPL(blk_queue_rq_timed_out); + /** * blk_queue_make_request - define an alternate make_request function for a device * @q: the request queue for the device to be affected diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 3a1af551191..7ab344afb16 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -101,18 +101,7 @@ static struct notifier_block __cpuinitdata blk_cpu_notifier = { .notifier_call = blk_cpu_notify, }; -/** - * blk_complete_request - end I/O on a request - * @req: the request being processed - * - * Description: - * Ends all I/O on a request. It does not handle partial completions, - * unless the driver actually implements this in its completion callback - * through requeueing. The actual completion happens out-of-order, - * through a softirq handler. The user must have registered a completion - * callback through blk_queue_softirq_done(). - **/ -void blk_complete_request(struct request *req) +void __blk_complete_request(struct request *req) { struct request_queue *q = req->q; unsigned long flags; @@ -151,6 +140,23 @@ do_local: local_irq_restore(flags); } + +/** + * blk_complete_request - end I/O on a request + * @req: the request being processed + * + * Description: + * Ends all I/O on a request. It does not handle partial completions, + * unless the driver actually implements this in its completion callback + * through requeueing. The actual completion happens out-of-order, + * through a softirq handler. The user must have registered a completion + * callback through blk_queue_softirq_done(). + **/ +void blk_complete_request(struct request *req) +{ + if (!blk_mark_rq_complete(req)) + __blk_complete_request(req); +} EXPORT_SYMBOL(blk_complete_request); __init int blk_softirq_init(void) diff --git a/block/blk-timeout.c b/block/blk-timeout.c new file mode 100644 index 00000000000..b36d07bf0af --- /dev/null +++ b/block/blk-timeout.c @@ -0,0 +1,155 @@ +/* + * Functions related to generic timeout handling of requests. + */ +#include +#include +#include + +#include "blk.h" + +/* + * blk_delete_timer - Delete/cancel timer for a given function. + * @req: request that we are canceling timer for + * + */ +void blk_delete_timer(struct request *req) +{ + struct request_queue *q = req->q; + + /* + * Nothing to detach + */ + if (!q->rq_timed_out_fn || !req->deadline) + return; + + list_del_init(&req->timeout_list); + + if (list_empty(&q->timeout_list)) + del_timer(&q->timeout); +} + +static void blk_rq_timed_out(struct request *req) +{ + struct request_queue *q = req->q; + enum blk_eh_timer_return ret; + + ret = q->rq_timed_out_fn(req); + switch (ret) { + case BLK_EH_HANDLED: + __blk_complete_request(req); + break; + case BLK_EH_RESET_TIMER: + blk_clear_rq_complete(req); + blk_add_timer(req); + break; + case BLK_EH_NOT_HANDLED: + /* + * LLD handles this for now but in the future + * we can send a request msg to abort the command + * and we can move more of the generic scsi eh code to + * the blk layer. + */ + break; + default: + printk(KERN_ERR "block: bad eh return: %d\n", ret); + break; + } +} + +void blk_rq_timed_out_timer(unsigned long data) +{ + struct request_queue *q = (struct request_queue *) data; + unsigned long flags, uninitialized_var(next), next_set = 0; + struct request *rq, *tmp; + + spin_lock_irqsave(q->queue_lock, flags); + + list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list) { + if (time_after_eq(jiffies, rq->deadline)) { + list_del_init(&rq->timeout_list); + + /* + * Check if we raced with end io completion + */ + if (blk_mark_rq_complete(rq)) + continue; + blk_rq_timed_out(rq); + } + if (!next_set) { + next = rq->deadline; + next_set = 1; + } else if (time_after(next, rq->deadline)) + next = rq->deadline; + } + + if (next_set && !list_empty(&q->timeout_list)) + mod_timer(&q->timeout, round_jiffies(next)); + + spin_unlock_irqrestore(q->queue_lock, flags); +} + +/** + * blk_abort_request -- Request request recovery for the specified command + * @req: pointer to the request of interest + * + * This function requests that the block layer start recovery for the + * request by deleting the timer and calling the q's timeout function. + * LLDDs who implement their own error recovery MAY ignore the timeout + * event if they generated blk_abort_req. Must hold queue lock. + */ +void blk_abort_request(struct request *req) +{ + blk_delete_timer(req); + blk_rq_timed_out(req); +} +EXPORT_SYMBOL_GPL(blk_abort_request); + +/** + * blk_add_timer - Start timeout timer for a single request + * @req: request that is about to start running. + * + * Notes: + * Each request has its own timer, and as it is added to the queue, we + * set up the timer. When the request completes, we cancel the timer. + */ +void blk_add_timer(struct request *req) +{ + struct request_queue *q = req->q; + unsigned long expiry; + + if (!q->rq_timed_out_fn) + return; + + BUG_ON(!list_empty(&req->timeout_list)); + BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags)); + + if (req->timeout) + req->deadline = jiffies + req->timeout; + else { + req->deadline = jiffies + q->rq_timeout; + /* + * Some LLDs, like scsi, peek at the timeout to prevent + * a command from being retried forever. + */ + req->timeout = q->rq_timeout; + } + list_add_tail(&req->timeout_list, &q->timeout_list); + + /* + * If the timer isn't already pending or this timeout is earlier + * than an existing one, modify the timer. Round to next nearest + * second. + */ + expiry = round_jiffies(req->deadline); + + /* + * We use ->deadline == 0 to detect whether a timer was added or + * not, so just increase to next jiffy for that specific case + */ + if (unlikely(!req->deadline)) + req->deadline = 1; + + if (!timer_pending(&q->timeout) || + time_before(expiry, q->timeout.expires)) + mod_timer(&q->timeout, expiry); +} diff --git a/block/blk.h b/block/blk.h index de74254cb91..a4f4a50aefa 100644 --- a/block/blk.h +++ b/block/blk.h @@ -17,6 +17,30 @@ void __blk_queue_free_tags(struct request_queue *q); void blk_unplug_work(struct work_struct *work); void blk_unplug_timeout(unsigned long data); +void blk_rq_timed_out_timer(unsigned long data); +void blk_delete_timer(struct request *); +void blk_add_timer(struct request *); + +/* + * Internal atomic flags for request handling + */ +enum rq_atomic_flags { + REQ_ATOM_COMPLETE = 0, +}; + +/* + * EH timer and IO completion will both attempt to 'grab' the request, make + * sure that only one of them suceeds + */ +static inline int blk_mark_rq_complete(struct request *rq) +{ + return test_and_set_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags); +} + +static inline void blk_clear_rq_complete(struct request *rq) +{ + clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags); +} struct io_context *current_io_context(gfp_t gfp_flags, int node); diff --git a/block/elevator.c b/block/elevator.c index 8e3fc3afc77..a91fc59edd0 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -36,6 +36,8 @@ #include #include +#include "blk.h" + static DEFINE_SPINLOCK(elv_list_lock); static LIST_HEAD(elv_list); @@ -771,6 +773,12 @@ struct request *elv_next_request(struct request_queue *q) */ rq->cmd_flags |= REQ_STARTED; blk_add_trace_rq(q, rq, BLK_TA_ISSUE); + + /* + * We are now handing the request to the hardware, + * add the timeout handler + */ + blk_add_timer(rq); } if (!q->boundary_rq || q->boundary_rq == rq) { diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index c1db2f234d2..bd0b2bc76f1 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -33,6 +33,7 @@ */ #include +#include #include #include #include @@ -457,29 +458,29 @@ static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev, * RETURNS: * EH_HANDLED or EH_NOT_HANDLED */ -enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd) +enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd) { struct Scsi_Host *host = cmd->device->host; struct ata_port *ap = ata_shost_to_port(host); unsigned long flags; struct ata_queued_cmd *qc; - enum scsi_eh_timer_return ret; + enum blk_eh_timer_return ret; DPRINTK("ENTER\n"); if (ap->ops->error_handler) { - ret = EH_NOT_HANDLED; + ret = BLK_EH_NOT_HANDLED; goto out; } - ret = EH_HANDLED; + ret = BLK_EH_HANDLED; spin_lock_irqsave(ap->lock, flags); qc = ata_qc_from_tag(ap, ap->link.active_tag); if (qc) { WARN_ON(qc->scsicmd != cmd); qc->flags |= ATA_QCFLAG_EH_SCHEDULED; qc->err_mask |= AC_ERR_TIMEOUT; - ret = EH_NOT_HANDLED; + ret = BLK_EH_NOT_HANDLED; } spin_unlock_irqrestore(ap->lock, flags); @@ -831,7 +832,7 @@ void ata_qc_schedule_eh(struct ata_queued_cmd *qc) * Note that ATA_QCFLAG_FAILED is unconditionally set after * this function completes. */ - scsi_req_abort_cmd(qc->scsicmd); + blk_abort_request(qc->scsicmd->request); } /** diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index ade5c75b614..24f5005478b 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -152,7 +152,7 @@ extern int ata_bus_probe(struct ata_port *ap); /* libata-eh.c */ extern unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd); extern void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd); -extern enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd); +extern enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd); extern void ata_scsi_error(struct Scsi_Host *host); extern void ata_port_wait_eh(struct ata_port *ap); extern void ata_eh_fastdrain_timerfn(unsigned long arg); diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index aa4e77c2527..8abfd06b5a7 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -1139,7 +1139,7 @@ static struct aac_srb * aac_scsi_common(struct fib * fib, struct scsi_cmnd * cmd srbcmd->id = cpu_to_le32(scmd_id(cmd)); srbcmd->lun = cpu_to_le32(cmd->device->lun); srbcmd->flags = cpu_to_le32(flag); - timeout = cmd->timeout_per_command/HZ; + timeout = cmd->request->timeout/HZ; if (timeout == 0) timeout = 1; srbcmd->timeout = cpu_to_le32(timeout); // timeout in seconds diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c index 822d5214692..c387c15a212 100644 --- a/drivers/scsi/gdth.c +++ b/drivers/scsi/gdth.c @@ -464,7 +464,6 @@ int __gdth_execute(struct scsi_device *sdev, gdth_cmd_str *gdtcmd, char *cmnd, /* use request field to save the ptr. to completion struct. */ scp->request = (struct request *)&wait; - scp->timeout_per_command = timeout*HZ; scp->cmd_len = 12; scp->cmnd = cmnd; cmndinfo.priority = IOCTL_PRI; @@ -1995,23 +1994,12 @@ static void gdth_putq(gdth_ha_str *ha, Scsi_Cmnd *scp, unchar priority) register Scsi_Cmnd *pscp; register Scsi_Cmnd *nscp; ulong flags; - unchar b, t; TRACE(("gdth_putq() priority %d\n",priority)); spin_lock_irqsave(&ha->smp_lock, flags); - if (!cmndinfo->internal_command) { + if (!cmndinfo->internal_command) cmndinfo->priority = priority; - b = scp->device->channel; - t = scp->device->id; - if (priority >= DEFAULT_PRI) { - if ((b != ha->virt_bus && ha->raw[BUS_L2P(ha,b)].lock) || - (b==ha->virt_bus && thdr[t].lock)) { - TRACE2(("gdth_putq(): locked IO ->update_timeout()\n")); - cmndinfo->timeout = gdth_update_timeout(scp, 0); - } - } - } if (ha->req_first==NULL) { ha->req_first = scp; /* queue was empty */ @@ -3899,6 +3887,39 @@ static const char *gdth_info(struct Scsi_Host *shp) return ((const char *)ha->binfo.type_string); } +static enum blk_eh_timer_return gdth_timed_out(struct scsi_cmnd *scp) +{ + gdth_ha_str *ha = shost_priv(scp->device->host); + struct gdth_cmndinfo *cmndinfo = gdth_cmnd_priv(scp); + unchar b, t; + ulong flags; + enum blk_eh_timer_return retval = BLK_EH_NOT_HANDLED; + + TRACE(("%s() cmd 0x%x\n", scp->cmnd[0], __func__)); + b = scp->device->channel; + t = scp->device->id; + + /* + * We don't really honor the command timeout, but we try to + * honor 6 times of the actual command timeout! So reset the + * timer if this is less than 6th timeout on this command! + */ + if (++cmndinfo->timeout_count < 6) + retval = BLK_EH_RESET_TIMER; + + /* Reset the timeout if it is locked IO */ + spin_lock_irqsave(&ha->smp_lock, flags); + if ((b != ha->virt_bus && ha->raw[BUS_L2P(ha, b)].lock) || + (b == ha->virt_bus && t < MAX_HDRIVES && ha->hdr[t].lock)) { + TRACE2(("%s(): locked IO, reset timeout\n", __func__)); + retval = BLK_EH_RESET_TIMER; + } + spin_unlock_irqrestore(&ha->smp_lock, flags); + + return retval; +} + + static int gdth_eh_bus_reset(Scsi_Cmnd *scp) { gdth_ha_str *ha = shost_priv(scp->device->host); @@ -3992,7 +4013,7 @@ static int gdth_queuecommand(struct scsi_cmnd *scp, BUG_ON(!cmndinfo); scp->scsi_done = done; - gdth_update_timeout(scp, scp->timeout_per_command * 6); + cmndinfo->timeout_count = 0; cmndinfo->priority = DEFAULT_PRI; return __gdth_queuecommand(ha, scp, cmndinfo); @@ -4096,12 +4117,10 @@ static int ioc_lockdrv(void __user *arg) ha->hdr[j].lock = 1; spin_unlock_irqrestore(&ha->smp_lock, flags); gdth_wait_completion(ha, ha->bus_cnt, j); - gdth_stop_timeout(ha, ha->bus_cnt, j); } else { spin_lock_irqsave(&ha->smp_lock, flags); ha->hdr[j].lock = 0; spin_unlock_irqrestore(&ha->smp_lock, flags); - gdth_start_timeout(ha, ha->bus_cnt, j); gdth_next(ha); } } @@ -4539,18 +4558,14 @@ static int gdth_ioctl(struct inode *inode, struct file *filep, spin_lock_irqsave(&ha->smp_lock, flags); ha->raw[i].lock = 1; spin_unlock_irqrestore(&ha->smp_lock, flags); - for (j = 0; j < ha->tid_cnt; ++j) { + for (j = 0; j < ha->tid_cnt; ++j) gdth_wait_completion(ha, i, j); - gdth_stop_timeout(ha, i, j); - } } else { spin_lock_irqsave(&ha->smp_lock, flags); ha->raw[i].lock = 0; spin_unlock_irqrestore(&ha->smp_lock, flags); - for (j = 0; j < ha->tid_cnt; ++j) { - gdth_start_timeout(ha, i, j); + for (j = 0; j < ha->tid_cnt; ++j) gdth_next(ha); - } } } break; @@ -4644,6 +4659,7 @@ static struct scsi_host_template gdth_template = { .slave_configure = gdth_slave_configure, .bios_param = gdth_bios_param, .proc_info = gdth_proc_info, + .eh_timed_out = gdth_timed_out, .proc_name = "gdth", .can_queue = GDTH_MAXCMDS, .this_id = -1, diff --git a/drivers/scsi/gdth.h b/drivers/scsi/gdth.h index ca92476727c..1646444e9bd 100644 --- a/drivers/scsi/gdth.h +++ b/drivers/scsi/gdth.h @@ -916,7 +916,7 @@ typedef struct { gdth_cmd_str *internal_cmd_str; /* crier for internal messages*/ dma_addr_t sense_paddr; /* sense dma-addr */ unchar priority; - int timeout; + int timeout_count; /* # of timeout calls */ volatile int wait_for_completion; ushort status; ulong32 info; diff --git a/drivers/scsi/gdth_proc.c b/drivers/scsi/gdth_proc.c index ce0228e26ae..59349a316e1 100644 --- a/drivers/scsi/gdth_proc.c +++ b/drivers/scsi/gdth_proc.c @@ -748,69 +748,3 @@ static void gdth_wait_completion(gdth_ha_str *ha, int busnum, int id) } spin_unlock_irqrestore(&ha->smp_lock, flags); } - -static void gdth_stop_timeout(gdth_ha_str *ha, int busnum, int id) -{ - ulong flags; - Scsi_Cmnd *scp; - unchar b, t; - - spin_lock_irqsave(&ha->smp_lock, flags); - - for (scp = ha->req_first; scp; scp = (Scsi_Cmnd *)scp->SCp.ptr) { - struct gdth_cmndinfo *cmndinfo = gdth_cmnd_priv(scp); - if (!cmndinfo->internal_command) { - b = scp->device->channel; - t = scp->device->id; - if (t == (unchar)id && b == (unchar)busnum) { - TRACE2(("gdth_stop_timeout(): update_timeout()\n")); - cmndinfo->timeout = gdth_update_timeout(scp, 0); - } - } - } - spin_unlock_irqrestore(&ha->smp_lock, flags); -} - -static void gdth_start_timeout(gdth_ha_str *ha, int busnum, int id) -{ - ulong flags; - Scsi_Cmnd *scp; - unchar b, t; - - spin_lock_irqsave(&ha->smp_lock, flags); - - for (scp = ha->req_first; scp; scp = (Scsi_Cmnd *)scp->SCp.ptr) { - struct gdth_cmndinfo *cmndinfo = gdth_cmnd_priv(scp); - if (!cmndinfo->internal_command) { - b = scp->device->channel; - t = scp->device->id; - if (t == (unchar)id && b == (unchar)busnum) { - TRACE2(("gdth_start_timeout(): update_timeout()\n")); - gdth_update_timeout(scp, cmndinfo->timeout); - } - } - } - spin_unlock_irqrestore(&ha->smp_lock, flags); -} - -static int gdth_update_timeout(Scsi_Cmnd *scp, int timeout) -{ - int oldto; - - oldto = scp->timeout_per_command; - scp->timeout_per_command = timeout; - - if (timeout == 0) { - del_timer(&scp->eh_timeout); - scp->eh_timeout.data = (unsigned long) NULL; - scp->eh_timeout.expires = 0; - } else { - if (scp->eh_timeout.data != (unsigned long) NULL) - del_timer(&scp->eh_timeout); - scp->eh_timeout.data = (unsigned long) scp; - scp->eh_timeout.expires = jiffies + timeout; - add_timer(&scp->eh_timeout); - } - - return oldto; -} diff --git a/drivers/scsi/gdth_proc.h b/drivers/scsi/gdth_proc.h index 45e6fdacf36..9b900cc9ebe 100644 --- a/drivers/scsi/gdth_proc.h +++ b/drivers/scsi/gdth_proc.h @@ -20,9 +20,6 @@ static char *gdth_ioctl_alloc(gdth_ha_str *ha, int size, int scratch, ulong64 *paddr); static void gdth_ioctl_free(gdth_ha_str *ha, int size, char *buf, ulong64 paddr); static void gdth_wait_completion(gdth_ha_str *ha, int busnum, int id); -static void gdth_stop_timeout(gdth_ha_str *ha, int busnum, int id); -static void gdth_start_timeout(gdth_ha_str *ha, int busnum, int id); -static int gdth_update_timeout(Scsi_Cmnd *scp, int timeout); #endif diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index 7b1502c0ab6..87e09f35d3d 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -756,7 +756,7 @@ static int ibmvscsi_queuecommand(struct scsi_cmnd *cmnd, init_event_struct(evt_struct, handle_cmd_rsp, VIOSRP_SRP_FORMAT, - cmnd->timeout_per_command/HZ); + cmnd->request->timeout/HZ); evt_struct->cmnd = cmnd; evt_struct->cmnd_done = done; diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c index 461331d3dc4..81c16cba541 100644 --- a/drivers/scsi/ide-scsi.c +++ b/drivers/scsi/ide-scsi.c @@ -612,7 +612,7 @@ static int idescsi_queue (struct scsi_cmnd *cmd, pc->req_xfer = pc->buf_size = scsi_bufflen(cmd); pc->scsi_cmd = cmd; pc->done = done; - pc->timeout = jiffies + cmd->timeout_per_command; + pc->timeout = jiffies + cmd->request->timeout; if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) { printk ("ide-scsi: %s: que %lu, cmd = ", drive->name, cmd->serial_number); diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index e7a3a655442..d30eb7ba018 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -3670,7 +3670,8 @@ static int ipr_slave_configure(struct scsi_device *sdev) sdev->no_uld_attach = 1; } if (ipr_is_vset_device(res)) { - sdev->timeout = IPR_VSET_RW_TIMEOUT; + blk_queue_rq_timeout(sdev->request_queue, + IPR_VSET_RW_TIMEOUT); blk_queue_max_sectors(sdev->request_queue, IPR_VSET_MAX_SECTORS); } if (ipr_is_vset_device(res) || ipr_is_scsi_disk(res)) diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c index bc9e6ddf41d..ef683f0d2b5 100644 --- a/drivers/scsi/ips.c +++ b/drivers/scsi/ips.c @@ -3818,7 +3818,7 @@ ips_send_cmd(ips_ha_t * ha, ips_scb_t * scb) scb->cmd.dcdb.segment_4G = 0; scb->cmd.dcdb.enhanced_sg = 0; - TimeOut = scb->scsi_cmd->timeout_per_command; + TimeOut = scb->scsi_cmd->request->timeout; if (ha->subsys->param[4] & 0x00100000) { /* If NEW Tape DCDB is Supported */ if (!scb->sg_len) { diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 299e075a7b3..1eca82420aa 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1476,12 +1476,12 @@ static void iscsi_start_tx(struct iscsi_conn *conn) scsi_queue_work(conn->session->host, &conn->xmitwork); } -static enum scsi_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd) +static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd) { struct iscsi_cls_session *cls_session; struct iscsi_session *session; struct iscsi_conn *conn; - enum scsi_eh_timer_return rc = EH_NOT_HANDLED; + enum blk_eh_timer_return rc = BLK_EH_NOT_HANDLED; cls_session = starget_to_session(scsi_target(scmd->device)); session = cls_session->dd_data; @@ -1494,14 +1494,14 @@ static enum scsi_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd) * We are probably in the middle of iscsi recovery so let * that complete and handle the error. */ - rc = EH_RESET_TIMER; + rc = BLK_EH_RESET_TIMER; goto done; } conn = session->leadconn; if (!conn) { /* In the middle of shuting down */ - rc = EH_RESET_TIMER; + rc = BLK_EH_RESET_TIMER; goto done; } @@ -1513,20 +1513,21 @@ static enum scsi_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd) */ if (time_before_eq(conn->last_recv + (conn->recv_timeout * HZ) + (conn->ping_timeout * HZ), jiffies)) - rc = EH_RESET_TIMER; + rc = BLK_EH_RESET_TIMER; /* * if we are about to check the transport then give the command * more time */ if (time_before_eq(conn->last_recv + (conn->recv_timeout * HZ), jiffies)) - rc = EH_RESET_TIMER; + rc = BLK_EH_RESET_TIMER; /* if in the middle of checking the transport then give us more time */ if (conn->ping_task) - rc = EH_RESET_TIMER; + rc = BLK_EH_RESET_TIMER; done: spin_unlock(&session->lock); - debug_scsi("return %s\n", rc == EH_RESET_TIMER ? "timer reset" : "nh"); + debug_scsi("return %s\n", rc == BLK_EH_RESET_TIMER ? + "timer reset" : "nh"); return rc; } diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 48ee8c7f5bd..837b095ba90 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -398,7 +398,7 @@ void sas_ata_task_abort(struct sas_task *task) /* Bounce SCSI-initiated commands to the SCSI EH */ if (qc->scsicmd) { - scsi_req_abort_cmd(qc->scsicmd); + blk_abort_request(qc->scsicmd->request); scsi_schedule_eh(qc->scsicmd->device->host); return; } diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h index b4f9368f116..0001374bd6b 100644 --- a/drivers/scsi/libsas/sas_internal.h +++ b/drivers/scsi/libsas/sas_internal.h @@ -55,7 +55,7 @@ void sas_unregister_phys(struct sas_ha_struct *sas_ha); int sas_register_ports(struct sas_ha_struct *sas_ha); void sas_unregister_ports(struct sas_ha_struct *sas_ha); -enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *); +enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *); int sas_init_queue(struct sas_ha_struct *sas_ha); int sas_init_events(struct sas_ha_struct *sas_ha); diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index a8e3ef30907..744838780ad 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -673,43 +673,43 @@ out: return; } -enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd) +enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd) { struct sas_task *task = TO_SAS_TASK(cmd); unsigned long flags; if (!task) { - cmd->timeout_per_command /= 2; + cmd->request->timeout /= 2; SAS_DPRINTK("command 0x%p, task 0x%p, gone: %s\n", - cmd, task, (cmd->timeout_per_command ? - "EH_RESET_TIMER" : "EH_NOT_HANDLED")); - if (!cmd->timeout_per_command) - return EH_NOT_HANDLED; - return EH_RESET_TIMER; + cmd, task, (cmd->request->timeout ? + "BLK_EH_RESET_TIMER" : "BLK_EH_NOT_HANDLED")); + if (!cmd->request->timeout) + return BLK_EH_NOT_HANDLED; + return BLK_EH_RESET_TIMER; } spin_lock_irqsave(&task->task_state_lock, flags); BUG_ON(task->task_state_flags & SAS_TASK_STATE_ABORTED); if (task->task_state_flags & SAS_TASK_STATE_DONE) { spin_unlock_irqrestore(&task->task_state_lock, flags); - SAS_DPRINTK("command 0x%p, task 0x%p, timed out: EH_HANDLED\n", - cmd, task); - return EH_HANDLED; + SAS_DPRINTK("command 0x%p, task 0x%p, timed out: " + "BLK_EH_HANDLED\n", cmd, task); + return BLK_EH_HANDLED; } if (!(task->task_state_flags & SAS_TASK_AT_INITIATOR)) { spin_unlock_irqrestore(&task->task_state_lock, flags); SAS_DPRINTK("command 0x%p, task 0x%p, not at initiator: " - "EH_RESET_TIMER\n", + "BLK_EH_RESET_TIMER\n", cmd, task); - return EH_RESET_TIMER; + return BLK_EH_RESET_TIMER; } task->task_state_flags |= SAS_TASK_STATE_ABORTED; spin_unlock_irqrestore(&task->task_state_lock, flags); - SAS_DPRINTK("command 0x%p, task 0x%p, timed out: EH_NOT_HANDLED\n", + SAS_DPRINTK("command 0x%p, task 0x%p, timed out: BLK_EH_NOT_HANDLED\n", cmd, task); - return EH_NOT_HANDLED; + return BLK_EH_NOT_HANDLED; } int sas_ioctl(struct scsi_device *sdev, int cmd, void __user *arg) @@ -1039,7 +1039,7 @@ void sas_task_abort(struct sas_task *task) return; } - scsi_req_abort_cmd(sc); + blk_abort_request(sc->request); scsi_schedule_eh(sc->device->host); } diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c index 97b763378e7..afe1de99876 100644 --- a/drivers/scsi/megaraid/megaraid_sas.c +++ b/drivers/scsi/megaraid/megaraid_sas.c @@ -1167,7 +1167,7 @@ static int megasas_generic_reset(struct scsi_cmnd *scmd) * cmd has not been completed within the timeout period. */ static enum -scsi_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd) +blk_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd) { struct megasas_cmd *cmd = (struct megasas_cmd *)scmd->SCp.ptr; struct megasas_instance *instance; @@ -1175,7 +1175,7 @@ scsi_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd) if (time_after(jiffies, scmd->jiffies_at_alloc + (MEGASAS_DEFAULT_CMD_TIMEOUT * 2) * HZ)) { - return EH_NOT_HANDLED; + return BLK_EH_NOT_HANDLED; } instance = cmd->instance; @@ -1189,7 +1189,7 @@ scsi_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd) spin_unlock_irqrestore(instance->host->host_lock, flags); } - return EH_RESET_TIMER; + return BLK_EH_RESET_TIMER; } /** diff --git a/drivers/scsi/ncr53c8xx.c b/drivers/scsi/ncr53c8xx.c index c57c94c0ffd..3b7240e4081 100644 --- a/drivers/scsi/ncr53c8xx.c +++ b/drivers/scsi/ncr53c8xx.c @@ -4170,8 +4170,8 @@ static int ncr_queue_command (struct ncb *np, struct scsi_cmnd *cmd) ** **---------------------------------------------------- */ - if (np->settle_time && cmd->timeout_per_command >= HZ) { - u_long tlimit = jiffies + cmd->timeout_per_command - HZ; + if (np->settle_time && cmd->request->timeout >= HZ) { + u_long tlimit = jiffies + cmd->request->timeout - HZ; if (time_after(np->settle_time, tlimit)) np->settle_time = tlimit; } diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c index 37f9ba0cd79..b6cd12b2e99 100644 --- a/drivers/scsi/qla1280.c +++ b/drivers/scsi/qla1280.c @@ -2845,7 +2845,7 @@ qla1280_64bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp) memset(((char *)pkt + 8), 0, (REQUEST_ENTRY_SIZE - 8)); /* Set ISP command timeout. */ - pkt->timeout = cpu_to_le16(cmd->timeout_per_command/HZ); + pkt->timeout = cpu_to_le16(cmd->request->timeout/HZ); /* Set device target ID and LUN */ pkt->lun = SCSI_LUN_32(cmd); @@ -3114,7 +3114,7 @@ qla1280_32bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp) memset(((char *)pkt + 8), 0, (REQUEST_ENTRY_SIZE - 8)); /* Set ISP command timeout. */ - pkt->timeout = cpu_to_le16(cmd->timeout_per_command/HZ); + pkt->timeout = cpu_to_le16(cmd->request->timeout/HZ); /* Set device target ID and LUN */ pkt->lun = SCSI_LUN_32(cmd); diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 88bebb13bc5..de8279ad7d8 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -1542,7 +1542,7 @@ static int qla4xxx_eh_device_reset(struct scsi_cmnd *cmd) DEBUG2(printk(KERN_INFO "scsi%ld: DEVICE_RESET cmd=%p jiffies = 0x%lx, to=%x," "dpc_flags=%lx, status=%x allowed=%d\n", ha->host_no, - cmd, jiffies, cmd->timeout_per_command / HZ, + cmd, jiffies, cmd->request->timeout / HZ, ha->dpc_flags, cmd->result, cmd->allowed)); /* FIXME: wait for hba to go online */ @@ -1598,7 +1598,7 @@ static int qla4xxx_eh_target_reset(struct scsi_cmnd *cmd) DEBUG2(printk(KERN_INFO "scsi%ld: TARGET_DEVICE_RESET cmd=%p jiffies = 0x%lx, " "to=%x,dpc_flags=%lx, status=%x allowed=%d\n", - ha->host_no, cmd, jiffies, cmd->timeout_per_command / HZ, + ha->host_no, cmd, jiffies, cmd->request->timeout / HZ, ha->dpc_flags, cmd->result, cmd->allowed)); stat = qla4xxx_reset_target(ha, ddb_entry); diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index ee6be596503..dbeb86cafc0 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -291,7 +291,6 @@ struct scsi_cmnd *scsi_get_command(struct scsi_device *dev, gfp_t gfp_mask) unsigned long flags; cmd->device = dev; - init_timer(&cmd->eh_timeout); INIT_LIST_HEAD(&cmd->list); spin_lock_irqsave(&dev->list_lock, flags); list_add_tail(&cmd->list, &dev->cmd_list); @@ -652,14 +651,19 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd) unsigned long timeout; int rtn = 0; + /* + * We will use a queued command if possible, otherwise we will + * emulate the queuing and calling of completion function ourselves. + */ + atomic_inc(&cmd->device->iorequest_cnt); + /* check if the device is still usable */ if (unlikely(cmd->device->sdev_state == SDEV_DEL)) { /* in SDEV_DEL we error all commands. DID_NO_CONNECT * returns an immediate error upwards, and signals * that the device is no longer present */ cmd->result = DID_NO_CONNECT << 16; - atomic_inc(&cmd->device->iorequest_cnt); - __scsi_done(cmd); + scsi_done(cmd); /* return 0 (because the command has been processed) */ goto out; } @@ -672,6 +676,7 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd) * future requests should not occur until the device * transitions out of the suspend state. */ + scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY); SCSI_LOG_MLQUEUE(3, printk("queuecommand : device blocked \n")); @@ -714,20 +719,8 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd) host->resetting = 0; } - /* - * AK: unlikely race here: for some reason the timer could - * expire before the serial number is set up below. - */ - scsi_add_timer(cmd, cmd->timeout_per_command, scsi_times_out); - scsi_log_send(cmd); - /* - * We will use a queued command if possible, otherwise we will - * emulate the queuing and calling of completion function ourselves. - */ - atomic_inc(&cmd->device->iorequest_cnt); - /* * Before we queue this command, check if the command * length exceeds what the host adapter can handle. @@ -744,6 +737,12 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd) } spin_lock_irqsave(host->host_lock, flags); + /* + * AK: unlikely race here: for some reason the timer could + * expire before the serial number is set up below. + * + * TODO: kill serial or move to blk layer + */ scsi_cmd_get_serial(host, cmd); if (unlikely(host->shost_state == SHOST_DEL)) { @@ -754,12 +753,8 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd) } spin_unlock_irqrestore(host->host_lock, flags); if (rtn) { - if (scsi_delete_timer(cmd)) { - atomic_inc(&cmd->device->iodone_cnt); - scsi_queue_insert(cmd, - (rtn == SCSI_MLQUEUE_DEVICE_BUSY) ? - rtn : SCSI_MLQUEUE_HOST_BUSY); - } + scsi_queue_insert(cmd, (rtn == SCSI_MLQUEUE_DEVICE_BUSY) ? + rtn : SCSI_MLQUEUE_HOST_BUSY); SCSI_LOG_MLQUEUE(3, printk("queuecommand : request rejected\n")); } @@ -769,24 +764,6 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd) return rtn; } -/** - * scsi_req_abort_cmd -- Request command recovery for the specified command - * @cmd: pointer to the SCSI command of interest - * - * This function requests that SCSI Core start recovery for the - * command by deleting the timer and adding the command to the eh - * queue. It can be called by either LLDDs or SCSI Core. LLDDs who - * implement their own error recovery MAY ignore the timeout event if - * they generated scsi_req_abort_cmd. - */ -void scsi_req_abort_cmd(struct scsi_cmnd *cmd) -{ - if (!scsi_delete_timer(cmd)) - return; - scsi_times_out(cmd); -} -EXPORT_SYMBOL(scsi_req_abort_cmd); - /** * scsi_done - Enqueue the finished SCSI command into the done queue. * @cmd: The SCSI Command for which a low-level device driver (LLDD) gives @@ -802,42 +779,7 @@ EXPORT_SYMBOL(scsi_req_abort_cmd); */ static void scsi_done(struct scsi_cmnd *cmd) { - /* - * We don't have to worry about this one timing out anymore. - * If we are unable to remove the timer, then the command - * has already timed out. In which case, we have no choice but to - * let the timeout function run, as we have no idea where in fact - * that function could really be. It might be on another processor, - * etc, etc. - */ - if (!scsi_delete_timer(cmd)) - return; - __scsi_done(cmd); -} - -/* Private entry to scsi_done() to complete a command when the timer - * isn't running --- used by scsi_times_out */ -void __scsi_done(struct scsi_cmnd *cmd) -{ - struct request *rq = cmd->request; - - /* - * Set the serial numbers back to zero - */ - cmd->serial_number = 0; - - atomic_inc(&cmd->device->iodone_cnt); - if (cmd->result) - atomic_inc(&cmd->device->ioerr_cnt); - - BUG_ON(!rq); - - /* - * The uptodate/nbytes values don't matter, as we allow partial - * completes and thus will check this in the softirq callback - */ - rq->completion_data = cmd; - blk_complete_request(rq); + blk_complete_request(cmd->request); } /* Move this to a header if it becomes more generally useful */ diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 39ce3aba1da..fecefa05cb6 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -111,70 +111,9 @@ int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag) return ret; } -/** - * scsi_add_timer - Start timeout timer for a single scsi command. - * @scmd: scsi command that is about to start running. - * @timeout: amount of time to allow this command to run. - * @complete: timeout function to call if timer isn't canceled. - * - * Notes: - * This should be turned into an inline function. Each scsi command - * has its own timer, and as it is added to the queue, we set up the - * timer. When the command completes, we cancel the timer. - */ -void scsi_add_timer(struct scsi_cmnd *scmd, int timeout, - void (*complete)(struct scsi_cmnd *)) -{ - - /* - * If the clock was already running for this command, then - * first delete the timer. The timer handling code gets rather - * confused if we don't do this. - */ - if (scmd->eh_timeout.function) - del_timer(&scmd->eh_timeout); - - scmd->eh_timeout.data = (unsigned long)scmd; - scmd->eh_timeout.expires = jiffies + timeout; - scmd->eh_timeout.function = (void (*)(unsigned long)) complete; - - SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p, time:" - " %d, (%p)\n", __func__, - scmd, timeout, complete)); - - add_timer(&scmd->eh_timeout); -} - -/** - * scsi_delete_timer - Delete/cancel timer for a given function. - * @scmd: Cmd that we are canceling timer for - * - * Notes: - * This should be turned into an inline function. - * - * Return value: - * 1 if we were able to detach the timer. 0 if we blew it, and the - * timer function has already started to run. - */ -int scsi_delete_timer(struct scsi_cmnd *scmd) -{ - int rtn; - - rtn = del_timer(&scmd->eh_timeout); - - SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p," - " rtn: %d\n", __func__, - scmd, rtn)); - - scmd->eh_timeout.data = (unsigned long)NULL; - scmd->eh_timeout.function = NULL; - - return rtn; -} - /** * scsi_times_out - Timeout function for normal scsi commands. - * @scmd: Cmd that is timing out. + * @req: request that is timing out. * * Notes: * We do not need to lock this. There is the potential for a race @@ -182,9 +121,11 @@ int scsi_delete_timer(struct scsi_cmnd *scmd) * normal completion function determines that the timer has already * fired, then it mustn't do anything. */ -void scsi_times_out(struct scsi_cmnd *scmd) +enum blk_eh_timer_return scsi_times_out(struct request *req) { - enum scsi_eh_timer_return (* eh_timed_out)(struct scsi_cmnd *); + struct scsi_cmnd *scmd = req->special; + enum blk_eh_timer_return (*eh_timed_out)(struct scsi_cmnd *); + enum blk_eh_timer_return rtn = BLK_EH_NOT_HANDLED; scsi_log_completion(scmd, TIMEOUT_ERROR); @@ -196,22 +137,20 @@ void scsi_times_out(struct scsi_cmnd *scmd) eh_timed_out = NULL; if (eh_timed_out) - switch (eh_timed_out(scmd)) { - case EH_HANDLED: - __scsi_done(scmd); - return; - case EH_RESET_TIMER: - scsi_add_timer(scmd, scmd->timeout_per_command, - scsi_times_out); - return; - case EH_NOT_HANDLED: + rtn = eh_timed_out(scmd); + switch (rtn) { + case BLK_EH_NOT_HANDLED: break; + default: + return rtn; } if (unlikely(!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) { scmd->result |= DID_TIME_OUT << 16; - __scsi_done(scmd); + return BLK_EH_HANDLED; } + + return BLK_EH_NOT_HANDLED; } /** @@ -1793,7 +1732,6 @@ scsi_reset_provider(struct scsi_device *dev, int flag) blk_rq_init(NULL, &req); scmd->request = &req; - memset(&scmd->eh_timeout, 0, sizeof(scmd->eh_timeout)); scmd->cmnd = req.cmd; @@ -1804,8 +1742,6 @@ scsi_reset_provider(struct scsi_device *dev, int flag) scmd->sc_data_direction = DMA_BIDIRECTIONAL; - init_timer(&scmd->eh_timeout); - spin_lock_irqsave(shost->host_lock, flags); shost->tmf_in_progress = 1; spin_unlock_irqrestore(shost->host_lock, flags); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 62307bd794a..e7686500e9d 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1181,7 +1181,6 @@ int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req) cmd->transfersize = req->data_len; cmd->allowed = req->retries; - cmd->timeout_per_command = req->timeout; return BLKPREP_OK; } EXPORT_SYMBOL(scsi_setup_blk_pc_cmnd); @@ -1416,17 +1415,26 @@ static void scsi_kill_request(struct request *req, struct request_queue *q) spin_unlock(shost->host_lock); spin_lock(sdev->request_queue->queue_lock); - __scsi_done(cmd); + blk_complete_request(req); } static void scsi_softirq_done(struct request *rq) { - struct scsi_cmnd *cmd = rq->completion_data; - unsigned long wait_for = (cmd->allowed + 1) * cmd->timeout_per_command; + struct scsi_cmnd *cmd = rq->special; + unsigned long wait_for = (cmd->allowed + 1) * rq->timeout; int disposition; INIT_LIST_HEAD(&cmd->eh_entry); + /* + * Set the serial numbers back to zero + */ + cmd->serial_number = 0; + + atomic_inc(&cmd->device->iodone_cnt); + if (cmd->result) + atomic_inc(&cmd->device->ioerr_cnt); + disposition = scsi_decide_disposition(cmd); if (disposition != SUCCESS && time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) { @@ -1675,6 +1683,7 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev) blk_queue_prep_rq(q, scsi_prep_fn); blk_queue_softirq_done(q, scsi_softirq_done); + blk_queue_rq_timed_out(q, scsi_times_out); return q; } diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 79f0f751120..6cddd5dd323 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -4,6 +4,7 @@ #include struct request_queue; +struct request; struct scsi_cmnd; struct scsi_device; struct scsi_host_template; @@ -27,7 +28,6 @@ extern void scsi_exit_hosts(void); extern int scsi_dispatch_cmd(struct scsi_cmnd *cmd); extern int scsi_setup_command_freelist(struct Scsi_Host *shost); extern void scsi_destroy_command_freelist(struct Scsi_Host *shost); -extern void __scsi_done(struct scsi_cmnd *cmd); #ifdef CONFIG_SCSI_LOGGING void scsi_log_send(struct scsi_cmnd *cmd); void scsi_log_completion(struct scsi_cmnd *cmd, int disposition); @@ -49,10 +49,7 @@ extern int __init scsi_init_devinfo(void); extern void scsi_exit_devinfo(void); /* scsi_error.c */ -extern void scsi_add_timer(struct scsi_cmnd *, int, - void (*)(struct scsi_cmnd *)); -extern int scsi_delete_timer(struct scsi_cmnd *); -extern void scsi_times_out(struct scsi_cmnd *cmd); +extern enum blk_eh_timer_return scsi_times_out(struct request *req); extern int scsi_error_handler(void *host); extern int scsi_decide_disposition(struct scsi_cmnd *cmd); extern void scsi_eh_wakeup(struct Scsi_Host *shost); diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index ab3c71869be..7f618ee5ece 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -560,12 +560,15 @@ sdev_rd_attr (vendor, "%.8s\n"); sdev_rd_attr (model, "%.16s\n"); sdev_rd_attr (rev, "%.4s\n"); +/* + * TODO: can we make these symlinks to the block layer ones? + */ static ssize_t sdev_show_timeout (struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev; sdev = to_scsi_device(dev); - return snprintf (buf, 20, "%d\n", sdev->timeout / HZ); + return snprintf(buf, 20, "%d\n", sdev->request_queue->rq_timeout / HZ); } static ssize_t @@ -576,7 +579,7 @@ sdev_store_timeout (struct device *dev, struct device_attribute *attr, int timeout; sdev = to_scsi_device(dev); sscanf (buf, "%d\n", &timeout); - sdev->timeout = timeout * HZ; + blk_queue_rq_timeout(sdev->request_queue, timeout * HZ); return count; } static DEVICE_ATTR(timeout, S_IRUGO | S_IWUSR, sdev_show_timeout, sdev_store_timeout); diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 56823fd1fb8..9168883d0df 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -1950,15 +1950,15 @@ static int fc_vport_match(struct attribute_container *cont, * Notes: * This routine assumes no locks are held on entry. */ -static enum scsi_eh_timer_return +static enum blk_eh_timer_return fc_timed_out(struct scsi_cmnd *scmd) { struct fc_rport *rport = starget_to_rport(scsi_target(scmd->device)); if (rport->port_state == FC_PORTSTATE_BLOCKED) - return EH_RESET_TIMER; + return BLK_EH_RESET_TIMER; - return EH_NOT_HANDLED; + return BLK_EH_NOT_HANDLED; } /* diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index cb115d1bf22..c0cf4acda7d 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -383,7 +383,6 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq) sector_t block = rq->sector; sector_t threshold; unsigned int this_count = rq->nr_sectors; - unsigned int timeout = sdp->timeout; int ret; if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { @@ -584,7 +583,6 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq) SCpnt->transfersize = sdp->sector_size; SCpnt->underflow = this_count << 9; SCpnt->allowed = SD_MAX_RETRIES; - SCpnt->timeout_per_command = timeout; /* * This indicates that the command is ready from our end to be @@ -1878,11 +1876,12 @@ static int sd_probe(struct device *dev) sdkp->openers = 0; sdkp->previous_state = 1; - if (!sdp->timeout) { + if (!sdp->request_queue->rq_timeout) { if (sdp->type != TYPE_MOD) - sdp->timeout = SD_TIMEOUT; + blk_queue_rq_timeout(sdp->request_queue, SD_TIMEOUT); else - sdp->timeout = SD_MOD_TIMEOUT; + blk_queue_rq_timeout(sdp->request_queue, + SD_MOD_TIMEOUT); } device_initialize(&sdkp->dev); diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 8dbe3798d5f..0f17009c99d 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -331,7 +331,7 @@ static int sr_done(struct scsi_cmnd *SCpnt) static int sr_prep_fn(struct request_queue *q, struct request *rq) { - int block=0, this_count, s_size, timeout = SR_TIMEOUT; + int block = 0, this_count, s_size; struct scsi_cd *cd; struct scsi_cmnd *SCpnt; struct scsi_device *sdp = q->queuedata; @@ -461,7 +461,6 @@ static int sr_prep_fn(struct request_queue *q, struct request *rq) SCpnt->transfersize = cd->device->sector_size; SCpnt->underflow = this_count << 9; SCpnt->allowed = MAX_RETRIES; - SCpnt->timeout_per_command = timeout; /* * This indicates that the command is ready from our end to be @@ -620,6 +619,8 @@ static int sr_probe(struct device *dev) disk->fops = &sr_bdops; disk->flags = GENHD_FL_CD; + blk_queue_rq_timeout(sdev->request_queue, SR_TIMEOUT); + cd->device = sdev; cd->disk = disk; cd->driver = &sr_template; diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c index d39107b7669..f4e6cde1fd0 100644 --- a/drivers/scsi/sym53c8xx_2/sym_glue.c +++ b/drivers/scsi/sym53c8xx_2/sym_glue.c @@ -519,8 +519,8 @@ static int sym53c8xx_queue_command(struct scsi_cmnd *cmd, * Shorten our settle_time if needed for * this command not to time out. */ - if (np->s.settle_time_valid && cmd->timeout_per_command) { - unsigned long tlimit = jiffies + cmd->timeout_per_command; + if (np->s.settle_time_valid && cmd->request->timeout) { + unsigned long tlimit = jiffies + cmd->request->timeout; tlimit -= SYM_CONF_TIMER_INTERVAL*2; if (time_after(np->s.settle_time, tlimit)) { np->s.settle_time = tlimit; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9c254926042..067f28b8007 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -147,6 +147,7 @@ struct request { unsigned int cmd_flags; enum rq_cmd_type_bits cmd_type; + unsigned long atomic_flags; /* Maintain bio traversal state for part by part I/O submission. * hard_* are block layer internals, no driver should touch them! @@ -214,6 +215,8 @@ struct request { void *data; void *sense; + unsigned long deadline; + struct list_head timeout_list; unsigned int timeout; int retries; @@ -266,6 +269,14 @@ typedef void (prepare_flush_fn) (struct request_queue *, struct request *); typedef void (softirq_done_fn)(struct request *); typedef int (dma_drain_needed_fn)(struct request *); +enum blk_eh_timer_return { + BLK_EH_NOT_HANDLED, + BLK_EH_HANDLED, + BLK_EH_RESET_TIMER, +}; + +typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *); + enum blk_queue_state { Queue_down, Queue_up, @@ -311,6 +322,7 @@ struct request_queue merge_bvec_fn *merge_bvec_fn; prepare_flush_fn *prepare_flush_fn; softirq_done_fn *softirq_done_fn; + rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; /* @@ -386,6 +398,10 @@ struct request_queue unsigned int nr_sorted; unsigned int in_flight; + unsigned int rq_timeout; + struct timer_list timeout; + struct list_head timeout_list; + /* * sg stuff */ @@ -770,6 +786,8 @@ extern int blk_end_request_callback(struct request *rq, int error, unsigned int nr_bytes, int (drv_callback)(struct request *)); extern void blk_complete_request(struct request *); +extern void __blk_complete_request(struct request *); +extern void blk_abort_request(struct request *); /* * blk_end_request() takes bytes instead of sectors as a complete size. @@ -811,6 +829,8 @@ extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *); +extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); +extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *); extern int blk_do_ordered(struct request_queue *, struct request **); diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index f9f6e793575..855bf95963e 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -75,7 +75,6 @@ struct scsi_cmnd { int retries; int allowed; - int timeout_per_command; unsigned char prot_op; unsigned char prot_type; @@ -86,7 +85,6 @@ struct scsi_cmnd { /* These elements define the operation we are about to perform */ unsigned char *cmnd; - struct timer_list eh_timeout; /* Used to time out the command. */ /* These elements define the operation we ultimately want to perform */ struct scsi_data_buffer sdb; @@ -139,7 +137,6 @@ extern void scsi_put_command(struct scsi_cmnd *); extern void __scsi_put_command(struct Scsi_Host *, struct scsi_cmnd *, struct device *); extern void scsi_finish_command(struct scsi_cmnd *cmd); -extern void scsi_req_abort_cmd(struct scsi_cmnd *cmd); extern void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count, size_t *offset, size_t *len); diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 44a55d1bf53..d123ca84e73 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -43,13 +43,6 @@ struct blk_queue_tags; #define DISABLE_CLUSTERING 0 #define ENABLE_CLUSTERING 1 -enum scsi_eh_timer_return { - EH_NOT_HANDLED, - EH_HANDLED, - EH_RESET_TIMER, -}; - - struct scsi_host_template { struct module *module; const char *name; @@ -347,7 +340,7 @@ struct scsi_host_template { * * Status: OPTIONAL */ - enum scsi_eh_timer_return (* eh_timed_out)(struct scsi_cmnd *); + enum blk_eh_timer_return (*eh_timed_out)(struct scsi_cmnd *); /* * Name of proc directory diff --git a/include/scsi/scsi_transport.h b/include/scsi/scsi_transport.h index 490bd13a634..0de32cd4e8a 100644 --- a/include/scsi/scsi_transport.h +++ b/include/scsi/scsi_transport.h @@ -21,6 +21,7 @@ #define SCSI_TRANSPORT_H #include +#include #include #include @@ -64,7 +65,7 @@ struct scsi_transport_template { * begin counting again * EH_NOT_HANDLED Begin normal error recovery */ - enum scsi_eh_timer_return (* eh_timed_out)(struct scsi_cmnd *); + enum blk_eh_timer_return (*eh_timed_out)(struct scsi_cmnd *); /* * Used as callback for the completion of i_t_nexus request -- cgit v1.2.3-70-g09d2 From 224cb3e981f1b2f9f93dbd49eaef505d17d894c2 Mon Sep 17 00:00:00 2001 From: Mike Anderson Date: Fri, 29 Aug 2008 09:36:09 +0200 Subject: dm: Call blk_abort_queue on failed paths Signed-off-by: Mike Anderson Signed-off-by: Jens Axboe --- drivers/md/dm-mpath.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index c2fcf28b4c7..3d3848132c6 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -33,6 +33,7 @@ struct pgpath { unsigned fail_count; /* Cumulative failure count */ struct dm_path path; + struct work_struct deactivate_path; }; #define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path) @@ -112,6 +113,7 @@ static struct workqueue_struct *kmultipathd, *kmpath_handlerd; static void process_queued_ios(struct work_struct *work); static void trigger_event(struct work_struct *work); static void activate_path(struct work_struct *work); +static void deactivate_path(struct work_struct *work); /*----------------------------------------------- @@ -122,8 +124,10 @@ static struct pgpath *alloc_pgpath(void) { struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL); - if (pgpath) + if (pgpath) { pgpath->path.is_active = 1; + INIT_WORK(&pgpath->deactivate_path, deactivate_path); + } return pgpath; } @@ -133,6 +137,14 @@ static void free_pgpath(struct pgpath *pgpath) kfree(pgpath); } +static void deactivate_path(struct work_struct *work) +{ + struct pgpath *pgpath = + container_of(work, struct pgpath, deactivate_path); + + blk_abort_queue(pgpath->path.dev->bdev->bd_disk->queue); +} + static struct priority_group *alloc_priority_group(void) { struct priority_group *pg; @@ -870,6 +882,7 @@ static int fail_path(struct pgpath *pgpath) pgpath->path.dev->name, m->nr_valid_paths); queue_work(kmultipathd, &m->trigger_event); + queue_work(kmultipathd, &pgpath->deactivate_path); out: spin_unlock_irqrestore(&m->lock, flags); -- cgit v1.2.3-70-g09d2 From a91a3a20e06621b9931793888583efe37db4e4e8 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 2 Sep 2008 22:50:01 +0900 Subject: sg: rename sg_cmd_done sg_rq_end_io old sg_rq_end_io() was used to wrap sg_cmd_done during converting sg to use the block layer (in order to cover the difference scsi_execute_async and blk_execute_rq_nowait). Now we don't need it so let's remove it. Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 50c07bca727..d18f90d1d9a 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -177,7 +177,7 @@ typedef struct sg_device { /* holds the state of each scsi generic device */ static int sg_fasync(int fd, struct file *filp, int mode); /* tasklet or soft irq callback */ -static void sg_cmd_done(void *data, char *sense, int result, int resid); +static void sg_rq_end_io(struct request *rq, int uptodate); static int sg_start_req(Sg_request *srp, unsigned char *cmd); static void sg_finish_rem_req(Sg_request * srp); static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size); @@ -227,11 +227,6 @@ static int sg_allow_access(struct file *filp, unsigned char *cmd) cmd, filp->f_mode & FMODE_WRITE); } -static void sg_rq_end_io(struct request *rq, int uptodate) -{ - sg_cmd_done(rq->end_io_data, rq->sense, rq->errors, rq->data_len); -} - static int sg_open(struct inode *inode, struct file *filp) { @@ -1257,16 +1252,19 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma) return 0; } -/* This function is a "bottom half" handler that is called by the - * mid level when a command is completed (or has failed). */ -static void -sg_cmd_done(void *data, char *sense, int result, int resid) +/* + * This function is a "bottom half" handler that is called by the mid + * level when a command is completed (or has failed). + */ +static void sg_rq_end_io(struct request *rq, int uptodate) { - Sg_request *srp = data; + struct sg_request *srp = rq->end_io_data; Sg_device *sdp = NULL; Sg_fd *sfp; unsigned long iflags; unsigned int ms; + char *sense; + int result, resid; if (NULL == srp) { printk(KERN_ERR "sg_cmd_done: NULL request\n"); @@ -1280,6 +1278,9 @@ sg_cmd_done(void *data, char *sense, int result, int resid) return; } + sense = rq->sense; + result = rq->errors; + resid = rq->data_len; SCSI_LOG_TIMEOUT(4, printk("sg_cmd_done: %s, pack_id=%d, res=0x%x\n", sdp->disk->disk_name, srp->header.pack_id, result)); -- cgit v1.2.3-70-g09d2 From 7e56cb0f7e7a132803ffefa0a5a15fb2079afaf1 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 2 Sep 2008 22:50:02 +0900 Subject: sg: remove SG_ALLOW_DIO_CODE define sg had lots of the own functions for the direct IO but now sg uses the block layer functions for it. There are only five lines for the direct IO. SG_ALLOW_DIO_CODE define was used to compile out the direct IO code but we don't need the define. If someone wants to remove the direct IO code, he can do easily without the define. Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index d18f90d1d9a..2c30331abbe 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -68,7 +68,6 @@ static void sg_proc_cleanup(void); #endif #define SG_ALLOW_DIO_DEF 0 -#define SG_ALLOW_DIO_CODE /* compile out by commenting this define */ #define SG_MAX_DEVS 32768 @@ -1674,13 +1673,12 @@ static int sg_start_req(Sg_request *srp, unsigned char *cmd) if ((dxfer_len <= 0) || (dxfer_dir == SG_DXFER_NONE)) return 0; -#ifdef SG_ALLOW_DIO_CODE if (sg_allow_dio && (hp->flags & SG_FLAG_DIRECT_IO) && (dxfer_dir != SG_DXFER_UNKNOWN) && (0 == hp->iovec_count) && (!sfp->parentdp->device->host->unchecked_isa_dma) && blk_rq_aligned(q, hp->dxferp, dxfer_len)) return sg_build_direct(srp, sfp, dxfer_len); -#endif + if ((!sg_res_in_use(sfp)) && (dxfer_len <= rsv_schp->bufflen)) sg_link_reserve(sfp, srp, dxfer_len); else -- cgit v1.2.3-70-g09d2 From fd1c1de0766844af4cfc39298e109ad273e72a9e Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 2 Sep 2008 22:50:03 +0900 Subject: sg: remove b_malloc_len in sg_scatter_hold struct It's not used for anything useful after the block layer conversion. Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 2c30331abbe..ccce31a400e 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -116,7 +116,6 @@ typedef struct sg_scatter_hold { /* holding area for scsi scatter gather info */ unsigned short k_use_sg; /* Count of kernel scatter-gather pieces */ unsigned sglist_len; /* size of malloc'd scatter-gather list ++ */ unsigned bufflen; /* Size of (aggregate) data buffer */ - unsigned b_malloc_len; /* actual len malloc'ed in buffer */ struct page **pages; int page_order; char dio_in_use; /* 0->indirect IO (or mmap), 1->dio */ @@ -1986,7 +1985,6 @@ sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size) req_schp->pages = rsv_schp->pages; req_schp->bufflen = size; - req_schp->b_malloc_len = rsv_schp->b_malloc_len; req_schp->page_order = rsv_schp->page_order; break; } else -- cgit v1.2.3-70-g09d2 From 44c7b0eaa041007066e30ab4869d5bbf8dad5989 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 2 Sep 2008 22:50:04 +0900 Subject: sg: remove __sg_start_req __sg_start_req() was used temporarily to call blk_get_request() during converting sg to use the block layer. Now sg always calls blk_get_request() so we can move blk_get_request() to sg_start_req(). We don't need __sg_start_req anymore. Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 38 +++++++++++++------------------------- 1 file changed, 13 insertions(+), 25 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index ccce31a400e..9a56c0d320b 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1626,14 +1626,23 @@ exit_sg(void) idr_destroy(&sg_index_idr); } -static int __sg_start_req(struct sg_request *srp, struct sg_io_hdr *hp, - unsigned char *cmd) +static int sg_start_req(Sg_request *srp, unsigned char *cmd) { - struct sg_fd *sfp = srp->parentfp; - struct request_queue *q = sfp->parentdp->device->request_queue; + int res = 0; struct request *rq; + Sg_fd *sfp = srp->parentfp; + sg_io_hdr_t *hp = &srp->header; + int dxfer_len = (int) hp->dxfer_len; + int dxfer_dir = hp->dxfer_direction; + Sg_scatter_hold *req_schp = &srp->data; + Sg_scatter_hold *rsv_schp = &sfp->reserve; + struct request_queue *q = sfp->parentdp->device->request_queue; + struct rq_map_data map_data; int rw = hp->dxfer_direction == SG_DXFER_TO_DEV ? WRITE : READ; + SCSI_LOG_TIMEOUT(4, printk(KERN_INFO "sg_start_req: dxfer_len=%d\n", + dxfer_len)); + rq = blk_get_request(q, rw, GFP_ATOMIC); if (!rq) return -ENOMEM; @@ -1648,27 +1657,6 @@ static int __sg_start_req(struct sg_request *srp, struct sg_io_hdr *hp, rq->sense = srp->sense_b; rq->retries = SG_DEFAULT_RETRIES; - return 0; -} - -static int sg_start_req(Sg_request *srp, unsigned char *cmd) -{ - int res; - Sg_fd *sfp = srp->parentfp; - sg_io_hdr_t *hp = &srp->header; - int dxfer_len = (int) hp->dxfer_len; - int dxfer_dir = hp->dxfer_direction; - Sg_scatter_hold *req_schp = &srp->data; - Sg_scatter_hold *rsv_schp = &sfp->reserve; - struct request_queue *q = sfp->parentdp->device->request_queue; - struct rq_map_data map_data; - - SCSI_LOG_TIMEOUT(4, printk("sg_start_req: dxfer_len=%d\n", dxfer_len)); - - res = __sg_start_req(srp, hp, cmd); - if (res) - return res; - if ((dxfer_len <= 0) || (dxfer_dir == SG_DXFER_NONE)) return 0; -- cgit v1.2.3-70-g09d2 From 626710c9d665ff381c7ec666b6a023f064ca5fef Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 2 Sep 2008 22:50:05 +0900 Subject: sg: incorporate sg_build_direct into sg_start_req Calling blk_rq_map_user() at a single place is better than at different two places. It makes the code more understandable. Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 80 +++++++++++++++++++++++-------------------------------- 1 file changed, 33 insertions(+), 47 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 9a56c0d320b..c0b6866eece 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -202,7 +202,6 @@ static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id); static Sg_request *sg_add_request(Sg_fd * sfp); static int sg_remove_request(Sg_fd * sfp, Sg_request * srp); static int sg_res_in_use(Sg_fd * sfp); -static int sg_build_direct(Sg_request * srp, Sg_fd * sfp, int dxfer_len); static Sg_device *sg_get_dev(int dev); #ifdef CONFIG_SCSI_PROC_FS static int sg_last_dev(void); @@ -1628,16 +1627,17 @@ exit_sg(void) static int sg_start_req(Sg_request *srp, unsigned char *cmd) { - int res = 0; + int res; struct request *rq; Sg_fd *sfp = srp->parentfp; sg_io_hdr_t *hp = &srp->header; int dxfer_len = (int) hp->dxfer_len; int dxfer_dir = hp->dxfer_direction; + unsigned int iov_count = hp->iovec_count; Sg_scatter_hold *req_schp = &srp->data; Sg_scatter_hold *rsv_schp = &sfp->reserve; struct request_queue *q = sfp->parentdp->device->request_queue; - struct rq_map_data map_data; + struct rq_map_data *md, map_data; int rw = hp->dxfer_direction == SG_DXFER_TO_DEV ? WRITE : READ; SCSI_LOG_TIMEOUT(4, printk(KERN_INFO "sg_start_req: dxfer_len=%d\n", @@ -1660,38 +1660,43 @@ static int sg_start_req(Sg_request *srp, unsigned char *cmd) if ((dxfer_len <= 0) || (dxfer_dir == SG_DXFER_NONE)) return 0; - if (sg_allow_dio && (hp->flags & SG_FLAG_DIRECT_IO) && - (dxfer_dir != SG_DXFER_UNKNOWN) && (0 == hp->iovec_count) && - (!sfp->parentdp->device->host->unchecked_isa_dma) && + if (sg_allow_dio && hp->flags & SG_FLAG_DIRECT_IO && + dxfer_dir != SG_DXFER_UNKNOWN && !iov_count && + !sfp->parentdp->device->host->unchecked_isa_dma && blk_rq_aligned(q, hp->dxferp, dxfer_len)) - return sg_build_direct(srp, sfp, dxfer_len); + md = NULL; + else + md = &map_data; + + if (md) { + if (!sg_res_in_use(sfp) && dxfer_len <= rsv_schp->bufflen) + sg_link_reserve(sfp, srp, dxfer_len); + else { + res = sg_build_indirect(req_schp, sfp, dxfer_len); + if (res) + return res; + } - if ((!sg_res_in_use(sfp)) && (dxfer_len <= rsv_schp->bufflen)) - sg_link_reserve(sfp, srp, dxfer_len); + md->pages = req_schp->pages; + md->page_order = req_schp->page_order; + md->nr_entries = req_schp->k_use_sg; + } + + if (iov_count) + res = blk_rq_map_user_iov(q, rq, md, hp->dxferp, iov_count, + hp->dxfer_len, GFP_ATOMIC); else - res = sg_build_indirect(req_schp, sfp, dxfer_len); + res = blk_rq_map_user(q, rq, md, hp->dxferp, + hp->dxfer_len, GFP_ATOMIC); if (!res) { - struct request *rq = srp->rq; - Sg_scatter_hold *schp = &srp->data; - int iovec_count = (int) hp->iovec_count; - - map_data.pages = schp->pages; - map_data.page_order = schp->page_order; - map_data.nr_entries = schp->k_use_sg; - - if (iovec_count) - res = blk_rq_map_user_iov(q, rq, &map_data, hp->dxferp, - iovec_count, - hp->dxfer_len, GFP_ATOMIC); - else - res = blk_rq_map_user(q, rq, &map_data, hp->dxferp, - hp->dxfer_len, GFP_ATOMIC); + srp->bio = rq->bio; - if (!res) - srp->bio = rq->bio; + if (!md) { + req_schp->dio_in_use = 1; + hp->info |= SG_INFO_DIRECT_IO; + } } - return res; } @@ -1730,25 +1735,6 @@ sg_build_sgat(Sg_scatter_hold * schp, const Sg_fd * sfp, int tablesize) return tablesize; /* number of scat_gath elements allocated */ } -/* Returns: -ve -> error, 0 -> done, 1 -> try indirect */ -static int -sg_build_direct(Sg_request * srp, Sg_fd * sfp, int dxfer_len) -{ - sg_io_hdr_t *hp = &srp->header; - Sg_scatter_hold *schp = &srp->data; - int res; - struct request *rq = srp->rq; - struct request_queue *q = sfp->parentdp->device->request_queue; - - res = blk_rq_map_user(q, rq, NULL, hp->dxferp, dxfer_len, GFP_ATOMIC); - if (res) - return res; - srp->bio = rq->bio; - schp->dio_in_use = 1; - hp->info |= SG_INFO_DIRECT_IO; - return 0; -} - static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size) { -- cgit v1.2.3-70-g09d2 From c3919af2354fff673026dcbeac6f009d2ce5ceee Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 2 Sep 2008 22:50:06 +0900 Subject: sg: remove sg_write_xfer sg_write_xfer was used to copy data from user space for WRITE commands. blk_rq_map_user_iov and blk_rq_map_user do the job so sg_write_xfer does nothing useful. Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 32 -------------------------------- 1 file changed, 32 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index c0b6866eece..07bd6833130 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -188,7 +188,6 @@ static ssize_t sg_new_write(Sg_fd *sfp, struct file *file, int read_only, Sg_request **o_srp); static int sg_common_write(Sg_fd * sfp, Sg_request * srp, unsigned char *cmnd, int timeout, int blocking); -static int sg_write_xfer(Sg_request * srp); static int sg_read_xfer(Sg_request * srp); static int sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer); static void sg_remove_scat(Sg_scatter_hold * schp); @@ -736,11 +735,6 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp, sg_finish_rem_req(srp); return k; /* probably out of space --> ENOMEM */ } - if ((k = sg_write_xfer(srp))) { - SCSI_LOG_TIMEOUT(1, printk("sg_common_write: write_xfer, bad address\n")); - sg_finish_rem_req(srp); - return k; - } if (sdp->detached) { sg_finish_rem_req(srp); return -ENODEV; @@ -1816,32 +1810,6 @@ out: return -ENOMEM; } -static int -sg_write_xfer(Sg_request * srp) -{ - sg_io_hdr_t *hp = &srp->header; - Sg_scatter_hold *schp = &srp->data; - int num_xfer = 0; - int dxfer_dir = hp->dxfer_direction; - int new_interface = ('\0' == hp->interface_id) ? 0 : 1; - - if ((SG_DXFER_UNKNOWN == dxfer_dir) || (SG_DXFER_TO_DEV == dxfer_dir) || - (SG_DXFER_TO_FROM_DEV == dxfer_dir)) { - num_xfer = (int) (new_interface ? hp->dxfer_len : hp->flags); - if (schp->bufflen < num_xfer) - num_xfer = schp->bufflen; - } - if ((num_xfer <= 0) || (schp->dio_in_use) || - (new_interface - && ((SG_FLAG_NO_DXFER | SG_FLAG_MMAP_IO) & hp->flags))) - return 0; - - SCSI_LOG_TIMEOUT(4, printk("sg_write_xfer: num_xfer=%d, k_use_sg=%d\n", - num_xfer, schp->k_use_sg)); - - return 0; -} - static void sg_remove_scat(Sg_scatter_hold * schp) { -- cgit v1.2.3-70-g09d2 From 0b6cb26c6686f1f24607c41f0a6d21ce54191710 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 2 Sep 2008 22:50:07 +0900 Subject: sg: remove sg_read_xfer sg_read_xfer was used to copy data to user space for READ commands. blk_rq_unmap_user does the job so sg_read_xfer does nothing useful. Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 33 +++++---------------------------- 1 file changed, 5 insertions(+), 28 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 07bd6833130..df8bf67b171 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -188,7 +188,6 @@ static ssize_t sg_new_write(Sg_fd *sfp, struct file *file, int read_only, Sg_request **o_srp); static int sg_common_write(Sg_fd * sfp, Sg_request * srp, unsigned char *cmnd, int timeout, int blocking); -static int sg_read_xfer(Sg_request * srp); static int sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer); static void sg_remove_scat(Sg_scatter_hold * schp); static void sg_build_reserve(Sg_fd * sfp, int req_size); @@ -523,8 +522,11 @@ sg_new_read(Sg_fd * sfp, char __user *buf, size_t count, Sg_request * srp) err = -EFAULT; goto err_out; } - err = sg_read_xfer(srp); - err_out: + if (srp->bio) { + err = blk_rq_unmap_user(srp->bio); + srp->bio = NULL; + } +err_out: sg_finish_rem_req(srp); return (0 == err) ? count : err; } @@ -1831,31 +1833,6 @@ sg_remove_scat(Sg_scatter_hold * schp) memset(schp, 0, sizeof (*schp)); } -static int -sg_read_xfer(Sg_request * srp) -{ - sg_io_hdr_t *hp = &srp->header; - Sg_scatter_hold *schp = &srp->data; - int num_xfer = 0; - int dxfer_dir = hp->dxfer_direction; - int new_interface = ('\0' == hp->interface_id) ? 0 : 1; - - if ((SG_DXFER_UNKNOWN == dxfer_dir) || (SG_DXFER_FROM_DEV == dxfer_dir) - || (SG_DXFER_TO_FROM_DEV == dxfer_dir)) { - num_xfer = hp->dxfer_len; - if (schp->bufflen < num_xfer) - num_xfer = schp->bufflen; - } - if ((num_xfer <= 0) || (schp->dio_in_use) || - (new_interface - && ((SG_FLAG_NO_DXFER | SG_FLAG_MMAP_IO) & hp->flags))) - return 0; - - SCSI_LOG_TIMEOUT(4, printk("sg_read_xfer: num_xfer=%d, iovec_count=%d, k_use_sg=%d\n", - num_xfer, (int)hp->iovec_count, schp->k_use_sg)); - return 0; -} - static int sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer) { -- cgit v1.2.3-70-g09d2 From 4677735f03f5b6b6f2182f457a921855cadfb85b Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 2 Sep 2008 22:50:08 +0900 Subject: sg: remove unnecessary blk_rq_unmap_user blk_rq_unmap_user in sg_finish_rem_req can take care of all the cases. Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index df8bf67b171..ba9b9bbd4e7 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -522,10 +522,6 @@ sg_new_read(Sg_fd * sfp, char __user *buf, size_t count, Sg_request * srp) err = -EFAULT; goto err_out; } - if (srp->bio) { - err = blk_rq_unmap_user(srp->bio); - srp->bio = NULL; - } err_out: sg_finish_rem_req(srp); return (0 == err) ? count : err; @@ -1844,9 +1840,6 @@ sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer) if ((!outp) || (num_read_xfer <= 0)) return 0; - blk_rq_unmap_user(srp->bio); - srp->bio = NULL; - num = 1 << (PAGE_SHIFT + schp->page_order); for (k = 0; k < schp->k_use_sg && schp->pages[k]; k++) { if (num > num_read_xfer) { -- cgit v1.2.3-70-g09d2 From 9246b5f06deeea541e7c62437c2ad19a0b1172c0 Mon Sep 17 00:00:00 2001 From: Chris Lalancette Date: Wed, 17 Sep 2008 14:30:32 -0700 Subject: block: Expand Xen blkfront for > 16 xvd Until recently, the maximum number of xvd block devices you could attach to a Xen domU was 16. This limitation turned out to be problematic for some users, so it was expanded to handle a much larger number of disks. However, this requires a couple of changes in the way that blkfront scans for disks. This functionality is already present in the Xen linux-2.6.18-xen.hg tree; the attached patch adds this functionality to the mainline xen-blkfront implementation. I successfully tested it on a 2.6.25 tree, and build tested it on 2.6.27-rc3. Signed-off-by: Chris Lalancette Acked-by: Jeremy Fitzhardinge Signed-off-by: Jens Axboe --- drivers/block/xen-blkfront.c | 76 ++++++++++++++++++++++++++++++++------------ 1 file changed, 55 insertions(+), 21 deletions(-) (limited to 'drivers') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 3ca643cafcc..bff602ccccf 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -105,15 +105,17 @@ static DEFINE_SPINLOCK(blkif_io_lock); #define GRANT_INVALID_REF 0 #define PARTS_PER_DISK 16 +#define PARTS_PER_EXT_DISK 256 #define BLKIF_MAJOR(dev) ((dev)>>8) #define BLKIF_MINOR(dev) ((dev) & 0xff) -#define DEV_NAME "xvd" /* name in /dev */ +#define EXT_SHIFT 28 +#define EXTENDED (1<gd != NULL); BUG_ON(info->rq != NULL); - if ((minor % PARTS_PER_DISK) == 0) - nr_minors = PARTS_PER_DISK; + if ((info->vdevice>>EXT_SHIFT) > 1) { + /* this is above the extended range; something is wrong */ + printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", info->vdevice); + return -ENODEV; + } + + if (!VDEV_IS_EXTENDED(info->vdevice)) { + minor = BLKIF_MINOR(info->vdevice); + nr_parts = PARTS_PER_DISK; + } else { + minor = BLKIF_MINOR_EXT(info->vdevice); + nr_parts = PARTS_PER_EXT_DISK; + } + + if ((minor % nr_parts) == 0) + nr_minors = nr_parts; gd = alloc_disk(nr_minors); if (gd == NULL) goto out; - if (nr_minors > 1) - sprintf(gd->disk_name, "%s%c", DEV_NAME, - 'a' + minor / PARTS_PER_DISK); - else - sprintf(gd->disk_name, "%s%c%d", DEV_NAME, - 'a' + minor / PARTS_PER_DISK, - minor % PARTS_PER_DISK); + offset = minor / nr_parts; + + if (nr_minors > 1) { + if (offset < 26) + sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset); + else + sprintf(gd->disk_name, "%s%c%c", DEV_NAME, + 'a' + ((offset / 26)-1), 'a' + (offset % 26)); + } else { + if (offset < 26) + sprintf(gd->disk_name, "%s%c%d", DEV_NAME, + 'a' + offset, + minor & (nr_parts - 1)); + else + sprintf(gd->disk_name, "%s%c%c%d", DEV_NAME, + 'a' + ((offset / 26) - 1), + 'a' + (offset % 26), + minor & (nr_parts - 1)); + } gd->major = XENVBD_MAJOR; gd->first_minor = minor; @@ -699,8 +730,13 @@ static int blkfront_probe(struct xenbus_device *dev, err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device", "%i", &vdevice); if (err != 1) { - xenbus_dev_fatal(dev, err, "reading virtual-device"); - return err; + /* go looking in the extended area instead */ + err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext", + "%i", &vdevice); + if (err != 1) { + xenbus_dev_fatal(dev, err, "reading virtual-device"); + return err; + } } info = kzalloc(sizeof(*info), GFP_KERNEL); @@ -861,9 +897,7 @@ static void blkfront_connect(struct blkfront_info *info) if (err) info->feature_barrier = 0; - err = xlvbd_alloc_gendisk(BLKIF_MINOR(info->vdevice), - sectors, info->vdevice, - binfo, sector_size, info); + err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); if (err) { xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", info->xbdev->otherend); -- cgit v1.2.3-70-g09d2 From 905bd78f2188da69e74966918e3d71df3dff382b Mon Sep 17 00:00:00 2001 From: "scameron@beardog.cca.cpqcorp.net" Date: Fri, 19 Sep 2008 18:27:47 -0700 Subject: cciss: Fix cciss SCSI rescan code to better notice device changes Fix cciss SCSI rescan code to better notice device changes. If you hot-unplug a tape drive, then hot-plug a different tape drive into the same slot in a storage enclosure, the cciss driver wouldn't notice anything had changed, as it was only looking at the LUN address and device type. Now it looks at the inquiry page 0x83 device identifier, and vendor and model strings as well. Signed-off-by: Stephen M. Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss_scsi.c | 151 +++++++++++++++++++++++++++++---------------- drivers/block/cciss_scsi.h | 4 ++ 2 files changed, 102 insertions(+), 53 deletions(-) (limited to 'drivers') diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c index e1233aabda7..a3fd87b4144 100644 --- a/drivers/block/cciss_scsi.c +++ b/drivers/block/cciss_scsi.c @@ -365,7 +365,7 @@ struct scsi2map { static int cciss_scsi_add_entry(int ctlr, int hostno, - unsigned char *scsi3addr, int devtype, + struct cciss_scsi_dev_t *device, struct scsi2map *added, int *nadded) { /* assumes hba[ctlr]->scsi_ctlr->lock is held */ @@ -384,12 +384,12 @@ cciss_scsi_add_entry(int ctlr, int hostno, lun = 0; /* Is this device a non-zero lun of a multi-lun device */ /* byte 4 of the 8-byte LUN addr will contain the logical unit no. */ - if (scsi3addr[4] != 0) { + if (device->scsi3addr[4] != 0) { /* Search through our list and find the device which */ /* has the same 8 byte LUN address, excepting byte 4. */ /* Assign the same bus and target for this new LUN. */ /* Use the logical unit number from the firmware. */ - memcpy(addr1, scsi3addr, 8); + memcpy(addr1, device->scsi3addr, 8); addr1[4] = 0; for (i = 0; i < n; i++) { sd = &ccissscsi[ctlr].dev[i]; @@ -399,7 +399,7 @@ cciss_scsi_add_entry(int ctlr, int hostno, if (memcmp(addr1, addr2, 8) == 0) { bus = sd->bus; target = sd->target; - lun = scsi3addr[4]; + lun = device->scsi3addr[4]; break; } } @@ -420,8 +420,12 @@ cciss_scsi_add_entry(int ctlr, int hostno, added[*nadded].lun = sd->lun; (*nadded)++; - memcpy(&sd->scsi3addr[0], scsi3addr, 8); - sd->devtype = devtype; + memcpy(sd->scsi3addr, device->scsi3addr, 8); + memcpy(sd->vendor, device->vendor, sizeof(sd->vendor)); + memcpy(sd->revision, device->revision, sizeof(sd->revision)); + memcpy(sd->device_id, device->device_id, sizeof(sd->device_id)); + sd->devtype = device->devtype; + ccissscsi[ctlr].ndevices++; /* initially, (before registering with scsi layer) we don't @@ -487,6 +491,22 @@ static void fixup_botched_add(int ctlr, char *scsi3addr) CPQ_TAPE_UNLOCK(ctlr, flags); } +static int device_is_the_same(struct cciss_scsi_dev_t *dev1, + struct cciss_scsi_dev_t *dev2) +{ + return dev1->devtype == dev2->devtype && + memcmp(dev1->scsi3addr, dev2->scsi3addr, + sizeof(dev1->scsi3addr)) == 0 && + memcmp(dev1->device_id, dev2->device_id, + sizeof(dev1->device_id)) == 0 && + memcmp(dev1->vendor, dev2->vendor, + sizeof(dev1->vendor)) == 0 && + memcmp(dev1->model, dev2->model, + sizeof(dev1->model)) == 0 && + memcmp(dev1->revision, dev2->revision, + sizeof(dev1->revision)) == 0; +} + static int adjust_cciss_scsi_table(int ctlr, int hostno, struct cciss_scsi_dev_t sd[], int nsds) @@ -532,7 +552,7 @@ adjust_cciss_scsi_table(int ctlr, int hostno, for (j=0;jscsi3addr)) { - if (sd[j].devtype == csd->devtype) + if (device_is_the_same(&sd[j], csd)) found=2; else found=1; @@ -548,22 +568,26 @@ adjust_cciss_scsi_table(int ctlr, int hostno, cciss_scsi_remove_entry(ctlr, hostno, i, removed, &nremoved); /* remove ^^^, hence i not incremented */ - } - else if (found == 1) { /* device is different kind */ + } else if (found == 1) { /* device is different in some way */ changes++; - printk("cciss%d: device c%db%dt%dl%d type changed " - "(device type now %s).\n", - ctlr, hostno, csd->bus, csd->target, csd->lun, - scsi_device_type(csd->devtype)); + printk("cciss%d: device c%db%dt%dl%d has changed.\n", + ctlr, hostno, csd->bus, csd->target, csd->lun); cciss_scsi_remove_entry(ctlr, hostno, i, removed, &nremoved); /* remove ^^^, hence i not incremented */ - if (cciss_scsi_add_entry(ctlr, hostno, - &sd[j].scsi3addr[0], sd[j].devtype, + if (cciss_scsi_add_entry(ctlr, hostno, &sd[j], added, &nadded) != 0) /* we just removed one, so add can't fail. */ BUG(); csd->devtype = sd[j].devtype; + memcpy(csd->device_id, sd[j].device_id, + sizeof(csd->device_id)); + memcpy(csd->vendor, sd[j].vendor, + sizeof(csd->vendor)); + memcpy(csd->model, sd[j].model, + sizeof(csd->model)); + memcpy(csd->revision, sd[j].revision, + sizeof(csd->revision)); } else /* device is same as it ever was, */ i++; /* so just move along. */ } @@ -577,7 +601,7 @@ adjust_cciss_scsi_table(int ctlr, int hostno, csd = &ccissscsi[ctlr].dev[j]; if (SCSI3ADDR_EQ(sd[i].scsi3addr, csd->scsi3addr)) { - if (sd[i].devtype == csd->devtype) + if (device_is_the_same(&sd[i], csd)) found=2; /* found device */ else found=1; /* found a bug. */ @@ -586,16 +610,14 @@ adjust_cciss_scsi_table(int ctlr, int hostno, } if (!found) { changes++; - if (cciss_scsi_add_entry(ctlr, hostno, - - &sd[i].scsi3addr[0], sd[i].devtype, + if (cciss_scsi_add_entry(ctlr, hostno, &sd[i], added, &nadded) != 0) break; } else if (found == 1) { /* should never happen... */ changes++; - printk("cciss%d: device unexpectedly changed type\n", - ctlr); + printk(KERN_WARNING "cciss%d: device " + "unexpectedly changed\n", ctlr); /* but if it does happen, we just ignore that device */ } } @@ -1012,7 +1034,8 @@ cciss_scsi_interpret_error(CommandList_struct *cp) static int cciss_scsi_do_inquiry(ctlr_info_t *c, unsigned char *scsi3addr, - unsigned char *buf, unsigned char bufsize) + unsigned char page, unsigned char *buf, + unsigned char bufsize) { int rc; CommandList_struct *cp; @@ -1032,8 +1055,8 @@ cciss_scsi_do_inquiry(ctlr_info_t *c, unsigned char *scsi3addr, ei = cp->err_info; cdb[0] = CISS_INQUIRY; - cdb[1] = 0; - cdb[2] = 0; + cdb[1] = (page != 0); + cdb[2] = page; cdb[3] = 0; cdb[4] = bufsize; cdb[5] = 0; @@ -1053,6 +1076,25 @@ cciss_scsi_do_inquiry(ctlr_info_t *c, unsigned char *scsi3addr, return rc; } +/* Get the device id from inquiry page 0x83 */ +static int cciss_scsi_get_device_id(ctlr_info_t *c, unsigned char *scsi3addr, + unsigned char *device_id, int buflen) +{ + int rc; + unsigned char *buf; + + if (buflen > 16) + buflen = 16; + buf = kzalloc(64, GFP_KERNEL); + if (!buf) + return -1; + rc = cciss_scsi_do_inquiry(c, scsi3addr, 0x83, buf, 64); + if (rc == 0) + memcpy(device_id, &buf[8], buflen); + kfree(buf); + return rc != 0; +} + static int cciss_scsi_do_report_phys_luns(ctlr_info_t *c, ReportLunData_struct *buf, int bufsize) @@ -1142,25 +1184,21 @@ cciss_update_non_disk_devices(int cntl_num, int hostno) ctlr_info_t *c; __u32 num_luns=0; unsigned char *ch; - /* unsigned char found[CCISS_MAX_SCSI_DEVS_PER_HBA]; */ - struct cciss_scsi_dev_t currentsd[CCISS_MAX_SCSI_DEVS_PER_HBA]; + struct cciss_scsi_dev_t *currentsd, *this_device; int ncurrent=0; int reportlunsize = sizeof(*ld_buff) + CISS_MAX_PHYS_LUN * 8; int i; c = (ctlr_info_t *) hba[cntl_num]; ld_buff = kzalloc(reportlunsize, GFP_KERNEL); - if (ld_buff == NULL) { - printk(KERN_ERR "cciss: out of memory\n"); - return; - } inq_buff = kmalloc(OBDR_TAPE_INQ_SIZE, GFP_KERNEL); - if (inq_buff == NULL) { - printk(KERN_ERR "cciss: out of memory\n"); - kfree(ld_buff); - return; + currentsd = kzalloc(sizeof(*currentsd) * + (CCISS_MAX_SCSI_DEVS_PER_HBA+1), GFP_KERNEL); + if (ld_buff == NULL || inq_buff == NULL || currentsd == NULL) { + printk(KERN_ERR "cciss: out of memory\n"); + goto out; } - + this_device = ¤tsd[CCISS_MAX_SCSI_DEVS_PER_HBA]; if (cciss_scsi_do_report_phys_luns(c, ld_buff, reportlunsize) == 0) { ch = &ld_buff->LUNListLength[0]; num_luns = ((ch[0]<<24) | (ch[1]<<16) | (ch[2]<<8) | ch[3]) / 8; @@ -1179,23 +1217,34 @@ cciss_update_non_disk_devices(int cntl_num, int hostno) /* adjust our table of devices */ - for(i=0; iLUN[i][3] & 0xC0) continue; memset(inq_buff, 0, OBDR_TAPE_INQ_SIZE); memcpy(&scsi3addr[0], &ld_buff->LUN[i][0], 8); - if (cciss_scsi_do_inquiry(hba[cntl_num], scsi3addr, inq_buff, - (unsigned char) OBDR_TAPE_INQ_SIZE) != 0) { + if (cciss_scsi_do_inquiry(hba[cntl_num], scsi3addr, 0, inq_buff, + (unsigned char) OBDR_TAPE_INQ_SIZE) != 0) /* Inquiry failed (msg printed already) */ - devtype = 0; /* so we will skip this device. */ - } else /* what kind of device is this? */ - devtype = (inq_buff[0] & 0x1f); - - switch (devtype) + continue; /* so we will skip this device. */ + + this_device->devtype = (inq_buff[0] & 0x1f); + this_device->bus = -1; + this_device->target = -1; + this_device->lun = -1; + memcpy(this_device->scsi3addr, scsi3addr, 8); + memcpy(this_device->vendor, &inq_buff[8], + sizeof(this_device->vendor)); + memcpy(this_device->model, &inq_buff[16], + sizeof(this_device->model)); + memcpy(this_device->revision, &inq_buff[32], + sizeof(this_device->revision)); + memset(this_device->device_id, 0, + sizeof(this_device->device_id)); + cciss_scsi_get_device_id(hba[cntl_num], scsi3addr, + this_device->device_id, sizeof(this_device->device_id)); + + switch (this_device->devtype) { case 0x05: /* CD-ROM */ { @@ -1220,15 +1269,10 @@ cciss_update_non_disk_devices(int cntl_num, int hostno) if (ncurrent >= CCISS_MAX_SCSI_DEVS_PER_HBA) { printk(KERN_INFO "cciss%d: %s ignored, " "too many devices.\n", cntl_num, - scsi_device_type(devtype)); + scsi_device_type(this_device->devtype)); break; } - memcpy(¤tsd[ncurrent].scsi3addr[0], - &scsi3addr[0], 8); - currentsd[ncurrent].devtype = devtype; - currentsd[ncurrent].bus = -1; - currentsd[ncurrent].target = -1; - currentsd[ncurrent].lun = -1; + currentsd[ncurrent] = *this_device; ncurrent++; break; default: @@ -1240,6 +1284,7 @@ cciss_update_non_disk_devices(int cntl_num, int hostno) out: kfree(inq_buff); kfree(ld_buff); + kfree(currentsd); return; } diff --git a/drivers/block/cciss_scsi.h b/drivers/block/cciss_scsi.h index d9c2c586502..7b750245ae7 100644 --- a/drivers/block/cciss_scsi.h +++ b/drivers/block/cciss_scsi.h @@ -66,6 +66,10 @@ struct cciss_scsi_dev_t { int devtype; int bus, target, lun; /* as presented to the OS */ unsigned char scsi3addr[8]; /* as presented to the HW */ + unsigned char device_id[16]; /* from inquiry pg. 0x83 */ + unsigned char vendor[8]; /* bytes 8-15 of inquiry data */ + unsigned char model[16]; /* bytes 16-31 of inquiry data */ + unsigned char revision[4]; /* bytes 32-35 of inquiry data */ }; struct cciss_scsi_hba_t { -- cgit v1.2.3-70-g09d2 From 061837bc8687edc2739ef02f721b7ae0b8076390 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 22 Sep 2008 14:57:16 -0700 Subject: drivers/block: Use DIV_ROUND_UP The kernel.h macro DIV_ROUND_UP performs the computation (((n) + (d) - 1) / (d)) but is perhaps more readable. An extract of the semantic patch that makes this change is as follows: (http://www.emn.fr/x-info/coccinelle/) // @haskernel@ @@ #include @depends on haskernel@ expression n,d; @@ ( - (n + d - 1) / d + DIV_ROUND_UP(n,d) | - (n + (d - 1)) / d + DIV_ROUND_UP(n,d) ) @depends on haskernel@ expression n,d; @@ - DIV_ROUND_UP((n),d) + DIV_ROUND_UP(n,d) @depends on haskernel@ expression n,d; @@ - DIV_ROUND_UP(n,(d)) + DIV_ROUND_UP(n,d) // Signed-off-by: Julia Lawall Cc: Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 8 ++++---- drivers/block/cpqarray.c | 2 +- drivers/block/floppy.c | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index b73116ef923..1e1f9153000 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -3460,8 +3460,8 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, hba[i]->intr[SIMPLE_MODE_INT], dac ? "" : " not"); hba[i]->cmd_pool_bits = - kmalloc(((hba[i]->nr_cmds + BITS_PER_LONG - - 1) / BITS_PER_LONG) * sizeof(unsigned long), GFP_KERNEL); + kmalloc(DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG) + * sizeof(unsigned long), GFP_KERNEL); hba[i]->cmd_pool = (CommandList_struct *) pci_alloc_consistent(hba[i]->pdev, hba[i]->nr_cmds * sizeof(CommandList_struct), @@ -3493,8 +3493,8 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, /* command and error info recs zeroed out before they are used */ memset(hba[i]->cmd_pool_bits, 0, - ((hba[i]->nr_cmds + BITS_PER_LONG - - 1) / BITS_PER_LONG) * sizeof(unsigned long)); + DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG) + * sizeof(unsigned long)); hba[i]->num_luns = 0; hba[i]->highest_lun = -1; diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index 09c14341e6e..3d967525e9a 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -424,7 +424,7 @@ static int __init cpqarray_register_ctlr( int i, struct pci_dev *pdev) hba[i]->pci_dev, NR_CMDS * sizeof(cmdlist_t), &(hba[i]->cmd_pool_dhandle)); hba[i]->cmd_pool_bits = kcalloc( - (NR_CMDS+BITS_PER_LONG-1)/BITS_PER_LONG, sizeof(unsigned long), + DIV_ROUND_UP(NR_CMDS, BITS_PER_LONG), sizeof(unsigned long), GFP_KERNEL); if (!hba[i]->cmd_pool_bits || !hba[i]->cmd_pool) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 395f8ea7981..9c0b494f5e8 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -1355,20 +1355,20 @@ static void fdc_specify(void) } /* Convert step rate from microseconds to milliseconds and 4 bits */ - srt = 16 - (DP->srt * scale_dtr / 1000 + NOMINAL_DTR - 1) / NOMINAL_DTR; + srt = 16 - DIV_ROUND_UP(DP->srt * scale_dtr / 1000, NOMINAL_DTR); if (slow_floppy) { srt = srt / 4; } SUPBOUND(srt, 0xf); INFBOUND(srt, 0); - hlt = (DP->hlt * scale_dtr / 2 + NOMINAL_DTR - 1) / NOMINAL_DTR; + hlt = DIV_ROUND_UP(DP->hlt * scale_dtr / 2, NOMINAL_DTR); if (hlt < 0x01) hlt = 0x01; else if (hlt > 0x7f) hlt = hlt_max_code; - hut = (DP->hut * scale_dtr / 16 + NOMINAL_DTR - 1) / NOMINAL_DTR; + hut = DIV_ROUND_UP(DP->hut * scale_dtr / 16, NOMINAL_DTR); if (hut < 0x1) hut = 0x1; else if (hut > 0xf) @@ -2385,7 +2385,7 @@ static void rw_interrupt(void) #ifdef FLOPPY_SANITY_CHECK if (nr_sectors / ssize > - (in_sector_offset + current_count_sectors + ssize - 1) / ssize) { + DIV_ROUND_UP(in_sector_offset + current_count_sectors, ssize)) { DPRINT("long rw: %x instead of %lx\n", nr_sectors, current_count_sectors); printk("rs=%d s=%d\n", R_SECTOR, SECTOR); -- cgit v1.2.3-70-g09d2 From 9e49184c82e9ec3ab4d45f9ea5a17ccaf43869f0 Mon Sep 17 00:00:00 2001 From: Keith Wansbrough Date: Mon, 22 Sep 2008 14:57:17 -0700 Subject: floppy: support arbitrary first-sector numbers The current floppy_struct allows floppies to number sectors starting from 0 or 1. This patch allows arbitrary first-sector numbers - for example, 0xC1 for Amstrad CPC disks. This extends the existing 1-bit field (FD_ZEROBASED, bit 2 of stretch) to 8 bits (FD_SECTMASK, bits 2 to 9). Currently 0x00 denotes a first sector number of 1, and 0x01 denotes a first sector number of 0. We extend this by interpreting FD_SECTMASK as the first sector number with the LSB flipped. Signed-off-by: Keith Wansbrough Cc: Alain Knaff Cc: Michael Kerrisk Cc: Karel Zak Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 23 +++++++++++++++-------- include/linux/fd.h | 8 +++++++- 2 files changed, 22 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 9c0b494f5e8..cf64ddf5d83 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -423,8 +423,15 @@ static struct floppy_raw_cmd *raw_cmd, default_raw_cmd; * 1581's logical side 0 is on physical side 1, whereas the Sharp's logical * side 0 is on physical side 0 (but with the misnamed sector IDs). * 'stretch' should probably be renamed to something more general, like - * 'options'. Other parameters should be self-explanatory (see also - * setfdprm(8)). + * 'options'. + * + * Bits 2 through 9 of 'stretch' tell the number of the first sector. + * The LSB (bit 2) is flipped. For most disks, the first sector + * is 1 (represented by 0x00<<2). For some CP/M and music sampler + * disks (such as Ensoniq EPS 16plus) it is 0 (represented as 0x01<<2). + * For Amstrad CPC disks it is 0xC1 (represented as 0xC0<<2). + * + * Other parameters should be self-explanatory (see also setfdprm(8)). */ /* Size @@ -2236,9 +2243,9 @@ static void setup_format_params(int track) } } } - if (_floppy->stretch & FD_ZEROBASED) { + if (_floppy->stretch & FD_SECTBASEMASK) { for (count = 0; count < F_SECT_PER_TRACK; count++) - here[count].sect--; + here[count].sect += FD_SECTBASE(_floppy) - 1; } } @@ -2649,7 +2656,7 @@ static int make_raw_rw_request(void) } HEAD = fsector_t / _floppy->sect; - if (((_floppy->stretch & (FD_SWAPSIDES | FD_ZEROBASED)) || + if (((_floppy->stretch & (FD_SWAPSIDES | FD_SECTBASEMASK)) || TESTF(FD_NEED_TWADDLE)) && fsector_t < _floppy->sect) max_sector = _floppy->sect; @@ -2679,7 +2686,7 @@ static int make_raw_rw_request(void) CODE2SIZE; SECT_PER_TRACK = _floppy->sect << 2 >> SIZECODE; SECTOR = ((fsector_t % _floppy->sect) << 2 >> SIZECODE) + - ((_floppy->stretch & FD_ZEROBASED) ? 0 : 1); + FD_SECTBASE(_floppy); /* tracksize describes the size which can be filled up with sectors * of size ssize. @@ -3311,7 +3318,7 @@ static inline int set_geometry(unsigned int cmd, struct floppy_struct *g, g->head <= 0 || g->track <= 0 || g->track > UDP->tracks >> STRETCH(g) || /* check if reserved bits are set */ - (g->stretch & ~(FD_STRETCH | FD_SWAPSIDES | FD_ZEROBASED)) != 0) + (g->stretch & ~(FD_STRETCH | FD_SWAPSIDES | FD_SECTBASEMASK)) != 0) return -EINVAL; if (type) { if (!capable(CAP_SYS_ADMIN)) @@ -3356,7 +3363,7 @@ static inline int set_geometry(unsigned int cmd, struct floppy_struct *g, if (DRS->maxblock > user_params[drive].sect || DRS->maxtrack || ((user_params[drive].sect ^ oldStretch) & - (FD_SWAPSIDES | FD_ZEROBASED))) + (FD_SWAPSIDES | FD_SECTBASEMASK))) invalidate_drive(bdev); else process_fd_request(); diff --git a/include/linux/fd.h b/include/linux/fd.h index b6bd41d2b46..f5d194af07a 100644 --- a/include/linux/fd.h +++ b/include/linux/fd.h @@ -15,10 +15,16 @@ struct floppy_struct { sect, /* sectors per track */ head, /* nr of heads */ track, /* nr of tracks */ - stretch; /* !=0 means double track steps */ + stretch; /* bit 0 !=0 means double track steps */ + /* bit 1 != 0 means swap sides */ + /* bits 2..9 give the first sector */ + /* number (the LSB is flipped) */ #define FD_STRETCH 1 #define FD_SWAPSIDES 2 #define FD_ZEROBASED 4 +#define FD_SECTBASEMASK 0x3FC +#define FD_MKSECTBASE(s) (((s) ^ 1) << 2) +#define FD_SECTBASE(floppy) ((((floppy)->stretch & FD_SECTBASEMASK) >> 2) ^ 1) unsigned char gap, /* gap1 size */ -- cgit v1.2.3-70-g09d2 From 8bff7c6b0f63c7ee9c5e3a076338d74125b8debb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 24 Sep 2008 13:05:10 +0200 Subject: libata: set queue SSD flag for SSD devices SSD devices should give an RPM setting of 1 in word 217 of the ID page. If we see such a device, tell the block layer about it. Signed-off-by: Jens Axboe --- drivers/ata/libata-scsi.c | 4 ++++ include/linux/ata.h | 6 ++++++ 2 files changed, 10 insertions(+) (limited to 'drivers') diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index b9d3ba423cb..054370700ab 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -977,6 +977,10 @@ static int ata_scsi_dev_config(struct scsi_device *sdev, blk_queue_dma_drain(q, atapi_drain_needed, buf, ATAPI_MAX_DRAIN); } else { + if (ata_id_is_ssd(dev->id)) + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, + sdev->request_queue); + /* ATA devices must be sector aligned */ blk_queue_update_dma_alignment(sdev->request_queue, ATA_SECT_SIZE - 1); diff --git a/include/linux/ata.h b/include/linux/ata.h index 8a12d718c16..c1c8b4a4ba2 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -88,6 +88,7 @@ enum { ATA_ID_DLF = 128, ATA_ID_CSFO = 129, ATA_ID_CFA_POWER = 160, + ATA_ID_ROT_SPEED = 217, ATA_ID_PIO4 = (1 << 1), ATA_ID_SERNO_LEN = 20, @@ -691,6 +692,11 @@ static inline int ata_id_is_cfa(const u16 *id) return 0; } +static inline int ata_id_is_ssd(const u16 *id) +{ + return id[ATA_ID_ROT_SPEED] == 0x01; +} + static inline int ata_drive_40wire(const u16 *dev_id) { if (ata_id_is_sata(dev_id)) -- cgit v1.2.3-70-g09d2 From 8316982ac06d7d8875dc8738efbb030791dc33bb Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Wed, 1 Oct 2008 10:11:20 -0400 Subject: virtio_blk: change to use __blk_end_request() This patch converts virtio_blk to use __blk_end_request() directly so that end_{queued|dequeued}_request() can be removed. Related 'uptodate' argument is converted to 'error'. Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Cc: Rusty Russell Signed-off-by: Jens Axboe --- drivers/block/virtio_blk.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 879506a2c23..6ec5fc05278 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -47,20 +47,20 @@ static void blk_done(struct virtqueue *vq) spin_lock_irqsave(&vblk->lock, flags); while ((vbr = vblk->vq->vq_ops->get_buf(vblk->vq, &len)) != NULL) { - int uptodate; + int error; switch (vbr->status) { case VIRTIO_BLK_S_OK: - uptodate = 1; + error = 0; break; case VIRTIO_BLK_S_UNSUPP: - uptodate = -ENOTTY; + error = -ENOTTY; break; default: - uptodate = 0; + error = -EIO; break; } - end_dequeued_request(vbr->req, uptodate); + __blk_end_request(vbr->req, error, blk_rq_bytes(vbr->req)); list_del(&vbr->list); mempool_free(vbr, vblk->pool); } -- cgit v1.2.3-70-g09d2 From 2a9df5055a99df25533daf4041fdb99f0ed3463c Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Wed, 1 Oct 2008 10:12:15 -0400 Subject: memstick: change to use __blk_end_request() This patch converts memstick to use __blk_end_request() directly so that end_{queued|dequeued}_request() can be removed. Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Cc: Alex Dubov Signed-off-by: Jens Axboe --- drivers/memstick/core/mspro_block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index 82bf649ef13..6e291bf8237 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -828,7 +828,7 @@ static void mspro_block_submit_req(struct request_queue *q) if (msb->eject) { while ((req = elv_next_request(q)) != NULL) - end_queued_request(req, -ENODEV); + __blk_end_request(req, -ENODEV, blk_rq_bytes(req)); return; } -- cgit v1.2.3-70-g09d2 From 7afb3a6e752503d5ebeb038336aa0fa886a51b44 Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Wed, 1 Oct 2008 10:13:02 -0400 Subject: gdrom: change to use __blk_end_request() This patch converts gdrom to use __blk_end_request() directly so that end_{queued|dequeued}_request() can be removed. gd.transfer is '1' in error cases and '0' in non-error cases, so gdrom hasn't been propagating any error code to the block layer. We can just convert error cases to '-EIO'. Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Cc: Adrian McMenamin Signed-off-by: Jens Axboe --- drivers/cdrom/gdrom.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 1231d95aa69..d6ba77a2dd7 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -624,14 +624,14 @@ static void gdrom_readdisk_dma(struct work_struct *work) ctrl_outb(1, GDROM_DMA_STATUS_REG); wait_event_interruptible_timeout(request_queue, gd.transfer == 0, GDROM_DEFAULT_TIMEOUT); - err = gd.transfer; + err = gd.transfer ? -EIO : 0; gd.transfer = 0; gd.pending = 0; /* now seek to take the request spinlock * before handling ending the request */ spin_lock(&gdrom_lock); list_del_init(&req->queuelist); - end_dequeued_request(req, 1 - err); + __blk_end_request(req, err, blk_rq_bytes(req)); } spin_unlock(&gdrom_lock); kfree(read_command); -- cgit v1.2.3-70-g09d2 From 6feef531f55cf4a20fd9eb39f5352e5745203603 Mon Sep 17 00:00:00 2001 From: Denis ChengRq Date: Thu, 9 Oct 2008 08:57:05 +0200 Subject: block: mark bio_split_pool static Since all bio_split calls refer the same single bio_split_pool, the bio_split function can use bio_split_pool directly instead of the mempool_t parameter; then the mempool_t parameter can be removed from bio_split param list, and bio_split_pool is only referred in fs/bio.c file, can be marked static. Signed-off-by: Denis ChengRq Signed-off-by: Jens Axboe --- drivers/block/pktcdvd.c | 2 +- drivers/md/linear.c | 2 +- drivers/md/raid0.c | 2 +- drivers/md/raid10.c | 2 +- fs/bio.c | 9 ++++----- include/linux/bio.h | 4 +--- 6 files changed, 9 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index e1a90bbb474..0e077150568 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2544,7 +2544,7 @@ static int pkt_make_request(struct request_queue *q, struct bio *bio) if (last_zone != zone) { BUG_ON(last_zone != zone + pd->settings.size); first_sectors = last_zone - bio->bi_sector; - bp = bio_split(bio, bio_split_pool, first_sectors); + bp = bio_split(bio, first_sectors); BUG_ON(!bp); pkt_make_request(q, &bp->bio1); pkt_make_request(q, &bp->bio2); diff --git a/drivers/md/linear.c b/drivers/md/linear.c index c80ea90593d..b9cbee688fa 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -353,7 +353,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) * split it. */ struct bio_pair *bp; - bp = bio_split(bio, bio_split_pool, + bp = bio_split(bio, ((tmp_dev->offset + tmp_dev->size)<<1) - bio->bi_sector); if (linear_make_request(q, &bp->bio1)) generic_make_request(&bp->bio1); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index f52f442a735..53508a8a981 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -427,7 +427,7 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) /* This is a one page bio that upper layers * refuse to split for us, so we need to split it. */ - bp = bio_split(bio, bio_split_pool, chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); + bp = bio_split(bio, chunk_sects - (bio->bi_sector & (chunk_sects - 1))); if (raid0_make_request(q, &bp->bio1)) generic_make_request(&bp->bio1); if (raid0_make_request(q, &bp->bio2)) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 5f990133f5e..8bdc9bfc288 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -817,7 +817,7 @@ static int make_request(struct request_queue *q, struct bio * bio) /* This is a one page bio that upper layers * refuse to split for us, so we need to split it. */ - bp = bio_split(bio, bio_split_pool, + bp = bio_split(bio, chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); if (make_request(q, &bp->bio1)) generic_make_request(&bp->bio1); diff --git a/fs/bio.c b/fs/bio.c index a5af5809f56..77a55bcceed 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -30,7 +30,7 @@ static struct kmem_cache *bio_slab __read_mostly; -mempool_t *bio_split_pool __read_mostly; +static mempool_t *bio_split_pool __read_mostly; /* * if you change this list, also change bvec_alloc or things will @@ -1256,9 +1256,9 @@ static void bio_pair_end_2(struct bio *bi, int err) * split a bio - only worry about a bio with a single page * in it's iovec */ -struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) +struct bio_pair *bio_split(struct bio *bi, int first_sectors) { - struct bio_pair *bp = mempool_alloc(pool, GFP_NOIO); + struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO); if (!bp) return bp; @@ -1292,7 +1292,7 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) bp->bio2.bi_end_io = bio_pair_end_2; bp->bio1.bi_private = bi; - bp->bio2.bi_private = pool; + bp->bio2.bi_private = bio_split_pool; if (bio_integrity(bi)) bio_integrity_split(bi, bp, first_sectors); @@ -1455,7 +1455,6 @@ EXPORT_SYMBOL(bio_map_kern); EXPORT_SYMBOL(bio_copy_kern); EXPORT_SYMBOL(bio_pair_release); EXPORT_SYMBOL(bio_split); -EXPORT_SYMBOL(bio_split_pool); EXPORT_SYMBOL(bio_copy_user); EXPORT_SYMBOL(bio_uncopy_user); EXPORT_SYMBOL(bioset_create); diff --git a/include/linux/bio.h b/include/linux/bio.h index fe12d0f9eba..fb97221d7c3 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -300,9 +300,7 @@ struct bio_pair { atomic_t cnt; int error; }; -extern struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, - int first_sectors); -extern mempool_t *bio_split_pool; +extern struct bio_pair *bio_split(struct bio *bi, int first_sectors); extern void bio_pair_release(struct bio_pair *dbio); extern struct bio_set *bioset_create(int, int); -- cgit v1.2.3-70-g09d2