From b8b3e16cfe6435d961f6aaebcfd52a1ff2a988c5 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 15 Aug 2008 10:15:19 +0200 Subject: block: drop virtual merging accounting Remove virtual merge accounting. Signed-off-by: Mikulas Patocka Signed-off-by: Jens Axboe --- fs/bio.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/bio.c') diff --git a/fs/bio.c b/fs/bio.c index 3cba7ae34d7..4ac7c59d1c6 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -350,8 +350,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page */ while (bio->bi_phys_segments >= q->max_phys_segments - || bio->bi_hw_segments >= q->max_hw_segments - || BIOVEC_VIRT_OVERSIZE(bio->bi_size)) { + || bio->bi_hw_segments >= q->max_hw_segments) { if (retried_segments) return 0; @@ -395,8 +394,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page } /* If we may be able to merge these biovecs, force a recount */ - if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec) || - BIOVEC_VIRT_MERGEABLE(bvec-1, bvec))) + if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec))) bio->bi_flags &= ~(1 << BIO_SEG_VALID); bio->bi_vcnt++; -- cgit v1.2.3-70-g09d2 From 5df97b91b5d7ed426034fcc84cb6e7cf682b8838 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 15 Aug 2008 10:20:02 +0200 Subject: drop vmerge accounting Remove hw_segments field from struct bio and struct request. Without virtual merge accounting they have no purpose. Signed-off-by: Mikulas Patocka Signed-off-by: Jens Axboe --- block/blk-core.c | 1 - block/blk-merge.c | 31 ++++--------------------------- block/elevator.c | 2 -- drivers/md/raid1.c | 3 --- drivers/md/raid10.c | 3 --- fs/bio.c | 12 +----------- include/linux/bio.h | 16 +--------------- include/linux/blkdev.h | 7 ------- 8 files changed, 6 insertions(+), 69 deletions(-) (limited to 'fs/bio.c') diff --git a/block/blk-core.c b/block/blk-core.c index 1261516dd42..2616cdd049a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2026,7 +2026,6 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, if (bio_has_data(bio)) { rq->nr_phys_segments = bio_phys_segments(q, bio); - rq->nr_hw_segments = bio_hw_segments(q, bio); rq->buffer = bio_data(bio); } rq->current_nr_sectors = bio_cur_sectors(bio); diff --git a/block/blk-merge.c b/block/blk-merge.c index 2c2a2ee716e..d81d91419ff 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -41,12 +41,9 @@ void blk_recalc_rq_sectors(struct request *rq, int nsect) void blk_recalc_rq_segments(struct request *rq) { int nr_phys_segs; - int nr_hw_segs; unsigned int phys_size; - unsigned int hw_size; struct bio_vec *bv, *bvprv = NULL; int seg_size; - int hw_seg_size; int cluster; struct req_iterator iter; int high, highprv = 1; @@ -56,8 +53,8 @@ void blk_recalc_rq_segments(struct request *rq) return; cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); - hw_seg_size = seg_size = 0; - phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0; + seg_size = 0; + phys_size = nr_phys_segs = 0; rq_for_each_segment(bv, rq, iter) { /* * the trick here is making sure that a high page is never @@ -76,30 +73,17 @@ void blk_recalc_rq_segments(struct request *rq) goto new_segment; seg_size += bv->bv_len; - hw_seg_size += bv->bv_len; bvprv = bv; continue; } new_segment: - if (nr_hw_segs == 1 && - hw_seg_size > rq->bio->bi_hw_front_size) - rq->bio->bi_hw_front_size = hw_seg_size; - hw_seg_size = bv->bv_len; - nr_hw_segs++; - nr_phys_segs++; bvprv = bv; seg_size = bv->bv_len; highprv = high; } - if (nr_hw_segs == 1 && - hw_seg_size > rq->bio->bi_hw_front_size) - rq->bio->bi_hw_front_size = hw_seg_size; - if (hw_seg_size > rq->biotail->bi_hw_back_size) - rq->biotail->bi_hw_back_size = hw_seg_size; rq->nr_phys_segments = nr_phys_segs; - rq->nr_hw_segments = nr_hw_segs; } void blk_recount_segments(struct request_queue *q, struct bio *bio) @@ -112,7 +96,6 @@ void blk_recount_segments(struct request_queue *q, struct bio *bio) blk_recalc_rq_segments(&rq); bio->bi_next = nxt; bio->bi_phys_segments = rq.nr_phys_segments; - bio->bi_hw_segments = rq.nr_hw_segments; bio->bi_flags |= (1 << BIO_SEG_VALID); } EXPORT_SYMBOL(blk_recount_segments); @@ -255,10 +238,9 @@ static inline int ll_new_hw_segment(struct request_queue *q, struct request *req, struct bio *bio) { - int nr_hw_segs = bio_hw_segments(q, bio); int nr_phys_segs = bio_phys_segments(q, bio); - if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments + if (req->nr_phys_segments + nr_phys_segs > q->max_hw_segments || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) @@ -270,7 +252,6 @@ static inline int ll_new_hw_segment(struct request_queue *q, * This will form the start of a new hw segment. Bump both * counters. */ - req->nr_hw_segments += nr_hw_segs; req->nr_phys_segments += nr_phys_segs; return 1; } @@ -328,7 +309,6 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, struct request *next) { int total_phys_segments; - int total_hw_segments; /* * First check if the either of the requests are re-queued @@ -350,14 +330,11 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, if (total_phys_segments > q->max_phys_segments) return 0; - total_hw_segments = req->nr_hw_segments + next->nr_hw_segments; - - if (total_hw_segments > q->max_hw_segments) + if (total_phys_segments > q->max_hw_segments) return 0; /* Merge is OK... */ req->nr_phys_segments = total_phys_segments; - req->nr_hw_segments = total_hw_segments; return 1; } diff --git a/block/elevator.c b/block/elevator.c index 4f5127054e3..269615e6dbf 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -790,7 +790,6 @@ struct request *elv_next_request(struct request_queue *q) * device can handle */ rq->nr_phys_segments++; - rq->nr_hw_segments++; } if (!q->prep_rq_fn) @@ -813,7 +812,6 @@ struct request *elv_next_request(struct request_queue *q) * so that we don't add it again */ --rq->nr_phys_segments; - --rq->nr_hw_segments; } rq = NULL; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 03a5ab705c2..28a3869dcfd 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1302,9 +1302,6 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) sbio->bi_size = r1_bio->sectors << 9; sbio->bi_idx = 0; sbio->bi_phys_segments = 0; - sbio->bi_hw_segments = 0; - sbio->bi_hw_front_size = 0; - sbio->bi_hw_back_size = 0; sbio->bi_flags &= ~(BIO_POOL_MASK - 1); sbio->bi_flags |= 1 << BIO_UPTODATE; sbio->bi_next = NULL; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index e34cd0e6247..0f40688503e 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1345,9 +1345,6 @@ static void sync_request_write(mddev_t *mddev, r10bio_t *r10_bio) tbio->bi_size = r10_bio->sectors << 9; tbio->bi_idx = 0; tbio->bi_phys_segments = 0; - tbio->bi_hw_segments = 0; - tbio->bi_hw_front_size = 0; - tbio->bi_hw_back_size = 0; tbio->bi_flags &= ~(BIO_POOL_MASK - 1); tbio->bi_flags |= 1 << BIO_UPTODATE; tbio->bi_next = NULL; diff --git a/fs/bio.c b/fs/bio.c index 4ac7c59d1c6..bee4deca774 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -208,14 +208,6 @@ inline int bio_phys_segments(struct request_queue *q, struct bio *bio) return bio->bi_phys_segments; } -inline int bio_hw_segments(struct request_queue *q, struct bio *bio) -{ - if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) - blk_recount_segments(q, bio); - - return bio->bi_hw_segments; -} - /** * __bio_clone - clone a bio * @bio: destination bio @@ -350,7 +342,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page */ while (bio->bi_phys_segments >= q->max_phys_segments - || bio->bi_hw_segments >= q->max_hw_segments) { + || bio->bi_phys_segments >= q->max_hw_segments) { if (retried_segments) return 0; @@ -399,7 +391,6 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page bio->bi_vcnt++; bio->bi_phys_segments++; - bio->bi_hw_segments++; done: bio->bi_size += len; return len; @@ -1381,7 +1372,6 @@ EXPORT_SYMBOL(bio_init); EXPORT_SYMBOL(__bio_clone); EXPORT_SYMBOL(bio_clone); EXPORT_SYMBOL(bio_phys_segments); -EXPORT_SYMBOL(bio_hw_segments); EXPORT_SYMBOL(bio_add_page); EXPORT_SYMBOL(bio_add_pc_page); EXPORT_SYMBOL(bio_get_nr_vecs); diff --git a/include/linux/bio.h b/include/linux/bio.h index 894d16ce002..dfc3556d311 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -77,21 +77,8 @@ struct bio { */ unsigned short bi_phys_segments; - /* Number of segments after physical and DMA remapping - * hardware coalescing is performed. - */ - unsigned short bi_hw_segments; - unsigned int bi_size; /* residual I/O count */ - /* - * To keep track of the max hw size, we account for the - * sizes of the first and last virtually mergeable segments - * in this bio - */ - unsigned int bi_hw_front_size; - unsigned int bi_hw_back_size; - unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ struct bio_vec *bi_io_vec; /* the actual vec list */ @@ -113,7 +100,7 @@ struct bio { #define BIO_UPTODATE 0 /* ok after I/O completion */ #define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */ #define BIO_EOF 2 /* out-out-bounds error */ -#define BIO_SEG_VALID 3 /* nr_hw_seg valid */ +#define BIO_SEG_VALID 3 /* bi_phys_segments valid */ #define BIO_CLONED 4 /* doesn't own data */ #define BIO_BOUNCED 5 /* bio is a bounce bio */ #define BIO_USER_MAPPED 6 /* contains user pages */ @@ -324,7 +311,6 @@ extern void bio_free(struct bio *, struct bio_set *); extern void bio_endio(struct bio *, int); struct request_queue; extern int bio_phys_segments(struct request_queue *, struct bio *); -extern int bio_hw_segments(struct request_queue *, struct bio *); extern void __bio_clone(struct bio *, struct bio *); extern struct bio *bio_clone(struct bio *, gfp_t); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 490ce458b03..1adb03827bd 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -189,13 +189,6 @@ struct request { */ unsigned short nr_phys_segments; - /* Number of scatter-gather addr+len pairs after - * physical and DMA remapping hardware coalescing is performed. - * This is the number of scatter-gather entries the driver - * will actually have to deal with after DMA mapping is done. - */ - unsigned short nr_hw_segments; - unsigned short ioprio; void *special; -- cgit v1.2.3-70-g09d2 From c7c22e4d5c1fdebfac4dba76de7d0338c2b0d832 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 13 Sep 2008 20:26:01 +0200 Subject: block: add support for IO CPU affinity This patch adds support for controlling the IO completion CPU of either all requests on a queue, or on a per-request basis. We export a sysfs variable (rq_affinity) which, if set, migrates completions of requests to the CPU that originally submitted it. A bio helper (bio_set_completion_cpu()) is also added, so that queuers can ask for completion on that specific CPU. In testing, this has been show to cut the system time by as much as 20-40% on synthetic workloads where CPU affinity is desired. This requires a little help from the architecture, so it'll only work as designed for archs that are using the new generic smp helper infrastructure. Signed-off-by: Jens Axboe --- block/blk-core.c | 46 ++++++++--------- block/blk-settings.c | 2 +- block/blk-softirq.c | 126 +++++++++++++++++++++++++++++++++++------------ block/blk-sysfs.c | 31 ++++++++++++ block/blk.h | 12 +++++ fs/bio.c | 1 + include/linux/bio.h | 11 +++++ include/linux/blkdev.h | 5 +- include/linux/elevator.h | 8 +-- 9 files changed, 182 insertions(+), 60 deletions(-) (limited to 'fs/bio.c') diff --git a/block/blk-core.c b/block/blk-core.c index 9c6f818d0c3..5484838f46e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -110,7 +110,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq) memset(rq, 0, sizeof(*rq)); INIT_LIST_HEAD(&rq->queuelist); - INIT_LIST_HEAD(&rq->donelist); + rq->cpu = -1; rq->q = q; rq->sector = rq->hard_sector = (sector_t) -1; INIT_HLIST_NODE(&rq->hash); @@ -322,6 +322,21 @@ void blk_unplug(struct request_queue *q) } EXPORT_SYMBOL(blk_unplug); +static void blk_invoke_request_fn(struct request_queue *q) +{ + /* + * one level of recursion is ok and is much faster than kicking + * the unplug handling + */ + if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { + q->request_fn(q); + queue_flag_clear(QUEUE_FLAG_REENTER, q); + } else { + queue_flag_set(QUEUE_FLAG_PLUGGED, q); + kblockd_schedule_work(q, &q->unplug_work); + } +} + /** * blk_start_queue - restart a previously stopped queue * @q: The &struct request_queue in question @@ -336,18 +351,7 @@ void blk_start_queue(struct request_queue *q) WARN_ON(!irqs_disabled()); queue_flag_clear(QUEUE_FLAG_STOPPED, q); - - /* - * one level of recursion is ok and is much faster than kicking - * the unplug handling - */ - if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { - q->request_fn(q); - queue_flag_clear(QUEUE_FLAG_REENTER, q); - } else { - blk_plug_device(q); - kblockd_schedule_work(q, &q->unplug_work); - } + blk_invoke_request_fn(q); } EXPORT_SYMBOL(blk_start_queue); @@ -405,15 +409,8 @@ void __blk_run_queue(struct request_queue *q) * Only recurse once to avoid overrunning the stack, let the unplug * handling reinvoke the handler shortly if we already got there. */ - if (!elv_queue_empty(q)) { - if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { - q->request_fn(q); - queue_flag_clear(QUEUE_FLAG_REENTER, q); - } else { - blk_plug_device(q); - kblockd_schedule_work(q, &q->unplug_work); - } - } + if (!elv_queue_empty(q)) + blk_invoke_request_fn(q); } EXPORT_SYMBOL(__blk_run_queue); @@ -1056,6 +1053,7 @@ EXPORT_SYMBOL(blk_put_request); void init_request_from_bio(struct request *req, struct bio *bio) { + req->cpu = bio->bi_comp_cpu; req->cmd_type = REQ_TYPE_FS; /* @@ -1198,13 +1196,15 @@ get_rq: init_request_from_bio(req, bio); spin_lock_irq(q->queue_lock); + if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || + bio_flagged(bio, BIO_CPU_AFFINE)) + req->cpu = blk_cpu_to_group(smp_processor_id()); if (elv_queue_empty(q)) blk_plug_device(q); add_request(q, req); out: if (sync) __generic_unplug_device(q); - spin_unlock_irq(q->queue_lock); return 0; diff --git a/block/blk-settings.c b/block/blk-settings.c index d70692badcd..a60e959a12c 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -443,7 +443,7 @@ void blk_queue_update_dma_alignment(struct request_queue *q, int mask) } EXPORT_SYMBOL(blk_queue_update_dma_alignment); -static int __init blk_settings_init(void) +int __init blk_settings_init(void) { blk_max_low_pfn = max_low_pfn - 1; blk_max_pfn = max_pfn - 1; diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 9e1c43bff66..3a1af551191 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -13,6 +13,70 @@ static DEFINE_PER_CPU(struct list_head, blk_cpu_done); +/* + * Softirq action handler - move entries to local list and loop over them + * while passing them to the queue registered handler. + */ +static void blk_done_softirq(struct softirq_action *h) +{ + struct list_head *cpu_list, local_list; + + local_irq_disable(); + cpu_list = &__get_cpu_var(blk_cpu_done); + list_replace_init(cpu_list, &local_list); + local_irq_enable(); + + while (!list_empty(&local_list)) { + struct request *rq; + + rq = list_entry(local_list.next, struct request, csd.list); + list_del_init(&rq->csd.list); + rq->q->softirq_done_fn(rq); + } +} + +#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS) +static void trigger_softirq(void *data) +{ + struct request *rq = data; + unsigned long flags; + struct list_head *list; + + local_irq_save(flags); + list = &__get_cpu_var(blk_cpu_done); + list_add_tail(&rq->csd.list, list); + + if (list->next == &rq->csd.list) + raise_softirq_irqoff(BLOCK_SOFTIRQ); + + local_irq_restore(flags); +} + +/* + * Setup and invoke a run of 'trigger_softirq' on the given cpu. + */ +static int raise_blk_irq(int cpu, struct request *rq) +{ + if (cpu_online(cpu)) { + struct call_single_data *data = &rq->csd; + + data->func = trigger_softirq; + data->info = rq; + data->flags = 0; + + __smp_call_function_single(cpu, data); + return 0; + } + + return 1; +} +#else /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */ +static int raise_blk_irq(int cpu, struct request *rq) +{ + return 1; +} +#endif + static int __cpuinit blk_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { @@ -33,33 +97,10 @@ static int __cpuinit blk_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } - -static struct notifier_block blk_cpu_notifier __cpuinitdata = { +static struct notifier_block __cpuinitdata blk_cpu_notifier = { .notifier_call = blk_cpu_notify, }; -/* - * splice the completion data to a local structure and hand off to - * process_completion_queue() to complete the requests - */ -static void blk_done_softirq(struct softirq_action *h) -{ - struct list_head *cpu_list, local_list; - - local_irq_disable(); - cpu_list = &__get_cpu_var(blk_cpu_done); - list_replace_init(cpu_list, &local_list); - local_irq_enable(); - - while (!list_empty(&local_list)) { - struct request *rq; - - rq = list_entry(local_list.next, struct request, donelist); - list_del_init(&rq->donelist); - rq->q->softirq_done_fn(rq); - } -} - /** * blk_complete_request - end I/O on a request * @req: the request being processed @@ -71,25 +112,48 @@ static void blk_done_softirq(struct softirq_action *h) * through a softirq handler. The user must have registered a completion * callback through blk_queue_softirq_done(). **/ - void blk_complete_request(struct request *req) { - struct list_head *cpu_list; + struct request_queue *q = req->q; unsigned long flags; + int ccpu, cpu, group_cpu; - BUG_ON(!req->q->softirq_done_fn); + BUG_ON(!q->softirq_done_fn); local_irq_save(flags); + cpu = smp_processor_id(); + group_cpu = blk_cpu_to_group(cpu); - cpu_list = &__get_cpu_var(blk_cpu_done); - list_add_tail(&req->donelist, cpu_list); - raise_softirq_irqoff(BLOCK_SOFTIRQ); + /* + * Select completion CPU + */ + if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) + ccpu = req->cpu; + else + ccpu = cpu; + + if (ccpu == cpu || ccpu == group_cpu) { + struct list_head *list; +do_local: + list = &__get_cpu_var(blk_cpu_done); + list_add_tail(&req->csd.list, list); + + /* + * if the list only contains our just added request, + * signal a raise of the softirq. If there are already + * entries there, someone already raised the irq but it + * hasn't run yet. + */ + if (list->next == &req->csd.list) + raise_softirq_irqoff(BLOCK_SOFTIRQ); + } else if (raise_blk_irq(ccpu, req)) + goto do_local; local_irq_restore(flags); } EXPORT_SYMBOL(blk_complete_request); -int __init blk_softirq_init(void) +__init int blk_softirq_init(void) { int i; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index b9a6ed16664..21e275d7eed 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -156,6 +156,30 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page, return ret; } +static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page) +{ + unsigned int set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags); + + return queue_var_show(set != 0, page); +} + +static ssize_t +queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) +{ + ssize_t ret = -EINVAL; +#if defined(CONFIG_USE_GENERIC_SMP_HELPERS) + unsigned long val; + + ret = queue_var_store(&val, page, count); + spin_lock_irq(q->queue_lock); + if (val) + queue_flag_set(QUEUE_FLAG_SAME_COMP, q); + else + queue_flag_clear(QUEUE_FLAG_SAME_COMP, q); + spin_unlock_irq(q->queue_lock); +#endif + return ret; +} static struct queue_sysfs_entry queue_requests_entry = { .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, @@ -197,6 +221,12 @@ static struct queue_sysfs_entry queue_nomerges_entry = { .store = queue_nomerges_store, }; +static struct queue_sysfs_entry queue_rq_affinity_entry = { + .attr = {.name = "rq_affinity", .mode = S_IRUGO | S_IWUSR }, + .show = queue_rq_affinity_show, + .store = queue_rq_affinity_store, +}; + static struct attribute *default_attrs[] = { &queue_requests_entry.attr, &queue_ra_entry.attr, @@ -205,6 +235,7 @@ static struct attribute *default_attrs[] = { &queue_iosched_entry.attr, &queue_hw_sector_size_entry.attr, &queue_nomerges_entry.attr, + &queue_rq_affinity_entry.attr, NULL, }; diff --git a/block/blk.h b/block/blk.h index c79f30e1df5..de74254cb91 100644 --- a/block/blk.h +++ b/block/blk.h @@ -59,4 +59,16 @@ static inline int queue_congestion_off_threshold(struct request_queue *q) #endif /* BLK_DEV_INTEGRITY */ +static inline int blk_cpu_to_group(int cpu) +{ +#ifdef CONFIG_SCHED_MC + cpumask_t mask = cpu_coregroup_map(cpu); + return first_cpu(mask); +#elif defined(CONFIG_SCHED_SMT) + return first_cpu(per_cpu(cpu_sibling_map, cpu)); +#else + return cpu; +#endif +} + #endif diff --git a/fs/bio.c b/fs/bio.c index bee4deca774..6a637b5c24b 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -111,6 +111,7 @@ void bio_init(struct bio *bio) { memset(bio, 0, sizeof(*bio)); bio->bi_flags = 1 << BIO_UPTODATE; + bio->bi_comp_cpu = -1; atomic_set(&bio->bi_cnt, 1); } diff --git a/include/linux/bio.h b/include/linux/bio.h index 2c0c09034fd..13aba20edb2 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -81,6 +81,8 @@ struct bio { unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ + unsigned int bi_comp_cpu; /* completion CPU */ + struct bio_vec *bi_io_vec; /* the actual vec list */ bio_end_io_t *bi_end_io; @@ -105,6 +107,7 @@ struct bio { #define BIO_BOUNCED 5 /* bio is a bounce bio */ #define BIO_USER_MAPPED 6 /* contains user pages */ #define BIO_EOPNOTSUPP 7 /* not supported */ +#define BIO_CPU_AFFINE 8 /* complete bio on same CPU as submitted */ #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) /* @@ -342,6 +345,14 @@ void zero_fill_bio(struct bio *bio); extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *); extern unsigned int bvec_nr_vecs(unsigned short idx); +/* + * Allow queuer to specify a completion CPU for this bio + */ +static inline void bio_set_completion_cpu(struct bio *bio, unsigned int cpu) +{ + bio->bi_comp_cpu = cpu; +} + /* * bio_set is used to allow other portions of the IO system to * allocate their own private memory pools for bio and iovec structures. diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 10aa46c8f17..93204bf7b29 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -139,7 +140,8 @@ enum rq_flag_bits { */ struct request { struct list_head queuelist; - struct list_head donelist; + struct call_single_data csd; + int cpu; struct request_queue *q; @@ -420,6 +422,7 @@ struct request_queue #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ #define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */ +#define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */ static inline int queue_is_locked(struct request_queue *q) { diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 639624b55fb..bb791c311a5 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -173,15 +173,15 @@ enum { #define rb_entry_rq(node) rb_entry((node), struct request, rb_node) /* - * Hack to reuse the donelist list_head as the fifo time holder while + * Hack to reuse the csd.list list_head as the fifo time holder while * the request is in the io scheduler. Saves an unsigned long in rq. */ -#define rq_fifo_time(rq) ((unsigned long) (rq)->donelist.next) -#define rq_set_fifo_time(rq,exp) ((rq)->donelist.next = (void *) (exp)) +#define rq_fifo_time(rq) ((unsigned long) (rq)->csd.list.next) +#define rq_set_fifo_time(rq,exp) ((rq)->csd.list.next = (void *) (exp)) #define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) #define rq_fifo_clear(rq) do { \ list_del_init(&(rq)->queuelist); \ - INIT_LIST_HEAD(&(rq)->donelist); \ + INIT_LIST_HEAD(&(rq)->csd.list); \ } while (0) /* -- cgit v1.2.3-70-g09d2 From a3bce90edd8f6cafe3f63b1a943800792e830178 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 28 Aug 2008 16:17:05 +0900 Subject: block: add gfp_mask argument to blk_rq_map_user and blk_rq_map_user_iov Currently, blk_rq_map_user and blk_rq_map_user_iov always do GFP_KERNEL allocation. This adds gfp_mask argument to blk_rq_map_user and blk_rq_map_user_iov so sg can use it (sg always does GFP_ATOMIC allocation). Signed-off-by: FUJITA Tomonori Signed-off-by: Douglas Gilbert Cc: Mike Christie Cc: James Bottomley Signed-off-by: Jens Axboe --- block/blk-map.c | 20 ++++++++++++-------- block/bsg.c | 5 +++-- block/scsi_ioctl.c | 5 +++-- drivers/cdrom/cdrom.c | 2 +- drivers/scsi/scsi_tgt_lib.c | 2 +- fs/bio.c | 33 +++++++++++++++++++-------------- include/linux/bio.h | 9 +++++---- include/linux/blkdev.h | 5 +++-- 8 files changed, 47 insertions(+), 34 deletions(-) (limited to 'fs/bio.c') diff --git a/block/blk-map.c b/block/blk-map.c index ea1bf53929e..ac21b7397e1 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -41,7 +41,8 @@ static int __blk_rq_unmap_user(struct bio *bio) } static int __blk_rq_map_user(struct request_queue *q, struct request *rq, - void __user *ubuf, unsigned int len) + void __user *ubuf, unsigned int len, + gfp_t gfp_mask) { unsigned long uaddr; unsigned int alignment; @@ -57,9 +58,9 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, uaddr = (unsigned long) ubuf; alignment = queue_dma_alignment(q) | q->dma_pad_mask; if (!(uaddr & alignment) && !(len & alignment)) - bio = bio_map_user(q, NULL, uaddr, len, reading); + bio = bio_map_user(q, NULL, uaddr, len, reading, gfp_mask); else - bio = bio_copy_user(q, uaddr, len, reading); + bio = bio_copy_user(q, uaddr, len, reading, gfp_mask); if (IS_ERR(bio)) return PTR_ERR(bio); @@ -90,6 +91,7 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, * @rq: request structure to fill * @ubuf: the user buffer * @len: length of user data + * @gfp_mask: memory allocation flags * * Description: * Data will be mapped directly for zero copy I/O, if possible. Otherwise @@ -105,7 +107,7 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, * unmapping. */ int blk_rq_map_user(struct request_queue *q, struct request *rq, - void __user *ubuf, unsigned long len) + void __user *ubuf, unsigned long len, gfp_t gfp_mask) { unsigned long bytes_read = 0; struct bio *bio = NULL; @@ -132,7 +134,7 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq, if (end - start > BIO_MAX_PAGES) map_len -= PAGE_SIZE; - ret = __blk_rq_map_user(q, rq, ubuf, map_len); + ret = __blk_rq_map_user(q, rq, ubuf, map_len, gfp_mask); if (ret < 0) goto unmap_rq; if (!bio) @@ -160,6 +162,7 @@ EXPORT_SYMBOL(blk_rq_map_user); * @iov: pointer to the iovec * @iov_count: number of elements in the iovec * @len: I/O byte count + * @gfp_mask: memory allocation flags * * Description: * Data will be mapped directly for zero copy I/O, if possible. Otherwise @@ -175,7 +178,8 @@ EXPORT_SYMBOL(blk_rq_map_user); * unmapping. */ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, - struct sg_iovec *iov, int iov_count, unsigned int len) + struct sg_iovec *iov, int iov_count, unsigned int len, + gfp_t gfp_mask) { struct bio *bio; int i, read = rq_data_dir(rq) == READ; @@ -194,9 +198,9 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, } if (unaligned || (q->dma_pad_mask & len)) - bio = bio_copy_user_iov(q, iov, iov_count, read); + bio = bio_copy_user_iov(q, iov, iov_count, read, gfp_mask); else - bio = bio_map_user_iov(q, NULL, iov, iov_count, read); + bio = bio_map_user_iov(q, NULL, iov, iov_count, read, gfp_mask); if (IS_ERR(bio)) return PTR_ERR(bio); diff --git a/block/bsg.c b/block/bsg.c index 0aae8d7ba99..e7a142e9916 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -283,7 +283,8 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, int has_write_perm) next_rq->cmd_type = rq->cmd_type; dxferp = (void*)(unsigned long)hdr->din_xferp; - ret = blk_rq_map_user(q, next_rq, dxferp, hdr->din_xfer_len); + ret = blk_rq_map_user(q, next_rq, dxferp, hdr->din_xfer_len, + GFP_KERNEL); if (ret) goto out; } @@ -298,7 +299,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, int has_write_perm) dxfer_len = 0; if (dxfer_len) { - ret = blk_rq_map_user(q, rq, dxferp, dxfer_len); + ret = blk_rq_map_user(q, rq, dxferp, dxfer_len, GFP_KERNEL); if (ret) goto out; } diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 3aab80a4c48..f49d6a11a69 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -315,10 +315,11 @@ static int sg_io(struct file *file, struct request_queue *q, } ret = blk_rq_map_user_iov(q, rq, iov, hdr->iovec_count, - hdr->dxfer_len); + hdr->dxfer_len, GFP_KERNEL); kfree(iov); } else if (hdr->dxfer_len) - ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len); + ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len, + GFP_KERNEL); if (ret) goto out; diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 74031de517e..e861d24a6d3 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2097,7 +2097,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, len = nr * CD_FRAMESIZE_RAW; - ret = blk_rq_map_user(q, rq, ubuf, len); + ret = blk_rq_map_user(q, rq, ubuf, len, GFP_KERNEL); if (ret) break; diff --git a/drivers/scsi/scsi_tgt_lib.c b/drivers/scsi/scsi_tgt_lib.c index 257e097c39a..2a4fd820d61 100644 --- a/drivers/scsi/scsi_tgt_lib.c +++ b/drivers/scsi/scsi_tgt_lib.c @@ -362,7 +362,7 @@ static int scsi_map_user_pages(struct scsi_tgt_cmd *tcmd, struct scsi_cmnd *cmd, int err; dprintk("%lx %u\n", uaddr, len); - err = blk_rq_map_user(q, rq, (void *)uaddr, len); + err = blk_rq_map_user(q, rq, (void *)uaddr, len, GFP_KERNEL); if (err) { /* * TODO: need to fixup sg_tablesize, max_segment_size, diff --git a/fs/bio.c b/fs/bio.c index 6a637b5c24b..3d2e9ad2472 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -558,13 +558,14 @@ int bio_uncopy_user(struct bio *bio) * @iov: the iovec. * @iov_count: number of elements in the iovec * @write_to_vm: bool indicating writing to pages or not + * @gfp_mask: memory allocation flags * * Prepares and returns a bio for indirect user io, bouncing data * to/from kernel pages as necessary. Must be paired with * call bio_uncopy_user() on io completion. */ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, - int iov_count, int write_to_vm) + int iov_count, int write_to_vm, gfp_t gfp_mask) { struct bio_map_data *bmd; struct bio_vec *bvec; @@ -587,12 +588,12 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, len += iov[i].iov_len; } - bmd = bio_alloc_map_data(nr_pages, iov_count, GFP_KERNEL); + bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask); if (!bmd) return ERR_PTR(-ENOMEM); ret = -ENOMEM; - bio = bio_alloc(GFP_KERNEL, nr_pages); + bio = bio_alloc(gfp_mask, nr_pages); if (!bio) goto out_bmd; @@ -605,7 +606,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, if (bytes > len) bytes = len; - page = alloc_page(q->bounce_gfp | GFP_KERNEL); + page = alloc_page(q->bounce_gfp | gfp_mask); if (!page) { ret = -ENOMEM; break; @@ -647,26 +648,27 @@ out_bmd: * @uaddr: start of user address * @len: length in bytes * @write_to_vm: bool indicating writing to pages or not + * @gfp_mask: memory allocation flags * * Prepares and returns a bio for indirect user io, bouncing data * to/from kernel pages as necessary. Must be paired with * call bio_uncopy_user() on io completion. */ struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr, - unsigned int len, int write_to_vm) + unsigned int len, int write_to_vm, gfp_t gfp_mask) { struct sg_iovec iov; iov.iov_base = (void __user *)uaddr; iov.iov_len = len; - return bio_copy_user_iov(q, &iov, 1, write_to_vm); + return bio_copy_user_iov(q, &iov, 1, write_to_vm, gfp_mask); } static struct bio *__bio_map_user_iov(struct request_queue *q, struct block_device *bdev, struct sg_iovec *iov, int iov_count, - int write_to_vm) + int write_to_vm, gfp_t gfp_mask) { int i, j; int nr_pages = 0; @@ -692,12 +694,12 @@ static struct bio *__bio_map_user_iov(struct request_queue *q, if (!nr_pages) return ERR_PTR(-EINVAL); - bio = bio_alloc(GFP_KERNEL, nr_pages); + bio = bio_alloc(gfp_mask, nr_pages); if (!bio) return ERR_PTR(-ENOMEM); ret = -ENOMEM; - pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); + pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask); if (!pages) goto out; @@ -776,19 +778,21 @@ static struct bio *__bio_map_user_iov(struct request_queue *q, * @uaddr: start of user address * @len: length in bytes * @write_to_vm: bool indicating writing to pages or not + * @gfp_mask: memory allocation flags * * Map the user space address into a bio suitable for io to a block * device. Returns an error pointer in case of error. */ struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, - unsigned long uaddr, unsigned int len, int write_to_vm) + unsigned long uaddr, unsigned int len, int write_to_vm, + gfp_t gfp_mask) { struct sg_iovec iov; iov.iov_base = (void __user *)uaddr; iov.iov_len = len; - return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm); + return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask); } /** @@ -798,18 +802,19 @@ struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, * @iov: the iovec. * @iov_count: number of elements in the iovec * @write_to_vm: bool indicating writing to pages or not + * @gfp_mask: memory allocation flags * * Map the user space address into a bio suitable for io to a block * device. Returns an error pointer in case of error. */ struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev, struct sg_iovec *iov, int iov_count, - int write_to_vm) + int write_to_vm, gfp_t gfp_mask) { struct bio *bio; - bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm); - + bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm, + gfp_mask); if (IS_ERR(bio)) return bio; diff --git a/include/linux/bio.h b/include/linux/bio.h index 13aba20edb2..200b185c3e8 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -325,11 +325,11 @@ extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, unsigned int, unsigned int); extern int bio_get_nr_vecs(struct block_device *); extern struct bio *bio_map_user(struct request_queue *, struct block_device *, - unsigned long, unsigned int, int); + unsigned long, unsigned int, int, gfp_t); struct sg_iovec; extern struct bio *bio_map_user_iov(struct request_queue *, struct block_device *, - struct sg_iovec *, int, int); + struct sg_iovec *, int, int, gfp_t); extern void bio_unmap_user(struct bio *); extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, gfp_t); @@ -337,9 +337,10 @@ extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int, gfp_t, int); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); -extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int); +extern struct bio *bio_copy_user(struct request_queue *, unsigned long, + unsigned int, int, gfp_t); extern struct bio *bio_copy_user_iov(struct request_queue *, struct sg_iovec *, - int, int); + int, int, gfp_t); extern int bio_uncopy_user(struct bio *); void zero_fill_bio(struct bio *bio); extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 12df8efeef1..00e388d0e22 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -710,11 +710,12 @@ extern void __blk_stop_queue(struct request_queue *q); extern void __blk_run_queue(struct request_queue *); extern void blk_run_queue(struct request_queue *); extern void blk_start_queueing(struct request_queue *); -extern int blk_rq_map_user(struct request_queue *, struct request *, void __user *, unsigned long); +extern int blk_rq_map_user(struct request_queue *, struct request *, + void __user *, unsigned long, gfp_t); extern int blk_rq_unmap_user(struct bio *); extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); extern int blk_rq_map_user_iov(struct request_queue *, struct request *, - struct sg_iovec *, int, unsigned int); + struct sg_iovec *, int, unsigned int, gfp_t); extern int blk_execute_rq(struct request_queue *, struct gendisk *, struct request *, int); extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, -- cgit v1.2.3-70-g09d2 From 152e283fdfea0cd11e297d982378b55937842dde Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 28 Aug 2008 16:17:06 +0900 Subject: block: introduce struct rq_map_data to use reserved pages This patch introduces struct rq_map_data to enable bio_copy_use_iov() use reserved pages. Currently, bio_copy_user_iov allocates bounce pages but drivers/scsi/sg.c wants to allocate pages by itself and use them. struct rq_map_data can be used to pass allocated pages to bio_copy_user_iov. The current users of bio_copy_user_iov simply passes NULL (they don't want to use pre-allocated pages). Signed-off-by: FUJITA Tomonori Cc: Jens Axboe Cc: Douglas Gilbert Cc: Mike Christie Cc: James Bottomley Signed-off-by: Jens Axboe --- block/blk-map.c | 26 ++++++++++++-------- block/bsg.c | 7 +++--- block/scsi_ioctl.c | 4 ++-- drivers/cdrom/cdrom.c | 2 +- drivers/scsi/scsi_tgt_lib.c | 2 +- fs/bio.c | 58 ++++++++++++++++++++++++++++++++------------- include/linux/bio.h | 8 ++++--- include/linux/blkdev.h | 12 ++++++++-- 8 files changed, 80 insertions(+), 39 deletions(-) (limited to 'fs/bio.c') diff --git a/block/blk-map.c b/block/blk-map.c index ac21b7397e1..dad6a290783 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -41,8 +41,8 @@ static int __blk_rq_unmap_user(struct bio *bio) } static int __blk_rq_map_user(struct request_queue *q, struct request *rq, - void __user *ubuf, unsigned int len, - gfp_t gfp_mask) + struct rq_map_data *map_data, void __user *ubuf, + unsigned int len, gfp_t gfp_mask) { unsigned long uaddr; unsigned int alignment; @@ -57,10 +57,10 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, */ uaddr = (unsigned long) ubuf; alignment = queue_dma_alignment(q) | q->dma_pad_mask; - if (!(uaddr & alignment) && !(len & alignment)) + if (!(uaddr & alignment) && !(len & alignment) && !map_data) bio = bio_map_user(q, NULL, uaddr, len, reading, gfp_mask); else - bio = bio_copy_user(q, uaddr, len, reading, gfp_mask); + bio = bio_copy_user(q, map_data, uaddr, len, reading, gfp_mask); if (IS_ERR(bio)) return PTR_ERR(bio); @@ -89,6 +89,7 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, * blk_rq_map_user - map user data to a request, for REQ_TYPE_BLOCK_PC usage * @q: request queue where request should be inserted * @rq: request structure to fill + * @map_data: pointer to the rq_map_data holding pages (if necessary) * @ubuf: the user buffer * @len: length of user data * @gfp_mask: memory allocation flags @@ -107,7 +108,8 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, * unmapping. */ int blk_rq_map_user(struct request_queue *q, struct request *rq, - void __user *ubuf, unsigned long len, gfp_t gfp_mask) + struct rq_map_data *map_data, void __user *ubuf, + unsigned long len, gfp_t gfp_mask) { unsigned long bytes_read = 0; struct bio *bio = NULL; @@ -134,7 +136,8 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq, if (end - start > BIO_MAX_PAGES) map_len -= PAGE_SIZE; - ret = __blk_rq_map_user(q, rq, ubuf, map_len, gfp_mask); + ret = __blk_rq_map_user(q, rq, map_data, ubuf, map_len, + gfp_mask); if (ret < 0) goto unmap_rq; if (!bio) @@ -159,6 +162,7 @@ EXPORT_SYMBOL(blk_rq_map_user); * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage * @q: request queue where request should be inserted * @rq: request to map data to + * @map_data: pointer to the rq_map_data holding pages (if necessary) * @iov: pointer to the iovec * @iov_count: number of elements in the iovec * @len: I/O byte count @@ -178,8 +182,8 @@ EXPORT_SYMBOL(blk_rq_map_user); * unmapping. */ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, - struct sg_iovec *iov, int iov_count, unsigned int len, - gfp_t gfp_mask) + struct rq_map_data *map_data, struct sg_iovec *iov, + int iov_count, unsigned int len, gfp_t gfp_mask) { struct bio *bio; int i, read = rq_data_dir(rq) == READ; @@ -197,8 +201,9 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, } } - if (unaligned || (q->dma_pad_mask & len)) - bio = bio_copy_user_iov(q, iov, iov_count, read, gfp_mask); + if (unaligned || (q->dma_pad_mask & len) || map_data) + bio = bio_copy_user_iov(q, map_data, iov, iov_count, read, + gfp_mask); else bio = bio_map_user_iov(q, NULL, iov, iov_count, read, gfp_mask); @@ -220,6 +225,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, rq->buffer = rq->data = NULL; return 0; } +EXPORT_SYMBOL(blk_rq_map_user_iov); /** * blk_rq_unmap_user - unmap a request with user data diff --git a/block/bsg.c b/block/bsg.c index e7a142e9916..56cb343c76d 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -283,8 +283,8 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, int has_write_perm) next_rq->cmd_type = rq->cmd_type; dxferp = (void*)(unsigned long)hdr->din_xferp; - ret = blk_rq_map_user(q, next_rq, dxferp, hdr->din_xfer_len, - GFP_KERNEL); + ret = blk_rq_map_user(q, next_rq, NULL, dxferp, + hdr->din_xfer_len, GFP_KERNEL); if (ret) goto out; } @@ -299,7 +299,8 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, int has_write_perm) dxfer_len = 0; if (dxfer_len) { - ret = blk_rq_map_user(q, rq, dxferp, dxfer_len, GFP_KERNEL); + ret = blk_rq_map_user(q, rq, NULL, dxferp, dxfer_len, + GFP_KERNEL); if (ret) goto out; } diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index f49d6a11a69..c34272a348f 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -314,11 +314,11 @@ static int sg_io(struct file *file, struct request_queue *q, goto out; } - ret = blk_rq_map_user_iov(q, rq, iov, hdr->iovec_count, + ret = blk_rq_map_user_iov(q, rq, NULL, iov, hdr->iovec_count, hdr->dxfer_len, GFP_KERNEL); kfree(iov); } else if (hdr->dxfer_len) - ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len, + ret = blk_rq_map_user(q, rq, NULL, hdr->dxferp, hdr->dxfer_len, GFP_KERNEL); if (ret) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index e861d24a6d3..d47f2f80acc 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2097,7 +2097,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, len = nr * CD_FRAMESIZE_RAW; - ret = blk_rq_map_user(q, rq, ubuf, len, GFP_KERNEL); + ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL); if (ret) break; diff --git a/drivers/scsi/scsi_tgt_lib.c b/drivers/scsi/scsi_tgt_lib.c index 2a4fd820d61..3117bb106b5 100644 --- a/drivers/scsi/scsi_tgt_lib.c +++ b/drivers/scsi/scsi_tgt_lib.c @@ -362,7 +362,7 @@ static int scsi_map_user_pages(struct scsi_tgt_cmd *tcmd, struct scsi_cmnd *cmd, int err; dprintk("%lx %u\n", uaddr, len); - err = blk_rq_map_user(q, rq, (void *)uaddr, len, GFP_KERNEL); + err = blk_rq_map_user(q, rq, NULL, (void *)uaddr, len, GFP_KERNEL); if (err) { /* * TODO: need to fixup sg_tablesize, max_segment_size, diff --git a/fs/bio.c b/fs/bio.c index 3d2e9ad2472..a2f072647cd 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -439,16 +439,19 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len, struct bio_map_data { struct bio_vec *iovecs; - int nr_sgvecs; struct sg_iovec *sgvecs; + int nr_sgvecs; + int is_our_pages; }; static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio, - struct sg_iovec *iov, int iov_count) + struct sg_iovec *iov, int iov_count, + int is_our_pages) { memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt); memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count); bmd->nr_sgvecs = iov_count; + bmd->is_our_pages = is_our_pages; bio->bi_private = bmd; } @@ -483,7 +486,8 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count, } static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, - struct sg_iovec *iov, int iov_count, int uncopy) + struct sg_iovec *iov, int iov_count, int uncopy, + int do_free_page) { int ret = 0, i; struct bio_vec *bvec; @@ -526,7 +530,7 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, } } - if (uncopy) + if (do_free_page) __free_page(bvec->bv_page); } @@ -545,7 +549,8 @@ int bio_uncopy_user(struct bio *bio) struct bio_map_data *bmd = bio->bi_private; int ret; - ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, bmd->nr_sgvecs, 1); + ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, bmd->nr_sgvecs, 1, + bmd->is_our_pages); bio_free_map_data(bmd); bio_put(bio); @@ -555,6 +560,7 @@ int bio_uncopy_user(struct bio *bio) /** * bio_copy_user_iov - copy user data to bio * @q: destination block queue + * @map_data: pointer to the rq_map_data holding pages (if necessary) * @iov: the iovec. * @iov_count: number of elements in the iovec * @write_to_vm: bool indicating writing to pages or not @@ -564,8 +570,10 @@ int bio_uncopy_user(struct bio *bio) * to/from kernel pages as necessary. Must be paired with * call bio_uncopy_user() on io completion. */ -struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, - int iov_count, int write_to_vm, gfp_t gfp_mask) +struct bio *bio_copy_user_iov(struct request_queue *q, + struct rq_map_data *map_data, + struct sg_iovec *iov, int iov_count, + int write_to_vm, gfp_t gfp_mask) { struct bio_map_data *bmd; struct bio_vec *bvec; @@ -600,13 +608,26 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, bio->bi_rw |= (!write_to_vm << BIO_RW); ret = 0; + i = 0; while (len) { - unsigned int bytes = PAGE_SIZE; + unsigned int bytes; + + if (map_data) + bytes = 1U << (PAGE_SHIFT + map_data->page_order); + else + bytes = PAGE_SIZE; if (bytes > len) bytes = len; - page = alloc_page(q->bounce_gfp | gfp_mask); + if (map_data) { + if (i == map_data->nr_entries) { + ret = -ENOMEM; + break; + } + page = map_data->pages[i++]; + } else + page = alloc_page(q->bounce_gfp | gfp_mask); if (!page) { ret = -ENOMEM; break; @@ -625,16 +646,17 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, * success */ if (!write_to_vm) { - ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0); + ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 0); if (ret) goto cleanup; } - bio_set_map_data(bmd, bio, iov, iov_count); + bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1); return bio; cleanup: - bio_for_each_segment(bvec, bio, i) - __free_page(bvec->bv_page); + if (!map_data) + bio_for_each_segment(bvec, bio, i) + __free_page(bvec->bv_page); bio_put(bio); out_bmd: @@ -645,6 +667,7 @@ out_bmd: /** * bio_copy_user - copy user data to bio * @q: destination block queue + * @map_data: pointer to the rq_map_data holding pages (if necessary) * @uaddr: start of user address * @len: length in bytes * @write_to_vm: bool indicating writing to pages or not @@ -654,15 +677,16 @@ out_bmd: * to/from kernel pages as necessary. Must be paired with * call bio_uncopy_user() on io completion. */ -struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr, - unsigned int len, int write_to_vm, gfp_t gfp_mask) +struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data, + unsigned long uaddr, unsigned int len, + int write_to_vm, gfp_t gfp_mask) { struct sg_iovec iov; iov.iov_base = (void __user *)uaddr; iov.iov_len = len; - return bio_copy_user_iov(q, &iov, 1, write_to_vm, gfp_mask); + return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask); } static struct bio *__bio_map_user_iov(struct request_queue *q, @@ -1028,7 +1052,7 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, bio->bi_private = bmd; bio->bi_end_io = bio_copy_kern_endio; - bio_set_map_data(bmd, bio, &iov, 1); + bio_set_map_data(bmd, bio, &iov, 1, 1); return bio; cleanup: bio_for_each_segment(bvec, bio, i) diff --git a/include/linux/bio.h b/include/linux/bio.h index 200b185c3e8..bc386cd5e99 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -327,6 +327,7 @@ extern int bio_get_nr_vecs(struct block_device *); extern struct bio *bio_map_user(struct request_queue *, struct block_device *, unsigned long, unsigned int, int, gfp_t); struct sg_iovec; +struct rq_map_data; extern struct bio *bio_map_user_iov(struct request_queue *, struct block_device *, struct sg_iovec *, int, int, gfp_t); @@ -337,9 +338,10 @@ extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int, gfp_t, int); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); -extern struct bio *bio_copy_user(struct request_queue *, unsigned long, - unsigned int, int, gfp_t); -extern struct bio *bio_copy_user_iov(struct request_queue *, struct sg_iovec *, +extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *, + unsigned long, unsigned int, int, gfp_t); +extern struct bio *bio_copy_user_iov(struct request_queue *, + struct rq_map_data *, struct sg_iovec *, int, int, gfp_t); extern int bio_uncopy_user(struct bio *); void zero_fill_bio(struct bio *bio); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 00e388d0e22..358ac423ed2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -642,6 +642,12 @@ static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) } #endif /* CONFIG_MMU */ +struct rq_map_data { + struct page **pages; + int page_order; + int nr_entries; +}; + struct req_iterator { int i; struct bio *bio; @@ -711,11 +717,13 @@ extern void __blk_run_queue(struct request_queue *); extern void blk_run_queue(struct request_queue *); extern void blk_start_queueing(struct request_queue *); extern int blk_rq_map_user(struct request_queue *, struct request *, - void __user *, unsigned long, gfp_t); + struct rq_map_data *, void __user *, unsigned long, + gfp_t); extern int blk_rq_unmap_user(struct bio *); extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); extern int blk_rq_map_user_iov(struct request_queue *, struct request *, - struct sg_iovec *, int, unsigned int, gfp_t); + struct rq_map_data *, struct sg_iovec *, int, + unsigned int, gfp_t); extern int blk_execute_rq(struct request_queue *, struct gendisk *, struct request *, int); extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, -- cgit v1.2.3-70-g09d2 From 4d8ab62e087d9300883b82c2662e73e6eef803a3 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 28 Aug 2008 15:05:57 +0900 Subject: bio: convert bio_copy_kern to use bio_copy_user bio_copy_kern and bio_copy_user are very similar. This converts bio_copy_kern to use bio_copy_user. Signed-off-by: FUJITA Tomonori Cc: Jens Axboe Signed-off-by: Jens Axboe --- fs/bio.c | 54 ++++-------------------------------------------------- 1 file changed, 4 insertions(+), 50 deletions(-) (limited to 'fs/bio.c') diff --git a/fs/bio.c b/fs/bio.c index a2f072647cd..9d68ddb89b7 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -995,48 +995,13 @@ static void bio_copy_kern_endio(struct bio *bio, int err) struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, gfp_t gfp_mask, int reading) { - unsigned long kaddr = (unsigned long)data; - unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - unsigned long start = kaddr >> PAGE_SHIFT; - const int nr_pages = end - start; struct bio *bio; struct bio_vec *bvec; - struct bio_map_data *bmd; - int i, ret; - struct sg_iovec iov; - - iov.iov_base = data; - iov.iov_len = len; - - bmd = bio_alloc_map_data(nr_pages, 1, gfp_mask); - if (!bmd) - return ERR_PTR(-ENOMEM); - - ret = -ENOMEM; - bio = bio_alloc(gfp_mask, nr_pages); - if (!bio) - goto out_bmd; - - while (len) { - struct page *page; - unsigned int bytes = PAGE_SIZE; - - if (bytes > len) - bytes = len; - - page = alloc_page(q->bounce_gfp | gfp_mask); - if (!page) { - ret = -ENOMEM; - goto cleanup; - } - - if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) { - ret = -EINVAL; - goto cleanup; - } + int i; - len -= bytes; - } + bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask); + if (IS_ERR(bio)) + return bio; if (!reading) { void *p = data; @@ -1049,20 +1014,9 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, } } - bio->bi_private = bmd; bio->bi_end_io = bio_copy_kern_endio; - bio_set_map_data(bmd, bio, &iov, 1, 1); return bio; -cleanup: - bio_for_each_segment(bvec, bio, i) - __free_page(bvec->bv_page); - - bio_put(bio); -out_bmd: - bio_free_map_data(bmd); - - return ERR_PTR(ret); } /* -- cgit v1.2.3-70-g09d2 From 818827669d85b84241696ffef2de485db46b0b5e Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 2 Sep 2008 16:20:19 +0900 Subject: block: make blk_rq_map_user take a NULL user-space buffer This patch changes blk_rq_map_user to accept a NULL user-space buffer with a READ command if rq_map_data is not NULL. Thus a caller can pass page frames to lk_rq_map_user to just set up a request and bios with page frames propely. bio_uncopy_user (called via blk_rq_unmap_user) doesn't copy data to user space with such request. Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/blk-map.c | 16 ++++++++++++---- fs/bio.c | 8 ++++---- include/linux/bio.h | 1 + 3 files changed, 17 insertions(+), 8 deletions(-) (limited to 'fs/bio.c') diff --git a/block/blk-map.c b/block/blk-map.c index 572140cda5f..4849fa36161 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -42,7 +42,7 @@ static int __blk_rq_unmap_user(struct bio *bio) static int __blk_rq_map_user(struct request_queue *q, struct request *rq, struct rq_map_data *map_data, void __user *ubuf, - unsigned int len, gfp_t gfp_mask) + unsigned int len, int null_mapped, gfp_t gfp_mask) { unsigned long uaddr; struct bio *bio, *orig_bio; @@ -63,6 +63,9 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, if (IS_ERR(bio)) return PTR_ERR(bio); + if (null_mapped) + bio->bi_flags |= (1 << BIO_NULL_MAPPED); + orig_bio = bio; blk_queue_bounce(q, &bio); @@ -111,12 +114,17 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq, { unsigned long bytes_read = 0; struct bio *bio = NULL; - int ret; + int ret, null_mapped = 0; if (len > (q->max_hw_sectors << 9)) return -EINVAL; - if (!len || !ubuf) + if (!len) return -EINVAL; + if (!ubuf) { + if (!map_data || rq_data_dir(rq) != READ) + return -EINVAL; + null_mapped = 1; + } while (bytes_read != len) { unsigned long map_len, end, start; @@ -135,7 +143,7 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq, map_len -= PAGE_SIZE; ret = __blk_rq_map_user(q, rq, map_data, ubuf, map_len, - gfp_mask); + null_mapped, gfp_mask); if (ret < 0) goto unmap_rq; if (!bio) diff --git a/fs/bio.c b/fs/bio.c index 9d68ddb89b7..355302985e2 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -547,11 +547,11 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, int bio_uncopy_user(struct bio *bio) { struct bio_map_data *bmd = bio->bi_private; - int ret; - - ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, bmd->nr_sgvecs, 1, - bmd->is_our_pages); + int ret = 0; + if (!bio_flagged(bio, BIO_NULL_MAPPED)) + ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, + bmd->nr_sgvecs, 1, bmd->is_our_pages); bio_free_map_data(bmd); bio_put(bio); return ret; diff --git a/include/linux/bio.h b/include/linux/bio.h index bc386cd5e99..7af373f253d 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -108,6 +108,7 @@ struct bio { #define BIO_USER_MAPPED 6 /* contains user pages */ #define BIO_EOPNOTSUPP 7 /* not supported */ #define BIO_CPU_AFFINE 8 /* complete bio on same CPU as submitted */ +#define BIO_NULL_MAPPED 9 /* contains invalid user pages */ #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) /* -- cgit v1.2.3-70-g09d2 From 0a0d96b03a1f3bfd6bc3ea08008699e8e59fccd9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 11 Sep 2008 13:17:37 +0200 Subject: block: add bio_kmalloc() Not all callers need (or want!) the mempool backing guarentee, it essentially means that you can only use bio_alloc() for short allocations and not for preallocating some bio's at setup or init time. So add bio_kmalloc() which does the same thing as bio_alloc(), except it just uses kmalloc() as the backing instead of the bio mempools. Signed-off-by: Jens Axboe --- fs/bio.c | 96 +++++++++++++++++++++++++++++++++++++++++------------ include/linux/bio.h | 1 + 2 files changed, 76 insertions(+), 21 deletions(-) (limited to 'fs/bio.c') diff --git a/fs/bio.c b/fs/bio.c index 355302985e2..e56e7685af9 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -60,25 +60,46 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct struct bio_vec *bvl; /* - * see comment near bvec_array define! + * If 'bs' is given, lookup the pool and do the mempool alloc. + * If not, this is a bio_kmalloc() allocation and just do a + * kzalloc() for the exact number of vecs right away. */ - switch (nr) { - case 1 : *idx = 0; break; - case 2 ... 4: *idx = 1; break; - case 5 ... 16: *idx = 2; break; - case 17 ... 64: *idx = 3; break; - case 65 ... 128: *idx = 4; break; - case 129 ... BIO_MAX_PAGES: *idx = 5; break; + if (bs) { + /* + * see comment near bvec_array define! + */ + switch (nr) { + case 1: + *idx = 0; + break; + case 2 ... 4: + *idx = 1; + break; + case 5 ... 16: + *idx = 2; + break; + case 17 ... 64: + *idx = 3; + break; + case 65 ... 128: + *idx = 4; + break; + case 129 ... BIO_MAX_PAGES: + *idx = 5; + break; default: return NULL; - } - /* - * idx now points to the pool we want to allocate from - */ + } - bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); - if (bvl) - memset(bvl, 0, bvec_nr_vecs(*idx) * sizeof(struct bio_vec)); + /* + * idx now points to the pool we want to allocate from + */ + bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); + if (bvl) + memset(bvl, 0, + bvec_nr_vecs(*idx) * sizeof(struct bio_vec)); + } else + bvl = kzalloc(nr * sizeof(struct bio_vec), gfp_mask); return bvl; } @@ -107,6 +128,12 @@ static void bio_fs_destructor(struct bio *bio) bio_free(bio, fs_bio_set); } +static void bio_kmalloc_destructor(struct bio *bio) +{ + kfree(bio->bi_io_vec); + kfree(bio); +} + void bio_init(struct bio *bio) { memset(bio, 0, sizeof(*bio)); @@ -119,19 +146,25 @@ void bio_init(struct bio *bio) * bio_alloc_bioset - allocate a bio for I/O * @gfp_mask: the GFP_ mask given to the slab allocator * @nr_iovecs: number of iovecs to pre-allocate - * @bs: the bio_set to allocate from + * @bs: the bio_set to allocate from. If %NULL, just use kmalloc * * Description: - * bio_alloc_bioset will first try it's on mempool to satisfy the allocation. + * bio_alloc_bioset will first try its own mempool to satisfy the allocation. * If %__GFP_WAIT is set then we will block on the internal pool waiting - * for a &struct bio to become free. + * for a &struct bio to become free. If a %NULL @bs is passed in, we will + * fall back to just using @kmalloc to allocate the required memory. * * allocate bio and iovecs from the memory pools specified by the - * bio_set structure. + * bio_set structure, or @kmalloc if none given. **/ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) { - struct bio *bio = mempool_alloc(bs->bio_pool, gfp_mask); + struct bio *bio; + + if (bs) + bio = mempool_alloc(bs->bio_pool, gfp_mask); + else + bio = kmalloc(sizeof(*bio), gfp_mask); if (likely(bio)) { struct bio_vec *bvl = NULL; @@ -142,7 +175,10 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); if (unlikely(!bvl)) { - mempool_free(bio, bs->bio_pool); + if (bs) + mempool_free(bio, bs->bio_pool); + else + kfree(bio); bio = NULL; goto out; } @@ -165,6 +201,23 @@ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) return bio; } +/* + * Like bio_alloc(), but doesn't use a mempool backing. This means that + * it CAN fail, but while bio_alloc() can only be used for allocations + * that have a short (finite) life span, bio_kmalloc() should be used + * for more permanent bio allocations (like allocating some bio's for + * initalization or setup purposes). + */ +struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs) +{ + struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); + + if (bio) + bio->bi_destructor = bio_kmalloc_destructor; + + return bio; +} + void zero_fill_bio(struct bio *bio) { unsigned long flags; @@ -1349,6 +1402,7 @@ static int __init init_bio(void) subsys_initcall(init_bio); EXPORT_SYMBOL(bio_alloc); +EXPORT_SYMBOL(bio_kmalloc); EXPORT_SYMBOL(bio_put); EXPORT_SYMBOL(bio_free); EXPORT_SYMBOL(bio_endio); diff --git a/include/linux/bio.h b/include/linux/bio.h index 7af373f253d..6520ee1a3f6 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -308,6 +308,7 @@ extern struct bio_set *bioset_create(int, int); extern void bioset_free(struct bio_set *); extern struct bio *bio_alloc(gfp_t, int); +extern struct bio *bio_kmalloc(gfp_t, int); extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); extern void bio_put(struct bio *); extern void bio_free(struct bio *, struct bio_set *); -- cgit v1.2.3-70-g09d2 From ad3316bf4eeb53c89164f759767f911072b56203 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 1 Oct 2008 22:42:53 -0400 Subject: block: Find bio sector offset given idx and offset Helper function to find the sector offset in a bio given bvec index and page offset. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- fs/bio.c | 36 ++++++++++++++++++++++++++++++++++++ include/linux/bio.h | 1 + 2 files changed, 37 insertions(+) (limited to 'fs/bio.c') diff --git a/fs/bio.c b/fs/bio.c index e56e7685af9..a5af5809f56 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1300,6 +1300,42 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) return bp; } +/** + * bio_sector_offset - Find hardware sector offset in bio + * @bio: bio to inspect + * @index: bio_vec index + * @offset: offset in bv_page + * + * Return the number of hardware sectors between beginning of bio + * and an end point indicated by a bio_vec index and an offset + * within that vector's page. + */ +sector_t bio_sector_offset(struct bio *bio, unsigned short index, + unsigned int offset) +{ + unsigned int sector_sz = queue_hardsect_size(bio->bi_bdev->bd_disk->queue); + struct bio_vec *bv; + sector_t sectors; + int i; + + sectors = 0; + + if (index >= bio->bi_idx) + index = bio->bi_vcnt - 1; + + __bio_for_each_segment(bv, bio, i, 0) { + if (i == index) { + if (offset > bv->bv_offset) + sectors += (offset - bv->bv_offset) / sector_sz; + break; + } + + sectors += bv->bv_len / sector_sz; + } + + return sectors; +} +EXPORT_SYMBOL(bio_sector_offset); /* * create memory pools for biovec's in a bio_set. diff --git a/include/linux/bio.h b/include/linux/bio.h index d86d39d490e..fe12d0f9eba 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -327,6 +327,7 @@ extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, unsigned int, unsigned int); extern int bio_get_nr_vecs(struct block_device *); +extern sector_t bio_sector_offset(struct bio *, unsigned short, unsigned int); extern struct bio *bio_map_user(struct request_queue *, struct block_device *, unsigned long, unsigned int, int, gfp_t); struct sg_iovec; -- cgit v1.2.3-70-g09d2 From 6feef531f55cf4a20fd9eb39f5352e5745203603 Mon Sep 17 00:00:00 2001 From: Denis ChengRq Date: Thu, 9 Oct 2008 08:57:05 +0200 Subject: block: mark bio_split_pool static Since all bio_split calls refer the same single bio_split_pool, the bio_split function can use bio_split_pool directly instead of the mempool_t parameter; then the mempool_t parameter can be removed from bio_split param list, and bio_split_pool is only referred in fs/bio.c file, can be marked static. Signed-off-by: Denis ChengRq Signed-off-by: Jens Axboe --- drivers/block/pktcdvd.c | 2 +- drivers/md/linear.c | 2 +- drivers/md/raid0.c | 2 +- drivers/md/raid10.c | 2 +- fs/bio.c | 9 ++++----- include/linux/bio.h | 4 +--- 6 files changed, 9 insertions(+), 12 deletions(-) (limited to 'fs/bio.c') diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index e1a90bbb474..0e077150568 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2544,7 +2544,7 @@ static int pkt_make_request(struct request_queue *q, struct bio *bio) if (last_zone != zone) { BUG_ON(last_zone != zone + pd->settings.size); first_sectors = last_zone - bio->bi_sector; - bp = bio_split(bio, bio_split_pool, first_sectors); + bp = bio_split(bio, first_sectors); BUG_ON(!bp); pkt_make_request(q, &bp->bio1); pkt_make_request(q, &bp->bio2); diff --git a/drivers/md/linear.c b/drivers/md/linear.c index c80ea90593d..b9cbee688fa 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -353,7 +353,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) * split it. */ struct bio_pair *bp; - bp = bio_split(bio, bio_split_pool, + bp = bio_split(bio, ((tmp_dev->offset + tmp_dev->size)<<1) - bio->bi_sector); if (linear_make_request(q, &bp->bio1)) generic_make_request(&bp->bio1); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index f52f442a735..53508a8a981 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -427,7 +427,7 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) /* This is a one page bio that upper layers * refuse to split for us, so we need to split it. */ - bp = bio_split(bio, bio_split_pool, chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); + bp = bio_split(bio, chunk_sects - (bio->bi_sector & (chunk_sects - 1))); if (raid0_make_request(q, &bp->bio1)) generic_make_request(&bp->bio1); if (raid0_make_request(q, &bp->bio2)) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 5f990133f5e..8bdc9bfc288 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -817,7 +817,7 @@ static int make_request(struct request_queue *q, struct bio * bio) /* This is a one page bio that upper layers * refuse to split for us, so we need to split it. */ - bp = bio_split(bio, bio_split_pool, + bp = bio_split(bio, chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); if (make_request(q, &bp->bio1)) generic_make_request(&bp->bio1); diff --git a/fs/bio.c b/fs/bio.c index a5af5809f56..77a55bcceed 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -30,7 +30,7 @@ static struct kmem_cache *bio_slab __read_mostly; -mempool_t *bio_split_pool __read_mostly; +static mempool_t *bio_split_pool __read_mostly; /* * if you change this list, also change bvec_alloc or things will @@ -1256,9 +1256,9 @@ static void bio_pair_end_2(struct bio *bi, int err) * split a bio - only worry about a bio with a single page * in it's iovec */ -struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) +struct bio_pair *bio_split(struct bio *bi, int first_sectors) { - struct bio_pair *bp = mempool_alloc(pool, GFP_NOIO); + struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO); if (!bp) return bp; @@ -1292,7 +1292,7 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) bp->bio2.bi_end_io = bio_pair_end_2; bp->bio1.bi_private = bi; - bp->bio2.bi_private = pool; + bp->bio2.bi_private = bio_split_pool; if (bio_integrity(bi)) bio_integrity_split(bi, bp, first_sectors); @@ -1455,7 +1455,6 @@ EXPORT_SYMBOL(bio_map_kern); EXPORT_SYMBOL(bio_copy_kern); EXPORT_SYMBOL(bio_pair_release); EXPORT_SYMBOL(bio_split); -EXPORT_SYMBOL(bio_split_pool); EXPORT_SYMBOL(bio_copy_user); EXPORT_SYMBOL(bio_uncopy_user); EXPORT_SYMBOL(bioset_create); diff --git a/include/linux/bio.h b/include/linux/bio.h index fe12d0f9eba..fb97221d7c3 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -300,9 +300,7 @@ struct bio_pair { atomic_t cnt; int error; }; -extern struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, - int first_sectors); -extern mempool_t *bio_split_pool; +extern struct bio_pair *bio_split(struct bio *bi, int first_sectors); extern void bio_pair_release(struct bio_pair *dbio); extern struct bio_set *bioset_create(int, int); -- cgit v1.2.3-70-g09d2