diff options
Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r-- | drivers/md/dm.c | 452 |
1 files changed, 240 insertions, 212 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index bb2cd3ce9b0..7e469260fe5 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -163,7 +163,6 @@ struct mapped_device { * io objects are allocated from here. */ mempool_t *io_pool; - mempool_t *tio_pool; struct bio_set *bs; @@ -197,7 +196,6 @@ struct mapped_device { */ struct dm_md_mempools { mempool_t *io_pool; - mempool_t *tio_pool; struct bio_set *bs; }; @@ -205,12 +203,6 @@ struct dm_md_mempools { static struct kmem_cache *_io_cache; static struct kmem_cache *_rq_tio_cache; -/* - * Unused now, and needs to be deleted. But since io_pool is overloaded and it's - * still used for _io_cache, I'm leaving this for a later cleanup - */ -static struct kmem_cache *_rq_bio_info_cache; - static int __init local_init(void) { int r = -ENOMEM; @@ -224,13 +216,9 @@ static int __init local_init(void) if (!_rq_tio_cache) goto out_free_io_cache; - _rq_bio_info_cache = KMEM_CACHE(dm_rq_clone_bio_info, 0); - if (!_rq_bio_info_cache) - goto out_free_rq_tio_cache; - r = dm_uevent_init(); if (r) - goto out_free_rq_bio_info_cache; + goto out_free_rq_tio_cache; _major = major; r = register_blkdev(_major, _name); @@ -244,8 +232,6 @@ static int __init local_init(void) out_uevent_exit: dm_uevent_exit(); -out_free_rq_bio_info_cache: - kmem_cache_destroy(_rq_bio_info_cache); out_free_rq_tio_cache: kmem_cache_destroy(_rq_tio_cache); out_free_io_cache: @@ -256,7 +242,6 @@ out_free_io_cache: static void local_exit(void) { - kmem_cache_destroy(_rq_bio_info_cache); kmem_cache_destroy(_rq_tio_cache); kmem_cache_destroy(_io_cache); unregister_blkdev(_major, _name); @@ -448,12 +433,12 @@ static void free_tio(struct mapped_device *md, struct dm_target_io *tio) static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md, gfp_t gfp_mask) { - return mempool_alloc(md->tio_pool, gfp_mask); + return mempool_alloc(md->io_pool, gfp_mask); } static void free_rq_tio(struct dm_rq_target_io *tio) { - mempool_free(tio, tio->md->tio_pool); + mempool_free(tio, tio->md->io_pool); } static int md_in_flight(struct mapped_device *md) @@ -985,12 +970,13 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len) } EXPORT_SYMBOL_GPL(dm_set_target_max_io_len); -static void __map_bio(struct dm_target *ti, struct dm_target_io *tio) +static void __map_bio(struct dm_target_io *tio) { int r; sector_t sector; struct mapped_device *md; struct bio *clone = &tio->clone; + struct dm_target *ti = tio->ti; clone->bi_end_io = clone_endio; clone->bi_private = tio; @@ -1031,32 +1017,54 @@ struct clone_info { unsigned short idx; }; +static void bio_setup_sector(struct bio *bio, sector_t sector, sector_t len) +{ + bio->bi_sector = sector; + bio->bi_size = to_bytes(len); +} + +static void bio_setup_bv(struct bio *bio, unsigned short idx, unsigned short bv_count) +{ + bio->bi_idx = idx; + bio->bi_vcnt = idx + bv_count; + bio->bi_flags &= ~(1 << BIO_SEG_VALID); +} + +static void clone_bio_integrity(struct bio *bio, struct bio *clone, + unsigned short idx, unsigned len, unsigned offset, + unsigned trim) +{ + if (!bio_integrity(bio)) + return; + + bio_integrity_clone(clone, bio, GFP_NOIO); + + if (trim) + bio_integrity_trim(clone, bio_sector_offset(bio, idx, offset), len); +} + /* * Creates a little bio that just does part of a bvec. */ -static void split_bvec(struct dm_target_io *tio, struct bio *bio, - sector_t sector, unsigned short idx, unsigned int offset, - unsigned int len, struct bio_set *bs) +static void clone_split_bio(struct dm_target_io *tio, struct bio *bio, + sector_t sector, unsigned short idx, + unsigned offset, unsigned len) { struct bio *clone = &tio->clone; struct bio_vec *bv = bio->bi_io_vec + idx; *clone->bi_io_vec = *bv; - clone->bi_sector = sector; + bio_setup_sector(clone, sector, len); + clone->bi_bdev = bio->bi_bdev; clone->bi_rw = bio->bi_rw; clone->bi_vcnt = 1; - clone->bi_size = to_bytes(len); clone->bi_io_vec->bv_offset = offset; clone->bi_io_vec->bv_len = clone->bi_size; clone->bi_flags |= 1 << BIO_CLONED; - if (bio_integrity(bio)) { - bio_integrity_clone(clone, bio, GFP_NOIO); - bio_integrity_trim(clone, - bio_sector_offset(bio, idx, offset), len); - } + clone_bio_integrity(bio, clone, idx, len, offset, 1); } /* @@ -1064,29 +1072,23 @@ static void split_bvec(struct dm_target_io *tio, struct bio *bio, */ static void clone_bio(struct dm_target_io *tio, struct bio *bio, sector_t sector, unsigned short idx, - unsigned short bv_count, unsigned int len, - struct bio_set *bs) + unsigned short bv_count, unsigned len) { struct bio *clone = &tio->clone; + unsigned trim = 0; __bio_clone(clone, bio); - clone->bi_sector = sector; - clone->bi_idx = idx; - clone->bi_vcnt = idx + bv_count; - clone->bi_size = to_bytes(len); - clone->bi_flags &= ~(1 << BIO_SEG_VALID); - - if (bio_integrity(bio)) { - bio_integrity_clone(clone, bio, GFP_NOIO); - - if (idx != bio->bi_idx || clone->bi_size < bio->bi_size) - bio_integrity_trim(clone, - bio_sector_offset(bio, idx, 0), len); - } + bio_setup_sector(clone, sector, len); + bio_setup_bv(clone, idx, bv_count); + + if (idx != bio->bi_idx || clone->bi_size < bio->bi_size) + trim = 1; + clone_bio_integrity(bio, clone, idx, len, 0, trim); } static struct dm_target_io *alloc_tio(struct clone_info *ci, - struct dm_target *ti, int nr_iovecs) + struct dm_target *ti, int nr_iovecs, + unsigned target_bio_nr) { struct dm_target_io *tio; struct bio *clone; @@ -1097,96 +1099,104 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, tio->io = ci->io; tio->ti = ti; memset(&tio->info, 0, sizeof(tio->info)); - tio->target_request_nr = 0; + tio->target_bio_nr = target_bio_nr; return tio; } -static void __issue_target_request(struct clone_info *ci, struct dm_target *ti, - unsigned request_nr, sector_t len) +static void __clone_and_map_simple_bio(struct clone_info *ci, + struct dm_target *ti, + unsigned target_bio_nr, sector_t len) { - struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs); + struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs, target_bio_nr); struct bio *clone = &tio->clone; - tio->target_request_nr = request_nr; - /* * Discard requests require the bio's inline iovecs be initialized. * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush * and discard, so no need for concern about wasted bvec allocations. */ - __bio_clone(clone, ci->bio); - if (len) { - clone->bi_sector = ci->sector; - clone->bi_size = to_bytes(len); - } + if (len) + bio_setup_sector(clone, ci->sector, len); - __map_bio(ti, tio); + __map_bio(tio); } -static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti, - unsigned num_requests, sector_t len) +static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, + unsigned num_bios, sector_t len) { - unsigned request_nr; + unsigned target_bio_nr; - for (request_nr = 0; request_nr < num_requests; request_nr++) - __issue_target_request(ci, ti, request_nr, len); + for (target_bio_nr = 0; target_bio_nr < num_bios; target_bio_nr++) + __clone_and_map_simple_bio(ci, ti, target_bio_nr, len); } -static int __clone_and_map_empty_flush(struct clone_info *ci) +static int __send_empty_flush(struct clone_info *ci) { unsigned target_nr = 0; struct dm_target *ti; BUG_ON(bio_has_data(ci->bio)); while ((ti = dm_table_get_target(ci->map, target_nr++))) - __issue_target_requests(ci, ti, ti->num_flush_requests, 0); + __send_duplicate_bios(ci, ti, ti->num_flush_bios, 0); return 0; } -/* - * Perform all io with a single clone. - */ -static void __clone_and_map_simple(struct clone_info *ci, struct dm_target *ti) +static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti, + sector_t sector, int nr_iovecs, + unsigned short idx, unsigned short bv_count, + unsigned offset, unsigned len, + unsigned split_bvec) { struct bio *bio = ci->bio; struct dm_target_io *tio; + unsigned target_bio_nr; + unsigned num_target_bios = 1; + + /* + * Does the target want to receive duplicate copies of the bio? + */ + if (bio_data_dir(bio) == WRITE && ti->num_write_bios) + num_target_bios = ti->num_write_bios(ti, bio); - tio = alloc_tio(ci, ti, bio->bi_max_vecs); - clone_bio(tio, bio, ci->sector, ci->idx, bio->bi_vcnt - ci->idx, - ci->sector_count, ci->md->bs); - __map_bio(ti, tio); - ci->sector_count = 0; + for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) { + tio = alloc_tio(ci, ti, nr_iovecs, target_bio_nr); + if (split_bvec) + clone_split_bio(tio, bio, sector, idx, offset, len); + else + clone_bio(tio, bio, sector, idx, bv_count, len); + __map_bio(tio); + } } -typedef unsigned (*get_num_requests_fn)(struct dm_target *ti); +typedef unsigned (*get_num_bios_fn)(struct dm_target *ti); -static unsigned get_num_discard_requests(struct dm_target *ti) +static unsigned get_num_discard_bios(struct dm_target *ti) { - return ti->num_discard_requests; + return ti->num_discard_bios; } -static unsigned get_num_write_same_requests(struct dm_target *ti) +static unsigned get_num_write_same_bios(struct dm_target *ti) { - return ti->num_write_same_requests; + return ti->num_write_same_bios; } typedef bool (*is_split_required_fn)(struct dm_target *ti); static bool is_split_required_for_discard(struct dm_target *ti) { - return ti->split_discard_requests; + return ti->split_discard_bios; } -static int __clone_and_map_changing_extent_only(struct clone_info *ci, - get_num_requests_fn get_num_requests, - is_split_required_fn is_split_required) +static int __send_changing_extent_only(struct clone_info *ci, + get_num_bios_fn get_num_bios, + is_split_required_fn is_split_required) { struct dm_target *ti; sector_t len; - unsigned num_requests; + unsigned num_bios; do { ti = dm_table_find_target(ci->map, ci->sector); @@ -1199,8 +1209,8 @@ static int __clone_and_map_changing_extent_only(struct clone_info *ci, * reconfiguration might also have changed that since the * check was performed. */ - num_requests = get_num_requests ? get_num_requests(ti) : 0; - if (!num_requests) + num_bios = get_num_bios ? get_num_bios(ti) : 0; + if (!num_bios) return -EOPNOTSUPP; if (is_split_required && !is_split_required(ti)) @@ -1208,7 +1218,7 @@ static int __clone_and_map_changing_extent_only(struct clone_info *ci, else len = min(ci->sector_count, max_io_len(ci->sector, ti)); - __issue_target_requests(ci, ti, num_requests, len); + __send_duplicate_bios(ci, ti, num_bios, len); ci->sector += len; } while (ci->sector_count -= len); @@ -1216,108 +1226,129 @@ static int __clone_and_map_changing_extent_only(struct clone_info *ci, return 0; } -static int __clone_and_map_discard(struct clone_info *ci) +static int __send_discard(struct clone_info *ci) { - return __clone_and_map_changing_extent_only(ci, get_num_discard_requests, - is_split_required_for_discard); + return __send_changing_extent_only(ci, get_num_discard_bios, + is_split_required_for_discard); } -static int __clone_and_map_write_same(struct clone_info *ci) +static int __send_write_same(struct clone_info *ci) { - return __clone_and_map_changing_extent_only(ci, get_num_write_same_requests, NULL); + return __send_changing_extent_only(ci, get_num_write_same_bios, NULL); } -static int __clone_and_map(struct clone_info *ci) +/* + * Find maximum number of sectors / bvecs we can process with a single bio. + */ +static sector_t __len_within_target(struct clone_info *ci, sector_t max, int *idx) { struct bio *bio = ci->bio; - struct dm_target *ti; - sector_t len = 0, max; - struct dm_target_io *tio; - - if (unlikely(bio->bi_rw & REQ_DISCARD)) - return __clone_and_map_discard(ci); - else if (unlikely(bio->bi_rw & REQ_WRITE_SAME)) - return __clone_and_map_write_same(ci); + sector_t bv_len, total_len = 0; - ti = dm_table_find_target(ci->map, ci->sector); - if (!dm_target_is_valid(ti)) - return -EIO; + for (*idx = ci->idx; max && (*idx < bio->bi_vcnt); (*idx)++) { + bv_len = to_sector(bio->bi_io_vec[*idx].bv_len); - max = max_io_len(ci->sector, ti); + if (bv_len > max) + break; - if (ci->sector_count <= max) { - /* - * Optimise for the simple case where we can do all of - * the remaining io with a single clone. - */ - __clone_and_map_simple(ci, ti); + max -= bv_len; + total_len += bv_len; + } - } else if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) { - /* - * There are some bvecs that don't span targets. - * Do as many of these as possible. - */ - int i; - sector_t remaining = max; - sector_t bv_len; + return total_len; +} - for (i = ci->idx; remaining && (i < bio->bi_vcnt); i++) { - bv_len = to_sector(bio->bi_io_vec[i].bv_len); +static int __split_bvec_across_targets(struct clone_info *ci, + struct dm_target *ti, sector_t max) +{ + struct bio *bio = ci->bio; + struct bio_vec *bv = bio->bi_io_vec + ci->idx; + sector_t remaining = to_sector(bv->bv_len); + unsigned offset = 0; + sector_t len; - if (bv_len > remaining) - break; + do { + if (offset) { + ti = dm_table_find_target(ci->map, ci->sector); + if (!dm_target_is_valid(ti)) + return -EIO; - remaining -= bv_len; - len += bv_len; + max = max_io_len(ci->sector, ti); } - tio = alloc_tio(ci, ti, bio->bi_max_vecs); - clone_bio(tio, bio, ci->sector, ci->idx, i - ci->idx, len, - ci->md->bs); - __map_bio(ti, tio); + len = min(remaining, max); + + __clone_and_map_data_bio(ci, ti, ci->sector, 1, ci->idx, 0, + bv->bv_offset + offset, len, 1); ci->sector += len; ci->sector_count -= len; - ci->idx = i; + offset += to_bytes(len); + } while (remaining -= len); - } else { - /* - * Handle a bvec that must be split between two or more targets. - */ - struct bio_vec *bv = bio->bi_io_vec + ci->idx; - sector_t remaining = to_sector(bv->bv_len); - unsigned int offset = 0; + ci->idx++; + + return 0; +} + +/* + * Select the correct strategy for processing a non-flush bio. + */ +static int __split_and_process_non_flush(struct clone_info *ci) +{ + struct bio *bio = ci->bio; + struct dm_target *ti; + sector_t len, max; + int idx; + + if (unlikely(bio->bi_rw & REQ_DISCARD)) + return __send_discard(ci); + else if (unlikely(bio->bi_rw & REQ_WRITE_SAME)) + return __send_write_same(ci); - do { - if (offset) { - ti = dm_table_find_target(ci->map, ci->sector); - if (!dm_target_is_valid(ti)) - return -EIO; + ti = dm_table_find_target(ci->map, ci->sector); + if (!dm_target_is_valid(ti)) + return -EIO; - max = max_io_len(ci->sector, ti); - } + max = max_io_len(ci->sector, ti); - len = min(remaining, max); + /* + * Optimise for the simple case where we can do all of + * the remaining io with a single clone. + */ + if (ci->sector_count <= max) { + __clone_and_map_data_bio(ci, ti, ci->sector, bio->bi_max_vecs, + ci->idx, bio->bi_vcnt - ci->idx, 0, + ci->sector_count, 0); + ci->sector_count = 0; + return 0; + } - tio = alloc_tio(ci, ti, 1); - split_bvec(tio, bio, ci->sector, ci->idx, - bv->bv_offset + offset, len, ci->md->bs); + /* + * There are some bvecs that don't span targets. + * Do as many of these as possible. + */ + if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) { + len = __len_within_target(ci, max, &idx); - __map_bio(ti, tio); + __clone_and_map_data_bio(ci, ti, ci->sector, bio->bi_max_vecs, + ci->idx, idx - ci->idx, 0, len, 0); - ci->sector += len; - ci->sector_count -= len; - offset += to_bytes(len); - } while (remaining -= len); + ci->sector += len; + ci->sector_count -= len; + ci->idx = idx; - ci->idx++; + return 0; } - return 0; + /* + * Handle a bvec that must be split between two or more targets. + */ + return __split_bvec_across_targets(ci, ti, max); } /* - * Split the bio into several clones and submit it to targets. + * Entry point to split a bio into clones and submit them to the targets. */ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) { @@ -1341,16 +1372,17 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) ci.idx = bio->bi_idx; start_io_acct(ci.io); + if (bio->bi_rw & REQ_FLUSH) { ci.bio = &ci.md->flush_bio; ci.sector_count = 0; - error = __clone_and_map_empty_flush(&ci); + error = __send_empty_flush(&ci); /* dec_pending submits any data associated with flush */ } else { ci.bio = bio; ci.sector_count = bio_sectors(bio); while (ci.sector_count && !error) - error = __clone_and_map(&ci); + error = __split_and_process_non_flush(&ci); } /* drop the extra reference count */ @@ -1923,8 +1955,6 @@ static void free_dev(struct mapped_device *md) unlock_fs(md); bdput(md->bdev); destroy_workqueue(md->wq); - if (md->tio_pool) - mempool_destroy(md->tio_pool); if (md->io_pool) mempool_destroy(md->io_pool); if (md->bs) @@ -1947,24 +1977,33 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t) { struct dm_md_mempools *p = dm_table_get_md_mempools(t); - if (md->io_pool && (md->tio_pool || dm_table_get_type(t) == DM_TYPE_BIO_BASED) && md->bs) { - /* - * The md already has necessary mempools. Reload just the - * bioset because front_pad may have changed because - * a different table was loaded. - */ - bioset_free(md->bs); - md->bs = p->bs; - p->bs = NULL; + if (md->io_pool && md->bs) { + /* The md already has necessary mempools. */ + if (dm_table_get_type(t) == DM_TYPE_BIO_BASED) { + /* + * Reload bioset because front_pad may have changed + * because a different table was loaded. + */ + bioset_free(md->bs); + md->bs = p->bs; + p->bs = NULL; + } else if (dm_table_get_type(t) == DM_TYPE_REQUEST_BASED) { + /* + * There's no need to reload with request-based dm + * because the size of front_pad doesn't change. + * Note for future: If you are to reload bioset, + * prep-ed requests in the queue may refer + * to bio from the old bioset, so you must walk + * through the queue to unprep. + */ + } goto out; } - BUG_ON(!p || md->io_pool || md->tio_pool || md->bs); + BUG_ON(!p || md->io_pool || md->bs); md->io_pool = p->io_pool; p->io_pool = NULL; - md->tio_pool = p->tio_pool; - p->tio_pool = NULL; md->bs = p->bs; p->bs = NULL; @@ -2395,7 +2434,7 @@ static void dm_queue_flush(struct mapped_device *md) */ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) { - struct dm_table *live_map, *map = ERR_PTR(-EINVAL); + struct dm_table *live_map = NULL, *map = ERR_PTR(-EINVAL); struct queue_limits limits; int r; @@ -2418,10 +2457,12 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) dm_table_put(live_map); } - r = dm_calculate_queue_limits(table, &limits); - if (r) { - map = ERR_PTR(r); - goto out; + if (!live_map) { + r = dm_calculate_queue_limits(table, &limits); + if (r) { + map = ERR_PTR(r); + goto out; + } } map = __bind(md, table, &limits); @@ -2719,52 +2760,42 @@ EXPORT_SYMBOL_GPL(dm_noflush_suspending); struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size) { - struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL); - unsigned int pool_size = (type == DM_TYPE_BIO_BASED) ? 16 : MIN_IOS; + struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL); + struct kmem_cache *cachep; + unsigned int pool_size; + unsigned int front_pad; if (!pools) return NULL; - per_bio_data_size = roundup(per_bio_data_size, __alignof__(struct dm_target_io)); + if (type == DM_TYPE_BIO_BASED) { + cachep = _io_cache; + pool_size = 16; + front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone); + } else if (type == DM_TYPE_REQUEST_BASED) { + cachep = _rq_tio_cache; + pool_size = MIN_IOS; + front_pad = offsetof(struct dm_rq_clone_bio_info, clone); + /* per_bio_data_size is not used. See __bind_mempools(). */ + WARN_ON(per_bio_data_size != 0); + } else + goto out; - pools->io_pool = (type == DM_TYPE_BIO_BASED) ? - mempool_create_slab_pool(MIN_IOS, _io_cache) : - mempool_create_slab_pool(MIN_IOS, _rq_bio_info_cache); + pools->io_pool = mempool_create_slab_pool(MIN_IOS, cachep); if (!pools->io_pool) - goto free_pools_and_out; - - pools->tio_pool = NULL; - if (type == DM_TYPE_REQUEST_BASED) { - pools->tio_pool = mempool_create_slab_pool(MIN_IOS, _rq_tio_cache); - if (!pools->tio_pool) - goto free_io_pool_and_out; - } + goto out; - pools->bs = (type == DM_TYPE_BIO_BASED) ? - bioset_create(pool_size, - per_bio_data_size + offsetof(struct dm_target_io, clone)) : - bioset_create(pool_size, - offsetof(struct dm_rq_clone_bio_info, clone)); + pools->bs = bioset_create(pool_size, front_pad); if (!pools->bs) - goto free_tio_pool_and_out; + goto out; if (integrity && bioset_integrity_create(pools->bs, pool_size)) - goto free_bioset_and_out; + goto out; return pools; -free_bioset_and_out: - bioset_free(pools->bs); - -free_tio_pool_and_out: - if (pools->tio_pool) - mempool_destroy(pools->tio_pool); - -free_io_pool_and_out: - mempool_destroy(pools->io_pool); - -free_pools_and_out: - kfree(pools); +out: + dm_free_md_mempools(pools); return NULL; } @@ -2777,9 +2808,6 @@ void dm_free_md_mempools(struct dm_md_mempools *pools) if (pools->io_pool) mempool_destroy(pools->io_pool); - if (pools->tio_pool) - mempool_destroy(pools->tio_pool); - if (pools->bs) bioset_free(pools->bs); |