From d15b774c2920d55e3d58275c97fbe3adc3afde38 Mon Sep 17 00:00:00 2001
From: Alasdair G Kergon
Date: Tue, 2 Aug 2011 12:32:01 +0100
Subject: dm: fix idr leak on module removal

Destroy _minor_idr when unloading the core dm module.  (Found by
kmemleak.)

Cc: stable@kernel.org
Signed-off-by: Alasdair G Kergon
---
 drivers/md/dm.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'drivers/md/dm.c')

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 0cf68b47887..41abc6dd481 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -37,6 +37,8 @@ static const char *_name = DM_NAME;
 static unsigned int major = 0;
 static unsigned int _major = 0;
 
+static DEFINE_IDR(_minor_idr);
+
 static DEFINE_SPINLOCK(_minor_lock);
 /*
  * For bio-based dm.
@@ -313,6 +315,12 @@ static void __exit dm_exit(void)
 
 	while (i--)
 		_exits[i]();
+
+	/*
+	 * Should be empty by this point.
+	 */
+	idr_remove_all(&_minor_idr);
+	idr_destroy(&_minor_idr);
 }
 
 /*
@@ -1705,8 +1713,6 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
 /*-----------------------------------------------------------------
  * An IDR is used to keep track of allocated minor numbers.
  *---------------------------------------------------------------*/
-static DEFINE_IDR(_minor_idr);
-
 static void free_minor(int minor)
 {
 	spin_lock(&_minor_lock);
--
cgit v1.2.3-70-g09d2
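The pattern the fix restores applies to any module that declares a static
IDR: idr_destroy() only releases the IDR's internal cache of idr_layer
structures, and any remaining ids must be removed first.  A minimal
sketch of the lifecycle, assuming a 2011-era kernel, with an invented
example_idr standing in for _minor_idr (hypothetical module, not part of
the patch):

/* Hypothetical module illustrating the teardown this patch adds. */
#include <linux/idr.h>
#include <linux/module.h>

static DEFINE_IDR(example_idr);	/* stands in for _minor_idr */

static int __init example_init(void)
{
	return 0;	/* ids would be allocated with idr_get_new() */
}

static void __exit example_exit(void)
{
	/* Remove any ids still present; idr_destroy() alone is not enough. */
	idr_remove_all(&example_idr);
	/* Free the cached idr_layer allocations that kmemleak reported. */
	idr_destroy(&example_idr);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");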
From 936688d7eb0f39be96c5791be1a04994cc8d6aa0 Mon Sep 17 00:00:00 2001
From: Mike Snitzer
Date: Tue, 2 Aug 2011 12:32:01 +0100
Subject: dm table: fix discard support

Remove 'discards_supported' from the dm_table structure.  The same
information can be easily discovered from the table's target(s) in
dm_table_supports_discards().

Before this fix, dm_table_supports_discards() would skip checking the
individual targets' 'discards_supported' flag if any one target in the
table didn't set num_discard_requests > 0.  Now the per-target
'discards_supported' flag is effective at ensuring the final DM device
advertises discard support.  But, to be clear, targets that don't
support discards (!num_discard_requests) will not receive discard
requests.

Also DMWARN if a target sets the 'discards_supported' override but
forgets to set 'num_discard_requests'.

Signed-off-by: Mike Snitzer
Signed-off-by: Alasdair G Kergon
---
 drivers/md/dm-table.c | 15 +++++++--------
 drivers/md/dm.c       |  3 ++-
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'drivers/md/dm.c')

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index bfe9c2333ce..3909fa259f5 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -54,7 +54,6 @@ struct dm_table {
 	sector_t *highs;
 	struct dm_target *targets;
 
-	unsigned discards_supported:1;
 	unsigned integrity_supported:1;
 
 	/*
@@ -209,7 +208,6 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
 	INIT_LIST_HEAD(&t->devices);
 	INIT_LIST_HEAD(&t->target_callbacks);
 	atomic_set(&t->holders, 0);
-	t->discards_supported = 1;
 
 	if (!num_targets)
 		num_targets = KEYS_PER_NODE;
@@ -791,8 +789,9 @@ int dm_table_add_target(struct dm_table *t, const char *type,
 
 	t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
 
-	if (!tgt->num_discard_requests)
-		t->discards_supported = 0;
+	if (!tgt->num_discard_requests && tgt->discards_supported)
+		DMWARN("%s: %s: ignoring discards_supported because num_discard_requests is zero.",
+		       dm_device_name(t->md), type);
 
 	return 0;
 
@@ -1359,19 +1358,19 @@ bool dm_table_supports_discards(struct dm_table *t)
 	struct dm_target *ti;
 	unsigned i = 0;
 
-	if (!t->discards_supported)
-		return 0;
-
 	/*
 	 * Unless any target used by the table set discards_supported,
 	 * require at least one underlying device to support discards.
 	 * t->devices includes internal dm devices such as mirror logs
 	 * so we need to use iterate_devices here, which targets
-	 * supporting discard must provide.
+	 * supporting discard selectively must provide.
 	 */
 	while (i < dm_table_get_num_targets(t)) {
 		ti = dm_table_get_target(t, i++);
 
+		if (!ti->num_discard_requests)
+			continue;
+
 		if (ti->discards_supported)
 			return 1;
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 41abc6dd481..aeb0fa1ccfe 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1179,7 +1179,8 @@ static int __clone_and_map_discard(struct clone_info *ci)
 
 	/*
 	 * Even though the device advertised discard support,
-	 * reconfiguration might have changed that since the
+	 * that does not mean every target supports it, and
+	 * reconfiguration might also have changed that since the
 	 * check was performed.
 	 */
 	if (!ti->num_discard_requests)
--
cgit v1.2.3-70-g09d2
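The reworked check leaves targets two knobs, both fields of struct
dm_target.  A hedged sketch of how a target constructor would use them
(hypothetical 'example' target, not from the patch; field names follow
the 2011-era interface shown in the diff):

/* Hypothetical constructor for an 'example' target. */
static int example_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
	/*
	 * Required to receive discards at all: targets that leave
	 * num_discard_requests at zero are now skipped entirely by
	 * dm_table_supports_discards().
	 */
	ti->num_discard_requests = 1;

	/*
	 * Optional override: advertise discard support without probing
	 * the underlying devices, e.g. for a target that implements
	 * discards itself.  Setting this while leaving
	 * num_discard_requests at zero triggers the new DMWARN.
	 */
	ti->discards_supported = 1;

	return 0;
}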
From d5b9dd04bd74b774b8e8d93ced7a0d15ad403fa9 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka
Date: Tue, 2 Aug 2011 12:32:04 +0100
Subject: dm: ignore merge_bvec for snapshots when safe

Add a new flag DMF_MERGE_IS_OPTIONAL to struct mapped_device to
indicate whether the device can accept bios larger than the size its
merge function returns.  When set, use this to send large bios to
snapshots, which can split them if necessary.  Snapshot I/O may be
significantly fragmented and this approach seems to improve
performance.

Before the patch, dm_set_device_limits restricted bio size to page size
if the underlying device had a merge function and the target didn't
provide a merge function.  After the patch, dm_set_device_limits
restricts bio size to page size if the underlying device has a merge
function, doesn't have the DMF_MERGE_IS_OPTIONAL flag set, and the
target doesn't provide a merge function.

The snapshot target can't provide a merge function because when the
merge function is called, it is impossible to determine where the bio
will be remapped.  Previously this led us to impose a 4k limit, which
we can now remove if the snapshot store is located on a device without
a merge function.  Together with another patch for optimizing full
chunk writes, it improves performance from 29MB/s to 40MB/s when
writing to the filesystem on a snapshot store.

If the snapshot store is placed on a non-dm device with a merge
function (such as md-raid), device mapper still limits all bios to page
size.

Signed-off-by: Mikulas Patocka
Signed-off-by: Alasdair G Kergon
---
 drivers/md/dm-table.c |  3 +--
 drivers/md/dm.c       | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/md/dm.h       |  2 ++
 3 files changed, 64 insertions(+), 2 deletions(-)

(limited to 'drivers/md/dm.c')

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index d58b200a353..03516c7f5be 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -540,8 +540,7 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
 	 * If not we'll force DM to use PAGE_SIZE or
 	 * smaller I/O, just to be safe.
 	 */
-
-	if (q->merge_bvec_fn && !ti->type->merge)
+	if (dm_queue_merge_is_compulsory(q) && !ti->type->merge)
 		blk_limits_max_hw_sectors(limits,
 					  (unsigned int) (PAGE_SIZE >> 9));
 	return 0;

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index aeb0fa1ccfe..1000eaf984e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -111,6 +111,7 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
 #define DMF_FREEING 3
 #define DMF_DELETING 4
 #define DMF_NOFLUSH_SUSPENDING 5
+#define DMF_MERGE_IS_OPTIONAL 6
 
 /*
  * Work processed by per-device workqueue.
@@ -1992,6 +1993,59 @@ static void __set_size(struct mapped_device *md, sector_t size)
 	i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
 }
 
+/*
+ * Return 1 if the queue has a compulsory merge_bvec_fn function.
+ *
+ * If this function returns 0, then the device is either a non-dm
+ * device without a merge_bvec_fn, or it is a dm device that is
+ * able to split any bios it receives that are too big.
+ */
+int dm_queue_merge_is_compulsory(struct request_queue *q)
+{
+	struct mapped_device *dev_md;
+
+	if (!q->merge_bvec_fn)
+		return 0;
+
+	if (q->make_request_fn == dm_request) {
+		dev_md = q->queuedata;
+		if (test_bit(DMF_MERGE_IS_OPTIONAL, &dev_md->flags))
+			return 0;
+	}
+
+	return 1;
+}
+
+static int dm_device_merge_is_compulsory(struct dm_target *ti,
+					 struct dm_dev *dev, sector_t start,
+					 sector_t len, void *data)
+{
+	struct block_device *bdev = dev->bdev;
+	struct request_queue *q = bdev_get_queue(bdev);
+
+	return dm_queue_merge_is_compulsory(q);
+}
+
+/*
+ * Return 1 if it is acceptable to ignore merge_bvec_fn based
+ * on the properties of the underlying devices.
+ */
+static int dm_table_merge_is_optional(struct dm_table *table)
+{
+	unsigned i = 0;
+	struct dm_target *ti;
+
+	while (i < dm_table_get_num_targets(table)) {
+		ti = dm_table_get_target(table, i++);
+
+		if (ti->type->iterate_devices &&
+		    ti->type->iterate_devices(ti, dm_device_merge_is_compulsory, NULL))
+			return 0;
+	}
+
+	return 1;
+}
+
 /*
  * Returns old map, which caller must destroy.
  */
@@ -2002,6 +2056,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
 	struct request_queue *q = md->queue;
 	sector_t size;
 	unsigned long flags;
+	int merge_is_optional;
 
 	size = dm_table_get_size(t);
 
@@ -2027,10 +2082,16 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
 
 	__bind_mempools(md, t);
 
+	merge_is_optional = dm_table_merge_is_optional(t);
+
 	write_lock_irqsave(&md->map_lock, flags);
 	old_map = md->map;
 	md->map = t;
 	dm_table_set_restrictions(t, q, limits);
+	if (merge_is_optional)
+		set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
+	else
+		clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
 	write_unlock_irqrestore(&md->map_lock, flags);
 
 	return old_map;

diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 1aaf16746da..6745dbd278a 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -66,6 +66,8 @@ int dm_table_alloc_md_mempools(struct dm_table *t);
 void dm_table_free_md_mempools(struct dm_table *t);
 struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
 
+int dm_queue_merge_is_compulsory(struct request_queue *q);
+
 void dm_lock_md_type(struct mapped_device *md);
 void dm_unlock_md_type(struct mapped_device *md);
 void dm_set_md_type(struct mapped_device *md, unsigned type);
--
cgit v1.2.3-70-g09d2
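The resulting stacking rule condenses to a single predicate.  In the
restatement below, the decision table is an illustrative summary and
only the final two code lines are the patch's own:

/*
 * The clamp decision per underlying device, after this patch:
 *
 *   underlying queue                        merge compulsory?
 *   ------------------------------------   -----------------
 *   no merge_bvec_fn                        no  - no clamp
 *   dm device with DMF_MERGE_IS_OPTIONAL    no  - no clamp
 *   dm device without the flag              yes - clamp
 *   non-dm device with merge_bvec_fn        yes - clamp (md-raid case)
 *
 * When the merge function is compulsory and the target itself has no
 * merge method, dm_set_device_limits() keeps the old PAGE_SIZE limit:
 */
if (dm_queue_merge_is_compulsory(q) && !ti->type->merge)
	blk_limits_max_hw_sectors(limits,
				  (unsigned int) (PAGE_SIZE >> 9));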
From ed8b752bccf2560e305e25125721d2f0ac759e88 Mon Sep 17 00:00:00 2001
From: Mike Snitzer
Date: Tue, 2 Aug 2011 12:32:08 +0100
Subject: dm table: set flush capability based on underlying devices

DM has always advertised both REQ_FLUSH and REQ_FUA flush capabilities
regardless of whether or not a given DM device's underlying devices
also advertised a need for them.

Block's flush-merge changes from 2.6.39 have proven to be more costly
for DM devices.  Performance regressions have been reported even when
DM's underlying devices do not advertise that they have a write cache.

Fix the performance regressions by configuring a DM device's flushing
capabilities based on the capabilities of its underlying devices.

Signed-off-by: Mike Snitzer
Signed-off-by: Alasdair G Kergon
---
 drivers/md/dm-table.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 drivers/md/dm.c       |  1 -
 2 files changed, 43 insertions(+), 1 deletion(-)

(limited to 'drivers/md/dm.c')

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 259ce99302f..986b8754bb0 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1248,9 +1248,45 @@ static void dm_table_set_integrity(struct dm_table *t)
 			       blk_get_integrity(template_disk));
 }
 
+static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev,
+				sector_t start, sector_t len, void *data)
+{
+	unsigned flush = (*(unsigned *)data);
+	struct request_queue *q = bdev_get_queue(dev->bdev);
+
+	return q && (q->flush_flags & flush);
+}
+
+static bool dm_table_supports_flush(struct dm_table *t, unsigned flush)
+{
+	struct dm_target *ti;
+	unsigned i = 0;
+
+	/*
+	 * Require at least one underlying device to support flushes.
+	 * t->devices includes internal dm devices such as mirror logs
+	 * so we need to use iterate_devices here, which targets
+	 * supporting flushes must provide.
+	 */
+	while (i < dm_table_get_num_targets(t)) {
+		ti = dm_table_get_target(t, i++);
+
+		if (!ti->num_flush_requests)
+			continue;
+
+		if (ti->type->iterate_devices &&
+		    ti->type->iterate_devices(ti, device_flush_capable, &flush))
+			return 1;
+	}
+
+	return 0;
+}
+
 void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 			       struct queue_limits *limits)
 {
+	unsigned flush = 0;
+
 	/*
 	 * Copy table's limits to the DM device's request_queue
 	 */
@@ -1261,6 +1297,13 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 	else
 		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
 
+	if (dm_table_supports_flush(t, REQ_FLUSH)) {
+		flush |= REQ_FLUSH;
+		if (dm_table_supports_flush(t, REQ_FUA))
+			flush |= REQ_FUA;
+	}
+	blk_queue_flush(q, flush);
+
 	dm_table_set_integrity(t);
 
 	/*

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 1000eaf984e..52b39f335bb 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1808,7 +1808,6 @@ static void dm_init_md_queue(struct mapped_device *md)
 	blk_queue_make_request(md->queue, dm_request);
 	blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
 	blk_queue_merge_bvec(md->queue, dm_merge_bvec);
-	blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA);
 }
 
 /*
--
cgit v1.2.3-70-g09d2
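For a target author the relevant knob is num_flush_requests:
dm_table_supports_flush() skips any target that leaves it at zero, so
that target's devices can no longer pull REQ_FLUSH/REQ_FUA support into
the DM device's queue.  A minimal sketch of the opt-in (hypothetical
target constructor, not part of the patch):

/* Hypothetical constructor opting a target in to flush forwarding. */
static int example_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
	/*
	 * With this set, dm_table_supports_flush() probes the target's
	 * underlying devices via iterate_devices.  If no device has
	 * REQ_FLUSH in q->flush_flags, blk_queue_flush(q, 0) leaves the
	 * DM device advertising no flush capability at all.
	 */
	ti->num_flush_requests = 1;

	return 0;
}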