Diffstat (limited to 'drivers/md/dm-cache-target.c')
 -rw-r--r--  drivers/md/dm-cache-target.c | 467
1 file changed, 351 insertions(+), 116 deletions(-)
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 7130505c242..e1650539cc2 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -95,7 +95,6 @@ static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
/*----------------------------------------------------------------*/
-#define PRISON_CELLS 1024
#define MIGRATION_POOL_SIZE 128
#define COMMIT_PERIOD HZ
#define MIGRATION_COUNT_WINDOW 10
@@ -222,7 +221,13 @@ struct cache {
struct list_head need_commit_migrations;
sector_t migration_threshold;
wait_queue_head_t migration_wait;
- atomic_t nr_migrations;
+ atomic_t nr_allocated_migrations;
+
+ /*
+ * The number of in-flight migrations that are performing
+ * background I/O, e.g. promotion or writeback.
+ */
+ atomic_t nr_io_migrations;
wait_queue_head_t quiescing_wait;
atomic_t quiescing;
@@ -237,8 +242,9 @@ struct cache {
/*
* origin_blocks entries, discarded if set.
*/
- dm_oblock_t discard_nr_blocks;
+ dm_dblock_t discard_nr_blocks;
unsigned long *discard_bitset;
+ uint32_t discard_block_size; /* a power of 2 times sectors per block */
/*
* Rather than reconstructing the table line for the status we just
@@ -258,7 +264,6 @@ struct cache {
struct dm_deferred_set *all_io_ds;
mempool_t *migration_pool;
- struct dm_cache_migration *next_migration;
struct dm_cache_policy *policy;
unsigned policy_nr_args;
@@ -310,6 +315,7 @@ struct dm_cache_migration {
dm_cblock_t cblock;
bool err:1;
+ bool discard:1;
bool writeback:1;
bool demote:1;
bool promote:1;
@@ -349,10 +355,31 @@ static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cel
dm_bio_prison_free_cell(cache->prison, cell);
}
+static struct dm_cache_migration *alloc_migration(struct cache *cache)
+{
+ struct dm_cache_migration *mg;
+
+ mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
+ if (mg) {
+ mg->cache = cache;
+ atomic_inc(&mg->cache->nr_allocated_migrations);
+ }
+
+ return mg;
+}
+
+static void free_migration(struct dm_cache_migration *mg)
+{
+ if (atomic_dec_and_test(&mg->cache->nr_allocated_migrations))
+ wake_up(&mg->cache->migration_wait);
+
+ mempool_free(mg, mg->cache->migration_pool);
+}
+
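
The split between the two counters is the heart of this hunk: every migration, including the new discard migrations that do no background I/O, bumps nr_allocated_migrations, and only the final free wakes a suspender waiting in wait_for_migrations() further down; nr_io_migrations is reserved for throttling copies. Below is a minimal userspace sketch of that accounting, using C11 atomics in place of the kernel's atomic_t and wait queue; it is illustrative only, not kernel code.

    #include <assert.h>
    #include <stdatomic.h>

    static atomic_int nr_allocated_migrations;
    static atomic_int nr_io_migrations;

    static void alloc_migration(void)
    {
            atomic_fetch_add(&nr_allocated_migrations, 1);
    }

    /* Returns 1 when the last migration is freed (the kernel does
     * wake_up(&cache->migration_wait) at this point). */
    static int free_migration(void)
    {
            return atomic_fetch_sub(&nr_allocated_migrations, 1) == 1;
    }

    int main(void)
    {
            alloc_migration();                      /* a discard: no background I/O */
            alloc_migration();                      /* a promotion */
            atomic_fetch_add(&nr_io_migrations, 1); /* only copies are throttled */

            assert(!free_migration());              /* discard done, promotion live */
            atomic_fetch_sub(&nr_io_migrations, 1);
            assert(free_migration());               /* last free wakes the suspender */
            return 0;
    }
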
static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
{
if (!p->mg) {
- p->mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
+ p->mg = alloc_migration(cache);
if (!p->mg)
return -ENOMEM;
}
@@ -381,7 +408,7 @@ static void prealloc_free_structs(struct cache *cache, struct prealloc *p)
free_prison_cell(cache, p->cell1);
if (p->mg)
- mempool_free(p->mg, cache->migration_pool);
+ free_migration(p->mg);
}
static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p)
@@ -433,11 +460,12 @@ static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cel
/*----------------------------------------------------------------*/
-static void build_key(dm_oblock_t oblock, struct dm_cell_key *key)
+static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key *key)
{
key->virtual = 0;
key->dev = 0;
- key->block = from_oblock(oblock);
+ key->block_begin = from_oblock(begin);
+ key->block_end = from_oblock(end);
}
/*
@@ -447,15 +475,15 @@ static void build_key(dm_oblock_t oblock, struct dm_cell_key *key)
*/
typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell);
-static int bio_detain(struct cache *cache, dm_oblock_t oblock,
- struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
- cell_free_fn free_fn, void *free_context,
- struct dm_bio_prison_cell **cell_result)
+static int bio_detain_range(struct cache *cache, dm_oblock_t oblock_begin, dm_oblock_t oblock_end,
+ struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
+ cell_free_fn free_fn, void *free_context,
+ struct dm_bio_prison_cell **cell_result)
{
int r;
struct dm_cell_key key;
- build_key(oblock, &key);
+ build_key(oblock_begin, oblock_end, &key);
r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result);
if (r)
free_fn(free_context, cell_prealloc);
@@ -463,6 +491,16 @@ static int bio_detain(struct cache *cache, dm_oblock_t oblock,
return r;
}
+static int bio_detain(struct cache *cache, dm_oblock_t oblock,
+ struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
+ cell_free_fn free_fn, void *free_context,
+ struct dm_bio_prison_cell **cell_result)
+{
+ dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL);
+ return bio_detain_range(cache, oblock, end, bio,
+ cell_prealloc, free_fn, free_context, cell_result);
+}
+
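
With bio-prison keys now spanning [block_begin, block_end), detaining a single block is just the degenerate range [oblock, oblock + 1), as the wrapper above shows. A simplified sketch of the half-open range semantics, with types reduced from dm-bio-prison for illustration:

    #include <assert.h>
    #include <stdint.h>

    struct cell_key {
            uint64_t block_begin;
            uint64_t block_end;     /* one past the last block covered */
    };

    static int keys_overlap(const struct cell_key *a, const struct cell_key *b)
    {
            return a->block_begin < b->block_end &&
                   b->block_begin < a->block_end;
    }

    int main(void)
    {
            struct cell_key one   = { 7, 8 };   /* bio_detain() on oblock 7 */
            struct cell_key range = { 4, 12 };  /* a discard spanning 8 oblocks */
            struct cell_key next  = { 12, 16 };

            assert(keys_overlap(&one, &range));   /* the discard covers block 7 */
            assert(!keys_overlap(&range, &next)); /* half-open: no overlap */
            return 0;
    }
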
static int get_cell(struct cache *cache,
dm_oblock_t oblock,
struct prealloc *structs,
@@ -474,7 +512,7 @@ static int get_cell(struct cache *cache,
cell_prealloc = prealloc_get_cell(structs);
- build_key(oblock, &key);
+ build_key(oblock, to_oblock(from_oblock(oblock) + 1ULL), &key);
r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result);
if (r)
prealloc_put_cell(structs, cell_prealloc);
@@ -524,33 +562,57 @@ static dm_block_t block_div(dm_block_t b, uint32_t n)
return b;
}
-static void set_discard(struct cache *cache, dm_oblock_t b)
+static dm_block_t oblocks_per_dblock(struct cache *cache)
+{
+ dm_block_t oblocks = cache->discard_block_size;
+
+ if (block_size_is_power_of_two(cache))
+ oblocks >>= cache->sectors_per_block_shift;
+ else
+ oblocks = block_div(oblocks, cache->sectors_per_block);
+
+ return oblocks;
+}
+
+static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
+{
+ return to_dblock(block_div(from_oblock(oblock),
+ oblocks_per_dblock(cache)));
+}
+
+static dm_oblock_t dblock_to_oblock(struct cache *cache, dm_dblock_t dblock)
+{
+ return to_oblock(from_dblock(dblock) * oblocks_per_dblock(cache));
+}
+
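
The helpers above convert between origin blocks and the coarser discard blocks. The arithmetic is easiest to see with concrete numbers; this userspace sketch assumes 2048-sector discard blocks over 128-sector cache blocks (both sizes are assumptions, not taken from the patch):

    #include <assert.h>
    #include <stdint.h>

    #define DISCARD_BLOCK_SIZE 2048u /* sectors, assumed */
    #define SECTORS_PER_BLOCK   128u /* sectors, assumed */

    static uint64_t oblocks_per_dblock(void)
    {
            return DISCARD_BLOCK_SIZE / SECTORS_PER_BLOCK; /* 16 */
    }

    static uint64_t oblock_to_dblock(uint64_t oblock)
    {
            return oblock / oblocks_per_dblock();
    }

    static uint64_t dblock_to_oblock(uint64_t dblock)
    {
            return dblock * oblocks_per_dblock();
    }

    int main(void)
    {
            assert(oblock_to_dblock(40) == 2); /* oblocks 32..47 share dblock 2 */
            assert(dblock_to_oblock(2) == 32); /* first oblock under dblock 2 */
            return 0;
    }
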
+static void set_discard(struct cache *cache, dm_dblock_t b)
{
unsigned long flags;
+ BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks));
atomic_inc(&cache->stats.discard_count);
spin_lock_irqsave(&cache->lock, flags);
- set_bit(from_oblock(b), cache->discard_bitset);
+ set_bit(from_dblock(b), cache->discard_bitset);
spin_unlock_irqrestore(&cache->lock, flags);
}
-static void clear_discard(struct cache *cache, dm_oblock_t b)
+static void clear_discard(struct cache *cache, dm_dblock_t b)
{
unsigned long flags;
spin_lock_irqsave(&cache->lock, flags);
- clear_bit(from_oblock(b), cache->discard_bitset);
+ clear_bit(from_dblock(b), cache->discard_bitset);
spin_unlock_irqrestore(&cache->lock, flags);
}
-static bool is_discarded(struct cache *cache, dm_oblock_t b)
+static bool is_discarded(struct cache *cache, dm_dblock_t b)
{
int r;
unsigned long flags;
spin_lock_irqsave(&cache->lock, flags);
- r = test_bit(from_oblock(b), cache->discard_bitset);
+ r = test_bit(from_dblock(b), cache->discard_bitset);
spin_unlock_irqrestore(&cache->lock, flags);
return r;
@@ -562,7 +624,8 @@ static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
unsigned long flags;
spin_lock_irqsave(&cache->lock, flags);
- r = test_bit(from_oblock(b), cache->discard_bitset);
+ r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
+ cache->discard_bitset);
spin_unlock_irqrestore(&cache->lock, flags);
return r;
@@ -687,7 +750,7 @@ static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
check_if_tick_bio_needed(cache, bio);
remap_to_origin(cache, bio);
if (bio_data_dir(bio) == WRITE)
- clear_discard(cache, oblock);
+ clear_discard(cache, oblock_to_dblock(cache, oblock));
}
static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
@@ -697,7 +760,7 @@ static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
remap_to_cache(cache, bio, cblock);
if (bio_data_dir(bio) == WRITE) {
set_dirty(cache, oblock, cblock);
- clear_discard(cache, oblock);
+ clear_discard(cache, oblock_to_dblock(cache, oblock));
}
}
@@ -817,24 +880,14 @@ static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio,
* Migration covers moving data from the origin device to the cache, or
* vice versa.
*--------------------------------------------------------------*/
-static void free_migration(struct dm_cache_migration *mg)
+static void inc_io_migrations(struct cache *cache)
{
- mempool_free(mg, mg->cache->migration_pool);
+ atomic_inc(&cache->nr_io_migrations);
}
-static void inc_nr_migrations(struct cache *cache)
+static void dec_io_migrations(struct cache *cache)
{
- atomic_inc(&cache->nr_migrations);
-}
-
-static void dec_nr_migrations(struct cache *cache)
-{
- atomic_dec(&cache->nr_migrations);
-
- /*
- * Wake the worker in case we're suspending the target.
- */
- wake_up(&cache->migration_wait);
+ atomic_dec(&cache->nr_io_migrations);
}
static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
@@ -857,11 +910,10 @@ static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
wake_worker(cache);
}
-static void cleanup_migration(struct dm_cache_migration *mg)
+static void free_io_migration(struct dm_cache_migration *mg)
{
- struct cache *cache = mg->cache;
+ dec_io_migrations(mg->cache);
free_migration(mg);
- dec_nr_migrations(cache);
}
static void migration_failure(struct dm_cache_migration *mg)
@@ -886,7 +938,7 @@ static void migration_failure(struct dm_cache_migration *mg)
cell_defer(cache, mg->new_ocell, true);
}
- cleanup_migration(mg);
+ free_io_migration(mg);
}
static void migration_success_pre_commit(struct dm_cache_migration *mg)
@@ -897,7 +949,7 @@ static void migration_success_pre_commit(struct dm_cache_migration *mg)
if (mg->writeback) {
clear_dirty(cache, mg->old_oblock, mg->cblock);
cell_defer(cache, mg->old_ocell, false);
- cleanup_migration(mg);
+ free_io_migration(mg);
return;
} else if (mg->demote) {
@@ -907,14 +959,14 @@ static void migration_success_pre_commit(struct dm_cache_migration *mg)
mg->old_oblock);
if (mg->promote)
cell_defer(cache, mg->new_ocell, true);
- cleanup_migration(mg);
+ free_io_migration(mg);
return;
}
} else {
if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) {
DMWARN_LIMIT("promotion failed; couldn't update on disk metadata");
policy_remove_mapping(cache->policy, mg->new_oblock);
- cleanup_migration(mg);
+ free_io_migration(mg);
return;
}
}
@@ -947,18 +999,22 @@ static void migration_success_post_commit(struct dm_cache_migration *mg)
} else {
if (mg->invalidate)
policy_remove_mapping(cache->policy, mg->old_oblock);
- cleanup_migration(mg);
+ free_io_migration(mg);
}
} else {
- clear_dirty(cache, mg->new_oblock, mg->cblock);
- if (mg->requeue_holder)
+ if (mg->requeue_holder) {
+ clear_dirty(cache, mg->new_oblock, mg->cblock);
cell_defer(cache, mg->new_ocell, true);
- else {
+ } else {
+ /*
+ * The block was promoted via an overwrite, so it's dirty.
+ */
+ set_dirty(cache, mg->new_oblock, mg->cblock);
bio_endio(mg->new_ocell->holder, 0);
cell_defer(cache, mg->new_ocell, false);
}
- cleanup_migration(mg);
+ free_io_migration(mg);
}
}
@@ -978,7 +1034,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
wake_worker(cache);
}
-static void issue_copy_real(struct dm_cache_migration *mg)
+static void issue_copy(struct dm_cache_migration *mg)
{
int r;
struct dm_io_region o_region, c_region;
@@ -1057,11 +1113,46 @@ static void avoid_copy(struct dm_cache_migration *mg)
migration_success_pre_commit(mg);
}
-static void issue_copy(struct dm_cache_migration *mg)
+static void calc_discard_block_range(struct cache *cache, struct bio *bio,
+ dm_dblock_t *b, dm_dblock_t *e)
+{
+ sector_t sb = bio->bi_iter.bi_sector;
+ sector_t se = bio_end_sector(bio);
+
+ *b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size));
+
+ if (se - sb < cache->discard_block_size)
+ *e = *b;
+ else
+ *e = to_dblock(block_div(se, cache->discard_block_size));
+}
+
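
calc_discard_block_range() rounds the bio's start up and its end down to discard-block boundaries, so only blocks wholly covered by the discard get marked. A userspace model with an assumed 2048-sector discard block; plain integer division stands in for dm_sector_div_up() and block_div():

    #include <assert.h>
    #include <stdint.h>

    #define DISCARD_BLOCK_SIZE 2048u /* sectors, assumed */

    static void calc_range(uint64_t sb, uint64_t se, uint64_t *b, uint64_t *e)
    {
            /* dm_sector_div_up(): round the start up */
            *b = (sb + DISCARD_BLOCK_SIZE - 1) / DISCARD_BLOCK_SIZE;

            if (se - sb < DISCARD_BLOCK_SIZE)
                    *e = *b;                      /* too short to cover a block */
            else
                    *e = se / DISCARD_BLOCK_SIZE; /* block_div(): round down */
    }

    int main(void)
    {
            uint64_t b, e;

            calc_range(1000, 9000, &b, &e);
            assert(b == 1 && e == 4); /* marks dblocks 1..3, sectors 2048..8191 */

            calc_range(100, 200, &b, &e);
            assert(b == e);           /* sub-block discard marks nothing */
            return 0;
    }
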
+static void issue_discard(struct dm_cache_migration *mg)
+{
+ dm_dblock_t b, e;
+ struct bio *bio = mg->new_ocell->holder;
+
+ calc_discard_block_range(mg->cache, bio, &b, &e);
+ while (b != e) {
+ set_discard(mg->cache, b);
+ b = to_dblock(from_dblock(b) + 1);
+ }
+
+ bio_endio(bio, 0);
+ cell_defer(mg->cache, mg->new_ocell, false);
+ free_migration(mg);
+}
+
+static void issue_copy_or_discard(struct dm_cache_migration *mg)
{
bool avoid;
struct cache *cache = mg->cache;
+ if (mg->discard) {
+ issue_discard(mg);
+ return;
+ }
+
if (mg->writeback || mg->demote)
avoid = !is_dirty(cache, mg->cblock) ||
is_discarded_oblock(cache, mg->old_oblock);
@@ -1070,13 +1161,14 @@ static void issue_copy(struct dm_cache_migration *mg)
avoid = is_discarded_oblock(cache, mg->new_oblock);
- if (!avoid && bio_writes_complete_block(cache, bio)) {
+ if (writeback_mode(&cache->features) &&
+ !avoid && bio_writes_complete_block(cache, bio)) {
issue_overwrite(mg, bio);
return;
}
}
- avoid ? avoid_copy(mg) : issue_copy_real(mg);
+ avoid ? avoid_copy(mg) : issue_copy(mg);
}
static void complete_migration(struct dm_cache_migration *mg)
@@ -1161,6 +1253,7 @@ static void promote(struct cache *cache, struct prealloc *structs,
struct dm_cache_migration *mg = prealloc_get_migration(structs);
mg->err = false;
+ mg->discard = false;
mg->writeback = false;
mg->demote = false;
mg->promote = true;
@@ -1173,7 +1266,7 @@ static void promote(struct cache *cache, struct prealloc *structs,
mg->new_ocell = cell;
mg->start_jiffies = jiffies;
- inc_nr_migrations(cache);
+ inc_io_migrations(cache);
quiesce_migration(mg);
}
@@ -1184,6 +1277,7 @@ static void writeback(struct cache *cache, struct prealloc *structs,
struct dm_cache_migration *mg = prealloc_get_migration(structs);
mg->err = false;
+ mg->discard = false;
mg->writeback = true;
mg->demote = false;
mg->promote = false;
@@ -1196,7 +1290,7 @@ static void writeback(struct cache *cache, struct prealloc *structs,
mg->new_ocell = NULL;
mg->start_jiffies = jiffies;
- inc_nr_migrations(cache);
+ inc_io_migrations(cache);
quiesce_migration(mg);
}
@@ -1209,6 +1303,7 @@ static void demote_then_promote(struct cache *cache, struct prealloc *structs,
struct dm_cache_migration *mg = prealloc_get_migration(structs);
mg->err = false;
+ mg->discard = false;
mg->writeback = false;
mg->demote = true;
mg->promote = true;
@@ -1222,7 +1317,7 @@ static void demote_then_promote(struct cache *cache, struct prealloc *structs,
mg->new_ocell = new_ocell;
mg->start_jiffies = jiffies;
- inc_nr_migrations(cache);
+ inc_io_migrations(cache);
quiesce_migration(mg);
}
@@ -1237,6 +1332,7 @@ static void invalidate(struct cache *cache, struct prealloc *structs,
struct dm_cache_migration *mg = prealloc_get_migration(structs);
mg->err = false;
+ mg->discard = false;
mg->writeback = false;
mg->demote = true;
mg->promote = false;
@@ -1249,7 +1345,27 @@ static void invalidate(struct cache *cache, struct prealloc *structs,
mg->new_ocell = NULL;
mg->start_jiffies = jiffies;
- inc_nr_migrations(cache);
+ inc_io_migrations(cache);
+ quiesce_migration(mg);
+}
+
+static void discard(struct cache *cache, struct prealloc *structs,
+ struct dm_bio_prison_cell *cell)
+{
+ struct dm_cache_migration *mg = prealloc_get_migration(structs);
+
+ mg->err = false;
+ mg->discard = true;
+ mg->writeback = false;
+ mg->demote = false;
+ mg->promote = false;
+ mg->requeue_holder = false;
+ mg->invalidate = false;
+ mg->cache = cache;
+ mg->old_ocell = NULL;
+ mg->new_ocell = cell;
+ mg->start_jiffies = jiffies;
+
quiesce_migration(mg);
}
@@ -1286,36 +1402,32 @@ static void process_flush_bio(struct cache *cache, struct bio *bio)
issue(cache, bio);
}
-/*
- * People generally discard large parts of a device, eg, the whole device
- * when formatting. Splitting these large discards up into cache block
- * sized ios and then quiescing (always neccessary for discard) takes too
- * long.
- *
- * We keep it simple, and allow any size of discard to come in, and just
- * mark off blocks on the discard bitset. No passdown occurs!
- *
- * To implement passdown we need to change the bio_prison such that a cell
- * can have a key that spans many blocks.
- */
-static void process_discard_bio(struct cache *cache, struct bio *bio)
+static void process_discard_bio(struct cache *cache, struct prealloc *structs,
+ struct bio *bio)
{
- dm_block_t start_block = dm_sector_div_up(bio->bi_iter.bi_sector,
- cache->sectors_per_block);
- dm_block_t end_block = bio_end_sector(bio);
- dm_block_t b;
+ int r;
+ dm_dblock_t b, e;
+ struct dm_bio_prison_cell *cell_prealloc, *new_ocell;
- end_block = block_div(end_block, cache->sectors_per_block);
+ calc_discard_block_range(cache, bio, &b, &e);
+ if (b == e) {
+ bio_endio(bio, 0);
+ return;
+ }
- for (b = start_block; b < end_block; b++)
- set_discard(cache, to_oblock(b));
+ cell_prealloc = prealloc_get_cell(structs);
+ r = bio_detain_range(cache, dblock_to_oblock(cache, b), dblock_to_oblock(cache, e), bio, cell_prealloc,
+ (cell_free_fn) prealloc_put_cell,
+ structs, &new_ocell);
+ if (r > 0)
+ return;
- bio_endio(bio, 0);
+ discard(cache, structs, new_ocell);
}
static bool spare_migration_bandwidth(struct cache *cache)
{
- sector_t current_volume = (atomic_read(&cache->nr_migrations) + 1) *
+ sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
cache->sectors_per_block;
return current_volume < cache->migration_threshold;
}
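
With the default threshold of 2048 sectors (DEFAULT_MIGRATION_THRESHOLD, defined later in the file) and an assumed 128-sector cache block, this check caps background copies at fifteen in flight. A sketch of the arithmetic:

    #include <assert.h>
    #include <stdint.h>

    #define SECTORS_PER_BLOCK    128u  /* assumed cache block size */
    #define MIGRATION_THRESHOLD 2048u  /* DEFAULT_MIGRATION_THRESHOLD */

    static int spare_migration_bandwidth(uint32_t nr_io_migrations)
    {
            uint64_t current_volume =
                    (nr_io_migrations + 1ull) * SECTORS_PER_BLOCK;

            return current_volume < MIGRATION_THRESHOLD;
    }

    int main(void)
    {
            assert(spare_migration_bandwidth(14));  /* 15 * 128 = 1920 < 2048 */
            assert(!spare_migration_bandwidth(15)); /* 16 * 128 = 2048: stop */
            return 0;
    }
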
@@ -1340,9 +1452,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
dm_oblock_t block = get_bio_block(cache, bio);
struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell;
struct policy_result lookup_result;
- bool discarded_block = is_discarded_oblock(cache, block);
bool passthrough = passthrough_mode(&cache->features);
- bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache));
+ bool discarded_block, can_migrate;
/*
* Check to see if that block is currently migrating.
@@ -1354,6 +1465,9 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
if (r > 0)
return;
+ discarded_block = is_discarded_oblock(cache, block);
+ can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache));
+
r = policy_map(cache->policy, block, true, can_migrate, discarded_block,
bio, &lookup_result);
@@ -1500,7 +1614,7 @@ static void process_deferred_bios(struct cache *cache)
if (bio->bi_rw & REQ_FLUSH)
process_flush_bio(cache, bio);
else if (bio->bi_rw & REQ_DISCARD)
- process_discard_bio(cache, bio);
+ process_discard_bio(cache, &structs, bio);
else
process_bio(cache, &structs, bio);
}
@@ -1665,7 +1779,7 @@ static void stop_quiescing(struct cache *cache)
static void wait_for_migrations(struct cache *cache)
{
- wait_event(cache->migration_wait, !atomic_read(&cache->nr_migrations));
+ wait_event(cache->migration_wait, !atomic_read(&cache->nr_allocated_migrations));
}
static void stop_worker(struct cache *cache)
@@ -1715,7 +1829,7 @@ static void do_worker(struct work_struct *ws)
process_invalidation_requests(cache);
}
- process_migrations(cache, &cache->quiesced_migrations, issue_copy);
+ process_migrations(cache, &cache->quiesced_migrations, issue_copy_or_discard);
process_migrations(cache, &cache->completed_migrations, complete_migration);
if (commit_if_needed(cache)) {
@@ -1777,9 +1891,6 @@ static void destroy(struct cache *cache)
{
unsigned i;
- if (cache->next_migration)
- mempool_free(cache->next_migration, cache->migration_pool);
-
if (cache->migration_pool)
mempool_destroy(cache->migration_pool);
@@ -2180,6 +2291,45 @@ static int create_cache_policy(struct cache *cache, struct cache_args *ca,
return 0;
}
+/*
+ * We want the discard block size to be at least the size of the cache
+ * block size and have no more than 2^14 discard blocks across the origin.
+ */
+#define MAX_DISCARD_BLOCKS (1 << 14)
+
+static bool too_many_discard_blocks(sector_t discard_block_size,
+ sector_t origin_size)
+{
+ (void) sector_div(origin_size, discard_block_size);
+
+ return origin_size > MAX_DISCARD_BLOCKS;
+}
+
+static sector_t calculate_discard_block_size(sector_t cache_block_size,
+ sector_t origin_size)
+{
+ sector_t discard_block_size = cache_block_size;
+
+ if (origin_size)
+ while (too_many_discard_blocks(discard_block_size, origin_size))
+ discard_block_size *= 2;
+
+ return discard_block_size;
+}
+
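
Doubling from the cache block size until the origin holds at most 2^14 discard blocks converges quickly. A userspace model, assuming a 1 TiB origin (2^31 sectors) and 64 KiB (128-sector) cache blocks:

    #include <assert.h>
    #include <stdint.h>

    #define MAX_DISCARD_BLOCKS (1u << 14)

    static uint64_t calculate_discard_block_size(uint64_t cache_block_size,
                                                 uint64_t origin_size)
    {
            uint64_t dbs = cache_block_size;

            if (origin_size)
                    while (origin_size / dbs > MAX_DISCARD_BLOCKS)
                            dbs *= 2;

            return dbs;
    }

    int main(void)
    {
            /* 1 TiB origin (2^31 sectors), 64 KiB cache blocks (128 sectors) */
            uint64_t dbs = calculate_discard_block_size(128, 1ull << 31);

            assert(dbs == 1u << 17);                /* 64 MiB discard blocks */
            assert((1ull << 31) / dbs == 1u << 14); /* exactly 2^14 of them */
            return 0;
    }
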
+static void set_cache_size(struct cache *cache, dm_cblock_t size)
+{
+ dm_block_t nr_blocks = from_cblock(size);
+
+ if (nr_blocks > (1 << 20) && cache->cache_size != size)
+ DMWARN_LIMIT("You have created a cache device with a lot of individual cache blocks (%llu)\n"
+ "All these mappings can consume a lot of kernel memory, and take some time to read/write.\n"
+ "Please consider increasing the cache block size to reduce the overall cache block count.",
+ (unsigned long long) nr_blocks);
+
+ cache->cache_size = size;
+}
+
#define DEFAULT_MIGRATION_THRESHOLD 2048
static int cache_create(struct cache_args *ca, struct cache **result)
@@ -2204,8 +2354,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
ti->num_discard_bios = 1;
ti->discards_supported = true;
ti->discard_zeroes_data_unsupported = true;
- /* Discard bios must be split on a block boundary */
- ti->split_discard_bios = true;
+ ti->split_discard_bios = false;
cache->features = ca->features;
ti->per_bio_data_size = get_per_bio_data_size(cache);
@@ -2235,10 +2384,10 @@ static int cache_create(struct cache_args *ca, struct cache **result)
cache->sectors_per_block_shift = -1;
cache_size = block_div(cache_size, ca->block_size);
- cache->cache_size = to_cblock(cache_size);
+ set_cache_size(cache, to_cblock(cache_size));
} else {
cache->sectors_per_block_shift = __ffs(ca->block_size);
- cache->cache_size = to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift);
+ set_cache_size(cache, to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift));
}
r = create_cache_policy(cache, ca, error);
@@ -2287,7 +2436,8 @@ static int cache_create(struct cache_args *ca, struct cache **result)
INIT_LIST_HEAD(&cache->quiesced_migrations);
INIT_LIST_HEAD(&cache->completed_migrations);
INIT_LIST_HEAD(&cache->need_commit_migrations);
- atomic_set(&cache->nr_migrations, 0);
+ atomic_set(&cache->nr_allocated_migrations, 0);
+ atomic_set(&cache->nr_io_migrations, 0);
init_waitqueue_head(&cache->migration_wait);
init_waitqueue_head(&cache->quiescing_wait);
@@ -2303,13 +2453,17 @@ static int cache_create(struct cache_args *ca, struct cache **result)
}
clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size));
- cache->discard_nr_blocks = cache->origin_blocks;
- cache->discard_bitset = alloc_bitset(from_oblock(cache->discard_nr_blocks));
+ cache->discard_block_size =
+ calculate_discard_block_size(cache->sectors_per_block,
+ cache->origin_sectors);
+ cache->discard_nr_blocks = to_dblock(dm_sector_div_up(cache->origin_sectors,
+ cache->discard_block_size));
+ cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks));
if (!cache->discard_bitset) {
*error = "could not allocate discard bitset";
goto bad;
}
- clear_bitset(cache->discard_bitset, from_oblock(cache->discard_nr_blocks));
+ clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
if (IS_ERR(cache->copier)) {
@@ -2327,7 +2481,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
INIT_DELAYED_WORK(&cache->waker, do_waker);
cache->last_commit_jiffies = jiffies;
- cache->prison = dm_bio_prison_create(PRISON_CELLS);
+ cache->prison = dm_bio_prison_create();
if (!cache->prison) {
*error = "could not create bio prison";
goto bad;
@@ -2346,8 +2500,6 @@ static int cache_create(struct cache_args *ca, struct cache **result)
goto bad;
}
- cache->next_migration = NULL;
-
cache->need_tick_bio = true;
cache->sized = false;
cache->invalidate = false;
@@ -2549,11 +2701,11 @@ static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_priso
static int cache_map(struct dm_target *ti, struct bio *bio)
{
int r;
- struct dm_bio_prison_cell *cell;
+ struct dm_bio_prison_cell *cell = NULL;
struct cache *cache = ti->private;
r = __cache_map(cache, bio, &cell);
- if (r == DM_MAPIO_REMAPPED) {
+ if (r == DM_MAPIO_REMAPPED && cell) {
inc_ds(cache, bio, cell);
cell_defer(cache, cell, false);
}
@@ -2599,16 +2751,16 @@ static int write_discard_bitset(struct cache *cache)
{
unsigned i, r;
- r = dm_cache_discard_bitset_resize(cache->cmd, cache->sectors_per_block,
- cache->origin_blocks);
+ r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size,
+ cache->discard_nr_blocks);
if (r) {
DMERR("could not resize on-disk discard bitset");
return r;
}
- for (i = 0; i < from_oblock(cache->discard_nr_blocks); i++) {
- r = dm_cache_set_discard(cache->cmd, to_oblock(i),
- is_discarded(cache, to_oblock(i)));
+ for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) {
+ r = dm_cache_set_discard(cache->cmd, to_dblock(i),
+ is_discarded(cache, to_dblock(i)));
if (r)
return r;
}
@@ -2680,15 +2832,86 @@ static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
return 0;
}
+/*
+ * The discard block size in the on disk metadata is not
+ * neccessarily the same as we're currently using. So we have to
+ * be careful to only set the discarded attribute if we know it
+ * covers a complete block of the new size.
+ */
+struct discard_load_info {
+ struct cache *cache;
+
+ /*
+ * These blocks are sized using the on disk dblock size, rather
+ * than the current one.
+ */
+ dm_block_t block_size;
+ dm_block_t discard_begin, discard_end;
+};
+
+static void discard_load_info_init(struct cache *cache,
+ struct discard_load_info *li)
+{
+ li->cache = cache;
+ li->discard_begin = li->discard_end = 0;
+}
+
+static void set_discard_range(struct discard_load_info *li)
+{
+ sector_t b, e;
+
+ if (li->discard_begin == li->discard_end)
+ return;
+
+ /*
+ * Convert to sectors.
+ */
+ b = li->discard_begin * li->block_size;
+ e = li->discard_end * li->block_size;
+
+ /*
+ * Then convert back to the current dblock size.
+ */
+ b = dm_sector_div_up(b, li->cache->discard_block_size);
+ sector_div(e, li->cache->discard_block_size);
+
+ /*
+ * The origin may have shrunk, so we need to check we're still in
+ * bounds.
+ */
+ if (e > from_dblock(li->cache->discard_nr_blocks))
+ e = from_dblock(li->cache->discard_nr_blocks);
+
+ for (; b < e; b++)
+ set_discard(li->cache, to_dblock(b));
+}
+
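
set_discard_range() has to re-scale runs recorded with the old on-disk discard block size into the current one, rounding the start up and the end down so only fully covered new blocks are marked. A userspace model with assumed old and new sizes of 1024 and 2048 sectors:

    #include <assert.h>
    #include <stdint.h>

    static void rescale_range(uint64_t old_bs, uint64_t new_bs,
                              uint64_t begin, uint64_t end, /* old dblocks */
                              uint64_t *b, uint64_t *e)     /* new dblocks */
    {
            uint64_t sb = begin * old_bs; /* convert to sectors */
            uint64_t se = end * old_bs;

            *b = (sb + new_bs - 1) / new_bs; /* dm_sector_div_up() */
            *e = se / new_bs;                /* sector_div() */
    }

    int main(void)
    {
            uint64_t b, e;

            /* old run: dblocks [3,7) of 1024 sectors = sectors [3072,7168) */
            rescale_range(1024, 2048, 3, 7, &b, &e);
            assert(b == 2 && e == 3); /* only new dblock 2 is fully covered */
            return 0;
    }
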
static int load_discard(void *context, sector_t discard_block_size,
- dm_oblock_t oblock, bool discard)
+ dm_dblock_t dblock, bool discard)
{
- struct cache *cache = context;
+ struct discard_load_info *li = context;
- if (discard)
- set_discard(cache, oblock);
- else
- clear_discard(cache, oblock);
+ li->block_size = discard_block_size;
+
+ if (discard) {
+ if (from_dblock(dblock) == li->discard_end)
+ /*
+ * We're already in a discard range, just extend it.
+ */
+ li->discard_end = li->discard_end + 1ULL;
+
+ else {
+ /*
+ * Emit the old range and start a new one.
+ */
+ set_discard_range(li);
+ li->discard_begin = from_dblock(dblock);
+ li->discard_end = li->discard_begin + 1ULL;
+ }
+ } else {
+ set_discard_range(li);
+ li->discard_begin = li->discard_end = 0;
+ }
return 0;
}
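
load_discard() coalesces per-dblock flags into maximal runs before handing them to set_discard_range(), which keeps the re-scaling above cheap. A userspace model of the coalescing, where emit() stands in for set_discard_range():

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t discard_begin, discard_end;

    static void emit(void) /* stands in for set_discard_range() */
    {
            if (discard_begin != discard_end)
                    printf("discard run [%llu, %llu)\n",
                           (unsigned long long)discard_begin,
                           (unsigned long long)discard_end);
    }

    static void load_one(uint64_t dblock, int discarded)
    {
            if (discarded) {
                    if (dblock == discard_end) {
                            discard_end++;          /* extend the current run */
                    } else {
                            emit();                 /* flush, start a new run */
                            discard_begin = dblock;
                            discard_end = dblock + 1;
                    }
            } else {
                    emit();
                    discard_begin = discard_end = 0;
            }
    }

    int main(void)
    {
            int flags[] = { 1, 1, 1, 0, 1 };

            for (uint64_t i = 0; i < 5; i++)
                    load_one(i, flags[i]);
            emit(); /* final flush, as cache_preresume() does after loading */
            /* prints: [0, 3) and [4, 5) */
            return 0;
    }
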
@@ -2730,7 +2953,7 @@ static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size)
return r;
}
- cache->cache_size = new_size;
+ set_cache_size(cache, new_size);
return 0;
}
@@ -2772,11 +2995,22 @@ static int cache_preresume(struct dm_target *ti)
}
if (!cache->loaded_discards) {
- r = dm_cache_load_discards(cache->cmd, load_discard, cache);
+ struct discard_load_info li;
+
+ /*
+ * The discard bitset could have been resized, or the
+ * discard block size changed. To be safe we start by
+ * setting every dblock to not discarded.
+ */
+ clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
+
+ discard_load_info_init(cache, &li);
+ r = dm_cache_load_discards(cache->cmd, load_discard, &li);
if (r) {
DMERR("could not load origin discards");
return r;
}
+ set_discard_range(&li);
cache->loaded_discards = true;
}
@@ -3079,8 +3313,9 @@ static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
/*
* FIXME: these limits may be incompatible with the cache device
*/
- limits->max_discard_sectors = cache->sectors_per_block;
- limits->discard_granularity = cache->sectors_per_block << SECTOR_SHIFT;
+ limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024,
+ cache->origin_sectors);
+ limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
}
static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
@@ -3104,7 +3339,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type cache_target = {
.name = "cache",
- .version = {1, 5, 0},
+ .version = {1, 6, 0},
.module = THIS_MODULE,
.ctr = cache_ctr,
.dtr = cache_dtr,