From ae03bf639a5027d27270123f5f6e3ee6a412781d Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 22 May 2009 17:17:50 -0400 Subject: block: Use accessor functions for queue limits Convert all external users of queue limits to using wrapper functions instead of poking the request queue variables directly. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- drivers/md/raid0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index c08d7559be5..925507e7d67 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -144,7 +144,7 @@ static int create_strip_zones (mddev_t *mddev) */ if (rdev1->bdev->bd_disk->queue->merge_bvec_fn && - mddev->queue->max_sectors > (PAGE_SIZE>>9)) + queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); if (!smallest || (rdev1->sectors < smallest->sectors)) -- cgit v1.2.3-70-g09d2 From dc58266385e51420298275c90a616c34f1473a73 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Tue, 16 Jun 2009 16:18:43 +1000 Subject: md: raid0: Replace hash table lookup by looping over all strip_zones. The number of strip_zones of a raid0 array is bounded by the number of drives in the array and is in fact much smaller for typical setups. For example, any raid0 array containing identical disks will have only a single strip_zone. Therefore, the hash tables which are used for quickly finding the strip_zone that holds a particular sector are of questionable value and add quite a bit of unnecessary complexity. This patch replaces the hash table lookup by equivalent code which simply loops over all strip zones to find the zone that holds the given sector. In order to make this loop as fast as possible, the zone->start field of struct strip_zone has been renamed to zone_end, and it now stores the beginning of the next zone in sectors. This allows to save one addition in the loop. 
Subsequent cleanup patches will remove the hash table structure. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/raid0.c | 40 ++++++++++++++++++++-------------------- drivers/md/raid0.h | 2 +- 2 files changed, 21 insertions(+), 21 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 925507e7d67..bb245a6d16c 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -52,7 +52,6 @@ static int raid0_congested(void *data, int bits) return ret; } - static int create_strip_zones (mddev_t *mddev) { int i, c, j; @@ -158,7 +157,7 @@ static int create_strip_zones (mddev_t *mddev) } zone->nb_dev = cnt; zone->sectors = smallest->sectors * cnt; - zone->zone_start = 0; + zone->zone_end = zone->sectors; current_start = smallest->sectors; curr_zone_start = zone->sectors; @@ -198,14 +197,13 @@ static int create_strip_zones (mddev_t *mddev) printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n", zone->nb_dev, (unsigned long long)zone->sectors); - zone->zone_start = curr_zone_start; + zone->zone_end = curr_zone_start + zone->sectors; curr_zone_start += zone->sectors; current_start = smallest->sectors; printk(KERN_INFO "raid0: current zone start: %llu\n", (unsigned long long)current_start); } - /* Now find appropriate hash spacing. 
* We want a number which causes most hash entries to cover * at most two strips, but the hash table must be at most @@ -398,6 +396,19 @@ static int raid0_stop (mddev_t *mddev) return 0; } +/* Find the zone which holds a particular offset */ +static struct strip_zone *find_zone(struct raid0_private_data *conf, + sector_t sector) +{ + int i; + struct strip_zone *z = conf->strip_zone; + + for (i = 0; i < conf->nr_strip_zones; i++) + if (sector < z[i].zone_end) + return z + i; + BUG(); +} + static int raid0_make_request (struct request_queue *q, struct bio *bio) { mddev_t *mddev = q->queuedata; @@ -443,22 +454,11 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) bio_pair_release(bp); return 0; } - - - { - sector_t x = sector >> conf->sector_shift; - sector_div(x, (u32)conf->spacing); - zone = conf->hash_table[x]; - } - - while (sector >= zone->zone_start + zone->sectors) - zone++; - + zone = find_zone(conf, sector); sect_in_chunk = bio->bi_sector & (chunk_sects - 1); - - { - sector_t x = (sector - zone->zone_start) >> chunksect_bits; + sector_t x = (zone->sectors + sector - zone->zone_end) + >> chunksect_bits; sector_div(x, zone->nb_dev); chunk = x; @@ -503,8 +503,8 @@ static void raid0_status (struct seq_file *seq, mddev_t *mddev) seq_printf(seq, "%s/", bdevname( conf->strip_zone[j].dev[k]->bdev,b)); - seq_printf(seq, "] zs=%d ds=%d s=%d\n", - conf->strip_zone[j].zone_start, + seq_printf(seq, "] ze=%d ds=%d s=%d\n", + conf->strip_zone[j].zone_end, conf->strip_zone[j].dev_start, conf->strip_zone[j].sectors); } diff --git a/drivers/md/raid0.h b/drivers/md/raid0.h index 824b12eb1d4..556666fec3a 100644 --- a/drivers/md/raid0.h +++ b/drivers/md/raid0.h @@ -3,7 +3,7 @@ struct strip_zone { - sector_t zone_start; /* Zone offset in md_dev (in sectors) */ + sector_t zone_end; /* Start of the next zone (in sectors) */ sector_t dev_start; /* Zone offset in real dev (in sectors) */ sector_t sectors; /* Zone size in sectors */ int nb_dev; /* # of devices 
attached to the zone */ -- cgit v1.2.3-70-g09d2 From d27a43abd7be0ab4b2337e4587feca8c7340e5f9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 16 Jun 2009 16:46:46 +1000 Subject: md/raid0: two cleanups in create_stripe_zones. 1/ remove current_start. The same value is available in zone->dev_start and storing it separately doesn't gain anything. 2/ rename curr_zone_start to curr_zone_end as we are now more focused on the 'end' of each zone. We end up storing the same number though - the old name was a little confusing (and what does 'current' mean in this context anyway). Signed-off-by: NeilBrown --- drivers/md/raid0.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index bb245a6d16c..1afdfd120bb 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -55,7 +55,7 @@ static int raid0_congested(void *data, int bits) static int create_strip_zones (mddev_t *mddev) { int i, c, j; - sector_t current_start, curr_zone_start; + sector_t curr_zone_end; sector_t min_spacing; raid0_conf_t *conf = mddev_to_conf(mddev); mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev; @@ -159,8 +159,7 @@ static int create_strip_zones (mddev_t *mddev) zone->sectors = smallest->sectors * cnt; zone->zone_end = zone->sectors; - current_start = smallest->sectors; - curr_zone_start = zone->sectors; + curr_zone_end = zone->sectors; /* now do the other zones */ for (i = 1; i < conf->nr_strip_zones; i++) @@ -169,7 +168,7 @@ static int create_strip_zones (mddev_t *mddev) zone->dev = conf->strip_zone[i-1].dev + mddev->raid_disks; printk(KERN_INFO "raid0: zone %d\n", i); - zone->dev_start = current_start; + zone->dev_start = smallest->sectors; smallest = NULL; c = 0; @@ -178,7 +177,7 @@ static int create_strip_zones (mddev_t *mddev) rdev = conf->strip_zone[0].dev[j]; printk(KERN_INFO "raid0: checking %s ...", bdevname(rdev->bdev, b)); - if (rdev->sectors <= current_start) { + if 
(rdev->sectors <= zone->dev_start) { printk(KERN_INFO " nope.\n"); continue; } @@ -193,16 +192,15 @@ static int create_strip_zones (mddev_t *mddev) } zone->nb_dev = c; - zone->sectors = (smallest->sectors - current_start) * c; + zone->sectors = (smallest->sectors - zone->dev_start) * c; printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n", zone->nb_dev, (unsigned long long)zone->sectors); - zone->zone_end = curr_zone_start + zone->sectors; - curr_zone_start += zone->sectors; + curr_zone_end += zone->sectors; + zone->zone_end = curr_zone_end; - current_start = smallest->sectors; printk(KERN_INFO "raid0: current zone start: %llu\n", - (unsigned long long)current_start); + (unsigned long long)smallest->sectors); } /* Now find appropriate hash spacing. * We want a number which causes most hash entries to cover @@ -212,8 +210,8 @@ static int create_strip_zones (mddev_t *mddev) * strip though as it's size has no bearing on the efficacy of the hash * table. */ - conf->spacing = curr_zone_start; - min_spacing = curr_zone_start; + conf->spacing = curr_zone_end; + min_spacing = curr_zone_end; sector_div(min_spacing, PAGE_SIZE/sizeof(struct strip_zone*)); for (i=0; i < conf->nr_strip_zones-1; i++) { sector_t s = 0; -- cgit v1.2.3-70-g09d2 From 09770e0b6ee649313611a2d6a9b44f456072dbd6 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Tue, 16 Jun 2009 16:46:48 +1000 Subject: md: raid0: Remove hash table. The raid0 hash table has become unused due to the changes in the previous patch. This patch removes the hash table allocation and setup code and kills the hash_table field of struct raid0_private_data. 
Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/raid0.c | 12 ------------ drivers/md/raid0.h | 1 - 2 files changed, 13 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 1afdfd120bb..d4c9c5d5d7f 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -326,22 +326,14 @@ static int raid0_run (mddev_t *mddev) nb_zone = s + round; } printk(KERN_INFO "raid0 : nb_zone is %d.\n", nb_zone); - - printk(KERN_INFO "raid0 : Allocating %zu bytes for hash.\n", - nb_zone*sizeof(struct strip_zone*)); - conf->hash_table = kmalloc (sizeof (struct strip_zone *)*nb_zone, GFP_KERNEL); - if (!conf->hash_table) - goto out_free_conf; sectors = conf->strip_zone[cur].sectors; - conf->hash_table[0] = conf->strip_zone + cur; for (i=1; i< nb_zone; i++) { while (sectors <= conf->spacing) { cur++; sectors += conf->strip_zone[cur].sectors; } sectors -= conf->spacing; - conf->hash_table[i] = conf->strip_zone + cur; } if (conf->sector_shift) { conf->spacing >>= conf->sector_shift; @@ -384,8 +376,6 @@ static int raid0_stop (mddev_t *mddev) raid0_conf_t *conf = mddev_to_conf(mddev); blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ - kfree(conf->hash_table); - conf->hash_table = NULL; kfree(conf->strip_zone); conf->strip_zone = NULL; kfree(conf); @@ -494,8 +484,6 @@ static void raid0_status (struct seq_file *seq, mddev_t *mddev) h = 0; for (j = 0; j < conf->nr_strip_zones; j++) { seq_printf(seq, " z%d", j); - if (conf->hash_table[h] == conf->strip_zone+j) - seq_printf(seq, "(h%d)", h++); seq_printf(seq, "=["); for (k = 0; k < conf->strip_zone[j].nb_dev; k++) seq_printf(seq, "%s/", bdevname( diff --git a/drivers/md/raid0.h b/drivers/md/raid0.h index 556666fec3a..a14630a25aa 100644 --- a/drivers/md/raid0.h +++ b/drivers/md/raid0.h @@ -12,7 +12,6 @@ struct strip_zone struct raid0_private_data { - struct strip_zone **hash_table; /* Table of indexes into strip_zone */ struct strip_zone *strip_zone; mdk_rdev_t 
**devlist; /* lists of rdevs, pointed to by strip_zone->dev */ int nr_strip_zones; -- cgit v1.2.3-70-g09d2 From 8f79cfcdb65472f1504ade2f53e5f2bfdaeb95da Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Tue, 16 Jun 2009 16:47:10 +1000 Subject: md: raid0: Remove hash spacing and sector shift. The "sector_shift" and "spacing" fields of struct raid0_private_data were only used for the hash table lookups. So the removal of the hash table allows get rid of these fields as well which simplifies create_strip_zones() and raid0_run() quite a bit. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/raid0.c | 63 +----------------------------------------------------- drivers/md/raid0.h | 3 --- 2 files changed, 1 insertion(+), 65 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index d4c9c5d5d7f..edffc4940b4 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -56,7 +56,6 @@ static int create_strip_zones (mddev_t *mddev) { int i, c, j; sector_t curr_zone_end; - sector_t min_spacing; raid0_conf_t *conf = mddev_to_conf(mddev); mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev; struct strip_zone *zone; @@ -202,28 +201,7 @@ static int create_strip_zones (mddev_t *mddev) printk(KERN_INFO "raid0: current zone start: %llu\n", (unsigned long long)smallest->sectors); } - /* Now find appropriate hash spacing. - * We want a number which causes most hash entries to cover - * at most two strips, but the hash table must be at most - * 1 PAGE. We choose the smallest strip, or contiguous collection - * of strips, that has big enough size. We never consider the last - * strip though as it's size has no bearing on the efficacy of the hash - * table. 
- */ - conf->spacing = curr_zone_end; - min_spacing = curr_zone_end; - sector_div(min_spacing, PAGE_SIZE/sizeof(struct strip_zone*)); - for (i=0; i < conf->nr_strip_zones-1; i++) { - sector_t s = 0; - for (j = i; j < conf->nr_strip_zones - 1 && - s < min_spacing; j++) - s += conf->strip_zone[j].sectors; - if (s >= min_spacing && s < conf->spacing) - conf->spacing = s; - } - mddev->queue->unplug_fn = raid0_unplug; - mddev->queue->backing_dev_info.congested_fn = raid0_congested; mddev->queue->backing_dev_info.congested_data = mddev; @@ -273,10 +251,8 @@ static sector_t raid0_size(mddev_t *mddev, sector_t sectors, int raid_disks) return array_sectors; } -static int raid0_run (mddev_t *mddev) +static int raid0_run(mddev_t *mddev) { - unsigned cur=0, i=0, nb_zone; - s64 sectors; raid0_conf_t *conf; if (mddev->chunk_size == 0) { @@ -306,43 +282,6 @@ static int raid0_run (mddev_t *mddev) printk(KERN_INFO "raid0 : md_size is %llu sectors.\n", (unsigned long long)mddev->array_sectors); - printk(KERN_INFO "raid0 : conf->spacing is %llu sectors.\n", - (unsigned long long)conf->spacing); - { - sector_t s = raid0_size(mddev, 0, 0); - sector_t space = conf->spacing; - int round; - conf->sector_shift = 0; - if (sizeof(sector_t) > sizeof(u32)) { - /*shift down space and s so that sector_div will work */ - while (space > (sector_t) (~(u32)0)) { - s >>= 1; - space >>= 1; - s += 1; /* force round-up */ - conf->sector_shift++; - } - } - round = sector_div(s, (u32)space) ? 
1 : 0; - nb_zone = s + round; - } - printk(KERN_INFO "raid0 : nb_zone is %d.\n", nb_zone); - sectors = conf->strip_zone[cur].sectors; - - for (i=1; i< nb_zone; i++) { - while (sectors <= conf->spacing) { - cur++; - sectors += conf->strip_zone[cur].sectors; - } - sectors -= conf->spacing; - } - if (conf->sector_shift) { - conf->spacing >>= conf->sector_shift; - /* round spacing up so when we divide by it, we - * err on the side of too-low, which is safest - */ - conf->spacing++; - } - /* calculate the max read-ahead size. * For read-ahead of large files to be effective, we need to * readahead at least twice a whole stripe. i.e. number of devices diff --git a/drivers/md/raid0.h b/drivers/md/raid0.h index a14630a25aa..dbcf1da916b 100644 --- a/drivers/md/raid0.h +++ b/drivers/md/raid0.h @@ -15,9 +15,6 @@ struct raid0_private_data struct strip_zone *strip_zone; mdk_rdev_t **devlist; /* lists of rdevs, pointed to by strip_zone->dev */ int nr_strip_zones; - - sector_t spacing; - int sector_shift; /* shift this before divide by spacing */ }; typedef struct raid0_private_data raid0_conf_t; -- cgit v1.2.3-70-g09d2 From 5568a6035d9fca2cd8f1ef7005e215eae4e65fab Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Tue, 16 Jun 2009 16:47:21 +1000 Subject: md: raid0: Make raid0_run() return a proper error code. Currently raid0_run() always returns -ENOMEM on errors. This is incorrect as running the array might fail for other reasons, for example because not all component devices were available. This patch changes create_strip_zones() so that it returns a proper error code (either -ENOMEM or -EINVAL) rather than 1 on errors and makes raid0_run(), its single caller, return that value instead of -ENOMEM. 
Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/raid0.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index edffc4940b4..e5648b660e7 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -105,12 +105,12 @@ static int create_strip_zones (mddev_t *mddev) conf->strip_zone = kzalloc(sizeof(struct strip_zone)* conf->nr_strip_zones, GFP_KERNEL); if (!conf->strip_zone) - return 1; + return -ENOMEM; conf->devlist = kzalloc(sizeof(mdk_rdev_t*)* conf->nr_strip_zones*mddev->raid_disks, GFP_KERNEL); if (!conf->devlist) - return 1; + return -ENOMEM; /* The first zone must contain all devices, so here we check that * there is a proper alignment of slots to devices and find them all @@ -207,8 +207,8 @@ static int create_strip_zones (mddev_t *mddev) printk(KERN_INFO "raid0: done.\n"); return 0; - abort: - return 1; +abort: + return -EINVAL; } /** @@ -254,6 +254,7 @@ static sector_t raid0_size(mddev_t *mddev, sector_t sectors, int raid_disks) static int raid0_run(mddev_t *mddev) { raid0_conf_t *conf; + int ret; if (mddev->chunk_size == 0) { printk(KERN_ERR "md/raid0: non-zero chunk size required.\n"); @@ -269,12 +270,13 @@ static int raid0_run(mddev_t *mddev) conf = kmalloc(sizeof (raid0_conf_t), GFP_KERNEL); if (!conf) - goto out; + return -ENOMEM; mddev->private = (void *)conf; conf->strip_zone = NULL; conf->devlist = NULL; - if (create_strip_zones (mddev)) + ret = create_strip_zones(mddev); + if (ret < 0) goto out_free_conf; /* calculate array device size */ @@ -306,8 +308,7 @@ out_free_conf: kfree(conf->devlist); kfree(conf); mddev->private = NULL; -out: - return -ENOMEM; + return ret; } static int raid0_stop (mddev_t *mddev) -- cgit v1.2.3-70-g09d2 From ed7b00380d957ec770b5e90380d012c6062c13cc Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Tue, 16 Jun 2009 16:47:36 +1000 Subject: md: raid0: Allocate all buffers for the raid0 configuration in 
one function. Currently the raid0 configuration is allocated in raid0_run() while the buffers for the strip_zone and the dev_list arrays are allocated in create_strip_zones(). On errors, all three buffers are freed in raid0_run(). It's easier and more readable to do the allocation and cleanup within a single function. So move that code into create_strip_zones(). Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/raid0.c | 47 +++++++++++++++++------------------------------ 1 file changed, 17 insertions(+), 30 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index e5648b660e7..99cee51734e 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -52,21 +52,18 @@ static int raid0_congested(void *data, int bits) return ret; } -static int create_strip_zones (mddev_t *mddev) +static int create_strip_zones(mddev_t *mddev) { - int i, c, j; + int i, c, j, err; sector_t curr_zone_end; - raid0_conf_t *conf = mddev_to_conf(mddev); mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev; struct strip_zone *zone; int cnt; char b[BDEVNAME_SIZE]; - - /* - * The number of 'same size groups' - */ - conf->nr_strip_zones = 0; - + raid0_conf_t *conf = kzalloc(sizeof(*conf), GFP_KERNEL); + + if (!conf) + return -ENOMEM; list_for_each_entry(rdev1, &mddev->disks, same_set) { printk(KERN_INFO "raid0: looking at %s\n", bdevname(rdev1->bdev,b)); @@ -101,16 +98,16 @@ static int create_strip_zones (mddev_t *mddev) } } printk(KERN_INFO "raid0: FINAL %d zones\n", conf->nr_strip_zones); - + err = -ENOMEM; conf->strip_zone = kzalloc(sizeof(struct strip_zone)* conf->nr_strip_zones, GFP_KERNEL); if (!conf->strip_zone) - return -ENOMEM; + goto abort; conf->devlist = kzalloc(sizeof(mdk_rdev_t*)* conf->nr_strip_zones*mddev->raid_disks, GFP_KERNEL); if (!conf->devlist) - return -ENOMEM; + goto abort; /* The first zone must contain all devices, so here we check that * there is a proper alignment of slots to devices and find them all @@ -119,6 +116,7 
@@ static int create_strip_zones (mddev_t *mddev) cnt = 0; smallest = NULL; zone->dev = conf->devlist; + err = -EINVAL; list_for_each_entry(rdev1, &mddev->disks, same_set) { int j = rdev1->raid_disk; @@ -206,9 +204,14 @@ static int create_strip_zones (mddev_t *mddev) mddev->queue->backing_dev_info.congested_data = mddev; printk(KERN_INFO "raid0: done.\n"); + mddev->private = conf; return 0; abort: - return -EINVAL; + kfree(conf->strip_zone); + kfree(conf->devlist); + kfree(conf); + mddev->private = NULL; + return err; } /** @@ -253,7 +256,6 @@ static sector_t raid0_size(mddev_t *mddev, sector_t sectors, int raid_disks) static int raid0_run(mddev_t *mddev) { - raid0_conf_t *conf; int ret; if (mddev->chunk_size == 0) { @@ -268,16 +270,9 @@ static int raid0_run(mddev_t *mddev) blk_queue_segment_boundary(mddev->queue, (mddev->chunk_size>>1) - 1); mddev->queue->queue_lock = &mddev->queue->__queue_lock; - conf = kmalloc(sizeof (raid0_conf_t), GFP_KERNEL); - if (!conf) - return -ENOMEM; - mddev->private = (void *)conf; - - conf->strip_zone = NULL; - conf->devlist = NULL; ret = create_strip_zones(mddev); if (ret < 0) - goto out_free_conf; + return ret; /* calculate array device size */ md_set_array_sectors(mddev, raid0_size(mddev, 0, 0)); @@ -299,16 +294,8 @@ static int raid0_run(mddev_t *mddev) mddev->queue->backing_dev_info.ra_pages = 2* stripe; } - blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec); return 0; - -out_free_conf: - kfree(conf->strip_zone); - kfree(conf->devlist); - kfree(conf); - mddev->private = NULL; - return ret; } static int raid0_stop (mddev_t *mddev) -- cgit v1.2.3-70-g09d2 From fb5ab4b5d6e16fd5006c9f800d0116f3547cb760 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Tue, 16 Jun 2009 16:48:19 +1000 Subject: md: raid0: Fix a memory leak when stopping a raid0 array. raid0_stop() removes all references to the raid0 configuration but misses to free the ->devlist buffer. 
This patch closes this leak, removes a pointless initialization and fixes a coding style issue in raid0_stop(). Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/raid0.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 99cee51734e..0d62ad6df21 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -298,16 +298,15 @@ static int raid0_run(mddev_t *mddev) return 0; } -static int raid0_stop (mddev_t *mddev) +static int raid0_stop(mddev_t *mddev) { raid0_conf_t *conf = mddev_to_conf(mddev); blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ kfree(conf->strip_zone); - conf->strip_zone = NULL; + kfree(conf->devlist); kfree(conf); mddev->private = NULL; - return 0; } -- cgit v1.2.3-70-g09d2 From 49f357a22b3fa3eeac042dfa0a6cae920c174e48 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 16 Jun 2009 16:50:35 +1000 Subject: md: raid0: remove ->sectors from the strip_zone structure. storing ->sectors is redundant as it can be computed from the difference z->zone_end - (z-1)->zone_end The one place where it is used, it is just as efficient to use a zone_end value instead. And removing it makes strip_zone smaller, so the array of these that is searched on every request has a better chance to stay in cache. So discard the field and get the value from elsewhere. 
Signed-off-by: NeilBrown --- drivers/md/raid0.c | 33 +++++++++++++++++++-------------- drivers/md/raid0.h | 1 - 2 files changed, 19 insertions(+), 15 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 0d62ad6df21..07ef936afc7 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -55,7 +55,7 @@ static int raid0_congested(void *data, int bits) static int create_strip_zones(mddev_t *mddev) { int i, c, j, err; - sector_t curr_zone_end; + sector_t curr_zone_end, sectors; mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev; struct strip_zone *zone; int cnt; @@ -153,10 +153,9 @@ static int create_strip_zones(mddev_t *mddev) goto abort; } zone->nb_dev = cnt; - zone->sectors = smallest->sectors * cnt; - zone->zone_end = zone->sectors; + zone->zone_end = smallest->sectors * cnt; - curr_zone_end = zone->sectors; + curr_zone_end = zone->zone_end; /* now do the other zones */ for (i = 1; i < conf->nr_strip_zones; i++) @@ -189,11 +188,11 @@ static int create_strip_zones(mddev_t *mddev) } zone->nb_dev = c; - zone->sectors = (smallest->sectors - zone->dev_start) * c; + sectors = (smallest->sectors - zone->dev_start) * c; printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n", - zone->nb_dev, (unsigned long long)zone->sectors); + zone->nb_dev, (unsigned long long)sectors); - curr_zone_end += zone->sectors; + curr_zone_end += sectors; zone->zone_end = curr_zone_end; printk(KERN_INFO "raid0: current zone start: %llu\n", @@ -310,16 +309,22 @@ static int raid0_stop(mddev_t *mddev) return 0; } -/* Find the zone which holds a particular offset */ +/* Find the zone which holds a particular offset + * Update *sectorp to be an offset in that zone + */ static struct strip_zone *find_zone(struct raid0_private_data *conf, - sector_t sector) + sector_t *sectorp) { int i; struct strip_zone *z = conf->strip_zone; + sector_t sector = *sectorp; for (i = 0; i < conf->nr_strip_zones; i++) - if (sector < z[i].zone_end) + if (sector < 
z[i].zone_end) { + if (i) + *sectorp = sector - z[i-1].zone_end; return z + i; + } BUG(); } @@ -331,7 +336,7 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) struct strip_zone *zone; mdk_rdev_t *tmp_dev; sector_t chunk; - sector_t sector, rsect; + sector_t sector, rsect, sector_offset; const int rw = bio_data_dir(bio); int cpu; @@ -368,11 +373,11 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) bio_pair_release(bp); return 0; } - zone = find_zone(conf, sector); + sector_offset = sector; + zone = find_zone(conf, &sector_offset); sect_in_chunk = bio->bi_sector & (chunk_sects - 1); { - sector_t x = (zone->sectors + sector - zone->zone_end) - >> chunksect_bits; + sector_t x = sector_offset >> chunksect_bits; sector_div(x, zone->nb_dev); chunk = x; diff --git a/drivers/md/raid0.h b/drivers/md/raid0.h index dbcf1da916b..124ba34c8ee 100644 --- a/drivers/md/raid0.h +++ b/drivers/md/raid0.h @@ -5,7 +5,6 @@ struct strip_zone { sector_t zone_end; /* Start of the next zone (in sectors) */ sector_t dev_start; /* Zone offset in real dev (in sectors) */ - sector_t sectors; /* Zone size in sectors */ int nb_dev; /* # of devices attached to the zone */ mdk_rdev_t **dev; /* Devices attached to the zone */ }; -- cgit v1.2.3-70-g09d2 From b414579f4573b6dc8583e31b01dcffd13f49fd62 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 16 Jun 2009 16:50:52 +1000 Subject: md: raid0: remove ->dev pointer from strip_zone structure If we treat conf->devlist more like a 2 dimensional array, we can get the devlist for a particular zone simply by indexing that array, so we don't need to store the pointers to subarrays in strip_zone. This makes strip_zone smaller and so (hopefully) searches faster. 
Signed-off-by: NeilBrown --- drivers/md/raid0.c | 21 +++++++++++---------- drivers/md/raid0.h | 1 - 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 07ef936afc7..af0df78223b 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -27,7 +27,7 @@ static void raid0_unplug(struct request_queue *q) { mddev_t *mddev = q->queuedata; raid0_conf_t *conf = mddev_to_conf(mddev); - mdk_rdev_t **devlist = conf->strip_zone[0].dev; + mdk_rdev_t **devlist = conf->devlist; int i; for (i=0; i<mddev->raid_disks; i++) { @@ -41,7 +41,7 @@ static int raid0_congested(void *data, int bits) { mddev_t *mddev = data; raid0_conf_t *conf = mddev_to_conf(mddev); - mdk_rdev_t **devlist = conf->strip_zone[0].dev; + mdk_rdev_t **devlist = conf->devlist; int i, ret = 0; for (i = 0; i < mddev->raid_disks && !ret ; i++) { @@ -56,7 +56,7 @@ static int create_strip_zones(mddev_t *mddev) { int i, c, j, err; sector_t curr_zone_end, sectors; - mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev; + mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev, **dev; struct strip_zone *zone; int cnt; char b[BDEVNAME_SIZE]; @@ -115,7 +115,7 @@ static int create_strip_zones(mddev_t *mddev) zone = &conf->strip_zone[0]; cnt = 0; smallest = NULL; - zone->dev = conf->devlist; + dev = conf->devlist; err = -EINVAL; list_for_each_entry(rdev1, &mddev->disks, same_set) { int j = rdev1->raid_disk; @@ -125,12 +125,12 @@ static int create_strip_zones(mddev_t *mddev) "aborting!\n", j); goto abort; } - if (zone->dev[j]) { + if (dev[j]) { printk(KERN_ERR "raid0: multiple devices for %d - " "aborting!\n", j); goto abort; } - zone->dev[j] = rdev1; + dev[j] = rdev1; blk_queue_stack_limits(mddev->queue, rdev1->bdev->bd_disk->queue); @@ -161,7 +161,7 @@ static int create_strip_zones(mddev_t *mddev) for (i = 1; i < conf->nr_strip_zones; i++) { zone = conf->strip_zone + i; - zone->dev = conf->strip_zone[i-1].dev + mddev->raid_disks; + dev = conf->devlist + i * 
mddev->raid_disks; printk(KERN_INFO "raid0: zone %d\n", i); zone->dev_start = smallest->sectors; @@ -170,7 +170,7 @@ static int create_strip_zones(mddev_t *mddev) for (j=0; j<cnt; j++) { char b[BDEVNAME_SIZE]; - rdev = conf->strip_zone[0].dev[j]; + rdev = conf->devlist[j]; printk(KERN_INFO "raid0: checking %s ...", bdevname(rdev->bdev, b)); if (rdev->sectors <= zone->dev_start) { @@ -178,7 +178,7 @@ static int create_strip_zones(mddev_t *mddev) continue; } printk(KERN_INFO " contained as device %d\n", c); - zone->dev[c] = rdev; + dev[c] = rdev; c++; if (!smallest || rdev->sectors < smallest->sectors) { smallest = rdev; @@ -383,7 +383,8 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) chunk = x; x = sector >> chunksect_bits; - tmp_dev = zone->dev[sector_div(x, zone->nb_dev)]; + tmp_dev = conf->devlist[(zone - conf->strip_zone)*mddev->raid_disks + + sector_div(x, zone->nb_dev)]; } rsect = (chunk << chunksect_bits) + zone->dev_start + sect_in_chunk; diff --git a/drivers/md/raid0.h b/drivers/md/raid0.h index 124ba34c8ee..7b3605e570c 100644 --- a/drivers/md/raid0.h +++ b/drivers/md/raid0.h @@ -6,7 +6,6 @@ struct strip_zone sector_t zone_end; /* Start of the next zone (in sectors) */ sector_t dev_start; /* Zone offset in real dev (in sectors) */ int nb_dev; /* # of devices attached to the zone */ - mdk_rdev_t **dev; /* Devices attached to the zone */ }; struct raid0_private_data -- cgit v1.2.3-70-g09d2 From a6b3deafe0c50e3e873e8ed5cc8abfcb25c05eff Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 16 Jun 2009 16:54:07 +1000 Subject: md: raid0: remove setting of segment boundary. This setting doesn't seem to make sense (half the chunk size??) and shouldn't be needed. The segment boundary exported by raid0 should simply be the minimum of the segment boundary of all component devices. And we already get that right. 
Signed-off-by: NeilBrown --- drivers/md/raid0.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index af0df78223b..e2e9c183333 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -261,12 +261,7 @@ static int raid0_run(mddev_t *mddev) printk(KERN_ERR "md/raid0: non-zero chunk size required.\n"); return -EINVAL; } - printk(KERN_INFO "%s: setting max_sectors to %d, segment boundary to %d\n", - mdname(mddev), - mddev->chunk_size >> 9, - (mddev->chunk_size>>1)-1); blk_queue_max_sectors(mddev->queue, mddev->chunk_size >> 9); - blk_queue_segment_boundary(mddev->queue, (mddev->chunk_size>>1) - 1); mddev->queue->queue_lock = &mddev->queue->__queue_lock; ret = create_strip_zones(mddev); -- cgit v1.2.3-70-g09d2 From 070ec55d07157a3041f92654135c3c6e2eaaf901 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 16 Jun 2009 16:54:21 +1000 Subject: md: remove mddev_to_conf "helper" macro Having a macro just to cast a void* isn't really helpful. I would much rather see that we are simply de-referencing ->private, than have to know what the macro does. So open code the macro everywhere and remove the pointless cast. 
Signed-off-by: NeilBrown --- drivers/md/linear.c | 12 ++++++------ drivers/md/linear.h | 2 -- drivers/md/multipath.c | 20 ++++++++++---------- drivers/md/multipath.h | 6 ------ drivers/md/raid0.c | 10 +++++----- drivers/md/raid0.h | 2 -- drivers/md/raid1.c | 38 +++++++++++++++++++------------------- drivers/md/raid1.h | 6 ------ drivers/md/raid10.c | 42 +++++++++++++++++++++--------------------- drivers/md/raid10.h | 6 ------ drivers/md/raid5.c | 36 ++++++++++++++++++------------------ drivers/md/raid5.h | 2 -- 12 files changed, 79 insertions(+), 103 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 64f1f3e046e..31f8ec7131b 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -28,7 +28,7 @@ static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector) { dev_info_t *hash; - linear_conf_t *conf = mddev_to_conf(mddev); + linear_conf_t *conf = mddev->private; sector_t idx = sector >> conf->sector_shift; /* @@ -79,7 +79,7 @@ static int linear_mergeable_bvec(struct request_queue *q, static void linear_unplug(struct request_queue *q) { mddev_t *mddev = q->queuedata; - linear_conf_t *conf = mddev_to_conf(mddev); + linear_conf_t *conf = mddev->private; int i; for (i=0; i < mddev->raid_disks; i++) { @@ -91,7 +91,7 @@ static void linear_unplug(struct request_queue *q) static int linear_congested(void *data, int bits) { mddev_t *mddev = data; - linear_conf_t *conf = mddev_to_conf(mddev); + linear_conf_t *conf = mddev->private; int i, ret = 0; for (i = 0; i < mddev->raid_disks && !ret ; i++) { @@ -103,7 +103,7 @@ static int linear_congested(void *data, int bits) static sector_t linear_size(mddev_t *mddev, sector_t sectors, int raid_disks) { - linear_conf_t *conf = mddev_to_conf(mddev); + linear_conf_t *conf = mddev->private; WARN_ONCE(sectors || raid_disks, "%s does not support generic reshape\n", __func__); @@ -294,7 +294,7 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev) if (!newconf) return 
-ENOMEM; - newconf->prev = mddev_to_conf(mddev); + newconf->prev = mddev->private; mddev->private = newconf; mddev->raid_disks++; md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); @@ -304,7 +304,7 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev) static int linear_stop (mddev_t *mddev) { - linear_conf_t *conf = mddev_to_conf(mddev); + linear_conf_t *conf = mddev->private; blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ do { diff --git a/drivers/md/linear.h b/drivers/md/linear.h index bf8179587f9..76078f1cded 100644 --- a/drivers/md/linear.h +++ b/drivers/md/linear.h @@ -24,6 +24,4 @@ struct linear_private_data typedef struct linear_private_data linear_conf_t; -#define mddev_to_conf(mddev) ((linear_conf_t *) mddev->private) - #endif diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 4ee31aa13c4..c1ca63f278a 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -58,7 +58,7 @@ static void multipath_reschedule_retry (struct multipath_bh *mp_bh) { unsigned long flags; mddev_t *mddev = mp_bh->mddev; - multipath_conf_t *conf = mddev_to_conf(mddev); + multipath_conf_t *conf = mddev->private; spin_lock_irqsave(&conf->device_lock, flags); list_add(&mp_bh->retry_list, &conf->retry_list); @@ -75,7 +75,7 @@ static void multipath_reschedule_retry (struct multipath_bh *mp_bh) static void multipath_end_bh_io (struct multipath_bh *mp_bh, int err) { struct bio *bio = mp_bh->master_bio; - multipath_conf_t *conf = mddev_to_conf(mp_bh->mddev); + multipath_conf_t *conf = mp_bh->mddev->private; bio_endio(bio, err); mempool_free(mp_bh, conf->pool); @@ -85,7 +85,7 @@ static void multipath_end_request(struct bio *bio, int error) { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct multipath_bh * mp_bh = (struct multipath_bh *)(bio->bi_private); - multipath_conf_t *conf = mddev_to_conf(mp_bh->mddev); + multipath_conf_t *conf = mp_bh->mddev->private; mdk_rdev_t *rdev = conf->multipaths[mp_bh->path].rdev; if (uptodate) 
@@ -107,7 +107,7 @@ static void multipath_end_request(struct bio *bio, int error) static void unplug_slaves(mddev_t *mddev) { - multipath_conf_t *conf = mddev_to_conf(mddev); + multipath_conf_t *conf = mddev->private; int i; rcu_read_lock(); @@ -138,7 +138,7 @@ static void multipath_unplug(struct request_queue *q) static int multipath_make_request (struct request_queue *q, struct bio * bio) { mddev_t *mddev = q->queuedata; - multipath_conf_t *conf = mddev_to_conf(mddev); + multipath_conf_t *conf = mddev->private; struct multipath_bh * mp_bh; struct multipath_info *multipath; const int rw = bio_data_dir(bio); @@ -180,7 +180,7 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio) static void multipath_status (struct seq_file *seq, mddev_t *mddev) { - multipath_conf_t *conf = mddev_to_conf(mddev); + multipath_conf_t *conf = mddev->private; int i; seq_printf (seq, " [%d/%d] [", conf->raid_disks, @@ -195,7 +195,7 @@ static void multipath_status (struct seq_file *seq, mddev_t *mddev) static int multipath_congested(void *data, int bits) { mddev_t *mddev = data; - multipath_conf_t *conf = mddev_to_conf(mddev); + multipath_conf_t *conf = mddev->private; int i, ret = 0; rcu_read_lock(); @@ -220,7 +220,7 @@ static int multipath_congested(void *data, int bits) */ static void multipath_error (mddev_t *mddev, mdk_rdev_t *rdev) { - multipath_conf_t *conf = mddev_to_conf(mddev); + multipath_conf_t *conf = mddev->private; if (conf->working_disks <= 1) { /* @@ -367,7 +367,7 @@ static void multipathd (mddev_t *mddev) struct multipath_bh *mp_bh; struct bio *bio; unsigned long flags; - multipath_conf_t *conf = mddev_to_conf(mddev); + multipath_conf_t *conf = mddev->private; struct list_head *head = &conf->retry_list; md_check_recovery(mddev); @@ -531,7 +531,7 @@ out: static int multipath_stop (mddev_t *mddev) { - multipath_conf_t *conf = mddev_to_conf(mddev); + multipath_conf_t *conf = mddev->private; md_unregister_thread(mddev->thread); mddev->thread = NULL; 
diff --git a/drivers/md/multipath.h b/drivers/md/multipath.h index 6fa70b400cd..d1c2a8d7839 100644 --- a/drivers/md/multipath.h +++ b/drivers/md/multipath.h @@ -18,12 +18,6 @@ struct multipath_private_data { typedef struct multipath_private_data multipath_conf_t; -/* - * this is the only point in the RAID code where we violate - * C type safety. mddev->private is an 'opaque' pointer. - */ -#define mddev_to_conf(mddev) ((multipath_conf_t *) mddev->private) - /* * this is our 'private' 'collective' MULTIPATH buffer head. * it contains information about what kind of IO operations were started diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index e2e9c183333..77764dad1bc 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -26,7 +26,7 @@ static void raid0_unplug(struct request_queue *q) { mddev_t *mddev = q->queuedata; - raid0_conf_t *conf = mddev_to_conf(mddev); + raid0_conf_t *conf = mddev->private; mdk_rdev_t **devlist = conf->devlist; int i; @@ -40,7 +40,7 @@ static void raid0_unplug(struct request_queue *q) static int raid0_congested(void *data, int bits) { mddev_t *mddev = data; - raid0_conf_t *conf = mddev_to_conf(mddev); + raid0_conf_t *conf = mddev->private; mdk_rdev_t **devlist = conf->devlist; int i, ret = 0; @@ -294,7 +294,7 @@ static int raid0_run(mddev_t *mddev) static int raid0_stop(mddev_t *mddev) { - raid0_conf_t *conf = mddev_to_conf(mddev); + raid0_conf_t *conf = mddev->private; blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ kfree(conf->strip_zone); @@ -327,7 +327,7 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) { mddev_t *mddev = q->queuedata; unsigned int sect_in_chunk, chunksect_bits, chunk_sects; - raid0_conf_t *conf = mddev_to_conf(mddev); + raid0_conf_t *conf = mddev->private; struct strip_zone *zone; mdk_rdev_t *tmp_dev; sector_t chunk; @@ -406,7 +406,7 @@ static void raid0_status (struct seq_file *seq, mddev_t *mddev) #ifdef MD_DEBUG int j, k, h; char b[BDEVNAME_SIZE]; - 
raid0_conf_t *conf = mddev_to_conf(mddev); + raid0_conf_t *conf = mddev->private; h = 0; for (j = 0; j < conf->nr_strip_zones; j++) { diff --git a/drivers/md/raid0.h b/drivers/md/raid0.h index 7b3605e570c..91f8e876ee6 100644 --- a/drivers/md/raid0.h +++ b/drivers/md/raid0.h @@ -17,6 +17,4 @@ struct raid0_private_data typedef struct raid0_private_data raid0_conf_t; -#define mddev_to_conf(mddev) ((raid0_conf_t *) mddev->private) - #endif diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index e23758b4a34..5ea5bca53a5 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -182,7 +182,7 @@ static void put_all_bios(conf_t *conf, r1bio_t *r1_bio) static void free_r1bio(r1bio_t *r1_bio) { - conf_t *conf = mddev_to_conf(r1_bio->mddev); + conf_t *conf = r1_bio->mddev->private; /* * Wake up any possible resync thread that waits for the device @@ -196,7 +196,7 @@ static void free_r1bio(r1bio_t *r1_bio) static void put_buf(r1bio_t *r1_bio) { - conf_t *conf = mddev_to_conf(r1_bio->mddev); + conf_t *conf = r1_bio->mddev->private; int i; for (i=0; i<conf->raid_disks; i++) { @@ -214,7 +214,7 @@ static void reschedule_retry(r1bio_t *r1_bio) { unsigned long flags; mddev_t *mddev = r1_bio->mddev; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; spin_lock_irqsave(&conf->device_lock, flags); list_add(&r1_bio->retry_list, &conf->retry_list); @@ -253,7 +253,7 @@ static void raid_end_bio_io(r1bio_t *r1_bio) */ static inline void update_head_pos(int disk, r1bio_t *r1_bio) { - conf_t *conf = mddev_to_conf(r1_bio->mddev); + conf_t *conf = r1_bio->mddev->private; conf->mirrors[disk].head_position = r1_bio->sector + (r1_bio->sectors); @@ -264,7 +264,7 @@ static void raid1_end_read_request(struct bio *bio, int error) int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); int mirror; - conf_t *conf = mddev_to_conf(r1_bio->mddev); + conf_t *conf = r1_bio->mddev->private; mirror = r1_bio->read_disk; /* @@ -309,7 +309,7 @@ 
static void raid1_end_write_request(struct bio *bio, int error) int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); int mirror, behind = test_bit(R1BIO_BehindIO, &r1_bio->state); - conf_t *conf = mddev_to_conf(r1_bio->mddev); + conf_t *conf = r1_bio->mddev->private; struct bio *to_put = NULL; @@ -541,7 +541,7 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) static void unplug_slaves(mddev_t *mddev) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i; rcu_read_lock(); @@ -573,7 +573,7 @@ static void raid1_unplug(struct request_queue *q) static int raid1_congested(void *data, int bits) { mddev_t *mddev = data; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i, ret = 0; rcu_read_lock(); @@ -772,7 +772,7 @@ do_sync_io: static int make_request(struct request_queue *q, struct bio * bio) { mddev_t *mddev = q->queuedata; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; mirror_info_t *mirror; r1bio_t *r1_bio; struct bio *read_bio; @@ -991,7 +991,7 @@ static int make_request(struct request_queue *q, struct bio * bio) static void status(struct seq_file *seq, mddev_t *mddev) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i; seq_printf(seq, " [%d/%d] [", conf->raid_disks, @@ -1010,7 +1010,7 @@ static void status(struct seq_file *seq, mddev_t *mddev) static void error(mddev_t *mddev, mdk_rdev_t *rdev) { char b[BDEVNAME_SIZE]; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; /* * If it is not operational, then we have already marked it as dead @@ -1214,7 +1214,7 @@ static void end_sync_write(struct bio *bio, int error) int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); mddev_t *mddev = r1_bio->mddev; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i; int mirror=0; @@ -1248,7 +1248,7 @@ static void 
end_sync_write(struct bio *bio, int error) static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i; int disks = conf->raid_disks; struct bio *bio, *wbio; @@ -1562,7 +1562,7 @@ static void raid1d(mddev_t *mddev) r1bio_t *r1_bio; struct bio *bio; unsigned long flags; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; struct list_head *head = &conf->retry_list; int unplug=0; mdk_rdev_t *rdev; @@ -1585,7 +1585,7 @@ static void raid1d(mddev_t *mddev) spin_unlock_irqrestore(&conf->device_lock, flags); mddev = r1_bio->mddev; - conf = mddev_to_conf(mddev); + conf = mddev->private; if (test_bit(R1BIO_IsSync, &r1_bio->state)) { sync_request_write(mddev, r1_bio); unplug = 1; @@ -1706,7 +1706,7 @@ static int init_resync(conf_t *conf) static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; r1bio_t *r1_bio; struct bio *bio; sector_t max_sector, nr_sectors; @@ -2087,7 +2087,7 @@ out: static int stop(mddev_t *mddev) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; struct bitmap *bitmap = mddev->bitmap; int behind_wait = 0; @@ -2155,7 +2155,7 @@ static int raid1_reshape(mddev_t *mddev) mempool_t *newpool, *oldpool; struct pool_info *newpoolinfo; mirror_info_t *newmirrors; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int cnt, raid_disks; unsigned long flags; int d, d2, err; @@ -2252,7 +2252,7 @@ static int raid1_reshape(mddev_t *mddev) static void raid1_quiesce(mddev_t *mddev, int state) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; switch(state) { case 1: diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index 1620eea3d57..e87b84deff6 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h @@ -63,12 +63,6 @@ struct r1_private_data_s { typedef struct r1_private_data_s conf_t; -/* - * 
this is the only point in the RAID code where we violate - * C type safety. mddev->private is an 'opaque' pointer. - */ -#define mddev_to_conf(mddev) ((conf_t *) mddev->private) - /* * this is our 'private' RAID1 bio. * diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 750550c1166..9a5beb4fd95 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -188,7 +188,7 @@ static void put_all_bios(conf_t *conf, r10bio_t *r10_bio) static void free_r10bio(r10bio_t *r10_bio) { - conf_t *conf = mddev_to_conf(r10_bio->mddev); + conf_t *conf = r10_bio->mddev->private; /* * Wake up any possible resync thread that waits for the device @@ -202,7 +202,7 @@ static void free_r10bio(r10bio_t *r10_bio) static void put_buf(r10bio_t *r10_bio) { - conf_t *conf = mddev_to_conf(r10_bio->mddev); + conf_t *conf = r10_bio->mddev->private; mempool_free(r10_bio, conf->r10buf_pool); @@ -213,7 +213,7 @@ static void reschedule_retry(r10bio_t *r10_bio) { unsigned long flags; mddev_t *mddev = r10_bio->mddev; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; spin_lock_irqsave(&conf->device_lock, flags); list_add(&r10_bio->retry_list, &conf->retry_list); @@ -245,7 +245,7 @@ static void raid_end_bio_io(r10bio_t *r10_bio) */ static inline void update_head_pos(int slot, r10bio_t *r10_bio) { - conf_t *conf = mddev_to_conf(r10_bio->mddev); + conf_t *conf = r10_bio->mddev->private; conf->mirrors[r10_bio->devs[slot].devnum].head_position = r10_bio->devs[slot].addr + (r10_bio->sectors); @@ -256,7 +256,7 @@ static void raid10_end_read_request(struct bio *bio, int error) int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); int slot, dev; - conf_t *conf = mddev_to_conf(r10_bio->mddev); + conf_t *conf = r10_bio->mddev->private; slot = r10_bio->read_slot; @@ -297,7 +297,7 @@ static void raid10_end_write_request(struct bio *bio, int error) int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); r10bio_t * r10_bio = (r10bio_t 
*)(bio->bi_private); int slot, dev; - conf_t *conf = mddev_to_conf(r10_bio->mddev); + conf_t *conf = r10_bio->mddev->private; for (slot = 0; slot < conf->copies; slot++) if (r10_bio->devs[slot].bio == bio) @@ -596,7 +596,7 @@ rb_out: static void unplug_slaves(mddev_t *mddev) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i; rcu_read_lock(); @@ -628,7 +628,7 @@ static void raid10_unplug(struct request_queue *q) static int raid10_congested(void *data, int bits) { mddev_t *mddev = data; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i, ret = 0; rcu_read_lock(); @@ -788,7 +788,7 @@ static void unfreeze_array(conf_t *conf) static int make_request(struct request_queue *q, struct bio * bio) { mddev_t *mddev = q->queuedata; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; mirror_info_t *mirror; r10bio_t *r10_bio; struct bio *read_bio; @@ -981,7 +981,7 @@ static int make_request(struct request_queue *q, struct bio * bio) static void status(struct seq_file *seq, mddev_t *mddev) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i; if (conf->near_copies < conf->raid_disks) @@ -1006,7 +1006,7 @@ static void status(struct seq_file *seq, mddev_t *mddev) static void error(mddev_t *mddev, mdk_rdev_t *rdev) { char b[BDEVNAME_SIZE]; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; /* * If it is not operational, then we have already marked it as dead @@ -1215,7 +1215,7 @@ abort: static void end_sync_read(struct bio *bio, int error) { r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); - conf_t *conf = mddev_to_conf(r10_bio->mddev); + conf_t *conf = r10_bio->mddev->private; int i,d; for (i=0; i<conf->copies; i++) @@ -1253,7 +1253,7 @@ static void end_sync_write(struct bio *bio, int error) int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); mddev_t *mddev = r10_bio->mddev; - conf_t *conf = 
mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i,d; for (i = 0; i < conf->copies; i++) @@ -1300,7 +1300,7 @@ static void end_sync_write(struct bio *bio, int error) */ static void sync_request_write(mddev_t *mddev, r10bio_t *r10_bio) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i, first; struct bio *tbio, *fbio; @@ -1400,7 +1400,7 @@ done: static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i, d; struct bio *bio, *wbio; @@ -1549,7 +1549,7 @@ static void raid10d(mddev_t *mddev) r10bio_t *r10_bio; struct bio *bio; unsigned long flags; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; struct list_head *head = &conf->retry_list; int unplug=0; mdk_rdev_t *rdev; @@ -1572,7 +1572,7 @@ static void raid10d(mddev_t *mddev) spin_unlock_irqrestore(&conf->device_lock, flags); mddev = r10_bio->mddev; - conf = mddev_to_conf(mddev); + conf = mddev->private; if (test_bit(R10BIO_IsSync, &r10_bio->state)) { sync_request_write(mddev, r10_bio); unplug = 1; @@ -1680,7 +1680,7 @@ static int init_resync(conf_t *conf) static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; r10bio_t *r10_bio; struct bio *biolist = NULL, *bio; sector_t max_sector, nr_sectors; @@ -2026,7 +2026,7 @@ static sector_t raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks) { sector_t size; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; if (!raid_disks) raid_disks = mddev->raid_disks; @@ -2227,7 +2227,7 @@ out: static int stop(mddev_t *mddev) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; raise_barrier(conf, 0); lower_barrier(conf); @@ -2245,7 +2245,7 @@ static int stop(mddev_t *mddev) static void raid10_quiesce(mddev_t *mddev, int state) { - conf_t *conf = mddev_to_conf(mddev); + conf_t 
*conf = mddev->private; switch(state) { case 1: diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index 244dbe507a5..59cd1efb8d3 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h @@ -61,12 +61,6 @@ struct r10_private_data_s { typedef struct r10_private_data_s conf_t; -/* - * this is the only point in the RAID code where we violate - * C type safety. mddev->private is an 'opaque' pointer. - */ -#define mddev_to_conf(mddev) ((conf_t *) mddev->private) - /* * this is our 'private' RAID10 bio. * diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index bef87669823..7fb97c65ad3 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3284,7 +3284,7 @@ static void activate_bit_delay(raid5_conf_t *conf) static void unplug_slaves(mddev_t *mddev) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; int i; rcu_read_lock(); @@ -3308,7 +3308,7 @@ static void unplug_slaves(mddev_t *mddev) static void raid5_unplug_device(struct request_queue *q) { mddev_t *mddev = q->queuedata; - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; unsigned long flags; spin_lock_irqsave(&conf->device_lock, flags); @@ -3327,7 +3327,7 @@ static void raid5_unplug_device(struct request_queue *q) static int raid5_congested(void *data, int bits) { mddev_t *mddev = data; - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; /* No difference between reads and writes. 
Just check * how busy the stripe_cache is @@ -3440,7 +3440,7 @@ static void raid5_align_endio(struct bio *bi, int error) bio_put(bi); mddev = raid_bi->bi_bdev->bd_disk->queue->queuedata; - conf = mddev_to_conf(mddev); + conf = mddev->private; rdev = (void*)raid_bi->bi_next; raid_bi->bi_next = NULL; @@ -3482,7 +3482,7 @@ static int bio_fits_rdev(struct bio *bi) static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio) { mddev_t *mddev = q->queuedata; - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; unsigned int dd_idx; struct bio* align_bi; mdk_rdev_t *rdev; @@ -3599,7 +3599,7 @@ static struct stripe_head *__get_priority_stripe(raid5_conf_t *conf) static int make_request(struct request_queue *q, struct bio * bi) { mddev_t *mddev = q->queuedata; - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; int dd_idx; sector_t new_sector; sector_t logical_sector, last_sector; @@ -4129,7 +4129,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) static void raid5d(mddev_t *mddev) { struct stripe_head *sh; - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; int handled; pr_debug("+++ raid5d active\n"); @@ -4185,7 +4185,7 @@ static void raid5d(mddev_t *mddev) static ssize_t raid5_show_stripe_cache_size(mddev_t *mddev, char *page) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; if (conf) return sprintf(page, "%d\n", conf->max_nr_stripes); else @@ -4195,7 +4195,7 @@ raid5_show_stripe_cache_size(mddev_t *mddev, char *page) static ssize_t raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; unsigned long new; int err; @@ -4233,7 +4233,7 @@ raid5_stripecache_size = __ATTR(stripe_cache_size, S_IRUGO | S_IWUSR, static ssize_t raid5_show_preread_threshold(mddev_t *mddev, char *page) { - raid5_conf_t 
*conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; if (conf) return sprintf(page, "%d\n", conf->bypass_threshold); else @@ -4243,7 +4243,7 @@ raid5_show_preread_threshold(mddev_t *mddev, char *page) static ssize_t raid5_store_preread_threshold(mddev_t *mddev, const char *page, size_t len) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; unsigned long new; if (len >= PAGE_SIZE) return -EINVAL; @@ -4267,7 +4267,7 @@ raid5_preread_bypass_threshold = __ATTR(preread_bypass_threshold, static ssize_t stripe_cache_active_show(mddev_t *mddev, char *page) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; if (conf) return sprintf(page, "%d\n", atomic_read(&conf->active_stripes)); else @@ -4291,7 +4291,7 @@ static struct attribute_group raid5_attrs_group = { static sector_t raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; if (!sectors) sectors = mddev->dev_sectors; @@ -4845,7 +4845,7 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors) static int raid5_check_reshape(mddev_t *mddev) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; if (mddev->delta_disks == 0 && mddev->new_layout == mddev->layout && @@ -4890,7 +4890,7 @@ static int raid5_check_reshape(mddev_t *mddev) static int raid5_start_reshape(mddev_t *mddev) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; mdk_rdev_t *rdev; int spares = 0; int added_devices = 0; @@ -5022,7 +5022,7 @@ static void end_reshape(raid5_conf_t *conf) static void raid5_finish_reshape(mddev_t *mddev) { struct block_device *bdev; - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { @@ -5061,7 +5061,7 @@ static void raid5_finish_reshape(mddev_t *mddev) static void raid5_quiesce(mddev_t *mddev, int 
state) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; switch(state) { case 2: /* resume for a suspend */ @@ -5157,7 +5157,7 @@ static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk) * For larger arrays we record the new value - after validation * to be used by a reshape pass. */ - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; if (new_layout >= 0 && !algorithm_valid_raid5(new_layout)) return -EINVAL; diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 52ba99954de..1a25c9e252b 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -408,8 +408,6 @@ struct raid5_private_data { typedef struct raid5_private_data raid5_conf_t; -#define mddev_to_conf(mddev) ((raid5_conf_t *) mddev->private) - /* * Our supported algorithms */ -- cgit v1.2.3-70-g09d2 From 1b9614291eb319fad96de45392eb4452ad39f0ee Mon Sep 17 00:00:00 2001 From: raz ben yehuda Date: Tue, 16 Jun 2009 16:57:40 +1000 Subject: md: have raid0 compile with MD_DEBUG on Because of the removal of the device list from the strips raid0 did not compile with MD_DEBUG flag on Signed-off-by: NeilBrown --- drivers/md/raid0.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 77764dad1bc..d8692fc1796 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -400,7 +400,7 @@ bad_map: return 0; } -static void raid0_status (struct seq_file *seq, mddev_t *mddev) +static void raid0_status(struct seq_file *seq, mddev_t *mddev) { #undef MD_DEBUG #ifdef MD_DEBUG @@ -408,18 +408,24 @@ static void raid0_status (struct seq_file *seq, mddev_t *mddev) char b[BDEVNAME_SIZE]; raid0_conf_t *conf = mddev->private; + sector_t zone_size; + sector_t zone_start = 0; h = 0; + for (j = 0; j < conf->nr_strip_zones; j++) { seq_printf(seq, " z%d", j); seq_printf(seq, "=["); for (k = 0; k < conf->strip_zone[j].nb_dev; k++) 
seq_printf(seq, "%s/", bdevname( - conf->strip_zone[j].dev[k]->bdev,b)); - - seq_printf(seq, "] ze=%d ds=%d s=%d\n", - conf->strip_zone[j].zone_end, - conf->strip_zone[j].dev_start, - conf->strip_zone[j].sectors); + conf->devlist[j*mddev->raid_disks + k] + ->bdev, b)); + + zone_size = conf->strip_zone[j].zone_end - zone_start; + seq_printf(seq, "] ze=%lld ds=%lld s=%lld\n", + (unsigned long long)zone_start>>1, + (unsigned long long)conf->strip_zone[j].dev_start>>1, + (unsigned long long)zone_size>>1); + zone_start = conf->strip_zone[j].zone_end; } #endif seq_printf(seq, " %dk chunks", mddev->chunk_size/1024); -- cgit v1.2.3-70-g09d2 From 46994191ae8fdf1cbcc1f29282576b269a638c69 Mon Sep 17 00:00:00 2001 From: raz ben yehuda Date: Tue, 16 Jun 2009 17:00:54 +1000 Subject: md: have raid0 report its formation Report to the user what are the raid zones Signed-off-by: raziebe@gmail.com Signed-off-by: NeilBrown --- drivers/md/raid0.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index d8692fc1796..62fde23bf28 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -52,6 +52,38 @@ static int raid0_congested(void *data, int bits) return ret; } +/* + * inform the user of the raid configuration +*/ +static void dump_zones(mddev_t *mddev) +{ + int j, k, h; + sector_t zone_size = 0; + sector_t zone_start = 0; + char b[BDEVNAME_SIZE]; + raid0_conf_t *conf = mddev->private; + printk(KERN_INFO "******* %s configuration *********\n", + mdname(mddev)); + h = 0; + for (j = 0; j < conf->nr_strip_zones; j++) { + printk(KERN_INFO "zone%d=[", j); + for (k = 0; k < conf->strip_zone[j].nb_dev; k++) + printk("%s/", + bdevname(conf->devlist[j*mddev->raid_disks + + k]->bdev, b)); + printk("]\n"); + + zone_size = conf->strip_zone[j].zone_end - zone_start; + printk(KERN_INFO " zone offset=%llukb " + "device offset=%llukb size=%llukb\n", + (unsigned long long)zone_start>>1, + 
(unsigned long long)conf->strip_zone[j].dev_start>>1, + (unsigned long long)zone_size>>1); + zone_start = conf->strip_zone[j].zone_end; + } + printk(KERN_INFO "**********************************\n\n"); +} + static int create_strip_zones(mddev_t *mddev) { int i, c, j, err; @@ -289,6 +321,7 @@ static int raid0_run(mddev_t *mddev) } blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec); + dump_zones(mddev); return 0; } -- cgit v1.2.3-70-g09d2 From 92e59b6ba21845fadd2cce725010a9351740b76e Mon Sep 17 00:00:00 2001 From: raz ben yehuda Date: Tue, 16 Jun 2009 17:00:57 +1000 Subject: md: raid0: chunk size check in raid0_run have raid0 check chunk size in run method instead of in md. This is part of a series moving the checks from common code to the personalities where they belong. hardsect is short and chunksize is an int, so it is safe to use %. Signed-off-by: raziebe@gmail.com Signed-off-by: NeilBrown --- drivers/md/raid0.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 62fde23bf28..39936a217f9 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -234,6 +234,16 @@ static int create_strip_zones(mddev_t *mddev) mddev->queue->backing_dev_info.congested_fn = raid0_congested; mddev->queue->backing_dev_info.congested_data = mddev; + /* + * now since we have the hard sector sizes, we can make sure + * chunk size is a multiple of that sector size + */ + if (mddev->chunk_size % queue_logical_block_size(mddev->queue)) { + printk(KERN_ERR "%s chunk_size of %d not valid\n", + mdname(mddev), + mddev->chunk_size); + goto abort; + } printk(KERN_INFO "raid0: done.\n"); mddev->private = conf; return 0; @@ -289,8 +299,9 @@ static int raid0_run(mddev_t *mddev) { int ret; - if (mddev->chunk_size == 0) { - printk(KERN_ERR "md/raid0: non-zero chunk size required.\n"); + if (mddev->chunk_size == 0 || + !is_power_of_2(mddev->chunk_size)) { + printk(KERN_ERR "md/raid0: 
chunk size must be a power of 2.\n"); return -EINVAL; } blk_queue_max_sectors(mddev->queue, mddev->chunk_size >> 9); -- cgit v1.2.3-70-g09d2 From fbb704efb784e2c8418e34dc3013af76bdd58101 Mon Sep 17 00:00:00 2001 From: raz ben yehuda Date: Tue, 16 Jun 2009 17:02:05 +1000 Subject: md: raid0 :Enables chunk size other than powers of 2. Maintain two flows, one for pow2 chunk sizes (which uses masks and shift), and a flow for the general case (which uses sector_div). This is for the sake of performance. - introduce map_sector and is_io_in_chunk_boundary to encapsulate those two flows better for raid0_make_request - fix blk_mergeable to support the two flows. Signed-off-by: raziebe@gmail.com Signed-off-by: NeilBrown --- drivers/md/raid0.c | 107 ++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 77 insertions(+), 30 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 39936a217f9..7cd2671cc79 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -273,7 +273,12 @@ static int raid0_mergeable_bvec(struct request_queue *q, unsigned int chunk_sectors = mddev->chunk_size >> 9; unsigned int bio_sectors = bvm->bi_size >> 9; - max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; + if (is_power_of_2(mddev->chunk_size)) + max = (chunk_sectors - ((sector & (chunk_sectors-1)) + + bio_sectors)) << 9; + else + max = (chunk_sectors - (sector_div(sector, chunk_sectors) + + bio_sectors)) << 9; if (max < 0) max = 0; /* bio_add cannot handle a negative return */ if (max <= biovec->bv_len && bio_sectors == 0) return biovec->bv_len; @@ -299,9 +304,8 @@ static int raid0_run(mddev_t *mddev) { int ret; - if (mddev->chunk_size == 0 || - !is_power_of_2(mddev->chunk_size)) { - printk(KERN_ERR "md/raid0: chunk size must be a power of 2.\n"); + if (mddev->chunk_size == 0) { + printk(KERN_ERR "md/raid0: chunk size must be set.\n"); return -EINVAL; } blk_queue_max_sectors(mddev->queue, mddev->chunk_size 
>> 9); @@ -367,15 +371,65 @@ static struct strip_zone *find_zone(struct raid0_private_data *conf, BUG(); } -static int raid0_make_request (struct request_queue *q, struct bio *bio) +/* + * remaps the bio to the target device. we separate two flows. + * power 2 flow and a general flow for the sake of perfromance +*/ +static mdk_rdev_t *map_sector(mddev_t *mddev, struct strip_zone *zone, + sector_t sector, sector_t *sector_offset) { - mddev_t *mddev = q->queuedata; - unsigned int sect_in_chunk, chunksect_bits, chunk_sects; + unsigned int sect_in_chunk; + sector_t chunk; raid0_conf_t *conf = mddev->private; + unsigned int chunk_sects = mddev->chunk_size >> 9; + + if (is_power_of_2(mddev->chunk_size)) { + int chunksect_bits = ffz(~chunk_sects); + /* find the sector offset inside the chunk */ + sect_in_chunk = sector & (chunk_sects - 1); + sector >>= chunksect_bits; + /* chunk in zone */ + chunk = *sector_offset; + /* quotient is the chunk in real device*/ + sector_div(chunk, zone->nb_dev << chunksect_bits); + } else{ + sect_in_chunk = sector_div(sector, chunk_sects); + chunk = *sector_offset; + sector_div(chunk, chunk_sects * zone->nb_dev); + } + /* + * position the bio over the real device + * real sector = chunk in device + starting of zone + * + the position in the chunk + */ + *sector_offset = (chunk * chunk_sects) + sect_in_chunk; + return conf->devlist[(zone - conf->strip_zone)*mddev->raid_disks + + sector_div(sector, zone->nb_dev)]; +} + +/* + * Is io distribute over 1 or more chunks ? 
+*/ +static inline int is_io_in_chunk_boundary(mddev_t *mddev, + unsigned int chunk_sects, struct bio *bio) +{ + if (likely(is_power_of_2(mddev->chunk_size))) { + return chunk_sects >= ((bio->bi_sector & (chunk_sects-1)) + + (bio->bi_size >> 9)); + } else{ + sector_t sector = bio->bi_sector; + return chunk_sects >= (sector_div(sector, chunk_sects) + + (bio->bi_size >> 9)); + } +} + +static int raid0_make_request(struct request_queue *q, struct bio *bio) +{ + mddev_t *mddev = q->queuedata; + unsigned int chunk_sects; + sector_t sector_offset; struct strip_zone *zone; mdk_rdev_t *tmp_dev; - sector_t chunk; - sector_t sector, rsect, sector_offset; const int rw = bio_data_dir(bio); int cpu; @@ -391,10 +445,8 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) part_stat_unlock(); chunk_sects = mddev->chunk_size >> 9; - chunksect_bits = ffz(~chunk_sects); - sector = bio->bi_sector; - - if (unlikely(chunk_sects < (bio->bi_sector & (chunk_sects - 1)) + (bio->bi_size >> 9))) { + if (unlikely(!is_io_in_chunk_boundary(mddev, chunk_sects, bio))) { + sector_t sector = bio->bi_sector; struct bio_pair *bp; /* Sanity check -- queue functions should prevent this happening */ if (bio->bi_vcnt != 1 || @@ -403,7 +455,12 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) /* This is a one page bio that upper layers * refuse to split for us, so we need to split it. 
*/ - bp = bio_split(bio, chunk_sects - (bio->bi_sector & (chunk_sects - 1))); + if (likely(is_power_of_2(mddev->chunk_size))) + bp = bio_split(bio, chunk_sects - (sector & + (chunk_sects-1))); + else + bp = bio_split(bio, chunk_sects - + sector_div(sector, chunk_sects)); if (raid0_make_request(q, &bp->bio1)) generic_make_request(&bp->bio1); if (raid0_make_request(q, &bp->bio2)) @@ -412,24 +469,14 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) bio_pair_release(bp); return 0; } - sector_offset = sector; - zone = find_zone(conf, &sector_offset); - sect_in_chunk = bio->bi_sector & (chunk_sects - 1); - { - sector_t x = sector_offset >> chunksect_bits; - - sector_div(x, zone->nb_dev); - chunk = x; - x = sector >> chunksect_bits; - tmp_dev = conf->devlist[(zone - conf->strip_zone)*mddev->raid_disks - + sector_div(x, zone->nb_dev)]; - } - rsect = (chunk << chunksect_bits) + zone->dev_start + sect_in_chunk; - + sector_offset = bio->bi_sector; + zone = find_zone(mddev->private, &sector_offset); + tmp_dev = map_sector(mddev, zone, bio->bi_sector, + &sector_offset); bio->bi_bdev = tmp_dev->bdev; - bio->bi_sector = rsect + tmp_dev->data_offset; - + bio->bi_sector = sector_offset + zone->dev_start + + tmp_dev->data_offset; /* * Let the main block layer submit the IO and resolve recursion: */ -- cgit v1.2.3-70-g09d2 From 9d8f0363623b3da12c43007cf77f5e1a4e8a5964 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Thu, 18 Jun 2009 08:45:01 +1000 Subject: md: Make mddev->chunk_size sector-based. This patch renames the chunk_size field to chunk_sectors with the implied change of semantics. Since is_power_of_2(chunk_size) = is_power_of_2(chunk_sectors << 9) = is_power_of_2(chunk_sectors) these bits don't need an adjustment for the shift. 
Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/linear.c | 2 +- drivers/md/md.c | 51 ++++++++++++++++++++++++++------------------------- drivers/md/md.h | 2 +- drivers/md/raid0.c | 27 ++++++++++++++------------- drivers/md/raid1.c | 4 ++-- drivers/md/raid10.c | 15 ++++++++------- drivers/md/raid5.c | 41 ++++++++++++++++++++++------------------- 7 files changed, 74 insertions(+), 68 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 9b02a73fbc6..9f7cec42dd8 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -305,7 +305,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) static void linear_status (struct seq_file *seq, mddev_t *mddev) { - seq_printf(seq, " %dk rounding", mddev->chunk_size/1024); + seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2); } diff --git a/drivers/md/md.c b/drivers/md/md.c index a02bde70874..abcc0fef30e 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -869,7 +869,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->minor_version = sb->minor_version; mddev->patch_version = sb->patch_version; mddev->external = 0; - mddev->chunk_size = sb->chunk_size; + mddev->chunk_sectors = sb->chunk_size >> 9; mddev->ctime = sb->ctime; mddev->utime = sb->utime; mddev->level = sb->level; @@ -892,7 +892,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->delta_disks = 0; mddev->new_level = mddev->level; mddev->new_layout = mddev->layout; - mddev->new_chunk = mddev->chunk_size; + mddev->new_chunk = mddev->chunk_sectors << 9; } if (sb->state & (1<recovery_cp = 0; sb->layout = mddev->layout; - sb->chunk_size = mddev->chunk_size; + sb->chunk_size = mddev->chunk_sectors << 9; if (mddev->bitmap && mddev->bitmap_file == NULL) sb->state |= (1<major_version = 1; mddev->patch_version = 0; mddev->external = 0; - mddev->chunk_size = le32_to_cpu(sb->chunksize) << 9; + mddev->chunk_sectors = 
le32_to_cpu(sb->chunksize); mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1); mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1); mddev->level = le32_to_cpu(sb->level); @@ -1310,7 +1310,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->delta_disks = 0; mddev->new_level = mddev->level; mddev->new_layout = mddev->layout; - mddev->new_chunk = mddev->chunk_size; + mddev->new_chunk = mddev->chunk_sectors << 9; } } else if (mddev->pers == NULL) { @@ -1382,7 +1382,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->raid_disks = cpu_to_le32(mddev->raid_disks); sb->size = cpu_to_le64(mddev->dev_sectors); - sb->chunksize = cpu_to_le32(mddev->chunk_size >> 9); + sb->chunksize = cpu_to_le32(mddev->chunk_sectors); sb->level = cpu_to_le32(mddev->level); sb->layout = cpu_to_le32(mddev->layout); @@ -2753,7 +2753,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len) if (IS_ERR(priv)) { mddev->new_level = mddev->level; mddev->new_layout = mddev->layout; - mddev->new_chunk = mddev->chunk_size; + mddev->new_chunk = mddev->chunk_sectors << 9; mddev->raid_disks -= mddev->delta_disks; mddev->delta_disks = 0; module_put(pers->owner); @@ -2771,7 +2771,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len) strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); mddev->level = mddev->new_level; mddev->layout = mddev->new_layout; - mddev->chunk_size = mddev->new_chunk; + mddev->chunk_sectors = mddev->new_chunk >> 9; mddev->delta_disks = 0; pers->run(mddev); mddev_resume(mddev); @@ -2864,10 +2864,10 @@ static ssize_t chunk_size_show(mddev_t *mddev, char *page) { if (mddev->reshape_position != MaxSector && - mddev->chunk_size != mddev->new_chunk) + mddev->chunk_sectors << 9 != mddev->new_chunk) return sprintf(page, "%d (%d)\n", mddev->new_chunk, - mddev->chunk_size); - return sprintf(page, "%d\n", mddev->chunk_size); + mddev->chunk_sectors << 9); + return sprintf(page, "%d\n", mddev->chunk_sectors << 9); } static ssize_t @@ 
-2889,7 +2889,7 @@ chunk_size_store(mddev_t *mddev, const char *buf, size_t len) } else { mddev->new_chunk = n; if (mddev->reshape_position == MaxSector) - mddev->chunk_size = n; + mddev->chunk_sectors = n >> 9; } return len; } @@ -3534,9 +3534,9 @@ min_sync_store(mddev_t *mddev, const char *buf, size_t len) return -EBUSY; /* Must be a multiple of chunk_size */ - if (mddev->chunk_size) { + if (mddev->chunk_sectors) { sector_t temp = min; - if (sector_div(temp, (mddev->chunk_size>>9))) + if (sector_div(temp, mddev->chunk_sectors)) return -EINVAL; } mddev->resync_min = min; @@ -3572,9 +3572,9 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len) return -EBUSY; /* Must be a multiple of chunk_size */ - if (mddev->chunk_size) { + if (mddev->chunk_sectors) { sector_t temp = max; - if (sector_div(temp, (mddev->chunk_size>>9))) + if (sector_div(temp, mddev->chunk_sectors)) return -EINVAL; } mddev->resync_max = max; @@ -3665,7 +3665,7 @@ reshape_position_store(mddev_t *mddev, const char *buf, size_t len) mddev->delta_disks = 0; mddev->new_level = mddev->level; mddev->new_layout = mddev->layout; - mddev->new_chunk = mddev->chunk_size; + mddev->new_chunk = mddev->chunk_sectors << 9; return len; } @@ -4007,7 +4007,7 @@ static int do_md_run(mddev_t * mddev) analyze_sbs(mddev); } - chunk_size = mddev->chunk_size; + chunk_size = mddev->chunk_sectors << 9; if (chunk_size) { if (chunk_size > MAX_CHUNK_SIZE) { @@ -4406,7 +4406,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) mddev->flags = 0; mddev->ro = 0; mddev->metadata_type[0] = 0; - mddev->chunk_size = 0; + mddev->chunk_sectors = 0; mddev->ctime = mddev->utime = 0; mddev->layout = 0; mddev->max_disks = 0; @@ -4619,7 +4619,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg) info.spare_disks = spare; info.layout = mddev->layout; - info.chunk_size = mddev->chunk_size; + info.chunk_size = mddev->chunk_sectors << 9; if (copy_to_user(arg, &info, sizeof(info))) return -EFAULT; @@ -4844,7 
+4844,8 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) rdev->sb_start = rdev->bdev->bd_inode->i_size / 512; } else rdev->sb_start = calc_dev_sboffset(rdev->bdev); - rdev->sectors = calc_num_sectors(rdev, mddev->chunk_size); + rdev->sectors = calc_num_sectors(rdev, + mddev->chunk_sectors << 9); err = bind_rdev_to_array(rdev, mddev); if (err) { @@ -4914,7 +4915,7 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev) else rdev->sb_start = rdev->bdev->bd_inode->i_size / 512; - rdev->sectors = calc_num_sectors(rdev, mddev->chunk_size); + rdev->sectors = calc_num_sectors(rdev, mddev->chunk_sectors << 9); if (test_bit(Faulty, &rdev->flags)) { printk(KERN_WARNING @@ -5063,7 +5064,7 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) mddev->external = 0; mddev->layout = info->layout; - mddev->chunk_size = info->chunk_size; + mddev->chunk_sectors = info->chunk_size >> 9; mddev->max_disks = MD_SB_DISKS; @@ -5082,7 +5083,7 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) get_random_bytes(mddev->uuid, 16); mddev->new_level = mddev->level; - mddev->new_chunk = mddev->chunk_size; + mddev->new_chunk = mddev->chunk_sectors << 9; mddev->new_layout = mddev->layout; mddev->delta_disks = 0; @@ -5192,7 +5193,7 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) mddev->level != info->level || /* mddev->layout != info->layout || */ !mddev->persistent != info->not_persistent|| - mddev->chunk_size != info->chunk_size || + mddev->chunk_sectors != info->chunk_size >> 9 || /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */ ((state^info->state) & 0xfffffe00) ) diff --git a/drivers/md/md.h b/drivers/md/md.h index 8227ab909d4..5d78830043d 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -145,7 +145,7 @@ struct mddev_s int external; /* metadata is * managed externally */ char metadata_type[17]; /* externally set*/ - int chunk_size; + int chunk_sectors; time_t ctime, utime; int level, 
layout; char clevel[16]; diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 7cd2671cc79..f20b18ff796 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -238,10 +238,10 @@ static int create_strip_zones(mddev_t *mddev) * now since we have the hard sector sizes, we can make sure * chunk size is a multiple of that sector size */ - if (mddev->chunk_size % queue_logical_block_size(mddev->queue)) { + if ((mddev->chunk_sectors << 9) % queue_logical_block_size(mddev->queue)) { printk(KERN_ERR "%s chunk_size of %d not valid\n", mdname(mddev), - mddev->chunk_size); + mddev->chunk_sectors << 9); goto abort; } printk(KERN_INFO "raid0: done.\n"); @@ -270,10 +270,10 @@ static int raid0_mergeable_bvec(struct request_queue *q, mddev_t *mddev = q->queuedata; sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); int max; - unsigned int chunk_sectors = mddev->chunk_size >> 9; + unsigned int chunk_sectors = mddev->chunk_sectors; unsigned int bio_sectors = bvm->bi_size >> 9; - if (is_power_of_2(mddev->chunk_size)) + if (is_power_of_2(mddev->chunk_sectors)) max = (chunk_sectors - ((sector & (chunk_sectors-1)) + bio_sectors)) << 9; else @@ -304,11 +304,11 @@ static int raid0_run(mddev_t *mddev) { int ret; - if (mddev->chunk_size == 0) { + if (mddev->chunk_sectors == 0) { printk(KERN_ERR "md/raid0: chunk size must be set.\n"); return -EINVAL; } - blk_queue_max_sectors(mddev->queue, mddev->chunk_size >> 9); + blk_queue_max_sectors(mddev->queue, mddev->chunk_sectors); mddev->queue->queue_lock = &mddev->queue->__queue_lock; ret = create_strip_zones(mddev); @@ -330,7 +330,8 @@ static int raid0_run(mddev_t *mddev) * chunksize should be used in that case. 
*/ { - int stripe = mddev->raid_disks * mddev->chunk_size / PAGE_SIZE; + int stripe = mddev->raid_disks * + (mddev->chunk_sectors << 9) / PAGE_SIZE; if (mddev->queue->backing_dev_info.ra_pages < 2* stripe) mddev->queue->backing_dev_info.ra_pages = 2* stripe; } @@ -381,9 +382,9 @@ static mdk_rdev_t *map_sector(mddev_t *mddev, struct strip_zone *zone, unsigned int sect_in_chunk; sector_t chunk; raid0_conf_t *conf = mddev->private; - unsigned int chunk_sects = mddev->chunk_size >> 9; + unsigned int chunk_sects = mddev->chunk_sectors; - if (is_power_of_2(mddev->chunk_size)) { + if (is_power_of_2(mddev->chunk_sectors)) { int chunksect_bits = ffz(~chunk_sects); /* find the sector offset inside the chunk */ sect_in_chunk = sector & (chunk_sects - 1); @@ -413,7 +414,7 @@ static mdk_rdev_t *map_sector(mddev_t *mddev, struct strip_zone *zone, static inline int is_io_in_chunk_boundary(mddev_t *mddev, unsigned int chunk_sects, struct bio *bio) { - if (likely(is_power_of_2(mddev->chunk_size))) { + if (likely(is_power_of_2(mddev->chunk_sectors))) { return chunk_sects >= ((bio->bi_sector & (chunk_sects-1)) + (bio->bi_size >> 9)); } else{ @@ -444,7 +445,7 @@ static int raid0_make_request(struct request_queue *q, struct bio *bio) bio_sectors(bio)); part_stat_unlock(); - chunk_sects = mddev->chunk_size >> 9; + chunk_sects = mddev->chunk_sectors; if (unlikely(!is_io_in_chunk_boundary(mddev, chunk_sects, bio))) { sector_t sector = bio->bi_sector; struct bio_pair *bp; @@ -455,7 +456,7 @@ static int raid0_make_request(struct request_queue *q, struct bio *bio) /* This is a one page bio that upper layers * refuse to split for us, so we need to split it. 
*/ - if (likely(is_power_of_2(mddev->chunk_size))) + if (likely(is_power_of_2(mddev->chunk_sectors))) bp = bio_split(bio, chunk_sects - (sector & (chunk_sects-1))); else @@ -519,7 +520,7 @@ static void raid0_status(struct seq_file *seq, mddev_t *mddev) zone_start = conf->strip_zone[j].zone_end; } #endif - seq_printf(seq, " %dk chunks", mddev->chunk_size/1024); + seq_printf(seq, " %dk chunks", mddev->chunk_sectors / 2); return; } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 5ea5bca53a5..388635735ae 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2161,10 +2161,10 @@ static int raid1_reshape(mddev_t *mddev) int d, d2, err; /* Cannot change chunk_size, layout, or level */ - if (mddev->chunk_size != mddev->new_chunk || + if (mddev->chunk_sectors << 9 != mddev->new_chunk || mddev->layout != mddev->new_layout || mddev->level != mddev->new_level) { - mddev->new_chunk = mddev->chunk_size; + mddev->new_chunk = mddev->chunk_sectors << 9; mddev->new_layout = mddev->layout; mddev->new_level = mddev->level; return -EINVAL; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 06bef686f91..30029a312cf 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -461,7 +461,7 @@ static int raid10_mergeable_bvec(struct request_queue *q, mddev_t *mddev = q->queuedata; sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); int max; - unsigned int chunk_sectors = mddev->chunk_size >> 9; + unsigned int chunk_sectors = mddev->chunk_sectors; unsigned int bio_sectors = bvm->bi_size >> 9; max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; @@ -985,7 +985,7 @@ static void status(struct seq_file *seq, mddev_t *mddev) int i; if (conf->near_copies < conf->raid_disks) - seq_printf(seq, " %dK chunks", mddev->chunk_size/1024); + seq_printf(seq, " %dK chunks", mddev->chunk_sectors / 2); if (conf->near_copies > 1) seq_printf(seq, " %d near-copies", conf->near_copies); if (conf->far_copies > 1) { @@ -2050,8 +2050,8 @@ static int 
run(mddev_t *mddev) int nc, fc, fo; sector_t stride, size; - if (mddev->chunk_size < PAGE_SIZE || - !is_power_of_2(mddev->chunk_size)) { + if (mddev->chunk_sectors < (PAGE_SIZE >> 9) || + !is_power_of_2(mddev->chunk_sectors)) { printk(KERN_ERR "md/raid10: chunk size must be " "at least PAGE_SIZE(%ld) and be a power of 2.\n", PAGE_SIZE); return -EINVAL; @@ -2096,8 +2096,8 @@ static int run(mddev_t *mddev) conf->far_copies = fc; conf->copies = nc*fc; conf->far_offset = fo; - conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1; - conf->chunk_shift = ffz(~mddev->chunk_size) - 9; + conf->chunk_mask = mddev->chunk_sectors - 1; + conf->chunk_shift = ffz(~mddev->chunk_sectors); size = mddev->dev_sectors >> conf->chunk_shift; sector_div(size, fc); size = size * conf->raid_disks; @@ -2205,7 +2205,8 @@ static int run(mddev_t *mddev) * maybe... */ { - int stripe = conf->raid_disks * (mddev->chunk_size / PAGE_SIZE); + int stripe = conf->raid_disks * + ((mddev->chunk_sectors << 9) / PAGE_SIZE); stripe /= conf->near_copies; if (mddev->queue->backing_dev_info.ra_pages < 2* stripe) mddev->queue->backing_dev_info.ra_pages = 2* stripe; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index be4e62f611b..1e4fd5e8bfd 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3352,13 +3352,13 @@ static int raid5_mergeable_bvec(struct request_queue *q, mddev_t *mddev = q->queuedata; sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); int max; - unsigned int chunk_sectors = mddev->chunk_size >> 9; + unsigned int chunk_sectors = mddev->chunk_sectors; unsigned int bio_sectors = bvm->bi_size >> 9; if ((bvm->bi_rw & 1) == WRITE) return biovec->bv_len; /* always allow writes to be mergeable */ - if (mddev->new_chunk < mddev->chunk_size) + if (mddev->new_chunk < mddev->chunk_sectors << 9) chunk_sectors = mddev->new_chunk >> 9; max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; if (max < 0) max = 0; @@ -3372,10 +3372,10 @@ static int 
raid5_mergeable_bvec(struct request_queue *q, static int in_chunk_boundary(mddev_t *mddev, struct bio *bio) { sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); - unsigned int chunk_sectors = mddev->chunk_size >> 9; + unsigned int chunk_sectors = mddev->chunk_sectors; unsigned int bio_sectors = bio->bi_size >> 9; - if (mddev->new_chunk < mddev->chunk_size) + if (mddev->new_chunk < mddev->chunk_sectors << 9) chunk_sectors = mddev->new_chunk >> 9; return chunk_sectors >= ((sector & (chunk_sectors - 1)) + bio_sectors); @@ -3791,10 +3791,10 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped * If old and new chunk sizes differ, we need to process the * largest of these */ - if (mddev->new_chunk > mddev->chunk_size) + if (mddev->new_chunk > mddev->chunk_sectors << 9) reshape_sectors = mddev->new_chunk / 512; else - reshape_sectors = mddev->chunk_size / 512; + reshape_sectors = mddev->chunk_sectors; /* we update the metadata when there is more than 3Meg * in the block range (that is rather arbitrary, should @@ -4303,7 +4303,7 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks) raid_disks = conf->previous_raid_disks; } - sectors &= ~((sector_t)mddev->chunk_size/512 - 1); + sectors &= ~((sector_t)mddev->chunk_sectors - 1); sectors &= ~((sector_t)mddev->new_chunk/512 - 1); return sectors * (raid_disks - conf->max_degraded); } @@ -4412,7 +4412,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) conf->max_nr_stripes = NR_STRIPES; conf->reshape_progress = mddev->reshape_position; if (conf->reshape_progress != MaxSector) { - conf->prev_chunk = mddev->chunk_size; + conf->prev_chunk = mddev->chunk_sectors << 9; conf->prev_algo = mddev->layout; } @@ -4484,7 +4484,7 @@ static int run(mddev_t *mddev) } /* here_new is the stripe we will write to */ here_old = mddev->reshape_position; - sector_div(here_old, (mddev->chunk_size>>9)* + sector_div(here_old, mddev->chunk_sectors * (old_disks-max_degraded)); /* here_old is the first 
stripe that we might need to read * from */ @@ -4499,7 +4499,7 @@ static int run(mddev_t *mddev) } else { BUG_ON(mddev->level != mddev->new_level); BUG_ON(mddev->layout != mddev->new_layout); - BUG_ON(mddev->chunk_size != mddev->new_chunk); + BUG_ON(mddev->chunk_sectors << 9 != mddev->new_chunk); BUG_ON(mddev->delta_disks != 0); } @@ -4533,7 +4533,7 @@ static int run(mddev_t *mddev) } /* device size must be a multiple of chunk size */ - mddev->dev_sectors &= ~(mddev->chunk_size / 512 - 1); + mddev->dev_sectors &= ~(mddev->chunk_sectors - 1); mddev->resync_max_sectors = mddev->dev_sectors; if (mddev->degraded > 0 && @@ -4582,7 +4582,7 @@ static int run(mddev_t *mddev) { int data_disks = conf->previous_raid_disks - conf->max_degraded; int stripe = data_disks * - (mddev->chunk_size / PAGE_SIZE); + ((mddev->chunk_sectors << 9) / PAGE_SIZE); if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) mddev->queue->backing_dev_info.ra_pages = 2 * stripe; } @@ -4679,7 +4679,8 @@ static void status(struct seq_file *seq, mddev_t *mddev) raid5_conf_t *conf = (raid5_conf_t *) mddev->private; int i; - seq_printf (seq, " level %d, %dk chunk, algorithm %d", mddev->level, mddev->chunk_size >> 10, mddev->layout); + seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level, + mddev->chunk_sectors / 2, mddev->layout); seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded); for (i = 0; i < conf->raid_disks; i++) seq_printf (seq, "%s", @@ -4827,7 +4828,7 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors) * any io in the removed space completes, but it hardly seems * worth it. 
*/ - sectors &= ~((sector_t)mddev->chunk_size/512 - 1); + sectors &= ~((sector_t)mddev->chunk_sectors - 1); md_set_array_sectors(mddev, raid5_size(mddev, sectors, mddev->raid_disks)); if (mddev->array_sectors > @@ -4850,7 +4851,7 @@ static int raid5_check_reshape(mddev_t *mddev) if (mddev->delta_disks == 0 && mddev->new_layout == mddev->layout && - mddev->new_chunk == mddev->chunk_size) + mddev->new_chunk == mddev->chunk_sectors << 9) return -EINVAL; /* nothing to do */ if (mddev->bitmap) /* Cannot grow a bitmap yet */ @@ -4878,10 +4879,11 @@ static int raid5_check_reshape(mddev_t *mddev) * If the chunk size is greater, user-space should request more * stripe_heads first. */ - if ((mddev->chunk_size / STRIPE_SIZE) * 4 > conf->max_nr_stripes || + if (((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4 + > conf->max_nr_stripes || (mddev->new_chunk / STRIPE_SIZE) * 4 > conf->max_nr_stripes) { printk(KERN_WARNING "raid5: reshape: not enough stripes. Needed %lu\n", - (max(mddev->chunk_size, mddev->new_chunk) + (max(mddev->chunk_sectors << 9, mddev->new_chunk) / STRIPE_SIZE)*4); return -ENOSPC; } @@ -5054,7 +5056,7 @@ static void raid5_finish_reshape(mddev_t *mddev) raid5_remove_disk(mddev, d); } mddev->layout = conf->algorithm; - mddev->chunk_size = conf->chunk_size; + mddev->chunk_sectors = conf->chunk_size >> 9; mddev->reshape_position = MaxSector; mddev->delta_disks = 0; } @@ -5183,7 +5185,8 @@ static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk) } if (new_chunk > 0) { conf->chunk_size = new_chunk; - mddev->chunk_size = mddev->new_chunk = new_chunk; + mddev->new_chunk = new_chunk; + mddev->chunk_sectors = new_chunk >> 9; } set_bit(MD_CHANGE_DEVS, &mddev->flags); md_wakeup_thread(mddev->thread); -- cgit v1.2.3-70-g09d2 From d6e412eaa52db82010f12ea7d2c9b9468e933c44 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 18 Jun 2009 08:47:00 +1000 Subject: md: raid0: chunk_sectors cleanups. 
following the conversion to chunk_sectors, there is room for cleaning up a little. Signed-off-by: NeilBrown --- drivers/md/raid0.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index f20b18ff796..11e38425371 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -273,7 +273,7 @@ static int raid0_mergeable_bvec(struct request_queue *q, unsigned int chunk_sectors = mddev->chunk_sectors; unsigned int bio_sectors = bvm->bi_size >> 9; - if (is_power_of_2(mddev->chunk_sectors)) + if (is_power_of_2(chunk_sectors)) max = (chunk_sectors - ((sector & (chunk_sectors-1)) + bio_sectors)) << 9; else @@ -384,7 +384,7 @@ static mdk_rdev_t *map_sector(mddev_t *mddev, struct strip_zone *zone, raid0_conf_t *conf = mddev->private; unsigned int chunk_sects = mddev->chunk_sectors; - if (is_power_of_2(mddev->chunk_sectors)) { + if (is_power_of_2(chunk_sects)) { int chunksect_bits = ffz(~chunk_sects); /* find the sector offset inside the chunk */ sect_in_chunk = sector & (chunk_sects - 1); @@ -414,7 +414,7 @@ static mdk_rdev_t *map_sector(mddev_t *mddev, struct strip_zone *zone, static inline int is_io_in_chunk_boundary(mddev_t *mddev, unsigned int chunk_sects, struct bio *bio) { - if (likely(is_power_of_2(mddev->chunk_sectors))) { + if (likely(is_power_of_2(chunk_sects))) { return chunk_sects >= ((bio->bi_sector & (chunk_sects-1)) + (bio->bi_size >> 9)); } else{ @@ -456,7 +456,7 @@ static int raid0_make_request(struct request_queue *q, struct bio *bio) /* This is a one page bio that upper layers * refuse to split for us, so we need to split it. 
*/ - if (likely(is_power_of_2(mddev->chunk_sectors))) + if (likely(is_power_of_2(chunk_sects))) bp = bio_split(bio, chunk_sects - (sector & (chunk_sects-1))); else -- cgit v1.2.3-70-g09d2 From 13f2682b7216ebebd72b3d5868fe7fccec91a92d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 18 Jun 2009 08:48:55 +1000 Subject: md: raid0/linear: ensure device sizes are rounded to chunk size. This is currently ensured by common code, but it is more reliable to ensure it where it is needed in personality code. All the other personalities that care already round the size to the chunk_size. raid0 and linear are the only hold-outs. Signed-off-by: NeilBrown --- drivers/md/linear.c | 6 ++++++ drivers/md/raid0.c | 6 ++++++ 2 files changed, 12 insertions(+) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 9f7cec42dd8..dda2f1b64a6 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -135,6 +135,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) list_for_each_entry(rdev, &mddev->disks, same_set) { int j = rdev->raid_disk; dev_info_t *disk = conf->disks + j; + sector_t sectors; if (j < 0 || j >= raid_disks || disk->rdev) { printk("linear: disk numbering problem. 
Aborting!\n"); @@ -142,6 +143,11 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) } disk->rdev = rdev; + if (mddev->chunk_sectors) { + sectors = rdev->sectors; + sector_div(sectors, mddev->chunk_sectors); + rdev->sectors = sectors * mddev->chunk_sectors; + } blk_queue_stack_limits(mddev->queue, rdev->bdev->bd_disk->queue); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 11e38425371..717e64a4af9 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -100,6 +100,12 @@ static int create_strip_zones(mddev_t *mddev) printk(KERN_INFO "raid0: looking at %s\n", bdevname(rdev1->bdev,b)); c = 0; + + /* round size to chunk_size */ + sectors = rdev1->sectors; + sector_div(sectors, mddev->chunk_sectors); + rdev1->sectors = sectors * mddev->chunk_sectors; + list_for_each_entry(rdev2, &mddev->disks, same_set) { printk(KERN_INFO "raid0: comparing %s(%llu)", bdevname(rdev1->bdev,b), -- cgit v1.2.3-70-g09d2 From 0894cc3066aaa3e75a99383c0d25feebf9b688ac Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Thu, 18 Jun 2009 08:49:23 +1000 Subject: md: Move check for bitmap presence to personality code. If the superblock of a component device indicates the presence of a bitmap but the corresponding raid personality does not support bitmaps (raid0, linear, multipath, faulty), then something is seriously wrong and we'd better refuse to run such an array. Currently, this check is performed while the superblocks are examined, i.e. before entering personality code. Therefore the generic md layer must know which raid levels support bitmaps and which do not. This patch avoids this layer violation without adding identical code to various personalities. This is accomplished by introducing a new public function to md.c, md_check_no_bitmap(), which replaces the hard-coded checks in the superblock loading functions. 
A call to md_check_no_bitmap() is added to the ->run method of each personality which does not support bitmaps and assembly is aborted if at least one component device contains a bitmap. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/faulty.c | 6 +++++- drivers/md/linear.c | 2 ++ drivers/md/md.c | 40 ++++++++++++++++++---------------------- drivers/md/md.h | 1 + drivers/md/multipath.c | 3 +++ drivers/md/raid0.c | 2 ++ 6 files changed, 31 insertions(+), 23 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c index 6e83b38d931..87d88dbb667 100644 --- a/drivers/md/faulty.c +++ b/drivers/md/faulty.c @@ -299,8 +299,12 @@ static int run(mddev_t *mddev) { mdk_rdev_t *rdev; int i; + conf_t *conf; - conf_t *conf = kmalloc(sizeof(*conf), GFP_KERNEL); + if (md_check_no_bitmap(mddev)) + return -EINVAL; + + conf = kmalloc(sizeof(*conf), GFP_KERNEL); if (!conf) return -ENOMEM; diff --git a/drivers/md/linear.c b/drivers/md/linear.c index dda2f1b64a6..564c390f8a1 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -189,6 +189,8 @@ static int linear_run (mddev_t *mddev) { linear_conf_t *conf; + if (md_check_no_bitmap(mddev)) + return -EINVAL; mddev->queue->queue_lock = &mddev->queue->__queue_lock; conf = linear_conf(mddev, mddev->raid_disks); diff --git a/drivers/md/md.c b/drivers/md/md.c index 0f11fd1417a..09be637d52c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -735,6 +735,24 @@ struct super_type { sector_t num_sectors); }; +/* + * Check that the given mddev has no bitmap. + * + * This function is called from the run method of all personalities that do not + * support bitmaps. It prints an error message and returns non-zero if mddev + * has a bitmap. Otherwise, it returns 0. 
+ * + */ +int md_check_no_bitmap(mddev_t *mddev) +{ + if (!mddev->bitmap_file && !mddev->bitmap_offset) + return 0; + printk(KERN_ERR "%s: bitmaps are not supported for %s\n", + mdname(mddev), mddev->pers->name); + return 1; +} +EXPORT_SYMBOL(md_check_no_bitmap); + /* * load_super for 0.90.0 */ @@ -788,17 +806,6 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version rdev->data_offset = 0; rdev->sb_size = MD_SB_BYTES; - if (sb->state & (1<level != 1 && sb->level != 4 - && sb->level != 5 && sb->level != 6 - && sb->level != 10) { - /* FIXME use a better test */ - printk(KERN_WARNING - "md: bitmaps not supported for this level.\n"); - goto abort; - } - } - if (sb->level == LEVEL_MULTIPATH) rdev->desc_nr = -1; else @@ -1176,17 +1183,6 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) bdevname(rdev->bdev,b)); return -EINVAL; } - if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET)) { - if (sb->level != cpu_to_le32(1) && - sb->level != cpu_to_le32(4) && - sb->level != cpu_to_le32(5) && - sb->level != cpu_to_le32(6) && - sb->level != cpu_to_le32(10)) { - printk(KERN_WARNING - "md: bitmaps not supported for this level.\n"); - return -EINVAL; - } - } rdev->preferred_minor = 0xffff; rdev->data_offset = le64_to_cpu(sb->data_offset); diff --git a/drivers/md/md.h b/drivers/md/md.h index ea2c441449d..9430a110db9 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -430,5 +430,6 @@ extern void md_new_event(mddev_t *mddev); extern int md_allow_write(mddev_t *mddev); extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev); extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors); +extern int md_check_no_bitmap(mddev_t *mddev); #endif /* _MD_MD_H */ diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index c1ca63f278a..cbe368fa659 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -421,6 +421,9 @@ static int multipath_run (mddev_t *mddev) struct 
multipath_info *disk; mdk_rdev_t *rdev; + if (md_check_no_bitmap(mddev)) + return -EINVAL; + if (mddev->level != LEVEL_MULTIPATH) { printk("multipath: %s: raid level not set to multipath IO (%d)\n", mdname(mddev), mddev->level); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 717e64a4af9..ab4a489d869 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -314,6 +314,8 @@ static int raid0_run(mddev_t *mddev) printk(KERN_ERR "md/raid0: chunk size must be set.\n"); return -EINVAL; } + if (md_check_no_bitmap(mddev)) + return -EINVAL; blk_queue_max_sectors(mddev->queue, mddev->chunk_sectors); mddev->queue->queue_lock = &mddev->queue->__queue_lock; -- cgit v1.2.3-70-g09d2