From 55ebbb59c1c6eb1b040f62b8c4ae0b724de6e55a Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Tue, 22 Jan 2013 21:42:18 -0600 Subject: DM-RAID: Fix RAID10's check for sufficient redundancy Before attempting to activate a RAID array, it is checked for sufficient redundancy. That is, we make sure that there are not too many failed devices - or devices specified for rebuild - to undermine our ability to activate the array. The current code performs this check twice - once to ensure there were not too many devices specified for rebuild by the user ('validate_rebuild_devices') and again after possibly experiencing a failure to read the superblock ('analyse_superblocks'). Neither of these checks are sufficient. The first check is done properly but with insufficient information about the possible failure state of the devices to make a good determination if the array can be activated. The second check is simply done wrong in the case of RAID10 because it doesn't account for the independence of the stripes (i.e. mirror sets). The solution is to use the properly written check ('validate_rebuild_devices'), but perform the check after the superblocks have been read and we know which devices have failed. This gives us one check instead of two and performs it in a location where it can be done right. Only RAID10 was affected and it was affected in the following ways: - the code did not properly catch the condition where a user specified a device for rebuild that already had a failed device in the same mirror set. (This condition would, however, be caught at a deeper level in MD.) - the code triggers a false positive and denies activation when devices in independent mirror sets have failed - counting the failures as though they were all in the same set. The most likely place this error was introduced (or this patch should have been included) is in commit 4ec1e369 - first introduced in v3.7-rc1. Consequently this fix should also go in v3.7.y, however there is a small conflict on the .version in raid_target, so I'll submit a separate patch to -stable. Cc: stable@vger.kernel.org Signed-off-by: Jonathan Brassow Signed-off-by: NeilBrown --- Documentation/device-mapper/dm-raid.txt | 1 + drivers/md/dm-raid.c | 101 ++++++++++++-------------------- 2 files changed, 38 insertions(+), 64 deletions(-) diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt index 728c38c242d..56fb62b09fc 100644 --- a/Documentation/device-mapper/dm-raid.txt +++ b/Documentation/device-mapper/dm-raid.txt @@ -141,3 +141,4 @@ Version History 1.2.0 Handle creation of arrays that contain failed devices. 1.3.0 Added support for RAID 10 1.3.1 Allow device replacement/rebuild for RAID 10 +1.3.2 Fix/improve redundancy checking for RAID10 diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 3d8984edeff..9e58dbd8d8c 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -340,24 +340,22 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) } /* - * validate_rebuild_devices + * validate_raid_redundancy * @rs * - * Determine if the devices specified for rebuild can result in a valid - * usable array that is capable of rebuilding the given devices. + * Determine if there are enough devices in the array that haven't + * failed (or are being rebuilt) to form a usable array. * * Returns: 0 on success, -EINVAL on failure. */ -static int validate_rebuild_devices(struct raid_set *rs) +static int validate_raid_redundancy(struct raid_set *rs) { unsigned i, rebuild_cnt = 0; unsigned rebuilds_per_group, copies, d; - if (!(rs->print_flags & DMPF_REBUILD)) - return 0; - for (i = 0; i < rs->md.raid_disks; i++) - if (!test_bit(In_sync, &rs->dev[i].rdev.flags)) + if (!test_bit(In_sync, &rs->dev[i].rdev.flags) || + !rs->dev[i].rdev.sb_page) rebuild_cnt++; switch (rs->raid_type->level) { @@ -393,27 +391,24 @@ static int validate_rebuild_devices(struct raid_set *rs) * A A B B C * C D D E E */ - rebuilds_per_group = 0; for (i = 0; i < rs->md.raid_disks * copies; i++) { + if (!(i % copies)) + rebuilds_per_group = 0; d = i % rs->md.raid_disks; - if (!test_bit(In_sync, &rs->dev[d].rdev.flags) && + if ((!rs->dev[d].rdev.sb_page || + !test_bit(In_sync, &rs->dev[d].rdev.flags)) && (++rebuilds_per_group >= copies)) goto too_many; - if (!((i + 1) % copies)) - rebuilds_per_group = 0; } break; default: - DMERR("The rebuild parameter is not supported for %s", - rs->raid_type->name); - rs->ti->error = "Rebuild not supported for this RAID type"; - return -EINVAL; + if (rebuild_cnt) + return -EINVAL; } return 0; too_many: - rs->ti->error = "Too many rebuild devices specified"; return -EINVAL; } @@ -664,9 +659,6 @@ static int parse_raid_params(struct raid_set *rs, char **argv, } rs->md.dev_sectors = sectors_per_dev; - if (validate_rebuild_devices(rs)) - return -EINVAL; - /* Assume there are no metadata devices until the drives are parsed */ rs->md.persistent = 0; rs->md.external = 1; @@ -995,28 +987,10 @@ static int super_validate(struct mddev *mddev, struct md_rdev *rdev) static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) { int ret; - unsigned redundancy = 0; struct raid_dev *dev; struct md_rdev *rdev, *tmp, *freshest; struct mddev *mddev = &rs->md; - switch (rs->raid_type->level) { - case 1: - redundancy = rs->md.raid_disks - 1; - break; - case 4: - case 5: - case 6: - redundancy = rs->raid_type->parity_devs; - break; - case 10: - redundancy = raid10_md_layout_to_copies(mddev->layout) - 1; - break; - default: - ti->error = "Unknown RAID type"; - return -EINVAL; - } - freshest = NULL; rdev_for_each_safe(rdev, tmp, mddev) { /* @@ -1045,44 +1019,43 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) break; default: dev = container_of(rdev, struct raid_dev, rdev); - if (redundancy--) { - if (dev->meta_dev) - dm_put_device(ti, dev->meta_dev); - - dev->meta_dev = NULL; - rdev->meta_bdev = NULL; + if (dev->meta_dev) + dm_put_device(ti, dev->meta_dev); - if (rdev->sb_page) - put_page(rdev->sb_page); + dev->meta_dev = NULL; + rdev->meta_bdev = NULL; - rdev->sb_page = NULL; + if (rdev->sb_page) + put_page(rdev->sb_page); - rdev->sb_loaded = 0; + rdev->sb_page = NULL; - /* - * We might be able to salvage the data device - * even though the meta device has failed. For - * now, we behave as though '- -' had been - * set for this device in the table. - */ - if (dev->data_dev) - dm_put_device(ti, dev->data_dev); + rdev->sb_loaded = 0; - dev->data_dev = NULL; - rdev->bdev = NULL; + /* + * We might be able to salvage the data device + * even though the meta device has failed. For + * now, we behave as though '- -' had been + * set for this device in the table. + */ + if (dev->data_dev) + dm_put_device(ti, dev->data_dev); - list_del(&rdev->same_set); + dev->data_dev = NULL; + rdev->bdev = NULL; - continue; - } - ti->error = "Failed to load superblock"; - return ret; + list_del(&rdev->same_set); } } if (!freshest) return 0; + if (validate_raid_redundancy(rs)) { + rs->ti->error = "Insufficient redundancy to activate array"; + return -EINVAL; + } + /* * Validation of the freshest device provides the source of * validation for the remaining devices. @@ -1432,7 +1405,7 @@ static void raid_resume(struct dm_target *ti) static struct target_type raid_target = { .name = "raid", - .version = {1, 4, 0}, + .version = {1, 4, 1}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr, -- cgit v1.2.3-70-g09d2