From 34f8ac6d79e5446c6242e4bcb474f152c857c5c6 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Fri, 27 Jan 2012 14:53:53 -0600 Subject: Prevent DM RAID from loading bitmap twice. The life cycle of a device-mapper target is: 1) create 2) resume 3) suspend *) possibly repeat from 2 4) destroy The dm-raid target is unconditionally calling MD's bitmap_load function upon every resume. If steps 2 & 3 above are repeated, bitmap_load is called multiple times. It is only written to be called once; otherwise, it allocates new memory for the bitmap (without freeing the old) and incrementing the number of pages it thinks it has without zeroing first. This ultimately leads to access beyond allocated memory and lost memory. Simply avoiding the bitmap_load call upon resume is not sufficient. If the target was suspended while the initial recovery was only partially complete, it needs to be restarted when the target is resumed. This is why 'md_wakeup_thread' is called before issuing the 'mddev_resume'. Signed-off-by: Jonathan Brassow Signed-off-by: NeilBrown --- drivers/md/dm-raid.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'drivers/md/dm-raid.c') diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index c2907d836e4..86cb7e5d83d 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -56,7 +56,8 @@ struct raid_dev { struct raid_set { struct dm_target *ti; - uint64_t print_flags; + uint32_t bitmap_loaded; + uint32_t print_flags; struct mddev md; struct raid_type *raid_type; @@ -1085,7 +1086,7 @@ static int raid_status(struct dm_target *ti, status_type_t type, raid_param_cnt += 2; } - raid_param_cnt += (hweight64(rs->print_flags & ~DMPF_REBUILD) * 2); + raid_param_cnt += (hweight32(rs->print_flags & ~DMPF_REBUILD) * 2); if (rs->print_flags & (DMPF_SYNC | DMPF_NOSYNC)) raid_param_cnt--; @@ -1197,7 +1198,12 @@ static void raid_resume(struct dm_target *ti) { struct raid_set *rs = ti->private; - bitmap_load(&rs->md); + if (!rs->bitmap_loaded) { + bitmap_load(&rs->md); + rs->bitmap_loaded = 1; + } else + md_wakeup_thread(rs->md.thread); + mddev_resume(&rs->md); } -- cgit v1.2.3-70-g09d2 From 3aa3b2b2b1edb813dc5342d0108befc39541542d Mon Sep 17 00:00:00 2001 From: Jonathan E Brassow Date: Wed, 7 Mar 2012 19:09:47 +0000 Subject: dm raid: set MD_CHANGE_DEVS when rebuilding The 'rebuild' parameter is used to rebuild individual devices in an array (e.g. resynchronize a RAID1 device or recalculate a parity device in higher RAID). The MD_CHANGE_DEVS flag must be set when this parameter is given in order to write out the superblocks and make the change take immediate effect. The code that handles new devices in super_load already sets MD_CHANGE_DEVS and 'FirstUse'. (The 'FirstUse' flag was being set as a special case for rebuilds in super_init_validation.) Add a condition for rebuilds in super_load to take care of both flags without the special case in 'super_init_validation'. Signed-off-by: Jonathan Brassow Cc: stable@kernel.org Signed-off-by: Alasdair G Kergon --- drivers/md/dm-raid.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'drivers/md/dm-raid.c') diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 86cb7e5d83d..f53bb389c18 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -668,7 +668,14 @@ static int super_load(struct md_rdev *rdev, struct md_rdev *refdev) return ret; sb = page_address(rdev->sb_page); - if (sb->magic != cpu_to_le32(DM_RAID_MAGIC)) { + + /* + * Two cases that we want to write new superblocks and rebuild: + * 1) New device (no matching magic number) + * 2) Device specified for rebuild (!In_sync w/ offset == 0) + */ + if ((sb->magic != cpu_to_le32(DM_RAID_MAGIC)) || + (!test_bit(In_sync, &rdev->flags) && !rdev->recovery_offset)) { super_sync(rdev->mddev, rdev); set_bit(FirstUse, &rdev->flags); @@ -745,11 +752,8 @@ static int super_init_validation(struct mddev *mddev, struct md_rdev *rdev) */ rdev_for_each(r, t, mddev) { if (!test_bit(In_sync, &r->flags)) { - if (!test_bit(FirstUse, &r->flags)) - DMERR("Superblock area of " - "rebuild device %d should have been " - "cleared.", r->raid_disk); - set_bit(FirstUse, &r->flags); + DMINFO("Device %d specified for rebuild: " + "Clearing superblock", r->raid_disk); rebuilds++; } else if (test_bit(FirstUse, &r->flags)) new_devs++; -- cgit v1.2.3-70-g09d2 From 0ca93de9b789e0eb05e103f0c04de72df13da73a Mon Sep 17 00:00:00 2001 From: Jonathan E Brassow Date: Wed, 7 Mar 2012 19:09:48 +0000 Subject: dm raid: fix flush support Fix dm-raid flush support. Both md and dm have support for flush, but the dm-raid target forgot to set the flag to indicate that flushes should be passed on. (Important for data integrity e.g. with writeback cache enabled.) Signed-off-by: Jonathan Brassow Acked-by: Mike Snitzer Cc: stable@kernel.org Signed-off-by: Alasdair G Kergon --- drivers/md/dm-raid.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/md/dm-raid.c') diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index f53bb389c18..787022c1818 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -975,6 +975,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) INIT_WORK(&rs->md.event_work, do_table_event); ti->private = rs; + ti->num_flush_requests = 1; mutex_lock(&rs->md.reconfig_mutex); ret = md_run(&rs->md); -- cgit v1.2.3-70-g09d2 From dafb20fa34320a472deb7442f25a0c086e0feb33 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 19 Mar 2012 12:46:39 +1100 Subject: md: tidy up rdev_for_each usage. md.h has an 'rdev_for_each()' macro for iterating the rdevs in an mddev. However it uses the 'safe' version of list_for_each_entry, and so requires the extra variable, but doesn't include 'safe' in the name, which is useful documentation. Consequently some places use this safe version without needing it, and many use an explicity list_for_each entry. So: - rename rdev_for_each to rdev_for_each_safe - create a new rdev_for_each which uses the plain list_for_each_entry, - use the 'safe' version only where needed, and convert all other list_for_each_entry calls to use rdev_for_each. Signed-off-by: NeilBrown --- drivers/md/bitmap.c | 2 +- drivers/md/dm-raid.c | 16 +++++------ drivers/md/faulty.c | 2 +- drivers/md/linear.c | 2 +- drivers/md/md.c | 74 +++++++++++++++++++++++++------------------------- drivers/md/md.h | 5 +++- drivers/md/multipath.c | 2 +- drivers/md/raid0.c | 10 +++---- drivers/md/raid1.c | 4 +-- drivers/md/raid10.c | 4 +-- drivers/md/raid5.c | 8 +++--- 11 files changed, 66 insertions(+), 63 deletions(-) (limited to 'drivers/md/dm-raid.c') diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 239af9a9aad..2c5dbc6248d 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -171,7 +171,7 @@ static struct page *read_sb_page(struct mddev *mddev, loff_t offset, did_alloc = 1; } - list_for_each_entry(rdev, &mddev->disks, same_set) { + rdev_for_each(rdev, mddev) { if (! test_bit(In_sync, &rdev->flags) || test_bit(Faulty, &rdev->flags)) continue; diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 787022c1818..c5a875d7b88 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -615,14 +615,14 @@ static int read_disk_sb(struct md_rdev *rdev, int size) static void super_sync(struct mddev *mddev, struct md_rdev *rdev) { - struct md_rdev *r, *t; + struct md_rdev *r; uint64_t failed_devices; struct dm_raid_superblock *sb; sb = page_address(rdev->sb_page); failed_devices = le64_to_cpu(sb->failed_devices); - rdev_for_each(r, t, mddev) + rdev_for_each(r, mddev) if ((r->raid_disk >= 0) && test_bit(Faulty, &r->flags)) failed_devices |= (1ULL << r->raid_disk); @@ -707,7 +707,7 @@ static int super_init_validation(struct mddev *mddev, struct md_rdev *rdev) struct dm_raid_superblock *sb; uint32_t new_devs = 0; uint32_t rebuilds = 0; - struct md_rdev *r, *t; + struct md_rdev *r; struct dm_raid_superblock *sb2; sb = page_address(rdev->sb_page); @@ -750,7 +750,7 @@ static int super_init_validation(struct mddev *mddev, struct md_rdev *rdev) * case the In_sync bit will /not/ be set and * recovery_cp must be MaxSector. */ - rdev_for_each(r, t, mddev) { + rdev_for_each(r, mddev) { if (!test_bit(In_sync, &r->flags)) { DMINFO("Device %d specified for rebuild: " "Clearing superblock", r->raid_disk); @@ -782,7 +782,7 @@ static int super_init_validation(struct mddev *mddev, struct md_rdev *rdev) * Now we set the Faulty bit for those devices that are * recorded in the superblock as failed. */ - rdev_for_each(r, t, mddev) { + rdev_for_each(r, mddev) { if (!r->sb_page) continue; sb2 = page_address(r->sb_page); @@ -855,11 +855,11 @@ static int super_validate(struct mddev *mddev, struct md_rdev *rdev) static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) { int ret; - struct md_rdev *rdev, *freshest, *tmp; + struct md_rdev *rdev, *freshest; struct mddev *mddev = &rs->md; freshest = NULL; - rdev_for_each(rdev, tmp, mddev) { + rdev_for_each(rdev, mddev) { if (!rdev->meta_bdev) continue; @@ -888,7 +888,7 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) if (super_validate(mddev, freshest)) return -EINVAL; - rdev_for_each(rdev, tmp, mddev) + rdev_for_each(rdev, mddev) if ((rdev != freshest) && super_validate(mddev, rdev)) return -EINVAL; diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c index feb2c3c7bb4..45135f69509 100644 --- a/drivers/md/faulty.c +++ b/drivers/md/faulty.c @@ -315,7 +315,7 @@ static int run(struct mddev *mddev) } conf->nfaults = 0; - list_for_each_entry(rdev, &mddev->disks, same_set) + rdev_for_each(rdev, mddev) conf->rdev = rdev; md_set_array_sectors(mddev, faulty_size(mddev, 0, 0)); diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 627456542fb..67940741b19 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -138,7 +138,7 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks) cnt = 0; conf->array_sectors = 0; - list_for_each_entry(rdev, &mddev->disks, same_set) { + rdev_for_each(rdev, mddev) { int j = rdev->raid_disk; struct dev_info *disk = conf->disks + j; sector_t sectors; diff --git a/drivers/md/md.c b/drivers/md/md.c index 115a6dd8583..119de175bf1 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -439,7 +439,7 @@ static void submit_flushes(struct work_struct *ws) INIT_WORK(&mddev->flush_work, md_submit_flush_data); atomic_set(&mddev->flush_pending, 1); rcu_read_lock(); - list_for_each_entry_rcu(rdev, &mddev->disks, same_set) + rdev_for_each_rcu(rdev, mddev) if (rdev->raid_disk >= 0 && !test_bit(Faulty, &rdev->flags)) { /* Take two references, one is dropped @@ -749,7 +749,7 @@ static struct md_rdev * find_rdev_nr(struct mddev *mddev, int nr) { struct md_rdev *rdev; - list_for_each_entry(rdev, &mddev->disks, same_set) + rdev_for_each(rdev, mddev) if (rdev->desc_nr == nr) return rdev; @@ -760,7 +760,7 @@ static struct md_rdev * find_rdev(struct mddev * mddev, dev_t dev) { struct md_rdev *rdev; - list_for_each_entry(rdev, &mddev->disks, same_set) + rdev_for_each(rdev, mddev) if (rdev->bdev->bd_dev == dev) return rdev; @@ -1342,7 +1342,7 @@ static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev) sb->state |= (1<disks[0].state = (1<disks, same_set) { + rdev_for_each(rdev2, mddev) { mdp_disk_t *d; int desc_nr; int is_active = test_bit(In_sync, &rdev2->flags); @@ -1816,7 +1816,7 @@ retry: } max_dev = 0; - list_for_each_entry(rdev2, &mddev->disks, same_set) + rdev_for_each(rdev2, mddev) if (rdev2->desc_nr+1 > max_dev) max_dev = rdev2->desc_nr+1; @@ -1833,7 +1833,7 @@ retry: for (i=0; idev_roles[i] = cpu_to_le16(0xfffe); - list_for_each_entry(rdev2, &mddev->disks, same_set) { + rdev_for_each(rdev2, mddev) { i = rdev2->desc_nr; if (test_bit(Faulty, &rdev2->flags)) sb->dev_roles[i] = cpu_to_le16(0xfffe); @@ -1948,7 +1948,7 @@ int md_integrity_register(struct mddev *mddev) return 0; /* nothing to do */ if (!mddev->gendisk || blk_get_integrity(mddev->gendisk)) return 0; /* shouldn't register, or already is */ - list_for_each_entry(rdev, &mddev->disks, same_set) { + rdev_for_each(rdev, mddev) { /* skip spares and non-functional disks */ if (test_bit(Faulty, &rdev->flags)) continue; @@ -2175,7 +2175,7 @@ static void export_array(struct mddev *mddev) { struct md_rdev *rdev, *tmp; - rdev_for_each(rdev, tmp, mddev) { + rdev_for_each_safe(rdev, tmp, mddev) { if (!rdev->mddev) { MD_BUG(); continue; @@ -2307,11 +2307,11 @@ static void md_print_devices(void) bitmap_print_sb(mddev->bitmap); else printk("%s: ", mdname(mddev)); - list_for_each_entry(rdev, &mddev->disks, same_set) + rdev_for_each(rdev, mddev) printk("<%s>", bdevname(rdev->bdev,b)); printk("\n"); - list_for_each_entry(rdev, &mddev->disks, same_set) + rdev_for_each(rdev, mddev) print_rdev(rdev, mddev->major_version); } printk("md: **********************************\n"); @@ -2328,7 +2328,7 @@ static void sync_sbs(struct mddev * mddev, int nospares) * with the rest of the array) */ struct md_rdev *rdev; - list_for_each_entry(rdev, &mddev->disks, same_set) { + rdev_for_each(rdev, mddev) { if (rdev->sb_events == mddev->events || (nospares && rdev->raid_disk < 0 && @@ -2351,7 +2351,7 @@ static void md_update_sb(struct mddev * mddev, int force_change) repeat: /* First make sure individual recovery_offsets are correct */ - list_for_each_entry(rdev, &mddev->disks, same_set) { + rdev_for_each(rdev, mddev) { if (rdev->raid_disk >= 0 && mddev->delta_disks >= 0 && !test_bit(In_sync, &rdev->flags) && @@ -2364,7 +2364,7 @@ repeat: clear_bit(MD_CHANGE_DEVS, &mddev->flags); if (!mddev->external) { clear_bit(MD_CHANGE_PENDING, &mddev->flags); - list_for_each_entry(rdev, &mddev->disks, same_set) { + rdev_for_each(rdev, mddev) { if (rdev->badblocks.changed) { md_ack_all_badblocks(&rdev->badblocks); md_error(mddev, rdev); @@ -2430,7 +2430,7 @@ repeat: mddev->events --; } - list_for_each_entry(rdev, &mddev->disks, same_set) { + rdev_for_each(rdev, mddev) { if (rdev->badblocks.changed) any_badblocks_changed++; if (test_bit(Faulty, &rdev->flags)) @@ -2444,7 +2444,7 @@ repeat: mdname(mddev), mddev->in_sync); bitmap_update_sb(mddev->bitmap); - list_for_each_entry(rdev, &mddev->disks, same_set) { + rdev_for_each(rdev, mddev) { char b[BDEVNAME_SIZE]; if (rdev->sb_loaded != 1) @@ -2493,7 +2493,7 @@ repeat: if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) sysfs_notify(&mddev->kobj, NULL, "sync_completed"); - list_for_each_entry(rdev, &mddev->disks, same_set) { + rdev_for_each(rdev, mddev) { if (test_and_clear_bit(FaultRecorded, &rdev->flags)) clear_bit(Blocked, &rdev->flags); @@ -2896,7 +2896,7 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len) struct md_rdev *rdev2; mddev_lock(mddev); - list_for_each_entry(rdev2, &mddev->disks, same_set) + rdev_for_each(rdev2, mddev) if (rdev->bdev == rdev2->bdev && rdev != rdev2 && overlaps(rdev->data_offset, rdev->sectors, @@ -3193,7 +3193,7 @@ static void analyze_sbs(struct mddev * mddev) char b[BDEVNAME_SIZE]; freshest = NULL; - rdev_for_each(rdev, tmp, mddev) + rdev_for_each_safe(rdev, tmp, mddev) switch (super_types[mddev->major_version]. load_super(rdev, freshest, mddev->minor_version)) { case 1: @@ -3214,7 +3214,7 @@ static void analyze_sbs(struct mddev * mddev) validate_super(mddev, freshest); i = 0; - rdev_for_each(rdev, tmp, mddev) { + rdev_for_each_safe(rdev, tmp, mddev) { if (mddev->max_disks && (rdev->desc_nr >= mddev->max_disks || i > mddev->max_disks)) { @@ -3403,7 +3403,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len) return -EINVAL; } - list_for_each_entry(rdev, &mddev->disks, same_set) + rdev_for_each(rdev, mddev) rdev->new_raid_disk = rdev->raid_disk; /* ->takeover must set new_* and/or delta_disks @@ -3456,7 +3456,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len) mddev->safemode = 0; } - list_for_each_entry(rdev, &mddev->disks, same_set) { + rdev_for_each(rdev, mddev) { if (rdev->raid_disk < 0) continue; if (rdev->new_raid_disk >= mddev->raid_disks) @@ -3465,7 +3465,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len) continue; sysfs_unlink_rdev(mddev, rdev); } - list_for_each_entry(rdev, &mddev->disks, same_set) { + rdev_for_each(rdev, mddev) { if (rdev->raid_disk < 0) continue; if (rdev->new_raid_disk == rdev->raid_disk) @@ -4796,7 +4796,7 @@ int md_run(struct mddev *mddev) * the only valid external interface is through the md * device. */ - list_for_each_entry(rdev, &mddev->disks, same_set) { + rdev_for_each(rdev, mddev) { if (test_bit(Faulty, &rdev->flags)) continue; sync_blockdev(rdev->bdev); @@ -4867,8 +4867,8 @@ int md_run(struct mddev *mddev) struct md_rdev *rdev2; int warned = 0; - list_for_each_entry(rdev, &mddev->disks, same_set) - list_for_each_entry(rdev2, &mddev->disks, same_set) { + rdev_for_each(rdev, mddev) + rdev_for_each(rdev2, mddev) { if (rdev < rdev2 && rdev->bdev->bd_contains == rdev2->bdev->bd_contains) { @@ -4945,7 +4945,7 @@ int md_run(struct mddev *mddev) mddev->in_sync = 1; smp_wmb(); mddev->ready = 1; - list_for_each_entry(rdev, &mddev->disks, same_set) + rdev_for_each(rdev, mddev) if (rdev->raid_disk >= 0) if (sysfs_link_rdev(mddev, rdev)) /* failure here is OK */; @@ -5175,7 +5175,7 @@ static int do_md_stop(struct mddev * mddev, int mode, int is_open) /* tell userspace to handle 'inactive' */ sysfs_notify_dirent_safe(mddev->sysfs_state); - list_for_each_entry(rdev, &mddev->disks, same_set) + rdev_for_each(rdev, mddev) if (rdev->raid_disk >= 0) sysfs_unlink_rdev(mddev, rdev); @@ -5226,7 +5226,7 @@ static void autorun_array(struct mddev *mddev) printk(KERN_INFO "md: running: "); - list_for_each_entry(rdev, &mddev->disks, same_set) { + rdev_for_each(rdev, mddev) { char b[BDEVNAME_SIZE]; printk("<%s>", bdevname(rdev->bdev,b)); } @@ -5356,7 +5356,7 @@ static int get_array_info(struct mddev * mddev, void __user * arg) struct md_rdev *rdev; nr=working=insync=failed=spare=0; - list_for_each_entry(rdev, &mddev->disks, same_set) { + rdev_for_each(rdev, mddev) { nr++; if (test_bit(Faulty, &rdev->flags)) failed++; @@ -5923,7 +5923,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors) * grow, and re-add. */ return -EBUSY; - list_for_each_entry(rdev, &mddev->disks, same_set) { + rdev_for_each(rdev, mddev) { sector_t avail = rdev->sectors; if (fit && (num_sectors == 0 || num_sectors > avail)) @@ -6758,7 +6758,7 @@ static int md_seq_show(struct seq_file *seq, void *v) } sectors = 0; - list_for_each_entry(rdev, &mddev->disks, same_set) { + rdev_for_each(rdev, mddev) { char b[BDEVNAME_SIZE]; seq_printf(seq, " %s[%d]", bdevname(rdev->bdev,b), rdev->desc_nr); @@ -7170,7 +7170,7 @@ void md_do_sync(struct mddev *mddev) max_sectors = mddev->dev_sectors; j = MaxSector; rcu_read_lock(); - list_for_each_entry_rcu(rdev, &mddev->disks, same_set) + rdev_for_each_rcu(rdev, mddev) if (rdev->raid_disk >= 0 && !test_bit(Faulty, &rdev->flags) && !test_bit(In_sync, &rdev->flags) && @@ -7342,7 +7342,7 @@ void md_do_sync(struct mddev *mddev) if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) mddev->curr_resync = MaxSector; rcu_read_lock(); - list_for_each_entry_rcu(rdev, &mddev->disks, same_set) + rdev_for_each_rcu(rdev, mddev) if (rdev->raid_disk >= 0 && mddev->delta_disks >= 0 && !test_bit(Faulty, &rdev->flags) && @@ -7388,7 +7388,7 @@ static int remove_and_add_spares(struct mddev *mddev) mddev->curr_resync_completed = 0; - list_for_each_entry(rdev, &mddev->disks, same_set) + rdev_for_each(rdev, mddev) if (rdev->raid_disk >= 0 && !test_bit(Blocked, &rdev->flags) && (test_bit(Faulty, &rdev->flags) || @@ -7406,7 +7406,7 @@ static int remove_and_add_spares(struct mddev *mddev) "degraded"); - list_for_each_entry(rdev, &mddev->disks, same_set) { + rdev_for_each(rdev, mddev) { if (rdev->raid_disk >= 0 && !test_bit(In_sync, &rdev->flags) && !test_bit(Faulty, &rdev->flags)) @@ -7451,7 +7451,7 @@ static void reap_sync_thread(struct mddev *mddev) * do the superblock for an incrementally recovered device * written out. */ - list_for_each_entry(rdev, &mddev->disks, same_set) + rdev_for_each(rdev, mddev) if (!mddev->degraded || test_bit(In_sync, &rdev->flags)) rdev->saved_raid_disk = -1; @@ -7529,7 +7529,7 @@ void md_check_recovery(struct mddev *mddev) * failed devices. */ struct md_rdev *rdev; - list_for_each_entry(rdev, &mddev->disks, same_set) + rdev_for_each(rdev, mddev) if (rdev->raid_disk >= 0 && !test_bit(Blocked, &rdev->flags) && test_bit(Faulty, &rdev->flags) && diff --git a/drivers/md/md.h b/drivers/md/md.h index 44c63dfeeb2..39acfe90cc2 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -519,7 +519,10 @@ static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev) /* * iterates through the 'same array disks' ringlist */ -#define rdev_for_each(rdev, tmp, mddev) \ +#define rdev_for_each(rdev, mddev) \ + list_for_each_entry(rdev, &((mddev)->disks), same_set) + +#define rdev_for_each_safe(rdev, tmp, mddev) \ list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set) #define rdev_for_each_rcu(rdev, mddev) \ diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index a222f516660..9339e67fcc7 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -428,7 +428,7 @@ static int multipath_run (struct mddev *mddev) } working_disks = 0; - list_for_each_entry(rdev, &mddev->disks, same_set) { + rdev_for_each(rdev, mddev) { disk_idx = rdev->raid_disk; if (disk_idx < 0 || disk_idx >= mddev->raid_disks) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 7294bd115e3..7ef5cbf31bb 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -91,7 +91,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) if (!conf) return -ENOMEM; - list_for_each_entry(rdev1, &mddev->disks, same_set) { + rdev_for_each(rdev1, mddev) { pr_debug("md/raid0:%s: looking at %s\n", mdname(mddev), bdevname(rdev1->bdev, b)); @@ -102,7 +102,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) sector_div(sectors, mddev->chunk_sectors); rdev1->sectors = sectors * mddev->chunk_sectors; - list_for_each_entry(rdev2, &mddev->disks, same_set) { + rdev_for_each(rdev2, mddev) { pr_debug("md/raid0:%s: comparing %s(%llu)" " with %s(%llu)\n", mdname(mddev), @@ -157,7 +157,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) smallest = NULL; dev = conf->devlist; err = -EINVAL; - list_for_each_entry(rdev1, &mddev->disks, same_set) { + rdev_for_each(rdev1, mddev) { int j = rdev1->raid_disk; if (mddev->level == 10) { @@ -329,7 +329,7 @@ static sector_t raid0_size(struct mddev *mddev, sector_t sectors, int raid_disks WARN_ONCE(sectors || raid_disks, "%s does not support generic reshape\n", __func__); - list_for_each_entry(rdev, &mddev->disks, same_set) + rdev_for_each(rdev, mddev) array_sectors += rdev->sectors; return array_sectors; @@ -543,7 +543,7 @@ static void *raid0_takeover_raid45(struct mddev *mddev) return ERR_PTR(-EINVAL); } - list_for_each_entry(rdev, &mddev->disks, same_set) { + rdev_for_each(rdev, mddev) { /* check slot number for a disk */ if (rdev->raid_disk == mddev->raid_disks-1) { printk(KERN_ERR "md/raid0:%s: raid5 must have missing parity disk!\n", diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 118e0f69f22..a933bd4065a 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2504,7 +2504,7 @@ static struct r1conf *setup_conf(struct mddev *mddev) err = -EINVAL; spin_lock_init(&conf->device_lock); - list_for_each_entry(rdev, &mddev->disks, same_set) { + rdev_for_each(rdev, mddev) { int disk_idx = rdev->raid_disk; if (disk_idx >= mddev->raid_disks || disk_idx < 0) @@ -2622,7 +2622,7 @@ static int run(struct mddev *mddev) if (IS_ERR(conf)) return PTR_ERR(conf); - list_for_each_entry(rdev, &mddev->disks, same_set) { + rdev_for_each(rdev, mddev) { if (!mddev->gendisk) continue; disk_stack_limits(mddev->gendisk, rdev->bdev, diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 2ae7021320e..52bb37d4026 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3253,7 +3253,7 @@ static int run(struct mddev *mddev) blk_queue_io_opt(mddev->queue, chunk_size * (conf->raid_disks / conf->near_copies)); - list_for_each_entry(rdev, &mddev->disks, same_set) { + rdev_for_each(rdev, mddev) { disk_idx = rdev->raid_disk; if (disk_idx >= conf->raid_disks @@ -3419,7 +3419,7 @@ static void *raid10_takeover_raid0(struct mddev *mddev) conf = setup_conf(mddev); if (!IS_ERR(conf)) { - list_for_each_entry(rdev, &mddev->disks, same_set) + rdev_for_each(rdev, mddev) if (rdev->raid_disk >= 0) rdev->new_raid_disk = rdev->raid_disk * 2; conf->barrier = 1; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index d38d235cc39..23ac880bba9 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4842,7 +4842,7 @@ static struct r5conf *setup_conf(struct mddev *mddev) pr_debug("raid456: run(%s) called.\n", mdname(mddev)); - list_for_each_entry(rdev, &mddev->disks, same_set) { + rdev_for_each(rdev, mddev) { raid_disk = rdev->raid_disk; if (raid_disk >= max_disks || raid_disk < 0) @@ -5177,7 +5177,7 @@ static int run(struct mddev *mddev) blk_queue_io_opt(mddev->queue, chunk_size * (conf->raid_disks - conf->max_degraded)); - list_for_each_entry(rdev, &mddev->disks, same_set) + rdev_for_each(rdev, mddev) disk_stack_limits(mddev->gendisk, rdev->bdev, rdev->data_offset << 9); } @@ -5500,7 +5500,7 @@ static int raid5_start_reshape(struct mddev *mddev) if (!check_stripe_cache(mddev)) return -ENOSPC; - list_for_each_entry(rdev, &mddev->disks, same_set) + rdev_for_each(rdev, mddev) if (!test_bit(In_sync, &rdev->flags) && !test_bit(Faulty, &rdev->flags)) spares++; @@ -5546,7 +5546,7 @@ static int raid5_start_reshape(struct mddev *mddev) * such devices during the reshape and confusion could result. */ if (mddev->delta_disks >= 0) { - list_for_each_entry(rdev, &mddev->disks, same_set) + rdev_for_each(rdev, mddev) if (rdev->raid_disk < 0 && !test_bit(Faulty, &rdev->flags)) { if (raid5_add_disk(mddev, rdev) == 0) { -- cgit v1.2.3-70-g09d2 From 0447568fc51e0268e201f7086d2450cf986e0411 Mon Sep 17 00:00:00 2001 From: Jonathan E Brassow Date: Wed, 28 Mar 2012 18:41:26 +0100 Subject: dm raid: handle failed devices during start up The dm-raid code currently fails to create a RAID array if any of the superblocks cannot be read. This was an oversight as there is already code to handle this case if the values ('- -') were provided for the failed array position. With this patch, if a superblock cannot be read, the array position's fields are initialized as though '- -' was set in the table. That is, the device is failed and the position should not be used, but if there is sufficient redundancy, the array should still be activated. Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon --- drivers/md/dm-raid.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 2 deletions(-) (limited to 'drivers/md/dm-raid.c') diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index c5a875d7b88..b0ba52459ed 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -604,7 +604,9 @@ static int read_disk_sb(struct md_rdev *rdev, int size) return 0; if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, 1)) { - DMERR("Failed to read device superblock"); + DMERR("Failed to read superblock of device at position %d", + rdev->raid_disk); + set_bit(Faulty, &rdev->flags); return -EINVAL; } @@ -855,9 +857,25 @@ static int super_validate(struct mddev *mddev, struct md_rdev *rdev) static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) { int ret; + unsigned redundancy = 0; + struct raid_dev *dev; struct md_rdev *rdev, *freshest; struct mddev *mddev = &rs->md; + switch (rs->raid_type->level) { + case 1: + redundancy = rs->md.raid_disks - 1; + break; + case 4: + case 5: + case 6: + redundancy = rs->raid_type->parity_devs; + break; + default: + ti->error = "Unknown RAID type"; + return -EINVAL; + } + freshest = NULL; rdev_for_each(rdev, mddev) { if (!rdev->meta_bdev) @@ -872,6 +890,37 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) case 0: break; default: + dev = container_of(rdev, struct raid_dev, rdev); + if (redundancy--) { + if (dev->meta_dev) + dm_put_device(ti, dev->meta_dev); + + dev->meta_dev = NULL; + rdev->meta_bdev = NULL; + + if (rdev->sb_page) + put_page(rdev->sb_page); + + rdev->sb_page = NULL; + + rdev->sb_loaded = 0; + + /* + * We might be able to salvage the data device + * even though the meta device has failed. For + * now, we behave as though '- -' had been + * set for this device in the table. + */ + if (dev->data_dev) + dm_put_device(ti, dev->data_dev); + + dev->data_dev = NULL; + rdev->bdev = NULL; + + list_del(&rdev->same_set); + + continue; + } ti->error = "Failed to load superblock"; return ret; } @@ -1214,7 +1263,7 @@ static void raid_resume(struct dm_target *ti) static struct target_type raid_target = { .name = "raid", - .version = {1, 1, 0}, + .version = {1, 2, 0}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr, -- cgit v1.2.3-70-g09d2