From 7c44ece988a3419e51c833cf14be186a22224dd1 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 30 Aug 2010 17:35:52 +1000 Subject: Move .gitignore from drivers/md to lib/raid6 Another missing bit of the raid6 -> /lib move. Reported-by: Andreas Schwab Signed-off-by: NeilBrown --- drivers/md/.gitignore | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 drivers/md/.gitignore (limited to 'drivers/md') diff --git a/drivers/md/.gitignore b/drivers/md/.gitignore deleted file mode 100644 index a7afec6b19c..00000000000 --- a/drivers/md/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -mktables -raid6altivec*.c -raid6int*.c -raid6tables.c -- cgit v1.2.3-70-g09d2 From bd52b746262c8d77e73903d6608014fb2fcdcd9d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 30 Aug 2010 17:33:33 +1000 Subject: md: don't clear MD_CHANGE_CLEAN in md_update_sb() for external arrays If this bit is cleared in md_update_sb() the kernel will allow writes to the array if userspace triggers md_allow_write(), e.g. through stripe_cache_size, when mdmon is not active. When mdmon is active the array transitions to active-idle bypassing write-pending, setting up a race for mdmon to set the array clean before a write arrives. Signed-off-by: Dan Williams Signed-off-by: NeilBrown --- drivers/md/md.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index c148b630215..a1f6b59b8b3 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2167,9 +2167,10 @@ repeat: rdev->recovery_offset = mddev->curr_resync_completed; } - if (mddev->external || !mddev->persistent) { + if (!mddev->persistent) { + if (!mddev->external) + clear_bit(MD_CHANGE_CLEAN, &mddev->flags); clear_bit(MD_CHANGE_DEVS, &mddev->flags); - clear_bit(MD_CHANGE_CLEAN, &mddev->flags); wake_up(&mddev->sb_wait); return; } -- cgit v1.2.3-70-g09d2 From 070dc6dd7103b6b3f7e4d46e754354a5c15f366e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 30 Aug 2010 17:33:34 +1000 Subject: md: resolve confusion of MD_CHANGE_CLEAN MD_CHANGE_CLEAN is used for two different purposes and this leads to confusion. One of the purposes is largely mirrored by MD_CHANGE_PENDING which is not used for anything else, so have MD_CHANGE_PENDING take over that purpose fully. The two purposes are: 1/ tell md_update_sb that an update is needed and that it is just a clean/dirty transition. 2/ tell user-space that an transition from clean to dirty is pending (something wants to write), and tell te kernel (by clearin the flag) that the transition is OK. The first purpose remains wit MD_CHANGE_CLEAN, the second is moved fully to MD_CHANGE_PENDING. This means that various places which conditionally set or cleared MD_CHANGE_CLEAN no longer need to be conditional. Signed-off-by: NeilBrown --- drivers/md/bitmap.c | 3 +-- drivers/md/md.c | 24 +++++++++--------------- drivers/md/md.h | 2 +- 3 files changed, 11 insertions(+), 18 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 1ba1e122e94..ed4900ade93 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1542,8 +1542,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) atomic_read(&bitmap->mddev->recovery_active) == 0); bitmap->mddev->curr_resync_completed = bitmap->mddev->curr_resync; - if (bitmap->mddev->persistent) - set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags); + set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags); sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1); s = 0; while (s < sector && s < bitmap->mddev->resync_max_sectors) { diff --git a/drivers/md/md.c b/drivers/md/md.c index a1f6b59b8b3..43cf9cc9c1d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2168,8 +2168,7 @@ repeat: } if (!mddev->persistent) { - if (!mddev->external) - clear_bit(MD_CHANGE_CLEAN, &mddev->flags); + clear_bit(MD_CHANGE_CLEAN, &mddev->flags); clear_bit(MD_CHANGE_DEVS, &mddev->flags); wake_up(&mddev->sb_wait); return; @@ -2179,7 +2178,6 @@ repeat: mddev->utime = get_seconds(); - set_bit(MD_CHANGE_PENDING, &mddev->flags); if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags)) force_change = 1; if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags)) @@ -3372,7 +3370,7 @@ array_state_show(mddev_t *mddev, char *page) case 0: if (mddev->in_sync) st = clean; - else if (test_bit(MD_CHANGE_CLEAN, &mddev->flags)) + else if (test_bit(MD_CHANGE_PENDING, &mddev->flags)) st = write_pending; else if (mddev->safemode) st = active_idle; @@ -3453,9 +3451,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) mddev->in_sync = 1; if (mddev->safemode == 1) mddev->safemode = 0; - if (mddev->persistent) - set_bit(MD_CHANGE_CLEAN, - &mddev->flags); + set_bit(MD_CHANGE_CLEAN, &mddev->flags); } err = 0; } else @@ -3467,8 +3463,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) case active: if (mddev->pers) { restart_array(mddev); - if (mddev->external) - clear_bit(MD_CHANGE_CLEAN, &mddev->flags); + clear_bit(MD_CHANGE_PENDING, &mddev->flags); wake_up(&mddev->sb_wait); err = 0; } else { @@ -6573,6 +6568,7 @@ void md_write_start(mddev_t *mddev, struct bio *bi) if (mddev->in_sync) { mddev->in_sync = 0; set_bit(MD_CHANGE_CLEAN, &mddev->flags); + set_bit(MD_CHANGE_PENDING, &mddev->flags); md_wakeup_thread(mddev->thread); did_change = 1; } @@ -6581,7 +6577,6 @@ void md_write_start(mddev_t *mddev, struct bio *bi) if (did_change) sysfs_notify_dirent_safe(mddev->sysfs_state); wait_event(mddev->sb_wait, - !test_bit(MD_CHANGE_CLEAN, &mddev->flags) && !test_bit(MD_CHANGE_PENDING, &mddev->flags)); } @@ -6617,6 +6612,7 @@ int md_allow_write(mddev_t *mddev) if (mddev->in_sync) { mddev->in_sync = 0; set_bit(MD_CHANGE_CLEAN, &mddev->flags); + set_bit(MD_CHANGE_PENDING, &mddev->flags); if (mddev->safemode_delay && mddev->safemode == 0) mddev->safemode = 1; @@ -6626,7 +6622,7 @@ int md_allow_write(mddev_t *mddev) } else spin_unlock_irq(&mddev->write_lock); - if (test_bit(MD_CHANGE_CLEAN, &mddev->flags)) + if (test_bit(MD_CHANGE_PENDING, &mddev->flags)) return -EAGAIN; else return 0; @@ -6824,8 +6820,7 @@ void md_do_sync(mddev_t *mddev) atomic_read(&mddev->recovery_active) == 0); mddev->curr_resync_completed = mddev->curr_resync; - if (mddev->persistent) - set_bit(MD_CHANGE_CLEAN, &mddev->flags); + set_bit(MD_CHANGE_CLEAN, &mddev->flags); sysfs_notify(&mddev->kobj, NULL, "sync_completed"); } @@ -7104,8 +7099,7 @@ void md_check_recovery(mddev_t *mddev) mddev->recovery_cp == MaxSector) { mddev->in_sync = 1; did_change = 1; - if (mddev->persistent) - set_bit(MD_CHANGE_CLEAN, &mddev->flags); + set_bit(MD_CHANGE_CLEAN, &mddev->flags); } if (mddev->safemode == 1) mddev->safemode = 0; diff --git a/drivers/md/md.h b/drivers/md/md.h index a953fe2808a..3931299788d 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -140,7 +140,7 @@ struct mddev_s unsigned long flags; #define MD_CHANGE_DEVS 0 /* Some device status has changed */ #define MD_CHANGE_CLEAN 1 /* transition to or from 'clean' */ -#define MD_CHANGE_PENDING 2 /* superblock update in progress */ +#define MD_CHANGE_PENDING 2 /* switch from 'clean' to 'active' in progress */ int suspended; atomic_t active_io; -- cgit v1.2.3-70-g09d2 From 126925c090155f13e90b9e7e8c4010e96027c00a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 7 Sep 2010 17:02:47 +1000 Subject: md: call md_update_sb even for 'external' metadata arrays. Now that we depend on md_update_sb to clear variable bits in mddev->flags (rather than trying not to set them) it is important to always call md_update_sb when appropriate. md_check_recovery has this job but explicitly avoids it for ->external metadata arrays. This is not longer appropraite, or needed. However we do want to avoid taking the mddev lock if only MD_CHANGE_PENDING is set as that is not cleared by md_update_sb for external-metadata arrays. Reported-by: "Kwolek, Adam" Signed-off-by: NeilBrown --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index 43cf9cc9c1d..bdd9bba577b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7069,7 +7069,7 @@ void md_check_recovery(mddev_t *mddev) if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) return; if ( ! ( - (mddev->flags && !mddev->external) || + (mddev->flags & ~ (1<recovery) || test_bit(MD_RECOVERY_DONE, &mddev->recovery) || (mddev->external == 0 && mddev->safemode == 1) || -- cgit v1.2.3-70-g09d2 From ddcf3522cf03a147c867a2e0155761652dbd156a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 8 Sep 2010 16:48:17 +1000 Subject: md: fix v1.x metadata update when a disk is missing. If an array with 1.x metadata is assembled with the last disk missing, md doesn't properly record the fact that the disk was missing. This is unlikely to cause a real problem as the event count will be different to the count on the missing disk so it won't be included in the array. However it could still cause confusion. So make sure we clear all the relevant slots, not just the early ones. Signed-off-by: NeilBrown --- drivers/md/md.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index bdd9bba577b..f20d13e717d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1643,7 +1643,9 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1; if (rdev->sb_size & bmask) rdev->sb_size = (rdev->sb_size | bmask) + 1; - } + } else + max_dev = le32_to_cpu(sb->max_dev); + for (i=0; idev_roles[i] = cpu_to_le16(0xfffe); -- cgit v1.2.3-70-g09d2 From 7571ae887d3b96d8e7ce63d43828866c6a91cdc6 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Oct 2010 11:54:46 +1100 Subject: md/raid1: avoid overflow in raid1 resync when bitmap is in use. bitmap_start_sync returns - via a pass-by-reference variable - the number of sectors before we need to check with the bitmap again. Since commit ef4256733506f245 this number can be substantially larger, 2^27 is a common value. Unfortunately it is an 'int' and so when raid1.c:sync_request shifts it 9 places to the left it becomes 0. This results in a zero-length read which the scsi layer justifiably complains about. This patch just removes the shift so the common case becomes safe with a trivially-correct patch. In the next merge window we will convert this 'int' to a 'sector_t' Reported-by: "George Spelvin" Signed-off-by: NeilBrown --- drivers/md/raid1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index ad83a4dcadc..fba4d2feaeb 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1912,7 +1912,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) break; BUG_ON(sync_blocks < (PAGE_SIZE>>9)); - if (len > (sync_blocks<<9)) + if ((len >> 9) > sync_blocks) len = sync_blocks<<9; } -- cgit v1.2.3-70-g09d2 From db8d9d3591f77cc6b66248b2cdfa1c43deee5ec9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Oct 2010 12:00:50 +1100 Subject: md/raid1: minor bio initialisation improvements. When performing a resync we pre-allocate some bios and repeatedly use them. This requires us to re-initialise them each time. One field (bi_comp_cpu) and some flags weren't being initiaised reliably. Signed-off-by: NeilBrown --- drivers/md/raid1.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/md') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index fba4d2feaeb..0b830bbe1d8 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1839,7 +1839,9 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i /* take from bio_init */ bio->bi_next = NULL; + bio->bi_flags &= ~(BIO_POOL_MASK-1); bio->bi_flags |= 1 << BIO_UPTODATE; + bio->bi_comp_cpu = -1; bio->bi_rw = READ; bio->bi_vcnt = 0; bio->bi_idx = 0; -- cgit v1.2.3-70-g09d2 From 5c04f5512f8134c75cd36e5b0354e10e330f4e6e Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Fri, 1 Oct 2010 14:18:12 -0700 Subject: md: check return code of read_sb_page Function read_sb_page may return ERR_PTR(...). Check for it. Signed-off-by: Vasiliy Kulikov Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: NeilBrown --- drivers/md/bitmap.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index ed4900ade93..e4fb58db545 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1000,10 +1000,11 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) page = bitmap->sb_page; offset = sizeof(bitmap_super_t); if (!file) - read_sb_page(bitmap->mddev, - bitmap->mddev->bitmap_info.offset, - page, - index, count); + page = read_sb_page( + bitmap->mddev, + bitmap->mddev->bitmap_info.offset, + page, + index, count); } else if (file) { page = read_page(file, index, bitmap, count); offset = 0; -- cgit v1.2.3-70-g09d2