diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/bitmap.c | 45 | ||||
-rw-r--r-- | drivers/md/md.c | 35 | ||||
-rw-r--r-- | drivers/md/raid10.c | 9 | ||||
-rw-r--r-- | drivers/md/raid5.c | 32 |
4 files changed, 97 insertions, 24 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 7e65bad522c..ac89a5deaca 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -238,15 +238,47 @@ static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long inde } +static mdk_rdev_t *next_active_rdev(mdk_rdev_t *rdev, mddev_t *mddev) +{ + /* Iterate the disks of an mddev, using rcu to protect access to the + * linked list, and raising the refcount of devices we return to ensure + * they don't disappear while in use. + * As devices are only added or removed when raid_disk is < 0 and + * nr_pending is 0 and In_sync is clear, the entries we return will + * still be in the same position on the list when we re-enter + * list_for_each_continue_rcu. + */ + struct list_head *pos; + rcu_read_lock(); + if (rdev == NULL) + /* start at the beginning */ + pos = &mddev->disks; + else { + /* release the previous rdev and start from there. */ + rdev_dec_pending(rdev, mddev); + pos = &rdev->same_set; + } + list_for_each_continue_rcu(pos, &mddev->disks) { + rdev = list_entry(pos, mdk_rdev_t, same_set); + if (rdev->raid_disk >= 0 && + test_bit(In_sync, &rdev->flags) && + !test_bit(Faulty, &rdev->flags)) { + /* this is a usable devices */ + atomic_inc(&rdev->nr_pending); + rcu_read_unlock(); + return rdev; + } + } + rcu_read_unlock(); + return NULL; +} + static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) { - mdk_rdev_t *rdev; + mdk_rdev_t *rdev = NULL; mddev_t *mddev = bitmap->mddev; - rcu_read_lock(); - rdev_for_each_rcu(rdev, mddev) - if (test_bit(In_sync, &rdev->flags) - && !test_bit(Faulty, &rdev->flags)) { + while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { int size = PAGE_SIZE; if (page->index == bitmap->file_pages-1) size = roundup(bitmap->last_page_size, @@ -281,8 +313,7 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) + page->index * (PAGE_SIZE/512), size, page); - } - rcu_read_unlock(); + } if (wait) md_super_wait(mddev); diff --git a/drivers/md/md.c b/drivers/md/md.c index c7aae66c6f9..4790c83d78d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2393,6 +2393,8 @@ static void analyze_sbs(mddev_t * mddev) } +static void md_safemode_timeout(unsigned long data); + static ssize_t safe_delay_show(mddev_t *mddev, char *page) { @@ -2432,9 +2434,12 @@ safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len) if (msec == 0) mddev->safemode_delay = 0; else { + unsigned long old_delay = mddev->safemode_delay; mddev->safemode_delay = (msec*HZ)/1000; if (mddev->safemode_delay == 0) mddev->safemode_delay = 1; + if (mddev->safemode_delay < old_delay) + md_safemode_timeout((unsigned long)mddev); } return len; } @@ -3836,8 +3841,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) del_timer_sync(&mddev->safemode_timer); - invalidate_partition(disk, 0); - switch(mode) { case 1: /* readonly */ err = -ENXIO; @@ -4634,6 +4637,11 @@ static int update_size(mddev_t *mddev, sector_t num_sectors) */ if (mddev->sync_thread) return -EBUSY; + if (mddev->bitmap) + /* Sorry, cannot grow a bitmap yet, just remove it, + * grow, and re-add. + */ + return -EBUSY; rdev_for_each(rdev, tmp, mddev) { sector_t avail; avail = rdev->size * 2; @@ -5993,7 +6001,7 @@ static int remove_and_add_spares(mddev_t *mddev) } } - if (mddev->degraded) { + if (mddev->degraded && ! mddev->ro) { rdev_for_each(rdev, rtmp, mddev) { if (rdev->raid_disk >= 0 && !test_bit(In_sync, &rdev->flags) && @@ -6067,6 +6075,8 @@ void md_check_recovery(mddev_t *mddev) flush_signals(current); } + if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) + return; if ( ! ( (mddev->flags && !mddev->external) || test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || @@ -6080,6 +6090,15 @@ void md_check_recovery(mddev_t *mddev) if (mddev_trylock(mddev)) { int spares = 0; + if (mddev->ro) { + /* Only thing we do on a ro array is remove + * failed devices. + */ + remove_and_add_spares(mddev); + clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + goto unlock; + } + if (!mddev->external) { int did_change = 0; spin_lock_irq(&mddev->write_lock); @@ -6117,7 +6136,8 @@ void md_check_recovery(mddev_t *mddev) /* resync has finished, collect result */ md_unregister_thread(mddev->sync_thread); mddev->sync_thread = NULL; - if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { + if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) && + !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { /* success...*/ /* activate any spares */ if (mddev->pers->spare_active(mddev)) @@ -6169,6 +6189,7 @@ void md_check_recovery(mddev_t *mddev) } else if ((spares = remove_and_add_spares(mddev))) { clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); + clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); } else if (mddev->recovery_cp < MaxSector) { set_bit(MD_RECOVERY_SYNC, &mddev->recovery); @@ -6232,7 +6253,11 @@ static int md_notify_reboot(struct notifier_block *this, for_each_mddev(mddev, tmp) if (mddev_trylock(mddev)) { - do_md_stop (mddev, 1, 0); + /* Force a switch to readonly even array + * appears to still be in use. Hence + * the '100'. + */ + do_md_stop (mddev, 1, 100); mddev_unlock(mddev); } /* diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index d41bebb6da0..e34cd0e6247 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -76,11 +76,13 @@ static void r10bio_pool_free(void *r10_bio, void *data) kfree(r10_bio); } +/* Maximum size of each resync request */ #define RESYNC_BLOCK_SIZE (64*1024) -//#define RESYNC_BLOCK_SIZE PAGE_SIZE -#define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9) #define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE) -#define RESYNC_WINDOW (2048*1024) +/* amount of memory to reserve for resync requests */ +#define RESYNC_WINDOW (1024*1024) +/* maximum number of concurrent requests, memory permitting */ +#define RESYNC_DEPTH (32*1024*1024/RESYNC_BLOCK_SIZE) /* * When performing a resync, we need to read and compare, so @@ -690,7 +692,6 @@ static int flush_pending_writes(conf_t *conf) * there is no normal IO happeing. It must arrange to call * lower_barrier when the particular background IO completes. */ -#define RESYNC_DEPTH 32 static void raise_barrier(conf_t *conf, int force) { diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 40e93967565..224de022e7c 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2568,10 +2568,10 @@ static bool handle_stripe5(struct stripe_head *sh) if (dev->written) s.written++; rdev = rcu_dereference(conf->disks[i].rdev); - if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { + if (blocked_rdev == NULL && + rdev && unlikely(test_bit(Blocked, &rdev->flags))) { blocked_rdev = rdev; atomic_inc(&rdev->nr_pending); - break; } if (!rdev || !test_bit(In_sync, &rdev->flags)) { /* The ReadError flag will just be confusing now */ @@ -2588,8 +2588,14 @@ static bool handle_stripe5(struct stripe_head *sh) rcu_read_unlock(); if (unlikely(blocked_rdev)) { - set_bit(STRIPE_HANDLE, &sh->state); - goto unlock; + if (s.syncing || s.expanding || s.expanded || + s.to_write || s.written) { + set_bit(STRIPE_HANDLE, &sh->state); + goto unlock; + } + /* There is nothing for the blocked_rdev to block */ + rdev_dec_pending(blocked_rdev, conf->mddev); + blocked_rdev = NULL; } if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) { @@ -2832,10 +2838,10 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) if (dev->written) s.written++; rdev = rcu_dereference(conf->disks[i].rdev); - if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { + if (blocked_rdev == NULL && + rdev && unlikely(test_bit(Blocked, &rdev->flags))) { blocked_rdev = rdev; atomic_inc(&rdev->nr_pending); - break; } if (!rdev || !test_bit(In_sync, &rdev->flags)) { /* The ReadError flag will just be confusing now */ @@ -2853,9 +2859,16 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) rcu_read_unlock(); if (unlikely(blocked_rdev)) { - set_bit(STRIPE_HANDLE, &sh->state); - goto unlock; + if (s.syncing || s.expanding || s.expanded || + s.to_write || s.written) { + set_bit(STRIPE_HANDLE, &sh->state); + goto unlock; + } + /* There is nothing for the blocked_rdev to block */ + rdev_dec_pending(blocked_rdev, conf->mddev); + blocked_rdev = NULL; } + pr_debug("locked=%d uptodate=%d to_read=%d" " to_write=%d failed=%d failed_num=%d,%d\n", s.locked, s.uptodate, s.to_read, s.to_write, s.failed, @@ -4446,6 +4459,9 @@ static int raid5_check_reshape(mddev_t *mddev) return -EINVAL; /* Cannot shrink array or change level yet */ if (mddev->delta_disks == 0) return 0; /* nothing to do */ + if (mddev->bitmap) + /* Cannot grow a bitmap yet */ + return -EBUSY; /* Can only proceed if there are plenty of stripe_heads. * We need a minimum of one full stripe,, and for sensible progress |