summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2011-07-28 11:39:22 +1000
committerNeilBrown <neilb@suse.de>2011-07-28 11:39:22 +1000
commit7f0da59bdc2f65795a57009d78f7753d3aea1de3 (patch)
tree4ab3ae7188122657c12dfab61626d96360fb6240
parent31c176ecdf3563140e6395249eda51a18130d9f6 (diff)
md/raid5: use bad-block log to improve handling of uncorrectable read errors.
If we get an uncorrectable read error - record a bad block rather than failing the device. And if these errors (which may be due to known bad blocks) cause recovery to be impossible, record a bad block on the recovering devices, or abort the recovery. As we might abort a recovery without failing a device we need to teach RAID5 about recovery_disabled handling. Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r--drivers/md/raid5.c59
-rw-r--r--drivers/md/raid5.h2
2 files changed, 53 insertions, 8 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index a2d68389ee7..5fc621673e6 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2232,9 +2232,18 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh,
rcu_read_lock();
rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && test_bit(In_sync, &rdev->flags))
- /* multiple read failures in one stripe */
- md_error(conf->mddev, rdev);
+ atomic_inc(&rdev->nr_pending);
+ else
+ rdev = NULL;
rcu_read_unlock();
+ if (rdev) {
+ if (!rdev_set_badblocks(
+ rdev,
+ sh->sector,
+ STRIPE_SECTORS, 0))
+ md_error(conf->mddev, rdev);
+ rdev_dec_pending(rdev, conf->mddev);
+ }
}
spin_lock_irq(&conf->device_lock);
/* fail all writes first */
@@ -2313,6 +2322,41 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh,
md_wakeup_thread(conf->mddev->thread);
}
+static void
+handle_failed_sync(raid5_conf_t *conf, struct stripe_head *sh,
+ struct stripe_head_state *s)
+{
+ int abort = 0;
+ int i;
+
+ md_done_sync(conf->mddev, STRIPE_SECTORS, 0);
+ clear_bit(STRIPE_SYNCING, &sh->state);
+ s->syncing = 0;
+ /* There is nothing more to do for sync/check/repair.
+ * For recover we need to record a bad block on all
+ * non-sync devices, or abort the recovery
+ */
+ if (!test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery))
+ return;
+ /* During recovery devices cannot be removed, so locking and
+ * refcounting of rdevs is not needed
+ */
+ for (i = 0; i < conf->raid_disks; i++) {
+ mdk_rdev_t *rdev = conf->disks[i].rdev;
+ if (!rdev
+ || test_bit(Faulty, &rdev->flags)
+ || test_bit(In_sync, &rdev->flags))
+ continue;
+ if (!rdev_set_badblocks(rdev, sh->sector,
+ STRIPE_SECTORS, 0))
+ abort = 1;
+ }
+ if (abort) {
+ conf->recovery_disabled = conf->mddev->recovery_disabled;
+ set_bit(MD_RECOVERY_INTR, &conf->mddev->recovery);
+ }
+}
+
/* fetch_block - checks the given member device to see if its data needs
* to be read or computed to satisfy a request.
*
@@ -3067,11 +3111,8 @@ static void handle_stripe(struct stripe_head *sh)
*/
if (s.failed > conf->max_degraded && s.to_read+s.to_write+s.written)
handle_failed_stripe(conf, sh, &s, disks, &s.return_bi);
- if (s.failed > conf->max_degraded && s.syncing) {
- md_done_sync(conf->mddev, STRIPE_SECTORS, 0);
- clear_bit(STRIPE_SYNCING, &sh->state);
- s.syncing = 0;
- }
+ if (s.failed > conf->max_degraded && s.syncing)
+ handle_failed_sync(conf, sh, &s);
/*
* might be able to return some write requests if the parity blocks
@@ -4976,6 +5017,7 @@ static int raid5_remove_disk(mddev_t *mddev, int number)
* isn't possible.
*/
if (!test_bit(Faulty, &rdev->flags) &&
+ mddev->recovery_disabled != conf->recovery_disabled &&
!has_failed(conf) &&
number < conf->raid_disks) {
err = -EBUSY;
@@ -5004,6 +5046,9 @@ static int raid5_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
int first = 0;
int last = conf->raid_disks - 1;
+ if (mddev->recovery_disabled == conf->recovery_disabled)
+ return -EBUSY;
+
if (has_failed(conf))
/* no point adding a device */
return -EINVAL;
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 68c500af110..c5429d12363 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -399,7 +399,7 @@ struct raid5_private_data {
* (fresh device added).
* Cleared when a sync completes.
*/
-
+ int recovery_disabled;
/* per cpu variables */
struct raid5_percpu {
struct page *spare_page; /* Used when checking P/Q in raid6 */