summary refs log tree commit diff stats
path: root/drivers/md/md.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- drivers/md/md.c 194
1 files changed, 130 insertions, 64 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 05febfd9f07..509171ca7fa 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1296,27 +1296,17 @@ static struct super_type super_types[] = {
.sync_super = super_1_sync,
},
};
-
-static mdk_rdev_t * match_dev_unit(mddev_t *mddev, mdk_rdev_t *dev)
-{
- struct list_head *tmp;
- mdk_rdev_t *rdev;
-
- ITERATE_RDEV(mddev,rdev,tmp)
- if (rdev->bdev->bd_contains == dev->bdev->bd_contains)
- return rdev;
-
- return NULL;
-}
static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
{ /* Returns 1 if any member of mddev1 and any member of mddev2 have the same bdev->bd_contains, otherwise 0. */
- struct list_head *tmp;
- mdk_rdev_t *rdev;
+ struct list_head *tmp, *tmp2;
+ mdk_rdev_t *rdev, *rdev2;
ITERATE_RDEV(mddev1,rdev,tmp)
- if (match_dev_unit(mddev2, rdev))
- return 1;
+ ITERATE_RDEV(mddev2, rdev2, tmp2) /* nested walk: each rdev of mddev1 is tested against each rdev of mddev2 */
+ if (rdev->bdev->bd_contains ==
+ rdev2->bdev->bd_contains)
+ return 1; /* stop at the first matching pair */
return 0; /* no pair of members had the same bd_contains */
}
@@ -1325,10 +1315,10 @@ static LIST_HEAD(pending_raid_disks);
static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
{
- mdk_rdev_t *same_pdev;
- char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
+ char b[BDEVNAME_SIZE];
struct kobject *ko;
char *s;
+ int err;
if (rdev->mddev) {
MD_BUG();
@@ -1342,14 +1332,6 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
else
mddev->size = rdev->size;
}
- same_pdev = match_dev_unit(mddev, rdev);
- if (same_pdev)
- printk(KERN_WARNING
- "%s: WARNING: %s appears to be on the same physical"
- " disk as %s. True\n protection against single-disk"
- " failure might be compromised.\n",
- mdname(mddev), bdevname(rdev->bdev,b),
- bdevname(same_pdev->bdev,b2));
/* Verify rdev->desc_nr is unique.
* If it is -1, assign a free number, else
@@ -1371,20 +1353,35 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
while ( (s=strchr(rdev->kobj.k_name, '/')) != NULL)
*s = '!';
- list_add(&rdev->same_set, &mddev->disks);
rdev->mddev = mddev;
printk(KERN_INFO "md: bind<%s>\n", b);
rdev->kobj.parent = &mddev->kobj;
- kobject_add(&rdev->kobj);
+ if ((err = kobject_add(&rdev->kobj)))
+ goto fail;
if (rdev->bdev->bd_part)
ko = &rdev->bdev->bd_part->kobj;
else
ko = &rdev->bdev->bd_disk->kobj;
- sysfs_create_link(&rdev->kobj, ko, "block");
+ if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) {
+ kobject_del(&rdev->kobj);
+ goto fail;
+ }
+ list_add(&rdev->same_set, &mddev->disks);
bd_claim_by_disk(rdev->bdev, rdev, mddev->gendisk);
return 0;
+
+ fail:
+ printk(KERN_WARNING "md: failed to register dev-%s for %s\n",
+ b, mdname(mddev));
+ return err;
+}
+
+static void delayed_delete(struct work_struct *ws)
+{ /* Work handler queued by unbind_rdev_from_array(); performs the kobject_del() that is deferred there. */
+ mdk_rdev_t *rdev = container_of(ws, mdk_rdev_t, del_work); /* ws is the del_work member embedded in the rdev */
+ kobject_del(&rdev->kobj);
}
static void unbind_rdev_from_array(mdk_rdev_t * rdev)
@@ -1399,7 +1396,12 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev)
printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
rdev->mddev = NULL;
sysfs_remove_link(&rdev->kobj, "block");
- kobject_del(&rdev->kobj);
+
+ /* We need to delay this, otherwise we can deadlock when
+ * writing 'remove' to "dev/state"
+ */
+ INIT_WORK(&rdev->del_work, delayed_delete);
+ schedule_work(&rdev->del_work);
}
/*
@@ -2985,7 +2987,9 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
mddev->kobj.k_name = NULL;
snprintf(mddev->kobj.name, KOBJ_NAME_LEN, "%s", "md");
mddev->kobj.ktype = &md_ktype;
- kobject_register(&mddev->kobj);
+ if (kobject_register(&mddev->kobj))
+ printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
+ disk->disk_name);
return NULL;
}
@@ -3109,6 +3113,36 @@ static int do_md_run(mddev_t * mddev)
return -EINVAL;
}
+ if (pers->sync_request) {
+ /* Warn if this is a potentially silly
+ * configuration.
+ */
+ char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
+ mdk_rdev_t *rdev2;
+ struct list_head *tmp2;
+ int warned = 0;
+ ITERATE_RDEV(mddev, rdev, tmp) {
+ ITERATE_RDEV(mddev, rdev2, tmp2) {
+ if (rdev < rdev2 &&
+ rdev->bdev->bd_contains ==
+ rdev2->bdev->bd_contains) {
+ printk(KERN_WARNING
+ "%s: WARNING: %s appears to be"
+ " on the same physical disk as"
+ " %s.\n",
+ mdname(mddev),
+ bdevname(rdev->bdev,b),
+ bdevname(rdev2->bdev,b2));
+ warned = 1;
+ }
+ }
+ }
+ if (warned)
+ printk(KERN_WARNING
+ "True protection against single-disk"
+ " failure might be compromised.\n");
+ }
+
mddev->recovery = 0;
mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */
mddev->barriers_work = 1;
@@ -3133,9 +3167,12 @@ static int do_md_run(mddev_t * mddev)
bitmap_destroy(mddev);
return err;
}
- if (mddev->pers->sync_request)
- sysfs_create_group(&mddev->kobj, &md_redundancy_group);
- else if (mddev->ro == 2) /* auto-readonly not meaningful */
+ if (mddev->pers->sync_request) {
+ if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
+ printk(KERN_WARNING
+ "md: cannot register extra attributes for %s\n",
+ mdname(mddev));
+ } else if (mddev->ro == 2) /* auto-readonly not meaningful */
mddev->ro = 0;
atomic_set(&mddev->writes_pending,0);
@@ -3149,7 +3186,9 @@ static int do_md_run(mddev_t * mddev)
if (rdev->raid_disk >= 0) {
char nm[20];
sprintf(nm, "rd%d", rdev->raid_disk);
- sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
+ if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm))
+ printk("md: cannot register %s for %s\n",
+ nm, mdname(mddev));
}
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -3311,6 +3350,10 @@ static int do_md_stop(mddev_t * mddev, int mode)
set_disk_ro(disk, 0);
blk_queue_make_request(mddev->queue, md_fail_request);
mddev->pers->stop(mddev);
+ mddev->queue->merge_bvec_fn = NULL;
+ mddev->queue->unplug_fn = NULL;
+ mddev->queue->issue_flush_fn = NULL;
+ mddev->queue->backing_dev_info.congested_fn = NULL;
if (mddev->pers->sync_request)
sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
@@ -3357,6 +3400,9 @@ static int do_md_stop(mddev_t * mddev, int mode)
sysfs_remove_link(&mddev->kobj, nm);
}
+ /* make sure all delayed_delete calls have finished */
+ flush_scheduled_work();
+
export_array(mddev);
mddev->array_size = 0;
@@ -5343,6 +5389,48 @@ void md_do_sync(mddev_t *mddev)
EXPORT_SYMBOL_GPL(md_do_sync);
+static int remove_and_add_spares(mddev_t *mddev)
+{ /* Two passes over the array's rdevs: first detach unusable members, then (if degraded) offer unassigned devices to the personality. Returns the number of devices successfully hot-added. */
+ mdk_rdev_t *rdev;
+ struct list_head *rtmp;
+ int spares = 0; /* count of successful hot_add_disk calls; this is the return value */
+
+ ITERATE_RDEV(mddev,rdev,rtmp) /* pass 1: removal candidates */
+ if (rdev->raid_disk >= 0 && /* currently holds a slot ... */
+ (test_bit(Faulty, &rdev->flags) ||
+ ! test_bit(In_sync, &rdev->flags)) && /* ... is Faulty or not In_sync ... */
+ atomic_read(&rdev->nr_pending)==0) { /* ... and has no pending requests */
+ if (mddev->pers->hot_remove_disk(
+ mddev, rdev->raid_disk)==0) { /* 0 is treated as "removed" */
+ char nm[20];
+ sprintf(nm,"rd%d", rdev->raid_disk);
+ sysfs_remove_link(&mddev->kobj, nm); /* drop the "rd<N>" link for the old slot */
+ rdev->raid_disk = -1; /* mark the device as having no slot */
+ }
+ }
+
+ if (mddev->degraded) { /* pass 2 runs only when the array is marked degraded */
+ ITERATE_RDEV(mddev,rdev,rtmp)
+ if (rdev->raid_disk < 0
+ && !test_bit(Faulty, &rdev->flags)) { /* no slot and not Faulty: candidate to add */
+ rdev->recovery_offset = 0;
+ if (mddev->pers->hot_add_disk(mddev,rdev)) { /* non-zero is treated as "added" */
+ char nm[20];
+ sprintf(nm, "rd%d", rdev->raid_disk);
+ if (sysfs_create_link(&mddev->kobj,
+ &rdev->kobj, nm))
+ printk(KERN_WARNING
+ "md: cannot register "
+ "%s for %s\n",
+ nm, mdname(mddev)); /* link failure is only logged; the device is still counted */
+ spares++;
+ md_new_event(mddev);
+ } else
+ break; /* stop at the first device the personality refuses */
+ }
+ }
+ return spares;
+}
/*
* This routine is regularly called by all per-raid-array threads to
* deal with generic issues like resync and super-block update.
@@ -5397,7 +5485,7 @@ void md_check_recovery(mddev_t *mddev)
return;
if (mddev_trylock(mddev)) {
- int spares =0;
+ int spares = 0;
spin_lock_irq(&mddev->write_lock);
if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
@@ -5460,35 +5548,13 @@ void md_check_recovery(mddev_t *mddev)
* Spare are also removed and re-added, to allow
* the personality to fail the re-add.
*/
- ITERATE_RDEV(mddev,rdev,rtmp)
- if (rdev->raid_disk >= 0 &&
- (test_bit(Faulty, &rdev->flags) || ! test_bit(In_sync, &rdev->flags)) &&
- atomic_read(&rdev->nr_pending)==0) {
- if (mddev->pers->hot_remove_disk(mddev, rdev->raid_disk)==0) {
- char nm[20];
- sprintf(nm,"rd%d", rdev->raid_disk);
- sysfs_remove_link(&mddev->kobj, nm);
- rdev->raid_disk = -1;
- }
- }
-
- if (mddev->degraded) {
- ITERATE_RDEV(mddev,rdev,rtmp)
- if (rdev->raid_disk < 0
- && !test_bit(Faulty, &rdev->flags)) {
- rdev->recovery_offset = 0;
- if (mddev->pers->hot_add_disk(mddev,rdev)) {
- char nm[20];
- sprintf(nm, "rd%d", rdev->raid_disk);
- sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
- spares++;
- md_new_event(mddev);
- } else
- break;
- }
- }
- if (spares) {
+ if (mddev->reshape_position != MaxSector) {
+ if (mddev->pers->check_reshape(mddev) != 0)
+ /* Cannot proceed */
+ goto unlock;
+ set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
+ } else if ((spares = remove_and_add_spares(mddev))) {
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
} else if (mddev->recovery_cp < MaxSector) {