diff options
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 447 |
1 files changed, 224 insertions, 223 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 20f6ac33834..9dd872000ce 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -359,6 +359,7 @@ static mddev_t * mddev_find(dev_t unit) else new->md_minor = MINOR(unit) >> MdpMinorShift; + mutex_init(&new->open_mutex); mutex_init(&new->reconfig_mutex); INIT_LIST_HEAD(&new->disks); INIT_LIST_HEAD(&new->all_mddevs); @@ -440,15 +441,6 @@ static inline sector_t calc_dev_sboffset(struct block_device *bdev) return MD_NEW_SIZE_SECTORS(num_sectors); } -static sector_t calc_num_sectors(mdk_rdev_t *rdev, unsigned chunk_size) -{ - sector_t num_sectors = rdev->sb_start; - - if (chunk_size) - num_sectors &= ~((sector_t)chunk_size/512 - 1); - return num_sectors; -} - static int alloc_disk_sb(mdk_rdev_t * rdev) { if (rdev->sb_page) @@ -745,6 +737,24 @@ struct super_type { }; /* + * Check that the given mddev has no bitmap. + * + * This function is called from the run method of all personalities that do not + * support bitmaps. It prints an error message and returns non-zero if mddev + * has a bitmap. Otherwise, it returns 0. + * + */ +int md_check_no_bitmap(mddev_t *mddev) +{ + if (!mddev->bitmap_file && !mddev->bitmap_offset) + return 0; + printk(KERN_ERR "%s: bitmaps are not supported for %s\n", + mdname(mddev), mddev->pers->name); + return 1; +} +EXPORT_SYMBOL(md_check_no_bitmap); + +/* * load_super for 0.90.0 */ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) @@ -797,17 +807,6 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version rdev->data_offset = 0; rdev->sb_size = MD_SB_BYTES; - if (sb->state & (1<<MD_SB_BITMAP_PRESENT)) { - if (sb->level != 1 && sb->level != 4 - && sb->level != 5 && sb->level != 6 - && sb->level != 10) { - /* FIXME use a better test */ - printk(KERN_WARNING - "md: bitmaps not supported for this level.\n"); - goto abort; - } - } - if (sb->level == LEVEL_MULTIPATH) rdev->desc_nr = -1; else @@ -836,7 +835,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version else ret = 0; } - rdev->sectors = calc_num_sectors(rdev, sb->chunk_size); + rdev->sectors = rdev->sb_start; if (rdev->sectors < sb->size * 2 && sb->level > 1) /* "this cannot possibly happen" ... */ @@ -866,7 +865,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->minor_version = sb->minor_version; mddev->patch_version = sb->patch_version; mddev->external = 0; - mddev->chunk_size = sb->chunk_size; + mddev->chunk_sectors = sb->chunk_size >> 9; mddev->ctime = sb->ctime; mddev->utime = sb->utime; mddev->level = sb->level; @@ -883,13 +882,13 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->delta_disks = sb->delta_disks; mddev->new_level = sb->new_level; mddev->new_layout = sb->new_layout; - mddev->new_chunk = sb->new_chunk; + mddev->new_chunk_sectors = sb->new_chunk >> 9; } else { mddev->reshape_position = MaxSector; mddev->delta_disks = 0; mddev->new_level = mddev->level; mddev->new_layout = mddev->layout; - mddev->new_chunk = mddev->chunk_size; + mddev->new_chunk_sectors = mddev->chunk_sectors; } if (sb->state & (1<<MD_SB_CLEAN)) @@ -1004,7 +1003,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->new_level = mddev->new_level; sb->delta_disks = mddev->delta_disks; sb->new_layout = mddev->new_layout; - sb->new_chunk = mddev->new_chunk; + sb->new_chunk = mddev->new_chunk_sectors << 9; } mddev->minor_version = sb->minor_version; if (mddev->in_sync) @@ -1018,7 +1017,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->recovery_cp = 0; sb->layout = mddev->layout; - sb->chunk_size = mddev->chunk_size; + sb->chunk_size = mddev->chunk_sectors << 9; if (mddev->bitmap && mddev->bitmap_file == NULL) sb->state |= (1<<MD_SB_BITMAP_PRESENT); @@ -1185,17 +1184,6 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) bdevname(rdev->bdev,b)); return -EINVAL; } - if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET)) { - if (sb->level != cpu_to_le32(1) && - sb->level != cpu_to_le32(4) && - sb->level != cpu_to_le32(5) && - sb->level != cpu_to_le32(6) && - sb->level != cpu_to_le32(10)) { - printk(KERN_WARNING - "md: bitmaps not supported for this level.\n"); - return -EINVAL; - } - } rdev->preferred_minor = 0xffff; rdev->data_offset = le64_to_cpu(sb->data_offset); @@ -1248,9 +1236,6 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) if (rdev->sectors < le64_to_cpu(sb->data_size)) return -EINVAL; rdev->sectors = le64_to_cpu(sb->data_size); - if (le32_to_cpu(sb->chunksize)) - rdev->sectors &= ~((sector_t)le32_to_cpu(sb->chunksize) - 1); - if (le64_to_cpu(sb->size) > rdev->sectors) return -EINVAL; return ret; @@ -1271,7 +1256,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->major_version = 1; mddev->patch_version = 0; mddev->external = 0; - mddev->chunk_size = le32_to_cpu(sb->chunksize) << 9; + mddev->chunk_sectors = le32_to_cpu(sb->chunksize); mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1); mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1); mddev->level = le32_to_cpu(sb->level); @@ -1297,13 +1282,13 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->delta_disks = le32_to_cpu(sb->delta_disks); mddev->new_level = le32_to_cpu(sb->new_level); mddev->new_layout = le32_to_cpu(sb->new_layout); - mddev->new_chunk = le32_to_cpu(sb->new_chunk)<<9; + mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk); } else { mddev->reshape_position = MaxSector; mddev->delta_disks = 0; mddev->new_level = mddev->level; mddev->new_layout = mddev->layout; - mddev->new_chunk = mddev->chunk_size; + mddev->new_chunk_sectors = mddev->chunk_sectors; } } else if (mddev->pers == NULL) { @@ -1324,7 +1309,12 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) } if (mddev->level != LEVEL_MULTIPATH) { int role; - role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); + if (rdev->desc_nr < 0 || + rdev->desc_nr >= le32_to_cpu(sb->max_dev)) { + role = 0xffff; + rdev->desc_nr = -1; + } else + role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); switch(role) { case 0xffff: /* spare */ break; @@ -1375,7 +1365,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->raid_disks = cpu_to_le32(mddev->raid_disks); sb->size = cpu_to_le64(mddev->dev_sectors); - sb->chunksize = cpu_to_le32(mddev->chunk_size >> 9); + sb->chunksize = cpu_to_le32(mddev->chunk_sectors); sb->level = cpu_to_le32(mddev->level); sb->layout = cpu_to_le32(mddev->layout); @@ -1402,7 +1392,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->new_layout = cpu_to_le32(mddev->new_layout); sb->delta_disks = cpu_to_le32(mddev->delta_disks); sb->new_level = cpu_to_le32(mddev->new_level); - sb->new_chunk = cpu_to_le32(mddev->new_chunk>>9); + sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors); } max_dev = 0; @@ -1410,8 +1400,14 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) if (rdev2->desc_nr+1 > max_dev) max_dev = rdev2->desc_nr+1; - if (max_dev > le32_to_cpu(sb->max_dev)) + if (max_dev > le32_to_cpu(sb->max_dev)) { + int bmask; sb->max_dev = cpu_to_le32(max_dev); + rdev->sb_size = max_dev * 2 + 256; + bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1; + if (rdev->sb_size & bmask) + rdev->sb_size = (rdev->sb_size | bmask) + 1; + } for (i=0; i<max_dev;i++) sb->dev_roles[i] = cpu_to_le16(0xfffe); @@ -1503,37 +1499,76 @@ static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2) static LIST_HEAD(pending_raid_disks); -static void md_integrity_check(mdk_rdev_t *rdev, mddev_t *mddev) +/* + * Try to register data integrity profile for an mddev + * + * This is called when an array is started and after a disk has been kicked + * from the array. It only succeeds if all working and active component devices + * are integrity capable with matching profiles. + */ +int md_integrity_register(mddev_t *mddev) +{ + mdk_rdev_t *rdev, *reference = NULL; + + if (list_empty(&mddev->disks)) + return 0; /* nothing to do */ + if (blk_get_integrity(mddev->gendisk)) + return 0; /* already registered */ + list_for_each_entry(rdev, &mddev->disks, same_set) { + /* skip spares and non-functional disks */ + if (test_bit(Faulty, &rdev->flags)) + continue; + if (rdev->raid_disk < 0) + continue; + /* + * If at least one rdev is not integrity capable, we can not + * enable data integrity for the md device. + */ + if (!bdev_get_integrity(rdev->bdev)) + return -EINVAL; + if (!reference) { + /* Use the first rdev as the reference */ + reference = rdev; + continue; + } + /* does this rdev's profile match the reference profile? */ + if (blk_integrity_compare(reference->bdev->bd_disk, + rdev->bdev->bd_disk) < 0) + return -EINVAL; + } + /* + * All component devices are integrity capable and have matching + * profiles, register the common profile for the md device. + */ + if (blk_integrity_register(mddev->gendisk, + bdev_get_integrity(reference->bdev)) != 0) { + printk(KERN_ERR "md: failed to register integrity for %s\n", + mdname(mddev)); + return -EINVAL; + } + printk(KERN_NOTICE "md: data integrity on %s enabled\n", + mdname(mddev)); + return 0; +} +EXPORT_SYMBOL(md_integrity_register); + +/* Disable data integrity if non-capable/non-matching disk is being added */ +void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev) { - struct mdk_personality *pers = mddev->pers; - struct gendisk *disk = mddev->gendisk; struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev); - struct blk_integrity *bi_mddev = blk_get_integrity(disk); + struct blk_integrity *bi_mddev = blk_get_integrity(mddev->gendisk); - /* Data integrity passthrough not supported on RAID 4, 5 and 6 */ - if (pers && pers->level >= 4 && pers->level <= 6) + if (!bi_mddev) /* nothing to do */ return; - - /* If rdev is integrity capable, register profile for mddev */ - if (!bi_mddev && bi_rdev) { - if (blk_integrity_register(disk, bi_rdev)) - printk(KERN_ERR "%s: %s Could not register integrity!\n", - __func__, disk->disk_name); - else - printk(KERN_NOTICE "Enabling data integrity on %s\n", - disk->disk_name); + if (rdev->raid_disk < 0) /* skip spares */ return; - } - - /* Check that mddev and rdev have matching profiles */ - if (blk_integrity_compare(disk, rdev->bdev->bd_disk) < 0) { - printk(KERN_ERR "%s: %s/%s integrity mismatch!\n", __func__, - disk->disk_name, rdev->bdev->bd_disk->disk_name); - printk(KERN_NOTICE "Disabling data integrity on %s\n", - disk->disk_name); - blk_integrity_unregister(disk); - } + if (bi_rdev && blk_integrity_compare(mddev->gendisk, + rdev->bdev->bd_disk) >= 0) + return; + printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev)); + blk_integrity_unregister(mddev->gendisk); } +EXPORT_SYMBOL(md_integrity_add_rdev); static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) { @@ -1607,7 +1642,6 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) /* May as well allow recovery to be retried once */ mddev->recovery_disabled = 0; - md_integrity_check(rdev, mddev); return 0; fail: @@ -1772,9 +1806,10 @@ static void print_sb_1(struct mdp_superblock_1 *sb) __u8 *uuid; uuid = sb->set_uuid; - printk(KERN_INFO "md: SB: (V:%u) (F:0x%08x) Array-ID:<%02x%02x%02x%02x" - ":%02x%02x:%02x%02x:%02x%02x:%02x%02x%02x%02x%02x%02x>\n" - KERN_INFO "md: Name: \"%s\" CT:%llu\n", + printk(KERN_INFO + "md: SB: (V:%u) (F:0x%08x) Array-ID:<%02x%02x%02x%02x" + ":%02x%02x:%02x%02x:%02x%02x:%02x%02x%02x%02x%02x%02x>\n" + "md: Name: \"%s\" CT:%llu\n", le32_to_cpu(sb->major_version), le32_to_cpu(sb->feature_map), uuid[0], uuid[1], uuid[2], uuid[3], @@ -1786,12 +1821,13 @@ static void print_sb_1(struct mdp_superblock_1 *sb) & MD_SUPERBLOCK_1_TIME_SEC_MASK); uuid = sb->device_uuid; - printk(KERN_INFO "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu" + printk(KERN_INFO + "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu" " RO:%llu\n" - KERN_INFO "md: Dev:%08x UUID: %02x%02x%02x%02x:%02x%02x:%02x%02x:%02x%02x" - ":%02x%02x%02x%02x%02x%02x\n" - KERN_INFO "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n" - KERN_INFO "md: (MaxDev:%u) \n", + "md: Dev:%08x UUID: %02x%02x%02x%02x:%02x%02x:%02x%02x:%02x%02x" + ":%02x%02x%02x%02x%02x%02x\n" + "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n" + "md: (MaxDev:%u) \n", le32_to_cpu(sb->level), (unsigned long long)le64_to_cpu(sb->size), le32_to_cpu(sb->raid_disks), @@ -1897,6 +1933,7 @@ static void md_update_sb(mddev_t * mddev, int force_change) int sync_req; int nospares = 0; + mddev->utime = get_seconds(); if (mddev->external) return; repeat: @@ -1926,7 +1963,6 @@ repeat: nospares = 0; sync_req = mddev->in_sync; - mddev->utime = get_seconds(); /* If this is just a dirty<->clean transition, and the array is clean * and 'events' is odd, we can roll back to the previous clean state */ @@ -1939,17 +1975,14 @@ repeat: /* otherwise we have to go forward and ... */ mddev->events ++; if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */ - /* .. if the array isn't clean, insist on an odd 'events' */ - if ((mddev->events&1)==0) { - mddev->events++; + /* .. if the array isn't clean, an 'even' event must also go + * to spares. */ + if ((mddev->events&1)==0) nospares = 0; - } } else { - /* otherwise insist on an even 'events' (for clean states) */ - if ((mddev->events&1)) { - mddev->events++; + /* otherwise an 'odd' event must go to spares */ + if ((mddev->events&1)) nospares = 0; - } } } @@ -2597,15 +2630,6 @@ static void analyze_sbs(mddev_t * mddev) clear_bit(In_sync, &rdev->flags); } } - - - - if (mddev->recovery_cp != MaxSector && - mddev->level >= 1) - printk(KERN_ERR "md: %s: raid array is not clean" - " -- starting background reconstruction\n", - mdname(mddev)); - } static void md_safemode_timeout(unsigned long data); @@ -2680,6 +2704,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len) ssize_t rv = len; struct mdk_personality *pers; void *priv; + mdk_rdev_t *rdev; if (mddev->pers == NULL) { if (len == 0) @@ -2746,7 +2771,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len) if (IS_ERR(priv)) { mddev->new_level = mddev->level; mddev->new_layout = mddev->layout; - mddev->new_chunk = mddev->chunk_size; + mddev->new_chunk_sectors = mddev->chunk_sectors; mddev->raid_disks -= mddev->delta_disks; mddev->delta_disks = 0; module_put(pers->owner); @@ -2759,12 +2784,18 @@ level_store(mddev_t *mddev, const char *buf, size_t len) mddev_suspend(mddev); mddev->pers->stop(mddev); module_put(mddev->pers->owner); + /* Invalidate devices that are now superfluous */ + list_for_each_entry(rdev, &mddev->disks, same_set) + if (rdev->raid_disk >= mddev->raid_disks) { + rdev->raid_disk = -1; + clear_bit(In_sync, &rdev->flags); + } mddev->pers = pers; mddev->private = priv; strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); mddev->level = mddev->new_level; mddev->layout = mddev->new_layout; - mddev->chunk_size = mddev->new_chunk; + mddev->chunk_sectors = mddev->new_chunk_sectors; mddev->delta_disks = 0; pers->run(mddev); mddev_resume(mddev); @@ -2800,11 +2831,14 @@ layout_store(mddev_t *mddev, const char *buf, size_t len) if (mddev->pers) { int err; - if (mddev->pers->reconfig == NULL) + if (mddev->pers->check_reshape == NULL) return -EBUSY; - err = mddev->pers->reconfig(mddev, n, -1); - if (err) + mddev->new_layout = n; + err = mddev->pers->check_reshape(mddev); + if (err) { + mddev->new_layout = mddev->layout; return err; + } } else { mddev->new_layout = n; if (mddev->reshape_position == MaxSector) @@ -2857,10 +2891,11 @@ static ssize_t chunk_size_show(mddev_t *mddev, char *page) { if (mddev->reshape_position != MaxSector && - mddev->chunk_size != mddev->new_chunk) - return sprintf(page, "%d (%d)\n", mddev->new_chunk, - mddev->chunk_size); - return sprintf(page, "%d\n", mddev->chunk_size); + mddev->chunk_sectors != mddev->new_chunk_sectors) + return sprintf(page, "%d (%d)\n", + mddev->new_chunk_sectors << 9, + mddev->chunk_sectors << 9); + return sprintf(page, "%d\n", mddev->chunk_sectors << 9); } static ssize_t @@ -2874,15 +2909,18 @@ chunk_size_store(mddev_t *mddev, const char *buf, size_t len) if (mddev->pers) { int err; - if (mddev->pers->reconfig == NULL) + if (mddev->pers->check_reshape == NULL) return -EBUSY; - err = mddev->pers->reconfig(mddev, -1, n); - if (err) + mddev->new_chunk_sectors = n >> 9; + err = mddev->pers->check_reshape(mddev); + if (err) { + mddev->new_chunk_sectors = mddev->chunk_sectors; return err; + } } else { - mddev->new_chunk = n; + mddev->new_chunk_sectors = n >> 9; if (mddev->reshape_position == MaxSector) - mddev->chunk_size = n; + mddev->chunk_sectors = n >> 9; } return len; } @@ -3527,8 +3565,9 @@ min_sync_store(mddev_t *mddev, const char *buf, size_t len) return -EBUSY; /* Must be a multiple of chunk_size */ - if (mddev->chunk_size) { - if (min & (sector_t)((mddev->chunk_size>>9)-1)) + if (mddev->chunk_sectors) { + sector_t temp = min; + if (sector_div(temp, mddev->chunk_sectors)) return -EINVAL; } mddev->resync_min = min; @@ -3560,12 +3599,14 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len) if (max < mddev->resync_min) return -EINVAL; if (max < mddev->resync_max && + mddev->ro == 0 && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) return -EBUSY; /* Must be a multiple of chunk_size */ - if (mddev->chunk_size) { - if (max & (sector_t)((mddev->chunk_size>>9)-1)) + if (mddev->chunk_sectors) { + sector_t temp = max; + if (sector_div(temp, mddev->chunk_sectors)) return -EINVAL; } mddev->resync_max = max; @@ -3589,7 +3630,8 @@ suspend_lo_store(mddev_t *mddev, const char *buf, size_t len) char *e; unsigned long long new = simple_strtoull(buf, &e, 10); - if (mddev->pers->quiesce == NULL) + if (mddev->pers == NULL || + mddev->pers->quiesce == NULL) return -EINVAL; if (buf == e || (*e && *e != '\n')) return -EINVAL; @@ -3617,7 +3659,8 @@ suspend_hi_store(mddev_t *mddev, const char *buf, size_t len) char *e; unsigned long long new = simple_strtoull(buf, &e, 10); - if (mddev->pers->quiesce == NULL) + if (mddev->pers == NULL || + mddev->pers->quiesce == NULL) return -EINVAL; if (buf == e || (*e && *e != '\n')) return -EINVAL; @@ -3656,7 +3699,7 @@ reshape_position_store(mddev_t *mddev, const char *buf, size_t len) mddev->delta_disks = 0; mddev->new_level = mddev->level; mddev->new_layout = mddev->layout; - mddev->new_chunk = mddev->chunk_size; + mddev->new_chunk_sectors = mddev->chunk_sectors; return len; } @@ -3697,17 +3740,8 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len) mddev->array_sectors = sectors; set_capacity(mddev->gendisk, mddev->array_sectors); - if (mddev->pers) { - struct block_device *bdev = bdget_disk(mddev->gendisk, 0); - - if (bdev) { - mutex_lock(&bdev->bd_inode->i_mutex); - i_size_write(bdev->bd_inode, - (loff_t)mddev->array_sectors << 9); - mutex_unlock(&bdev->bd_inode->i_mutex); - bdput(bdev); - } - } + if (mddev->pers) + revalidate_disk(mddev->gendisk); return len; } @@ -3860,11 +3894,9 @@ static int md_alloc(dev_t dev, char *name) flush_scheduled_work(); mutex_lock(&disks_mutex); - if (mddev->gendisk) { - mutex_unlock(&disks_mutex); - mddev_put(mddev); - return -EEXIST; - } + error = -EEXIST; + if (mddev->gendisk) + goto abort; if (name) { /* Need to ensure that 'name' is not a duplicate. @@ -3876,17 +3908,15 @@ static int md_alloc(dev_t dev, char *name) if (mddev2->gendisk && strcmp(mddev2->gendisk->disk_name, name) == 0) { spin_unlock(&all_mddevs_lock); - return -EEXIST; + goto abort; } spin_unlock(&all_mddevs_lock); } + error = -ENOMEM; mddev->queue = blk_alloc_queue(GFP_KERNEL); - if (!mddev->queue) { - mutex_unlock(&disks_mutex); - mddev_put(mddev); - return -ENOMEM; - } + if (!mddev->queue) + goto abort; mddev->queue->queuedata = mddev; /* Can be unlocked because the queue is new: no concurrency */ @@ -3896,11 +3926,9 @@ static int md_alloc(dev_t dev, char *name) disk = alloc_disk(1 << shift); if (!disk) { - mutex_unlock(&disks_mutex); blk_cleanup_queue(mddev->queue); mddev->queue = NULL; - mddev_put(mddev); - return -ENOMEM; + goto abort; } disk->major = MAJOR(mddev->unit); disk->first_minor = unit << shift; @@ -3922,16 +3950,22 @@ static int md_alloc(dev_t dev, char *name) mddev->gendisk = disk; error = kobject_init_and_add(&mddev->kobj, &md_ktype, &disk_to_dev(disk)->kobj, "%s", "md"); - mutex_unlock(&disks_mutex); - if (error) + if (error) { + /* This isn't possible, but as kobject_init_and_add is marked + * __must_check, we must do something with the result + */ printk(KERN_WARNING "md: cannot register %s/md - name in use\n", disk->disk_name); - else { + error = 0; + } + abort: + mutex_unlock(&disks_mutex); + if (!error) { kobject_uevent(&mddev->kobj, KOBJ_ADD); mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state"); } mddev_put(mddev); - return 0; + return error; } static struct kobject *md_probe(dev_t dev, int *part, void *data) @@ -3976,11 +4010,9 @@ static int start_dirty_degraded; static int do_md_run(mddev_t * mddev) { int err; - int chunk_size; mdk_rdev_t *rdev; struct gendisk *disk; struct mdk_personality *pers; - char b[BDEVNAME_SIZE]; if (list_empty(&mddev->disks)) /* cannot run an array with no devices.. */ @@ -3998,38 +4030,6 @@ static int do_md_run(mddev_t * mddev) analyze_sbs(mddev); } - chunk_size = mddev->chunk_size; - - if (chunk_size) { - if (chunk_size > MAX_CHUNK_SIZE) { - printk(KERN_ERR "too big chunk_size: %d > %d\n", - chunk_size, MAX_CHUNK_SIZE); - return -EINVAL; - } - /* - * chunk-size has to be a power of 2 - */ - if ( (1 << ffz(~chunk_size)) != chunk_size) { - printk(KERN_ERR "chunk_size of %d not valid\n", chunk_size); - return -EINVAL; - } - - /* devices must have minimum size of one chunk */ - list_for_each_entry(rdev, &mddev->disks, same_set) { - if (test_bit(Faulty, &rdev->flags)) - continue; - if (rdev->sectors < chunk_size / 512) { - printk(KERN_WARNING - "md: Dev %s smaller than chunk_size:" - " %llu < %d\n", - bdevname(rdev->bdev,b), - (unsigned long long)rdev->sectors, - chunk_size / 512); - return -EINVAL; - } - } - } - if (mddev->level != LEVEL_NONE) request_module("md-level-%d", mddev->level); else if (mddev->clevel[0]) @@ -4094,10 +4094,6 @@ static int do_md_run(mddev_t * mddev) } strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); - if (pers->level >= 4 && pers->level <= 6) - /* Cannot support integrity (yet) */ - blk_integrity_unregister(mddev->gendisk); - if (mddev->reshape_position != MaxSector && pers->start_reshape == NULL) { /* This personality cannot handle reshaping... */ @@ -4235,6 +4231,7 @@ static int do_md_run(mddev_t * mddev) md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ + revalidate_disk(mddev->gendisk); mddev->changed = 1; md_new_event(mddev); sysfs_notify_dirent(mddev->sysfs_state); @@ -4306,12 +4303,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) struct gendisk *disk = mddev->gendisk; mdk_rdev_t *rdev; + mutex_lock(&mddev->open_mutex); if (atomic_read(&mddev->openers) > is_open) { printk("md: %s still in use.\n",mdname(mddev)); - return -EBUSY; - } - - if (mddev->pers) { + err = -EBUSY; + } else if (mddev->pers) { if (mddev->sync_thread) { set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); @@ -4368,8 +4364,12 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) if (mode == 1) set_disk_ro(disk, 1); clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + err = 0; } - +out: + mutex_unlock(&mddev->open_mutex); + if (err) + return err; /* * Free resources if final stop */ @@ -4405,7 +4405,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) mddev->flags = 0; mddev->ro = 0; mddev->metadata_type[0] = 0; - mddev->chunk_size = 0; + mddev->chunk_sectors = 0; mddev->ctime = mddev->utime = 0; mddev->layout = 0; mddev->max_disks = 0; @@ -4413,7 +4413,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) mddev->delta_disks = 0; mddev->new_level = LEVEL_NONE; mddev->new_layout = 0; - mddev->new_chunk = 0; + mddev->new_chunk_sectors = 0; mddev->curr_resync = 0; mddev->resync_mismatches = 0; mddev->suspend_lo = mddev->suspend_hi = 0; @@ -4435,7 +4435,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) blk_integrity_unregister(disk); md_new_event(mddev); sysfs_notify_dirent(mddev->sysfs_state); -out: return err; } @@ -4618,7 +4617,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg) info.spare_disks = spare; info.layout = mddev->layout; - info.chunk_size = mddev->chunk_size; + info.chunk_size = mddev->chunk_sectors << 9; if (copy_to_user(arg, &info, sizeof(info))) return -EFAULT; @@ -4843,7 +4842,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) rdev->sb_start = rdev->bdev->bd_inode->i_size / 512; } else rdev->sb_start = calc_dev_sboffset(rdev->bdev); - rdev->sectors = calc_num_sectors(rdev, mddev->chunk_size); + rdev->sectors = rdev->sb_start; err = bind_rdev_to_array(rdev, mddev); if (err) { @@ -4913,7 +4912,7 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev) else rdev->sb_start = rdev->bdev->bd_inode->i_size / 512; - rdev->sectors = calc_num_sectors(rdev, mddev->chunk_size); + rdev->sectors = rdev->sb_start; if (test_bit(Faulty, &rdev->flags)) { printk(KERN_WARNING @@ -5062,7 +5061,7 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) mddev->external = 0; mddev->layout = info->layout; - mddev->chunk_size = info->chunk_size; + mddev->chunk_sectors = info->chunk_size >> 9; mddev->max_disks = MD_SB_DISKS; @@ -5081,7 +5080,7 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) get_random_bytes(mddev->uuid, 16); mddev->new_level = mddev->level; - mddev->new_chunk = mddev->chunk_size; + mddev->new_chunk_sectors = mddev->chunk_sectors; mddev->new_layout = mddev->layout; mddev->delta_disks = 0; @@ -5133,18 +5132,8 @@ static int update_size(mddev_t *mddev, sector_t num_sectors) return -ENOSPC; } rv = mddev->pers->resize(mddev, num_sectors); - if (!rv) { - struct block_device *bdev; - - bdev = bdget_disk(mddev->gendisk, 0); - if (bdev) { - mutex_lock(&bdev->bd_inode->i_mutex); - i_size_write(bdev->bd_inode, - (loff_t)mddev->array_sectors << 9); - mutex_unlock(&bdev->bd_inode->i_mutex); - bdput(bdev); - } - } + if (!rv) + revalidate_disk(mddev->gendisk); return rv; } @@ -5191,7 +5180,7 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) mddev->level != info->level || /* mddev->layout != info->layout || */ !mddev->persistent != info->not_persistent|| - mddev->chunk_size != info->chunk_size || + mddev->chunk_sectors != info->chunk_size >> 9 || /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */ ((state^info->state) & 0xfffffe00) ) @@ -5215,10 +5204,15 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) * we don't need to do anything at the md level, the * personality will take care of it all. */ - if (mddev->pers->reconfig == NULL) + if (mddev->pers->check_reshape == NULL) return -EINVAL; - else - return mddev->pers->reconfig(mddev, info->layout, -1); + else { + mddev->new_layout = info->layout; + rv = mddev->pers->check_reshape(mddev); + if (rv) + mddev->new_layout = mddev->layout; + return rv; + } } if (info->size >= 0 && mddev->dev_sectors / 2 != info->size) rv = update_size(mddev, (sector_t)info->size * 2); @@ -5525,12 +5519,12 @@ static int md_open(struct block_device *bdev, fmode_t mode) } BUG_ON(mddev != bdev->bd_disk->private_data); - if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1))) + if ((err = mutex_lock_interruptible(&mddev->open_mutex))) goto out; err = 0; atomic_inc(&mddev->openers); - mddev_unlock(mddev); + mutex_unlock(&mddev->open_mutex); check_disk_change(bdev); out: @@ -6379,10 +6373,16 @@ void md_do_sync(mddev_t *mddev) sysfs_notify(&mddev->kobj, NULL, "sync_completed"); } - if (j >= mddev->resync_max) - wait_event(mddev->recovery_wait, - mddev->resync_max > j - || kthread_should_stop()); + while (j >= mddev->resync_max && !kthread_should_stop()) { + /* As this condition is controlled by user-space, + * we can block indefinitely, so use '_interruptible' + * to avoid triggering warnings. + */ + flush_signals(current); /* just in case */ + wait_event_interruptible(mddev->recovery_wait, + mddev->resync_max > j + || kthread_should_stop()); + } if (kthread_should_stop()) goto interrupted; @@ -6717,7 +6717,8 @@ void md_check_recovery(mddev_t *mddev) */ if (mddev->reshape_position != MaxSector) { - if (mddev->pers->check_reshape(mddev) != 0) + if (mddev->pers->check_reshape == NULL || + mddev->pers->check_reshape(mddev) != 0) /* Cannot proceed */ goto unlock; set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); |