diff options
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- | drivers/md/raid5.c | 256 |
1 files changed, 131 insertions, 125 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 3c3626d2a1f..f9f991e6e13 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -362,7 +362,7 @@ static void raid5_unplug_device(struct request_queue *q); static struct stripe_head * get_active_stripe(raid5_conf_t *conf, sector_t sector, - int previous, int noblock) + int previous, int noblock, int noquiesce) { struct stripe_head *sh; @@ -372,7 +372,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector, do { wait_event_lock_irq(conf->wait_for_stripe, - conf->quiesce == 0, + conf->quiesce == 0 || noquiesce, conf->device_lock, /* nothing */); sh = __find_stripe(conf, sector, conf->generation - previous); if (!sh) { @@ -1274,8 +1274,8 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, sector_t new_sector; int algorithm = previous ? conf->prev_algo : conf->algorithm; - int sectors_per_chunk = previous ? (conf->prev_chunk >> 9) - : (conf->chunk_size >> 9); + int sectors_per_chunk = previous ? conf->prev_chunk_sectors + : conf->chunk_sectors; int raid_disks = previous ? conf->previous_raid_disks : conf->raid_disks; int data_disks = raid_disks - conf->max_degraded; @@ -1480,8 +1480,8 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous) int raid_disks = sh->disks; int data_disks = raid_disks - conf->max_degraded; sector_t new_sector = sh->sector, check; - int sectors_per_chunk = previous ? (conf->prev_chunk >> 9) - : (conf->chunk_size >> 9); + int sectors_per_chunk = previous ? conf->prev_chunk_sectors + : conf->chunk_sectors; int algorithm = previous ? conf->prev_algo : conf->algorithm; sector_t stripe; @@ -1997,8 +1997,7 @@ static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous, struct stripe_head *sh) { int sectors_per_chunk = - previous ? (conf->prev_chunk >> 9) - : (conf->chunk_size >> 9); + previous ? conf->prev_chunk_sectors : conf->chunk_sectors; int dd_idx; int chunk_offset = sector_div(stripe, sectors_per_chunk); int disks = previous ? conf->previous_raid_disks : conf->raid_disks; @@ -2671,7 +2670,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, sector_t bn = compute_blocknr(sh, i, 1); sector_t s = raid5_compute_sector(conf, bn, 0, &dd_idx, NULL); - sh2 = get_active_stripe(conf, s, 0, 1); + sh2 = get_active_stripe(conf, s, 0, 1, 1); if (sh2 == NULL) /* so far only the early blocks of this stripe * have been requested. When later blocks @@ -2944,7 +2943,7 @@ static bool handle_stripe5(struct stripe_head *sh) /* Finish reconstruct operations initiated by the expansion process */ if (sh->reconstruct_state == reconstruct_state_result) { struct stripe_head *sh2 - = get_active_stripe(conf, sh->sector, 1, 1); + = get_active_stripe(conf, sh->sector, 1, 1, 1); if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) { /* sh cannot be written until sh2 has been read. * so arrange for sh to be delayed a little @@ -3189,7 +3188,7 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { struct stripe_head *sh2 - = get_active_stripe(conf, sh->sector, 1, 1); + = get_active_stripe(conf, sh->sector, 1, 1, 1); if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) { /* sh cannot be written until sh2 has been read. * so arrange for sh to be delayed a little @@ -3284,11 +3283,11 @@ static void activate_bit_delay(raid5_conf_t *conf) static void unplug_slaves(mddev_t *mddev) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; int i; rcu_read_lock(); - for (i=0; i<mddev->raid_disks; i++) { + for (i = 0; i < conf->raid_disks; i++) { mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev); if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) { struct request_queue *r_queue = bdev_get_queue(rdev->bdev); @@ -3308,7 +3307,7 @@ static void unplug_slaves(mddev_t *mddev) static void raid5_unplug_device(struct request_queue *q) { mddev_t *mddev = q->queuedata; - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; unsigned long flags; spin_lock_irqsave(&conf->device_lock, flags); @@ -3327,7 +3326,7 @@ static void raid5_unplug_device(struct request_queue *q) static int raid5_congested(void *data, int bits) { mddev_t *mddev = data; - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; /* No difference between reads and writes. Just check * how busy the stripe_cache is @@ -3352,14 +3351,14 @@ static int raid5_mergeable_bvec(struct request_queue *q, mddev_t *mddev = q->queuedata; sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); int max; - unsigned int chunk_sectors = mddev->chunk_size >> 9; + unsigned int chunk_sectors = mddev->chunk_sectors; unsigned int bio_sectors = bvm->bi_size >> 9; if ((bvm->bi_rw & 1) == WRITE) return biovec->bv_len; /* always allow writes to be mergeable */ - if (mddev->new_chunk < mddev->chunk_size) - chunk_sectors = mddev->new_chunk >> 9; + if (mddev->new_chunk_sectors < mddev->chunk_sectors) + chunk_sectors = mddev->new_chunk_sectors; max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; if (max < 0) max = 0; if (max <= biovec->bv_len && bio_sectors == 0) @@ -3372,11 +3371,11 @@ static int raid5_mergeable_bvec(struct request_queue *q, static int in_chunk_boundary(mddev_t *mddev, struct bio *bio) { sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); - unsigned int chunk_sectors = mddev->chunk_size >> 9; + unsigned int chunk_sectors = mddev->chunk_sectors; unsigned int bio_sectors = bio->bi_size >> 9; - if (mddev->new_chunk < mddev->chunk_size) - chunk_sectors = mddev->new_chunk >> 9; + if (mddev->new_chunk_sectors < mddev->chunk_sectors) + chunk_sectors = mddev->new_chunk_sectors; return chunk_sectors >= ((sector & (chunk_sectors - 1)) + bio_sectors); } @@ -3440,7 +3439,7 @@ static void raid5_align_endio(struct bio *bi, int error) bio_put(bi); mddev = raid_bi->bi_bdev->bd_disk->queue->queuedata; - conf = mddev_to_conf(mddev); + conf = mddev->private; rdev = (void*)raid_bi->bi_next; raid_bi->bi_next = NULL; @@ -3463,10 +3462,10 @@ static int bio_fits_rdev(struct bio *bi) { struct request_queue *q = bdev_get_queue(bi->bi_bdev); - if ((bi->bi_size>>9) > q->max_sectors) + if ((bi->bi_size>>9) > queue_max_sectors(q)) return 0; blk_recount_segments(q, bi); - if (bi->bi_phys_segments > q->max_phys_segments) + if (bi->bi_phys_segments > queue_max_phys_segments(q)) return 0; if (q->merge_bvec_fn) @@ -3482,7 +3481,7 @@ static int bio_fits_rdev(struct bio *bi) static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio) { mddev_t *mddev = q->queuedata; - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; unsigned int dd_idx; struct bio* align_bi; mdk_rdev_t *rdev; @@ -3599,7 +3598,7 @@ static struct stripe_head *__get_priority_stripe(raid5_conf_t *conf) static int make_request(struct request_queue *q, struct bio * bi) { mddev_t *mddev = q->queuedata; - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; int dd_idx; sector_t new_sector; sector_t logical_sector, last_sector; @@ -3675,7 +3674,7 @@ static int make_request(struct request_queue *q, struct bio * bi) (unsigned long long)logical_sector); sh = get_active_stripe(conf, new_sector, previous, - (bi->bi_rw&RWA_MASK)); + (bi->bi_rw&RWA_MASK), 0); if (sh) { if (unlikely(previous)) { /* expansion might have moved on while waiting for a @@ -3696,6 +3695,7 @@ static int make_request(struct request_queue *q, struct bio * bi) spin_unlock_irq(&conf->device_lock); if (must_retry) { release_stripe(sh); + schedule(); goto retry; } } @@ -3791,10 +3791,10 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped * If old and new chunk sizes differ, we need to process the * largest of these */ - if (mddev->new_chunk > mddev->chunk_size) - reshape_sectors = mddev->new_chunk / 512; + if (mddev->new_chunk_sectors > mddev->chunk_sectors) + reshape_sectors = mddev->new_chunk_sectors; else - reshape_sectors = mddev->chunk_size / 512; + reshape_sectors = mddev->chunk_sectors; /* we update the metadata when there is more than 3Meg * in the block range (that is rather arbitrary, should @@ -3811,13 +3811,13 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped safepos = conf->reshape_safe; sector_div(safepos, data_disks); if (mddev->delta_disks < 0) { - writepos -= min(reshape_sectors, writepos); + writepos -= min_t(sector_t, reshape_sectors, writepos); readpos += reshape_sectors; safepos += reshape_sectors; } else { writepos += reshape_sectors; - readpos -= min(reshape_sectors, readpos); - safepos -= min(reshape_sectors, safepos); + readpos -= min_t(sector_t, reshape_sectors, readpos); + safepos -= min_t(sector_t, reshape_sectors, safepos); } /* 'writepos' is the most advanced device address we might write. @@ -3873,7 +3873,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) { int j; int skipped = 0; - sh = get_active_stripe(conf, stripe_addr+i, 0, 0); + sh = get_active_stripe(conf, stripe_addr+i, 0, 0, 1); set_bit(STRIPE_EXPANDING, &sh->state); atomic_inc(&conf->reshape_stripes); /* If any of this stripe is beyond the end of the old @@ -3916,13 +3916,13 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped raid5_compute_sector(conf, stripe_addr*(new_data_disks), 1, &dd_idx, NULL); last_sector = - raid5_compute_sector(conf, ((stripe_addr+conf->chunk_size/512) - *(new_data_disks) - 1), + raid5_compute_sector(conf, ((stripe_addr+reshape_sectors) + * new_data_disks - 1), 1, &dd_idx, NULL); if (last_sector >= mddev->dev_sectors) last_sector = mddev->dev_sectors - 1; while (first_sector <= last_sector) { - sh = get_active_stripe(conf, first_sector, 1, 0); + sh = get_active_stripe(conf, first_sector, 1, 0, 1); set_bit(STRIPE_EXPAND_SOURCE, &sh->state); set_bit(STRIPE_HANDLE, &sh->state); release_stripe(sh); @@ -3946,7 +3946,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped wait_event(conf->wait_for_overlap, atomic_read(&conf->reshape_stripes) == 0); mddev->reshape_position = conf->reshape_progress; - mddev->curr_resync_completed = mddev->curr_resync; + mddev->curr_resync_completed = mddev->curr_resync + reshape_sectors; conf->reshape_checkpoint = jiffies; set_bit(MD_CHANGE_DEVS, &mddev->flags); md_wakeup_thread(mddev->thread); @@ -4022,9 +4022,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski bitmap_cond_end_sync(mddev->bitmap, sector_nr); - sh = get_active_stripe(conf, sector_nr, 0, 1); + sh = get_active_stripe(conf, sector_nr, 0, 1, 0); if (sh == NULL) { - sh = get_active_stripe(conf, sector_nr, 0, 0); + sh = get_active_stripe(conf, sector_nr, 0, 0, 0); /* make sure we don't swamp the stripe cache if someone else * is trying to get access */ @@ -4034,7 +4034,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski * We don't need to check the 'failed' flag as when that gets set, * recovery aborts. */ - for (i=0; i<mddev->raid_disks; i++) + for (i = 0; i < conf->raid_disks; i++) if (conf->disks[i].rdev == NULL) still_degraded = 1; @@ -4086,7 +4086,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) /* already done this stripe */ continue; - sh = get_active_stripe(conf, sector, 0, 1); + sh = get_active_stripe(conf, sector, 0, 1, 0); if (!sh) { /* failed to get a stripe - must wait */ @@ -4129,7 +4129,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) static void raid5d(mddev_t *mddev) { struct stripe_head *sh; - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; int handled; pr_debug("+++ raid5d active\n"); @@ -4185,7 +4185,7 @@ static void raid5d(mddev_t *mddev) static ssize_t raid5_show_stripe_cache_size(mddev_t *mddev, char *page) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; if (conf) return sprintf(page, "%d\n", conf->max_nr_stripes); else @@ -4195,7 +4195,7 @@ raid5_show_stripe_cache_size(mddev_t *mddev, char *page) static ssize_t raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; unsigned long new; int err; @@ -4233,7 +4233,7 @@ raid5_stripecache_size = __ATTR(stripe_cache_size, S_IRUGO | S_IWUSR, static ssize_t raid5_show_preread_threshold(mddev_t *mddev, char *page) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; if (conf) return sprintf(page, "%d\n", conf->bypass_threshold); else @@ -4243,7 +4243,7 @@ raid5_show_preread_threshold(mddev_t *mddev, char *page) static ssize_t raid5_store_preread_threshold(mddev_t *mddev, const char *page, size_t len) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; unsigned long new; if (len >= PAGE_SIZE) return -EINVAL; @@ -4267,7 +4267,7 @@ raid5_preread_bypass_threshold = __ATTR(preread_bypass_threshold, static ssize_t stripe_cache_active_show(mddev_t *mddev, char *page) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; if (conf) return sprintf(page, "%d\n", atomic_read(&conf->active_stripes)); else @@ -4291,7 +4291,7 @@ static struct attribute_group raid5_attrs_group = { static sector_t raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; if (!sectors) sectors = mddev->dev_sectors; @@ -4303,8 +4303,8 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks) raid_disks = conf->previous_raid_disks; } - sectors &= ~((sector_t)mddev->chunk_size/512 - 1); - sectors &= ~((sector_t)mddev->new_chunk/512 - 1); + sectors &= ~((sector_t)mddev->chunk_sectors - 1); + sectors &= ~((sector_t)mddev->new_chunk_sectors - 1); return sectors * (raid_disks - conf->max_degraded); } @@ -4336,9 +4336,11 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) return ERR_PTR(-EINVAL); } - if (!mddev->new_chunk || mddev->new_chunk % PAGE_SIZE) { + if (!mddev->new_chunk_sectors || + (mddev->new_chunk_sectors << 9) % PAGE_SIZE || + !is_power_of_2(mddev->new_chunk_sectors)) { printk(KERN_ERR "raid5: invalid chunk size %d for %s\n", - mddev->new_chunk, mdname(mddev)); + mddev->new_chunk_sectors << 9, mdname(mddev)); return ERR_PTR(-EINVAL); } @@ -4401,7 +4403,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) conf->fullsync = 1; } - conf->chunk_size = mddev->new_chunk; + conf->chunk_sectors = mddev->new_chunk_sectors; conf->level = mddev->new_level; if (conf->level == 6) conf->max_degraded = 2; @@ -4411,7 +4413,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) conf->max_nr_stripes = NR_STRIPES; conf->reshape_progress = mddev->reshape_position; if (conf->reshape_progress != MaxSector) { - conf->prev_chunk = mddev->chunk_size; + conf->prev_chunk_sectors = mddev->chunk_sectors; conf->prev_algo = mddev->layout; } @@ -4453,6 +4455,10 @@ static int run(mddev_t *mddev) int working_disks = 0; mdk_rdev_t *rdev; + if (mddev->recovery_cp != MaxSector) + printk(KERN_NOTICE "raid5: %s is not clean" + " -- starting background reconstruction\n", + mdname(mddev)); if (mddev->reshape_position != MaxSector) { /* Check that we can continue the reshape. * Currently only disks can change, it must @@ -4475,7 +4481,7 @@ static int run(mddev_t *mddev) * geometry. */ here_new = mddev->reshape_position; - if (sector_div(here_new, (mddev->new_chunk>>9)* + if (sector_div(here_new, mddev->new_chunk_sectors * (mddev->raid_disks - max_degraded))) { printk(KERN_ERR "raid5: reshape_position not " "on a stripe boundary\n"); @@ -4483,7 +4489,7 @@ static int run(mddev_t *mddev) } /* here_new is the stripe we will write to */ here_old = mddev->reshape_position; - sector_div(here_old, (mddev->chunk_size>>9)* + sector_div(here_old, mddev->chunk_sectors * (old_disks-max_degraded)); /* here_old is the first stripe that we might need to read * from */ @@ -4498,7 +4504,7 @@ static int run(mddev_t *mddev) } else { BUG_ON(mddev->level != mddev->new_level); BUG_ON(mddev->layout != mddev->new_layout); - BUG_ON(mddev->chunk_size != mddev->new_chunk); + BUG_ON(mddev->chunk_sectors != mddev->new_chunk_sectors); BUG_ON(mddev->delta_disks != 0); } @@ -4532,7 +4538,7 @@ static int run(mddev_t *mddev) } /* device size must be a multiple of chunk size */ - mddev->dev_sectors &= ~(mddev->chunk_size / 512 - 1); + mddev->dev_sectors &= ~(mddev->chunk_sectors - 1); mddev->resync_max_sectors = mddev->dev_sectors; if (mddev->degraded > 0 && @@ -4581,7 +4587,7 @@ static int run(mddev_t *mddev) { int data_disks = conf->previous_raid_disks - conf->max_degraded; int stripe = data_disks * - (mddev->chunk_size / PAGE_SIZE); + ((mddev->chunk_sectors << 9) / PAGE_SIZE); if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) mddev->queue->backing_dev_info.ra_pages = 2 * stripe; } @@ -4678,7 +4684,8 @@ static void status(struct seq_file *seq, mddev_t *mddev) raid5_conf_t *conf = (raid5_conf_t *) mddev->private; int i; - seq_printf (seq, " level %d, %dk chunk, algorithm %d", mddev->level, mddev->chunk_size >> 10, mddev->layout); + seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level, + mddev->chunk_sectors / 2, mddev->layout); seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded); for (i = 0; i < conf->raid_disks; i++) seq_printf (seq, "%s", @@ -4826,7 +4833,7 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors) * any io in the removed space completes, but it hardly seems * worth it. */ - sectors &= ~((sector_t)mddev->chunk_size/512 - 1); + sectors &= ~((sector_t)mddev->chunk_sectors - 1); md_set_array_sectors(mddev, raid5_size(mddev, sectors, mddev->raid_disks)); if (mddev->array_sectors > @@ -4843,14 +4850,37 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors) return 0; } -static int raid5_check_reshape(mddev_t *mddev) +static int check_stripe_cache(mddev_t *mddev) +{ + /* Can only proceed if there are plenty of stripe_heads. + * We need a minimum of one full stripe,, and for sensible progress + * it is best to have about 4 times that. + * If we require 4 times, then the default 256 4K stripe_heads will + * allow for chunk sizes up to 256K, which is probably OK. + * If the chunk size is greater, user-space should request more + * stripe_heads first. + */ + raid5_conf_t *conf = mddev->private; + if (((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4 + > conf->max_nr_stripes || + ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4 + > conf->max_nr_stripes) { + printk(KERN_WARNING "raid5: reshape: not enough stripes. Needed %lu\n", + ((max(mddev->chunk_sectors, mddev->new_chunk_sectors) << 9) + / STRIPE_SIZE)*4); + return 0; + } + return 1; +} + +static int check_reshape(mddev_t *mddev) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; if (mddev->delta_disks == 0 && mddev->new_layout == mddev->layout && - mddev->new_chunk == mddev->chunk_size) - return -EINVAL; /* nothing to do */ + mddev->new_chunk_sectors == mddev->chunk_sectors) + return 0; /* nothing to do */ if (mddev->bitmap) /* Cannot grow a bitmap yet */ return -EBUSY; @@ -4869,28 +4899,15 @@ static int raid5_check_reshape(mddev_t *mddev) return -EINVAL; } - /* Can only proceed if there are plenty of stripe_heads. - * We need a minimum of one full stripe,, and for sensible progress - * it is best to have about 4 times that. - * If we require 4 times, then the default 256 4K stripe_heads will - * allow for chunk sizes up to 256K, which is probably OK. - * If the chunk size is greater, user-space should request more - * stripe_heads first. - */ - if ((mddev->chunk_size / STRIPE_SIZE) * 4 > conf->max_nr_stripes || - (mddev->new_chunk / STRIPE_SIZE) * 4 > conf->max_nr_stripes) { - printk(KERN_WARNING "raid5: reshape: not enough stripes. Needed %lu\n", - (max(mddev->chunk_size, mddev->new_chunk) - / STRIPE_SIZE)*4); + if (!check_stripe_cache(mddev)) return -ENOSPC; - } return resize_stripes(conf, conf->raid_disks + mddev->delta_disks); } static int raid5_start_reshape(mddev_t *mddev) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; mdk_rdev_t *rdev; int spares = 0; int added_devices = 0; @@ -4899,6 +4916,9 @@ static int raid5_start_reshape(mddev_t *mddev) if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) return -EBUSY; + if (!check_stripe_cache(mddev)) + return -ENOSPC; + list_for_each_entry(rdev, &mddev->disks, same_set) if (rdev->raid_disk < 0 && !test_bit(Faulty, &rdev->flags)) @@ -4925,8 +4945,8 @@ static int raid5_start_reshape(mddev_t *mddev) spin_lock_irq(&conf->device_lock); conf->previous_raid_disks = conf->raid_disks; conf->raid_disks += mddev->delta_disks; - conf->prev_chunk = conf->chunk_size; - conf->chunk_size = mddev->new_chunk; + conf->prev_chunk_sectors = conf->chunk_sectors; + conf->chunk_sectors = mddev->new_chunk_sectors; conf->prev_algo = conf->algorithm; conf->algorithm = mddev->new_layout; if (mddev->delta_disks < 0) @@ -5008,7 +5028,7 @@ static void end_reshape(raid5_conf_t *conf) */ { int data_disks = conf->raid_disks - conf->max_degraded; - int stripe = data_disks * (conf->chunk_size + int stripe = data_disks * ((conf->chunk_sectors << 9) / PAGE_SIZE); if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe) conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe; @@ -5022,7 +5042,7 @@ static void end_reshape(raid5_conf_t *conf) static void raid5_finish_reshape(mddev_t *mddev) { struct block_device *bdev; - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { @@ -5053,7 +5073,7 @@ static void raid5_finish_reshape(mddev_t *mddev) raid5_remove_disk(mddev, d); } mddev->layout = conf->algorithm; - mddev->chunk_size = conf->chunk_size; + mddev->chunk_sectors = conf->chunk_sectors; mddev->reshape_position = MaxSector; mddev->delta_disks = 0; } @@ -5061,7 +5081,7 @@ static void raid5_finish_reshape(mddev_t *mddev) static void raid5_quiesce(mddev_t *mddev, int state) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; switch(state) { case 2: /* resume for a suspend */ @@ -5111,7 +5131,7 @@ static void *raid5_takeover_raid1(mddev_t *mddev) mddev->new_level = 5; mddev->new_layout = ALGORITHM_LEFT_SYMMETRIC; - mddev->new_chunk = chunksect << 9; + mddev->new_chunk_sectors = chunksect; return setup_conf(mddev); } @@ -5150,24 +5170,24 @@ static void *raid5_takeover_raid6(mddev_t *mddev) } -static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk) +static int raid5_check_reshape(mddev_t *mddev) { /* For a 2-drive array, the layout and chunk size can be changed * immediately as not restriping is needed. * For larger arrays we record the new value - after validation * to be used by a reshape pass. */ - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; + int new_chunk = mddev->new_chunk_sectors; - if (new_layout >= 0 && !algorithm_valid_raid5(new_layout)) + if (mddev->new_layout >= 0 && !algorithm_valid_raid5(mddev->new_layout)) return -EINVAL; if (new_chunk > 0) { - if (new_chunk & (new_chunk-1)) - /* not a power of 2 */ + if (!is_power_of_2(new_chunk)) return -EINVAL; - if (new_chunk < PAGE_SIZE) + if (new_chunk < (PAGE_SIZE>>9)) return -EINVAL; - if (mddev->array_sectors & ((new_chunk>>9)-1)) + if (mddev->array_sectors & (new_chunk-1)) /* not factor of array size */ return -EINVAL; } @@ -5175,49 +5195,39 @@ static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk) /* They look valid */ if (mddev->raid_disks == 2) { - - if (new_layout >= 0) { - conf->algorithm = new_layout; - mddev->layout = mddev->new_layout = new_layout; + /* can make the change immediately */ + if (mddev->new_layout >= 0) { + conf->algorithm = mddev->new_layout; + mddev->layout = mddev->new_layout; } if (new_chunk > 0) { - conf->chunk_size = new_chunk; - mddev->chunk_size = mddev->new_chunk = new_chunk; + conf->chunk_sectors = new_chunk ; + mddev->chunk_sectors = new_chunk; } set_bit(MD_CHANGE_DEVS, &mddev->flags); md_wakeup_thread(mddev->thread); - } else { - if (new_layout >= 0) - mddev->new_layout = new_layout; - if (new_chunk > 0) - mddev->new_chunk = new_chunk; } - return 0; + return check_reshape(mddev); } -static int raid6_reconfig(mddev_t *mddev, int new_layout, int new_chunk) +static int raid6_check_reshape(mddev_t *mddev) { - if (new_layout >= 0 && !algorithm_valid_raid6(new_layout)) + int new_chunk = mddev->new_chunk_sectors; + + if (mddev->new_layout >= 0 && !algorithm_valid_raid6(mddev->new_layout)) return -EINVAL; if (new_chunk > 0) { - if (new_chunk & (new_chunk-1)) - /* not a power of 2 */ + if (!is_power_of_2(new_chunk)) return -EINVAL; - if (new_chunk < PAGE_SIZE) + if (new_chunk < (PAGE_SIZE >> 9)) return -EINVAL; - if (mddev->array_sectors & ((new_chunk>>9)-1)) + if (mddev->array_sectors & (new_chunk-1)) /* not factor of array size */ return -EINVAL; } /* They look valid */ - - if (new_layout >= 0) - mddev->new_layout = new_layout; - if (new_chunk > 0) - mddev->new_chunk = new_chunk; - - return 0; + return check_reshape(mddev); } static void *raid5_takeover(mddev_t *mddev) @@ -5227,8 +5237,6 @@ static void *raid5_takeover(mddev_t *mddev) * raid1 - if there are two drives. We need to know the chunk size * raid4 - trivial - just use a raid4 layout. * raid6 - Providing it is a *_6 layout - * - * For now, just do raid1 */ if (mddev->level == 1) @@ -5310,12 +5318,11 @@ static struct mdk_personality raid6_personality = .sync_request = sync_request, .resize = raid5_resize, .size = raid5_size, - .check_reshape = raid5_check_reshape, + .check_reshape = raid6_check_reshape, .start_reshape = raid5_start_reshape, .finish_reshape = raid5_finish_reshape, .quiesce = raid5_quiesce, .takeover = raid6_takeover, - .reconfig = raid6_reconfig, }; static struct mdk_personality raid5_personality = { @@ -5338,7 +5345,6 @@ static struct mdk_personality raid5_personality = .finish_reshape = raid5_finish_reshape, .quiesce = raid5_quiesce, .takeover = raid5_takeover, - .reconfig = raid5_reconfig, }; static struct mdk_personality raid4_personality = |