summaryrefslogtreecommitdiffstats
path: root/drivers/md/raid5.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-08-10 15:38:19 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2010-08-10 15:38:19 -0700
commit3d30701b58970425e1d45994d6cb82f828924fdd (patch)
tree8b14cf462628bebf8548c1b8c205a674564052d1 /drivers/md/raid5.c
parent8cbd84f2dd4e52a8771b191030c374ba3e56d291 (diff)
parentfd8aa2c1811bf60ccb2d5de0579c6f62aec1772d (diff)
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md: (24 commits) md: clean up do_md_stop md: fix another deadlock with removing sysfs attributes. md: move revalidate_disk() back outside open_mutex md/raid10: fix deadlock with unaligned read during resync md/bitmap: separate out loading a bitmap from initialising the structures. md/bitmap: prepare for storing write-intent-bitmap via dm-dirty-log. md/bitmap: optimise scanning of empty bitmaps. md/bitmap: clean up plugging calls. md/bitmap: reduce dependence on sysfs. md/bitmap: white space clean up and similar. md/raid5: export raid5 unplugging interface. md/plug: optionally use plugger to unplug an array during resync/recovery. md/raid5: add simple plugging infrastructure. md/raid5: export is_congested test raid5: Don't set read-ahead when there is no queue md: add support for raising dm events. md: export various start/stop interfaces md: split out md_rdev_init md: be more careful setting MD_CHANGE_CLEAN md/raid5: ensure we create a unique name for kmem_cache when mddev has no gendisk ...
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r--drivers/md/raid5.c168
1 files changed, 102 insertions, 66 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 20ac2f14376..866d4b5a144 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -201,11 +201,11 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
if (test_bit(STRIPE_HANDLE, &sh->state)) {
if (test_bit(STRIPE_DELAYED, &sh->state)) {
list_add_tail(&sh->lru, &conf->delayed_list);
- blk_plug_device(conf->mddev->queue);
+ plugger_set_plug(&conf->plug);
} else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
sh->bm_seq - conf->seq_write > 0) {
list_add_tail(&sh->lru, &conf->bitmap_list);
- blk_plug_device(conf->mddev->queue);
+ plugger_set_plug(&conf->plug);
} else {
clear_bit(STRIPE_BIT_DELAY, &sh->state);
list_add_tail(&sh->lru, &conf->handle_list);
@@ -434,7 +434,6 @@ static int has_failed(raid5_conf_t *conf)
}
static void unplug_slaves(mddev_t *mddev);
-static void raid5_unplug_device(struct request_queue *q);
static struct stripe_head *
get_active_stripe(raid5_conf_t *conf, sector_t sector,
@@ -464,7 +463,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector,
< (conf->max_nr_stripes *3/4)
|| !conf->inactive_blocked),
conf->device_lock,
- raid5_unplug_device(conf->mddev->queue)
+ md_raid5_unplug_device(conf)
);
conf->inactive_blocked = 0;
} else
@@ -1337,10 +1336,14 @@ static int grow_stripes(raid5_conf_t *conf, int num)
struct kmem_cache *sc;
int devs = max(conf->raid_disks, conf->previous_raid_disks);
- sprintf(conf->cache_name[0],
- "raid%d-%s", conf->level, mdname(conf->mddev));
- sprintf(conf->cache_name[1],
- "raid%d-%s-alt", conf->level, mdname(conf->mddev));
+ if (conf->mddev->gendisk)
+ sprintf(conf->cache_name[0],
+ "raid%d-%s", conf->level, mdname(conf->mddev));
+ else
+ sprintf(conf->cache_name[0],
+ "raid%d-%p", conf->level, conf->mddev);
+ sprintf(conf->cache_name[1], "%s-alt", conf->cache_name[0]);
+
conf->active_name = 0;
sc = kmem_cache_create(conf->cache_name[conf->active_name],
sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
@@ -3614,7 +3617,7 @@ static void raid5_activate_delayed(raid5_conf_t *conf)
list_add_tail(&sh->lru, &conf->hold_list);
}
} else
- blk_plug_device(conf->mddev->queue);
+ plugger_set_plug(&conf->plug);
}
static void activate_bit_delay(raid5_conf_t *conf)
@@ -3655,36 +3658,44 @@ static void unplug_slaves(mddev_t *mddev)
rcu_read_unlock();
}
-static void raid5_unplug_device(struct request_queue *q)
+void md_raid5_unplug_device(raid5_conf_t *conf)
{
- mddev_t *mddev = q->queuedata;
- raid5_conf_t *conf = mddev->private;
unsigned long flags;
spin_lock_irqsave(&conf->device_lock, flags);
- if (blk_remove_plug(q)) {
+ if (plugger_remove_plug(&conf->plug)) {
conf->seq_flush++;
raid5_activate_delayed(conf);
}
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(conf->mddev->thread);
spin_unlock_irqrestore(&conf->device_lock, flags);
- unplug_slaves(mddev);
+ unplug_slaves(conf->mddev);
}
+EXPORT_SYMBOL_GPL(md_raid5_unplug_device);
-static int raid5_congested(void *data, int bits)
+static void raid5_unplug(struct plug_handle *plug)
+{
+ raid5_conf_t *conf = container_of(plug, raid5_conf_t, plug);
+ md_raid5_unplug_device(conf);
+}
+
+static void raid5_unplug_queue(struct request_queue *q)
+{
+ mddev_t *mddev = q->queuedata;
+ md_raid5_unplug_device(mddev->private);
+}
+
+int md_raid5_congested(mddev_t *mddev, int bits)
{
- mddev_t *mddev = data;
raid5_conf_t *conf = mddev->private;
/* No difference between reads and writes. Just check
* how busy the stripe_cache is
*/
- if (mddev_congested(mddev, bits))
- return 1;
if (conf->inactive_blocked)
return 1;
if (conf->quiesce)
@@ -3694,6 +3705,15 @@ static int raid5_congested(void *data, int bits)
return 0;
}
+EXPORT_SYMBOL_GPL(md_raid5_congested);
+
+static int raid5_congested(void *data, int bits)
+{
+ mddev_t *mddev = data;
+
+ return mddev_congested(mddev, bits) ||
+ md_raid5_congested(mddev, bits);
+}
/* We want read requests to align with chunks where possible,
* but write requests don't need to.
@@ -4075,7 +4095,7 @@ static int make_request(mddev_t *mddev, struct bio * bi)
* add failed due to overlap. Flush everything
* and wait a while
*/
- raid5_unplug_device(mddev->queue);
+ md_raid5_unplug_device(conf);
release_stripe(sh);
schedule();
goto retry;
@@ -4566,23 +4586,15 @@ raid5_show_stripe_cache_size(mddev_t *mddev, char *page)
return 0;
}
-static ssize_t
-raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len)
+int
+raid5_set_cache_size(mddev_t *mddev, int size)
{
raid5_conf_t *conf = mddev->private;
- unsigned long new;
int err;
- if (len >= PAGE_SIZE)
+ if (size <= 16 || size > 32768)
return -EINVAL;
- if (!conf)
- return -ENODEV;
-
- if (strict_strtoul(page, 10, &new))
- return -EINVAL;
- if (new <= 16 || new > 32768)
- return -EINVAL;
- while (new < conf->max_nr_stripes) {
+ while (size < conf->max_nr_stripes) {
if (drop_one_stripe(conf))
conf->max_nr_stripes--;
else
@@ -4591,11 +4603,32 @@ raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len)
err = md_allow_write(mddev);
if (err)
return err;
- while (new > conf->max_nr_stripes) {
+ while (size > conf->max_nr_stripes) {
if (grow_one_stripe(conf))
conf->max_nr_stripes++;
else break;
}
+ return 0;
+}
+EXPORT_SYMBOL(raid5_set_cache_size);
+
+static ssize_t
+raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len)
+{
+ raid5_conf_t *conf = mddev->private;
+ unsigned long new;
+ int err;
+
+ if (len >= PAGE_SIZE)
+ return -EINVAL;
+ if (!conf)
+ return -ENODEV;
+
+ if (strict_strtoul(page, 10, &new))
+ return -EINVAL;
+ err = raid5_set_cache_size(mddev, new);
+ if (err)
+ return err;
return len;
}
@@ -4958,7 +4991,7 @@ static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded
static int run(mddev_t *mddev)
{
raid5_conf_t *conf;
- int working_disks = 0, chunk_size;
+ int working_disks = 0;
int dirty_parity_disks = 0;
mdk_rdev_t *rdev;
sector_t reshape_offset = 0;
@@ -5144,42 +5177,47 @@ static int run(mddev_t *mddev)
"reshape");
}
- /* read-ahead size must cover two whole stripes, which is
- * 2 * (datadisks) * chunksize where 'n' is the number of raid devices
- */
- {
- int data_disks = conf->previous_raid_disks - conf->max_degraded;
- int stripe = data_disks *
- ((mddev->chunk_sectors << 9) / PAGE_SIZE);
- if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
- mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
- }
/* Ok, everything is just fine now */
if (mddev->to_remove == &raid5_attrs_group)
mddev->to_remove = NULL;
- else if (sysfs_create_group(&mddev->kobj, &raid5_attrs_group))
+ else if (mddev->kobj.sd &&
+ sysfs_create_group(&mddev->kobj, &raid5_attrs_group))
printk(KERN_WARNING
- "md/raid:%s: failed to create sysfs attributes.\n",
+ "raid5: failed to create sysfs attributes for %s\n",
mdname(mddev));
+ md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
- mddev->queue->queue_lock = &conf->device_lock;
+ plugger_init(&conf->plug, raid5_unplug);
+ mddev->plug = &conf->plug;
+ if (mddev->queue) {
+ int chunk_size;
+ /* read-ahead size must cover two whole stripes, which
+ * is 2 * (datadisks) * chunksize where 'n' is the
+ * number of raid devices
+ */
+ int data_disks = conf->previous_raid_disks - conf->max_degraded;
+ int stripe = data_disks *
+ ((mddev->chunk_sectors << 9) / PAGE_SIZE);
+ if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
+ mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
- mddev->queue->unplug_fn = raid5_unplug_device;
- mddev->queue->backing_dev_info.congested_data = mddev;
- mddev->queue->backing_dev_info.congested_fn = raid5_congested;
+ blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
- md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
+ mddev->queue->backing_dev_info.congested_data = mddev;
+ mddev->queue->backing_dev_info.congested_fn = raid5_congested;
+ mddev->queue->queue_lock = &conf->device_lock;
+ mddev->queue->unplug_fn = raid5_unplug_queue;
- blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
- chunk_size = mddev->chunk_sectors << 9;
- blk_queue_io_min(mddev->queue, chunk_size);
- blk_queue_io_opt(mddev->queue, chunk_size *
- (conf->raid_disks - conf->max_degraded));
+ chunk_size = mddev->chunk_sectors << 9;
+ blk_queue_io_min(mddev->queue, chunk_size);
+ blk_queue_io_opt(mddev->queue, chunk_size *
+ (conf->raid_disks - conf->max_degraded));
- list_for_each_entry(rdev, &mddev->disks, same_set)
- disk_stack_limits(mddev->gendisk, rdev->bdev,
- rdev->data_offset << 9);
+ list_for_each_entry(rdev, &mddev->disks, same_set)
+ disk_stack_limits(mddev->gendisk, rdev->bdev,
+ rdev->data_offset << 9);
+ }
return 0;
abort:
@@ -5200,8 +5238,9 @@ static int stop(mddev_t *mddev)
md_unregister_thread(mddev->thread);
mddev->thread = NULL;
- mddev->queue->backing_dev_info.congested_fn = NULL;
- blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
+ if (mddev->queue)
+ mddev->queue->backing_dev_info.congested_fn = NULL;
+ plugger_flush(&conf->plug); /* the unplug fn references 'conf'*/
free_conf(conf);
mddev->private = NULL;
mddev->to_remove = &raid5_attrs_group;
@@ -5545,10 +5584,7 @@ static int raid5_start_reshape(mddev_t *mddev)
sprintf(nm, "rd%d", rdev->raid_disk);
if (sysfs_create_link(&mddev->kobj,
&rdev->kobj, nm))
- printk(KERN_WARNING
- "md/raid:%s: failed to create "
- " link %s\n",
- mdname(mddev), nm);
+ /* Failure here is OK */;
} else
break;
}
@@ -5603,7 +5639,7 @@ static void end_reshape(raid5_conf_t *conf)
/* read-ahead size must cover two whole stripes, which is
* 2 * (datadisks) * chunksize where 'n' is the number of raid devices
*/
- {
+ if (conf->mddev->queue) {
int data_disks = conf->raid_disks - conf->max_degraded;
int stripe = data_disks * ((conf->chunk_sectors << 9)
/ PAGE_SIZE);