summaryrefslogtreecommitdiffstats
path: root/drivers/md/md.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-10-29 08:47:36 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2010-10-29 08:47:36 -0700
commitd8d048f69a618c531575cb1f398a7186f0532ef2 (patch)
treecb8d6ee7a3cb2aea53e5e95de0ca689cc69411a2 /drivers/md/md.c
parent53113b06e48c6c38f7612c1f8043b8a0d2adf72b (diff)
parentf3ac8bf7ce1c5abd763ea762e95d1cdcf7799372 (diff)
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md: md: tidy up device searches in read_balance. md/raid1: fix some typos in comments. md/raid1: discard unused variable. md: unplug writes to external bitmaps. md: use separate bio pool for each md device. md: change type of first arg to sync_page_io. md/raid1: perform mem allocation before disabling writes during resync. md: use bio_kmalloc rather than bio_alloc when failure is acceptable. md: Fix possible deadlock with multiple mempool allocations. md: fix and update workqueue usage md: use sector_t in bitmap_get_counter md: remove md_mutex locking. md: Fix regression with raid1 arrays without persistent metadata.
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r--drivers/md/md.c162
1 files changed, 127 insertions, 35 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 225815197a3..4e957f3140a 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -57,8 +57,6 @@
#define DEBUG 0
#define dprintk(x...) ((void)(DEBUG && printk(x)))
-static DEFINE_MUTEX(md_mutex);
-
#ifndef MODULE
static void autostart_arrays(int part);
#endif
@@ -69,6 +67,8 @@ static DEFINE_SPINLOCK(pers_lock);
static void md_print_devices(void);
static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
+static struct workqueue_struct *md_wq;
+static struct workqueue_struct *md_misc_wq;
#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
@@ -149,6 +149,72 @@ static const struct block_device_operations md_fops;
static int start_readonly;
+/* bio_clone_mddev
+ * like bio_clone, but with a local bio set
+ */
+
+static void mddev_bio_destructor(struct bio *bio)
+{
+ mddev_t *mddev, **mddevp;
+
+ mddevp = (void*)bio;
+ mddev = mddevp[-1];
+
+ bio_free(bio, mddev->bio_set);
+}
+
+struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
+ mddev_t *mddev)
+{
+ struct bio *b;
+ mddev_t **mddevp;
+
+ if (!mddev || !mddev->bio_set)
+ return bio_alloc(gfp_mask, nr_iovecs);
+
+ b = bio_alloc_bioset(gfp_mask, nr_iovecs,
+ mddev->bio_set);
+ if (!b)
+ return NULL;
+ mddevp = (void*)b;
+ mddevp[-1] = mddev;
+ b->bi_destructor = mddev_bio_destructor;
+ return b;
+}
+EXPORT_SYMBOL_GPL(bio_alloc_mddev);
+
+struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
+ mddev_t *mddev)
+{
+ struct bio *b;
+ mddev_t **mddevp;
+
+ if (!mddev || !mddev->bio_set)
+ return bio_clone(bio, gfp_mask);
+
+ b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs,
+ mddev->bio_set);
+ if (!b)
+ return NULL;
+ mddevp = (void*)b;
+ mddevp[-1] = mddev;
+ b->bi_destructor = mddev_bio_destructor;
+ __bio_clone(b, bio);
+ if (bio_integrity(bio)) {
+ int ret;
+
+ ret = bio_integrity_clone(b, bio, gfp_mask, mddev->bio_set);
+
+ if (ret < 0) {
+ bio_put(b);
+ return NULL;
+ }
+ }
+
+ return b;
+}
+EXPORT_SYMBOL_GPL(bio_clone_mddev);
+
/*
* We have a system wide 'event count' that is incremented
* on any 'interesting' event, and readers of /proc/mdstat
@@ -300,7 +366,7 @@ static void md_end_flush(struct bio *bio, int err)
if (atomic_dec_and_test(&mddev->flush_pending)) {
/* The pre-request flush has finished */
- schedule_work(&mddev->flush_work);
+ queue_work(md_wq, &mddev->flush_work);
}
bio_put(bio);
}
@@ -321,7 +387,7 @@ static void submit_flushes(mddev_t *mddev)
atomic_inc(&rdev->nr_pending);
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
- bi = bio_alloc(GFP_KERNEL, 0);
+ bi = bio_alloc_mddev(GFP_KERNEL, 0, mddev);
bi->bi_end_io = md_end_flush;
bi->bi_private = rdev;
bi->bi_bdev = rdev->bdev;
@@ -369,7 +435,7 @@ void md_flush_request(mddev_t *mddev, struct bio *bio)
submit_flushes(mddev);
if (atomic_dec_and_test(&mddev->flush_pending))
- schedule_work(&mddev->flush_work);
+ queue_work(md_wq, &mddev->flush_work);
}
EXPORT_SYMBOL(md_flush_request);
@@ -428,6 +494,8 @@ static void mddev_delayed_delete(struct work_struct *ws);
static void mddev_put(mddev_t *mddev)
{
+ struct bio_set *bs = NULL;
+
if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
return;
if (!mddev->raid_disks && list_empty(&mddev->disks) &&
@@ -435,19 +503,22 @@ static void mddev_put(mddev_t *mddev)
/* Array is not configured at all, and not held active,
* so destroy it */
list_del(&mddev->all_mddevs);
+ bs = mddev->bio_set;
+ mddev->bio_set = NULL;
if (mddev->gendisk) {
- /* we did a probe so need to clean up.
- * Call schedule_work inside the spinlock
- * so that flush_scheduled_work() after
- * mddev_find will succeed in waiting for the
- * work to be done.
+ /* We did a probe so need to clean up. Call
+ * queue_work inside the spinlock so that
+ * flush_workqueue() after mddev_find will
+ * succeed in waiting for the work to be done.
*/
INIT_WORK(&mddev->del_work, mddev_delayed_delete);
- schedule_work(&mddev->del_work);
+ queue_work(md_misc_wq, &mddev->del_work);
} else
kfree(mddev);
}
spin_unlock(&all_mddevs_lock);
+ if (bs)
+ bioset_free(bs);
}
void mddev_init(mddev_t *mddev)
@@ -691,7 +762,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
* if zero is reached.
* If an error occurred, call md_error
*/
- struct bio *bio = bio_alloc(GFP_NOIO, 1);
+ struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
bio->bi_bdev = rdev->bdev;
bio->bi_sector = sector;
@@ -722,16 +793,16 @@ static void bi_complete(struct bio *bio, int error)
complete((struct completion*)bio->bi_private);
}
-int sync_page_io(struct block_device *bdev, sector_t sector, int size,
- struct page *page, int rw)
+int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size,
+ struct page *page, int rw)
{
- struct bio *bio = bio_alloc(GFP_NOIO, 1);
+ struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
struct completion event;
int ret;
rw |= REQ_SYNC | REQ_UNPLUG;
- bio->bi_bdev = bdev;
+ bio->bi_bdev = rdev->bdev;
bio->bi_sector = sector;
bio_add_page(bio, page, size, 0);
init_completion(&event);
@@ -757,7 +828,7 @@ static int read_disk_sb(mdk_rdev_t * rdev, int size)
return 0;
- if (!sync_page_io(rdev->bdev, rdev->sb_start, size, rdev->sb_page, READ))
+ if (!sync_page_io(rdev, rdev->sb_start, size, rdev->sb_page, READ))
goto fail;
rdev->sb_loaded = 1;
return 0;
@@ -1850,7 +1921,7 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev)
synchronize_rcu();
INIT_WORK(&rdev->del_work, md_delayed_delete);
kobject_get(&rdev->kobj);
- schedule_work(&rdev->del_work);
+ queue_work(md_misc_wq, &rdev->del_work);
}
/*
@@ -2108,6 +2179,8 @@ repeat:
if (!mddev->persistent) {
clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
clear_bit(MD_CHANGE_DEVS, &mddev->flags);
+ if (!mddev->external)
+ clear_bit(MD_CHANGE_PENDING, &mddev->flags);
wake_up(&mddev->sb_wait);
return;
}
@@ -4192,10 +4265,10 @@ static int md_alloc(dev_t dev, char *name)
shift = partitioned ? MdpMinorShift : 0;
unit = MINOR(mddev->unit) >> shift;
- /* wait for any previous instance if this device
- * to be completed removed (mddev_delayed_delete).
+ /* wait for any previous instance of this device to be
+ * completely removed (mddev_delayed_delete).
*/
- flush_scheduled_work();
+ flush_workqueue(md_misc_wq);
mutex_lock(&disks_mutex);
error = -EEXIST;
@@ -4378,6 +4451,9 @@ int md_run(mddev_t *mddev)
sysfs_notify_dirent_safe(rdev->sysfs_state);
}
+ if (mddev->bio_set == NULL)
+ mddev->bio_set = bioset_create(BIO_POOL_SIZE, sizeof(mddev));
+
spin_lock(&pers_lock);
pers = find_pers(mddev->level, mddev->clevel);
if (!pers || !try_module_get(pers->owner)) {
@@ -5885,16 +5961,14 @@ static int md_open(struct block_device *bdev, fmode_t mode)
mddev_t *mddev = mddev_find(bdev->bd_dev);
int err;
- mutex_lock(&md_mutex);
if (mddev->gendisk != bdev->bd_disk) {
/* we are racing with mddev_put which is discarding this
* bd_disk.
*/
mddev_put(mddev);
/* Wait until bdev->bd_disk is definitely gone */
- flush_scheduled_work();
+ flush_workqueue(md_misc_wq);
/* Then retry the open from the top */
- mutex_unlock(&md_mutex);
return -ERESTARTSYS;
}
BUG_ON(mddev != bdev->bd_disk->private_data);
@@ -5908,7 +5982,6 @@ static int md_open(struct block_device *bdev, fmode_t mode)
check_disk_size_change(mddev->gendisk, bdev);
out:
- mutex_unlock(&md_mutex);
return err;
}
@@ -5917,10 +5990,8 @@ static int md_release(struct gendisk *disk, fmode_t mode)
mddev_t *mddev = disk->private_data;
BUG_ON(!mddev);
- mutex_lock(&md_mutex);
atomic_dec(&mddev->openers);
mddev_put(mddev);
- mutex_unlock(&md_mutex);
return 0;
}
@@ -6052,7 +6123,7 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
if (mddev->event_work.func)
- schedule_work(&mddev->event_work);
+ queue_work(md_misc_wq, &mddev->event_work);
md_new_event_inintr(mddev);
}
@@ -7212,12 +7283,23 @@ static void md_geninit(void)
static int __init md_init(void)
{
- if (register_blkdev(MD_MAJOR, "md"))
- return -1;
- if ((mdp_major=register_blkdev(0, "mdp"))<=0) {
- unregister_blkdev(MD_MAJOR, "md");
- return -1;
- }
+ int ret = -ENOMEM;
+
+ md_wq = alloc_workqueue("md", WQ_RESCUER, 0);
+ if (!md_wq)
+ goto err_wq;
+
+ md_misc_wq = alloc_workqueue("md_misc", 0, 0);
+ if (!md_misc_wq)
+ goto err_misc_wq;
+
+ if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
+ goto err_md;
+
+ if ((ret = register_blkdev(0, "mdp")) < 0)
+ goto err_mdp;
+ mdp_major = ret;
+
blk_register_region(MKDEV(MD_MAJOR, 0), 1UL<<MINORBITS, THIS_MODULE,
md_probe, NULL, NULL);
blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
@@ -7228,8 +7310,16 @@ static int __init md_init(void)
md_geninit();
return 0;
-}
+err_mdp:
+ unregister_blkdev(MD_MAJOR, "md");
+err_md:
+ destroy_workqueue(md_misc_wq);
+err_misc_wq:
+ destroy_workqueue(md_wq);
+err_wq:
+ return ret;
+}
#ifndef MODULE
@@ -7316,6 +7406,8 @@ static __exit void md_exit(void)
export_array(mddev);
mddev->hold_active = 0;
}
+ destroy_workqueue(md_misc_wq);
+ destroy_workqueue(md_wq);
}
subsys_initcall(md_init);