diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-10-29 08:47:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-10-29 08:47:36 -0700 |
commit | d8d048f69a618c531575cb1f398a7186f0532ef2 (patch) | |
tree | cb8d6ee7a3cb2aea53e5e95de0ca689cc69411a2 /drivers/md/md.c | |
parent | 53113b06e48c6c38f7612c1f8043b8a0d2adf72b (diff) | |
parent | f3ac8bf7ce1c5abd763ea762e95d1cdcf7799372 (diff) |
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md:
md: tidy up device searches in read_balance.
md/raid1: fix some typos in comments.
md/raid1: discard unused variable.
md: unplug writes to external bitmaps.
md: use separate bio pool for each md device.
md: change type of first arg to sync_page_io.
md/raid1: perform mem allocation before disabling writes during resync.
md: use bio_kmalloc rather than bio_alloc when failure is acceptable.
md: Fix possible deadlock with multiple mempool allocations.
md: fix and update workqueue usage
md: use sector_t in bitmap_get_counter
md: remove md_mutex locking.
md: Fix regression with raid1 arrays without persistent metadata.
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 162 |
1 files changed, 127 insertions, 35 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 225815197a3..4e957f3140a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -57,8 +57,6 @@ #define DEBUG 0 #define dprintk(x...) ((void)(DEBUG && printk(x))) -static DEFINE_MUTEX(md_mutex); - #ifndef MODULE static void autostart_arrays(int part); #endif @@ -69,6 +67,8 @@ static DEFINE_SPINLOCK(pers_lock); static void md_print_devices(void); static DECLARE_WAIT_QUEUE_HEAD(resync_wait); +static struct workqueue_struct *md_wq; +static struct workqueue_struct *md_misc_wq; #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } @@ -149,6 +149,72 @@ static const struct block_device_operations md_fops; static int start_readonly; +/* bio_clone_mddev + * like bio_clone, but with a local bio set + */ + +static void mddev_bio_destructor(struct bio *bio) +{ + mddev_t *mddev, **mddevp; + + mddevp = (void*)bio; + mddev = mddevp[-1]; + + bio_free(bio, mddev->bio_set); +} + +struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, + mddev_t *mddev) +{ + struct bio *b; + mddev_t **mddevp; + + if (!mddev || !mddev->bio_set) + return bio_alloc(gfp_mask, nr_iovecs); + + b = bio_alloc_bioset(gfp_mask, nr_iovecs, + mddev->bio_set); + if (!b) + return NULL; + mddevp = (void*)b; + mddevp[-1] = mddev; + b->bi_destructor = mddev_bio_destructor; + return b; +} +EXPORT_SYMBOL_GPL(bio_alloc_mddev); + +struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask, + mddev_t *mddev) +{ + struct bio *b; + mddev_t **mddevp; + + if (!mddev || !mddev->bio_set) + return bio_clone(bio, gfp_mask); + + b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, + mddev->bio_set); + if (!b) + return NULL; + mddevp = (void*)b; + mddevp[-1] = mddev; + b->bi_destructor = mddev_bio_destructor; + __bio_clone(b, bio); + if (bio_integrity(bio)) { + int ret; + + ret = bio_integrity_clone(b, bio, gfp_mask, mddev->bio_set); + + if (ret < 0) { + bio_put(b); + return NULL; + } + } + + return b; +} +EXPORT_SYMBOL_GPL(bio_clone_mddev); + /* * We have a system wide 'event count' that is incremented * on any 'interesting' event, and readers of /proc/mdstat @@ -300,7 +366,7 @@ static void md_end_flush(struct bio *bio, int err) if (atomic_dec_and_test(&mddev->flush_pending)) { /* The pre-request flush has finished */ - schedule_work(&mddev->flush_work); + queue_work(md_wq, &mddev->flush_work); } bio_put(bio); } @@ -321,7 +387,7 @@ static void submit_flushes(mddev_t *mddev) atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending); rcu_read_unlock(); - bi = bio_alloc(GFP_KERNEL, 0); + bi = bio_alloc_mddev(GFP_KERNEL, 0, mddev); bi->bi_end_io = md_end_flush; bi->bi_private = rdev; bi->bi_bdev = rdev->bdev; @@ -369,7 +435,7 @@ void md_flush_request(mddev_t *mddev, struct bio *bio) submit_flushes(mddev); if (atomic_dec_and_test(&mddev->flush_pending)) - schedule_work(&mddev->flush_work); + queue_work(md_wq, &mddev->flush_work); } EXPORT_SYMBOL(md_flush_request); @@ -428,6 +494,8 @@ static void mddev_delayed_delete(struct work_struct *ws); static void mddev_put(mddev_t *mddev) { + struct bio_set *bs = NULL; + if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock)) return; if (!mddev->raid_disks && list_empty(&mddev->disks) && @@ -435,19 +503,22 @@ static void mddev_put(mddev_t *mddev) /* Array is not configured at all, and not held active, * so destroy it */ list_del(&mddev->all_mddevs); + bs = mddev->bio_set; + mddev->bio_set = NULL; if (mddev->gendisk) { - /* we did a probe so need to clean up. - * Call schedule_work inside the spinlock - * so that flush_scheduled_work() after - * mddev_find will succeed in waiting for the - * work to be done. + /* We did a probe so need to clean up. Call + * queue_work inside the spinlock so that + * flush_workqueue() after mddev_find will + * succeed in waiting for the work to be done. */ INIT_WORK(&mddev->del_work, mddev_delayed_delete); - schedule_work(&mddev->del_work); + queue_work(md_misc_wq, &mddev->del_work); } else kfree(mddev); } spin_unlock(&all_mddevs_lock); + if (bs) + bioset_free(bs); } void mddev_init(mddev_t *mddev) @@ -691,7 +762,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, * if zero is reached. * If an error occurred, call md_error */ - struct bio *bio = bio_alloc(GFP_NOIO, 1); + struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev); bio->bi_bdev = rdev->bdev; bio->bi_sector = sector; @@ -722,16 +793,16 @@ static void bi_complete(struct bio *bio, int error) complete((struct completion*)bio->bi_private); } -int sync_page_io(struct block_device *bdev, sector_t sector, int size, - struct page *page, int rw) +int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size, + struct page *page, int rw) { - struct bio *bio = bio_alloc(GFP_NOIO, 1); + struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev); struct completion event; int ret; rw |= REQ_SYNC | REQ_UNPLUG; - bio->bi_bdev = bdev; + bio->bi_bdev = rdev->bdev; bio->bi_sector = sector; bio_add_page(bio, page, size, 0); init_completion(&event); @@ -757,7 +828,7 @@ static int read_disk_sb(mdk_rdev_t * rdev, int size) return 0; - if (!sync_page_io(rdev->bdev, rdev->sb_start, size, rdev->sb_page, READ)) + if (!sync_page_io(rdev, rdev->sb_start, size, rdev->sb_page, READ)) goto fail; rdev->sb_loaded = 1; return 0; @@ -1850,7 +1921,7 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) synchronize_rcu(); INIT_WORK(&rdev->del_work, md_delayed_delete); kobject_get(&rdev->kobj); - schedule_work(&rdev->del_work); + queue_work(md_misc_wq, &rdev->del_work); } /* @@ -2108,6 +2179,8 @@ repeat: if (!mddev->persistent) { clear_bit(MD_CHANGE_CLEAN, &mddev->flags); clear_bit(MD_CHANGE_DEVS, &mddev->flags); + if (!mddev->external) + clear_bit(MD_CHANGE_PENDING, &mddev->flags); wake_up(&mddev->sb_wait); return; } @@ -4192,10 +4265,10 @@ static int md_alloc(dev_t dev, char *name) shift = partitioned ? MdpMinorShift : 0; unit = MINOR(mddev->unit) >> shift; - /* wait for any previous instance if this device - * to be completed removed (mddev_delayed_delete). + /* wait for any previous instance of this device to be + * completely removed (mddev_delayed_delete). */ - flush_scheduled_work(); + flush_workqueue(md_misc_wq); mutex_lock(&disks_mutex); error = -EEXIST; @@ -4378,6 +4451,9 @@ int md_run(mddev_t *mddev) sysfs_notify_dirent_safe(rdev->sysfs_state); } + if (mddev->bio_set == NULL) + mddev->bio_set = bioset_create(BIO_POOL_SIZE, sizeof(mddev)); + spin_lock(&pers_lock); pers = find_pers(mddev->level, mddev->clevel); if (!pers || !try_module_get(pers->owner)) { @@ -5885,16 +5961,14 @@ static int md_open(struct block_device *bdev, fmode_t mode) mddev_t *mddev = mddev_find(bdev->bd_dev); int err; - mutex_lock(&md_mutex); if (mddev->gendisk != bdev->bd_disk) { /* we are racing with mddev_put which is discarding this * bd_disk. */ mddev_put(mddev); /* Wait until bdev->bd_disk is definitely gone */ - flush_scheduled_work(); + flush_workqueue(md_misc_wq); /* Then retry the open from the top */ - mutex_unlock(&md_mutex); return -ERESTARTSYS; } BUG_ON(mddev != bdev->bd_disk->private_data); @@ -5908,7 +5982,6 @@ static int md_open(struct block_device *bdev, fmode_t mode) check_disk_size_change(mddev->gendisk, bdev); out: - mutex_unlock(&md_mutex); return err; } @@ -5917,10 +5990,8 @@ static int md_release(struct gendisk *disk, fmode_t mode) mddev_t *mddev = disk->private_data; BUG_ON(!mddev); - mutex_lock(&md_mutex); atomic_dec(&mddev->openers); mddev_put(mddev); - mutex_unlock(&md_mutex); return 0; } @@ -6052,7 +6123,7 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev) set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); if (mddev->event_work.func) - schedule_work(&mddev->event_work); + queue_work(md_misc_wq, &mddev->event_work); md_new_event_inintr(mddev); } @@ -7212,12 +7283,23 @@ static void md_geninit(void) static int __init md_init(void) { - if (register_blkdev(MD_MAJOR, "md")) - return -1; - if ((mdp_major=register_blkdev(0, "mdp"))<=0) { - unregister_blkdev(MD_MAJOR, "md"); - return -1; - } + int ret = -ENOMEM; + + md_wq = alloc_workqueue("md", WQ_RESCUER, 0); + if (!md_wq) + goto err_wq; + + md_misc_wq = alloc_workqueue("md_misc", 0, 0); + if (!md_misc_wq) + goto err_misc_wq; + + if ((ret = register_blkdev(MD_MAJOR, "md")) < 0) + goto err_md; + + if ((ret = register_blkdev(0, "mdp")) < 0) + goto err_mdp; + mdp_major = ret; + blk_register_region(MKDEV(MD_MAJOR, 0), 1UL<<MINORBITS, THIS_MODULE, md_probe, NULL, NULL); blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE, @@ -7228,8 +7310,16 @@ static int __init md_init(void) md_geninit(); return 0; -} +err_mdp: + unregister_blkdev(MD_MAJOR, "md"); +err_md: + destroy_workqueue(md_misc_wq); +err_misc_wq: + destroy_workqueue(md_wq); +err_wq: + return ret; +} #ifndef MODULE @@ -7316,6 +7406,8 @@ static __exit void md_exit(void) export_array(mddev); mddev->hold_active = 0; } + destroy_workqueue(md_misc_wq); + destroy_workqueue(md_wq); } subsys_initcall(md_init); |