From 2fc9f6baa24ac230166df41ed636224969523341 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Mon, 13 Oct 2014 12:42:12 +0800 Subject: Btrfs: return failure if btrfs_dev_replace_finishing() failed device replace could fail due to another running scrub process or any other errors btrfs_scrub_dev() may hit, but this failure doesn't get returned to userspace. The following steps could reproduce this issue mkfs -t btrfs -f /dev/sdb1 /dev/sdb2 mount /dev/sdb1 /mnt/btrfs while true; do btrfs scrub start -B /mnt/btrfs >/dev/null 2>&1; done & btrfs replace start -Bf /dev/sdb2 /dev/sdb3 /mnt/btrfs # if this replace succeeded, do the following and repeat until # you see this log in dmesg # BTRFS: btrfs_scrub_dev(/dev/sdb2, 2, /dev/sdb3) failed -115 #btrfs replace start -Bf /dev/sdb3 /dev/sdb2 /mnt/btrfs # once you see the error log in dmesg, check return value of # replace echo $? Introduce a new dev replace result BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS to catch -EINPROGRESS explicitly and return other errors directly to userspace. Signed-off-by: Eryu Guan Signed-off-by: Chris Mason --- fs/btrfs/dev-replace.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/dev-replace.c') diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 6f662b34ba0..971c061eb1c 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -422,9 +422,15 @@ int btrfs_dev_replace_start(struct btrfs_root *root, &dev_replace->scrub_progress, 0, 1); ret = btrfs_dev_replace_finishing(root->fs_info, ret); - WARN_ON(ret); + /* don't warn if EINPROGRESS, someone else might be running scrub */ + if (ret == -EINPROGRESS) { + args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS; + ret = 0; + } else { + WARN_ON(ret); + } - return 0; + return ret; leave: dev_replace->srcdev = NULL; @@ -542,7 +548,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); - return 0; + return scrub_ret; } printk_in_rcu(KERN_INFO -- cgit v1.2.3-70-g09d2 From 084b6e7c7607bbeb28544da659c3f5981a4689b0 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 30 Oct 2014 16:52:31 +0800 Subject: btrfs: Fix a lockdep warning when running xfstest. The following lockdep warning is triggered during xfstests: [ 1702.980872] ========================================================= [ 1702.981181] [ INFO: possible irq lock inversion dependency detected ] [ 1702.981482] 3.18.0-rc1 #27 Not tainted [ 1702.981781] --------------------------------------------------------- [ 1702.982095] kswapd0/77 just changed the state of lock: [ 1702.982415] (&delayed_node->mutex){+.+.-.}, at: [] __btrfs_release_delayed_node+0x41/0x1f0 [btrfs] [ 1702.982794] but this lock took another, RECLAIM_FS-unsafe lock in the past: [ 1702.983160] (&fs_info->dev_replace.lock){+.+.+.} and interrupts could create inverse lock ordering between them. [ 1702.984675] other info that might help us debug this: [ 1702.985524] Chain exists of: &delayed_node->mutex --> &found->groups_sem --> &fs_info->dev_replace.lock [ 1702.986799] Possible interrupt unsafe locking scenario: [ 1702.987681] CPU0 CPU1 [ 1702.988137] ---- ---- [ 1702.988598] lock(&fs_info->dev_replace.lock); [ 1702.989069] local_irq_disable(); [ 1702.989534] lock(&delayed_node->mutex); [ 1702.990038] lock(&found->groups_sem); [ 1702.990494] [ 1702.990938] lock(&delayed_node->mutex); [ 1702.991407] *** DEADLOCK *** It is because the btrfs_kobj_{add/rm}_device() will call memory allocation with GFP_KERNEL, which may flush fs page cache to free space, waiting for it self to do the commit, causing the deadlock. To solve the problem, move btrfs_kobj_{add/rm}_device() out of the dev_replace lock range, also involing split the btrfs_rm_dev_replace_srcdev() function into remove and free parts. Now only btrfs_rm_dev_replace_remove_srcdev() is called in dev_replace lock range, and kobj_{add/rm} and btrfs_rm_dev_replace_free_srcdev() are called out of the lock range. Signed-off-by: Qu Wenruo Signed-off-by: Chris Mason --- fs/btrfs/dev-replace.c | 11 ++++++----- fs/btrfs/volumes.c | 10 ++++++++-- fs/btrfs/volumes.h | 6 ++++-- 3 files changed, 18 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/dev-replace.c') diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 971c061eb1c..3fbd0628620 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -577,15 +577,11 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); fs_info->fs_devices->rw_devices++; - /* replace the sysfs entry */ - btrfs_kobj_rm_device(fs_info, src_device); - btrfs_kobj_add_device(fs_info, tgt_device); - btrfs_dev_replace_unlock(dev_replace); btrfs_rm_dev_replace_blocked(fs_info); - btrfs_rm_dev_replace_srcdev(fs_info, src_device); + btrfs_rm_dev_replace_remove_srcdev(fs_info, src_device); btrfs_rm_dev_replace_unblocked(fs_info); @@ -600,6 +596,11 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); mutex_unlock(&uuid_mutex); + /* replace the sysfs entry */ + btrfs_kobj_rm_device(fs_info, src_device); + btrfs_kobj_add_device(fs_info, tgt_device); + btrfs_rm_dev_replace_free_srcdev(fs_info, src_device); + /* write back the superblocks */ trans = btrfs_start_transaction(root, 0); if (!IS_ERR(trans)) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d47289c715c..01920515f90 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1800,8 +1800,8 @@ error_undo: goto error_brelse; } -void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info, - struct btrfs_device *srcdev) +void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info, + struct btrfs_device *srcdev) { struct btrfs_fs_devices *fs_devices; @@ -1829,6 +1829,12 @@ void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info, if (srcdev->bdev) fs_devices->open_devices--; +} + +void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, + struct btrfs_device *srcdev) +{ + struct btrfs_fs_devices *fs_devices = srcdev->fs_devices; call_rcu(&srcdev->rcu, free_device); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 08980fa2303..4cc00e64427 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -448,8 +448,10 @@ void btrfs_init_devices_late(struct btrfs_fs_info *fs_info); int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); -void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info, - struct btrfs_device *srcdev); +void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info, + struct btrfs_device *srcdev); +void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, + struct btrfs_device *srcdev); void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, struct btrfs_device *tgtdev); void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info, -- cgit v1.2.3-70-g09d2 From 4245215d6a8dba1a51c50533b6667919687c0b89 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Tue, 25 Nov 2014 16:39:28 +0800 Subject: Btrfs, raid56: fix use-after-free problem in the final device replace procedure on raid56 The commit c404e0dc (Btrfs: fix use-after-free in the finishing procedure of the device replace) fixed a use-after-free problem which happened when removing the source device at the end of device replace, but at that time, btrfs didn't support device replace on raid56, so we didn't fix the problem on the raid56 profile. Currently, we implemented device replace for raid56, so we need kick that problem out before we enable that function for raid56. The fix method is very simple, we just increase the bio per-cpu counter before we submit a raid56 io, and decrease the counter when the raid56 io ends. Signed-off-by: Miao Xie --- fs/btrfs/ctree.h | 7 ++++++- fs/btrfs/dev-replace.c | 4 ++-- fs/btrfs/raid56.c | 41 ++++++++++++++++++++++++++++++++--------- fs/btrfs/raid56.h | 4 ++-- fs/btrfs/scrub.c | 2 +- fs/btrfs/volumes.c | 7 ++----- 6 files changed, 45 insertions(+), 20 deletions(-) (limited to 'fs/btrfs/dev-replace.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index fe69edda11f..470e3177a7e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -4097,7 +4097,12 @@ int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, /* dev-replace.c */ void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info); void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info); -void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info); +void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount); + +static inline void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info) +{ + btrfs_bio_counter_sub(fs_info, 1); +} /* reada.c */ struct reada_control { diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 6f662b34ba0..fa27b4e3b6c 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -920,9 +920,9 @@ void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info) percpu_counter_inc(&fs_info->bio_counter); } -void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info) +void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount) { - percpu_counter_dec(&fs_info->bio_counter); + percpu_counter_sub(&fs_info->bio_counter, amount); if (waitqueue_active(&fs_info->replace_wait)) wake_up(&fs_info->replace_wait); diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 5ece565bc5f..8ab2a17bbba 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -162,6 +162,8 @@ struct btrfs_raid_bio { */ int bio_list_bytes; + int generic_bio_cnt; + atomic_t refs; atomic_t stripes_pending; @@ -354,6 +356,7 @@ static void merge_rbio(struct btrfs_raid_bio *dest, { bio_list_merge(&dest->bio_list, &victim->bio_list); dest->bio_list_bytes += victim->bio_list_bytes; + dest->generic_bio_cnt += victim->generic_bio_cnt; bio_list_init(&victim->bio_list); } @@ -891,6 +894,10 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err, int uptodate) { struct bio *cur = bio_list_get(&rbio->bio_list); struct bio *next; + + if (rbio->generic_bio_cnt) + btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt); + free_raid_bio(rbio); while (cur) { @@ -1775,6 +1782,7 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio, struct btrfs_raid_bio *rbio; struct btrfs_plug_cb *plug = NULL; struct blk_plug_cb *cb; + int ret; rbio = alloc_rbio(root, bbio, raid_map, stripe_len); if (IS_ERR(rbio)) { @@ -1785,12 +1793,19 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio, rbio->bio_list_bytes = bio->bi_iter.bi_size; rbio->operation = BTRFS_RBIO_WRITE; + btrfs_bio_counter_inc_noblocked(root->fs_info); + rbio->generic_bio_cnt = 1; + /* * don't plug on full rbios, just get them out the door * as quickly as we can */ - if (rbio_is_full(rbio)) - return full_stripe_write(rbio); + if (rbio_is_full(rbio)) { + ret = full_stripe_write(rbio); + if (ret) + btrfs_bio_counter_dec(root->fs_info); + return ret; + } cb = blk_check_plugged(btrfs_raid_unplug, root->fs_info, sizeof(*plug)); @@ -1801,10 +1816,13 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio, INIT_LIST_HEAD(&plug->rbio_list); } list_add_tail(&rbio->plug_list, &plug->rbio_list); + ret = 0; } else { - return __raid56_parity_write(rbio); + ret = __raid56_parity_write(rbio); + if (ret) + btrfs_bio_counter_dec(root->fs_info); } - return 0; + return ret; } /* @@ -2139,19 +2157,17 @@ cleanup: */ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, struct btrfs_bio *bbio, u64 *raid_map, - u64 stripe_len, int mirror_num, int hold_bbio) + u64 stripe_len, int mirror_num, int generic_io) { struct btrfs_raid_bio *rbio; int ret; rbio = alloc_rbio(root, bbio, raid_map, stripe_len); if (IS_ERR(rbio)) { - __free_bbio_and_raid_map(bbio, raid_map, !hold_bbio); + __free_bbio_and_raid_map(bbio, raid_map, generic_io); return PTR_ERR(rbio); } - if (hold_bbio) - set_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags); rbio->operation = BTRFS_RBIO_READ_REBUILD; bio_list_add(&rbio->bio_list, bio); rbio->bio_list_bytes = bio->bi_iter.bi_size; @@ -2159,11 +2175,18 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, rbio->faila = find_logical_bio_stripe(rbio, bio); if (rbio->faila == -1) { BUG(); - __free_bbio_and_raid_map(bbio, raid_map, !hold_bbio); + __free_bbio_and_raid_map(bbio, raid_map, generic_io); kfree(rbio); return -EIO; } + if (generic_io) { + btrfs_bio_counter_inc_noblocked(root->fs_info); + rbio->generic_bio_cnt = 1; + } else { + set_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags); + } + /* * reconstruct from the q stripe if they are * asking for mirror 3 diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h index 3d4ddb3d861..31d4a157b5e 100644 --- a/fs/btrfs/raid56.h +++ b/fs/btrfs/raid56.h @@ -43,8 +43,8 @@ struct btrfs_raid_bio; struct btrfs_device; int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, - struct btrfs_bio *bbio, u64 *raid_map, - u64 stripe_len, int mirror_num, int hold_bbio); + struct btrfs_bio *bbio, u64 *raid_map, + u64 stripe_len, int mirror_num, int generic_io); int raid56_parity_write(struct btrfs_root *root, struct bio *bio, struct btrfs_bio *bbio, u64 *raid_map, u64 stripe_len); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 0ae837fd676..27f2e16cd25 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1477,7 +1477,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info, ret = raid56_parity_recover(fs_info->fs_root, bio, page->recover->bbio, page->recover->raid_map, page->recover->map_length, - page->mirror_num, 1); + page->mirror_num, 0); if (ret) return ret; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 6d8a5e8d8c3..cbb766577f3 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5843,12 +5843,9 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, } else { ret = raid56_parity_recover(root, bio, bbio, raid_map, map_length, - mirror_num, 0); + mirror_num, 1); } - /* - * FIXME, replace dosen't support raid56 yet, please fix - * it in the future. - */ + btrfs_bio_counter_dec(root->fs_info); return ret; } -- cgit v1.2.3-70-g09d2 From 5d3edd8f44aac94de7b16f4c54290e24f5e8c532 Mon Sep 17 00:00:00 2001 From: Zhao Lei Date: Thu, 13 Nov 2014 11:45:38 +0800 Subject: Btrfs, replace: enable dev-replace for raid56 Signed-off-by: Zhao Lei Signed-off-by: Miao Xie --- fs/btrfs/dev-replace.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'fs/btrfs/dev-replace.c') diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index fa27b4e3b6c..0bf41f8b1e2 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -316,11 +316,6 @@ int btrfs_dev_replace_start(struct btrfs_root *root, struct btrfs_device *tgt_device = NULL; struct btrfs_device *src_device = NULL; - if (btrfs_fs_incompat(fs_info, RAID56)) { - btrfs_warn(fs_info, "dev_replace cannot yet handle RAID5/RAID6"); - return -EOPNOTSUPP; - } - switch (args->start.cont_reading_from_srcdev_mode) { case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS: case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID: -- cgit v1.2.3-70-g09d2