From 63dd86fa79db737a50f47488e5249f24e5acebc1 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Wed, 13 Aug 2014 14:24:21 +0800 Subject: btrfs: fix rw_devices miss match after seed replace reproducer: reproducer: mount /dev/sdb /btrfs btrfs dev add /dev/sdc /btrfs btrfs rep start -B /dev/sdb /dev/sdd /btrfs umount /btrfs WARNING: CPU: 0 PID: 3882 at fs/btrfs/volumes.c:892 __btrfs_close_devices+0x1c8/0x200 [btrfs]() which is WARN_ON(fs_devices->rw_devices); The problem here is that we did not add one to the rw_devices when we replace the seed device with a writable device. Signed-off-by: Anand Jain Signed-off-by: Chris Mason --- fs/btrfs/dev-replace.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/dev-replace.c') diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index eea26e1b2fd..fb0a7fa2f70 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -562,6 +562,8 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, if (fs_info->fs_devices->latest_bdev == src_device->bdev) fs_info->fs_devices->latest_bdev = tgt_device->bdev; list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); + if (src_device->fs_devices->seeding) + fs_info->fs_devices->rw_devices++; /* replace the sysfs entry */ btrfs_kobj_rm_device(fs_info, src_device); -- cgit v1.2.3-70-g09d2 From de4c296f63b43794df453a3fffbb4163ccd1c6af Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Wed, 13 Aug 2014 14:24:25 +0800 Subject: btrfs: fix typo in the log message there is no matching open parenthesis for the closing parenthesis Signed-off-by: Anand Jain Signed-off-by: Chris Mason --- fs/btrfs/dev-replace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/dev-replace.c') diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index fb0a7fa2f70..64657b3ae97 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -542,7 +542,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, } 
printk_in_rcu(KERN_INFO - "BTRFS: dev_replace from %s (devid %llu) to %s) finished\n", + "BTRFS: dev_replace from %s (devid %llu) to %s finished\n", src_device->missing ? "" : rcu_str_deref(src_device->name), src_device->devid, -- cgit v1.2.3-70-g09d2 From 12b894cb288d57292b01cf158177b6d5c89a6272 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 20 Aug 2014 16:10:15 +0800 Subject: btrfs: Fix a deadlock in btrfs_dev_replace_finishing() btrfs-transacion:5657 [stack snip] btrfs_bio_map() btrfs_bio_counter_inc_blocked() percpu_counter_inc(&fs_info->bio_counter) ###bio_counter > 0(A) __btrfs_bio_map() btrfs_dev_replace_lock() mutex_lock(dev_replace->lock) ###wait mutex(B) btrfs:32612 [stack snip] btrfs_dev_replace_start() btrfs_dev_replace_lock() mutex_lock(dev_replace->lock) ###hold mutex(B) btrfs_dev_replace_finishing() btrfs_rm_dev_replace_blocked() wait until percpu_counter_sum == 0 ###wait on bio_counter(A) This bug can be triggered quite easily by the following test script: http://pastebin.com/MQmb37Cy This patch will fix the ABBA problem by calling btrfs_dev_replace_unlock() before btrfs_rm_dev_replace_blocked(). The consistency of btrfs devices list and their superblocks is protected by device_list_mutex, not btrfs_dev_replace_lock/unlock(). So it is safe to move btrfs_dev_replace_unlock() before btrfs_rm_dev_replace_blocked(). 
Reported-by: Zhao Lei Signed-off-by: Qu Wenruo Cc: Stefan Behrens Signed-off-by: Chris Mason --- fs/btrfs/dev-replace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/dev-replace.c') diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 64657b3ae97..a85b5f53856 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -569,6 +569,8 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, btrfs_kobj_rm_device(fs_info, src_device); btrfs_kobj_add_device(fs_info, tgt_device); + btrfs_dev_replace_unlock(dev_replace); + btrfs_rm_dev_replace_blocked(fs_info); btrfs_rm_dev_replace_srcdev(fs_info, src_device); @@ -582,7 +584,6 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, * superblock is scratched out so that it is no longer marked to * belong to this filesystem. */ - btrfs_dev_replace_unlock(dev_replace); mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); mutex_unlock(&root->fs_info->chunk_mutex); -- cgit v1.2.3-70-g09d2 From c7662111c741bc04a7192f2a00aad608cbc0b205 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 3 Sep 2014 21:35:31 +0800 Subject: Btrfs: cleanup double assignment of device->bytes_used when device replace finishes Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/dev-replace.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/btrfs/dev-replace.c') diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index a85b5f53856..10dfb41f4c2 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -550,7 +550,6 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, tgt_device->is_tgtdev_for_dev_replace = 0; tgt_device->devid = src_device->devid; src_device->devid = BTRFS_DEV_REPLACE_DEVID; - tgt_device->bytes_used = src_device->bytes_used; memcpy(uuid_tmp, tgt_device->uuid, sizeof(uuid_tmp)); memcpy(tgt_device->uuid, src_device->uuid, sizeof(tgt_device->uuid)); memcpy(src_device->uuid, uuid_tmp, 
sizeof(src_device->uuid)); -- cgit v1.2.3-70-g09d2 From 1c43366d3b3f0fa6c6e81aaf3aa18e0550245dad Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 3 Sep 2014 21:35:32 +0800 Subject: Btrfs: fix unprotected assignment of the target device We didn't protect the assignment of the target device, which might cause the super block update to be skipped because we might see a wrong size for the target device during the assignment. Fix it by moving the assignment statements into the initialization function of the target device. Another benefit is that we can check earlier whether the target device is suitable. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/dev-replace.c | 32 ++++++++------------------------ fs/btrfs/volumes.c | 23 +++++++++++++++++++---- fs/btrfs/volumes.h | 1 + 3 files changed, 28 insertions(+), 28 deletions(-) (limited to 'fs/btrfs/dev-replace.c') diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 10dfb41f4c2..72dc02e8294 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -330,29 +330,19 @@ int btrfs_dev_replace_start(struct btrfs_root *root, return -EINVAL; mutex_lock(&fs_info->volume_mutex); - ret = btrfs_init_dev_replace_tgtdev(root, args->start.tgtdev_name, - &tgt_device); - if (ret) { - btrfs_err(fs_info, "target device %s is invalid!", - args->start.tgtdev_name); - mutex_unlock(&fs_info->volume_mutex); - return -EINVAL; - } - ret = btrfs_dev_replace_find_srcdev(root, args->start.srcdevid, args->start.srcdev_name, &src_device); - mutex_unlock(&fs_info->volume_mutex); if (ret) { - ret = -EINVAL; - goto leave_no_lock; + mutex_unlock(&fs_info->volume_mutex); + return ret; } - if (tgt_device->total_bytes < src_device->total_bytes) { - btrfs_err(fs_info, "target device is smaller than source device!"); - ret = -EINVAL; - goto leave_no_lock; - } + ret = btrfs_init_dev_replace_tgtdev(root, args->start.tgtdev_name, + src_device, &tgt_device); + mutex_unlock(&fs_info->volume_mutex); + if (ret) + 
return ret; btrfs_dev_replace_lock(dev_replace); switch (dev_replace->replace_state) { @@ -380,10 +370,6 @@ int btrfs_dev_replace_start(struct btrfs_root *root, src_device->devid, rcu_str_deref(tgt_device->name)); - tgt_device->total_bytes = src_device->total_bytes; - tgt_device->disk_total_bytes = src_device->disk_total_bytes; - tgt_device->bytes_used = src_device->bytes_used; - /* * from now on, the writes to the srcdev are all duplicated to * go to the tgtdev as well (refer to btrfs_map_block()). @@ -426,9 +412,7 @@ leave: dev_replace->srcdev = NULL; dev_replace->tgtdev = NULL; btrfs_dev_replace_unlock(dev_replace); -leave_no_lock: - if (tgt_device) - btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); + btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); return ret; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 483fc6d4529..1646659f280 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2295,6 +2295,7 @@ error: } int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, + struct btrfs_device *srcdev, struct btrfs_device **device_out) { struct request_queue *q; @@ -2307,24 +2308,37 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, int ret = 0; *device_out = NULL; - if (fs_info->fs_devices->seeding) + if (fs_info->fs_devices->seeding) { + btrfs_err(fs_info, "the filesystem is a seed filesystem!"); return -EINVAL; + } bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, fs_info->bdev_holder); - if (IS_ERR(bdev)) + if (IS_ERR(bdev)) { + btrfs_err(fs_info, "target device %s is invalid!", device_path); return PTR_ERR(bdev); + } filemap_write_and_wait(bdev->bd_inode->i_mapping); devices = &fs_info->fs_devices->devices; list_for_each_entry(device, devices, dev_list) { if (device->bdev == bdev) { + btrfs_err(fs_info, "target device is in the filesystem!"); ret = -EEXIST; goto error; } } + + if (i_size_read(bdev->bd_inode) < srcdev->total_bytes) { + btrfs_err(fs_info, "target 
device is smaller than source device!"); + ret = -EINVAL; + goto error; + } + + device = btrfs_alloc_device(NULL, &devid, NULL); if (IS_ERR(device)) { ret = PTR_ERR(device); @@ -2348,8 +2362,9 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, device->io_width = root->sectorsize; device->io_align = root->sectorsize; device->sector_size = root->sectorsize; - device->total_bytes = i_size_read(bdev->bd_inode); - device->disk_total_bytes = device->total_bytes; + device->total_bytes = srcdev->total_bytes; + device->disk_total_bytes = srcdev->disk_total_bytes; + device->bytes_used = srcdev->bytes_used; device->dev_root = fs_info->dev_root; device->bdev = bdev; device->in_fs_metadata = 1; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 37f8bff97df..e15f2886d33 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -322,6 +322,7 @@ struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid, int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); int btrfs_init_new_device(struct btrfs_root *root, char *path); int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, + struct btrfs_device *srcdev, struct btrfs_device **device_out); int btrfs_balance(struct btrfs_balance_control *bctl, struct btrfs_ioctl_balance_args *bargs); -- cgit v1.2.3-70-g09d2 From 935e5cc935bcbf9b3d0dd59fed7dbc0f2ebca6bc Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 3 Sep 2014 21:35:33 +0800 Subject: Btrfs: fix wrong disk size when writing super blocks total_bytes will be changed when resizing a device, and disk_total_bytes will be changed if the resize succeeds. Meanwhile, the on-disk super blocks of the previous transaction might not be updated. Considering the consistency of the metadata in the previous transaction, we should use the size in the previous transaction to check if the super block is beyond the boundary of the device. Fix it. 
Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/check-integrity.c | 2 +- fs/btrfs/dev-replace.c | 18 ++++++++++++++++++ fs/btrfs/disk-io.c | 5 +++-- fs/btrfs/scrub.c | 3 ++- fs/btrfs/transaction.c | 2 ++ fs/btrfs/volumes.c | 40 +++++++++++++++++++++++++++++++++++++++- fs/btrfs/volumes.h | 18 ++++++++++++++++++ 7 files changed, 83 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/dev-replace.c') diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index e0033c843ce..cb7f3fe9c9f 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -807,7 +807,7 @@ static int btrfsic_process_superblock_dev_mirror( /* super block bytenr is always the unmapped device bytenr */ dev_bytenr = btrfs_sb_offset(superblock_mirror_num); - if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes) + if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->commit_total_bytes) return -1; bh = __bread(superblock_bdev, dev_bytenr / 4096, BTRFS_SUPER_INFO_SIZE); diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 72dc02e8294..7877b0fc6a8 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -168,6 +168,8 @@ no_valid_dev_replace_entry_found: dev_replace->srcdev->total_bytes; dev_replace->tgtdev->disk_total_bytes = dev_replace->srcdev->disk_total_bytes; + dev_replace->tgtdev->commit_total_bytes = + dev_replace->srcdev->commit_total_bytes; dev_replace->tgtdev->bytes_used = dev_replace->srcdev->bytes_used; } @@ -329,6 +331,20 @@ int btrfs_dev_replace_start(struct btrfs_root *root, args->start.tgtdev_name[0] == '\0') return -EINVAL; + /* + * Here we commit the transaction to make sure commit_total_bytes + * of all the devices are updated. 
+ */ + trans = btrfs_attach_transaction(root); + if (!IS_ERR(trans)) { + ret = btrfs_commit_transaction(trans, root); + if (ret) + return ret; + } else if (PTR_ERR(trans) != -ENOENT) { + return PTR_ERR(trans); + } + + /* the disk copy procedure reuses the scrub code */ mutex_lock(&fs_info->volume_mutex); ret = btrfs_dev_replace_find_srcdev(root, args->start.srcdevid, args->start.srcdev_name, @@ -539,6 +555,8 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, memcpy(src_device->uuid, uuid_tmp, sizeof(src_device->uuid)); tgt_device->total_bytes = src_device->total_bytes; tgt_device->disk_total_bytes = src_device->disk_total_bytes; + ASSERT(list_empty(&src_device->resized_list)); + tgt_device->commit_total_bytes = src_device->commit_total_bytes; tgt_device->bytes_used = src_device->bytes_used; if (fs_info->sb->s_bdev == src_device->bdev) fs_info->sb->s_bdev = tgt_device->bdev; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index dbd792754b2..0cd18b72555 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3127,7 +3127,8 @@ static int write_dev_supers(struct btrfs_device *device, for (i = 0; i < max_mirrors; i++) { bytenr = btrfs_sb_offset(i); - if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes) + if (bytenr + BTRFS_SUPER_INFO_SIZE >= + device->commit_total_bytes) break; if (wait) { @@ -3444,7 +3445,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) btrfs_set_stack_device_type(dev_item, dev->type); btrfs_set_stack_device_id(dev_item, dev->devid); btrfs_set_stack_device_total_bytes(dev_item, - dev->disk_total_bytes); + dev->commit_total_bytes); btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used); btrfs_set_stack_device_io_align(dev_item, dev->io_align); btrfs_set_stack_device_io_width(dev_item, dev->io_width); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 72c8981e7c0..9d80e37044d 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2840,7 +2840,8 @@ static noinline_for_stack int 
scrub_supers(struct scrub_ctx *sctx, for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { bytenr = btrfs_sb_offset(i); - if (bytenr + BTRFS_SUPER_INFO_SIZE > scrub_dev->total_bytes) + if (bytenr + BTRFS_SUPER_INFO_SIZE > + scrub_dev->commit_total_bytes) break; ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index e336646508f..2f7c0bef404 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1868,6 +1868,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy, sizeof(*root->fs_info->super_copy)); + btrfs_update_commit_device_size(root->fs_info); + spin_lock(&root->fs_info->trans_lock); cur_trans->state = TRANS_STATE_UNBLOCKED; root->fs_info->running_transaction = NULL; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1646659f280..7b5c04259a6 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -74,6 +74,7 @@ static struct btrfs_fs_devices *__alloc_fs_devices(void) mutex_init(&fs_devs->device_list_mutex); INIT_LIST_HEAD(&fs_devs->devices); + INIT_LIST_HEAD(&fs_devs->resized_devices); INIT_LIST_HEAD(&fs_devs->alloc_list); INIT_LIST_HEAD(&fs_devs->list); @@ -154,6 +155,7 @@ static struct btrfs_device *__alloc_device(void) INIT_LIST_HEAD(&dev->dev_list); INIT_LIST_HEAD(&dev->dev_alloc_list); + INIT_LIST_HEAD(&dev->resized_list); spin_lock_init(&dev->io_lock); @@ -2168,6 +2170,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) device->sector_size = root->sectorsize; device->total_bytes = i_size_read(bdev->bd_inode); device->disk_total_bytes = device->total_bytes; + device->commit_total_bytes = device->total_bytes; device->dev_root = root->fs_info->dev_root; device->bdev = bdev; device->in_fs_metadata = 1; @@ -2364,6 +2367,8 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, device->sector_size = root->sectorsize; device->total_bytes = 
srcdev->total_bytes; device->disk_total_bytes = srcdev->disk_total_bytes; + ASSERT(list_empty(&srcdev->resized_list)); + device->commit_total_bytes = srcdev->commit_total_bytes; device->bytes_used = srcdev->bytes_used; device->dev_root = fs_info->dev_root; device->bdev = bdev; @@ -2448,6 +2453,7 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans, { struct btrfs_super_block *super_copy = device->dev_root->fs_info->super_copy; + struct btrfs_fs_devices *fs_devices; u64 old_total = btrfs_super_total_bytes(super_copy); u64 diff = new_size - device->total_bytes; @@ -2457,12 +2463,17 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans, device->is_tgtdev_for_dev_replace) return -EINVAL; + fs_devices = device->dev_root->fs_info->fs_devices; + btrfs_set_super_total_bytes(super_copy, old_total + diff); device->fs_devices->total_rw_bytes += diff; device->total_bytes = new_size; device->disk_total_bytes = new_size; btrfs_clear_space_info_full(device->dev_root->fs_info); + if (list_empty(&device->resized_list)) + list_add_tail(&device->resized_list, + &fs_devices->resized_devices); return btrfs_update_device(trans, device); } @@ -4011,8 +4022,11 @@ again: } lock_chunks(root); - device->disk_total_bytes = new_size; + if (list_empty(&device->resized_list)) + list_add_tail(&device->resized_list, + &root->fs_info->fs_devices->resized_devices); + /* Now btrfs_update_device() will change the on-disk size. 
*/ ret = btrfs_update_device(trans, device); if (ret) { @@ -5993,6 +6007,7 @@ static void fill_device_from_item(struct extent_buffer *leaf, device->devid = btrfs_device_id(leaf, dev_item); device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item); device->total_bytes = device->disk_total_bytes; + device->commit_total_bytes = device->disk_total_bytes; device->bytes_used = btrfs_device_bytes_used(leaf, dev_item); device->type = btrfs_device_type(leaf, dev_item); device->io_align = btrfs_device_io_align(leaf, dev_item); @@ -6520,3 +6535,26 @@ int btrfs_scratch_superblock(struct btrfs_device *device) return 0; } + +/* + * Update the size of all devices, which is used for writing out the + * super blocks. + */ +void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info) +{ + struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; + struct btrfs_device *curr, *next; + + if (list_empty(&fs_devices->resized_devices)) + return; + + mutex_lock(&fs_devices->device_list_mutex); + lock_chunks(fs_info->dev_root); + list_for_each_entry_safe(curr, next, &fs_devices->resized_devices, + resized_list) { + list_del_init(&curr->resized_list); + curr->commit_total_bytes = curr->disk_total_bytes; + } + unlock_chunks(fs_info->dev_root); + mutex_unlock(&fs_devices->device_list_mutex); +} diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index e15f2886d33..b30d018fa35 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -87,6 +87,21 @@ struct btrfs_device { /* physical drive uuid (or lvm uuid) */ u8 uuid[BTRFS_UUID_SIZE]; + /* + * size of the device on the current transaction + * + * This variant is update when committing the transaction, + * and protected by device_list_mutex + */ + u64 commit_total_bytes; + + /* + * used to manage the device which is resized + * + * It is protected by chunk_lock. 
+ */ + struct list_head resized_list; + /* for sending down flush barriers */ int nobarriers; struct bio *flush_bio; @@ -136,6 +151,7 @@ struct btrfs_fs_devices { struct mutex device_list_mutex; struct list_head devices; + struct list_head resized_devices; /* devices not currently being allocated */ struct list_head alloc_list; struct list_head list; @@ -402,4 +418,6 @@ static inline void btrfs_dev_stat_reset(struct btrfs_device *dev, { btrfs_dev_stat_set(dev, index, 0); } + +void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info); #endif -- cgit v1.2.3-70-g09d2 From ce7213c70c37e3a66bc0b50c45edcbfea505f62f Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 3 Sep 2014 21:35:34 +0800 Subject: Btrfs: fix wrong device bytes_used in the super block device->bytes_used will be changed when allocating a new chunk, and disk_total_bytes will be changed if the resize succeeds. Meanwhile, the on-disk super blocks of the previous transaction might not be updated. Considering the consistency of the metadata in the previous transaction, we should use the size in the previous transaction to check if the super block is beyond the boundary of the device. Though it is not a big problem because we don't use it now, it is better to keep it consistent with the common metadata; maybe we will use it in the future. 
Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/dev-replace.c | 3 +++ fs/btrfs/disk-io.c | 3 ++- fs/btrfs/transaction.c | 1 + fs/btrfs/volumes.c | 27 +++++++++++++++++++++++++++ fs/btrfs/volumes.h | 4 ++++ 5 files changed, 37 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/dev-replace.c') diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 7877b0fc6a8..1be03d85d26 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -172,6 +172,8 @@ no_valid_dev_replace_entry_found: dev_replace->srcdev->commit_total_bytes; dev_replace->tgtdev->bytes_used = dev_replace->srcdev->bytes_used; + dev_replace->tgtdev->commit_bytes_used = + dev_replace->srcdev->commit_bytes_used; } dev_replace->tgtdev->is_tgtdev_for_dev_replace = 1; btrfs_init_dev_replace_tgtdev_for_resume(fs_info, @@ -558,6 +560,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, ASSERT(list_empty(&src_device->resized_list)); tgt_device->commit_total_bytes = src_device->commit_total_bytes; tgt_device->bytes_used = src_device->bytes_used; + tgt_device->commit_bytes_used = src_device->bytes_used; if (fs_info->sb->s_bdev == src_device->bdev) fs_info->sb->s_bdev = tgt_device->bdev; if (fs_info->fs_devices->latest_bdev == src_device->bdev) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0cd18b72555..a224fb9b34a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3446,7 +3446,8 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) btrfs_set_stack_device_id(dev_item, dev->devid); btrfs_set_stack_device_total_bytes(dev_item, dev->commit_total_bytes); - btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used); + btrfs_set_stack_device_bytes_used(dev_item, + dev->commit_bytes_used); btrfs_set_stack_device_io_align(dev_item, dev->io_align); btrfs_set_stack_device_io_width(dev_item, dev->io_width); btrfs_set_stack_device_sector_size(dev_item, dev->sector_size); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c 
index 2f7c0bef404..16d0c1b62b3 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1869,6 +1869,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, sizeof(*root->fs_info->super_copy)); btrfs_update_commit_device_size(root->fs_info); + btrfs_update_commit_device_bytes_used(root, cur_trans); spin_lock(&root->fs_info->trans_lock); cur_trans->state = TRANS_STATE_UNBLOCKED; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 7b5c04259a6..f8273bb53b3 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2370,6 +2370,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, ASSERT(list_empty(&srcdev->resized_list)); device->commit_total_bytes = srcdev->commit_total_bytes; device->bytes_used = srcdev->bytes_used; + device->commit_bytes_used = device->bytes_used; device->dev_root = fs_info->dev_root; device->bdev = bdev; device->in_fs_metadata = 1; @@ -6009,6 +6010,7 @@ static void fill_device_from_item(struct extent_buffer *leaf, device->total_bytes = device->disk_total_bytes; device->commit_total_bytes = device->disk_total_bytes; device->bytes_used = btrfs_device_bytes_used(leaf, dev_item); + device->commit_bytes_used = device->bytes_used; device->type = btrfs_device_type(leaf, dev_item); device->io_align = btrfs_device_io_align(leaf, dev_item); device->io_width = btrfs_device_io_width(leaf, dev_item); @@ -6558,3 +6560,28 @@ void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info) unlock_chunks(fs_info->dev_root); mutex_unlock(&fs_devices->device_list_mutex); } + +/* Must be invoked during the transaction commit */ +void btrfs_update_commit_device_bytes_used(struct btrfs_root *root, + struct btrfs_transaction *transaction) +{ + struct extent_map *em; + struct map_lookup *map; + struct btrfs_device *dev; + int i; + + if (list_empty(&transaction->pending_chunks)) + return; + + /* In order to kick the device replace finish process */ + lock_chunks(root); + list_for_each_entry(em, 
&transaction->pending_chunks, list) { + map = (struct map_lookup *)em->bdev; + + for (i = 0; i < map->num_stripes; i++) { + dev = map->stripes[i].dev; + dev->commit_bytes_used = dev->bytes_used; + } + } + unlock_chunks(root); +} diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index b30d018fa35..f79d532fedb 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -95,6 +95,8 @@ struct btrfs_device { */ u64 commit_total_bytes; + /* bytes used on the current transaction */ + u64 commit_bytes_used; /* * used to manage the device which is resized * @@ -420,4 +422,6 @@ static inline void btrfs_dev_stat_reset(struct btrfs_device *dev, } void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info); +void btrfs_update_commit_device_bytes_used(struct btrfs_root *root, + struct btrfs_transaction *transaction); #endif -- cgit v1.2.3-70-g09d2 From 7cc8e58d53cd2295c3c1cee7b503bd1790ea4486 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 3 Sep 2014 21:35:38 +0800 Subject: Btrfs: fix unprotected device's variants on 32bits machine ->total_bytes,->disk_total_bytes,->bytes_used is protected by chunk lock when we change them, but sometimes we read them without any lock, and we might get unexpected value. We fix this problem like inode's i_size. 
Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/dev-replace.c | 15 +++++---- fs/btrfs/ioctl.c | 6 ++-- fs/btrfs/volumes.c | 48 +++++++++++++++++------------ fs/btrfs/volumes.h | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 124 insertions(+), 29 deletions(-) (limited to 'fs/btrfs/dev-replace.c') diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 1be03d85d26..da7ac1432b1 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -418,7 +418,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, /* the disk copy procedure reuses the scrub code */ ret = btrfs_scrub_dev(fs_info, src_device->devid, 0, - src_device->total_bytes, + btrfs_device_get_total_bytes(src_device), &dev_replace->scrub_progress, 0, 1); ret = btrfs_dev_replace_finishing(root->fs_info, ret); @@ -555,11 +555,12 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, memcpy(uuid_tmp, tgt_device->uuid, sizeof(uuid_tmp)); memcpy(tgt_device->uuid, src_device->uuid, sizeof(tgt_device->uuid)); memcpy(src_device->uuid, uuid_tmp, sizeof(src_device->uuid)); - tgt_device->total_bytes = src_device->total_bytes; - tgt_device->disk_total_bytes = src_device->disk_total_bytes; + btrfs_device_set_total_bytes(tgt_device, src_device->total_bytes); + btrfs_device_set_disk_total_bytes(tgt_device, + src_device->disk_total_bytes); + btrfs_device_set_bytes_used(tgt_device, src_device->bytes_used); ASSERT(list_empty(&src_device->resized_list)); tgt_device->commit_total_bytes = src_device->commit_total_bytes; - tgt_device->bytes_used = src_device->bytes_used; tgt_device->commit_bytes_used = src_device->bytes_used; if (fs_info->sb->s_bdev == src_device->bdev) fs_info->sb->s_bdev = tgt_device->bdev; @@ -650,6 +651,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_dev_replace_args *args) { struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; + struct btrfs_device *srcdev; 
btrfs_dev_replace_lock(dev_replace); /* even if !dev_replace_is_valid, the values are good enough for @@ -672,8 +674,9 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, break; case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: + srcdev = dev_replace->srcdev; args->status.progress_1000 = div64_u64(dev_replace->cursor_left, - div64_u64(dev_replace->srcdev->total_bytes, 1000)); + div64_u64(btrfs_device_get_total_bytes(srcdev), 1000)); break; } btrfs_dev_replace_unlock(dev_replace); @@ -832,7 +835,7 @@ static int btrfs_dev_replace_continue_on_mount(struct btrfs_fs_info *fs_info) ret = btrfs_scrub_dev(fs_info, dev_replace->srcdev->devid, dev_replace->committed_cursor_left, - dev_replace->srcdev->total_bytes, + btrfs_device_get_total_bytes(dev_replace->srcdev), &dev_replace->scrub_progress, 0, 1); ret = btrfs_dev_replace_finishing(fs_info, ret); WARN_ON(ret); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f60d1ca389f..0ff212757b9 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1553,7 +1553,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, goto out_free; } - old_size = device->total_bytes; + old_size = btrfs_device_get_total_bytes(device); if (mod < 0) { if (new_size > old_size) { @@ -2740,8 +2740,8 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) } di_args->devid = dev->devid; - di_args->bytes_used = dev->bytes_used; - di_args->total_bytes = dev->total_bytes; + di_args->bytes_used = btrfs_device_get_bytes_used(dev); + di_args->total_bytes = btrfs_device_get_total_bytes(dev); memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); if (dev->name) { struct rcu_string *name; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d8e4a3d1ad8..41da102cdcc 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1308,7 +1308,7 @@ again: if (device->bytes_used > 0) { u64 len = btrfs_dev_extent_length(leaf, extent); - device->bytes_used -= len; + 
btrfs_device_set_bytes_used(device, device->bytes_used - len); spin_lock(&root->fs_info->free_chunk_lock); root->fs_info->free_chunk_space += len; spin_unlock(&root->fs_info->free_chunk_lock); @@ -1462,8 +1462,10 @@ static int btrfs_add_device(struct btrfs_trans_handle *trans, btrfs_set_device_io_align(leaf, dev_item, device->io_align); btrfs_set_device_io_width(leaf, dev_item, device->io_width); btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); - btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes); - btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); + btrfs_set_device_total_bytes(leaf, dev_item, + btrfs_device_get_disk_total_bytes(device)); + btrfs_set_device_bytes_used(leaf, dev_item, + btrfs_device_get_bytes_used(device)); btrfs_set_device_group(leaf, dev_item, 0); btrfs_set_device_seek_speed(leaf, dev_item, 0); btrfs_set_device_bandwidth(leaf, dev_item, 0); @@ -2330,7 +2332,8 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, } - if (i_size_read(bdev->bd_inode) < srcdev->total_bytes) { + if (i_size_read(bdev->bd_inode) < + btrfs_device_get_total_bytes(srcdev)) { btrfs_err(fs_info, "target device is smaller than source device!"); ret = -EINVAL; goto error; @@ -2360,11 +2363,11 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, device->io_width = root->sectorsize; device->io_align = root->sectorsize; device->sector_size = root->sectorsize; - device->total_bytes = srcdev->total_bytes; - device->disk_total_bytes = srcdev->disk_total_bytes; + device->total_bytes = btrfs_device_get_total_bytes(srcdev); + device->disk_total_bytes = btrfs_device_get_disk_total_bytes(srcdev); + device->bytes_used = btrfs_device_get_bytes_used(srcdev); ASSERT(list_empty(&srcdev->resized_list)); device->commit_total_bytes = srcdev->commit_total_bytes; - device->bytes_used = srcdev->bytes_used; device->commit_bytes_used = device->bytes_used; device->dev_root = fs_info->dev_root; 
device->bdev = bdev; @@ -2435,8 +2438,10 @@ static noinline int btrfs_update_device(struct btrfs_trans_handle *trans, btrfs_set_device_io_align(leaf, dev_item, device->io_align); btrfs_set_device_io_width(leaf, dev_item, device->io_width); btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); - btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes); - btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); + btrfs_set_device_total_bytes(leaf, dev_item, + btrfs_device_get_disk_total_bytes(device)); + btrfs_set_device_bytes_used(leaf, dev_item, + btrfs_device_get_bytes_used(device)); btrfs_mark_buffer_dirty(leaf); out: @@ -2464,8 +2469,8 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans, btrfs_set_super_total_bytes(super_copy, old_total + diff); device->fs_devices->total_rw_bytes += diff; - device->total_bytes = new_size; - device->disk_total_bytes = new_size; + btrfs_device_set_total_bytes(device, new_size); + btrfs_device_set_disk_total_bytes(device, new_size); btrfs_clear_space_info_full(device->dev_root->fs_info); if (list_empty(&device->resized_list)) list_add_tail(&device->resized_list, @@ -3110,11 +3115,12 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) /* step one make some room on all the devices */ devices = &fs_info->fs_devices->devices; list_for_each_entry(device, devices, dev_list) { - old_size = device->total_bytes; + old_size = btrfs_device_get_total_bytes(device); size_to_free = div_factor(old_size, 1); size_to_free = min(size_to_free, (u64)1 * 1024 * 1024); if (!device->writeable || - device->total_bytes - device->bytes_used > size_to_free || + btrfs_device_get_total_bytes(device) - + btrfs_device_get_bytes_used(device) > size_to_free || device->is_tgtdev_for_dev_replace) continue; @@ -3920,8 +3926,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) struct btrfs_key key; struct btrfs_super_block *super_copy = root->fs_info->super_copy; u64 old_total = 
btrfs_super_total_bytes(super_copy); - u64 old_size = device->total_bytes; - u64 diff = device->total_bytes - new_size; + u64 old_size = btrfs_device_get_total_bytes(device); + u64 diff = old_size - new_size; if (device->is_tgtdev_for_dev_replace) return -EINVAL; @@ -3934,7 +3940,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) lock_chunks(root); - device->total_bytes = new_size; + btrfs_device_set_total_bytes(device, new_size); if (device->writeable) { device->fs_devices->total_rw_bytes -= diff; spin_lock(&root->fs_info->free_chunk_lock); @@ -4000,7 +4006,7 @@ again: ret = -ENOSPC; lock_chunks(root); - device->total_bytes = old_size; + btrfs_device_set_total_bytes(device, old_size); if (device->writeable) device->fs_devices->total_rw_bytes += diff; spin_lock(&root->fs_info->free_chunk_lock); @@ -4018,7 +4024,7 @@ again: } lock_chunks(root); - device->disk_total_bytes = new_size; + btrfs_device_set_disk_total_bytes(device, new_size); if (list_empty(&device->resized_list)) list_add_tail(&device->resized_list, &root->fs_info->fs_devices->resized_devices); @@ -4429,8 +4435,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (ret) goto error_del_extent; - for (i = 0; i < map->num_stripes; i++) - map->stripes[i].dev->bytes_used += stripe_size; + for (i = 0; i < map->num_stripes; i++) { + num_bytes = map->stripes[i].dev->bytes_used + stripe_size; + btrfs_device_set_bytes_used(map->stripes[i].dev, num_bytes); + } spin_lock(&extent_root->fs_info->free_chunk_lock); extent_root->fs_info->free_chunk_space -= (stripe_size * diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index f79d532fedb..76600a3fedb 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -32,6 +32,19 @@ struct btrfs_pending_bios { struct bio *tail; }; +/* + * Use sequence counter to get consistent device stat data on + * 32-bit processors. 
+ */ +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +#include +#define __BTRFS_NEED_DEVICE_DATA_ORDERED +#define btrfs_device_data_ordered_init(device) \ + seqcount_init(&device->data_seqcount) +#else +#define btrfs_device_data_ordered_init(device) do { } while (0) +#endif + struct btrfs_device { struct list_head dev_list; struct list_head dev_alloc_list; @@ -61,6 +74,10 @@ struct btrfs_device { int can_discard; int is_tgtdev_for_dev_replace; +#ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED + seqcount_t data_seqcount; +#endif + /* the internal btrfs device id */ u64 devid; @@ -133,6 +150,73 @@ struct btrfs_device { atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX]; }; +/* + * If we read those variants at the context of their own lock, we needn't + * use the following helpers, reading them directly is safe. + */ +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +#define BTRFS_DEVICE_GETSET_FUNCS(name) \ +static inline u64 \ +btrfs_device_get_##name(const struct btrfs_device *dev) \ +{ \ + u64 size; \ + unsigned int seq; \ + \ + do { \ + seq = read_seqcount_begin(&dev->data_seqcount); \ + size = dev->name; \ + } while (read_seqcount_retry(&dev->data_seqcount, seq)); \ + return size; \ +} \ + \ +static inline void \ +btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \ +{ \ + preempt_disable(); \ + write_seqcount_begin(&dev->data_seqcount); \ + dev->name = size; \ + write_seqcount_end(&dev->data_seqcount); \ + preempt_enable(); \ +} +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) +#define BTRFS_DEVICE_GETSET_FUNCS(name) \ +static inline u64 \ +btrfs_device_get_##name(const struct btrfs_device *dev) \ +{ \ + u64 size; \ + \ + preempt_disable(); \ + size = dev->name; \ + preempt_enable(); \ + return size; \ +} \ + \ +static inline void \ +btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \ +{ \ + preempt_disable(); \ + dev->name = size; \ + preempt_enable(); \ +} +#else +#define BTRFS_DEVICE_GETSET_FUNCS(name) \ +static inline u64 \ 
+btrfs_device_get_##name(const struct btrfs_device *dev) \ +{ \ + return dev->name; \ +} \ + \ +static inline void \ +btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \ +{ \ + dev->name = size; \ +} +#endif + +BTRFS_DEVICE_GETSET_FUNCS(total_bytes); +BTRFS_DEVICE_GETSET_FUNCS(disk_total_bytes); +BTRFS_DEVICE_GETSET_FUNCS(bytes_used); + struct btrfs_fs_devices { u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ -- cgit v1.2.3-70-g09d2 From 2196d6e8a71fc901e31c1d81581fc6cc6c64913e Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 3 Sep 2014 21:35:41 +0800 Subject: Btrfs: Fix misuse of chunk mutex There were several problems with chunk mutex usage: - Locking the chunk mutex when updating metadata. It would cause a nested deadlock because updating metadata might need to allocate new chunks, which in turn need to acquire the chunk mutex. We remove the chunk mutex in this case, because the b-tree lock and other lock mechanisms can help us. - An ABBA deadlock occurred between device_list_mutex and chunk_mutex. When we update device status, we must acquire device_list_mutex at the beginning, and then we might get chunk_mutex during the device status update because we need to allocate new chunks for metadata COW. But in most places, we acquire chunk_mutex first and then acquire device_list_mutex. We need to change the lock order. - In some places we needn't acquire chunk_mutex. For example, we needn't get chunk_mutex when we free an empty seed fs_devices structure.
Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/dev-replace.c | 6 +-- fs/btrfs/extent-tree.c | 2 - fs/btrfs/volumes.c | 129 ++++++++++++++++++++++++------------------------- 3 files changed, 65 insertions(+), 72 deletions(-) (limited to 'fs/btrfs/dev-replace.c') diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index da7ac1432b1..aa4c82863c7 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -510,8 +510,8 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, WARN_ON(ret); /* keep away write_all_supers() during the finishing procedure */ - mutex_lock(&root->fs_info->chunk_mutex); mutex_lock(&root->fs_info->fs_devices->device_list_mutex); + mutex_lock(&root->fs_info->chunk_mutex); btrfs_dev_replace_lock(dev_replace); dev_replace->replace_state = scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED @@ -534,8 +534,8 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, src_device->devid, rcu_str_deref(tgt_device->name), scrub_ret); btrfs_dev_replace_unlock(dev_replace); - mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); mutex_unlock(&root->fs_info->chunk_mutex); + mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); if (tgt_device) btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); @@ -589,8 +589,8 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, * superblock is scratched out so that it is no longer marked to * belong to this filesystem. 
*/ - mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); mutex_unlock(&root->fs_info->chunk_mutex); + mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); /* write back the superblocks */ trans = btrfs_start_transaction(root, 0); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2191a2c7496..b30ddb49cfa 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9415,8 +9415,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, memcpy(&key, &block_group->key, sizeof(key)); - btrfs_clear_space_info_full(root->fs_info); - btrfs_put_block_group(block_group); btrfs_put_block_group(block_group); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9f22398d465..105c5fe004d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1264,7 +1264,7 @@ out: static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, - u64 start) + u64 start, u64 *dev_extent_len) { int ret; struct btrfs_path *path; @@ -1306,13 +1306,8 @@ again: goto out; } - if (device->bytes_used > 0) { - u64 len = btrfs_dev_extent_length(leaf, extent); - btrfs_device_set_bytes_used(device, device->bytes_used - len); - spin_lock(&root->fs_info->free_chunk_lock); - root->fs_info->free_chunk_space += len; - spin_unlock(&root->fs_info->free_chunk_lock); - } + *dev_extent_len = btrfs_dev_extent_length(leaf, extent); + ret = btrfs_del_item(trans, root, path); if (ret) { btrfs_error(root->fs_info, ret, @@ -1521,7 +1516,6 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, key.objectid = BTRFS_DEV_ITEMS_OBJECTID; key.type = BTRFS_DEV_ITEM_KEY; key.offset = device->devid; - lock_chunks(root); ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) @@ -1537,7 +1531,6 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, goto out; out: btrfs_free_path(path); - unlock_chunks(root); btrfs_commit_transaction(trans, root); return ret; } @@ -1726,9 +1719,7 @@ int btrfs_rm_device(struct btrfs_root 
*root, char *device_path) fs_devices = fs_devices->seed; } cur_devices->seed = NULL; - lock_chunks(root); __btrfs_close_devices(cur_devices); - unlock_chunks(root); free_fs_devices(cur_devices); } @@ -1990,11 +1981,12 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) mutex_lock(&root->fs_info->fs_devices->device_list_mutex); list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices, synchronize_rcu); + list_for_each_entry(device, &seed_devices->devices, dev_list) + device->fs_devices = seed_devices; + lock_chunks(root); list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list); - list_for_each_entry(device, &seed_devices->devices, dev_list) { - device->fs_devices = seed_devices; - } + unlock_chunks(root); fs_devices->seeding = 0; fs_devices->num_devices = 0; @@ -2155,8 +2147,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) goto error; } - lock_chunks(root); - q = bdev_get_queue(bdev); if (blk_queue_discard(q)) device->can_discard = 1; @@ -2185,6 +2175,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) device->fs_devices = root->fs_info->fs_devices; mutex_lock(&root->fs_info->fs_devices->device_list_mutex); + lock_chunks(root); list_add_rcu(&device->dev_list, &root->fs_info->fs_devices->devices); list_add(&device->dev_alloc_list, &root->fs_info->fs_devices->alloc_list); @@ -2212,15 +2203,34 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) /* add sysfs device entry */ btrfs_kobj_add_device(root->fs_info, device); + /* + * we've got more storage, clear any full flags on the space + * infos + */ + btrfs_clear_space_info_full(root->fs_info); + + unlock_chunks(root); mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); if (seeding_dev) { - char fsid_buf[BTRFS_UUID_UNPARSED_SIZE]; + lock_chunks(root); ret = init_first_rw_device(trans, root, device); + unlock_chunks(root); if (ret) { btrfs_abort_transaction(trans, root, ret); goto error_trans; } + } + + ret = 
btrfs_add_device(trans, root, device); + if (ret) { + btrfs_abort_transaction(trans, root, ret); + goto error_trans; + } + + if (seeding_dev) { + char fsid_buf[BTRFS_UUID_UNPARSED_SIZE]; + ret = btrfs_finish_sprout(trans, root); if (ret) { btrfs_abort_transaction(trans, root, ret); @@ -2234,21 +2244,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) root->fs_info->fsid); if (kobject_rename(&root->fs_info->super_kobj, fsid_buf)) goto error_trans; - } else { - ret = btrfs_add_device(trans, root, device); - if (ret) { - btrfs_abort_transaction(trans, root, ret); - goto error_trans; - } } - /* - * we've got more storage, clear any full flags on the space - * infos - */ - btrfs_clear_space_info_full(root->fs_info); - - unlock_chunks(root); root->fs_info->num_tolerated_disk_barrier_failures = btrfs_calc_num_tolerated_disk_barrier_failures(root->fs_info); ret = btrfs_commit_transaction(trans, root); @@ -2280,7 +2277,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) return ret; error_trans: - unlock_chunks(root); btrfs_end_transaction(trans, root); rcu_string_free(device->name); btrfs_kobj_rm_device(root->fs_info, device); @@ -2449,20 +2445,27 @@ out: return ret; } -static int __btrfs_grow_device(struct btrfs_trans_handle *trans, +int btrfs_grow_device(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 new_size) { struct btrfs_super_block *super_copy = device->dev_root->fs_info->super_copy; struct btrfs_fs_devices *fs_devices; - u64 old_total = btrfs_super_total_bytes(super_copy); - u64 diff = new_size - device->total_bytes; + u64 old_total; + u64 diff; if (!device->writeable) return -EACCES; + + lock_chunks(device->dev_root); + old_total = btrfs_super_total_bytes(super_copy); + diff = new_size - device->total_bytes; + if (new_size <= device->total_bytes || - device->is_tgtdev_for_dev_replace) + device->is_tgtdev_for_dev_replace) { + unlock_chunks(device->dev_root); return -EINVAL; + } fs_devices = 
device->dev_root->fs_info->fs_devices; @@ -2475,20 +2478,11 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans, if (list_empty(&device->resized_list)) list_add_tail(&device->resized_list, &fs_devices->resized_devices); + unlock_chunks(device->dev_root); return btrfs_update_device(trans, device); } -int btrfs_grow_device(struct btrfs_trans_handle *trans, - struct btrfs_device *device, u64 new_size) -{ - int ret; - lock_chunks(device->dev_root); - ret = __btrfs_grow_device(trans, device, new_size); - unlock_chunks(device->dev_root); - return ret; -} - static int btrfs_free_chunk(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 chunk_tree, u64 chunk_objectid, @@ -2540,6 +2534,7 @@ static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64 u32 cur; struct btrfs_key key; + lock_chunks(root); array_size = btrfs_super_sys_array_size(super_copy); ptr = super_copy->sys_chunk_array; @@ -2569,6 +2564,7 @@ static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64 cur += len; } } + unlock_chunks(root); return ret; } @@ -2579,8 +2575,10 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, struct extent_map_tree *em_tree; struct btrfs_root *extent_root; struct btrfs_trans_handle *trans; + struct btrfs_device *device; struct extent_map *em; struct map_lookup *map; + u64 dev_extent_len = 0; int ret; int i; @@ -2604,8 +2602,6 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, return ret; } - lock_chunks(root); - /* * step two, delete the device extents and the * chunk tree entries @@ -2619,10 +2615,23 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, map = (struct map_lookup *)em->bdev; for (i = 0; i < map->num_stripes; i++) { - ret = btrfs_free_dev_extent(trans, map->stripes[i].dev, - map->stripes[i].physical); + device = map->stripes[i].dev; + ret = btrfs_free_dev_extent(trans, device, + map->stripes[i].physical, + &dev_extent_len); BUG_ON(ret); + if (device->bytes_used > 0) { + 
lock_chunks(root); + btrfs_device_set_bytes_used(device, + device->bytes_used - dev_extent_len); + spin_lock(&root->fs_info->free_chunk_lock); + root->fs_info->free_chunk_space += dev_extent_len; + spin_unlock(&root->fs_info->free_chunk_lock); + btrfs_clear_space_info_full(root->fs_info); + unlock_chunks(root); + } + if (map->stripes[i].dev) { ret = btrfs_update_device(trans, map->stripes[i].dev); BUG_ON(ret); @@ -2652,7 +2661,6 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, /* once for us */ free_extent_map(em); - unlock_chunks(root); btrfs_end_transaction(trans, root); return 0; } @@ -4029,16 +4037,12 @@ again: list_add_tail(&device->resized_list, &root->fs_info->fs_devices->resized_devices); - /* Now btrfs_update_device() will change the on-disk size. */ - ret = btrfs_update_device(trans, device); - if (ret) { - unlock_chunks(root); - btrfs_end_transaction(trans, root); - goto done; - } WARN_ON(diff > old_total); btrfs_set_super_total_bytes(super_copy, old_total - diff); unlock_chunks(root); + + /* Now btrfs_update_device() will change the on-disk size. 
*/ + ret = btrfs_update_device(trans, device); btrfs_end_transaction(trans, root); done: btrfs_free_path(path); @@ -4612,15 +4616,6 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0); ret = __btrfs_alloc_chunk(trans, extent_root, sys_chunk_offset, alloc_profile); - if (ret) { - btrfs_abort_transaction(trans, root, ret); - goto out; - } - - ret = btrfs_add_device(trans, fs_info->chunk_root, device); - if (ret) - btrfs_abort_transaction(trans, root, ret); -out: return ret; } -- cgit v1.2.3-70-g09d2 From 67a2c45ee7f4f250458279a2e1244679c5d9735c Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 3 Sep 2014 21:35:43 +0800 Subject: Btrfs: fix use-after-free problem of the device during device replace The problem is: Task0(device scan task) Task1(device replace task) scan_one_device() mutex_lock(&uuid_mutex) device = find_device() mutex_lock(&device_list_mutex) lock_chunk() rm_and_free_source_device unlock_chunk() mutex_unlock(&device_list_mutex) check device Destroying the target device if device replace fails also has the same problem. We fix this problem by locking uuid_mutex while destroying the source device or the target device, just like the device remove operation. It is a temporary solution; we can fix this problem and make the code clearer by using an atomic counter in the future.
Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/dev-replace.c | 3 +++ fs/btrfs/volumes.c | 4 +++- fs/btrfs/volumes.h | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/dev-replace.c') diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index aa4c82863c7..e9cbbdb7297 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -509,6 +509,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, ret = btrfs_commit_transaction(trans, root); WARN_ON(ret); + mutex_lock(&uuid_mutex); /* keep away write_all_supers() during the finishing procedure */ mutex_lock(&root->fs_info->fs_devices->device_list_mutex); mutex_lock(&root->fs_info->chunk_mutex); @@ -536,6 +537,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, btrfs_dev_replace_unlock(dev_replace); mutex_unlock(&root->fs_info->chunk_mutex); mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); + mutex_unlock(&uuid_mutex); if (tgt_device) btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); @@ -591,6 +593,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, */ mutex_unlock(&root->fs_info->chunk_mutex); mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); + mutex_unlock(&uuid_mutex); /* write back the superblocks */ trans = btrfs_start_transaction(root, 0); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d28e1761fde..a15c8ac5d5b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -50,7 +50,7 @@ static void __btrfs_reset_dev_stats(struct btrfs_device *dev); static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev); static void btrfs_dev_stat_print_on_load(struct btrfs_device *device); -static DEFINE_MUTEX(uuid_mutex); +DEFINE_MUTEX(uuid_mutex); static LIST_HEAD(fs_uuids); static void lock_chunks(struct btrfs_root *root) @@ -1867,6 +1867,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info 
*fs_info, { struct btrfs_device *next_device; + mutex_lock(&uuid_mutex); WARN_ON(!tgtdev); mutex_lock(&fs_info->fs_devices->device_list_mutex); if (tgtdev->bdev) { @@ -1886,6 +1887,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, call_rcu(&tgtdev->rcu, free_device); mutex_unlock(&fs_info->fs_devices->device_list_mutex); + mutex_unlock(&uuid_mutex); } static int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path, diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 76600a3fedb..2b37da3dd40 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -24,6 +24,8 @@ #include #include "async-thread.h" +extern struct mutex uuid_mutex; + #define BTRFS_STRIPE_LEN (64 * 1024) struct buffer_head; -- cgit v1.2.3-70-g09d2 From 82372bc816d75722c24d1abadb11cd8c0a33883a Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 3 Sep 2014 21:35:44 +0800 Subject: Btrfs: make the logic of source device removing more clear Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/dev-replace.c | 3 +-- fs/btrfs/volumes.c | 19 +++++++------------ 2 files changed, 8 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/dev-replace.c') diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index e9cbbdb7297..6f662b34ba0 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -569,8 +569,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, if (fs_info->fs_devices->latest_bdev == src_device->bdev) fs_info->fs_devices->latest_bdev = tgt_device->bdev; list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); - if (src_device->fs_devices->seeding) - fs_info->fs_devices->rw_devices++; + fs_info->fs_devices->rw_devices++; /* replace the sysfs entry */ btrfs_kobj_rm_device(fs_info, src_device); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a15c8ac5d5b..2e078fa705a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1819,23 +1819,18 @@ void btrfs_rm_dev_replace_srcdev(struct 
btrfs_fs_info *fs_info, list_del_rcu(&srcdev->dev_list); list_del_rcu(&srcdev->dev_alloc_list); fs_devices->num_devices--; - if (srcdev->missing) { + if (srcdev->missing) fs_devices->missing_devices--; - if (!fs_devices->seeding) - fs_devices->rw_devices++; + + if (srcdev->writeable) { + fs_devices->rw_devices--; + /* zero out the old super if it is writable */ + btrfs_scratch_superblock(srcdev); } - if (srcdev->bdev) { + if (srcdev->bdev) fs_devices->open_devices--; - /* - * zero out the old super if it is not writable - * (e.g. seed device) - */ - if (srcdev->writeable) - btrfs_scratch_superblock(srcdev); - } - call_rcu(&srcdev->rcu, free_device); /* -- cgit v1.2.3-70-g09d2