From cfa7a9ccda711ac6ab8f0d17c3a9b540092d305a Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Mon, 17 Dec 2012 06:38:51 +0000 Subject: Btrfs: fix memory leak in name_cache_insert() We should free name_cache_entry before returning from the error handling code. Signed-off-by: Tsutomu Itoh --- fs/btrfs/send.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 54454542ad4..321b7fb4e44 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1814,8 +1814,10 @@ static int name_cache_insert(struct send_ctx *sctx, (unsigned long)nce->ino); if (!nce_head) { nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS); - if (!nce_head) + if (!nce_head) { + kfree(nce); return -ENOMEM; + } INIT_LIST_HEAD(nce_head); ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head); -- cgit v1.2.3-70-g09d2 From cc975eb4605c5765a5d5e7a51d24ba5a1cda269e Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Fri, 7 Dec 2012 10:09:19 +0000 Subject: btrfs: get the device in write mode when deleting it When we're deleting the device we should get it in write mode since we're going to re-write the super block magic on that device. And it should fail if the device is read-only. Signed-off-by: Lukas Czerner --- fs/btrfs/volumes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5cce6aa7401..86279c37de6 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1431,7 +1431,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) } } else { ret = btrfs_get_bdev_and_sb(device_path, - FMODE_READ | FMODE_EXCL, + FMODE_WRITE | FMODE_EXCL, root->fs_info->bdev_holder, 0, &bdev, &bh); if (ret) -- cgit v1.2.3-70-g09d2 From d86e56cf7d3669dd292012ac82b986bd1573b6cc Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 15 Nov 2012 11:35:41 +0000 Subject: Btrfs: disable qgroup id 0 Qgroup id 0 is a special number, we should set the id of a qgroup to 0. Fix it. Signed-off-by: Miao Xie --- fs/btrfs/ioctl.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7624212ae92..dd8e3448fe8 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3698,6 +3698,11 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) goto drop_write; } + if (!sa->qgroupid) { + ret = -EINVAL; + goto out; + } + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { ret = PTR_ERR(trans); -- cgit v1.2.3-70-g09d2 From 5c39da5b6ca23e68e7acea7f4c01470383475214 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Mon, 22 Oct 2012 11:39:53 +0000 Subject: Btrfs: do not delete a subvolume which is in a R/O subvolume Step to reproduce: # mkfs.btrfs # mount # btrfs sub create /subv0 # btrfs sub snap /subv0/snap0 # change /subv0 from R/W to R/O # btrfs sub del /subv0/snap0 We deleted the snapshot successfully. I think we should not be able to delete the snapshot since the parent subvolume is R/O. Signed-off-by: Miao Xie --- fs/btrfs/ioctl.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index dd8e3448fe8..5a72896bd76 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2095,13 +2095,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, err = inode_permission(inode, MAY_WRITE | MAY_EXEC); if (err) goto out_dput; - - /* check if subvolume may be deleted by a non-root user */ - err = btrfs_may_delete(dir, dentry, 1); - if (err) - goto out_dput; } + /* check if subvolume may be deleted by a user */ + err = btrfs_may_delete(dir, dentry, 1); + if (err) + goto out_dput; + if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { err = -EINVAL; goto out_dput; -- cgit v1.2.3-70-g09d2 From dba60f3f5d564167118cad151a7d41dfe8d2a5f7 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Fri, 21 Dec 2012 09:19:51 +0000 Subject: Btrfs: fix resize a readonly device We should not resize a readonly device, fix it. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/ioctl.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5a72896bd76..0de21213d05 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1362,6 +1362,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, printk(KERN_INFO "btrfs: resizing devid %llu\n", (unsigned long long)devid); } + device = btrfs_find_device(root->fs_info, devid, NULL, NULL); if (!device) { printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", @@ -1369,9 +1370,10 @@ static noinline int btrfs_ioctl_resize(struct file *file, ret = -EINVAL; goto out_free; } - if (device->fs_devices && device->fs_devices->seeding) { + + if (!device->writeable) { printk(KERN_INFO "btrfs: resizer unable to apply on " - "seeding device %llu\n", + "readonly device %llu\n", (unsigned long long)devid); ret = -EINVAL; goto out_free; -- cgit v1.2.3-70-g09d2 From 97547676570b3bd908560741315bf4b7d635bcf5 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Fri, 21 Dec 2012 10:38:50 +0000 Subject: Btrfs: fix missing write access release in btrfs_ioctl_resize() We forget to give up the write access after we find some device operation is going on. Fix it. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/ioctl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0de21213d05..982c0b9ceea 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1339,6 +1339,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 1)) { pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); + mnt_drop_write_file(file); return -EINPROGRESS; } -- cgit v1.2.3-70-g09d2 From 72bcd99d450cb1dde8bf13c3b65fc5883b2a3893 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 18 Dec 2012 15:16:34 -0500 Subject: Btrfs: set flushing if we're limited flushing We still need to say we're flushing if we're limit flushing to keep somebody from coming in and stealing our reservation. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d133edfcd44..61fefda74ff 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3997,7 +3997,7 @@ again: * We make the other tasks wait for the flush only when we can flush * all things. */ - if (ret && flush == BTRFS_RESERVE_FLUSH_ALL) { + if (ret && flush != BTRFS_RESERVE_NO_FLUSH) { flushing = true; space_info->flush = 1; } -- cgit v1.2.3-70-g09d2 From f3fe820c20a1a36c790545184e734e78d61cd68d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 7 Jan 2013 17:03:21 -0500 Subject: Btrfs: add orphan before truncating pagecache Running xfstests 83 in a loop would sometimes fail the fsck. This happens because if we invalidate a page that already has an ordered extent setup for it we will complete the ordered extent ourselves, assuming that the truncate will clean everything up. The problem with this is there is plenty of time for the truncate to fail after we've done this work. So to fix this we need to add the orphan item first to make sure the cleanup gets done properly, and then we can truncate the pagecache and all that stuff and be safe. This fixes the btrfsck failures I was seeing while running 83 in a loop. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 53 ++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 67ed24ae86b..4ddcf79e789 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2478,6 +2478,18 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) continue; } nr_truncate++; + + /* 1 for the orphan item deletion. */ + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + ret = btrfs_orphan_add(trans, inode); + btrfs_end_transaction(trans, root); + if (ret) + goto out; + ret = btrfs_truncate(inode); } else { nr_unlink++; @@ -3783,9 +3795,34 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize) set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, &BTRFS_I(inode)->runtime_flags); + /* + * 1 for the orphan item we're going to add + * 1 for the orphan item deletion. + */ + trans = btrfs_start_transaction(root, 2); + if (IS_ERR(trans)) + return PTR_ERR(trans); + + /* + * We need to do this in case we fail at _any_ point during the + * actual truncate. Once we do the truncate_setsize we could + * invalidate pages which forces any outstanding ordered io to + * be instantly completed which will give us extents that need + * to be truncated. If we fail to get an orphan inode down we + * could have left over extents that were never meant to live, + * so we need to garuntee from this point on that everything + * will be consistent. + */ + ret = btrfs_orphan_add(trans, inode); + btrfs_end_transaction(trans, root); + if (ret) + return ret; + /* we don't support swapfiles, so vmtruncate shouldn't fail */ truncate_setsize(inode, newsize); ret = btrfs_truncate(inode); + if (ret && inode->i_nlink) + btrfs_orphan_del(NULL, inode); } return ret; @@ -6929,11 +6966,9 @@ static int btrfs_truncate(struct inode *inode) /* * 1 for the truncate slack space - * 1 for the orphan item we're going to add - * 1 for the orphan item deletion * 1 for updating the inode. */ - trans = btrfs_start_transaction(root, 4); + trans = btrfs_start_transaction(root, 2); if (IS_ERR(trans)) { err = PTR_ERR(trans); goto out; @@ -6944,12 +6979,6 @@ static int btrfs_truncate(struct inode *inode) min_size); BUG_ON(ret); - ret = btrfs_orphan_add(trans, inode); - if (ret) { - btrfs_end_transaction(trans, root); - goto out; - } - /* * setattr is responsible for setting the ordered_data_close flag, * but that is only tested during the last file release. That @@ -7018,12 +7047,6 @@ static int btrfs_truncate(struct inode *inode) ret = btrfs_orphan_del(trans, inode); if (ret) err = ret; - } else if (ret && inode->i_nlink > 0) { - /* - * Failed to do the truncate, remove us from the in memory - * orphan list. - */ - ret = btrfs_orphan_del(NULL, inode); } if (trans) { -- cgit v1.2.3-70-g09d2 From ac5c93005b7073732e268606688fb6c821d5310e Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 27 Dec 2012 09:01:24 +0000 Subject: Btrfs: let allocation start from the right raid type This'd avoid us empty looping. Say we have only one disk and the metadata raid type will be defaultly DUP, and we do not need to start from index=0(RAID10) and get over two empty loops to index=2(DUP). Signed-off-by: Liu Bo Signed-off-by: Josef Bacik --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 61fefda74ff..aeba53191ec 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5560,7 +5560,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, int empty_cluster = 2 * 1024 * 1024; struct btrfs_space_info *space_info; int loop = 0; - int index = 0; + int index = __get_raid_index(data); int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ? RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC; bool found_uncached_bg = false; -- cgit v1.2.3-70-g09d2 From 3268a2468eb6a31af89930cbae58a62fe6ca6d2d Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 28 Dec 2012 09:33:19 +0000 Subject: Btrfs: reset path lock state to zero We forgot to reset the path lock state to zero after we unlock the path block, and this can lead to the ASSERT checker in tree unlock API. Reported-by: Slava Barinov Signed-off-by: Liu Bo Signed-off-by: Josef Bacik --- fs/btrfs/extent-tree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index aeba53191ec..85b8454d960 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6788,11 +6788,13 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, &wc->flags[level]); if (ret < 0) { btrfs_tree_unlock_rw(eb, path->locks[level]); + path->locks[level] = 0; return ret; } BUG_ON(wc->refs[level] == 0); if (wc->refs[level] == 1) { btrfs_tree_unlock_rw(eb, path->locks[level]); + path->locks[level] = 0; return 1; } } -- cgit v1.2.3-70-g09d2 From 1214b53f90131fee1f950010c43e92455fe598ab Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 7 Jan 2013 03:53:08 +0000 Subject: Btrfs: fix off-by-one in lseek Lock end is inclusive. Signed-off-by: Liu Bo Signed-off-by: Josef Bacik --- fs/btrfs/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 20452c110d7..fa48051484b 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2242,6 +2242,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin) if (lockend <= lockstart) lockend = lockstart + root->sectorsize; + lockend--; len = lockend - lockstart + 1; len = max_t(u64, len, root->sectorsize); -- cgit v1.2.3-70-g09d2 From f9e4fb53938de5db01950c9dfe479703b2f5c964 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 7 Jan 2013 10:10:12 +0000 Subject: Btrfs: fix a bug when llseek for delalloc bytes behind prealloc extents xfstests case 285 complains. It it because btrfs did not try to find unwritten delalloc bytes(only dirty pages, not yet writeback) behind prealloc extents, it ends up finding nothing while we're with SEEK_DATA. Signed-off-by: Liu Bo Signed-off-by: Josef Bacik --- fs/btrfs/file.c | 9 ++++++--- fs/btrfs/inode.c | 11 ++++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index fa48051484b..841cfe3be0e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2309,9 +2309,12 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin) } } - *offset = start; - free_extent_map(em); - break; + if (!test_bit(EXTENT_FLAG_PREALLOC, + &em->flags)) { + *offset = start; + free_extent_map(em); + break; + } } } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4ddcf79e789..ac98384b174 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5623,10 +5623,13 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag return em; if (em) { /* - * if our em maps to a hole, there might - * actually be delalloc bytes behind it + * if our em maps to + * - a hole or + * - a pre-alloc extent, + * there might actually be delalloc bytes behind it. */ - if (em->block_start != EXTENT_MAP_HOLE) + if (em->block_start != EXTENT_MAP_HOLE && + !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) return em; else hole_em = em; @@ -5708,6 +5711,8 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag */ em->block_start = hole_em->block_start; em->block_len = hole_len; + if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags)) + set_bit(EXTENT_FLAG_PREALLOC, &em->flags); } else { em->start = range_start; em->len = found; -- cgit v1.2.3-70-g09d2 From f276795627045a3c599a60b476767861e4318c7d Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Tue, 8 Jan 2013 19:37:58 +0000 Subject: btrfs: fix btrfs_cont_expand() freeing IS_ERR em btrfs_cont_expand() tries to free an IS_ERR em as it gets an error from btrfs_get_extent() and breaks out of its loop. An instance of -EEXIST was reported in the wild: https://bugzilla.redhat.com/show_bug.cgi?id=874407 I have no idea if that -EEXIST is surprising, or not. Regardless, this error handling should be cleaned up to handle other reasonable errors (ENOMEM, EIO; whatever). This seemed to be the only buggy freeing of the relatively rare IS_ERR em so I opted to fix the caller rather than teach free_extent_map() to use IS_ERR_OR_NULL(). Signed-off-by: Zach Brown Reviewed-by: Eric Sandeen Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ac98384b174..3d2c64d4734 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3677,6 +3677,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) block_end - cur_offset, 0); if (IS_ERR(em)) { err = PTR_ERR(em); + em = NULL; break; } last_byte = min(extent_map_end(em), block_end); -- cgit v1.2.3-70-g09d2 From 3972f2603d8570effaf633cea52b12c7c2773c11 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Sat, 12 Jan 2013 02:57:22 +0000 Subject: btrfs: update timestamps on truncate() truncate() vs. ftruncate() differ in the VFS; truncate() doesn't set (ATTR_CTIME | ATTR_MTIME), and it's up to the fs to do the timestamp updates if the size changes. Signed-off-by: Eric Sandeen Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3d2c64d4734..9bc6c40b182 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -88,7 +88,7 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, }; -static int btrfs_setsize(struct inode *inode, loff_t newsize); +static int btrfs_setsize(struct inode *inode, struct iattr *attr); static int btrfs_truncate(struct inode *inode); static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent); static noinline int cow_file_range(struct inode *inode, @@ -3761,16 +3761,27 @@ next: return err; } -static int btrfs_setsize(struct inode *inode, loff_t newsize) +static int btrfs_setsize(struct inode *inode, struct iattr *attr) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; loff_t oldsize = i_size_read(inode); + loff_t newsize = attr->ia_size; + int mask = attr->ia_valid; int ret; if (newsize == oldsize) return 0; + /* + * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a + * special case where we need to update the times despite not having + * these flags set. For all other operations the VFS set these flags + * explicitly if it wants a timestamp update. + */ + if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME)))) + inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb); + if (newsize > oldsize) { truncate_pagecache(inode, oldsize, newsize); ret = btrfs_cont_expand(inode, oldsize, newsize); @@ -3843,7 +3854,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) return err; if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { - err = btrfs_setsize(inode, attr->ia_size); + err = btrfs_setsize(inode, attr); if (err) return err; } -- cgit v1.2.3-70-g09d2