From 7cddc193924ef6ce679ef0977e01e96d0aedfd1d Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Fri, 28 Jun 2013 13:15:52 +0800 Subject: btrfs: fix file truncation if FALLOC_FL_KEEP_SIZE is specified Create a small file and fallocate it to a big size with FALLOC_FL_KEEP_SIZE option, then truncate it back to the small size again, the disk free space is not changed back in this case. i.e, total 4 -rw-r--r-- 1 root root 512 Jun 28 11:35 test Filesystem Size Used Avail Use% Mounted on .... /dev/sdb1 8.0G 56K 7.2G 1% /mnt -rw-r--r-- 1 root root 512 Jun 28 11:35 /mnt/test Filesystem Size Used Avail Use% Mounted on .... /dev/sdb1 8.0G 5.1G 2.2G 70% /mnt Filesystem Size Used Avail Use% Mounted on .... /dev/sdb1 8.0G 5.1G 2.2G 70% /mnt With this fix, the truncated up space is back as: Filesystem Size Used Avail Use% Mounted on .... /dev/sdb1 8.0G 56K 7.2G 1% /mnt Signed-off-by: Jie Liu Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6d1b93c8aaf..0fd7647c893 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4391,9 +4391,6 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) int mask = attr->ia_valid; int ret; - if (newsize == oldsize) - return 0; - /* * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a * special case where we need to update the times despite not having -- cgit v1.2.3-70-g09d2 From e68afa49aec5f0851e550ee1de48fcc3a9bf5ef7 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 1 Jul 2013 22:13:26 +0800 Subject: Btrfs: fix a bug of snapshot-aware defrag to make it work on partial extents For partial extents, snapshot-aware defrag does not work as expected, since a) we use the wrong logical offset to search for parents, which should be disk_bytenr + extent_offset, not just disk_bytenr, b) 'offset' returned by the backref walking just refers to key.offset, not the 'offset' stored in btrfs_extent_data_ref which is (key.offset - extent_offset). The reproducer: $ mkfs.btrfs sda $ mount sda /mnt $ btrfs sub create /mnt/sub $ for i in `seq 5 -1 1`; do dd if=/dev/zero of=/mnt/sub/foo bs=5k count=1 seek=$i conv=notrunc oflag=sync; done $ btrfs sub snap /mnt/sub /mnt/snap1 $ btrfs sub snap /mnt/sub /mnt/snap2 $ sync; btrfs filesystem defrag /mnt/sub/foo; $ umount /mnt $ btrfs-debug-tree sda (Here we can check whether the defrag operation is snapshot-awared. This addresses the above two problems. Signed-off-by: Liu Bo Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0fd7647c893..c72033ee601 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2166,16 +2166,23 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id, if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr) continue; - extent_offset = btrfs_file_extent_offset(leaf, extent); - if (key.offset - extent_offset != offset) + /* + * 'offset' refers to the exact key.offset, + * NOT the 'offset' field in btrfs_extent_data_ref, ie. + * (key.offset - extent_offset). + */ + if (key.offset != offset) continue; + extent_offset = btrfs_file_extent_offset(leaf, extent); num_bytes = btrfs_file_extent_num_bytes(leaf, extent); + if (extent_offset >= old->extent_offset + old->offset + old->len || extent_offset + num_bytes <= old->extent_offset + old->offset) continue; + ret = 0; break; } @@ -2187,7 +2194,7 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id, backref->root_id = root_id; backref->inum = inum; - backref->file_pos = offset + extent_offset; + backref->file_pos = offset; backref->num_bytes = num_bytes; backref->extent_offset = extent_offset; backref->generation = btrfs_file_extent_generation(leaf, extent); @@ -2210,7 +2217,8 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path, new->path = path; list_for_each_entry_safe(old, tmp, &new->head, list) { - ret = iterate_inodes_from_logical(old->bytenr, fs_info, + ret = iterate_inodes_from_logical(old->bytenr + + old->extent_offset, fs_info, path, record_one_backref, old); BUG_ON(ret < 0 && ret != -ENOENT); -- cgit v1.2.3-70-g09d2 From db62efbbf883c099d44b0fafe18f8ad8f0396892 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Thu, 11 Jul 2013 16:19:42 -0700 Subject: btrfs: don't loop on large offsets in readdir When btrfs readdir() hits the last entry it sets the readdir offset to a huge value to stop buggy apps from breaking when the same name is returned by readdir() with concurrent rename()s. But unconditionally setting the offset to INT_MAX causes readdir() to loop returning any entries with offsets past INT_MAX. It only takes a few hours of constant file creation and removal to create entries past INT_MAX. So let's set the huge offset to LLONG_MAX if the last entry has already overflowed 32bit loff_t. Without large offsets behaviour is identical. With large offsets 64bit apps will work and 32bit apps will be no more broken than they currently are if they see large offsets. Signed-off-by: Zach Brown Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c72033ee601..021694c0818 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5170,14 +5170,31 @@ next: } /* Reached end of directory/root. Bump pos past the last item. */ - if (key_type == BTRFS_DIR_INDEX_KEY) - /* - * 32-bit glibc will use getdents64, but then strtol - - * so the last number we can serve is this. - */ - ctx->pos = 0x7fffffff; - else - ctx->pos++; + ctx->pos++; + + /* + * Stop new entries from being returned after we return the last + * entry. + * + * New directory entries are assigned a strictly increasing + * offset. This means that new entries created during readdir + * are *guaranteed* to be seen in the future by that readdir. + * This has broken buggy programs which operate on names as + * they're returned by readdir. Until we re-use freed offsets + * we have this hack to stop new entries from being returned + * under the assumption that they'll never reach this huge + * offset. + * + * This is being careful not to overflow 32bit loff_t unless the + * last entry requires it because doing so has broken 32bit apps + * in the past. + */ + if (key_type == BTRFS_DIR_INDEX_KEY) { + if (ctx->pos >= INT_MAX) + ctx->pos = LLONG_MAX; + else + ctx->pos = INT_MAX; + } nopos: ret = 0; err: -- cgit v1.2.3-70-g09d2