From 60ccf82f5b6e26e10d41783464ca469c070c7d49 Mon Sep 17 00:00:00 2001 From: Diego Calleja Date: Thu, 1 Sep 2011 16:33:57 +0200 Subject: btrfs: fix memory leak in btrfs_defrag_file kmemleak found this: unreferenced object 0xffff8801b64af968 (size 512): comm "btrfs-cleaner", pid 3317, jiffies 4306810886 (age 903.272s) hex dump (first 32 bytes): 00 82 01 07 00 ea ff ff c0 83 01 07 00 ea ff ff ................ 80 82 01 07 00 ea ff ff c0 87 01 07 00 ea ff ff ................ backtrace: [] kmemleak_alloc+0x5c/0xc0 [] kmem_cache_alloc_trace+0x163/0x240 [] btrfs_defrag_file+0xf0/0xb20 [] btrfs_run_defrag_inodes+0x165/0x210 [] cleaner_kthread+0x177/0x190 [] kthread+0x8d/0xa0 [] kernel_thread_helper+0x4/0x10 [] 0xffffffffffffffff "pages" is not always freed. Fix it removing the unnecesary additional return. Signed-off-by: Diego Calleja --- fs/btrfs/ioctl.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d2b53eb8a8c..8ccc106f4e1 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1140,9 +1140,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, btrfs_set_super_incompat_flags(disk_super, features); } - if (!file) - kfree(ra); - return defrag_count; + ret = defrag_count; out_ra: if (!file) -- cgit v1.2.3-70-g09d2 From cbcc83265d929ac71553c1b5dafdb830171af947 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 2 Sep 2011 15:56:25 +0800 Subject: Btrfs: fix defragmentation regression There's an off-by-one bug: # create a file with lots of 4K file extents # btrfs fi defrag /mnt/file # sync # filefrag -v /mnt/file Filesystem type is: 9123683e File size of /mnt/file is 1228800 (300 blocks, blocksize 4096) ext logical physical expected length flags 0 0 3372 64 1 64 3136 3435 1 2 65 3436 3136 64 3 129 3201 3499 1 4 130 3500 3201 64 5 194 3266 3563 1 6 195 3564 3266 64 7 259 3331 3627 1 8 260 3628 3331 40 eof After this patch: ... # filefrag -v /mnt/file Filesystem type is: 9123683e File size of /mnt/file is 1228800 (300 blocks, blocksize 4096) ext logical physical expected length flags 0 0 3372 300 eof /mnt/file: 1 extent found Signed-off-by: Li Zefan --- fs/btrfs/ioctl.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8ccc106f4e1..b39f7bf9270 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1087,7 +1087,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, defrag_count += ret; balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret); - i += ret; if (newer_than) { if (newer_off == (u64)-1) @@ -1107,7 +1106,10 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, break; } } else { - i++; + if (ret > 0) + i += ret; + else + i++; } } -- cgit v1.2.3-70-g09d2 From 151a31b25e5c941bdd9fdefed650effca223c716 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 2 Sep 2011 15:56:39 +0800 Subject: Btrfs: use i_size_read() in btrfs_defrag_file() Don't use inode->i_size directly, since we're not holding i_mutex. This also fixes another bug, that i_size can change after it's checked against 0 and then (i_size - 1) can be negative. Signed-off-by: Li Zefan --- fs/btrfs/ioctl.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index b39f7bf9270..323d77f0925 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -978,6 +978,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, struct btrfs_super_block *disk_super; struct file_ra_state *ra = NULL; unsigned long last_index; + u64 isize = i_size_read(inode); u64 features; u64 last_len = 0; u64 skip = 0; @@ -1003,7 +1004,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, compress_type = range->compress_type; } - if (inode->i_size == 0) + if (isize == 0) return 0; /* @@ -1028,10 +1029,10 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, /* find the last page to defrag */ if (range->start + range->len > range->start) { - last_index = min_t(u64, inode->i_size - 1, + last_index = min_t(u64, isize - 1, range->start + range->len - 1) >> PAGE_CACHE_SHIFT; } else { - last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; + last_index = (isize - 1) >> PAGE_CACHE_SHIFT; } if (newer_than) { -- cgit v1.2.3-70-g09d2 From 5ca496604b5975d371bb669ee6c2394bcbea818f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 2 Sep 2011 15:56:55 +0800 Subject: Btrfs: fix wrong max_to_defrag in btrfs_defrag_file() It's off-by-one, and thus we may skip the last page while defragmenting. An example case: # create /mnt/file with 2 4K file extents # btrfs fi defrag /mnt/file # sync # filefrag /mnt/file /mnt/file: 2 extents found So it's not defragmented. Signed-off-by: Li Zefan --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 323d77f0925..f9026413bcf 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1052,7 +1052,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, i = range->start >> PAGE_CACHE_SHIFT; } if (!max_to_defrag) - max_to_defrag = last_index - 1; + max_to_defrag = last_index; while (i <= last_index && defrag_count < max_to_defrag) { /* -- cgit v1.2.3-70-g09d2 From 008873eafbc77deb1702aedece33756c58486c6a Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 2 Sep 2011 15:57:07 +0800 Subject: Btrfs: honor extent thresh during defragmentation We won't defrag an extent, if it's bigger than the threshold we specified and there's no small extent before it, but actually the code doesn't work this way. There are three bugs: - When should_defrag_range() decides we should keep on defragmenting an extent, last_len is not incremented. (old bug) - The length that passes to should_defrag_range() is not the length we're going to defrag. (new bug) - We always defrag 256K bytes data, and a big extent can be part of this range. (new bug) For a file with 4 extents: | 4K | 4K | 256K | 256K | The result of defrag with (the default) 256K extent thresh should be: | 264K | 256K | but with those bugs, we'll get: | 520K | Signed-off-by: Li Zefan --- fs/btrfs/ioctl.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f9026413bcf..d524b6697ad 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -765,7 +765,7 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, int ret = 1; /* - * make sure that once we start defragging and extent, we keep on + * make sure that once we start defragging an extent, we keep on * defragging it */ if (start < *defrag_end) @@ -810,7 +810,6 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, * extent will force at least part of that big extent to be defragged. */ if (ret) { - *last_len += len; *defrag_end = extent_map_end(em); } else { *last_len = 0; @@ -984,13 +983,14 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, u64 skip = 0; u64 defrag_end = 0; u64 newer_off = range->start; - int newer_left = 0; unsigned long i; + unsigned long ra_index = 0; int ret; int defrag_count = 0; int compress_type = BTRFS_COMPRESS_ZLIB; int extent_thresh = range->extent_thresh; - int newer_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; + int max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; + int cluster = max_cluster; u64 new_align = ~((u64)128 * 1024 - 1); struct page **pages = NULL; @@ -1020,7 +1020,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, ra = &file->f_ra; } - pages = kmalloc(sizeof(struct page *) * newer_cluster, + pages = kmalloc(sizeof(struct page *) * max_cluster, GFP_NOFS); if (!pages) { ret = -ENOMEM; @@ -1045,7 +1045,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, * the extents in the file evenly spaced */ i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; - newer_left = newer_cluster; } else goto out_ra; } else { @@ -1077,12 +1076,26 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, i = max(i + 1, next); continue; } + + if (!newer_than) { + cluster = (PAGE_CACHE_ALIGN(defrag_end) >> + PAGE_CACHE_SHIFT) - i; + cluster = min(cluster, max_cluster); + } else { + cluster = max_cluster; + } + if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) BTRFS_I(inode)->force_compress = compress_type; - btrfs_force_ra(inode->i_mapping, ra, file, i, newer_cluster); + if (i + cluster > ra_index) { + ra_index = max(i, ra_index); + btrfs_force_ra(inode->i_mapping, ra, file, ra_index, + cluster); + ra_index += max_cluster; + } - ret = cluster_pages_for_defrag(inode, pages, i, newer_cluster); + ret = cluster_pages_for_defrag(inode, pages, i, cluster); if (ret < 0) goto out_ra; @@ -1102,15 +1115,17 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, if (!ret) { range->start = newer_off; i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; - newer_left = newer_cluster; } else { break; } } else { - if (ret > 0) + if (ret > 0) { i += ret; - else + last_len += ret << PAGE_CACHE_SHIFT; + } else { i++; + last_len = 0; + } } } -- cgit v1.2.3-70-g09d2 From f4c697e6406da5dd445eda8d923c53e1138793dd Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Mon, 5 Sep 2011 16:34:54 +0200 Subject: btrfs: return EINVAL if start > total_bytes in fitrim ioctl We should retirn EINVAL if the start is beyond the end of the file system in the btrfs_ioctl_fitrim(). Fix that by adding the appropriate check for it. Also in the btrfs_trim_fs() it is possible that len+start might overflow if big values are passed. Fix it by decrementing the len so that start+len is equal to the file system size in the worst case. Signed-off-by: Lukas Czerner --- fs/btrfs/ioctl.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d524b6697ad..136a2f980e2 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -282,6 +282,7 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) struct fstrim_range range; u64 minlen = ULLONG_MAX; u64 num_devices = 0; + u64 total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); int ret; if (!capable(CAP_SYS_ADMIN)) @@ -300,12 +301,15 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) } } rcu_read_unlock(); + if (!num_devices) return -EOPNOTSUPP; - if (copy_from_user(&range, arg, sizeof(range))) return -EFAULT; + if (range.start > total_bytes) + return -EINVAL; + range.len = min(range.len, total_bytes - range.start); range.minlen = max(range.minlen, minlen); ret = btrfs_trim_fs(root, &range); if (ret < 0) -- cgit v1.2.3-70-g09d2