From 1ce01c4a199c50b023802be25261c0c02b2f0214 Mon Sep 17 00:00:00 2001
From: Namjae Jeon <namjae.jeon@samsung.com>
Date: Thu, 10 Apr 2014 22:58:20 -0400
Subject: ext4: fix COLLAPSE_RANGE test failure in data journalling mode

When mounting ext4 with data=journal option, xfstest shared/002 and
shared/004 are currently failing as checksum computed for testfile
does not match with the checksum computed in other journal modes.
In case of data=journal mode, a call to filemap_write_and_wait_range
will not flush anything to disk as buffers are not marked dirty in
write_end. In collapse range this call is followed by a call to
truncate_pagecache_range. Due to this, when checksum is computed,
a portion of file is re-read from disk which replace valid data with
NULL bytes and hence the reason for the difference in checksum.

Calling ext4_force_commit before filemap_write_and_wait_range solves
the issue as it will mark the buffers dirty during commit transaction
which can be later synced by a call to filemap_write_and_wait_range.

Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
Signed-off-by: Ashish Sangwan <a.sangwan@samsung.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/extents.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs/ext4/extents.c')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 82df3ce9874..be1e56cbbf3 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5383,6 +5383,13 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 	punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
 	punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);
 
+	/* Call ext4_force_commit to flush all data in case of data=journal. */
+	if (ext4_should_journal_data(inode)) {
+		ret = ext4_force_commit(inode->i_sb);
+		if (ret)
+			return ret;
+	}
+
 	/* Write out all dirty pages */
 	ret = filemap_write_and_wait_range(inode->i_mapping, offset, -1);
 	if (ret)
-- 
cgit v1.2.3-70-g09d2


From 9ef06cec7c96f6bf59f1dd8b64b9645820099051 Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Sat, 12 Apr 2014 09:47:00 -0400
Subject: ext4: remove unnecessary check for APPEND and IMMUTABLE

All the checks IS_APPEND and IS_IMMUTABLE for the fallocate operation on
the inode are done in vfs. No need to do this again in ext4. Remove it.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/extents.c | 6 ------
 fs/ext4/inode.c   | 6 +-----
 2 files changed, 1 insertion(+), 11 deletions(-)

(limited to 'fs/ext4/extents.c')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index be1e56cbbf3..ed4ec48239b 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5398,12 +5398,6 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 	/* Take mutex lock */
 	mutex_lock(&inode->i_mutex);
 
-	/* It's not possible punch hole on append only file */
-	if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
-		ret = -EPERM;
-		goto out_mutex;
-	}
-
 	if (IS_SWAPFILE(inode)) {
 		ret = -ETXTBSY;
 		goto out_mutex;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index f023f0cb46f..e2bba76f0d7 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3541,11 +3541,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 	}
 
 	mutex_lock(&inode->i_mutex);
-	/* It's not possible punch hole on append only file */
-	if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
-		ret = -EPERM;
-		goto out_mutex;
-	}
+
 	if (IS_SWAPFILE(inode)) {
 		ret = -ETXTBSY;
 		goto out_mutex;
-- 
cgit v1.2.3-70-g09d2


From 23fffa925ea2c9a2bcb1a4453e2c542635aa3545 Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Sat, 12 Apr 2014 09:56:41 -0400
Subject: fs: move falloc collapse range check into the filesystem methods

Currently in do_fallocate in collapse range case we're checking
whether offset + len is not bigger than i_size.  However there is
nothing which would prevent i_size from changing so the check is
pointless.  It should be done in the file system itself and the file
system needs to make sure that i_size is not going to change.  The
i_size check for the other fallocate modes are also done in the
filesystems.

As it is now we can easily crash the kernel by having two processes
doing truncate and fallocate collapse range at the same time.  This
can be reproduced on ext4 and it is theoretically possible on xfs even
though I was not able to trigger it with this simple test.

This commit removes the check from do_fallocate and adds it to the
file system.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Acked-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/ext4/extents.c | 11 +++++++++--
 fs/open.c         |  8 --------
 fs/xfs/xfs_file.c | 10 +++++++++-
 3 files changed, 18 insertions(+), 11 deletions(-)

(limited to 'fs/ext4/extents.c')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index ed4ec48239b..ac5460d0d13 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5368,8 +5368,6 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 	loff_t new_size;
 	int ret;
 
-	BUG_ON(offset + len > i_size_read(inode));
-
 	/* Collapse range works only on fs block size aligned offsets. */
 	if (offset & (EXT4_BLOCK_SIZE(sb) - 1) ||
 	    len & (EXT4_BLOCK_SIZE(sb) - 1))
@@ -5398,6 +5396,15 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 	/* Take mutex lock */
 	mutex_lock(&inode->i_mutex);
 
+	/*
+	 * There is no need to overlap collapse range with EOF, in which case
+	 * it is effectively a truncate operation
+	 */
+	if (offset + len >= i_size_read(inode)) {
+		ret = -EINVAL;
+		goto out_mutex;
+	}
+
 	if (IS_SWAPFILE(inode)) {
 		ret = -ETXTBSY;
 		goto out_mutex;
diff --git a/fs/open.c b/fs/open.c
index 3a83253d337..adf34202213 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -284,14 +284,6 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
 		return -EFBIG;
 
-	/*
-	 * There is no need to overlap collapse range with EOF, in which case
-	 * it is effectively a truncate operation
-	 */
-	if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
-	    (offset + len >= i_size_read(inode)))
-		return -EINVAL;
-
 	if (!file->f_op->fallocate)
 		return -EOPNOTSUPP;
 
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f7abff8c16c..3cb528c4f27 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -840,7 +840,15 @@ xfs_file_fallocate(
 			goto out_unlock;
 		}
 
-		ASSERT(offset + len < i_size_read(inode));
+		/*
+		 * There is no need to overlap collapse range with EOF,
+		 * in which case it is effectively a truncate operation
+		 */
+		if (offset + len >= i_size_read(inode)) {
+			error = -EINVAL;
+			goto out_unlock;
+		}
+
 		new_size = i_size_read(inode) - len;
 
 		error = xfs_collapse_file_space(ip, offset, len);
-- 
cgit v1.2.3-70-g09d2


From 0790b31b69374ddadefebb156251b319e5b43345 Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Sat, 12 Apr 2014 10:05:37 -0400
Subject: fs: disallow all fallocate operation on active swapfile

Currently some file system have IS_SWAPFILE check in their fallocate
implementations and some do not. However we should really prevent any
fallocate operation on swapfile so move the check to vfs and remove the
redundant checks from the file systems fallocate implementations.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ceph/file.c    | 3 ---
 fs/ext4/extents.c | 5 -----
 fs/ext4/inode.c   | 5 -----
 fs/open.c         | 7 +++++++
 4 files changed, 7 insertions(+), 13 deletions(-)

(limited to 'fs/ext4/extents.c')

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 09c7afe32e4..596e6cc9f9c 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1215,9 +1215,6 @@ static long ceph_fallocate(struct file *file, int mode,
 	if (!S_ISREG(inode->i_mode))
 		return -EOPNOTSUPP;
 
-	if (IS_SWAPFILE(inode))
-		return -ETXTBSY;
-
 	mutex_lock(&inode->i_mutex);
 
 	if (ceph_snap(inode) != CEPH_NOSNAP) {
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index ac5460d0d13..b2d3869b576 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5405,11 +5405,6 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 		goto out_mutex;
 	}
 
-	if (IS_SWAPFILE(inode)) {
-		ret = -ETXTBSY;
-		goto out_mutex;
-	}
-
 	/* Currently just for extent based files */
 	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
 		ret = -EOPNOTSUPP;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index e2bba76f0d7..b74cfd2a42e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3542,11 +3542,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 
 	mutex_lock(&inode->i_mutex);
 
-	if (IS_SWAPFILE(inode)) {
-		ret = -ETXTBSY;
-		goto out_mutex;
-	}
-
 	/* No need to punch hole beyond i_size */
 	if (offset >= inode->i_size)
 		goto out_mutex;
diff --git a/fs/open.c b/fs/open.c
index adf34202213..7b823daa6a9 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -262,6 +262,13 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	if (IS_IMMUTABLE(inode))
 		return -EPERM;
 
+	/*
+	 * We can not allow to do any fallocate operation on an active
+	 * swapfile
+	 */
+	if (IS_SWAPFILE(inode))
+		ret = -ETXTBSY;
+
 	/*
 	 * Revalidate the write permissions, in case security policy has
 	 * changed since the files were opened.
-- 
cgit v1.2.3-70-g09d2


From 847c6c422aa0ae81a5517a9558ec2737806dca48 Mon Sep 17 00:00:00 2001
From: Zheng Liu <wenqing.lz@taobao.com>
Date: Sat, 12 Apr 2014 12:45:55 -0400
Subject: ext4: fix byte order problems introduced by the COLLAPSE_RANGE
 patches

This commit tries to fix some byte order issues that is found by sparse
check.

$ make M=fs/ext4 C=2 CF=-D__CHECK_ENDIAN__
...
  CHECK   fs/ext4/extents.c
fs/ext4/extents.c:5232:41: warning: restricted __le32 degrades to integer
fs/ext4/extents.c:5236:52: warning: bad assignment (-=) to restricted __le32
fs/ext4/extents.c:5258:45: warning: bad assignment (-=) to restricted __le32
fs/ext4/extents.c:5303:28: warning: restricted __le32 degrades to integer
fs/ext4/extents.c:5318:18: warning: incorrect type in assignment (different base types)
fs/ext4/extents.c:5318:18:    expected unsigned int [unsigned] [usertype] ex_start
fs/ext4/extents.c:5318:18:    got restricted __le32 [usertype] ee_block
fs/ext4/extents.c:5319:24: warning: restricted __le32 degrades to integer
fs/ext4/extents.c:5334:31: warning: incorrect type in assignment (different base types)
...

Cc: Andreas Dilger <adilger.kernel@dilger.ca>
Cc: Namjae Jeon <namjae.jeon@samsung.com>
Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/extents.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'fs/ext4/extents.c')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index b2d3869b576..f24ef869760 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5229,11 +5229,11 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
 			if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr))
 				update = 1;
 
-			*start = ex_last->ee_block +
+			*start = le32_to_cpu(ex_last->ee_block) +
 				ext4_ext_get_actual_len(ex_last);
 
 			while (ex_start <= ex_last) {
-				ex_start->ee_block -= shift;
+				le32_add_cpu(&ex_start->ee_block, -shift);
 				if (ex_start >
 					EXT_FIRST_EXTENT(path[depth].p_hdr)) {
 					if (ext4_ext_try_to_merge_right(inode,
@@ -5255,7 +5255,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
 		if (err)
 			goto out;
 
-		path[depth].p_idx->ei_block -= shift;
+		le32_add_cpu(&path[depth].p_idx->ei_block, -shift);
 		err = ext4_ext_dirty(handle, inode, path + depth);
 		if (err)
 			goto out;
@@ -5300,7 +5300,8 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
 		return ret;
 	}
 
-	stop_block = extent->ee_block + ext4_ext_get_actual_len(extent);
+	stop_block = le32_to_cpu(extent->ee_block) +
+			ext4_ext_get_actual_len(extent);
 	ext4_ext_drop_refs(path);
 	kfree(path);
 
@@ -5315,8 +5316,9 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
 	path = ext4_ext_find_extent(inode, start - 1, NULL, 0);
 	depth = path->p_depth;
 	extent =  path[depth].p_ext;
-	ex_start = extent->ee_block;
-	ex_end = extent->ee_block + ext4_ext_get_actual_len(extent);
+	ex_start = le32_to_cpu(extent->ee_block);
+	ex_end = le32_to_cpu(extent->ee_block) +
+			ext4_ext_get_actual_len(extent);
 	ext4_ext_drop_refs(path);
 	kfree(path);
 
@@ -5331,7 +5333,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
 			return PTR_ERR(path);
 		depth = path->p_depth;
 		extent = path[depth].p_ext;
-		current_block = extent->ee_block;
+		current_block = le32_to_cpu(extent->ee_block);
 		if (start > current_block) {
 			/* Hole, move to the next extent */
 			ret = mext_next_extent(inode, path, &extent);
-- 
cgit v1.2.3-70-g09d2


From 40c406c74eb9eed58ae7d4d12a0197f7279c9499 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 12 Apr 2014 22:53:53 -0400
Subject: ext4: COLLAPSE_RANGE only works on extent-based files

Unfortunately, we weren't checking to make sure of this the inode was
extent-based before attempt operate on it.  Hilarity ensues.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Namjae Jeon <namjae.jeon@samsung.com>
---
 fs/ext4/extents.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs/ext4/extents.c')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index f24ef869760..96e0a4bc8fa 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4878,9 +4878,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	if (mode & FALLOC_FL_PUNCH_HOLE)
 		return ext4_punch_hole(inode, offset, len);
 
-	if (mode & FALLOC_FL_COLLAPSE_RANGE)
-		return ext4_collapse_range(inode, offset, len);
-
 	ret = ext4_convert_inline_data(inode);
 	if (ret)
 		return ret;
@@ -4892,6 +4889,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
 		return -EOPNOTSUPP;
 
+	if (mode & FALLOC_FL_COLLAPSE_RANGE)
+		return ext4_collapse_range(inode, offset, len);
+
 	if (mode & FALLOC_FL_ZERO_RANGE)
 		return ext4_zero_range(file, offset, len, mode);
 
-- 
cgit v1.2.3-70-g09d2


From 8dc79ec4c0537e1b83c0739af82a7babefb30012 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Sun, 13 Apr 2014 15:05:42 -0400
Subject: ext4: fix error handling in ext4_ext_shift_extents

Fix error handling by adding some.  :-)

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/extents.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'fs/ext4/extents.c')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 96e0a4bc8fa..38be06354b8 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5314,11 +5314,18 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
 	 * enough to accomodate the shift.
 	 */
 	path = ext4_ext_find_extent(inode, start - 1, NULL, 0);
+	if (IS_ERR(path))
+		return PTR_ERR(path);
 	depth = path->p_depth;
 	extent =  path[depth].p_ext;
-	ex_start = le32_to_cpu(extent->ee_block);
-	ex_end = le32_to_cpu(extent->ee_block) +
+	if (extent) {
+		ex_start = le32_to_cpu(extent->ee_block);
+		ex_end = le32_to_cpu(extent->ee_block) +
 			ext4_ext_get_actual_len(extent);
+	} else {
+		ex_start = 0;
+		ex_end = 0;
+	}
 	ext4_ext_drop_refs(path);
 	kfree(path);
 
-- 
cgit v1.2.3-70-g09d2


From a18ed359bdddcded4f97ff5e2f07793ff9336913 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Sun, 13 Apr 2014 15:41:13 -0400
Subject: ext4: always check ext4_ext_find_extent result

Where are some places where logic guaranties us that extent we are
searching exits, but this may not be true due to on-disk data
corruption. If such corruption happens we must prevent possible
null pointer dereferences.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/extents.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'fs/ext4/extents.c')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 38be06354b8..64b400356ca 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3313,6 +3313,11 @@ static int ext4_split_extent(handle_t *handle,
 		return PTR_ERR(path);
 	depth = ext_depth(inode);
 	ex = path[depth].p_ext;
+	if (!ex) {
+		EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
+				 (unsigned long) map->m_lblk);
+		return -EIO;
+	}
 	uninitialized = ext4_ext_is_uninitialized(ex);
 	split_flag1 = 0;
 
@@ -3694,6 +3699,12 @@ static int ext4_convert_initialized_extents(handle_t *handle,
 		}
 		depth = ext_depth(inode);
 		ex = path[depth].p_ext;
+		if (!ex) {
+			EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
+					 (unsigned long) map->m_lblk);
+			err = -EIO;
+			goto out;
+		}
 	}
 
 	err = ext4_ext_get_access(handle, inode, path + depth);
@@ -5340,6 +5351,12 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
 			return PTR_ERR(path);
 		depth = path->p_depth;
 		extent = path[depth].p_ext;
+		if (!extent) {
+			EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
+					 (unsigned long) start);
+			return -EIO;
+		}
+
 		current_block = le32_to_cpu(extent->ee_block);
 		if (start > current_block) {
 			/* Hole, move to the next extent */
-- 
cgit v1.2.3-70-g09d2


From 694c793fc1ade0946149c5f8d43f71e0728c4e81 Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Fri, 18 Apr 2014 10:21:15 -0400
Subject: ext4: use truncate_pagecache() in collapse range

We should be using truncate_pagecache() instead of
truncate_pagecache_range() in the collapse range because we're
truncating page cache from offset to the end of file.
truncate_pagecache() also get rid of the private COWed pages from the
range because we're going to shift the end of the file.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/extents.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/ext4/extents.c')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 64b400356ca..3de9b2d7028 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5437,7 +5437,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 		goto out_mutex;
 	}
 
-	truncate_pagecache_range(inode, offset, -1);
+	truncate_pagecache(inode, offset);
 
 	/* Wait for existing dio to complete */
 	ext4_inode_block_unlocked_dio(inode);
-- 
cgit v1.2.3-70-g09d2


From 1a66c7c3bea52ba0f7596b8940d74fce75281d16 Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Fri, 18 Apr 2014 10:41:52 -0400
Subject: ext4: use filemap_write_and_wait_range() correctly in collapse range

Currently we're passing -1 as lend argumnet for
filemap_write_and_wait_range() which is wrong since lend is signed type
so it would cause some confusion and we might not write_and_wait for the
entire range we're expecting to write.

Fix it by using LLONG_MAX instead.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/extents.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/ext4/extents.c')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 3de9b2d7028..f4a676908b0 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5415,7 +5415,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 	}
 
 	/* Write out all dirty pages */
-	ret = filemap_write_and_wait_range(inode->i_mapping, offset, -1);
+	ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
 	if (ret)
 		return ret;
 
-- 
cgit v1.2.3-70-g09d2


From 2c1d23289bc2f7cfa358bc856b87a992dcb11ad5 Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Fri, 18 Apr 2014 10:43:21 -0400
Subject: ext4: fix removing status extents in ext4_collapse_range()

Currently in ext4_collapse_range() when calling ext4_es_remove_extent() to
remove status extents we're passing (EXT_MAX_BLOCKS - punch_start - 1)
in order to remove all extents from start of the collapse range to the
end of the file. However this is wrong because we might miss the
possible extent covering the last block of the file.

Fix it by removing the -1.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Reviewed-by: Namjae Jeon <namjae.jeon@samsung.com>
---
 fs/ext4/extents.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/ext4/extents.c')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index f4a676908b0..c6f624582d3 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5454,7 +5454,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 	ext4_discard_preallocations(inode);
 
 	ret = ext4_es_remove_extent(inode, punch_start,
-				    EXT_MAX_BLOCKS - punch_start - 1);
+				    EXT_MAX_BLOCKS - punch_start);
 	if (ret) {
 		up_write(&EXT4_I(inode)->i_data_sem);
 		goto out_stop;
-- 
cgit v1.2.3-70-g09d2


From 9337d5d31ab798f0c74150506371551a9195251a Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Fri, 18 Apr 2014 10:48:25 -0400
Subject: ext4: no need to truncate pagecache twice in collapse range

We're already calling truncate_pagecache() before we attempt to do any
actual job so there is not need to truncate pagecache once more using
truncate_setsize() after we're finished.

Remove truncate_setsize() and replace it just with i_size_write() note
that we're holding appropriate locks.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/extents.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/ext4/extents.c')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index c6f624582d3..3ee60e2e2ac 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5474,7 +5474,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 	}
 
 	new_size = i_size_read(inode) - len;
-	truncate_setsize(inode, new_size);
+	i_size_write(inode, new_size);
 	EXT4_I(inode)->i_disksize = new_size;
 
 	ext4_discard_preallocations(inode);
-- 
cgit v1.2.3-70-g09d2


From ef24f6c234de9a03aed9368163dbaad9a4f6391f Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Fri, 18 Apr 2014 10:50:23 -0400
Subject: ext4: discard preallocations after removing space

Currently in ext4_collapse_range() and ext4_punch_hole() we're
discarding preallocation twice. Once before we attempt to do any changes
and second time after we're done with the changes.

While the second call to ext4_discard_preallocations() in
ext4_punch_hole() case is not needed, we need to discard preallocation
right after ext4_ext_remove_space() in collapse range case because in
the case we had to restart a transaction in the middle of removing space
we might have new preallocations created.

Remove unneeded ext4_discard_preallocations() ext4_punch_hole() and move
it to the better place in ext4_collapse_range()

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/extents.c | 2 +-
 fs/ext4/inode.c   | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs/ext4/extents.c')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 3ee60e2e2ac..eb7be8f08e1 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5465,6 +5465,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 		up_write(&EXT4_I(inode)->i_data_sem);
 		goto out_stop;
 	}
+	ext4_discard_preallocations(inode);
 
 	ret = ext4_ext_shift_extents(inode, handle, punch_stop,
 				     punch_stop - punch_start);
@@ -5477,7 +5478,6 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 	i_size_write(inode, new_size);
 	EXT4_I(inode)->i_disksize = new_size;
 
-	ext4_discard_preallocations(inode);
 	up_write(&EXT4_I(inode)->i_data_sem);
 	if (IS_SYNC(inode))
 		ext4_handle_sync(handle);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b74cfd2a42e..d7b7462a0e1 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3621,7 +3621,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 		ret = ext4_free_hole_blocks(handle, inode, first_block,
 					    stop_block);
 
-	ext4_discard_preallocations(inode);
 	up_write(&EXT4_I(inode)->i_data_sem);
 	if (IS_SYNC(inode))
 		ext4_handle_sync(handle);
-- 
cgit v1.2.3-70-g09d2


From 6dd834effc12ba71092d9d1e4944530234b58ab1 Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Fri, 18 Apr 2014 10:55:24 -0400
Subject: ext4: fix extent merging in ext4_ext_shift_path_extents()

There is a bug in ext4_ext_shift_path_extents() where if we actually
manage to merge a extent we would skip shifting the next extent. This
will result in in one extent in the extent tree not being properly
shifted.

This is causing failure in various xfstests tests using fsx or fsstress
with collapse range support. It will also cause file system corruption
which looks something like:

 e2fsck 1.42.9 (4-Feb-2014)
 Pass 1: Checking inodes, blocks, and sizes
 Inode 20 has out of order extents
        (invalid logical block 3, physical block 492938, len 2)
 Clear? yes
 ...

when running e2fsck.

It's also very easily reproducible just by running fsx without any
parameters. I can usually hit the problem within a minute.

Fix it by increasing ex_start only if we're not merging the extent.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Reviewed-by: Namjae Jeon <namjae.jeon@samsung.com>
---
 fs/ext4/extents.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'fs/ext4/extents.c')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index eb7be8f08e1..d0860f2d36d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5245,13 +5245,14 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
 
 			while (ex_start <= ex_last) {
 				le32_add_cpu(&ex_start->ee_block, -shift);
-				if (ex_start >
-					EXT_FIRST_EXTENT(path[depth].p_hdr)) {
-					if (ext4_ext_try_to_merge_right(inode,
-						path, ex_start - 1))
-						ex_last--;
-				}
-				ex_start++;
+				/* Try to merge to the left. */
+				if ((ex_start >
+				     EXT_FIRST_EXTENT(path[depth].p_hdr)) &&
+				    ext4_ext_try_to_merge_right(inode,
+							path, ex_start - 1))
+					ex_last--;
+				else
+					ex_start++;
 			}
 			err = ext4_ext_dirty(handle, inode, path + depth);
 			if (err)
-- 
cgit v1.2.3-70-g09d2


From 6c5e73d3a26b73bfcac0b4a932cb918177d067f2 Mon Sep 17 00:00:00 2001
From: jon ernst <jonernst07@gmail.com>
Date: Fri, 18 Apr 2014 11:50:35 -0400
Subject: ext4: enforce we are operating on a regular file in ext4_zero_range()

Signed-off-by: Jon Ernst <jonernst07@gmail.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/extents.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs/ext4/extents.c')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index d0860f2d36d..2f49b12a4c4 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4741,6 +4741,9 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 
 	trace_ext4_zero_range(inode, offset, len, mode);
 
+	if (!S_ISREG(inode->i_mode))
+		return -EINVAL;
+
 	/*
 	 * Write out all dirty pages to avoid race conditions
 	 * Then release them.
-- 
cgit v1.2.3-70-g09d2


From 86f1ca3889142d5959362c5694db3f3dc26f377a Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Fri, 18 Apr 2014 11:52:11 -0400
Subject: ext4: use EINVAL if not a regular file in ext4_collapse_range()

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/extents.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/ext4/extents.c')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 2f49b12a4c4..9b9251adb40 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5404,7 +5404,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 		return -EINVAL;
 
 	if (!S_ISREG(inode->i_mode))
-		return -EOPNOTSUPP;
+		return -EINVAL;
 
 	trace_ext4_collapse_range(inode, offset, len);
 
-- 
cgit v1.2.3-70-g09d2


From a8680e0d5efd46aa54d7085e5b4a268f726922c7 Mon Sep 17 00:00:00 2001
From: Namjae Jeon <namjae.jeon@samsung.com>
Date: Sat, 19 Apr 2014 16:37:31 -0400
Subject: ext4: fix COLLAPSE_RANGE failure with 1KB block size

When formatting with 1KB or 2KB(not aligned with PAGE SIZE) block
size, xfstests generic/075 and 091 are failing. The offset supplied to
function truncate_pagecache_range is block size aligned. In this
function start offset is re-aligned to PAGE_SIZE by rounding_up to the
next page boundary.  Due to this rounding up, old data remains in the
page cache when blocksize is less than page size and start offset is
not aligned with page size.  In case of collapse range, we need to
align start offset to page size boundary by doing a round down
operation instead of round up.

Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
Signed-off-by: Ashish Sangwan <a.sangwan@samsung.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/extents.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'fs/ext4/extents.c')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 9b9251adb40..d6bca2a1deb 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5395,7 +5395,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 	ext4_lblk_t punch_start, punch_stop;
 	handle_t *handle;
 	unsigned int credits;
-	loff_t new_size;
+	loff_t new_size, ioffset;
 	int ret;
 
 	/* Collapse range works only on fs block size aligned offsets. */
@@ -5418,8 +5418,15 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 			return ret;
 	}
 
+	/*
+	 * Need to round down offset to be aligned with page size boundary
+	 * for page size > block size.
+	 */
+	ioffset = round_down(offset, PAGE_SIZE);
+
 	/* Write out all dirty pages */
-	ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
+	ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
+					   LLONG_MAX);
 	if (ret)
 		return ret;
 
@@ -5441,7 +5448,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 		goto out_mutex;
 	}
 
-	truncate_pagecache(inode, offset);
+	truncate_pagecache(inode, ioffset);
 
 	/* Wait for existing dio to complete */
 	ext4_inode_block_unlocked_dio(inode);
-- 
cgit v1.2.3-70-g09d2


From 0a04b248532b358b27a8da050642da6f5f304b03 Mon Sep 17 00:00:00 2001
From: Namjae Jeon <namjae.jeon@samsung.com>
Date: Sat, 19 Apr 2014 16:38:21 -0400
Subject: ext4: disable COLLAPSE_RANGE for bigalloc

Once COLLAPSE RANGE is be disable for ext4 with bigalloc feature till finding
root-cause of problem. It will be enable with fixing that regression of
xfstest(generic 075 and 091) again.

Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
Signed-off-by: Ashish Sangwan <a.sangwan@samsung.com>
Reviewed-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/extents.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs/ext4/extents.c')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index d6bca2a1deb..01b0c208f62 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5406,6 +5406,9 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 	if (!S_ISREG(inode->i_mode))
 		return -EINVAL;
 
+	if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1)
+		return -EOPNOTSUPP;
+
 	trace_ext4_collapse_range(inode, offset, len);
 
 	punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
-- 
cgit v1.2.3-70-g09d2