From 4adb6ab3e0fa71363a5ef229544b2d17de6600d7 Mon Sep 17 00:00:00 2001 From: Kazuya Mio Date: Mon, 7 Apr 2014 10:53:28 -0400 Subject: ext4: FIBMAP ioctl causes BUG_ON due to handle EXT_MAX_BLOCKS When we try to get 2^32-1 block of the file which has the extent (ee_block=2^32-2, ee_len=1) with FIBMAP ioctl, it causes BUG_ON in ext4_ext_put_gap_in_cache(). To avoid the problem, ext4_map_blocks() needs to check the file logical block number. ext4_ext_put_gap_in_cache() called via ext4_map_blocks() cannot handle 2^32-1 because the maximum file logical block number is 2^32-2. Note that ext4_ind_map_blocks() returns -EIO when the block number is invalid. So ext4_map_blocks() should also return the same errno. Signed-off-by: Kazuya Mio Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/inode.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5b0d2c7d540..93f16c5e8a8 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -522,6 +522,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, if (unlikely(map->m_len > INT_MAX)) map->m_len = INT_MAX; + /* We can handle the block number less than EXT_MAX_BLOCKS */ + if (unlikely(map->m_lblk >= EXT_MAX_BLOCKS)) + return -EIO; + /* Lookup extent status tree firstly */ if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { ext4_es_lru_add(inode); -- cgit v1.2.3-70-g09d2 From 87f7e41636ff201148443551d06bc74497160aac Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 8 Apr 2014 11:38:28 -0400 Subject: ext4: update PF_MEMALLOC handling in ext4_write_inode() The special handling of PF_MEMALLOC callers in ext4_write_inode() shouldn't be necessary as there shouldn't be any. Warn about it. Also update comment before the function as it seems somewhat outdated. (Changes modeled on an ext3 patch posted by Jan Kara to the linux-ext4 mailing list on Februaryt 28, 2014, which apparently never went into the ext3 tree.) Signed-off-by: "Theodore Ts'o" Cc: Jan Kara --- fs/ext4/inode.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 93f16c5e8a8..7b93df9aa18 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4427,21 +4427,20 @@ out_brelse: * * We are called from a few places: * - * - Within generic_file_write() for O_SYNC files. + * - Within generic_file_aio_write() -> generic_write_sync() for O_SYNC files. * Here, there will be no transaction running. We wait for any running * transaction to commit. * - * - Within sys_sync(), kupdate and such. - * We wait on commit, if tol to. + * - Within flush work (sys_sync(), kupdate and such). + * We wait on commit, if told to. * - * - Within prune_icache() (PF_MEMALLOC == true) - * Here we simply return. We can't afford to block kswapd on the - * journal commit. + * - Within iput_final() -> write_inode_now() + * We wait on commit, if told to. * * In all cases it is actually safe for us to return without doing anything, * because the inode has been copied into a raw inode buffer in - * ext4_mark_inode_dirty(). This is a correctness thing for O_SYNC and for - * knfsd. + * ext4_mark_inode_dirty(). This is a correctness thing for WB_SYNC_ALL + * writeback. * * Note that we are absolutely dependent upon all inode dirtiers doing the * right thing: they *must* call mark_inode_dirty() after dirtying info in @@ -4453,15 +4452,15 @@ out_brelse: * stuff(); * inode->i_size = expr; * - * is in error because a kswapd-driven write_inode() could occur while - * `stuff()' is running, and the new i_size will be lost. Plus the inode - * will no longer be on the superblock's dirty inode list. + * is in error because write_inode() could occur while `stuff()' is running, + * and the new i_size will be lost. Plus the inode will no longer be on the + * superblock's dirty inode list. */ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) { int err; - if (current->flags & PF_MEMALLOC) + if (WARN_ON_ONCE(current->flags & PF_MEMALLOC)) return 0; if (EXT4_SB(inode->i_sb)->s_journal) { -- cgit v1.2.3-70-g09d2 From 622cad1325e404598fe3b148c3fa640dbaabc235 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 11 Apr 2014 10:35:17 -0400 Subject: ext4: move ext4_update_i_disksize() into mpage_map_and_submit_extent() The function ext4_update_i_disksize() is used in only one place, in the function mpage_map_and_submit_extent(). Move its code to simplify the code paths, and also move the call to ext4_mark_inode_dirty() into the i_data_sem's critical region, to be consistent with all of the other places where we update i_disksize. That way, we also keep the raw_inode's i_disksize protected, to avoid the following race: CPU #1 CPU #2 down_write(&i_data_sem) Modify i_disk_size up_write(&i_data_sem) down_write(&i_data_sem) Modify i_disk_size Copy i_disk_size to on-disk inode up_write(&i_data_sem) Copy i_disk_size to on-disk inode Signed-off-by: "Theodore Ts'o" Reviewed-by: Jan Kara Cc: stable@vger.kernel.org --- fs/ext4/ext4.h | 17 ----------------- fs/ext4/inode.c | 16 +++++++++++++--- 2 files changed, 13 insertions(+), 20 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index f1c65dc7cc0..66946aa6212 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2466,23 +2466,6 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) up_write(&EXT4_I(inode)->i_data_sem); } -/* - * Update i_disksize after writeback has been started. Races with truncate - * are avoided by checking i_size under i_data_sem. - */ -static inline void ext4_wb_update_i_disksize(struct inode *inode, loff_t newsize) -{ - loff_t i_size; - - down_write(&EXT4_I(inode)->i_data_sem); - i_size = i_size_read(inode); - if (newsize > i_size) - newsize = i_size; - if (newsize > EXT4_I(inode)->i_disksize) - EXT4_I(inode)->i_disksize = newsize; - up_write(&EXT4_I(inode)->i_data_sem); -} - struct ext4_group_info { unsigned long bb_state; struct rb_root bb_free_root; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7b93df9aa18..f023f0cb46f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2247,13 +2247,23 @@ static int mpage_map_and_submit_extent(handle_t *handle, return err; } while (map->m_len); - /* Update on-disk size after IO is submitted */ + /* + * Update on-disk size after IO is submitted. Races with + * truncate are avoided by checking i_size under i_data_sem. + */ disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; if (disksize > EXT4_I(inode)->i_disksize) { int err2; - - ext4_wb_update_i_disksize(inode, disksize); + loff_t i_size; + + down_write(&EXT4_I(inode)->i_data_sem); + i_size = i_size_read(inode); + if (disksize > i_size) + disksize = i_size; + if (disksize > EXT4_I(inode)->i_disksize) + EXT4_I(inode)->i_disksize = disksize; err2 = ext4_mark_inode_dirty(handle, inode); + up_write(&EXT4_I(inode)->i_data_sem); if (err2) ext4_error(inode->i_sb, "Failed to mark inode %lu dirty", -- cgit v1.2.3-70-g09d2 From 9ef06cec7c96f6bf59f1dd8b64b9645820099051 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Sat, 12 Apr 2014 09:47:00 -0400 Subject: ext4: remove unnecessary check for APPEND and IMMUTABLE All the checks IS_APPEND and IS_IMMUTABLE for the fallocate operation on the inode are done in vfs. No need to do this again in ext4. Remove it. Signed-off-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 6 ------ fs/ext4/inode.c | 6 +----- 2 files changed, 1 insertion(+), 11 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index be1e56cbbf3..ed4ec48239b 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5398,12 +5398,6 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) /* Take mutex lock */ mutex_lock(&inode->i_mutex); - /* It's not possible punch hole on append only file */ - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { - ret = -EPERM; - goto out_mutex; - } - if (IS_SWAPFILE(inode)) { ret = -ETXTBSY; goto out_mutex; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index f023f0cb46f..e2bba76f0d7 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3541,11 +3541,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) } mutex_lock(&inode->i_mutex); - /* It's not possible punch hole on append only file */ - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { - ret = -EPERM; - goto out_mutex; - } + if (IS_SWAPFILE(inode)) { ret = -ETXTBSY; goto out_mutex; -- cgit v1.2.3-70-g09d2 From 0790b31b69374ddadefebb156251b319e5b43345 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Sat, 12 Apr 2014 10:05:37 -0400 Subject: fs: disallow all fallocate operation on active swapfile Currently some file system have IS_SWAPFILE check in their fallocate implementations and some do not. However we should really prevent any fallocate operation on swapfile so move the check to vfs and remove the redundant checks from the file systems fallocate implementations. Signed-off-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" --- fs/ceph/file.c | 3 --- fs/ext4/extents.c | 5 ----- fs/ext4/inode.c | 5 ----- fs/open.c | 7 +++++++ 4 files changed, 7 insertions(+), 13 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 09c7afe32e4..596e6cc9f9c 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1215,9 +1215,6 @@ static long ceph_fallocate(struct file *file, int mode, if (!S_ISREG(inode->i_mode)) return -EOPNOTSUPP; - if (IS_SWAPFILE(inode)) - return -ETXTBSY; - mutex_lock(&inode->i_mutex); if (ceph_snap(inode) != CEPH_NOSNAP) { diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index ac5460d0d13..b2d3869b576 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5405,11 +5405,6 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) goto out_mutex; } - if (IS_SWAPFILE(inode)) { - ret = -ETXTBSY; - goto out_mutex; - } - /* Currently just for extent based files */ if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { ret = -EOPNOTSUPP; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e2bba76f0d7..b74cfd2a42e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3542,11 +3542,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) mutex_lock(&inode->i_mutex); - if (IS_SWAPFILE(inode)) { - ret = -ETXTBSY; - goto out_mutex; - } - /* No need to punch hole beyond i_size */ if (offset >= inode->i_size) goto out_mutex; diff --git a/fs/open.c b/fs/open.c index adf34202213..7b823daa6a9 100644 --- a/fs/open.c +++ b/fs/open.c @@ -262,6 +262,13 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (IS_IMMUTABLE(inode)) return -EPERM; + /* + * We can not allow to do any fallocate operation on an active + * swapfile + */ + if (IS_SWAPFILE(inode)) + ret = -ETXTBSY; + /* * Revalidate the write permissions, in case security policy has * changed since the files were opened. -- cgit v1.2.3-70-g09d2 From ef24f6c234de9a03aed9368163dbaad9a4f6391f Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Fri, 18 Apr 2014 10:50:23 -0400 Subject: ext4: discard preallocations after removing space Currently in ext4_collapse_range() and ext4_punch_hole() we're discarding preallocation twice. Once before we attempt to do any changes and second time after we're done with the changes. While the second call to ext4_discard_preallocations() in ext4_punch_hole() case is not needed, we need to discard preallocation right after ext4_ext_remove_space() in collapse range case because in the case we had to restart a transaction in the middle of removing space we might have new preallocations created. Remove unneeded ext4_discard_preallocations() ext4_punch_hole() and move it to the better place in ext4_collapse_range() Signed-off-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 2 +- fs/ext4/inode.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 3ee60e2e2ac..eb7be8f08e1 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5465,6 +5465,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) up_write(&EXT4_I(inode)->i_data_sem); goto out_stop; } + ext4_discard_preallocations(inode); ret = ext4_ext_shift_extents(inode, handle, punch_stop, punch_stop - punch_start); @@ -5477,7 +5478,6 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) i_size_write(inode, new_size); EXT4_I(inode)->i_disksize = new_size; - ext4_discard_preallocations(inode); up_write(&EXT4_I(inode)->i_data_sem); if (IS_SYNC(inode)) ext4_handle_sync(handle); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b74cfd2a42e..d7b7462a0e1 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3621,7 +3621,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) ret = ext4_free_hole_blocks(handle, inode, first_block, stop_block); - ext4_discard_preallocations(inode); up_write(&EXT4_I(inode)->i_data_sem); if (IS_SYNC(inode)) ext4_handle_sync(handle); -- cgit v1.2.3-70-g09d2