From 9378c6768e4fca48971e7b6a9075bc006eda981d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 30 Oct 2014 10:52:57 -0400 Subject: ext4: fix overflow when updating superblock backups after resize When there are no meta block groups update_backups() will compute the backup block in 32-bit arithmetics thus possibly overflowing the block number and corrupting the filesystem. OTOH filesystems without meta block groups larger than 16 TB should be rare. Fix the problem by doing the counting in 64-bit arithmetics. Coverity-id: 741252 CC: stable@vger.kernel.org Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Reviewed-by: Lukas Czerner --- fs/ext4/resize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index f298c60f907..ca4588388fc 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1081,7 +1081,7 @@ static void update_backups(struct super_block *sb, int blk_off, char *data, break; if (meta_bg == 0) - backup_block = group * bpg + blk_off; + backup_block = ((ext4_fsblk_t)group) * bpg + blk_off; else backup_block = (ext4_group_first_block_no(sb, group) + ext4_bg_has_super(sb, group)); -- cgit v1.2.3-70-g09d2 From 599a9b77ab289d85c2d5c8607624efbe1f552b0f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 30 Oct 2014 10:53:16 -0400 Subject: ext4: fix oops when loading block bitmap failed When we fail to load block bitmap in __ext4_new_inode() we will dereference NULL pointer in ext4_journal_get_write_access(). So check for error from ext4_read_block_bitmap(). Coverity-id: 989065 Cc: stable@vger.kernel.org Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext4/ialloc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 8012a5daf40..ac644c31ca6 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -887,6 +887,10 @@ got: struct buffer_head *block_bitmap_bh; block_bitmap_bh = ext4_read_block_bitmap(sb, group); + if (!block_bitmap_bh) { + err = -EIO; + goto out; + } BUFFER_TRACE(block_bitmap_bh, "get block bitmap access"); err = ext4_journal_get_write_access(handle, block_bitmap_bh); if (err) { -- cgit v1.2.3-70-g09d2 From 98c1a7593fa355fda7f5a5940c8bf5326ca964ba Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 30 Oct 2014 10:53:16 -0400 Subject: ext4: enable journal checksum when metadata checksum feature enabled If metadata checksumming is turned on for the FS, we need to tell the journal to use checksumming too. Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/super.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 1eda6ab0ef9..5c11e2153e3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3526,6 +3526,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) #ifdef CONFIG_EXT4_FS_POSIX_ACL set_opt(sb, POSIX_ACL); #endif + /* don't forget to enable journal_csum when metadata_csum is enabled. */ + if (ext4_has_metadata_csum(sb)) + set_opt(sb, JOURNAL_CHECKSUM); + if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) set_opt(sb, JOURNAL_DATA); else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) -- cgit v1.2.3-70-g09d2 From 6b992ff25658367089db4a82666e232b65d55eae Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 30 Oct 2014 10:53:16 -0400 Subject: ext4: disallow changing journal_csum option during remount ext4 does not permit changing the metadata or journal checksum feature flag while mounted. Until we decide to support that, don't allow a remount to change the journal_csum flag (right now we silently fail to change anything). Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 5c11e2153e3..96059e0ef16 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4845,6 +4845,14 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } + if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^ + test_opt(sb, JOURNAL_CHECKSUM)) { + ext4_msg(sb, KERN_ERR, "changing journal_checksum " + "during remount not supported"); + err = -EINVAL; + goto restore_opts; + } + if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { if (test_opt2(sb, EXPLICIT_DELALLOC)) { ext4_msg(sb, KERN_ERR, "can't mount with " -- cgit v1.2.3-70-g09d2 From 50460fe8c6d1d95b16427936e351f277a1c72d43 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 30 Oct 2014 10:53:16 -0400 Subject: ext4: remove extent status procfs files if journal load fails If we can't load the journal, remove the procfs files for the extent status information file to avoid leaking resources. Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/super.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 96059e0ef16..2c9e6864abd 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3947,7 +3947,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) && !(sb->s_flags & MS_RDONLY)) if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) - goto failed_mount3; + goto failed_mount3a; /* * The first inode we look at is the journal inode. Don't try @@ -3956,7 +3956,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (!test_opt(sb, NOLOAD) && EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { if (ext4_load_journal(sb, es, journal_devnum)) - goto failed_mount3; + goto failed_mount3a; } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { ext4_msg(sb, KERN_ERR, "required journal recovery " @@ -4244,6 +4244,7 @@ failed_mount_wq: jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; } +failed_mount3a: ext4_es_unregister_shrinker(sbi); failed_mount3: del_timer_sync(&sbi->s_err_report); -- cgit v1.2.3-70-g09d2 From a41537e69b4aa43f0fea02498c2595a81267383b Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Thu, 30 Oct 2014 10:53:16 -0400 Subject: ext4: prevent bugon on race between write/fcntl O_DIRECT flags can be toggeled via fcntl(F_SETFL). But this value checked twice inside ext4_file_write_iter() and __generic_file_write() which result in BUG_ON inside ext4_direct_IO. Let's initialize iocb->private unconditionally. TESTCASE: xfstest:generic/036 https://patchwork.ozlabs.org/patch/402445/ #TYPICAL STACK TRACE: kernel BUG at fs/ext4/inode.c:2960! invalid opcode: 0000 [#1] SMP Modules linked in: brd iTCO_wdt lpc_ich mfd_core igb ptp dm_mirror dm_region_hash dm_log dm_mod CPU: 6 PID: 5505 Comm: aio-dio-fcntl-r Not tainted 3.17.0-rc2-00176-gff5c017 #161 Hardware name: Intel Corporation W2600CR/W2600CR, BIOS SE5C600.86B.99.99.x028.061320111235 06/13/2011 task: ffff88080e95a7c0 ti: ffff88080f908000 task.ti: ffff88080f908000 RIP: 0010:[] [] ext4_direct_IO+0x162/0x3d0 RSP: 0018:ffff88080f90bb58 EFLAGS: 00010246 RAX: 0000000000000400 RBX: ffff88080fdb2a28 RCX: 00000000a802c818 RDX: 0000040000080000 RSI: ffff88080d8aeb80 RDI: 0000000000000001 RBP: ffff88080f90bbc8 R08: 0000000000000000 R09: 0000000000001581 R10: 0000000000000000 R11: 0000000000000000 R12: ffff88080d8aeb80 R13: ffff88080f90bbf8 R14: ffff88080fdb28c8 R15: ffff88080fdb2a28 FS: 00007f23b2055700(0000) GS:ffff880818400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f23b2045000 CR3: 000000080cedf000 CR4: 00000000000407e0 Stack: ffff88080f90bb98 0000000000000000 7ffffffffffffffe ffff88080fdb2c30 0000000000000200 0000000000000200 0000000000000001 0000000000000200 ffff88080f90bbc8 ffff88080fdb2c30 ffff88080f90be08 0000000000000200 Call Trace: [] generic_file_direct_write+0xed/0x180 [] __generic_file_write_iter+0x222/0x370 [] ext4_file_write_iter+0x34b/0x400 [] ? aio_run_iocb+0x239/0x410 [] ? aio_run_iocb+0x239/0x410 [] ? local_clock+0x25/0x30 [] ? __lock_acquire+0x274/0x700 [] ? ext4_unwritten_wait+0xb0/0xb0 [] aio_run_iocb+0x286/0x410 [] ? local_clock+0x25/0x30 [] ? lock_release_holdtime+0x29/0x190 [] ? lookup_ioctx+0x4b/0xf0 [] do_io_submit+0x55b/0x740 [] ? do_io_submit+0x3ca/0x740 [] SyS_io_submit+0x10/0x20 [] system_call_fastpath+0x16/0x1b Code: 01 48 8b 80 f0 01 00 00 48 8b 18 49 8b 45 10 0f 85 f1 01 00 00 48 03 45 c8 48 3b 43 48 0f 8f e3 01 00 00 49 83 7c 24 18 00 75 04 <0f> 0b eb fe f0 ff 83 ec 01 00 00 49 8b 44 24 18 8b 00 85 c0 89 RIP [] ext4_direct_IO+0x162/0x3d0 RSP Reported-by: Sasha Levin Signed-off-by: Theodore Ts'o Signed-off-by: Dmitry Monakhov Cc: stable@vger.kernel.org --- fs/ext4/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index aca7b24a443..8131be8c0af 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -137,10 +137,10 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos); } + iocb->private = &overwrite; if (o_direct) { blk_start_plug(&plug); - iocb->private = &overwrite; /* check whether we do a DIO overwrite or not */ if (ext4_should_dioread_nolock(inode) && !aio_mutex && -- cgit v1.2.3-70-g09d2 From d48458d4a768cece43f80a081a26cf912877da9c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 30 Oct 2014 10:53:17 -0400 Subject: jbd2: use a better hash function for the revoke table The old hash function didn't work well for 64-bit block numbers, and used undefined (negative) shift right behavior. Use the generic 64-bit hash function instead. Signed-off-by: Theodore Ts'o Reported-by: Andrey Ryabinin --- fs/jbd2/revoke.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index d5e95a175c9..c6cbaef2bda 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -92,6 +92,7 @@ #include #include #include +#include #endif static struct kmem_cache *jbd2_revoke_record_cache; @@ -130,16 +131,9 @@ static void flush_descriptor(journal_t *, struct buffer_head *, int, int); /* Utility functions to maintain the revoke table */ -/* Borrowed from buffer.c: this is a tried and tested block hash function */ static inline int hash(journal_t *journal, unsigned long long block) { - struct jbd2_revoke_table_s *table = journal->j_revoke; - int hash_shift = table->hash_shift; - int hash = (int)block ^ (int)((block >> 31) >> 1); - - return ((hash << (hash_shift - 6)) ^ - (hash >> 13) ^ - (hash << (hash_shift - 12))) & (table->hash_size - 1); + return hash_64(block, journal->j_revoke->hash_shift); } static int insert_revoke_hash(journal_t *journal, unsigned long long blocknr, -- cgit v1.2.3-70-g09d2 From 6050d47adcadbb53582434d919ed7f038d936712 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 30 Oct 2014 10:53:17 -0400 Subject: ext4: bail out from make_indexed_dir() on first error When ext4_handle_dirty_dx_node() or ext4_handle_dirty_dirent_node() fail, there's really something wrong with the fs and there's no point in continuing further. Just return error from make_indexed_dir() in that case. Also initialize frames array so that if we return early due to error, dx_release() doesn't try to dereference uninitialized memory (which could happen also due to error in do_split()). Coverity-id: 741300 Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/namei.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 123798c5ac3..426211882f7 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1816,31 +1816,39 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; ext4fs_dirhash(name, namelen, &hinfo); + memset(frames, 0, sizeof(frames)); frame = frames; frame->entries = entries; frame->at = entries; frame->bh = bh; bh = bh2; - ext4_handle_dirty_dx_node(handle, dir, frame->bh); - ext4_handle_dirty_dirent_node(handle, dir, bh); + retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh); + if (retval) + goto out_frames; + retval = ext4_handle_dirty_dirent_node(handle, dir, bh); + if (retval) + goto out_frames; de = do_split(handle,dir, &bh, frame, &hinfo); if (IS_ERR(de)) { - /* - * Even if the block split failed, we have to properly write - * out all the changes we did so far. Otherwise we can end up - * with corrupted filesystem. - */ - ext4_mark_inode_dirty(handle, dir); - dx_release(frames); - return PTR_ERR(de); + retval = PTR_ERR(de); + goto out_frames; } dx_release(frames); retval = add_dirent_to_buf(handle, dentry, inode, de, bh); brelse(bh); return retval; +out_frames: + /* + * Even if the block split failed, we have to properly write + * out all the changes we did so far. Otherwise we can end up + * with corrupted filesystem. + */ + ext4_mark_inode_dirty(handle, dir); + dx_release(frames); + return retval; } /* -- cgit v1.2.3-70-g09d2 From 4f879ca687a5f2473b952937ce92c795a39019b4 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 30 Oct 2014 10:53:17 -0400 Subject: ext4: bail early when clearing inode journal flag fails When clearing inode journal flag, we call jbd2_journal_flush() to force all the journalled data to their final locations. Currently we ignore when this fails and continue clearing inode journal flag. This isn't a big problem because when jbd2_journal_flush() fails, journal is likely aborted anyway. But it can still lead to somewhat confusing results so rather bail out early. Coverity-id: 989044 Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e9777f93cf0..3356ab5395f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4959,7 +4959,12 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) if (val) ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); else { - jbd2_journal_flush(journal); + err = jbd2_journal_flush(journal); + if (err < 0) { + jbd2_journal_unlock_updates(journal); + ext4_inode_resume_unlocked_dio(inode); + return err; + } ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); } ext4_set_aops(inode); -- cgit v1.2.3-70-g09d2 From ae9e9c6aeea6f91ccb4fb369d7dd8f1a8b5f6a58 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 30 Oct 2014 10:53:17 -0400 Subject: ext4: make ext4_ext_convert_to_initialized() return proper number of blocks ext4_ext_convert_to_initialized() can return more blocks than are actually allocated from map->m_lblk in case where initial part of the on-disk extent is zeroed out. Luckily this doesn't have serious consequences because the caller currently uses the return value only to unmap metadata buffers. Anyway this is a data corruption/exposure problem waiting to happen so fix it. Coverity-id: 1226848 Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 37043d0b2be..0b16fb4c06d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3603,11 +3603,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, } } - allocated = ext4_split_extent(handle, inode, ppath, - &split_map, split_flag, flags); - if (allocated < 0) - err = allocated; - + err = ext4_split_extent(handle, inode, ppath, &split_map, split_flag, + flags); + if (err > 0) + err = 0; out: /* If we have gotten a failure, don't zero out status tree */ if (!err) -- cgit v1.2.3-70-g09d2