From f73953c0656f2db9073c585c4df2884a8ecd101e Mon Sep 17 00:00:00 2001 From: Thiemo Nagel Date: Tue, 7 Apr 2009 18:46:47 -0400 Subject: ext4: Fix big-endian problem in __ext4_check_blockref() Commit fe2c8191 introduced a regression on big-endian system, because the checks to make sure block references in non-extent inodes are valid failed to use le32_to_cpu(). Reported-by: Alexander Beregalov Signed-off-by: Thiemo Nagel Tested-by: Alexander Beregalov Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/inode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a2e7952bc5f..c6bd6ced3bb 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -372,16 +372,16 @@ static int ext4_block_to_path(struct inode *inode, } static int __ext4_check_blockref(const char *function, struct inode *inode, - unsigned int *p, unsigned int max) { + __le32 *p, unsigned int max) { unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es); - unsigned int *bref = p; + __le32 *bref = p; while (bref < p+max) { - if (unlikely(*bref >= maxblocks)) { + if (unlikely(le32_to_cpu(*bref) >= maxblocks)) { ext4_error(inode->i_sb, function, "block reference %u >= max (%u) " "in inode #%lu, offset=%d", - *bref, maxblocks, + le32_to_cpu(*bref), maxblocks, inode->i_ino, (int)(bref-p)); return -EIO; } -- cgit v1.2.3-70-g09d2 From 485c26ec70f823f2a9cf45982b724893e53a859e Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 24 Apr 2009 13:43:20 -0400 Subject: ext4: Fix softlockup caused by illegal i_file_acl value in on-disk inode If the block containing external extended attributes (which is stored in i_file_acl and i_file_acl_high) is larger than the on-disk filesystem, the process which tried to access the extended attributes will endlessly issue kernel printks complaining that "__find_get_block_slow() failed", locking up that CPU until the system is forcibly rebooted. So when we read in the inode, make sure the i_file_acl value is legal, and if not, flag the filesystem as being corrupted. Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c6bd6ced3bb..cab75bbcd57 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4409,7 +4409,17 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; } - if (ei->i_flags & EXT4_EXTENTS_FL) { + if (ei->i_file_acl && + ((ei->i_file_acl < + (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) + + EXT4_SB(sb)->s_gdb_count)) || + (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) { + ext4_error(sb, __func__, + "bad extended attribute block %llu in inode #%lu", + ei->i_file_acl, inode->i_ino); + ret = -EIO; + goto bad_inode; + } else if (ei->i_flags & EXT4_EXTENTS_FL) { /* Validate extent which is part of inode */ ret = ext4_ext_check_inode(inode); } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || -- cgit v1.2.3-70-g09d2 From a9e817425dc0baede8ebe5fbc9984a640257432b Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 24 Apr 2009 16:11:18 -0400 Subject: ext4: Ignore i_file_acl_high unless EXT4_FEATURE_INCOMPAT_64BIT is present Don't try to look at i_file_acl_high unless the INCOMPAT_64BIT feature bit is set. The field is normally zero, but older versions of e2fsck didn't automatically check to make sure of this, so in the spirit of "be liberal in what you accept", don't look at i_file_acl_high unless we are using a 64-bit filesystem. Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index cab75bbcd57..11460037ea9 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4357,11 +4357,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ei->i_flags = le32_to_cpu(raw_inode->i_flags); inode->i_blocks = ext4_inode_blocks(raw_inode, ei); ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo); - if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != - cpu_to_le32(EXT4_OS_HURD)) { + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) ei->i_file_acl |= ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; - } inode->i_size = ext4_isize(raw_inode); ei->i_disksize = inode->i_size; inode->i_generation = le32_to_cpu(raw_inode->i_generation); -- cgit v1.2.3-70-g09d2 From c4b5a614316c505922a522b2e35ba05ea3e08a7c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 24 Apr 2009 18:45:35 -0400 Subject: ext4: Do not try to validate extents on special files The EXTENTS_FL flag should never be set on special files, but if it is, don't bother trying to validate that the extents tree is valid, since only files, directories, and non-fast symlinks will ever have an extent data structure. We perhaps should flag the filesystem as being corrupted if we see a special file (named pipes, device nodes, Unix domain sockets, etc.) with the EXTENTS_FL flag, but e2fsck doesn't currently check this case, so we'll just ignore this for now, since it's harmless. Without this fix, a special device with the extents flag is flagged as an error by the kernel, so it is impossible to access or delete the inode, but e2fsck doesn't see it as a problem, leading to confused/frustrated users. Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 11460037ea9..e91f978c7f1 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4407,6 +4407,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; } + ret = 0; if (ei->i_file_acl && ((ei->i_file_acl < (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) + @@ -4418,8 +4419,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ret = -EIO; goto bad_inode; } else if (ei->i_flags & EXT4_EXTENTS_FL) { - /* Validate extent which is part of inode */ - ret = ext4_ext_check_inode(inode); + if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + (S_ISLNK(inode->i_mode) && + !ext4_inode_is_fast_symlink(inode))) + /* Validate extent which is part of inode */ + ret = ext4_ext_check_inode(inode); } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || (S_ISLNK(inode->i_mode) && !ext4_inode_is_fast_symlink(inode))) { -- cgit v1.2.3-70-g09d2 From 9c1ee184a30394e54165fa4c15923cabd952c106 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 13 May 2009 18:36:58 -0400 Subject: ext4: Fix sub-block zeroing for writes into preallocated extents We need to mark the buffer_head mapping preallocated space as new during write_begin. Otherwise we don't zero out the page cache content properly for a partial write. This will cause file corruption with preallocation. Now that we mark the buffer_head new we also need to have a valid buffer_head blocknr so that unmap_underlying_metadata() unmaps the correct block. Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 2 ++ fs/ext4/inode.c | 7 +++++++ 2 files changed, 9 insertions(+) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e4033215834..172656c2a3b 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2875,6 +2875,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, if (allocated > max_blocks) allocated = max_blocks; set_buffer_unwritten(bh_result); + bh_result->b_bdev = inode->i_sb->s_bdev; + bh_result->b_blocknr = newblock; goto out2; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e91f978c7f1..d4b634ae06b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2323,6 +2323,13 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, set_buffer_delay(bh_result); } else if (ret > 0) { bh_result->b_size = (ret << inode->i_blkbits); + /* + * With sub-block writes into unwritten extents + * we also need to mark the buffer as new so that + * the unwritten parts of the buffer gets correctly zeroed. + */ + if (buffer_unwritten(bh_result)) + set_buffer_new(bh_result); ret = 0; } -- cgit v1.2.3-70-g09d2 From 33b9817e2ae097c7b8d256e3510ac6c54fc6d9d0 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 12 May 2009 14:40:37 -0400 Subject: ext4: Use a fake block number for delayed new buffer_head Use a very large unsigned number (~0xffff) as as the fake block number for the delayed new buffer. The VFS should never try to write out this number, but if it does, this will make it obvious. Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d4b634ae06b..0ac31a06422 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2297,6 +2297,10 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { int ret = 0; + sector_t invalid_block = ~((sector_t) 0xffff); + + if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es)) + invalid_block = ~0; BUG_ON(create == 0); BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); @@ -2318,7 +2322,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, /* not enough space to reserve */ return ret; - map_bh(bh_result, inode->i_sb, 0); + map_bh(bh_result, inode->i_sb, invalid_block); set_buffer_new(bh_result); set_buffer_delay(bh_result); } else if (ret > 0) { -- cgit v1.2.3-70-g09d2 From 2a8964d63d50dd2d65d71d342bc7fb6ef4117614 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 14 May 2009 17:05:39 -0400 Subject: ext4: Clear the unwritten buffer_head flag after the extent is initialized The BH_Unwritten flag indicates that the buffer is allocated on disk but has not been written; that is, the disk was part of a persistent preallocation area. That flag should only be set when a get_blocks() function is looking up a inode's logical to physical block mapping. When ext4_get_blocks_wrap() is called with create=1, the uninitialized extent is converted into an initialized one, so the BH_Unwritten flag is no longer appropriate. Hence, we need to make sure the BH_Unwritten is not left set, since the combination of BH_Mapped and BH_Unwritten is not allowed; among other things, it will result ext4's get_block() to be called over and over again during the write_begin phase of write(2). Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0ac31a06422..2a9ffd528dd 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1149,6 +1149,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, int retval; clear_buffer_mapped(bh); + clear_buffer_unwritten(bh); /* * Try to see if we can get the block without requesting @@ -1178,6 +1179,18 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, if (retval > 0 && buffer_mapped(bh)) return retval; + /* + * When we call get_blocks without the create flag, the + * BH_Unwritten flag could have gotten set if the blocks + * requested were part of a uninitialized extent. We need to + * clear this flag now that we are committed to convert all or + * part of the uninitialized extent to be an initialized + * extent. This is because we need to avoid the combination + * of BH_Unwritten and BH_Mapped flags being simultaneously + * set on the buffer_head. + */ + clear_buffer_unwritten(bh); + /* * New blocks allocate and/or writing to uninitialized extent * will possibly result in updating i_data, so we take -- cgit v1.2.3-70-g09d2