diff options
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/balloc.c | 66 | ||||
-rw-r--r-- | fs/ext4/dir.c | 3 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 35 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.c | 7 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.h | 4 | ||||
-rw-r--r-- | fs/ext4/extents.c | 14 | ||||
-rw-r--r-- | fs/ext4/extents_status.c | 8 | ||||
-rw-r--r-- | fs/ext4/file.c | 3 | ||||
-rw-r--r-- | fs/ext4/inline.c | 15 | ||||
-rw-r--r-- | fs/ext4/inode.c | 65 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 22 | ||||
-rw-r--r-- | fs/ext4/migrate.c | 2 | ||||
-rw-r--r-- | fs/ext4/mmp.c | 4 | ||||
-rw-r--r-- | fs/ext4/move_extent.c | 1 | ||||
-rw-r--r-- | fs/ext4/namei.c | 131 | ||||
-rw-r--r-- | fs/ext4/page-io.c | 32 | ||||
-rw-r--r-- | fs/ext4/resize.c | 15 | ||||
-rw-r--r-- | fs/ext4/super.c | 18 | ||||
-rw-r--r-- | fs/ext4/xattr.c | 9 |
19 files changed, 263 insertions, 191 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 5c56785007e..0762d143e25 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -83,9 +83,9 @@ static inline int ext4_block_in_group(struct super_block *sb, /* Return the number of clusters used for file system metadata; this * represents the overhead needed by the file system. */ -unsigned ext4_num_overhead_clusters(struct super_block *sb, - ext4_group_t block_group, - struct ext4_group_desc *gdp) +static unsigned ext4_num_overhead_clusters(struct super_block *sb, + ext4_group_t block_group, + struct ext4_group_desc *gdp) { unsigned num_clusters; int block_cluster = -1, inode_cluster = -1, itbl_cluster = -1, i, c; @@ -176,9 +176,10 @@ static unsigned int num_clusters_in_group(struct super_block *sb, } /* Initializes an uninitialized block bitmap */ -void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, - ext4_group_t block_group, - struct ext4_group_desc *gdp) +static void ext4_init_block_bitmap(struct super_block *sb, + struct buffer_head *bh, + ext4_group_t block_group, + struct ext4_group_desc *gdp) { unsigned int bit, bit_max; struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -307,6 +308,7 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, ext4_group_t block_group, struct buffer_head *bh) { + struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_grpblk_t offset; ext4_grpblk_t next_zero_bit; ext4_fsblk_t blk; @@ -326,14 +328,14 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, /* check whether block bitmap block number is set */ blk = ext4_block_bitmap(sb, desc); offset = blk - group_first_block; - if (!ext4_test_bit(offset, bh->b_data)) + if (!ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data)) /* bad block bitmap */ return blk; /* check whether the inode bitmap block number is set */ blk = ext4_inode_bitmap(sb, desc); offset = blk - group_first_block; - if (!ext4_test_bit(offset, bh->b_data)) + if (!ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data)) /* bad block bitmap */ return blk; @@ -341,18 +343,19 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, blk = ext4_inode_table(sb, desc); offset = blk - group_first_block; next_zero_bit = ext4_find_next_zero_bit(bh->b_data, - offset + EXT4_SB(sb)->s_itb_per_group, - offset); - if (next_zero_bit < offset + EXT4_SB(sb)->s_itb_per_group) + EXT4_B2C(sbi, offset + EXT4_SB(sb)->s_itb_per_group), + EXT4_B2C(sbi, offset)); + if (next_zero_bit < + EXT4_B2C(sbi, offset + EXT4_SB(sb)->s_itb_per_group)) /* bad bitmap for inode tables */ return blk; return 0; } -void ext4_validate_block_bitmap(struct super_block *sb, - struct ext4_group_desc *desc, - ext4_group_t block_group, - struct buffer_head *bh) +static void ext4_validate_block_bitmap(struct super_block *sb, + struct ext4_group_desc *desc, + ext4_group_t block_group, + struct buffer_head *bh) { ext4_fsblk_t blk; struct ext4_group_info *grp = ext4_get_group_info(sb, block_group); @@ -708,16 +711,6 @@ static inline int test_root(ext4_group_t a, int b) } } -static int ext4_group_sparse(ext4_group_t group) -{ - if (group <= 1) - return 1; - if (!(group & 1)) - return 0; - return (test_root(group, 7) || test_root(group, 5) || - test_root(group, 3)); -} - /** * ext4_bg_has_super - number of blocks used by the superblock in group * @sb: superblock for filesystem @@ -728,11 +721,26 @@ static int ext4_group_sparse(ext4_group_t group) */ int ext4_bg_has_super(struct super_block *sb, ext4_group_t group) { - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) && - !ext4_group_sparse(group)) + struct ext4_super_block *es = EXT4_SB(sb)->s_es; + + if (group == 0) + return 1; + if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_SPARSE_SUPER2)) { + if (group == le32_to_cpu(es->s_backup_bgs[0]) || + group == le32_to_cpu(es->s_backup_bgs[1])) + return 1; return 0; - return 1; + } + if ((group <= 1) || !EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) + return 1; + if (!(group & 1)) + return 0; + if (test_root(group, 3) || (test_root(group, 5)) || + test_root(group, 7)) + return 1; + + return 0; } static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb, diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index d638c57e996..ef1bed66c14 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -105,7 +105,7 @@ int __ext4_check_dir_entry(const char *function, unsigned int line, static int ext4_readdir(struct file *file, struct dir_context *ctx) { unsigned int offset; - int i, stored; + int i; struct ext4_dir_entry_2 *de; int err; struct inode *inode = file_inode(file); @@ -133,7 +133,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) return ret; } - stored = 0; offset = ctx->pos & (sb->s_blocksize - 1); while (ctx->pos < inode->i_size) { diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index eb37d76bf91..7cc5a0e2368 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -875,6 +875,8 @@ struct ext4_inode_info { struct inode vfs_inode; struct jbd2_inode *jinode; + spinlock_t i_raw_lock; /* protects updates to the raw inode */ + /* * File creation time. Its function is same as that of * struct timespec i_{a,c,m}time in the generic inode. @@ -1158,7 +1160,8 @@ struct ext4_super_block { __le32 s_usr_quota_inum; /* inode for tracking user quota */ __le32 s_grp_quota_inum; /* inode for tracking group quota */ __le32 s_overhead_clusters; /* overhead blocks/clusters in fs */ - __le32 s_reserved[108]; /* Padding to the end of the block */ + __le32 s_backup_bgs[2]; /* groups with sparse_super2 SBs */ + __le32 s_reserved[106]; /* Padding to the end of the block */ __le32 s_checksum; /* crc32c(superblock) */ }; @@ -1504,6 +1507,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) #define EXT4_FEATURE_COMPAT_EXT_ATTR 0x0008 #define EXT4_FEATURE_COMPAT_RESIZE_INODE 0x0010 #define EXT4_FEATURE_COMPAT_DIR_INDEX 0x0020 +#define EXT4_FEATURE_COMPAT_SPARSE_SUPER2 0x0200 #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 @@ -1952,10 +1956,6 @@ extern void ext4_get_group_no_and_offset(struct super_block *sb, extern ext4_group_t ext4_get_group_number(struct super_block *sb, ext4_fsblk_t block); -extern void ext4_validate_block_bitmap(struct super_block *sb, - struct ext4_group_desc *desc, - ext4_group_t block_group, - struct buffer_head *bh); extern unsigned int ext4_block_group(struct super_block *sb, ext4_fsblk_t blocknr); extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb, @@ -1984,16 +1984,9 @@ extern int ext4_wait_block_bitmap(struct super_block *sb, struct buffer_head *bh); extern struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group); -extern void ext4_init_block_bitmap(struct super_block *sb, - struct buffer_head *bh, - ext4_group_t group, - struct ext4_group_desc *desc); extern unsigned ext4_free_clusters_after_init(struct super_block *sb, ext4_group_t block_group, struct ext4_group_desc *gdp); -extern unsigned ext4_num_overhead_clusters(struct super_block *sb, - ext4_group_t block_group, - struct ext4_group_desc *gdp); ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); /* dir.c */ @@ -2136,8 +2129,6 @@ extern int ext4_alloc_da_blocks(struct inode *inode); extern void ext4_set_aops(struct inode *inode); extern int ext4_writepage_trans_blocks(struct inode *); extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); -extern int ext4_block_truncate_page(handle_t *handle, - struct address_space *mapping, loff_t from); extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, loff_t lstart, loff_t lend); extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); @@ -2196,8 +2187,6 @@ extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count); /* super.c */ extern int ext4_calculate_overhead(struct super_block *sb); -extern int ext4_superblock_csum_verify(struct super_block *sb, - struct ext4_super_block *es); extern void ext4_superblock_csum_set(struct super_block *sb); extern void *ext4_kvmalloc(size_t size, gfp_t flags); extern void *ext4_kvzalloc(size_t size, gfp_t flags); @@ -2569,19 +2558,11 @@ extern const struct file_operations ext4_dir_operations; extern const struct inode_operations ext4_file_inode_operations; extern const struct file_operations ext4_file_operations; extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); -extern void ext4_unwritten_wait(struct inode *inode); /* inline.c */ extern int ext4_has_inline_data(struct inode *inode); -extern int ext4_get_inline_size(struct inode *inode); extern int ext4_get_max_inline_size(struct inode *inode); extern int ext4_find_inline_data_nolock(struct inode *inode); -extern void ext4_write_inline_data(struct inode *inode, - struct ext4_iloc *iloc, - void *buffer, loff_t pos, - unsigned int len); -extern int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, - unsigned int len); extern int ext4_init_inline_data(handle_t *handle, struct inode *inode, unsigned int len); extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode); @@ -2769,13 +2750,11 @@ extern void ext4_io_submit(struct ext4_io_submit *io); extern int ext4_bio_write_page(struct ext4_io_submit *io, struct page *page, int len, - struct writeback_control *wbc); + struct writeback_control *wbc, + bool keep_towrite); /* mmp.c */ extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t); -extern void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp); -extern int ext4_mmp_csum_verify(struct super_block *sb, - struct mmp_struct *mmp); /* * Note that these flags will never ever appear in a buffer_head's state flag. diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index c3fb607413e..0074e0d23d6 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -122,9 +122,10 @@ handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line, return handle; } -void ext4_journal_abort_handle(const char *caller, unsigned int line, - const char *err_fn, struct buffer_head *bh, - handle_t *handle, int err) +static void ext4_journal_abort_handle(const char *caller, unsigned int line, + const char *err_fn, + struct buffer_head *bh, + handle_t *handle, int err) { char nbuf[16]; const char *errstr = ext4_decode_error(NULL, err, nbuf); diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 81cfefa9dc0..17c00ff202f 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -231,10 +231,6 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode); /* * Wrapper functions with which ext4 calls into JBD. */ -void ext4_journal_abort_handle(const char *caller, unsigned int line, - const char *err_fn, - struct buffer_head *bh, handle_t *handle, int err); - int __ext4_journal_get_write_access(const char *where, unsigned int line, handle_t *handle, struct buffer_head *bh); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e305a31641f..4da228a0e6d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -143,6 +143,7 @@ static int ext4_ext_get_access(handle_t *handle, struct inode *inode, { if (path->p_bh) { /* path points to block */ + BUFFER_TRACE(path->p_bh, "get_write_access"); return ext4_journal_get_write_access(handle, path->p_bh); } /* path points to leaf/index in inode body */ @@ -4018,7 +4019,7 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, allocated, newblock); /* get_block() before submit the IO, split the extent */ - if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { + if (flags & EXT4_GET_BLOCKS_PRE_IO) { ret = ext4_split_convert_extents(handle, inode, map, path, flags | EXT4_GET_BLOCKS_CONVERT); if (ret <= 0) @@ -4036,7 +4037,7 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, goto out; } /* IO end_io complete, convert the filled extent to written */ - if ((flags & EXT4_GET_BLOCKS_CONVERT)) { + if (flags & EXT4_GET_BLOCKS_CONVERT) { ret = ext4_convert_unwritten_extents_endio(handle, inode, map, path); if (ret >= 0) { @@ -4475,7 +4476,7 @@ got_allocated_blocks: * For non asycn direct IO case, flag the inode state * that we need to perform conversion when IO is done. */ - if ((flags & EXT4_GET_BLOCKS_PRE_IO)) + if (flags & EXT4_GET_BLOCKS_PRE_IO) set_unwritten = 1; } @@ -4740,6 +4741,13 @@ static long ext4_zero_range(struct file *file, loff_t offset, if (!S_ISREG(inode->i_mode)) return -EINVAL; + /* Call ext4_force_commit to flush all data in case of data=journal. */ + if (ext4_should_journal_data(inode)) { + ret = ext4_force_commit(inode->i_sb); + if (ret) + return ret; + } + /* * Write out all dirty pages to avoid race conditions * Then release them. diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 98c90f5834e..3f5c188953a 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -344,8 +344,14 @@ static int ext4_es_can_be_merged(struct extent_status *es1, if (ext4_es_status(es1) != ext4_es_status(es2)) return 0; - if (((__u64) es1->es_len) + es2->es_len > 0xFFFFFFFFULL) + if (((__u64) es1->es_len) + es2->es_len > EXT_MAX_BLOCKS) { + pr_warn("ES assertion failed when merging extents. " + "The sum of lengths of es1 (%d) and es2 (%d) " + "is bigger than allowed file size (%d)\n", + es1->es_len, es2->es_len, EXT_MAX_BLOCKS); + WARN_ON(1); return 0; + } if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk) return 0; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 708aad76819..8695f70af1e 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -57,7 +57,7 @@ static int ext4_release_file(struct inode *inode, struct file *filp) return 0; } -void ext4_unwritten_wait(struct inode *inode) +static void ext4_unwritten_wait(struct inode *inode) { wait_queue_head_t *wq = ext4_ioend_wq(inode); @@ -237,6 +237,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp) handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1); if (IS_ERR(handle)) return PTR_ERR(handle); + BUFFER_TRACE(sbi->s_sbh, "get_write_access"); err = ext4_journal_get_write_access(handle, sbi->s_sbh); if (err) { ext4_journal_stop(handle); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 82edf5b9335..645205d8ada 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -22,7 +22,7 @@ #define EXT4_INLINE_DOTDOT_OFFSET 2 #define EXT4_INLINE_DOTDOT_SIZE 4 -int ext4_get_inline_size(struct inode *inode) +static int ext4_get_inline_size(struct inode *inode) { if (EXT4_I(inode)->i_inline_off) return EXT4_I(inode)->i_inline_size; @@ -211,8 +211,8 @@ out: * value since it is already handled by ext4_xattr_ibody_inline_set. * That saves us one memcpy. */ -void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc, - void *buffer, loff_t pos, unsigned int len) +static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc, + void *buffer, loff_t pos, unsigned int len) { struct ext4_xattr_entry *entry; struct ext4_xattr_ibody_header *header; @@ -264,6 +264,7 @@ static int ext4_create_inline_data(handle_t *handle, if (error) return error; + BUFFER_TRACE(is.iloc.bh, "get_write_access"); error = ext4_journal_get_write_access(handle, is.iloc.bh); if (error) goto out; @@ -347,6 +348,7 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode, if (error == -ENODATA) goto out; + BUFFER_TRACE(is.iloc.bh, "get_write_access"); error = ext4_journal_get_write_access(handle, is.iloc.bh); if (error) goto out; @@ -373,8 +375,8 @@ out: return error; } -int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, - unsigned int len) +static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, + unsigned int len) { int ret, size; struct ext4_inode_info *ei = EXT4_I(inode); @@ -424,6 +426,7 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle, if (error) goto out; + BUFFER_TRACE(is.iloc.bh, "get_write_access"); error = ext4_journal_get_write_access(handle, is.iloc.bh); if (error) goto out; @@ -1007,6 +1010,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle, if (err) return err; + BUFFER_TRACE(iloc->bh, "get_write_access"); err = ext4_journal_get_write_access(handle, iloc->bh); if (err) return err; @@ -1669,6 +1673,7 @@ int ext4_delete_inline_entry(handle_t *handle, EXT4_MIN_INLINE_DATA_SIZE; } + BUFFER_TRACE(bh, "get_write_access"); err = ext4_journal_get_write_access(handle, bh); if (err) goto out; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b2cee73c143..8a064734e6e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -148,6 +148,9 @@ static int ext4_inode_is_fast_symlink(struct inode *inode) int ea_blocks = EXT4_I(inode)->i_file_acl ? EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0; + if (ext4_has_inline_data(inode)) + return 0; + return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); } @@ -443,7 +446,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle, * could be converted. */ if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) - down_read((&EXT4_I(inode)->i_data_sem)); + down_read(&EXT4_I(inode)->i_data_sem); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { retval = ext4_ext_map_blocks(handle, inode, map, flags & EXT4_GET_BLOCKS_KEEP_SIZE); @@ -555,7 +558,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, * file system block. */ if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) - down_read((&EXT4_I(inode)->i_data_sem)); + down_read(&EXT4_I(inode)->i_data_sem); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { retval = ext4_ext_map_blocks(handle, inode, map, flags & EXT4_GET_BLOCKS_KEEP_SIZE); @@ -627,7 +630,7 @@ found: * the write lock of i_data_sem, and call get_blocks() * with create == 1 flag. */ - down_write((&EXT4_I(inode)->i_data_sem)); + down_write(&EXT4_I(inode)->i_data_sem); /* * if the caller is from delayed allocation writeout path @@ -922,6 +925,7 @@ int do_journal_get_write_access(handle_t *handle, */ if (dirty) clear_buffer_dirty(bh); + BUFFER_TRACE(bh, "get write access"); ret = ext4_journal_get_write_access(handle, bh); if (!ret && dirty) ret = ext4_handle_dirty_metadata(handle, NULL, bh); @@ -1540,7 +1544,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, ext4_es_lru_add(inode); if (ext4_es_is_hole(&es)) { retval = 0; - down_read((&EXT4_I(inode)->i_data_sem)); + down_read(&EXT4_I(inode)->i_data_sem); goto add_delayed; } @@ -1577,7 +1581,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, * Try to see if we can get the block without requesting a new * file system block. */ - down_read((&EXT4_I(inode)->i_data_sem)); + down_read(&EXT4_I(inode)->i_data_sem); if (ext4_has_inline_data(inode)) { /* * We will soon create blocks for this page, and let @@ -1769,6 +1773,7 @@ static int __ext4_journalled_writepage(struct page *page, BUG_ON(!ext4_handle_valid(handle)); if (inline_data) { + BUFFER_TRACE(inode_bh, "get write access"); ret = ext4_journal_get_write_access(handle, inode_bh); err = ext4_handle_dirty_metadata(handle, inode, inode_bh); @@ -1846,6 +1851,7 @@ static int ext4_writepage(struct page *page, struct buffer_head *page_bufs = NULL; struct inode *inode = page->mapping->host; struct ext4_io_submit io_submit; + bool keep_towrite = false; trace_ext4_writepage(page); size = i_size_read(inode); @@ -1876,6 +1882,7 @@ static int ext4_writepage(struct page *page, unlock_page(page); return 0; } + keep_towrite = true; } if (PageChecked(page) && ext4_should_journal_data(inode)) @@ -1892,7 +1899,7 @@ static int ext4_writepage(struct page *page, unlock_page(page); return -ENOMEM; } - ret = ext4_bio_write_page(&io_submit, page, len, wbc); + ret = ext4_bio_write_page(&io_submit, page, len, wbc, keep_towrite); ext4_io_submit(&io_submit); /* Drop io_end reference we got from init */ ext4_put_io_end_defer(io_submit.io_end); @@ -1911,7 +1918,7 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) else len = PAGE_CACHE_SIZE; clear_page_dirty_for_io(page); - err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc); + err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false); if (!err) mpd->wbc->nr_to_write--; mpd->first_page++; @@ -3439,7 +3446,7 @@ unlock: * This required during truncate. We need to physically zero the tail end * of that block so it doesn't yield old data if the file is later grown. */ -int ext4_block_truncate_page(handle_t *handle, +static int ext4_block_truncate_page(handle_t *handle, struct address_space *mapping, loff_t from) { unsigned offset = from & (PAGE_CACHE_SIZE-1); @@ -4303,12 +4310,15 @@ static int ext4_do_update_inode(handle_t *handle, struct ext4_inode *raw_inode = ext4_raw_inode(iloc); struct ext4_inode_info *ei = EXT4_I(inode); struct buffer_head *bh = iloc->bh; + struct super_block *sb = inode->i_sb; int err = 0, rc, block; - int need_datasync = 0; + int need_datasync = 0, set_large_file = 0; uid_t i_uid; gid_t i_gid; - /* For fields not not tracking in the in-memory inode, + spin_lock(&ei->i_raw_lock); + + /* For fields not tracked in the in-memory inode, * initialise them to zero for new inodes. */ if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); @@ -4346,8 +4356,10 @@ static int ext4_do_update_inode(handle_t *handle, EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); - if (ext4_inode_blocks_set(handle, raw_inode, ei)) + if (ext4_inode_blocks_set(handle, raw_inode, ei)) { + spin_unlock(&ei->i_raw_lock); goto out_brelse; + } raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF); if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) @@ -4359,24 +4371,11 @@ static int ext4_do_update_inode(handle_t *handle, need_datasync = 1; } if (ei->i_disksize > 0x7fffffffULL) { - struct super_block *sb = inode->i_sb; if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_LARGE_FILE) || EXT4_SB(sb)->s_es->s_rev_level == - cpu_to_le32(EXT4_GOOD_OLD_REV)) { - /* If this is the first large file - * created, add a flag to the superblock. - */ - err = ext4_journal_get_write_access(handle, - EXT4_SB(sb)->s_sbh); - if (err) - goto out_brelse; - ext4_update_dynamic_rev(sb); - EXT4_SET_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_LARGE_FILE); - ext4_handle_sync(handle); - err = ext4_handle_dirty_super(handle, sb); - } + cpu_to_le32(EXT4_GOOD_OLD_REV)) + set_large_file = 1; } raw_inode->i_generation = cpu_to_le32(inode->i_generation); if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { @@ -4408,12 +4407,24 @@ static int ext4_do_update_inode(handle_t *handle, ext4_inode_csum_set(inode, raw_inode, ei); + spin_unlock(&ei->i_raw_lock); + BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); rc = ext4_handle_dirty_metadata(handle, NULL, bh); if (!err) err = rc; ext4_clear_inode_state(inode, EXT4_STATE_NEW); - + if (set_large_file) { + BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get write access"); + err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); + if (err) + goto out_brelse; + ext4_update_dynamic_rev(sb); + EXT4_SET_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_LARGE_FILE); + ext4_handle_sync(handle); + err = ext4_handle_dirty_super(handle, sb); + } ext4_update_inode_fsync_trans(handle, inode, need_datasync); out_brelse: brelse(bh); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index c8238a26818..59e31622cc6 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1044,6 +1044,8 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) * allocating. If we are looking at the buddy cache we would * have taken a reference using ext4_mb_load_buddy and that * would have pinned buddy page to page cache. + * The call to ext4_mb_get_buddy_page_lock will mark the + * page accessed. */ ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b); if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) { @@ -1062,7 +1064,6 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) ret = -EIO; goto err; } - mark_page_accessed(page); if (e4b.bd_buddy_page == NULL) { /* @@ -1082,7 +1083,6 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) ret = -EIO; goto err; } - mark_page_accessed(page); err: ext4_mb_put_buddy_page_lock(&e4b); return ret; @@ -1141,7 +1141,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, /* we could use find_or_create_page(), but it locks page * what we'd like to avoid in fast path ... */ - page = find_get_page(inode->i_mapping, pnum); + page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED); if (page == NULL || !PageUptodate(page)) { if (page) /* @@ -1176,15 +1176,16 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, ret = -EIO; goto err; } + + /* Pages marked accessed already */ e4b->bd_bitmap_page = page; e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); - mark_page_accessed(page); block++; pnum = block / blocks_per_page; poff = block % blocks_per_page; - page = find_get_page(inode->i_mapping, pnum); + page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED); if (page == NULL || !PageUptodate(page)) { if (page) page_cache_release(page); @@ -1209,9 +1210,10 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, ret = -EIO; goto err; } + + /* Pages marked accessed already */ e4b->bd_buddy_page = page; e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize); - mark_page_accessed(page); BUG_ON(e4b->bd_bitmap_page == NULL); BUG_ON(e4b->bd_buddy_page == NULL); @@ -2617,7 +2619,7 @@ int ext4_mb_init(struct super_block *sb) sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); if (sbi->s_locality_groups == NULL) { ret = -ENOMEM; - goto out_free_groupinfo_slab; + goto out; } for_each_possible_cpu(i) { struct ext4_locality_group *lg; @@ -2642,8 +2644,6 @@ int ext4_mb_init(struct super_block *sb) out_free_locality_groups: free_percpu(sbi->s_locality_groups); sbi->s_locality_groups = NULL; -out_free_groupinfo_slab: - ext4_groupinfo_destroy_slabs(); out: kfree(sbi->s_mb_offsets); sbi->s_mb_offsets = NULL; @@ -2876,6 +2876,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, if (!bitmap_bh) goto out_err; + BUFFER_TRACE(bitmap_bh, "getting write access"); err = ext4_journal_get_write_access(handle, bitmap_bh); if (err) goto out_err; @@ -2888,6 +2889,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group, ext4_free_group_clusters(sb, gdp)); + BUFFER_TRACE(gdp_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, gdp_bh); if (err) goto out_err; @@ -3145,7 +3147,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, } BUG_ON(start + size <= ac->ac_o_ex.fe_logical && start > ac->ac_o_ex.fe_logical); - BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb)); + BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); /* now prepare goal request */ diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 2ae73a80c19..ec092437d3e 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -505,7 +505,7 @@ int ext4_ext_migrate(struct inode *inode) * with i_data_sem held to prevent racing with block * allocation. */ - down_read((&EXT4_I(inode)->i_data_sem)); + down_read(&EXT4_I(inode)->i_data_sem); ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE); up_read((&EXT4_I(inode)->i_data_sem)); diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 04434ad3e8e..32bce844c2e 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -18,7 +18,7 @@ static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp) return cpu_to_le32(csum); } -int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp) +static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp) { if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) @@ -27,7 +27,7 @@ int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp) return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp); } -void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp) +static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp) { if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 1b809fe51da..2484c7ec6a7 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -391,6 +391,7 @@ mext_insert_extents(handle_t *handle, struct inode *orig_inode, if (depth) { /* Register to journal */ + BUFFER_TRACE(orig_path->p_bh, "get_write_access"); ret = ext4_journal_get_write_access(handle, orig_path->p_bh); if (ret) return ret; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 1cb84f78909..3520ab8a663 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -67,6 +67,7 @@ static struct buffer_head *ext4_append(handle_t *handle, return ERR_PTR(err); inode->i_size += inode->i_sb->s_blocksize; EXT4_I(inode)->i_disksize = inode->i_size; + BUFFER_TRACE(bh, "get_write_access"); err = ext4_journal_get_write_access(handle, bh); if (err) { brelse(bh); @@ -1778,6 +1779,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, blocksize = dir->i_sb->s_blocksize; dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino)); + BUFFER_TRACE(bh, "get_write_access"); retval = ext4_journal_get_write_access(handle, bh); if (retval) { ext4_std_error(dir->i_sb, retval); @@ -2510,8 +2512,7 @@ static int empty_dir(struct inode *inode) ext4_rec_len_from_disk(de1->rec_len, sb->s_blocksize); de = ext4_next_entry(de1, sb->s_blocksize); while (offset < inode->i_size) { - if (!bh || - (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { + if ((void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { unsigned int lblock; err = 0; brelse(bh); @@ -2539,26 +2540,37 @@ static int empty_dir(struct inode *inode) return 1; } -/* ext4_orphan_add() links an unlinked or truncated inode into a list of +/* + * ext4_orphan_add() links an unlinked or truncated inode into a list of * such inodes, starting at the superblock, in case we crash before the * file is closed/deleted, or in case the inode truncate spans multiple * transactions and the last transaction is not recovered after a crash. * * At filesystem recovery time, we walk this list deleting unlinked * inodes and truncating linked inodes in ext4_orphan_cleanup(). + * + * Orphan list manipulation functions must be called under i_mutex unless + * we are just creating the inode or deleting it. */ int ext4_orphan_add(handle_t *handle, struct inode *inode) { struct super_block *sb = inode->i_sb; + struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_iloc iloc; int err = 0, rc; + bool dirty = false; - if (!EXT4_SB(sb)->s_journal) + if (!sbi->s_journal) return 0; - mutex_lock(&EXT4_SB(sb)->s_orphan_lock); + WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) && + !mutex_is_locked(&inode->i_mutex)); + /* + * Exit early if inode already is on orphan list. This is a big speedup + * since we don't have to contend on the global s_orphan_lock. + */ if (!list_empty(&EXT4_I(inode)->i_orphan)) - goto out_unlock; + return 0; /* * Orphan handling is only valid for files with data blocks @@ -2569,48 +2581,51 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); - BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); - err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); + BUFFER_TRACE(sbi->s_sbh, "get_write_access"); + err = ext4_journal_get_write_access(handle, sbi->s_sbh); if (err) - goto out_unlock; + goto out; err = ext4_reserve_inode_write(handle, inode, &iloc); if (err) - goto out_unlock; + goto out; + + mutex_lock(&sbi->s_orphan_lock); /* * Due to previous errors inode may be already a part of on-disk * orphan list. If so skip on-disk list modification. */ - if (NEXT_ORPHAN(inode) && NEXT_ORPHAN(inode) <= - (le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) - goto mem_insert; - - /* Insert this inode at the head of the on-disk orphan list... */ - NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan); - EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); - err = ext4_handle_dirty_super(handle, sb); - rc = ext4_mark_iloc_dirty(handle, inode, &iloc); - if (!err) - err = rc; - - /* Only add to the head of the in-memory list if all the - * previous operations succeeded. If the orphan_add is going to - * fail (possibly taking the journal offline), we can't risk - * leaving the inode on the orphan list: stray orphan-list - * entries can cause panics at unmount time. - * - * This is safe: on error we're going to ignore the orphan list - * anyway on the next recovery. */ -mem_insert: - if (!err) - list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); - + if (!NEXT_ORPHAN(inode) || NEXT_ORPHAN(inode) > + (le32_to_cpu(sbi->s_es->s_inodes_count))) { + /* Insert this inode at the head of the on-disk orphan list */ + NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan); + sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); + dirty = true; + } + list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan); + mutex_unlock(&sbi->s_orphan_lock); + + if (dirty) { + err = ext4_handle_dirty_super(handle, sb); + rc = ext4_mark_iloc_dirty(handle, inode, &iloc); + if (!err) + err = rc; + if (err) { + /* + * We have to remove inode from in-memory list if + * addition to on disk orphan list failed. Stray orphan + * list entries can cause panics at unmount time. + */ + mutex_lock(&sbi->s_orphan_lock); + list_del(&EXT4_I(inode)->i_orphan); + mutex_unlock(&sbi->s_orphan_lock); + } + } jbd_debug(4, "superblock will point to %lu\n", inode->i_ino); jbd_debug(4, "orphan inode %lu will point to %d\n", inode->i_ino, NEXT_ORPHAN(inode)); -out_unlock: - mutex_unlock(&EXT4_SB(sb)->s_orphan_lock); - ext4_std_error(inode->i_sb, err); +out: + ext4_std_error(sb, err); return err; } @@ -2622,45 +2637,51 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) { struct list_head *prev; struct ext4_inode_info *ei = EXT4_I(inode); - struct ext4_sb_info *sbi; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); __u32 ino_next; struct ext4_iloc iloc; int err = 0; - if ((!EXT4_SB(inode->i_sb)->s_journal) && - !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) + if (!sbi->s_journal && !(sbi->s_mount_state & EXT4_ORPHAN_FS)) return 0; - mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock); + WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) && + !mutex_is_locked(&inode->i_mutex)); + /* Do this quick check before taking global s_orphan_lock. */ if (list_empty(&ei->i_orphan)) - goto out; + return 0; - ino_next = NEXT_ORPHAN(inode); - prev = ei->i_orphan.prev; - sbi = EXT4_SB(inode->i_sb); + if (handle) { + /* Grab inode buffer early before taking global s_orphan_lock */ + err = ext4_reserve_inode_write(handle, inode, &iloc); + } + mutex_lock(&sbi->s_orphan_lock); jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); + prev = ei->i_orphan.prev; list_del_init(&ei->i_orphan); /* If we're on an error path, we may not have a valid * transaction handle with which to update the orphan list on * disk, but we still need to remove the inode from the linked * list in memory. */ - if (!handle) - goto out; - - err = ext4_reserve_inode_write(handle, inode, &iloc); - if (err) + if (!handle || err) { + mutex_unlock(&sbi->s_orphan_lock); goto out_err; + } + ino_next = NEXT_ORPHAN(inode); if (prev == &sbi->s_orphan) { jbd_debug(4, "superblock will point to %u\n", ino_next); BUFFER_TRACE(sbi->s_sbh, "get_write_access"); err = ext4_journal_get_write_access(handle, sbi->s_sbh); - if (err) + if (err) { + mutex_unlock(&sbi->s_orphan_lock); goto out_brelse; + } sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); + mutex_unlock(&sbi->s_orphan_lock); err = ext4_handle_dirty_super(handle, inode->i_sb); } else { struct ext4_iloc iloc2; @@ -2670,20 +2691,20 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) jbd_debug(4, "orphan inode %lu will point to %u\n", i_prev->i_ino, ino_next); err = ext4_reserve_inode_write(handle, i_prev, &iloc2); - if (err) + if (err) { + mutex_unlock(&sbi->s_orphan_lock); goto out_brelse; + } NEXT_ORPHAN(i_prev) = ino_next; err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2); + mutex_unlock(&sbi->s_orphan_lock); } if (err) goto out_brelse; NEXT_ORPHAN(inode) = 0; err = ext4_mark_iloc_dirty(handle, inode, &iloc); - out_err: ext4_std_error(inode->i_sb, err); -out: - mutex_unlock(&EXT4_SB(inode->i_sb)->s_orphan_lock); return err; out_brelse: diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index c18d95b5054..b24a2541a9b 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -401,7 +401,8 @@ submit_and_retry: int ext4_bio_write_page(struct ext4_io_submit *io, struct page *page, int len, - struct writeback_control *wbc) + struct writeback_control *wbc, + bool keep_towrite) { struct inode *inode = page->mapping->host; unsigned block_start, blocksize; @@ -414,10 +415,24 @@ int ext4_bio_write_page(struct ext4_io_submit *io, BUG_ON(!PageLocked(page)); BUG_ON(PageWriteback(page)); - set_page_writeback(page); + if (keep_towrite) + set_page_writeback_keepwrite(page); + else + set_page_writeback(page); ClearPageError(page); /* + * Comments copied from block_write_full_page: + * + * The page straddles i_size. It must be zeroed out on each and every + * writepage invocation because it may be mmapped. "A file is mapped + * in multiples of the page size. For a file that is not a multiple of + * the page size, the remaining memory is zeroed when mapped, and + * writes to that region are not written out to the file." + */ + if (len < PAGE_CACHE_SIZE) + zero_user_segment(page, len, PAGE_CACHE_SIZE); + /* * In the first loop we prepare and mark buffers to submit. We have to * mark all buffers in the page before submitting so that * end_page_writeback() cannot be called from ext4_bio_end_io() when IO @@ -428,19 +443,6 @@ int ext4_bio_write_page(struct ext4_io_submit *io, do { block_start = bh_offset(bh); if (block_start >= len) { - /* - * Comments copied from block_write_full_page_endio: - * - * The page straddles i_size. It must be zeroed out on - * each and every writepage invocation because it may - * be mmapped. "A file is mapped in multiples of the - * page size. For a file that is not a multiple of - * the page size, the remaining memory is zeroed when - * mapped, and writes to that region are not written - * out to the file." - */ - zero_user_segment(page, block_start, - block_start + blocksize); clear_buffer_dirty(bh); set_buffer_uptodate(bh); continue; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index f3b84cd9de5..bb0e80f03e2 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -42,7 +42,7 @@ int ext4_resize_begin(struct super_block *sb) void ext4_resize_end(struct super_block *sb) { clear_bit_unlock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); } static ext4_group_t ext4_meta_bg_first_group(struct super_block *sb, @@ -348,6 +348,7 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, bh = sb_getblk(sb, blk); if (unlikely(!bh)) return ERR_PTR(-ENOMEM); + BUFFER_TRACE(bh, "get_write_access"); if ((err = ext4_journal_get_write_access(handle, bh))) { brelse(bh); bh = ERR_PTR(err); @@ -426,6 +427,7 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle, if (unlikely(!bh)) return -ENOMEM; + BUFFER_TRACE(bh, "get_write_access"); err = ext4_journal_get_write_access(handle, bh); if (err) return err; @@ -518,6 +520,7 @@ static int setup_new_flex_group_blocks(struct super_block *sb, goto out; } + BUFFER_TRACE(gdb, "get_write_access"); err = ext4_journal_get_write_access(handle, gdb); if (err) { brelse(gdb); @@ -790,14 +793,17 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, goto exit_dind; } + BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); if (unlikely(err)) goto exit_dind; + BUFFER_TRACE(gdb_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, gdb_bh); if (unlikely(err)) goto exit_dind; + BUFFER_TRACE(dind, "get_write_access"); err = ext4_journal_get_write_access(handle, dind); if (unlikely(err)) ext4_std_error(sb, err); @@ -902,6 +908,7 @@ static int add_new_gdb_meta_bg(struct super_block *sb, EXT4_SB(sb)->s_group_desc = n_group_desc; EXT4_SB(sb)->s_gdb_count++; ext4_kvfree(o_group_desc); + BUFFER_TRACE(gdb_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, gdb_bh); if (unlikely(err)) brelse(gdb_bh); @@ -977,6 +984,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, } for (i = 0; i < reserved_gdb; i++) { + BUFFER_TRACE(primary[i], "get_write_access"); if ((err = ext4_journal_get_write_access(handle, primary[i]))) goto exit_bh; } @@ -1084,6 +1092,7 @@ static void update_backups(struct super_block *sb, int blk_off, char *data, ext4_debug("update metadata backup %llu(+%llu)\n", backup_block, backup_block - ext4_group_first_block_no(sb, group)); + BUFFER_TRACE(bh, "get_write_access"); if ((err = ext4_journal_get_write_access(handle, bh))) break; lock_buffer(bh); @@ -1163,6 +1172,7 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, */ if (gdb_off) { gdb_bh = sbi->s_group_desc[gdb_num]; + BUFFER_TRACE(gdb_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, gdb_bh); if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group)) @@ -1433,6 +1443,7 @@ static int ext4_flex_group_add(struct super_block *sb, goto exit; } + BUFFER_TRACE(sbi->s_sbh, "get_write_access"); err = ext4_journal_get_write_access(handle, sbi->s_sbh); if (err) goto exit_journal; @@ -1645,6 +1656,7 @@ static int ext4_group_extend_no_check(struct super_block *sb, return err; } + BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); if (err) { ext4_warning(sb, "error %d on journal write access", err); @@ -1804,6 +1816,7 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode) if (IS_ERR(handle)) return PTR_ERR(handle); + BUFFER_TRACE(sbi->s_sbh, "get_write_access"); err = ext4_journal_get_write_access(handle, sbi->s_sbh); if (err) goto errout; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c4895c195e0..b9b9aabfb4d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -138,8 +138,8 @@ static __le32 ext4_superblock_csum(struct super_block *sb, return cpu_to_le32(csum); } -int ext4_superblock_csum_verify(struct super_block *sb, - struct ext4_super_block *es) +static int ext4_superblock_csum_verify(struct super_block *sb, + struct ext4_super_block *es) { if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) @@ -879,6 +879,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) return NULL; ei->vfs_inode.i_version = 1; + spin_lock_init(&ei->i_raw_lock); INIT_LIST_HEAD(&ei->i_prealloc_list); spin_lock_init(&ei->i_prealloc_lock); ext4_es_init_tree(&ei->i_es_tree); @@ -1903,7 +1904,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, if (!(sbi->s_mount_state & EXT4_VALID_FS)) ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " "running e2fsck is recommended"); - else if ((sbi->s_mount_state & EXT4_ERROR_FS)) + else if (sbi->s_mount_state & EXT4_ERROR_FS) ext4_msg(sb, KERN_WARNING, "warning: mounting fs with errors, " "running e2fsck is recommended"); @@ -2404,6 +2405,16 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb, if (ext4_bg_has_super(sb, bg)) has_super = 1; + /* + * If we have a meta_bg fs with 1k blocks, group 0's GDT is at + * block 2, not 1. If s_first_data_block == 0 (bigalloc is enabled + * on modern mke2fs or blksize > 1k on older mke2fs) then we must + * compensate. + */ + if (sb->s_blocksize == 1024 && nr == 0 && + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) == 0) + has_super++; + return (has_super + ext4_group_first_block_no(sb, bg)); } @@ -5370,6 +5381,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, bh = ext4_bread(handle, inode, blk, 1, &err); if (!bh) goto out; + BUFFER_TRACE(bh, "get write access"); err = ext4_journal_get_write_access(handle, bh); if (err) { brelse(bh); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 4eec399ec80..e7387337060 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -369,6 +369,9 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name, { int error; + if (strlen(name) > 255) + return -ERANGE; + down_read(&EXT4_I(inode)->xattr_sem); error = ext4_xattr_ibody_get(inode, name_index, name, buffer, buffer_size); @@ -513,6 +516,7 @@ static void ext4_xattr_update_super_block(handle_t *handle, if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR)) return; + BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) { EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR); ext4_handle_dirty_super(handle, sb); @@ -532,6 +536,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); ce = mb_cache_entry_get(ext4_mb_cache, bh->b_bdev, bh->b_blocknr); + BUFFER_TRACE(bh, "get_write_access"); error = ext4_journal_get_write_access(handle, bh); if (error) goto out; @@ -774,6 +779,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, if (s->base) { ce = mb_cache_entry_get(ext4_mb_cache, bs->bh->b_bdev, bs->bh->b_blocknr); + BUFFER_TRACE(bs->bh, "get_write_access"); error = ext4_journal_get_write_access(handle, bs->bh); if (error) goto cleanup; @@ -859,6 +865,7 @@ inserted: EXT4_C2B(EXT4_SB(sb), 1)); if (error) goto cleanup; + BUFFER_TRACE(new_bh, "get_write_access"); error = ext4_journal_get_write_access(handle, new_bh); if (error) @@ -896,7 +903,7 @@ inserted: * take i_data_sem because we will test * i_delalloc_reserved_flag in ext4_mb_new_blocks */ - down_read((&EXT4_I(inode)->i_data_sem)); + down_read(&EXT4_I(inode)->i_data_sem); block = ext4_new_meta_blocks(handle, inode, goal, 0, NULL, &error); up_read((&EXT4_I(inode)->i_data_sem)); |