From 10995aa2451afa20b721cc7de856cae1a13dba57 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 13 Nov 2008 14:49:12 -0800 Subject: ocfs2: Morph the haphazard OCFS2_IS_VALID_DINODE() checks. Random places in the code would check a dinode bh to see if it was valid. Not only did they do different levels of validation, they handled errors in different ways. The previous commit unified inode block reads, validating all block reads in the same place. Thus, these haphazard checks are no longer necessary. Rather than eliminate them, however, we change them to BUG_ON() checks. This ensures the assumptions remain true. All of the code paths to these checks have been audited to ensure they come from a validated inode read. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/suballoc.c | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) (limited to 'fs/ocfs2/suballoc.c') diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index c5ff18b46b5..95d432b694e 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -441,11 +441,11 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, ac->ac_alloc_slot = slot; fe = (struct ocfs2_dinode *) bh->b_data; - if (!OCFS2_IS_VALID_DINODE(fe)) { - OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); - status = -EIO; - goto bail; - } + + /* The bh was validated by the inode read inside + * ocfs2_inode_lock(). Any corruption is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); + if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) { ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu", (unsigned long long)le64_to_cpu(fe->i_blkno)); @@ -931,11 +931,6 @@ static int ocfs2_relink_block_group(handle_t *handle, struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data; - if (!OCFS2_IS_VALID_DINODE(fe)) { - OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); - status = -EIO; - goto out; - } if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); status = -EIO; @@ -1392,11 +1387,11 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, BUG_ON(!ac->ac_bh); fe = (struct ocfs2_dinode *) ac->ac_bh->b_data; - if (!OCFS2_IS_VALID_DINODE(fe)) { - OCFS2_RO_ON_INVALID_DINODE(osb->sb, fe); - status = -EIO; - goto bail; - } + + /* The bh was validated by the inode read during + * ocfs2_reserve_suballoc_bits(). Any corruption is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); + if (le32_to_cpu(fe->id1.bitmap1.i_used) >= le32_to_cpu(fe->id1.bitmap1.i_total)) { ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used " @@ -1782,11 +1777,12 @@ int ocfs2_free_suballoc_bits(handle_t *handle, mlog_entry_void(); - if (!OCFS2_IS_VALID_DINODE(fe)) { - OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); - status = -EIO; - goto bail; - } + /* The alloc_bh comes from ocfs2_free_dinode() or + * ocfs2_free_clusters(). The callers have all locked the + * allocator and gotten alloc_bh from the lock call. This + * validates the dinode buffer. Any corruption that has happended + * is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl)); mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n", -- cgit v1.2.3-70-g09d2 From 57e3e7971136003c96766346049aa73b82cab079 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 13 Nov 2008 14:49:13 -0800 Subject: ocfs2: Consolidate validation of group descriptors. Currently the validation of group descriptors is directly duplicated so that one version can error the filesystem and the other (resize) can just report the problem. Consolidate to one function that takes a boolean. Wrap that function with the old call for the old users. This is in preparation for lifting the read+validate step into a single function. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/resize.c | 40 ++++++----------------------- fs/ocfs2/suballoc.c | 74 +++++++++++++++++++++++++++++++---------------------- fs/ocfs2/suballoc.h | 20 ++++++++++++--- 3 files changed, 68 insertions(+), 66 deletions(-) (limited to 'fs/ocfs2/suballoc.c') diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c index 739d452f617..a2de32a317a 100644 --- a/fs/ocfs2/resize.c +++ b/fs/ocfs2/resize.c @@ -396,41 +396,16 @@ static int ocfs2_check_new_group(struct inode *inode, struct buffer_head *group_bh) { int ret; - struct ocfs2_group_desc *gd; + struct ocfs2_group_desc *gd = + (struct ocfs2_group_desc *)group_bh->b_data; u16 cl_bpc = le16_to_cpu(di->id2.i_chain.cl_bpc); - unsigned int max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * - le16_to_cpu(di->id2.i_chain.cl_bpc); + ret = ocfs2_validate_group_descriptor(inode->i_sb, di, gd, 1); + if (ret) + goto out; - gd = (struct ocfs2_group_desc *)group_bh->b_data; - - ret = -EIO; - if (!OCFS2_IS_VALID_GROUP_DESC(gd)) - mlog(ML_ERROR, "Group descriptor # %llu isn't valid.\n", - (unsigned long long)le64_to_cpu(gd->bg_blkno)); - else if (di->i_blkno != gd->bg_parent_dinode) - mlog(ML_ERROR, "Group descriptor # %llu has bad parent " - "pointer (%llu, expected %llu)\n", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - (unsigned long long)le64_to_cpu(gd->bg_parent_dinode), - (unsigned long long)le64_to_cpu(di->i_blkno)); - else if (le16_to_cpu(gd->bg_bits) > max_bits) - mlog(ML_ERROR, "Group descriptor # %llu has bit count of %u\n", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_bits)); - else if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) - mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but " - "claims that %u are free\n", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_bits), - le16_to_cpu(gd->bg_free_bits_count)); - else if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) - mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but " - "max bitmap bits of %u\n", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_bits), - 8 * le16_to_cpu(gd->bg_size)); - else if (le16_to_cpu(gd->bg_chain) != input->chain) + ret = -EINVAL; + if (le16_to_cpu(gd->bg_chain) != input->chain) mlog(ML_ERROR, "Group descriptor # %llu has bad chain %u " "while input has %u set.\n", (unsigned long long)le64_to_cpu(gd->bg_blkno), @@ -449,6 +424,7 @@ static int ocfs2_check_new_group(struct inode *inode, else ret = 0; +out: return ret; } diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 95d432b694e..ddba97dc06a 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -146,59 +146,71 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) } /* somewhat more expensive than our other checks, so use sparingly. */ -int ocfs2_check_group_descriptor(struct super_block *sb, - struct ocfs2_dinode *di, - struct ocfs2_group_desc *gd) +int ocfs2_validate_group_descriptor(struct super_block *sb, + struct ocfs2_dinode *di, + struct ocfs2_group_desc *gd, + int clean_error) { unsigned int max_bits; +#define do_error(fmt, ...) \ + do{ \ + if (clean_error) \ + mlog(ML_ERROR, fmt "\n", ##__VA_ARGS__); \ + else \ + ocfs2_error(sb, fmt, ##__VA_ARGS__); \ + } while (0) + if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(sb, gd); - return -EIO; + do_error("Group Descriptor #%llu has bad signature %.*s", + (unsigned long long)le64_to_cpu(gd->bg_blkno), 7, + gd->bg_signature); + return -EINVAL; } if (di->i_blkno != gd->bg_parent_dinode) { - ocfs2_error(sb, "Group descriptor # %llu has bad parent " - "pointer (%llu, expected %llu)", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - (unsigned long long)le64_to_cpu(gd->bg_parent_dinode), - (unsigned long long)le64_to_cpu(di->i_blkno)); - return -EIO; + do_error("Group descriptor # %llu has bad parent " + "pointer (%llu, expected %llu)", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + (unsigned long long)le64_to_cpu(gd->bg_parent_dinode), + (unsigned long long)le64_to_cpu(di->i_blkno)); + return -EINVAL; } max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc); if (le16_to_cpu(gd->bg_bits) > max_bits) { - ocfs2_error(sb, "Group descriptor # %llu has bit count of %u", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_bits)); - return -EIO; + do_error("Group descriptor # %llu has bit count of %u", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_bits)); + return -EINVAL; } if (le16_to_cpu(gd->bg_chain) >= le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) { - ocfs2_error(sb, "Group descriptor # %llu has bad chain %u", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_chain)); - return -EIO; + do_error("Group descriptor # %llu has bad chain %u", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_chain)); + return -EINVAL; } if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) { - ocfs2_error(sb, "Group descriptor # %llu has bit count %u but " - "claims that %u are free", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_bits), - le16_to_cpu(gd->bg_free_bits_count)); - return -EIO; + do_error("Group descriptor # %llu has bit count %u but " + "claims that %u are free", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_bits), + le16_to_cpu(gd->bg_free_bits_count)); + return -EINVAL; } if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) { - ocfs2_error(sb, "Group descriptor # %llu has bit count %u but " - "max bitmap bits of %u", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_bits), - 8 * le16_to_cpu(gd->bg_size)); - return -EIO; + do_error("Group descriptor # %llu has bit count %u but " + "max bitmap bits of %u", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_bits), + 8 * le16_to_cpu(gd->bg_size)); + return -EINVAL; } +#undef do_error return 0; } diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index 4df159d8f45..7adfcc478bd 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -165,9 +165,23 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac); u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster); /* somewhat more expensive than our other checks, so use sparingly. */ -int ocfs2_check_group_descriptor(struct super_block *sb, - struct ocfs2_dinode *di, - struct ocfs2_group_desc *gd); +/* + * By default, ocfs2_validate_group_descriptor() calls ocfs2_error() when it + * finds a problem. A caller that wants to check a group descriptor + * without going readonly passes a nonzero clean_error. This is only + * resize, really. + */ +int ocfs2_validate_group_descriptor(struct super_block *sb, + struct ocfs2_dinode *di, + struct ocfs2_group_desc *gd, + int clean_error); +static inline int ocfs2_check_group_descriptor(struct super_block *sb, + struct ocfs2_dinode *di, + struct ocfs2_group_desc *gd) +{ + return ocfs2_validate_group_descriptor(sb, di, gd, 0); +} + int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et, u32 clusters_to_add, u32 extents_to_split, struct ocfs2_alloc_context **data_ac, -- cgit v1.2.3-70-g09d2 From 68f64d471be38631d7196b938d9809802dd467fa Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 13 Nov 2008 14:49:14 -0800 Subject: ocfs2: Wrap group descriptor reads in a dedicated function. We have a clean call for validating group descriptors, but every place that wants the always does a read_block()+validate() call pair. Create a toplevel ocfs2_read_group_descriptor() that does the right thing. This allows us to leverage the single call point later for fancier handling. We also add validation of gd->bg_generation against the superblock and gd->bg_blkno against the block we thought we read. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/resize.c | 12 ++---- fs/ocfs2/suballoc.c | 108 +++++++++++++++++++++++++++++++--------------------- fs/ocfs2/suballoc.h | 19 ++++----- 3 files changed, 78 insertions(+), 61 deletions(-) (limited to 'fs/ocfs2/suballoc.c') diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c index a2de32a317a..252baff5eb8 100644 --- a/fs/ocfs2/resize.c +++ b/fs/ocfs2/resize.c @@ -330,20 +330,14 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters) lgd_blkno = ocfs2_which_cluster_group(main_bm_inode, first_new_cluster - 1); - ret = ocfs2_read_block(main_bm_inode, lgd_blkno, &group_bh); + ret = ocfs2_read_group_descriptor(main_bm_inode, fe, lgd_blkno, + &group_bh); if (ret < 0) { mlog_errno(ret); goto out_unlock; } - group = (struct ocfs2_group_desc *)group_bh->b_data; - ret = ocfs2_check_group_descriptor(inode->i_sb, fe, group); - if (ret) { - mlog_errno(ret); - goto out_unlock; - } - cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc); if (le16_to_cpu(group->bg_bits) / cl_bpc + new_clusters > le16_to_cpu(fe->id2.i_chain.cl_cpg)) { @@ -400,7 +394,7 @@ static int ocfs2_check_new_group(struct inode *inode, (struct ocfs2_group_desc *)group_bh->b_data; u16 cl_bpc = le16_to_cpu(di->id2.i_chain.cl_bpc); - ret = ocfs2_validate_group_descriptor(inode->i_sb, di, gd, 1); + ret = ocfs2_validate_group_descriptor(inode->i_sb, di, group_bh, 1); if (ret) goto out; diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index ddba97dc06a..797f509d725 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -145,13 +145,13 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc); } -/* somewhat more expensive than our other checks, so use sparingly. */ int ocfs2_validate_group_descriptor(struct super_block *sb, struct ocfs2_dinode *di, - struct ocfs2_group_desc *gd, + struct buffer_head *bh, int clean_error) { unsigned int max_bits; + struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; #define do_error(fmt, ...) \ do{ \ @@ -162,16 +162,32 @@ int ocfs2_validate_group_descriptor(struct super_block *sb, } while (0) if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { - do_error("Group Descriptor #%llu has bad signature %.*s", - (unsigned long long)le64_to_cpu(gd->bg_blkno), 7, + do_error("Group descriptor #%llu has bad signature %.*s", + (unsigned long long)bh->b_blocknr, 7, gd->bg_signature); return -EINVAL; } + if (le64_to_cpu(gd->bg_blkno) != bh->b_blocknr) { + do_error("Group descriptor #%llu has an invalid bg_blkno " + "of %llu", + (unsigned long long)bh->b_blocknr, + (unsigned long long)le64_to_cpu(gd->bg_blkno)); + return -EINVAL; + } + + if (le32_to_cpu(gd->bg_generation) != OCFS2_SB(sb)->fs_generation) { + do_error("Group descriptor #%llu has an invalid " + "fs_generation of #%u", + (unsigned long long)bh->b_blocknr, + le32_to_cpu(gd->bg_generation)); + return -EINVAL; + } + if (di->i_blkno != gd->bg_parent_dinode) { - do_error("Group descriptor # %llu has bad parent " + do_error("Group descriptor #%llu has bad parent " "pointer (%llu, expected %llu)", - (unsigned long long)le64_to_cpu(gd->bg_blkno), + (unsigned long long)bh->b_blocknr, (unsigned long long)le64_to_cpu(gd->bg_parent_dinode), (unsigned long long)le64_to_cpu(di->i_blkno)); return -EINVAL; @@ -179,33 +195,33 @@ int ocfs2_validate_group_descriptor(struct super_block *sb, max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc); if (le16_to_cpu(gd->bg_bits) > max_bits) { - do_error("Group descriptor # %llu has bit count of %u", - (unsigned long long)le64_to_cpu(gd->bg_blkno), + do_error("Group descriptor #%llu has bit count of %u", + (unsigned long long)bh->b_blocknr, le16_to_cpu(gd->bg_bits)); return -EINVAL; } if (le16_to_cpu(gd->bg_chain) >= le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) { - do_error("Group descriptor # %llu has bad chain %u", - (unsigned long long)le64_to_cpu(gd->bg_blkno), + do_error("Group descriptor #%llu has bad chain %u", + (unsigned long long)bh->b_blocknr, le16_to_cpu(gd->bg_chain)); return -EINVAL; } if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) { - do_error("Group descriptor # %llu has bit count %u but " + do_error("Group descriptor #%llu has bit count %u but " "claims that %u are free", - (unsigned long long)le64_to_cpu(gd->bg_blkno), + (unsigned long long)bh->b_blocknr, le16_to_cpu(gd->bg_bits), le16_to_cpu(gd->bg_free_bits_count)); return -EINVAL; } if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) { - do_error("Group descriptor # %llu has bit count %u but " + do_error("Group descriptor #%llu has bit count %u but " "max bitmap bits of %u", - (unsigned long long)le64_to_cpu(gd->bg_blkno), + (unsigned long long)bh->b_blocknr, le16_to_cpu(gd->bg_bits), 8 * le16_to_cpu(gd->bg_size)); return -EINVAL; @@ -215,6 +231,30 @@ int ocfs2_validate_group_descriptor(struct super_block *sb, return 0; } +int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di, + u64 gd_blkno, struct buffer_head **bh) +{ + int rc; + struct buffer_head *tmp = *bh; + + rc = ocfs2_read_block(inode, gd_blkno, &tmp); + if (rc) + goto out; + + rc = ocfs2_validate_group_descriptor(inode->i_sb, di, tmp, 0); + if (rc) { + brelse(tmp); + goto out; + } + + /* If ocfs2_read_block() got us a new bh, pass it up. */ + if (!*bh) + *bh = tmp; + +out: + return rc; +} + static int ocfs2_block_group_fill(handle_t *handle, struct inode *alloc_inode, struct buffer_head *bg_bh, @@ -1177,21 +1217,17 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, u16 found; struct buffer_head *group_bh = NULL; struct ocfs2_group_desc *gd; + struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data; struct inode *alloc_inode = ac->ac_inode; - ret = ocfs2_read_block(alloc_inode, gd_blkno, &group_bh); + ret = ocfs2_read_group_descriptor(alloc_inode, di, gd_blkno, + &group_bh); if (ret < 0) { mlog_errno(ret); return ret; } gd = (struct ocfs2_group_desc *) group_bh->b_data; - if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, gd); - ret = -EIO; - goto out; - } - ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, ac->ac_max_block, bit_off, &found); if (ret < 0) { @@ -1248,19 +1284,14 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, bits_wanted, chain, (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno); - status = ocfs2_read_block(alloc_inode, - le64_to_cpu(cl->cl_recs[chain].c_blkno), - &group_bh); + status = ocfs2_read_group_descriptor(alloc_inode, fe, + le64_to_cpu(cl->cl_recs[chain].c_blkno), + &group_bh); if (status < 0) { mlog_errno(status); goto bail; } bg = (struct ocfs2_group_desc *) group_bh->b_data; - status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg); - if (status) { - mlog_errno(status); - goto bail; - } status = -ENOSPC; /* for now, the chain search is a bit simplistic. We just use @@ -1278,18 +1309,13 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, next_group = le64_to_cpu(bg->bg_next_group); prev_group_bh = group_bh; group_bh = NULL; - status = ocfs2_read_block(alloc_inode, - next_group, &group_bh); + status = ocfs2_read_group_descriptor(alloc_inode, fe, + next_group, &group_bh); if (status < 0) { mlog_errno(status); goto bail; } bg = (struct ocfs2_group_desc *) group_bh->b_data; - status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg); - if (status) { - mlog_errno(status); - goto bail; - } } if (status < 0) { if (status != -ENOSPC) @@ -1801,18 +1827,14 @@ int ocfs2_free_suballoc_bits(handle_t *handle, (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count, (unsigned long long)bg_blkno, start_bit); - status = ocfs2_read_block(alloc_inode, bg_blkno, &group_bh); + status = ocfs2_read_group_descriptor(alloc_inode, fe, bg_blkno, + &group_bh); if (status < 0) { mlog_errno(status); goto bail; } - group = (struct ocfs2_group_desc *) group_bh->b_data; - status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group); - if (status) { - mlog_errno(status); - goto bail; - } + BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits)); status = ocfs2_block_group_clear_bits(handle, alloc_inode, diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index 7adfcc478bd..43de4fd826d 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -164,23 +164,24 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac); * and return that block offset. */ u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster); -/* somewhat more expensive than our other checks, so use sparingly. */ /* * By default, ocfs2_validate_group_descriptor() calls ocfs2_error() when it * finds a problem. A caller that wants to check a group descriptor * without going readonly passes a nonzero clean_error. This is only - * resize, really. + * resize, really. Everyone else should be using + * ocfs2_read_group_descriptor(). */ int ocfs2_validate_group_descriptor(struct super_block *sb, struct ocfs2_dinode *di, - struct ocfs2_group_desc *gd, + struct buffer_head *bh, int clean_error); -static inline int ocfs2_check_group_descriptor(struct super_block *sb, - struct ocfs2_dinode *di, - struct ocfs2_group_desc *gd) -{ - return ocfs2_validate_group_descriptor(sb, di, gd, 0); -} +/* + * Read a group descriptor block into *bh. If *bh is NULL, a bh will be + * allocated. This is a cached read. The descriptor will be validated with + * ocfs2_validate_group_descriptor(). + */ +int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di, + u64 gd_blkno, struct buffer_head **bh); int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et, u32 clusters_to_add, u32 extents_to_split, -- cgit v1.2.3-70-g09d2 From 4203530613280281868b3ca36c817530bca3825c Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 13 Nov 2008 14:49:15 -0800 Subject: ocfs2: Morph the haphazard OCFS2_IS_VALID_GROUP_DESC() checks. Random places in the code would check a group descriptor bh to see if it was valid. The previous commit unified descriptor block reads, validating all block reads in the same place. Thus, these checks are no longer necessary. Rather than eliminate them, however, we change them to BUG_ON() checks. This ensures the assumptions remain true. All of the code paths to these checks have been audited to ensure they come from a validated descriptor read. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/ocfs2.h | 7 ------- fs/ocfs2/suballoc.c | 39 ++++++++++++++------------------------- 2 files changed, 14 insertions(+), 32 deletions(-) (limited to 'fs/ocfs2/suballoc.c') diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 467bdb6f71e..82ba887afa0 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -458,13 +458,6 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) #define OCFS2_IS_VALID_GROUP_DESC(ptr) \ (!strcmp((ptr)->bg_signature, OCFS2_GROUP_DESC_SIGNATURE)) -#define OCFS2_RO_ON_INVALID_GROUP_DESC(__sb, __gd) do { \ - typeof(__gd) ____gd = (__gd); \ - ocfs2_error((__sb), \ - "Group Descriptor # %llu has bad signature %.*s", \ - (unsigned long long)le64_to_cpu((____gd)->bg_blkno), 7, \ - (____gd)->bg_signature); \ -} while (0) #define OCFS2_IS_VALID_XATTR_BLOCK(ptr) \ (!strcmp((ptr)->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE)) diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 797f509d725..766a00b2644 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -842,10 +842,9 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, int offset, start, found, status = 0; struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; - if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(osb->sb, bg); - return -EIO; - } + /* Callers got this descriptor from + * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); found = start = best_offset = best_size = 0; bitmap = bg->bg_bitmap; @@ -910,11 +909,9 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle, mlog_entry_void(); - if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); - status = -EIO; - goto bail; - } + /* All callers get the descriptor via + * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits); mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off, @@ -983,16 +980,10 @@ static int ocfs2_relink_block_group(handle_t *handle, struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data; - if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); - status = -EIO; - goto out; - } - if (!OCFS2_IS_VALID_GROUP_DESC(prev_bg)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, prev_bg); - status = -EIO; - goto out; - } + /* The caller got these descriptors from + * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); + BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(prev_bg)); mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n", (unsigned long long)le64_to_cpu(fe->i_blkno), chain, @@ -1055,7 +1046,7 @@ out_rollback: bg->bg_next_group = cpu_to_le64(bg_ptr); prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr); } -out: + mlog_exit(status); return status; } @@ -1758,11 +1749,9 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle, mlog_entry_void(); - if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); - status = -EIO; - goto bail; - } + /* The caller got this descriptor from + * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); mlog(0, "off = %u, num = %u\n", bit_off, num_bits); -- cgit v1.2.3-70-g09d2 From 970e4936d7d15f35d00fd15a14f5343ba78b2fc8 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 13 Nov 2008 14:49:19 -0800 Subject: ocfs2: Validate metadata only when it's read from disk. Add an optional validation hook to ocfs2_read_blocks(). Now the validation function is only called when a block was actually read off of disk. It is not called when the buffer was in cache. We add a buffer state bit BH_NeedsValidate to flag these buffers. It must always be one higher than the last JBD2 buffer state bit. The dinode, dirblock, extent_block, and xattr_block validators are lifted to this scheme directly. The group_descriptor validator needs to be split into two pieces. The first part only needs the gd buffer and is passed to ocfs2_read_block(). The second part requires the dinode as well, and is called every time. It's only 3 compares, so it's tiny. This also allows us to clean up the non-fatal gd check used by resize.c. It now has no magic argument. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/alloc.c | 17 ++++----- fs/ocfs2/buffer_head_io.c | 33 ++++++++++++++++- fs/ocfs2/buffer_head_io.h | 27 ++++++++------ fs/ocfs2/dir.c | 13 +++---- fs/ocfs2/inode.c | 18 +++------- fs/ocfs2/resize.c | 2 +- fs/ocfs2/slot_map.c | 4 +-- fs/ocfs2/suballoc.c | 91 +++++++++++++++++++++++++++++++++-------------- fs/ocfs2/suballoc.h | 15 ++++---- fs/ocfs2/xattr.c | 26 +++++++------- 10 files changed, 149 insertions(+), 97 deletions(-) (limited to 'fs/ocfs2/suballoc.c') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index f430cc6e0f3..e823a27ba34 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -684,6 +684,9 @@ static int ocfs2_validate_extent_block(struct super_block *sb, struct ocfs2_extent_block *eb = (struct ocfs2_extent_block *)bh->b_data; + mlog(0, "Validating extent block %llu\n", + (unsigned long long)bh->b_blocknr); + if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { ocfs2_error(sb, "Extent block #%llu has bad signature %.*s", @@ -719,21 +722,13 @@ int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno, int rc; struct buffer_head *tmp = *bh; - rc = ocfs2_read_block(inode, eb_blkno, &tmp); - if (rc) - goto out; - - rc = ocfs2_validate_extent_block(inode->i_sb, tmp); - if (rc) { - brelse(tmp); - goto out; - } + rc = ocfs2_read_block(inode, eb_blkno, &tmp, + ocfs2_validate_extent_block); /* If ocfs2_read_block() got us a new bh, pass it up. */ - if (!*bh) + if (!rc && !*bh) *bh = tmp; -out: return rc; } diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 3a178ec48d7..0e9eed0c223 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -39,6 +39,19 @@ #include "buffer_head_io.h" +/* + * Bits on bh->b_state used by ocfs2. + * + * These MUST be after the JBD2 bits. Currently BH_Unshadow is the last + * JBD2 bit. + */ +enum ocfs2_state_bits { + BH_NeedsValidate = BH_Unshadow + 1, +}; + +/* Expand the magic b_state functions */ +BUFFER_FNS(NeedsValidate, needs_validate); + int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, struct inode *inode) { @@ -166,7 +179,9 @@ bail: } int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, - struct buffer_head *bhs[], int flags) + struct buffer_head *bhs[], int flags, + int (*validate)(struct super_block *sb, + struct buffer_head *bh)) { int status = 0; int i, ignore_cache = 0; @@ -298,6 +313,8 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, clear_buffer_uptodate(bh); get_bh(bh); /* for end_buffer_read_sync() */ + if (validate) + set_buffer_needs_validate(bh); bh->b_end_io = end_buffer_read_sync; submit_bh(READ, bh); continue; @@ -328,6 +345,20 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, bhs[i] = NULL; continue; } + + if (buffer_needs_validate(bh)) { + /* We never set NeedsValidate if the + * buffer was held by the journal, so + * that better not have changed */ + BUG_ON(buffer_jbd(bh)); + clear_buffer_needs_validate(bh); + status = validate(inode->i_sb, bh); + if (status) { + put_bh(bh); + bhs[i] = NULL; + continue; + } + } } /* Always set the buffer in the cache, even if it was diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h index 75e1dcb1ade..c75d682dadd 100644 --- a/fs/ocfs2/buffer_head_io.h +++ b/fs/ocfs2/buffer_head_io.h @@ -31,21 +31,24 @@ void ocfs2_end_buffer_io_sync(struct buffer_head *bh, int uptodate); -static inline int ocfs2_read_block(struct inode *inode, - u64 off, - struct buffer_head **bh); - int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, struct inode *inode); -int ocfs2_read_blocks(struct inode *inode, - u64 block, - int nr, - struct buffer_head *bhs[], - int flags); int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, unsigned int nr, struct buffer_head *bhs[]); +/* + * If not NULL, validate() will be called on a buffer that is freshly + * read from disk. It will not be called if the buffer was in cache. + * Note that if validate() is being used for this buffer, it needs to + * be set even for a READAHEAD call, as it marks the buffer for later + * validation. + */ +int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, + struct buffer_head *bhs[], int flags, + int (*validate)(struct super_block *sb, + struct buffer_head *bh)); + int ocfs2_write_super_or_backup(struct ocfs2_super *osb, struct buffer_head *bh); @@ -53,7 +56,9 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb, #define OCFS2_BH_READAHEAD 8 static inline int ocfs2_read_block(struct inode *inode, u64 off, - struct buffer_head **bh) + struct buffer_head **bh, + int (*validate)(struct super_block *sb, + struct buffer_head *bh)) { int status = 0; @@ -63,7 +68,7 @@ static inline int ocfs2_read_block(struct inode *inode, u64 off, goto bail; } - status = ocfs2_read_blocks(inode, off, 1, bh, 0); + status = ocfs2_read_blocks(inode, off, 1, bh, 0, validate); bail: return status; diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index c2f3fd93be5..7e863d40380 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -214,6 +214,8 @@ static int ocfs2_validate_dir_block(struct super_block *sb, * Nothing yet. We don't validate dirents here, that's handled * in-place when the code walks them. */ + mlog(0, "Validating dirblock %llu\n", + (unsigned long long)bh->b_blocknr); return 0; } @@ -255,20 +257,13 @@ static int ocfs2_read_dir_block(struct inode *inode, u64 v_block, goto out; } - rc = ocfs2_read_blocks(inode, p_blkno, 1, &tmp, flags); + rc = ocfs2_read_blocks(inode, p_blkno, 1, &tmp, flags, + ocfs2_validate_dir_block); if (rc) { mlog_errno(rc); goto out; } - if (!(flags & OCFS2_BH_READAHEAD)) { - rc = ocfs2_validate_dir_block(inode->i_sb, tmp); - if (rc) { - brelse(tmp); - goto out; - } - } - /* If ocfs2_read_blocks() got us a new bh, pass it up. */ if (!*bh) *bh = tmp; diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 9eb701b8646..ec3497bafda 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -1255,6 +1255,9 @@ int ocfs2_validate_inode_block(struct super_block *sb, int rc = -EINVAL; struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; + mlog(0, "Validating dinode %llu\n", + (unsigned long long)bh->b_blocknr); + BUG_ON(!buffer_uptodate(bh)); if (!OCFS2_IS_VALID_DINODE(di)) { @@ -1300,23 +1303,12 @@ int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh, struct buffer_head *tmp = *bh; rc = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, &tmp, - flags); - if (rc) - goto out; - - if (!(flags & OCFS2_BH_READAHEAD)) { - rc = ocfs2_validate_inode_block(inode->i_sb, tmp); - if (rc) { - brelse(tmp); - goto out; - } - } + flags, ocfs2_validate_inode_block); /* If ocfs2_read_blocks() got us a new bh, pass it up. */ - if (!*bh) + if (!rc && !*bh) *bh = tmp; -out: return rc; } diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c index 252baff5eb8..867de3ebfca 100644 --- a/fs/ocfs2/resize.c +++ b/fs/ocfs2/resize.c @@ -394,7 +394,7 @@ static int ocfs2_check_new_group(struct inode *inode, (struct ocfs2_group_desc *)group_bh->b_data; u16 cl_bpc = le16_to_cpu(di->id2.i_chain.cl_bpc); - ret = ocfs2_validate_group_descriptor(inode->i_sb, di, group_bh, 1); + ret = ocfs2_check_group_descriptor(inode->i_sb, di, group_bh); if (ret) goto out; diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index bdda2d8f850..40661e7824e 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c @@ -151,7 +151,7 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb) * this is not true, the read of -1 (UINT64_MAX) will fail. */ ret = ocfs2_read_blocks(si->si_inode, -1, si->si_blocks, si->si_bh, - OCFS2_BH_IGNORE_CACHE); + OCFS2_BH_IGNORE_CACHE, NULL); if (ret == 0) { spin_lock(&osb->osb_lock); ocfs2_update_slot_info(si); @@ -405,7 +405,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb, bh = NULL; /* Acquire a fresh bh */ status = ocfs2_read_blocks(si->si_inode, blkno, 1, &bh, - OCFS2_BH_IGNORE_CACHE); + OCFS2_BH_IGNORE_CACHE, NULL); if (status < 0) { mlog_errno(status); goto bail; diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 766a00b2644..226fe21f260 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -145,14 +145,6 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc); } -int ocfs2_validate_group_descriptor(struct super_block *sb, - struct ocfs2_dinode *di, - struct buffer_head *bh, - int clean_error) -{ - unsigned int max_bits; - struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; - #define do_error(fmt, ...) \ do{ \ if (clean_error) \ @@ -161,6 +153,12 @@ int ocfs2_validate_group_descriptor(struct super_block *sb, ocfs2_error(sb, fmt, ##__VA_ARGS__); \ } while (0) +static int ocfs2_validate_gd_self(struct super_block *sb, + struct buffer_head *bh, + int clean_error) +{ + struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; + if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { do_error("Group descriptor #%llu has bad signature %.*s", (unsigned long long)bh->b_blocknr, 7, @@ -184,6 +182,35 @@ int ocfs2_validate_group_descriptor(struct super_block *sb, return -EINVAL; } + if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) { + do_error("Group descriptor #%llu has bit count %u but " + "claims that %u are free", + (unsigned long long)bh->b_blocknr, + le16_to_cpu(gd->bg_bits), + le16_to_cpu(gd->bg_free_bits_count)); + return -EINVAL; + } + + if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) { + do_error("Group descriptor #%llu has bit count %u but " + "max bitmap bits of %u", + (unsigned long long)bh->b_blocknr, + le16_to_cpu(gd->bg_bits), + 8 * le16_to_cpu(gd->bg_size)); + return -EINVAL; + } + + return 0; +} + +static int ocfs2_validate_gd_parent(struct super_block *sb, + struct ocfs2_dinode *di, + struct buffer_head *bh, + int clean_error) +{ + unsigned int max_bits; + struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; + if (di->i_blkno != gd->bg_parent_dinode) { do_error("Group descriptor #%llu has bad parent " "pointer (%llu, expected %llu)", @@ -209,26 +236,35 @@ int ocfs2_validate_group_descriptor(struct super_block *sb, return -EINVAL; } - if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) { - do_error("Group descriptor #%llu has bit count %u but " - "claims that %u are free", - (unsigned long long)bh->b_blocknr, - le16_to_cpu(gd->bg_bits), - le16_to_cpu(gd->bg_free_bits_count)); - return -EINVAL; - } + return 0; +} - if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) { - do_error("Group descriptor #%llu has bit count %u but " - "max bitmap bits of %u", - (unsigned long long)bh->b_blocknr, - le16_to_cpu(gd->bg_bits), - 8 * le16_to_cpu(gd->bg_size)); - return -EINVAL; - } #undef do_error - return 0; +/* + * This version only prints errors. It does not fail the filesystem, and + * exists only for resize. + */ +int ocfs2_check_group_descriptor(struct super_block *sb, + struct ocfs2_dinode *di, + struct buffer_head *bh) +{ + int rc; + + rc = ocfs2_validate_gd_self(sb, bh, 1); + if (!rc) + rc = ocfs2_validate_gd_parent(sb, di, bh, 1); + + return rc; +} + +static int ocfs2_validate_group_descriptor(struct super_block *sb, + struct buffer_head *bh) +{ + mlog(0, "Validating group descriptor %llu\n", + (unsigned long long)bh->b_blocknr); + + return ocfs2_validate_gd_self(sb, bh, 0); } int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di, @@ -237,11 +273,12 @@ int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di, int rc; struct buffer_head *tmp = *bh; - rc = ocfs2_read_block(inode, gd_blkno, &tmp); + rc = ocfs2_read_block(inode, gd_blkno, &tmp, + ocfs2_validate_group_descriptor); if (rc) goto out; - rc = ocfs2_validate_group_descriptor(inode->i_sb, di, tmp, 0); + rc = ocfs2_validate_gd_parent(inode->i_sb, di, tmp, 0); if (rc) { brelse(tmp); goto out; diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index 43de4fd826d..e3c13c77f9e 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -165,16 +165,15 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac); u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster); /* - * By default, ocfs2_validate_group_descriptor() calls ocfs2_error() when it + * By default, ocfs2_read_group_descriptor() calls ocfs2_error() when it * finds a problem. A caller that wants to check a group descriptor - * without going readonly passes a nonzero clean_error. This is only - * resize, really. Everyone else should be using - * ocfs2_read_group_descriptor(). + * without going readonly should read the block with ocfs2_read_block[s]() + * and then checking it with this function. This is only resize, really. + * Everyone else should be using ocfs2_read_group_descriptor(). */ -int ocfs2_validate_group_descriptor(struct super_block *sb, - struct ocfs2_dinode *di, - struct buffer_head *bh, - int clean_error); +int ocfs2_check_group_descriptor(struct super_block *sb, + struct ocfs2_dinode *di, + struct buffer_head *bh); /* * Read a group descriptor block into *bh. If *bh is NULL, a bh will be * allocated. This is a cached read. The descriptor will be validated with diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index ef4aa5482d0..8af29b3bd6d 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -266,7 +266,8 @@ static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket, int rc; rc = ocfs2_read_blocks(bucket->bu_inode, xb_blkno, - bucket->bu_blocks, bucket->bu_bhs, 0); + bucket->bu_blocks, bucket->bu_bhs, 0, + NULL); if (rc) ocfs2_xattr_bucket_relse(bucket); return rc; @@ -359,12 +360,8 @@ static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno, int rc; struct buffer_head *tmp = *bh; - rc = ocfs2_read_block(inode, xb_blkno, &tmp); - if (!rc) { - rc = ocfs2_validate_xattr_block(inode->i_sb, tmp); - if (rc) - brelse(tmp); - } + rc = ocfs2_read_block(inode, xb_blkno, &tmp, + ocfs2_validate_xattr_block); /* If ocfs2_read_block() got us a new bh, pass it up. */ if (!rc && !*bh) @@ -925,7 +922,7 @@ static int ocfs2_xattr_get_value_outside(struct inode *inode, blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); /* Copy ocfs2_xattr_value */ for (i = 0; i < num_clusters * bpc; i++, blkno++) { - ret = ocfs2_read_block(inode, blkno, &bh); + ret = ocfs2_read_block(inode, blkno, &bh, NULL); if (ret) { mlog_errno(ret); goto out; @@ -1174,7 +1171,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode, blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); for (i = 0; i < num_clusters * bpc; i++, blkno++) { - ret = ocfs2_read_block(inode, blkno, &bh); + ret = ocfs2_read_block(inode, blkno, &bh, NULL); if (ret) { mlog_errno(ret); goto out; @@ -2206,7 +2203,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode, base = xis->base; credits += OCFS2_INODE_UPDATE_CREDITS; } else { - int i, block_off; + int i, block_off = 0; xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; xe = xbs->here; name_offset = le16_to_cpu(xe->xe_name_offset); @@ -2840,6 +2837,7 @@ static int ocfs2_find_xe_in_bucket(struct inode *inode, break; } + xe_name = bucket_block(bucket, block_off) + new_offset; if (!memcmp(name, xe_name, name_len)) { *xe_index = i; @@ -3598,7 +3596,7 @@ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, goto out; } - ret = ocfs2_read_block(inode, prev_blkno, &old_bh); + ret = ocfs2_read_block(inode, prev_blkno, &old_bh, NULL); if (ret < 0) { mlog_errno(ret); brelse(new_bh); @@ -3990,7 +3988,7 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode, ocfs2_journal_dirty(handle, first_bh); /* update the new bucket header. */ - ret = ocfs2_read_block(inode, to_blk_start, &bh); + ret = ocfs2_read_block(inode, to_blk_start, &bh, NULL); if (ret < 0) { mlog_errno(ret); goto out; @@ -4337,7 +4335,7 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode, goto out; } - ret = ocfs2_read_block(inode, p_blkno, &first_bh); + ret = ocfs2_read_block(inode, p_blkno, &first_bh, NULL); if (ret) { mlog_errno(ret); goto out; @@ -4635,7 +4633,7 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize); value_blk += header_bh->b_blocknr; - ret = ocfs2_read_block(inode, value_blk, &value_bh); + ret = ocfs2_read_block(inode, value_blk, &value_bh, NULL); if (ret) { mlog_errno(ret); goto out; -- cgit v1.2.3-70-g09d2 From d6b32bbb3eae3fb787f1c33bf9f767ca1ddeb208 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 17 Oct 2008 14:55:01 -0700 Subject: ocfs2: block read meta ecc. Add block check calls to the read_block validate functions. This is the almost all of the read-side checking of metaecc. xattr buckets are not checked yet. Writes are also unchecked, and so a read-write mount will quickly fail. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/alloc.c | 17 +++++++++++++++++ fs/ocfs2/blockcheck.c | 9 +++++++++ fs/ocfs2/inode.c | 18 +++++++++++++++++- fs/ocfs2/quota_global.c | 13 +++++++++++-- fs/ocfs2/suballoc.c | 31 ++++++++++++++++++++++++++++++- fs/ocfs2/xattr.c | 17 +++++++++++++++++ 6 files changed, 101 insertions(+), 4 deletions(-) (limited to 'fs/ocfs2/suballoc.c') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 84a7bd4db5d..6b27f74bb34 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -37,6 +37,7 @@ #include "alloc.h" #include "aops.h" +#include "blockcheck.h" #include "dlmglue.h" #include "extent_map.h" #include "inode.h" @@ -682,12 +683,28 @@ struct ocfs2_merge_ctxt { static int ocfs2_validate_extent_block(struct super_block *sb, struct buffer_head *bh) { + int rc; struct ocfs2_extent_block *eb = (struct ocfs2_extent_block *)bh->b_data; mlog(0, "Validating extent block %llu\n", (unsigned long long)bh->b_blocknr); + BUG_ON(!buffer_uptodate(bh)); + + /* + * If the ecc fails, we return the error but otherwise + * leave the filesystem running. We know any error is + * local to this block. + */ + rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &eb->h_check); + if (rc) + return rc; + + /* + * Errors after here are fatal. + */ + if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { ocfs2_error(sb, "Extent block #%llu has bad signature %.*s", diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c index 2bf3d7f61ae..2ce6ae5e4b8 100644 --- a/fs/ocfs2/blockcheck.c +++ b/fs/ocfs2/blockcheck.c @@ -24,6 +24,8 @@ #include #include +#include + #include "ocfs2.h" #include "blockcheck.h" @@ -292,6 +294,10 @@ int ocfs2_block_check_validate(void *data, size_t blocksize, if (crc == check.bc_crc32e) goto out; + mlog(ML_ERROR, + "CRC32 failed: stored: %u, computed %u. Applying ECC.\n", + (unsigned int)check.bc_crc32e, (unsigned int)crc); + /* Ok, try ECC fixups */ ecc = ocfs2_hamming_encode_block(data, blocksize); ocfs2_hamming_fix_block(data, blocksize, ecc ^ check.bc_ecc); @@ -301,6 +307,9 @@ int ocfs2_block_check_validate(void *data, size_t blocksize, if (crc == check.bc_crc32e) goto out; + mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n", + (unsigned int)check.bc_crc32e, (unsigned int)crc); + rc = -EIO; out: diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 288512c9dbc..9370b652ab9 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -38,6 +38,7 @@ #include "ocfs2.h" #include "alloc.h" +#include "blockcheck.h" #include "dlmglue.h" #include "extent_map.h" #include "file.h" @@ -1262,7 +1263,7 @@ void ocfs2_refresh_inode(struct inode *inode, int ocfs2_validate_inode_block(struct super_block *sb, struct buffer_head *bh) { - int rc = -EINVAL; + int rc; struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; mlog(0, "Validating dinode %llu\n", @@ -1270,6 +1271,21 @@ int ocfs2_validate_inode_block(struct super_block *sb, BUG_ON(!buffer_uptodate(bh)); + /* + * If the ecc fails, we return the error but otherwise + * leave the filesystem running. We know any error is + * local to this block. + */ + rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check); + if (rc) + goto bail; + + /* + * Errors after here are fatal. + */ + + rc = -EINVAL; + if (!OCFS2_IS_VALID_DINODE(di)) { ocfs2_error(sb, "Invalid dinode #%llu: signature = %.*s\n", (unsigned long long)bh->b_blocknr, 7, diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 7dbcfd7f65e..a0b8b14cca8 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -16,6 +16,7 @@ #include "ocfs2_fs.h" #include "ocfs2.h" #include "alloc.h" +#include "blockcheck.h" #include "inode.h" #include "journal.h" #include "file.h" @@ -90,12 +91,20 @@ struct qtree_fmt_operations ocfs2_global_ops = { static int ocfs2_validate_quota_block(struct super_block *sb, struct buffer_head *bh) { - struct ocfs2_disk_dqtrailer *dqt = ocfs2_dq_trailer(sb, bh->b_data); + struct ocfs2_disk_dqtrailer *dqt = + ocfs2_block_dqtrailer(sb->s_blocksize, bh->b_data); mlog(0, "Validating quota block %llu\n", (unsigned long long)bh->b_blocknr); - return 0; + BUG_ON(!buffer_uptodate(bh)); + + /* + * If the ecc fails, we return the error but otherwise + * leave the filesystem running. We know any error is + * local to this block. + */ + return ocfs2_validate_meta_ecc(sb, bh->b_data, &dqt->dq_check); } int ocfs2_read_quota_block(struct inode *inode, u64 v_block, diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 226fe21f260..78755766c32 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -35,6 +35,7 @@ #include "ocfs2.h" #include "alloc.h" +#include "blockcheck.h" #include "dlmglue.h" #include "inode.h" #include "journal.h" @@ -250,8 +251,18 @@ int ocfs2_check_group_descriptor(struct super_block *sb, struct buffer_head *bh) { int rc; + struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; + + BUG_ON(!buffer_uptodate(bh)); - rc = ocfs2_validate_gd_self(sb, bh, 1); + /* + * If the ecc fails, we return the error but otherwise + * leave the filesystem running. We know any error is + * local to this block. + */ + rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &gd->bg_check); + if (!rc) + rc = ocfs2_validate_gd_self(sb, bh, 1); if (!rc) rc = ocfs2_validate_gd_parent(sb, di, bh, 1); @@ -261,9 +272,27 @@ int ocfs2_check_group_descriptor(struct super_block *sb, static int ocfs2_validate_group_descriptor(struct super_block *sb, struct buffer_head *bh) { + int rc; + struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; + mlog(0, "Validating group descriptor %llu\n", (unsigned long long)bh->b_blocknr); + BUG_ON(!buffer_uptodate(bh)); + + /* + * If the ecc fails, we return the error but otherwise + * leave the filesystem running. We know any error is + * local to this block. + */ + rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &gd->bg_check); + if (rc) + return rc; + + /* + * Errors after here are fatal. + */ + return ocfs2_validate_gd_self(sb, bh, 0); } diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index dfc51c305bb..bc822d6ba54 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -42,6 +42,7 @@ #include "ocfs2.h" #include "alloc.h" +#include "blockcheck.h" #include "dlmglue.h" #include "file.h" #include "symlink.h" @@ -322,12 +323,28 @@ static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest, static int ocfs2_validate_xattr_block(struct super_block *sb, struct buffer_head *bh) { + int rc; struct ocfs2_xattr_block *xb = (struct ocfs2_xattr_block *)bh->b_data; mlog(0, "Validating xattr block %llu\n", (unsigned long long)bh->b_blocknr); + BUG_ON(!buffer_uptodate(bh)); + + /* + * If the ecc fails, we return the error but otherwise + * leave the filesystem running. We know any error is + * local to this block. + */ + rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check); + if (rc) + return rc; + + /* + * Errors after here are fatal + */ + if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { ocfs2_error(sb, "Extended attribute block #%llu has bad " -- cgit v1.2.3-70-g09d2 From 13723d00e374c2a6d6ccb5af6de965e89c3e1b01 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 17 Oct 2008 19:25:01 -0700 Subject: ocfs2: Use metadata-specific ocfs2_journal_access_*() functions. The per-metadata-type ocfs2_journal_access_*() functions hook up jbd2 commit triggers and allow us to compute metadata ecc right before the buffers are written out. This commit provides ecc for inodes, extent blocks, group descriptors, and quota blocks. It is not safe to use extened attributes and metaecc at the same time yet. The ocfs2_extent_tree and ocfs2_path abstractions in alloc.c both hide the type of block at their root. Before, it didn't matter, but now the root block must use the appropriate ocfs2_journal_access_*() function. To keep this abstract, the structures now have a pointer to the matching journal_access function and a wrapper call to call it. A few places use naked ocfs2_write_block() calls instead of adding the blocks to the journal. We make sure to calculate their checksum and ecc before the write. Since we pass around the journal_access functions. Let's typedef them in ocfs2.h. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/alloc.c | 233 ++++++++++++++++++++++++++++-------------------- fs/ocfs2/alloc.h | 5 +- fs/ocfs2/aops.c | 8 +- fs/ocfs2/dir.c | 48 ++++++---- fs/ocfs2/file.c | 16 ++-- fs/ocfs2/inode.c | 17 ++-- fs/ocfs2/journal.c | 2 + fs/ocfs2/journal.h | 3 +- fs/ocfs2/localalloc.c | 18 ++-- fs/ocfs2/namei.c | 38 ++++---- fs/ocfs2/ocfs2.h | 4 + fs/ocfs2/quota_global.c | 2 +- fs/ocfs2/quota_local.c | 18 ++-- fs/ocfs2/resize.c | 16 ++-- fs/ocfs2/suballoc.c | 58 ++++++------ 15 files changed, 280 insertions(+), 206 deletions(-) (limited to 'fs/ocfs2/suballoc.c') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index c22ff49b5e3..6e58fd557e5 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -298,11 +298,13 @@ static struct ocfs2_extent_tree_operations ocfs2_xattr_tree_et_ops = { static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et, struct inode *inode, struct buffer_head *bh, + ocfs2_journal_access_func access, void *obj, struct ocfs2_extent_tree_operations *ops) { et->et_ops = ops; et->et_root_bh = bh; + et->et_root_journal_access = access; if (!obj) obj = (void *)bh->b_data; et->et_object = obj; @@ -318,15 +320,16 @@ void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et, struct inode *inode, struct buffer_head *bh) { - __ocfs2_init_extent_tree(et, inode, bh, NULL, &ocfs2_dinode_et_ops); + __ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_di, + NULL, &ocfs2_dinode_et_ops); } void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et, struct inode *inode, struct buffer_head *bh) { - __ocfs2_init_extent_tree(et, inode, bh, NULL, - &ocfs2_xattr_tree_et_ops); + __ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_xb, + NULL, &ocfs2_xattr_tree_et_ops); } void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, @@ -334,7 +337,7 @@ void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, struct buffer_head *bh, struct ocfs2_xattr_value_root *xv) { - __ocfs2_init_extent_tree(et, inode, bh, xv, + __ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access, xv, &ocfs2_xattr_value_et_ops); } @@ -356,6 +359,15 @@ static inline void ocfs2_et_update_clusters(struct inode *inode, et->et_ops->eo_update_clusters(inode, et, clusters); } +static inline int ocfs2_et_root_journal_access(handle_t *handle, + struct inode *inode, + struct ocfs2_extent_tree *et, + int type) +{ + return et->et_root_journal_access(handle, inode, et->et_root_bh, + type); +} + static inline int ocfs2_et_insert_check(struct inode *inode, struct ocfs2_extent_tree *et, struct ocfs2_extent_rec *rec) @@ -396,12 +408,14 @@ struct ocfs2_path_item { #define OCFS2_MAX_PATH_DEPTH 5 struct ocfs2_path { - int p_tree_depth; - struct ocfs2_path_item p_node[OCFS2_MAX_PATH_DEPTH]; + int p_tree_depth; + ocfs2_journal_access_func p_root_access; + struct ocfs2_path_item p_node[OCFS2_MAX_PATH_DEPTH]; }; #define path_root_bh(_path) ((_path)->p_node[0].bh) #define path_root_el(_path) ((_path)->p_node[0].el) +#define path_root_access(_path)((_path)->p_root_access) #define path_leaf_bh(_path) ((_path)->p_node[(_path)->p_tree_depth].bh) #define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el) #define path_num_items(_path) ((_path)->p_tree_depth + 1) @@ -434,6 +448,8 @@ static void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root) */ if (keep_root) depth = le16_to_cpu(path_root_el(path)->l_tree_depth); + else + path_root_access(path) = NULL; path->p_tree_depth = depth; } @@ -459,6 +475,7 @@ static void ocfs2_cp_path(struct ocfs2_path *dest, struct ocfs2_path *src) BUG_ON(path_root_bh(dest) != path_root_bh(src)); BUG_ON(path_root_el(dest) != path_root_el(src)); + BUG_ON(path_root_access(dest) != path_root_access(src)); ocfs2_reinit_path(dest, 1); @@ -480,6 +497,7 @@ static void ocfs2_mv_path(struct ocfs2_path *dest, struct ocfs2_path *src) int i; BUG_ON(path_root_bh(dest) != path_root_bh(src)); + BUG_ON(path_root_access(dest) != path_root_access(src)); for(i = 1; i < OCFS2_MAX_PATH_DEPTH; i++) { brelse(dest->p_node[i].bh); @@ -515,7 +533,8 @@ static inline void ocfs2_path_insert_eb(struct ocfs2_path *path, int index, } static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh, - struct ocfs2_extent_list *root_el) + struct ocfs2_extent_list *root_el, + ocfs2_journal_access_func access) { struct ocfs2_path *path; @@ -527,6 +546,7 @@ static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh, get_bh(root_bh); path_root_bh(path) = root_bh; path_root_el(path) = root_el; + path_root_access(path) = access; } return path; @@ -534,12 +554,38 @@ static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh, static struct ocfs2_path *ocfs2_new_path_from_path(struct ocfs2_path *path) { - return ocfs2_new_path(path_root_bh(path), path_root_el(path)); + return ocfs2_new_path(path_root_bh(path), path_root_el(path), + path_root_access(path)); } static struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et) { - return ocfs2_new_path(et->et_root_bh, et->et_root_el); + return ocfs2_new_path(et->et_root_bh, et->et_root_el, + et->et_root_journal_access); +} + +/* + * Journal the buffer at depth idx. All idx>0 are extent_blocks, + * otherwise it's the root_access function. + * + * I don't like the way this function's name looks next to + * ocfs2_journal_access_path(), but I don't have a better one. + */ +static int ocfs2_path_bh_journal_access(handle_t *handle, + struct inode *inode, + struct ocfs2_path *path, + int idx) +{ + ocfs2_journal_access_func access = path_root_access(path); + + if (!access) + access = ocfs2_journal_access; + + if (idx) + access = ocfs2_journal_access_eb; + + return access(handle, inode, path->p_node[idx].bh, + OCFS2_JOURNAL_ACCESS_WRITE); } /* @@ -554,8 +600,7 @@ static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle, goto out; for(i = 0; i < path_num_items(path); i++) { - ret = ocfs2_journal_access(handle, inode, path->p_node[i].bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, path, i); if (ret < 0) { mlog_errno(ret); goto out; @@ -708,8 +753,11 @@ static int ocfs2_validate_extent_block(struct super_block *sb, * local to this block. */ rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &eb->h_check); - if (rc) + if (rc) { + mlog(ML_ERROR, "Checksum failed for extent block %llu\n", + (unsigned long long)bh->b_blocknr); return rc; + } /* * Errors after here are fatal. @@ -842,8 +890,8 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb, } ocfs2_set_new_buffer_uptodate(inode, bhs[i]); - status = ocfs2_journal_access(handle, inode, bhs[i], - OCFS2_JOURNAL_ACCESS_CREATE); + status = ocfs2_journal_access_eb(handle, inode, bhs[i], + OCFS2_JOURNAL_ACCESS_CREATE); if (status < 0) { mlog_errno(status); goto bail; @@ -986,8 +1034,8 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb)); eb_el = &eb->h_list; - status = ocfs2_journal_access(handle, inode, bh, - OCFS2_JOURNAL_ACCESS_CREATE); + status = ocfs2_journal_access_eb(handle, inode, bh, + OCFS2_JOURNAL_ACCESS_CREATE); if (status < 0) { mlog_errno(status); goto bail; @@ -1026,21 +1074,21 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, * journal_dirty erroring as it won't unless we've aborted the * handle (in which case we would never be here) so reserving * the write with journal_access is all we need to do. */ - status = ocfs2_journal_access(handle, inode, *last_eb_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_eb(handle, inode, *last_eb_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; } - status = ocfs2_journal_access(handle, inode, et->et_root_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_et_root_journal_access(handle, inode, et, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; } if (eb_bh) { - status = ocfs2_journal_access(handle, inode, eb_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_eb(handle, inode, eb_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; @@ -1129,8 +1177,8 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, eb_el = &eb->h_list; root_el = et->et_root_el; - status = ocfs2_journal_access(handle, inode, new_eb_bh, - OCFS2_JOURNAL_ACCESS_CREATE); + status = ocfs2_journal_access_eb(handle, inode, new_eb_bh, + OCFS2_JOURNAL_ACCESS_CREATE); if (status < 0) { mlog_errno(status); goto bail; @@ -1148,8 +1196,8 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, goto bail; } - status = ocfs2_journal_access(handle, inode, et->et_root_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_et_root_journal_access(handle, inode, et, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; @@ -1918,25 +1966,23 @@ static int ocfs2_rotate_subtree_right(struct inode *inode, root_bh = left_path->p_node[subtree_index].bh; BUG_ON(root_bh != right_path->p_node[subtree_index].bh); - ret = ocfs2_journal_access(handle, inode, root_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, right_path, + subtree_index); if (ret) { mlog_errno(ret); goto out; } for(i = subtree_index + 1; i < path_num_items(right_path); i++) { - ret = ocfs2_journal_access(handle, inode, - right_path->p_node[i].bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, + right_path, i); if (ret) { mlog_errno(ret); goto out; } - ret = ocfs2_journal_access(handle, inode, - left_path->p_node[i].bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, + left_path, i); if (ret) { mlog_errno(ret); goto out; @@ -2455,9 +2501,9 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, return -EAGAIN; if (le16_to_cpu(right_leaf_el->l_next_free_rec) > 1) { - ret = ocfs2_journal_access(handle, inode, - path_leaf_bh(right_path), - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_eb(handle, inode, + path_leaf_bh(right_path), + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; @@ -2474,8 +2520,8 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, * We have to update i_last_eb_blk during the meta * data delete. */ - ret = ocfs2_journal_access(handle, inode, et_root_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_et_root_journal_access(handle, inode, et, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; @@ -2490,25 +2536,23 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, */ BUG_ON(right_has_empty && !del_right_subtree); - ret = ocfs2_journal_access(handle, inode, root_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, right_path, + subtree_index); if (ret) { mlog_errno(ret); goto out; } for(i = subtree_index + 1; i < path_num_items(right_path); i++) { - ret = ocfs2_journal_access(handle, inode, - right_path->p_node[i].bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, + right_path, i); if (ret) { mlog_errno(ret); goto out; } - ret = ocfs2_journal_access(handle, inode, - left_path->p_node[i].bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, + left_path, i); if (ret) { mlog_errno(ret); goto out; @@ -2653,16 +2697,17 @@ out: static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode, handle_t *handle, - struct buffer_head *bh, - struct ocfs2_extent_list *el) + struct ocfs2_path *path) { int ret; + struct buffer_head *bh = path_leaf_bh(path); + struct ocfs2_extent_list *el = path_leaf_el(path); if (!ocfs2_is_empty_extent(&el->l_recs[0])) return 0; - ret = ocfs2_journal_access(handle, inode, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, path, + path_num_items(path) - 1); if (ret) { mlog_errno(ret); goto out; @@ -2744,9 +2789,8 @@ static int __ocfs2_rotate_tree_left(struct inode *inode, * Caller might still want to make changes to the * tree root, so re-add it to the journal here. */ - ret = ocfs2_journal_access(handle, inode, - path_root_bh(left_path), - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, + left_path, 0); if (ret) { mlog_errno(ret); goto out; @@ -2929,8 +2973,7 @@ rightmost_no_delete: * it up front. */ ret = ocfs2_rotate_rightmost_leaf_left(inode, handle, - path_leaf_bh(path), - path_leaf_el(path)); + path); if (ret) mlog_errno(ret); goto out; @@ -3164,8 +3207,8 @@ static int ocfs2_merge_rec_right(struct inode *inode, root_bh = left_path->p_node[subtree_index].bh; BUG_ON(root_bh != right_path->p_node[subtree_index].bh); - ret = ocfs2_journal_access(handle, inode, root_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, right_path, + subtree_index); if (ret) { mlog_errno(ret); goto out; @@ -3173,17 +3216,15 @@ static int ocfs2_merge_rec_right(struct inode *inode, for (i = subtree_index + 1; i < path_num_items(right_path); i++) { - ret = ocfs2_journal_access(handle, inode, - right_path->p_node[i].bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, + right_path, i); if (ret) { mlog_errno(ret); goto out; } - ret = ocfs2_journal_access(handle, inode, - left_path->p_node[i].bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, + left_path, i); if (ret) { mlog_errno(ret); goto out; @@ -3195,8 +3236,8 @@ static int ocfs2_merge_rec_right(struct inode *inode, right_rec = &el->l_recs[index + 1]; } - ret = ocfs2_journal_access(handle, inode, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, left_path, + path_num_items(left_path) - 1); if (ret) { mlog_errno(ret); goto out; @@ -3335,8 +3376,8 @@ static int ocfs2_merge_rec_left(struct inode *inode, root_bh = left_path->p_node[subtree_index].bh; BUG_ON(root_bh != right_path->p_node[subtree_index].bh); - ret = ocfs2_journal_access(handle, inode, root_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, right_path, + subtree_index); if (ret) { mlog_errno(ret); goto out; @@ -3344,17 +3385,15 @@ static int ocfs2_merge_rec_left(struct inode *inode, for (i = subtree_index + 1; i < path_num_items(right_path); i++) { - ret = ocfs2_journal_access(handle, inode, - right_path->p_node[i].bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, + right_path, i); if (ret) { mlog_errno(ret); goto out; } - ret = ocfs2_journal_access(handle, inode, - left_path->p_node[i].bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, + left_path, i); if (ret) { mlog_errno(ret); goto out; @@ -3366,8 +3405,8 @@ static int ocfs2_merge_rec_left(struct inode *inode, has_empty_extent = 1; } - ret = ocfs2_journal_access(handle, inode, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_path_bh_journal_access(handle, inode, left_path, + path_num_items(left_path) - 1); if (ret) { mlog_errno(ret); goto out; @@ -4009,8 +4048,8 @@ static int ocfs2_do_insert_extent(struct inode *inode, el = et->et_root_el; - ret = ocfs2_journal_access(handle, inode, et->et_root_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_et_root_journal_access(handle, inode, et, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; @@ -4071,8 +4110,8 @@ static int ocfs2_do_insert_extent(struct inode *inode, * ocfs2_rotate_tree_right() might have extended the * transaction without re-journaling our tree root. */ - ret = ocfs2_journal_access(handle, inode, et->et_root_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_et_root_journal_access(handle, inode, et, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; @@ -4593,9 +4632,9 @@ int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb, BUG_ON(num_bits > clusters_to_add); - /* reserve our write early -- insert_extent may update the inode */ - status = ocfs2_journal_access(handle, inode, et->et_root_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + /* reserve our write early -- insert_extent may update the tree root */ + status = ocfs2_et_root_journal_access(handle, inode, et, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto leave; @@ -5347,8 +5386,8 @@ int ocfs2_remove_btree_range(struct inode *inode, goto out; } - ret = ocfs2_journal_access(handle, inode, et->et_root_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_et_root_journal_access(handle, inode, et, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; @@ -5461,8 +5500,8 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb, goto bail; } - status = ocfs2_journal_access(handle, tl_inode, tl_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, tl_inode, tl_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; @@ -5523,8 +5562,8 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, while (i >= 0) { /* Caller has given us at least enough credits to * update the truncate log dinode */ - status = ocfs2_journal_access(handle, tl_inode, tl_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, tl_inode, tl_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; @@ -5780,6 +5819,7 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb, * tl_used. */ tl->tl_used = 0; + ocfs2_compute_meta_ecc(osb->sb, tl_bh->b_data, &di->i_check); status = ocfs2_write_block(osb, tl_bh, tl_inode); if (status < 0) { mlog_errno(status); @@ -6546,8 +6586,8 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, } if (last_eb_bh) { - status = ocfs2_journal_access(handle, inode, last_eb_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_eb(handle, inode, last_eb_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; @@ -6908,8 +6948,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, goto out_unlock; } - ret = ocfs2_journal_access(handle, inode, di_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_di(handle, inode, di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out_commit; @@ -7043,7 +7083,8 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb, new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, i_size_read(inode)); - path = ocfs2_new_path(fe_bh, &di->id2.i_list); + path = ocfs2_new_path(fe_bh, &di->id2.i_list, + ocfs2_journal_access_di); if (!path) { status = -ENOMEM; mlog_errno(status); @@ -7276,8 +7317,8 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, goto out; } - ret = ocfs2_journal_access(handle, inode, di_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_di(handle, inode, di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out_commit; diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index 59d37d1b7d4..4b6fea22748 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h @@ -45,7 +45,9 @@ * * ocfs2_extent_tree contains info for the root of the b-tree, it must have a * root ocfs2_extent_list and a root_bh so that they can be used in the b-tree - * functions. + * functions. With metadata ecc, we now call different journal_access + * functions for each type of metadata, so it must have the + * root_journal_access function. * ocfs2_extent_tree_operations abstract the normal operations we do for * the root of extent b-tree. */ @@ -54,6 +56,7 @@ struct ocfs2_extent_tree { struct ocfs2_extent_tree_operations *et_ops; struct buffer_head *et_root_bh; struct ocfs2_extent_list *et_root_el; + ocfs2_journal_access_func et_root_journal_access; void *et_object; unsigned int et_max_leaf_clusters; }; diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 6b647ec87bb..a067a6cffb0 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1512,8 +1512,8 @@ static int ocfs2_write_begin_inline(struct address_space *mapping, goto out; } - ret = ocfs2_journal_access(handle, inode, wc->w_di_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_di(handle, inode, wc->w_di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { ocfs2_commit_trans(osb, handle); @@ -1740,8 +1740,8 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, * We don't want this to fail in ocfs2_write_end(), so do it * here. */ - ret = ocfs2_journal_access(handle, inode, wc->w_di_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_di(handle, inode, wc->w_di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out_quota; diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 3708fe482e3..45e4e03d8f7 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -378,14 +378,18 @@ int ocfs2_update_entry(struct inode *dir, handle_t *handle, struct inode *new_entry_inode) { int ret; + ocfs2_journal_access_func access = ocfs2_journal_access_db; /* * The same code works fine for both inline-data and extent - * based directories, so no need to split this up. + * based directories, so no need to split this up. The only + * difference is the journal_access function. */ - ret = ocfs2_journal_access(handle, dir, de_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) + access = ocfs2_journal_access_di; + + ret = access(handle, dir, de_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; @@ -407,9 +411,13 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir, { struct ocfs2_dir_entry *de, *pde; int i, status = -ENOENT; + ocfs2_journal_access_func access = ocfs2_journal_access_db; mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p)\n", handle, dir, de_del, bh); + if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) + access = ocfs2_journal_access_di; + i = 0; pde = NULL; de = (struct ocfs2_dir_entry *) first_de; @@ -420,8 +428,8 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir, goto bail; } if (de == de_del) { - status = ocfs2_journal_access(handle, dir, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = access(handle, dir, bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { status = -EIO; mlog_errno(status); @@ -581,8 +589,14 @@ int __ocfs2_add_entry(handle_t *handle, goto bail; } - status = ocfs2_journal_access(handle, dir, insert_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + if (insert_bh == parent_fe_bh) + status = ocfs2_journal_access_di(handle, dir, + insert_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + else + status = ocfs2_journal_access_db(handle, dir, + insert_bh, + OCFS2_JOURNAL_ACCESS_WRITE); /* By now the buffer is marked for journaling */ offset += le16_to_cpu(de->rec_len); if (le64_to_cpu(de->inode)) { @@ -1081,8 +1095,8 @@ static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb, struct ocfs2_inline_data *data = &di->id2.i_data; unsigned int size = le16_to_cpu(data->id_count); - ret = ocfs2_journal_access(handle, inode, di_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_di(handle, inode, di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; @@ -1129,8 +1143,8 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb, ocfs2_set_new_buffer_uptodate(inode, new_bh); - status = ocfs2_journal_access(handle, inode, new_bh, - OCFS2_JOURNAL_ACCESS_CREATE); + status = ocfs2_journal_access_db(handle, inode, new_bh, + OCFS2_JOURNAL_ACCESS_CREATE); if (status < 0) { mlog_errno(status); goto bail; @@ -1292,8 +1306,8 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, ocfs2_set_new_buffer_uptodate(dir, dirdata_bh); - ret = ocfs2_journal_access(handle, dir, dirdata_bh, - OCFS2_JOURNAL_ACCESS_CREATE); + ret = ocfs2_journal_access_db(handle, dir, dirdata_bh, + OCFS2_JOURNAL_ACCESS_CREATE); if (ret) { mlog_errno(ret); goto out_commit; @@ -1319,8 +1333,8 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, * We let the later dirent insert modify c/mtime - to the user * the data hasn't changed. */ - ret = ocfs2_journal_access(handle, dir, di_bh, - OCFS2_JOURNAL_ACCESS_CREATE); + ret = ocfs2_journal_access_di(handle, dir, di_bh, + OCFS2_JOURNAL_ACCESS_CREATE); if (ret) { mlog_errno(ret); goto out_commit; @@ -1583,8 +1597,8 @@ do_extend: ocfs2_set_new_buffer_uptodate(dir, new_bh); - status = ocfs2_journal_access(handle, dir, new_bh, - OCFS2_JOURNAL_ACCESS_CREATE); + status = ocfs2_journal_access_db(handle, dir, new_bh, + OCFS2_JOURNAL_ACCESS_CREATE); if (status < 0) { mlog_errno(status); goto bail; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 9374d374a26..e8f795f978a 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -256,8 +256,8 @@ int ocfs2_update_inode_atime(struct inode *inode, goto out; } - ret = ocfs2_journal_access(handle, inode, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_di(handle, inode, bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out_commit; @@ -353,8 +353,8 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb, goto out; } - status = ocfs2_journal_access(handle, inode, fe_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, inode, fe_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto out_commit; @@ -590,8 +590,8 @@ restarted_transaction: /* reserve a write to the file entry early on - that we if we * run out of credits in the allocation path, we can still * update i_size. */ - status = ocfs2_journal_access(handle, inode, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, inode, bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto leave; @@ -1121,8 +1121,8 @@ static int __ocfs2_write_remove_suid(struct inode *inode, goto out; } - ret = ocfs2_journal_access(handle, inode, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_di(handle, inode, bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); goto out_trans; diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 9370b652ab9..229e707bc05 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -537,8 +537,8 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb, goto out; } - status = ocfs2_journal_access(handle, inode, fe_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, inode, fe_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto out; @@ -621,8 +621,8 @@ static int ocfs2_remove_inode(struct inode *inode, } /* set the inodes dtime */ - status = ocfs2_journal_access(handle, inode, di_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, inode, di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail_commit; @@ -1190,8 +1190,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle, mlog_entry("(inode %llu)\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); - status = ocfs2_journal_access(handle, inode, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, inode, bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto leave; @@ -1277,8 +1277,11 @@ int ocfs2_validate_inode_block(struct super_block *sb, * local to this block. */ rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check); - if (rc) + if (rc) { + mlog(ML_ERROR, "Checksum failed for dinode %llu\n", + (unsigned long long)bh->b_blocknr); goto bail; + } /* * Errors after here are fatal. diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 2daa5848faf..3b54dba0f74 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -752,6 +752,7 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, if (replayed) ocfs2_bump_recovery_generation(fe); + ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check); status = ocfs2_write_block(osb, bh, journal->j_inode); if (status < 0) mlog_errno(status); @@ -1486,6 +1487,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, osb->slot_recovery_generations[slot_num] = ocfs2_get_recovery_generation(fe); + ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check); status = ocfs2_write_block(osb, bh, inode); if (status < 0) mlog_errno(status); diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index bca370dab02..3c3532e1307 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -247,9 +247,10 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks); #define OCFS2_JOURNAL_ACCESS_WRITE 1 #define OCFS2_JOURNAL_ACCESS_UNDO 2 + /* ocfs2_inode */ int ocfs2_journal_access_di(handle_t *handle, struct inode *inode, - struct buffer_head *bh, int type); + struct buffer_head *bh, int type); /* ocfs2_extent_block */ int ocfs2_journal_access_eb(handle_t *handle, struct inode *inode, struct buffer_head *bh, int type); diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 19cfb1b9ce0..ec70cdbe77f 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -36,6 +36,7 @@ #include "ocfs2.h" #include "alloc.h" +#include "blockcheck.h" #include "dlmglue.h" #include "inode.h" #include "journal.h" @@ -382,8 +383,8 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) } memcpy(alloc_copy, alloc, bh->b_size); - status = ocfs2_journal_access(handle, local_alloc_inode, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, local_alloc_inode, bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto out_commit; @@ -476,6 +477,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, alloc = (struct ocfs2_dinode *) alloc_bh->b_data; ocfs2_clear_local_alloc(alloc); + ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check); status = ocfs2_write_block(osb, alloc_bh, inode); if (status < 0) mlog_errno(status); @@ -762,9 +764,9 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb, * delete bits from it! */ *num_bits = bits_wanted; - status = ocfs2_journal_access(handle, local_alloc_inode, - osb->local_alloc_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, local_alloc_inode, + osb->local_alloc_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; @@ -1240,9 +1242,9 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, } memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size); - status = ocfs2_journal_access(handle, local_alloc_inode, - osb->local_alloc_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, local_alloc_inode, + osb->local_alloc_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 6173807ba23..084aba86c3b 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -361,8 +361,8 @@ static int ocfs2_mknod(struct inode *dir, goto leave; } - status = ocfs2_journal_access(handle, dir, parent_fe_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, dir, parent_fe_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto leave; @@ -493,8 +493,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, } ocfs2_set_new_buffer_uptodate(inode, *new_fe_bh); - status = ocfs2_journal_access(handle, inode, *new_fe_bh, - OCFS2_JOURNAL_ACCESS_CREATE); + status = ocfs2_journal_access_di(handle, inode, *new_fe_bh, + OCFS2_JOURNAL_ACCESS_CREATE); if (status < 0) { mlog_errno(status); goto leave; @@ -664,8 +664,8 @@ static int ocfs2_link(struct dentry *old_dentry, goto out_unlock_inode; } - err = ocfs2_journal_access(handle, inode, fe_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + err = ocfs2_journal_access_di(handle, inode, fe_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (err < 0) { mlog_errno(err); goto out_commit; @@ -851,8 +851,8 @@ static int ocfs2_unlink(struct inode *dir, goto leave; } - status = ocfs2_journal_access(handle, inode, fe_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, inode, fe_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto leave; @@ -1265,8 +1265,8 @@ static int ocfs2_rename(struct inode *old_dir, goto bail; } } - status = ocfs2_journal_access(handle, new_inode, newfe_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, new_inode, newfe_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; @@ -1312,8 +1312,8 @@ static int ocfs2_rename(struct inode *old_dir, old_inode->i_ctime = CURRENT_TIME; mark_inode_dirty(old_inode); - status = ocfs2_journal_access(handle, old_inode, old_inode_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, old_inode, old_inode_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status >= 0) { old_di = (struct ocfs2_dinode *) old_inode_bh->b_data; @@ -1389,9 +1389,9 @@ static int ocfs2_rename(struct inode *old_dir, (int)old_dir_nlink, old_dir->i_nlink); } else { struct ocfs2_dinode *fe; - status = ocfs2_journal_access(handle, old_dir, - old_dir_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, old_dir, + old_dir_bh, + OCFS2_JOURNAL_ACCESS_WRITE); fe = (struct ocfs2_dinode *) old_dir_bh->b_data; fe->i_links_count = cpu_to_le16(old_dir->i_nlink); status = ocfs2_journal_dirty(handle, old_dir_bh); @@ -1898,8 +1898,8 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, goto leave; } - status = ocfs2_journal_access(handle, orphan_dir_inode, orphan_dir_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, orphan_dir_inode, orphan_dir_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto leave; @@ -1986,8 +1986,8 @@ int ocfs2_orphan_del(struct ocfs2_super *osb, goto leave; } - status = ocfs2_journal_access(handle,orphan_dir_inode, orphan_dir_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle,orphan_dir_inode, orphan_dir_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto leave; diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 2bb389fe739..bad87d0a03c 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -339,6 +339,10 @@ struct ocfs2_super #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) +/* Useful typedef for passing around journal access functions */ +typedef int (*ocfs2_journal_access_func)(handle_t *handle, struct inode *inode, + struct buffer_head *bh, int type); + static inline int ocfs2_should_order_data(struct inode *inode) { if (!S_ISREG(inode->i_mode)) diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index a0b8b14cca8..444aa5a467f 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -244,7 +244,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type, set_buffer_uptodate(bh); unlock_buffer(bh); ocfs2_set_buffer_uptodate(gqinode, bh); - err = ocfs2_journal_access(handle, gqinode, bh, ja_type); + err = ocfs2_journal_access_dq(handle, gqinode, bh, ja_type); if (err < 0) { brelse(bh); goto out; diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index d451b715aef..07deec5e972 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -106,8 +106,8 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh, mlog_errno(status); return status; } - status = ocfs2_journal_access(handle, inode, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_dq(handle, inode, bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); ocfs2_commit_trans(OCFS2_SB(sb), handle); @@ -506,7 +506,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode, goto out_commit; } /* Release local quota file entry */ - status = ocfs2_journal_access(handle, lqinode, + status = ocfs2_journal_access_dq(handle, lqinode, qbh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); @@ -614,8 +614,8 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, mlog_errno(status); goto out_bh; } - status = ocfs2_journal_access(handle, lqinode, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_dq(handle, lqinode, bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto out_trans; @@ -981,8 +981,8 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk( goto out; } - status = ocfs2_journal_access(handle, lqinode, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_dq(handle, lqinode, bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto out_trans; @@ -1074,7 +1074,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file( mlog_errno(status); goto out; } - status = ocfs2_journal_access(handle, lqinode, chunk->qc_headerbh, + status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); @@ -1207,7 +1207,7 @@ static int ocfs2_local_release_dquot(struct dquot *dquot) goto out; } - status = ocfs2_journal_access(handle, sb_dqopt(sb)->files[type], + status = ocfs2_journal_access_dq(handle, sb_dqopt(sb)->files[type], od->dq_chunk->qc_headerbh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c index 867de3ebfca..424adaa5f90 100644 --- a/fs/ocfs2/resize.c +++ b/fs/ocfs2/resize.c @@ -106,8 +106,8 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle, mlog_entry("(new_clusters=%d, first_new_cluster = %u)\n", new_clusters, first_new_cluster); - ret = ocfs2_journal_access(handle, bm_inode, group_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_gd(handle, bm_inode, group_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); goto out; @@ -141,8 +141,8 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle, } /* update the inode accordingly. */ - ret = ocfs2_journal_access(handle, bm_inode, bm_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_di(handle, bm_inode, bm_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); goto out_rollback; @@ -536,8 +536,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) cl = &fe->id2.i_chain; cr = &cl->cl_recs[input->chain]; - ret = ocfs2_journal_access(handle, main_bm_inode, group_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_gd(handle, main_bm_inode, group_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); goto out_commit; @@ -552,8 +552,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) goto out_commit; } - ret = ocfs2_journal_access(handle, main_bm_inode, main_bm_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_di(handle, main_bm_inode, main_bm_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); goto out_commit; diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 78755766c32..a69628603e1 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -261,7 +261,11 @@ int ocfs2_check_group_descriptor(struct super_block *sb, * local to this block. */ rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &gd->bg_check); - if (!rc) + if (rc) { + mlog(ML_ERROR, + "Checksum failed for group descriptor %llu\n", + (unsigned long long)bh->b_blocknr); + } else rc = ocfs2_validate_gd_self(sb, bh, 1); if (!rc) rc = ocfs2_validate_gd_parent(sb, di, bh, 1); @@ -343,10 +347,10 @@ static int ocfs2_block_group_fill(handle_t *handle, goto bail; } - status = ocfs2_journal_access(handle, - alloc_inode, - bg_bh, - OCFS2_JOURNAL_ACCESS_CREATE); + status = ocfs2_journal_access_gd(handle, + alloc_inode, + bg_bh, + OCFS2_JOURNAL_ACCESS_CREATE); if (status < 0) { mlog_errno(status); goto bail; @@ -476,8 +480,8 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, bg = (struct ocfs2_group_desc *) bg_bh->b_data; - status = ocfs2_journal_access(handle, alloc_inode, - bh, OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, alloc_inode, + bh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; @@ -986,10 +990,10 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle, if (ocfs2_is_cluster_bitmap(alloc_inode)) journal_type = OCFS2_JOURNAL_ACCESS_UNDO; - status = ocfs2_journal_access(handle, - alloc_inode, - group_bh, - journal_type); + status = ocfs2_journal_access_gd(handle, + alloc_inode, + group_bh, + journal_type); if (status < 0) { mlog_errno(status); goto bail; @@ -1060,8 +1064,8 @@ static int ocfs2_relink_block_group(handle_t *handle, bg_ptr = le64_to_cpu(bg->bg_next_group); prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group); - status = ocfs2_journal_access(handle, alloc_inode, prev_bg_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_gd(handle, alloc_inode, prev_bg_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto out_rollback; @@ -1075,8 +1079,8 @@ static int ocfs2_relink_block_group(handle_t *handle, goto out_rollback; } - status = ocfs2_journal_access(handle, alloc_inode, bg_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_gd(handle, alloc_inode, bg_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto out_rollback; @@ -1090,8 +1094,8 @@ static int ocfs2_relink_block_group(handle_t *handle, goto out_rollback; } - status = ocfs2_journal_access(handle, alloc_inode, fe_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, alloc_inode, fe_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto out_rollback; @@ -1242,8 +1246,8 @@ static int ocfs2_alloc_dinode_update_counts(struct inode *inode, struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain; - ret = ocfs2_journal_access(handle, inode, di_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_journal_access_di(handle, inode, di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); goto out; @@ -1414,10 +1418,10 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, /* Ok, claim our bits now: set the info on dinode, chainlist * and then the group */ - status = ocfs2_journal_access(handle, - alloc_inode, - ac->ac_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, + alloc_inode, + ac->ac_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; @@ -1824,8 +1828,8 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle, if (ocfs2_is_cluster_bitmap(alloc_inode)) journal_type = OCFS2_JOURNAL_ACCESS_UNDO; - status = ocfs2_journal_access(handle, alloc_inode, group_bh, - journal_type); + status = ocfs2_journal_access_gd(handle, alloc_inode, group_bh, + journal_type); if (status < 0) { mlog_errno(status); goto bail; @@ -1900,8 +1904,8 @@ int ocfs2_free_suballoc_bits(handle_t *handle, goto bail; } - status = ocfs2_journal_access(handle, alloc_inode, alloc_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, alloc_inode, alloc_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; -- cgit v1.2.3-70-g09d2