From 7bf72edee614e10b8d470c40a326f47bfdd69992 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Wed, 3 May 2006 17:46:50 -0700 Subject: ocfs2: better group descriptor consistency checks Try to catch corrupted group descriptors with some stronger checks placed in a couple of strategic locations. Detect a failed resizefs and refuse to allocate past what bitmap i_clusters allows. Signed-off-by: Mark Fasheh --- fs/ocfs2/suballoc.c | 114 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 94 insertions(+), 20 deletions(-) (limited to 'fs/ocfs2/suballoc.c') diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 195523090c8..7ac68cac041 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -85,11 +85,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, u64 *bg_blkno); static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, int nr); -static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, - struct buffer_head *bg_bh, - unsigned int bits_wanted, - u16 *bit_off, - u16 *bits_found); static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle, struct inode *alloc_inode, struct ocfs2_group_desc *bg, @@ -143,6 +138,64 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc); } +/* somewhat more expensive than our other checks, so use sparingly. */ +static int ocfs2_check_group_descriptor(struct super_block *sb, + struct ocfs2_dinode *di, + struct ocfs2_group_desc *gd) +{ + unsigned int max_bits; + + if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { + OCFS2_RO_ON_INVALID_GROUP_DESC(sb, gd); + return -EIO; + } + + if (di->i_blkno != gd->bg_parent_dinode) { + ocfs2_error(sb, "Group descriptor # %llu has bad parent " + "pointer (%llu, expected %llu)", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + (unsigned long long)le64_to_cpu(gd->bg_parent_dinode), + (unsigned long long)le64_to_cpu(di->i_blkno)); + return -EIO; + } + + max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc); + if (le16_to_cpu(gd->bg_bits) > max_bits) { + ocfs2_error(sb, "Group descriptor # %llu has bit count of %u", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_bits)); + return -EIO; + } + + if (le16_to_cpu(gd->bg_chain) >= + le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) { + ocfs2_error(sb, "Group descriptor # %llu has bad chain %u", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_chain)); + return -EIO; + } + + if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) { + ocfs2_error(sb, "Group descriptor # %llu has bit count %u but " + "claims that %u are free", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_bits), + le16_to_cpu(gd->bg_free_bits_count)); + return -EIO; + } + + if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) { + ocfs2_error(sb, "Group descriptor # %llu has bit count %u but " + "max bitmap bits of %u", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_bits), + 8 * le16_to_cpu(gd->bg_size)); + return -EIO; + } + + return 0; +} + static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle, struct inode *alloc_inode, struct buffer_head *bg_bh, @@ -663,6 +716,7 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, struct buffer_head *bg_bh, unsigned int bits_wanted, + unsigned int total_bits, u16 *bit_off, u16 *bits_found) { @@ -679,10 +733,8 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, found = start = best_offset = best_size = 0; bitmap = bg->bg_bitmap; - while((offset = ocfs2_find_next_zero_bit(bitmap, - le16_to_cpu(bg->bg_bits), - start)) != -1) { - if (offset == le16_to_cpu(bg->bg_bits)) + while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) { + if (offset == total_bits) break; if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) { @@ -911,14 +963,35 @@ static int ocfs2_cluster_group_search(struct inode *inode, { int search = -ENOSPC; int ret; - struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data; + struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data; u16 tmp_off, tmp_found; + unsigned int max_bits, gd_cluster_off; BUG_ON(!ocfs2_is_cluster_bitmap(inode)); - if (bg->bg_free_bits_count) { + if (gd->bg_free_bits_count) { + max_bits = le16_to_cpu(gd->bg_bits); + + /* Tail groups in cluster bitmaps which aren't cpg + * aligned are prone to partial extention by a failed + * fs resize. If the file system resize never got to + * update the dinode cluster count, then we don't want + * to trust any clusters past it, regardless of what + * the group descriptor says. */ + gd_cluster_off = ocfs2_blocks_to_clusters(inode->i_sb, + le64_to_cpu(gd->bg_blkno)); + if ((gd_cluster_off + max_bits) > + OCFS2_I(inode)->ip_clusters) { + max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off; + mlog(0, "Desc %llu, bg_bits %u, clusters %u, use %u\n", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_bits), + OCFS2_I(inode)->ip_clusters, max_bits); + } + ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), group_bh, bits_wanted, + max_bits, &tmp_off, &tmp_found); if (ret) return ret; @@ -951,6 +1024,7 @@ static int ocfs2_block_group_search(struct inode *inode, if (bg->bg_free_bits_count) ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), group_bh, bits_wanted, + le16_to_cpu(bg->bg_bits), bit_off, bits_found); return ret; @@ -988,9 +1062,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, goto bail; } bg = (struct ocfs2_group_desc *) group_bh->b_data; - if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); - status = -EIO; + status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg); + if (status) { + mlog_errno(status); goto bail; } @@ -1018,9 +1092,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, goto bail; } bg = (struct ocfs2_group_desc *) group_bh->b_data; - if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); - status = -EIO; + status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg); + if (status) { + mlog_errno(status); goto bail; } } @@ -1494,9 +1568,9 @@ static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle, } group = (struct ocfs2_group_desc *) group_bh->b_data; - if (!OCFS2_IS_VALID_GROUP_DESC(group)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, group); - status = -EIO; + status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group); + if (status) { + mlog_errno(status); goto bail; } BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits)); -- cgit v1.2.3-70-g09d2 From 883d4cae4a2b01a05193cf2665c77b7489a8b6a0 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Mon, 5 Jun 2006 16:41:00 -0400 Subject: ocfs2: allocation hints Record the most recently used allocation group on the allocation context, so that subsequent allocations can attempt to optimize for contiguousness. Local alloc especially should benefit from this as the current chain search tends to let it spew across the disk. Signed-off-by: Mark Fasheh --- fs/ocfs2/localalloc.c | 8 +++ fs/ocfs2/ocfs2.h | 1 + fs/ocfs2/suballoc.c | 147 +++++++++++++++++++++++++++++++++++++++++++++----- fs/ocfs2/suballoc.h | 2 + 4 files changed, 145 insertions(+), 13 deletions(-) (limited to 'fs/ocfs2/suballoc.c') diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 0d1973ea32b..1f17a4d0828 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -840,6 +840,12 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, mlog(0, "Allocating %u clusters for a new window.\n", ocfs2_local_alloc_window_bits(osb)); + + /* Instruct the allocation code to try the most recently used + * cluster group. We'll re-record the group used this pass + * below. */ + ac->ac_last_group = osb->la_last_gd; + /* we used the generic suballoc reserve function, but we set * everything up nicely, so there's no reason why we can't use * the more specific cluster api to claim bits. */ @@ -852,6 +858,8 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, goto bail; } + osb->la_last_gd = ac->ac_last_group; + la->la_bm_off = cpu_to_le32(cluster_off); alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count); /* just in case... In the future when we find space ourselves, diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index d52100d49b6..0462a7f4e21 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -236,6 +236,7 @@ struct ocfs2_super enum ocfs2_local_alloc_state local_alloc_state; struct buffer_head *local_alloc_bh; + u64 la_last_gd; /* Next two fields are for local node slot recovery during * mount. */ diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 7ac68cac041..9d91e66f51a 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -70,12 +70,6 @@ static int ocfs2_block_group_search(struct inode *inode, struct buffer_head *group_bh, u32 bits_wanted, u32 min_bits, u16 *bit_off, u16 *bits_found); -static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, - u32 bits_wanted, - u32 min_bits, - u16 *bit_off, - unsigned int *num_bits, - u64 *bg_blkno); static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, struct ocfs2_alloc_context *ac, u32 bits_wanted, @@ -1030,12 +1024,103 @@ static int ocfs2_block_group_search(struct inode *inode, return ret; } +static int ocfs2_alloc_dinode_update_counts(struct inode *inode, + struct ocfs2_journal_handle *handle, + struct buffer_head *di_bh, + u32 num_bits, + u16 chain) +{ + int ret; + u32 tmp_used; + struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; + struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain; + + ret = ocfs2_journal_access(handle, inode, di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + + tmp_used = le32_to_cpu(di->id1.bitmap1.i_used); + di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used); + le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits); + + ret = ocfs2_journal_dirty(handle, di_bh); + if (ret < 0) + mlog_errno(ret); + +out: + return ret; +} + +static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, + u32 bits_wanted, + u32 min_bits, + u16 *bit_off, + unsigned int *num_bits, + u64 gd_blkno, + u16 *bits_left) +{ + int ret; + u16 found; + struct buffer_head *group_bh = NULL; + struct ocfs2_group_desc *gd; + struct inode *alloc_inode = ac->ac_inode; + struct ocfs2_journal_handle *handle = ac->ac_handle; + + ret = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), gd_blkno, + &group_bh, OCFS2_BH_CACHED, alloc_inode); + if (ret < 0) { + mlog_errno(ret); + return ret; + } + + gd = (struct ocfs2_group_desc *) group_bh->b_data; + if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { + OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, gd); + ret = -EIO; + goto out; + } + + ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, + bit_off, &found); + if (ret < 0) { + if (ret != -ENOSPC) + mlog_errno(ret); + goto out; + } + + *num_bits = found; + + ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh, + *num_bits, + le16_to_cpu(gd->bg_chain)); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh, + *bit_off, *num_bits); + if (ret < 0) + mlog_errno(ret); + + *bits_left = le16_to_cpu(gd->bg_free_bits_count); + +out: + brelse(group_bh); + + return ret; +} + static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, u32 bits_wanted, u32 min_bits, u16 *bit_off, unsigned int *num_bits, - u64 *bg_blkno) + u64 *bg_blkno, + u16 *bits_left) { int status; u16 chain, tmp_bits; @@ -1173,6 +1258,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, (unsigned long long)fe->i_blkno); *bg_blkno = le64_to_cpu(bg->bg_blkno); + *bits_left = le16_to_cpu(bg->bg_free_bits_count); bail: if (group_bh) brelse(group_bh); @@ -1194,6 +1280,8 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, { int status; u16 victim, i; + u16 bits_left = 0; + u64 hint_blkno = ac->ac_last_group; struct ocfs2_chain_list *cl; struct ocfs2_dinode *fe; @@ -1220,6 +1308,28 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, goto bail; } + if (hint_blkno) { + /* Attempt to short-circuit the usual search mechanism + * by jumping straight to the most recently used + * allocation group. This helps us mantain some + * contiguousness across allocations. */ + status = ocfs2_search_one_group(ac, bits_wanted, min_bits, + bit_off, num_bits, + hint_blkno, &bits_left); + if (!status) { + /* Be careful to update *bg_blkno here as the + * caller is expecting it to be filled in, and + * ocfs2_search_one_group() won't do that for + * us. */ + *bg_blkno = hint_blkno; + goto set_hint; + } + if (status < 0 && status != -ENOSPC) { + mlog_errno(status); + goto bail; + } + } + cl = (struct ocfs2_chain_list *) &fe->id2.i_chain; victim = ocfs2_find_victim_chain(cl); @@ -1227,9 +1337,9 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, ac->ac_allow_chain_relink = 1; status = ocfs2_search_chain(ac, bits_wanted, min_bits, bit_off, - num_bits, bg_blkno); + num_bits, bg_blkno, &bits_left); if (!status) - goto bail; + goto set_hint; if (status < 0 && status != -ENOSPC) { mlog_errno(status); goto bail; @@ -1251,8 +1361,8 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, ac->ac_chain = i; status = ocfs2_search_chain(ac, bits_wanted, min_bits, - bit_off, num_bits, - bg_blkno); + bit_off, num_bits, bg_blkno, + &bits_left); if (!status) break; if (status < 0 && status != -ENOSPC) { @@ -1260,8 +1370,19 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, goto bail; } } -bail: +set_hint: + if (status != -ENOSPC) { + /* If the next search of this group is not likely to + * yield a suitable extent, then we reset the last + * group hint so as to not waste a disk read */ + if (bits_left < min_bits) + ac->ac_last_group = 0; + else + ac->ac_last_group = *bg_blkno; + } + +bail: mlog_exit(status); return status; } @@ -1415,7 +1536,7 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb, { int status; unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given; - u64 bg_blkno; + u64 bg_blkno = 0; u16 bg_bit_off; mlog_entry_void(); diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index a76c82a7cea..c787838d105 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -49,6 +49,8 @@ struct ocfs2_alloc_context { u16 ac_chain; int ac_allow_chain_relink; group_search_t *ac_group_search; + + u64 ac_last_group; }; void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac); -- cgit v1.2.3-70-g09d2