diff options
Diffstat (limited to 'fs/ocfs2/suballoc.c')
-rw-r--r-- | fs/ocfs2/suballoc.c | 1201 |
1 files changed, 872 insertions, 329 deletions
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index c30b644d957..5fed60de763 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -51,7 +51,33 @@ #define ALLOC_NEW_GROUP 0x1 #define ALLOC_GROUPS_FROM_GLOBAL 0x2 -#define OCFS2_MAX_INODES_TO_STEAL 1024 +#define OCFS2_MAX_TO_STEAL 1024 + +struct ocfs2_suballoc_result { + u64 sr_bg_blkno; /* The bg we allocated from. Set + to 0 when a block group is + contiguous. */ + u64 sr_bg_stable_blkno; /* + * Doesn't change, always + * set to target block + * group descriptor + * block. + */ + u64 sr_blkno; /* The first allocated block */ + unsigned int sr_bit_offset; /* The bit in the bg */ + unsigned int sr_bits; /* How many bits we claimed */ +}; + +static u64 ocfs2_group_from_res(struct ocfs2_suballoc_result *res) +{ + if (res->sr_blkno == 0) + return 0; + + if (res->sr_bg_blkno) + return res->sr_bg_blkno; + + return ocfs2_which_suballoc_group(res->sr_blkno, res->sr_bit_offset); +} static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); @@ -60,6 +86,7 @@ static int ocfs2_block_group_fill(handle_t *handle, struct inode *alloc_inode, struct buffer_head *bg_bh, u64 group_blkno, + unsigned int group_clusters, u16 my_chain, struct ocfs2_chain_list *cl); static int ocfs2_block_group_alloc(struct ocfs2_super *osb, @@ -73,20 +100,17 @@ static int ocfs2_cluster_group_search(struct inode *inode, struct buffer_head *group_bh, u32 bits_wanted, u32 min_bits, u64 max_block, - u16 *bit_off, u16 *bits_found); + struct ocfs2_suballoc_result *res); static int ocfs2_block_group_search(struct inode *inode, struct buffer_head *group_bh, u32 bits_wanted, u32 min_bits, u64 max_block, - u16 *bit_off, u16 *bits_found); -static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, - struct ocfs2_alloc_context *ac, + struct ocfs2_suballoc_result *res); +static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, handle_t *handle, u32 bits_wanted, u32 min_bits, - u16 *bit_off, - unsigned int *num_bits, - u64 *bg_blkno); + struct ocfs2_suballoc_result *res); static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, int nr); static inline int ocfs2_block_group_set_bits(handle_t *handle, @@ -95,13 +119,6 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle, struct buffer_head *group_bh, unsigned int bit_off, unsigned int num_bits); -static inline int ocfs2_block_group_clear_bits(handle_t *handle, - struct inode *alloc_inode, - struct ocfs2_group_desc *bg, - struct buffer_head *group_bh, - unsigned int bit_off, - unsigned int num_bits); - static int ocfs2_relink_block_group(handle_t *handle, struct inode *alloc_inode, struct buffer_head *fe_bh, @@ -137,6 +154,11 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) } brelse(ac->ac_bh); ac->ac_bh = NULL; + ac->ac_resv = NULL; + if (ac->ac_find_loc_priv) { + kfree(ac->ac_find_loc_priv); + ac->ac_find_loc_priv = NULL; + } } void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) @@ -152,7 +174,7 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) #define do_error(fmt, ...) \ do{ \ - if (clean_error) \ + if (resize) \ mlog(ML_ERROR, fmt "\n", ##__VA_ARGS__); \ else \ ocfs2_error(sb, fmt, ##__VA_ARGS__); \ @@ -160,7 +182,7 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) static int ocfs2_validate_gd_self(struct super_block *sb, struct buffer_head *bh, - int clean_error) + int resize) { struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; @@ -211,7 +233,7 @@ static int ocfs2_validate_gd_self(struct super_block *sb, static int ocfs2_validate_gd_parent(struct super_block *sb, struct ocfs2_dinode *di, struct buffer_head *bh, - int clean_error) + int resize) { unsigned int max_bits; struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; @@ -233,8 +255,11 @@ static int ocfs2_validate_gd_parent(struct super_block *sb, return -EINVAL; } - if (le16_to_cpu(gd->bg_chain) >= - le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) { + /* In resize, we may meet the case bg_chain == cl_next_free_rec. */ + if ((le16_to_cpu(gd->bg_chain) > + le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) || + ((le16_to_cpu(gd->bg_chain) == + le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) && !resize)) { do_error("Group descriptor #%llu has bad chain %u", (unsigned long long)bh->b_blocknr, le16_to_cpu(gd->bg_chain)); @@ -329,14 +354,38 @@ out: return rc; } +static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb, + struct ocfs2_group_desc *bg, + struct ocfs2_chain_list *cl, + u64 p_blkno, unsigned int clusters) +{ + struct ocfs2_extent_list *el = &bg->bg_list; + struct ocfs2_extent_rec *rec; + + BUG_ON(!ocfs2_supports_discontig_bg(osb)); + if (!el->l_next_free_rec) + el->l_count = cpu_to_le16(ocfs2_extent_recs_per_gd(osb->sb)); + rec = &el->l_recs[le16_to_cpu(el->l_next_free_rec)]; + rec->e_blkno = cpu_to_le64(p_blkno); + rec->e_cpos = cpu_to_le32(le16_to_cpu(bg->bg_bits) / + le16_to_cpu(cl->cl_bpc)); + rec->e_leaf_clusters = cpu_to_le16(clusters); + le16_add_cpu(&bg->bg_bits, clusters * le16_to_cpu(cl->cl_bpc)); + le16_add_cpu(&bg->bg_free_bits_count, + clusters * le16_to_cpu(cl->cl_bpc)); + le16_add_cpu(&el->l_next_free_rec, 1); +} + static int ocfs2_block_group_fill(handle_t *handle, struct inode *alloc_inode, struct buffer_head *bg_bh, u64 group_blkno, + unsigned int group_clusters, u16 my_chain, struct ocfs2_chain_list *cl) { int status = 0; + struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb); struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; struct super_block * sb = alloc_inode->i_sb; @@ -363,19 +412,23 @@ static int ocfs2_block_group_fill(handle_t *handle, memset(bg, 0, sb->s_blocksize); strcpy(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE); bg->bg_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation); - bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb)); - bg->bg_bits = cpu_to_le16(ocfs2_bits_per_group(cl)); + bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb, 1, + osb->s_feature_incompat)); bg->bg_chain = cpu_to_le16(my_chain); bg->bg_next_group = cl->cl_recs[my_chain].c_blkno; bg->bg_parent_dinode = cpu_to_le64(OCFS2_I(alloc_inode)->ip_blkno); bg->bg_blkno = cpu_to_le64(group_blkno); + if (group_clusters == le16_to_cpu(cl->cl_cpg)) + bg->bg_bits = cpu_to_le16(ocfs2_bits_per_group(cl)); + else + ocfs2_bg_discontig_add_extent(osb, bg, cl, group_blkno, + group_clusters); + /* set the 1st bit in the bitmap to account for the descriptor block */ ocfs2_set_bit(0, (unsigned long *)bg->bg_bitmap); bg->bg_free_bits_count = cpu_to_le16(le16_to_cpu(bg->bg_bits) - 1); - status = ocfs2_journal_dirty(handle, bg_bh); - if (status < 0) - mlog_errno(status); + ocfs2_journal_dirty(handle, bg_bh); /* There is no need to zero out or otherwise initialize the * other blocks in a group - All valid FS metadata in a block @@ -401,6 +454,238 @@ static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl) return best; } +static struct buffer_head * +ocfs2_block_group_alloc_contig(struct ocfs2_super *osb, handle_t *handle, + struct inode *alloc_inode, + struct ocfs2_alloc_context *ac, + struct ocfs2_chain_list *cl) +{ + int status; + u32 bit_off, num_bits; + u64 bg_blkno; + struct buffer_head *bg_bh; + unsigned int alloc_rec = ocfs2_find_smallest_chain(cl); + + status = ocfs2_claim_clusters(handle, ac, + le16_to_cpu(cl->cl_cpg), &bit_off, + &num_bits); + if (status < 0) { + if (status != -ENOSPC) + mlog_errno(status); + goto bail; + } + + /* setup the group */ + bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off); + mlog(0, "new descriptor, record %u, at block %llu\n", + alloc_rec, (unsigned long long)bg_blkno); + + bg_bh = sb_getblk(osb->sb, bg_blkno); + if (!bg_bh) { + status = -EIO; + mlog_errno(status); + goto bail; + } + ocfs2_set_new_buffer_uptodate(INODE_CACHE(alloc_inode), bg_bh); + + status = ocfs2_block_group_fill(handle, alloc_inode, bg_bh, + bg_blkno, num_bits, alloc_rec, cl); + if (status < 0) { + brelse(bg_bh); + mlog_errno(status); + } + +bail: + return status ? ERR_PTR(status) : bg_bh; +} + +static int ocfs2_block_group_claim_bits(struct ocfs2_super *osb, + handle_t *handle, + struct ocfs2_alloc_context *ac, + unsigned int min_bits, + u32 *bit_off, u32 *num_bits) +{ + int status = 0; + + while (min_bits) { + status = ocfs2_claim_clusters(handle, ac, min_bits, + bit_off, num_bits); + if (status != -ENOSPC) + break; + + min_bits >>= 1; + } + + return status; +} + +static int ocfs2_block_group_grow_discontig(handle_t *handle, + struct inode *alloc_inode, + struct buffer_head *bg_bh, + struct ocfs2_alloc_context *ac, + struct ocfs2_chain_list *cl, + unsigned int min_bits) +{ + int status; + struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb); + struct ocfs2_group_desc *bg = + (struct ocfs2_group_desc *)bg_bh->b_data; + unsigned int needed = le16_to_cpu(cl->cl_cpg) - + le16_to_cpu(bg->bg_bits) / le16_to_cpu(cl->cl_bpc); + u32 p_cpos, clusters; + u64 p_blkno; + struct ocfs2_extent_list *el = &bg->bg_list; + + status = ocfs2_journal_access_gd(handle, + INODE_CACHE(alloc_inode), + bg_bh, + OCFS2_JOURNAL_ACCESS_CREATE); + if (status < 0) { + mlog_errno(status); + goto bail; + } + + while ((needed > 0) && (le16_to_cpu(el->l_next_free_rec) < + le16_to_cpu(el->l_count))) { + if (min_bits > needed) + min_bits = needed; + status = ocfs2_block_group_claim_bits(osb, handle, ac, + min_bits, &p_cpos, + &clusters); + if (status < 0) { + if (status != -ENOSPC) + mlog_errno(status); + goto bail; + } + p_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cpos); + ocfs2_bg_discontig_add_extent(osb, bg, cl, p_blkno, + clusters); + + min_bits = clusters; + needed = le16_to_cpu(cl->cl_cpg) - + le16_to_cpu(bg->bg_bits) / le16_to_cpu(cl->cl_bpc); + } + + if (needed > 0) { + /* + * We have used up all the extent rec but can't fill up + * the cpg. So bail out. + */ + status = -ENOSPC; + goto bail; + } + + ocfs2_journal_dirty(handle, bg_bh); + +bail: + return status; +} + +static void ocfs2_bg_alloc_cleanup(handle_t *handle, + struct ocfs2_alloc_context *cluster_ac, + struct inode *alloc_inode, + struct buffer_head *bg_bh) +{ + int i, ret; + struct ocfs2_group_desc *bg; + struct ocfs2_extent_list *el; + struct ocfs2_extent_rec *rec; + + if (!bg_bh) + return; + + bg = (struct ocfs2_group_desc *)bg_bh->b_data; + el = &bg->bg_list; + for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { + rec = &el->l_recs[i]; + ret = ocfs2_free_clusters(handle, cluster_ac->ac_inode, + cluster_ac->ac_bh, + le64_to_cpu(rec->e_blkno), + le32_to_cpu(rec->e_leaf_clusters)); + if (ret) + mlog_errno(ret); + /* Try all the clusters to free */ + } + + ocfs2_remove_from_cache(INODE_CACHE(alloc_inode), bg_bh); + brelse(bg_bh); +} + +static struct buffer_head * +ocfs2_block_group_alloc_discontig(handle_t *handle, + struct inode *alloc_inode, + struct ocfs2_alloc_context *ac, + struct ocfs2_chain_list *cl) +{ + int status; + u32 bit_off, num_bits; + u64 bg_blkno; + unsigned int min_bits = le16_to_cpu(cl->cl_cpg) >> 1; + struct buffer_head *bg_bh = NULL; + unsigned int alloc_rec = ocfs2_find_smallest_chain(cl); + struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb); + + if (!ocfs2_supports_discontig_bg(osb)) { + status = -ENOSPC; + goto bail; + } + + status = ocfs2_extend_trans(handle, + ocfs2_calc_bg_discontig_credits(osb->sb)); + if (status) { + mlog_errno(status); + goto bail; + } + + /* + * We're going to be grabbing from multiple cluster groups. + * We don't have enough credits to relink them all, and the + * cluster groups will be staying in cache for the duration of + * this operation. + */ + ac->ac_allow_chain_relink = 0; + + /* Claim the first region */ + status = ocfs2_block_group_claim_bits(osb, handle, ac, min_bits, + &bit_off, &num_bits); + if (status < 0) { + if (status != -ENOSPC) + mlog_errno(status); + goto bail; + } + min_bits = num_bits; + + /* setup the group */ + bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off); + mlog(0, "new descriptor, record %u, at block %llu\n", + alloc_rec, (unsigned long long)bg_blkno); + + bg_bh = sb_getblk(osb->sb, bg_blkno); + if (!bg_bh) { + status = -EIO; + mlog_errno(status); + goto bail; + } + ocfs2_set_new_buffer_uptodate(INODE_CACHE(alloc_inode), bg_bh); + + status = ocfs2_block_group_fill(handle, alloc_inode, bg_bh, + bg_blkno, num_bits, alloc_rec, cl); + if (status < 0) { + mlog_errno(status); + goto bail; + } + + status = ocfs2_block_group_grow_discontig(handle, alloc_inode, + bg_bh, ac, cl, min_bits); + if (status) + mlog_errno(status); + +bail: + if (status) + ocfs2_bg_alloc_cleanup(handle, ac, alloc_inode, bg_bh); + return status ? ERR_PTR(status) : bg_bh; +} + /* * We expect the block group allocator to already be locked. */ @@ -416,9 +701,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, struct ocfs2_chain_list *cl; struct ocfs2_alloc_context *ac = NULL; handle_t *handle = NULL; - u32 bit_off, num_bits; u16 alloc_rec; - u64 bg_blkno; struct buffer_head *bg_bh = NULL; struct ocfs2_group_desc *bg; @@ -451,44 +734,20 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, (unsigned long long)*last_alloc_group); ac->ac_last_group = *last_alloc_group; } - status = ocfs2_claim_clusters(osb, - handle, - ac, - le16_to_cpu(cl->cl_cpg), - &bit_off, - &num_bits); - if (status < 0) { + + bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode, + ac, cl); + if (IS_ERR(bg_bh) && (PTR_ERR(bg_bh) == -ENOSPC)) + bg_bh = ocfs2_block_group_alloc_discontig(handle, + alloc_inode, + ac, cl); + if (IS_ERR(bg_bh)) { + status = PTR_ERR(bg_bh); + bg_bh = NULL; if (status != -ENOSPC) mlog_errno(status); goto bail; } - - alloc_rec = ocfs2_find_smallest_chain(cl); - - /* setup the group */ - bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off); - mlog(0, "new descriptor, record %u, at block %llu\n", - alloc_rec, (unsigned long long)bg_blkno); - - bg_bh = sb_getblk(osb->sb, bg_blkno); - if (!bg_bh) { - status = -EIO; - mlog_errno(status); - goto bail; - } - ocfs2_set_new_buffer_uptodate(INODE_CACHE(alloc_inode), bg_bh); - - status = ocfs2_block_group_fill(handle, - alloc_inode, - bg_bh, - bg_blkno, - alloc_rec, - cl); - if (status < 0) { - mlog_errno(status); - goto bail; - } - bg = (struct ocfs2_group_desc *) bg_bh->b_data; status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode), @@ -498,10 +757,12 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, goto bail; } + alloc_rec = le16_to_cpu(bg->bg_chain); le32_add_cpu(&cl->cl_recs[alloc_rec].c_free, le16_to_cpu(bg->bg_free_bits_count)); - le32_add_cpu(&cl->cl_recs[alloc_rec].c_total, le16_to_cpu(bg->bg_bits)); - cl->cl_recs[alloc_rec].c_blkno = cpu_to_le64(bg_blkno); + le32_add_cpu(&cl->cl_recs[alloc_rec].c_total, + le16_to_cpu(bg->bg_bits)); + cl->cl_recs[alloc_rec].c_blkno = bg->bg_blkno; if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count)) le16_add_cpu(&cl->cl_next_free_rec, 1); @@ -510,11 +771,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, le32_add_cpu(&fe->id1.bitmap1.i_total, le16_to_cpu(bg->bg_bits)); le32_add_cpu(&fe->i_clusters, le16_to_cpu(cl->cl_cpg)); - status = ocfs2_journal_dirty(handle, bh); - if (status < 0) { - mlog_errno(status); - goto bail; - } + ocfs2_journal_dirty(handle, bh); spin_lock(&OCFS2_I(alloc_inode)->ip_lock); OCFS2_I(alloc_inode)->ip_clusters = le32_to_cpu(fe->i_clusters); @@ -637,12 +894,113 @@ bail: return status; } +static void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb) +{ + spin_lock(&osb->osb_lock); + osb->s_inode_steal_slot = OCFS2_INVALID_SLOT; + spin_unlock(&osb->osb_lock); + atomic_set(&osb->s_num_inodes_stolen, 0); +} + +static void ocfs2_init_meta_steal_slot(struct ocfs2_super *osb) +{ + spin_lock(&osb->osb_lock); + osb->s_meta_steal_slot = OCFS2_INVALID_SLOT; + spin_unlock(&osb->osb_lock); + atomic_set(&osb->s_num_meta_stolen, 0); +} + +void ocfs2_init_steal_slots(struct ocfs2_super *osb) +{ + ocfs2_init_inode_steal_slot(osb); + ocfs2_init_meta_steal_slot(osb); +} + +static void __ocfs2_set_steal_slot(struct ocfs2_super *osb, int slot, int type) +{ + spin_lock(&osb->osb_lock); + if (type == INODE_ALLOC_SYSTEM_INODE) + osb->s_inode_steal_slot = slot; + else if (type == EXTENT_ALLOC_SYSTEM_INODE) + osb->s_meta_steal_slot = slot; + spin_unlock(&osb->osb_lock); +} + +static int __ocfs2_get_steal_slot(struct ocfs2_super *osb, int type) +{ + int slot = OCFS2_INVALID_SLOT; + + spin_lock(&osb->osb_lock); + if (type == INODE_ALLOC_SYSTEM_INODE) + slot = osb->s_inode_steal_slot; + else if (type == EXTENT_ALLOC_SYSTEM_INODE) + slot = osb->s_meta_steal_slot; + spin_unlock(&osb->osb_lock); + + return slot; +} + +static int ocfs2_get_inode_steal_slot(struct ocfs2_super *osb) +{ + return __ocfs2_get_steal_slot(osb, INODE_ALLOC_SYSTEM_INODE); +} + +static int ocfs2_get_meta_steal_slot(struct ocfs2_super *osb) +{ + return __ocfs2_get_steal_slot(osb, EXTENT_ALLOC_SYSTEM_INODE); +} + +static int ocfs2_steal_resource(struct ocfs2_super *osb, + struct ocfs2_alloc_context *ac, + int type) +{ + int i, status = -ENOSPC; + int slot = __ocfs2_get_steal_slot(osb, type); + + /* Start to steal resource from the first slot after ours. */ + if (slot == OCFS2_INVALID_SLOT) + slot = osb->slot_num + 1; + + for (i = 0; i < osb->max_slots; i++, slot++) { + if (slot == osb->max_slots) + slot = 0; + + if (slot == osb->slot_num) + continue; + + status = ocfs2_reserve_suballoc_bits(osb, ac, + type, + (u32)slot, NULL, + NOT_ALLOC_NEW_GROUP); + if (status >= 0) { + __ocfs2_set_steal_slot(osb, slot, type); + break; + } + + ocfs2_free_ac_resource(ac); + } + + return status; +} + +static int ocfs2_steal_inode(struct ocfs2_super *osb, + struct ocfs2_alloc_context *ac) +{ + return ocfs2_steal_resource(osb, ac, INODE_ALLOC_SYSTEM_INODE); +} + +static int ocfs2_steal_meta(struct ocfs2_super *osb, + struct ocfs2_alloc_context *ac) +{ + return ocfs2_steal_resource(osb, ac, EXTENT_ALLOC_SYSTEM_INODE); +} + int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb, int blocks, struct ocfs2_alloc_context **ac) { int status; - u32 slot; + int slot = ocfs2_get_meta_steal_slot(osb); *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); if (!(*ac)) { @@ -653,12 +1011,34 @@ int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb, (*ac)->ac_bits_wanted = blocks; (*ac)->ac_which = OCFS2_AC_USE_META; - slot = osb->slot_num; (*ac)->ac_group_search = ocfs2_block_group_search; + if (slot != OCFS2_INVALID_SLOT && + atomic_read(&osb->s_num_meta_stolen) < OCFS2_MAX_TO_STEAL) + goto extent_steal; + + atomic_set(&osb->s_num_meta_stolen, 0); status = ocfs2_reserve_suballoc_bits(osb, (*ac), EXTENT_ALLOC_SYSTEM_INODE, - slot, NULL, ALLOC_NEW_GROUP); + (u32)osb->slot_num, NULL, + ALLOC_GROUPS_FROM_GLOBAL|ALLOC_NEW_GROUP); + + + if (status >= 0) { + status = 0; + if (slot != OCFS2_INVALID_SLOT) + ocfs2_init_meta_steal_slot(osb); + goto bail; + } else if (status < 0 && status != -ENOSPC) { + mlog_errno(status); + goto bail; + } + + ocfs2_free_ac_resource(*ac); + +extent_steal: + status = ocfs2_steal_meta(osb, *ac); + atomic_inc(&osb->s_num_meta_stolen); if (status < 0) { if (status != -ENOSPC) mlog_errno(status); @@ -685,43 +1065,11 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, ac); } -static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb, - struct ocfs2_alloc_context *ac) -{ - int i, status = -ENOSPC; - s16 slot = ocfs2_get_inode_steal_slot(osb); - - /* Start to steal inodes from the first slot after ours. */ - if (slot == OCFS2_INVALID_SLOT) - slot = osb->slot_num + 1; - - for (i = 0; i < osb->max_slots; i++, slot++) { - if (slot == osb->max_slots) - slot = 0; - - if (slot == osb->slot_num) - continue; - - status = ocfs2_reserve_suballoc_bits(osb, ac, - INODE_ALLOC_SYSTEM_INODE, - slot, NULL, - NOT_ALLOC_NEW_GROUP); - if (status >= 0) { - ocfs2_set_inode_steal_slot(osb, slot); - break; - } - - ocfs2_free_ac_resource(ac); - } - - return status; -} - int ocfs2_reserve_new_inode(struct ocfs2_super *osb, struct ocfs2_alloc_context **ac) { int status; - s16 slot = ocfs2_get_inode_steal_slot(osb); + int slot = ocfs2_get_inode_steal_slot(osb); u64 alloc_group; *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); @@ -754,14 +1102,14 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb, * need to check our slots to see whether there is some space for us. */ if (slot != OCFS2_INVALID_SLOT && - atomic_read(&osb->s_num_inodes_stolen) < OCFS2_MAX_INODES_TO_STEAL) + atomic_read(&osb->s_num_inodes_stolen) < OCFS2_MAX_TO_STEAL) goto inode_steal; atomic_set(&osb->s_num_inodes_stolen, 0); alloc_group = osb->osb_inode_alloc_group; status = ocfs2_reserve_suballoc_bits(osb, *ac, INODE_ALLOC_SYSTEM_INODE, - osb->slot_num, + (u32)osb->slot_num, &alloc_group, ALLOC_NEW_GROUP | ALLOC_GROUPS_FROM_GLOBAL); @@ -789,7 +1137,7 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb, ocfs2_free_ac_resource(*ac); inode_steal: - status = ocfs2_steal_inode_from_other_nodes(osb, *ac); + status = ocfs2_steal_inode(osb, *ac); atomic_inc(&osb->s_num_inodes_stolen); if (status < 0) { if (status != -ENOSPC) @@ -859,11 +1207,7 @@ static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb, status = ocfs2_reserve_local_alloc_bits(osb, bits_wanted, *ac); - if (status == -EFBIG) { - /* The local alloc window is outside ac_max_block. - * use the main bitmap. */ - status = -ENOSPC; - } else if ((status < 0) && (status != -ENOSPC)) { + if ((status < 0) && (status != -ENOSPC)) { mlog_errno(status); goto bail; } @@ -946,8 +1290,7 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, struct buffer_head *bg_bh, unsigned int bits_wanted, unsigned int total_bits, - u16 *bit_off, - u16 *bits_found) + struct ocfs2_suballoc_result *res) { void *bitmap; u16 best_offset, best_size; @@ -991,14 +1334,9 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, } } - /* XXX: I think the first clause is equivalent to the second - * - jlbec */ - if (found == bits_wanted) { - *bit_off = start - found; - *bits_found = found; - } else if (best_size) { - *bit_off = best_offset; - *bits_found = best_size; + if (best_size) { + res->sr_bit_offset = best_offset; + res->sr_bits = best_size; } else { status = -ENOSPC; /* No error log here -- see the comment above @@ -1042,16 +1380,18 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle, } le16_add_cpu(&bg->bg_free_bits_count, -num_bits); - + if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) { + ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit" + " count %u but claims %u are freed. num_bits %d", + (unsigned long long)le64_to_cpu(bg->bg_blkno), + le16_to_cpu(bg->bg_bits), + le16_to_cpu(bg->bg_free_bits_count), num_bits); + return -EROFS; + } while(num_bits--) ocfs2_set_bit(bit_off++, bitmap); - status = ocfs2_journal_dirty(handle, - group_bh); - if (status < 0) { - mlog_errno(status); - goto bail; - } + ocfs2_journal_dirty(handle, group_bh); bail: mlog_exit(status); @@ -1115,12 +1455,7 @@ static int ocfs2_relink_block_group(handle_t *handle, } prev_bg->bg_next_group = bg->bg_next_group; - - status = ocfs2_journal_dirty(handle, prev_bg_bh); - if (status < 0) { - mlog_errno(status); - goto out_rollback; - } + ocfs2_journal_dirty(handle, prev_bg_bh); status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), bg_bh, OCFS2_JOURNAL_ACCESS_WRITE); @@ -1130,12 +1465,7 @@ static int ocfs2_relink_block_group(handle_t *handle, } bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno; - - status = ocfs2_journal_dirty(handle, bg_bh); - if (status < 0) { - mlog_errno(status); - goto out_rollback; - } + ocfs2_journal_dirty(handle, bg_bh); status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode), fe_bh, OCFS2_JOURNAL_ACCESS_WRITE); @@ -1145,14 +1475,8 @@ static int ocfs2_relink_block_group(handle_t *handle, } fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno; + ocfs2_journal_dirty(handle, fe_bh); - status = ocfs2_journal_dirty(handle, fe_bh); - if (status < 0) { - mlog_errno(status); - goto out_rollback; - } - - status = 0; out_rollback: if (status < 0) { fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr); @@ -1176,14 +1500,13 @@ static int ocfs2_cluster_group_search(struct inode *inode, struct buffer_head *group_bh, u32 bits_wanted, u32 min_bits, u64 max_block, - u16 *bit_off, u16 *bits_found) + struct ocfs2_suballoc_result *res) { int search = -ENOSPC; int ret; u64 blkoff; struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - u16 tmp_off, tmp_found; unsigned int max_bits, gd_cluster_off; BUG_ON(!ocfs2_is_cluster_bitmap(inode)); @@ -1210,15 +1533,15 @@ static int ocfs2_cluster_group_search(struct inode *inode, ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), group_bh, bits_wanted, - max_bits, - &tmp_off, &tmp_found); + max_bits, res); if (ret) return ret; if (max_block) { blkoff = ocfs2_clusters_to_blocks(inode->i_sb, gd_cluster_off + - tmp_off + tmp_found); + res->sr_bit_offset + + res->sr_bits); mlog(0, "Checking %llu against %llu\n", (unsigned long long)blkoff, (unsigned long long)max_block); @@ -1230,16 +1553,14 @@ static int ocfs2_cluster_group_search(struct inode *inode, * return success, but we still want to return * -ENOSPC unless it found the minimum number * of bits. */ - if (min_bits <= tmp_found) { - *bit_off = tmp_off; - *bits_found = tmp_found; + if (min_bits <= res->sr_bits) search = 0; /* success */ - } else if (tmp_found) { + else if (res->sr_bits) { /* * Don't show bits which we'll be returning * for allocation to the local alloc bitmap. */ - ocfs2_local_alloc_seen_free_bits(osb, tmp_found); + ocfs2_local_alloc_seen_free_bits(osb, res->sr_bits); } } @@ -1250,7 +1571,7 @@ static int ocfs2_block_group_search(struct inode *inode, struct buffer_head *group_bh, u32 bits_wanted, u32 min_bits, u64 max_block, - u16 *bit_off, u16 *bits_found) + struct ocfs2_suballoc_result *res) { int ret = -ENOSPC; u64 blkoff; @@ -1263,10 +1584,10 @@ static int ocfs2_block_group_search(struct inode *inode, ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), group_bh, bits_wanted, le16_to_cpu(bg->bg_bits), - bit_off, bits_found); + res); if (!ret && max_block) { - blkoff = le64_to_cpu(bg->bg_blkno) + *bit_off + - *bits_found; + blkoff = le64_to_cpu(bg->bg_blkno) + + res->sr_bit_offset + res->sr_bits; mlog(0, "Checking %llu against %llu\n", (unsigned long long)blkoff, (unsigned long long)max_block); @@ -1299,33 +1620,76 @@ static int ocfs2_alloc_dinode_update_counts(struct inode *inode, tmp_used = le32_to_cpu(di->id1.bitmap1.i_used); di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used); le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits); - - ret = ocfs2_journal_dirty(handle, di_bh); - if (ret < 0) - mlog_errno(ret); + ocfs2_journal_dirty(handle, di_bh); out: return ret; } +static int ocfs2_bg_discontig_fix_by_rec(struct ocfs2_suballoc_result *res, + struct ocfs2_extent_rec *rec, + struct ocfs2_chain_list *cl) +{ + unsigned int bpc = le16_to_cpu(cl->cl_bpc); + unsigned int bitoff = le32_to_cpu(rec->e_cpos) * bpc; + unsigned int bitcount = le32_to_cpu(rec->e_leaf_clusters) * bpc; + + if (res->sr_bit_offset < bitoff) + return 0; + if (res->sr_bit_offset >= (bitoff + bitcount)) + return 0; + res->sr_blkno = le64_to_cpu(rec->e_blkno) + + (res->sr_bit_offset - bitoff); + if ((res->sr_bit_offset + res->sr_bits) > (bitoff + bitcount)) + res->sr_bits = (bitoff + bitcount) - res->sr_bit_offset; + return 1; +} + +static void ocfs2_bg_discontig_fix_result(struct ocfs2_alloc_context *ac, + struct ocfs2_group_desc *bg, + struct ocfs2_suballoc_result *res) +{ + int i; + u64 bg_blkno = res->sr_bg_blkno; /* Save off */ + struct ocfs2_extent_rec *rec; + struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data; + struct ocfs2_chain_list *cl = &di->id2.i_chain; + + if (ocfs2_is_cluster_bitmap(ac->ac_inode)) { + res->sr_blkno = 0; + return; + } + + res->sr_blkno = res->sr_bg_blkno + res->sr_bit_offset; + res->sr_bg_blkno = 0; /* Clear it for contig block groups */ + if (!ocfs2_supports_discontig_bg(OCFS2_SB(ac->ac_inode->i_sb)) || + !bg->bg_list.l_next_free_rec) + return; + + for (i = 0; i < le16_to_cpu(bg->bg_list.l_next_free_rec); i++) { + rec = &bg->bg_list.l_recs[i]; + if (ocfs2_bg_discontig_fix_by_rec(res, rec, cl)) { + res->sr_bg_blkno = bg_blkno; /* Restore */ + break; + } + } +} + static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, handle_t *handle, u32 bits_wanted, u32 min_bits, - u16 *bit_off, - unsigned int *num_bits, - u64 gd_blkno, + struct ocfs2_suballoc_result *res, u16 *bits_left) { int ret; - u16 found; struct buffer_head *group_bh = NULL; struct ocfs2_group_desc *gd; struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data; struct inode *alloc_inode = ac->ac_inode; - ret = ocfs2_read_group_descriptor(alloc_inode, di, gd_blkno, - &group_bh); + ret = ocfs2_read_group_descriptor(alloc_inode, di, + res->sr_bg_blkno, &group_bh); if (ret < 0) { mlog_errno(ret); return ret; @@ -1333,17 +1697,27 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, gd = (struct ocfs2_group_desc *) group_bh->b_data; ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, - ac->ac_max_block, bit_off, &found); + ac->ac_max_block, res); if (ret < 0) { if (ret != -ENOSPC) mlog_errno(ret); goto out; } - *num_bits = found; + if (!ret) + ocfs2_bg_discontig_fix_result(ac, gd, res); + + /* + * sr_bg_blkno might have been changed by + * ocfs2_bg_discontig_fix_result + */ + res->sr_bg_stable_blkno = group_bh->b_blocknr; + + if (ac->ac_find_loc_only) + goto out_loc_only; ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh, - *num_bits, + res->sr_bits, le16_to_cpu(gd->bg_chain)); if (ret < 0) { mlog_errno(ret); @@ -1351,10 +1725,11 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, } ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh, - *bit_off, *num_bits); + res->sr_bit_offset, res->sr_bits); if (ret < 0) mlog_errno(ret); +out_loc_only: *bits_left = le16_to_cpu(gd->bg_free_bits_count); out: @@ -1367,14 +1742,11 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, handle_t *handle, u32 bits_wanted, u32 min_bits, - u16 *bit_off, - unsigned int *num_bits, - u64 *bg_blkno, + struct ocfs2_suballoc_result *res, u16 *bits_left) { int status; - u16 chain, tmp_bits; - u32 tmp_used; + u16 chain; u64 next_group; struct inode *alloc_inode = ac->ac_inode; struct buffer_head *group_bh = NULL; @@ -1402,8 +1774,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, * the 1st group with any empty bits. */ while ((status = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, - ac->ac_max_block, bit_off, - &tmp_bits)) == -ENOSPC) { + ac->ac_max_block, + res)) == -ENOSPC) { if (!bg->bg_next_group) break; @@ -1428,11 +1800,19 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, } mlog(0, "alloc succeeds: we give %u bits from block group %llu\n", - tmp_bits, (unsigned long long)le64_to_cpu(bg->bg_blkno)); + res->sr_bits, (unsigned long long)le64_to_cpu(bg->bg_blkno)); + + res->sr_bg_blkno = le64_to_cpu(bg->bg_blkno); - *num_bits = tmp_bits; + BUG_ON(res->sr_bits == 0); + if (!status) + ocfs2_bg_discontig_fix_result(ac, bg, res); - BUG_ON(*num_bits == 0); + /* + * sr_bg_blkno might have been changed by + * ocfs2_bg_discontig_fix_result + */ + res->sr_bg_stable_blkno = group_bh->b_blocknr; /* * Keep track of previous block descriptor read. When @@ -1449,7 +1829,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, */ if (ac->ac_allow_chain_relink && (prev_group_bh) && - (ocfs2_block_group_reasonably_empty(bg, *num_bits))) { + (ocfs2_block_group_reasonably_empty(bg, res->sr_bits))) { status = ocfs2_relink_block_group(handle, alloc_inode, ac->ac_bh, group_bh, prev_group_bh, chain); @@ -1459,24 +1839,13 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, } } - /* Ok, claim our bits now: set the info on dinode, chainlist - * and then the group */ - status = ocfs2_journal_access_di(handle, - INODE_CACHE(alloc_inode), - ac->ac_bh, - OCFS2_JOURNAL_ACCESS_WRITE); - if (status < 0) { - mlog_errno(status); - goto bail; - } + if (ac->ac_find_loc_only) + goto out_loc_only; - tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); - fe->id1.bitmap1.i_used = cpu_to_le32(*num_bits + tmp_used); - le32_add_cpu(&cl->cl_recs[chain].c_free, -(*num_bits)); - - status = ocfs2_journal_dirty(handle, - ac->ac_bh); - if (status < 0) { + status = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, + ac->ac_bh, res->sr_bits, + chain); + if (status) { mlog_errno(status); goto bail; } @@ -1485,17 +1854,17 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, alloc_inode, bg, group_bh, - *bit_off, - *num_bits); + res->sr_bit_offset, + res->sr_bits); if (status < 0) { mlog_errno(status); goto bail; } - mlog(0, "Allocated %u bits from suballocator %llu\n", *num_bits, + mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits, (unsigned long long)le64_to_cpu(fe->i_blkno)); - *bg_blkno = le64_to_cpu(bg->bg_blkno); +out_loc_only: *bits_left = le16_to_cpu(bg->bg_free_bits_count); bail: brelse(group_bh); @@ -1506,19 +1875,16 @@ bail: } /* will give out up to bits_wanted contiguous bits. */ -static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, - struct ocfs2_alloc_context *ac, +static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, handle_t *handle, u32 bits_wanted, u32 min_bits, - u16 *bit_off, - unsigned int *num_bits, - u64 *bg_blkno) + struct ocfs2_suballoc_result *res) { int status; u16 victim, i; u16 bits_left = 0; - u64 hint_blkno = ac->ac_last_group; + u64 hint = ac->ac_last_group; struct ocfs2_chain_list *cl; struct ocfs2_dinode *fe; @@ -1536,7 +1902,8 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, if (le32_to_cpu(fe->id1.bitmap1.i_used) >= le32_to_cpu(fe->id1.bitmap1.i_total)) { - ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used " + ocfs2_error(ac->ac_inode->i_sb, + "Chain allocator dinode %llu has %u used " "bits but only %u total.", (unsigned long long)le64_to_cpu(fe->i_blkno), le32_to_cpu(fe->id1.bitmap1.i_used), @@ -1545,22 +1912,16 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, goto bail; } - if (hint_blkno) { + res->sr_bg_blkno = hint; + if (res->sr_bg_blkno) { /* Attempt to short-circuit the usual search mechanism * by jumping straight to the most recently used * allocation group. This helps us mantain some * contiguousness across allocations. */ status = ocfs2_search_one_group(ac, handle, bits_wanted, - min_bits, bit_off, num_bits, - hint_blkno, &bits_left); - if (!status) { - /* Be careful to update *bg_blkno here as the - * caller is expecting it to be filled in, and - * ocfs2_search_one_group() won't do that for - * us. */ - *bg_blkno = hint_blkno; + min_bits, res, &bits_left); + if (!status) goto set_hint; - } if (status < 0 && status != -ENOSPC) { mlog_errno(status); goto bail; @@ -1573,10 +1934,12 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, ac->ac_chain = victim; ac->ac_allow_chain_relink = 1; - status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, bit_off, - num_bits, bg_blkno, &bits_left); - if (!status) + status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, + res, &bits_left); + if (!status) { + hint = ocfs2_group_from_res(res); goto set_hint; + } if (status < 0 && status != -ENOSPC) { mlog_errno(status); goto bail; @@ -1598,10 +1961,11 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, ac->ac_chain = i; status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, - bit_off, num_bits, bg_blkno, - &bits_left); - if (!status) + res, &bits_left); + if (!status) { + hint = ocfs2_group_from_res(res); break; + } if (status < 0 && status != -ENOSPC) { mlog_errno(status); goto bail; @@ -1616,7 +1980,7 @@ set_hint: if (bits_left < min_bits) ac->ac_last_group = 0; else - ac->ac_last_group = *bg_blkno; + ac->ac_last_group = hint; } bail: @@ -1624,37 +1988,37 @@ bail: return status; } -int ocfs2_claim_metadata(struct ocfs2_super *osb, - handle_t *handle, +int ocfs2_claim_metadata(handle_t *handle, struct ocfs2_alloc_context *ac, u32 bits_wanted, + u64 *suballoc_loc, u16 *suballoc_bit_start, unsigned int *num_bits, u64 *blkno_start) { int status; - u64 bg_blkno; + struct ocfs2_suballoc_result res = { .sr_blkno = 0, }; BUG_ON(!ac); BUG_ON(ac->ac_bits_wanted < (ac->ac_bits_given + bits_wanted)); BUG_ON(ac->ac_which != OCFS2_AC_USE_META); - status = ocfs2_claim_suballoc_bits(osb, - ac, + status = ocfs2_claim_suballoc_bits(ac, handle, bits_wanted, 1, - suballoc_bit_start, - num_bits, - &bg_blkno); + &res); if (status < 0) { mlog_errno(status); goto bail; } - atomic_inc(&osb->alloc_stats.bg_allocs); + atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs); - *blkno_start = bg_blkno + (u64) *suballoc_bit_start; - ac->ac_bits_given += (*num_bits); + *suballoc_loc = res.sr_bg_blkno; + *suballoc_bit_start = res.sr_bit_offset; + *blkno_start = res.sr_blkno; + ac->ac_bits_given += res.sr_bits; + *num_bits = res.sr_bits; status = 0; bail: mlog_exit(status); @@ -1662,10 +2026,10 @@ bail: } static void ocfs2_init_inode_ac_group(struct inode *dir, - struct buffer_head *parent_fe_bh, + struct buffer_head *parent_di_bh, struct ocfs2_alloc_context *ac) { - struct ocfs2_dinode *fe = (struct ocfs2_dinode *)parent_fe_bh->b_data; + struct ocfs2_dinode *di = (struct ocfs2_dinode *)parent_di_bh->b_data; /* * Try to allocate inodes from some specific group. * @@ -1679,10 +2043,14 @@ static void ocfs2_init_inode_ac_group(struct inode *dir, if (OCFS2_I(dir)->ip_last_used_group && OCFS2_I(dir)->ip_last_used_slot == ac->ac_alloc_slot) ac->ac_last_group = OCFS2_I(dir)->ip_last_used_group; - else if (le16_to_cpu(fe->i_suballoc_slot) == ac->ac_alloc_slot) - ac->ac_last_group = ocfs2_which_suballoc_group( - le64_to_cpu(fe->i_blkno), - le16_to_cpu(fe->i_suballoc_bit)); + else if (le16_to_cpu(di->i_suballoc_slot) == ac->ac_alloc_slot) { + if (di->i_suballoc_loc) + ac->ac_last_group = le64_to_cpu(di->i_suballoc_loc); + else + ac->ac_last_group = ocfs2_which_suballoc_group( + le64_to_cpu(di->i_blkno), + le16_to_cpu(di->i_suballoc_bit)); + } } static inline void ocfs2_save_inode_ac_group(struct inode *dir, @@ -1692,17 +2060,146 @@ static inline void ocfs2_save_inode_ac_group(struct inode *dir, OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot; } -int ocfs2_claim_new_inode(struct ocfs2_super *osb, - handle_t *handle, +int ocfs2_find_new_inode_loc(struct inode *dir, + struct buffer_head *parent_fe_bh, + struct ocfs2_alloc_context *ac, + u64 *fe_blkno) +{ + int ret; + handle_t *handle = NULL; + struct ocfs2_suballoc_result *res; + + BUG_ON(!ac); + BUG_ON(ac->ac_bits_given != 0); + BUG_ON(ac->ac_bits_wanted != 1); + BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE); + + res = kzalloc(sizeof(*res), GFP_NOFS); + if (res == NULL) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac); + + /* + * The handle started here is for chain relink. Alternatively, + * we could just disable relink for these calls. + */ + handle = ocfs2_start_trans(OCFS2_SB(dir->i_sb), OCFS2_SUBALLOC_ALLOC); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + handle = NULL; + mlog_errno(ret); + goto out; + } + + /* + * This will instruct ocfs2_claim_suballoc_bits and + * ocfs2_search_one_group to search but save actual allocation + * for later. + */ + ac->ac_find_loc_only = 1; + + ret = ocfs2_claim_suballoc_bits(ac, handle, 1, 1, res); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + + ac->ac_find_loc_priv = res; + *fe_blkno = res->sr_blkno; + +out: + if (handle) + ocfs2_commit_trans(OCFS2_SB(dir->i_sb), handle); + + if (ret) + kfree(res); + + return ret; +} + +int ocfs2_claim_new_inode_at_loc(handle_t *handle, + struct inode *dir, + struct ocfs2_alloc_context *ac, + u64 *suballoc_loc, + u16 *suballoc_bit, + u64 di_blkno) +{ + int ret; + u16 chain; + struct ocfs2_suballoc_result *res = ac->ac_find_loc_priv; + struct buffer_head *bg_bh = NULL; + struct ocfs2_group_desc *bg; + struct ocfs2_dinode *di = (struct ocfs2_dinode *) ac->ac_bh->b_data; + + /* + * Since di_blkno is being passed back in, we check for any + * inconsistencies which may have happened between + * calls. These are code bugs as di_blkno is not expected to + * change once returned from ocfs2_find_new_inode_loc() + */ + BUG_ON(res->sr_blkno != di_blkno); + + ret = ocfs2_read_group_descriptor(ac->ac_inode, di, + res->sr_bg_stable_blkno, &bg_bh); + if (ret) { + mlog_errno(ret); + goto out; + } + + bg = (struct ocfs2_group_desc *) bg_bh->b_data; + chain = le16_to_cpu(bg->bg_chain); + + ret = ocfs2_alloc_dinode_update_counts(ac->ac_inode, handle, + ac->ac_bh, res->sr_bits, + chain); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_block_group_set_bits(handle, + ac->ac_inode, + bg, + bg_bh, + res->sr_bit_offset, + res->sr_bits); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + + mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits, + (unsigned long long)di_blkno); + + atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs); + + BUG_ON(res->sr_bits != 1); + + *suballoc_loc = res->sr_bg_blkno; + *suballoc_bit = res->sr_bit_offset; + ac->ac_bits_given++; + ocfs2_save_inode_ac_group(dir, ac); + +out: + brelse(bg_bh); + + return ret; +} + +int ocfs2_claim_new_inode(handle_t *handle, struct inode *dir, struct buffer_head *parent_fe_bh, struct ocfs2_alloc_context *ac, + u64 *suballoc_loc, u16 *suballoc_bit, u64 *fe_blkno) { int status; - unsigned int num_bits; - u64 bg_blkno; + struct ocfs2_suballoc_result res; mlog_entry_void(); @@ -1713,23 +2210,22 @@ int ocfs2_claim_new_inode(struct ocfs2_super *osb, ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac); - status = ocfs2_claim_suballoc_bits(osb, - ac, + status = ocfs2_claim_suballoc_bits(ac, handle, 1, 1, - suballoc_bit, - &num_bits, - &bg_blkno); + &res); if (status < 0) { mlog_errno(status); goto bail; } - atomic_inc(&osb->alloc_stats.bg_allocs); + atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs); - BUG_ON(num_bits != 1); + BUG_ON(res.sr_bits != 1); - *fe_blkno = bg_blkno + (u64) (*suballoc_bit); + *suballoc_loc = res.sr_bg_blkno; + *suballoc_bit = res.sr_bit_offset; + *fe_blkno = res.sr_blkno; ac->ac_bits_given++; ocfs2_save_inode_ac_group(dir, ac); status = 0; @@ -1799,8 +2295,7 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode, * contig. allocation, set to '1' to indicate we can deal with extents * of any size. */ -int __ocfs2_claim_clusters(struct ocfs2_super *osb, - handle_t *handle, +int __ocfs2_claim_clusters(handle_t *handle, struct ocfs2_alloc_context *ac, u32 min_clusters, u32 max_clusters, @@ -1809,8 +2304,8 @@ int __ocfs2_claim_clusters(struct ocfs2_super *osb, { int status; unsigned int bits_wanted = max_clusters; - u64 bg_blkno = 0; - u16 bg_bit_off; + struct ocfs2_suballoc_result res = { .sr_blkno = 0, }; + struct ocfs2_super *osb = OCFS2_SB(ac->ac_inode->i_sb); mlog_entry_void(); @@ -1820,6 +2315,8 @@ int __ocfs2_claim_clusters(struct ocfs2_super *osb, && ac->ac_which != OCFS2_AC_USE_MAIN); if (ac->ac_which == OCFS2_AC_USE_LOCAL) { + WARN_ON(min_clusters > 1); + status = ocfs2_claim_local_alloc_bits(osb, handle, ac, @@ -1842,20 +2339,19 @@ int __ocfs2_claim_clusters(struct ocfs2_super *osb, if (bits_wanted > (osb->bitmap_cpg - 1)) bits_wanted = osb->bitmap_cpg - 1; - status = ocfs2_claim_suballoc_bits(osb, - ac, + status = ocfs2_claim_suballoc_bits(ac, handle, bits_wanted, min_clusters, - &bg_bit_off, - num_clusters, - &bg_blkno); + &res); if (!status) { + BUG_ON(res.sr_blkno); /* cluster alloc can't set */ *cluster_start = ocfs2_desc_bitmap_to_cluster_off(ac->ac_inode, - bg_blkno, - bg_bit_off); + res.sr_bg_blkno, + res.sr_bit_offset); atomic_inc(&osb->alloc_stats.bitmap_data); + *num_clusters = res.sr_bits; } } if (status < 0) { @@ -1871,8 +2367,7 @@ bail: return status; } -int ocfs2_claim_clusters(struct ocfs2_super *osb, - handle_t *handle, +int ocfs2_claim_clusters(handle_t *handle, struct ocfs2_alloc_context *ac, u32 min_clusters, u32 *cluster_start, @@ -1880,22 +2375,22 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb, { unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given; - return __ocfs2_claim_clusters(osb, handle, ac, min_clusters, + return __ocfs2_claim_clusters(handle, ac, min_clusters, bits_wanted, cluster_start, num_clusters); } -static inline int ocfs2_block_group_clear_bits(handle_t *handle, - struct inode *alloc_inode, - struct ocfs2_group_desc *bg, - struct buffer_head *group_bh, - unsigned int bit_off, - unsigned int num_bits) +static int ocfs2_block_group_clear_bits(handle_t *handle, + struct inode *alloc_inode, + struct ocfs2_group_desc *bg, + struct buffer_head *group_bh, + unsigned int bit_off, + unsigned int num_bits, + void (*undo_fn)(unsigned int bit, + unsigned long *bmap)) { int status; unsigned int tmp; - int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; struct ocfs2_group_desc *undo_bg = NULL; - int cluster_bitmap = 0; mlog_entry_void(); @@ -1905,20 +2400,18 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle, mlog(0, "off = %u, num = %u\n", bit_off, num_bits); - if (ocfs2_is_cluster_bitmap(alloc_inode)) - journal_type = OCFS2_JOURNAL_ACCESS_UNDO; - + BUG_ON(undo_fn && !ocfs2_is_cluster_bitmap(alloc_inode)); status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), - group_bh, journal_type); + group_bh, + undo_fn ? + OCFS2_JOURNAL_ACCESS_UNDO : + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; } - if (ocfs2_is_cluster_bitmap(alloc_inode)) - cluster_bitmap = 1; - - if (cluster_bitmap) { + if (undo_fn) { jbd_lock_bh_state(group_bh); undo_bg = (struct ocfs2_group_desc *) bh2jh(group_bh)->b_committed_data; @@ -1929,18 +2422,24 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle, while(tmp--) { ocfs2_clear_bit((bit_off + tmp), (unsigned long *) bg->bg_bitmap); - if (cluster_bitmap) - ocfs2_set_bit(bit_off + tmp, - (unsigned long *) undo_bg->bg_bitmap); + if (undo_fn) + undo_fn(bit_off + tmp, + (unsigned long *) undo_bg->bg_bitmap); } le16_add_cpu(&bg->bg_free_bits_count, num_bits); + if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) { + ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit" + " count %u but claims %u are freed. num_bits %d", + (unsigned long long)le64_to_cpu(bg->bg_blkno), + le16_to_cpu(bg->bg_bits), + le16_to_cpu(bg->bg_free_bits_count), num_bits); + return -EROFS; + } - if (cluster_bitmap) + if (undo_fn) jbd_unlock_bh_state(group_bh); - status = ocfs2_journal_dirty(handle, group_bh); - if (status < 0) - mlog_errno(status); + ocfs2_journal_dirty(handle, group_bh); bail: return status; } @@ -1948,12 +2447,14 @@ bail: /* * expects the suballoc inode to already be locked. */ -int ocfs2_free_suballoc_bits(handle_t *handle, - struct inode *alloc_inode, - struct buffer_head *alloc_bh, - unsigned int start_bit, - u64 bg_blkno, - unsigned int count) +static int _ocfs2_free_suballoc_bits(handle_t *handle, + struct inode *alloc_inode, + struct buffer_head *alloc_bh, + unsigned int start_bit, + u64 bg_blkno, + unsigned int count, + void (*undo_fn)(unsigned int bit, + unsigned long *bitmap)) { int status = 0; u32 tmp_used; @@ -1988,7 +2489,7 @@ int ocfs2_free_suballoc_bits(handle_t *handle, status = ocfs2_block_group_clear_bits(handle, alloc_inode, group, group_bh, - start_bit, count); + start_bit, count, undo_fn); if (status < 0) { mlog_errno(status); goto bail; @@ -2005,12 +2506,7 @@ int ocfs2_free_suballoc_bits(handle_t *handle, count); tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - count); - - status = ocfs2_journal_dirty(handle, alloc_bh); - if (status < 0) { - mlog_errno(status); - goto bail; - } + ocfs2_journal_dirty(handle, alloc_bh); bail: brelse(group_bh); @@ -2019,6 +2515,17 @@ bail: return status; } +int ocfs2_free_suballoc_bits(handle_t *handle, + struct inode *alloc_inode, + struct buffer_head *alloc_bh, + unsigned int start_bit, + u64 bg_blkno, + unsigned int count) +{ + return _ocfs2_free_suballoc_bits(handle, alloc_inode, alloc_bh, + start_bit, bg_blkno, count, NULL); +} + int ocfs2_free_dinode(handle_t *handle, struct inode *inode_alloc_inode, struct buffer_head *inode_alloc_bh, @@ -2028,15 +2535,19 @@ int ocfs2_free_dinode(handle_t *handle, u16 bit = le16_to_cpu(di->i_suballoc_bit); u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit); + if (di->i_suballoc_loc) + bg_blkno = le64_to_cpu(di->i_suballoc_loc); return ocfs2_free_suballoc_bits(handle, inode_alloc_inode, inode_alloc_bh, bit, bg_blkno, 1); } -int ocfs2_free_clusters(handle_t *handle, - struct inode *bitmap_inode, - struct buffer_head *bitmap_bh, - u64 start_blk, - unsigned int num_clusters) +static int _ocfs2_free_clusters(handle_t *handle, + struct inode *bitmap_inode, + struct buffer_head *bitmap_bh, + u64 start_blk, + unsigned int num_clusters, + void (*undo_fn)(unsigned int bit, + unsigned long *bitmap)) { int status; u16 bg_start_bit; @@ -2063,9 +2574,9 @@ int ocfs2_free_clusters(handle_t *handle, mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n", (unsigned long long)bg_blkno, bg_start_bit); - status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, - bg_start_bit, bg_blkno, - num_clusters); + status = _ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, + bg_start_bit, bg_blkno, + num_clusters, undo_fn); if (status < 0) { mlog_errno(status); goto out; @@ -2079,6 +2590,32 @@ out: return status; } +int ocfs2_free_clusters(handle_t *handle, + struct inode *bitmap_inode, + struct buffer_head *bitmap_bh, + u64 start_blk, + unsigned int num_clusters) +{ + return _ocfs2_free_clusters(handle, bitmap_inode, bitmap_bh, + start_blk, num_clusters, + _ocfs2_set_bit); +} + +/* + * Give never-used clusters back to the global bitmap. We don't need + * to protect these bits in the undo buffer. + */ +int ocfs2_release_clusters(handle_t *handle, + struct inode *bitmap_inode, + struct buffer_head *bitmap_bh, + u64 start_blk, + unsigned int num_clusters) +{ + return _ocfs2_free_clusters(handle, bitmap_inode, bitmap_bh, + start_blk, num_clusters, + _ocfs2_clear_bit); +} + static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg) { printk("Block Group:\n"); @@ -2212,7 +2749,8 @@ out: * suballoc_bit. */ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, - u16 *suballoc_slot, u16 *suballoc_bit) + u16 *suballoc_slot, u64 *group_blkno, + u16 *suballoc_bit) { int status; struct buffer_head *inode_bh = NULL; @@ -2249,6 +2787,8 @@ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, *suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot); if (suballoc_bit) *suballoc_bit = le16_to_cpu(inode_fe->i_suballoc_bit); + if (group_blkno) + *group_blkno = le64_to_cpu(inode_fe->i_suballoc_loc); bail: brelse(inode_bh); @@ -2266,10 +2806,11 @@ bail: */ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, struct inode *suballoc, - struct buffer_head *alloc_bh, u64 blkno, + struct buffer_head *alloc_bh, + u64 group_blkno, u64 blkno, u16 bit, int *res) { - struct ocfs2_dinode *alloc_fe; + struct ocfs2_dinode *alloc_di; struct ocfs2_group_desc *group; struct buffer_head *group_bh = NULL; u64 bg_blkno; @@ -2278,17 +2819,18 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, mlog_entry("blkno: %llu bit: %u\n", (unsigned long long)blkno, (unsigned int)bit); - alloc_fe = (struct ocfs2_dinode *)alloc_bh->b_data; - if ((bit + 1) > ocfs2_bits_per_group(&alloc_fe->id2.i_chain)) { + alloc_di = (struct ocfs2_dinode *)alloc_bh->b_data; + if ((bit + 1) > ocfs2_bits_per_group(&alloc_di->id2.i_chain)) { mlog(ML_ERROR, "suballoc bit %u out of range of %u\n", (unsigned int)bit, - ocfs2_bits_per_group(&alloc_fe->id2.i_chain)); + ocfs2_bits_per_group(&alloc_di->id2.i_chain)); status = -EINVAL; goto bail; } - bg_blkno = ocfs2_which_suballoc_group(blkno, bit); - status = ocfs2_read_group_descriptor(suballoc, alloc_fe, bg_blkno, + bg_blkno = group_blkno ? group_blkno : + ocfs2_which_suballoc_group(blkno, bit); + status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno, &group_bh); if (status < 0) { mlog(ML_ERROR, "read group %llu failed %d\n", @@ -2322,6 +2864,7 @@ bail: int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) { int status; + u64 group_blkno = 0; u16 suballoc_bit = 0, suballoc_slot = 0; struct inode *inode_alloc_inode; struct buffer_head *alloc_bh = NULL; @@ -2329,7 +2872,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) mlog_entry("blkno: %llu", (unsigned long long)blkno); status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot, - &suballoc_bit); + &group_blkno, &suballoc_bit); if (status < 0) { mlog(ML_ERROR, "get alloc slot and bit failed %d\n", status); goto bail; @@ -2357,7 +2900,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) } status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh, - blkno, suballoc_bit, res); + group_blkno, blkno, suballoc_bit, res); if (status < 0) mlog(ML_ERROR, "test suballoc bit failed %d\n", status); |