diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-05-21 07:20:17 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-05-21 07:20:17 -0700 |
commit | 03e62303cf56e87337115f14842321043df2b4bb (patch) | |
tree | 3024495955beccddbae347d99613bcdd33801ee4 /fs/ocfs2/alloc.c | |
parent | 33cf23b0a535475aead57707cb9f4fe135a93544 (diff) | |
parent | 18d3a98f3c1b0e27ce026afa4d1ef042f2903726 (diff) |
Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2: (47 commits)
ocfs2: Silence a gcc warning.
ocfs2: Don't retry xattr set in case value extension fails.
ocfs2:dlm: avoid dlm->ast_lock lockres->spinlock dependency break
ocfs2: Reset xattr value size after xa_cleanup_value_truncate().
fs/ocfs2/dlm: Use kstrdup
fs/ocfs2/dlm: Drop memory allocation cast
Ocfs2: Optimize punching-hole code.
Ocfs2: Make ocfs2_find_cpos_for_left_leaf() public.
Ocfs2: Fix hole punching to correctly do CoW during cluster zeroing.
Ocfs2: Optimize ocfs2 truncate to use ocfs2_remove_btree_range() instead.
ocfs2: Block signals for mkdir/link/symlink/O_CREAT.
ocfs2: Wrap signal blocking in void functions.
ocfs2/dlm: Increase o2dlm lockres hash size
ocfs2: Make ocfs2_extend_trans() really extend.
ocfs2/trivial: Code cleanup for allocation reservation.
ocfs2: make ocfs2_adjust_resv_from_alloc simple.
ocfs2: Make nointr a default mount option
ocfs2/dlm: Make o2dlm domain join/leave messages KERN_NOTICE
o2net: log socket state changes
ocfs2: print node # when tcp fails
...
Diffstat (limited to 'fs/ocfs2/alloc.c')
-rw-r--r-- | fs/ocfs2/alloc.c | 908 |
1 files changed, 219 insertions, 689 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 9f8bd913c51..215e12ce1d8 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -1006,7 +1006,7 @@ static int ocfs2_create_new_meta_bhs(handle_t *handle, int count, status, i; u16 suballoc_bit_start; u32 num_got; - u64 first_blkno; + u64 suballoc_loc, first_blkno; struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci)); struct ocfs2_extent_block *eb; @@ -1015,10 +1015,10 @@ static int ocfs2_create_new_meta_bhs(handle_t *handle, count = 0; while (count < wanted) { - status = ocfs2_claim_metadata(osb, - handle, + status = ocfs2_claim_metadata(handle, meta_ac, wanted - count, + &suballoc_loc, &suballoc_bit_start, &num_got, &first_blkno); @@ -1052,6 +1052,7 @@ static int ocfs2_create_new_meta_bhs(handle_t *handle, eb->h_fs_generation = cpu_to_le32(osb->fs_generation); eb->h_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); + eb->h_suballoc_loc = cpu_to_le64(suballoc_loc); eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start); eb->h_list.l_count = cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb)); @@ -1061,11 +1062,7 @@ static int ocfs2_create_new_meta_bhs(handle_t *handle, /* We'll also be dirtied by the caller, so * this isn't absolutely necessary. */ - status = ocfs2_journal_dirty(handle, bhs[i]); - if (status < 0) { - mlog_errno(status); - goto bail; - } + ocfs2_journal_dirty(handle, bhs[i]); } count += num_got; @@ -1129,8 +1126,7 @@ static int ocfs2_adjust_rightmost_branch(handle_t *handle, goto out; } - status = ocfs2_extend_trans(handle, path_num_items(path) + - handle->h_buffer_credits); + status = ocfs2_extend_trans(handle, path_num_items(path)); if (status < 0) { mlog_errno(status); goto out; @@ -1270,12 +1266,7 @@ static int ocfs2_add_branch(handle_t *handle, if (!eb_el->l_tree_depth) new_last_eb_blk = le64_to_cpu(eb->h_blkno); - status = ocfs2_journal_dirty(handle, bh); - if (status < 0) { - mlog_errno(status); - goto bail; - } - + ocfs2_journal_dirty(handle, bh); next_blkno = le64_to_cpu(eb->h_blkno); } @@ -1321,17 +1312,10 @@ static int ocfs2_add_branch(handle_t *handle, eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data; eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk); - status = ocfs2_journal_dirty(handle, *last_eb_bh); - if (status < 0) - mlog_errno(status); - status = ocfs2_journal_dirty(handle, et->et_root_bh); - if (status < 0) - mlog_errno(status); - if (eb_bh) { - status = ocfs2_journal_dirty(handle, eb_bh); - if (status < 0) - mlog_errno(status); - } + ocfs2_journal_dirty(handle, *last_eb_bh); + ocfs2_journal_dirty(handle, et->et_root_bh); + if (eb_bh) + ocfs2_journal_dirty(handle, eb_bh); /* * Some callers want to track the rightmost leaf so pass it @@ -1399,11 +1383,7 @@ static int ocfs2_shift_tree_depth(handle_t *handle, for (i = 0; i < le16_to_cpu(root_el->l_next_free_rec); i++) eb_el->l_recs[i] = root_el->l_recs[i]; - status = ocfs2_journal_dirty(handle, new_eb_bh); - if (status < 0) { - mlog_errno(status); - goto bail; - } + ocfs2_journal_dirty(handle, new_eb_bh); status = ocfs2_et_root_journal_access(handle, et, OCFS2_JOURNAL_ACCESS_WRITE); @@ -1428,11 +1408,7 @@ static int ocfs2_shift_tree_depth(handle_t *handle, if (root_el->l_tree_depth == cpu_to_le16(1)) ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); - status = ocfs2_journal_dirty(handle, et->et_root_bh); - if (status < 0) { - mlog_errno(status); - goto bail; - } + ocfs2_journal_dirty(handle, et->et_root_bh); *ret_new_eb_bh = new_eb_bh; new_eb_bh = NULL; @@ -2064,7 +2040,7 @@ static void ocfs2_complete_edge_insert(handle_t *handle, struct ocfs2_path *right_path, int subtree_index) { - int ret, i, idx; + int i, idx; struct ocfs2_extent_list *el, *left_el, *right_el; struct ocfs2_extent_rec *left_rec, *right_rec; struct buffer_head *root_bh = left_path->p_node[subtree_index].bh; @@ -2102,13 +2078,8 @@ static void ocfs2_complete_edge_insert(handle_t *handle, ocfs2_adjust_adjacent_records(left_rec, left_el, right_rec, right_el); - ret = ocfs2_journal_dirty(handle, left_path->p_node[i].bh); - if (ret) - mlog_errno(ret); - - ret = ocfs2_journal_dirty(handle, right_path->p_node[i].bh); - if (ret) - mlog_errno(ret); + ocfs2_journal_dirty(handle, left_path->p_node[i].bh); + ocfs2_journal_dirty(handle, right_path->p_node[i].bh); /* * Setup our list pointers now so that the current @@ -2132,9 +2103,7 @@ static void ocfs2_complete_edge_insert(handle_t *handle, root_bh = left_path->p_node[subtree_index].bh; - ret = ocfs2_journal_dirty(handle, root_bh); - if (ret) - mlog_errno(ret); + ocfs2_journal_dirty(handle, root_bh); } static int ocfs2_rotate_subtree_right(handle_t *handle, @@ -2207,11 +2176,7 @@ static int ocfs2_rotate_subtree_right(handle_t *handle, ocfs2_create_empty_extent(right_el); - ret = ocfs2_journal_dirty(handle, right_leaf_bh); - if (ret) { - mlog_errno(ret); - goto out; - } + ocfs2_journal_dirty(handle, right_leaf_bh); /* Do the copy now. */ i = le16_to_cpu(left_el->l_next_free_rec) - 1; @@ -2230,11 +2195,7 @@ static int ocfs2_rotate_subtree_right(handle_t *handle, memset(&left_el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); le16_add_cpu(&left_el->l_next_free_rec, 1); - ret = ocfs2_journal_dirty(handle, left_leaf_bh); - if (ret) { - mlog_errno(ret); - goto out; - } + ocfs2_journal_dirty(handle, left_leaf_bh); ocfs2_complete_edge_insert(handle, left_path, right_path, subtree_index); @@ -2249,8 +2210,8 @@ out: * * Will return zero if the path passed in is already the leftmost path. */ -static int ocfs2_find_cpos_for_left_leaf(struct super_block *sb, - struct ocfs2_path *path, u32 *cpos) +int ocfs2_find_cpos_for_left_leaf(struct super_block *sb, + struct ocfs2_path *path, u32 *cpos) { int i, j, ret = 0; u64 blkno; @@ -2327,20 +2288,14 @@ static int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth, int op_credits, struct ocfs2_path *path) { - int ret; + int ret = 0; int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits; - if (handle->h_buffer_credits < credits) { + if (handle->h_buffer_credits < credits) ret = ocfs2_extend_trans(handle, credits - handle->h_buffer_credits); - if (ret) - return ret; - if (unlikely(handle->h_buffer_credits < credits)) - return ocfs2_extend_trans(handle, credits); - } - - return 0; + return ret; } /* @@ -2584,8 +2539,7 @@ static int ocfs2_update_edge_lengths(handle_t *handle, * records for all the bh in the path. * So we have to allocate extra credits and access them. */ - ret = ocfs2_extend_trans(handle, - handle->h_buffer_credits + subtree_index); + ret = ocfs2_extend_trans(handle, subtree_index); if (ret) { mlog_errno(ret); goto out; @@ -2823,12 +2777,8 @@ static int ocfs2_rotate_subtree_left(handle_t *handle, ocfs2_remove_empty_extent(right_leaf_el); } - ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); - if (ret) - mlog_errno(ret); - ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path)); - if (ret) - mlog_errno(ret); + ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); + ocfs2_journal_dirty(handle, path_leaf_bh(right_path)); if (del_right_subtree) { ocfs2_unlink_subtree(handle, et, left_path, right_path, @@ -2851,9 +2801,7 @@ static int ocfs2_rotate_subtree_left(handle_t *handle, if (right_has_empty) ocfs2_remove_empty_extent(left_leaf_el); - ret = ocfs2_journal_dirty(handle, et_root_bh); - if (ret) - mlog_errno(ret); + ocfs2_journal_dirty(handle, et_root_bh); *deleted = 1; } else @@ -2962,10 +2910,7 @@ static int ocfs2_rotate_rightmost_leaf_left(handle_t *handle, } ocfs2_remove_empty_extent(el); - - ret = ocfs2_journal_dirty(handle, bh); - if (ret) - mlog_errno(ret); + ocfs2_journal_dirty(handle, bh); out: return ret; @@ -3506,15 +3451,9 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path, ocfs2_cleanup_merge(el, index); - ret = ocfs2_journal_dirty(handle, bh); - if (ret) - mlog_errno(ret); - + ocfs2_journal_dirty(handle, bh); if (right_path) { - ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path)); - if (ret) - mlog_errno(ret); - + ocfs2_journal_dirty(handle, path_leaf_bh(right_path)); ocfs2_complete_edge_insert(handle, left_path, right_path, subtree_index); } @@ -3683,14 +3622,9 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path, ocfs2_cleanup_merge(el, index); - ret = ocfs2_journal_dirty(handle, bh); - if (ret) - mlog_errno(ret); - + ocfs2_journal_dirty(handle, bh); if (left_path) { - ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); - if (ret) - mlog_errno(ret); + ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); /* * In the situation that the right_rec is empty and the extent @@ -4016,10 +3950,7 @@ static void ocfs2_adjust_rightmost_records(handle_t *handle, le32_add_cpu(&rec->e_int_clusters, -le32_to_cpu(rec->e_cpos)); - ret = ocfs2_journal_dirty(handle, bh); - if (ret) - mlog_errno(ret); - + ocfs2_journal_dirty(handle, bh); } } @@ -4203,17 +4134,13 @@ static int ocfs2_insert_path(handle_t *handle, struct buffer_head *leaf_bh = path_leaf_bh(right_path); if (left_path) { - int credits = handle->h_buffer_credits; - /* * There's a chance that left_path got passed back to * us without being accounted for in the * journal. Extend our transaction here to be sure we * can change those blocks. */ - credits += left_path->p_tree_depth; - - ret = ocfs2_extend_trans(handle, credits); + ret = ocfs2_extend_trans(handle, left_path->p_tree_depth); if (ret < 0) { mlog_errno(ret); goto out; @@ -4251,17 +4178,13 @@ static int ocfs2_insert_path(handle_t *handle, * dirty this for us. */ if (left_path) - ret = ocfs2_journal_dirty(handle, - path_leaf_bh(left_path)); - if (ret) - mlog_errno(ret); + ocfs2_journal_dirty(handle, + path_leaf_bh(left_path)); } else ocfs2_insert_at_leaf(et, insert_rec, path_leaf_el(right_path), insert); - ret = ocfs2_journal_dirty(handle, leaf_bh); - if (ret) - mlog_errno(ret); + ocfs2_journal_dirty(handle, leaf_bh); if (left_path) { /* @@ -4384,9 +4307,7 @@ out_update_clusters: ocfs2_et_update_clusters(et, le16_to_cpu(insert_rec->e_leaf_clusters)); - ret = ocfs2_journal_dirty(handle, et->et_root_bh); - if (ret) - mlog_errno(ret); + ocfs2_journal_dirty(handle, et->et_root_bh); out: ocfs2_free_path(left_path); @@ -4866,7 +4787,7 @@ int ocfs2_add_clusters_in_btree(handle_t *handle, goto leave; } - status = __ocfs2_claim_clusters(osb, handle, data_ac, 1, + status = __ocfs2_claim_clusters(handle, data_ac, 1, clusters_to_add, &bit_off, &num_bits); if (status < 0) { if (status != -ENOSPC) @@ -4895,11 +4816,7 @@ int ocfs2_add_clusters_in_btree(handle_t *handle, goto leave; } - status = ocfs2_journal_dirty(handle, et->et_root_bh); - if (status < 0) { - mlog_errno(status); - goto leave; - } + ocfs2_journal_dirty(handle, et->et_root_bh); clusters_to_add -= num_bits; *logical_offset += num_bits; @@ -5309,7 +5226,7 @@ static int ocfs2_split_tree(handle_t *handle, struct ocfs2_extent_tree *et, int index, u32 new_range, struct ocfs2_alloc_context *meta_ac) { - int ret, depth, credits = handle->h_buffer_credits; + int ret, depth, credits; struct buffer_head *last_eb_bh = NULL; struct ocfs2_extent_block *eb; struct ocfs2_extent_list *rightmost_el, *el; @@ -5340,8 +5257,8 @@ static int ocfs2_split_tree(handle_t *handle, struct ocfs2_extent_tree *et, } else rightmost_el = path_leaf_el(path); - credits += path->p_tree_depth + - ocfs2_extend_meta_needed(et->et_root_el); + credits = path->p_tree_depth + + ocfs2_extend_meta_needed(et->et_root_el); ret = ocfs2_extend_trans(handle, credits); if (ret) { mlog_errno(ret); @@ -5671,19 +5588,97 @@ out: return ret; } +/* + * ocfs2_reserve_blocks_for_rec_trunc() would look basically the + * same as ocfs2_lock_alloctors(), except for it accepts a blocks + * number to reserve some extra blocks, and it only handles meta + * data allocations. + * + * Currently, only ocfs2_remove_btree_range() uses it for truncating + * and punching holes. + */ +static int ocfs2_reserve_blocks_for_rec_trunc(struct inode *inode, + struct ocfs2_extent_tree *et, + u32 extents_to_split, + struct ocfs2_alloc_context **ac, + int extra_blocks) +{ + int ret = 0, num_free_extents; + unsigned int max_recs_needed = 2 * extents_to_split; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + + *ac = NULL; + + num_free_extents = ocfs2_num_free_extents(osb, et); + if (num_free_extents < 0) { + ret = num_free_extents; + mlog_errno(ret); + goto out; + } + + if (!num_free_extents || + (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) + extra_blocks += ocfs2_extend_meta_needed(et->et_root_el); + + if (extra_blocks) { + ret = ocfs2_reserve_new_metadata_blocks(osb, extra_blocks, ac); + if (ret < 0) { + if (ret != -ENOSPC) + mlog_errno(ret); + goto out; + } + } + +out: + if (ret) { + if (*ac) { + ocfs2_free_alloc_context(*ac); + *ac = NULL; + } + } + + return ret; +} + int ocfs2_remove_btree_range(struct inode *inode, struct ocfs2_extent_tree *et, - u32 cpos, u32 phys_cpos, u32 len, - struct ocfs2_cached_dealloc_ctxt *dealloc) + u32 cpos, u32 phys_cpos, u32 len, int flags, + struct ocfs2_cached_dealloc_ctxt *dealloc, + u64 refcount_loc) { - int ret; + int ret, credits = 0, extra_blocks = 0; u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct inode *tl_inode = osb->osb_tl_inode; handle_t *handle; struct ocfs2_alloc_context *meta_ac = NULL; + struct ocfs2_refcount_tree *ref_tree = NULL; + + if ((flags & OCFS2_EXT_REFCOUNTED) && len) { + BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & + OCFS2_HAS_REFCOUNT_FL)); + + ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1, + &ref_tree, NULL); + if (ret) { + mlog_errno(ret); + goto out; + } - ret = ocfs2_lock_allocators(inode, et, 0, 1, NULL, &meta_ac); + ret = ocfs2_prepare_refcount_change_for_del(inode, + refcount_loc, + phys_blkno, + len, + &credits, + &extra_blocks); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + } + + ret = ocfs2_reserve_blocks_for_rec_trunc(inode, et, 1, &meta_ac, + extra_blocks); if (ret) { mlog_errno(ret); return ret; @@ -5699,7 +5694,8 @@ int ocfs2_remove_btree_range(struct inode *inode, } } - handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb)); + handle = ocfs2_start_trans(osb, + ocfs2_remove_extent_credits(osb->sb) + credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); mlog_errno(ret); @@ -5724,15 +5720,22 @@ int ocfs2_remove_btree_range(struct inode *inode, ocfs2_et_update_clusters(et, -len); - ret = ocfs2_journal_dirty(handle, et->et_root_bh); - if (ret) { - mlog_errno(ret); - goto out_commit; - } + ocfs2_journal_dirty(handle, et->et_root_bh); - ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len); - if (ret) - mlog_errno(ret); + if (phys_blkno) { + if (flags & OCFS2_EXT_REFCOUNTED) + ret = ocfs2_decrease_refcount(inode, handle, + ocfs2_blocks_to_clusters(osb->sb, + phys_blkno), + len, meta_ac, + dealloc, 1); + else + ret = ocfs2_truncate_log_append(osb, handle, + phys_blkno, len); + if (ret) + mlog_errno(ret); + + } out_commit: ocfs2_commit_trans(osb, handle); @@ -5742,6 +5745,9 @@ out: if (meta_ac) ocfs2_free_alloc_context(meta_ac); + if (ref_tree) + ocfs2_unlock_refcount_tree(osb, ref_tree, 1); + return ret; } @@ -5850,11 +5856,7 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb, } tl->tl_recs[index].t_clusters = cpu_to_le32(num_clusters); - status = ocfs2_journal_dirty(handle, tl_bh); - if (status < 0) { - mlog_errno(status); - goto bail; - } + ocfs2_journal_dirty(handle, tl_bh); bail: mlog_exit(status); @@ -5893,11 +5895,7 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, tl->tl_used = cpu_to_le16(i); - status = ocfs2_journal_dirty(handle, tl_bh); - if (status < 0) { - mlog_errno(status); - goto bail; - } + ocfs2_journal_dirty(handle, tl_bh); /* TODO: Perhaps we can calculate the bulk of the * credits up front rather than extending like @@ -6298,6 +6296,7 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb) */ struct ocfs2_cached_block_free { struct ocfs2_cached_block_free *free_next; + u64 free_bg; u64 free_blk; unsigned int free_bit; }; @@ -6344,8 +6343,11 @@ static int ocfs2_free_cached_blocks(struct ocfs2_super *osb, } while (head) { - bg_blkno = ocfs2_which_suballoc_group(head->free_blk, - head->free_bit); + if (head->free_bg) + bg_blkno = head->free_bg; + else + bg_blkno = ocfs2_which_suballoc_group(head->free_blk, + head->free_bit); mlog(0, "Free bit: (bit %u, blkno %llu)\n", head->free_bit, (unsigned long long)head->free_blk); @@ -6393,7 +6395,7 @@ int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, int ret = 0; struct ocfs2_cached_block_free *item; - item = kmalloc(sizeof(*item), GFP_NOFS); + item = kzalloc(sizeof(*item), GFP_NOFS); if (item == NULL) { ret = -ENOMEM; mlog_errno(ret); @@ -6533,8 +6535,8 @@ ocfs2_find_per_slot_free_list(int type, } int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, - int type, int slot, u64 blkno, - unsigned int bit) + int type, int slot, u64 suballoc, + u64 blkno, unsigned int bit) { int ret; struct ocfs2_per_slot_free_list *fl; @@ -6547,7 +6549,7 @@ int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, goto out; } - item = kmalloc(sizeof(*item), GFP_NOFS); + item = kzalloc(sizeof(*item), GFP_NOFS); if (item == NULL) { ret = -ENOMEM; mlog_errno(ret); @@ -6557,6 +6559,7 @@ int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, mlog(0, "Insert: (type %d, slot %u, bit %u, blk %llu)\n", type, slot, bit, (unsigned long long)blkno); + item->free_bg = suballoc; item->free_blk = blkno; item->free_bit = bit; item->free_next = fl->f_first; @@ -6573,433 +6576,11 @@ static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, { return ocfs2_cache_block_dealloc(ctxt, EXTENT_ALLOC_SYSTEM_INODE, le16_to_cpu(eb->h_suballoc_slot), + le64_to_cpu(eb->h_suballoc_loc), le64_to_cpu(eb->h_blkno), le16_to_cpu(eb->h_suballoc_bit)); } -/* This function will figure out whether the currently last extent - * block will be deleted, and if it will, what the new last extent - * block will be so we can update his h_next_leaf_blk field, as well - * as the dinodes i_last_eb_blk */ -static int ocfs2_find_new_last_ext_blk(struct inode *inode, - unsigned int clusters_to_del, - struct ocfs2_path *path, - struct buffer_head **new_last_eb) -{ - int next_free, ret = 0; - u32 cpos; - struct ocfs2_extent_rec *rec; - struct ocfs2_extent_block *eb; - struct ocfs2_extent_list *el; - struct buffer_head *bh = NULL; - - *new_last_eb = NULL; - - /* we have no tree, so of course, no last_eb. */ - if (!path->p_tree_depth) - goto out; - - /* trunc to zero special case - this makes tree_depth = 0 - * regardless of what it is. */ - if (OCFS2_I(inode)->ip_clusters == clusters_to_del) - goto out; - - el = path_leaf_el(path); - BUG_ON(!el->l_next_free_rec); - - /* - * Make sure that this extent list will actually be empty - * after we clear away the data. We can shortcut out if - * there's more than one non-empty extent in the - * list. Otherwise, a check of the remaining extent is - * necessary. - */ - next_free = le16_to_cpu(el->l_next_free_rec); - rec = NULL; - if (ocfs2_is_empty_extent(&el->l_recs[0])) { - if (next_free > 2) - goto out; - - /* We may have a valid extent in index 1, check it. */ - if (next_free == 2) - rec = &el->l_recs[1]; - - /* - * Fall through - no more nonempty extents, so we want - * to delete this leaf. - */ - } else { - if (next_free > 1) - goto out; - - rec = &el->l_recs[0]; - } - - if (rec) { - /* - * Check it we'll only be trimming off the end of this - * cluster. - */ - if (le16_to_cpu(rec->e_leaf_clusters) > clusters_to_del) - goto out; - } - - ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, &cpos); - if (ret) { - mlog_errno(ret); - goto out; - } - - ret = ocfs2_find_leaf(INODE_CACHE(inode), path_root_el(path), cpos, &bh); - if (ret) { - mlog_errno(ret); - goto out; - } - - eb = (struct ocfs2_extent_block *) bh->b_data; - el = &eb->h_list; - - /* ocfs2_find_leaf() gets the eb from ocfs2_read_extent_block(). - * Any corruption is a code bug. */ - BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb)); - - *new_last_eb = bh; - get_bh(*new_last_eb); - mlog(0, "returning block %llu, (cpos: %u)\n", - (unsigned long long)le64_to_cpu(eb->h_blkno), cpos); -out: - brelse(bh); - - return ret; -} - -/* - * Trim some clusters off the rightmost edge of a tree. Only called - * during truncate. - * - * The caller needs to: - * - start journaling of each path component. - * - compute and fully set up any new last ext block - */ -static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path, - handle_t *handle, struct ocfs2_truncate_context *tc, - u32 clusters_to_del, u64 *delete_start, u8 *flags) -{ - int ret, i, index = path->p_tree_depth; - u32 new_edge = 0; - u64 deleted_eb = 0; - struct buffer_head *bh; - struct ocfs2_extent_list *el; - struct ocfs2_extent_rec *rec; - - *delete_start = 0; - *flags = 0; - - while (index >= 0) { - bh = path->p_node[index].bh; - el = path->p_node[index].el; - - mlog(0, "traveling tree (index = %d, block = %llu)\n", - index, (unsigned long long)bh->b_blocknr); - - BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0); - - if (index != - (path->p_tree_depth - le16_to_cpu(el->l_tree_depth))) { - ocfs2_error(inode->i_sb, - "Inode %lu has invalid ext. block %llu", - inode->i_ino, - (unsigned long long)bh->b_blocknr); - ret = -EROFS; - goto out; - } - -find_tail_record: - i = le16_to_cpu(el->l_next_free_rec) - 1; - rec = &el->l_recs[i]; - - mlog(0, "Extent list before: record %d: (%u, %u, %llu), " - "next = %u\n", i, le32_to_cpu(rec->e_cpos), - ocfs2_rec_clusters(el, rec), - (unsigned long long)le64_to_cpu(rec->e_blkno), - le16_to_cpu(el->l_next_free_rec)); - - BUG_ON(ocfs2_rec_clusters(el, rec) < clusters_to_del); - - if (le16_to_cpu(el->l_tree_depth) == 0) { - /* - * If the leaf block contains a single empty - * extent and no records, we can just remove - * the block. - */ - if (i == 0 && ocfs2_is_empty_extent(rec)) { - memset(rec, 0, - sizeof(struct ocfs2_extent_rec)); - el->l_next_free_rec = cpu_to_le16(0); - - goto delete; - } - - /* - * Remove any empty extents by shifting things - * left. That should make life much easier on - * the code below. This condition is rare - * enough that we shouldn't see a performance - * hit. - */ - if (ocfs2_is_empty_extent(&el->l_recs[0])) { - le16_add_cpu(&el->l_next_free_rec, -1); - - for(i = 0; - i < le16_to_cpu(el->l_next_free_rec); i++) - el->l_recs[i] = el->l_recs[i + 1]; - - memset(&el->l_recs[i], 0, - sizeof(struct ocfs2_extent_rec)); - - /* - * We've modified our extent list. The - * simplest way to handle this change - * is to being the search from the - * start again. - */ - goto find_tail_record; - } - - le16_add_cpu(&rec->e_leaf_clusters, -clusters_to_del); - - /* - * We'll use "new_edge" on our way back up the - * tree to know what our rightmost cpos is. - */ - new_edge = le16_to_cpu(rec->e_leaf_clusters); - new_edge += le32_to_cpu(rec->e_cpos); - - /* - * The caller will use this to delete data blocks. - */ - *delete_start = le64_to_cpu(rec->e_blkno) - + ocfs2_clusters_to_blocks(inode->i_sb, - le16_to_cpu(rec->e_leaf_clusters)); - *flags = rec->e_flags; - - /* - * If it's now empty, remove this record. - */ - if (le16_to_cpu(rec->e_leaf_clusters) == 0) { - memset(rec, 0, - sizeof(struct ocfs2_extent_rec)); - le16_add_cpu(&el->l_next_free_rec, -1); - } - } else { - if (le64_to_cpu(rec->e_blkno) == deleted_eb) { - memset(rec, 0, - sizeof(struct ocfs2_extent_rec)); - le16_add_cpu(&el->l_next_free_rec, -1); - - goto delete; - } - - /* Can this actually happen? */ - if (le16_to_cpu(el->l_next_free_rec) == 0) - goto delete; - - /* - * We never actually deleted any clusters - * because our leaf was empty. There's no - * reason to adjust the rightmost edge then. - */ - if (new_edge == 0) - goto delete; - - rec->e_int_clusters = cpu_to_le32(new_edge); - le32_add_cpu(&rec->e_int_clusters, - -le32_to_cpu(rec->e_cpos)); - - /* - * A deleted child record should have been - * caught above. - */ - BUG_ON(le32_to_cpu(rec->e_int_clusters) == 0); - } - -delete: - ret = ocfs2_journal_dirty(handle, bh); - if (ret) { - mlog_errno(ret); - goto out; - } - - mlog(0, "extent list container %llu, after: record %d: " - "(%u, %u, %llu), next = %u.\n", - (unsigned long long)bh->b_blocknr, i, - le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec), - (unsigned long long)le64_to_cpu(rec->e_blkno), - le16_to_cpu(el->l_next_free_rec)); - - /* - * We must be careful to only attempt delete of an - * extent block (and not the root inode block). - */ - if (index > 0 && le16_to_cpu(el->l_next_free_rec) == 0) { - struct ocfs2_extent_block *eb = - (struct ocfs2_extent_block *)bh->b_data; - - /* - * Save this for use when processing the - * parent block. - */ - deleted_eb = le64_to_cpu(eb->h_blkno); - - mlog(0, "deleting this extent block.\n"); - - ocfs2_remove_from_cache(INODE_CACHE(inode), bh); - - BUG_ON(ocfs2_rec_clusters(el, &el->l_recs[0])); - BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos)); - BUG_ON(le64_to_cpu(el->l_recs[0].e_blkno)); - - ret = ocfs2_cache_extent_block_free(&tc->tc_dealloc, eb); - /* An error here is not fatal. */ - if (ret < 0) - mlog_errno(ret); - } else { - deleted_eb = 0; - } - - index--; - } - - ret = 0; -out: - return ret; -} - -static int ocfs2_do_truncate(struct ocfs2_super *osb, - unsigned int clusters_to_del, - struct inode *inode, - struct buffer_head *fe_bh, - handle_t *handle, - struct ocfs2_truncate_context *tc, - struct ocfs2_path *path, - struct ocfs2_alloc_context *meta_ac) -{ - int status; - struct ocfs2_dinode *fe; - struct ocfs2_extent_block *last_eb = NULL; - struct ocfs2_extent_list *el; - struct buffer_head *last_eb_bh = NULL; - u64 delete_blk = 0; - u8 rec_flags; - - fe = (struct ocfs2_dinode *) fe_bh->b_data; - - status = ocfs2_find_new_last_ext_blk(inode, clusters_to_del, - path, &last_eb_bh); - if (status < 0) { - mlog_errno(status); - goto bail; - } - - /* - * Each component will be touched, so we might as well journal - * here to avoid having to handle errors later. - */ - status = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path); - if (status < 0) { - mlog_errno(status); - goto bail; - } - - if (last_eb_bh) { - status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), last_eb_bh, - OCFS2_JOURNAL_ACCESS_WRITE); - if (status < 0) { - mlog_errno(status); - goto bail; - } - - last_eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; - } - - el = &(fe->id2.i_list); - - /* - * Lower levels depend on this never happening, but it's best - * to check it up here before changing the tree. - */ - if (el->l_tree_depth && el->l_recs[0].e_int_clusters == 0) { - ocfs2_error(inode->i_sb, - "Inode %lu has an empty extent record, depth %u\n", - inode->i_ino, le16_to_cpu(el->l_tree_depth)); - status = -EROFS; - goto bail; - } - - dquot_free_space_nodirty(inode, - ocfs2_clusters_to_bytes(osb->sb, clusters_to_del)); - spin_lock(&OCFS2_I(inode)->ip_lock); - OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) - - clusters_to_del; - spin_unlock(&OCFS2_I(inode)->ip_lock); - le32_add_cpu(&fe->i_clusters, -clusters_to_del); - inode->i_blocks = ocfs2_inode_sector_count(inode); - - status = ocfs2_trim_tree(inode, path, handle, tc, - clusters_to_del, &delete_blk, &rec_flags); - if (status) { - mlog_errno(status); - goto bail; - } - - if (le32_to_cpu(fe->i_clusters) == 0) { - /* trunc to zero is a special case. */ - el->l_tree_depth = 0; - fe->i_last_eb_blk = 0; - } else if (last_eb) - fe->i_last_eb_blk = last_eb->h_blkno; - - status = ocfs2_journal_dirty(handle, fe_bh); - if (status < 0) { - mlog_errno(status); - goto bail; - } - - if (last_eb) { - /* If there will be a new last extent block, then by - * definition, there cannot be any leaves to the right of - * him. */ - last_eb->h_next_leaf_blk = 0; - status = ocfs2_journal_dirty(handle, last_eb_bh); - if (status < 0) { - mlog_errno(status); - goto bail; - } - } - - if (delete_blk) { - if (rec_flags & OCFS2_EXT_REFCOUNTED) - status = ocfs2_decrease_refcount(inode, handle, - ocfs2_blocks_to_clusters(osb->sb, - delete_blk), - clusters_to_del, meta_ac, - &tc->tc_dealloc, 1); - else - status = ocfs2_truncate_log_append(osb, handle, - delete_blk, - clusters_to_del); - if (status < 0) { - mlog_errno(status); - goto bail; - } - } - status = 0; -bail: - brelse(last_eb_bh); - mlog_exit(status); - return status; -} - static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh) { set_buffer_uptodate(bh); @@ -7307,7 +6888,9 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, goto out_commit; did_quota = 1; - ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, + data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv; + + ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off, &num); if (ret) { mlog_errno(ret); @@ -7406,26 +6989,29 @@ out: */ int ocfs2_commit_truncate(struct ocfs2_super *osb, struct inode *inode, - struct buffer_head *fe_bh, - struct ocfs2_truncate_context *tc) + struct buffer_head *di_bh) { - int status, i, credits, tl_sem = 0; - u32 clusters_to_del, new_highest_cpos, range; + int status = 0, i, flags = 0; + u32 new_highest_cpos, range, trunc_cpos, trunc_len, phys_cpos, coff; u64 blkno = 0; struct ocfs2_extent_list *el; - handle_t *handle = NULL; - struct inode *tl_inode = osb->osb_tl_inode; + struct ocfs2_extent_rec *rec; struct ocfs2_path *path = NULL; - struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; - struct ocfs2_alloc_context *meta_ac = NULL; - struct ocfs2_refcount_tree *ref_tree = NULL; + struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; + struct ocfs2_extent_list *root_el = &(di->id2.i_list); + u64 refcount_loc = le64_to_cpu(di->i_refcount_loc); + struct ocfs2_extent_tree et; + struct ocfs2_cached_dealloc_ctxt dealloc; mlog_entry_void(); + ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); + ocfs2_init_dealloc_ctxt(&dealloc); + new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, i_size_read(inode)); - path = ocfs2_new_path(fe_bh, &di->id2.i_list, + path = ocfs2_new_path(di_bh, &di->id2.i_list, ocfs2_journal_access_di); if (!path) { status = -ENOMEM; @@ -7444,8 +7030,6 @@ start: goto bail; } - credits = 0; - /* * Truncate always works against the rightmost tree branch. */ @@ -7480,101 +7064,62 @@ start: } i = le16_to_cpu(el->l_next_free_rec) - 1; - range = le32_to_cpu(el->l_recs[i].e_cpos) + - ocfs2_rec_clusters(el, &el->l_recs[i]); - if (i == 0 && ocfs2_is_empty_extent(&el->l_recs[i])) { - clusters_to_del = 0; - } else if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_highest_cpos) { - clusters_to_del = ocfs2_rec_clusters(el, &el->l_recs[i]); - blkno = le64_to_cpu(el->l_recs[i].e_blkno); + rec = &el->l_recs[i]; + flags = rec->e_flags; + range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec); + + if (i == 0 && ocfs2_is_empty_extent(rec)) { + /* + * Lower levels depend on this never happening, but it's best + * to check it up here before changing the tree. + */ + if (root_el->l_tree_depth && rec->e_int_clusters == 0) { + ocfs2_error(inode->i_sb, "Inode %lu has an empty " + "extent record, depth %u\n", inode->i_ino, + le16_to_cpu(root_el->l_tree_depth)); + status = -EROFS; + goto bail; + } + trunc_cpos = le32_to_cpu(rec->e_cpos); + trunc_len = 0; + blkno = 0; + } else if (le32_to_cpu(rec->e_cpos) >= new_highest_cpos) { + /* + * Truncate entire record. + */ + trunc_cpos = le32_to_cpu(rec->e_cpos); + trunc_len = ocfs2_rec_clusters(el, rec); + blkno = le64_to_cpu(rec->e_blkno); } else if (range > new_highest_cpos) { - clusters_to_del = (ocfs2_rec_clusters(el, &el->l_recs[i]) + - le32_to_cpu(el->l_recs[i].e_cpos)) - - new_highest_cpos; - blkno = le64_to_cpu(el->l_recs[i].e_blkno) + - ocfs2_clusters_to_blocks(inode->i_sb, - ocfs2_rec_clusters(el, &el->l_recs[i]) - - clusters_to_del); + /* + * Partial truncate. it also should be + * the last truncate we're doing. + */ + trunc_cpos = new_highest_cpos; + trunc_len = range - new_highest_cpos; + coff = new_highest_cpos - le32_to_cpu(rec->e_cpos); + blkno = le64_to_cpu(rec->e_blkno) + + ocfs2_clusters_to_blocks(inode->i_sb, coff); } else { + /* + * Truncate completed, leave happily. + */ status = 0; goto bail; } - mlog(0, "clusters_to_del = %u in this pass, tail blk=%llu\n", - clusters_to_del, (unsigned long long)path_leaf_bh(path)->b_blocknr); - - if (el->l_recs[i].e_flags & OCFS2_EXT_REFCOUNTED && clusters_to_del) { - BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & - OCFS2_HAS_REFCOUNT_FL)); - - status = ocfs2_lock_refcount_tree(osb, - le64_to_cpu(di->i_refcount_loc), - 1, &ref_tree, NULL); - if (status) { - mlog_errno(status); - goto bail; - } - - status = ocfs2_prepare_refcount_change_for_del(inode, fe_bh, - blkno, - clusters_to_del, - &credits, - &meta_ac); - if (status < 0) { - mlog_errno(status); - goto bail; - } - } - - mutex_lock(&tl_inode->i_mutex); - tl_sem = 1; - /* ocfs2_truncate_log_needs_flush guarantees us at least one - * record is free for use. If there isn't any, we flush to get - * an empty truncate log. */ - if (ocfs2_truncate_log_needs_flush(osb)) { - status = __ocfs2_flush_truncate_log(osb); - if (status < 0) { - mlog_errno(status); - goto bail; - } - } + phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno); - credits += ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del, - (struct ocfs2_dinode *)fe_bh->b_data, - el); - handle = ocfs2_start_trans(osb, credits); - if (IS_ERR(handle)) { - status = PTR_ERR(handle); - handle = NULL; - mlog_errno(status); - goto bail; - } - - status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh, handle, - tc, path, meta_ac); + status = ocfs2_remove_btree_range(inode, &et, trunc_cpos, + phys_cpos, trunc_len, flags, &dealloc, + refcount_loc); if (status < 0) { mlog_errno(status); goto bail; } - mutex_unlock(&tl_inode->i_mutex); - tl_sem = 0; - - ocfs2_commit_trans(osb, handle); - handle = NULL; - ocfs2_reinit_path(path, 1); - if (meta_ac) { - ocfs2_free_alloc_context(meta_ac); - meta_ac = NULL; - } - - if (ref_tree) { - ocfs2_unlock_refcount_tree(osb, ref_tree, 1); - ref_tree = NULL; - } - /* * The check above will catch the case where we've truncated * away all allocation. @@ -7585,25 +7130,10 @@ bail: ocfs2_schedule_truncate_log_flush(osb, 1); - if (tl_sem) - mutex_unlock(&tl_inode->i_mutex); - - if (handle) - ocfs2_commit_trans(osb, handle); - - if (meta_ac) - ocfs2_free_alloc_context(meta_ac); - - if (ref_tree) - ocfs2_unlock_refcount_tree(osb, ref_tree, 1); - - ocfs2_run_deallocs(osb, &tc->tc_dealloc); + ocfs2_run_deallocs(osb, &dealloc); ocfs2_free_path(path); - /* This will drop the ext_alloc cluster lock for us */ - ocfs2_free_truncate_context(tc); - mlog_exit(status); return status; } |