From f775738f6fba9c7f6deaa540860d6fb7e2d28445 Mon Sep 17 00:00:00 2001 From: Wang Sheng-Hui Date: Fri, 30 Mar 2012 15:14:27 +0800 Subject: btrfs/ctree.c: remove the unnecessary 'return -1;' at the end of bin_search The code path should not reach there. Remove it. Signed-off-by: Wang Sheng-Hui --- fs/btrfs/ctree.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 4106264fbc6..26847999c64 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -854,20 +854,18 @@ static noinline int generic_bin_search(struct extent_buffer *eb, static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, int level, int *slot) { - if (level == 0) { + if (level == 0) return generic_bin_search(eb, offsetof(struct btrfs_leaf, items), sizeof(struct btrfs_item), key, btrfs_header_nritems(eb), slot); - } else { + else return generic_bin_search(eb, offsetof(struct btrfs_node, ptrs), sizeof(struct btrfs_key_ptr), key, btrfs_header_nritems(eb), slot); - } - return -1; } int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, -- cgit v1.2.3-70-g09d2 From 5581a51a59a1f5f51ac3d4bacafb738d35e0350b Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Wed, 16 May 2012 17:04:52 +0200 Subject: Btrfs: don't set for_cow parameter for tree block functions Three callers of btrfs_free_tree_block or btrfs_alloc_tree_block passed parameter for_cow = 1. In fact, these two functions should never mark their tree modification operations as for_cow, because they can change the number of blocks referenced by a tree. Hence, we remove the extra for_cow parameter from these functions and make them pass a zero down. Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 22 +++++++++++----------- fs/btrfs/ctree.h | 4 ++-- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent-tree.c | 10 +++++----- fs/btrfs/ioctl.c | 2 +- 5 files changed, 20 insertions(+), 20 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 4106264fbc6..56485b3b7c3 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -255,7 +255,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, cow = btrfs_alloc_free_block(trans, root, buf->len, 0, new_root_objectid, &disk_key, level, - buf->start, 0, 1); + buf->start, 0); if (IS_ERR(cow)) return PTR_ERR(cow); @@ -467,7 +467,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start, root->root_key.objectid, &disk_key, - level, search_start, empty_size, 1); + level, search_start, empty_size); if (IS_ERR(cow)) return PTR_ERR(cow); @@ -509,7 +509,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, rcu_assign_pointer(root->node, cow); btrfs_free_tree_block(trans, root, buf, parent_start, - last_ref, 1); + last_ref); free_extent_buffer(buf); add_root_to_dirty_list(root); } else { @@ -525,7 +525,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, trans->transid); btrfs_mark_buffer_dirty(parent); btrfs_free_tree_block(trans, root, buf, parent_start, - last_ref, 1); + last_ref); } if (unlock_orig) btrfs_tree_unlock(buf); @@ -987,7 +987,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, free_extent_buffer(mid); root_sub_used(root, mid->len); - btrfs_free_tree_block(trans, root, mid, 0, 1, 0); + btrfs_free_tree_block(trans, root, mid, 0, 1); /* once for the root ptr */ free_extent_buffer_stale(mid); return 0; @@ -1042,7 +1042,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, btrfs_tree_unlock(right); del_ptr(trans, root, path, level + 1, pslot + 1); root_sub_used(root, right->len); - btrfs_free_tree_block(trans, root, right, 0, 1, 0); + btrfs_free_tree_block(trans, root, right, 0, 1); free_extent_buffer_stale(right); right = NULL; } else { @@ -1084,7 +1084,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, btrfs_tree_unlock(mid); del_ptr(trans, root, path, level + 1, pslot); root_sub_used(root, mid->len); - btrfs_free_tree_block(trans, root, mid, 0, 1, 0); + btrfs_free_tree_block(trans, root, mid, 0, 1); free_extent_buffer_stale(mid); mid = NULL; } else { @@ -2129,7 +2129,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, root->root_key.objectid, &lower_key, - level, root->node->start, 0, 0); + level, root->node->start, 0); if (IS_ERR(c)) return PTR_ERR(c); @@ -2252,7 +2252,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, split = btrfs_alloc_free_block(trans, root, root->nodesize, 0, root->root_key.objectid, - &disk_key, level, c->start, 0, 0); + &disk_key, level, c->start, 0); if (IS_ERR(split)) return PTR_ERR(split); @@ -3004,7 +3004,7 @@ again: right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, root->root_key.objectid, - &disk_key, 0, l->start, 0, 0); + &disk_key, 0, l->start, 0); if (IS_ERR(right)) return PTR_ERR(right); @@ -3804,7 +3804,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans, root_sub_used(root, leaf->len); extent_buffer_get(leaf); - btrfs_free_tree_block(trans, root, leaf, 0, 1, 0); + btrfs_free_tree_block(trans, root, leaf, 0, 1); free_extent_buffer_stale(leaf); } /* diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ec42a24e935..e863188a3f1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2496,11 +2496,11 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u32 blocksize, u64 parent, u64 root_objectid, struct btrfs_disk_key *key, int level, - u64 hint, u64 empty_size, int for_cow); + u64 hint, u64 empty_size); void btrfs_free_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, - u64 parent, int last_ref, int for_cow); + u64 parent, int last_ref); struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u32 blocksize, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a7ffc88a7db..f4330749251 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1252,7 +1252,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, BTRFS_TREE_LOG_OBJECTID, NULL, - 0, 0, 0, 0); + 0, 0, 0); if (IS_ERR(leaf)) { kfree(root); return ERR_CAST(leaf); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 49fd7b66d57..b68eb7ad05a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5217,7 +5217,7 @@ out: void btrfs_free_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, - u64 parent, int last_ref, int for_cow) + u64 parent, int last_ref) { struct btrfs_block_group_cache *cache = NULL; int ret; @@ -5227,7 +5227,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, buf->start, buf->len, parent, root->root_key.objectid, btrfs_header_level(buf), - BTRFS_DROP_DELAYED_REF, NULL, for_cow); + BTRFS_DROP_DELAYED_REF, NULL, 0); BUG_ON(ret); /* -ENOMEM */ } @@ -6249,7 +6249,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u32 blocksize, u64 parent, u64 root_objectid, struct btrfs_disk_key *key, int level, - u64 hint, u64 empty_size, int for_cow) + u64 hint, u64 empty_size) { struct btrfs_key ins; struct btrfs_block_rsv *block_rsv; @@ -6297,7 +6297,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, ins.objectid, ins.offset, parent, root_objectid, level, BTRFS_ADD_DELAYED_EXTENT, - extent_op, for_cow); + extent_op, 0); BUG_ON(ret); /* -ENOMEM */ } return buf; @@ -6715,7 +6715,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, btrfs_header_owner(path->nodes[level + 1])); } - btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1, 0); + btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); out: wc->refs[level] = 0; wc->flags[level] = 0; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 14f8e1faa46..7f3a91367d7 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -367,7 +367,7 @@ static noinline int create_subvol(struct btrfs_root *root, return PTR_ERR(trans); leaf = btrfs_alloc_free_block(trans, root, root->leafsize, - 0, objectid, NULL, 0, 0, 0, 0); + 0, objectid, NULL, 0, 0, 0); if (IS_ERR(leaf)) { ret = PTR_ERR(leaf); goto fail; -- cgit v1.2.3-70-g09d2 From bd989ba359f2acb8bc5f5490e19010fc0a6f8356 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Wed, 16 May 2012 17:18:50 +0200 Subject: Btrfs: add tree modification log functions The tree mod log will log modifications made fs-tree nodes. Most modifications are done by autobalance of the tree. Such changes are recorded as long as a block entry exists. When released, the log is cleaned. With the tree modification log, it's possible to reconstruct a consistent old state of the tree. This is required to do backref walking on a busy file system. Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 408 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- fs/btrfs/ctree.h | 5 + 2 files changed, 412 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 56485b3b7c3..72b9f97e2fd 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -18,6 +18,7 @@ #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -288,6 +289,412 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, return 0; } +enum mod_log_op { + MOD_LOG_KEY_REPLACE, + MOD_LOG_KEY_ADD, + MOD_LOG_KEY_REMOVE, + MOD_LOG_KEY_REMOVE_WHILE_FREEING, + MOD_LOG_KEY_REMOVE_WHILE_MOVING, + MOD_LOG_MOVE_KEYS, + MOD_LOG_ROOT_REPLACE, +}; + +struct tree_mod_move { + int dst_slot; + int nr_items; +}; + +struct tree_mod_root { + u64 logical; + u8 level; +}; + +struct tree_mod_elem { + struct rb_node node; + u64 index; /* shifted logical */ + struct seq_list elem; + enum mod_log_op op; + + /* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */ + int slot; + + /* this is used for MOD_LOG_KEY* and MOD_LOG_ROOT_REPLACE */ + u64 generation; + + /* those are used for op == MOD_LOG_KEY_{REPLACE,REMOVE} */ + struct btrfs_disk_key key; + u64 blockptr; + + /* this is used for op == MOD_LOG_MOVE_KEYS */ + struct tree_mod_move move; + + /* this is used for op == MOD_LOG_ROOT_REPLACE */ + struct tree_mod_root old_root; +}; + +static inline void +__get_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem) +{ + elem->seq = atomic_inc_return(&fs_info->tree_mod_seq); + list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); +} + +void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, + struct seq_list *elem) +{ + elem->flags = 1; + spin_lock(&fs_info->tree_mod_seq_lock); + __get_tree_mod_seq(fs_info, elem); + spin_unlock(&fs_info->tree_mod_seq_lock); +} + +void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, + struct seq_list *elem) +{ + struct rb_root *tm_root; + struct rb_node *node; + struct rb_node *next; + struct seq_list *cur_elem; + struct tree_mod_elem *tm; + u64 min_seq = (u64)-1; + u64 seq_putting = elem->seq; + + if (!seq_putting) + return; + + BUG_ON(!(elem->flags & 1)); + spin_lock(&fs_info->tree_mod_seq_lock); + list_del(&elem->list); + + list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) { + if ((cur_elem->flags & 1) && cur_elem->seq < min_seq) { + if (seq_putting > cur_elem->seq) { + /* + * blocker with lower sequence number exists, we + * cannot remove anything from the log + */ + goto out; + } + min_seq = cur_elem->seq; + } + } + + /* + * anything that's lower than the lowest existing (read: blocked) + * sequence number can be removed from the tree. + */ + write_lock(&fs_info->tree_mod_log_lock); + tm_root = &fs_info->tree_mod_log; + for (node = rb_first(tm_root); node; node = next) { + next = rb_next(node); + tm = container_of(node, struct tree_mod_elem, node); + if (tm->elem.seq > min_seq) + continue; + rb_erase(node, tm_root); + list_del(&tm->elem.list); + kfree(tm); + } + write_unlock(&fs_info->tree_mod_log_lock); +out: + spin_unlock(&fs_info->tree_mod_seq_lock); +} + +/* + * key order of the log: + * index -> sequence + * + * the index is the shifted logical of the *new* root node for root replace + * operations, or the shifted logical of the affected block for all other + * operations. + */ +static noinline int +__tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) +{ + struct rb_root *tm_root; + struct rb_node **new; + struct rb_node *parent = NULL; + struct tree_mod_elem *cur; + int ret = 0; + + BUG_ON(!tm || !tm->elem.seq); + + write_lock(&fs_info->tree_mod_log_lock); + tm_root = &fs_info->tree_mod_log; + new = &tm_root->rb_node; + while (*new) { + cur = container_of(*new, struct tree_mod_elem, node); + parent = *new; + if (cur->index < tm->index) + new = &((*new)->rb_left); + else if (cur->index > tm->index) + new = &((*new)->rb_right); + else if (cur->elem.seq < tm->elem.seq) + new = &((*new)->rb_left); + else if (cur->elem.seq > tm->elem.seq) + new = &((*new)->rb_right); + else { + kfree(tm); + ret = -EEXIST; + goto unlock; + } + } + + rb_link_node(&tm->node, parent, new); + rb_insert_color(&tm->node, tm_root); +unlock: + write_unlock(&fs_info->tree_mod_log_lock); + return ret; +} + +int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, + struct tree_mod_elem **tm_ret) +{ + struct tree_mod_elem *tm; + u64 seq = 0; + + smp_mb(); + if (list_empty(&fs_info->tree_mod_seq_list)) + return 0; + + tm = *tm_ret = kzalloc(sizeof(*tm), flags); + if (!tm) + return -ENOMEM; + + __get_tree_mod_seq(fs_info, &tm->elem); + seq = tm->elem.seq; + tm->elem.flags = 0; + + return seq; +} + +static noinline int +tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int slot, + enum mod_log_op op, gfp_t flags) +{ + struct tree_mod_elem *tm; + int ret; + + ret = tree_mod_alloc(fs_info, flags, &tm); + if (ret <= 0) + return ret; + + tm->index = eb->start >> PAGE_CACHE_SHIFT; + if (op != MOD_LOG_KEY_ADD) { + btrfs_node_key(eb, &tm->key, slot); + tm->blockptr = btrfs_node_blockptr(eb, slot); + } + tm->op = op; + tm->slot = slot; + tm->generation = btrfs_node_ptr_generation(eb, slot); + + return __tree_mod_log_insert(fs_info, tm); +} + +static noinline int +tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, + int slot, enum mod_log_op op) +{ + return tree_mod_log_insert_key_mask(fs_info, eb, slot, op, GFP_NOFS); +} + +static noinline int +tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int dst_slot, int src_slot, + int nr_items, gfp_t flags) +{ + struct tree_mod_elem *tm; + int ret; + int i; + + ret = tree_mod_alloc(fs_info, flags, &tm); + if (ret <= 0) + return ret; + + for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { + ret = tree_mod_log_insert_key(fs_info, eb, i + dst_slot, + MOD_LOG_KEY_REMOVE_WHILE_MOVING); + BUG_ON(ret < 0); + } + + tm->index = eb->start >> PAGE_CACHE_SHIFT; + tm->slot = src_slot; + tm->move.dst_slot = dst_slot; + tm->move.nr_items = nr_items; + tm->op = MOD_LOG_MOVE_KEYS; + + return __tree_mod_log_insert(fs_info, tm); +} + +static noinline int +tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, + struct extent_buffer *old_root, + struct extent_buffer *new_root, gfp_t flags) +{ + struct tree_mod_elem *tm; + int ret; + + ret = tree_mod_alloc(fs_info, flags, &tm); + if (ret <= 0) + return ret; + + tm->index = new_root->start >> PAGE_CACHE_SHIFT; + tm->old_root.logical = old_root->start; + tm->old_root.level = btrfs_header_level(old_root); + tm->generation = btrfs_header_generation(old_root); + tm->op = MOD_LOG_ROOT_REPLACE; + + return __tree_mod_log_insert(fs_info, tm); +} + +static struct tree_mod_elem * +__tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq, + int smallest) +{ + struct rb_root *tm_root; + struct rb_node *node; + struct tree_mod_elem *cur = NULL; + struct tree_mod_elem *found = NULL; + u64 index = start >> PAGE_CACHE_SHIFT; + + read_lock(&fs_info->tree_mod_log_lock); + tm_root = &fs_info->tree_mod_log; + node = tm_root->rb_node; + while (node) { + cur = container_of(node, struct tree_mod_elem, node); + if (cur->index < index) { + node = node->rb_left; + } else if (cur->index > index) { + node = node->rb_right; + } else if (cur->elem.seq < min_seq) { + node = node->rb_left; + } else if (!smallest) { + /* we want the node with the highest seq */ + if (found) + BUG_ON(found->elem.seq > cur->elem.seq); + found = cur; + node = node->rb_left; + } else if (cur->elem.seq > min_seq) { + /* we want the node with the smallest seq */ + if (found) + BUG_ON(found->elem.seq < cur->elem.seq); + found = cur; + node = node->rb_right; + } else { + found = cur; + break; + } + } + read_unlock(&fs_info->tree_mod_log_lock); + + return found; +} + +/* + * this returns the element from the log with the smallest time sequence + * value that's in the log (the oldest log item). any element with a time + * sequence lower than min_seq will be ignored. + */ +static struct tree_mod_elem * +tree_mod_log_search_oldest(struct btrfs_fs_info *fs_info, u64 start, + u64 min_seq) +{ + return __tree_mod_log_search(fs_info, start, min_seq, 1); +} + +/* + * this returns the element from the log with the largest time sequence + * value that's in the log (the most recent log item). any element with + * a time sequence lower than min_seq will be ignored. + */ +static struct tree_mod_elem * +tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq) +{ + return __tree_mod_log_search(fs_info, start, min_seq, 0); +} + +static inline void +tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, + struct extent_buffer *src, unsigned long dst_offset, + unsigned long src_offset, int nr_items) +{ + int ret; + int i; + + smp_mb(); + if (list_empty(&fs_info->tree_mod_seq_list)) + return; + + if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) + return; + + /* speed this up by single seq for all operations? */ + for (i = 0; i < nr_items; i++) { + ret = tree_mod_log_insert_key(fs_info, src, i + src_offset, + MOD_LOG_KEY_REMOVE); + BUG_ON(ret < 0); + ret = tree_mod_log_insert_key(fs_info, dst, i + dst_offset, + MOD_LOG_KEY_ADD); + BUG_ON(ret < 0); + } +} + +static inline void +tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, + int dst_offset, int src_offset, int nr_items) +{ + int ret; + ret = tree_mod_log_insert_move(fs_info, dst, dst_offset, src_offset, + nr_items, GFP_NOFS); + BUG_ON(ret < 0); +} + +static inline void +tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int slot, int atomic) +{ + int ret; + + ret = tree_mod_log_insert_key_mask(fs_info, eb, slot, + MOD_LOG_KEY_REPLACE, + atomic ? GFP_ATOMIC : GFP_NOFS); + BUG_ON(ret < 0); +} + +static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb) +{ + int i; + int ret; + u32 nritems; + + smp_mb(); + if (list_empty(&fs_info->tree_mod_seq_list)) + return; + + if (btrfs_header_level(eb) == 0) + return; + + nritems = btrfs_header_nritems(eb); + for (i = nritems - 1; i >= 0; i--) { + ret = tree_mod_log_insert_key(fs_info, eb, i, + MOD_LOG_KEY_REMOVE_WHILE_FREEING); + BUG_ON(ret < 0); + } +} + +static inline void +tree_mod_log_set_root_pointer(struct btrfs_root *root, + struct extent_buffer *new_root_node) +{ + int ret; + tree_mod_log_free_eb(root->fs_info, root->node); + ret = tree_mod_log_insert_root(root->fs_info, root->node, + new_root_node, GFP_NOFS); + BUG_ON(ret < 0); +} + /* * check if the tree block can be shared by multiple trees */ @@ -2271,7 +2678,6 @@ static noinline int split_node(struct btrfs_trans_handle *trans, (unsigned long)btrfs_header_chunk_tree_uuid(split), BTRFS_UUID_SIZE); - copy_extent_buffer(split, c, btrfs_node_key_ptr_offset(0), btrfs_node_key_ptr_offset(mid), diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 01639e10004..e53bfb9b391 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3114,4 +3114,9 @@ struct seq_list { u32 flags; }; +void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, + struct seq_list *elem); +void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, + struct seq_list *elem); + #endif -- cgit v1.2.3-70-g09d2 From f230475e62f77930e776881deb6e95cfd2226bd4 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Sat, 26 May 2012 11:43:17 +0200 Subject: Btrfs: put all block modifications into the tree mod log When running functions that can make changes to the internal trees (e.g. btrfs_search_slot), we check if somebody may be interested in the block we're currently modifying. If so, we record our modification to be able to rewind it later on. Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 72b9f97e2fd..07c1a96aa4a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -39,6 +39,14 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct extent_buffer *src_buf); static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot); +static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb); +struct extent_buffer *read_old_tree_block(struct btrfs_root *root, u64 bytenr, + u32 blocksize, u64 parent_transid, + u64 time_seq); +struct extent_buffer *btrfs_find_old_tree_block(struct btrfs_root *root, + u64 bytenr, u32 blocksize, + u64 time_seq); struct btrfs_path *btrfs_alloc_path(void) { @@ -816,6 +824,12 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, ret = btrfs_dec_ref(trans, root, buf, 1, 1); BUG_ON(ret); /* -ENOMEM */ } + /* + * don't log freeing in case we're freeing the root node, this + * is done by tree_mod_log_set_root_pointer later + */ + if (buf != root->node && btrfs_header_level(buf) != 0) + tree_mod_log_free_eb(root->fs_info, buf); clean_tree_block(trans, root, buf); *last_ref = 1; } @@ -913,6 +927,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, parent_start = 0; extent_buffer_get(cow); + tree_mod_log_set_root_pointer(root, cow); rcu_assign_pointer(root->node, cow); btrfs_free_tree_block(trans, root, buf, parent_start, @@ -926,6 +941,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, parent_start = 0; WARN_ON(trans->transid != btrfs_header_generation(parent)); + tree_mod_log_insert_key(root->fs_info, parent, parent_slot, + MOD_LOG_KEY_REPLACE); btrfs_set_node_blockptr(parent, parent_slot, cow->start); btrfs_set_node_ptr_generation(parent, parent_slot, @@ -1381,6 +1398,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, goto enospc; } + tree_mod_log_set_root_pointer(root, child); rcu_assign_pointer(root->node, child); add_root_to_dirty_list(root); @@ -1455,6 +1473,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, } else { struct btrfs_disk_key right_key; btrfs_node_key(right, &right_key, 0); + tree_mod_log_set_node_key(root->fs_info, parent, + &right_key, pslot + 1, 0); btrfs_set_node_key(parent, &right_key, pslot + 1); btrfs_mark_buffer_dirty(parent); } @@ -1498,6 +1518,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, /* update the parent key to reflect our changes */ struct btrfs_disk_key mid_key; btrfs_node_key(mid, &mid_key, 0); + tree_mod_log_set_node_key(root->fs_info, parent, &mid_key, + pslot, 0); btrfs_set_node_key(parent, &mid_key, pslot); btrfs_mark_buffer_dirty(parent); } @@ -1595,6 +1617,8 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, struct btrfs_disk_key disk_key; orig_slot += left_nr; btrfs_node_key(mid, &disk_key, 0); + tree_mod_log_set_node_key(root->fs_info, parent, + &disk_key, pslot, 0); btrfs_set_node_key(parent, &disk_key, pslot); btrfs_mark_buffer_dirty(parent); if (btrfs_header_nritems(left) > orig_slot) { @@ -1646,6 +1670,8 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, struct btrfs_disk_key disk_key; btrfs_node_key(right, &disk_key, 0); + tree_mod_log_set_node_key(root->fs_info, parent, + &disk_key, pslot + 1, 0); btrfs_set_node_key(parent, &disk_key, pslot + 1); btrfs_mark_buffer_dirty(parent); @@ -2348,6 +2374,7 @@ static void fixup_low_keys(struct btrfs_trans_handle *trans, if (!path->nodes[i]) break; t = path->nodes[i]; + tree_mod_log_set_node_key(root->fs_info, t, key, tslot, 1); btrfs_set_node_key(t, key, tslot); btrfs_mark_buffer_dirty(path->nodes[i]); if (tslot != 0) @@ -2430,12 +2457,16 @@ static int push_node_left(struct btrfs_trans_handle *trans, } else push_items = min(src_nritems - 8, push_items); + tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0, + push_items); copy_extent_buffer(dst, src, btrfs_node_key_ptr_offset(dst_nritems), btrfs_node_key_ptr_offset(0), push_items * sizeof(struct btrfs_key_ptr)); if (push_items < src_nritems) { + tree_mod_log_eb_move(root->fs_info, src, 0, push_items, + src_nritems - push_items); memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), btrfs_node_key_ptr_offset(push_items), (src_nritems - push_items) * @@ -2489,11 +2520,14 @@ static int balance_node_right(struct btrfs_trans_handle *trans, if (max_push < push_items) push_items = max_push; + tree_mod_log_eb_move(root->fs_info, dst, push_items, 0, dst_nritems); memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items), btrfs_node_key_ptr_offset(0), (dst_nritems) * sizeof(struct btrfs_key_ptr)); + tree_mod_log_eb_copy(root->fs_info, dst, src, 0, + src_nritems - push_items, push_items); copy_extent_buffer(dst, src, btrfs_node_key_ptr_offset(0), btrfs_node_key_ptr_offset(src_nritems - push_items), @@ -2568,6 +2602,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(c); old = root->node; + tree_mod_log_set_root_pointer(root, c); rcu_assign_pointer(root->node, c); /* the super has an extra ref to root->node */ @@ -2678,6 +2713,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, (unsigned long)btrfs_header_chunk_tree_uuid(split), BTRFS_UUID_SIZE); + tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid); copy_extent_buffer(split, c, btrfs_node_key_ptr_offset(0), btrfs_node_key_ptr_offset(mid), -- cgit v1.2.3-70-g09d2 From f3ea38da3e76455fbd6d405cdca4d050ed085458 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Sat, 26 May 2012 11:45:21 +0200 Subject: Btrfs: add del_ptr and insert_ptr modifications to the tree mod log Record all relevant modifications to block pointers in the tree mod log so that we can rewind them later on for backref walking. Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 42 ++++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 07c1a96aa4a..69ce3f7deee 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -38,7 +38,8 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct extent_buffer *dst_buf, struct extent_buffer *src_buf); static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_path *path, int level, int slot); + struct btrfs_path *path, int level, int slot, + int tree_mod_log); static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb); struct extent_buffer *read_old_tree_block(struct btrfs_root *root, u64 bytenr, @@ -1465,7 +1466,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, if (btrfs_header_nritems(right) == 0) { clean_tree_block(trans, root, right); btrfs_tree_unlock(right); - del_ptr(trans, root, path, level + 1, pslot + 1); + del_ptr(trans, root, path, level + 1, pslot + 1, 1); root_sub_used(root, right->len); btrfs_free_tree_block(trans, root, right, 0, 1); free_extent_buffer_stale(right); @@ -1509,7 +1510,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, if (btrfs_header_nritems(mid) == 0) { clean_tree_block(trans, root, mid); btrfs_tree_unlock(mid); - del_ptr(trans, root, path, level + 1, pslot); + del_ptr(trans, root, path, level + 1, pslot, 1); root_sub_used(root, mid->len); btrfs_free_tree_block(trans, root, mid, 0, 1); free_extent_buffer_stale(mid); @@ -2626,10 +2627,11 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, static void insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_disk_key *key, u64 bytenr, - int slot, int level) + int slot, int level, int tree_mod_log) { struct extent_buffer *lower; int nritems; + int ret; BUG_ON(!path->nodes[level]); btrfs_assert_tree_locked(path->nodes[level]); @@ -2638,11 +2640,19 @@ static void insert_ptr(struct btrfs_trans_handle *trans, BUG_ON(slot > nritems); BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root)); if (slot != nritems) { + if (tree_mod_log && level) + tree_mod_log_eb_move(root->fs_info, lower, slot + 1, + slot, nritems - slot); memmove_extent_buffer(lower, btrfs_node_key_ptr_offset(slot + 1), btrfs_node_key_ptr_offset(slot), (nritems - slot) * sizeof(struct btrfs_key_ptr)); } + if (tree_mod_log && level) { + ret = tree_mod_log_insert_key(root->fs_info, lower, slot, + MOD_LOG_KEY_ADD); + BUG_ON(ret < 0); + } btrfs_set_node_key(lower, key, slot); btrfs_set_node_blockptr(lower, slot, bytenr); WARN_ON(trans->transid == 0); @@ -2726,7 +2736,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(split); insert_ptr(trans, root, path, &disk_key, split->start, - path->slots[level + 1] + 1, level + 1); + path->slots[level + 1] + 1, level + 1, 1); if (path->slots[level] >= mid) { path->slots[level] -= mid; @@ -3263,7 +3273,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans, btrfs_set_header_nritems(l, mid); btrfs_item_key(right, &disk_key, 0); insert_ptr(trans, root, path, &disk_key, right->start, - path->slots[1] + 1, 1); + path->slots[1] + 1, 1, 0); btrfs_mark_buffer_dirty(right); btrfs_mark_buffer_dirty(l); @@ -3470,7 +3480,7 @@ again: if (mid <= slot) { btrfs_set_header_nritems(right, 0); insert_ptr(trans, root, path, &disk_key, right->start, - path->slots[1] + 1, 1); + path->slots[1] + 1, 1, 0); btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = right; @@ -3479,7 +3489,7 @@ again: } else { btrfs_set_header_nritems(right, 0); insert_ptr(trans, root, path, &disk_key, right->start, - path->slots[1], 1); + path->slots[1], 1, 0); btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = right; @@ -4191,19 +4201,31 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root * empty a node. */ static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_path *path, int level, int slot) + struct btrfs_path *path, int level, int slot, + int tree_mod_log) { struct extent_buffer *parent = path->nodes[level]; u32 nritems; + int ret; nritems = btrfs_header_nritems(parent); if (slot != nritems - 1) { + if (tree_mod_log && level) + tree_mod_log_eb_move(root->fs_info, parent, slot, + slot + 1, nritems - slot - 1); memmove_extent_buffer(parent, btrfs_node_key_ptr_offset(slot), btrfs_node_key_ptr_offset(slot + 1), sizeof(struct btrfs_key_ptr) * (nritems - slot - 1)); } + + if (tree_mod_log && level) { + ret = tree_mod_log_insert_key(root->fs_info, parent, slot, + MOD_LOG_KEY_REMOVE); + BUG_ON(ret < 0); + } + nritems--; btrfs_set_header_nritems(parent, nritems); if (nritems == 0 && parent == root->node) { @@ -4235,7 +4257,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans, struct extent_buffer *leaf) { WARN_ON(btrfs_header_generation(leaf) != trans->transid); - del_ptr(trans, root, path, 1, path->slots[1]); + del_ptr(trans, root, path, 1, path->slots[1], 1); /* * btrfs_free_extent is expensive, we want to make sure we -- cgit v1.2.3-70-g09d2 From 5d9e75c41d11ca79b1d1ff6ed17c88c9047d1fea Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Wed, 16 May 2012 18:25:47 +0200 Subject: Btrfs: add btrfs_search_old_slot The tree modification log together with the current state of the tree gives a consistent, old version of the tree. btrfs_search_old_slot is used to search through this old version and return old (dummy!) extent buffers. Naturally, this function cannot do any tree modifications. Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 319 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- fs/btrfs/ctree.h | 2 + 2 files changed, 317 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 69ce3f7deee..0954f1770fd 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -960,6 +960,207 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, return 0; } +/* + * returns the logical address of the oldest predecessor of the given root. + * entries older than time_seq are ignored. + */ +static struct tree_mod_elem * +__tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, + struct btrfs_root *root, u64 time_seq) +{ + struct tree_mod_elem *tm; + struct tree_mod_elem *found = NULL; + u64 root_logical = root->node->start; + int looped = 0; + + if (!time_seq) + return 0; + + /* + * the very last operation that's logged for a root is the replacement + * operation (if it is replaced at all). this has the index of the *new* + * root, making it the very first operation that's logged for this root. + */ + while (1) { + tm = tree_mod_log_search_oldest(fs_info, root_logical, + time_seq); + if (!looped && !tm) + return 0; + /* + * we must have key remove operations in the log before the + * replace operation. + */ + BUG_ON(!tm); + + if (tm->op != MOD_LOG_ROOT_REPLACE) + break; + + found = tm; + root_logical = tm->old_root.logical; + BUG_ON(root_logical == root->node->start); + looped = 1; + } + + return found; +} + +/* + * tm is a pointer to the first operation to rewind within eb. then, all + * previous operations will be rewinded (until we reach something older than + * time_seq). + */ +static void +__tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, + struct tree_mod_elem *first_tm) +{ + u32 n; + struct rb_node *next; + struct tree_mod_elem *tm = first_tm; + unsigned long o_dst; + unsigned long o_src; + unsigned long p_size = sizeof(struct btrfs_key_ptr); + + n = btrfs_header_nritems(eb); + while (tm && tm->elem.seq >= time_seq) { + /* + * all the operations are recorded with the operator used for + * the modification. as we're going backwards, we do the + * opposite of each operation here. + */ + switch (tm->op) { + case MOD_LOG_KEY_REMOVE_WHILE_FREEING: + BUG_ON(tm->slot < n); + case MOD_LOG_KEY_REMOVE_WHILE_MOVING: + case MOD_LOG_KEY_REMOVE: + btrfs_set_node_key(eb, &tm->key, tm->slot); + btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr); + btrfs_set_node_ptr_generation(eb, tm->slot, + tm->generation); + n++; + break; + case MOD_LOG_KEY_REPLACE: + BUG_ON(tm->slot >= n); + btrfs_set_node_key(eb, &tm->key, tm->slot); + btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr); + btrfs_set_node_ptr_generation(eb, tm->slot, + tm->generation); + break; + case MOD_LOG_KEY_ADD: + if (tm->slot != n - 1) { + o_dst = btrfs_node_key_ptr_offset(tm->slot); + o_src = btrfs_node_key_ptr_offset(tm->slot + 1); + memmove_extent_buffer(eb, o_dst, o_src, p_size); + } + n--; + break; + case MOD_LOG_MOVE_KEYS: + memmove_extent_buffer(eb, tm->slot, tm->move.dst_slot, + tm->move.nr_items * p_size); + break; + case MOD_LOG_ROOT_REPLACE: + /* + * this operation is special. for roots, this must be + * handled explicitly before rewinding. + * for non-roots, this operation may exist if the node + * was a root: root A -> child B; then A gets empty and + * B is promoted to the new root. in the mod log, we'll + * have a root-replace operation for B, a tree block + * that is no root. we simply ignore that operation. + */ + break; + } + next = rb_next(&tm->node); + if (!next) + break; + tm = container_of(next, struct tree_mod_elem, node); + if (tm->index != first_tm->index) + break; + } + btrfs_set_header_nritems(eb, n); +} + +static struct extent_buffer * +tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, + u64 time_seq) +{ + struct extent_buffer *eb_rewin; + struct tree_mod_elem *tm; + + if (!time_seq) + return eb; + + if (btrfs_header_level(eb) == 0) + return eb; + + tm = tree_mod_log_search(fs_info, eb->start, time_seq); + if (!tm) + return eb; + + if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) { + BUG_ON(tm->slot != 0); + eb_rewin = alloc_dummy_extent_buffer(eb->start, + fs_info->tree_root->nodesize); + BUG_ON(!eb_rewin); + btrfs_set_header_bytenr(eb_rewin, eb->start); + btrfs_set_header_backref_rev(eb_rewin, + btrfs_header_backref_rev(eb)); + btrfs_set_header_owner(eb_rewin, btrfs_header_owner(eb)); + } else { + eb_rewin = btrfs_clone_extent_buffer(eb); + BUG_ON(!eb_rewin); + } + + extent_buffer_get(eb_rewin); + free_extent_buffer(eb); + + __tree_mod_log_rewind(eb_rewin, time_seq, tm); + + return eb_rewin; +} + +static inline struct extent_buffer * +get_old_root(struct btrfs_root *root, u64 time_seq) +{ + struct tree_mod_elem *tm; + struct extent_buffer *eb; + struct tree_mod_root *old_root; + u64 old_generation; + + tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); + if (!tm) + return root->node; + + old_root = &tm->old_root; + old_generation = tm->generation; + + tm = tree_mod_log_search(root->fs_info, old_root->logical, time_seq); + /* + * there was an item in the log when __tree_mod_log_oldest_root + * returned. this one must not go away, because the time_seq passed to + * us must be blocking its removal. + */ + BUG_ON(!tm); + + if (old_root->logical == root->node->start) { + /* there are logged operations for the current root */ + eb = btrfs_clone_extent_buffer(root->node); + } else { + /* there's a root replace operation for the current root */ + eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT, + root->nodesize); + btrfs_set_header_bytenr(eb, eb->start); + btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV); + btrfs_set_header_owner(eb, root->root_key.objectid); + } + if (!eb) + return NULL; + btrfs_set_header_level(eb, old_root->level); + btrfs_set_header_generation(eb, old_generation); + __tree_mod_log_rewind(eb, time_seq, tm); + + return eb; +} + static inline int should_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf) @@ -1930,7 +2131,7 @@ static int read_block_for_search(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *p, struct extent_buffer **eb_ret, int level, int slot, - struct btrfs_key *key) + struct btrfs_key *key, u64 time_seq) { u64 blocknr; u64 gen; @@ -2284,7 +2485,7 @@ cow_done: } err = read_block_for_search(trans, root, p, - &b, level, slot, key); + &b, level, slot, key, 0); if (err == -EAGAIN) goto again; if (err) { @@ -2355,6 +2556,116 @@ done: return ret; } +/* + * Like btrfs_search_slot, this looks for a key in the given tree. It uses the + * current state of the tree together with the operations recorded in the tree + * modification log to search for the key in a previous version of this tree, as + * denoted by the time_seq parameter. + * + * Naturally, there is no support for insert, delete or cow operations. + * + * The resulting path and return value will be set up as if we called + * btrfs_search_slot at that point in time with ins_len and cow both set to 0. + */ +int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, + struct btrfs_path *p, u64 time_seq) +{ + struct extent_buffer *b; + int slot; + int ret; + int err; + int level; + int lowest_unlock = 1; + u8 lowest_level = 0; + + lowest_level = p->lowest_level; + WARN_ON(p->nodes[0] != NULL); + + if (p->search_commit_root) { + BUG_ON(time_seq); + return btrfs_search_slot(NULL, root, key, p, 0, 0); + } + +again: + level = 0; + b = get_old_root(root, time_seq); + extent_buffer_get(b); + level = btrfs_header_level(b); + btrfs_tree_read_lock(b); + p->locks[level] = BTRFS_READ_LOCK; + + while (b) { + level = btrfs_header_level(b); + p->nodes[level] = b; + btrfs_clear_path_blocking(p, NULL, 0); + + /* + * we have a lock on b and as long as we aren't changing + * the tree, there is no way to for the items in b to change. + * It is safe to drop the lock on our parent before we + * go through the expensive btree search on b. + */ + btrfs_unlock_up_safe(p, level + 1); + + ret = bin_search(b, key, level, &slot); + + if (level != 0) { + int dec = 0; + if (ret && slot > 0) { + dec = 1; + slot -= 1; + } + p->slots[level] = slot; + unlock_up(p, level, lowest_unlock, 0, NULL); + + if (level == lowest_level) { + if (dec) + p->slots[level]++; + goto done; + } + + err = read_block_for_search(NULL, root, p, &b, level, + slot, key, time_seq); + if (err == -EAGAIN) + goto again; + if (err) { + ret = err; + goto done; + } + + level = btrfs_header_level(b); + err = btrfs_try_tree_read_lock(b); + if (!err) { + btrfs_set_path_blocking(p); + btrfs_tree_read_lock(b); + btrfs_clear_path_blocking(p, b, + BTRFS_READ_LOCK); + } + p->locks[level] = BTRFS_READ_LOCK; + p->nodes[level] = b; + b = tree_mod_log_rewind(root->fs_info, b, time_seq); + if (b != p->nodes[level]) { + btrfs_tree_unlock_rw(p->nodes[level], + p->locks[level]); + p->locks[level] = 0; + p->nodes[level] = b; + } + } else { + p->slots[level] = slot; + unlock_up(p, level, lowest_unlock, 0, NULL); + goto done; + } + } + ret = 1; +done: + if (!p->leave_spinning) + btrfs_set_path_blocking(p); + if (ret < 0) + btrfs_release_path(p); + + return ret; +} + /* * adjust the pointers going up the tree, starting at level * making sure the right key of each node is points to 'key'. @@ -4735,7 +5046,7 @@ again: next = c; next_rw_lock = path->locks[level]; ret = read_block_for_search(NULL, root, path, &next, level, - slot, &key); + slot, &key, 0); if (ret == -EAGAIN) goto again; @@ -4772,7 +5083,7 @@ again: break; ret = read_block_for_search(NULL, root, path, &next, level, - 0, &key); + 0, &key, 0); if (ret == -EAGAIN) goto again; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e53bfb9b391..6ba21b12cec 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2668,6 +2668,8 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans, int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); +int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, + struct btrfs_path *p, u64 time_seq); int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *parent, int start_slot, int cache_only, u64 *last_ret, -- cgit v1.2.3-70-g09d2 From 018642a1f197887058e97291460b890d296e8953 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Tue, 29 May 2012 18:10:13 +0900 Subject: Btrfs: return value of btrfs_read_buffer is checked correctly btrfs_read_buffer() has the possibility of returning the error. Therefore, I add the code in which the return value of btrfs_read_buffer() is checked. Signed-off-by: Tsutomu Itoh --- fs/btrfs/ctree.c | 6 +++++- fs/btrfs/tree-log.c | 16 +++++++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 26847999c64..99fcad631a2 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -739,7 +739,11 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, if (!cur) return -EIO; } else if (!uptodate) { - btrfs_read_buffer(cur, gen); + err = btrfs_read_buffer(cur, gen); + if (err) { + free_extent_buffer(cur); + return err; + } } } if (search_start == 0) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index eb1ae908582..6f22a4fca8d 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1628,7 +1628,9 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, int i; int ret; - btrfs_read_buffer(eb, gen); + ret = btrfs_read_buffer(eb, gen); + if (ret) + return ret; level = btrfs_header_level(eb); @@ -1749,7 +1751,11 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, path->slots[*level]++; if (wc->free) { - btrfs_read_buffer(next, ptr_gen); + ret = btrfs_read_buffer(next, ptr_gen); + if (ret) { + free_extent_buffer(next); + return ret; + } btrfs_tree_lock(next); btrfs_set_lock_blocking(next); @@ -1766,7 +1772,11 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, free_extent_buffer(next); continue; } - btrfs_read_buffer(next, ptr_gen); + ret = btrfs_read_buffer(next, ptr_gen); + if (ret) { + free_extent_buffer(next); + return ret; + } WARN_ON(*level <= 0); if (path->nodes[*level-1]) -- cgit v1.2.3-70-g09d2 From 926dd8a640da1bbf7478eebea1c23a842fc9c890 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Thu, 31 May 2012 14:00:15 +0200 Subject: Btrfs: add missing spin_lock for insertion into tree mod log tree_mod_alloc calls __get_tree_mod_seq and must acquire a spinlock before doing so. Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 0954f1770fd..26e8dc1681b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -455,11 +455,11 @@ unlock: return ret; } -int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, - struct tree_mod_elem **tm_ret) +static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, + struct tree_mod_elem **tm_ret) { struct tree_mod_elem *tm; - u64 seq = 0; + int seq; smp_mb(); if (list_empty(&fs_info->tree_mod_seq_list)) @@ -469,9 +469,22 @@ int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, if (!tm) return -ENOMEM; - __get_tree_mod_seq(fs_info, &tm->elem); - seq = tm->elem.seq; tm->elem.flags = 0; + spin_lock(&fs_info->tree_mod_seq_lock); + if (list_empty(&fs_info->tree_mod_seq_list)) { + /* + * someone emptied the list while we were waiting for the lock. + * we must not add to the list, because no blocker exists. items + * are removed from the list only when the existing blocker is + * removed from the list. + */ + kfree(tm); + seq = 0; + } else { + __get_tree_mod_seq(fs_info, &tm->elem); + seq = tm->elem.seq; + } + spin_unlock(&fs_info->tree_mod_seq_lock); return seq; } -- cgit v1.2.3-70-g09d2 From e9b7fd4d8b7c915cff353ca085b83bd19737396b Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Thu, 31 May 2012 14:59:09 +0200 Subject: Btrfs: add tree_mod_dont_log helper Replace duplicate code by small inline helper function. Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 26e8dc1681b..c7c48489b96 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -455,14 +455,25 @@ unlock: return ret; } +static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb) { + smp_mb(); + if (list_empty(&(fs_info)->tree_mod_seq_list)) + return 1; + if (!eb) + return 0; + if (btrfs_header_level(eb) == 0) + return 1; + return 0; +} + static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, struct tree_mod_elem **tm_ret) { struct tree_mod_elem *tm; int seq; - smp_mb(); - if (list_empty(&fs_info->tree_mod_seq_list)) + if (tree_mod_dont_log(fs_info, NULL)) return 0; tm = *tm_ret = kzalloc(sizeof(*tm), flags); @@ -643,8 +654,7 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, int ret; int i; - smp_mb(); - if (list_empty(&fs_info->tree_mod_seq_list)) + if (tree_mod_dont_log(fs_info, NULL)) return; if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) @@ -691,11 +701,7 @@ static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, int ret; u32 nritems; - smp_mb(); - if (list_empty(&fs_info->tree_mod_seq_list)) - return; - - if (btrfs_header_level(eb) == 0) + if (tree_mod_dont_log(fs_info, eb)) return; nritems = btrfs_header_nritems(eb); -- cgit v1.2.3-70-g09d2 From f395694c2cd76cb1882fa82dd37e761598367fe9 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Thu, 31 May 2012 15:02:32 +0200 Subject: Btrfs: fix tree mod log del_ptr Logging for del_ptr when we're not deleting the last pointer was wrong. This fixes both, duplicate log entries and log sequence. Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index c7c48489b96..63147c1315a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -540,9 +540,8 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, int ret; int i; - ret = tree_mod_alloc(fs_info, flags, &tm); - if (ret <= 0) - return ret; + if (tree_mod_dont_log(fs_info, eb)) + return 0; for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { ret = tree_mod_log_insert_key(fs_info, eb, i + dst_slot, @@ -550,6 +549,10 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, BUG_ON(ret < 0); } + ret = tree_mod_alloc(fs_info, flags, &tm); + if (ret <= 0) + return ret; + tm->index = eb->start >> PAGE_CACHE_SHIFT; tm->slot = src_slot; tm->move.dst_slot = dst_slot; @@ -4548,9 +4551,7 @@ static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, btrfs_node_key_ptr_offset(slot + 1), sizeof(struct btrfs_key_ptr) * (nritems - slot - 1)); - } - - if (tree_mod_log && level) { + } else if (tree_mod_log && level) { ret = tree_mod_log_insert_key(root->fs_info, parent, slot, MOD_LOG_KEY_REMOVE); BUG_ON(ret < 0); -- cgit v1.2.3-70-g09d2 From c31931088fd6cf953bd0868a2647b6c3928e6c96 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Thu, 31 May 2012 19:24:36 +0200 Subject: Btrfs: fix tree mod log rewinded level and rewinding of moved keys When we rewind REMOVE_WHILE_FREEING operations, there's code that allocates a fresh buffer instead of cloning the old one. Setting that buffer's level correctly was missing in this case. When rewinding a MOVE_KEYS operation, btrfs_node_key_ptr_offset(slot) was missing for memmove_extent_buffer()'s arguments. Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 63147c1315a..b4534d918e4 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1076,7 +1076,9 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, n--; break; case MOD_LOG_MOVE_KEYS: - memmove_extent_buffer(eb, tm->slot, tm->move.dst_slot, + o_dst = btrfs_node_key_ptr_offset(tm->slot); + o_src = btrfs_node_key_ptr_offset(tm->move.dst_slot); + memmove_extent_buffer(eb, o_dst, o_src, tm->move.nr_items * p_size); break; case MOD_LOG_ROOT_REPLACE: @@ -1127,6 +1129,7 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, btrfs_set_header_backref_rev(eb_rewin, btrfs_header_backref_rev(eb)); btrfs_set_header_owner(eb_rewin, btrfs_header_owner(eb)); + btrfs_set_header_level(eb_rewin, btrfs_header_level(eb)); } else { eb_rewin = btrfs_clone_extent_buffer(eb); BUG_ON(!eb_rewin); @@ -2609,7 +2612,6 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, } again: - level = 0; b = get_old_root(root, time_seq); extent_buffer_get(b); level = btrfs_header_level(b); -- cgit v1.2.3-70-g09d2