diff options
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r-- | fs/btrfs/ctree.c | 299 |
1 files changed, 237 insertions, 62 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b3343621100..eea5da7a2b9 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -38,8 +38,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct extent_buffer *dst_buf, struct extent_buffer *src_buf); static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_path *path, int level, int slot, - int tree_mod_log); + struct btrfs_path *path, int level, int slot); static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb); struct extent_buffer *read_old_tree_block(struct btrfs_root *root, u64 bytenr, @@ -596,6 +595,11 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, if (tree_mod_dont_log(fs_info, eb)) return 0; + /* + * When we override something during the move, we log these removals. + * This can only happen when we move towards the beginning of the + * buffer, i.e. dst_slot < src_slot. + */ for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { ret = tree_mod_log_insert_key_locked(fs_info, eb, i + dst_slot, MOD_LOG_KEY_REMOVE_WHILE_MOVING); @@ -647,8 +651,6 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, if (tree_mod_dont_log(fs_info, NULL)) return 0; - __tree_mod_log_free_eb(fs_info, old_root); - ret = tree_mod_alloc(fs_info, flags, &tm); if (ret < 0) goto out; @@ -773,8 +775,7 @@ tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, static noinline void tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, - struct btrfs_disk_key *disk_key, int slot, int atomic) + struct extent_buffer *eb, int slot, int atomic) { int ret; @@ -926,12 +927,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, ret = btrfs_dec_ref(trans, root, buf, 1, 1); BUG_ON(ret); /* -ENOMEM */ } - /* - * don't log freeing in case we're freeing the root node, this - * is done by tree_mod_log_set_root_pointer later - */ - if (buf != root->node && btrfs_header_level(buf) != 0) - tree_mod_log_free_eb(root->fs_info, buf); + tree_mod_log_free_eb(root->fs_info, buf); clean_tree_block(trans, root, buf); *last_ref = 1; } @@ -1225,6 +1221,8 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, free_extent_buffer(eb); __tree_mod_log_rewind(eb_rewin, time_seq, tm); + WARN_ON(btrfs_header_nritems(eb_rewin) > + BTRFS_NODEPTRS_PER_BLOCK(fs_info->fs_root)); return eb_rewin; } @@ -1241,9 +1239,11 @@ get_old_root(struct btrfs_root *root, u64 time_seq) { struct tree_mod_elem *tm; struct extent_buffer *eb; + struct extent_buffer *old; struct tree_mod_root *old_root = NULL; u64 old_generation = 0; u64 logical; + u32 blocksize; eb = btrfs_read_lock_root_node(root); tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); @@ -1259,14 +1259,32 @@ get_old_root(struct btrfs_root *root, u64 time_seq) } tm = tree_mod_log_search(root->fs_info, logical, time_seq); - if (old_root) + if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) { + btrfs_tree_read_unlock(root->node); + free_extent_buffer(root->node); + blocksize = btrfs_level_size(root, old_root->level); + old = read_tree_block(root, logical, blocksize, 0); + if (!old) { + pr_warn("btrfs: failed to read tree block %llu from get_old_root\n", + logical); + WARN_ON(1); + } else { + eb = btrfs_clone_extent_buffer(old); + free_extent_buffer(old); + } + } else if (old_root) { + btrfs_tree_read_unlock(root->node); + free_extent_buffer(root->node); eb = alloc_dummy_extent_buffer(logical, root->nodesize); - else + } else { eb = btrfs_clone_extent_buffer(root->node); - btrfs_tree_read_unlock(root->node); - free_extent_buffer(root->node); + btrfs_tree_read_unlock(root->node); + free_extent_buffer(root->node); + } + if (!eb) return NULL; + extent_buffer_get(eb); btrfs_tree_read_lock(eb); if (old_root) { btrfs_set_header_bytenr(eb, eb->start); @@ -1279,11 +1297,28 @@ get_old_root(struct btrfs_root *root, u64 time_seq) __tree_mod_log_rewind(eb, time_seq, tm); else WARN_ON(btrfs_header_level(eb) != 0); - extent_buffer_get(eb); + WARN_ON(btrfs_header_nritems(eb) > BTRFS_NODEPTRS_PER_BLOCK(root)); return eb; } +int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq) +{ + struct tree_mod_elem *tm; + int level; + + tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); + if (tm && tm->op == MOD_LOG_ROOT_REPLACE) { + level = tm->old_root.level; + } else { + rcu_read_lock(); + level = btrfs_header_level(root->node); + rcu_read_unlock(); + } + + return level; +} + static inline int should_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf) @@ -1324,19 +1359,16 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, u64 search_start; int ret; - if (trans->transaction != root->fs_info->running_transaction) { - printk(KERN_CRIT "trans %llu running %llu\n", + if (trans->transaction != root->fs_info->running_transaction) + WARN(1, KERN_CRIT "trans %llu running %llu\n", (unsigned long long)trans->transid, (unsigned long long) root->fs_info->running_transaction->transid); - WARN_ON(1); - } - if (trans->transid != root->fs_info->generation) { - printk(KERN_CRIT "trans %llu running %llu\n", + + if (trans->transid != root->fs_info->generation) + WARN(1, KERN_CRIT "trans %llu running %llu\n", (unsigned long long)trans->transid, (unsigned long long)root->fs_info->generation); - WARN_ON(1); - } if (!should_cow_block(trans, root, buf)) { *cow_ret = buf; @@ -1432,10 +1464,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, if (cache_only && parent_level != 1) return 0; - if (trans->transaction != root->fs_info->running_transaction) - WARN_ON(1); - if (trans->transid != root->fs_info->generation) - WARN_ON(1); + WARN_ON(trans->transaction != root->fs_info->running_transaction); + WARN_ON(trans->transid != root->fs_info->generation); parent_nritems = btrfs_header_nritems(parent); blocksize = btrfs_level_size(root, parent_level - 1); @@ -1725,6 +1755,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, goto enospc; } + tree_mod_log_free_eb(root->fs_info, root->node); tree_mod_log_set_root_pointer(root, child); rcu_assign_pointer(root->node, child); @@ -1789,7 +1820,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, if (btrfs_header_nritems(right) == 0) { clean_tree_block(trans, root, right); btrfs_tree_unlock(right); - del_ptr(trans, root, path, level + 1, pslot + 1, 1); + del_ptr(trans, root, path, level + 1, pslot + 1); root_sub_used(root, right->len); btrfs_free_tree_block(trans, root, right, 0, 1); free_extent_buffer_stale(right); @@ -1798,7 +1829,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, struct btrfs_disk_key right_key; btrfs_node_key(right, &right_key, 0); tree_mod_log_set_node_key(root->fs_info, parent, - &right_key, pslot + 1, 0); + pslot + 1, 0); btrfs_set_node_key(parent, &right_key, pslot + 1); btrfs_mark_buffer_dirty(parent); } @@ -1833,7 +1864,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, if (btrfs_header_nritems(mid) == 0) { clean_tree_block(trans, root, mid); btrfs_tree_unlock(mid); - del_ptr(trans, root, path, level + 1, pslot, 1); + del_ptr(trans, root, path, level + 1, pslot); root_sub_used(root, mid->len); btrfs_free_tree_block(trans, root, mid, 0, 1); free_extent_buffer_stale(mid); @@ -1842,7 +1873,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, /* update the parent key to reflect our changes */ struct btrfs_disk_key mid_key; btrfs_node_key(mid, &mid_key, 0); - tree_mod_log_set_node_key(root->fs_info, parent, &mid_key, + tree_mod_log_set_node_key(root->fs_info, parent, pslot, 0); btrfs_set_node_key(parent, &mid_key, pslot); btrfs_mark_buffer_dirty(parent); @@ -1942,7 +1973,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, orig_slot += left_nr; btrfs_node_key(mid, &disk_key, 0); tree_mod_log_set_node_key(root->fs_info, parent, - &disk_key, pslot, 0); + pslot, 0); btrfs_set_node_key(parent, &disk_key, pslot); btrfs_mark_buffer_dirty(parent); if (btrfs_header_nritems(left) > orig_slot) { @@ -1995,7 +2026,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, btrfs_node_key(right, &disk_key, 0); tree_mod_log_set_node_key(root->fs_info, parent, - &disk_key, pslot + 1, 0); + pslot + 1, 0); btrfs_set_node_key(parent, &disk_key, pslot + 1); btrfs_mark_buffer_dirty(parent); @@ -2181,6 +2212,9 @@ static noinline void unlock_up(struct btrfs_path *path, int level, int no_skips = 0; struct extent_buffer *t; + if (path->really_keep_locks) + return; + for (i = level; i < BTRFS_MAX_LEVEL; i++) { if (!path->nodes[i]) break; @@ -2228,7 +2262,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level) { int i; - if (path->keep_locks) + if (path->keep_locks || path->really_keep_locks) return; for (i = level; i < BTRFS_MAX_LEVEL; i++) { @@ -2461,7 +2495,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root if (!cow) write_lock_level = -1; - if (cow && (p->keep_locks || p->lowest_level)) + if (cow && (p->really_keep_locks || p->keep_locks || p->lowest_level)) write_lock_level = BTRFS_MAX_LEVEL; min_write_lock_level = write_lock_level; @@ -2530,7 +2564,10 @@ again: * must have write locks on this node and the * parent */ - if (level + 1 > write_lock_level) { + if (level > write_lock_level || + (level + 1 > write_lock_level && + level + 1 < BTRFS_MAX_LEVEL && + p->nodes[level + 1])) { write_lock_level = level + 1; btrfs_release_path(p); goto again; @@ -2879,7 +2916,7 @@ static void fixup_low_keys(struct btrfs_trans_handle *trans, if (!path->nodes[i]) break; t = path->nodes[i]; - tree_mod_log_set_node_key(root->fs_info, t, key, tslot, 1); + tree_mod_log_set_node_key(root->fs_info, t, tslot, 1); btrfs_set_node_key(t, key, tslot); btrfs_mark_buffer_dirty(path->nodes[i]); if (tslot != 0) @@ -2970,8 +3007,10 @@ static int push_node_left(struct btrfs_trans_handle *trans, push_items * sizeof(struct btrfs_key_ptr)); if (push_items < src_nritems) { - tree_mod_log_eb_move(root->fs_info, src, 0, push_items, - src_nritems - push_items); + /* + * don't call tree_mod_log_eb_move here, key removal was already + * fully logged by tree_mod_log_eb_copy above. + */ memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), btrfs_node_key_ptr_offset(push_items), (src_nritems - push_items) * @@ -3262,14 +3301,21 @@ static noinline int split_node(struct btrfs_trans_handle *trans, */ static int leaf_space_used(struct extent_buffer *l, int start, int nr) { + struct btrfs_item *start_item; + struct btrfs_item *end_item; + struct btrfs_map_token token; int data_len; int nritems = btrfs_header_nritems(l); int end = min(nritems, start + nr) - 1; if (!nr) return 0; - data_len = btrfs_item_end_nr(l, start); - data_len = data_len - btrfs_item_offset_nr(l, end); + btrfs_init_map_token(&token); + start_item = btrfs_item_nr(l, start); + end_item = btrfs_item_nr(l, end); + data_len = btrfs_token_item_offset(l, start_item, &token) + + btrfs_token_item_size(l, start_item, &token); + data_len = data_len - btrfs_token_item_offset(l, end_item, &token); data_len += sizeof(struct btrfs_item) * nr; WARN_ON(data_len < 0); return data_len; @@ -3363,8 +3409,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, if (push_items == 0) goto out_unlock; - if (!empty && push_items == left_nritems) - WARN_ON(1); + WARN_ON(!empty && push_items == left_nritems); /* push left to right */ right_nritems = btrfs_header_nritems(right); @@ -3602,11 +3647,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, btrfs_set_header_nritems(left, old_left_nritems + push_items); /* fixup right node */ - if (push_items > right_nritems) { - printk(KERN_CRIT "push items %d nr %u\n", push_items, + if (push_items > right_nritems) + WARN(1, KERN_CRIT "push items %d nr %u\n", push_items, right_nritems); - WARN_ON(1); - } if (push_items < right_nritems) { push_space = btrfs_item_offset_nr(right, push_items - 1) - @@ -4562,8 +4605,7 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root * empty a node. */ static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_path *path, int level, int slot, - int tree_mod_log) + struct btrfs_path *path, int level, int slot) { struct extent_buffer *parent = path->nodes[level]; u32 nritems; @@ -4571,7 +4613,7 @@ static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, nritems = btrfs_header_nritems(parent); if (slot != nritems - 1) { - if (tree_mod_log && level) + if (level) tree_mod_log_eb_move(root->fs_info, parent, slot, slot + 1, nritems - slot - 1); memmove_extent_buffer(parent, @@ -4579,7 +4621,7 @@ static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, btrfs_node_key_ptr_offset(slot + 1), sizeof(struct btrfs_key_ptr) * (nritems - slot - 1)); - } else if (tree_mod_log && level) { + } else if (level) { ret = tree_mod_log_insert_key(root->fs_info, parent, slot, MOD_LOG_KEY_REMOVE); BUG_ON(ret < 0); @@ -4616,7 +4658,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans, struct extent_buffer *leaf) { WARN_ON(btrfs_header_generation(leaf) != trans->transid); - del_ptr(trans, root, path, 1, path->slots[1], 1); + del_ptr(trans, root, path, 1, path->slots[1]); /* * btrfs_free_extent is expensive, we want to make sure we @@ -5083,13 +5125,13 @@ int btrfs_compare_trees(struct btrfs_root *left_root, right_path->search_commit_root = 1; right_path->skip_locking = 1; - spin_lock(&left_root->root_times_lock); + spin_lock(&left_root->root_item_lock); left_start_ctransid = btrfs_root_ctransid(&left_root->root_item); - spin_unlock(&left_root->root_times_lock); + spin_unlock(&left_root->root_item_lock); - spin_lock(&right_root->root_times_lock); + spin_lock(&right_root->root_item_lock); right_start_ctransid = btrfs_root_ctransid(&right_root->root_item); - spin_unlock(&right_root->root_times_lock); + spin_unlock(&right_root->root_item_lock); trans = btrfs_join_transaction(left_root); if (IS_ERR(trans)) { @@ -5184,15 +5226,15 @@ int btrfs_compare_trees(struct btrfs_root *left_root, goto out; } - spin_lock(&left_root->root_times_lock); + spin_lock(&left_root->root_item_lock); ctransid = btrfs_root_ctransid(&left_root->root_item); - spin_unlock(&left_root->root_times_lock); + spin_unlock(&left_root->root_item_lock); if (ctransid != left_start_ctransid) left_start_ctransid = 0; - spin_lock(&right_root->root_times_lock); + spin_lock(&right_root->root_item_lock); ctransid = btrfs_root_ctransid(&right_root->root_item); - spin_unlock(&right_root->root_times_lock); + spin_unlock(&right_root->root_item_lock); if (ctransid != right_start_ctransid) right_start_ctransid = 0; @@ -5456,6 +5498,139 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) return btrfs_next_old_leaf(root, path, 0); } +/* Release the path up to but not including the given level */ +static void btrfs_release_level(struct btrfs_path *path, int level) +{ + int i; + + for (i = 0; i < level; i++) { + path->slots[i] = 0; + if (!path->nodes[i]) + continue; + if (path->locks[i]) { + btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]); + path->locks[i] = 0; + } + free_extent_buffer(path->nodes[i]); + path->nodes[i] = NULL; + } +} + +/* + * This function assumes 2 things + * + * 1) You are using path->keep_locks + * 2) You are not inserting items. + * + * If either of these are not true do not use this function. If you need a next + * leaf with either of these not being true then this function can be easily + * adapted to do that, but at the moment these are the limitations. + */ +int btrfs_next_leaf_write(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + int del) +{ + struct extent_buffer *b; + struct btrfs_key key; + u32 nritems; + int level = 1; + int slot; + int ret = 1; + int write_lock_level = BTRFS_MAX_LEVEL; + int ins_len = del ? -1 : 0; + + WARN_ON(!(path->keep_locks || path->really_keep_locks)); + + nritems = btrfs_header_nritems(path->nodes[0]); + btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); + + while (path->nodes[level]) { + nritems = btrfs_header_nritems(path->nodes[level]); + if (!(path->locks[level] & BTRFS_WRITE_LOCK)) { +search: + btrfs_release_path(path); + ret = btrfs_search_slot(trans, root, &key, path, + ins_len, 1); + if (ret < 0) + goto out; + level = 1; + continue; + } + + if (path->slots[level] >= nritems - 1) { + level++; + continue; + } + + btrfs_release_level(path, level); + break; + } + + if (!path->nodes[level]) { + ret = 1; + goto out; + } + + path->slots[level]++; + b = path->nodes[level]; + + while (b) { + level = btrfs_header_level(b); + + if (!should_cow_block(trans, root, b)) + goto cow_done; + + btrfs_set_path_blocking(path); + ret = btrfs_cow_block(trans, root, b, + path->nodes[level + 1], + path->slots[level + 1], &b); + if (ret) + goto out; +cow_done: + path->nodes[level] = b; + btrfs_clear_path_blocking(path, NULL, 0); + if (level != 0) { + ret = setup_nodes_for_search(trans, root, path, b, + level, ins_len, + &write_lock_level); + if (ret == -EAGAIN) + goto search; + if (ret) + goto out; + + b = path->nodes[level]; + slot = path->slots[level]; + + ret = read_block_for_search(trans, root, path, + &b, level, slot, &key, 0); + if (ret == -EAGAIN) + goto search; + if (ret) + goto out; + level = btrfs_header_level(b); + if (!btrfs_try_tree_write_lock(b)) { + btrfs_set_path_blocking(path); + btrfs_tree_lock(b); + btrfs_clear_path_blocking(path, b, + BTRFS_WRITE_LOCK); + } + path->locks[level] = BTRFS_WRITE_LOCK; + path->nodes[level] = b; + path->slots[level] = 0; + } else { + path->slots[level] = 0; + ret = 0; + break; + } + } + +out: + if (ret) + btrfs_release_path(path); + + return ret; +} + int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, u64 time_seq) { |