summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ctree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r--fs/btrfs/ctree.c243
1 files changed, 183 insertions, 60 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index c4bc570a396..c3df14ce2cc 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -17,6 +17,7 @@
*/
#include <linux/sched.h>
+#include <linux/slab.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -279,7 +280,8 @@ int btrfs_block_can_be_shared(struct btrfs_root *root,
static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
- struct extent_buffer *cow)
+ struct extent_buffer *cow,
+ int *last_ref)
{
u64 refs;
u64 owner;
@@ -365,6 +367,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
BUG_ON(ret);
}
clean_tree_block(trans, root, buf);
+ *last_ref = 1;
}
return 0;
}
@@ -391,6 +394,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_disk_key disk_key;
struct extent_buffer *cow;
int level;
+ int last_ref = 0;
int unlock_orig = 0;
u64 parent_start;
@@ -441,7 +445,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
(unsigned long)btrfs_header_fsid(cow),
BTRFS_FSID_SIZE);
- update_ref_for_cow(trans, root, buf, cow);
+ update_ref_for_cow(trans, root, buf, cow, &last_ref);
+
+ if (root->ref_cows)
+ btrfs_reloc_cow_block(trans, root, buf, cow);
if (buf == root->node) {
WARN_ON(parent && parent != buf);
@@ -456,8 +463,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
extent_buffer_get(cow);
spin_unlock(&root->node_lock);
- btrfs_free_tree_block(trans, root, buf->start, buf->len,
- parent_start, root->root_key.objectid, level);
+ btrfs_free_tree_block(trans, root, buf, parent_start,
+ last_ref);
free_extent_buffer(buf);
add_root_to_dirty_list(root);
} else {
@@ -472,8 +479,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_set_node_ptr_generation(parent, parent_slot,
trans->transid);
btrfs_mark_buffer_dirty(parent);
- btrfs_free_tree_block(trans, root, buf->start, buf->len,
- parent_start, root->root_key.objectid, level);
+ btrfs_free_tree_block(trans, root, buf, parent_start,
+ last_ref);
}
if (unlock_orig)
btrfs_tree_unlock(buf);
@@ -948,6 +955,22 @@ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
return bin_search(eb, key, level, slot);
}
+static void root_add_used(struct btrfs_root *root, u32 size)
+{
+ spin_lock(&root->accounting_lock);
+ btrfs_set_root_used(&root->root_item,
+ btrfs_root_used(&root->root_item) + size);
+ spin_unlock(&root->accounting_lock);
+}
+
+static void root_sub_used(struct btrfs_root *root, u32 size)
+{
+ spin_lock(&root->accounting_lock);
+ btrfs_set_root_used(&root->root_item,
+ btrfs_root_used(&root->root_item) - size);
+ spin_unlock(&root->accounting_lock);
+}
+
/* given a node and slot number, this reads the blocks it points to. The
* extent buffer is returned with a reference taken (but unlocked).
* NULL is returned on error.
@@ -1018,7 +1041,11 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
btrfs_tree_lock(child);
btrfs_set_lock_blocking(child);
ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_tree_unlock(child);
+ free_extent_buffer(child);
+ goto enospc;
+ }
spin_lock(&root->node_lock);
root->node = child;
@@ -1033,11 +1060,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
btrfs_tree_unlock(mid);
/* once for the path */
free_extent_buffer(mid);
- ret = btrfs_free_tree_block(trans, root, mid->start, mid->len,
- 0, root->root_key.objectid, level);
+
+ root_sub_used(root, mid->len);
+ btrfs_free_tree_block(trans, root, mid, 0, 1);
/* once for the root ptr */
free_extent_buffer(mid);
- return ret;
+ return 0;
}
if (btrfs_header_nritems(mid) >
BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
@@ -1087,23 +1115,16 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (wret < 0 && wret != -ENOSPC)
ret = wret;
if (btrfs_header_nritems(right) == 0) {
- u64 bytenr = right->start;
- u32 blocksize = right->len;
-
clean_tree_block(trans, root, right);
btrfs_tree_unlock(right);
- free_extent_buffer(right);
- right = NULL;
wret = del_ptr(trans, root, path, level + 1, pslot +
1);
if (wret)
ret = wret;
- wret = btrfs_free_tree_block(trans, root,
- bytenr, blocksize, 0,
- root->root_key.objectid,
- level);
- if (wret)
- ret = wret;
+ root_sub_used(root, right->len);
+ btrfs_free_tree_block(trans, root, right, 0, 1);
+ free_extent_buffer(right);
+ right = NULL;
} else {
struct btrfs_disk_key right_key;
btrfs_node_key(right, &right_key, 0);
@@ -1135,21 +1156,15 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
BUG_ON(wret == 1);
}
if (btrfs_header_nritems(mid) == 0) {
- /* we've managed to empty the middle node, drop it */
- u64 bytenr = mid->start;
- u32 blocksize = mid->len;
-
clean_tree_block(trans, root, mid);
btrfs_tree_unlock(mid);
- free_extent_buffer(mid);
- mid = NULL;
wret = del_ptr(trans, root, path, level + 1, pslot);
if (wret)
ret = wret;
- wret = btrfs_free_tree_block(trans, root, bytenr, blocksize,
- 0, root->root_key.objectid, level);
- if (wret)
- ret = wret;
+ root_sub_used(root, mid->len);
+ btrfs_free_tree_block(trans, root, mid, 0, 1);
+ free_extent_buffer(mid);
+ mid = NULL;
} else {
/* update the parent key to reflect our changes */
struct btrfs_disk_key mid_key;
@@ -1589,7 +1604,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
btrfs_release_path(NULL, p);
ret = -EAGAIN;
- tmp = read_tree_block(root, blocknr, blocksize, gen);
+ tmp = read_tree_block(root, blocknr, blocksize, 0);
if (tmp) {
/*
* If the read above didn't mark this buffer up to date,
@@ -1739,7 +1754,6 @@ again:
p->nodes[level + 1],
p->slots[level + 1], &b);
if (err) {
- free_extent_buffer(b);
ret = err;
goto done;
}
@@ -2075,6 +2089,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
if (IS_ERR(c))
return PTR_ERR(c);
+ root_add_used(root, root->nodesize);
+
memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
btrfs_set_header_nritems(c, 1);
btrfs_set_header_level(c, level);
@@ -2133,6 +2149,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
int nritems;
BUG_ON(!path->nodes[level]);
+ btrfs_assert_tree_locked(path->nodes[level]);
lower = path->nodes[level];
nritems = btrfs_header_nritems(lower);
BUG_ON(slot > nritems);
@@ -2201,6 +2218,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
if (IS_ERR(split))
return PTR_ERR(split);
+ root_add_used(root, root->nodesize);
+
memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
btrfs_set_header_level(split, btrfs_header_level(c));
btrfs_set_header_bytenr(split, split->start);
@@ -2285,12 +2304,17 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root,
return ret;
}
+/*
+ * min slot controls the lowest index we're willing to push to the
+ * right. We'll push up to and including min_slot, but no lower
+ */
static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
int data_size, int empty,
struct extent_buffer *right,
- int free_space, u32 left_nritems)
+ int free_space, u32 left_nritems,
+ u32 min_slot)
{
struct extent_buffer *left = path->nodes[0];
struct extent_buffer *upper = path->nodes[1];
@@ -2308,7 +2332,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
if (empty)
nr = 0;
else
- nr = 1;
+ nr = max_t(u32, 1, min_slot);
if (path->slots[0] >= left_nritems)
push_space += data_size;
@@ -2414,6 +2438,9 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
if (left_nritems)
btrfs_mark_buffer_dirty(left);
+ else
+ clean_tree_block(trans, root, left);
+
btrfs_mark_buffer_dirty(right);
btrfs_item_key(right, &disk_key, 0);
@@ -2447,10 +2474,14 @@ out_unlock:
*
* returns 1 if the push failed because the other node didn't have enough
* room, 0 if everything worked out and < 0 if there were major errors.
+ *
+ * this will push starting from min_slot to the end of the leaf. It won't
+ * push any slot lower than min_slot
*/
static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_path *path, int data_size,
- int empty)
+ *root, struct btrfs_path *path,
+ int min_data_size, int data_size,
+ int empty, u32 min_slot)
{
struct extent_buffer *left = path->nodes[0];
struct extent_buffer *right;
@@ -2492,8 +2523,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
if (left_nritems == 0)
goto out_unlock;
- return __push_leaf_right(trans, root, path, data_size, empty,
- right, free_space, left_nritems);
+ return __push_leaf_right(trans, root, path, min_data_size, empty,
+ right, free_space, left_nritems, min_slot);
out_unlock:
btrfs_tree_unlock(right);
free_extent_buffer(right);
@@ -2503,12 +2534,17 @@ out_unlock:
/*
* push some data in the path leaf to the left, trying to free up at
* least data_size bytes. returns zero if the push worked, nonzero otherwise
+ *
+ * max_slot can put a limit on how far into the leaf we'll push items. The
+ * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the
+ * items
*/
static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, int data_size,
int empty, struct extent_buffer *left,
- int free_space, int right_nritems)
+ int free_space, u32 right_nritems,
+ u32 max_slot)
{
struct btrfs_disk_key disk_key;
struct extent_buffer *right = path->nodes[0];
@@ -2527,9 +2563,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
slot = path->slots[1];
if (empty)
- nr = right_nritems;
+ nr = min(right_nritems, max_slot);
else
- nr = right_nritems - 1;
+ nr = min(right_nritems - 1, max_slot);
for (i = 0; i < nr; i++) {
item = btrfs_item_nr(right, i);
@@ -2659,6 +2695,8 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(left);
if (right_nritems)
btrfs_mark_buffer_dirty(right);
+ else
+ clean_tree_block(trans, root, right);
btrfs_item_key(right, &disk_key, 0);
wret = fixup_low_keys(trans, root, path, &disk_key, 1);
@@ -2668,8 +2706,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
/* then fixup the leaf pointer in the path */
if (path->slots[0] < push_items) {
path->slots[0] += old_left_nritems;
- if (btrfs_header_nritems(path->nodes[0]) == 0)
- clean_tree_block(trans, root, path->nodes[0]);
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
path->nodes[0] = left;
@@ -2690,10 +2726,14 @@ out:
/*
* push some data in the path leaf to the left, trying to free up at
* least data_size bytes. returns zero if the push worked, nonzero otherwise
+ *
+ * max_slot can put a limit on how far into the leaf we'll push items. The
+ * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the
+ * items
*/
static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_path *path, int data_size,
- int empty)
+ *root, struct btrfs_path *path, int min_data_size,
+ int data_size, int empty, u32 max_slot)
{
struct extent_buffer *right = path->nodes[0];
struct extent_buffer *left;
@@ -2739,8 +2779,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
goto out;
}
- return __push_leaf_left(trans, root, path, data_size,
- empty, left, free_space, right_nritems);
+ return __push_leaf_left(trans, root, path, min_data_size,
+ empty, left, free_space, right_nritems,
+ max_slot);
out:
btrfs_tree_unlock(left);
free_extent_buffer(left);
@@ -2833,6 +2874,64 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
}
/*
+ * double splits happen when we need to insert a big item in the middle
+ * of a leaf. A double split can leave us with 3 mostly empty leaves:
+ * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
+ * A B C
+ *
+ * We avoid this by trying to push the items on either side of our target
+ * into the adjacent leaves. If all goes well we can avoid the double split
+ * completely.
+ */
+static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ int data_size)
+{
+ int ret;
+ int progress = 0;
+ int slot;
+ u32 nritems;
+
+ slot = path->slots[0];
+
+ /*
+ * try to push all the items after our slot into the
+ * right leaf
+ */
+ ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot);
+ if (ret < 0)
+ return ret;
+
+ if (ret == 0)
+ progress++;
+
+ nritems = btrfs_header_nritems(path->nodes[0]);
+ /*
+ * our goal is to get our slot at the start or end of a leaf. If
+ * we've done so we're done
+ */
+ if (path->slots[0] == 0 || path->slots[0] == nritems)
+ return 0;
+
+ if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
+ return 0;
+
+ /* try to push all the items before our slot into the next leaf */
+ slot = path->slots[0];
+ ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot);
+ if (ret < 0)
+ return ret;
+
+ if (ret == 0)
+ progress++;
+
+ if (progress)
+ return 0;
+ return 1;
+}
+
+/*
* split the path's leaf in two, making sure there is at least data_size
* available for the resulting leaf level of the path.
*
@@ -2854,6 +2953,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
int wret;
int split;
int num_doubles = 0;
+ int tried_avoid_double = 0;
l = path->nodes[0];
slot = path->slots[0];
@@ -2862,12 +2962,14 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
return -EOVERFLOW;
/* first try to make some room by pushing left and right */
- if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
- wret = push_leaf_right(trans, root, path, data_size, 0);
+ if (data_size) {
+ wret = push_leaf_right(trans, root, path, data_size,
+ data_size, 0, 0);
if (wret < 0)
return wret;
if (wret) {
- wret = push_leaf_left(trans, root, path, data_size, 0);
+ wret = push_leaf_left(trans, root, path, data_size,
+ data_size, 0, (u32)-1);
if (wret < 0)
return wret;
}
@@ -2901,6 +3003,8 @@ again:
if (mid != nritems &&
leaf_space_used(l, mid, nritems - mid) +
data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+ if (data_size && !tried_avoid_double)
+ goto push_for_double;
split = 2;
}
}
@@ -2917,6 +3021,8 @@ again:
if (mid != nritems &&
leaf_space_used(l, mid, nritems - mid) +
data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+ if (data_size && !tried_avoid_double)
+ goto push_for_double;
split = 2 ;
}
}
@@ -2931,10 +3037,10 @@ again:
right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
root->root_key.objectid,
&disk_key, 0, l->start, 0);
- if (IS_ERR(right)) {
- BUG_ON(1);
+ if (IS_ERR(right))
return PTR_ERR(right);
- }
+
+ root_add_used(root, root->leafsize);
memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
btrfs_set_header_bytenr(right, right->start);
@@ -2997,6 +3103,13 @@ again:
}
return ret;
+
+push_for_double:
+ push_for_double_split(trans, root, path, data_size);
+ tried_avoid_double = 1;
+ if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
+ return 0;
+ goto again;
}
static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
@@ -3040,6 +3153,10 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0]))
goto err;
+ /* the leaf has changed, it now has room. return now */
+ if (btrfs_leaf_free_space(root, path->nodes[0]) >= ins_len)
+ goto err;
+
if (key.type == BTRFS_EXTENT_DATA_KEY) {
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
@@ -3049,7 +3166,8 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
btrfs_set_path_blocking(path);
ret = split_leaf(trans, root, &key, path, ins_len, 1);
- BUG_ON(ret);
+ if (ret)
+ goto err;
path->keep_locks = 0;
btrfs_unlock_up_safe(path, 1);
@@ -3791,9 +3909,10 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
*/
btrfs_unlock_up_safe(path, 0);
- ret = btrfs_free_tree_block(trans, root, leaf->start, leaf->len,
- 0, root->root_key.objectid, 0);
- return ret;
+ root_sub_used(root, leaf->len);
+
+ btrfs_free_tree_block(trans, root, leaf, 0, 1);
+ return 0;
}
/*
* delete the item at the leaf level in path. If that empties
@@ -3860,6 +3979,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (leaf == root->node) {
btrfs_set_header_level(leaf, 0);
} else {
+ btrfs_set_path_blocking(path);
+ clean_tree_block(trans, root, leaf);
ret = btrfs_del_leaf(trans, root, path, leaf);
BUG_ON(ret);
}
@@ -3885,13 +4006,15 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
extent_buffer_get(leaf);
btrfs_set_path_blocking(path);
- wret = push_leaf_left(trans, root, path, 1, 1);
+ wret = push_leaf_left(trans, root, path, 1, 1,
+ 1, (u32)-1);
if (wret < 0 && wret != -ENOSPC)
ret = wret;
if (path->nodes[0] == leaf &&
btrfs_header_nritems(leaf)) {
- wret = push_leaf_right(trans, root, path, 1, 1);
+ wret = push_leaf_right(trans, root, path, 1,
+ 1, 1, 0);
if (wret < 0 && wret != -ENOSPC)
ret = wret;
}