summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/Makefile2
-rw-r--r--fs/btrfs/ctree.c195
-rw-r--r--fs/btrfs/ctree.h19
-rw-r--r--fs/btrfs/disk-io.c8
-rw-r--r--fs/btrfs/extent-tree.c53
-rw-r--r--fs/btrfs/file.c2
-rw-r--r--fs/btrfs/inode.c33
-rw-r--r--fs/btrfs/ioctl.h2
-rw-r--r--fs/btrfs/transaction.c105
-rw-r--r--fs/btrfs/transaction.h1
-rw-r--r--fs/btrfs/tree-defrag.c222
11 files changed, 553 insertions, 89 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index a4e2df66bdb..9321438c5ee 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -4,7 +4,7 @@ ifneq ($(KERNELRELEASE),)
obj-m := btrfs.o
btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
hash.o file-item.o inode-item.o inode-map.o disk-io.o \
- transaction.o bit-radix.o inode.o file.o
+ transaction.o bit-radix.o inode.o file.o tree-defrag.o
#btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \
# root-tree.o dir-item.o hash.o file-item.o inode-item.o \
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 7a08491e208..c7e47e77723 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -65,44 +65,44 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
memset(p, 0, sizeof(*p));
}
-static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
+static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct buffer_head *buf, struct buffer_head
*parent, int parent_slot, struct buffer_head
- **cow_ret)
+ **cow_ret, u64 search_start, u64 empty_size)
{
struct buffer_head *cow;
struct btrfs_node *cow_node;
- int ret;
+ int ret = 0;
+ int different_trans = 0;
+ WARN_ON(root->ref_cows && trans->transid != root->last_trans);
WARN_ON(!buffer_uptodate(buf));
- if (trans->transaction != root->fs_info->running_transaction) {
- printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
- root->fs_info->running_transaction->transid);
- WARN_ON(1);
- }
- if (trans->transid != root->fs_info->generation) {
- printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
- root->fs_info->generation);
- WARN_ON(1);
- }
- if (btrfs_header_generation(btrfs_buffer_header(buf)) ==
- trans->transid) {
- *cow_ret = buf;
- return 0;
- }
- cow = btrfs_alloc_free_block(trans, root, buf->b_blocknr);
+ cow = btrfs_alloc_free_block(trans, root, search_start, empty_size);
if (IS_ERR(cow))
return PTR_ERR(cow);
+
cow_node = btrfs_buffer_node(cow);
if (buf->b_size != root->blocksize || cow->b_size != root->blocksize)
WARN_ON(1);
+
memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize);
btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow));
btrfs_set_header_generation(&cow_node->header, trans->transid);
btrfs_set_header_owner(&cow_node->header, root->root_key.objectid);
- ret = btrfs_inc_ref(trans, root, buf);
- if (ret)
- return ret;
+
+ WARN_ON(btrfs_header_generation(btrfs_buffer_header(buf)) >
+ trans->transid);
+ if (btrfs_header_generation(btrfs_buffer_header(buf)) !=
+ trans->transid) {
+ different_trans = 1;
+ ret = btrfs_inc_ref(trans, root, buf);
+ if (ret)
+ return ret;
+ } else {
+ WARN_ON(!root->ref_cows);
+ clean_tree_block(trans, root, buf);
+ }
+
if (buf == root->node) {
root->node = cow;
get_bh(cow);
@@ -114,6 +114,8 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot,
bh_blocknr(cow));
btrfs_mark_buffer_dirty(parent);
+ WARN_ON(btrfs_header_generation(btrfs_buffer_header(parent)) !=
+ trans->transid);
btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1);
}
btrfs_block_release(root, buf);
@@ -122,6 +124,115 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
return 0;
}
+int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct buffer_head *buf, struct buffer_head
+ *parent, int parent_slot, struct buffer_head
+ **cow_ret)
+{
+ u64 search_start;
+ if (trans->transaction != root->fs_info->running_transaction) {
+ printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
+ root->fs_info->running_transaction->transid);
+ WARN_ON(1);
+ }
+ if (trans->transid != root->fs_info->generation) {
+ printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
+ root->fs_info->generation);
+ WARN_ON(1);
+ }
+ if (btrfs_header_generation(btrfs_buffer_header(buf)) ==
+ trans->transid) {
+ *cow_ret = buf;
+ return 0;
+ }
+
+ search_start = bh_blocknr(buf) & ~((u64)65535);
+ return __btrfs_cow_block(trans, root, buf, parent,
+ parent_slot, cow_ret, search_start, 0);
+}
+
+static int close_blocks(u64 blocknr, u64 other)
+{
+ if (blocknr < other && other - blocknr < 8)
+ return 1;
+ if (blocknr > other && blocknr - other < 8)
+ return 1;
+ return 0;
+}
+
+int btrfs_realloc_node(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct buffer_head *parent,
+ int cache_only)
+{
+ struct btrfs_node *parent_node;
+ struct buffer_head *cur_bh;
+ struct buffer_head *tmp_bh;
+ u64 blocknr;
+ u64 search_start = 0;
+ u64 other;
+ u32 parent_nritems;
+ int start_slot;
+ int end_slot;
+ int i;
+ int err = 0;
+
+ if (trans->transaction != root->fs_info->running_transaction) {
+ printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
+ root->fs_info->running_transaction->transid);
+ WARN_ON(1);
+ }
+ if (trans->transid != root->fs_info->generation) {
+ printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
+ root->fs_info->generation);
+ WARN_ON(1);
+ }
+ parent_node = btrfs_buffer_node(parent);
+ parent_nritems = btrfs_header_nritems(&parent_node->header);
+
+ start_slot = 0;
+ end_slot = parent_nritems;
+
+ if (parent_nritems == 1)
+ return 0;
+
+ for (i = start_slot; i < end_slot; i++) {
+ int close = 1;
+ blocknr = btrfs_node_blockptr(parent_node, i);
+ if (i > 0) {
+ other = btrfs_node_blockptr(parent_node, i - 1);
+ close = close_blocks(blocknr, other);
+ }
+ if (close && i < end_slot - 1) {
+ other = btrfs_node_blockptr(parent_node, i + 1);
+ close = close_blocks(blocknr, other);
+ }
+ if (close)
+ continue;
+
+ cur_bh = btrfs_find_tree_block(root, blocknr);
+ if (!cur_bh || !buffer_uptodate(cur_bh) ||
+ buffer_locked(cur_bh)) {
+ if (cache_only) {
+ brelse(cur_bh);
+ continue;
+ }
+ brelse(cur_bh);
+ cur_bh = read_tree_block(root, blocknr);
+ }
+ if (search_start == 0) {
+ search_start = bh_blocknr(cur_bh) & ~((u64)65535);
+ }
+ err = __btrfs_cow_block(trans, root, cur_bh, parent, i,
+ &tmp_bh, search_start,
+ min(8, end_slot - i));
+ if (err)
+ break;
+ search_start = bh_blocknr(tmp_bh);
+ brelse(tmp_bh);
+ }
+ return err;
+}
+
/*
* The leaf data grows from end-to-front in the node.
* this returns the address of the start of the last item,
@@ -221,6 +332,7 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
parent_slot = path->slots[level + 1];
parent_key = &parent->ptrs[parent_slot].key;
+
BUG_ON(memcmp(parent_key, &leaf->items[0].key,
sizeof(struct btrfs_disk_key)));
BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
@@ -643,7 +755,7 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans,
* readahead one full node of leaves
*/
static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
- int slot)
+ int level, int slot)
{
struct btrfs_node *node;
int i;
@@ -659,10 +771,13 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
unsigned long gang[8];
struct buffer_head *bh;
- if (!path->nodes[1])
+ if (level == 0)
+ return;
+
+ if (!path->nodes[level])
return;
- node = btrfs_buffer_node(path->nodes[1]);
+ node = btrfs_buffer_node(path->nodes[level]);
search = btrfs_node_blockptr(node, slot);
bh = btrfs_find_tree_block(root, search);
if (bh) {
@@ -690,7 +805,7 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
for (i = 0; i < ret; i++) {
blocknr = gang[i];
clear_radix_bit(&found, blocknr);
- if (nread > 64)
+ if (nread > 32)
continue;
if (direction > 0 && cluster_start <= blocknr &&
cluster_start + 8 > blocknr) {
@@ -726,7 +841,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
struct buffer_head *b;
struct buffer_head *cow_buf;
struct btrfs_node *c;
- struct btrfs_root_item *root_item = &root->root_item;
u64 blocknr;
int slot;
int ret;
@@ -734,11 +848,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
int should_reada = p->reada;
u8 lowest_level = 0;
- if (btrfs_root_refs(root_item) == 0 && root->ref_cows) {
- lowest_level = root_item->drop_level;
- WARN_ON(ins_len || cow);
- }
-
+ lowest_level = p->lowest_level;
+ WARN_ON(lowest_level && ins_len);
WARN_ON(p->nodes[0] != NULL);
WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
again:
@@ -798,8 +909,8 @@ again:
if (level == lowest_level)
break;
blocknr = btrfs_node_blockptr(c, slot);
- if (level == 1 && should_reada)
- reada_for_search(root, p, slot);
+ if (should_reada)
+ reada_for_search(root, p, level, slot);
b = read_tree_block(root, btrfs_node_blockptr(c, slot));
} else {
@@ -960,7 +1071,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root
BUG_ON(path->nodes[level]);
BUG_ON(path->nodes[level-1] != root->node);
- t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr);
+ t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr, 0);
if (IS_ERR(t))
return PTR_ERR(t);
c = btrfs_buffer_node(t);
@@ -1070,7 +1181,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
}
c_nritems = btrfs_header_nritems(&c->header);
- split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr);
+ split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr, 0);
if (IS_ERR(split_buffer))
return PTR_ERR(split_buffer);
@@ -1461,7 +1572,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
nritems = btrfs_header_nritems(&l->header);
mid = (nritems + 1)/ 2;
- right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr);
+ right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0);
if (IS_ERR(right_buffer))
return PTR_ERR(right_buffer);
@@ -1560,7 +1671,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
if (!double_split)
return ret;
- right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr);
+ right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0);
if (IS_ERR(right_buffer))
return PTR_ERR(right_buffer);
@@ -1988,8 +2099,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
blocknr = btrfs_node_blockptr(c_node, slot);
if (next)
btrfs_block_release(root, next);
- if (level == 1 && path->reada)
- reada_for_search(root, path, slot);
+ if (path->reada)
+ reada_for_search(root, path, level, slot);
next = read_tree_block(root, blocknr);
break;
}
@@ -2002,8 +2113,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
path->slots[level] = 0;
if (!level)
break;
- if (level == 1 && path->reada)
- reada_for_search(root, path, slot);
+ if (path->reada)
+ reada_for_search(root, path, level, slot);
next = read_tree_block(root,
btrfs_node_blockptr(btrfs_buffer_node(next), 0));
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index c5a18d5d7f7..42aa20301bc 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -178,6 +178,7 @@ struct btrfs_path {
struct buffer_head *nodes[BTRFS_MAX_LEVEL];
int slots[BTRFS_MAX_LEVEL];
int reada;
+ int lowest_level;
};
/*
@@ -338,6 +339,9 @@ struct btrfs_root {
u64 highest_inode;
u64 last_inode_alloc;
int ref_cows;
+ struct btrfs_key defrag_progress;
+ int defrag_running;
+ int defrag_level;
};
/* the lower bits in the key flags defines the item type */
@@ -1031,10 +1035,11 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 hint);
+ struct btrfs_root *root, u64 hint,
+ u64 empty_size);
int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 owner,
- u64 num_blocks, u64 search_start,
+ u64 num_blocks, u64 empty_size, u64 search_start,
u64 search_end, struct btrfs_key *ins, int data);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct buffer_head *buf);
@@ -1051,6 +1056,10 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
int btrfs_free_block_groups(struct btrfs_fs_info *info);
int btrfs_read_block_groups(struct btrfs_root *root);
/* ctree.c */
+int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct buffer_head *buf, struct buffer_head
+ *parent, int parent_slot, struct buffer_head
+ **cow_ret);
int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_path *path, u32 data_size);
int btrfs_truncate_item(struct btrfs_trans_handle *trans,
@@ -1060,6 +1069,9 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_key *key, struct btrfs_path *p, int
ins_len, int cow);
+int btrfs_realloc_node(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct buffer_head *parent,
+ int cache_only);
void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p);
struct btrfs_path *btrfs_alloc_path(void);
void btrfs_free_path(struct btrfs_path *p);
@@ -1171,4 +1183,7 @@ extern struct file_operations btrfs_file_operations;
int btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
u64 start, u64 end, u64 *hint_block);
+/* tree-defrag.c */
+int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, int cache_only);
#endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 60db85bc4ba..c948416fea3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -273,7 +273,9 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct buffer_head *buf)
{
WARN_ON(atomic_read(&buf->b_count) == 0);
+ lock_buffer(buf);
clear_buffer_dirty(buf);
+ unlock_buffer(buf);
return 0;
}
@@ -294,6 +296,9 @@ static int __setup_root(int blocksize,
root->last_inode_alloc = 0;
memset(&root->root_key, 0, sizeof(root->root_key));
memset(&root->root_item, 0, sizeof(root->root_item));
+ memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
+ root->defrag_running = 0;
+ root->defrag_level = 0;
root->root_key.objectid = objectid;
return 0;
}
@@ -585,6 +590,7 @@ int close_ctree(struct btrfs_root *root)
fs_info->closing = 1;
btrfs_transaction_flush_work(root);
mutex_lock(&fs_info->fs_mutex);
+ btrfs_defrag_dirty_roots(root->fs_info);
trans = btrfs_start_transaction(root, 1);
ret = btrfs_commit_transaction(trans, root);
/* run commit again to drop the original snapshot */
@@ -616,7 +622,9 @@ void btrfs_mark_buffer_dirty(struct buffer_head *bh)
{
struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root;
u64 transid = btrfs_header_generation(btrfs_buffer_header(bh));
+
WARN_ON(!atomic_read(&bh->b_count));
+
if (transid != root->fs_info->generation) {
printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n",
(unsigned long long)bh->b_blocknr,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5d4d5d8db8e..26b8d340649 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -23,7 +23,8 @@
#include "transaction.h"
static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
- *orig_root, u64 num_blocks, u64 search_start,
+ *orig_root, u64 num_blocks, u64 empty_size,
+ u64 search_start,
u64 search_end, u64 hint_block,
struct btrfs_key *ins, u64 exclude_start,
u64 exclude_nr, int data);
@@ -379,7 +380,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0,
+ ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0, 0,
(u64)-1, 0, &ins, 0, 0, 0);
if (ret) {
btrfs_free_path(path);
@@ -533,7 +534,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
struct btrfs_block_group_item *bi;
struct btrfs_key ins;
- ret = find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins,
+ ret = find_free_extent(trans, extent_root, 0, 0, 0, (u64)-1, 0, &ins,
0, 0, 0);
/* FIXME, set bit to recalc cache groups on next mount */
if (ret)
@@ -708,6 +709,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
static int try_remove_page(struct address_space *mapping, unsigned long index)
{
int ret;
+ return 0;
ret = invalidate_mapping_pages(mapping, index, index);
return ret;
}
@@ -866,7 +868,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
if (!path)
return -ENOMEM;
- ret = find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0, 0, 0);
+ ret = find_free_extent(trans, root, 0, 0, 0, (u64)-1, 0, &ins, 0, 0, 0);
if (ret) {
btrfs_free_path(path);
return ret;
@@ -983,8 +985,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
* Any available blocks before search_start are skipped.
*/
static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
- *orig_root, u64 num_blocks, u64 search_start, u64
- search_end, u64 hint_block,
+ *orig_root, u64 num_blocks, u64 empty_size,
+ u64 search_start, u64 search_end, u64 hint_block,
struct btrfs_key *ins, u64 exclude_start,
u64 exclude_nr, int data)
{
@@ -1042,6 +1044,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
data, 1);
}
+ total_needed += empty_size;
path = btrfs_alloc_path();
check_failed:
@@ -1157,9 +1160,11 @@ check_pending:
goto error;
}
search_start = orig_search_start;
- if (wrapped)
+ if (wrapped) {
+ if (!full_scan)
+ total_needed -= empty_size;
full_scan = 1;
- else
+ } else
wrapped = 1;
goto new_group;
}
@@ -1238,9 +1243,11 @@ new_group:
ret = -ENOSPC;
goto error;
}
- if (wrapped)
+ if (wrapped) {
+ if (!full_scan)
+ total_needed -= empty_size;
full_scan = 1;
- else
+ } else
wrapped = 1;
}
block_group = btrfs_lookup_block_group(info, search_start);
@@ -1264,7 +1271,7 @@ error:
*/
int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 owner,
- u64 num_blocks, u64 hint_block,
+ u64 num_blocks, u64 empty_size, u64 hint_block,
u64 search_end, struct btrfs_key *ins, int data)
{
int ret;
@@ -1303,7 +1310,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
* in the correct block group.
*/
if (data) {
- ret = find_free_extent(trans, root, 0, 0,
+ ret = find_free_extent(trans, root, 0, 0, 0,
search_end, 0, &prealloc_key, 0, 0, 0);
BUG_ON(ret);
if (ret)
@@ -1313,8 +1320,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
}
/* do the real allocation */
- ret = find_free_extent(trans, root, num_blocks, search_start,
- search_end, hint_block, ins,
+ ret = find_free_extent(trans, root, num_blocks, empty_size,
+ search_start, search_end, hint_block, ins,
exclude_start, exclude_nr, data);
BUG_ON(ret);
if (ret)
@@ -1333,7 +1340,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
exclude_start = ins->objectid;
exclude_nr = ins->offset;
hint_block = exclude_start + exclude_nr;
- ret = find_free_extent(trans, root, 0, search_start,
+ ret = find_free_extent(trans, root, 0, 0, search_start,
search_end, hint_block,
&prealloc_key, exclude_start,
exclude_nr, 0);
@@ -1368,14 +1375,16 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
* returns the tree buffer or NULL.
*/
struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 hint)
+ struct btrfs_root *root, u64 hint,
+ u64 empty_size)
{
struct btrfs_key ins;
int ret;
struct buffer_head *buf;
ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
- 1, hint, (unsigned long)-1, &ins, 0);
+ 1, empty_size, hint,
+ (unsigned long)-1, &ins, 0);
if (ret) {
BUG_ON(ret > 0);
return ERR_PTR(ret);
@@ -1385,6 +1394,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
btrfs_free_extent(trans, root, ins.objectid, 1, 0);
return ERR_PTR(-ENOMEM);
}
+ WARN_ON(buffer_dirty(buf));
set_buffer_uptodate(buf);
set_buffer_checked(buf);
set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index);
@@ -1591,13 +1601,15 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
struct btrfs_key key;
struct btrfs_disk_key *found_key;
struct btrfs_node *node;
+
btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
+ level = root_item->drop_level;
+ path->lowest_level = level;
wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0) {
+ if (wret < 0) {
ret = wret;
goto out;
}
- level = root_item->drop_level;
node = btrfs_buffer_node(path->nodes[level]);
found_key = &node->ptrs[path->slots[level]].key;
WARN_ON(memcmp(found_key, &root_item->drop_progress,
@@ -1617,8 +1629,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
ret = wret;
num_walks++;
if (num_walks > 10) {
- struct btrfs_key key;
- btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
ret = -EAGAIN;
get_bh(root->node);
break;
@@ -1627,6 +1637,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
for (i = 0; i <= orig_level; i++) {
if (path->nodes[i]) {
btrfs_block_release(root, path->nodes[i]);
+ path->nodes[i] = 0;
}
}
out:
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 1fe38fe8415..00b118a2db6 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -512,7 +512,7 @@ static int prepare_pages(struct btrfs_root *root,
if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size ||
pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
err = btrfs_alloc_extent(trans, root, inode->i_ino,
- num_blocks, hint_block, (u64)-1,
+ num_blocks, 0, hint_block, (u64)-1,
&ins, 1);
if (err)
goto failed_truncate;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3889032fc44..12aa043b9f6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -554,7 +554,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
&alloc_hint);
if (ret)
goto out;
- ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1,
+ ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, 0,
alloc_hint, (u64)-1, &ins, 1);
if (ret)
goto out;
@@ -1360,7 +1360,7 @@ not_found:
if (create & BTRFS_GET_BLOCK_CREATE) {
struct btrfs_key ins;
ret = btrfs_alloc_extent(trans, root, inode->i_ino,
- 1, alloc_hint, (u64)-1,
+ 1, 0, alloc_hint, (u64)-1,
&ins, 1);
if (ret) {
err = ret;
@@ -1998,7 +1998,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen)
trans = btrfs_start_transaction(root, 1);
BUG_ON(!trans);
- subvol = btrfs_alloc_free_block(trans, root, 0);
+ subvol = btrfs_alloc_free_block(trans, root, 0, 0);
if (IS_ERR(subvol))
return PTR_ERR(subvol);
leaf = btrfs_buffer_leaf(subvol);
@@ -2159,7 +2159,9 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_ioctl_vol_args vol_args;
+ struct btrfs_trans_handle *trans;
int ret = 0;
+ int err;
struct btrfs_dir_item *di;
int namelen;
struct btrfs_path *path;
@@ -2196,6 +2198,31 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
else
ret = create_snapshot(root, vol_args.name, namelen);
break;
+
+ case BTRFS_IOC_DEFRAG:
+ mutex_lock(&root->fs_info->fs_mutex);
+ trans = btrfs_start_transaction(root, 1);
+ memset(&root->defrag_progress, 0,
+ sizeof(root->defrag_progress));
+ while (1) {
+ root->defrag_running = 1;
+ err = btrfs_defrag_leaves(trans, root, 0);
+
+ btrfs_end_transaction(trans, root);
+ mutex_unlock(&root->fs_info->fs_mutex);
+
+ btrfs_btree_balance_dirty(root);
+
+ mutex_lock(&root->fs_info->fs_mutex);
+ trans = btrfs_start_transaction(root, 1);
+ if (err != -EAGAIN)
+ break;
+ }
+ root->defrag_running = 0;
+ btrfs_end_transaction(trans, root);
+ mutex_unlock(&root->fs_info->fs_mutex);
+ ret = 0;
+ break;
default:
return -ENOTTY;
}
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 23bed48c51a..8bc47dec286 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -28,6 +28,6 @@ struct btrfs_ioctl_vol_args {
#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
struct btrfs_ioctl_vol_args)
-#define BTRFS_IOC_ADD_DISK _IOW(BTRFS_IOCTL_MAGIC, 2, \
+#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
struct btrfs_ioctl_vol_args)
#endif
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 498626470a0..338a7199363 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -29,6 +29,7 @@ extern struct kmem_cache *btrfs_transaction_cachep;
static struct workqueue_struct *trans_wq;
#define BTRFS_ROOT_TRANS_TAG 0
+#define BTRFS_ROOT_DEFRAG_TAG 1
static void put_transaction(struct btrfs_transaction *transaction)
{
@@ -69,35 +70,41 @@ static int join_transaction(struct btrfs_root *root)
return 0;
}
+static int record_root_in_trans(struct btrfs_root *root)
+{
+ u64 running_trans_id = root->fs_info->running_transaction->transid;
+ if (root->ref_cows && root->last_trans < running_trans_id) {
+ WARN_ON(root == root->fs_info->extent_root);
+ if (root->root_item.refs != 0) {
+ radix_tree_tag_set(&root->fs_info->fs_roots_radix,
+ (unsigned long)root->root_key.objectid,
+ BTRFS_ROOT_TRANS_TAG);
+ radix_tree_tag_set(&root->fs_info->fs_roots_radix,
+ (unsigned long)root->root_key.objectid,
+ BTRFS_ROOT_DEFRAG_TAG);
+ root->commit_root = root->node;
+ get_bh(root->node);
+ } else {
+ WARN_ON(1);
+ }
+ root->last_trans = running_trans_id;
+ }
+ return 0;
+}
+
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
int num_blocks)
{
struct btrfs_trans_handle *h =
kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
int ret;
- u64 running_trans_id;
mutex_lock(&root->fs_info->trans_mutex);
ret = join_transaction(root);
BUG_ON(ret);
- running_trans_id = root->fs_info->running_transaction->transid;
- if (root != root->fs_info->tree_root && root->last_trans <
- running_trans_id) {
- WARN_ON(root == root->fs_info->extent_root);
- WARN_ON(root->ref_cows != 1);
- if (root->root_item.refs != 0) {
- radix_tree_tag_set(&root->fs_info->fs_roots_radix,
- (unsigned long)root->root_key.objectid,
- BTRFS_ROOT_TRANS_TAG);
- root->commit_root = root->node;
- get_bh(root->node);
- } else {
- WARN_ON(1);
- }
- }
- root->last_trans = running_trans_id;
- h->transid = running_trans_id;
+ record_root_in_trans(root);
+ h->transid = root->fs_info->running_transaction->transid;
h->transaction = root->fs_info->running_transaction;
h->blocks_reserved = num_blocks;
h->blocks_used = 0;
@@ -155,6 +162,15 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
gang[i]);
if (!page)
continue;
+ if (PageWriteback(page)) {
+ if (PageDirty(page))
+ wait_on_page_writeback(page);
+ else {
+ unlock_page(page);
+ page_cache_release(page);
+ continue;
+ }
+ }
err = write_one_page(page, 0);
if (err)
werr = err;
@@ -299,6 +315,58 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans,
return err;
}
+int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info)
+{
+ struct btrfs_root *gang[1];
+ struct btrfs_root *root;
+ struct btrfs_root *tree_root = info->tree_root;
+ struct btrfs_trans_handle *trans;
+ int i;
+ int ret;
+ int err = 0;
+ u64 last = 0;
+
+ trans = btrfs_start_transaction(tree_root, 1);
+ while(1) {
+ ret = radix_tree_gang_lookup_tag(&info->fs_roots_radix,
+ (void **)gang, last,
+ ARRAY_SIZE(gang),
+ BTRFS_ROOT_DEFRAG_TAG);
+ if (ret == 0)
+ break;
+ for (i = 0; i < ret; i++) {
+ root = gang[i];
+ last = root->root_key.objectid + 1;
+ radix_tree_tag_clear(&info->fs_roots_radix,
+ (unsigned long)root->root_key.objectid,
+ BTRFS_ROOT_DEFRAG_TAG);
+ if (root->defrag_running)
+ continue;
+
+ while (1) {
+ mutex_lock(&root->fs_info->trans_mutex);
+ record_root_in_trans(root);
+ mutex_unlock(&root->fs_info->trans_mutex);
+
+ root->defrag_running = 1;
+ err = btrfs_defrag_leaves(trans, root, 1);
+ btrfs_end_transaction(trans, tree_root);
+ mutex_unlock(&info->fs_mutex);
+
+ btrfs_btree_balance_dirty(root);
+
+ mutex_lock(&info->fs_mutex);
+ trans = btrfs_start_transaction(tree_root, 1);
+ if (err != -EAGAIN)
+ break;
+ }
+ root->defrag_running = 0;
+ }
+ }
+ btrfs_end_transaction(trans, tree_root);
+ return err;
+}
+
static int drop_dirty_roots(struct btrfs_root *tree_root,
struct list_head *list)
{
@@ -475,6 +543,7 @@ void btrfs_transaction_cleaner(struct work_struct *work)
goto out;
}
mutex_unlock(&root->fs_info->trans_mutex);
+ btrfs_defrag_dirty_roots(root->fs_info);
trans = btrfs_start_transaction(root, 1);
ret = btrfs_commit_transaction(trans, root);
out:
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index ebf44f3e111..8b2714e6556 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -70,5 +70,6 @@ void btrfs_transaction_queue_work(struct btrfs_root *root, int delay);
void btrfs_init_transaction_sys(void);
void btrfs_exit_transaction_sys(void);
int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list);
+int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info);
#endif
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
new file mode 100644
index 00000000000..15d0a486fb5
--- /dev/null
+++ b/fs/btrfs/tree-defrag.c
@@ -0,0 +1,222 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/sched.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "transaction.h"
+
+static void reada_defrag(struct btrfs_root *root,
+ struct btrfs_node *node)
+{
+ int i;
+ u32 nritems;
+ u64 blocknr;
+ int ret;
+
+ nritems = btrfs_header_nritems(&node->header);
+ for (i = 0; i < nritems; i++) {
+ blocknr = btrfs_node_blockptr(node, i);
+ ret = readahead_tree_block(root, blocknr);
+ if (ret)
+ break;
+ }
+}
+
+static int defrag_walk_down(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path, int *level,
+ int cache_only)
+{
+ struct buffer_head *next;
+ struct buffer_head *cur;
+ u64 blocknr;
+ int ret = 0;
+
+ WARN_ON(*level < 0);
+ WARN_ON(*level >= BTRFS_MAX_LEVEL);
+
+ while(*level > 0) {
+ WARN_ON(*level < 0);
+ WARN_ON(*level >= BTRFS_MAX_LEVEL);
+ cur = path->nodes[*level];
+
+ if (!cache_only && *level > 1 && path->slots[*level] == 0)
+ reada_defrag(root, btrfs_buffer_node(cur));
+
+ if (btrfs_header_level(btrfs_buffer_header(cur)) != *level)
+ WARN_ON(1);
+
+ if (path->slots[*level] >=
+ btrfs_header_nritems(btrfs_buffer_header(cur)))
+ break;
+
+ if (*level == 1) {
+ ret = btrfs_realloc_node(trans, root,
+ path->nodes[*level],
+ cache_only);
+ break;
+ }
+ blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
+ path->slots[*level]);
+
+ if (cache_only) {
+ next = btrfs_find_tree_block(root, blocknr);
+ if (!next || !buffer_uptodate(next) ||
+ buffer_locked(next)) {
+ brelse(next);
+ path->slots[*level]++;
+ continue;
+ }
+ } else {
+ next = read_tree_block(root, blocknr);
+ }
+ ret = btrfs_cow_block(trans, root, next, path->nodes[*level],
+ path->slots[*level], &next);
+ BUG_ON(ret);
+ ret = btrfs_realloc_node(trans, root, next, cache_only);
+ BUG_ON(ret);
+ WARN_ON(*level <= 0);
+ if (path->nodes[*level-1])
+ btrfs_block_release(root, path->nodes[*level-1]);
+ path->nodes[*level-1] = next;
+ *level = btrfs_header_level(btrfs_buffer_header(next));
+ path->slots[*level] = 0;
+ }
+ WARN_ON(*level < 0);
+ WARN_ON(*level >= BTRFS_MAX_LEVEL);
+ btrfs_block_release(root, path->nodes[*level]);
+ path->nodes[*level] = NULL;
+ *level += 1;
+ WARN_ON(ret);
+ return 0;
+}
+
+static int defrag_walk_up(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path, int *level,
+ int cache_only)
+{
+ int i;
+ int slot;
+ struct btrfs_node *node;
+
+ for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
+ slot = path->slots[i];
+ if (slot < btrfs_header_nritems(
+ btrfs_buffer_header(path->nodes[i])) - 1) {
+ path->slots[i]++;
+ *level = i;
+ node = btrfs_buffer_node(path->nodes[i]);
+ WARN_ON(i == 0);
+ btrfs_disk_key_to_cpu(&root->defrag_progress,
+ &node->ptrs[path->slots[i]].key);
+ root->defrag_level = i;
+ return 0;
+ } else {
+ btrfs_block_release(root, path->nodes[*level]);
+ path->nodes[*level] = NULL;
+ *level = i + 1;
+ }
+ }
+ return 1;
+}
+
+int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, int cache_only)
+{
+ struct btrfs_path *path = NULL;
+ struct buffer_head *tmp;
+ int ret = 0;
+ int wret;
+ int level;
+ int orig_level;
+ int i;
+ int num_runs = 0;
+
+ if (root->ref_cows == 0) {
+ goto out;
+ }
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ level = btrfs_header_level(btrfs_buffer_header(root->node));
+ orig_level = level;
+ if (level == 0) {
+ goto out;
+ }
+ if (root->defrag_progress.objectid == 0) {
+ get_bh(root->node);
+ ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp);
+ BUG_ON(ret);
+ ret = btrfs_realloc_node(trans, root, root->node, cache_only);
+ BUG_ON(ret);
+ path->nodes[level] = root->node;
+ path->slots[level] = 0;
+ } else {
+ level = root->defrag_level;
+ path->lowest_level = level;
+ wret = btrfs_search_slot(trans, root, &root->defrag_progress,
+ path, 0, 1);
+
+ if (wret < 0) {
+ ret = wret;
+ goto out;
+ }
+ while(level > 0 && !path->nodes[level])
+ level--;
+ if (!path->nodes[level]) {
+ ret = 0;
+ goto out;
+ }
+ }
+
+ while(1) {
+ wret = defrag_walk_down(trans, root, path, &level, cache_only);
+ if (wret > 0)
+ break;
+ if (wret < 0)
+ ret = wret;
+
+ wret = defrag_walk_up(trans, root, path, &level, cache_only);
+ if (wret > 0)
+ break;
+ if (wret < 0)
+ ret = wret;
+ if (num_runs++ > 8) {
+ ret = -EAGAIN;
+ break;
+ }
+ }
+ for (i = 0; i <= orig_level; i++) {
+ if (path->nodes[i]) {
+ btrfs_block_release(root, path->nodes[i]);
+ path->nodes[i] = 0;
+ }
+ }
+out:
+ if (path)
+ btrfs_free_path(path);
+ if (ret != -EAGAIN) {
+ memset(&root->defrag_progress, 0,
+ sizeof(root->defrag_progress));
+ }
+ return ret;
+}