From e089f05c18ab36ed5fa7e2319052e03ab800d518 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 16 Mar 2007 16:20:31 -0400 Subject: Btrfs: transaction handles everywhere Signed-off-by: Chris Mason --- fs/btrfs/transaction.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 fs/btrfs/transaction.h (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h new file mode 100644 index 00000000000..3adb6e69fe4 --- /dev/null +++ b/fs/btrfs/transaction.h @@ -0,0 +1,27 @@ +#ifndef __TRANSACTION__ +#define __TRANSACTION__ + +struct btrfs_trans_handle { + u64 transid; + unsigned long blocks_reserved; + unsigned long blocks_used; +}; + +static inline struct btrfs_trans_handle * +btrfs_start_transaction(struct btrfs_root *root, int num_blocks) +{ + struct btrfs_trans_handle *h = malloc(sizeof(*h)); + h->transid = root->root_key.offset; + h->blocks_reserved = num_blocks; + h->blocks_used = 0; + return h; +} + +static inline void btrfs_free_transaction(struct btrfs_root *root, + struct btrfs_trans_handle *handle) +{ + memset(handle, 0, sizeof(*handle)); + free(handle); +} + +#endif -- cgit v1.2.3-70-g09d2 From e20d96d64f9cf9288ffecc9ad4714e91c3b97ca8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 22 Mar 2007 12:13:20 -0400 Subject: Mountable btrfs, with readdir Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 5 +- fs/btrfs/ctree.c | 273 ++++++++++++++++++++++++----------------------- fs/btrfs/ctree.h | 62 ++++++++--- fs/btrfs/dir-item.c | 17 ++- fs/btrfs/disk-io.c | 281 +++++++++++++++---------------------------------- fs/btrfs/disk-io.h | 51 +++++---- fs/btrfs/extent-tree.c | 144 +++++++++++++------------ fs/btrfs/hash.c | 9 ++ fs/btrfs/inode-map.c | 8 +- fs/btrfs/print-tree.c | 20 ++-- fs/btrfs/print-tree.h | 2 +- fs/btrfs/root-tree.c | 4 +- fs/btrfs/super.c | 261 +++++++++++++++++++++++++++++++++++++++++---- fs/btrfs/transaction.h | 4 +- 14 files changed, 651 insertions(+), 490 deletions(-) 
(limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 99e45a54ebd..1a0fb742239 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -2,7 +2,8 @@ ifneq ($(KERNELRELEASE),) # kbuild part of makefile obj-m := btrfs.o -btrfs-y := super.o +btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ + hash.o file-item.o inode-item.o inode-map.o disk-io.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ @@ -14,7 +15,7 @@ else KERNELDIR := /lib/modules/`uname -r`/build all:: - $(MAKE) -C $(KERNELDIR) M=`pwd` modules + $(MAKE) C=1 -C $(KERNELDIR) M=`pwd` modules clean:: rm *.o btrfs.ko endif diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9fbd07c37fd..e690e2bb47d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -7,11 +7,11 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size); static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_buffer *dst, struct btrfs_buffer + *root, struct buffer_head *dst, struct buffer_head *src); static int balance_node_right(struct btrfs_trans_handle *trans, struct - btrfs_root *root, struct btrfs_buffer *dst_buf, - struct btrfs_buffer *src_buf); + btrfs_root *root, struct buffer_head *dst_buf, + struct buffer_head *src_buf); static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot); @@ -32,32 +32,34 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) } static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_buffer *buf, struct btrfs_buffer - *parent, int parent_slot, struct btrfs_buffer + *root, struct buffer_head *buf, struct buffer_head + *parent, int parent_slot, struct 
buffer_head **cow_ret) { - struct btrfs_buffer *cow; + struct buffer_head *cow; + struct btrfs_node *cow_node; - if (!list_empty(&buf->dirty)) { + if (!buffer_dirty(buf)) { *cow_ret = buf; return 0; } cow = btrfs_alloc_free_block(trans, root); - memcpy(&cow->node, &buf->node, root->blocksize); - btrfs_set_header_blocknr(&cow->node.header, cow->blocknr); + cow_node = btrfs_buffer_node(cow); + memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); + btrfs_set_header_blocknr(&cow_node->header, cow->b_blocknr); *cow_ret = cow; btrfs_inc_ref(trans, root, buf); if (buf == root->node) { root->node = cow; - cow->count++; + get_bh(cow); if (buf != root->commit_root) - btrfs_free_extent(trans, root, buf->blocknr, 1, 1); + btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); btrfs_block_release(root, buf); } else { - btrfs_set_node_blockptr(&parent->node, parent_slot, - cow->blocknr); - BUG_ON(list_empty(&parent->dirty)); - btrfs_free_extent(trans, root, buf->blocknr, 1, 1); + btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, + cow->b_blocknr); + BUG_ON(!buffer_dirty(parent)); + btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); } btrfs_block_release(root, buf); return 0; @@ -119,12 +121,12 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path, { int i; struct btrfs_node *parent = NULL; - struct btrfs_node *node = &path->nodes[level]->node; + struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]); int parent_slot; u32 nritems = btrfs_header_nritems(&node->header); if (path->nodes[level + 1]) - parent = &path->nodes[level + 1]->node; + parent = btrfs_buffer_node(path->nodes[level + 1]); parent_slot = path->slots[level + 1]; BUG_ON(nritems == 0); if (parent) { @@ -148,13 +150,13 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, int level) { int i; - struct btrfs_leaf *leaf = &path->nodes[level]->leaf; + struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[level]); struct btrfs_node *parent = NULL; 
int parent_slot; u32 nritems = btrfs_header_nritems(&leaf->header); if (path->nodes[level + 1]) - parent = &path->nodes[level + 1]->node; + parent = btrfs_buffer_node(path->nodes[level + 1]); parent_slot = path->slots[level + 1]; BUG_ON(btrfs_leaf_free_space(root, leaf) < 0); @@ -250,11 +252,11 @@ static int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot) return -1; } -static struct btrfs_buffer *read_node_slot(struct btrfs_root *root, - struct btrfs_buffer *parent_buf, +static struct buffer_head *read_node_slot(struct btrfs_root *root, + struct buffer_head *parent_buf, int slot) { - struct btrfs_node *node = &parent_buf->node; + struct btrfs_node *node = btrfs_buffer_node(parent_buf); if (slot < 0) return NULL; if (slot >= btrfs_header_nritems(&node->header)) @@ -265,10 +267,10 @@ static struct btrfs_buffer *read_node_slot(struct btrfs_root *root, static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - struct btrfs_buffer *right_buf; - struct btrfs_buffer *mid_buf; - struct btrfs_buffer *left_buf; - struct btrfs_buffer *parent_buf = NULL; + struct buffer_head *right_buf; + struct buffer_head *mid_buf; + struct buffer_head *left_buf; + struct buffer_head *parent_buf = NULL; struct btrfs_node *right = NULL; struct btrfs_node *mid; struct btrfs_node *left = NULL; @@ -283,7 +285,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root return 0; mid_buf = path->nodes[level]; - mid = &mid_buf->node; + mid = btrfs_buffer_node(mid_buf); orig_ptr = btrfs_node_blockptr(mid, orig_slot); if (level < BTRFS_MAX_LEVEL - 1) @@ -295,8 +297,8 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root * by promoting the node below to a root */ if (!parent_buf) { - struct btrfs_buffer *child; - u64 blocknr = mid_buf->blocknr; + struct buffer_head *child; + u64 blocknr = mid_buf->b_blocknr; if (btrfs_header_nritems(&mid->header) != 1) return 0; @@ -313,7 
+315,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root clean_tree_block(trans, root, mid_buf); return btrfs_free_extent(trans, root, blocknr, 1, 1); } - parent = &parent_buf->node; + parent = btrfs_buffer_node(parent_buf); if (btrfs_header_nritems(&mid->header) > BTRFS_NODEPTRS_PER_BLOCK(root) / 4) @@ -326,7 +328,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (left_buf) { btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1, &left_buf); - left = &left_buf->node; + left = btrfs_buffer_node(left_buf); orig_slot += btrfs_header_nritems(&left->header); wret = push_node_left(trans, root, left_buf, mid_buf); if (wret < 0) @@ -339,12 +341,12 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (right_buf) { btrfs_cow_block(trans, root, right_buf, parent_buf, pslot + 1, &right_buf); - right = &right_buf->node; + right = btrfs_buffer_node(right_buf); wret = push_node_left(trans, root, mid_buf, right_buf); if (wret < 0) ret = wret; if (btrfs_header_nritems(&right->header) == 0) { - u64 blocknr = right_buf->blocknr; + u64 blocknr = right_buf->b_blocknr; btrfs_block_release(root, right_buf); clean_tree_block(trans, root, right_buf); right_buf = NULL; @@ -360,7 +362,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root memcpy(&parent->ptrs[pslot + 1].key, &right->ptrs[0].key, sizeof(struct btrfs_disk_key)); - BUG_ON(list_empty(&parent_buf->dirty)); + BUG_ON(!buffer_dirty(parent_buf)); } } if (btrfs_header_nritems(&mid->header) == 1) { @@ -381,7 +383,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root } if (btrfs_header_nritems(&mid->header) == 0) { /* we've managed to empty the middle node, drop it */ - u64 blocknr = mid_buf->blocknr; + u64 blocknr = mid_buf->b_blocknr; btrfs_block_release(root, mid_buf); clean_tree_block(trans, root, mid_buf); mid_buf = NULL; @@ -396,13 +398,13 @@ static int balance_level(struct 
btrfs_trans_handle *trans, struct btrfs_root /* update the parent key to reflect our changes */ memcpy(&parent->ptrs[pslot].key, &mid->ptrs[0].key, sizeof(struct btrfs_disk_key)); - BUG_ON(list_empty(&parent_buf->dirty)); + BUG_ON(!buffer_dirty(parent_buf)); } /* update the path */ if (left_buf) { if (btrfs_header_nritems(&left->header) > orig_slot) { - left_buf->count++; // released below + get_bh(left_buf); path->nodes[level] = left_buf; path->slots[level + 1] -= 1; path->slots[level] = orig_slot; @@ -415,8 +417,9 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root } /* double check we haven't messed things up */ check_block(root, path, level); - if (orig_ptr != btrfs_node_blockptr(&path->nodes[level]->node, - path->slots[level])) + if (orig_ptr != + btrfs_node_blockptr(btrfs_buffer_node(path->nodes[level]), + path->slots[level])) BUG(); if (right_buf) @@ -443,8 +446,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow) { - struct btrfs_buffer *b; - struct btrfs_buffer *cow_buf; + struct buffer_head *b; + struct buffer_head *cow_buf; struct btrfs_node *c; int slot; int ret; @@ -452,18 +455,20 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root again: b = root->node; - b->count++; + get_bh(b); while (b) { - level = btrfs_header_level(&b->node.header); + c = btrfs_buffer_node(b); + level = btrfs_header_level(&c->header); if (cow) { int wret; - wret = btrfs_cow_block(trans, root, b, p->nodes[level + - 1], p->slots[level + 1], + wret = btrfs_cow_block(trans, root, b, + p->nodes[level + 1], + p->slots[level + 1], &cow_buf); b = cow_buf; } BUG_ON(!cow && ins_len); - c = &b->node; + c = btrfs_buffer_node(b); p->nodes[level] = b; ret = check_block(root, p, level); if (ret) @@ -480,7 +485,7 @@ again: if (sret) return sret; b = p->nodes[level]; - c = &b->node; + c = btrfs_buffer_node(b); slot = p->slots[level]; } else if 
(ins_len < 0) { int sret = balance_level(trans, root, p, @@ -490,7 +495,7 @@ again: b = p->nodes[level]; if (!b) goto again; - c = &b->node; + c = btrfs_buffer_node(b); slot = p->slots[level]; BUG_ON(btrfs_header_nritems(&c->header) == 1); } @@ -505,11 +510,9 @@ again: if (sret) return sret; } - BUG_ON(root->node->count == 1); return ret; } } - BUG_ON(root->node->count == 1); return 1; } @@ -534,9 +537,9 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root int tslot = path->slots[i]; if (!path->nodes[i]) break; - t = &path->nodes[i]->node; + t = btrfs_buffer_node(path->nodes[i]); memcpy(&t->ptrs[tslot].key, key, sizeof(*key)); - BUG_ON(list_empty(&path->nodes[i]->dirty)); + BUG_ON(!buffer_dirty(path->nodes[i])); if (tslot != 0) break; } @@ -551,11 +554,11 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root * error, and > 0 if there was no room in the left hand block. */ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_buffer *dst_buf, struct - btrfs_buffer *src_buf) + *root, struct buffer_head *dst_buf, struct + buffer_head *src_buf) { - struct btrfs_node *src = &src_buf->node; - struct btrfs_node *dst = &dst_buf->node; + struct btrfs_node *src = btrfs_buffer_node(src_buf); + struct btrfs_node *dst = btrfs_buffer_node(dst_buf); int push_items = 0; int src_nritems; int dst_nritems; @@ -580,8 +583,8 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root } btrfs_set_header_nritems(&src->header, src_nritems - push_items); btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); - BUG_ON(list_empty(&src_buf->dirty)); - BUG_ON(list_empty(&dst_buf->dirty)); + BUG_ON(!buffer_dirty(src_buf)); + BUG_ON(!buffer_dirty(dst_buf)); return ret; } @@ -595,11 +598,11 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root * this will only push up to 1/2 the contents of the left node over */ static int 
balance_node_right(struct btrfs_trans_handle *trans, struct - btrfs_root *root, struct btrfs_buffer *dst_buf, - struct btrfs_buffer *src_buf) + btrfs_root *root, struct buffer_head *dst_buf, + struct buffer_head *src_buf) { - struct btrfs_node *src = &src_buf->node; - struct btrfs_node *dst = &dst_buf->node; + struct btrfs_node *src = btrfs_buffer_node(src_buf); + struct btrfs_node *dst = btrfs_buffer_node(dst_buf); int push_items = 0; int max_push; int src_nritems; @@ -628,8 +631,8 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct btrfs_set_header_nritems(&src->header, src_nritems - push_items); btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); - BUG_ON(list_empty(&src_buf->dirty)); - BUG_ON(list_empty(&dst_buf->dirty)); + BUG_ON(!buffer_dirty(src_buf)); + BUG_ON(!buffer_dirty(dst_buf)); return ret; } @@ -643,7 +646,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - struct btrfs_buffer *t; + struct buffer_head *t; struct btrfs_node *lower; struct btrfs_node *c; struct btrfs_disk_key *lower_key; @@ -652,24 +655,24 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(path->nodes[level-1] != root->node); t = btrfs_alloc_free_block(trans, root); - c = &t->node; + c = btrfs_buffer_node(t); memset(c, 0, root->blocksize); btrfs_set_header_nritems(&c->header, 1); btrfs_set_header_level(&c->header, level); - btrfs_set_header_blocknr(&c->header, t->blocknr); + btrfs_set_header_blocknr(&c->header, t->b_blocknr); btrfs_set_header_parentid(&c->header, - btrfs_header_parentid(&root->node->node.header)); - lower = &path->nodes[level-1]->node; + btrfs_header_parentid(btrfs_buffer_header(root->node))); + lower = btrfs_buffer_node(path->nodes[level-1]); if (btrfs_is_leaf(lower)) lower_key = &((struct btrfs_leaf *)lower)->items[0].key; else lower_key = 
&lower->ptrs[0].key; memcpy(&c->ptrs[0].key, lower_key, sizeof(struct btrfs_disk_key)); - btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->blocknr); + btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->b_blocknr); /* the super has an extra ref to root->node */ btrfs_block_release(root, root->node); root->node = t; - t->count++; + get_bh(t); path->nodes[level] = t; path->slots[level] = 0; return 0; @@ -692,7 +695,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root int nritems; BUG_ON(!path->nodes[level]); - lower = &path->nodes[level]->node; + lower = btrfs_buffer_node(path->nodes[level]); nritems = btrfs_header_nritems(&lower->header); if (slot > nritems) BUG(); @@ -705,7 +708,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root memcpy(&lower->ptrs[slot].key, key, sizeof(struct btrfs_disk_key)); btrfs_set_node_blockptr(lower, slot, blocknr); btrfs_set_header_nritems(&lower->header, nritems + 1); - BUG_ON(list_empty(&path->nodes[level]->dirty)); + BUG_ON(!buffer_dirty(path->nodes[level])); return 0; } @@ -721,9 +724,9 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - struct btrfs_buffer *t; + struct buffer_head *t; struct btrfs_node *c; - struct btrfs_buffer *split_buffer; + struct buffer_head *split_buffer; struct btrfs_node *split; int mid; int ret; @@ -731,7 +734,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root u32 c_nritems; t = path->nodes[level]; - c = &t->node; + c = btrfs_buffer_node(t); if (t == root->node) { /* trying to split the root, lets make a new one */ ret = insert_new_root(trans, root, path, level + 1); @@ -740,11 +743,11 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root } c_nritems = btrfs_header_nritems(&c->header); split_buffer = btrfs_alloc_free_block(trans, root); - split = 
&split_buffer->node; + split = btrfs_buffer_node(split_buffer); btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); - btrfs_set_header_blocknr(&split->header, split_buffer->blocknr); + btrfs_set_header_blocknr(&split->header, split_buffer->b_blocknr); btrfs_set_header_parentid(&split->header, - btrfs_header_parentid(&root->node->node.header)); + btrfs_header_parentid(btrfs_buffer_header(root->node))); mid = (c_nritems + 1) / 2; memcpy(split->ptrs, c->ptrs + mid, (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); @@ -752,9 +755,9 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(&c->header, mid); ret = 0; - BUG_ON(list_empty(&t->dirty)); + BUG_ON(!buffer_dirty(t)); wret = insert_ptr(trans, root, path, &split->ptrs[0].key, - split_buffer->blocknr, path->slots[level + 1] + 1, + split_buffer->b_blocknr, path->slots[level + 1] + 1, level + 1); if (wret) ret = wret; @@ -798,11 +801,12 @@ static int leaf_space_used(struct btrfs_leaf *l, int start, int nr) static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size) { - struct btrfs_buffer *left_buf = path->nodes[0]; - struct btrfs_leaf *left = &left_buf->leaf; + struct buffer_head *left_buf = path->nodes[0]; + struct btrfs_leaf *left = btrfs_buffer_leaf(left_buf); struct btrfs_leaf *right; - struct btrfs_buffer *right_buf; - struct btrfs_buffer *upper; + struct buffer_head *right_buf; + struct buffer_head *upper; + struct btrfs_node *upper_node; int slot; int i; int free_space; @@ -817,12 +821,13 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } upper = path->nodes[1]; - if (slot >= btrfs_header_nritems(&upper->node.header) - 1) { + upper_node = btrfs_buffer_node(upper); + if (slot >= btrfs_header_nritems(&upper_node->header) - 1) { return 1; } - right_buf = read_tree_block(root, btrfs_node_blockptr(&upper->node, - slot + 1)); - right = 
&right_buf->leaf; + right_buf = read_tree_block(root, + btrfs_node_blockptr(btrfs_buffer_node(upper), slot + 1)); + right = btrfs_buffer_leaf(right_buf); free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { btrfs_block_release(root, right_buf); @@ -830,7 +835,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root } /* cow and double check */ btrfs_cow_block(trans, root, right_buf, upper, slot + 1, &right_buf); - right = &right_buf->leaf; + right = btrfs_buffer_leaf(right_buf); free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { btrfs_block_release(root, right_buf); @@ -881,11 +886,11 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root left_nritems -= push_items; btrfs_set_header_nritems(&left->header, left_nritems); - BUG_ON(list_empty(&left_buf->dirty)); - BUG_ON(list_empty(&right_buf->dirty)); - memcpy(&upper->node.ptrs[slot + 1].key, + BUG_ON(!buffer_dirty(left_buf)); + BUG_ON(!buffer_dirty(right_buf)); + memcpy(&upper_node->ptrs[slot + 1].key, &right->items[0].key, sizeof(struct btrfs_disk_key)); - BUG_ON(list_empty(&upper->dirty)); + BUG_ON(!buffer_dirty(upper)); /* then fixup the leaf pointer in the path */ if (path->slots[0] >= left_nritems) { @@ -905,9 +910,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size) { - struct btrfs_buffer *right_buf = path->nodes[0]; - struct btrfs_leaf *right = &right_buf->leaf; - struct btrfs_buffer *t; + struct buffer_head *right_buf = path->nodes[0]; + struct btrfs_leaf *right = btrfs_buffer_leaf(right_buf); + struct buffer_head *t; struct btrfs_leaf *left; int slot; int i; @@ -926,9 +931,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root if (!path->nodes[1]) { return 1; } - t 
= read_tree_block(root, btrfs_node_blockptr(&path->nodes[1]->node, - slot - 1)); - left = &t->leaf; + t = read_tree_block(root, + btrfs_node_blockptr(btrfs_buffer_node(path->nodes[1]), slot - 1)); + left = btrfs_buffer_leaf(t); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { btrfs_block_release(root, t); @@ -937,7 +942,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root /* cow and double check */ btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t); - left = &t->leaf; + left = btrfs_buffer_leaf(t); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { btrfs_block_release(root, t); @@ -999,8 +1004,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root push_space = btrfs_item_offset(right->items + i); } - BUG_ON(list_empty(&t->dirty)); - BUG_ON(list_empty(&right_buf->dirty)); + BUG_ON(!buffer_dirty(t)); + BUG_ON(!buffer_dirty(right_buf)); wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1); if (wret) @@ -1029,13 +1034,13 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size) { - struct btrfs_buffer *l_buf; + struct buffer_head *l_buf; struct btrfs_leaf *l; u32 nritems; int mid; int slot; struct btrfs_leaf *right; - struct btrfs_buffer *right_buffer; + struct buffer_head *right_buffer; int space_needed = data_size + sizeof(struct btrfs_item); int data_copy_size; int rt_data_off; @@ -1053,7 +1058,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root return wret; } l_buf = path->nodes[0]; - l = &l_buf->leaf; + l = btrfs_buffer_leaf(l_buf); /* did the pushes work? 
*/ if (btrfs_leaf_free_space(root, l) >= @@ -1071,7 +1076,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root right_buffer = btrfs_alloc_free_block(trans, root); BUG_ON(!right_buffer); BUG_ON(mid == nritems); - right = &right_buffer->leaf; + right = btrfs_buffer_leaf(right_buffer); memset(&right->header, 0, sizeof(right->header)); if (mid <= slot) { /* FIXME, just alloc a new leaf here */ @@ -1085,10 +1090,10 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root BUG(); } btrfs_set_header_nritems(&right->header, nritems - mid); - btrfs_set_header_blocknr(&right->header, right_buffer->blocknr); + btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr); btrfs_set_header_level(&right->header, 0); btrfs_set_header_parentid(&right->header, - btrfs_header_parentid(&root->node->node.header)); + btrfs_header_parentid(btrfs_buffer_header(root->node))); data_copy_size = btrfs_item_end(l->items + mid) - leaf_data_end(root, l); memcpy(right->items, l->items + mid, @@ -1107,11 +1112,11 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(&l->header, mid); ret = 0; wret = insert_ptr(trans, root, path, &right->items[0].key, - right_buffer->blocknr, path->slots[1] + 1, 1); + right_buffer->b_blocknr, path->slots[1] + 1, 1); if (wret) ret = wret; - BUG_ON(list_empty(&right_buffer->dirty)); - BUG_ON(list_empty(&l_buf->dirty)); + BUG_ON(!buffer_dirty(right_buffer)); + BUG_ON(!buffer_dirty(l_buf)); BUG_ON(path->slots[0] != slot); if (mid <= slot) { btrfs_block_release(root, path->nodes[0]); @@ -1136,7 +1141,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root int slot; int slot_orig; struct btrfs_leaf *leaf; - struct btrfs_buffer *leaf_buf; + struct buffer_head *leaf_buf; u32 nritems; unsigned int data_end; struct btrfs_disk_key disk_key; @@ -1156,7 +1161,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root slot_orig 
= path->slots[0]; leaf_buf = path->nodes[0]; - leaf = &leaf_buf->leaf; + leaf = btrfs_buffer_leaf(leaf_buf); nritems = btrfs_header_nritems(&leaf->header); data_end = leaf_data_end(root, leaf); @@ -1202,7 +1207,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root if (slot == 0) ret = fixup_low_keys(trans, root, path, &disk_key, 1); - BUG_ON(list_empty(&leaf_buf->dirty)); + BUG_ON(!buffer_dirty(leaf_buf)); if (btrfs_leaf_free_space(root, leaf) < 0) BUG(); check_leaf(root, path, 0); @@ -1225,7 +1230,8 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_init_path(&path); ret = btrfs_insert_empty_item(trans, root, &path, cpu_key, data_size); if (!ret) { - ptr = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], u8); + ptr = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), + path.slots[0], u8); memcpy(ptr, data, data_size); } btrfs_release_path(root, &path); @@ -1243,12 +1249,12 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot) { struct btrfs_node *node; - struct btrfs_buffer *parent = path->nodes[level]; + struct buffer_head *parent = path->nodes[level]; u32 nritems; int ret = 0; int wret; - node = &parent->node; + node = btrfs_buffer_node(parent); nritems = btrfs_header_nritems(&node->header); if (slot != nritems -1) { memmove(node->ptrs + slot, node->ptrs + slot + 1, @@ -1257,16 +1263,17 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, nritems--; btrfs_set_header_nritems(&node->header, nritems); if (nritems == 0 && parent == root->node) { - BUG_ON(btrfs_header_level(&root->node->node.header) != 1); + struct btrfs_header *header = btrfs_buffer_header(root->node); + BUG_ON(btrfs_header_level(header) != 1); /* just turn the root into a leaf and break */ - btrfs_set_header_level(&root->node->node.header, 0); + btrfs_set_header_level(header, 0); } else if (slot == 0) { wret = fixup_low_keys(trans, 
root, path, &node->ptrs[0].key, level + 1); if (wret) ret = wret; } - BUG_ON(list_empty(&parent->dirty)); + BUG_ON(!buffer_dirty(parent)); return ret; } @@ -1279,7 +1286,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, { int slot; struct btrfs_leaf *leaf; - struct btrfs_buffer *leaf_buf; + struct buffer_head *leaf_buf; int doff; int dsize; int ret = 0; @@ -1287,7 +1294,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, u32 nritems; leaf_buf = path->nodes[0]; - leaf = &leaf_buf->leaf; + leaf = btrfs_buffer_leaf(leaf_buf); slot = path->slots[0]; doff = btrfs_item_offset(leaf->items + slot); dsize = btrfs_item_size(leaf->items + slot); @@ -1313,14 +1320,13 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (nritems == 0) { if (leaf_buf == root->node) { btrfs_set_header_level(&leaf->header, 0); - BUG_ON(list_empty(&leaf_buf->dirty)); } else { clean_tree_block(trans, root, leaf_buf); wret = del_ptr(trans, root, path, 1, path->slots[1]); if (wret) ret = wret; wret = btrfs_free_extent(trans, root, - leaf_buf->blocknr, 1, 1); + leaf_buf->b_blocknr, 1, 1); if (wret) ret = wret; } @@ -1332,7 +1338,6 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (wret) ret = wret; } - BUG_ON(list_empty(&leaf_buf->dirty)); /* delete the leaf if it is mostly empty */ if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) { @@ -1341,7 +1346,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, * for possible call to del_ptr below */ slot = path->slots[1]; - leaf_buf->count++; + get_bh(leaf_buf); wret = push_leaf_left(trans, root, path, 1); if (wret < 0) ret = wret; @@ -1352,7 +1357,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = wret; } if (btrfs_header_nritems(&leaf->header) == 0) { - u64 blocknr = leaf_buf->blocknr; + u64 blocknr = leaf_buf->b_blocknr; clean_tree_block(trans, root, leaf_buf); wret = 
del_ptr(trans, root, path, 1, slot); if (wret) @@ -1380,19 +1385,21 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) int slot; int level = 1; u64 blocknr; - struct btrfs_buffer *c; - struct btrfs_buffer *next = NULL; + struct buffer_head *c; + struct btrfs_node *c_node; + struct buffer_head *next = NULL; while(level < BTRFS_MAX_LEVEL) { if (!path->nodes[level]) return 1; slot = path->slots[level] + 1; c = path->nodes[level]; - if (slot >= btrfs_header_nritems(&c->node.header)) { + c_node = btrfs_buffer_node(c); + if (slot >= btrfs_header_nritems(&c_node->header)) { level++; continue; } - blocknr = btrfs_node_blockptr(&c->node, slot); + blocknr = btrfs_node_blockptr(c_node, slot); if (next) btrfs_block_release(root, next); next = read_tree_block(root, blocknr); @@ -1408,7 +1415,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) if (!level) break; next = read_tree_block(root, - btrfs_node_blockptr(&next->node, 0)); + btrfs_node_blockptr(btrfs_buffer_node(next), 0)); } return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ae8518cb94b..7748eecd930 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1,6 +1,9 @@ #ifndef __BTRFS__ #define __BTRFS__ +#include +#include + struct btrfs_trans_handle; #define BTRFS_MAGIC "_BtRfS_M" @@ -10,6 +13,12 @@ struct btrfs_trans_handle; #define BTRFS_INODE_MAP_OBJECTID 3 #define BTRFS_FS_TREE_OBJECTID 4 +/* + * we can actually store much bigger names, but lets not confuse the rest + * of linux + */ +#define BTRFS_NAME_LEN 255 + /* * the key defines the order in the tree, and so it also defines (optimal) * block layout. objectid corresonds to the inode number. 
The flags @@ -57,7 +66,7 @@ struct btrfs_header { #define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) #define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->blocksize)) -struct btrfs_buffer; +struct buffer_head; /* * the super block basically lists the main trees of the FS * it currently lacks any block count etc etc @@ -120,7 +129,7 @@ struct btrfs_node { * used while walking the tree. */ struct btrfs_path { - struct btrfs_buffer *nodes[BTRFS_MAX_LEVEL]; + struct buffer_head *nodes[BTRFS_MAX_LEVEL]; int slots[BTRFS_MAX_LEVEL]; }; @@ -211,17 +220,14 @@ struct btrfs_fs_info { struct btrfs_root *inode_root; struct btrfs_key current_insert; struct btrfs_key last_insert; - struct radix_tree_root cache_radix; struct radix_tree_root pinned_radix; - struct list_head trans; - struct list_head cache; u64 last_inode_alloc; u64 last_inode_alloc_dirid; u64 generation; - int cache_size; - int fp; struct btrfs_trans_handle *running_transaction; struct btrfs_super_block *disk_super; + struct buffer_head *sb_buffer; + struct super_block *sb; }; /* @@ -230,8 +236,8 @@ struct btrfs_fs_info { * only for the extent tree. 
*/ struct btrfs_root { - struct btrfs_buffer *node; - struct btrfs_buffer *commit_root; + struct buffer_head *node; + struct buffer_head *commit_root; struct btrfs_root_item root_item; struct btrfs_key root_key; struct btrfs_fs_info *fs_info; @@ -389,6 +395,29 @@ static inline void btrfs_set_inode_compat_flags(struct btrfs_inode_item *i, i->compat_flags = cpu_to_le16(val); } +static inline u32 btrfs_timespec_sec(struct btrfs_inode_timespec *ts) +{ + return le32_to_cpu(ts->sec); +} + +static inline void btrfs_set_timespec_sec(struct btrfs_inode_timespec *ts, + u32 val) +{ + ts->sec = cpu_to_le32(val); +} + +static inline u32 btrfs_timespec_nsec(struct btrfs_inode_timespec *ts) +{ + return le32_to_cpu(ts->nsec); +} + +static inline void btrfs_set_timespec_nsec(struct btrfs_inode_timespec *ts, + u32 val) +{ + ts->nsec = cpu_to_le32(val); +} + + static inline u64 btrfs_extent_owner(struct btrfs_extent_item *ei) { @@ -757,15 +786,20 @@ static inline void btrfs_set_file_extent_num_blocks(struct e->num_blocks = cpu_to_le64(val); } +static inline struct btrfs_root *btrfs_sb(struct super_block *sb) +{ + return sb->s_fs_info; +} + /* helper function to cast into the data area of the leaf. 
*/ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ btrfs_item_offset((leaf)->items + (slot)))) -struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, +struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf); + struct buffer_head *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin); int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root @@ -783,7 +817,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf); int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_buffer *snap); + *root, struct buffer_head *snap); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -800,8 +834,8 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, char *name, int name_len, u64 dir, u64 objectid, u8 type); int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, u64 dir, char *name, - int name_len, int mod); + *root, struct btrfs_path *path, u64 dir, + const char *name, int name_len, int mod); int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, char *name, int name_len); int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 4d8083d92fa..75d6e373e98 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -18,12 +18,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle 
*trans, struct btrfs_root key.objectid = dir; key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); - if (name_len == 1 && *name == '.') - key.offset = 1; - else if (name_len == 2 && name[0] == '.' && name[1] == '.') - key.offset = 2; - else - ret = btrfs_name_hash(name, name_len, &key.offset); + ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); btrfs_init_path(&path); data_size = sizeof(*dir_item) + name_len; @@ -31,7 +26,8 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root if (ret) goto out; - dir_item = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], + dir_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), + path.slots[0], struct btrfs_dir_item); btrfs_set_dir_objectid(dir_item, objectid); btrfs_set_dir_type(dir_item, type); @@ -45,8 +41,8 @@ out: } int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, u64 dir, char *name, - int name_len, int mod) + *root, struct btrfs_path *path, u64 dir, + const char *name, int name_len, int mod) { int ret; struct btrfs_key key; @@ -69,7 +65,8 @@ int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_dir_item *dir_item; char *name_ptr; - dir_item = btrfs_item_ptr(&path->nodes[0]->leaf, path->slots[0], + dir_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_dir_item); if (btrfs_dir_name_len(dir_item) != name_len) return 0; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 05637f9fd7c..df2061a735c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1,165 +1,67 @@ -#define _XOPEN_SOURCE 500 -#include -#include -#include -#include -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" +#include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" -static int allocated_blocks = 0; -int cache_max = 10000; - -static int check_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) +static int 
check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { - if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) + struct btrfs_node *node = btrfs_buffer_node(buf); + if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) BUG(); - if (root->node && btrfs_header_parentid(&buf->node.header) != - btrfs_header_parentid(&root->node->node.header)) + if (root->node && btrfs_header_parentid(&node->header) != + btrfs_header_parentid(btrfs_buffer_header(root->node))) BUG(); return 0; } -static int free_some_buffers(struct btrfs_root *root) +struct buffer_head *alloc_tree_block(struct btrfs_root *root, u64 blocknr) { - struct list_head *node, *next; - struct btrfs_buffer *b; - if (root->fs_info->cache_size < cache_max) - return 0; - list_for_each_safe(node, next, &root->fs_info->cache) { - b = list_entry(node, struct btrfs_buffer, cache); - if (b->count == 1) { - BUG_ON(!list_empty(&b->dirty)); - list_del_init(&b->cache); - btrfs_block_release(root, b); - if (root->fs_info->cache_size < cache_max) - break; - } - } - return 0; + return sb_getblk(root->fs_info->sb, blocknr); } -struct btrfs_buffer *alloc_tree_block(struct btrfs_root *root, u64 blocknr) +struct buffer_head *find_tree_block(struct btrfs_root *root, u64 blocknr) { - struct btrfs_buffer *buf; - int ret; - - buf = malloc(sizeof(struct btrfs_buffer) + root->blocksize); - if (!buf) - return buf; - allocated_blocks++; - buf->blocknr = blocknr; - buf->count = 2; - INIT_LIST_HEAD(&buf->dirty); - free_some_buffers(root); - radix_tree_preload(GFP_KERNEL); - ret = radix_tree_insert(&root->fs_info->cache_radix, blocknr, buf); - radix_tree_preload_end(); - list_add_tail(&buf->cache, &root->fs_info->cache); - root->fs_info->cache_size++; - if (ret) { - free(buf); - return NULL; - } - return buf; + return sb_getblk(root->fs_info->sb, blocknr); } -struct btrfs_buffer *find_tree_block(struct btrfs_root *root, u64 blocknr) +struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) { - struct 
btrfs_buffer *buf; - buf = radix_tree_lookup(&root->fs_info->cache_radix, blocknr); - if (buf) { - buf->count++; - } else { - buf = alloc_tree_block(root, blocknr); - if (!buf) { - BUG(); - return NULL; - } - } - return buf; -} - -struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) -{ - loff_t offset = blocknr * root->blocksize; - struct btrfs_buffer *buf; - int ret; + struct buffer_head *buf = sb_bread(root->fs_info->sb, blocknr); - buf = radix_tree_lookup(&root->fs_info->cache_radix, blocknr); - if (buf) { - buf->count++; - } else { - buf = alloc_tree_block(root, blocknr); - if (!buf) - return NULL; - ret = pread(root->fs_info->fp, &buf->node, root->blocksize, - offset); - if (ret != root->blocksize) { - free(buf); - return NULL; - } - } + if (!buf) + return buf; if (check_tree_block(root, buf)) BUG(); return buf; } int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf) + struct buffer_head *buf) { - if (!list_empty(&buf->dirty)) - return 0; - list_add_tail(&buf->dirty, &root->fs_info->trans); - buf->count++; + mark_buffer_dirty(buf); return 0; } int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf) + struct buffer_head *buf) { - if (!list_empty(&buf->dirty)) { - list_del_init(&buf->dirty); - btrfs_block_release(root, buf); - } + clear_buffer_dirty(buf); return 0; } int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf) + struct buffer_head *buf) { - u64 blocknr = buf->blocknr; - loff_t offset = blocknr * root->blocksize; - int ret; - - if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) - BUG(); - ret = pwrite(root->fs_info->fp, &buf->node, root->blocksize, offset); - if (ret != root->blocksize) - return ret; + mark_buffer_dirty(buf); return 0; } static int __commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - struct btrfs_buffer *b; - int ret 
= 0; - int wret; - while(!list_empty(&root->fs_info->trans)) { - b = list_entry(root->fs_info->trans.next, struct btrfs_buffer, - dirty); - list_del_init(&b->dirty); - wret = write_tree_block(trans, root, b); - if (wret) - ret = wret; - btrfs_block_release(root, b); - } - return ret; + filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping); + return 0; } static int commit_tree_roots(struct btrfs_trans_handle *trans, @@ -172,17 +74,17 @@ static int commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_root *inode_root = fs_info->inode_root; btrfs_set_root_blocknr(&inode_root->root_item, - inode_root->node->blocknr); + inode_root->node->b_blocknr); ret = btrfs_update_root(trans, tree_root, &inode_root->root_key, &inode_root->root_item); BUG_ON(ret); while(1) { old_extent_block = btrfs_root_blocknr(&extent_root->root_item); - if (old_extent_block == extent_root->node->blocknr) + if (old_extent_block == extent_root->node->b_blocknr) break; btrfs_set_root_blocknr(&extent_root->root_item, - extent_root->node->blocknr); + extent_root->node->b_blocknr); ret = btrfs_update_root(trans, tree_root, &extent_root->root_key, &extent_root->root_item); @@ -195,7 +97,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_super_block *s) { int ret = 0; - struct btrfs_buffer *snap = root->commit_root; + struct buffer_head *snap = root->commit_root; struct btrfs_key snap_key; if (root->commit_root == root->node) @@ -204,7 +106,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct memcpy(&snap_key, &root->root_key, sizeof(snap_key)); root->root_key.offset++; - btrfs_set_root_blocknr(&root->root_item, root->node->blocknr); + btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &root->root_key, &root->root_item); BUG_ON(ret); @@ -220,7 +122,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct 
btrfs_finish_extent_commit(trans, root->fs_info->tree_root); root->commit_root = root->node; - root->node->count++; + get_bh(root->node); ret = btrfs_drop_snapshot(trans, root, snap); BUG_ON(ret); @@ -234,7 +136,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct static int __setup_root(struct btrfs_super_block *super, struct btrfs_root *root, struct btrfs_fs_info *fs_info, - u64 objectid, int fp) + u64 objectid) { root->node = NULL; root->commit_root = NULL; @@ -250,11 +152,11 @@ static int find_and_setup_root(struct btrfs_super_block *super, struct btrfs_root *tree_root, struct btrfs_fs_info *fs_info, u64 objectid, - struct btrfs_root *root, int fp) + struct btrfs_root *root) { int ret; - __setup_root(super, root, fs_info, objectid, fp); + __setup_root(super, root, fs_info, objectid); ret = btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); BUG_ON(ret); @@ -265,32 +167,26 @@ static int find_and_setup_root(struct btrfs_super_block *super, return 0; } -struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) +struct btrfs_root *open_ctree(struct super_block *sb, + struct buffer_head *sb_buffer, + struct btrfs_super_block *disk_super) { - int fp; - - fp = open(filename, O_CREAT | O_RDWR, 0600); - if (fp < 0) { - return NULL; - } - return open_ctree_fd(fp, super); -} - -struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super) -{ - struct btrfs_root *root = malloc(sizeof(struct btrfs_root)); - struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root)); - struct btrfs_root *tree_root = malloc(sizeof(struct btrfs_root)); - struct btrfs_root *inode_root = malloc(sizeof(struct btrfs_root)); - struct btrfs_fs_info *fs_info = malloc(sizeof(*fs_info)); + struct btrfs_root *root = kmalloc(sizeof(struct btrfs_root), + GFP_NOFS); + struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), + GFP_NOFS); + struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), + 
GFP_NOFS); + struct btrfs_root *inode_root = kmalloc(sizeof(struct btrfs_root), + GFP_NOFS); + struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), + GFP_NOFS); int ret; - INIT_RADIX_TREE(&fs_info->cache_radix, GFP_KERNEL); + /* FIXME: don't be stupid */ + if (!btrfs_super_root(disk_super)) + return NULL; INIT_RADIX_TREE(&fs_info->pinned_radix, GFP_KERNEL); - INIT_LIST_HEAD(&fs_info->trans); - INIT_LIST_HEAD(&fs_info->cache); - fs_info->cache_size = 0; - fs_info->fp = fp; fs_info->running_transaction = NULL; fs_info->fs_root = root; fs_info->tree_root = tree_root; @@ -298,36 +194,31 @@ struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super) fs_info->inode_root = inode_root; fs_info->last_inode_alloc = 0; fs_info->last_inode_alloc_dirid = 0; - fs_info->disk_super = super; + fs_info->disk_super = disk_super; + fs_info->sb_buffer = sb_buffer; + fs_info->sb = sb; memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); - ret = pread(fp, super, sizeof(struct btrfs_super_block), - BTRFS_SUPER_INFO_OFFSET); - if (ret == 0 || btrfs_super_root(super) == 0) { - BUG(); - return NULL; - } - BUG_ON(ret < 0); - - __setup_root(super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID, fp); - tree_root->node = read_tree_block(tree_root, btrfs_super_root(super)); + __setup_root(disk_super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); + tree_root->node = read_tree_block(tree_root, + btrfs_super_root(disk_super)); BUG_ON(!tree_root->node); - ret = find_and_setup_root(super, tree_root, fs_info, - BTRFS_EXTENT_TREE_OBJECTID, extent_root, fp); + ret = find_and_setup_root(disk_super, tree_root, fs_info, + BTRFS_EXTENT_TREE_OBJECTID, extent_root); BUG_ON(ret); - ret = find_and_setup_root(super, tree_root, fs_info, - BTRFS_INODE_MAP_OBJECTID, inode_root, fp); + ret = find_and_setup_root(disk_super, tree_root, fs_info, + BTRFS_INODE_MAP_OBJECTID, inode_root); BUG_ON(ret); - ret = 
find_and_setup_root(super, tree_root, fs_info, - BTRFS_FS_TREE_OBJECTID, root, fp); + ret = find_and_setup_root(disk_super, tree_root, fs_info, + BTRFS_FS_TREE_OBJECTID, root); BUG_ON(ret); root->commit_root = root->node; - root->node->count++; + get_bh(root->node); root->ref_cows = 1; root->fs_info->generation = root->root_key.offset + 1; return root; @@ -336,8 +227,11 @@ struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super) int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_super_block *s) { + return 0; +#if 0 int ret; - btrfs_set_super_root(s, root->fs_info->tree_root->node->blocknr); + btrfs_set_super_root(s, root->fs_info->tree_root->node->b_blocknr); + ret = pwrite(root->fs_info->fp, s, sizeof(*s), BTRFS_SUPER_INFO_OFFSET); if (ret != sizeof(*s)) { @@ -345,35 +239,38 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } return 0; +#endif } static int drop_cache(struct btrfs_root *root) { + return 0; +#if 0 while(!list_empty(&root->fs_info->cache)) { - struct btrfs_buffer *b = list_entry(root->fs_info->cache.next, - struct btrfs_buffer, + struct buffer_head *b = list_entry(root->fs_info->cache.next, + struct buffer_head, cache); list_del_init(&b->cache); btrfs_block_release(root, b); } return 0; +#endif } -int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s) + +int close_ctree(struct btrfs_root *root) { int ret; struct btrfs_trans_handle *trans; trans = root->fs_info->running_transaction; - btrfs_commit_transaction(trans, root, s); + btrfs_commit_transaction(trans, root, root->fs_info->disk_super); ret = commit_tree_roots(trans, root->fs_info); BUG_ON(ret); ret = __commit_transaction(trans, root); BUG_ON(ret); - write_ctree_super(trans, root, s); + write_ctree_super(trans, root, root->fs_info->disk_super); drop_cache(root); - BUG_ON(!list_empty(&root->fs_info->trans)); - close(root->fs_info->fp); if (root->node) btrfs_block_release(root, 
root->node); if (root->fs_info->extent_root->node) @@ -386,29 +283,17 @@ int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s) btrfs_block_release(root->fs_info->tree_root, root->fs_info->tree_root->node); btrfs_block_release(root, root->commit_root); - free(root); - printf("on close %d blocks are allocated\n", allocated_blocks); + btrfs_block_release(root, root->fs_info->sb_buffer); + kfree(root->fs_info->extent_root); + kfree(root->fs_info->inode_root); + kfree(root->fs_info->tree_root); + kfree(root->fs_info); + kfree(root); return 0; } -void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf) +void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) { - buf->count--; - if (buf->count < 0) - BUG(); - if (buf->count == 0) { - BUG_ON(!list_empty(&buf->cache)); - BUG_ON(!list_empty(&buf->dirty)); - if (!radix_tree_lookup(&root->fs_info->cache_radix, - buf->blocknr)) - BUG(); - radix_tree_delete(&root->fs_info->cache_radix, buf->blocknr); - memset(buf, 0, sizeof(*buf)); - free(buf); - BUG_ON(allocated_blocks == 0); - allocated_blocks--; - BUG_ON(root->fs_info->cache_size == 0); - root->fs_info->cache_size--; - } + brelse(buf); } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index d888cf5c350..7f4bb729b73 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -1,36 +1,41 @@ #ifndef __DISKIO__ #define __DISKIO__ -#include "list.h" -struct btrfs_buffer { - u64 blocknr; - int count; - struct list_head dirty; - struct list_head cache; - union { - struct btrfs_node node; - struct btrfs_leaf leaf; - }; -}; +#include -struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr); -struct btrfs_buffer *find_tree_block(struct btrfs_root *root, u64 blocknr); +#define BTRFS_SUPER_INFO_OFFSET (16 * 1024) + +static inline struct btrfs_node *btrfs_buffer_node(struct buffer_head *bh) +{ + return (struct btrfs_node *)bh->b_data; +} + +static inline struct btrfs_leaf *btrfs_buffer_leaf(struct buffer_head 
*bh) +{ + return (struct btrfs_leaf *)bh->b_data; +} + +static inline struct btrfs_header *btrfs_buffer_header(struct buffer_head *bh) +{ + return &((struct btrfs_node *)bh->b_data)->header; +} + +struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr); +struct buffer_head *find_tree_block(struct btrfs_root *root, u64 blocknr); int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf); + struct buffer_head *buf); int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf); + struct buffer_head *buf); int clean_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_buffer *buf); + struct btrfs_root *root, struct buffer_head *buf); int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_super_block *s); -struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *s); -struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super); -int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s); -void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf); +struct btrfs_root *open_ctree(struct super_block *sb, + struct buffer_head *sb_buffer, + struct btrfs_super_block *disk_super); +int close_ctree(struct btrfs_root *root); +void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf); int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_super_block *s); int mkfs(int fd, u64 num_blocks, u32 blocksize); - -#define BTRFS_SUPER_INFO_OFFSET (16 * 1024) - #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 53a7550b5c1..e3af2c03568 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10,9 +10,8 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root search_end, struct btrfs_key *ins); static int 
finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); -static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root - *extent_root); - +static int del_pending_extents(struct btrfs_trans_handle *trans, struct + btrfs_root *extent_root); /* * pending extents are blocks that we're trying to allocate in the extent * map while trying to grow the map because of other allocations. To avoid @@ -21,6 +20,7 @@ static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root * manner for deletes. */ #define CTREE_EXTENT_PENDING_DEL 0 +#define CTREE_EXTENT_PINNED 1 static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr) @@ -45,15 +45,14 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root if (ret != 0) BUG(); BUG_ON(ret != 0); - l = &path.nodes[0]->leaf; + l = btrfs_buffer_leaf(path.nodes[0]); item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(item); btrfs_set_extent_refs(item, refs + 1); - BUG_ON(list_empty(&path.nodes[0]->dirty)); btrfs_release_path(root->fs_info->extent_root, &path); finish_current_insert(trans, root->fs_info->extent_root); - run_pending(trans, root->fs_info->extent_root); + del_pending_extents(trans, root->fs_info->extent_root); return 0; } @@ -74,7 +73,7 @@ static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root 0, 0); if (ret != 0) BUG(); - l = &path.nodes[0]->leaf; + l = btrfs_buffer_leaf(path.nodes[0]); item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item); *refs = btrfs_extent_refs(item); btrfs_release_path(root->fs_info->extent_root, &path); @@ -82,18 +81,20 @@ static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf) + struct buffer_head *buf) { u64 blocknr; + struct btrfs_node *buf_node; int i; if (!root->ref_cows) return 
0; - if (btrfs_is_leaf(&buf->node)) + buf_node = btrfs_buffer_node(buf); + if (btrfs_is_leaf(buf_node)) return 0; - for (i = 0; i < btrfs_header_nritems(&buf->node.header); i++) { - blocknr = btrfs_node_blockptr(&buf->node, i); + for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) { + blocknr = btrfs_node_blockptr(buf_node, i); inc_block_ref(trans, root, blocknr); } return 0; @@ -108,9 +109,10 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct int i; while(1) { - ret = radix_tree_gang_lookup(&root->fs_info->pinned_radix, + ret = radix_tree_gang_lookup_tag(&root->fs_info->pinned_radix, (void **)gang, 0, - ARRAY_SIZE(gang)); + ARRAY_SIZE(gang), + CTREE_EXTENT_PINNED); if (!ret) break; if (!first) @@ -137,7 +139,7 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_set_extent_refs(&extent_item, 1); btrfs_set_extent_owner(&extent_item, - btrfs_header_parentid(&extent_root->node->node.header)); + btrfs_header_parentid(btrfs_buffer_header(extent_root->node))); ins.offset = 1; ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); @@ -156,11 +158,24 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct return 0; } +static int pin_down_block(struct btrfs_root *root, u64 blocknr, int tag) +{ + int err; + err = radix_tree_insert(&root->fs_info->pinned_radix, + blocknr, (void *)blocknr); + BUG_ON(err); + if (err) + return err; + radix_tree_tag_set(&root->fs_info->pinned_radix, blocknr, + tag); + return 0; +} + /* * remove an extent from the root, returns 0 on success */ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u64 num_blocks, int pin) + *root, u64 blocknr, u64 num_blocks) { struct btrfs_path path; struct btrfs_key key; @@ -171,7 +186,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key ins; u32 refs; - BUG_ON(pin && num_blocks != 1); key.objectid = blocknr; key.flags = 0; 
btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); @@ -186,26 +200,18 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root printk("failed to find %Lu\n", key.objectid); BUG(); } - ei = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], + ei = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], struct btrfs_extent_item); BUG_ON(ei->refs == 0); refs = btrfs_extent_refs(ei) - 1; btrfs_set_extent_refs(ei, refs); if (refs == 0) { u64 super_blocks_used; - if (pin) { - int err; - radix_tree_preload(GFP_KERNEL); - err = radix_tree_insert(&info->pinned_radix, - blocknr, (void *)blocknr); - BUG_ON(err); - radix_tree_preload_end(); - } super_blocks_used = btrfs_super_blocks_used(info->disk_super); btrfs_set_super_blocks_used(info->disk_super, super_blocks_used - num_blocks); ret = btrfs_del_item(trans, extent_root, &path); - if (!pin && extent_root->fs_info->last_insert.objectid > + if (extent_root->fs_info->last_insert.objectid > blocknr) extent_root->fs_info->last_insert.objectid = blocknr; if (ret) @@ -224,39 +230,32 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root) { int ret; - struct btrfs_buffer *gang[4]; + int wret; + int err = 0; + unsigned long gang[4]; int i; + struct radix_tree_root *radix = &extent_root->fs_info->pinned_radix; while(1) { ret = radix_tree_gang_lookup_tag( - &extent_root->fs_info->cache_radix, + &extent_root->fs_info->pinned_radix, (void **)gang, 0, ARRAY_SIZE(gang), CTREE_EXTENT_PENDING_DEL); if (!ret) break; for (i = 0; i < ret; i++) { - ret = __free_extent(trans, extent_root, - gang[i]->blocknr, 1, 1); - radix_tree_tag_clear(&extent_root->fs_info->cache_radix, - gang[i]->blocknr, + radix_tree_tag_set(radix, gang[i], CTREE_EXTENT_PINNED); + radix_tree_tag_clear(radix, gang[i], CTREE_EXTENT_PENDING_DEL); - btrfs_block_release(extent_root, gang[i]); + wret = __free_extent(trans, extent_root, gang[i], 1); + if (wret) + err = wret; } } - return 0; + return err; } 
-static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root - *extent_root) -{ - while(radix_tree_tagged(&extent_root->fs_info->cache_radix, - CTREE_EXTENT_PENDING_DEL)) - del_pending_extents(trans, extent_root); - return 0; -} - - /* * remove an extent from the root, returns 0 on success */ @@ -264,18 +263,21 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin) { struct btrfs_root *extent_root = root->fs_info->extent_root; - struct btrfs_buffer *t; + struct buffer_head *t; int pending_ret; int ret; if (root == extent_root) { t = find_tree_block(root, blocknr); - radix_tree_tag_set(&root->fs_info->cache_radix, blocknr, - CTREE_EXTENT_PENDING_DEL); + pin_down_block(root, blocknr, CTREE_EXTENT_PENDING_DEL); return 0; } - ret = __free_extent(trans, root, blocknr, num_blocks, pin); - pending_ret = run_pending(trans, root->fs_info->extent_root); + if (pin) { + ret = pin_down_block(root, blocknr, CTREE_EXTENT_PINNED); + BUG_ON(ret); + } + ret = __free_extent(trans, root, blocknr, num_blocks); + pending_ret = del_pending_extents(trans, root->fs_info->extent_root); return ret ? 
ret : pending_ret; } @@ -296,14 +298,16 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int ret; u64 hole_size = 0; int slot = 0; - u64 last_block; + u64 last_block = 0; u64 test_block; int start_found; struct btrfs_leaf *l; struct btrfs_root * root = orig_root->fs_info->extent_root; int total_needed = num_blocks; + int level; - total_needed += (btrfs_header_level(&root->node->node.header) + 1) * 3; + level = btrfs_header_level(btrfs_buffer_header(root->node)); + total_needed += (level + 1) * 3; if (root->fs_info->last_insert.objectid > search_start) search_start = root->fs_info->last_insert.objectid; @@ -323,7 +327,7 @@ check_failed: path.slots[0]--; while (1) { - l = &path.nodes[0]->leaf; + l = btrfs_buffer_leaf(path.nodes[0]); slot = path.slots[0]; if (slot >= btrfs_header_nritems(&l->header)) { ret = btrfs_next_leaf(root, &path); @@ -429,7 +433,7 @@ static int alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root sizeof(extent_item)); finish_current_insert(trans, extent_root); - pending_ret = run_pending(trans, extent_root); + pending_ret = del_pending_extents(trans, extent_root); if (ret) return ret; if (pending_ret) @@ -441,16 +445,15 @@ static int alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root * helper function to allocate a block for a given tree * returns the tree buffer or NULL. 
*/ -struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, +struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_key ins; int ret; - struct btrfs_buffer *buf; + struct buffer_head *buf; ret = alloc_extent(trans, root, 1, 0, (unsigned long)-1, - btrfs_header_parentid(&root->node->node.header), - &ins); + btrfs_header_parentid(btrfs_buffer_header(root->node)), &ins); if (ret) { BUG(); return NULL; @@ -467,13 +470,13 @@ struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int *level) { - struct btrfs_buffer *next; - struct btrfs_buffer *cur; + struct buffer_head *next; + struct buffer_head *cur; u64 blocknr; int ret; u32 refs; - ret = lookup_block_ref(trans, root, path->nodes[*level]->blocknr, + ret = lookup_block_ref(trans, root, path->nodes[*level]->b_blocknr, &refs); BUG_ON(ret); if (refs > 1) @@ -484,9 +487,10 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root while(*level > 0) { cur = path->nodes[*level]; if (path->slots[*level] >= - btrfs_header_nritems(&cur->node.header)) + btrfs_header_nritems(btrfs_buffer_header(cur))) break; - blocknr = btrfs_node_blockptr(&cur->node, path->slots[*level]); + blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur), + path->slots[*level]); ret = lookup_block_ref(trans, root, blocknr, &refs); if (refs != 1 || *level == 1) { path->slots[*level]++; @@ -499,12 +503,12 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root if (path->nodes[*level-1]) btrfs_block_release(root, path->nodes[*level-1]); path->nodes[*level-1] = next; - *level = btrfs_header_level(&next->node.header); + *level = btrfs_header_level(btrfs_buffer_header(next)); path->slots[*level] = 0; } out: - ret = btrfs_free_extent(trans, root, path->nodes[*level]->blocknr, 1, - 1); + ret = 
btrfs_free_extent(trans, root, path->nodes[*level]->b_blocknr, + 1, 1); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; @@ -525,14 +529,14 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root int ret; for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { slot = path->slots[i]; - if (slot < - btrfs_header_nritems(&path->nodes[i]->node.header)- 1) { + if (slot < btrfs_header_nritems( + btrfs_buffer_header(path->nodes[i])) - 1) { path->slots[i]++; *level = i; return 0; } else { ret = btrfs_free_extent(trans, root, - path->nodes[*level]->blocknr, + path->nodes[*level]->b_blocknr, 1, 1); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; @@ -549,7 +553,7 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root * decremented. */ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_buffer *snap) + *root, struct buffer_head *snap) { int ret = 0; int wret; @@ -560,7 +564,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_init_path(&path); - level = btrfs_header_level(&snap->node.header); + level = btrfs_header_level(btrfs_buffer_header(snap)); orig_level = level; path.nodes[level] = snap; path.slots[level] = 0; diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c index 6c2a71a46c7..22519b8e0cf 100644 --- a/fs/btrfs/hash.c +++ b/fs/btrfs/hash.c @@ -10,6 +10,7 @@ * License. */ +#include #define DELTA 0x9E3779B9 static void TEA_transform(__u32 buf[2], __u32 const in[]) @@ -63,6 +64,14 @@ int btrfs_name_hash(const char *name, int len, u64 *hash_result) const char *p; __u32 in[8], buf[2]; + if (len == 1 && *name == '.') { + *hash_result = 1; + return 0; + } else if (len == 2 && name[0] == '.' 
&& name[1] == '.') { + *hash_result = 2; + return 0; + } + /* Initialize the default seed for the hash checksum functions */ buf[0] = 0x67452301; buf[1] = 0xefcdab89; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index c7fda3bf7b2..c45aec258bd 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -15,7 +15,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, int ret; u64 hole_size = 0; int slot = 0; - u64 last_ino; + u64 last_ino = 0; int start_found; struct btrfs_leaf *l; struct btrfs_root *root = fs_root->fs_info->inode_root; @@ -40,7 +40,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, path.slots[0]--; while (1) { - l = &path.nodes[0]->leaf; + l = btrfs_buffer_leaf(path.nodes[0]); slot = path.slots[0]; if (slot >= btrfs_header_nritems(&l->header)) { ret = btrfs_next_leaf(root, &path); @@ -105,8 +105,8 @@ int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, if (ret) goto out; - inode_item = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], - struct btrfs_inode_map_item); + inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), + path.slots[0], struct btrfs_inode_map_item); btrfs_cpu_key_to_disk(&inode_item->key, location); out: btrfs_release_path(inode_root, &path); diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index aa2d3fac880..c8ee938c125 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -17,7 +17,6 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) printk("leaf %Lu total ptrs %d free space %d\n", btrfs_header_blocknr(&l->header), nr, btrfs_leaf_free_space(root, l)); - fflush(stdout); for (i = 0 ; i < nr ; i++) { item = l->items + i; type = btrfs_disk_key_type(&item->key); @@ -67,10 +66,10 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) btrfs_leaf_data(l) + btrfs_item_offset(item)); break; }; - fflush(stdout); } } -void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t) + +void btrfs_print_tree(struct 
btrfs_root *root, struct buffer_head *t) { int i; u32 nr; @@ -78,16 +77,16 @@ void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t) if (!t) return; - c = &t->node; + c = btrfs_buffer_node(t); nr = btrfs_header_nritems(&c->header); if (btrfs_is_leaf(c)) { btrfs_print_leaf(root, (struct btrfs_leaf *)c); return; } - printk("node %Lu level %d total ptrs %d free spc %u\n", t->blocknr, - btrfs_header_level(&c->header), nr, - (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); - fflush(stdout); + printk("node %Lu level %d total ptrs %d free spc %u\n", + btrfs_header_blocknr(&c->header), + btrfs_header_level(&c->header), nr, + (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); for (i = 0; i < nr; i++) { printk("\tkey %d (%Lu %u %Lu) block %Lu\n", i, @@ -95,12 +94,11 @@ void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t) c->ptrs[i].key.flags, c->ptrs[i].key.offset, btrfs_node_blockptr(c, i)); - fflush(stdout); } for (i = 0; i < nr; i++) { - struct btrfs_buffer *next_buf = read_tree_block(root, + struct buffer_head *next_buf = read_tree_block(root, btrfs_node_blockptr(c, i)); - struct btrfs_node *next = &next_buf->node; + struct btrfs_node *next = btrfs_buffer_node(next_buf); if (btrfs_is_leaf(next) && btrfs_header_level(&c->header) != 1) BUG(); diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index 0882ca904ec..396041a05cf 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -1,5 +1,5 @@ #ifndef __PRINT_TREE_ #define __PRINT_TREE_ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l); -void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t); +void btrfs_print_tree(struct btrfs_root *root, struct buffer_head *t); #endif diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 52c83be4b30..a4554c007ef 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -21,7 +21,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, if (ret < 0) goto out; BUG_ON(ret == 0); - l = 
&path.nodes[0]->leaf; + l = btrfs_buffer_leaf(path.nodes[0]); BUG_ON(path.slots[0] == 0); slot = path.slots[0] - 1; if (btrfs_disk_key_objectid(&l->items[slot].key) != objectid) { @@ -51,7 +51,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root if (ret < 0) goto out; BUG_ON(ret != 0); - l = &path.nodes[0]->leaf; + l = btrfs_buffer_leaf(path.nodes[0]); slot = path.slots[0]; memcpy(btrfs_item_ptr(l, slot, struct btrfs_root_item), item, sizeof(*item)); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4ae76044aea..ccc056aad69 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -8,13 +9,18 @@ #include #include #include "ctree.h" +#include "disk-io.h" #define BTRFS_SUPER_MAGIC 0x9123682E + +static struct inode_operations btrfs_dir_inode_operations; +static struct super_operations btrfs_super_ops; +static struct file_operations btrfs_dir_file_operations; + #if 0 /* some random number */ static struct super_operations ramfs_ops; -static struct inode_operations ramfs_dir_inode_operations; static struct backing_dev_info ramfs_backing_dev_info = { .ra_pages = 0, /* No readahead */ @@ -129,46 +135,243 @@ static struct inode_operations ramfs_dir_inode_operations = { }; #endif -struct inode *btrfs_get_inode(struct super_block *sb, int mode, dev_t dev) +static void btrfs_read_locked_inode(struct inode *inode) { - struct inode * inode = new_inode(sb); + struct btrfs_path path; + struct btrfs_inode_item *inode_item; + struct btrfs_root *root = btrfs_sb(inode->i_sb); + int ret; +printk("read locked inode %lu\n", inode->i_ino); + btrfs_init_path(&path); + ret = btrfs_lookup_inode(NULL, root, &path, inode->i_ino, 0); + if (ret) { + make_bad_inode(inode); + return; + } + inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), + path.slots[0], + struct btrfs_inode_item); - if (inode) { - inode->i_mode = mode; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; - 
inode->i_blocks = 0; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; +printk("found locked inode %lu\n", inode->i_ino); + inode->i_mode = btrfs_inode_mode(inode_item); + inode->i_nlink = btrfs_inode_nlink(inode_item); + inode->i_uid = btrfs_inode_uid(inode_item); + inode->i_gid = btrfs_inode_gid(inode_item); + inode->i_size = btrfs_inode_size(inode_item); + inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime); + inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime); + inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime); + inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime); + inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime); + inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime); + inode->i_blocks = btrfs_inode_nblocks(inode_item); + inode->i_generation = btrfs_inode_generation(inode_item); +printk("about to release\n"); + btrfs_release_path(root, &path); + switch (inode->i_mode & S_IFMT) { +#if 0 + default: + init_special_inode(inode, inode->i_mode, + btrfs_inode_rdev(inode_item)); + break; +#endif + case S_IFREG: +printk("inode %lu now a file\n", inode->i_ino); + break; + case S_IFDIR: +printk("inode %lu now a directory\n", inode->i_ino); + inode->i_op = &btrfs_dir_inode_operations; + inode->i_fop = &btrfs_dir_file_operations; + break; + case S_IFLNK: +printk("inode %lu now a link\n", inode->i_ino); + // inode->i_op = &page_symlink_inode_operations; + break; } - return inode; +printk("returning!\n"); + return; } -static struct super_operations btrfs_ops = { - .statfs = simple_statfs, - .drop_inode = generic_delete_inode, -}; +static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, + ino_t *ino) +{ + const char *name = dentry->d_name.name; + int namelen = dentry->d_name.len; + struct btrfs_dir_item *di; + struct btrfs_path path; + struct btrfs_root *root = btrfs_sb(dir->i_sb); + int ret; + + btrfs_init_path(&path); + ret = btrfs_lookup_dir_item(NULL, root, 
&path, dir->i_ino, name, + namelen, 0); + if (ret) { + *ino = 0; + goto out; + } + di = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + struct btrfs_dir_item); + *ino = btrfs_dir_objectid(di); +out: + btrfs_release_path(root, &path); + return ret; +} + +static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct inode * inode; + ino_t ino; + int ret; + + if (dentry->d_name.len > BTRFS_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + + ret = btrfs_inode_by_name(dir, dentry, &ino); + if (ret < 0) + return ERR_PTR(ret); + inode = NULL; + if (ino) { +printk("lookup on %.*s returns %lu\n", dentry->d_name.len, dentry->d_name.name, ino); + inode = iget(dir->i_sb, ino); + if (!inode) + return ERR_PTR(-EACCES); + } + return d_splice_alias(inode, dentry); +} + +static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_item *item; + struct btrfs_dir_item *di; + struct btrfs_key key; + struct btrfs_path path; + int ret; + u32 nritems; + struct btrfs_leaf *leaf; + int slot; + int advance; + unsigned char d_type = DT_UNKNOWN; + int over; + + key.objectid = inode->i_ino; +printk("readdir on dir %Lu pos %Lu\n", key.objectid, filp->f_pos); + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); + key.offset = filp->f_pos; + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + if (ret < 0) { + goto err; + } +printk("first ret %d\n", ret); + advance = filp->f_pos > 0 && ret != 0; + while(1) { + leaf = btrfs_buffer_leaf(path.nodes[0]); + nritems = btrfs_header_nritems(&leaf->header); + slot = path.slots[0]; +printk("leaf %Lu nritems %lu slot %d\n", path.nodes[0]->b_blocknr, nritems, slot); + if (advance) { +printk("advancing!\n"); + if (slot == nritems -1) { + ret = btrfs_next_leaf(root, &path); + if (ret) + break; + leaf = 
btrfs_buffer_leaf(path.nodes[0]); + nritems = btrfs_header_nritems(&leaf->header); + slot = path.slots[0]; +printk("2leaf %Lu nritems %lu slot %d\n", path.nodes[0]->b_blocknr, nritems, slot); + } else { + slot++; + path.slots[0]++; + } + } + advance = 1; + item = leaf->items + slot; +printk("item key %Lu %u %Lu\n", btrfs_disk_key_objectid(&item->key), + btrfs_disk_key_flags(&item->key), btrfs_disk_key_offset(&item->key)); + if (btrfs_disk_key_objectid(&item->key) != key.objectid) + break; + if (btrfs_disk_key_type(&item->key) != BTRFS_DIR_ITEM_KEY) + continue; + di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); +printk("filldir name %.*s, objectid %Lu\n", btrfs_dir_name_len(di), + (const char *)(di + 1), btrfs_dir_objectid(di)); + over = filldir(dirent, (const char *)(di + 1), + btrfs_dir_name_len(di), + btrfs_disk_key_offset(&item->key), + btrfs_dir_objectid(di), d_type); + if (over) + break; + filp->f_pos = btrfs_disk_key_offset(&item->key) + 1; + } +printk("filldir all done\n"); + ret = 0; +err: + btrfs_release_path(root, &path); + return ret; +} + +static void btrfs_put_super (struct super_block * sb) +{ + struct btrfs_root *root = btrfs_sb(sb); + int ret; + + ret = close_ctree(root); + if (ret) { + printk("close ctree returns %d\n", ret); + } + sb->s_fs_info = NULL; +} static int btrfs_fill_super(struct super_block * sb, void * data, int silent) { struct inode * inode; - struct dentry * root; + struct dentry * root_dentry; + struct btrfs_super_block *disk_super; + struct buffer_head *bh; + struct btrfs_root *root; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = BTRFS_SUPER_MAGIC; - sb->s_op = &btrfs_ops; + sb->s_op = &btrfs_super_ops; sb->s_time_gran = 1; - inode = btrfs_get_inode(sb, S_IFDIR | 0755, 0); + + bh = sb_bread(sb, BTRFS_SUPER_INFO_OFFSET / sb->s_blocksize); + if (!bh) { + printk("btrfs: unable to read on disk super\n"); + return -EIO; + } + disk_super = (struct 
btrfs_super_block *)bh->b_data; + root = open_ctree(sb, bh, disk_super); + sb->s_fs_info = root; + if (!root) { + printk("btrfs: open_ctree failed\n"); + return -EIO; + } + printk("read in super total blocks %Lu root %Lu\n", + btrfs_super_total_blocks(disk_super), + btrfs_super_root_dir(disk_super)); + + inode = iget_locked(sb, btrfs_super_root_dir(disk_super)); if (!inode) return -ENOMEM; + if (inode->i_state & I_NEW) { + btrfs_read_locked_inode(inode); + unlock_new_inode(inode); + } - root = d_alloc_root(inode); - if (!root) { + root_dentry = d_alloc_root(inode); + if (!root_dentry) { iput(inode); return -ENOMEM; } - sb->s_root = root; + sb->s_root = root_dentry; + return 0; } @@ -187,6 +390,24 @@ static struct file_system_type btrfs_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; +static struct super_operations btrfs_super_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, + .put_super = btrfs_put_super, + .read_inode = btrfs_read_locked_inode, +}; + +static struct inode_operations btrfs_dir_inode_operations = { + .lookup = btrfs_lookup, +}; + +static struct file_operations btrfs_dir_file_operations = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .readdir = btrfs_readdir, +}; + + static int __init init_btrfs_fs(void) { printk("btrfs loaded!\n"); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 3adb6e69fe4..9ab27b7491c 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -10,7 +10,7 @@ struct btrfs_trans_handle { static inline struct btrfs_trans_handle * btrfs_start_transaction(struct btrfs_root *root, int num_blocks) { - struct btrfs_trans_handle *h = malloc(sizeof(*h)); + struct btrfs_trans_handle *h = kmalloc(sizeof(*h), GFP_NOFS); h->transid = root->root_key.offset; h->blocks_reserved = num_blocks; h->blocks_used = 0; @@ -21,7 +21,7 @@ static inline void btrfs_free_transaction(struct btrfs_root *root, struct btrfs_trans_handle *handle) { memset(handle, 0, sizeof(*handle)); - free(handle); + 
kfree(handle); } #endif -- cgit v1.2.3-70-g09d2 From 79154b1b5bcf87903db7ff16a30b360b78d6fe4f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 22 Mar 2007 15:59:16 -0400 Subject: Btrfs: transaction rework Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/ctree.h | 4 +- fs/btrfs/disk-io.c | 115 +++------------------------- fs/btrfs/disk-io.h | 8 +- fs/btrfs/super.c | 18 ----- fs/btrfs/transaction.c | 199 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/transaction.h | 35 +++++---- 7 files changed, 235 insertions(+), 146 deletions(-) create mode 100644 fs/btrfs/transaction.c (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 1a0fb742239..be7d74cdca0 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -3,7 +3,7 @@ ifneq ($(KERNELRELEASE),) obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ - hash.o file-item.o inode-item.o inode-map.o disk-io.o + hash.o file-item.o inode-item.o inode-map.o disk-io.o transaction.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7748eecd930..4c19a3f12af 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -5,6 +5,7 @@ #include struct btrfs_trans_handle; +struct btrfs_transaction; #define BTRFS_MAGIC "_BtRfS_M" @@ -224,10 +225,11 @@ struct btrfs_fs_info { u64 last_inode_alloc; u64 last_inode_alloc_dirid; u64 generation; - struct btrfs_trans_handle *running_transaction; + struct btrfs_transaction *running_transaction; struct btrfs_super_block *disk_super; struct buffer_head *sb_buffer; struct super_block *sb; + struct mutex trans_mutex; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index df2061a735c..9cacca0c525 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -50,89 +50,6 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, 
return 0; } -int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf) -{ - mark_buffer_dirty(buf); - return 0; -} - -static int __commit_transaction(struct btrfs_trans_handle *trans, struct - btrfs_root *root) -{ - filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping); - return 0; -} - -static int commit_tree_roots(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) -{ - int ret; - u64 old_extent_block; - struct btrfs_root *tree_root = fs_info->tree_root; - struct btrfs_root *extent_root = fs_info->extent_root; - struct btrfs_root *inode_root = fs_info->inode_root; - - btrfs_set_root_blocknr(&inode_root->root_item, - inode_root->node->b_blocknr); - ret = btrfs_update_root(trans, tree_root, - &inode_root->root_key, - &inode_root->root_item); - BUG_ON(ret); - while(1) { - old_extent_block = btrfs_root_blocknr(&extent_root->root_item); - if (old_extent_block == extent_root->node->b_blocknr) - break; - btrfs_set_root_blocknr(&extent_root->root_item, - extent_root->node->b_blocknr); - ret = btrfs_update_root(trans, tree_root, - &extent_root->root_key, - &extent_root->root_item); - BUG_ON(ret); - } - return 0; -} - -int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct - btrfs_root *root, struct btrfs_super_block *s) -{ - int ret = 0; - struct buffer_head *snap = root->commit_root; - struct btrfs_key snap_key; - - if (root->commit_root == root->node) - return 0; - - memcpy(&snap_key, &root->root_key, sizeof(snap_key)); - root->root_key.offset++; - - btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr); - ret = btrfs_insert_root(trans, root->fs_info->tree_root, - &root->root_key, &root->root_item); - BUG_ON(ret); - - ret = commit_tree_roots(trans, root->fs_info); - BUG_ON(ret); - - ret = __commit_transaction(trans, root); - BUG_ON(ret); - - write_ctree_super(trans, root, s); - btrfs_finish_extent_commit(trans, root->fs_info->extent_root); - 
btrfs_finish_extent_commit(trans, root->fs_info->tree_root); - - root->commit_root = root->node; - get_bh(root->node); - ret = btrfs_drop_snapshot(trans, root, snap); - BUG_ON(ret); - - ret = btrfs_del_root(trans, root->fs_info->tree_root, &snap_key); - BUG_ON(ret); - root->fs_info->generation = root->root_key.offset + 1; - - return ret; -} - static int __setup_root(struct btrfs_super_block *super, struct btrfs_root *root, struct btrfs_fs_info *fs_info, @@ -197,6 +114,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->disk_super = disk_super; fs_info->sb_buffer = sb_buffer; fs_info->sb = sb; + mutex_init(&fs_info->trans_mutex); memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); @@ -225,7 +143,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, } int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_super_block *s) + *root) { return 0; #if 0 @@ -242,34 +160,19 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root #endif } -static int drop_cache(struct btrfs_root *root) -{ - return 0; -#if 0 - while(!list_empty(&root->fs_info->cache)) { - struct buffer_head *b = list_entry(root->fs_info->cache.next, - struct buffer_head, - cache); - list_del_init(&b->cache); - btrfs_block_release(root, b); - } - return 0; -#endif -} - int close_ctree(struct btrfs_root *root) { int ret; struct btrfs_trans_handle *trans; - trans = root->fs_info->running_transaction; - btrfs_commit_transaction(trans, root, root->fs_info->disk_super); - ret = commit_tree_roots(trans, root->fs_info); - BUG_ON(ret); - ret = __commit_transaction(trans, root); + trans = btrfs_start_transaction(root, 1); + btrfs_commit_transaction(trans, root); + /* run commit again to drop the original snapshot */ + trans = btrfs_start_transaction(root, 1); + btrfs_commit_transaction(trans, root); + ret = btrfs_write_and_wait_transaction(NULL, root); 
BUG_ON(ret); - write_ctree_super(trans, root, root->fs_info->disk_super); - drop_cache(root); + write_ctree_super(NULL, root); if (root->node) btrfs_block_release(root, root->node); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 7f4bb729b73..099f7eea0ec 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -28,14 +28,14 @@ int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); -int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_super_block *s); +int btrfs_commit_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root); struct btrfs_root *open_ctree(struct super_block *sb, struct buffer_head *sb_buffer, struct btrfs_super_block *disk_super); int close_ctree(struct btrfs_root *root); void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf); -int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_super_block *s); +int write_ctree_super(struct btrfs_trans_handle *trans, + struct btrfs_root *root); int mkfs(int fd, u64 num_blocks, u32 blocksize); #endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ccc056aad69..62043082584 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -141,7 +141,6 @@ static void btrfs_read_locked_inode(struct inode *inode) struct btrfs_inode_item *inode_item; struct btrfs_root *root = btrfs_sb(inode->i_sb); int ret; -printk("read locked inode %lu\n", inode->i_ino); btrfs_init_path(&path); ret = btrfs_lookup_inode(NULL, root, &path, inode->i_ino, 0); if (ret) { @@ -152,7 +151,6 @@ printk("read locked inode %lu\n", inode->i_ino); path.slots[0], struct btrfs_inode_item); -printk("found locked inode %lu\n", inode->i_ino); inode->i_mode = btrfs_inode_mode(inode_item); inode->i_nlink = btrfs_inode_nlink(inode_item); inode->i_uid = 
btrfs_inode_uid(inode_item); @@ -166,7 +164,6 @@ printk("found locked inode %lu\n", inode->i_ino); inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime); inode->i_blocks = btrfs_inode_nblocks(inode_item); inode->i_generation = btrfs_inode_generation(inode_item); -printk("about to release\n"); btrfs_release_path(root, &path); switch (inode->i_mode & S_IFMT) { #if 0 @@ -176,19 +173,15 @@ printk("about to release\n"); break; #endif case S_IFREG: -printk("inode %lu now a file\n", inode->i_ino); break; case S_IFDIR: -printk("inode %lu now a directory\n", inode->i_ino); inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; break; case S_IFLNK: -printk("inode %lu now a link\n", inode->i_ino); // inode->i_op = &page_symlink_inode_operations; break; } -printk("returning!\n"); return; } @@ -232,7 +225,6 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, return ERR_PTR(ret); inode = NULL; if (ino) { -printk("lookup on %.*s returns %lu\n", dentry->d_name.len, dentry->d_name.name, ino); inode = iget(dir->i_sb, ino); if (!inode) return ERR_PTR(-EACCES); @@ -257,7 +249,6 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) int over; key.objectid = inode->i_ino; -printk("readdir on dir %Lu pos %Lu\n", key.objectid, filp->f_pos); key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); key.offset = filp->f_pos; @@ -266,15 +257,12 @@ printk("readdir on dir %Lu pos %Lu\n", key.objectid, filp->f_pos); if (ret < 0) { goto err; } -printk("first ret %d\n", ret); advance = filp->f_pos > 0 && ret != 0; while(1) { leaf = btrfs_buffer_leaf(path.nodes[0]); nritems = btrfs_header_nritems(&leaf->header); slot = path.slots[0]; -printk("leaf %Lu nritems %lu slot %d\n", path.nodes[0]->b_blocknr, nritems, slot); if (advance) { -printk("advancing!\n"); if (slot == nritems -1) { ret = btrfs_next_leaf(root, &path); if (ret) @@ -282,7 +270,6 @@ printk("advancing!\n"); leaf = 
btrfs_buffer_leaf(path.nodes[0]); nritems = btrfs_header_nritems(&leaf->header); slot = path.slots[0]; -printk("2leaf %Lu nritems %lu slot %d\n", path.nodes[0]->b_blocknr, nritems, slot); } else { slot++; path.slots[0]++; @@ -290,15 +277,11 @@ printk("2leaf %Lu nritems %lu slot %d\n", path.nodes[0]->b_blocknr, nritems, slo } advance = 1; item = leaf->items + slot; -printk("item key %Lu %u %Lu\n", btrfs_disk_key_objectid(&item->key), - btrfs_disk_key_flags(&item->key), btrfs_disk_key_offset(&item->key)); if (btrfs_disk_key_objectid(&item->key) != key.objectid) break; if (btrfs_disk_key_type(&item->key) != BTRFS_DIR_ITEM_KEY) continue; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); -printk("filldir name %.*s, objectid %Lu\n", btrfs_dir_name_len(di), - (const char *)(di + 1), btrfs_dir_objectid(di)); over = filldir(dirent, (const char *)(di + 1), btrfs_dir_name_len(di), btrfs_disk_key_offset(&item->key), @@ -307,7 +290,6 @@ printk("filldir name %.*s, objectid %Lu\n", btrfs_dir_name_len(di), break; filp->f_pos = btrfs_disk_key_offset(&item->key) + 1; } -printk("filldir all done\n"); ret = 0; err: btrfs_release_path(root, &path); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c new file mode 100644 index 00000000000..8dc1c170f10 --- /dev/null +++ b/fs/btrfs/transaction.c @@ -0,0 +1,199 @@ +#include +#include +#include "ctree.h" +#include "disk-io.h" +#include "transaction.h" + + +static void put_transaction(struct btrfs_transaction *transaction) +{ + transaction->use_count--; + if (transaction->use_count == 0) + kfree(transaction); +} + +static int join_transaction(struct btrfs_root *root) +{ + struct btrfs_transaction *cur_trans; + cur_trans = root->fs_info->running_transaction; + if (!cur_trans) { + cur_trans = kmalloc(sizeof(*cur_trans), GFP_NOFS); + BUG_ON(!cur_trans); + root->fs_info->running_transaction = cur_trans; + cur_trans->num_writers = 0; + cur_trans->transid = root->root_key.offset + 1; + init_waitqueue_head(&cur_trans->writer_wait); + 
init_waitqueue_head(&cur_trans->commit_wait); + cur_trans->in_commit = 0; + cur_trans->use_count = 0; + cur_trans->commit_done = 0; + } + cur_trans->num_writers++; + return 0; +} + +struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, + int num_blocks) +{ + struct btrfs_trans_handle *h = kmalloc(sizeof(*h), GFP_NOFS); + int ret; + + mutex_lock(&root->fs_info->trans_mutex); + ret = join_transaction(root); + BUG_ON(ret); + h->transid = root->fs_info->running_transaction->transid; + h->transaction = root->fs_info->running_transaction; + h->blocks_reserved = num_blocks; + h->blocks_used = 0; + root->fs_info->running_transaction->use_count++; + mutex_unlock(&root->fs_info->trans_mutex); + return h; +} + +int btrfs_end_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_transaction *cur_trans; + mutex_lock(&root->fs_info->trans_mutex); + cur_trans = root->fs_info->running_transaction; + WARN_ON(cur_trans->num_writers <= 1); + if (waitqueue_active(&cur_trans->writer_wait)) + wake_up(&cur_trans->writer_wait); + cur_trans->num_writers--; + put_transaction(cur_trans); + mutex_unlock(&root->fs_info->trans_mutex); + kfree(trans); + return 0; +} + + +int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping); + return 0; +} + +int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + int ret; + u64 old_extent_block; + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_root *inode_root = fs_info->inode_root; + + btrfs_set_root_blocknr(&inode_root->root_item, + inode_root->node->b_blocknr); + ret = btrfs_update_root(trans, tree_root, + &inode_root->root_key, + &inode_root->root_item); + BUG_ON(ret); + while(1) { + old_extent_block = 
btrfs_root_blocknr(&extent_root->root_item); + if (old_extent_block == extent_root->node->b_blocknr) + break; + btrfs_set_root_blocknr(&extent_root->root_item, + extent_root->node->b_blocknr); + ret = btrfs_update_root(trans, tree_root, + &extent_root->root_key, + &extent_root->root_item); + BUG_ON(ret); + } + return 0; +} + +static int wait_for_commit(struct btrfs_root *root, + struct btrfs_transaction *commit) +{ + DEFINE_WAIT(wait); + commit->use_count++; + while(!commit->commit_done) { + prepare_to_wait(&commit->commit_wait, &wait, + TASK_UNINTERRUPTIBLE); + if (commit->commit_done) + break; + mutex_unlock(&root->fs_info->trans_mutex); + schedule(); + mutex_lock(&root->fs_info->trans_mutex); + } + finish_wait(&commit->commit_wait, &wait); + return 0; +} + +int btrfs_commit_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + int ret = 0; + struct buffer_head *snap = root->commit_root; + struct btrfs_key snap_key; + struct btrfs_transaction *cur_trans; + DEFINE_WAIT(wait); + + mutex_lock(&root->fs_info->trans_mutex); + if (trans->transaction->in_commit) { + cur_trans = trans->transaction; + trans->transaction->use_count++; + btrfs_end_transaction(trans, root); + ret = wait_for_commit(root, cur_trans); + BUG_ON(ret); + put_transaction(cur_trans); + mutex_unlock(&root->fs_info->trans_mutex); + return 0; + } + while (trans->transaction->num_writers > 1) { + prepare_to_wait(&trans->transaction->writer_wait, &wait, + TASK_UNINTERRUPTIBLE); + if (trans->transaction->num_writers <= 1) + break; + mutex_unlock(&root->fs_info->trans_mutex); + schedule(); + mutex_lock(&root->fs_info->trans_mutex); + } + finish_wait(&trans->transaction->writer_wait, &wait); + + cur_trans = root->fs_info->running_transaction; + root->fs_info->running_transaction = NULL; + mutex_unlock(&root->fs_info->trans_mutex); + + memcpy(&snap_key, &root->root_key, sizeof(snap_key)); + root->root_key.offset++; + + if (btrfs_root_blocknr(&root->root_item) != root->node->b_blocknr) 
{ + btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr); + ret = btrfs_insert_root(trans, root->fs_info->tree_root, + &root->root_key, &root->root_item); + BUG_ON(ret); + } + + ret = btrfs_commit_tree_roots(trans, root); + BUG_ON(ret); + + ret = btrfs_write_and_wait_transaction(trans, root); + BUG_ON(ret); + + write_ctree_super(trans, root); + btrfs_finish_extent_commit(trans, root->fs_info->extent_root); + btrfs_finish_extent_commit(trans, root->fs_info->tree_root); + put_transaction(cur_trans); + kfree(trans); + + if (root->node != root->commit_root) { + trans = btrfs_start_transaction(root, 1); + root->commit_root = root->node; + get_bh(root->node); + ret = btrfs_drop_snapshot(trans, root, snap); + BUG_ON(ret); + + ret = btrfs_del_root(trans, root->fs_info->tree_root, + &snap_key); + BUG_ON(ret); + root->fs_info->generation = root->root_key.offset + 1; + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + } + + return ret; +} + diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 9ab27b7491c..7e6c08a0acc 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -1,27 +1,30 @@ #ifndef __TRANSACTION__ #define __TRANSACTION__ +struct btrfs_transaction { + u64 transid; + unsigned long num_writers; + int in_commit; + int use_count; + int commit_done; + wait_queue_head_t writer_wait; + wait_queue_head_t commit_wait; +}; + struct btrfs_trans_handle { u64 transid; unsigned long blocks_reserved; unsigned long blocks_used; + struct btrfs_transaction *transaction; }; -static inline struct btrfs_trans_handle * -btrfs_start_transaction(struct btrfs_root *root, int num_blocks) -{ - struct btrfs_trans_handle *h = kmalloc(sizeof(*h), GFP_NOFS); - h->transid = root->root_key.offset; - h->blocks_reserved = num_blocks; - h->blocks_used = 0; - return h; -} - -static inline void btrfs_free_transaction(struct btrfs_root *root, - struct btrfs_trans_handle *handle) -{ - memset(handle, 0, sizeof(*handle)); - kfree(handle); -} +int 
btrfs_end_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root); +struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, + int num_blocks); +int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root); +int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, + struct btrfs_root *root); #endif -- cgit v1.2.3-70-g09d2 From 2c90e5d658424bc71b111eb5a972240d5d06fe86 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 2 Apr 2007 10:50:19 -0400 Subject: Btrfs: still corruption hunting Signed-off-by: Chris Mason --- fs/btrfs/bit-radix.c | 7 +-- fs/btrfs/btrfs_inode.h | 14 +++++ fs/btrfs/ctree.c | 42 ++++++++++---- fs/btrfs/ctree.h | 3 + fs/btrfs/disk-io.c | 84 ++++++++++++++-------------- fs/btrfs/disk-io.h | 4 +- fs/btrfs/extent-tree.c | 18 ++++-- fs/btrfs/super.c | 146 ++++++++++++++++++++++++++++++++++++++++++++----- fs/btrfs/transaction.c | 39 +++++++++---- fs/btrfs/transaction.h | 3 + 10 files changed, 265 insertions(+), 95 deletions(-) create mode 100644 fs/btrfs/btrfs_inode.h (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c index 845422368f4..a50a1c44eab 100644 --- a/fs/btrfs/bit-radix.c +++ b/fs/btrfs/bit-radix.c @@ -4,6 +4,7 @@ #define BIT_ARRAY_BYTES 256 #define BIT_RADIX_BITS_PER_ARRAY ((BIT_ARRAY_BYTES - sizeof(unsigned long)) * 8) +extern struct kmem_cache *btrfs_bit_radix_cachep; int set_radix_bit(struct radix_tree_root *radix, unsigned long bit) { unsigned long *bits; @@ -16,7 +17,7 @@ int set_radix_bit(struct radix_tree_root *radix, unsigned long bit) bits = radix_tree_lookup(radix, slot); if (!bits) { - bits = kmalloc(BIT_ARRAY_BYTES, GFP_NOFS); + bits = kmem_cache_alloc(btrfs_bit_radix_cachep, GFP_NOFS); if (!bits) return -ENOMEM; memset(bits + 1, 0, BIT_ARRAY_BYTES - sizeof(unsigned long)); @@ -68,12 +69,10 @@ int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit) break; } } - if (empty) { bits = 
radix_tree_delete(radix, slot); - synchronize_rcu(); BUG_ON(!bits); - kfree(bits); + kmem_cache_free(btrfs_bit_radix_cachep, bits); } #endif return 0; diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h new file mode 100644 index 00000000000..e159841650a --- /dev/null +++ b/fs/btrfs/btrfs_inode.h @@ -0,0 +1,14 @@ +#ifndef __BTRFS_I__ +#define __BTRFS_I__ + +struct btrfs_inode { + u32 magic; + struct inode vfs_inode; + u32 magic2; +}; +static inline struct btrfs_inode *BTRFS_I(struct inode *inode) +{ + return container_of(inode, struct btrfs_inode, vfs_inode); +} + +#endif diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index a0dfa2d6cb9..453ce835209 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -16,6 +16,16 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot); +struct btrfs_path *btrfs_alloc_path(void) +{ + return kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS); +} + +void btrfs_free_path(struct btrfs_path *p) +{ + kmem_cache_free(btrfs_path_cachep, p); +} + inline void btrfs_init_path(struct btrfs_path *p) { memset(p, 0, sizeof(*p)); @@ -47,17 +57,18 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root } cow = btrfs_alloc_free_block(trans, root); cow_node = btrfs_buffer_node(cow); + if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) + WARN_ON(1); memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); btrfs_set_header_blocknr(&cow_node->header, cow->b_blocknr); btrfs_set_header_generation(&cow_node->header, trans->transid); - *cow_ret = cow; - btrfs_mark_buffer_dirty(cow); btrfs_inc_ref(trans, root, buf); if (buf == root->node) { root->node = cow; get_bh(cow); - if (buf != root->commit_root) + if (buf != root->commit_root) { btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); + } btrfs_block_release(root, buf); } else { 
btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, @@ -66,6 +77,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); } btrfs_block_release(root, buf); + *cow_ret = cow; return 0; } @@ -477,9 +489,12 @@ again: p->slots[level + 1], &cow_buf); b = cow_buf; + c = btrfs_buffer_node(b); } BUG_ON(!cow && ins_len); - c = btrfs_buffer_node(b); + if (level != btrfs_header_level(&c->header)) + WARN_ON(1); + level = btrfs_header_level(&c->header); p->nodes[level] = b; ret = check_block(root, p, level); if (ret) @@ -1257,19 +1272,22 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root data_size) { int ret = 0; - struct btrfs_path path; + struct btrfs_path *path; u8 *ptr; - btrfs_init_path(&path); - ret = btrfs_insert_empty_item(trans, root, &path, cpu_key, data_size); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); if (!ret) { - ptr = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), - path.slots[0], u8); - btrfs_memcpy(root, path.nodes[0]->b_data, + ptr = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], u8); + btrfs_memcpy(root, path->nodes[0]->b_data, ptr, data, data_size); - btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); } - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9ec0d65ebe9..d8e03bd797f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -7,6 +7,7 @@ struct btrfs_trans_handle; struct btrfs_transaction; +extern struct kmem_cache *btrfs_path_cachep; #define BTRFS_MAGIC "_BtRfS_M" @@ -888,6 +889,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); void btrfs_release_path(struct btrfs_root 
*root, struct btrfs_path *p); +struct btrfs_path *btrfs_alloc_path(void); +void btrfs_free_path(struct btrfs_path *p); void btrfs_init_path(struct btrfs_path *p); int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index bb133104e2e..2dbd55084a4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -8,18 +8,6 @@ #include "disk-io.h" #include "transaction.h" -#define PATTERN 0xDEADBEEFUL -static inline void check_pattern(struct buffer_head *buf) -{ - if (buf->b_private != (void *)PATTERN) - WARN_ON(1); -} - -static inline void set_pattern(struct buffer_head *buf) -{ - buf->b_private = (void *)PATTERN; -} - static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { struct btrfs_node *node = btrfs_buffer_node(buf); @@ -35,6 +23,8 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) { + return sb_find_get_block(root->fs_info->sb, blocknr); +#if 0 struct address_space *mapping = root->fs_info->btree_inode->i_mapping; int blockbits = root->fs_info->sb->s_blocksize_bits; unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); @@ -43,6 +33,7 @@ struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) struct buffer_head *head; struct buffer_head *ret = NULL; + page = find_lock_page(mapping, index); if (!page) return NULL; @@ -64,15 +55,17 @@ out_unlock: unlock_page(page); if (ret) { touch_buffer(ret); - check_pattern(ret); } page_cache_release(page); return ret; +#endif } struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, u64 blocknr) { + return sb_getblk(root->fs_info->sb, blocknr); +#if 0 struct address_space *mapping = root->fs_info->btree_inode->i_mapping; int blockbits = root->fs_info->sb->s_blocksize_bits; unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); @@ -95,7 
+88,6 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, bh->b_bdev = root->fs_info->sb->s_bdev; bh->b_blocknr = first_block; set_buffer_mapped(bh); - set_pattern(bh); } if (bh->b_blocknr == blocknr) { ret = bh; @@ -111,6 +103,7 @@ out_unlock: touch_buffer(ret); page_cache_release(page); return ret; +#endif } static sector_t max_block(struct block_device *bdev) @@ -225,6 +218,8 @@ static struct address_space_operations btree_aops = { struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) { + return sb_bread(root->fs_info->sb, blocknr); +#if 0 struct buffer_head *bh = NULL; bh = btrfs_find_create_tree_block(root, blocknr); @@ -239,7 +234,6 @@ struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) if (!buffer_uptodate(bh)) goto fail; csum_tree_block(root, bh, 1); - set_pattern(bh); } else { unlock_buffer(bh); } @@ -250,6 +244,7 @@ fail: brelse(bh); return NULL; +#endif } int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -268,14 +263,14 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, return 0; } -static int __setup_root(struct btrfs_super_block *super, +static int __setup_root(int blocksize, struct btrfs_root *root, struct btrfs_fs_info *fs_info, u64 objectid) { root->node = NULL; root->commit_root = NULL; - root->blocksize = btrfs_super_blocksize(super); + root->blocksize = blocksize; root->ref_cows = 0; root->fs_info = fs_info; memset(&root->root_key, 0, sizeof(root->root_key)); @@ -283,7 +278,7 @@ static int __setup_root(struct btrfs_super_block *super, return 0; } -static int find_and_setup_root(struct btrfs_super_block *super, +static int find_and_setup_root(int blocksize, struct btrfs_root *tree_root, struct btrfs_fs_info *fs_info, u64 objectid, @@ -291,7 +286,7 @@ static int find_and_setup_root(struct btrfs_super_block *super, { int ret; - __setup_root(super, root, fs_info, objectid); + __setup_root(blocksize, root, fs_info, 
objectid); ret = btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); BUG_ON(ret); @@ -302,9 +297,7 @@ static int find_and_setup_root(struct btrfs_super_block *super, return 0; } -struct btrfs_root *open_ctree(struct super_block *sb, - struct buffer_head *sb_buffer, - struct btrfs_super_block *disk_super) +struct btrfs_root *open_ctree(struct super_block *sb) { struct btrfs_root *root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); @@ -317,13 +310,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), GFP_NOFS); int ret; + struct btrfs_super_block *disk_super; - if (!btrfs_super_root(disk_super)) { - return NULL; - } init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); - sb_set_blocksize(sb, sb_buffer->b_size); + sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; fs_info->fs_root = root; fs_info->tree_root = tree_root; @@ -331,55 +322,59 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->inode_root = inode_root; fs_info->last_inode_alloc = 0; fs_info->last_inode_alloc_dirid = 0; - fs_info->disk_super = disk_super; fs_info->sb = sb; + fs_info->btree_inode = NULL; +#if 0 fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; + fs_info->btree_inode->i_nlink = 1; fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; insert_inode_hash(fs_info->btree_inode); - mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); +#endif fs_info->hash_tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC); spin_lock_init(&fs_info->hash_lock); - if (!fs_info->hash_tfm || IS_ERR(fs_info->hash_tfm)) { printk("failed to allocate sha256 hash\n"); return NULL; } - mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); 
- __setup_root(disk_super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); - - fs_info->sb_buffer = read_tree_block(tree_root, sb_buffer->b_blocknr); + __setup_root(sb->s_blocksize, tree_root, + fs_info, BTRFS_ROOT_TREE_OBJECTID); + fs_info->sb_buffer = read_tree_block(tree_root, + BTRFS_SUPER_INFO_OFFSET / + sb->s_blocksize); if (!fs_info->sb_buffer) { printk("failed2\n"); return NULL; } - brelse(sb_buffer); - sb_buffer = NULL; disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; + if (!btrfs_super_root(disk_super)) { + return NULL; + } fs_info->disk_super = disk_super; - tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super)); BUG_ON(!tree_root->node); - ret = find_and_setup_root(disk_super, tree_root, fs_info, + mutex_lock(&fs_info->fs_mutex); + ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_EXTENT_TREE_OBJECTID, extent_root); BUG_ON(ret); - ret = find_and_setup_root(disk_super, tree_root, fs_info, + ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_INODE_MAP_OBJECTID, inode_root); BUG_ON(ret); - ret = find_and_setup_root(disk_super, tree_root, fs_info, + ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_FS_TREE_OBJECTID, root); + mutex_unlock(&fs_info->fs_mutex); BUG_ON(ret); root->commit_root = root->node; get_bh(root->node); @@ -392,9 +387,11 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct buffer_head *bh = root->fs_info->sb_buffer; + btrfs_set_super_root(root->fs_info->disk_super, root->fs_info->tree_root->node->b_blocknr); lock_buffer(bh); + WARN_ON(atomic_read(&bh->b_count) < 1); clear_buffer_dirty(bh); csum_tree_block(root, bh, 0); bh->b_end_io = end_buffer_write_sync; @@ -413,6 +410,7 @@ int close_ctree(struct btrfs_root *root) int ret; struct btrfs_trans_handle *trans; + mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_commit_transaction(trans, root); /* run commit again to drop 
the original snapshot */ @@ -421,6 +419,7 @@ int close_ctree(struct btrfs_root *root) ret = btrfs_write_and_wait_transaction(NULL, root); BUG_ON(ret); write_ctree_super(NULL, root); + mutex_unlock(&root->fs_info->fs_mutex); if (root->node) btrfs_block_release(root, root->node); @@ -436,8 +435,8 @@ int close_ctree(struct btrfs_root *root) btrfs_block_release(root, root->commit_root); btrfs_block_release(root, root->fs_info->sb_buffer); crypto_free_hash(root->fs_info->hash_tfm); - truncate_inode_pages(root->fs_info->btree_inode->i_mapping, 0); - iput(root->fs_info->btree_inode); + // truncate_inode_pages(root->fs_info->btree_inode->i_mapping, 0); + // iput(root->fs_info->btree_inode); kfree(root->fs_info->extent_root); kfree(root->fs_info->inode_root); kfree(root->fs_info->tree_root); @@ -448,7 +447,6 @@ int close_ctree(struct btrfs_root *root) void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) { - check_pattern(buf); - brelse(buf); + // brelse(buf); } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index f6998e2192c..ac6764ba8aa 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -31,9 +31,7 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); -struct btrfs_root *open_ctree(struct super_block *sb, - struct buffer_head *sb_buffer, - struct btrfs_super_block *disk_super); +struct btrfs_root *open_ctree(struct super_block *sb); int close_ctree(struct btrfs_root *root); void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf); int write_ctree_super(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c4194dab7a3..37b87e28a2f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -173,12 +173,16 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) if (!pending) { bh = 
btrfs_find_tree_block(root, blocknr); - if (bh && buffer_uptodate(bh)) { - header = btrfs_buffer_header(bh); - if (btrfs_header_generation(header) == - root->fs_info->running_transaction->transid) { - btrfs_block_release(root, bh); - return 0; + if (bh) { + if (buffer_uptodate(bh)) { + u64 transid = + root->fs_info->running_transaction->transid; + header = btrfs_buffer_header(bh); + if (btrfs_header_generation(header) == + transid) { + btrfs_block_release(root, bh); + return 0; + } } btrfs_block_release(root, bh); } @@ -539,6 +543,8 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root */ while(*level >= 0) { cur = path->nodes[*level]; + if (btrfs_header_level(btrfs_buffer_header(cur)) != *level) + WARN_ON(1); if (path->slots[*level] >= btrfs_header_nritems(btrfs_buffer_header(cur))) break; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 894a70bf26d..6969b672b57 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -14,6 +14,7 @@ #include "ctree.h" #include "disk-io.h" #include "transaction.h" +#include "btrfs_inode.h" #define BTRFS_SUPER_MAGIC 0x9123682E @@ -24,6 +25,14 @@ static struct inode_operations btrfs_file_inode_operations; static struct address_space_operations btrfs_aops; static struct file_operations btrfs_file_operations; +static int check_inode(struct inode *inode) +{ + struct btrfs_inode *ei = BTRFS_I(inode); + WARN_ON(ei->magic != 0xDEADBEEF); + WARN_ON(ei->magic2 != 0xDEADBEAF); + return 0; +} + static void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path path; @@ -34,6 +43,7 @@ static void btrfs_read_locked_inode(struct inode *inode) btrfs_init_path(&path); mutex_lock(&root->fs_info->fs_mutex); + check_inode(inode); ret = btrfs_lookup_inode(NULL, root, &path, inode->i_ino, 0); if (ret) { btrfs_release_path(root, &path); @@ -41,6 +51,7 @@ static void btrfs_read_locked_inode(struct inode *inode) make_bad_inode(inode); return; } + check_inode(inode); inode_item = 
btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], struct btrfs_inode_item); @@ -60,6 +71,7 @@ static void btrfs_read_locked_inode(struct inode *inode) inode->i_generation = btrfs_inode_generation(inode_item); btrfs_release_path(root, &path); mutex_unlock(&root->fs_info->fs_mutex); + check_inode(inode); switch (inode->i_mode & S_IFMT) { #if 0 default: @@ -80,6 +92,7 @@ static void btrfs_read_locked_inode(struct inode *inode) // inode->i_op = &page_symlink_inode_operations; break; } + check_inode(inode); return; } @@ -347,6 +360,7 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, namelen, 0); if (ret || !btrfs_match_dir_item_name(root, &path, name, namelen)) { *ino = 0; + ret = 0; goto out; } di = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], @@ -354,6 +368,7 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, *ino = btrfs_dir_objectid(di); out: btrfs_release_path(root, &path); + check_inode(dir); return ret; } @@ -367,7 +382,6 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_inode_by_name(dir, dentry, &ino); mutex_unlock(&root->fs_info->fs_mutex); @@ -378,7 +392,9 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, inode = iget(dir->i_sb, ino); if (!inode) return ERR_PTR(-EACCES); + check_inode(inode); } + check_inode(dir); return d_splice_alias(inode, dentry); } @@ -471,23 +487,14 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) struct inode * inode; struct dentry * root_dentry; struct btrfs_super_block *disk_super; - struct buffer_head *bh; struct btrfs_root *root; sb->s_maxbytes = MAX_LFS_FILESIZE; - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = BTRFS_SUPER_MAGIC; sb->s_op = &btrfs_super_ops; sb->s_time_gran = 1; - bh = 
sb_bread(sb, BTRFS_SUPER_INFO_OFFSET / sb->s_blocksize); - if (!bh) { - printk("btrfs: unable to read on disk super\n"); - return -EIO; - } - disk_super = (struct btrfs_super_block *)bh->b_data; - root = open_ctree(sb, bh, disk_super); + root = open_ctree(sb); if (!root) { printk("btrfs: open_ctree failed\n"); @@ -533,6 +540,7 @@ static void fill_inode_item(struct btrfs_inode_item *item, btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec); btrfs_set_inode_nblocks(item, inode->i_blocks); btrfs_set_inode_generation(item, inode->i_generation); + check_inode(inode); } static int btrfs_update_inode(struct btrfs_trans_handle *trans, @@ -560,6 +568,7 @@ static int btrfs_update_inode(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(path.nodes[0]); failed: btrfs_release_path(root, &path); + check_inode(inode); return 0; } @@ -577,6 +586,7 @@ static int btrfs_write_inode(struct inode *inode, int wait) else btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); + check_inode(inode); return ret; } @@ -594,6 +604,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, if (!inode) return ERR_PTR(-ENOMEM); + check_inode(inode); ret = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); BUG_ON(ret); @@ -616,6 +627,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, BUG_ON(ret); insert_inode_hash(inode); + check_inode(inode); + check_inode(dir); return inode; } @@ -632,7 +645,8 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, ret = btrfs_update_inode(trans, btrfs_sb(inode->i_sb), dentry->d_parent->d_inode); } - + check_inode(inode); + check_inode(dentry->d_parent->d_inode); return ret; } @@ -644,6 +658,9 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans, d_instantiate(dentry, inode); return 0; } + if (err > 0) + err = -EEXIST; + check_inode(inode); return err; } @@ -675,6 +692,9 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, out_unlock: 
btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); + check_inode(inode); + check_inode(dir); + if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -755,11 +775,11 @@ static int btrfs_sync_fs(struct super_block *sb, int wait) sb->s_dirt = 0; if (!wait) { - filemap_flush(root->fs_info->btree_inode->i_mapping); + // filemap_flush(root->fs_info->btree_inode->i_mapping); + filemap_flush(root->fs_info->sb->s_bdev->bd_inode->i_mapping); return 0; } - filemap_write_and_wait(root->fs_info->btree_inode->i_mapping); - + filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); @@ -1242,6 +1262,95 @@ static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov, return retval; } +static struct kmem_cache *btrfs_inode_cachep; +struct kmem_cache *btrfs_trans_handle_cachep; +struct kmem_cache *btrfs_transaction_cachep; +struct kmem_cache *btrfs_bit_radix_cachep; +struct kmem_cache *btrfs_path_cachep; + +/* + * Called inside transaction, so use GFP_NOFS + */ +static struct inode *btrfs_alloc_inode(struct super_block *sb) +{ + struct btrfs_inode *ei; + + ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); + if (!ei) + return NULL; + ei->magic = 0xDEADBEEF; + ei->magic2 = 0xDEADBEAF; + return &ei->vfs_inode; +} + +static void btrfs_destroy_inode(struct inode *inode) +{ + struct btrfs_inode *ei = BTRFS_I(inode); + WARN_ON(ei->magic != 0xDEADBEEF); + WARN_ON(ei->magic2 != 0xDEADBEAF); + WARN_ON(!list_empty(&inode->i_dentry)); + WARN_ON(inode->i_ino == 1); + WARN_ON(inode->i_data.nrpages); + + ei->magic = 0; + ei->magic2 = 0; + kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); +} + +static void init_once(void * foo, struct kmem_cache * cachep, + unsigned long flags) +{ + struct btrfs_inode *ei = (struct btrfs_inode *) foo; + + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + 
SLAB_CTOR_CONSTRUCTOR) { + inode_init_once(&ei->vfs_inode); + } +} + +static int init_inodecache(void) +{ + btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache", + sizeof(struct btrfs_inode), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + init_once, NULL); + btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache", + sizeof(struct btrfs_trans_handle), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + NULL, NULL); + btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache", + sizeof(struct btrfs_transaction), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + NULL, NULL); + btrfs_path_cachep = kmem_cache_create("btrfs_path_cache", + sizeof(struct btrfs_transaction), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + NULL, NULL); + btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix", + 256, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD | + SLAB_DESTROY_BY_RCU), + NULL, NULL); + if (btrfs_inode_cachep == NULL || btrfs_trans_handle_cachep == NULL || + btrfs_transaction_cachep == NULL || btrfs_bit_radix_cachep == NULL) + return -ENOMEM; + return 0; +} + +static void destroy_inodecache(void) +{ + kmem_cache_destroy(btrfs_inode_cachep); + kmem_cache_destroy(btrfs_trans_handle_cachep); + kmem_cache_destroy(btrfs_transaction_cachep); + kmem_cache_destroy(btrfs_bit_radix_cachep); + kmem_cache_destroy(btrfs_path_cachep); +} + static int btrfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { @@ -1265,6 +1374,8 @@ static struct super_operations btrfs_super_ops = { .write_super = btrfs_write_super, .sync_fs = btrfs_sync_fs, .write_inode = btrfs_write_inode, + .alloc_inode = btrfs_alloc_inode, + .destroy_inode = btrfs_destroy_inode, }; static struct inode_operations btrfs_dir_inode_operations = { @@ -1305,12 +1416,17 @@ static struct file_operations btrfs_file_operations = { static int __init init_btrfs_fs(void) { + int err; printk("btrfs loaded!\n"); + err = init_inodecache(); + 
if (err) + return err; return register_filesystem(&btrfs_fs_type); } static void __exit exit_btrfs_fs(void) { + destroy_inodecache(); unregister_filesystem(&btrfs_fs_type); printk("btrfs unloaded\n"); } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 84c4e278ce2..72b52e1e0b1 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -5,13 +5,20 @@ #include "transaction.h" static int total_trans = 0; +extern struct kmem_cache *btrfs_trans_handle_cachep; +extern struct kmem_cache *btrfs_transaction_cachep; + +#define TRANS_MAGIC 0xE1E10E static void put_transaction(struct btrfs_transaction *transaction) { + WARN_ON(transaction->use_count == 0); transaction->use_count--; + WARN_ON(transaction->magic != TRANS_MAGIC); if (transaction->use_count == 0) { WARN_ON(total_trans == 0); total_trans--; - kfree(transaction); + memset(transaction, 0, sizeof(*transaction)); + kmem_cache_free(btrfs_transaction_cachep, transaction); } } @@ -20,7 +27,8 @@ static int join_transaction(struct btrfs_root *root) struct btrfs_transaction *cur_trans; cur_trans = root->fs_info->running_transaction; if (!cur_trans) { - cur_trans = kmalloc(sizeof(*cur_trans), GFP_NOFS); + cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, + GFP_NOFS); total_trans++; BUG_ON(!cur_trans); root->fs_info->running_transaction = cur_trans; @@ -28,6 +36,7 @@ static int join_transaction(struct btrfs_root *root) cur_trans->transid = root->root_key.offset + 1; init_waitqueue_head(&cur_trans->writer_wait); init_waitqueue_head(&cur_trans->commit_wait); + cur_trans->magic = TRANS_MAGIC; cur_trans->in_commit = 0; cur_trans->use_count = 1; cur_trans->commit_done = 0; @@ -39,7 +48,8 @@ static int join_transaction(struct btrfs_root *root) struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, int num_blocks) { - struct btrfs_trans_handle *h = kmalloc(sizeof(*h), GFP_NOFS); + struct btrfs_trans_handle *h = + kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); int ret; 
mutex_lock(&root->fs_info->trans_mutex); @@ -51,6 +61,7 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, h->blocks_used = 0; root->fs_info->running_transaction->use_count++; mutex_unlock(&root->fs_info->trans_mutex); + h->magic = h->magic2 = TRANS_MAGIC; return h; } @@ -58,6 +69,8 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_transaction *cur_trans; + WARN_ON(trans->magic != TRANS_MAGIC); + WARN_ON(trans->magic2 != TRANS_MAGIC); mutex_lock(&root->fs_info->trans_mutex); cur_trans = root->fs_info->running_transaction; WARN_ON(cur_trans->num_writers < 1); @@ -67,7 +80,7 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, put_transaction(cur_trans); mutex_unlock(&root->fs_info->trans_mutex); memset(trans, 0, sizeof(*trans)); - kfree(trans); + kmem_cache_free(btrfs_trans_handle_cachep, trans); return 0; } @@ -75,7 +88,7 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - filemap_write_and_wait(root->fs_info->btree_inode->i_mapping); + filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping); return 0; } @@ -137,6 +150,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->trans_mutex); if (trans->transaction->in_commit) { +printk("already in commit!, waiting\n"); cur_trans = trans->transaction; trans->transaction->use_count++; btrfs_end_transaction(trans, root); @@ -146,7 +160,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_unlock(&root->fs_info->trans_mutex); return 0; } + cur_trans = trans->transaction; + trans->transaction->in_commit = 1; while (trans->transaction->num_writers > 1) { + WARN_ON(cur_trans != trans->transaction); prepare_to_wait(&trans->transaction->writer_wait, &wait, TASK_UNINTERRUPTIBLE); if (trans->transaction->num_writers <= 1) @@ -154,15 +171,15 @@ int 
btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_unlock(&root->fs_info->trans_mutex); schedule(); mutex_lock(&root->fs_info->trans_mutex); + finish_wait(&trans->transaction->writer_wait, &wait); } finish_wait(&trans->transaction->writer_wait, &wait); - + WARN_ON(cur_trans != trans->transaction); if (root->node != root->commit_root) { memcpy(&snap_key, &root->root_key, sizeof(snap_key)); root->root_key.offset++; } - if (btrfs_root_blocknr(&root->root_item) != root->node->b_blocknr) { btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr); ret = btrfs_insert_root(trans, root->fs_info->tree_root, @@ -172,22 +189,21 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ret = btrfs_commit_tree_roots(trans, root); BUG_ON(ret); - cur_trans = root->fs_info->running_transaction; root->fs_info->running_transaction = NULL; mutex_unlock(&root->fs_info->trans_mutex); - ret = btrfs_write_and_wait_transaction(trans, root); BUG_ON(ret); write_ctree_super(trans, root); btrfs_finish_extent_commit(trans, root); mutex_lock(&root->fs_info->trans_mutex); + cur_trans->commit_done = 1; + wake_up(&cur_trans->commit_wait); put_transaction(cur_trans); put_transaction(cur_trans); mutex_unlock(&root->fs_info->trans_mutex); - kfree(trans); - + kmem_cache_free(btrfs_trans_handle_cachep, trans); if (root->node != root->commit_root) { trans = btrfs_start_transaction(root, 1); snap = root->commit_root; @@ -203,7 +219,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ret = btrfs_end_transaction(trans, root); BUG_ON(ret); } - return ret; } diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 7e6c08a0acc..4f1496ae6f2 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -7,15 +7,18 @@ struct btrfs_transaction { int in_commit; int use_count; int commit_done; + int magic; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; }; struct btrfs_trans_handle { + int magic; u64 transid; unsigned long blocks_reserved; 
unsigned long blocks_used; struct btrfs_transaction *transaction; + int magic2; }; -- cgit v1.2.3-70-g09d2 From 8fd17795b2261ecb1bad2a6df09ef14c4957a3fb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 19 Apr 2007 21:01:03 -0400 Subject: Btrfs: early fsync support Signed-off-by: Chris Mason --- fs/btrfs/TODO | 1 + fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/root-tree.c | 1 - fs/btrfs/super.c | 37 ++++++++++++++++++++++++++++++++++++- fs/btrfs/transaction.c | 25 ++++++++++++++++++++++--- fs/btrfs/transaction.h | 1 + 7 files changed, 62 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 8d7e5a3ed62..5ff2cef9f02 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -7,6 +7,7 @@ * Get rid of struct ctree_path, limiting tree levels held at one time * Add generation number to key pointer in nodes * Add parent pointer back to extent map. +* Implement real list of pending transactions * Release * Do real tree locking * Add extent mirroring (backup copies of blocks) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d628a3b9441..047a84762d9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -256,6 +256,7 @@ struct btrfs_fs_info { struct inode *btree_inode; struct mutex trans_mutex; struct mutex fs_mutex; + struct list_head trans_list; struct crypto_hash *hash_tfm; spinlock_t hash_lock; struct kobject kobj; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 11e17a2f736..b666c3571ae 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -528,6 +528,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) init_bit_radix(&fs_info->pending_del_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS); + INIT_LIST_HEAD(&fs_info->trans_list); sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; fs_info->tree_root = tree_root; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 72be9836932..a42943bd917 
100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -100,7 +100,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, BUG_ON(refs == 0); if (refs == 1) { ret = btrfs_del_item(trans, root, path); -printk("deleting root %Lu %Lu %u\n", key->objectid, key->offset, key->flags); } else { btrfs_set_root_refs(ri, refs - 1); printk("ref now %u root %Lu %Lu %u\n", refs -1, key->objectid, key->offset, key->flags); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index dff2db0d1dc..f99c764a59d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -932,6 +933,26 @@ out_unlock: return err; } +static int btrfs_sync_file(struct file *file, + struct dentry *dentry, int datasync) +{ + struct inode *inode = dentry->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret; + struct btrfs_trans_handle *trans; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + if (!trans) { + ret = -ENOMEM; + goto out; + } + ret = btrfs_commit_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); +out: + return ret > 0 ? 
EIO : ret; +} + static int btrfs_sync_fs(struct super_block *sb, int wait) { struct btrfs_trans_handle *trans; @@ -2353,6 +2374,19 @@ static int btrfs_getattr(struct vfsmount *mnt, return 0; } +static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct btrfs_root *root = btrfs_sb(dentry->d_sb); + struct btrfs_super_block *disk_super = root->fs_info->disk_super; + + buf->f_namelen = BTRFS_NAME_LEN; + buf->f_blocks = btrfs_super_total_blocks(disk_super); + buf->f_bfree = buf->f_blocks - btrfs_super_blocks_used(disk_super); + buf->f_bavail = buf->f_bfree; + buf->f_bsize = dentry->d_sb->s_blocksize; + buf->f_type = BTRFS_SUPER_MAGIC; + return 0; +} static struct file_system_type btrfs_fs_type = { .owner = THIS_MODULE, .name = "btrfs", @@ -2362,7 +2396,6 @@ static struct file_system_type btrfs_fs_type = { }; static struct super_operations btrfs_super_ops = { - .statfs = simple_statfs, .delete_inode = btrfs_delete_inode, .put_super = btrfs_put_super, .read_inode = btrfs_read_locked_inode, @@ -2371,6 +2404,7 @@ static struct super_operations btrfs_super_ops = { .write_inode = btrfs_write_inode, .alloc_inode = btrfs_alloc_inode, .destroy_inode = btrfs_destroy_inode, + .statfs = btrfs_statfs, }; static struct inode_operations btrfs_dir_inode_operations = { @@ -2413,6 +2447,7 @@ static struct file_operations btrfs_file_operations = { .mmap = generic_file_mmap, .open = generic_file_open, .ioctl = btrfs_ioctl, + .fsync = btrfs_sync_file, }; static int __init init_btrfs_fs(void) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8740752f384..078cb9cbf9d 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -19,6 +19,7 @@ static void put_transaction(struct btrfs_transaction *transaction) if (transaction->use_count == 0) { WARN_ON(total_trans == 0); total_trans--; + list_del_init(&transaction->list); memset(transaction, 0, sizeof(*transaction)); kmem_cache_free(btrfs_transaction_cachep, transaction); } @@ -43,6 +44,7 @@ static 
int join_transaction(struct btrfs_root *root) cur_trans->in_commit = 0; cur_trans->use_count = 1; cur_trans->commit_done = 0; + list_add_tail(&cur_trans->list, &root->fs_info->trans_list); } cur_trans->num_writers++; return 0; @@ -236,6 +238,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, { int ret = 0; struct btrfs_transaction *cur_trans; + struct btrfs_transaction *prev_trans = NULL; struct list_head dirty_fs_roots; DEFINE_WAIT(wait); @@ -272,13 +275,29 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, BUG_ON(ret); cur_trans = root->fs_info->running_transaction; root->fs_info->running_transaction = NULL; - btrfs_set_super_generation(root->fs_info->disk_super, - root->fs_info->generation + 1); + if (cur_trans->list.prev != &root->fs_info->trans_list) { + prev_trans = list_entry(cur_trans->list.prev, + struct btrfs_transaction, list); + if (prev_trans->commit_done) + prev_trans = NULL; + else + prev_trans->use_count++; + } mutex_unlock(&root->fs_info->trans_mutex); + mutex_unlock(&root->fs_info->fs_mutex); ret = btrfs_write_and_wait_transaction(trans, root); + if (prev_trans) { + mutex_lock(&root->fs_info->trans_mutex); + wait_for_commit(root, prev_trans); + put_transaction(prev_trans); + mutex_unlock(&root->fs_info->trans_mutex); + } + btrfs_set_super_generation(root->fs_info->disk_super, + cur_trans->transid); BUG_ON(ret); - write_ctree_super(trans, root); + + mutex_lock(&root->fs_info->fs_mutex); btrfs_finish_extent_commit(trans, root); mutex_lock(&root->fs_info->trans_mutex); cur_trans->commit_done = 1; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 4f1496ae6f2..3cc29900a07 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -8,6 +8,7 @@ struct btrfs_transaction { int use_count; int commit_done; int magic; + struct list_head list; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; }; -- cgit v1.2.3-70-g09d2 From 7c4452b9a6ca7aabe37ea2e43d443110bdc08cd8 Mon Sep 17 00:00:00 2001 
From: Chris Mason Date: Sat, 28 Apr 2007 09:29:35 -0400 Subject: Btrfs: smarter transaction writeback Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 1 + fs/btrfs/super.c | 1 - fs/btrfs/transaction.c | 37 +++++++++++++++++++++++++++++++++++-- fs/btrfs/transaction.h | 1 + 4 files changed, 37 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index afc5267515e..652cf305a96 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -762,6 +762,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, BUG_ON(ret); buf = btrfs_find_create_tree_block(root, ins.objectid); set_buffer_uptodate(buf); + set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index); return buf; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a10e902d310..1890e8648db 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -980,7 +980,6 @@ static int btrfs_sync_fs(struct super_block *sb, int wait) filemap_flush(root->fs_info->btree_inode->i_mapping); return 0; } - filemap_write_and_wait(root->fs_info->btree_inode->i_mapping); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8a2545f6a5b..f9b8864dcc4 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -45,6 +45,7 @@ static int join_transaction(struct btrfs_root *root) cur_trans->use_count = 1; cur_trans->commit_done = 0; list_add_tail(&cur_trans->list, &root->fs_info->trans_list); + init_bit_radix(&cur_trans->dirty_pages); } cur_trans->num_writers++; return 0; @@ -106,8 +107,40 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - filemap_write_and_wait(root->fs_info->btree_inode->i_mapping); - return 0; + unsigned long gang[16]; + int ret; + int i; + 
int err; + int werr = 0; + struct page *page; + struct radix_tree_root *dirty_pages; + struct inode *btree_inode = root->fs_info->btree_inode; + + if (!trans || !trans->transaction) { + return filemap_write_and_wait(btree_inode->i_mapping); + } + dirty_pages = &trans->transaction->dirty_pages; + while(1) { + ret = find_first_radix_bit(dirty_pages, gang, ARRAY_SIZE(gang)); + if (!ret) + break; + for (i = 0; i < ret; i++) { + /* FIXME EIO */ + clear_radix_bit(dirty_pages, gang[i]); + page = find_lock_page(btree_inode->i_mapping, + gang[i]); + if (!page) + continue; + err = write_one_page(page, 0); + if (err) + werr = err; + page_cache_release(page); + } + } + err = filemap_fdatawait(btree_inode->i_mapping); + if (err) + werr = err; + return werr; } int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 3cc29900a07..afe42d167ce 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -9,6 +9,7 @@ struct btrfs_transaction { int commit_done; int magic; struct list_head list; + struct radix_tree_root dirty_pages; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; }; -- cgit v1.2.3-70-g09d2 From 31f3c99b73483f7b738a886c552050cbd6128ff3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 30 Apr 2007 15:25:45 -0400 Subject: Btrfs: allocator improvements, inode block groups Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 1 + fs/btrfs/ctree.c | 10 ++-- fs/btrfs/ctree.h | 18 ++++++- fs/btrfs/extent-tree.c | 138 +++++++++++++++++++++++++++++++++++-------------- fs/btrfs/super.c | 53 +++++++++++++++---- fs/btrfs/transaction.c | 1 + fs/btrfs/transaction.h | 15 ++++++ 7 files changed, 181 insertions(+), 55 deletions(-) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 43a4f1ddb37..6b50076b02d 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -3,6 +3,7 @@ struct btrfs_inode { struct btrfs_root *root; + 
struct btrfs_block_group_cache *block_group; struct btrfs_key location; struct inode vfs_inode; }; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b4783bf8bf4..dbd3f636dd3 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -61,7 +61,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *cow_ret = buf; return 0; } - cow = btrfs_alloc_free_block(trans, root); + cow = btrfs_alloc_free_block(trans, root, buf->b_blocknr); cow_node = btrfs_buffer_node(cow); if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) WARN_ON(1); @@ -800,7 +800,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); - t = btrfs_alloc_free_block(trans, root); + t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr); c = btrfs_buffer_node(t); memset(c, 0, root->blocksize); btrfs_set_header_nritems(&c->header, 1); @@ -905,7 +905,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root } c_nritems = btrfs_header_nritems(&c->header); - split_buffer = btrfs_alloc_free_block(trans, root); + split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr); split = btrfs_buffer_node(split_buffer); btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); btrfs_set_header_level(&split->header, btrfs_header_level(&c->header)); @@ -1277,7 +1277,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root slot = path->slots[0]; nritems = btrfs_header_nritems(&l->header); mid = (nritems + 1)/ 2; - right_buffer = btrfs_alloc_free_block(trans, root); + right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); BUG_ON(!right_buffer); right = btrfs_buffer_leaf(right_buffer); memset(&right->header, 0, sizeof(right->header)); @@ -1374,7 +1374,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root if (!double_split) return ret; - right_buffer = 
btrfs_alloc_free_block(trans, root); + right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); BUG_ON(!right_buffer); right = btrfs_buffer_leaf(right_buffer); memset(&right->header, 0, sizeof(right->header)); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c432222d40e..e6bf9919536 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -174,6 +174,7 @@ struct btrfs_inode_item { __le64 generation; __le64 size; __le64 nblocks; + __le64 block_group; __le32 nlink; __le32 uid; __le32 gid; @@ -241,6 +242,7 @@ struct btrfs_device_item { /* tag for the radix tree of block groups in ram */ #define BTRFS_BLOCK_GROUP_DIRTY 0 +#define BTRFS_BLOCK_GROUP_AVAIL 1 #define BTRFS_BLOCK_GROUP_HINTS 8 #define BTRFS_BLOCK_GROUP_SIZE (256 * 1024 * 1024) struct btrfs_block_group_item { @@ -410,6 +412,17 @@ static inline void btrfs_set_inode_nblocks(struct btrfs_inode_item *i, u64 val) i->nblocks = cpu_to_le64(val); } +static inline u64 btrfs_inode_block_group(struct btrfs_inode_item *i) +{ + return le64_to_cpu(i->block_group); +} + +static inline void btrfs_set_inode_block_group(struct btrfs_inode_item *i, + u64 val) +{ + i->block_group = cpu_to_le64(val); +} + static inline u32 btrfs_inode_nlink(struct btrfs_inode_item *i) { return le32_to_cpu(i->nlink); @@ -1054,10 +1067,13 @@ static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh) btrfs_item_offset((leaf)->items + (slot)))) /* extent-tree.c */ +struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, + struct btrfs_block_group_cache + *hint, int data); int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root); + struct btrfs_root *root, u64 hint); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, u64 num_blocks, u64 search_start, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 
62051a36664..8b8cbe25fff 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -12,42 +12,57 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); -static int find_search_start(struct btrfs_root *root, int data) +struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, + struct btrfs_block_group_cache + *hint, int data) { struct btrfs_block_group_cache *cache[8]; + struct btrfs_block_group_cache *found_group = NULL; struct btrfs_fs_info *info = root->fs_info; u64 used; - u64 last; + u64 last = 0; + u64 hint_last; int i; int ret; - - cache[0] = info->block_group_cache; - if (!cache[0]) - goto find_new; - used = btrfs_block_group_used(&cache[0]->item); - if (used < (cache[0]->key.offset * 3 / 2)) - return 0; -find_new: - last = 0; + int full_search = 0; + if (hint) { + used = btrfs_block_group_used(&hint->item); + if (used < (hint->key.offset * 2) / 3) { + return hint; + } + radix_tree_tag_clear(&info->block_group_radix, + hint->key.objectid + hint->key.offset - 1, + BTRFS_BLOCK_GROUP_AVAIL); + last = hint->key.objectid + hint->key.offset; + hint_last = last; + } else { + hint_last = 0; + last = 0; + } while(1) { ret = radix_tree_gang_lookup_tag(&info->block_group_radix, (void **)cache, last, ARRAY_SIZE(cache), - BTRFS_BLOCK_GROUP_DIRTY); + BTRFS_BLOCK_GROUP_AVAIL); if (!ret) break; for (i = 0; i < ret; i++) { used = btrfs_block_group_used(&cache[i]->item); - if (used < (cache[i]->key.offset * 3 / 2)) { + if (used < (cache[i]->key.offset * 2) / 3) { info->block_group_cache = cache[i]; - cache[i]->last_alloc = cache[i]->first_free; - return 0; + found_group = cache[i]; + goto found; } + radix_tree_tag_clear(&info->block_group_radix, + cache[i]->key.objectid + + cache[i]->key.offset - 1, + BTRFS_BLOCK_GROUP_AVAIL); last = cache[i]->key.objectid + - cache[i]->key.offset - 1; + cache[i]->key.offset; } } - last = 0; 
+ last = hint_last; +again: while(1) { ret = radix_tree_gang_lookup(&info->block_group_radix, (void **)cache, @@ -56,17 +71,32 @@ find_new: break; for (i = 0; i < ret; i++) { used = btrfs_block_group_used(&cache[i]->item); - if (used < (cache[i]->key.offset * 3 / 2)) { + if (used < cache[i]->key.offset) { info->block_group_cache = cache[i]; - cache[i]->last_alloc = cache[i]->first_free; - return 0; + found_group = cache[i]; + goto found; } + radix_tree_tag_clear(&info->block_group_radix, + cache[i]->key.objectid + + cache[i]->key.offset - 1, + BTRFS_BLOCK_GROUP_AVAIL); last = cache[i]->key.objectid + - cache[i]->key.offset - 1; + cache[i]->key.offset; } } info->block_group_cache = NULL; - return 0; + if (!full_search) { + last = 0; + full_search = 1; + goto again; + } +found: + if (!found_group) { + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)&found_group, 0, 1); + BUG_ON(ret != 1); + } + return found_group; } int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, @@ -243,6 +273,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, path, cache[i]); if (err) werr = err; + cache[i]->last_alloc = cache[i]->first_free; } } btrfs_free_path(path); @@ -322,10 +353,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btree_inode->i_blkbits)); } } - if (root->fs_info->block_group_cache) { - root->fs_info->block_group_cache->last_alloc = - root->fs_info->block_group_cache->first_free; - } return 0; } @@ -532,22 +559,43 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int total_found = 0; int fill_prealloc = 0; int level; + int update_block_group = 0; + struct btrfs_block_group_cache *hint_block_group; path = btrfs_alloc_path(); ins->flags = 0; btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); level = btrfs_header_level(btrfs_buffer_header(root->node)); + /* find search start here */ + if (0 && search_start && num_blocks) { + u64 used; + ret = 
radix_tree_gang_lookup(&info->block_group_radix, + (void **)&hint_block_group, + search_start, 1); + if (ret) { + used = btrfs_block_group_used(&hint_block_group->item); + if (used > (hint_block_group->key.offset * 9) / 10) + search_start = 0; + else if (search_start < hint_block_group->last_alloc) + search_start = hint_block_group->last_alloc; + } else { + search_start = 0; + } + } if (num_blocks == 0) { fill_prealloc = 1; num_blocks = 1; total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3; } - find_search_start(root, 0); - if (info->block_group_cache && - info->block_group_cache->last_alloc > search_start) - search_start = info->block_group_cache->last_alloc; - + if (1 || !search_start) { + trans->block_group = btrfs_find_block_group(root, + trans->block_group, + 0); + if (trans->block_group->last_alloc > search_start) + search_start = trans->block_group->last_alloc; + update_block_group = 1; + } check_failed: btrfs_init_path(path); ins->objectid = search_start; @@ -662,11 +710,13 @@ check_pending: } info->extent_tree_prealloc_nr = total_found; } - ret = radix_tree_gang_lookup(&info->block_group_radix, - (void **)&info->block_group_cache, - ins->objectid, 1); - if (ret) { - info->block_group_cache->last_alloc = ins->objectid; + if (update_block_group) { + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)&trans->block_group, + ins->objectid, 1); + if (ret) { + trans->block_group->last_alloc = ins->objectid; + } } ins->offset = num_blocks; btrfs_free_path(path); @@ -747,14 +797,14 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, * returns the tree buffer or NULL. 
*/ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root) + struct btrfs_root *root, u64 hint) { struct btrfs_key ins; int ret; struct buffer_head *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - 1, 0, (unsigned long)-1, &ins); + 1, hint, (unsigned long)-1, &ins); if (ret) { BUG(); return NULL; @@ -975,6 +1025,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_key found_key; struct btrfs_leaf *leaf; u64 group_size_blocks = BTRFS_BLOCK_GROUP_SIZE / root->blocksize; + u64 used; root = root->fs_info->extent_root; key.objectid = 0; @@ -1005,8 +1056,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_block_group_item); memcpy(&cache->item, bi, sizeof(*bi)); memcpy(&cache->key, &found_key, sizeof(found_key)); - cache->last_alloc = 0; - cache->first_free = 0; + cache->last_alloc = cache->key.objectid; + cache->first_free = cache->key.objectid; key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); ret = radix_tree_insert(&root->fs_info->block_group_radix, @@ -1014,6 +1065,13 @@ int btrfs_read_block_groups(struct btrfs_root *root) found_key.offset - 1, (void *)cache); BUG_ON(ret); + used = btrfs_block_group_used(bi); + if (used < (key.offset * 2) / 3) { + radix_tree_tag_set(&root->fs_info->block_group_radix, + found_key.objectid + + found_key.offset - 1, + BTRFS_BLOCK_GROUP_AVAIL); + } if (key.objectid >= btrfs_super_total_blocks(root->fs_info->disk_super)) break; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 1890e8648db..7ecbe7c8618 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -52,6 +52,8 @@ static void btrfs_read_locked_inode(struct inode *inode) struct btrfs_inode_item *inode_item; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key location; + struct btrfs_block_group_cache *alloc_group; + u64 alloc_group_block; int ret; path = btrfs_alloc_path(); @@ -82,6 +84,12 @@ static void 
btrfs_read_locked_inode(struct inode *inode) inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime); inode->i_blocks = btrfs_inode_nblocks(inode_item); inode->i_generation = btrfs_inode_generation(inode_item); + alloc_group_block = btrfs_inode_block_group(inode_item); + ret = radix_tree_gang_lookup(&root->fs_info->block_group_radix, + (void **)&alloc_group, + alloc_group_block, 1); + BUG_ON(!ret); + BTRFS_I(inode)->block_group = alloc_group; btrfs_free_path(path); inode_item = NULL; @@ -136,6 +144,8 @@ static void fill_inode_item(struct btrfs_inode_item *item, btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec); btrfs_set_inode_nblocks(item, inode->i_blocks); btrfs_set_inode_generation(item, inode->i_generation); + btrfs_set_inode_block_group(item, + BTRFS_I(inode)->block_group->key.objectid); } @@ -237,6 +247,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) root = BTRFS_I(dir)->root; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); ret = btrfs_unlink_trans(trans, root, dir, dentry); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -262,6 +273,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) btrfs_init_path(path); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); key.objectid = inode->i_ino; key.offset = (u64)-1; key.flags = (u32)-1; @@ -429,6 +441,7 @@ static void btrfs_delete_inode(struct inode *inode) inode->i_size = 0; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); if (S_ISREG(inode->i_mode)) { ret = btrfs_truncate_in_trans(trans, root, inode); BUG_ON(ret); @@ -731,6 +744,7 @@ static int btrfs_write_inode(struct inode *inode, int wait) if (wait) { mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + 
btrfs_set_trans_block_group(trans, inode); ret = btrfs_commit_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); } @@ -744,6 +758,7 @@ static void btrfs_dirty_inode(struct inode *inode) mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); btrfs_update_inode(trans, root, inode); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -751,7 +766,9 @@ static void btrfs_dirty_inode(struct inode *inode) static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 objectid, int mode) + u64 objectid, + struct btrfs_block_group_cache *group, + int mode) { struct inode *inode; struct btrfs_inode_item inode_item; @@ -763,6 +780,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, return ERR_PTR(-ENOMEM); BTRFS_I(inode)->root = root; + group = btrfs_find_block_group(root, group, 0); + BTRFS_I(inode)->block_group = group; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; @@ -832,6 +851,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); if (err) { @@ -839,11 +859,13 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, goto out_unlock; } - inode = btrfs_new_inode(trans, root, objectid, mode); + inode = btrfs_new_inode(trans, root, objectid, + BTRFS_I(dir)->block_group, mode); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_unlock; - // FIXME mark the inode dirty + + btrfs_set_trans_block_group(trans, inode); err = btrfs_add_nondir(trans, dentry, inode); if (err) drop_inode = 1; @@ -853,6 +875,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; } dir->i_sb->s_dirt = 1; + btrfs_update_inode_block_group(trans, 
inode); + btrfs_update_inode_block_group(trans, dir); out_unlock: btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -904,6 +928,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); if (IS_ERR(trans)) { err = PTR_ERR(trans); goto out_unlock; @@ -915,7 +940,8 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) goto out_unlock; } - inode = btrfs_new_inode(trans, root, objectid, S_IFDIR | mode); + inode = btrfs_new_inode(trans, root, objectid, + BTRFS_I(dir)->block_group, S_IFDIR | mode); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_fail; @@ -923,6 +949,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) drop_on_err = 1; inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; + btrfs_set_trans_block_group(trans, inode); err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino); if (err) @@ -938,6 +965,8 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) d_instantiate(dentry, inode); drop_on_err = 0; dir->i_sb->s_dirt = 1; + btrfs_update_inode_block_group(trans, inode); + btrfs_update_inode_block_group(trans, dir); out_fail: btrfs_end_transaction(trans, root); @@ -1349,6 +1378,7 @@ static void btrfs_truncate(struct inode *inode) /* FIXME, add redo link to tree so we don't leak on crash */ mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); ret = btrfs_truncate_in_trans(trans, root, inode); BUG_ON(ret); ret = btrfs_end_transaction(trans, root); @@ -1445,6 +1475,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); bh = page_buffers(pages[i]); if 
(buffer_mapped(bh) && bh->b_blocknr == 0) { @@ -1481,6 +1512,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, kunmap(pages[i]); } SetPageChecked(pages[i]); + btrfs_update_inode_block_group(trans, inode); ret = btrfs_end_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); @@ -1821,6 +1853,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, mutex_unlock(&root->fs_info->fs_mutex); goto out_unlock; } + btrfs_set_trans_block_group(trans, inode); /* FIXME blocksize != 4096 */ inode->i_blocks += num_blocks << 3; if (start_pos < inode->i_size) { @@ -1845,6 +1878,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } BUG_ON(ret); alloc_extent_start = ins.objectid; + btrfs_update_inode_block_group(trans, inode); ret = btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -2017,6 +2051,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) struct btrfs_leaf *leaf; struct btrfs_root *new_root; struct inode *inode; + struct inode *dir; int ret; u64 objectid; u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; @@ -2025,7 +2060,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); - subvol = btrfs_alloc_free_block(trans, root); + subvol = btrfs_alloc_free_block(trans, root, 0); if (subvol == NULL) return -ENOSPC; leaf = btrfs_buffer_leaf(subvol); @@ -2069,10 +2104,9 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) * insert the directory item */ key.offset = (u64)-1; + dir = root->fs_info->sb->s_root->d_inode; ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, - name, namelen, - root->fs_info->sb->s_root->d_inode->i_ino, - &key, 0); + name, namelen, dir->i_ino, &key, 0); BUG_ON(ret); ret = btrfs_commit_transaction(trans, root); @@ -2084,7 +2118,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int 
namelen) trans = btrfs_start_transaction(new_root, 1); BUG_ON(!trans); - inode = btrfs_new_inode(trans, new_root, new_dirid, S_IFDIR | 0700); + inode = btrfs_new_inode(trans, new_root, new_dirid, + BTRFS_I(dir)->block_group, S_IFDIR | 0700); inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f9b8864dcc4..bdbf514c111 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -77,6 +77,7 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, h->transaction = root->fs_info->running_transaction; h->blocks_reserved = num_blocks; h->blocks_used = 0; + h->block_group = NULL; root->fs_info->running_transaction->use_count++; mutex_unlock(&root->fs_info->trans_mutex); h->magic = h->magic2 = TRANS_MAGIC; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index afe42d167ce..b5378119e76 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -1,5 +1,6 @@ #ifndef __TRANSACTION__ #define __TRANSACTION__ +#include "btrfs_inode.h" struct btrfs_transaction { u64 transid; @@ -20,10 +21,24 @@ struct btrfs_trans_handle { unsigned long blocks_reserved; unsigned long blocks_used; struct btrfs_transaction *transaction; + struct btrfs_block_group_cache *block_group; int magic2; }; +static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans, + struct inode *inode) +{ + trans->block_group = BTRFS_I(inode)->block_group; +} + +static inline void btrfs_update_inode_block_group(struct + btrfs_trans_handle *trans, + struct inode *inode) +{ + BTRFS_I(inode)->block_group = trans->block_group; +} + int btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, -- cgit v1.2.3-70-g09d2 From 08607c1b182b3b8b54d7141a0c012cda17d201e6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 8 Jun 2007 15:33:54 -0400 Subject: Btrfs: add 
compat ioctl Signed-off-by: Chris Mason --- fs/btrfs/TODO | 1 - fs/btrfs/ctree.h | 5 +++-- fs/btrfs/disk-io.c | 4 ++-- fs/btrfs/super.c | 27 +++++++++++++++++++++-- fs/btrfs/transaction.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/transaction.h | 8 +++++++ 6 files changed, 98 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 861e34ae097..61d25b5de28 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -7,7 +7,6 @@ * Get rid of struct ctree_path, limiting tree levels held at one time * Add generation number to key pointer in nodes * Add generation number to inode -* Add ability to switch a block group from data to metadata or vice versa * forbid cross subvolume renames and hardlinks * Release * Do real tree locking diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a1c95c980fd..74e6aadf6bb 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3,7 +3,7 @@ #include #include -#include +#include #include "bit-radix.h" struct btrfs_trans_handle; @@ -281,6 +281,7 @@ struct btrfs_block_group_cache { }; struct crypto_hash; + struct btrfs_fs_info { struct btrfs_root *extent_root; struct btrfs_root *tree_root; @@ -308,9 +309,9 @@ struct btrfs_fs_info { struct mutex fs_mutex; struct list_head trans_list; struct crypto_hash *hash_tfm; + struct delayed_work trans_work; spinlock_t hash_lock; int do_barriers; - struct kobject kobj; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3da057f78bc..048282f06af 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -577,6 +577,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->do_barriers = 1; fs_info->extent_tree_insert_nr = 0; fs_info->extent_tree_prealloc_nr = 0; + INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); BTRFS_I(fs_info->btree_inode)->root = tree_root; memset(&BTRFS_I(fs_info->btree_inode)->location, 0, sizeof(struct btrfs_key)); @@ -648,7 +649,6 @@ struct btrfs_root 
*open_ctree(struct super_block *sb) btrfs_read_block_groups(extent_root); fs_info->generation = btrfs_super_generation(disk_super) + 1; - memset(&fs_info->kobj, 0, sizeof(fs_info->kobj)); mutex_unlock(&fs_info->fs_mutex); return tree_root; } @@ -746,6 +746,7 @@ int close_ctree(struct btrfs_root *root) struct btrfs_trans_handle *trans; struct btrfs_fs_info *fs_info = root->fs_info; + btrfs_transaction_flush_work(root); mutex_lock(&fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_commit_transaction(trans, root); @@ -776,7 +777,6 @@ int close_ctree(struct btrfs_root *root) del_fs_roots(fs_info); kfree(fs_info->extent_root); kfree(fs_info->tree_root); - kobject_unregister(&fs_info->kobj); return 0; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b58b4cf6676..5864917953a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -950,7 +951,7 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) return -ENOMEM; } sb->s_root = root_dentry; - + btrfs_transaction_queue_work(tree_root, HZ * 30); return 0; } @@ -1452,7 +1453,7 @@ static int btrfs_prepare_write(struct file *file, struct page *page, static void btrfs_write_super(struct super_block *sb) { - btrfs_sync_fs(sb, 1); + sb->s_dirt = 0; } static int btrfs_readpage(struct file *file, struct page *page) @@ -2698,6 +2699,20 @@ static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int return ret; } +#ifdef CONFIG_COMPAT +static long btrfs_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct inode *inode = file->f_path.dentry->d_inode; + int ret; + lock_kernel(); + ret = btrfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); + unlock_kernel(); + return ret; + +} +#endif + static struct kmem_cache *btrfs_inode_cachep; struct kmem_cache *btrfs_trans_handle_cachep; struct kmem_cache 
*btrfs_transaction_cachep; @@ -3042,6 +3057,9 @@ static struct file_operations btrfs_dir_file_operations = { .read = generic_read_dir, .readdir = btrfs_readdir, .ioctl = btrfs_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = btrfs_compat_ioctl, +#endif }; static struct address_space_operations btrfs_aops = { @@ -3073,6 +3091,9 @@ static struct file_operations btrfs_file_operations = { .open = generic_file_open, .ioctl = btrfs_ioctl, .fsync = btrfs_sync_file, +#ifdef CONFIG_COMPAT + .compat_ioctl = btrfs_compat_ioctl, +#endif }; static struct inode_operations btrfs_symlink_inode_operations = { @@ -3085,6 +3106,7 @@ static int __init init_btrfs_fs(void) { int err; printk("btrfs loaded!\n"); + btrfs_init_transaction_sys(); err = init_inodecache(); if (err) return err; @@ -3095,6 +3117,7 @@ static int __init init_btrfs_fs(void) static void __exit exit_btrfs_fs(void) { + btrfs_exit_transaction_sys(); destroy_inodecache(); unregister_filesystem(&btrfs_fs_type); printk("btrfs unloaded\n"); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f0f03121b7b..bf7eef67ba0 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -8,6 +8,8 @@ static int total_trans = 0; extern struct kmem_cache *btrfs_trans_handle_cachep; extern struct kmem_cache *btrfs_transaction_cachep; +static struct workqueue_struct *trans_wq; + #define BTRFS_ROOT_TRANS_TAG 0 #define TRANS_MAGIC 0xE1E10E @@ -44,6 +46,7 @@ static int join_transaction(struct btrfs_root *root) cur_trans->in_commit = 0; cur_trans->use_count = 1; cur_trans->commit_done = 0; + cur_trans->start_time = get_seconds(); list_add_tail(&cur_trans->list, &root->fs_info->trans_list); init_bit_radix(&cur_trans->dirty_pages); } @@ -350,3 +353,60 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, return ret; } +void btrfs_transaction_cleaner(struct work_struct *work) +{ + struct btrfs_fs_info *fs_info = container_of(work, + struct btrfs_fs_info, + trans_work.work); + + struct btrfs_root *root = 
fs_info->tree_root; + struct btrfs_transaction *cur; + struct btrfs_trans_handle *trans; + unsigned long now; + unsigned long delay = HZ * 30; + int ret; + +printk("btrfs transaction cleaner\n"); + mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->trans_mutex); + cur = root->fs_info->running_transaction; + if (!cur) { + mutex_unlock(&root->fs_info->trans_mutex); + goto out; + } + now = get_seconds(); + if (now < cur->start_time || now - cur->start_time < 30) { + mutex_unlock(&root->fs_info->trans_mutex); + delay = HZ * 5; + goto out; + } + mutex_unlock(&root->fs_info->trans_mutex); +printk("forcing commit\n"); + trans = btrfs_start_transaction(root, 1); + ret = btrfs_commit_transaction(trans, root); +out: + mutex_unlock(&root->fs_info->fs_mutex); + btrfs_transaction_queue_work(root, delay); +} + +void btrfs_transaction_queue_work(struct btrfs_root *root, int delay) +{ + queue_delayed_work(trans_wq, &root->fs_info->trans_work, delay); +} + +void btrfs_transaction_flush_work(struct btrfs_root *root) +{ + cancel_rearming_delayed_workqueue(trans_wq, &root->fs_info->trans_work); + flush_workqueue(trans_wq); +} + +void __init btrfs_init_transaction_sys(void) +{ + trans_wq = create_workqueue("btrfs"); +} + +void __exit btrfs_exit_transaction_sys(void) +{ + destroy_workqueue(trans_wq); +} + diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index b5378119e76..0b08208be85 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -11,6 +11,7 @@ struct btrfs_transaction { int magic; struct list_head list; struct radix_tree_root dirty_pages; + unsigned long start_time; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; }; @@ -47,4 +48,11 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_root *root); + +void btrfs_transaction_cleaner(struct work_struct *work); +void btrfs_transaction_flush_work(struct 
btrfs_root *root); +void btrfs_transaction_queue_work(struct btrfs_root *root, int delay); +void btrfs_init_transaction_sys(void); +void btrfs_exit_transaction_sys(void); + #endif -- cgit v1.2.3-70-g09d2 From facda1e787d43191a3368c322f682054991c41b8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 8 Jun 2007 18:11:48 -0400 Subject: Btrfs: get forced transaction commits via workqueue Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 +++- fs/btrfs/disk-io.c | 4 ++++ fs/btrfs/transaction.c | 30 ++++++++++++++++++++---------- fs/btrfs/transaction.h | 3 --- 4 files changed, 27 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 74e6aadf6bb..e93ba1a5c81 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -283,6 +283,7 @@ struct btrfs_block_group_cache { struct crypto_hash; struct btrfs_fs_info { + spinlock_t hash_lock; struct btrfs_root *extent_root; struct btrfs_root *tree_root; struct btrfs_root *dev_root; @@ -308,10 +309,11 @@ struct btrfs_fs_info { struct mutex trans_mutex; struct mutex fs_mutex; struct list_head trans_list; + struct list_head dead_roots; struct crypto_hash *hash_tfm; struct delayed_work trans_work; - spinlock_t hash_lock; int do_barriers; + int closing; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 048282f06af..751069c0e9f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -563,6 +563,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); INIT_RADIX_TREE(&fs_info->block_group_data_radix, GFP_KERNEL); INIT_LIST_HEAD(&fs_info->trans_list); + INIT_LIST_HEAD(&fs_info->dead_roots); sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; fs_info->tree_root = tree_root; @@ -577,6 +578,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->do_barriers = 1; fs_info->extent_tree_insert_nr = 0; fs_info->extent_tree_prealloc_nr = 0; + fs_info->closing = 0; + 
INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); BTRFS_I(fs_info->btree_inode)->root = tree_root; memset(&BTRFS_I(fs_info->btree_inode)->location, 0, @@ -746,6 +749,7 @@ int close_ctree(struct btrfs_root *root) struct btrfs_trans_handle *trans; struct btrfs_fs_info *fs_info = root->fs_info; + fs_info->closing = 1; btrfs_transaction_flush_work(root); mutex_lock(&fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index bf7eef67ba0..b859db395fd 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -12,12 +12,10 @@ static struct workqueue_struct *trans_wq; #define BTRFS_ROOT_TRANS_TAG 0 -#define TRANS_MAGIC 0xE1E10E static void put_transaction(struct btrfs_transaction *transaction) { WARN_ON(transaction->use_count == 0); transaction->use_count--; - WARN_ON(transaction->magic != TRANS_MAGIC); if (transaction->use_count == 0) { WARN_ON(total_trans == 0); total_trans--; @@ -42,7 +40,6 @@ static int join_transaction(struct btrfs_root *root) cur_trans->transid = root->fs_info->generation; init_waitqueue_head(&cur_trans->writer_wait); init_waitqueue_head(&cur_trans->commit_wait); - cur_trans->magic = TRANS_MAGIC; cur_trans->in_commit = 0; cur_trans->use_count = 1; cur_trans->commit_done = 0; @@ -83,7 +80,6 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, h->block_group = NULL; root->fs_info->running_transaction->use_count++; mutex_unlock(&root->fs_info->trans_mutex); - h->magic = h->magic2 = TRANS_MAGIC; return h; } @@ -92,8 +88,6 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, { struct btrfs_transaction *cur_trans; - WARN_ON(trans->magic != TRANS_MAGIC); - WARN_ON(trans->magic2 != TRANS_MAGIC); mutex_lock(&root->fs_info->trans_mutex); cur_trans = root->fs_info->running_transaction; WARN_ON(cur_trans->num_writers < 1); @@ -257,8 +251,8 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, struct dirty_root *dirty; struct 
btrfs_trans_handle *trans; int ret; - while(!list_empty(list)) { + mutex_lock(&tree_root->fs_info->fs_mutex); dirty = list_entry(list->next, struct dirty_root, list); list_del_init(&dirty->list); trans = btrfs_start_transaction(tree_root, 1); @@ -271,6 +265,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); kfree(dirty); + mutex_unlock(&tree_root->fs_info->fs_mutex); } return 0; } @@ -346,10 +341,18 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, wake_up(&cur_trans->commit_wait); put_transaction(cur_trans); put_transaction(cur_trans); + if (root->fs_info->closing) + list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots); + else + list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots); mutex_unlock(&root->fs_info->trans_mutex); kmem_cache_free(btrfs_trans_handle_cachep, trans); - drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots); + if (root->fs_info->closing) { + mutex_unlock(&root->fs_info->fs_mutex); + drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots); + mutex_lock(&root->fs_info->fs_mutex); + } return ret; } @@ -362,11 +365,19 @@ void btrfs_transaction_cleaner(struct work_struct *work) struct btrfs_root *root = fs_info->tree_root; struct btrfs_transaction *cur; struct btrfs_trans_handle *trans; + struct list_head dirty_roots; unsigned long now; unsigned long delay = HZ * 30; int ret; -printk("btrfs transaction cleaner\n"); + INIT_LIST_HEAD(&dirty_roots); + mutex_lock(&root->fs_info->trans_mutex); + list_splice_init(&root->fs_info->dead_roots, &dirty_roots); + mutex_unlock(&root->fs_info->trans_mutex); + + if (!list_empty(&dirty_roots)) { + drop_dirty_roots(root, &dirty_roots); + } mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); cur = root->fs_info->running_transaction; @@ -381,7 +392,6 @@ printk("btrfs transaction cleaner\n"); goto out; } mutex_unlock(&root->fs_info->trans_mutex); -printk("forcing commit\n"); 
trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); out: diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 0b08208be85..f25b4900db4 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -8,7 +8,6 @@ struct btrfs_transaction { int in_commit; int use_count; int commit_done; - int magic; struct list_head list; struct radix_tree_root dirty_pages; unsigned long start_time; @@ -17,13 +16,11 @@ struct btrfs_transaction { }; struct btrfs_trans_handle { - int magic; u64 transid; unsigned long blocks_reserved; unsigned long blocks_used; struct btrfs_transaction *transaction; struct btrfs_block_group_cache *block_group; - int magic2; }; -- cgit v1.2.3-70-g09d2 From 6cbd55707802b98843f953d1ae6d8f5bcd9a76c0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 12 Jun 2007 09:07:21 -0400 Subject: Btrfs: add GPLv2 Signed-off-by: Chris Mason --- fs/btrfs/COPYING | 356 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/TODO | 1 - fs/btrfs/bit-radix.c | 18 +++ fs/btrfs/bit-radix.h | 18 +++ fs/btrfs/btrfs_inode.h | 18 +++ fs/btrfs/ctree.c | 18 +++ fs/btrfs/ctree.h | 18 +++ fs/btrfs/dir-item.c | 18 +++ fs/btrfs/disk-io.c | 18 +++ fs/btrfs/disk-io.h | 18 +++ fs/btrfs/extent-tree.c | 18 +++ fs/btrfs/file-item.c | 18 +++ fs/btrfs/file.c | 18 +++ fs/btrfs/hash.c | 18 +++ fs/btrfs/hash.h | 18 +++ fs/btrfs/inode-item.c | 18 +++ fs/btrfs/inode-map.c | 18 +++ fs/btrfs/inode.c | 18 +++ fs/btrfs/ioctl.h | 18 +++ fs/btrfs/print-tree.c | 18 +++ fs/btrfs/print-tree.h | 18 +++ fs/btrfs/root-tree.c | 18 +++ fs/btrfs/super.c | 18 +++ fs/btrfs/sysfs.c | 18 +++ fs/btrfs/transaction.c | 18 +++ fs/btrfs/transaction.h | 18 +++ 26 files changed, 788 insertions(+), 1 deletion(-) create mode 100644 fs/btrfs/COPYING (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/COPYING b/fs/btrfs/COPYING new file mode 100644 index 00000000000..ca442d313d8 --- /dev/null +++ b/fs/btrfs/COPYING @@ -0,0 +1,356 @@ + + NOTE! 
This copyright does *not* cover user programs that use kernel + services by normal system calls - this is merely considered normal use + of the kernel, and does *not* fall under the heading of "derived work". + Also note that the GPL below is copyrighted by the Free Software + Foundation, but the instance of code that it refers to (the Linux + kernel) is copyrighted by me and others who actually wrote it. + + Also note that the only valid version of the GPL as far as the kernel + is concerned is _this_ particular version of the license (ie v2, not + v2.2 or v3.x or whatever), unless explicitly otherwise stated. + + Linus Torvalds + +---------------------------------------- + + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. 
+ + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. 
The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. 
+ + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. 
+ +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License.
diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 4a48a2365d5..d9b6d38c603 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -1,4 +1,3 @@ -* fix printk warnings * cleanup, add more error checking, get rid of BUG_ONs * Fix ENOSPC handling * Make allocator smarter diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c index 8f9cd427723..a4ddf4de979 100644 --- a/fs/btrfs/bit-radix.c +++ b/fs/btrfs/bit-radix.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "bit-radix.h" diff --git a/fs/btrfs/bit-radix.h b/fs/btrfs/bit-radix.h index 4e717e30db4..c100f54d5c3 100644 --- a/fs/btrfs/bit-radix.h +++ b/fs/btrfs/bit-radix.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __BIT_RADIX__ #define __BIT_RADIX__ #include diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 6b50076b02d..783f06b6325 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __BTRFS_I__ #define __BTRFS_I__ diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 20e01f95ce6..4736155af41 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 24410d42e3c..4bd648d68e8 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __BTRFS__ #define __BTRFS__ diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index b408a3d20c7..0d179a44ebf 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 87810117254..8c68a64c0ce 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include #include diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 822ccb8aa4a..7b76ccc4875 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __DISKIO__ #define __DISKIO__ diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fe02fbfa6d5..975a0eb9241 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index d5a98827e38..37dd3801a10 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0325dc03859..566e526b60f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include #include diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c index 32de1ea1b64..e5c76903d72 100644 --- a/fs/btrfs/hash.c +++ b/fs/btrfs/hash.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + /* * Original copy from: * linux/fs/ext3/hash.c diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h index e56c4670093..d3be0267058 100644 --- a/fs/btrfs/hash.h +++ b/fs/btrfs/hash.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __HASH__ #define __HASH__ int btrfs_name_hash(const char *name, int len, u64 *hash_result); diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index c3b990b661c..449e9dbdf95 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 318e27a6378..58930fc48be 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1cabc1951b0..46f869d6d0f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include #include diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 497fadd98c3..23bed48c51a 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __IOCTL_ #define __IOCTL_ #include diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 21791f03756..1fa70568300 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index 396041a05cf..205cd03601a 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __PRINT_TREE_ #define __PRINT_TREE_ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 13c30284fd8..7a2d63c1ee8 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 94b3535a2a1..ef8ad18b275 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include #include diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index ed8b7e20cfb..ead9dc8051b 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4f3c8ddcb40..30914521cf0 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include #include "ctree.h" diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index f25b4900db4..5fb1d322aa1 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __TRANSACTION__ #define __TRANSACTION__ #include "btrfs_inode.h" -- cgit v1.2.3-70-g09d2 From 5eda7b5e9b0bed864dd18284c7df9b3c8207dad7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 22 Jun 2007 14:16:25 -0400 Subject: Btrfs: Add the ability to find and remove dead roots after a crash. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 17 ++++++++++-- fs/btrfs/disk-io.c | 44 +++++++++++++++++++---------- fs/btrfs/disk-io.h | 3 ++ fs/btrfs/inode.c | 2 ++ fs/btrfs/root-tree.c | 75 ++++++++++++++++++++++++++++++++++++++++++++------ fs/btrfs/transaction.c | 54 +++++++++++++++++++++++++++++++----- fs/btrfs/transaction.h | 1 + 7 files changed, 164 insertions(+), 32 deletions(-) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 77071f27397..fb6fffb71dd 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -122,12 +122,12 @@ struct btrfs_super_block { u8 fsid[16]; /* FS specific uuid */ __le64 blocknr; /* this block number */ __le64 magic; - __le32 blocksize; __le64 generation; __le64 root; __le64 total_blocks; __le64 blocks_used; __le64 root_dir_objectid; + __le32 blocksize; } __attribute__ ((__packed__)); /* @@ -226,10 +226,12 @@ struct btrfs_root_item { struct btrfs_inode_item inode; __le64 root_dirid; __le64 blocknr; - __le32 flags; __le64 block_limit; __le64 blocks_used; + __le32 flags; __le32 refs; + struct btrfs_disk_key drop_progress; + u8 drop_level; } __attribute__ ((__packed__)); #define BTRFS_FILE_EXTENT_REG 0 @@ -800,6 +802,16 @@ static inline void btrfs_set_root_refs(struct btrfs_root_item *item, u32 val) item->refs = cpu_to_le32(val); } +static inline u32 btrfs_root_flags(struct btrfs_root_item *item) +{ + return le32_to_cpu(item->flags); +} + +static inline void 
btrfs_set_root_flags(struct btrfs_root_item *item, u32 val) +{ + item->flags = cpu_to_le32(val); +} + static inline u64 btrfs_super_blocknr(struct btrfs_super_block *s) { return le64_to_cpu(s->blocknr); @@ -1076,6 +1088,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root *item); int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key); +int btrfs_find_dead_roots(struct btrfs_root *root); /* dir-item.c */ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, u64 dir, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 865a284aa06..d7615e1578c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -326,8 +326,8 @@ static int find_and_setup_root(int blocksize, return 0; } -struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, - struct btrfs_key *location) +struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, + struct btrfs_key *location) { struct btrfs_root *root; struct btrfs_root *tree_root = fs_info->tree_root; @@ -336,11 +336,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, u64 highest_inode; int ret = 0; - root = radix_tree_lookup(&fs_info->fs_roots_radix, - (unsigned long)location->objectid); - if (root) - return root; - root = kmalloc(sizeof(*root), GFP_NOFS); + root = kzalloc(sizeof(*root), GFP_NOFS); if (!root) return ERR_PTR(-ENOMEM); if (location->offset == (u64)-1) { @@ -383,6 +379,28 @@ out: BUG_ON(!root->node); insert: root->ref_cows = 1; + ret = btrfs_find_highest_inode(root, &highest_inode); + if (ret == 0) { + root->highest_inode = highest_inode; + root->last_inode_alloc = highest_inode; + } + return root; +} + +struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, + struct btrfs_key *location) +{ + struct btrfs_root *root; + int ret; + + root = radix_tree_lookup(&fs_info->fs_roots_radix, + (unsigned 
long)location->objectid); + if (root) + return root; + + root = btrfs_read_fs_root_no_radix(fs_info, location); + if (IS_ERR(root)) + return root; ret = radix_tree_insert(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, root); @@ -391,11 +409,6 @@ insert: kfree(root); return ERR_PTR(ret); } - ret = btrfs_find_highest_inode(root, &highest_inode); - if (ret == 0) { - root->highest_inode = highest_inode; - root->last_inode_alloc = highest_inode; - } return root; } @@ -489,6 +502,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) btrfs_read_block_groups(extent_root); fs_info->generation = btrfs_super_generation(disk_super) + 1; + ret = btrfs_find_dead_roots(tree_root); + if (ret) + goto fail_tree_root; mutex_unlock(&fs_info->fs_mutex); return tree_root; @@ -538,7 +554,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } -static int free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) +int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) { radix_tree_delete(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid); @@ -565,7 +581,7 @@ static int del_fs_roots(struct btrfs_fs_info *fs_info) if (!ret) break; for (i = 0; i < ret; i++) - free_fs_root(fs_info, gang[i]); + btrfs_free_fs_root(fs_info, gang[i]); } return 0; } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 7b76ccc4875..c4a695ac44f 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -65,6 +65,8 @@ int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, char *result); struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_key *location); +struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, + struct btrfs_key *location); u64 bh_blocknr(struct buffer_head *bh); int btrfs_insert_dev_radix(struct btrfs_root *root, struct block_device *bdev, @@ -75,4 +77,5 @@ int btrfs_map_bh_to_logical(struct btrfs_root *root, struct 
buffer_head *bh, u64 logical); int btrfs_releasepage(struct page *page, gfp_t flags); void btrfs_btree_balance_dirty(struct btrfs_root *root); +int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); #endif diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6d031daa777..9d2a0a3f674 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2028,6 +2028,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol)); btrfs_set_root_refs(&root_item, 1); + memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); + root_item.drop_level = 0; brelse(subvol); subvol = NULL; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index ac0fae7780f..737e5a38d17 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -18,6 +18,7 @@ #include #include "ctree.h" +#include "transaction.h" #include "disk-io.h" #include "print-tree.h" @@ -32,7 +33,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, search_key.objectid = objectid; search_key.flags = (u32)-1; - search_key.offset = (u32)-1; + search_key.offset = (u64)-1; path = btrfs_alloc_path(); BUG_ON(!path); @@ -50,6 +51,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, memcpy(item, btrfs_item_ptr(l, slot, struct btrfs_root_item), sizeof(*item)); btrfs_disk_key_to_cpu(key, &l->items[slot].key); +printk("find last finds key %Lu %u %Lu slot %d search for obj %Lu\n", key->objectid, key->flags, key->offset, slot, objectid); ret = 0; out: btrfs_release_path(root, path); @@ -93,6 +95,67 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } +int btrfs_find_dead_roots(struct btrfs_root *root) +{ + struct btrfs_root *dead_root; + struct btrfs_item *item; + struct btrfs_root_item *ri; + struct btrfs_key key; + struct btrfs_path *path; + int ret; + u32 nritems; + struct btrfs_leaf *leaf; + int slot; + + key.objectid = 0; + key.flags = 0; + 
btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + key.offset = 0; + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto err; + while(1) { + leaf = btrfs_buffer_leaf(path->nodes[0]); + nritems = btrfs_header_nritems(&leaf->header); + slot = path->slots[0]; + if (slot >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret) + break; + leaf = btrfs_buffer_leaf(path->nodes[0]); + nritems = btrfs_header_nritems(&leaf->header); + slot = path->slots[0]; + } + item = leaf->items + slot; + btrfs_disk_key_to_cpu(&key, &item->key); + if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY) + goto next; + ri = btrfs_item_ptr(leaf, slot, struct btrfs_root_item); + if (btrfs_root_refs(ri) != 0) + goto next; + dead_root = btrfs_read_fs_root_no_radix(root->fs_info, &key); + if (IS_ERR(root)) { + ret = PTR_ERR(root); + goto err; + } +printk("found dead root %Lu %u %Lu\n", key.objectid, key.flags, key.offset); + ret = btrfs_add_dead_root(dead_root, + &root->fs_info->dead_roots); + if (ret) + goto err; +next: + slot++; + path->slots[0]++; + } + ret = 0; +err: + btrfs_free_path(path); + return ret; +} + int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key) { @@ -111,14 +174,8 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, path->slots[0], struct btrfs_root_item); refs = btrfs_root_refs(ri); - BUG_ON(refs == 0); - if (refs == 1) { - ret = btrfs_del_item(trans, root, path); - } else { - btrfs_set_root_refs(ri, refs - 1); - WARN_ON(1); - mark_buffer_dirty(path->nodes[0]); - } + BUG_ON(refs != 0); + ret = btrfs_del_item(trans, root, path); out: btrfs_release_path(root, path); btrfs_free_path(path); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 321f8852755..85a2a5e2714 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -85,11 +85,15 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct 
btrfs_root *root, if (root != root->fs_info->tree_root && root->last_trans < running_trans_id) { - radix_tree_tag_set(&root->fs_info->fs_roots_radix, - (unsigned long)root->root_key.objectid, - BTRFS_ROOT_TRANS_TAG); - root->commit_root = root->node; - get_bh(root->node); + if (root->root_item.refs != 0) { + radix_tree_tag_set(&root->fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_TRANS_TAG); + root->commit_root = root->node; + get_bh(root->node); + } else { + WARN_ON(1); + } } root->last_trans = running_trans_id; h->transid = running_trans_id; @@ -208,8 +212,24 @@ struct dirty_root { struct btrfs_key snap_key; struct buffer_head *commit_root; struct btrfs_root *root; + int free_on_drop; }; +int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list) +{ + struct dirty_root *dirty; + + dirty = kmalloc(sizeof(*dirty), GFP_NOFS); + if (!dirty) + return -ENOMEM; + memcpy(&dirty->snap_key, &root->root_key, sizeof(root->root_key)); + dirty->commit_root = root->node; + dirty->root = root; + dirty->free_on_drop = 1; + list_add(&dirty->list, dead_list); + return 0; +} + static int add_dirty_roots(struct btrfs_trans_handle *trans, struct radix_tree_root *radix, struct list_head *list) @@ -217,9 +237,11 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, struct dirty_root *dirty; struct btrfs_root *gang[8]; struct btrfs_root *root; + struct btrfs_root_item tmp_item; int i; int ret; int err = 0; + u32 refs; while(1) { ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0, @@ -246,6 +268,9 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, dirty->commit_root = root->commit_root; root->commit_root = NULL; dirty->root = root; + dirty->free_on_drop = 0; + memcpy(&tmp_item, &root->root_item, sizeof(tmp_item)); + root->root_key.offset = root->fs_info->generation; btrfs_set_root_blocknr(&root->root_item, bh_blocknr(root->node)); @@ -254,7 +279,18 @@ static int add_dirty_roots(struct btrfs_trans_handle 
*trans, &root->root_item); if (err) break; - list_add(&dirty->list, list); + + refs = btrfs_root_refs(&tmp_item); + btrfs_set_root_refs(&tmp_item, refs - 1); + err = btrfs_update_root(trans, root->fs_info->tree_root, + &dirty->snap_key, + &tmp_item); + + BUG_ON(err); + if (refs == 1) + list_add(&dirty->list, list); + else + kfree(dirty); } } return err; @@ -270,16 +306,20 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, mutex_lock(&tree_root->fs_info->fs_mutex); dirty = list_entry(list->next, struct dirty_root, list); list_del_init(&dirty->list); + trans = btrfs_start_transaction(tree_root, 1); +printk("deleting root %Lu %u %Lu\n", dirty->snap_key.objectid, dirty->snap_key.flags, dirty->snap_key.offset); ret = btrfs_drop_snapshot(trans, dirty->root, dirty->commit_root); BUG_ON(ret); - ret = btrfs_del_root(trans, tree_root, &dirty->snap_key); if (ret) break; ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); + + if (dirty->free_on_drop) + kfree(dirty->root); kfree(dirty); mutex_unlock(&tree_root->fs_info->fs_mutex); btrfs_btree_balance_dirty(tree_root); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 5fb1d322aa1..ebf44f3e111 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -69,5 +69,6 @@ void btrfs_transaction_flush_work(struct btrfs_root *root); void btrfs_transaction_queue_work(struct btrfs_root *root, int delay); void btrfs_init_transaction_sys(void); void btrfs_exit_transaction_sys(void); +int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list); #endif -- cgit v1.2.3-70-g09d2 From 6702ed490ca0bb44e17131818a5a18b773957c5a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 7 Aug 2007 16:15:09 -0400 Subject: Btrfs: Add run time btree defrag, and an ioctl to force btree defrag This adds two types of btree defrag, a run time form that tries to defrag recently allocated blocks in the btree when they are still in ram, and an ioctl that forces defrag of all btree blocks. 
File data blocks are not defragged yet, but this can make a huge difference in sequential btree reads. Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/ctree.c | 195 +++++++++++++++++++++++++++++++++---------- fs/btrfs/ctree.h | 19 ++++- fs/btrfs/disk-io.c | 8 ++ fs/btrfs/extent-tree.c | 53 +++++++----- fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 33 +++++++- fs/btrfs/ioctl.h | 2 +- fs/btrfs/transaction.c | 105 +++++++++++++++++++---- fs/btrfs/transaction.h | 1 + fs/btrfs/tree-defrag.c | 222 +++++++++++++++++++++++++++++++++++++++++++++++++ 11 files changed, 553 insertions(+), 89 deletions(-) create mode 100644 fs/btrfs/tree-defrag.c (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index a4e2df66bdb..9321438c5ee 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -4,7 +4,7 @@ ifneq ($(KERNELRELEASE),) obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ - transaction.o bit-radix.o inode.o file.o + transaction.o bit-radix.o inode.o file.o tree-defrag.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7a08491e208..c7e47e77723 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -65,44 +65,44 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) memset(p, 0, sizeof(*p)); } -static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root +static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf, struct buffer_head *parent, int parent_slot, struct buffer_head - **cow_ret) + **cow_ret, u64 search_start, u64 empty_size) { struct buffer_head *cow; struct btrfs_node *cow_node; - int ret; + int ret = 0; + int different_trans = 0; + WARN_ON(root->ref_cows && trans->transid != root->last_trans); 
WARN_ON(!buffer_uptodate(buf)); - if (trans->transaction != root->fs_info->running_transaction) { - printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, - root->fs_info->running_transaction->transid); - WARN_ON(1); - } - if (trans->transid != root->fs_info->generation) { - printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, - root->fs_info->generation); - WARN_ON(1); - } - if (btrfs_header_generation(btrfs_buffer_header(buf)) == - trans->transid) { - *cow_ret = buf; - return 0; - } - cow = btrfs_alloc_free_block(trans, root, buf->b_blocknr); + cow = btrfs_alloc_free_block(trans, root, search_start, empty_size); if (IS_ERR(cow)) return PTR_ERR(cow); + cow_node = btrfs_buffer_node(cow); if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) WARN_ON(1); + memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow)); btrfs_set_header_generation(&cow_node->header, trans->transid); btrfs_set_header_owner(&cow_node->header, root->root_key.objectid); - ret = btrfs_inc_ref(trans, root, buf); - if (ret) - return ret; + + WARN_ON(btrfs_header_generation(btrfs_buffer_header(buf)) > + trans->transid); + if (btrfs_header_generation(btrfs_buffer_header(buf)) != + trans->transid) { + different_trans = 1; + ret = btrfs_inc_ref(trans, root, buf); + if (ret) + return ret; + } else { + WARN_ON(!root->ref_cows); + clean_tree_block(trans, root, buf); + } + if (buf == root->node) { root->node = cow; get_bh(cow); @@ -114,6 +114,8 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, bh_blocknr(cow)); btrfs_mark_buffer_dirty(parent); + WARN_ON(btrfs_header_generation(btrfs_buffer_header(parent)) != + trans->transid); btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); } btrfs_block_release(root, buf); @@ -122,6 +124,115 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root return 
0; } +int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct buffer_head *buf, struct buffer_head + *parent, int parent_slot, struct buffer_head + **cow_ret) +{ + u64 search_start; + if (trans->transaction != root->fs_info->running_transaction) { + printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, + root->fs_info->running_transaction->transid); + WARN_ON(1); + } + if (trans->transid != root->fs_info->generation) { + printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, + root->fs_info->generation); + WARN_ON(1); + } + if (btrfs_header_generation(btrfs_buffer_header(buf)) == + trans->transid) { + *cow_ret = buf; + return 0; + } + + search_start = bh_blocknr(buf) & ~((u64)65535); + return __btrfs_cow_block(trans, root, buf, parent, + parent_slot, cow_ret, search_start, 0); +} + +static int close_blocks(u64 blocknr, u64 other) +{ + if (blocknr < other && other - blocknr < 8) + return 1; + if (blocknr > other && blocknr - other < 8) + return 1; + return 0; +} + +int btrfs_realloc_node(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct buffer_head *parent, + int cache_only) +{ + struct btrfs_node *parent_node; + struct buffer_head *cur_bh; + struct buffer_head *tmp_bh; + u64 blocknr; + u64 search_start = 0; + u64 other; + u32 parent_nritems; + int start_slot; + int end_slot; + int i; + int err = 0; + + if (trans->transaction != root->fs_info->running_transaction) { + printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, + root->fs_info->running_transaction->transid); + WARN_ON(1); + } + if (trans->transid != root->fs_info->generation) { + printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, + root->fs_info->generation); + WARN_ON(1); + } + parent_node = btrfs_buffer_node(parent); + parent_nritems = btrfs_header_nritems(&parent_node->header); + + start_slot = 0; + end_slot = parent_nritems; + + if (parent_nritems == 1) + return 0; + + for (i = start_slot; i < end_slot; i++) { + int close = 1; 
+ blocknr = btrfs_node_blockptr(parent_node, i); + if (i > 0) { + other = btrfs_node_blockptr(parent_node, i - 1); + close = close_blocks(blocknr, other); + } + if (close && i < end_slot - 1) { + other = btrfs_node_blockptr(parent_node, i + 1); + close = close_blocks(blocknr, other); + } + if (close) + continue; + + cur_bh = btrfs_find_tree_block(root, blocknr); + if (!cur_bh || !buffer_uptodate(cur_bh) || + buffer_locked(cur_bh)) { + if (cache_only) { + brelse(cur_bh); + continue; + } + brelse(cur_bh); + cur_bh = read_tree_block(root, blocknr); + } + if (search_start == 0) { + search_start = bh_blocknr(cur_bh) & ~((u64)65535); + } + err = __btrfs_cow_block(trans, root, cur_bh, parent, i, + &tmp_bh, search_start, + min(8, end_slot - i)); + if (err) + break; + search_start = bh_blocknr(tmp_bh); + brelse(tmp_bh); + } + return err; +} + /* * The leaf data grows from end-to-front in the node. * this returns the address of the start of the last item, @@ -221,6 +332,7 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, parent_slot = path->slots[level + 1]; parent_key = &parent->ptrs[parent_slot].key; + BUG_ON(memcmp(parent_key, &leaf->items[0].key, sizeof(struct btrfs_disk_key))); BUG_ON(btrfs_node_blockptr(parent, parent_slot) != @@ -643,7 +755,7 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, * readahead one full node of leaves */ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, - int slot) + int level, int slot) { struct btrfs_node *node; int i; @@ -659,10 +771,13 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, unsigned long gang[8]; struct buffer_head *bh; - if (!path->nodes[1]) + if (level == 0) + return; + + if (!path->nodes[level]) return; - node = btrfs_buffer_node(path->nodes[1]); + node = btrfs_buffer_node(path->nodes[level]); search = btrfs_node_blockptr(node, slot); bh = btrfs_find_tree_block(root, search); if (bh) { @@ -690,7 +805,7 @@ static void 
reada_for_search(struct btrfs_root *root, struct btrfs_path *path, for (i = 0; i < ret; i++) { blocknr = gang[i]; clear_radix_bit(&found, blocknr); - if (nread > 64) + if (nread > 32) continue; if (direction > 0 && cluster_start <= blocknr && cluster_start + 8 > blocknr) { @@ -726,7 +841,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root struct buffer_head *b; struct buffer_head *cow_buf; struct btrfs_node *c; - struct btrfs_root_item *root_item = &root->root_item; u64 blocknr; int slot; int ret; @@ -734,11 +848,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root int should_reada = p->reada; u8 lowest_level = 0; - if (btrfs_root_refs(root_item) == 0 && root->ref_cows) { - lowest_level = root_item->drop_level; - WARN_ON(ins_len || cow); - } - + lowest_level = p->lowest_level; + WARN_ON(lowest_level && ins_len); WARN_ON(p->nodes[0] != NULL); WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); again: @@ -798,8 +909,8 @@ again: if (level == lowest_level) break; blocknr = btrfs_node_blockptr(c, slot); - if (level == 1 && should_reada) - reada_for_search(root, p, slot); + if (should_reada) + reada_for_search(root, p, level, slot); b = read_tree_block(root, btrfs_node_blockptr(c, slot)); } else { @@ -960,7 +1071,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); - t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr); + t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr, 0); if (IS_ERR(t)) return PTR_ERR(t); c = btrfs_buffer_node(t); @@ -1070,7 +1181,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root } c_nritems = btrfs_header_nritems(&c->header); - split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr); + split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr, 0); if (IS_ERR(split_buffer)) return PTR_ERR(split_buffer); @@ -1461,7 +1572,7 @@ static 
int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root nritems = btrfs_header_nritems(&l->header); mid = (nritems + 1)/ 2; - right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); + right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0); if (IS_ERR(right_buffer)) return PTR_ERR(right_buffer); @@ -1560,7 +1671,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root if (!double_split) return ret; - right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); + right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0); if (IS_ERR(right_buffer)) return PTR_ERR(right_buffer); @@ -1988,8 +2099,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) blocknr = btrfs_node_blockptr(c_node, slot); if (next) btrfs_block_release(root, next); - if (level == 1 && path->reada) - reada_for_search(root, path, slot); + if (path->reada) + reada_for_search(root, path, level, slot); next = read_tree_block(root, blocknr); break; } @@ -2002,8 +2113,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) path->slots[level] = 0; if (!level) break; - if (level == 1 && path->reada) - reada_for_search(root, path, slot); + if (path->reada) + reada_for_search(root, path, level, slot); next = read_tree_block(root, btrfs_node_blockptr(btrfs_buffer_node(next), 0)); } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c5a18d5d7f7..42aa20301bc 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -178,6 +178,7 @@ struct btrfs_path { struct buffer_head *nodes[BTRFS_MAX_LEVEL]; int slots[BTRFS_MAX_LEVEL]; int reada; + int lowest_level; }; /* @@ -338,6 +339,9 @@ struct btrfs_root { u64 highest_inode; u64 last_inode_alloc; int ref_cows; + struct btrfs_key defrag_progress; + int defrag_running; + int defrag_level; }; /* the lower bits in the key flags defines the item type */ @@ -1031,10 +1035,11 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root 
*root, int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 hint); + struct btrfs_root *root, u64 hint, + u64 empty_size); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, - u64 num_blocks, u64 search_start, + u64 num_blocks, u64 empty_size, u64 search_start, u64 search_end, struct btrfs_key *ins, int data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); @@ -1051,6 +1056,10 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_read_block_groups(struct btrfs_root *root); /* ctree.c */ +int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct buffer_head *buf, struct buffer_head + *parent, int parent_slot, struct buffer_head + **cow_ret); int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u32 data_size); int btrfs_truncate_item(struct btrfs_trans_handle *trans, @@ -1060,6 +1069,9 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); +int btrfs_realloc_node(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct buffer_head *parent, + int cache_only); void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); struct btrfs_path *btrfs_alloc_path(void); void btrfs_free_path(struct btrfs_path *p); @@ -1171,4 +1183,7 @@ extern struct file_operations btrfs_file_operations; int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, u64 end, u64 *hint_block); +/* tree-defrag.c */ +int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, + 
struct btrfs_root *root, int cache_only); #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 60db85bc4ba..c948416fea3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -273,7 +273,9 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf) { WARN_ON(atomic_read(&buf->b_count) == 0); + lock_buffer(buf); clear_buffer_dirty(buf); + unlock_buffer(buf); return 0; } @@ -294,6 +296,9 @@ static int __setup_root(int blocksize, root->last_inode_alloc = 0; memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); + memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); + root->defrag_running = 0; + root->defrag_level = 0; root->root_key.objectid = objectid; return 0; } @@ -585,6 +590,7 @@ int close_ctree(struct btrfs_root *root) fs_info->closing = 1; btrfs_transaction_flush_work(root); mutex_lock(&fs_info->fs_mutex); + btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); /* run commit again to drop the original snapshot */ @@ -616,7 +622,9 @@ void btrfs_mark_buffer_dirty(struct buffer_head *bh) { struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; u64 transid = btrfs_header_generation(btrfs_buffer_header(bh)); + WARN_ON(!atomic_read(&bh->b_count)); + if (transid != root->fs_info->generation) { printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", (unsigned long long)bh->b_blocknr, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5d4d5d8db8e..26b8d340649 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -23,7 +23,8 @@ #include "transaction.h" static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *orig_root, u64 num_blocks, u64 search_start, + *orig_root, u64 num_blocks, u64 empty_size, + u64 search_start, u64 search_end, u64 hint_block, struct btrfs_key *ins, u64 
exclude_start, u64 exclude_nr, int data); @@ -379,7 +380,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) return -ENOMEM; - ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0, + ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0, 0, (u64)-1, 0, &ins, 0, 0, 0); if (ret) { btrfs_free_path(path); @@ -533,7 +534,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, struct btrfs_block_group_item *bi; struct btrfs_key ins; - ret = find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins, + ret = find_free_extent(trans, extent_root, 0, 0, 0, (u64)-1, 0, &ins, 0, 0, 0); /* FIXME, set bit to recalc cache groups on next mount */ if (ret) @@ -708,6 +709,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, static int try_remove_page(struct address_space *mapping, unsigned long index) { int ret; + return 0; ret = invalidate_mapping_pages(mapping, index, index); return ret; } @@ -866,7 +868,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (!path) return -ENOMEM; - ret = find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0, 0, 0); + ret = find_free_extent(trans, root, 0, 0, 0, (u64)-1, 0, &ins, 0, 0, 0); if (ret) { btrfs_free_path(path); return ret; @@ -983,8 +985,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root * Any available blocks before search_start are skipped. 
*/ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *orig_root, u64 num_blocks, u64 search_start, u64 - search_end, u64 hint_block, + *orig_root, u64 num_blocks, u64 empty_size, + u64 search_start, u64 search_end, u64 hint_block, struct btrfs_key *ins, u64 exclude_start, u64 exclude_nr, int data) { @@ -1042,6 +1044,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root data, 1); } + total_needed += empty_size; path = btrfs_alloc_path(); check_failed: @@ -1157,9 +1160,11 @@ check_pending: goto error; } search_start = orig_search_start; - if (wrapped) + if (wrapped) { + if (!full_scan) + total_needed -= empty_size; full_scan = 1; - else + } else wrapped = 1; goto new_group; } @@ -1238,9 +1243,11 @@ new_group: ret = -ENOSPC; goto error; } - if (wrapped) + if (wrapped) { + if (!full_scan) + total_needed -= empty_size; full_scan = 1; - else + } else wrapped = 1; } block_group = btrfs_lookup_block_group(info, search_start); @@ -1264,7 +1271,7 @@ error: */ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, - u64 num_blocks, u64 hint_block, + u64 num_blocks, u64 empty_size, u64 hint_block, u64 search_end, struct btrfs_key *ins, int data) { int ret; @@ -1303,7 +1310,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, * in the correct block group. 
*/ if (data) { - ret = find_free_extent(trans, root, 0, 0, + ret = find_free_extent(trans, root, 0, 0, 0, search_end, 0, &prealloc_key, 0, 0, 0); BUG_ON(ret); if (ret) @@ -1313,8 +1320,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, } /* do the real allocation */ - ret = find_free_extent(trans, root, num_blocks, search_start, - search_end, hint_block, ins, + ret = find_free_extent(trans, root, num_blocks, empty_size, + search_start, search_end, hint_block, ins, exclude_start, exclude_nr, data); BUG_ON(ret); if (ret) @@ -1333,7 +1340,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, exclude_start = ins->objectid; exclude_nr = ins->offset; hint_block = exclude_start + exclude_nr; - ret = find_free_extent(trans, root, 0, search_start, + ret = find_free_extent(trans, root, 0, 0, search_start, search_end, hint_block, &prealloc_key, exclude_start, exclude_nr, 0); @@ -1368,14 +1375,16 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, * returns the tree buffer or NULL. 
*/ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 hint) + struct btrfs_root *root, u64 hint, + u64 empty_size) { struct btrfs_key ins; int ret; struct buffer_head *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - 1, hint, (unsigned long)-1, &ins, 0); + 1, empty_size, hint, + (unsigned long)-1, &ins, 0); if (ret) { BUG_ON(ret > 0); return ERR_PTR(ret); @@ -1385,6 +1394,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, btrfs_free_extent(trans, root, ins.objectid, 1, 0); return ERR_PTR(-ENOMEM); } + WARN_ON(buffer_dirty(buf)); set_buffer_uptodate(buf); set_buffer_checked(buf); set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index); @@ -1591,13 +1601,15 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key key; struct btrfs_disk_key *found_key; struct btrfs_node *node; + btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); + level = root_item->drop_level; + path->lowest_level = level; wret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) { + if (wret < 0) { ret = wret; goto out; } - level = root_item->drop_level; node = btrfs_buffer_node(path->nodes[level]); found_key = &node->ptrs[path->slots[level]].key; WARN_ON(memcmp(found_key, &root_item->drop_progress, @@ -1617,8 +1629,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root ret = wret; num_walks++; if (num_walks > 10) { - struct btrfs_key key; - btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); ret = -EAGAIN; get_bh(root->node); break; @@ -1627,6 +1637,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { btrfs_block_release(root, path->nodes[i]); + path->nodes[i] = 0; } } out: diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1fe38fe8415..00b118a2db6 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -512,7 
+512,7 @@ static int prepare_pages(struct btrfs_root *root, if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { err = btrfs_alloc_extent(trans, root, inode->i_ino, - num_blocks, hint_block, (u64)-1, + num_blocks, 0, hint_block, (u64)-1, &ins, 1); if (err) goto failed_truncate; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3889032fc44..12aa043b9f6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -554,7 +554,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) &alloc_hint); if (ret) goto out; - ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, + ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) goto out; @@ -1360,7 +1360,7 @@ not_found: if (create & BTRFS_GET_BLOCK_CREATE) { struct btrfs_key ins; ret = btrfs_alloc_extent(trans, root, inode->i_ino, - 1, alloc_hint, (u64)-1, + 1, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) { err = ret; @@ -1998,7 +1998,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); - subvol = btrfs_alloc_free_block(trans, root, 0); + subvol = btrfs_alloc_free_block(trans, root, 0, 0); if (IS_ERR(subvol)) return PTR_ERR(subvol); leaf = btrfs_buffer_leaf(subvol); @@ -2159,7 +2159,9 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ioctl_vol_args vol_args; + struct btrfs_trans_handle *trans; int ret = 0; + int err; struct btrfs_dir_item *di; int namelen; struct btrfs_path *path; @@ -2196,6 +2198,31 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int else ret = create_snapshot(root, vol_args.name, namelen); break; + + case BTRFS_IOC_DEFRAG: + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + memset(&root->defrag_progress, 0, + 
sizeof(root->defrag_progress)); + while (1) { + root->defrag_running = 1; + err = btrfs_defrag_leaves(trans, root, 0); + + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + + btrfs_btree_balance_dirty(root); + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + if (err != -EAGAIN) + break; + } + root->defrag_running = 0; + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + ret = 0; + break; default: return -ENOTTY; } diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 23bed48c51a..8bc47dec286 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -28,6 +28,6 @@ struct btrfs_ioctl_vol_args { #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_ADD_DISK _IOW(BTRFS_IOCTL_MAGIC, 2, \ +#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ struct btrfs_ioctl_vol_args) #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 498626470a0..338a7199363 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -29,6 +29,7 @@ extern struct kmem_cache *btrfs_transaction_cachep; static struct workqueue_struct *trans_wq; #define BTRFS_ROOT_TRANS_TAG 0 +#define BTRFS_ROOT_DEFRAG_TAG 1 static void put_transaction(struct btrfs_transaction *transaction) { @@ -69,35 +70,41 @@ static int join_transaction(struct btrfs_root *root) return 0; } +static int record_root_in_trans(struct btrfs_root *root) +{ + u64 running_trans_id = root->fs_info->running_transaction->transid; + if (root->ref_cows && root->last_trans < running_trans_id) { + WARN_ON(root == root->fs_info->extent_root); + if (root->root_item.refs != 0) { + radix_tree_tag_set(&root->fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_TRANS_TAG); + radix_tree_tag_set(&root->fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_DEFRAG_TAG); + root->commit_root = root->node; + get_bh(root->node); + } else 
{ + WARN_ON(1); + } + root->last_trans = running_trans_id; + } + return 0; +} + struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, int num_blocks) { struct btrfs_trans_handle *h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); int ret; - u64 running_trans_id; mutex_lock(&root->fs_info->trans_mutex); ret = join_transaction(root); BUG_ON(ret); - running_trans_id = root->fs_info->running_transaction->transid; - if (root != root->fs_info->tree_root && root->last_trans < - running_trans_id) { - WARN_ON(root == root->fs_info->extent_root); - WARN_ON(root->ref_cows != 1); - if (root->root_item.refs != 0) { - radix_tree_tag_set(&root->fs_info->fs_roots_radix, - (unsigned long)root->root_key.objectid, - BTRFS_ROOT_TRANS_TAG); - root->commit_root = root->node; - get_bh(root->node); - } else { - WARN_ON(1); - } - } - root->last_trans = running_trans_id; - h->transid = running_trans_id; + record_root_in_trans(root); + h->transid = root->fs_info->running_transaction->transid; h->transaction = root->fs_info->running_transaction; h->blocks_reserved = num_blocks; h->blocks_used = 0; @@ -155,6 +162,15 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, gang[i]); if (!page) continue; + if (PageWriteback(page)) { + if (PageDirty(page)) + wait_on_page_writeback(page); + else { + unlock_page(page); + page_cache_release(page); + continue; + } + } err = write_one_page(page, 0); if (err) werr = err; @@ -299,6 +315,58 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, return err; } +int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) +{ + struct btrfs_root *gang[1]; + struct btrfs_root *root; + struct btrfs_root *tree_root = info->tree_root; + struct btrfs_trans_handle *trans; + int i; + int ret; + int err = 0; + u64 last = 0; + + trans = btrfs_start_transaction(tree_root, 1); + while(1) { + ret = radix_tree_gang_lookup_tag(&info->fs_roots_radix, + (void **)gang, last, + ARRAY_SIZE(gang), + BTRFS_ROOT_DEFRAG_TAG); + 
if (ret == 0) + break; + for (i = 0; i < ret; i++) { + root = gang[i]; + last = root->root_key.objectid + 1; + radix_tree_tag_clear(&info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_DEFRAG_TAG); + if (root->defrag_running) + continue; + + while (1) { + mutex_lock(&root->fs_info->trans_mutex); + record_root_in_trans(root); + mutex_unlock(&root->fs_info->trans_mutex); + + root->defrag_running = 1; + err = btrfs_defrag_leaves(trans, root, 1); + btrfs_end_transaction(trans, tree_root); + mutex_unlock(&info->fs_mutex); + + btrfs_btree_balance_dirty(root); + + mutex_lock(&info->fs_mutex); + trans = btrfs_start_transaction(tree_root, 1); + if (err != -EAGAIN) + break; + } + root->defrag_running = 0; + } + } + btrfs_end_transaction(trans, tree_root); + return err; +} + static int drop_dirty_roots(struct btrfs_root *tree_root, struct list_head *list) { @@ -475,6 +543,7 @@ void btrfs_transaction_cleaner(struct work_struct *work) goto out; } mutex_unlock(&root->fs_info->trans_mutex); + btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); out: diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index ebf44f3e111..8b2714e6556 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -70,5 +70,6 @@ void btrfs_transaction_queue_work(struct btrfs_root *root, int delay); void btrfs_init_transaction_sys(void); void btrfs_exit_transaction_sys(void); int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list); +int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info); #endif diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c new file mode 100644 index 00000000000..15d0a486fb5 --- /dev/null +++ b/fs/btrfs/tree-defrag.c @@ -0,0 +1,222 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. 
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * NOTE(review): the header name of the first #include below is missing
+ * in this copy (angle-bracket text appears to have been stripped during
+ * extraction); presumably a <linux/...> header -- confirm against the
+ * original tree.
+ */
+#include
+#include "ctree.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "transaction.h"
+
+/*
+ * Issue readahead for every block that 'node' points to.
+ *
+ * Walks all btrfs_header_nritems() slots of the node and calls
+ * readahead_tree_block() on each block pointer, stopping at the first
+ * non-zero return.  Nothing is reported back to the caller.
+ */
+static void reada_defrag(struct btrfs_root *root,
+			 struct btrfs_node *node)
+{
+	int i;
+	u32 nritems;
+	u64 blocknr;
+	int ret;
+
+	nritems = btrfs_header_nritems(&node->header);
+	for (i = 0; i < nritems; i++) {
+		blocknr = btrfs_node_blockptr(node, i);
+		ret = readahead_tree_block(root, blocknr);
+		if (ret)
+			break;
+	}
+}
+
+/*
+ * Descend from path->nodes[*level] toward level 1, passing each child
+ * that is entered through btrfs_cow_block() and btrfs_realloc_node().
+ *
+ * trans, root: handed through to the helpers.
+ * path:        nodes[] and slots[] give the current position and are
+ *              updated as the walk descends.
+ * level:       in/out.  On return the node at the level where the walk
+ *              stopped has been released and cleared from the path, and
+ *              *level is one above that level.
+ * cache_only:  non-zero: a child is entered only if
+ *              btrfs_find_tree_block() returns a buffer that is
+ *              uptodate and not locked; otherwise its slot is skipped.
+ *              zero: children are read with read_tree_block() and
+ *              readahead is issued via reada_defrag().
+ *
+ * Always returns 0.  A non-zero result of the level-1
+ * btrfs_realloc_node() call only triggers WARN_ON(); failures of the
+ * calls made while descending hit BUG_ON().
+ */
+static int defrag_walk_down(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root,
+			    struct btrfs_path *path, int *level,
+			    int cache_only)
+{
+	struct buffer_head *next;
+	struct buffer_head *cur;
+	u64 blocknr;
+	int ret = 0;
+
+	WARN_ON(*level < 0);
+	WARN_ON(*level >= BTRFS_MAX_LEVEL);
+
+	while(*level > 0) {
+		WARN_ON(*level < 0);
+		WARN_ON(*level >= BTRFS_MAX_LEVEL);
+		cur = path->nodes[*level];
+
+		/* at slot 0 of a node above level 1: read ahead its children */
+		if (!cache_only && *level > 1 && path->slots[*level] == 0)
+			reada_defrag(root, btrfs_buffer_node(cur));
+
+		/* the level stored in the block header should match the path */
+		if (btrfs_header_level(btrfs_buffer_header(cur)) != *level)
+			WARN_ON(1);
+
+		/* every slot of this node has been visited */
+		if (path->slots[*level] >=
+		    btrfs_header_nritems(btrfs_buffer_header(cur)))
+			break;
+
+		/*
+		 * Level 1: hand the node itself to btrfs_realloc_node() and
+		 * stop descending; level 0 is never entered by this walk.
+		 */
+		if (*level == 1) {
+			ret = btrfs_realloc_node(trans, root,
+						 path->nodes[*level],
+						 cache_only);
+			break;
+		}
+		blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
+					      path->slots[*level]);
+
+		if (cache_only) {
+			next = btrfs_find_tree_block(root, blocknr);
+			/* skip this child unless it is present, uptodate, unlocked */
+			if (!next || !buffer_uptodate(next) ||
+			    buffer_locked(next)) {
+				brelse(next);
+				path->slots[*level]++;
+				continue;
+			}
+		} else {
+			next = read_tree_block(root, blocknr);
+		}
+		/* btrfs_cow_block() stores its result buffer back into 'next' */
+		ret = btrfs_cow_block(trans, root, next, path->nodes[*level],
+				      path->slots[*level], &next);
+		BUG_ON(ret);
+		ret = btrfs_realloc_node(trans, root, next, cache_only);
+		BUG_ON(ret);
+		WARN_ON(*level <= 0);
+		/* replace whatever the path held one level down with 'next' */
+		if (path->nodes[*level-1])
+			btrfs_block_release(root, path->nodes[*level-1]);
+		path->nodes[*level-1] = next;
+		*level = btrfs_header_level(btrfs_buffer_header(next));
+		path->slots[*level] = 0;
+	}
+	WARN_ON(*level < 0);
+	WARN_ON(*level >= BTRFS_MAX_LEVEL);
+	/* drop the node the walk stopped on and point *level at its parent */
+	btrfs_block_release(root, path->nodes[*level]);
+	path->nodes[*level] = NULL;
+	*level += 1;
+	WARN_ON(ret);
+	return 0;
+}
+
+/*
+ * Climb the path from *level until a node with a slot left is found.
+ *
+ * For each level that still has a node in the path: if its current
+ * slot is not the last one, advance the slot, store the key found at
+ * the new slot in root->defrag_progress and the level in
+ * root->defrag_level, set *level to that level and return 0.
+ * Otherwise release that level's node, clear it from the path and move
+ * one level up.
+ *
+ * Returns 1 when the loop ends without finding such a node (it stops
+ * at BTRFS_MAX_LEVEL - 1 or at the first empty path->nodes[] entry).
+ * 'trans' and 'cache_only' are not used by this function.
+ */
+static int defrag_walk_up(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root,
+			  struct btrfs_path *path, int *level,
+			  int cache_only)
+{
+	int i;
+	int slot;
+	struct btrfs_node *node;
+
+	for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
+		slot = path->slots[i];
+		if (slot < btrfs_header_nritems(
+		    btrfs_buffer_header(path->nodes[i])) - 1) {
+			path->slots[i]++;
+			*level = i;
+			node = btrfs_buffer_node(path->nodes[i]);
+			WARN_ON(i == 0);
+			/* remember where a later call should resume */
+			btrfs_disk_key_to_cpu(&root->defrag_progress,
+					      &node->ptrs[path->slots[i]].key);
+			root->defrag_level = i;
+			return 0;
+		} else {
+			/*
+			 * *level equals i here: it starts out equal to i and
+			 * is set to i + 1 right before i is incremented.
+			 */
+			btrfs_block_release(root, path->nodes[*level]);
+			path->nodes[*level] = NULL;
+			*level = i + 1;
+		}
+	}
+	return 1;
+}
+
+/*
+ * Run one bounded pass of btree defragmentation on 'root'.
+ *
+ * Returns 0 without walking anything when root->ref_cows is 0 or the
+ * root node is at level 0.  Otherwise the walk starts at the root node
+ * when root->defrag_progress.objectid is 0, or resumes through
+ * btrfs_search_slot() at the key and level saved in
+ * root->defrag_progress / root->defrag_level.  It then alternates
+ * defrag_walk_down() and defrag_walk_up() until one of them returns a
+ * positive value or the round limit (num_runs) is exceeded.
+ *
+ * cache_only is passed through to the walkers and btrfs_realloc_node().
+ *
+ * Returns 0 when the pass finished, -EAGAIN when it stopped at the
+ * round limit, -ENOMEM when no path could be allocated, or the negative
+ * value returned by btrfs_search_slot().  For every return value other
+ * than -EAGAIN and the direct -ENOMEM return, root->defrag_progress is
+ * zeroed, so the saved resume position is kept only for -EAGAIN.
+ */
+int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
+			struct btrfs_root *root, int cache_only)
+{
+	struct btrfs_path *path = NULL;
+	struct buffer_head *tmp;
+	int ret = 0;
+	int wret;
+	int level;
+	int orig_level;
+	int i;
+	int num_runs = 0;
+
+	if (root->ref_cows == 0) {
+		goto out;
+	}
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	level = btrfs_header_level(btrfs_buffer_header(root->node));
+	orig_level = level;
+	if (level == 0) {
+		goto out;
+	}
+	if (root->defrag_progress.objectid == 0) {
+		/* no saved position: start from the root node */
+		get_bh(root->node);
+		/* 'tmp' receives the result buffer and is not used afterwards */
+		ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp);
+		BUG_ON(ret);
+		ret = btrfs_realloc_node(trans, root, root->node, cache_only);
+		BUG_ON(ret);
+		path->nodes[level] = root->node;
+		path->slots[level] = 0;
+	} else {
+		/* resume at the key and level recorded by defrag_walk_up() */
+		level = root->defrag_level;
+		path->lowest_level = level;
+		wret = btrfs_search_slot(trans, root, &root->defrag_progress,
+					 path, 0, 1);
+
+		if (wret < 0) {
+			ret = wret;
+			goto out;
+		}
+		/* fall back to the highest level at or below 'level' that has a node */
+		while(level > 0 && !path->nodes[level])
+			level--;
+		if (!path->nodes[level]) {
+			ret = 0;
+			goto out;
+		}
+	}
+
+	while(1) {
+		wret = defrag_walk_down(trans, root, path, &level, cache_only);
+		if (wret > 0)
+			break;
+		if (wret < 0)
+			ret = wret;
+
+		wret = defrag_walk_up(trans, root, path, &level, cache_only);
+		if (wret > 0)
+			break;
+		if (wret < 0)
+			ret = wret;
+		/* bound the work done per call; -EAGAIN keeps the saved progress */
+		if (num_runs++ > 8) {
+			ret = -EAGAIN;
+			break;
+		}
+	}
+	/* release every node still held in the path up to the starting level */
+	for (i = 0; i <= orig_level; i++) {
+		if (path->nodes[i]) {
+			btrfs_block_release(root, path->nodes[i]);
+			path->nodes[i] = 0;
+		}
+	}
+out:
+	if (path)
+		btrfs_free_path(path);
+	if (ret != -EAGAIN) {
+		memset(&root->defrag_progress, 0,
+		       sizeof(root->defrag_progress));
+	}
+	return ret;
+}
-- cgit v1.2.3-70-g09d2 From 26b8003f10569a9155b7539ef5a7379ee0c6b050 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 8 Aug 2007 20:17:12 -0400 Subject: Btrfs: Replace extent tree preallocation code with some bit radix magic. 
Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 7 +- fs/btrfs/disk-io.c | 3 +- fs/btrfs/extent-tree.c | 230 +++++++++---------------------------------------- fs/btrfs/transaction.c | 2 + fs/btrfs/transaction.h | 2 + 5 files changed, 49 insertions(+), 195 deletions(-) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8b7d01597f4..59e09e37ab9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -299,12 +299,7 @@ struct btrfs_fs_info { struct radix_tree_root block_group_radix; struct radix_tree_root block_group_data_radix; struct radix_tree_root extent_map_radix; - - u64 extent_tree_insert[BTRFS_MAX_LEVEL * 6]; - int extent_tree_insert_nr; - u64 extent_tree_prealloc[BTRFS_MAX_LEVEL * 6]; - int extent_tree_prealloc_nr; - + struct radix_tree_root extent_ins_radix; u64 generation; struct btrfs_transaction *running_transaction; struct btrfs_super_block *disk_super; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c948416fea3..192211274ce 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -427,6 +427,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); init_bit_radix(&fs_info->extent_map_radix); + init_bit_radix(&fs_info->extent_ins_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); INIT_RADIX_TREE(&fs_info->block_group_data_radix, GFP_KERNEL); @@ -443,8 +444,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; fs_info->do_barriers = 1; - fs_info->extent_tree_insert_nr = 0; - fs_info->extent_tree_prealloc_nr = 0; fs_info->closing = 0; INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f7d76d34fc3..ba50bd7b9a7 100644 --- a/fs/btrfs/extent-tree.c +++ 
b/fs/btrfs/extent-tree.c @@ -22,12 +22,6 @@ #include "print-tree.h" #include "transaction.h" -static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *orig_root, u64 num_blocks, u64 empty_size, - u64 search_start, - u64 search_end, u64 hint_block, - struct btrfs_key *ins, u64 exclude_start, - u64 exclude_nr, int data); static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct @@ -174,9 +168,6 @@ static u64 find_search_start(struct btrfs_root *root, if (cache->data) goto out; - if (num > 1) { - last = max(last, cache->last_prealloc); - } again: ret = cache_block_group(root, cache); if (ret) @@ -374,18 +365,12 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_key key; struct btrfs_leaf *l; struct btrfs_extent_item *item; - struct btrfs_key ins; u32 refs; path = btrfs_alloc_path(); if (!path) return -ENOMEM; - ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0, 0, - (u64)-1, 0, &ins, 0, 0, 0); - if (ret) { - btrfs_free_path(path); - return ret; - } + key.objectid = blocknr; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); @@ -532,13 +517,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, int pending_ret; struct btrfs_root *extent_root = root->fs_info->extent_root; struct btrfs_block_group_item *bi; - struct btrfs_key ins; - ret = find_free_extent(trans, extent_root, 0, 0, 0, (u64)-1, 0, &ins, - 0, 0, 0); - /* FIXME, set bit to recalc cache groups on next mount */ - if (ret) - return ret; ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); if (ret < 0) goto fail; @@ -706,14 +685,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, return 0; } -static int try_remove_page(struct address_space *mapping, unsigned long index) -{ - int ret; - return 0; - ret = invalidate_mapping_pages(mapping, index, index); - return ret; -} - int 
btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy) { unsigned long gang[8]; @@ -732,6 +703,9 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy) last = gang[i] + 1; } } + ret = find_first_radix_bit(&root->fs_info->extent_ins_radix, gang, 0, + ARRAY_SIZE(gang)); + WARN_ON(ret); return 0; } @@ -740,7 +714,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct radix_tree_root *unpin_radix) { unsigned long gang[8]; - struct inode *btree_inode = root->fs_info->btree_inode; struct btrfs_block_group_cache *block_group; u64 first = 0; int ret; @@ -765,14 +738,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, block_group->pinned--; if (gang[i] < block_group->last_alloc) block_group->last_alloc = gang[i]; - if (gang[i] < block_group->last_prealloc) - block_group->last_prealloc = gang[i]; if (!block_group->data) set_radix_bit(extent_radix, gang[i]); } - try_remove_page(btree_inode->i_mapping, - gang[i] << (PAGE_CACHE_SHIFT - - btree_inode->i_blkbits)); } } return 0; @@ -785,7 +753,8 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct struct btrfs_extent_item extent_item; int i; int ret; - u64 super_blocks_used; + int err; + unsigned long gang[8]; struct btrfs_fs_info *info = extent_root->fs_info; btrfs_set_extent_refs(&extent_item, 1); @@ -794,16 +763,21 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid); - for (i = 0; i < extent_root->fs_info->extent_tree_insert_nr; i++) { - ins.objectid = extent_root->fs_info->extent_tree_insert[i]; - super_blocks_used = btrfs_super_blocks_used(&info->super_copy); - btrfs_set_super_blocks_used(&info->super_copy, - super_blocks_used + 1); - ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item, - sizeof(extent_item)); - BUG_ON(ret); + while(1) { + ret = 
find_first_radix_bit(&info->extent_ins_radix, gang, 0, + ARRAY_SIZE(gang)); + if (!ret) + break; + + for (i = 0; i < ret; i++) { + ins.objectid = gang[i]; + err = btrfs_insert_item(trans, extent_root, &ins, + &extent_item, + sizeof(extent_item)); + clear_radix_bit(&info->extent_ins_radix, gang[i]); + WARN_ON(err); + } } - extent_root->fs_info->extent_tree_insert_nr = 0; return 0; } @@ -856,7 +830,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_root *extent_root = info->extent_root; int ret; struct btrfs_extent_item *ei; - struct btrfs_key ins; u32 refs; key.objectid = blocknr; @@ -868,12 +841,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (!path) return -ENOMEM; - ret = find_free_extent(trans, root, 0, 0, 0, (u64)-1, 0, &ins, 0, 0, 0); - if (ret) { - btrfs_free_path(path); - return ret; - } - ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); if (ret < 0) return ret; @@ -1003,35 +970,17 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_root * root = orig_root->fs_info->extent_root; struct btrfs_fs_info *info = root->fs_info; int total_needed = num_blocks; - int total_found = 0; - int fill_prealloc = 0; int level; struct btrfs_block_group_cache *block_group; int full_scan = 0; int wrapped = 0; u64 limit; + WARN_ON(num_blocks < 1); ins->flags = 0; btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); level = btrfs_header_level(btrfs_buffer_header(root->node)); - if (num_blocks == 0) { - fill_prealloc = 1; - num_blocks = 1; - total_needed = (min(level + 1, BTRFS_MAX_LEVEL)) * 6; - } - if (fill_prealloc) { - u64 first; - int nr = info->extent_tree_prealloc_nr; - first = info->extent_tree_prealloc[nr - 1]; - if (info->extent_tree_prealloc_nr >= total_needed && - first >= search_start) { - ins->objectid = info->extent_tree_prealloc[0]; - ins->offset = 1; - return 0; - } - info->extent_tree_prealloc_nr = 0; - } if (search_end == 
(u64)-1) search_end = btrfs_super_total_blocks(&info->super_copy); if (hint_block) { @@ -1091,10 +1040,6 @@ check_failed: l = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; if (slot >= btrfs_header_nritems(&l->header)) { - if (fill_prealloc) { - info->extent_tree_prealloc_nr = 0; - total_found = 0; - } if (start_found) limit = last_block + (block_group->key.offset >> 1); @@ -1170,67 +1115,21 @@ check_pending: } for (test_block = ins->objectid; test_block < ins->objectid + num_blocks; test_block++) { - if (test_radix_bit(&info->pinned_radix, test_block)) { + if (test_radix_bit(&info->pinned_radix, test_block) || + test_radix_bit(&info->extent_ins_radix, test_block)) { search_start = test_block + 1; goto new_group; } } - if (!fill_prealloc && info->extent_tree_insert_nr) { - u64 last = - info->extent_tree_insert[info->extent_tree_insert_nr - 1]; - if (ins->objectid + num_blocks > - info->extent_tree_insert[0] && - ins->objectid <= last) { - search_start = last + 1; - WARN_ON(!full_scan); - goto new_group; - } - } - if (!fill_prealloc && info->extent_tree_prealloc_nr) { - u64 first = - info->extent_tree_prealloc[info->extent_tree_prealloc_nr - 1]; - if (ins->objectid + num_blocks > first && - ins->objectid <= info->extent_tree_prealloc[0]) { - search_start = info->extent_tree_prealloc[0] + 1; - goto new_group; - } - } if (exclude_nr > 0 && (ins->objectid + num_blocks > exclude_start && ins->objectid < exclude_start + exclude_nr)) { search_start = exclude_start + exclude_nr; goto new_group; } - if (fill_prealloc) { - int nr; - test_block = ins->objectid; - if (test_block - info->extent_tree_prealloc[total_needed - 1] >= - leaf_range(root)) { - total_found = 0; - info->extent_tree_prealloc_nr = total_found; - } - while(test_block < ins->objectid + ins->offset && - total_found < total_needed) { - nr = total_needed - total_found - 1; - BUG_ON(nr < 0); - info->extent_tree_prealloc[nr] = test_block; - total_found++; - test_block++; - } - if (total_found < 
total_needed) { - search_start = test_block; - goto new_group; - } - info->extent_tree_prealloc_nr = total_found; - } if (!data) { block_group = btrfs_lookup_block_group(info, ins->objectid); - if (block_group) { - if (fill_prealloc) - block_group->last_prealloc = - info->extent_tree_prealloc[total_needed-1]; - else - trans->block_group = block_group; - } + if (block_group) + trans->block_group = block_group; } ins->offset = num_blocks; btrfs_free_path(path); @@ -1278,85 +1177,41 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, int pending_ret; u64 super_blocks_used; u64 search_start = 0; - u64 exclude_start = 0; - u64 exclude_nr = 0; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; struct btrfs_extent_item extent_item; - struct btrfs_key prealloc_key; btrfs_set_extent_refs(&extent_item, 1); btrfs_set_extent_owner(&extent_item, owner); - if (root == extent_root) { - int nr; - BUG_ON(info->extent_tree_prealloc_nr == 0); - BUG_ON(num_blocks != 1); - ins->offset = 1; - info->extent_tree_prealloc_nr--; - nr = info->extent_tree_prealloc_nr; - ins->objectid = info->extent_tree_prealloc[nr]; - info->extent_tree_insert[info->extent_tree_insert_nr++] = - ins->objectid; - ret = update_block_group(trans, root, - ins->objectid, ins->offset, 1, 0, 0); - WARN_ON(info->extent_tree_insert_nr > - ARRAY_SIZE(info->extent_tree_insert)); - BUG_ON(ret); - return 0; - } - - /* - * if we're doing a data allocation, preallocate room in the - * extent tree first. This way the extent tree blocks end up - * in the correct block group. 
- */ - if (data) { - ret = find_free_extent(trans, root, 0, 0, 0, - search_end, 0, &prealloc_key, 0, 0, 0); - BUG_ON(ret); - if (ret) - return ret; - exclude_nr = info->extent_tree_prealloc_nr; - exclude_start = info->extent_tree_prealloc[exclude_nr - 1]; - } - - /* do the real allocation */ + WARN_ON(num_blocks < 1); ret = find_free_extent(trans, root, num_blocks, empty_size, search_start, search_end, hint_block, ins, - exclude_start, exclude_nr, data); + trans->alloc_exclude_start, + trans->alloc_exclude_nr, data); BUG_ON(ret); if (ret) return ret; - /* - * if we're doing a metadata allocation, preallocate space in the - * extent tree second. This way, we don't create a tiny hole - * in the allocation map between any unused preallocation blocks - * and the metadata block we're actually allocating. On disk, - * it'll go: - * [block we've allocated], [used prealloc 1], [ unused prealloc ] - * The unused prealloc will get reused the next time around. - */ - if (!data) { - exclude_start = ins->objectid; - exclude_nr = ins->offset; - hint_block = exclude_start + exclude_nr; - ret = find_free_extent(trans, root, 0, 0, search_start, - search_end, hint_block, - &prealloc_key, exclude_start, - exclude_nr, 0); - BUG_ON(ret); - if (ret) - return ret; - } - super_blocks_used = btrfs_super_blocks_used(&info->super_copy); btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used + num_blocks); + + if (root == extent_root) { + BUG_ON(num_blocks != 1); + set_radix_bit(&root->fs_info->extent_ins_radix, ins->objectid); + goto update_block; + } + + WARN_ON(trans->alloc_exclude_nr); + trans->alloc_exclude_start = ins->objectid; + trans->alloc_exclude_nr = ins->offset; ret = btrfs_insert_item(trans, extent_root, ins, &extent_item, sizeof(extent_item)); + trans->alloc_exclude_start = 0; + trans->alloc_exclude_nr = 0; + BUG_ON(ret); finish_current_insert(trans, extent_root); pending_ret = del_pending_extents(trans, extent_root); @@ -1366,6 +1221,8 @@ int 
btrfs_alloc_extent(struct btrfs_trans_handle *trans, if (pending_ret) { return pending_ret; } + +update_block: ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0, data); BUG_ON(ret); @@ -1750,7 +1607,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) memcpy(&cache->key, &found_key, sizeof(found_key)); cache->last_alloc = cache->key.objectid; cache->first_free = cache->key.objectid; - cache->last_prealloc = cache->key.objectid; cache->pinned = 0; cache->cached = 0; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 88b0752550b..e64ecec3f52 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -109,6 +109,8 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, h->blocks_reserved = num_blocks; h->blocks_used = 0; h->block_group = NULL; + h->alloc_exclude_nr = 0; + h->alloc_exclude_start = 0; root->fs_info->running_transaction->use_count++; mutex_unlock(&root->fs_info->trans_mutex); return h; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 8b2714e6556..65a395eeca9 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -39,6 +39,8 @@ struct btrfs_trans_handle { unsigned long blocks_used; struct btrfs_transaction *transaction; struct btrfs_block_group_cache *block_group; + u64 alloc_exclude_start; + u64 alloc_exclude_nr; }; -- cgit v1.2.3-70-g09d2 From e9d0b13b5bbb58c9b840e407a8d181442f799966 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 10 Aug 2007 14:06:19 -0400 Subject: Btrfs: Btree defrag on the extent-mapping tree as well Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 18 ++++++---- fs/btrfs/ctree.h | 4 ++- fs/btrfs/extent-tree.c | 28 +++++++++++++++- fs/btrfs/inode.c | 23 ++----------- fs/btrfs/super.c | 2 ++ fs/btrfs/transaction.c | 91 +++++++++++++++++++++++++++----------------------- fs/btrfs/transaction.h | 2 ++ fs/btrfs/tree-defrag.c | 36 ++++++++++++++++---- 8 files changed, 125 insertions(+), 79 deletions(-) (limited to 
'fs/btrfs/transaction.h') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index c7e47e77723..ee1ae00d282 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -99,7 +99,6 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root if (ret) return ret; } else { - WARN_ON(!root->ref_cows); clean_tree_block(trans, root, buf); } @@ -162,13 +161,14 @@ static int close_blocks(u64 blocknr, u64 other) int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *parent, - int cache_only) + int cache_only, u64 *last_ret) { struct btrfs_node *parent_node; struct buffer_head *cur_bh; struct buffer_head *tmp_bh; u64 blocknr; - u64 search_start = 0; + u64 search_start = *last_ret; + u64 last_block = 0; u64 other; u32 parent_nritems; int start_slot; @@ -198,6 +198,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, for (i = start_slot; i < end_slot; i++) { int close = 1; blocknr = btrfs_node_blockptr(parent_node, i); + if (last_block == 0) + last_block = blocknr; if (i > 0) { other = btrfs_node_blockptr(parent_node, i - 1); close = close_blocks(blocknr, other); @@ -206,8 +208,10 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, other = btrfs_node_blockptr(parent_node, i + 1); close = close_blocks(blocknr, other); } - if (close) + if (close) { + last_block = blocknr; continue; + } cur_bh = btrfs_find_tree_block(root, blocknr); if (!cur_bh || !buffer_uptodate(cur_bh) || @@ -219,9 +223,9 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, brelse(cur_bh); cur_bh = read_tree_block(root, blocknr); } - if (search_start == 0) { - search_start = bh_blocknr(cur_bh) & ~((u64)65535); - } + if (search_start == 0) + search_start = last_block & ~((u64)65535); + err = __btrfs_cow_block(trans, root, cur_bh, parent, i, &tmp_bh, search_start, min(8, end_slot - i)); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 59e09e37ab9..d3cd564b3b3 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ 
-1019,6 +1019,8 @@ static inline void btrfs_memmove(struct btrfs_root *root, btrfs_item_offset((leaf)->items + (slot)))) /* extent-tree.c */ +int btrfs_extent_post_op(struct btrfs_trans_handle *trans, + struct btrfs_root *root); int btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy); struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, @@ -1066,7 +1068,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root ins_len, int cow); int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *parent, - int cache_only); + int cache_only, u64 *last_ret); void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); struct btrfs_path *btrfs_alloc_path(void); void btrfs_free_path(struct btrfs_path *p); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 120c448f58f..3418bb62b99 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -396,6 +396,14 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, return 0; } +int btrfs_extent_post_op(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + finish_current_insert(trans, root->fs_info->extent_root); + del_pending_extents(trans, root->fs_info->extent_root); + return 0; +} + static int lookup_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, u32 *refs) @@ -1374,7 +1382,25 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(ret); continue; } - next = read_tree_block(root, blocknr); + next = btrfs_find_tree_block(root, blocknr); + if (!next || !buffer_uptodate(next)) { + brelse(next); + mutex_unlock(&root->fs_info->fs_mutex); + next = read_tree_block(root, blocknr); + mutex_lock(&root->fs_info->fs_mutex); + + /* we dropped the lock, check one more time */ + ret = lookup_extent_ref(trans, root, blocknr, 1, &refs); + BUG_ON(ret); + if (refs != 1) { + path->slots[*level]++; + 
brelse(next); + ret = btrfs_free_extent(trans, root, + blocknr, 1, 1); + BUG_ON(ret); + continue; + } + } WARN_ON(*level <= 0); if (path->nodes[*level-1]) btrfs_block_release(root, path->nodes[*level-1]); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 12aa043b9f6..5c05ecbc572 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2159,9 +2159,7 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ioctl_vol_args vol_args; - struct btrfs_trans_handle *trans; int ret = 0; - int err; struct btrfs_dir_item *di; int namelen; struct btrfs_path *path; @@ -2201,25 +2199,8 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int case BTRFS_IOC_DEFRAG: mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - memset(&root->defrag_progress, 0, - sizeof(root->defrag_progress)); - while (1) { - root->defrag_running = 1; - err = btrfs_defrag_leaves(trans, root, 0); - - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - - btrfs_btree_balance_dirty(root); - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - if (err != -EAGAIN) - break; - } - root->defrag_running = 0; - btrfs_end_transaction(trans, root); + btrfs_defrag_root(root, 0); + btrfs_defrag_root(root->fs_info->extent_root, 0); mutex_unlock(&root->fs_info->fs_mutex); ret = 0; break; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2e797d5fb28..74f3de47423 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -121,7 +121,9 @@ static int btrfs_sync_fs(struct super_block *sb, int wait) filemap_flush(root->fs_info->btree_inode->i_mapping); return 0; } + btrfs_clean_old_snapshots(root); mutex_lock(&root->fs_info->fs_mutex); + btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); sb->s_dirt = 0; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c 
index 204337c5ca0..c9d52dc83e4 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -317,18 +317,47 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, return err; } +int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) +{ + struct btrfs_fs_info *info = root->fs_info; + int ret; + struct btrfs_trans_handle *trans; + + if (root->defrag_running) + return 0; + + trans = btrfs_start_transaction(root, 1); + while (1) { + root->defrag_running = 1; + ret = btrfs_defrag_leaves(trans, root, cacheonly); + btrfs_end_transaction(trans, root); + mutex_unlock(&info->fs_mutex); + + btrfs_btree_balance_dirty(root); + cond_resched(); + + mutex_lock(&info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + if (ret != -EAGAIN) + break; + } + root->defrag_running = 0; + radix_tree_tag_clear(&info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_DEFRAG_TAG); + btrfs_end_transaction(trans, root); + return 0; +} + int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) { struct btrfs_root *gang[1]; struct btrfs_root *root; - struct btrfs_root *tree_root = info->tree_root; - struct btrfs_trans_handle *trans; int i; int ret; int err = 0; u64 last = 0; - trans = btrfs_start_transaction(tree_root, 1); while(1) { ret = radix_tree_gang_lookup_tag(&info->fs_roots_radix, (void **)gang, last, @@ -339,37 +368,10 @@ int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) for (i = 0; i < ret; i++) { root = gang[i]; last = root->root_key.objectid + 1; - radix_tree_tag_clear(&info->fs_roots_radix, - (unsigned long)root->root_key.objectid, - BTRFS_ROOT_DEFRAG_TAG); - if (root->defrag_running) - continue; - - while (1) { - mutex_lock(&root->fs_info->trans_mutex); - record_root_in_trans(root); - mutex_unlock(&root->fs_info->trans_mutex); - - root->defrag_running = 1; - err = btrfs_defrag_leaves(trans, root, 1); - btrfs_end_transaction(trans, tree_root); - mutex_unlock(&info->fs_mutex); - - btrfs_btree_balance_dirty(root); - 
cond_resched(); - - mutex_lock(&info->fs_mutex); - trans = btrfs_start_transaction(tree_root, 1); - if (err != -EAGAIN) - break; - } - root->defrag_running = 0; - radix_tree_tag_clear(&info->fs_roots_radix, - (unsigned long)root->root_key.objectid, - BTRFS_ROOT_DEFRAG_TAG); + btrfs_defrag_root(root, 1); } } - btrfs_end_transaction(trans, tree_root); + btrfs_defrag_root(info->extent_root, 1); return err; } @@ -527,6 +529,20 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, return ret; } +int btrfs_clean_old_snapshots(struct btrfs_root *root) +{ + struct list_head dirty_roots; + INIT_LIST_HEAD(&dirty_roots); + + mutex_lock(&root->fs_info->trans_mutex); + list_splice_init(&root->fs_info->dead_roots, &dirty_roots); + mutex_unlock(&root->fs_info->trans_mutex); + + if (!list_empty(&dirty_roots)) { + drop_dirty_roots(root, &dirty_roots); + } + return 0; +} void btrfs_transaction_cleaner(struct work_struct *work) { struct btrfs_fs_info *fs_info = container_of(work, @@ -536,12 +552,10 @@ void btrfs_transaction_cleaner(struct work_struct *work) struct btrfs_root *root = fs_info->tree_root; struct btrfs_transaction *cur; struct btrfs_trans_handle *trans; - struct list_head dirty_roots; unsigned long now; unsigned long delay = HZ * 30; int ret; - INIT_LIST_HEAD(&dirty_roots); mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); cur = root->fs_info->running_transaction; @@ -561,14 +575,7 @@ void btrfs_transaction_cleaner(struct work_struct *work) ret = btrfs_commit_transaction(trans, root); out: mutex_unlock(&root->fs_info->fs_mutex); - - mutex_lock(&root->fs_info->trans_mutex); - list_splice_init(&root->fs_info->dead_roots, &dirty_roots); - mutex_unlock(&root->fs_info->trans_mutex); - - if (!list_empty(&dirty_roots)) { - drop_dirty_roots(root, &dirty_roots); - } + btrfs_clean_old_snapshots(root); btrfs_transaction_queue_work(root, delay); } diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 65a395eeca9..d5f491d3757 
100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -73,5 +73,7 @@ void btrfs_init_transaction_sys(void); void btrfs_exit_transaction_sys(void); int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list); int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info); +int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); +int btrfs_clean_old_snapshots(struct btrfs_root *root); #endif diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 7ea66b4aa5c..a09064a9a41 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -42,16 +42,20 @@ static void reada_defrag(struct btrfs_root *root, static int defrag_walk_down(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int *level, - int cache_only) + int cache_only, u64 *last_ret) { struct buffer_head *next; struct buffer_head *cur; u64 blocknr; int ret = 0; + int is_extent = 0; WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); + if (root->fs_info->extent_root == root) + is_extent = 1; + while(*level > 0) { WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); @@ -70,7 +74,10 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, if (*level == 1) { ret = btrfs_realloc_node(trans, root, path->nodes[*level], - cache_only); + cache_only, last_ret); + if (is_extent) + btrfs_extent_post_op(trans, root); + break; } blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur), @@ -90,8 +97,13 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, ret = btrfs_cow_block(trans, root, next, path->nodes[*level], path->slots[*level], &next); BUG_ON(ret); - ret = btrfs_realloc_node(trans, root, next, cache_only); + ret = btrfs_realloc_node(trans, root, next, cache_only, + last_ret); BUG_ON(ret); + + if (is_extent) + btrfs_extent_post_op(trans, root); + WARN_ON(*level <= 0); if (path->nodes[*level-1]) btrfs_block_release(root, path->nodes[*level-1]); @@ -148,10 +160,14 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle 
*trans, int level; int orig_level; int i; + int is_extent = 0; + u64 last_ret = 0; + + if (root->fs_info->extent_root == root) + is_extent = 1; - if (root->ref_cows == 0) { + if (root->ref_cows == 0 && !is_extent) goto out; - } path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -165,16 +181,21 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, get_bh(root->node); ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); BUG_ON(ret); - ret = btrfs_realloc_node(trans, root, root->node, cache_only); + ret = btrfs_realloc_node(trans, root, root->node, cache_only, + &last_ret); BUG_ON(ret); path->nodes[level] = root->node; path->slots[level] = 0; + if (is_extent) + btrfs_extent_post_op(trans, root); } else { level = root->defrag_level; path->lowest_level = level; wret = btrfs_search_slot(trans, root, &root->defrag_progress, path, 0, 1); + if (is_extent) + btrfs_extent_post_op(trans, root); if (wret < 0) { ret = wret; goto out; @@ -188,7 +209,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, } while(1) { - wret = defrag_walk_down(trans, root, path, &level, cache_only); + wret = defrag_walk_down(trans, root, path, &level, cache_only, + &last_ret); if (wret > 0) break; if (wret < 0) -- cgit v1.2.3-70-g09d2 From 15ee9bc7ed171248d1405df5854da5fa91bfdc39 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 10 Aug 2007 16:22:09 -0400 Subject: Btrfs: delay commits during fsync to allow more writers Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 5 +++++ fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/file.c | 22 ++++++++++++++++++---- fs/btrfs/inode.c | 2 ++ fs/btrfs/transaction.c | 44 ++++++++++++++++++++++++++++++++------------ fs/btrfs/transaction.h | 7 +++++++ 7 files changed, 66 insertions(+), 16 deletions(-) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 6bce46be49f..f446f161f73 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -25,6 +25,11 @@ 
struct btrfs_inode { struct btrfs_block_group_cache *block_group; struct btrfs_key location; struct inode vfs_inode; + + /* + * transid of the trans_handle that last modified this inode + */ + u64 last_trans; }; static inline struct btrfs_inode *BTRFS_I(struct inode *inode) { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d3cd564b3b3..c7f5161271d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -301,6 +301,7 @@ struct btrfs_fs_info { struct radix_tree_root extent_map_radix; struct radix_tree_root extent_ins_radix; u64 generation; + u64 last_trans_committed; struct btrfs_transaction *running_transaction; struct btrfs_super_block *disk_super; struct btrfs_super_block super_copy; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 192211274ce..b2f79878d51 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -435,6 +435,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) INIT_LIST_HEAD(&fs_info->dead_roots); sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; + fs_info->last_trans_committed = 0; fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; fs_info->sb = sb; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 00b118a2db6..6933ab11a5c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -694,22 +694,36 @@ static int btrfs_sync_file(struct file *file, { struct inode *inode = dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; - int ret; + int ret = 0; struct btrfs_trans_handle *trans; /* - * FIXME, use inode generation number to check if we can skip the - * commit + * check the transaction that last modified this inode + * and see if its already been committed */ mutex_lock(&root->fs_info->fs_mutex); + if (!BTRFS_I(inode)->last_trans) + goto out; + mutex_lock(&root->fs_info->trans_mutex); + if (BTRFS_I(inode)->last_trans <= + root->fs_info->last_trans_committed) { + BTRFS_I(inode)->last_trans = 0; + mutex_unlock(&root->fs_info->trans_mutex); + goto out; + } + 
mutex_unlock(&root->fs_info->trans_mutex); + + /* + * ok we haven't committed the transaction yet, lets do a commit + */ trans = btrfs_start_transaction(root, 1); if (!trans) { ret = -ENOMEM; goto out; } ret = btrfs_commit_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); out: + mutex_unlock(&root->fs_info->fs_mutex); return ret > 0 ? EIO : ret; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5c05ecbc572..398484179d8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -193,6 +193,7 @@ static int btrfs_update_inode(struct btrfs_trans_handle *trans, fill_inode_item(inode_item, inode); btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_set_inode_last_trans(trans, inode); ret = 0; failed: btrfs_release_path(root, path); @@ -2234,6 +2235,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); if (!ei) return NULL; + ei->last_trans = 0; return &ei->vfs_inode; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index c9d52dc83e4..18abea80279 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -55,7 +55,8 @@ static int join_transaction(struct btrfs_root *root) BUG_ON(!cur_trans); root->fs_info->generation++; root->fs_info->running_transaction = cur_trans; - cur_trans->num_writers = 0; + cur_trans->num_writers = 1; + cur_trans->num_joined = 0; cur_trans->transid = root->fs_info->generation; init_waitqueue_head(&cur_trans->writer_wait); init_waitqueue_head(&cur_trans->commit_wait); @@ -65,8 +66,11 @@ static int join_transaction(struct btrfs_root *root) cur_trans->start_time = get_seconds(); list_add_tail(&cur_trans->list, &root->fs_info->trans_list); init_bit_radix(&cur_trans->dirty_pages); + } else { + cur_trans->num_writers++; + cur_trans->num_joined++; } - cur_trans->num_writers++; + return 0; } @@ -428,12 +432,14 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) 
{ - int ret = 0; + unsigned long joined = 0; + unsigned long timeout = 1; struct btrfs_transaction *cur_trans; struct btrfs_transaction *prev_trans = NULL; struct list_head dirty_fs_roots; struct radix_tree_root pinned_copy; DEFINE_WAIT(wait); + int ret; init_bit_radix(&pinned_copy); INIT_LIST_HEAD(&dirty_fs_roots); @@ -448,7 +454,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_unlock(&root->fs_info->fs_mutex); ret = wait_for_commit(root, cur_trans); BUG_ON(ret); + + mutex_lock(&root->fs_info->trans_mutex); put_transaction(cur_trans); + mutex_unlock(&root->fs_info->trans_mutex); + mutex_lock(&root->fs_info->fs_mutex); return 0; } @@ -463,26 +473,35 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_unlock(&root->fs_info->trans_mutex); wait_for_commit(root, prev_trans); - put_transaction(prev_trans); mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); + put_transaction(prev_trans); } } - while (trans->transaction->num_writers > 1) { + + do { + joined = cur_trans->num_joined; WARN_ON(cur_trans != trans->transaction); - prepare_to_wait(&trans->transaction->writer_wait, &wait, + prepare_to_wait(&cur_trans->writer_wait, &wait, TASK_UNINTERRUPTIBLE); - if (trans->transaction->num_writers <= 1) - break; + + if (cur_trans->num_writers > 1) + timeout = MAX_SCHEDULE_TIMEOUT; + else + timeout = 1; + mutex_unlock(&root->fs_info->fs_mutex); mutex_unlock(&root->fs_info->trans_mutex); - schedule(); + + schedule_timeout(timeout); + mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); - finish_wait(&trans->transaction->writer_wait, &wait); - } - finish_wait(&trans->transaction->writer_wait, &wait); + finish_wait(&cur_trans->writer_wait, &wait); + } while (cur_trans->num_writers > 1 || + (cur_trans->num_joined != joined)); + WARN_ON(cur_trans != trans->transaction); ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, &dirty_fs_roots); @@ -511,6 +530,7 @@ int 
btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_finish_extent_commit(trans, root, &pinned_copy); mutex_lock(&root->fs_info->trans_mutex); cur_trans->commit_done = 1; + root->fs_info->last_trans_committed = cur_trans->transid; wake_up(&cur_trans->commit_wait); put_transaction(cur_trans); put_transaction(cur_trans); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index d5f491d3757..e451783a1a4 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -23,6 +23,7 @@ struct btrfs_transaction { u64 transid; unsigned long num_writers; + unsigned long num_joined; int in_commit; int use_count; int commit_done; @@ -57,6 +58,12 @@ static inline void btrfs_update_inode_block_group(struct BTRFS_I(inode)->block_group = trans->block_group; } +static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, + struct inode *inode) +{ + BTRFS_I(inode)->last_trans = trans->transaction->transid; +} + int btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, -- cgit v1.2.3-70-g09d2 From 5ce14bbcdd1b5d9233d26a1e89faf3a26c820c58 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 11 Sep 2007 11:15:39 -0400 Subject: Btrfs: Find and remove dead roots the first time a root is loaded. Dead roots are trees left over after a crash, and they were either in the process of being removed or were waiting to be removed when the box crashed. Before, a search of the entire tree of root pointers was done on mount looking for dead roots. Now, the search is done the first time we load a root. This makes mount faster when there are a large number of snapshots, and it enables the block accounting code to properly update the block counts on the latest root as old versions of the root are reaped after a crash. 
Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 ++- fs/btrfs/disk-io.c | 9 ++++----- fs/btrfs/root-tree.c | 16 +++++++++++++--- fs/btrfs/transaction.c | 6 ++++-- fs/btrfs/transaction.h | 3 ++- 5 files changed, 25 insertions(+), 12 deletions(-) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 03e1c4ad341..6d6e94b6380 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1148,7 +1148,8 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root *item); int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key); -int btrfs_find_dead_roots(struct btrfs_root *root); +int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, + struct btrfs_root *latest_root); /* dir-item.c */ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, u64 dir, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6c953a0e0aa..4296839eea1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -426,6 +426,10 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, return ERR_PTR(ret); } + ret = btrfs_find_dead_roots(fs_info->tree_root, + root->root_key.objectid, root); + BUG_ON(ret); + return root; } @@ -522,11 +526,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) btrfs_read_block_groups(extent_root); fs_info->generation = btrfs_super_generation(disk_super) + 1; - ret = btrfs_find_dead_roots(tree_root); - if (ret) { - mutex_unlock(&fs_info->fs_mutex); - goto fail_tree_root; - } mutex_unlock(&fs_info->fs_mutex); return tree_root; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 402f67821c1..3b5926dfbeb 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -93,7 +93,8 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } -int btrfs_find_dead_roots(struct btrfs_root *root) +int btrfs_find_dead_roots(struct 
btrfs_root *root, u64 objectid, + struct btrfs_root *latest) { struct btrfs_root *dead_root; struct btrfs_item *item; @@ -105,7 +106,7 @@ int btrfs_find_dead_roots(struct btrfs_root *root) struct btrfs_leaf *leaf; int slot; - key.objectid = 0; + key.objectid = objectid; key.flags = 0; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); key.offset = 0; @@ -131,15 +132,24 @@ int btrfs_find_dead_roots(struct btrfs_root *root) btrfs_disk_key_to_cpu(&key, &item->key); if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY) goto next; + + if (key.objectid < objectid) + goto next; + + if (key.objectid > objectid) + break; + ri = btrfs_item_ptr(leaf, slot, struct btrfs_root_item); if (btrfs_root_refs(ri) != 0) goto next; + dead_root = btrfs_read_fs_root_no_radix(root->fs_info, &key); if (IS_ERR(dead_root)) { ret = PTR_ERR(dead_root); goto err; } - ret = btrfs_add_dead_root(dead_root, + + ret = btrfs_add_dead_root(dead_root, latest, &root->fs_info->dead_roots); if (ret) goto err; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2b15daa3a9f..29755593de6 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -239,7 +239,9 @@ struct dirty_root { struct btrfs_root *latest_root; }; -int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list) +int btrfs_add_dead_root(struct btrfs_root *root, + struct btrfs_root *latest, + struct list_head *dead_list) { struct dirty_root *dirty; @@ -247,6 +249,7 @@ int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list) if (!dirty) return -ENOMEM; dirty->root = root; + dirty->latest_root = latest; list_add(&dirty->list, dead_list); return 0; } @@ -412,7 +415,6 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, while(1) { trans = btrfs_start_transaction(tree_root, 1); - ret = btrfs_drop_snapshot(trans, dirty->root); if (ret != -EAGAIN) { break; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index e451783a1a4..4bc328cbb24 100644 --- a/fs/btrfs/transaction.h +++ 
b/fs/btrfs/transaction.h @@ -78,7 +78,8 @@ void btrfs_transaction_flush_work(struct btrfs_root *root); void btrfs_transaction_queue_work(struct btrfs_root *root, int delay); void btrfs_init_transaction_sys(void); void btrfs_exit_transaction_sys(void); -int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list); +int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest, + struct list_head *dead_list); int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info); int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); int btrfs_clean_old_snapshots(struct btrfs_root *root); -- cgit v1.2.3-70-g09d2 From 5f39d397dfbe140a14edecd4e73c34ce23c4f9ee Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:14:19 -0400 Subject: Btrfs: Create extent_buffer interface for large blocksizes Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 + fs/btrfs/ctree.c | 1484 ++++++++++++++++++++++++++---------------------- fs/btrfs/ctree.h | 849 ++++++++++----------------- fs/btrfs/dir-item.c | 102 ++-- fs/btrfs/disk-io.c | 391 ++++++------- fs/btrfs/disk-io.h | 55 +- fs/btrfs/extent-tree.c | 294 +++++----- fs/btrfs/extent_map.c | 672 +++++++++++++++++++++- fs/btrfs/extent_map.h | 75 ++- fs/btrfs/file-item.c | 63 +- fs/btrfs/file.c | 129 +++-- fs/btrfs/inode-item.c | 22 +- fs/btrfs/inode-map.c | 17 +- fs/btrfs/inode.c | 435 ++++++++------ fs/btrfs/print-tree.c | 96 ++-- fs/btrfs/print-tree.h | 4 +- fs/btrfs/root-tree.c | 51 +- fs/btrfs/super.c | 7 +- fs/btrfs/sysfs.c | 10 +- fs/btrfs/transaction.c | 61 +- fs/btrfs/transaction.h | 5 +- fs/btrfs/tree-defrag.c | 58 +- 22 files changed, 2794 insertions(+), 2088 deletions(-) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 6341cf7a9f9..a3b51085d7f 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -23,4 +23,6 @@ modules_install: clean: $(MAKE) -C $(KERNELDIR) M=`pwd` clean +tester: + $(MAKE) -C $(KERNELDIR) M=`pwd` tree-defrag.o 
transaction.o sysfs.o super.o root-tree.o inode-map.o inode-item.o inode.o file-item.o file.o extent_map.o disk-io.o ctree.o dir-item.o extent-tree.o endif diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b41f48ade41..f60920e8a0e 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -16,21 +16,24 @@ * Boston, MA 021110-1307, USA. */ +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" +#include "print-tree.h" static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level); static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *ins_key, struct btrfs_path *path, int data_size); -static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *dst, struct buffer_head - *src); -static int balance_node_right(struct btrfs_trans_handle *trans, struct - btrfs_root *root, struct buffer_head *dst_buf, - struct buffer_head *src_buf); +static int push_node_left(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *dst, + struct extent_buffer *src); +static int balance_node_right(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *dst_buf, + struct extent_buffer *src_buf); static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot); @@ -62,40 +65,38 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) for (i = 0; i < BTRFS_MAX_LEVEL; i++) { if (!p->nodes[i]) break; - btrfs_block_release(root, p->nodes[i]); + free_extent_buffer(p->nodes[i]); } memset(p, 0, sizeof(*p)); } -static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *buf, struct buffer_head - *parent, int parent_slot, struct buffer_head - **cow_ret, u64 search_start, u64 empty_size) +static int __btrfs_cow_block(struct btrfs_trans_handle *trans, + struct 
btrfs_root *root, + struct extent_buffer *buf, + struct extent_buffer *parent, int parent_slot, + struct extent_buffer **cow_ret, + u64 search_start, u64 empty_size) { - struct buffer_head *cow; - struct btrfs_node *cow_node; + struct extent_buffer *cow; int ret = 0; int different_trans = 0; WARN_ON(root->ref_cows && trans->transid != root->last_trans); - WARN_ON(!buffer_uptodate(buf)); + cow = btrfs_alloc_free_block(trans, root, search_start, empty_size); if (IS_ERR(cow)) return PTR_ERR(cow); - cow_node = btrfs_buffer_node(cow); - if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) + if (buf->len != root->sectorsize || cow->len != root->sectorsize) WARN_ON(1); - memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); - btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow)); - btrfs_set_header_generation(&cow_node->header, trans->transid); - btrfs_set_header_owner(&cow_node->header, root->root_key.objectid); + copy_extent_buffer(cow, buf, 0, 0, cow->len); + btrfs_set_header_blocknr(cow, extent_buffer_blocknr(cow)); + btrfs_set_header_generation(cow, trans->transid); + btrfs_set_header_owner(cow, root->root_key.objectid); - WARN_ON(btrfs_header_generation(btrfs_buffer_header(buf)) > - trans->transid); - if (btrfs_header_generation(btrfs_buffer_header(buf)) != - trans->transid) { + WARN_ON(btrfs_header_generation(buf) > trans->transid); + if (btrfs_header_generation(buf) != trans->transid) { different_trans = 1; ret = btrfs_inc_ref(trans, root, buf); if (ret) @@ -106,29 +107,29 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root if (buf == root->node) { root->node = cow; - get_bh(cow); + extent_buffer_get(cow); if (buf != root->commit_root) { - btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); + btrfs_free_extent(trans, root, + extent_buffer_blocknr(buf), 1, 1); } - btrfs_block_release(root, buf); + free_extent_buffer(buf); } else { - btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, - 
bh_blocknr(cow)); + btrfs_set_node_blockptr(parent, parent_slot, + extent_buffer_blocknr(cow)); btrfs_mark_buffer_dirty(parent); - WARN_ON(btrfs_header_generation(btrfs_buffer_header(parent)) != - trans->transid); - btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); + WARN_ON(btrfs_header_generation(parent) != trans->transid); + btrfs_free_extent(trans, root, extent_buffer_blocknr(buf),1,1); } - btrfs_block_release(root, buf); + free_extent_buffer(buf); btrfs_mark_buffer_dirty(cow); *cow_ret = cow; return 0; } -int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *buf, struct buffer_head - *parent, int parent_slot, struct buffer_head - **cow_ret) +int btrfs_cow_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *buf, + struct extent_buffer *parent, int parent_slot, + struct extent_buffer **cow_ret) { u64 search_start; if (trans->transaction != root->fs_info->running_transaction) { @@ -141,13 +142,12 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root root->fs_info->generation); WARN_ON(1); } - if (btrfs_header_generation(btrfs_buffer_header(buf)) == - trans->transid) { + if (btrfs_header_generation(buf) == trans->transid) { *cow_ret = buf; return 0; } - search_start = bh_blocknr(buf) & ~((u64)65535); + search_start = extent_buffer_blocknr(buf) & ~((u64)65535); return __btrfs_cow_block(trans, root, buf, parent, parent_slot, cow_ret, search_start, 0); } @@ -161,9 +161,11 @@ static int close_blocks(u64 blocknr, u64 other) return 0; } -static int should_defrag_leaf(struct buffer_head *bh) +#if 0 +static int should_defrag_leaf(struct extent_buffer *eb) { - struct btrfs_leaf *leaf = btrfs_buffer_leaf(bh); + return 0; + struct btrfs_leaf *leaf = btrfs_buffer_leaf(eb); struct btrfs_disk_key *key; u32 nritems; @@ -188,14 +190,17 @@ static int should_defrag_leaf(struct buffer_head *bh) } return 0; } +#endif int btrfs_realloc_node(struct btrfs_trans_handle *trans, - 
struct btrfs_root *root, struct buffer_head *parent, + struct btrfs_root *root, struct extent_buffer *parent, int cache_only, u64 *last_ret) { + return 0; +#if 0 struct btrfs_node *parent_node; - struct buffer_head *cur_bh; - struct buffer_head *tmp_bh; + struct extent_buffer *cur_eb; + struct extent_buffer *tmp_eb; u64 blocknr; u64 search_start = *last_ret; u64 last_block = 0; @@ -281,6 +286,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, brelse(tmp_bh); } return err; +#endif } /* @@ -289,12 +295,12 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, * which is the stop of the leaf data stack */ static inline unsigned int leaf_data_end(struct btrfs_root *root, - struct btrfs_leaf *leaf) + struct extent_buffer *leaf) { - u32 nr = btrfs_header_nritems(&leaf->header); + u32 nr = btrfs_header_nritems(leaf); if (nr == 0) return BTRFS_LEAF_DATA_SIZE(root); - return btrfs_item_offset(leaf->items + nr - 1); + return btrfs_item_offset_nr(leaf, nr - 1); } /* @@ -310,9 +316,9 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) return 1; if (k1.objectid < k2->objectid) return -1; - if (k1.flags > k2->flags) + if (k1.type > k2->type) return 1; - if (k1.flags < k2->flags) + if (k1.type < k2->type) return -1; if (k1.offset > k2->offset) return 1; @@ -324,37 +330,39 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) static int check_node(struct btrfs_root *root, struct btrfs_path *path, int level) { - struct btrfs_node *parent = NULL; - struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]); + struct extent_buffer *parent = NULL; + struct extent_buffer *node = path->nodes[level]; + struct btrfs_disk_key parent_key; + struct btrfs_disk_key node_key; int parent_slot; int slot; struct btrfs_key cpukey; - u32 nritems = btrfs_header_nritems(&node->header); + u32 nritems = btrfs_header_nritems(node); if (path->nodes[level + 1]) - parent = btrfs_buffer_node(path->nodes[level + 1]); + parent = path->nodes[level 
+ 1]; slot = path->slots[level]; - BUG_ON(!buffer_uptodate(path->nodes[level])); BUG_ON(nritems == 0); if (parent) { - struct btrfs_disk_key *parent_key; - parent_slot = path->slots[level + 1]; - parent_key = &parent->ptrs[parent_slot].key; - BUG_ON(memcmp(parent_key, &node->ptrs[0].key, + btrfs_node_key(parent, &parent_key, parent_slot); + btrfs_node_key(node, &node_key, 0); + BUG_ON(memcmp(&parent_key, &node_key, sizeof(struct btrfs_disk_key))); BUG_ON(btrfs_node_blockptr(parent, parent_slot) != - btrfs_header_blocknr(&node->header)); + btrfs_header_blocknr(node)); } BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root)); if (slot != 0) { - btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[slot - 1].key); - BUG_ON(comp_keys(&node->ptrs[slot].key, &cpukey) <= 0); + btrfs_node_key_to_cpu(node, &cpukey, slot - 1); + btrfs_node_key(node, &node_key, slot); + BUG_ON(comp_keys(&node_key, &cpukey) <= 0); } if (slot < nritems - 1) { - btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[slot + 1].key); - BUG_ON(comp_keys(&node->ptrs[slot].key, &cpukey) >= 0); + btrfs_node_key_to_cpu(node, &cpukey, slot + 1); + btrfs_node_key(node, &node_key, slot); + BUG_ON(comp_keys(&node_key, &cpukey) >= 0); } return 0; } @@ -362,83 +370,172 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path, static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, int level) { - struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[level]); - struct btrfs_node *parent = NULL; + struct extent_buffer *leaf = path->nodes[level]; + struct extent_buffer *parent = NULL; int parent_slot; - int slot = path->slots[0]; struct btrfs_key cpukey; + struct btrfs_disk_key parent_key; + struct btrfs_disk_key leaf_key; + int slot = path->slots[0]; - u32 nritems = btrfs_header_nritems(&leaf->header); + u32 nritems = btrfs_header_nritems(leaf); if (path->nodes[level + 1]) - parent = btrfs_buffer_node(path->nodes[level + 1]); - - BUG_ON(btrfs_leaf_free_space(root, leaf) < 0); + parent = path->nodes[level 
+ 1]; if (nritems == 0) return 0; if (parent) { - struct btrfs_disk_key *parent_key; - parent_slot = path->slots[level + 1]; - parent_key = &parent->ptrs[parent_slot].key; + btrfs_node_key(parent, &parent_key, parent_slot); + btrfs_item_key(leaf, &leaf_key, 0); - BUG_ON(memcmp(parent_key, &leaf->items[0].key, + BUG_ON(memcmp(&parent_key, &leaf_key, sizeof(struct btrfs_disk_key))); BUG_ON(btrfs_node_blockptr(parent, parent_slot) != - btrfs_header_blocknr(&leaf->header)); + btrfs_header_blocknr(leaf)); + } +#if 0 + for (i = 0; nritems > 1 && i < nritems - 2; i++) { + btrfs_item_key_to_cpu(leaf, &cpukey, i + 1); + btrfs_item_key(leaf, &leaf_key, i); + if (comp_keys(&leaf_key, &cpukey) >= 0) { + btrfs_print_leaf(root, leaf); + printk("slot %d offset bad key\n", i); + BUG_ON(1); + } + if (btrfs_item_offset_nr(leaf, i) != + btrfs_item_end_nr(leaf, i + 1)) { + btrfs_print_leaf(root, leaf); + printk("slot %d offset bad\n", i); + BUG_ON(1); + } + if (i == 0) { + if (btrfs_item_offset_nr(leaf, i) + + btrfs_item_size_nr(leaf, i) != + BTRFS_LEAF_DATA_SIZE(root)) { + btrfs_print_leaf(root, leaf); + printk("slot %d first offset bad\n", i); + BUG_ON(1); + } + } } - if (slot != 0) { - btrfs_disk_key_to_cpu(&cpukey, &leaf->items[slot - 1].key); - BUG_ON(comp_keys(&leaf->items[slot].key, &cpukey) <= 0); - BUG_ON(btrfs_item_offset(leaf->items + slot - 1) != - btrfs_item_end(leaf->items + slot)); + if (nritems > 0) { + if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) { + btrfs_print_leaf(root, leaf); + printk("slot %d bad size \n", nritems - 1); + BUG_ON(1); + } + } +#endif + if (slot != 0 && slot < nritems - 1) { + btrfs_item_key(leaf, &leaf_key, slot); + btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1); + if (comp_keys(&leaf_key, &cpukey) <= 0) { + btrfs_print_leaf(root, leaf); + printk("slot %d offset bad key\n", slot); + BUG_ON(1); + } + if (btrfs_item_offset_nr(leaf, slot - 1) != + btrfs_item_end_nr(leaf, slot)) { + btrfs_print_leaf(root, leaf); + printk("slot %d offset bad\n", 
slot); + BUG_ON(1); + } } if (slot < nritems - 1) { - btrfs_disk_key_to_cpu(&cpukey, &leaf->items[slot + 1].key); - BUG_ON(comp_keys(&leaf->items[slot].key, &cpukey) >= 0); - BUG_ON(btrfs_item_offset(leaf->items + slot) != - btrfs_item_end(leaf->items + slot + 1)); + btrfs_item_key(leaf, &leaf_key, slot); + btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1); + BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0); + if (btrfs_item_offset_nr(leaf, slot) != + btrfs_item_end_nr(leaf, slot + 1)) { + btrfs_print_leaf(root, leaf); + printk("slot %d offset bad\n", slot); + BUG_ON(1); + } } - BUG_ON(btrfs_item_offset(leaf->items) + - btrfs_item_size(leaf->items) != BTRFS_LEAF_DATA_SIZE(root)); + BUG_ON(btrfs_item_offset_nr(leaf, 0) + + btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root)); return 0; } static int check_block(struct btrfs_root *root, struct btrfs_path *path, int level) { - struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]); - if (memcmp(node->header.fsid, root->fs_info->disk_super->fsid, - sizeof(node->header.fsid))) - BUG(); + struct extent_buffer *buf = path->nodes[level]; + char fsid[BTRFS_FSID_SIZE]; + + read_extent_buffer(buf, fsid, (unsigned long)btrfs_header_fsid(buf), + BTRFS_FSID_SIZE); + + if (memcmp(fsid, root->fs_info->fsid, BTRFS_FSID_SIZE)) { + int i = 0; + printk("warning bad block %Lu\n", buf->start); + if (!btrfs_buffer_uptodate(buf)) { + WARN_ON(1); + } + for (i = 0; i < BTRFS_FSID_SIZE; i++) { + printk("%x:%x ", root->fs_info->fsid[i], fsid[i]); + } + printk("\n"); + // BUG(); + } if (level == 0) return check_leaf(root, path, level); return check_node(root, path, level); } /* - * search for key in the array p. items p are item_size apart - * and there are 'max' items in p + * search for key in the extent_buffer. The items start at offset p, + * and they are item_size apart. There are 'max' items in p. 
+ * * the slot in the array is returned via slot, and it points to * the place where you would insert key if it is not found in * the array. * * slot may point to max if the key is bigger than all of the keys */ -static int generic_bin_search(char *p, int item_size, struct btrfs_key *key, - int max, int *slot) +static int generic_bin_search(struct extent_buffer *eb, unsigned long p, + int item_size, struct btrfs_key *key, + int max, int *slot) { int low = 0; int high = max; int mid; int ret; struct btrfs_disk_key *tmp; + struct btrfs_disk_key unaligned; + unsigned long offset; + char *map_token = NULL; + char *kaddr = NULL; + unsigned long map_start = 0; + unsigned long map_len = 0; while(low < high) { mid = (low + high) / 2; - tmp = (struct btrfs_disk_key *)(p + mid * item_size); + offset = p + mid * item_size; + + if (!map_token || offset < map_start || + (offset + sizeof(struct btrfs_disk_key)) > + map_start + map_len) { + if (map_token) + unmap_extent_buffer(eb, map_token, KM_USER0); + map_extent_buffer(eb, offset, &map_token, &kaddr, + &map_start, &map_len, KM_USER0); + + } + if (offset + sizeof(struct btrfs_disk_key) > + map_start + map_len) { + unmap_extent_buffer(eb, map_token, KM_USER0); + read_extent_buffer(eb, &unaligned, + offset, sizeof(unaligned)); + map_token = NULL; + tmp = &unaligned; + } else { + tmp = (struct btrfs_disk_key *)(kaddr + offset - + map_start); + } ret = comp_keys(tmp, key); if (ret < 0) @@ -447,10 +544,13 @@ static int generic_bin_search(char *p, int item_size, struct btrfs_key *key, high = mid; else { *slot = mid; + unmap_extent_buffer(eb, map_token, KM_USER0); return 0; } } *slot = low; + if (map_token) + unmap_extent_buffer(eb, map_token, KM_USER0); return 1; } @@ -458,46 +558,42 @@ static int generic_bin_search(char *p, int item_size, struct btrfs_key *key, * simple bin_search frontend that does the right thing for * leaves vs nodes */ -static int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot) +static int 
bin_search(struct extent_buffer *eb, struct btrfs_key *key, + int level, int *slot) { - if (btrfs_is_leaf(c)) { - struct btrfs_leaf *l = (struct btrfs_leaf *)c; - return generic_bin_search((void *)l->items, + if (level == 0) { + return generic_bin_search(eb, + offsetof(struct btrfs_leaf, items), sizeof(struct btrfs_item), - key, btrfs_header_nritems(&c->header), + key, btrfs_header_nritems(eb), slot); } else { - return generic_bin_search((void *)c->ptrs, + return generic_bin_search(eb, + offsetof(struct btrfs_node, ptrs), sizeof(struct btrfs_key_ptr), - key, btrfs_header_nritems(&c->header), + key, btrfs_header_nritems(eb), slot); } return -1; } -static struct buffer_head *read_node_slot(struct btrfs_root *root, - struct buffer_head *parent_buf, - int slot) +static struct extent_buffer *read_node_slot(struct btrfs_root *root, + struct extent_buffer *parent, int slot) { - struct btrfs_node *node = btrfs_buffer_node(parent_buf); if (slot < 0) return NULL; - if (slot >= btrfs_header_nritems(&node->header)) + if (slot >= btrfs_header_nritems(parent)) return NULL; - return read_tree_block(root, btrfs_node_blockptr(node, slot)); + return read_tree_block(root, btrfs_node_blockptr(parent, slot)); } static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - struct buffer_head *right_buf; - struct buffer_head *mid_buf; - struct buffer_head *left_buf; - struct buffer_head *parent_buf = NULL; - struct btrfs_node *right = NULL; - struct btrfs_node *mid; - struct btrfs_node *left = NULL; - struct btrfs_node *parent = NULL; + struct extent_buffer *right = NULL; + struct extent_buffer *mid; + struct extent_buffer *left = NULL; + struct extent_buffer *parent = NULL; int ret = 0; int wret; int pslot; @@ -508,60 +604,57 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (level == 0) return 0; - mid_buf = path->nodes[level]; - mid = btrfs_buffer_node(mid_buf); + mid = path->nodes[level]; 
orig_ptr = btrfs_node_blockptr(mid, orig_slot); if (level < BTRFS_MAX_LEVEL - 1) - parent_buf = path->nodes[level + 1]; + parent = path->nodes[level + 1]; pslot = path->slots[level + 1]; /* * deal with the case where there is only one pointer in the root * by promoting the node below to a root */ - if (!parent_buf) { - struct buffer_head *child; - u64 blocknr = bh_blocknr(mid_buf); + if (!parent) { + struct extent_buffer *child; + u64 blocknr = extent_buffer_blocknr(mid); - if (btrfs_header_nritems(&mid->header) != 1) + if (btrfs_header_nritems(mid) != 1) return 0; /* promote the child to a root */ - child = read_node_slot(root, mid_buf, 0); + child = read_node_slot(root, mid, 0); BUG_ON(!child); root->node = child; path->nodes[level] = NULL; - clean_tree_block(trans, root, mid_buf); - wait_on_buffer(mid_buf); + clean_tree_block(trans, root, mid); + wait_on_tree_block_writeback(root, mid); /* once for the path */ - btrfs_block_release(root, mid_buf); + free_extent_buffer(mid); /* once for the root ptr */ - btrfs_block_release(root, mid_buf); + free_extent_buffer(mid); return btrfs_free_extent(trans, root, blocknr, 1, 1); } - parent = btrfs_buffer_node(parent_buf); - - if (btrfs_header_nritems(&mid->header) > + if (btrfs_header_nritems(mid) > BTRFS_NODEPTRS_PER_BLOCK(root) / 4) return 0; - if (btrfs_header_nritems(&mid->header) < 2) + if (btrfs_header_nritems(mid) < 2) err_on_enospc = 1; - left_buf = read_node_slot(root, parent_buf, pslot - 1); - if (left_buf) { - wret = btrfs_cow_block(trans, root, left_buf, - parent_buf, pslot - 1, &left_buf); + left = read_node_slot(root, parent, pslot - 1); + if (left) { + wret = btrfs_cow_block(trans, root, left, + parent, pslot - 1, &left); if (wret) { ret = wret; goto enospc; } } - right_buf = read_node_slot(root, parent_buf, pslot + 1); - if (right_buf) { - wret = btrfs_cow_block(trans, root, right_buf, - parent_buf, pslot + 1, &right_buf); + right = read_node_slot(root, parent, pslot + 1); + if (right) { + wret = 
btrfs_cow_block(trans, root, right, + parent, pslot + 1, &right); if (wret) { ret = wret; goto enospc; @@ -569,30 +662,27 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root } /* first, try to make some room in the middle buffer */ - if (left_buf) { - left = btrfs_buffer_node(left_buf); - orig_slot += btrfs_header_nritems(&left->header); - wret = push_node_left(trans, root, left_buf, mid_buf); + if (left) { + orig_slot += btrfs_header_nritems(left); + wret = push_node_left(trans, root, left, mid); if (wret < 0) ret = wret; - if (btrfs_header_nritems(&mid->header) < 2) + if (btrfs_header_nritems(mid) < 2) err_on_enospc = 1; } /* * then try to empty the right most buffer into the middle */ - if (right_buf) { - right = btrfs_buffer_node(right_buf); - wret = push_node_left(trans, root, mid_buf, right_buf); + if (right) { + wret = push_node_left(trans, root, mid, right); if (wret < 0 && wret != -ENOSPC) ret = wret; - if (btrfs_header_nritems(&right->header) == 0) { - u64 blocknr = bh_blocknr(right_buf); - clean_tree_block(trans, root, right_buf); - wait_on_buffer(right_buf); - btrfs_block_release(root, right_buf); - right_buf = NULL; + if (btrfs_header_nritems(right) == 0) { + u64 blocknr = extent_buffer_blocknr(right); + clean_tree_block(trans, root, right); + wait_on_tree_block_writeback(root, right); + free_extent_buffer(right); right = NULL; wret = del_ptr(trans, root, path, level + 1, pslot + 1); @@ -602,14 +692,13 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (wret) ret = wret; } else { - btrfs_memcpy(root, parent, - &parent->ptrs[pslot + 1].key, - &right->ptrs[0].key, - sizeof(struct btrfs_disk_key)); - btrfs_mark_buffer_dirty(parent_buf); + struct btrfs_disk_key right_key; + btrfs_node_key(right, &right_key, 0); + btrfs_set_node_key(parent, &right_key, pslot + 1); + btrfs_mark_buffer_dirty(parent); } } - if (btrfs_header_nritems(&mid->header) == 1) { + if (btrfs_header_nritems(mid) == 1) { /* * 
we're not allowed to leave a node with one item in the * tree during a delete. A deletion from lower in the tree @@ -619,21 +708,20 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root * otherwise we would have pulled some pointers from the * right */ - BUG_ON(!left_buf); - wret = balance_node_right(trans, root, mid_buf, left_buf); + BUG_ON(!left); + wret = balance_node_right(trans, root, mid, left); if (wret < 0) { ret = wret; goto enospc; } BUG_ON(wret == 1); } - if (btrfs_header_nritems(&mid->header) == 0) { + if (btrfs_header_nritems(mid) == 0) { /* we've managed to empty the middle node, drop it */ - u64 blocknr = bh_blocknr(mid_buf); - clean_tree_block(trans, root, mid_buf); - wait_on_buffer(mid_buf); - btrfs_block_release(root, mid_buf); - mid_buf = NULL; + u64 blocknr = extent_buffer_blocknr(mid); + clean_tree_block(trans, root, mid); + wait_on_tree_block_writeback(root, mid); + free_extent_buffer(mid); mid = NULL; wret = del_ptr(trans, root, path, level + 1, pslot); if (wret) @@ -643,37 +731,36 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root ret = wret; } else { /* update the parent key to reflect our changes */ - btrfs_memcpy(root, parent, - &parent->ptrs[pslot].key, &mid->ptrs[0].key, - sizeof(struct btrfs_disk_key)); - btrfs_mark_buffer_dirty(parent_buf); + struct btrfs_disk_key mid_key; + btrfs_node_key(mid, &mid_key, 0); + btrfs_set_node_key(parent, &mid_key, pslot); + btrfs_mark_buffer_dirty(parent); } /* update the path */ - if (left_buf) { - if (btrfs_header_nritems(&left->header) > orig_slot) { - get_bh(left_buf); - path->nodes[level] = left_buf; + if (left) { + if (btrfs_header_nritems(left) > orig_slot) { + extent_buffer_get(left); + path->nodes[level] = left; path->slots[level + 1] -= 1; path->slots[level] = orig_slot; - if (mid_buf) - btrfs_block_release(root, mid_buf); + if (mid) + free_extent_buffer(mid); } else { - orig_slot -= btrfs_header_nritems(&left->header); + orig_slot -= 
btrfs_header_nritems(left); path->slots[level] = orig_slot; } } /* double check we haven't messed things up */ check_block(root, path, level); if (orig_ptr != - btrfs_node_blockptr(btrfs_buffer_node(path->nodes[level]), - path->slots[level])) + btrfs_node_blockptr(path->nodes[level], path->slots[level])) BUG(); enospc: - if (right_buf) - btrfs_block_release(root, right_buf); - if (left_buf) - btrfs_block_release(root, left_buf); + if (right) + free_extent_buffer(right); + if (left) + free_extent_buffer(left); return ret; } @@ -682,14 +769,10 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - struct buffer_head *right_buf; - struct buffer_head *mid_buf; - struct buffer_head *left_buf; - struct buffer_head *parent_buf = NULL; - struct btrfs_node *right = NULL; - struct btrfs_node *mid; - struct btrfs_node *left = NULL; - struct btrfs_node *parent = NULL; + struct extent_buffer *right = NULL; + struct extent_buffer *mid; + struct extent_buffer *left = NULL; + struct extent_buffer *parent = NULL; int ret = 0; int wret; int pslot; @@ -699,107 +782,101 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, if (level == 0) return 1; - mid_buf = path->nodes[level]; - mid = btrfs_buffer_node(mid_buf); + mid = path->nodes[level]; orig_ptr = btrfs_node_blockptr(mid, orig_slot); if (level < BTRFS_MAX_LEVEL - 1) - parent_buf = path->nodes[level + 1]; + parent = path->nodes[level + 1]; pslot = path->slots[level + 1]; - if (!parent_buf) + if (!parent) return 1; - parent = btrfs_buffer_node(parent_buf); - left_buf = read_node_slot(root, parent_buf, pslot - 1); + left = read_node_slot(root, parent, pslot - 1); /* first, try to make some room in the middle buffer */ - if (left_buf) { + if (left) { u32 left_nr; - left = btrfs_buffer_node(left_buf); - left_nr = btrfs_header_nritems(&left->header); + left_nr = btrfs_header_nritems(left); if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) 
{ wret = 1; } else { - ret = btrfs_cow_block(trans, root, left_buf, parent_buf, - pslot - 1, &left_buf); + ret = btrfs_cow_block(trans, root, left, parent, + pslot - 1, &left); if (ret) wret = 1; else { - left = btrfs_buffer_node(left_buf); wret = push_node_left(trans, root, - left_buf, mid_buf); + left, mid); } } if (wret < 0) ret = wret; if (wret == 0) { + struct btrfs_disk_key disk_key; orig_slot += left_nr; - btrfs_memcpy(root, parent, - &parent->ptrs[pslot].key, - &mid->ptrs[0].key, - sizeof(struct btrfs_disk_key)); - btrfs_mark_buffer_dirty(parent_buf); - if (btrfs_header_nritems(&left->header) > orig_slot) { - path->nodes[level] = left_buf; + btrfs_node_key(mid, &disk_key, 0); + btrfs_set_node_key(parent, &disk_key, pslot); + btrfs_mark_buffer_dirty(parent); + if (btrfs_header_nritems(left) > orig_slot) { + path->nodes[level] = left; path->slots[level + 1] -= 1; path->slots[level] = orig_slot; - btrfs_block_release(root, mid_buf); + free_extent_buffer(mid); } else { orig_slot -= - btrfs_header_nritems(&left->header); + btrfs_header_nritems(left); path->slots[level] = orig_slot; - btrfs_block_release(root, left_buf); + free_extent_buffer(left); } check_node(root, path, level); return 0; } - btrfs_block_release(root, left_buf); + free_extent_buffer(left); } - right_buf = read_node_slot(root, parent_buf, pslot + 1); + right= read_node_slot(root, parent, pslot + 1); /* * then try to empty the right most buffer into the middle */ - if (right_buf) { + if (right) { u32 right_nr; - right = btrfs_buffer_node(right_buf); - right_nr = btrfs_header_nritems(&right->header); + right_nr = btrfs_header_nritems(right); if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { wret = 1; } else { - ret = btrfs_cow_block(trans, root, right_buf, - parent_buf, pslot + 1, - &right_buf); + ret = btrfs_cow_block(trans, root, right, + parent, pslot + 1, + &right); if (ret) wret = 1; else { - right = btrfs_buffer_node(right_buf); wret = balance_node_right(trans, root, - right_buf, 
mid_buf); + right, mid); } } if (wret < 0) ret = wret; if (wret == 0) { - btrfs_memcpy(root, parent, - &parent->ptrs[pslot + 1].key, - &right->ptrs[0].key, - sizeof(struct btrfs_disk_key)); - btrfs_mark_buffer_dirty(parent_buf); - if (btrfs_header_nritems(&mid->header) <= orig_slot) { - path->nodes[level] = right_buf; + struct btrfs_disk_key disk_key; + + btrfs_node_key(right, &disk_key, 0); + btrfs_set_node_key(parent, &disk_key, pslot + 1); + btrfs_mark_buffer_dirty(parent); + + if (btrfs_header_nritems(mid) <= orig_slot) { + path->nodes[level] = right; path->slots[level + 1] += 1; path->slots[level] = orig_slot - - btrfs_header_nritems(&mid->header); - btrfs_block_release(root, mid_buf); + btrfs_header_nritems(mid); + free_extent_buffer(mid); } else { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); } check_node(root, path, level); return 0; } - btrfs_block_release(root, right_buf); + free_extent_buffer(right); } check_node(root, path, level); return 1; @@ -811,10 +888,9 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, int level, int slot) { - struct btrfs_node *node; + struct extent_buffer *node; int i; u32 nritems; - u64 item_objectid; u64 blocknr; u64 search; u64 cluster_start; @@ -823,7 +899,7 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, int direction = path->reada; struct radix_tree_root found; unsigned long gang[8]; - struct buffer_head *bh; + struct extent_buffer *eb; if (level == 0) return; @@ -831,18 +907,17 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, if (!path->nodes[level]) return; - node = btrfs_buffer_node(path->nodes[level]); + node = path->nodes[level]; search = btrfs_node_blockptr(node, slot); - bh = btrfs_find_tree_block(root, search); - if (bh) { - brelse(bh); + eb = btrfs_find_tree_block(root, search); + if (eb) { + free_extent_buffer(eb); return; } 
init_bit_radix(&found); - nritems = btrfs_header_nritems(&node->header); + nritems = btrfs_header_nritems(node); for (i = slot; i < nritems; i++) { - item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); blocknr = btrfs_node_blockptr(node, i); set_radix_bit(&found, blocknr); } @@ -886,8 +961,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow) { - struct buffer_head *b; - struct btrfs_node *c; + struct extent_buffer *b; u64 blocknr; int slot; int ret; @@ -901,10 +975,9 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); again: b = root->node; - get_bh(b); + extent_buffer_get(b); while (b) { - c = btrfs_buffer_node(b); - level = btrfs_header_level(&c->header); + level = btrfs_header_level(b); if (cow) { int wret; wret = btrfs_cow_block(trans, root, b, @@ -912,32 +985,30 @@ again: p->slots[level + 1], &b); if (wret) { - btrfs_block_release(root, b); + free_extent_buffer(b); return wret; } - c = btrfs_buffer_node(b); } BUG_ON(!cow && ins_len); - if (level != btrfs_header_level(&c->header)) + if (level != btrfs_header_level(b)) WARN_ON(1); - level = btrfs_header_level(&c->header); + level = btrfs_header_level(b); p->nodes[level] = b; ret = check_block(root, p, level); if (ret) return -1; - ret = bin_search(c, key, &slot); - if (!btrfs_is_leaf(c)) { + ret = bin_search(b, key, level, &slot); + if (level != 0) { if (ret && slot > 0) slot -= 1; p->slots[level] = slot; - if (ins_len > 0 && btrfs_header_nritems(&c->header) >= + if (ins_len > 0 && btrfs_header_nritems(b) >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { int sret = split_node(trans, root, p, level); BUG_ON(sret > 0); if (sret) return sret; b = p->nodes[level]; - c = btrfs_buffer_node(b); slot = p->slots[level]; } else if (ins_len < 0) { int sret = balance_level(trans, root, p, @@ -947,22 +1018,19 @@ again: b = p->nodes[level]; if (!b) 
goto again; - c = btrfs_buffer_node(b); slot = p->slots[level]; - BUG_ON(btrfs_header_nritems(&c->header) == 1); + BUG_ON(btrfs_header_nritems(b) == 1); } /* this is only true while dropping a snapshot */ if (level == lowest_level) break; - blocknr = btrfs_node_blockptr(c, slot); + blocknr = btrfs_node_blockptr(b, slot); if (should_reada) reada_for_search(root, p, level, slot); - b = read_tree_block(root, btrfs_node_blockptr(c, slot)); - + b = read_tree_block(root, btrfs_node_blockptr(b, slot)); } else { - struct btrfs_leaf *l = (struct btrfs_leaf *)c; p->slots[level] = slot; - if (ins_len > 0 && btrfs_leaf_free_space(root, l) < + if (ins_len > 0 && btrfs_leaf_free_space(root, b) < sizeof(struct btrfs_item) + ins_len) { int sret = split_leaf(trans, root, key, p, ins_len); @@ -986,19 +1054,20 @@ again: * If this fails to write a tree block, it returns -1, but continues * fixing up the blocks in ram so the tree is consistent. */ -static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, struct btrfs_disk_key - *key, int level) +static int fixup_low_keys(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_disk_key *key, int level) { int i; int ret = 0; + struct extent_buffer *t; + for (i = level; i < BTRFS_MAX_LEVEL; i++) { - struct btrfs_node *t; int tslot = path->slots[i]; if (!path->nodes[i]) break; - t = btrfs_buffer_node(path->nodes[i]); - btrfs_memcpy(root, t, &t->ptrs[tslot].key, key, sizeof(*key)); + t = path->nodes[i]; + btrfs_set_node_key(t, key, tslot); btrfs_mark_buffer_dirty(path->nodes[i]); if (tslot != 0) break; @@ -1014,18 +1083,16 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root * error, and > 0 if there was no room in the left hand block. 
*/ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *dst_buf, struct - buffer_head *src_buf) + *root, struct extent_buffer *dst, + struct extent_buffer *src) { - struct btrfs_node *src = btrfs_buffer_node(src_buf); - struct btrfs_node *dst = btrfs_buffer_node(dst_buf); int push_items = 0; int src_nritems; int dst_nritems; int ret = 0; - src_nritems = btrfs_header_nritems(&src->header); - dst_nritems = btrfs_header_nritems(&dst->header); + src_nritems = btrfs_header_nritems(src); + dst_nritems = btrfs_header_nritems(dst); push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; if (push_items <= 0) { @@ -1035,17 +1102,21 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root if (src_nritems < push_items) push_items = src_nritems; - btrfs_memcpy(root, dst, dst->ptrs + dst_nritems, src->ptrs, - push_items * sizeof(struct btrfs_key_ptr)); + copy_extent_buffer(dst, src, + btrfs_node_key_ptr_offset(dst_nritems), + btrfs_node_key_ptr_offset(0), + push_items * sizeof(struct btrfs_key_ptr)); + if (push_items < src_nritems) { - btrfs_memmove(root, src, src->ptrs, src->ptrs + push_items, - (src_nritems - push_items) * - sizeof(struct btrfs_key_ptr)); - } - btrfs_set_header_nritems(&src->header, src_nritems - push_items); - btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); - btrfs_mark_buffer_dirty(src_buf); - btrfs_mark_buffer_dirty(dst_buf); + memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), + btrfs_node_key_ptr_offset(push_items), + (src_nritems - push_items) * + sizeof(struct btrfs_key_ptr)); + } + btrfs_set_header_nritems(src, src_nritems - push_items); + btrfs_set_header_nritems(dst, dst_nritems + push_items); + btrfs_mark_buffer_dirty(src); + btrfs_mark_buffer_dirty(dst); return ret; } @@ -1058,24 +1129,22 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root * * this will only push up to 1/2 the contents of the left node over */ 
-static int balance_node_right(struct btrfs_trans_handle *trans, struct - btrfs_root *root, struct buffer_head *dst_buf, - struct buffer_head *src_buf) +static int balance_node_right(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *dst, + struct extent_buffer *src) { - struct btrfs_node *src = btrfs_buffer_node(src_buf); - struct btrfs_node *dst = btrfs_buffer_node(dst_buf); int push_items = 0; int max_push; int src_nritems; int dst_nritems; int ret = 0; - src_nritems = btrfs_header_nritems(&src->header); - dst_nritems = btrfs_header_nritems(&dst->header); + src_nritems = btrfs_header_nritems(src); + dst_nritems = btrfs_header_nritems(dst); push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; - if (push_items <= 0) { + if (push_items <= 0) return 1; - } max_push = src_nritems / 2 + 1; /* don't try to empty the node */ @@ -1085,18 +1154,21 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct if (max_push < push_items) push_items = max_push; - btrfs_memmove(root, dst, dst->ptrs + push_items, dst->ptrs, - dst_nritems * sizeof(struct btrfs_key_ptr)); + memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items), + btrfs_node_key_ptr_offset(0), + (dst_nritems) * + sizeof(struct btrfs_key_ptr)); - btrfs_memcpy(root, dst, dst->ptrs, - src->ptrs + src_nritems - push_items, - push_items * sizeof(struct btrfs_key_ptr)); + copy_extent_buffer(dst, src, + btrfs_node_key_ptr_offset(0), + btrfs_node_key_ptr_offset(src_nritems - push_items), + push_items * sizeof(struct btrfs_key_ptr)); - btrfs_set_header_nritems(&src->header, src_nritems - push_items); - btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); + btrfs_set_header_nritems(src, src_nritems - push_items); + btrfs_set_header_nritems(dst, dst_nritems + push_items); - btrfs_mark_buffer_dirty(src_buf); - btrfs_mark_buffer_dirty(dst_buf); + btrfs_mark_buffer_dirty(src); + btrfs_mark_buffer_dirty(dst); return ret; } @@ -1107,45 +1179,46 @@ static 
int balance_node_right(struct btrfs_trans_handle *trans, struct * * returns zero on success or < 0 on failure. */ -static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, int level) +static int insert_new_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int level) { - struct buffer_head *t; - struct btrfs_node *lower; - struct btrfs_node *c; - struct btrfs_disk_key *lower_key; + struct extent_buffer *lower; + struct extent_buffer *c; + struct btrfs_disk_key lower_key; BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); - t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr, 0); - if (IS_ERR(t)) - return PTR_ERR(t); - c = btrfs_buffer_node(t); - memset(c, 0, root->blocksize); - btrfs_set_header_nritems(&c->header, 1); - btrfs_set_header_level(&c->header, level); - btrfs_set_header_blocknr(&c->header, bh_blocknr(t)); - btrfs_set_header_generation(&c->header, trans->transid); - btrfs_set_header_owner(&c->header, root->root_key.objectid); - lower = btrfs_buffer_node(path->nodes[level-1]); - memcpy(c->header.fsid, root->fs_info->disk_super->fsid, - sizeof(c->header.fsid)); - if (btrfs_is_leaf(lower)) - lower_key = &((struct btrfs_leaf *)lower)->items[0].key; + c = btrfs_alloc_free_block(trans, root, + extent_buffer_blocknr(root->node), 0); + if (IS_ERR(c)) + return PTR_ERR(c); + memset_extent_buffer(c, 0, 0, root->nodesize); + btrfs_set_header_nritems(c, 1); + btrfs_set_header_level(c, level); + btrfs_set_header_blocknr(c, extent_buffer_blocknr(c)); + btrfs_set_header_generation(c, trans->transid); + btrfs_set_header_owner(c, root->root_key.objectid); + lower = path->nodes[level-1]; + + write_extent_buffer(c, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(c), + BTRFS_FSID_SIZE); + if (level == 1) + btrfs_item_key(lower, &lower_key, 0); else - lower_key = &lower->ptrs[0].key; - btrfs_memcpy(root, c, &c->ptrs[0].key, lower_key, - 
sizeof(struct btrfs_disk_key)); - btrfs_set_node_blockptr(c, 0, bh_blocknr(path->nodes[level - 1])); + btrfs_node_key(lower, &lower_key, 0); + btrfs_set_node_key(c, &lower_key, 0); + btrfs_set_node_blockptr(c, 0, extent_buffer_blocknr(lower)); - btrfs_mark_buffer_dirty(t); + btrfs_mark_buffer_dirty(c); /* the super has an extra ref to root->node */ - btrfs_block_release(root, root->node); - root->node = t; - get_bh(t); - path->nodes[level] = t; + free_extent_buffer(root->node); + root->node = c; + extent_buffer_get(c); + path->nodes[level] = c; path->slots[level] = 0; return 0; } @@ -1163,26 +1236,26 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_disk_key *key, u64 blocknr, int slot, int level) { - struct btrfs_node *lower; + struct extent_buffer *lower; int nritems; BUG_ON(!path->nodes[level]); - lower = btrfs_buffer_node(path->nodes[level]); - nritems = btrfs_header_nritems(&lower->header); + lower = path->nodes[level]; + nritems = btrfs_header_nritems(lower); if (slot > nritems) BUG(); if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root)) BUG(); if (slot != nritems) { - btrfs_memmove(root, lower, lower->ptrs + slot + 1, - lower->ptrs + slot, + memmove_extent_buffer(lower, + btrfs_node_key_ptr_offset(slot + 1), + btrfs_node_key_ptr_offset(slot), (nritems - slot) * sizeof(struct btrfs_key_ptr)); } - btrfs_memcpy(root, lower, &lower->ptrs[slot].key, - key, sizeof(struct btrfs_disk_key)); + btrfs_set_node_key(lower, key, slot); btrfs_set_node_blockptr(lower, slot, blocknr); - btrfs_set_header_nritems(&lower->header, nritems + 1); - btrfs_mark_buffer_dirty(path->nodes[level]); + btrfs_set_header_nritems(lower, nritems + 1); + btrfs_mark_buffer_dirty(lower); check_node(root, path, level); return 0; } @@ -1199,69 +1272,73 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, 
int level) { - struct buffer_head *t; - struct btrfs_node *c; - struct buffer_head *split_buffer; - struct btrfs_node *split; + struct extent_buffer *c; + struct extent_buffer *split; + struct btrfs_disk_key disk_key; int mid; int ret; int wret; u32 c_nritems; - t = path->nodes[level]; - c = btrfs_buffer_node(t); - if (t == root->node) { + c = path->nodes[level]; + if (c == root->node) { /* trying to split the root, lets make a new one */ ret = insert_new_root(trans, root, path, level + 1); if (ret) return ret; } else { ret = push_nodes_for_insert(trans, root, path, level); - t = path->nodes[level]; - c = btrfs_buffer_node(t); - if (!ret && - btrfs_header_nritems(&c->header) < + c = path->nodes[level]; + if (!ret && btrfs_header_nritems(c) < BTRFS_NODEPTRS_PER_BLOCK(root) - 1) return 0; if (ret < 0) return ret; } - c_nritems = btrfs_header_nritems(&c->header); - split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr, 0); - if (IS_ERR(split_buffer)) - return PTR_ERR(split_buffer); + c_nritems = btrfs_header_nritems(c); + split = btrfs_alloc_free_block(trans, root, + extent_buffer_blocknr(c), 0); + if (IS_ERR(split)) + return PTR_ERR(split); + + btrfs_set_header_flags(split, btrfs_header_flags(c)); + btrfs_set_header_level(split, btrfs_header_level(c)); + btrfs_set_header_blocknr(split, extent_buffer_blocknr(split)); + btrfs_set_header_generation(split, trans->transid); + btrfs_set_header_owner(split, root->root_key.objectid); + write_extent_buffer(split, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(split), + BTRFS_FSID_SIZE); - split = btrfs_buffer_node(split_buffer); - btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); - btrfs_set_header_level(&split->header, btrfs_header_level(&c->header)); - btrfs_set_header_blocknr(&split->header, bh_blocknr(split_buffer)); - btrfs_set_header_generation(&split->header, trans->transid); - btrfs_set_header_owner(&split->header, root->root_key.objectid); - memcpy(split->header.fsid, 
root->fs_info->disk_super->fsid, - sizeof(split->header.fsid)); mid = (c_nritems + 1) / 2; - btrfs_memcpy(root, split, split->ptrs, c->ptrs + mid, - (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); - btrfs_set_header_nritems(&split->header, c_nritems - mid); - btrfs_set_header_nritems(&c->header, mid); + + copy_extent_buffer(split, c, + btrfs_node_key_ptr_offset(0), + btrfs_node_key_ptr_offset(mid), + (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); + btrfs_set_header_nritems(split, c_nritems - mid); + btrfs_set_header_nritems(c, mid); ret = 0; - btrfs_mark_buffer_dirty(t); - btrfs_mark_buffer_dirty(split_buffer); - wret = insert_ptr(trans, root, path, &split->ptrs[0].key, - bh_blocknr(split_buffer), path->slots[level + 1] + 1, + btrfs_mark_buffer_dirty(c); + btrfs_mark_buffer_dirty(split); + + btrfs_node_key(split, &disk_key, 0); + wret = insert_ptr(trans, root, path, &disk_key, + extent_buffer_blocknr(split), + path->slots[level + 1] + 1, level + 1); if (wret) ret = wret; if (path->slots[level] >= mid) { path->slots[level] -= mid; - btrfs_block_release(root, t); - path->nodes[level] = split_buffer; + free_extent_buffer(c); + path->nodes[level] = split; path->slots[level + 1] += 1; } else { - btrfs_block_release(root, split_buffer); + free_extent_buffer(split); } return ret; } @@ -1271,16 +1348,16 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root * and nr indicate which items in the leaf to check. 
This totals up the * space used both by the item structs and the item data */ -static int leaf_space_used(struct btrfs_leaf *l, int start, int nr) +static int leaf_space_used(struct extent_buffer *l, int start, int nr) { int data_len; - int nritems = btrfs_header_nritems(&l->header); + int nritems = btrfs_header_nritems(l); int end = min(nritems, start + nr) - 1; if (!nr) return 0; - data_len = btrfs_item_end(l->items + start); - data_len = data_len - btrfs_item_offset(l->items + end); + data_len = btrfs_item_end_nr(l, start); + data_len = data_len - btrfs_item_offset_nr(l, end); data_len += sizeof(struct btrfs_item) * nr; WARN_ON(data_len < 0); return data_len; @@ -1291,10 +1368,17 @@ static int leaf_space_used(struct btrfs_leaf *l, int start, int nr) * the start of the leaf data. IOW, how much room * the leaf has left for both items and data */ -int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf) +int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf) { - int nritems = btrfs_header_nritems(&leaf->header); - return BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); + int nritems = btrfs_header_nritems(leaf); + int ret; + ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); + if (ret < 0) { + printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n", + ret, BTRFS_LEAF_DATA_SIZE(root), + leaf_space_used(leaf, 0, nritems), nritems); + } + return ret; } /* @@ -1307,12 +1391,10 @@ int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf) static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size) { - struct buffer_head *left_buf = path->nodes[0]; - struct btrfs_leaf *left = btrfs_buffer_leaf(left_buf); - struct btrfs_leaf *right; - struct buffer_head *right_buf; - struct buffer_head *upper; - struct btrfs_node *upper_node; + struct extent_buffer *left = path->nodes[0]; + struct extent_buffer 
*right; + struct extent_buffer *upper; + struct btrfs_disk_key disk_key; int slot; int i; int free_space; @@ -1321,6 +1403,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_item *item; u32 left_nritems; u32 right_nritems; + u32 data_end; int ret; slot = path->slots[1]; @@ -1328,102 +1411,109 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } upper = path->nodes[1]; - upper_node = btrfs_buffer_node(upper); - if (slot >= btrfs_header_nritems(&upper_node->header) - 1) { + if (slot >= btrfs_header_nritems(upper) - 1) return 1; - } - right_buf = read_tree_block(root, - btrfs_node_blockptr(btrfs_buffer_node(upper), slot + 1)); - right = btrfs_buffer_leaf(right_buf); + + right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1)); free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); return 1; } + /* cow and double check */ - ret = btrfs_cow_block(trans, root, right_buf, upper, - slot + 1, &right_buf); + ret = btrfs_cow_block(trans, root, right, upper, + slot + 1, &right); if (ret) { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); return 1; } - right = btrfs_buffer_leaf(right_buf); free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); return 1; } - left_nritems = btrfs_header_nritems(&left->header); + left_nritems = btrfs_header_nritems(left); if (left_nritems == 0) { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); return 1; } + for (i = left_nritems - 1; i >= 1; i--) { - item = left->items + i; + item = btrfs_item_nr(left, i); if (path->slots[0] == i) push_space += data_size + sizeof(*item); - if (btrfs_item_size(item) + sizeof(*item) + push_space > + if (btrfs_item_size(left, item) + 
sizeof(*item) + push_space > free_space) break; push_items++; - push_space += btrfs_item_size(item) + sizeof(*item); + push_space += btrfs_item_size(left, item) + sizeof(*item); } + if (push_items == 0) { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); return 1; } + if (push_items == left_nritems) WARN_ON(1); - right_nritems = btrfs_header_nritems(&right->header); + /* push left to right */ - push_space = btrfs_item_end(left->items + left_nritems - push_items); + right_nritems = btrfs_header_nritems(right); + push_space = btrfs_item_end_nr(left, left_nritems - push_items); push_space -= leaf_data_end(root, left); + /* make room in the right data area */ - btrfs_memmove(root, right, btrfs_leaf_data(right) + - leaf_data_end(root, right) - push_space, - btrfs_leaf_data(right) + - leaf_data_end(root, right), BTRFS_LEAF_DATA_SIZE(root) - - leaf_data_end(root, right)); + data_end = leaf_data_end(root, right); + memmove_extent_buffer(right, + btrfs_leaf_data(right) + data_end - push_space, + btrfs_leaf_data(right) + data_end, + BTRFS_LEAF_DATA_SIZE(root) - data_end); + /* copy from the left data area */ - btrfs_memcpy(root, right, btrfs_leaf_data(right) + + copy_extent_buffer(right, left, btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - push_space, btrfs_leaf_data(left) + leaf_data_end(root, left), push_space); - btrfs_memmove(root, right, right->items + push_items, right->items, - right_nritems * sizeof(struct btrfs_item)); + + memmove_extent_buffer(right, btrfs_item_nr_offset(push_items), + btrfs_item_nr_offset(0), + right_nritems * sizeof(struct btrfs_item)); + /* copy the items from left to right */ - btrfs_memcpy(root, right, right->items, left->items + - left_nritems - push_items, - push_items * sizeof(struct btrfs_item)); + copy_extent_buffer(right, left, btrfs_item_nr_offset(0), + btrfs_item_nr_offset(left_nritems - push_items), + push_items * sizeof(struct btrfs_item)); /* update the item pointers */ right_nritems += push_items; - 
btrfs_set_header_nritems(&right->header, right_nritems); + btrfs_set_header_nritems(right, right_nritems); push_space = BTRFS_LEAF_DATA_SIZE(root); for (i = 0; i < right_nritems; i++) { - btrfs_set_item_offset(right->items + i, push_space - - btrfs_item_size(right->items + i)); - push_space = btrfs_item_offset(right->items + i); + item = btrfs_item_nr(right, i); + btrfs_set_item_offset(right, item, push_space - + btrfs_item_size(right, item)); + push_space = btrfs_item_offset(right, item); } left_nritems -= push_items; - btrfs_set_header_nritems(&left->header, left_nritems); + btrfs_set_header_nritems(left, left_nritems); - btrfs_mark_buffer_dirty(left_buf); - btrfs_mark_buffer_dirty(right_buf); + btrfs_mark_buffer_dirty(left); + btrfs_mark_buffer_dirty(right); - btrfs_memcpy(root, upper_node, &upper_node->ptrs[slot + 1].key, - &right->items[0].key, sizeof(struct btrfs_disk_key)); + btrfs_item_key(right, &disk_key, 0); + btrfs_set_node_key(upper, &disk_key, slot + 1); btrfs_mark_buffer_dirty(upper); /* then fixup the leaf pointer in the path */ if (path->slots[0] >= left_nritems) { path->slots[0] -= left_nritems; - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = right_buf; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; path->slots[1] += 1; } else { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); } if (path->nodes[1]) check_node(root, path, 1); @@ -1436,10 +1526,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size) { - struct buffer_head *right_buf = path->nodes[0]; - struct btrfs_leaf *right = btrfs_buffer_leaf(right_buf); - struct buffer_head *t; - struct btrfs_leaf *left; + struct btrfs_disk_key disk_key; + struct extent_buffer *right = path->nodes[0]; + struct extent_buffer *left; int slot; int i; int free_space; @@ -1447,119 +1536,128 @@ static int 
push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root int push_items = 0; struct btrfs_item *item; u32 old_left_nritems; + u32 right_nritems; int ret = 0; int wret; slot = path->slots[1]; - if (slot == 0) { + if (slot == 0) return 1; - } - if (!path->nodes[1]) { + if (!path->nodes[1]) return 1; - } - t = read_tree_block(root, - btrfs_node_blockptr(btrfs_buffer_node(path->nodes[1]), slot - 1)); - left = btrfs_buffer_leaf(t); + + left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1], + slot - 1)); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { - btrfs_block_release(root, t); + free_extent_buffer(left); return 1; } /* cow and double check */ - ret = btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t); + ret = btrfs_cow_block(trans, root, left, + path->nodes[1], slot - 1, &left); if (ret) { /* we hit -ENOSPC, but it isn't fatal here */ - btrfs_block_release(root, t); + free_extent_buffer(left); return 1; } - left = btrfs_buffer_leaf(t); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { - btrfs_block_release(root, t); + free_extent_buffer(left); return 1; } - if (btrfs_header_nritems(&right->header) == 0) { - btrfs_block_release(root, t); + right_nritems = btrfs_header_nritems(right); + if (right_nritems == 0) { + free_extent_buffer(left); return 1; } - for (i = 0; i < btrfs_header_nritems(&right->header) - 1; i++) { - item = right->items + i; + for (i = 0; i < right_nritems - 1; i++) { + item = btrfs_item_nr(right, i); if (path->slots[0] == i) push_space += data_size + sizeof(*item); - if (btrfs_item_size(item) + sizeof(*item) + push_space > + if (btrfs_item_size(right, item) + sizeof(*item) + push_space > free_space) break; push_items++; - push_space += btrfs_item_size(item) + sizeof(*item); + push_space += btrfs_item_size(right, item) + sizeof(*item); } if (push_items == 0) { - btrfs_block_release(root, t); + 
free_extent_buffer(left); return 1; } - if (push_items == btrfs_header_nritems(&right->header)) + if (push_items == btrfs_header_nritems(right)) WARN_ON(1); + /* push data from right to left */ - btrfs_memcpy(root, left, left->items + - btrfs_header_nritems(&left->header), - right->items, push_items * sizeof(struct btrfs_item)); + copy_extent_buffer(left, right, + btrfs_item_nr_offset(btrfs_header_nritems(left)), + btrfs_item_nr_offset(0), + push_items * sizeof(struct btrfs_item)); + push_space = BTRFS_LEAF_DATA_SIZE(root) - - btrfs_item_offset(right->items + push_items -1); - btrfs_memcpy(root, left, btrfs_leaf_data(left) + + btrfs_item_offset_nr(right, push_items -1); + + copy_extent_buffer(left, right, btrfs_leaf_data(left) + leaf_data_end(root, left) - push_space, btrfs_leaf_data(right) + - btrfs_item_offset(right->items + push_items - 1), + btrfs_item_offset_nr(right, push_items - 1), push_space); - old_left_nritems = btrfs_header_nritems(&left->header); + old_left_nritems = btrfs_header_nritems(left); BUG_ON(old_left_nritems < 0); for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { - u32 ioff = btrfs_item_offset(left->items + i); - btrfs_set_item_offset(left->items + i, ioff - - (BTRFS_LEAF_DATA_SIZE(root) - - btrfs_item_offset(left->items + - old_left_nritems - 1))); + u32 ioff; + item = btrfs_item_nr(left, i); + ioff = btrfs_item_offset(left, item); + btrfs_set_item_offset(left, item, + ioff - (BTRFS_LEAF_DATA_SIZE(root) - + btrfs_item_offset_nr(left, old_left_nritems - 1))); } - btrfs_set_header_nritems(&left->header, old_left_nritems + push_items); + btrfs_set_header_nritems(left, old_left_nritems + push_items); /* fixup right node */ - push_space = btrfs_item_offset(right->items + push_items - 1) - - leaf_data_end(root, right); - btrfs_memmove(root, right, btrfs_leaf_data(right) + - BTRFS_LEAF_DATA_SIZE(root) - push_space, - btrfs_leaf_data(right) + - leaf_data_end(root, right), push_space); - btrfs_memmove(root, right, right->items, 
right->items + push_items, - (btrfs_header_nritems(&right->header) - push_items) * - sizeof(struct btrfs_item)); - btrfs_set_header_nritems(&right->header, - btrfs_header_nritems(&right->header) - - push_items); + push_space = btrfs_item_offset_nr(right, push_items - 1) - + leaf_data_end(root, right); + memmove_extent_buffer(right, btrfs_leaf_data(right) + + BTRFS_LEAF_DATA_SIZE(root) - push_space, + btrfs_leaf_data(right) + + leaf_data_end(root, right), push_space); + + memmove_extent_buffer(right, btrfs_item_nr_offset(0), + btrfs_item_nr_offset(push_items), + (btrfs_header_nritems(right) - push_items) * + sizeof(struct btrfs_item)); + + right_nritems = btrfs_header_nritems(right) - push_items; + btrfs_set_header_nritems(right, right_nritems); push_space = BTRFS_LEAF_DATA_SIZE(root); - for (i = 0; i < btrfs_header_nritems(&right->header); i++) { - btrfs_set_item_offset(right->items + i, push_space - - btrfs_item_size(right->items + i)); - push_space = btrfs_item_offset(right->items + i); + for (i = 0; i < right_nritems; i++) { + item = btrfs_item_nr(right, i); + btrfs_set_item_offset(right, item, push_space - + btrfs_item_size(right, item)); + push_space = btrfs_item_offset(right, item); } - btrfs_mark_buffer_dirty(t); - btrfs_mark_buffer_dirty(right_buf); + btrfs_mark_buffer_dirty(left); + btrfs_mark_buffer_dirty(right); - wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1); + btrfs_item_key(right, &disk_key, 0); + wret = fixup_low_keys(trans, root, path, &disk_key, 1); if (wret) ret = wret; /* then fixup the leaf pointer in the path */ if (path->slots[0] < push_items) { path->slots[0] += old_left_nritems; - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = t; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = left; path->slots[1] -= 1; } else { - btrfs_block_release(root, t); + free_extent_buffer(left); path->slots[0] -= push_items; } BUG_ON(path->slots[0] < 0); @@ -1578,13 +1676,11 @@ static int split_leaf(struct 
btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *ins_key, struct btrfs_path *path, int data_size) { - struct buffer_head *l_buf; - struct btrfs_leaf *l; + struct extent_buffer *l; u32 nritems; int mid; int slot; - struct btrfs_leaf *right; - struct buffer_head *right_buffer; + struct extent_buffer *right; int space_needed = data_size + sizeof(struct btrfs_item); int data_copy_size; int rt_data_off; @@ -1603,8 +1699,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root if (wret < 0) return wret; } - l_buf = path->nodes[0]; - l = btrfs_buffer_leaf(l_buf); + l = path->nodes[0]; /* did the pushes work? */ if (btrfs_leaf_free_space(root, l) >= @@ -1617,36 +1712,38 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } slot = path->slots[0]; - nritems = btrfs_header_nritems(&l->header); + nritems = btrfs_header_nritems(l); mid = (nritems + 1)/ 2; - right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0); - if (IS_ERR(right_buffer)) - return PTR_ERR(right_buffer); - - right = btrfs_buffer_leaf(right_buffer); - memset(&right->header, 0, sizeof(right->header)); - btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); - btrfs_set_header_generation(&right->header, trans->transid); - btrfs_set_header_owner(&right->header, root->root_key.objectid); - btrfs_set_header_level(&right->header, 0); - memcpy(right->header.fsid, root->fs_info->disk_super->fsid, - sizeof(right->header.fsid)); + right = btrfs_alloc_free_block(trans, root, + extent_buffer_blocknr(l), 0); + if (IS_ERR(right)) + return PTR_ERR(right); + + memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); + btrfs_set_header_blocknr(right, extent_buffer_blocknr(right)); + btrfs_set_header_generation(right, trans->transid); + btrfs_set_header_owner(right, root->root_key.objectid); + btrfs_set_header_level(right, 0); + write_extent_buffer(right, root->fs_info->fsid, + (unsigned 
long)btrfs_header_fsid(right), + BTRFS_FSID_SIZE); + if (mid <= slot) { if (nritems == 1 || leaf_space_used(l, mid, nritems - mid) + space_needed > BTRFS_LEAF_DATA_SIZE(root)) { if (slot >= nritems) { btrfs_cpu_key_to_disk(&disk_key, ins_key); - btrfs_set_header_nritems(&right->header, 0); + btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, &disk_key, - bh_blocknr(right_buffer), + extent_buffer_blocknr(right), path->slots[1] + 1, 1); if (wret) ret = wret; - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = right_buffer; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; path->slots[0] = 0; path->slots[1] += 1; return ret; @@ -1659,15 +1756,15 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root BTRFS_LEAF_DATA_SIZE(root)) { if (slot == 0) { btrfs_cpu_key_to_disk(&disk_key, ins_key); - btrfs_set_header_nritems(&right->header, 0); + btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, &disk_key, - bh_blocknr(right_buffer), + extent_buffer_blocknr(right), path->slots[1], 1); if (wret) ret = wret; - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = right_buffer; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; path->slots[0] = 0; if (path->slots[1] == 0) { wret = fixup_low_keys(trans, root, @@ -1681,61 +1778,74 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root double_split = 1; } } - btrfs_set_header_nritems(&right->header, nritems - mid); - data_copy_size = btrfs_item_end(l->items + mid) - - leaf_data_end(root, l); - btrfs_memcpy(root, right, right->items, l->items + mid, - (nritems - mid) * sizeof(struct btrfs_item)); - btrfs_memcpy(root, right, + nritems = nritems - mid; + btrfs_set_header_nritems(right, nritems); + data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l); + + copy_extent_buffer(right, l, btrfs_item_nr_offset(0), + btrfs_item_nr_offset(mid), + nritems * sizeof(struct btrfs_item)); + + 
copy_extent_buffer(right, l, btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - data_copy_size, btrfs_leaf_data(l) + leaf_data_end(root, l), data_copy_size); + rt_data_off = BTRFS_LEAF_DATA_SIZE(root) - - btrfs_item_end(l->items + mid); + btrfs_item_end_nr(l, mid); - for (i = 0; i < btrfs_header_nritems(&right->header); i++) { - u32 ioff = btrfs_item_offset(right->items + i); - btrfs_set_item_offset(right->items + i, ioff + rt_data_off); + for (i = 0; i < nritems; i++) { + struct btrfs_item *item = btrfs_item_nr(right, i); + u32 ioff = btrfs_item_offset(right, item); + btrfs_set_item_offset(right, item, ioff + rt_data_off); } - btrfs_set_header_nritems(&l->header, mid); + btrfs_set_header_nritems(l, mid); ret = 0; - wret = insert_ptr(trans, root, path, &right->items[0].key, - bh_blocknr(right_buffer), path->slots[1] + 1, 1); + btrfs_item_key(right, &disk_key, 0); + wret = insert_ptr(trans, root, path, &disk_key, + extent_buffer_blocknr(right), path->slots[1] + 1, 1); if (wret) ret = wret; - btrfs_mark_buffer_dirty(right_buffer); - btrfs_mark_buffer_dirty(l_buf); + + btrfs_mark_buffer_dirty(right); + btrfs_mark_buffer_dirty(l); BUG_ON(path->slots[0] != slot); + if (mid <= slot) { - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = right_buffer; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; path->slots[0] -= mid; path->slots[1] += 1; } else - btrfs_block_release(root, right_buffer); + free_extent_buffer(right); + BUG_ON(path->slots[0] < 0); check_node(root, path, 1); + check_leaf(root, path, 0); if (!double_split) return ret; - right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0); - if (IS_ERR(right_buffer)) - return PTR_ERR(right_buffer); - - right = btrfs_buffer_leaf(right_buffer); - memset(&right->header, 0, sizeof(right->header)); - btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); - btrfs_set_header_generation(&right->header, trans->transid); - btrfs_set_header_owner(&right->header, 
root->root_key.objectid); - btrfs_set_header_level(&right->header, 0); - memcpy(right->header.fsid, root->fs_info->disk_super->fsid, - sizeof(right->header.fsid)); + + right = btrfs_alloc_free_block(trans, root, + extent_buffer_blocknr(l), 0); + if (IS_ERR(right)) + return PTR_ERR(right); + + memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); + btrfs_set_header_blocknr(right, extent_buffer_blocknr(right)); + btrfs_set_header_generation(right, trans->transid); + btrfs_set_header_owner(right, root->root_key.objectid); + btrfs_set_header_level(right, 0); + write_extent_buffer(right, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(right), + BTRFS_FSID_SIZE); + btrfs_cpu_key_to_disk(&disk_key, ins_key); - btrfs_set_header_nritems(&right->header, 0); + btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, &disk_key, - bh_blocknr(right_buffer), + extent_buffer_blocknr(right), path->slots[1], 1); if (wret) ret = wret; @@ -1744,8 +1854,8 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root if (wret) ret = wret; } - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = right_buffer; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; path->slots[0] = 0; check_node(root, path, 1); check_leaf(root, path, 0); @@ -1760,8 +1870,8 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, int ret = 0; int slot; int slot_orig; - struct btrfs_leaf *leaf; - struct buffer_head *leaf_buf; + struct extent_buffer *leaf; + struct btrfs_item *item; u32 nritems; unsigned int data_end; unsigned int old_data_start; @@ -1770,15 +1880,14 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, int i; slot_orig = path->slots[0]; - leaf_buf = path->nodes[0]; - leaf = btrfs_buffer_leaf(leaf_buf); + leaf = path->nodes[0]; - nritems = btrfs_header_nritems(&leaf->header); + nritems = btrfs_header_nritems(leaf); data_end = leaf_data_end(root, leaf); slot = path->slots[0]; - old_data_start = 
btrfs_item_offset(leaf->items + slot); - old_size = btrfs_item_size(leaf->items + slot); + old_data_start = btrfs_item_offset_nr(leaf, slot); + old_size = btrfs_item_size_nr(leaf, slot); BUG_ON(old_size <= new_size); size_diff = old_size - new_size; @@ -1790,32 +1899,38 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, */ /* first correct the data pointers */ for (i = slot; i < nritems; i++) { - u32 ioff = btrfs_item_offset(leaf->items + i); - btrfs_set_item_offset(leaf->items + i, - ioff + size_diff); + u32 ioff; + item = btrfs_item_nr(leaf, i); + ioff = btrfs_item_offset(leaf, item); + btrfs_set_item_offset(leaf, item, ioff + size_diff); } /* shift the data */ - btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end + size_diff, btrfs_leaf_data(leaf) + data_end, old_data_start + new_size - data_end); - btrfs_set_item_size(leaf->items + slot, new_size); - btrfs_mark_buffer_dirty(leaf_buf); + + item = btrfs_item_nr(leaf, slot); + btrfs_set_item_size(leaf, item, new_size); + btrfs_mark_buffer_dirty(leaf); ret = 0; - if (btrfs_leaf_free_space(root, leaf) < 0) + if (btrfs_leaf_free_space(root, leaf) < 0) { + btrfs_print_leaf(root, leaf); BUG(); + } check_leaf(root, path, 0); return ret; } -int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, u32 data_size) +int btrfs_extend_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + u32 data_size) { int ret = 0; int slot; int slot_orig; - struct btrfs_leaf *leaf; - struct buffer_head *leaf_buf; + struct extent_buffer *leaf; + struct btrfs_item *item; u32 nritems; unsigned int data_end; unsigned int old_data; @@ -1823,16 +1938,17 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root int i; slot_orig = path->slots[0]; - leaf_buf = path->nodes[0]; - leaf = btrfs_buffer_leaf(leaf_buf); + leaf = path->nodes[0]; - nritems = 
btrfs_header_nritems(&leaf->header); + nritems = btrfs_header_nritems(leaf); data_end = leaf_data_end(root, leaf); - if (btrfs_leaf_free_space(root, leaf) < data_size) + if (btrfs_leaf_free_space(root, leaf) < data_size) { + btrfs_print_leaf(root, leaf); BUG(); + } slot = path->slots[0]; - old_data = btrfs_item_end(leaf->items + slot); + old_data = btrfs_item_end_nr(leaf, slot); BUG_ON(slot < 0); BUG_ON(slot >= nritems); @@ -1842,22 +1958,28 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root */ /* first correct the data pointers */ for (i = slot; i < nritems; i++) { - u32 ioff = btrfs_item_offset(leaf->items + i); - btrfs_set_item_offset(leaf->items + i, - ioff - data_size); + u32 ioff; + item = btrfs_item_nr(leaf, i); + ioff = btrfs_item_offset(leaf, item); + btrfs_set_item_offset(leaf, item, ioff - data_size); } + /* shift the data */ - btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end - data_size, btrfs_leaf_data(leaf) + data_end, old_data - data_end); + data_end = old_data; - old_size = btrfs_item_size(leaf->items + slot); - btrfs_set_item_size(leaf->items + slot, old_size + data_size); - btrfs_mark_buffer_dirty(leaf_buf); + old_size = btrfs_item_size_nr(leaf, slot); + item = btrfs_item_nr(leaf, slot); + btrfs_set_item_size(leaf, item, old_size + data_size); + btrfs_mark_buffer_dirty(leaf); ret = 0; - if (btrfs_leaf_free_space(root, leaf) < 0) + if (btrfs_leaf_free_space(root, leaf) < 0) { + btrfs_print_leaf(root, leaf); BUG(); + } check_leaf(root, path, 0); return ret; } @@ -1866,15 +1988,16 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root * Given a key and some data, insert an item into the tree. * This does all the path init required, making room in the tree if needed. 
*/ -int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, struct btrfs_key - *cpu_key, u32 data_size) +int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *cpu_key, u32 data_size) { + struct extent_buffer *leaf; + struct btrfs_item *item; int ret = 0; int slot; int slot_orig; - struct btrfs_leaf *leaf; - struct buffer_head *leaf_buf; u32 nritems; unsigned int data_end; struct btrfs_disk_key disk_key; @@ -1884,6 +2007,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root /* create a root if there isn't one */ if (!root->node) BUG(); + ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1); if (ret == 0) { return -EEXIST; @@ -1892,57 +2016,68 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root goto out; slot_orig = path->slots[0]; - leaf_buf = path->nodes[0]; - leaf = btrfs_buffer_leaf(leaf_buf); + leaf = path->nodes[0]; - nritems = btrfs_header_nritems(&leaf->header); + nritems = btrfs_header_nritems(leaf); data_end = leaf_data_end(root, leaf); if (btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item) + data_size) { BUG(); } + slot = path->slots[0]; BUG_ON(slot < 0); + if (slot != nritems) { int i; - unsigned int old_data = btrfs_item_end(leaf->items + slot); + unsigned int old_data = btrfs_item_end_nr(leaf, slot); + if (old_data < data_end) { + btrfs_print_leaf(root, leaf); + printk("slot %d old_data %d data_end %d\n", + slot, old_data, data_end); + BUG_ON(1); + } /* * item0..itemN ... dataN.offset..dataN.size .. 
data0.size */ /* first correct the data pointers */ for (i = slot; i < nritems; i++) { - u32 ioff = btrfs_item_offset(leaf->items + i); - btrfs_set_item_offset(leaf->items + i, - ioff - data_size); + u32 ioff; + item = btrfs_item_nr(leaf, i); + ioff = btrfs_item_offset(leaf, item); + btrfs_set_item_offset(leaf, item, ioff - data_size); } /* shift the items */ - btrfs_memmove(root, leaf, leaf->items + slot + 1, - leaf->items + slot, + memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), + btrfs_item_nr_offset(slot), (nritems - slot) * sizeof(struct btrfs_item)); /* shift the data */ - btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end - data_size, btrfs_leaf_data(leaf) + data_end, old_data - data_end); data_end = old_data; } + /* setup the item for the new data */ - btrfs_memcpy(root, leaf, &leaf->items[slot].key, &disk_key, - sizeof(struct btrfs_disk_key)); - btrfs_set_item_offset(leaf->items + slot, data_end - data_size); - btrfs_set_item_size(leaf->items + slot, data_size); - btrfs_set_header_nritems(&leaf->header, nritems + 1); - btrfs_mark_buffer_dirty(leaf_buf); + btrfs_set_item_key(leaf, &disk_key, slot); + item = btrfs_item_nr(leaf, slot); + btrfs_set_item_offset(leaf, item, data_end - data_size); + btrfs_set_item_size(leaf, item, data_size); + btrfs_set_header_nritems(leaf, nritems + 1); + btrfs_mark_buffer_dirty(leaf); ret = 0; if (slot == 0) ret = fixup_low_keys(trans, root, path, &disk_key, 1); - if (btrfs_leaf_free_space(root, leaf) < 0) + if (btrfs_leaf_free_space(root, leaf) < 0) { + btrfs_print_leaf(root, leaf); BUG(); + } check_leaf(root, path, 0); out: return ret; @@ -1958,17 +2093,17 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root { int ret = 0; struct btrfs_path *path; - u8 *ptr; + struct extent_buffer *leaf; + unsigned long ptr; path = btrfs_alloc_path(); BUG_ON(!path); ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); if (!ret) 
{ - ptr = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], u8); - btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, data, data_size); - btrfs_mark_buffer_dirty(path->nodes[0]); + leaf = path->nodes[0]; + ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); + write_extent_buffer(leaf, data, ptr, data_size); + btrfs_mark_buffer_dirty(leaf); } btrfs_free_path(path); return ret; @@ -1984,30 +2119,30 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot) { - struct btrfs_node *node; - struct buffer_head *parent = path->nodes[level]; + struct extent_buffer *parent = path->nodes[level]; u32 nritems; int ret = 0; int wret; - node = btrfs_buffer_node(parent); - nritems = btrfs_header_nritems(&node->header); + nritems = btrfs_header_nritems(parent); if (slot != nritems -1) { - btrfs_memmove(root, node, node->ptrs + slot, - node->ptrs + slot + 1, + memmove_extent_buffer(parent, + btrfs_node_key_ptr_offset(slot), + btrfs_node_key_ptr_offset(slot + 1), sizeof(struct btrfs_key_ptr) * (nritems - slot - 1)); } nritems--; - btrfs_set_header_nritems(&node->header, nritems); + btrfs_set_header_nritems(parent, nritems); if (nritems == 0 && parent == root->node) { - struct btrfs_header *header = btrfs_buffer_header(root->node); - BUG_ON(btrfs_header_level(header) != 1); + BUG_ON(btrfs_header_level(root->node) != 1); /* just turn the root into a leaf and break */ - btrfs_set_header_level(header, 0); + btrfs_set_header_level(root->node, 0); } else if (slot == 0) { - wret = fixup_low_keys(trans, root, path, &node->ptrs[0].key, - level + 1); + struct btrfs_disk_key disk_key; + + btrfs_node_key(parent, &disk_key, 0); + wret = fixup_low_keys(trans, root, path, &disk_key, level + 1); if (wret) ret = wret; } @@ -2023,59 +2158,67 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path) 
{ int slot; - struct btrfs_leaf *leaf; - struct buffer_head *leaf_buf; + struct extent_buffer *leaf; + struct btrfs_item *item; int doff; int dsize; int ret = 0; int wret; u32 nritems; - leaf_buf = path->nodes[0]; - leaf = btrfs_buffer_leaf(leaf_buf); + leaf = path->nodes[0]; slot = path->slots[0]; - doff = btrfs_item_offset(leaf->items + slot); - dsize = btrfs_item_size(leaf->items + slot); - nritems = btrfs_header_nritems(&leaf->header); + doff = btrfs_item_offset_nr(leaf, slot); + dsize = btrfs_item_size_nr(leaf, slot); + nritems = btrfs_header_nritems(leaf); if (slot != nritems - 1) { int i; int data_end = leaf_data_end(root, leaf); - btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end + dsize, btrfs_leaf_data(leaf) + data_end, doff - data_end); + for (i = slot + 1; i < nritems; i++) { - u32 ioff = btrfs_item_offset(leaf->items + i); - btrfs_set_item_offset(leaf->items + i, ioff + dsize); + u32 ioff; + item = btrfs_item_nr(leaf, i); + ioff = btrfs_item_offset(leaf, item); + btrfs_set_item_offset(leaf, item, ioff + dsize); } - btrfs_memmove(root, leaf, leaf->items + slot, - leaf->items + slot + 1, + memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot), + btrfs_item_nr_offset(slot + 1), sizeof(struct btrfs_item) * (nritems - slot - 1)); } - btrfs_set_header_nritems(&leaf->header, nritems - 1); + btrfs_set_header_nritems(leaf, nritems - 1); nritems--; + /* delete the leaf if we've emptied it */ if (nritems == 0) { - if (leaf_buf == root->node) { - btrfs_set_header_level(&leaf->header, 0); + if (leaf == root->node) { + btrfs_set_header_level(leaf, 0); } else { - clean_tree_block(trans, root, leaf_buf); - wait_on_buffer(leaf_buf); + clean_tree_block(trans, root, leaf); + wait_on_tree_block_writeback(root, leaf); wret = del_ptr(trans, root, path, 1, path->slots[1]); if (wret) ret = wret; wret = btrfs_free_extent(trans, root, - bh_blocknr(leaf_buf), 1, 1); + extent_buffer_blocknr(leaf), + 1, 1); if 
(wret) ret = wret; } } else { int used = leaf_space_used(leaf, 0, nritems); if (slot == 0) { + struct btrfs_disk_key disk_key; + + btrfs_item_key(leaf, &disk_key, 0); wret = fixup_low_keys(trans, root, path, - &leaf->items[0].key, 1); + &disk_key, 1); if (wret) ret = wret; } @@ -2087,34 +2230,40 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, * for possible call to del_ptr below */ slot = path->slots[1]; - get_bh(leaf_buf); + extent_buffer_get(leaf); + wret = push_leaf_left(trans, root, path, 1); if (wret < 0 && wret != -ENOSPC) ret = wret; - if (path->nodes[0] == leaf_buf && - btrfs_header_nritems(&leaf->header)) { + + if (path->nodes[0] == leaf && + btrfs_header_nritems(leaf)) { wret = push_leaf_right(trans, root, path, 1); if (wret < 0 && wret != -ENOSPC) ret = wret; } - if (btrfs_header_nritems(&leaf->header) == 0) { - u64 blocknr = bh_blocknr(leaf_buf); - clean_tree_block(trans, root, leaf_buf); - wait_on_buffer(leaf_buf); + + if (btrfs_header_nritems(leaf) == 0) { + u64 blocknr = extent_buffer_blocknr(leaf); + + clean_tree_block(trans, root, leaf); + wait_on_tree_block_writeback(root, leaf); + wret = del_ptr(trans, root, path, 1, slot); if (wret) ret = wret; - btrfs_block_release(root, leaf_buf); + + free_extent_buffer(leaf); wret = btrfs_free_extent(trans, root, blocknr, 1, 1); if (wret) ret = wret; } else { - btrfs_mark_buffer_dirty(leaf_buf); - btrfs_block_release(root, leaf_buf); + btrfs_mark_buffer_dirty(leaf); + free_extent_buffer(leaf); } } else { - btrfs_mark_buffer_dirty(leaf_buf); + btrfs_mark_buffer_dirty(leaf); } } return ret; @@ -2130,25 +2279,27 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) int slot; int level = 1; u64 blocknr; - struct buffer_head *c; - struct btrfs_node *c_node; - struct buffer_head *next = NULL; + struct extent_buffer *c; + struct extent_buffer *next = NULL; while(level < BTRFS_MAX_LEVEL) { if (!path->nodes[level]) return 1; + slot = path->slots[level] + 1; c = 
path->nodes[level]; - c_node = btrfs_buffer_node(c); - if (slot >= btrfs_header_nritems(&c_node->header)) { + if (slot >= btrfs_header_nritems(c)) { level++; continue; } - blocknr = btrfs_node_blockptr(c_node, slot); + + blocknr = btrfs_node_blockptr(c, slot); if (next) - btrfs_block_release(root, next); + free_extent_buffer(next); + if (path->reada) reada_for_search(root, path, level, slot); + next = read_tree_block(root, blocknr); break; } @@ -2156,15 +2307,14 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) while(1) { level--; c = path->nodes[level]; - btrfs_block_release(root, c); + free_extent_buffer(c); path->nodes[level] = next; path->slots[level] = 0; if (!level) break; if (path->reada) reada_for_search(root, path, level, 0); - next = read_tree_block(root, - btrfs_node_blockptr(btrfs_buffer_node(next), 0)); + next = read_tree_block(root, btrfs_node_blockptr(next, 0)); } return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 800a3499cc3..c4b82980685 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -20,10 +20,10 @@ #define __BTRFS__ #include -#include #include #include #include "bit-radix.h" +#include "extent_map.h" struct btrfs_trans_handle; struct btrfs_transaction; @@ -32,7 +32,7 @@ extern struct kmem_cache *btrfs_transaction_cachep; extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; -#define BTRFS_MAGIC "_BtRfS_M" +#define BTRFS_MAGIC "_B2RfS_M" #define BTRFS_ROOT_TREE_OBJECTID 1ULL #define BTRFS_EXTENT_TREE_OBJECTID 2ULL @@ -78,41 +78,41 @@ extern struct kmem_cache *btrfs_path_cachep; */ struct btrfs_disk_key { __le64 objectid; - __le32 flags; + u8 type; __le64 offset; } __attribute__ ((__packed__)); struct btrfs_key { u64 objectid; - u32 flags; + u8 type; u64 offset; } __attribute__ ((__packed__)); +#define BTRFS_FSID_SIZE 16 /* * every tree block (leaf or node) starts with this header. 
*/ struct btrfs_header { u8 csum[BTRFS_CSUM_SIZE]; - u8 fsid[16]; /* FS specific uuid */ + u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ __le64 blocknr; /* which block this node is supposed to live in */ __le64 generation; __le64 owner; - __le16 nritems; + __le32 nritems; __le16 flags; u8 level; } __attribute__ ((__packed__)); #define BTRFS_MAX_LEVEL 8 -#define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->blocksize - \ +#define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->nodesize - \ sizeof(struct btrfs_header)) / \ (sizeof(struct btrfs_disk_key) + sizeof(u64))) #define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) -#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->blocksize)) +#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->leafsize)) #define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ sizeof(struct btrfs_item) - \ sizeof(struct btrfs_file_extent_item)) -struct buffer_head; /* * the super block basically lists the main trees of the FS * it currently lacks any block count etc etc @@ -128,7 +128,9 @@ struct btrfs_super_block { __le64 total_blocks; __le64 blocks_used; __le64 root_dir_objectid; - __le32 blocksize; + __le32 sectorsize; + __le32 nodesize; + __le32 leafsize; } __attribute__ ((__packed__)); /* @@ -138,7 +140,7 @@ struct btrfs_super_block { struct btrfs_item { struct btrfs_disk_key key; __le32 offset; - __le16 size; + __le32 size; } __attribute__ ((__packed__)); /* @@ -176,7 +178,7 @@ struct btrfs_node { * used while walking the tree. 
*/ struct btrfs_path { - struct buffer_head *nodes[BTRFS_MAX_LEVEL]; + struct extent_buffer *nodes[BTRFS_MAX_LEVEL]; int slots[BTRFS_MAX_LEVEL]; int reada; int lowest_level; @@ -292,6 +294,7 @@ struct btrfs_block_group_cache { }; struct btrfs_fs_info { + u8 fsid[BTRFS_FSID_SIZE]; struct btrfs_root *extent_root; struct btrfs_root *tree_root; struct radix_tree_root fs_roots_radix; @@ -304,9 +307,8 @@ struct btrfs_fs_info { u64 generation; u64 last_trans_committed; struct btrfs_transaction *running_transaction; - struct btrfs_super_block *disk_super; struct btrfs_super_block super_copy; - struct buffer_head *sb_buffer; + struct extent_buffer *sb_buffer; struct super_block *sb; struct inode *btree_inode; struct mutex trans_mutex; @@ -325,8 +327,8 @@ struct btrfs_fs_info { * and for the extent tree extent_root root. */ struct btrfs_root { - struct buffer_head *node; - struct buffer_head *commit_root; + struct extent_buffer *node; + struct extent_buffer *commit_root; struct btrfs_root_item root_item; struct btrfs_key root_key; struct btrfs_fs_info *fs_info; @@ -336,7 +338,16 @@ struct btrfs_root { struct rw_semaphore snap_sem; u64 objectid; u64 last_trans; - u32 blocksize; + + /* data allocations are done in sectorsize units */ + u32 sectorsize; + + /* node allocations are done in nodesize units */ + u32 nodesize; + + /* leaf allocations are done in leafsize units */ + u32 leafsize; + u32 type; u64 highest_inode; u64 last_inode_alloc; @@ -347,12 +358,6 @@ struct btrfs_root { char *name; }; -/* the lower bits in the key flags defines the item type */ -#define BTRFS_KEY_TYPE_MAX 256 -#define BTRFS_KEY_TYPE_SHIFT 24 -#define BTRFS_KEY_TYPE_MASK (((u32)BTRFS_KEY_TYPE_MAX - 1) << \ - BTRFS_KEY_TYPE_SHIFT) - /* * inode items have the data typically returned from stat and store other * info about object characteristics. 
There is one for every file and dir in @@ -402,246 +407,253 @@ struct btrfs_root { */ #define BTRFS_STRING_ITEM_KEY 253 +/* some macros to generate set/get funcs for the struct fields. This + * assumes there is a lefoo_to_cpu for every type, so lets make a simple + * one for u8: + */ +#define le8_to_cpu(v) (v) +#define cpu_to_le8(v) (v) +#define __le8 u8 + +#define read_eb_member(eb, ptr, type, member, result) ( \ + read_extent_buffer(eb, (char *)(result), \ + ((unsigned long)(ptr)) + \ + offsetof(type, member), \ + sizeof(((type *)0)->member))) + +#define write_eb_member(eb, ptr, type, member, result) ( \ + write_extent_buffer(eb, (char *)(result), \ + ((unsigned long)(ptr)) + \ + offsetof(type, member), \ + sizeof(((type *)0)->member))) + +#define BTRFS_SETGET_FUNCS(name, type, member, bits) \ +static inline u##bits btrfs_##name(struct extent_buffer *eb, \ + type *s) \ +{ \ + __le##bits res; \ + read_eb_member(eb, s, type, member, &res); \ + return le##bits##_to_cpu(res); \ +} \ +static inline void btrfs_set_##name(struct extent_buffer *eb, \ + type *s, u##bits val) \ +{ \ + val = cpu_to_le##bits(val); \ + write_eb_member(eb, s, type, member, &val); \ +} + +#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ +static inline u##bits btrfs_##name(struct extent_buffer *eb) \ +{ \ + __le##bits res; \ + read_eb_member(eb, NULL, type, member, &res); \ + return le##bits##_to_cpu(res); \ +} \ +static inline void btrfs_set_##name(struct extent_buffer *eb, \ + u##bits val) \ +{ \ + val = cpu_to_le##bits(val); \ + write_eb_member(eb, NULL, type, member, &val); \ +} -static inline u64 btrfs_block_group_used(struct btrfs_block_group_item *bi) -{ - return le64_to_cpu(bi->used); -} - -static inline void btrfs_set_block_group_used(struct - btrfs_block_group_item *bi, - u64 val) -{ - bi->used = cpu_to_le64(val); -} - -static inline u64 btrfs_inode_generation(struct btrfs_inode_item *i) -{ - return le64_to_cpu(i->generation); -} - -static inline void 
btrfs_set_inode_generation(struct btrfs_inode_item *i, - u64 val) -{ - i->generation = cpu_to_le64(val); -} - -static inline u64 btrfs_inode_size(struct btrfs_inode_item *i) -{ - return le64_to_cpu(i->size); -} - -static inline void btrfs_set_inode_size(struct btrfs_inode_item *i, u64 val) -{ - i->size = cpu_to_le64(val); -} - -static inline u64 btrfs_inode_nblocks(struct btrfs_inode_item *i) -{ - return le64_to_cpu(i->nblocks); -} - -static inline void btrfs_set_inode_nblocks(struct btrfs_inode_item *i, u64 val) -{ - i->nblocks = cpu_to_le64(val); -} - -static inline u64 btrfs_inode_block_group(struct btrfs_inode_item *i) -{ - return le64_to_cpu(i->block_group); -} - -static inline void btrfs_set_inode_block_group(struct btrfs_inode_item *i, - u64 val) -{ - i->block_group = cpu_to_le64(val); -} - -static inline u32 btrfs_inode_nlink(struct btrfs_inode_item *i) -{ - return le32_to_cpu(i->nlink); -} - -static inline void btrfs_set_inode_nlink(struct btrfs_inode_item *i, u32 val) -{ - i->nlink = cpu_to_le32(val); -} - -static inline u32 btrfs_inode_uid(struct btrfs_inode_item *i) -{ - return le32_to_cpu(i->uid); -} - -static inline void btrfs_set_inode_uid(struct btrfs_inode_item *i, u32 val) -{ - i->uid = cpu_to_le32(val); -} - -static inline u32 btrfs_inode_gid(struct btrfs_inode_item *i) -{ - return le32_to_cpu(i->gid); -} - -static inline void btrfs_set_inode_gid(struct btrfs_inode_item *i, u32 val) -{ - i->gid = cpu_to_le32(val); -} - -static inline u32 btrfs_inode_mode(struct btrfs_inode_item *i) -{ - return le32_to_cpu(i->mode); -} - -static inline void btrfs_set_inode_mode(struct btrfs_inode_item *i, u32 val) -{ - i->mode = cpu_to_le32(val); +#define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ +static inline u##bits btrfs_##name(type *s) \ +{ \ + return le##bits##_to_cpu(s->member); \ +} \ +static inline void btrfs_set_##name(type *s, u##bits val) \ +{ \ + s->member = cpu_to_le##bits(val); \ } -static inline u32 btrfs_inode_rdev(struct 
btrfs_inode_item *i) -{ - return le32_to_cpu(i->rdev); -} +/* struct btrfs_block_group_item */ +BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item, + used, 64); +BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item, + used, 64); -static inline void btrfs_set_inode_rdev(struct btrfs_inode_item *i, u32 val) -{ - i->rdev = cpu_to_le32(val); -} +/* struct btrfs_inode_item */ +BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64); +BTRFS_SETGET_FUNCS(inode_size, struct btrfs_inode_item, size, 64); +BTRFS_SETGET_FUNCS(inode_nblocks, struct btrfs_inode_item, nblocks, 64); +BTRFS_SETGET_FUNCS(inode_block_group, struct btrfs_inode_item, block_group, 64); +BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32); +BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32); +BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32); +BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32); +BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 32); +BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 16); +BTRFS_SETGET_FUNCS(inode_compat_flags, struct btrfs_inode_item, + compat_flags, 16); -static inline u16 btrfs_inode_flags(struct btrfs_inode_item *i) +static inline struct btrfs_inode_timespec * +btrfs_inode_atime(struct btrfs_inode_item *inode_item) { - return le16_to_cpu(i->flags); + unsigned long ptr = (unsigned long)inode_item; + ptr += offsetof(struct btrfs_inode_item, atime); + return (struct btrfs_inode_timespec *)ptr; } -static inline void btrfs_set_inode_flags(struct btrfs_inode_item *i, u16 val) +static inline struct btrfs_inode_timespec * +btrfs_inode_mtime(struct btrfs_inode_item *inode_item) { - i->flags = cpu_to_le16(val); + unsigned long ptr = (unsigned long)inode_item; + ptr += offsetof(struct btrfs_inode_item, mtime); + return (struct btrfs_inode_timespec *)ptr; } -static inline u16 btrfs_inode_compat_flags(struct btrfs_inode_item *i) +static 
inline struct btrfs_inode_timespec * +btrfs_inode_ctime(struct btrfs_inode_item *inode_item) { - return le16_to_cpu(i->compat_flags); + unsigned long ptr = (unsigned long)inode_item; + ptr += offsetof(struct btrfs_inode_item, ctime); + return (struct btrfs_inode_timespec *)ptr; } -static inline void btrfs_set_inode_compat_flags(struct btrfs_inode_item *i, - u16 val) +static inline struct btrfs_inode_timespec * +btrfs_inode_otime(struct btrfs_inode_item *inode_item) { - i->compat_flags = cpu_to_le16(val); + unsigned long ptr = (unsigned long)inode_item; + ptr += offsetof(struct btrfs_inode_item, otime); + return (struct btrfs_inode_timespec *)ptr; } -static inline u64 btrfs_timespec_sec(struct btrfs_inode_timespec *ts) -{ - return le64_to_cpu(ts->sec); -} +BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_inode_timespec, sec, 64); +BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_inode_timespec, nsec, 32); -static inline void btrfs_set_timespec_sec(struct btrfs_inode_timespec *ts, - u64 val) -{ - ts->sec = cpu_to_le64(val); -} +/* struct btrfs_extent_item */ +BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32); +BTRFS_SETGET_FUNCS(extent_owner, struct btrfs_extent_item, owner, 32); -static inline u32 btrfs_timespec_nsec(struct btrfs_inode_timespec *ts) -{ - return le32_to_cpu(ts->nsec); -} +BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item, + refs, 32); +BTRFS_SETGET_STACK_FUNCS(stack_extent_owner, struct btrfs_extent_item, + owner, 32); -static inline void btrfs_set_timespec_nsec(struct btrfs_inode_timespec *ts, - u32 val) -{ - ts->nsec = cpu_to_le32(val); -} +/* struct btrfs_node */ +BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64); -static inline u32 btrfs_extent_refs(struct btrfs_extent_item *ei) +static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr) { - return le32_to_cpu(ei->refs); + unsigned long ptr; + ptr = offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; + return 
btrfs_key_blockptr(eb, (struct btrfs_key_ptr *)ptr); } -static inline void btrfs_set_extent_refs(struct btrfs_extent_item *ei, u32 val) +static inline void btrfs_set_node_blockptr(struct extent_buffer *eb, + int nr, u64 val) { - ei->refs = cpu_to_le32(val); + unsigned long ptr; + ptr = offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; + btrfs_set_key_blockptr(eb, (struct btrfs_key_ptr *)ptr, val); } -static inline u64 btrfs_extent_owner(struct btrfs_extent_item *ei) +static unsigned long btrfs_node_key_ptr_offset(int nr) { - return le64_to_cpu(ei->owner); + return offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; } -static inline void btrfs_set_extent_owner(struct btrfs_extent_item *ei, u64 val) +static void btrfs_node_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) { - ei->owner = cpu_to_le64(val); + unsigned long ptr; + ptr = btrfs_node_key_ptr_offset(nr); + read_eb_member(eb, (struct btrfs_key_ptr *)ptr, + struct btrfs_key_ptr, key, disk_key); } - -static inline u64 btrfs_node_blockptr(struct btrfs_node *n, int nr) +static inline void btrfs_set_node_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) { - return le64_to_cpu(n->ptrs[nr].blockptr); + unsigned long ptr; + ptr = btrfs_node_key_ptr_offset(nr); + write_eb_member(eb, (struct btrfs_key_ptr *)ptr, + struct btrfs_key_ptr, key, disk_key); } +/* struct btrfs_item */ +BTRFS_SETGET_FUNCS(item_offset, struct btrfs_item, offset, 32); +BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32); -static inline void btrfs_set_node_blockptr(struct btrfs_node *n, int nr, - u64 val) +static inline unsigned long btrfs_item_nr_offset(int nr) { - n->ptrs[nr].blockptr = cpu_to_le64(val); + return offsetof(struct btrfs_leaf, items) + + sizeof(struct btrfs_item) * nr; } -static inline u32 btrfs_item_offset(struct btrfs_item *item) +static inline struct btrfs_item *btrfs_item_nr(struct extent_buffer *eb, + int nr) { - return 
le32_to_cpu(item->offset); + return (struct btrfs_item *)btrfs_item_nr_offset(nr); } -static inline void btrfs_set_item_offset(struct btrfs_item *item, u32 val) +static inline u32 btrfs_item_end(struct extent_buffer *eb, + struct btrfs_item *item) { - item->offset = cpu_to_le32(val); + return btrfs_item_offset(eb, item) + btrfs_item_size(eb, item); } -static inline u32 btrfs_item_end(struct btrfs_item *item) +static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr) { - return le32_to_cpu(item->offset) + le16_to_cpu(item->size); + return btrfs_item_end(eb, btrfs_item_nr(eb, nr)); } -static inline u16 btrfs_item_size(struct btrfs_item *item) +static inline u32 btrfs_item_offset_nr(struct extent_buffer *eb, int nr) { - return le16_to_cpu(item->size); + return btrfs_item_offset(eb, btrfs_item_nr(eb, nr)); } -static inline void btrfs_set_item_size(struct btrfs_item *item, u16 val) +static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr) { - item->size = cpu_to_le16(val); + return btrfs_item_size(eb, btrfs_item_nr(eb, nr)); } -static inline u16 btrfs_dir_flags(struct btrfs_dir_item *d) +static inline void btrfs_item_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) { - return le16_to_cpu(d->flags); + struct btrfs_item *item = btrfs_item_nr(eb, nr); + read_eb_member(eb, item, struct btrfs_item, key, disk_key); } -static inline void btrfs_set_dir_flags(struct btrfs_dir_item *d, u16 val) +static inline void btrfs_set_item_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) { - d->flags = cpu_to_le16(val); + struct btrfs_item *item = btrfs_item_nr(eb, nr); + write_eb_member(eb, item, struct btrfs_item, key, disk_key); } -static inline u8 btrfs_dir_type(struct btrfs_dir_item *d) -{ - return d->type; -} +/* struct btrfs_dir_item */ +BTRFS_SETGET_FUNCS(dir_flags, struct btrfs_dir_item, flags, 16); +BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8); +BTRFS_SETGET_FUNCS(dir_name_len, struct 
btrfs_dir_item, name_len, 16); -static inline void btrfs_set_dir_type(struct btrfs_dir_item *d, u8 val) +static inline void btrfs_dir_item_key(struct extent_buffer *eb, + struct btrfs_dir_item *item, + struct btrfs_disk_key *key) { - d->type = val; + read_eb_member(eb, item, struct btrfs_dir_item, location, key); } -static inline u16 btrfs_dir_name_len(struct btrfs_dir_item *d) +static inline void btrfs_set_dir_item_key(struct extent_buffer *eb, + struct btrfs_dir_item *item, + struct btrfs_disk_key *key) { - return le16_to_cpu(d->name_len); + write_eb_member(eb, item, struct btrfs_dir_item, location, key); } -static inline void btrfs_set_dir_name_len(struct btrfs_dir_item *d, u16 val) -{ - d->name_len = cpu_to_le16(val); -} +/* struct btrfs_disk_key */ +BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key, + objectid, 64); +BTRFS_SETGET_STACK_FUNCS(disk_key_offset, struct btrfs_disk_key, offset, 64); +BTRFS_SETGET_STACK_FUNCS(disk_key_type, struct btrfs_disk_key, type, 8); static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu, struct btrfs_disk_key *disk) { cpu->offset = le64_to_cpu(disk->offset); - cpu->flags = le32_to_cpu(disk->flags); + cpu->type = disk->type; cpu->objectid = le64_to_cpu(disk->objectid); } @@ -649,400 +661,167 @@ static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk, struct btrfs_key *cpu) { disk->offset = cpu_to_le64(cpu->offset); - disk->flags = cpu_to_le32(cpu->flags); + disk->type = cpu->type; disk->objectid = cpu_to_le64(cpu->objectid); } -static inline u64 btrfs_disk_key_objectid(struct btrfs_disk_key *disk) -{ - return le64_to_cpu(disk->objectid); -} - -static inline void btrfs_set_disk_key_objectid(struct btrfs_disk_key *disk, - u64 val) -{ - disk->objectid = cpu_to_le64(val); -} - -static inline u64 btrfs_disk_key_offset(struct btrfs_disk_key *disk) -{ - return le64_to_cpu(disk->offset); -} - -static inline void btrfs_set_disk_key_offset(struct btrfs_disk_key *disk, - u64 val) -{ - disk->offset = 
cpu_to_le64(val); -} - -static inline u32 btrfs_disk_key_flags(struct btrfs_disk_key *disk) -{ - return le32_to_cpu(disk->flags); -} - -static inline void btrfs_set_disk_key_flags(struct btrfs_disk_key *disk, - u32 val) -{ - disk->flags = cpu_to_le32(val); -} - -static inline u32 btrfs_disk_key_type(struct btrfs_disk_key *key) -{ - return le32_to_cpu(key->flags) >> BTRFS_KEY_TYPE_SHIFT; -} - -static inline void btrfs_set_disk_key_type(struct btrfs_disk_key *key, - u32 val) -{ - u32 flags = btrfs_disk_key_flags(key); - BUG_ON(val >= BTRFS_KEY_TYPE_MAX); - val = val << BTRFS_KEY_TYPE_SHIFT; - flags = (flags & ~BTRFS_KEY_TYPE_MASK) | val; - btrfs_set_disk_key_flags(key, flags); -} - -static inline u32 btrfs_key_type(struct btrfs_key *key) -{ - return key->flags >> BTRFS_KEY_TYPE_SHIFT; -} - -static inline void btrfs_set_key_type(struct btrfs_key *key, u32 val) -{ - BUG_ON(val >= BTRFS_KEY_TYPE_MAX); - val = val << BTRFS_KEY_TYPE_SHIFT; - key->flags = (key->flags & ~(BTRFS_KEY_TYPE_MASK)) | val; -} - -static inline u64 btrfs_header_blocknr(struct btrfs_header *h) -{ - return le64_to_cpu(h->blocknr); -} - -static inline void btrfs_set_header_blocknr(struct btrfs_header *h, u64 blocknr) -{ - h->blocknr = cpu_to_le64(blocknr); -} - -static inline u64 btrfs_header_generation(struct btrfs_header *h) +static inline void btrfs_node_key_to_cpu(struct extent_buffer *eb, + struct btrfs_key *key, int nr) { - return le64_to_cpu(h->generation); + struct btrfs_disk_key disk_key; + btrfs_node_key(eb, &disk_key, nr); + btrfs_disk_key_to_cpu(key, &disk_key); } -static inline void btrfs_set_header_generation(struct btrfs_header *h, - u64 val) +static inline void btrfs_item_key_to_cpu(struct extent_buffer *eb, + struct btrfs_key *key, int nr) { - h->generation = cpu_to_le64(val); + struct btrfs_disk_key disk_key; + btrfs_item_key(eb, &disk_key, nr); + btrfs_disk_key_to_cpu(key, &disk_key); } -static inline u64 btrfs_header_owner(struct btrfs_header *h) +static inline void 
btrfs_dir_item_key_to_cpu(struct extent_buffer *eb, + struct btrfs_dir_item *item, + struct btrfs_key *key) { - return le64_to_cpu(h->owner); + struct btrfs_disk_key disk_key; + btrfs_dir_item_key(eb, item, &disk_key); + btrfs_disk_key_to_cpu(key, &disk_key); } -static inline void btrfs_set_header_owner(struct btrfs_header *h, - u64 val) -{ - h->owner = cpu_to_le64(val); -} - -static inline u16 btrfs_header_nritems(struct btrfs_header *h) -{ - return le16_to_cpu(h->nritems); -} - -static inline void btrfs_set_header_nritems(struct btrfs_header *h, u16 val) -{ - h->nritems = cpu_to_le16(val); -} - -static inline u16 btrfs_header_flags(struct btrfs_header *h) -{ - return le16_to_cpu(h->flags); -} - -static inline void btrfs_set_header_flags(struct btrfs_header *h, u16 val) -{ - h->flags = cpu_to_le16(val); -} - -static inline int btrfs_header_level(struct btrfs_header *h) -{ - return h->level; -} - -static inline void btrfs_set_header_level(struct btrfs_header *h, int level) -{ - BUG_ON(level > BTRFS_MAX_LEVEL); - h->level = level; -} - -static inline int btrfs_is_leaf(struct btrfs_node *n) -{ - return (btrfs_header_level(&n->header) == 0); -} - -static inline u64 btrfs_root_blocknr(struct btrfs_root_item *item) -{ - return le64_to_cpu(item->blocknr); -} - -static inline void btrfs_set_root_blocknr(struct btrfs_root_item *item, u64 val) -{ - item->blocknr = cpu_to_le64(val); -} - -static inline u64 btrfs_root_dirid(struct btrfs_root_item *item) -{ - return le64_to_cpu(item->root_dirid); -} - -static inline void btrfs_set_root_dirid(struct btrfs_root_item *item, u64 val) -{ - item->root_dirid = cpu_to_le64(val); -} - -static inline u32 btrfs_root_refs(struct btrfs_root_item *item) -{ - return le32_to_cpu(item->refs); -} - -static inline void btrfs_set_root_refs(struct btrfs_root_item *item, u32 val) -{ - item->refs = cpu_to_le32(val); -} - -static inline u32 btrfs_root_flags(struct btrfs_root_item *item) -{ - return le32_to_cpu(item->flags); -} - -static inline void 
btrfs_set_root_flags(struct btrfs_root_item *item, u32 val) -{ - item->flags = cpu_to_le32(val); -} - -static inline void btrfs_set_root_blocks_used(struct btrfs_root_item *item, - u64 val) -{ - item->blocks_used = cpu_to_le64(val); -} - -static inline u64 btrfs_root_blocks_used(struct btrfs_root_item *item) -{ - return le64_to_cpu(item->blocks_used); -} - -static inline void btrfs_set_root_block_limit(struct btrfs_root_item *item, - u64 val) -{ - item->block_limit = cpu_to_le64(val); -} - -static inline u64 btrfs_root_block_limit(struct btrfs_root_item *item) -{ - return le64_to_cpu(item->block_limit); -} -static inline u64 btrfs_super_blocknr(struct btrfs_super_block *s) +static inline u8 btrfs_key_type(struct btrfs_key *key) { - return le64_to_cpu(s->blocknr); + return key->type; } -static inline void btrfs_set_super_blocknr(struct btrfs_super_block *s, u64 val) +static inline void btrfs_set_key_type(struct btrfs_key *key, u8 val) { - s->blocknr = cpu_to_le64(val); + key->type = val; } -static inline u64 btrfs_super_generation(struct btrfs_super_block *s) -{ - return le64_to_cpu(s->generation); -} +/* struct btrfs_header */ +BTRFS_SETGET_HEADER_FUNCS(header_blocknr, struct btrfs_header, blocknr, 64); +BTRFS_SETGET_HEADER_FUNCS(header_generation, struct btrfs_header, + generation, 64); +BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64); +BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32); +BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 16); +BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8); -static inline void btrfs_set_super_generation(struct btrfs_super_block *s, - u64 val) +static inline u8 *btrfs_header_fsid(struct extent_buffer *eb) { - s->generation = cpu_to_le64(val); + unsigned long ptr = offsetof(struct btrfs_header, fsid); + return (u8 *)ptr; } -static inline u64 btrfs_super_root(struct btrfs_super_block *s) +static inline u8 *btrfs_super_fsid(struct extent_buffer 
*eb) { - return le64_to_cpu(s->root); + unsigned long ptr = offsetof(struct btrfs_super_block, fsid); + return (u8 *)ptr; } -static inline void btrfs_set_super_root(struct btrfs_super_block *s, u64 val) +static inline u8 *btrfs_header_csum(struct extent_buffer *eb) { - s->root = cpu_to_le64(val); + unsigned long ptr = offsetof(struct btrfs_header, csum); + return (u8 *)ptr; } -static inline u64 btrfs_super_total_blocks(struct btrfs_super_block *s) +static inline struct btrfs_node *btrfs_buffer_node(struct extent_buffer *eb) { - return le64_to_cpu(s->total_blocks); + return NULL; } -static inline void btrfs_set_super_total_blocks(struct btrfs_super_block *s, - u64 val) +static inline struct btrfs_leaf *btrfs_buffer_leaf(struct extent_buffer *eb) { - s->total_blocks = cpu_to_le64(val); + return NULL; } -static inline u64 btrfs_super_blocks_used(struct btrfs_super_block *s) +static inline struct btrfs_header *btrfs_buffer_header(struct extent_buffer *eb) { - return le64_to_cpu(s->blocks_used); + return NULL; } -static inline void btrfs_set_super_blocks_used(struct btrfs_super_block *s, - u64 val) +static inline int btrfs_is_leaf(struct extent_buffer *eb) { - s->blocks_used = cpu_to_le64(val); + return (btrfs_header_level(eb) == 0); } -static inline u32 btrfs_super_blocksize(struct btrfs_super_block *s) -{ - return le32_to_cpu(s->blocksize); -} +/* struct btrfs_root_item */ +BTRFS_SETGET_FUNCS(disk_root_refs, struct btrfs_root_item, refs, 32); +BTRFS_SETGET_FUNCS(disk_root_blocknr, struct btrfs_root_item, blocknr, 64); -static inline void btrfs_set_super_blocksize(struct btrfs_super_block *s, - u32 val) -{ - s->blocksize = cpu_to_le32(val); -} +BTRFS_SETGET_STACK_FUNCS(root_blocknr, struct btrfs_root_item, blocknr, 64); +BTRFS_SETGET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64); +BTRFS_SETGET_STACK_FUNCS(root_refs, struct btrfs_root_item, refs, 32); +BTRFS_SETGET_STACK_FUNCS(root_flags, struct btrfs_root_item, flags, 32); 
+BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, blocks_used, 64); +BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, block_limit, 64); -static inline u64 btrfs_super_root_dir(struct btrfs_super_block *s) -{ - return le64_to_cpu(s->root_dir_objectid); -} +/* struct btrfs_super_block */ +BTRFS_SETGET_STACK_FUNCS(super_blocknr, struct btrfs_super_block, blocknr, 64); +BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block, + generation, 64); +BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64); +BTRFS_SETGET_STACK_FUNCS(super_total_blocks, struct btrfs_super_block, + total_blocks, 64); +BTRFS_SETGET_STACK_FUNCS(super_blocks_used, struct btrfs_super_block, + blocks_used, 64); +BTRFS_SETGET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block, + sectorsize, 32); +BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block, + nodesize, 32); +BTRFS_SETGET_STACK_FUNCS(super_leafsize, struct btrfs_super_block, + leafsize, 32); +BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block, + root_dir_objectid, 64); -static inline void btrfs_set_super_root_dir(struct btrfs_super_block *s, u64 - val) +static inline unsigned long btrfs_leaf_data(struct extent_buffer *l) { - s->root_dir_objectid = cpu_to_le64(val); + return offsetof(struct btrfs_leaf, items); } -static inline u8 *btrfs_leaf_data(struct btrfs_leaf *l) -{ - return (u8 *)l->items; -} +/* struct btrfs_file_extent_item */ +BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8); -static inline int btrfs_file_extent_type(struct btrfs_file_extent_item *e) -{ - return e->type; -} -static inline void btrfs_set_file_extent_type(struct btrfs_file_extent_item *e, - u8 val) -{ - e->type = val; -} - -static inline char *btrfs_file_extent_inline_start(struct +static inline unsigned long btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e) { - return (char *)(&e->disk_blocknr); + unsigned long offset = (unsigned long)e; + 
offset += offsetof(struct btrfs_file_extent_item, disk_blocknr); + return offset; } static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize) { - return (unsigned long)(&((struct - btrfs_file_extent_item *)NULL)->disk_blocknr) + datasize; -} - -static inline u32 btrfs_file_extent_inline_len(struct btrfs_item *e) -{ - struct btrfs_file_extent_item *fe = NULL; - return btrfs_item_size(e) - (unsigned long)(&fe->disk_blocknr); -} - -static inline u64 btrfs_file_extent_disk_blocknr(struct btrfs_file_extent_item - *e) -{ - return le64_to_cpu(e->disk_blocknr); + return offsetof(struct btrfs_file_extent_item, disk_blocknr) + datasize; } -static inline void btrfs_set_file_extent_disk_blocknr(struct - btrfs_file_extent_item - *e, u64 val) +static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb, + struct btrfs_item *e) { - e->disk_blocknr = cpu_to_le64(val); + unsigned long offset; + offset = offsetof(struct btrfs_file_extent_item, disk_blocknr); + return btrfs_item_size(eb, e) - offset; } -static inline u64 btrfs_file_extent_generation(struct btrfs_file_extent_item *e) -{ - return le64_to_cpu(e->generation); -} - -static inline void btrfs_set_file_extent_generation(struct - btrfs_file_extent_item *e, - u64 val) -{ - e->generation = cpu_to_le64(val); -} - -static inline u64 btrfs_file_extent_disk_num_blocks(struct - btrfs_file_extent_item *e) -{ - return le64_to_cpu(e->disk_num_blocks); -} - -static inline void btrfs_set_file_extent_disk_num_blocks(struct - btrfs_file_extent_item - *e, u64 val) -{ - e->disk_num_blocks = cpu_to_le64(val); -} - -static inline u64 btrfs_file_extent_offset(struct btrfs_file_extent_item *e) -{ - return le64_to_cpu(e->offset); -} - -static inline void btrfs_set_file_extent_offset(struct btrfs_file_extent_item - *e, u64 val) -{ - e->offset = cpu_to_le64(val); -} - -static inline u64 btrfs_file_extent_num_blocks(struct btrfs_file_extent_item - *e) -{ - return le64_to_cpu(e->num_blocks); -} - -static inline void 
btrfs_set_file_extent_num_blocks(struct - btrfs_file_extent_item *e, - u64 val) -{ - e->num_blocks = cpu_to_le64(val); -} +BTRFS_SETGET_FUNCS(file_extent_disk_blocknr, struct btrfs_file_extent_item, + disk_blocknr, 64); +BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item, + generation, 64); +BTRFS_SETGET_FUNCS(file_extent_disk_num_blocks, struct btrfs_file_extent_item, + disk_num_blocks, 64); +BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item, + offset, 64); +BTRFS_SETGET_FUNCS(file_extent_num_blocks, struct btrfs_file_extent_item, + num_blocks, 64); static inline struct btrfs_root *btrfs_sb(struct super_block *sb) { return sb->s_fs_info; } -static inline void btrfs_check_bounds(void *vptr, size_t len, - void *vcontainer, size_t container_len) -{ - char *ptr = vptr; - char *container = vcontainer; - WARN_ON(ptr < container); - WARN_ON(ptr + len > container + container_len); -} - -static inline void btrfs_memcpy(struct btrfs_root *root, - void *dst_block, - void *dst, const void *src, size_t nr) -{ - btrfs_check_bounds(dst, nr, dst_block, root->fs_info->sb->s_blocksize); - memcpy(dst, src, nr); -} - -static inline void btrfs_memmove(struct btrfs_root *root, - void *dst_block, - void *dst, void *src, size_t nr) -{ - btrfs_check_bounds(dst, nr, dst_block, root->fs_info->sb->s_blocksize); - memmove(dst, src, nr); -} - static inline int btrfs_set_root_name(struct btrfs_root *root, const char *name, int len) { @@ -1063,7 +842,11 @@ static inline int btrfs_set_root_name(struct btrfs_root *root, /* helper function to cast into the data area of the leaf. 
*/ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ - btrfs_item_offset((leaf)->items + (slot)))) + btrfs_item_offset_nr(leaf, slot))) + +#define btrfs_item_ptr_offset(leaf, slot) \ + ((unsigned long)(btrfs_leaf_data(leaf) + \ + btrfs_item_offset_nr(leaf, slot))) /* mount option defines and helpers */ #define BTRFS_MOUNT_SUBVOL 0x000001 @@ -1084,7 +867,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int data, int owner); int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); -struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, +struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 hint, u64 empty_size); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, @@ -1092,7 +875,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, u64 num_blocks, u64 empty_size, u64 search_start, u64 search_end, struct btrfs_key *ins, int data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf); + struct extent_buffer *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, @@ -1106,10 +889,10 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_read_block_groups(struct btrfs_root *root); /* ctree.c */ -int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *buf, struct buffer_head - *parent, int parent_slot, struct buffer_head - **cow_ret); +int btrfs_cow_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *buf, + struct extent_buffer *parent, int parent_slot, + struct extent_buffer **cow_ret); int btrfs_extend_item(struct btrfs_trans_handle *trans, struct 
btrfs_root *root, struct btrfs_path *path, u32 data_size); int btrfs_truncate_item(struct btrfs_trans_handle *trans, @@ -1120,7 +903,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); int btrfs_realloc_node(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct buffer_head *parent, + struct btrfs_root *root, struct extent_buffer *parent, int cache_only, u64 *last_ret); void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); struct btrfs_path *btrfs_alloc_path(void); @@ -1134,7 +917,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *cpu_key, u32 data_size); int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); -int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf); +int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root *root); /* root-item.c */ @@ -1179,9 +962,9 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, int btrfs_find_highest_inode(struct btrfs_root *fs_root, u64 *objectid); /* inode-item.c */ -int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 objectid, struct btrfs_inode_item - *inode_item); +int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 objectid); int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *location, int mod); @@ -1224,8 +1007,6 @@ struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, struct btrfs_root *root); int btrfs_commit_write(struct file *file, struct page *page, unsigned from, unsigned to); -int btrfs_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *result, 
int create); struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, size_t page_offset, u64 start, u64 end, int create); diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 49db5fa7ced..6f19de41b87 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -33,7 +33,7 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle int ret; char *ptr; struct btrfs_item *item; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); if (ret == -EEXIST) { @@ -49,11 +49,11 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle if (ret < 0) return ERR_PTR(ret); WARN_ON(ret > 0); - leaf = btrfs_buffer_leaf(path->nodes[0]); - item = leaf->items + path->slots[0]; + leaf = path->nodes[0]; + item = btrfs_item_nr(leaf, path->slots[0]); ptr = btrfs_item_ptr(leaf, path->slots[0], char); - BUG_ON(data_size > btrfs_item_size(item)); - ptr += btrfs_item_size(item) - data_size; + BUG_ON(data_size > btrfs_item_size(leaf, item)); + ptr += btrfs_item_size(leaf, item) - data_size; return (struct btrfs_dir_item *)ptr; } @@ -65,12 +65,13 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root int ret2 = 0; struct btrfs_path *path; struct btrfs_dir_item *dir_item; - char *name_ptr; + struct extent_buffer *leaf; + unsigned long name_ptr; struct btrfs_key key; + struct btrfs_disk_key disk_key; u32 data_size; key.objectid = dir; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); @@ -85,14 +86,16 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root goto out; } - btrfs_cpu_key_to_disk(&dir_item->location, location); - btrfs_set_dir_type(dir_item, type); - btrfs_set_dir_flags(dir_item, 0); - btrfs_set_dir_name_len(dir_item, name_len); - name_ptr = (char *)(dir_item + 1); + leaf = path->nodes[0]; + 
btrfs_cpu_key_to_disk(&disk_key, location); + btrfs_set_dir_item_key(leaf, dir_item, &disk_key); + btrfs_set_dir_type(leaf, dir_item, type); + btrfs_set_dir_flags(leaf, dir_item, 0); + btrfs_set_dir_name_len(leaf, dir_item, name_len); + name_ptr = (unsigned long)(dir_item + 1); - btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); - btrfs_mark_buffer_dirty(path->nodes[0]); + write_extent_buffer(leaf, name, name_ptr, name_len); + btrfs_mark_buffer_dirty(leaf); second_insert: /* FIXME, use some real flag for selecting the extra index */ @@ -110,13 +113,15 @@ second_insert: ret2 = PTR_ERR(dir_item); goto out; } - btrfs_cpu_key_to_disk(&dir_item->location, location); - btrfs_set_dir_type(dir_item, type); - btrfs_set_dir_flags(dir_item, 0); - btrfs_set_dir_name_len(dir_item, name_len); - name_ptr = (char *)(dir_item + 1); - btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); - btrfs_mark_buffer_dirty(path->nodes[0]); + leaf = path->nodes[0]; + btrfs_cpu_key_to_disk(&disk_key, location); + btrfs_set_dir_item_key(leaf, dir_item, &disk_key); + btrfs_set_dir_type(leaf, dir_item, type); + btrfs_set_dir_flags(leaf, dir_item, 0); + btrfs_set_dir_name_len(leaf, dir_item, name_len); + name_ptr = (unsigned long)(dir_item + 1); + write_extent_buffer(leaf, name, name_ptr, name_len); + btrfs_mark_buffer_dirty(leaf); out: btrfs_free_path(path); if (ret) @@ -136,14 +141,15 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_key key; int ins_len = mod < 0 ? 
-1 : 0; int cow = mod != 0; - struct btrfs_disk_key *found_key; - struct btrfs_leaf *leaf; + struct btrfs_key found_key; + struct extent_buffer *leaf; key.objectid = dir; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); + ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); + ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); if (ret < 0) return ERR_PTR(ret); @@ -152,12 +158,13 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, return NULL; path->slots[0]--; } - leaf = btrfs_buffer_leaf(path->nodes[0]); - found_key = &leaf->items[path->slots[0]].key; - if (btrfs_disk_key_objectid(found_key) != dir || - btrfs_disk_key_type(found_key) != BTRFS_DIR_ITEM_KEY || - btrfs_disk_key_offset(found_key) != key.offset) + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + + if (found_key.objectid != dir || + btrfs_key_type(&found_key) != BTRFS_DIR_ITEM_KEY || + found_key.offset != key.offset) return NULL; return btrfs_match_dir_item_name(root, path, name, name_len); @@ -176,7 +183,6 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, int cow = mod != 0; key.objectid = dir; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); key.offset = objectid; @@ -193,21 +199,22 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, const char *name, int name_len) { struct btrfs_dir_item *dir_item; - char *name_ptr; + unsigned long name_ptr; u32 total_len; u32 cur = 0; u32 this_len; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; - leaf = btrfs_buffer_leaf(path->nodes[0]); + leaf = path->nodes[0]; dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item); - total_len = btrfs_item_size(leaf->items + path->slots[0]); + total_len = btrfs_item_size_nr(leaf, path->slots[0]); while(cur < total_len) { - this_len = sizeof(*dir_item) + btrfs_dir_name_len(dir_item); - name_ptr = (char *)(dir_item + 1); + this_len = sizeof(*dir_item) 
+ + btrfs_dir_name_len(leaf, dir_item); + name_ptr = (unsigned long)(dir_item + 1); - if (btrfs_dir_name_len(dir_item) == name_len && - memcmp(name_ptr, name, name_len) == 0) + if (btrfs_dir_name_len(leaf, dir_item) == name_len && + memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0) return dir_item; cur += this_len; @@ -223,20 +230,23 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, struct btrfs_dir_item *di) { - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; u32 sub_item_len; u32 item_len; int ret = 0; - leaf = btrfs_buffer_leaf(path->nodes[0]); - sub_item_len = sizeof(*di) + btrfs_dir_name_len(di); - item_len = btrfs_item_size(leaf->items + path->slots[0]); - if (sub_item_len == btrfs_item_size(leaf->items + path->slots[0])) { + leaf = path->nodes[0]; + sub_item_len = sizeof(*di) + btrfs_dir_name_len(leaf, di); + item_len = btrfs_item_size_nr(leaf, path->slots[0]); + if (sub_item_len == item_len) { ret = btrfs_del_item(trans, root, path); } else { - char *ptr = (char *)di; - char *start = btrfs_item_ptr(leaf, path->slots[0], char); - btrfs_memmove(root, leaf, ptr, ptr + sub_item_len, + /* MARKER */ + unsigned long ptr = (unsigned long)di; + unsigned long start; + + start = btrfs_item_ptr_offset(leaf, path->slots[0]); + memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, item_len - (ptr + sub_item_len - start)); ret = btrfs_truncate_item(trans, root, path, item_len - sub_item_len); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ef6d76f2ec2..0c1f90cbedb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -23,138 +23,132 @@ #include #include #include +#include // for block_sync_page #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" -u64 bh_blocknr(struct buffer_head *bh) +#if 0 +static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) { - return bh->b_blocknr; -} - -static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) -{ - 
struct btrfs_node *node = btrfs_buffer_node(buf); - if (bh_blocknr(buf) != btrfs_header_blocknr(&node->header)) { - printk(KERN_CRIT "bh_blocknr(buf) is %llu, header is %llu\n", - (unsigned long long)bh_blocknr(buf), - (unsigned long long)btrfs_header_blocknr(&node->header)); + if (extent_buffer_blocknr(buf) != btrfs_header_blocknr(buf)) { + printk(KERN_CRIT "buf blocknr(buf) is %llu, header is %llu\n", + (unsigned long long)extent_buffer_blocknr(buf), + (unsigned long long)btrfs_header_blocknr(buf)); return 1; } return 0; } +#endif -struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) +struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, + u64 blocknr) { - struct address_space *mapping = root->fs_info->btree_inode->i_mapping; - int blockbits = root->fs_info->sb->s_blocksize_bits; - unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); - struct page *page; - struct buffer_head *bh; - struct buffer_head *head; - struct buffer_head *ret = NULL; - - - page = find_lock_page(mapping, index); - if (!page) - return NULL; - - if (!page_has_buffers(page)) - goto out_unlock; + struct inode *btree_inode = root->fs_info->btree_inode; + return find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + blocknr * root->sectorsize, + root->sectorsize, GFP_NOFS); +} - head = page_buffers(page); - bh = head; - do { - if (buffer_mapped(bh) && bh_blocknr(bh) == blocknr) { - ret = bh; - get_bh(bh); - goto out_unlock; - } - bh = bh->b_this_page; - } while (bh != head); -out_unlock: - unlock_page(page); - page_cache_release(page); - return ret; +struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, + u64 blocknr) +{ + struct inode *btree_inode = root->fs_info->btree_inode; + return alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + blocknr * root->sectorsize, + root->sectorsize, GFP_NOFS); } -int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, - u64 logical) +struct extent_map 
*btree_get_extent(struct inode *inode, struct page *page, + size_t page_offset, u64 start, u64 end, + int create) { - if (logical == 0) { - bh->b_bdev = NULL; - bh->b_blocknr = 0; - set_buffer_mapped(bh); - } else { - map_bh(bh, root->fs_info->sb, logical); + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_map *em; + int ret; + +again: + em = lookup_extent_mapping(em_tree, start, end); + if (em) { + goto out; } - return 0; + em = alloc_extent_map(GFP_NOFS); + if (!em) { + em = ERR_PTR(-ENOMEM); + goto out; + } + em->start = 0; + em->end = (i_size_read(inode) & ~((u64)PAGE_CACHE_SIZE -1)) - 1; + em->block_start = 0; + em->block_end = em->end; + em->bdev = inode->i_sb->s_bdev; + ret = add_extent_mapping(em_tree, em); + if (ret == -EEXIST) { + free_extent_map(em); + em = NULL; + goto again; + } else if (ret) { + em = ERR_PTR(ret); + } +out: + return em; } -struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, - u64 blocknr) +static int btree_writepage(struct page *page, struct writeback_control *wbc) { - struct address_space *mapping = root->fs_info->btree_inode->i_mapping; - int blockbits = root->fs_info->sb->s_blocksize_bits; - unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); - struct page *page; - struct buffer_head *bh; - struct buffer_head *head; - struct buffer_head *ret = NULL; - int err; - u64 first_block = index << (PAGE_CACHE_SHIFT - blockbits); + struct extent_map_tree *tree; + tree = &BTRFS_I(page->mapping->host)->extent_tree; + return extent_write_full_page(tree, page, btree_get_extent, wbc); +} +int btree_readpage(struct file *file, struct page *page) +{ + struct extent_map_tree *tree; + tree = &BTRFS_I(page->mapping->host)->extent_tree; + return extent_read_full_page(tree, page, btree_get_extent); +} - page = find_or_create_page(mapping, index, GFP_NOFS); - if (!page) - return NULL; +static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags) +{ + struct extent_map_tree 
*tree; + int ret; - if (!page_has_buffers(page)) - create_empty_buffers(page, root->fs_info->sb->s_blocksize, 0); - head = page_buffers(page); - bh = head; - do { - if (!buffer_mapped(bh)) { - err = btrfs_map_bh_to_logical(root, bh, first_block); - BUG_ON(err); - } - if (bh_blocknr(bh) == blocknr) { - ret = bh; - get_bh(bh); - goto out_unlock; - } - bh = bh->b_this_page; - first_block++; - } while (bh != head); -out_unlock: - unlock_page(page); - if (ret) - touch_buffer(ret); - page_cache_release(page); + BUG_ON(page->private != 1); + tree = &BTRFS_I(page->mapping->host)->extent_tree; + ret = try_release_extent_mapping(tree, page); + if (ret == 1) { + ClearPagePrivate(page); + set_page_private(page, 0); + page_cache_release(page); + } return ret; } -static int btree_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh, int create) +static void btree_invalidatepage(struct page *page, unsigned long offset) { - int err; - struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; - err = btrfs_map_bh_to_logical(root, bh, iblock); - return err; + struct extent_map_tree *tree; + tree = &BTRFS_I(page->mapping->host)->extent_tree; + extent_invalidatepage(tree, page, offset); + btree_releasepage(page, GFP_NOFS); } int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, char *result) { + return 0; +#if 0 u32 crc; crc = crc32c(0, data, len); memcpy(result, &crc, BTRFS_CRC32_SIZE); return 0; +#endif } -static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, +#if 0 +static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, int verify) { + return 0; char result[BTRFS_CRC32_SIZE]; int ret; struct btrfs_node *node; @@ -176,7 +170,9 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, } return 0; } +#endif +#if 0 static int btree_writepage(struct page *page, struct writeback_control *wbc) { struct buffer_head *bh; @@ -195,87 +191,65 @@ static int btree_writepage(struct 
page *page, struct writeback_control *wbc) } while (bh != head); return block_write_full_page(page, btree_get_block, wbc); } - -static int btree_readpage(struct file * file, struct page * page) -{ - return block_read_full_page(page, btree_get_block); -} +#endif static struct address_space_operations btree_aops = { .readpage = btree_readpage, .writepage = btree_writepage, + .releasepage = btree_releasepage, + .invalidatepage = btree_invalidatepage, .sync_page = block_sync_page, }; int readahead_tree_block(struct btrfs_root *root, u64 blocknr) { - struct buffer_head *bh = NULL; + struct extent_buffer *buf = NULL; + struct inode *btree_inode = root->fs_info->btree_inode; int ret = 0; - bh = btrfs_find_create_tree_block(root, blocknr); - if (!bh) + buf = btrfs_find_create_tree_block(root, blocknr); + if (!buf) return 0; - if (buffer_uptodate(bh)) { - ret = 1; - goto done; - } - if (test_set_buffer_locked(bh)) { - ret = 1; - goto done; - } - if (!buffer_uptodate(bh)) { - get_bh(bh); - bh->b_end_io = end_buffer_read_sync; - submit_bh(READ, bh); - } else { - unlock_buffer(bh); - ret = 1; - } -done: - brelse(bh); + read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, + buf, 0); + free_extent_buffer(buf); return ret; } -struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) +struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) { - struct buffer_head *bh = NULL; - - bh = btrfs_find_create_tree_block(root, blocknr); - if (!bh) - return bh; - if (buffer_uptodate(bh)) - goto uptodate; - lock_buffer(bh); - if (!buffer_uptodate(bh)) { - get_bh(bh); - bh->b_end_io = end_buffer_read_sync; - submit_bh(READ, bh); - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) - goto fail; - } else { - unlock_buffer(bh); - } -uptodate: - if (!buffer_checked(bh)) { - csum_tree_block(root, bh, 1); - set_buffer_checked(bh); - } - if (check_tree_block(root, bh)) - goto fail; - return bh; -fail: - brelse(bh); - return NULL; + struct extent_buffer *buf = 
NULL; + struct inode *btree_inode = root->fs_info->btree_inode; + + buf = btrfs_find_create_tree_block(root, blocknr); + if (!buf) + return NULL; + read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, + buf, 1); + return buf; } int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf) + struct extent_buffer *buf) { - WARN_ON(atomic_read(&buf->b_count) == 0); - lock_buffer(buf); - clear_buffer_dirty(buf); - unlock_buffer(buf); + struct inode *btree_inode = root->fs_info->btree_inode; + clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); + return 0; +} + +int wait_on_tree_block_writeback(struct btrfs_root *root, + struct extent_buffer *buf) +{ + struct inode *btree_inode = root->fs_info->btree_inode; + wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->extent_tree, + buf); + return 0; +} + +int set_tree_block_dirty(struct btrfs_root *root, struct extent_buffer *buf) +{ + struct inode *btree_inode = root->fs_info->btree_inode; + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); return 0; } @@ -287,7 +261,9 @@ static int __setup_root(int blocksize, root->node = NULL; root->inode = NULL; root->commit_root = NULL; - root->blocksize = blocksize; + root->sectorsize = blocksize; + root->nodesize = blocksize; + root->leafsize = blocksize; root->ref_cows = 0; root->fs_info = fs_info; root->objectid = objectid; @@ -332,7 +308,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, struct btrfs_root *root; struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_path *path; - struct btrfs_leaf *l; + struct extent_buffer *l; u64 highest_inode; int ret = 0; @@ -361,11 +337,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, ret = -ENOENT; goto out; } - l = btrfs_buffer_leaf(path->nodes[0]); - memcpy(&root->root_item, - btrfs_item_ptr(l, path->slots[0], struct btrfs_root_item), + l = path->nodes[0]; + read_extent_buffer(l, 
&root->root_item, + btrfs_item_ptr_offset(l, path->slots[0]), sizeof(root->root_item)); - memcpy(&root->root_key, location, sizeof(*location)); ret = 0; out: btrfs_release_path(root, path); @@ -406,21 +381,21 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, (unsigned long)root->root_key.objectid, root); if (ret) { - brelse(root->node); + free_extent_buffer(root->node); kfree(root); return ERR_PTR(ret); } ret = btrfs_set_root_name(root, name, namelen); if (ret) { - brelse(root->node); + free_extent_buffer(root->node); kfree(root); return ERR_PTR(ret); } ret = btrfs_sysfs_add_root(root); if (ret) { - brelse(root->node); + free_extent_buffer(root->node); kfree(root->name); kfree(root); return ERR_PTR(ret); @@ -471,6 +446,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_nlink = 1; fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; + extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree, + fs_info->btree_inode->i_mapping, + GFP_NOFS); fs_info->do_barriers = 1; fs_info->closing = 0; @@ -493,10 +471,14 @@ struct btrfs_root *open_ctree(struct super_block *sb) if (!fs_info->sb_buffer) goto fail_iput; - disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; - fs_info->disk_super = disk_super; - memcpy(&fs_info->super_copy, disk_super, sizeof(fs_info->super_copy)); + read_extent_buffer(fs_info->sb_buffer, &fs_info->super_copy, 0, + sizeof(fs_info->super_copy)); + + read_extent_buffer(fs_info->sb_buffer, fs_info->fsid, + (unsigned long)btrfs_super_fsid(fs_info->sb_buffer), + BTRFS_FSID_SIZE); + disk_super = &fs_info->super_copy; if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; @@ -530,9 +512,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) return tree_root; fail_tree_root: - btrfs_block_release(tree_root, tree_root->node); + free_extent_buffer(tree_root->node); fail_sb_buffer: - btrfs_block_release(tree_root, 
fs_info->sb_buffer); + free_extent_buffer(fs_info->sb_buffer); fail_iput: iput(fs_info->btree_inode); fail: @@ -546,31 +528,13 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root) { int ret; - struct buffer_head *bh = root->fs_info->sb_buffer; - - lock_buffer(bh); - WARN_ON(atomic_read(&bh->b_count) < 1); - clear_buffer_dirty(bh); - csum_tree_block(root, bh, 0); - bh->b_end_io = end_buffer_write_sync; - get_bh(bh); - if (root->fs_info->do_barriers) - ret = submit_bh(WRITE_BARRIER, bh); - else - ret = submit_bh(WRITE, bh); - if (ret == -EOPNOTSUPP) { - get_bh(bh); - lock_buffer(bh); - set_buffer_uptodate(bh); - root->fs_info->do_barriers = 0; - ret = submit_bh(WRITE, bh); - } - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - WARN_ON(1); - return -EIO; - } - return 0; + struct extent_buffer *super = root->fs_info->sb_buffer; + struct inode *btree_inode = root->fs_info->btree_inode; + + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, super); + ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping, + super->start, super->len); + return ret; } int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) @@ -581,9 +545,9 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) if (root->inode) iput(root->inode); if (root->node) - brelse(root->node); + free_extent_buffer(root->node); if (root->commit_root) - brelse(root->commit_root); + free_extent_buffer(root->commit_root); if (root->name) kfree(root->name); kfree(root); @@ -629,12 +593,10 @@ int close_ctree(struct btrfs_root *root) mutex_unlock(&fs_info->fs_mutex); if (fs_info->extent_root->node) - btrfs_block_release(fs_info->extent_root, - fs_info->extent_root->node); + free_extent_buffer(fs_info->extent_root->node); if (fs_info->tree_root->node) - btrfs_block_release(fs_info->tree_root, - fs_info->tree_root->node); - btrfs_block_release(root, fs_info->sb_buffer); + free_extent_buffer(fs_info->tree_root->node); + 
free_extent_buffer(fs_info->sb_buffer); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); iput(fs_info->btree_inode); @@ -645,25 +607,32 @@ int close_ctree(struct btrfs_root *root) return 0; } -void btrfs_mark_buffer_dirty(struct buffer_head *bh) +int btrfs_buffer_uptodate(struct extent_buffer *buf) +{ + struct inode *btree_inode = buf->pages[0]->mapping->host; + return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); +} + +int btrfs_set_buffer_uptodate(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; - u64 transid = btrfs_header_generation(btrfs_buffer_header(bh)); + struct inode *btree_inode = buf->pages[0]->mapping->host; + return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, + buf); +} - WARN_ON(!atomic_read(&bh->b_count)); +void btrfs_mark_buffer_dirty(struct extent_buffer *buf) +{ + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + u64 transid = btrfs_header_generation(buf); + struct inode *btree_inode = root->fs_info->btree_inode; if (transid != root->fs_info->generation) { printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", - (unsigned long long)bh->b_blocknr, + (unsigned long long)extent_buffer_blocknr(buf), transid, root->fs_info->generation); WARN_ON(1); } - mark_buffer_dirty(bh); -} - -void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) -{ - brelse(buf); + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); } void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index fc6b520c6e0..70d9413c599 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -19,68 +19,35 @@ #ifndef __DISKIO__ #define __DISKIO__ -#include - #define BTRFS_SUPER_INFO_OFFSET (16 * 1024) -enum btrfs_bh_state_bits { - BH_Checked = BH_PrivateStart, - BH_Defrag, - BH_DefragDone, -}; -BUFFER_FNS(Checked, checked); -BUFFER_FNS(Defrag, defrag); 
-BUFFER_FNS(DefragDone, defrag_done); - -static inline struct btrfs_node *btrfs_buffer_node(struct buffer_head *bh) -{ - return (struct btrfs_node *)bh->b_data; -} - -static inline struct btrfs_leaf *btrfs_buffer_leaf(struct buffer_head *bh) -{ - return (struct btrfs_leaf *)bh->b_data; -} - -static inline struct btrfs_header *btrfs_buffer_header(struct buffer_head *bh) -{ - return &((struct btrfs_node *)bh->b_data)->header; -} - -struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr); +struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr); int readahead_tree_block(struct btrfs_root *root, u64 blocknr); -struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, - u64 blocknr); -int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf); -int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf); +struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, + u64 blocknr); int clean_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct buffer_head *buf); -int btrfs_commit_transaction(struct btrfs_trans_handle *trans, - struct btrfs_root *root); + struct btrfs_root *root, struct extent_buffer *buf); struct btrfs_root *open_ctree(struct super_block *sb); int close_ctree(struct btrfs_root *root); -void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf); int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root); -struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr); -int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, - char *result); +struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, + u64 blocknr); struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_key *location, const char *name, int namelen); struct btrfs_root 
*btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, struct btrfs_key *location); -u64 bh_blocknr(struct buffer_head *bh); int btrfs_insert_dev_radix(struct btrfs_root *root, struct block_device *bdev, u64 device_id, u64 block_start, u64 num_blocks); -int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, - u64 logical); void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); -void btrfs_mark_buffer_dirty(struct buffer_head *bh); +void btrfs_mark_buffer_dirty(struct extent_buffer *buf); +int btrfs_buffer_uptodate(struct extent_buffer *buf); +int btrfs_set_buffer_uptodate(struct extent_buffer *buf); +int wait_on_tree_block_writeback(struct btrfs_root *root, + struct extent_buffer *buf); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f261a8326cd..089c41cbca7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -33,7 +33,7 @@ static int cache_block_group(struct btrfs_root *root, struct btrfs_path *path; int ret; struct btrfs_key key; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; struct radix_tree_root *extent_radix; int slot; u64 i; @@ -56,7 +56,6 @@ static int cache_block_group(struct btrfs_root *root, path->reada = 2; first_free = block_group->key.objectid; key.objectid = block_group->key.objectid; - key.flags = 0; key.offset = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); @@ -69,9 +68,9 @@ static int cache_block_group(struct btrfs_root *root, path->slots[0]--; while(1) { - leaf = btrfs_buffer_leaf(path->nodes[0]); + leaf = path->nodes[0]; slot = path->slots[0]; - if (slot >= btrfs_header_nritems(&leaf->header)) { + if (slot >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(root, path); if (ret < 0) goto err; @@ -82,7 +81,7 @@ static int cache_block_group(struct btrfs_root *root, } } - btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + btrfs_item_key_to_cpu(leaf, &key, 
slot); if (key.objectid < block_group->key.objectid) { if (key.objectid + key.offset > first_free) first_free = key.objectid + key.offset; @@ -116,8 +115,7 @@ next: hole_size = block_group->key.objectid + block_group->key.offset - last; for (i = 0; i < hole_size; i++) { - set_radix_bit(extent_radix, - last + i); + set_radix_bit(extent_radix, last + i); } } block_group->cached = 1; @@ -366,7 +364,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_path *path; int ret; struct btrfs_key key; - struct btrfs_leaf *l; + struct extent_buffer *l; struct btrfs_extent_item *item; u32 refs; @@ -375,7 +373,6 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, return -ENOMEM; key.objectid = blocknr; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_blocks; ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, @@ -386,10 +383,10 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, BUG(); } BUG_ON(ret != 0); - l = btrfs_buffer_leaf(path->nodes[0]); + l = path->nodes[0]; item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); - refs = btrfs_extent_refs(item); - btrfs_set_extent_refs(item, refs + 1); + refs = btrfs_extent_refs(l, item); + btrfs_set_extent_refs(l, item, refs + 1); btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_release_path(root->fs_info->extent_root, path); @@ -414,23 +411,25 @@ static int lookup_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_path *path; int ret; struct btrfs_key key; - struct btrfs_leaf *l; + struct extent_buffer *l; struct btrfs_extent_item *item; path = btrfs_alloc_path(); key.objectid = blocknr; key.offset = num_blocks; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 0); if (ret < 0) goto out; - if (ret != 0) + if (ret != 0) { + btrfs_print_leaf(root, path->nodes[0]); + printk("failed to find block number %Lu\n", blocknr); BUG(); - l = 
btrfs_buffer_leaf(path->nodes[0]); + } + l = path->nodes[0]; item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); - *refs = btrfs_extent_refs(item); + *refs = btrfs_extent_refs(l, item); out: btrfs_free_path(path); return 0; @@ -439,16 +438,16 @@ out: int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - return btrfs_inc_extent_ref(trans, root, bh_blocknr(root->node), 1); + return btrfs_inc_extent_ref(trans, root, + extent_buffer_blocknr(root->node), 1); } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf) + struct extent_buffer *buf) { u64 blocknr; - struct btrfs_node *buf_node; - struct btrfs_leaf *buf_leaf; - struct btrfs_disk_key *key; + u32 nritems; + struct btrfs_key key; struct btrfs_file_extent_item *fi; int i; int leaf; @@ -458,31 +457,31 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (!root->ref_cows) return 0; - buf_node = btrfs_buffer_node(buf); - leaf = btrfs_is_leaf(buf_node); - buf_leaf = btrfs_buffer_leaf(buf); - for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) { + + leaf = btrfs_is_leaf(buf); + nritems = btrfs_header_nritems(buf); + for (i = 0; i < nritems; i++) { if (leaf) { u64 disk_blocknr; - key = &buf_leaf->items[i].key; - if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) + btrfs_item_key_to_cpu(buf, &key, i); + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; - fi = btrfs_item_ptr(buf_leaf, i, + fi = btrfs_item_ptr(buf, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(fi) == + if (btrfs_file_extent_type(buf, fi) == BTRFS_FILE_EXTENT_INLINE) continue; - disk_blocknr = btrfs_file_extent_disk_blocknr(fi); + disk_blocknr = btrfs_file_extent_disk_blocknr(buf, fi); if (disk_blocknr == 0) continue; ret = btrfs_inc_extent_ref(trans, root, disk_blocknr, - btrfs_file_extent_disk_num_blocks(fi)); + btrfs_file_extent_disk_num_blocks(buf, fi)); if (ret) { faili = i; goto fail; 
} } else { - blocknr = btrfs_node_blockptr(buf_node, i); + blocknr = btrfs_node_blockptr(buf, i); ret = btrfs_inc_extent_ref(trans, root, blocknr, 1); if (ret) { faili = i; @@ -496,22 +495,23 @@ fail: for (i =0; i < faili; i++) { if (leaf) { u64 disk_blocknr; - key = &buf_leaf->items[i].key; - if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) + btrfs_item_key_to_cpu(buf, &key, i); + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; - fi = btrfs_item_ptr(buf_leaf, i, + fi = btrfs_item_ptr(buf, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(fi) == + if (btrfs_file_extent_type(buf, fi) == BTRFS_FILE_EXTENT_INLINE) continue; - disk_blocknr = btrfs_file_extent_disk_blocknr(fi); + disk_blocknr = btrfs_file_extent_disk_blocknr(buf, fi); if (disk_blocknr == 0) continue; err = btrfs_free_extent(trans, root, disk_blocknr, - btrfs_file_extent_disk_num_blocks(fi), 0); + btrfs_file_extent_disk_num_blocks(buf, + fi), 0); BUG_ON(err); } else { - blocknr = btrfs_node_blockptr(buf_node, i); + blocknr = btrfs_node_blockptr(buf, i); err = btrfs_free_extent(trans, root, blocknr, 1, 0); BUG_ON(err); } @@ -527,16 +527,18 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, int ret; int pending_ret; struct btrfs_root *extent_root = root->fs_info->extent_root; - struct btrfs_block_group_item *bi; + unsigned long bi; + struct extent_buffer *leaf; ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); if (ret < 0) goto fail; BUG_ON(ret); - bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], - struct btrfs_block_group_item); - memcpy(bi, &cache->item, sizeof(*bi)); - btrfs_mark_buffer_dirty(path->nodes[0]); + + leaf = path->nodes[0]; + bi = btrfs_item_ptr_offset(leaf, path->slots[0]); + write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); + btrfs_mark_buffer_dirty(leaf); btrfs_release_path(extent_root, path); fail: finish_current_insert(trans, extent_root); @@ -768,11 +770,11 @@ static int 
finish_current_insert(struct btrfs_trans_handle *trans, struct unsigned long gang[8]; struct btrfs_fs_info *info = extent_root->fs_info; - btrfs_set_extent_refs(&extent_item, 1); + btrfs_set_stack_extent_refs(&extent_item, 1); ins.offset = 1; - ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); - btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid); + btrfs_set_stack_extent_owner(&extent_item, + extent_root->root_key.objectid); while(1) { ret = find_first_radix_bit(&info->extent_ins_radix, gang, 0, @@ -795,23 +797,20 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) { int err; - struct btrfs_header *header; - struct buffer_head *bh; + struct extent_buffer *buf; if (!pending) { - bh = btrfs_find_tree_block(root, blocknr); - if (bh) { - if (buffer_uptodate(bh)) { + buf = btrfs_find_tree_block(root, blocknr); + if (buf) { + if (btrfs_buffer_uptodate(buf)) { u64 transid = root->fs_info->running_transaction->transid; - header = btrfs_buffer_header(bh); - if (btrfs_header_generation(header) == - transid) { - btrfs_block_release(root, bh); + if (btrfs_header_generation(buf) == transid) { + free_extent_buffer(buf); return 0; } } - btrfs_block_release(root, bh); + free_extent_buffer(buf); } err = set_radix_bit(&root->fs_info->pinned_radix, blocknr); if (!err) { @@ -839,12 +838,12 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key key; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; + struct extent_buffer *leaf; int ret; struct btrfs_extent_item *ei; u32 refs; key.objectid = blocknr; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_blocks; @@ -856,12 +855,16 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (ret < 0) return ret; BUG_ON(ret); - ei = 
btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], + + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); - BUG_ON(ei->refs == 0); - refs = btrfs_extent_refs(ei) - 1; - btrfs_set_extent_refs(ei, refs); - btrfs_mark_buffer_dirty(path->nodes[0]); + refs = btrfs_extent_refs(leaf, ei); + BUG_ON(refs == 0); + refs -= 1; + btrfs_set_extent_refs(leaf, ei, refs); + btrfs_mark_buffer_dirty(leaf); + if (refs == 0) { u64 super_blocks_used, root_blocks_used; @@ -876,8 +879,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root super_blocks_used - num_blocks); /* block accounting for root item */ - root_blocks_used = btrfs_root_blocks_used(&root->root_item); - btrfs_set_root_blocks_used(&root->root_item, + root_blocks_used = btrfs_root_used(&root->root_item); + btrfs_set_root_used(&root->root_item, root_blocks_used - num_blocks); ret = btrfs_del_item(trans, extent_root, path); @@ -984,7 +987,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root u64 test_block; u64 orig_search_start = search_start; int start_found; - struct btrfs_leaf *l; + struct extent_buffer *l; struct btrfs_root * root = orig_root->fs_info->extent_root; struct btrfs_fs_info *info = root->fs_info; int total_needed = num_blocks; @@ -994,10 +997,10 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int wrapped = 0; WARN_ON(num_blocks < 1); - ins->flags = 0; btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); - level = btrfs_header_level(btrfs_buffer_header(root->node)); + level = btrfs_header_level(root->node); + if (search_end == (u64)-1) search_end = btrfs_super_total_blocks(&info->super_copy); if (hint_block) { @@ -1034,8 +1037,9 @@ check_failed: path->slots[0]--; } - l = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&key, &l->items[path->slots[0]].key); + l = path->nodes[0]; + btrfs_item_key_to_cpu(l, &key, path->slots[0]); + /* * a rare case, go back 
one key if we hit a block group item * instead of an extent item @@ -1055,9 +1059,9 @@ check_failed: } while (1) { - l = btrfs_buffer_leaf(path->nodes[0]); + l = path->nodes[0]; slot = path->slots[0]; - if (slot >= btrfs_header_nritems(&l->header)) { + if (slot >= btrfs_header_nritems(l)) { ret = btrfs_next_leaf(root, path); if (ret == 0) continue; @@ -1075,7 +1079,7 @@ check_failed: goto check_pending; } - btrfs_disk_key_to_cpu(&key, &l->items[slot].key); + btrfs_item_key_to_cpu(l, &key, slot); if (key.objectid >= search_start && key.objectid > last_block && start_found) { if (last_block < search_start) @@ -1183,8 +1187,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root = info->extent_root; struct btrfs_extent_item extent_item; - btrfs_set_extent_refs(&extent_item, 1); - btrfs_set_extent_owner(&extent_item, owner); + btrfs_set_stack_extent_refs(&extent_item, 1); + btrfs_set_stack_extent_owner(&extent_item, owner); WARN_ON(num_blocks < 1); ret = find_free_extent(trans, root, num_blocks, empty_size, @@ -1201,8 +1205,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, num_blocks); /* block accounting for root item */ - root_blocks_used = btrfs_root_blocks_used(&root->root_item); - btrfs_set_root_blocks_used(&root->root_item, root_blocks_used + + root_blocks_used = btrfs_root_used(&root->root_item); + btrfs_set_root_used(&root->root_item, root_blocks_used + num_blocks); if (root == extent_root) { @@ -1241,13 +1245,13 @@ update_block: * helper function to allocate a block for a given tree * returns the tree buffer or NULL. 
*/ -struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 hint, - u64 empty_size) +struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 hint, + u64 empty_size) { struct btrfs_key ins; int ret; - struct buffer_head *buf; + struct extent_buffer *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, 1, empty_size, hint, (u64)-1, &ins, 0); @@ -1260,53 +1264,57 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, btrfs_free_extent(trans, root, ins.objectid, 1, 0); return ERR_PTR(-ENOMEM); } - WARN_ON(buffer_dirty(buf)); - set_buffer_uptodate(buf); + btrfs_set_buffer_uptodate(buf); + set_extent_dirty(&trans->transaction->dirty_pages, buf->start, + buf->start + buf->len - 1, GFP_NOFS); + /* set_buffer_checked(buf); set_buffer_defrag(buf); - set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index); + */ + /* FIXME!!!!!!!!!!!!!!!! + set_radix_bit(&trans->transaction->dirty_pages, buf->pages[0]->index); + */ trans->blocks_used++; return buf; } static int drop_leaf_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct buffer_head *cur) + struct btrfs_root *root, struct extent_buffer *leaf) { - struct btrfs_disk_key *key; - struct btrfs_leaf *leaf; + struct btrfs_key key; struct btrfs_file_extent_item *fi; int i; int nritems; int ret; - BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur))); - leaf = btrfs_buffer_leaf(cur); - nritems = btrfs_header_nritems(&leaf->header); + BUG_ON(!btrfs_is_leaf(leaf)); + nritems = btrfs_header_nritems(leaf); for (i = 0; i < nritems; i++) { u64 disk_blocknr; - key = &leaf->items[i].key; - if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) + + btrfs_item_key_to_cpu(leaf, &key, i); + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE) + if 
(btrfs_file_extent_type(leaf, fi) == + BTRFS_FILE_EXTENT_INLINE) continue; /* * FIXME make sure to insert a trans record that * repeats the snapshot del on crash */ - disk_blocknr = btrfs_file_extent_disk_blocknr(fi); + disk_blocknr = btrfs_file_extent_disk_blocknr(leaf, fi); if (disk_blocknr == 0) continue; ret = btrfs_free_extent(trans, root, disk_blocknr, - btrfs_file_extent_disk_num_blocks(fi), - 0); + btrfs_file_extent_disk_num_blocks(leaf, fi), 0); BUG_ON(ret); } return 0; } static void reada_walk_down(struct btrfs_root *root, - struct btrfs_node *node) + struct extent_buffer *node) { int i; u32 nritems; @@ -1314,7 +1322,7 @@ static void reada_walk_down(struct btrfs_root *root, int ret; u32 refs; - nritems = btrfs_header_nritems(&node->header); + nritems = btrfs_header_nritems(node); for (i = 0; i < nritems; i++) { blocknr = btrfs_node_blockptr(node, i); ret = lookup_extent_ref(NULL, root, blocknr, 1, &refs); @@ -1337,16 +1345,17 @@ static void reada_walk_down(struct btrfs_root *root, static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int *level) { - struct buffer_head *next; - struct buffer_head *cur; + struct extent_buffer *next; + struct extent_buffer *cur; u64 blocknr; int ret; u32 refs; WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); - ret = lookup_extent_ref(trans, root, bh_blocknr(path->nodes[*level]), - 1, &refs); + ret = lookup_extent_ref(trans, root, + extent_buffer_blocknr(path->nodes[*level]), + 1, &refs); BUG_ON(ret); if (refs > 1) goto out; @@ -1360,21 +1369,20 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root cur = path->nodes[*level]; if (*level > 0 && path->slots[*level] == 0) - reada_walk_down(root, btrfs_buffer_node(cur)); + reada_walk_down(root, cur); - if (btrfs_header_level(btrfs_buffer_header(cur)) != *level) + if (btrfs_header_level(cur) != *level) WARN_ON(1); if (path->slots[*level] >= - btrfs_header_nritems(btrfs_buffer_header(cur))) + 
btrfs_header_nritems(cur)) break; if (*level == 0) { ret = drop_leaf_ref(trans, root, cur); BUG_ON(ret); break; } - blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur), - path->slots[*level]); + blocknr = btrfs_node_blockptr(cur, path->slots[*level]); ret = lookup_extent_ref(trans, root, blocknr, 1, &refs); BUG_ON(ret); if (refs != 1) { @@ -1384,8 +1392,8 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root continue; } next = btrfs_find_tree_block(root, blocknr); - if (!next || !buffer_uptodate(next)) { - brelse(next); + if (!next || !btrfs_buffer_uptodate(next)) { + free_extent_buffer(next); mutex_unlock(&root->fs_info->fs_mutex); next = read_tree_block(root, blocknr); mutex_lock(&root->fs_info->fs_mutex); @@ -1395,7 +1403,7 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(ret); if (refs != 1) { path->slots[*level]++; - brelse(next); + free_extent_buffer(next); ret = btrfs_free_extent(trans, root, blocknr, 1, 1); BUG_ON(ret); @@ -1404,17 +1412,17 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root } WARN_ON(*level <= 0); if (path->nodes[*level-1]) - btrfs_block_release(root, path->nodes[*level-1]); + free_extent_buffer(path->nodes[*level-1]); path->nodes[*level-1] = next; - *level = btrfs_header_level(btrfs_buffer_header(next)); + *level = btrfs_header_level(next); path->slots[*level] = 0; } out: WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); ret = btrfs_free_extent(trans, root, - bh_blocknr(path->nodes[*level]), 1, 1); - btrfs_block_release(root, path->nodes[*level]); + extent_buffer_blocknr(path->nodes[*level]), 1, 1); + free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; BUG_ON(ret); @@ -1436,24 +1444,24 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { slot = path->slots[i]; - if (slot < btrfs_header_nritems( - 
btrfs_buffer_header(path->nodes[i])) - 1) { - struct btrfs_node *node; - node = btrfs_buffer_node(path->nodes[i]); + if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { + struct extent_buffer *node; + struct btrfs_disk_key disk_key; + node = path->nodes[i]; path->slots[i]++; *level = i; WARN_ON(*level == 0); + btrfs_node_key(node, &disk_key, path->slots[i]); memcpy(&root_item->drop_progress, - &node->ptrs[path->slots[i]].key, - sizeof(root_item->drop_progress)); + &disk_key, sizeof(disk_key)); root_item->drop_level = i; return 0; } else { ret = btrfs_free_extent(trans, root, - bh_blocknr(path->nodes[*level]), - 1, 1); + extent_buffer_blocknr(path->nodes[*level]), + 1, 1); BUG_ON(ret); - btrfs_block_release(root, path->nodes[*level]); + free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level = i + 1; } @@ -1480,15 +1488,15 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root path = btrfs_alloc_path(); BUG_ON(!path); - level = btrfs_header_level(btrfs_buffer_header(root->node)); + level = btrfs_header_level(root->node); orig_level = level; if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { path->nodes[level] = root->node; path->slots[level] = 0; } else { struct btrfs_key key; - struct btrfs_disk_key *found_key; - struct btrfs_node *node; + struct btrfs_disk_key found_key; + struct extent_buffer *node; btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); level = root_item->drop_level; @@ -1498,10 +1506,10 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root ret = wret; goto out; } - node = btrfs_buffer_node(path->nodes[level]); - found_key = &node->ptrs[path->slots[level]].key; - WARN_ON(memcmp(found_key, &root_item->drop_progress, - sizeof(*found_key))); + node = path->nodes[level]; + btrfs_node_key(node, &found_key, path->slots[level]); + WARN_ON(memcmp(&found_key, &root_item->drop_progress, + sizeof(found_key))); } while(1) { wret = walk_down_tree(trans, root, path, &level); 
@@ -1516,12 +1524,12 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root if (wret < 0) ret = wret; ret = -EAGAIN; - get_bh(root->node); + extent_buffer_get(root->node); break; } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { - btrfs_block_release(root, path->nodes[i]); + free_extent_buffer(path->nodes[i]); path->nodes[i] = 0; } } @@ -1581,13 +1589,12 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_path *path; int ret; int err = 0; - struct btrfs_block_group_item *bi; struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; struct radix_tree_root *radix; struct btrfs_key key; struct btrfs_key found_key; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; u64 group_size_blocks; u64 used; @@ -1596,7 +1603,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) root = info->extent_root; key.objectid = 0; key.offset = group_size_blocks; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); path = btrfs_alloc_path(); @@ -1610,18 +1616,18 @@ int btrfs_read_block_groups(struct btrfs_root *root) err = ret; break; } - leaf = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&found_key, - &leaf->items[path->slots[0]].key); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); cache = kmalloc(sizeof(*cache), GFP_NOFS); if (!cache) { err = -1; break; } - bi = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_block_group_item); - if (bi->flags & BTRFS_BLOCK_GROUP_DATA) { + read_extent_buffer(leaf, &cache->item, + btrfs_item_ptr_offset(leaf, path->slots[0]), + sizeof(cache->item)); + if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) { radix = &info->block_group_data_radix; cache->data = 1; } else { @@ -1629,7 +1635,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) cache->data = 0; } - memcpy(&cache->item, bi, sizeof(*bi)); memcpy(&cache->key, &found_key, sizeof(found_key)); cache->last_alloc = cache->key.objectid; 
cache->first_free = cache->key.objectid; @@ -1640,11 +1645,12 @@ int btrfs_read_block_groups(struct btrfs_root *root) key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); + ret = radix_tree_insert(radix, found_key.objectid + found_key.offset - 1, (void *)cache); BUG_ON(ret); - used = btrfs_block_group_used(bi); + used = btrfs_block_group_used(&cache->item); if (used < div_factor(key.offset, 8)) { radix_tree_tag_set(radix, found_key.objectid + found_key.offset - 1, diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 0ab368e091f..55fefdfab84 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -26,16 +26,6 @@ struct tree_entry { struct rb_node rb_node; }; -/* bits for the extent state */ -#define EXTENT_DIRTY 1 -#define EXTENT_WRITEBACK (1 << 1) -#define EXTENT_UPTODATE (1 << 2) -#define EXTENT_LOCKED (1 << 3) -#define EXTENT_NEW (1 << 4) -#define EXTENT_DELALLOC (1 << 5) - -#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) - void __init extent_map_init(void) { extent_map_cache = btrfs_cache_create("extent_map", @@ -223,7 +213,8 @@ int add_extent_mapping(struct extent_map_tree *tree, if (rb) prev = rb_entry(rb, struct extent_map, rb_node); if (prev && prev->end + 1 == em->start && - ((em->block_start == 0 && prev->block_start == 0) || + ((em->block_start == EXTENT_MAP_HOLE && + prev->block_start == EXTENT_MAP_HOLE) || (em->block_start == prev->block_end + 1))) { em->start = prev->start; em->block_start = prev->block_start; @@ -926,6 +917,40 @@ int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end) } EXPORT_SYMBOL(set_range_writeback); +int find_first_extent_bit(struct extent_map_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, int bits) +{ + struct rb_node *node; + struct extent_state *state; + int ret = 1; + + write_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. 
+ */ + node = tree_search(&tree->state, start); + if (!node || IS_ERR(node)) { + goto out; + } + + while(1) { + state = rb_entry(node, struct extent_state, rb_node); + if (state->state & bits) { + *start_ret = state->start; + *end_ret = state->end; + ret = 0; + } + node = rb_next(node); + if (!node) + break; + } +out: + write_unlock_irq(&tree->lock); + return ret; +} +EXPORT_SYMBOL(find_first_extent_bit); + u64 find_lock_delalloc_range(struct extent_map_tree *tree, u64 start, u64 lock_start, u64 *end, u64 max_bytes) { @@ -1450,7 +1475,7 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, em = NULL; /* we've found a hole, just zero and go on */ - if (block_start == 0) { + if (block_start == EXTENT_MAP_HOLE) { zero_user_page(page, page_offset, iosize, KM_USER0); set_extent_uptodate(tree, cur, cur + iosize - 1, GFP_NOFS); @@ -1593,7 +1618,8 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, free_extent_map(em); em = NULL; - if (block_start == 0 || block_start == EXTENT_MAP_INLINE) { + if (block_start == EXTENT_MAP_HOLE || + block_start == EXTENT_MAP_INLINE) { clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); cur = cur + iosize; @@ -1630,7 +1656,6 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, nr++; } done: - WARN_ON(test_range_bit(tree, start, page_end, EXTENT_DIRTY, 0)); unlock_extent(tree, start, page_end, GFP_NOFS); unlock_page(page); return 0; @@ -1827,8 +1852,623 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock, // XXX(hch): block 0 is valid in some cases, e.g. 
XFS RT device if (em->block_start == EXTENT_MAP_INLINE || - em->block_start == 0) - return 0; + em->block_start == EXTENT_MAP_HOLE) + return 0; return (em->block_start + start - em->start) >> inode->i_blkbits; } + +struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, + u64 start, unsigned long len, + gfp_t mask) +{ + unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) - + (start >> PAGE_CACHE_SHIFT) + 1; + unsigned long i; + unsigned long index = start >> PAGE_CACHE_SHIFT; + struct extent_buffer *eb; + struct page *p; + struct address_space *mapping = tree->mapping; + int uptodate = 0; + + eb = kzalloc(EXTENT_BUFFER_SIZE(num_pages), mask); + if (!eb || IS_ERR(eb)) + return NULL; + + eb->start = start; + eb->len = len; + atomic_set(&eb->refs, 1); + + for (i = 0; i < num_pages; i++, index++) { + p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); + if (!p) + goto fail; + eb->pages[i] = p; + if (!PageUptodate(p)) + uptodate = 0; + unlock_page(p); + } + if (uptodate) + eb->flags |= EXTENT_UPTODATE; + return eb; +fail: + free_extent_buffer(eb); + return NULL; +} +EXPORT_SYMBOL(alloc_extent_buffer); + +struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, + u64 start, unsigned long len, + gfp_t mask) +{ + unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) - + (start >> PAGE_CACHE_SHIFT) + 1; + unsigned long i; + unsigned long index = start >> PAGE_CACHE_SHIFT; + struct extent_buffer *eb; + struct page *p; + struct address_space *mapping = tree->mapping; + + eb = kzalloc(EXTENT_BUFFER_SIZE(num_pages), mask); + if (!eb || IS_ERR(eb)) + return NULL; + + eb->start = start; + eb->len = len; + atomic_set(&eb->refs, 1); + + for (i = 0; i < num_pages; i++, index++) { + p = find_get_page(mapping, index); + if (!p) + goto fail; + eb->pages[i] = p; + } + return eb; +fail: + free_extent_buffer(eb); + return NULL; +} +EXPORT_SYMBOL(find_extent_buffer); + +void free_extent_buffer(struct extent_buffer *eb) +{ + 
unsigned long i; + unsigned long num_pages; + + if (!eb) + return; + + if (!atomic_dec_and_test(&eb->refs)) + return; + + num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - + (eb->start >> PAGE_CACHE_SHIFT) + 1; + + for (i = 0; i < num_pages; i++) { + if (eb->pages[i]) + page_cache_release(eb->pages[i]); + } + kfree(eb); +} +EXPORT_SYMBOL(free_extent_buffer); + +int clear_extent_buffer_dirty(struct extent_map_tree *tree, + struct extent_buffer *eb) +{ + int set; + unsigned long i; + unsigned long num_pages; + struct page *page; + + u64 start = eb->start; + u64 end = start + eb->len - 1; + + set = clear_extent_dirty(tree, start, end, GFP_NOFS); + num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - + (eb->start >> PAGE_CACHE_SHIFT) + 1; + + for (i = 0; i < num_pages; i++) { + page = eb->pages[i]; + lock_page(page); + /* + * if we're on the last page or the first page and the + * block isn't aligned on a page boundary, do extra checks + * to make sure we don't clean page that is partially dirty + */ + if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || + ((i == num_pages - 1) && + ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) { + start = page->index << PAGE_CACHE_SHIFT; + end = start + PAGE_CACHE_SIZE - 1; + if (test_range_bit(tree, start, end, + EXTENT_DIRTY, 0)) { + unlock_page(page); + continue; + } + } + clear_page_dirty_for_io(page); + unlock_page(page); + } + return 0; +} +EXPORT_SYMBOL(clear_extent_buffer_dirty); + +int wait_on_extent_buffer_writeback(struct extent_map_tree *tree, + struct extent_buffer *eb) +{ + return wait_on_extent_writeback(tree, eb->start, + eb->start + eb->len - 1); +} +EXPORT_SYMBOL(wait_on_extent_buffer_writeback); + +int set_extent_buffer_dirty(struct extent_map_tree *tree, + struct extent_buffer *eb) +{ + return set_range_dirty(tree, eb->start, eb->start + eb->len - 1); +} +EXPORT_SYMBOL(set_extent_buffer_dirty); + +int set_extent_buffer_uptodate(struct extent_map_tree *tree, + struct extent_buffer *eb) 
+{ + unsigned long i; + struct page *page; + unsigned long num_pages; + + num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - + (eb->start >> PAGE_CACHE_SHIFT) + 1; + + set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, + GFP_NOFS); + for (i = 0; i < num_pages; i++) { + page = eb->pages[i]; + if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || + ((i == num_pages - 1) && + ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) { + check_page_uptodate(tree, page); + continue; + } + SetPageUptodate(page); + } + return 0; +} +EXPORT_SYMBOL(set_extent_buffer_uptodate); + +int extent_buffer_uptodate(struct extent_map_tree *tree, + struct extent_buffer *eb) +{ + if (eb->flags & EXTENT_UPTODATE) + return 1; + return test_range_bit(tree, eb->start, eb->start + eb->len - 1, + EXTENT_UPTODATE, 1); +} +EXPORT_SYMBOL(extent_buffer_uptodate); + +int read_extent_buffer_pages(struct extent_map_tree *tree, + struct extent_buffer *eb, int wait) +{ + unsigned long i; + struct page *page; + int err; + int ret = 0; + unsigned long num_pages; + + if (eb->flags & EXTENT_UPTODATE) + return 0; + + if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, + EXTENT_UPTODATE, 1)) { + return 0; + } + + num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - + (eb->start >> PAGE_CACHE_SHIFT) + 1; + for (i = 0; i < num_pages; i++) { + page = eb->pages[i]; + if (PageUptodate(page)) { + continue; + } + if (!wait) { + if (TestSetPageLocked(page)) { + continue; + } + } else { + lock_page(page); + } + if (!PageUptodate(page)) { + err = page->mapping->a_ops->readpage(NULL, page); + if (err) { + ret = err; + } + } else { + unlock_page(page); + } + } + + if (ret || !wait) { + return ret; + } + + for (i = 0; i < num_pages; i++) { + page = eb->pages[i]; + wait_on_page_locked(page); + if (!PageUptodate(page)) { + ret = -EIO; + } + } + eb->flags |= EXTENT_UPTODATE; + return ret; +} +EXPORT_SYMBOL(read_extent_buffer_pages); + +void read_extent_buffer(struct extent_buffer *eb, 
void *dstv, + unsigned long start, + unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char *dst = (char *)dstv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + page = eb->pages[i]; + offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); + if (i == 0) + offset += start_offset; + + while(len > 0) { + WARN_ON(!PageUptodate(page)); + + cur = min(len, (PAGE_CACHE_SIZE - offset)); + // kaddr = kmap_atomic(page, KM_USER0); + kaddr = page_address(page); + memcpy(dst, kaddr + offset, cur); + // kunmap_atomic(kaddr, KM_USER0); + + dst += cur; + len -= cur; + offset = 0; + i++; + page = eb->pages[i]; + } +} +EXPORT_SYMBOL(read_extent_buffer); + +int map_extent_buffer(struct extent_buffer *eb, unsigned long start, + char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km) +{ + size_t offset; + char *kaddr; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + + WARN_ON(start > eb->len); + + if (i == 0) { + offset = start_offset; + *map_start = 0; + } else { + offset = 0; + *map_start = (i << PAGE_CACHE_SHIFT) - offset; + } + + // kaddr = kmap_atomic(eb->pages[i], km); + kaddr = page_address(eb->pages[i]); + *token = kaddr; + *map = kaddr + offset; + *map_len = PAGE_CACHE_SIZE - offset; + return 0; +} +EXPORT_SYMBOL(map_extent_buffer); + +void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) +{ + // kunmap_atomic(token, km); +} +EXPORT_SYMBOL(unmap_extent_buffer); + +int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, + unsigned long start, + unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char *ptr = (char *)ptrv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = 
(start_offset + start) >> PAGE_CACHE_SHIFT; + int ret = 0; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + page = eb->pages[i]; + offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); + if (i == 0) + offset += start_offset; + + while(len > 0) { + WARN_ON(!PageUptodate(page)); + + cur = min(len, (PAGE_CACHE_SIZE - offset)); + + // kaddr = kmap_atomic(page, KM_USER0); + kaddr = page_address(page); + ret = memcmp(ptr, kaddr + offset, cur); + // kunmap_atomic(kaddr, KM_USER0); + if (ret) + break; + + ptr += cur; + len -= cur; + offset = 0; + i++; + page = eb->pages[i]; + } + return ret; +} +EXPORT_SYMBOL(memcmp_extent_buffer); + +void write_extent_buffer(struct extent_buffer *eb, const void *srcv, + unsigned long start, unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char *src = (char *)srcv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + page = eb->pages[i]; + offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); + if (i == 0) + offset += start_offset; + + while(len > 0) { + WARN_ON(!PageUptodate(page)); + + cur = min(len, PAGE_CACHE_SIZE - offset); + // kaddr = kmap_atomic(page, KM_USER0); + kaddr = page_address(page); + memcpy(kaddr + offset, src, cur); + // kunmap_atomic(kaddr, KM_USER0); + + src += cur; + len -= cur; + offset = 0; + i++; + page = eb->pages[i]; + } +} +EXPORT_SYMBOL(write_extent_buffer); + +void memset_extent_buffer(struct extent_buffer *eb, char c, + unsigned long start, unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + page = eb->pages[i]; + offset = start & ((unsigned 
long)PAGE_CACHE_SIZE - 1); + if (i == 0) + offset += start_offset; + + while(len > 0) { + WARN_ON(!PageUptodate(page)); + + cur = min(len, PAGE_CACHE_SIZE - offset); + // kaddr = kmap_atomic(page, KM_USER0); + kaddr = page_address(page); + memset(kaddr + offset, c, cur); + // kunmap_atomic(kaddr, KM_USER0); + + len -= cur; + offset = 0; + i++; + page = eb->pages[i]; + } +} +EXPORT_SYMBOL(memset_extent_buffer); + +void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, + unsigned long dst_offset, unsigned long src_offset, + unsigned long len) +{ + u64 dst_len = dst->len; + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; + + WARN_ON(src->len != dst_len); + + offset = dst_offset & ((unsigned long)PAGE_CACHE_SIZE - 1); + if (i == 0) + offset += start_offset; + + while(len > 0) { + page = dst->pages[i]; + WARN_ON(!PageUptodate(page)); + + cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); + + // kaddr = kmap_atomic(page, KM_USER1); + kaddr = page_address(page); + read_extent_buffer(src, kaddr + offset, src_offset, cur); + // kunmap_atomic(kaddr, KM_USER1); + + src_offset += cur; + len -= cur; + offset = 0; + i++; + } +} +EXPORT_SYMBOL(copy_extent_buffer); + +static void move_pages(struct page *dst_page, struct page *src_page, + unsigned long dst_off, unsigned long src_off, + unsigned long len) +{ + // char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); + char *dst_kaddr = page_address(dst_page); + if (dst_page == src_page) { + memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); + } else { + // char *src_kaddr = kmap_atomic(src_page, KM_USER1); + char *src_kaddr = page_address(src_page); + char *p = dst_kaddr + dst_off + len; + char *s = src_kaddr + src_off + len; + + while (len--) + *--p = *--s; + + // kunmap_atomic(src_kaddr, KM_USER1); + } + // kunmap_atomic(dst_kaddr, KM_USER0); +} + 
+/*
+ * Copy len bytes from offset src_off of src_page to offset dst_off of
+ * dst_page.
+ *
+ * Both pages are addressed through page_address(); the kmap_atomic()
+ * variant (KM_USER0 for the destination, KM_USER1 for the source) was
+ * left disabled in the original and is not wired up here.
+ *
+ * When source and destination are the same page the two byte ranges may
+ * overlap. memcpy() is undefined for overlapping ranges, so that case
+ * goes through memmove() instead.
+ */
+static void copy_pages(struct page *dst_page, struct page *src_page,
+		       unsigned long dst_off, unsigned long src_off,
+		       unsigned long len)
+{
+	char *dst_kaddr = page_address(dst_page);
+	char *src_kaddr;
+
+	if (dst_page == src_page) {
+		memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
+		return;
+	}
+
+	src_kaddr = page_address(src_page);
+	memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
+}
+
+/*
+ * Copy len bytes inside one extent buffer, from src_offset to dst_offset,
+ * walking both ranges front to back one page-bounded chunk at a time.
+ *
+ * Offsets are relative to the start of the buffer. dst->start need not be
+ * page aligned, so its offset inside the first page (start_offset) is
+ * added before an offset is split into a page index and an offset within
+ * that page; both halves must be derived from the same sum.
+ *
+ * A range that does not fit inside dst->len is reported with printk and
+ * then hits BUG_ON().
+ */
+void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
+			   unsigned long src_offset, unsigned long len)
+{
+	size_t cur;
+	size_t dst_off_in_page;
+	size_t src_off_in_page;
+	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long dst_i;
+	unsigned long src_i;
+
+	/* phrased so that offset + len cannot wrap around the check */
+	if (len > dst->len || src_offset > dst->len - len) {
+		printk("memcpy bogus src_offset %lu move len %lu len %lu\n",
+		       src_offset, len, dst->len);
+		BUG_ON(1);
+	}
+	if (len > dst->len || dst_offset > dst->len - len) {
+		printk("memcpy bogus dst_offset %lu move len %lu len %lu\n",
+		       dst_offset, len, dst->len);
+		BUG_ON(1);
+	}
+
+	while (len > 0) {
+		dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
+		src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
+
+		dst_off_in_page = (start_offset + dst_offset) &
+			((unsigned long)PAGE_CACHE_SIZE - 1);
+		src_off_in_page = (start_offset + src_offset) &
+			((unsigned long)PAGE_CACHE_SIZE - 1);
+
+		/* stop at whichever page boundary comes first */
+		cur = min_t(unsigned long, len,
+			    PAGE_CACHE_SIZE - src_off_in_page);
+		cur = min_t(unsigned long, cur,
+			    PAGE_CACHE_SIZE - dst_off_in_page);
+
+		copy_pages(dst->pages[dst_i], dst->pages[src_i],
+			   dst_off_in_page, src_off_in_page, cur);
+
+		src_offset += cur;
+		dst_offset += cur;
+		len -= cur;
+	}
+}
+EXPORT_SYMBOL(memcpy_extent_buffer);
+
+/*
+ * Move len bytes inside one extent buffer from src_offset to dst_offset;
+ * the two ranges may overlap.
+ *
+ * When the destination starts below the source a front-to-back copy never
+ * overwrites bytes that are still to be read, so the work is handed to
+ * memcpy_extent_buffer(). Otherwise the ranges are walked back to front:
+ * dst_end and src_end track the last byte still to be moved, and each
+ * step moves the chunk that ends there and reaches back no further than
+ * the start of the current source or destination page.
+ *
+ * A range that does not fit inside dst->len is reported with printk and
+ * then hits BUG_ON().
+ */
+void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
+			   unsigned long src_offset, unsigned long len)
+{
+	size_t cur;
+	size_t dst_off_in_page;
+	size_t src_off_in_page;
+	unsigned long dst_end;
+	unsigned long src_end;
+	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long dst_i;
+	unsigned long src_i;
+
+	/* phrased so that offset + len cannot wrap around the check */
+	if (len > dst->len || src_offset > dst->len - len) {
+		printk("memmove bogus src_offset %lu move len %lu len %lu\n",
+		       src_offset, len, dst->len);
+		BUG_ON(1);
+	}
+	if (len > dst->len || dst_offset > dst->len - len) {
+		printk("memmove bogus dst_offset %lu move len %lu len %lu\n",
+		       dst_offset, len, dst->len);
+		BUG_ON(1);
+	}
+	if (dst_offset < src_offset) {
+		memcpy_extent_buffer(dst, dst_offset, src_offset, len);
+		return;
+	}
+
+	/* only read inside the loop, which is skipped when len == 0 */
+	dst_end = dst_offset + len - 1;
+	src_end = src_offset + len - 1;
+
+	while (len > 0) {
+		dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
+		src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
+
+		dst_off_in_page = (start_offset + dst_end) &
+			((unsigned long)PAGE_CACHE_SIZE - 1);
+		src_off_in_page = (start_offset + src_end) &
+			((unsigned long)PAGE_CACHE_SIZE - 1);
+
+		/* bytes available from the page start up to the end byte */
+		cur = min_t(unsigned long, len, src_off_in_page + 1);
+		cur = min_t(unsigned long, cur, dst_off_in_page + 1);
+
+		move_pages(dst->pages[dst_i], dst->pages[src_i],
+			   dst_off_in_page - cur + 1,
+			   src_off_in_page - cur + 1, cur);
+
+		/* step back by the whole chunk that was just moved */
+		dst_end -= cur;
+		src_end -= cur;
+		len -= cur;
+	}
+}
+EXPORT_SYMBOL(memmove_extent_buffer);
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index d913ce01248..430b997a70f 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -3,9 +3,20 @@
 #include
+#define EXTENT_MAP_HOLE (u64)-3
 #define EXTENT_MAP_INLINE (u64)-2
 #define EXTENT_MAP_DELALLOC (u64)-1
+/* bits for the extent state */
+#define EXTENT_DIRTY 1
+#define EXTENT_WRITEBACK (1 << 1)
+#define EXTENT_UPTODATE (1 << 2)
+#define EXTENT_LOCKED (1 << 3)
+#define EXTENT_NEW (1 << 4) +#define EXTENT_DELALLOC (1 << 5) +#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) + + struct extent_map_ops { int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); int (*writepage_io_hook)(struct page *page, u64 start, u64 end); @@ -51,10 +62,13 @@ struct extent_state { struct list_head list; }; +#define EXTENT_BUFFER_SIZE(nr) (sizeof(struct extent_buffer) + \ + (nr - 1) * sizeof(struct page *)) struct extent_buffer { u64 start; - u64 end; /* inclusive */ - char *addr; + unsigned long len; + atomic_t refs; + int flags; struct page *pages[]; }; @@ -87,8 +101,12 @@ int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); +int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask); int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); +int find_first_extent_bit(struct extent_map_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, int bits); int extent_invalidatepage(struct extent_map_tree *tree, struct page *page, unsigned long offset); int extent_write_full_page(struct extent_map_tree *tree, struct page *page, @@ -106,4 +124,57 @@ int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end); int set_state_private(struct extent_map_tree *tree, u64 start, u64 private); int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private); void set_page_extent_mapped(struct page *page); + +struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, + u64 start, unsigned long len, + gfp_t mask); +struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, + u64 start, unsigned long len, + gfp_t mask); +void free_extent_buffer(struct extent_buffer *eb); +int read_extent_buffer_pages(struct extent_map_tree *tree, + struct extent_buffer *eb, int wait); + +static inline void extent_buffer_get(struct extent_buffer *eb) 
+{ + atomic_inc(&eb->refs); +} + +static inline u64 extent_buffer_blocknr(struct extent_buffer *eb) +{ + return eb->start / 4096; +} + +int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, + unsigned long start, + unsigned long len); +void read_extent_buffer(struct extent_buffer *eb, void *dst, + unsigned long start, + unsigned long len); +void write_extent_buffer(struct extent_buffer *eb, const void *src, + unsigned long start, unsigned long len); +void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, + unsigned long dst_offset, unsigned long src_offset, + unsigned long len); +void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len); +void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len); +void memset_extent_buffer(struct extent_buffer *eb, char c, + unsigned long start, unsigned long len); +int wait_on_extent_buffer_writeback(struct extent_map_tree *tree, + struct extent_buffer *eb); +int clear_extent_buffer_dirty(struct extent_map_tree *tree, + struct extent_buffer *eb); +int set_extent_buffer_dirty(struct extent_map_tree *tree, + struct extent_buffer *eb); +int set_extent_buffer_uptodate(struct extent_map_tree *tree, + struct extent_buffer *eb); +int extent_buffer_uptodate(struct extent_map_tree *tree, + struct extent_buffer *eb); +int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, + char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km); +void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); #endif diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index ea4dd4c5fce..226f6d028c3 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -34,12 +34,12 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_file_extent_item *item; struct btrfs_key file_key; struct btrfs_path 
*path; + struct extent_buffer *leaf; path = btrfs_alloc_path(); BUG_ON(!path); file_key.objectid = objectid; file_key.offset = pos; - file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); ret = btrfs_insert_empty_item(trans, root, path, &file_key, @@ -47,15 +47,16 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, if (ret < 0) goto out; BUG_ON(ret); - item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_disk_blocknr(item, offset); - btrfs_set_file_extent_disk_num_blocks(item, disk_num_blocks); - btrfs_set_file_extent_offset(item, 0); - btrfs_set_file_extent_num_blocks(item, num_blocks); - btrfs_set_file_extent_generation(item, trans->transid); - btrfs_set_file_extent_type(item, BTRFS_FILE_EXTENT_REG); - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_set_file_extent_disk_blocknr(leaf, item, offset); + btrfs_set_file_extent_disk_num_blocks(leaf, item, disk_num_blocks); + btrfs_set_file_extent_offset(leaf, item, 0); + btrfs_set_file_extent_num_blocks(leaf, item, num_blocks); + btrfs_set_file_extent_generation(leaf, item, trans->transid); + btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG); + btrfs_mark_buffer_dirty(leaf); out: btrfs_free_path(path); return ret; @@ -71,32 +72,30 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, struct btrfs_key file_key; struct btrfs_key found_key; struct btrfs_csum_item *item; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; u64 csum_offset = 0; int csums_in_item; file_key.objectid = objectid; file_key.offset = offset; - file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); if (ret < 0) goto fail; - leaf = btrfs_buffer_leaf(path->nodes[0]); + leaf = path->nodes[0]; if (ret > 0) { ret = 1; if (path->slots[0] == 0) goto fail; 
path->slots[0]--; - btrfs_disk_key_to_cpu(&found_key, - &leaf->items[path->slots[0]].key); + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || found_key.objectid != objectid) { goto fail; } csum_offset = (offset - found_key.offset) >> root->fs_info->sb->s_blocksize_bits; - csums_in_item = btrfs_item_size(leaf->items + path->slots[0]); + csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]); csums_in_item /= BTRFS_CRC32_SIZE; if (csum_offset >= csums_in_item) { @@ -127,7 +126,6 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, file_key.objectid = objectid; file_key.offset = offset; - file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); return ret; @@ -138,12 +136,14 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, u64 objectid, u64 offset, char *data, size_t len) { + return 0; +#if 0 int ret; struct btrfs_key file_key; struct btrfs_key found_key; struct btrfs_path *path; struct btrfs_csum_item *item; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; u64 csum_offset; path = btrfs_alloc_path(); @@ -161,8 +161,8 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, if (ret == -EFBIG) { u32 item_size; /* we found one, but it isn't big enough yet */ - leaf = btrfs_buffer_leaf(path->nodes[0]); - item_size = btrfs_item_size(leaf->items + path->slots[0]); + leaf = path->nodes[0]; + item_size = btrfs_item_size_nr(leaf, path->slots[0]); if ((item_size / BTRFS_CRC32_SIZE) >= MAX_CSUM_ITEMS(root)) { /* already at max size, make a new one */ goto insert; @@ -188,8 +188,8 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, goto insert; } path->slots[0]--; - leaf = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&found_key, &leaf->items[path->slots[0]].key); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); csum_offset = 
(offset - found_key.offset) >> root->fs_info->sb->s_blocksize_bits; if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || @@ -197,10 +197,10 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, csum_offset >= MAX_CSUM_ITEMS(root)) { goto insert; } - if (csum_offset >= btrfs_item_size(leaf->items + path->slots[0]) / + if (csum_offset >= btrfs_item_size_nr(leaf, path->slots[0]) / BTRFS_CRC32_SIZE) { u32 diff = (csum_offset + 1) * BTRFS_CRC32_SIZE; - diff = diff - btrfs_item_size(leaf->items + path->slots[0]); + diff = diff - btrfs_item_size_nr(leaf, path->slots[0]); if (diff != BTRFS_CRC32_SIZE) goto insert; ret = btrfs_extend_item(trans, root, path, diff); @@ -220,21 +220,20 @@ insert: goto fail; } csum: - item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], - struct btrfs_csum_item); + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); ret = 0; item = (struct btrfs_csum_item *)((unsigned char *)item + csum_offset * BTRFS_CRC32_SIZE); found: - btrfs_check_bounds(&item->csum, BTRFS_CRC32_SIZE, - path->nodes[0]->b_data, - root->fs_info->sb->s_blocksize); + /* FIXME!!!!!!!!!!!! 
*/ ret = btrfs_csum_data(root, data, len, &item->csum); btrfs_mark_buffer_dirty(path->nodes[0]); fail: btrfs_release_path(root, path); btrfs_free_path(path); return ret; +#endif } int btrfs_csum_truncate(struct btrfs_trans_handle *trans, @@ -242,21 +241,21 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, u64 isize) { struct btrfs_key key; - struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[0]); + struct extent_buffer *leaf = path->nodes[0]; int slot = path->slots[0]; int ret; u32 new_item_size; u64 new_item_span; u64 blocks; - btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + btrfs_item_key_to_cpu(leaf, &key, slot); if (isize <= key.offset) return 0; new_item_span = isize - key.offset; - blocks = (new_item_span + root->blocksize - 1) >> + blocks = (new_item_span + root->sectorsize - 1) >> root->fs_info->sb->s_blocksize_bits; new_item_size = blocks * BTRFS_CRC32_SIZE; - if (new_item_size >= btrfs_item_size(leaf->items + slot)) + if (new_item_size >= btrfs_item_size_nr(leaf, slot)) return 0; ret = btrfs_truncate_item(trans, root, path, new_item_size); BUG_ON(ret); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 4cc459c943e..1734ca69555 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. 
*/ -#include #include #include #include @@ -88,7 +87,9 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, { struct btrfs_key key; struct btrfs_path *path; - char *ptr, *kaddr; + struct extent_buffer *leaf; + char *kaddr; + unsigned long ptr; struct btrfs_file_extent_item *ei; u32 datasize; int err = 0; @@ -102,7 +103,6 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, key.objectid = inode->i_ino; key.offset = offset; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); BUG_ON(size >= PAGE_CACHE_SIZE); datasize = btrfs_file_extent_calc_inline_size(size); @@ -113,18 +113,17 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, err = ret; goto fail; } - ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_generation(ei, trans->transid); - btrfs_set_file_extent_type(ei, - BTRFS_FILE_EXTENT_INLINE); + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(leaf, ei, trans->transid); + btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); ptr = btrfs_file_extent_inline_start(ei); kaddr = kmap_atomic(page, KM_USER0); - btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, kaddr + page_offset, size); + write_extent_buffer(leaf, kaddr + page_offset, ptr, size); kunmap_atomic(kaddr, KM_USER0); - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(leaf); fail: btrfs_free_path(path); return err; @@ -156,8 +155,8 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, em->bdev = inode->i_sb->s_bdev; - start_pos = pos & ~((u64)root->blocksize - 1); - num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> + start_pos = pos & ~((u64)root->sectorsize - 1); + num_blocks = (write_bytes + pos - start_pos + root->sectorsize - 1) >> inode->i_blkbits; down_read(&BTRFS_I(inode)->root->snap_sem); @@ -184,7 +183,7 @@ 
static int dirty_and_release_pages(struct btrfs_trans_handle *trans, if (inode->i_size < start_pos) { u64 last_pos_in_file; u64 hole_size; - u64 mask = root->blocksize - 1; + u64 mask = root->sectorsize - 1; last_pos_in_file = (isize + mask) & ~mask; hole_size = (start_pos - last_pos_in_file + mask) & ~mask; @@ -227,8 +226,8 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, /* step one, delete the existing extents in this range */ /* FIXME blocksize != pagesize */ err = btrfs_drop_extents(trans, root, inode, start_pos, - (pos + write_bytes + root->blocksize -1) & - ~((u64)root->blocksize - 1), &hint_block); + (pos + write_bytes + root->sectorsize -1) & + ~((u64)root->sectorsize - 1), &hint_block); if (err) goto failed; @@ -288,7 +287,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, { int ret; struct btrfs_key key; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; int slot; struct btrfs_file_extent_item *extent; u64 extent_end = 0; @@ -327,10 +326,10 @@ next_slot: found_extent = 0; found_inline = 0; extent = NULL; - leaf = btrfs_buffer_leaf(path->nodes[0]); + leaf = path->nodes[0]; slot = path->slots[0]; ret = 0; - btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + btrfs_item_key_to_cpu(leaf, &key, slot); if (key.offset >= end || key.objectid != inode->i_ino) { goto out; } @@ -344,17 +343,18 @@ next_slot: if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); - found_type = btrfs_file_extent_type(extent); + found_type = btrfs_file_extent_type(leaf, extent); if (found_type == BTRFS_FILE_EXTENT_REG) { extent_end = key.offset + - (btrfs_file_extent_num_blocks(extent) << + (btrfs_file_extent_num_blocks(leaf, extent) << inode->i_blkbits); found_extent = 1; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + struct btrfs_item *item; + item = btrfs_item_nr(leaf, slot); found_inline = 1; extent_end = key.offset + - btrfs_file_extent_inline_len(leaf->items + - 
slot); + btrfs_file_extent_inline_len(leaf, item); } } else { extent_end = search_start; @@ -365,8 +365,7 @@ next_slot: search_start >= extent_end) { int nextret; u32 nritems; - nritems = btrfs_header_nritems( - btrfs_buffer_header(path->nodes[0])); + nritems = btrfs_header_nritems(leaf); if (slot >= nritems - 1) { nextret = btrfs_next_leaf(root, path); if (nextret) @@ -380,7 +379,7 @@ next_slot: /* FIXME, there's only one inline extent allowed right now */ if (found_inline) { - u64 mask = root->blocksize - 1; + u64 mask = root->sectorsize - 1; search_start = (extent_end + mask) & ~mask; } else search_start = extent_end; @@ -388,10 +387,13 @@ next_slot: if (end < extent_end && end >= key.offset) { if (found_extent) { u64 disk_blocknr = - btrfs_file_extent_disk_blocknr(extent); + btrfs_file_extent_disk_blocknr(leaf,extent); u64 disk_num_blocks = - btrfs_file_extent_disk_num_blocks(extent); - memcpy(&old, extent, sizeof(old)); + btrfs_file_extent_disk_num_blocks(leaf, + extent); + read_extent_buffer(leaf, &old, + (unsigned long)extent, + sizeof(old)); if (disk_blocknr != 0) { ret = btrfs_inc_extent_ref(trans, root, disk_blocknr, disk_num_blocks); @@ -406,20 +408,24 @@ next_slot: u64 new_num; u64 old_num; keep = 1; - WARN_ON(start & (root->blocksize - 1)); + WARN_ON(start & (root->sectorsize - 1)); if (found_extent) { new_num = (start - key.offset) >> inode->i_blkbits; - old_num = btrfs_file_extent_num_blocks(extent); + old_num = btrfs_file_extent_num_blocks(leaf, + extent); *hint_block = - btrfs_file_extent_disk_blocknr(extent); - if (btrfs_file_extent_disk_blocknr(extent)) { + btrfs_file_extent_disk_blocknr(leaf, + extent); + if (btrfs_file_extent_disk_blocknr(leaf, + extent)) { inode->i_blocks -= (old_num - new_num) << 3; } - btrfs_set_file_extent_num_blocks(extent, + btrfs_set_file_extent_num_blocks(leaf, + extent, new_num); - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(leaf); } else { WARN_ON(1); } @@ -431,13 +437,17 @@ next_slot: u64 
extent_num_blocks = 0; if (found_extent) { disk_blocknr = - btrfs_file_extent_disk_blocknr(extent); + btrfs_file_extent_disk_blocknr(leaf, + extent); disk_num_blocks = - btrfs_file_extent_disk_num_blocks(extent); + btrfs_file_extent_disk_num_blocks(leaf, + extent); extent_num_blocks = - btrfs_file_extent_num_blocks(extent); + btrfs_file_extent_num_blocks(leaf, + extent); *hint_block = - btrfs_file_extent_disk_blocknr(extent); + btrfs_file_extent_disk_blocknr(leaf, + extent); } ret = btrfs_del_item(trans, root, path); /* TODO update progress marker and return */ @@ -464,42 +474,37 @@ next_slot: struct btrfs_key ins; ins.objectid = inode->i_ino; ins.offset = end; - ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); btrfs_release_path(root, path); ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*extent)); + leaf = path->nodes[0]; if (ret) { - btrfs_print_leaf(root, btrfs_buffer_leaf(path->nodes[0])); - printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu keep was %d\n", ret , ins.objectid, ins.flags, ins.offset, start, end, key.offset, extent_end, keep); + btrfs_print_leaf(root, leaf); + printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu keep was %d\n", ret , ins.objectid, ins.type, ins.offset, start, end, key.offset, extent_end, keep); } BUG_ON(ret); - extent = btrfs_item_ptr( - btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_file_extent_item); - btrfs_set_file_extent_disk_blocknr(extent, - btrfs_file_extent_disk_blocknr(&old)); - btrfs_set_file_extent_disk_num_blocks(extent, - btrfs_file_extent_disk_num_blocks(&old)); - - btrfs_set_file_extent_offset(extent, - btrfs_file_extent_offset(&old) + + extent = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + write_extent_buffer(leaf, &old, + (unsigned long)extent, sizeof(old)); + + btrfs_set_file_extent_offset(leaf, extent, + le64_to_cpu(old.offset) + ((end - key.offset) >> inode->i_blkbits)); - 
WARN_ON(btrfs_file_extent_num_blocks(&old) < + WARN_ON(le64_to_cpu(old.num_blocks) < (extent_end - end) >> inode->i_blkbits); - btrfs_set_file_extent_num_blocks(extent, + btrfs_set_file_extent_num_blocks(leaf, extent, (extent_end - end) >> inode->i_blkbits); - btrfs_set_file_extent_type(extent, + btrfs_set_file_extent_type(leaf, extent, BTRFS_FILE_EXTENT_REG); - btrfs_set_file_extent_generation(extent, - btrfs_file_extent_generation(&old)); btrfs_mark_buffer_dirty(path->nodes[0]); - if (btrfs_file_extent_disk_blocknr(&old) != 0) { + if (le64_to_cpu(old.disk_blocknr) != 0) { inode->i_blocks += - btrfs_file_extent_num_blocks(extent) << 3; + btrfs_file_extent_num_blocks(leaf, + extent) << 3; } ret = 0; goto out; @@ -529,8 +534,8 @@ static int prepare_pages(struct btrfs_root *root, u64 num_blocks; u64 start_pos; - start_pos = pos & ~((u64)root->blocksize - 1); - num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> + start_pos = pos & ~((u64)root->sectorsize - 1); + num_blocks = (write_bytes + pos - start_pos + root->sectorsize - 1) >> inode->i_blkbits; memset(pages, 0, num_pages * sizeof(struct page *)); diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index b7834617753..35d2608f891 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -20,24 +20,18 @@ #include "disk-io.h" #include "transaction.h" -int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 objectid, struct btrfs_inode_item - *inode_item) +int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 objectid) { - struct btrfs_path *path; struct btrfs_key key; int ret; key.objectid = objectid; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); key.offset = 0; - path = btrfs_alloc_path(); - BUG_ON(!path); - ret = btrfs_insert_item(trans, root, &key, inode_item, - sizeof(*inode_item)); - btrfs_release_path(root, path); - btrfs_free_path(path); + ret = 
btrfs_insert_empty_item(trans, root, path, &key, + sizeof(struct btrfs_inode_item)); if (ret == 0 && objectid > root->highest_inode) root->highest_inode = objectid; return ret; @@ -51,15 +45,15 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root int cow = mod != 0; int ret; int slot; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; struct btrfs_key found_key; ret = btrfs_search_slot(trans, root, location, path, ins_len, cow); if (ret > 0 && btrfs_key_type(location) == BTRFS_ROOT_ITEM_KEY && location->offset == (u64)-1 && path->slots[0] != 0) { slot = path->slots[0] - 1; - leaf = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&found_key, &leaf->items[slot].key); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, slot); if (found_key.objectid == location->objectid && btrfs_key_type(&found_key) == btrfs_key_type(location)) { path->slots[0]--; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 40547086625..ab74977adf5 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -24,8 +24,9 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) { struct btrfs_path *path; int ret; - struct btrfs_leaf *l; + struct extent_buffer *l; struct btrfs_key search_key; + struct btrfs_key found_key; int slot; path = btrfs_alloc_path(); @@ -39,8 +40,9 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) BUG_ON(ret == 0); if (path->slots[0] > 0) { slot = path->slots[0] - 1; - l = btrfs_buffer_leaf(path->nodes[0]); - *objectid = btrfs_disk_key_objectid(&l->items[slot].key); + l = path->nodes[0]; + btrfs_item_key_to_cpu(l, &found_key, slot); + *objectid = found_key.objectid; } else { *objectid = BTRFS_FIRST_FREE_OBJECTID; } @@ -64,13 +66,12 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, int slot = 0; u64 last_ino = 0; int start_found; - struct btrfs_leaf *l; + struct extent_buffer *l; struct btrfs_key search_key; u64 search_start = dirid; path = 
btrfs_alloc_path(); BUG_ON(!path); - search_key.flags = 0; search_start = root->last_inode_alloc; search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID); search_key.objectid = search_start; @@ -86,9 +87,9 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, path->slots[0]--; while (1) { - l = btrfs_buffer_leaf(path->nodes[0]); + l = path->nodes[0]; slot = path->slots[0]; - if (slot >= btrfs_header_nritems(&l->header)) { + if (slot >= btrfs_header_nritems(l)) { ret = btrfs_next_leaf(root, path); if (ret == 0) continue; @@ -103,7 +104,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, last_ino : search_start; goto found; } - btrfs_disk_key_to_cpu(&key, &l->items[slot].key); + btrfs_item_key_to_cpu(l, &key, slot); if (key.objectid >= search_start) { if (start_found) { if (last_ino < search_start) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b03d40a907c..fbe2836364e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -159,10 +159,8 @@ out: int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) { - char csum[BTRFS_CRC32_SIZE]; size_t offset = start - (page->index << PAGE_CACHE_SHIFT); struct inode *inode = page->mapping->host; - struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; char *kaddr; u64 private; @@ -173,11 +171,15 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) if (ret) { goto zeroit; } + /* + struct btrfs_root *root = BTRFS_I(inode)->root; + char csum[BTRFS_CRC32_SIZE]; ret = btrfs_csum_data(root, kaddr + offset, end - start + 1, csum); BUG_ON(ret); if (memcmp(csum, &private, BTRFS_CRC32_SIZE)) { goto zeroit; } + */ kunmap_atomic(kaddr, KM_IRQ0); return 0; @@ -192,7 +194,9 @@ zeroit: void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path *path; + struct extent_buffer *leaf; struct btrfs_inode_item *inode_item; + struct btrfs_inode_timespec *tspec; struct btrfs_root *root = BTRFS_I(inode)->root; 
struct btrfs_key location; u64 alloc_group_block; @@ -205,29 +209,37 @@ void btrfs_read_locked_inode(struct inode *inode) memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); ret = btrfs_lookup_inode(NULL, root, path, &location, 0); - if (ret) { + if (ret) goto make_bad; - } - inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_inode_item); - inode->i_mode = btrfs_inode_mode(inode_item); - inode->i_nlink = btrfs_inode_nlink(inode_item); - inode->i_uid = btrfs_inode_uid(inode_item); - inode->i_gid = btrfs_inode_gid(inode_item); - inode->i_size = btrfs_inode_size(inode_item); - inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime); - inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime); - inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime); - inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime); - inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime); - inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime); - inode->i_blocks = btrfs_inode_nblocks(inode_item); - inode->i_generation = btrfs_inode_generation(inode_item); + leaf = path->nodes[0]; + inode_item = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_inode_item); + + inode->i_mode = btrfs_inode_mode(leaf, inode_item); + inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); + inode->i_uid = btrfs_inode_uid(leaf, inode_item); + inode->i_gid = btrfs_inode_gid(leaf, inode_item); + inode->i_size = btrfs_inode_size(leaf, inode_item); + + tspec = btrfs_inode_atime(inode_item); + inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec); + inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec); + + tspec = btrfs_inode_mtime(inode_item); + inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec); + inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec); + + tspec = btrfs_inode_ctime(inode_item); + inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec); + inode->i_ctime.tv_nsec = 
btrfs_timespec_nsec(leaf, tspec); + + inode->i_blocks = btrfs_inode_nblocks(leaf, inode_item); + inode->i_generation = btrfs_inode_generation(leaf, inode_item); inode->i_rdev = 0; - rdev = btrfs_inode_rdev(inode_item); - alloc_group_block = btrfs_inode_block_group(inode_item); + rdev = btrfs_inode_rdev(leaf, inode_item); + + alloc_group_block = btrfs_inode_block_group(leaf, inode_item); BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info, alloc_group_block); @@ -267,24 +279,35 @@ make_bad: make_bad_inode(inode); } -static void fill_inode_item(struct btrfs_inode_item *item, +static void fill_inode_item(struct extent_buffer *leaf, + struct btrfs_inode_item *item, struct inode *inode) { - btrfs_set_inode_uid(item, inode->i_uid); - btrfs_set_inode_gid(item, inode->i_gid); - btrfs_set_inode_size(item, inode->i_size); - btrfs_set_inode_mode(item, inode->i_mode); - btrfs_set_inode_nlink(item, inode->i_nlink); - btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec); - btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec); - btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec); - btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec); - btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec); - btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec); - btrfs_set_inode_nblocks(item, inode->i_blocks); - btrfs_set_inode_generation(item, inode->i_generation); - btrfs_set_inode_rdev(item, inode->i_rdev); - btrfs_set_inode_block_group(item, + btrfs_set_inode_uid(leaf, item, inode->i_uid); + btrfs_set_inode_gid(leaf, item, inode->i_gid); + btrfs_set_inode_size(leaf, item, inode->i_size); + btrfs_set_inode_mode(leaf, item, inode->i_mode); + btrfs_set_inode_nlink(leaf, item, inode->i_nlink); + + btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item), + inode->i_atime.tv_sec); + btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item), + inode->i_atime.tv_nsec); + + btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item), + 
inode->i_mtime.tv_sec); + btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item), + inode->i_mtime.tv_nsec); + + btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item), + inode->i_ctime.tv_sec); + btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item), + inode->i_ctime.tv_nsec); + + btrfs_set_inode_nblocks(leaf, item, inode->i_blocks); + btrfs_set_inode_generation(leaf, item, inode->i_generation); + btrfs_set_inode_rdev(leaf, item, inode->i_rdev); + btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group->key.objectid); } @@ -294,6 +317,7 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, { struct btrfs_inode_item *inode_item; struct btrfs_path *path; + struct extent_buffer *leaf; int ret; path = btrfs_alloc_path(); @@ -306,12 +330,12 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, goto failed; } - inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], + leaf = path->nodes[0]; + inode_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); - fill_inode_item(inode_item, inode); - btrfs_mark_buffer_dirty(path->nodes[0]); + fill_inode_item(leaf, inode_item, inode); + btrfs_mark_buffer_dirty(leaf); btrfs_set_inode_last_trans(trans, inode); ret = 0; failed: @@ -330,8 +354,9 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, const char *name = dentry->d_name.name; int name_len = dentry->d_name.len; int ret = 0; - u64 objectid; + struct extent_buffer *leaf; struct btrfs_dir_item *di; + struct btrfs_key key; path = btrfs_alloc_path(); if (!path) { @@ -349,14 +374,15 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, ret = -ENOENT; goto err; } - objectid = btrfs_disk_key_objectid(&di->location); + leaf = path->nodes[0]; + btrfs_dir_item_key_to_cpu(leaf, di, &key); ret = btrfs_delete_one_dir_name(trans, root, path, di); if (ret) goto err; btrfs_release_path(root, path); di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, - objectid, name, name_len, -1); + 
key.objectid, name, name_len, -1); if (IS_ERR(di)) { ret = PTR_ERR(di); goto err; @@ -391,12 +417,15 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) root = BTRFS_I(dir)->root; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); ret = btrfs_unlink_trans(trans, root, dir, dentry); nr = trans->blocks_used; + btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); + return ret; } @@ -411,7 +440,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) struct btrfs_trans_handle *trans; struct btrfs_key found_key; int found_type; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; char *goodnames = ".."; unsigned long nr; @@ -419,10 +448,11 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) BUG_ON(!path); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); key.objectid = inode->i_ino; key.offset = (u64)-1; - key.flags = (u32)-1; + key.type = (u8)-1; while(1) { ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) { @@ -435,9 +465,8 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) goto out; } path->slots[0]--; - leaf = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&found_key, - &leaf->items[path->slots[0]].key); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); found_type = btrfs_key_type(&found_key); if (found_key.objectid != inode->i_ino) { err = -ENOENT; @@ -513,9 +542,9 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, int ret; struct btrfs_path *path; struct btrfs_key key; - struct btrfs_disk_key *found_key; + struct btrfs_key found_key; u32 found_type; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; struct btrfs_file_extent_item *fi; u64 extent_start = 0; u64 extent_num_blocks = 0; @@ -527,10 +556,12 @@ 
static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); path->reada = -1; BUG_ON(!path); + /* FIXME, add redo link to tree so we don't leak on crash */ key.objectid = inode->i_ino; key.offset = (u64)-1; - key.flags = (u32)-1; + key.type = (u8)-1; + while(1) { btrfs_init_path(path); fi = NULL; @@ -542,27 +573,28 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, BUG_ON(path->slots[0] == 0); path->slots[0]--; } - leaf = btrfs_buffer_leaf(path->nodes[0]); - found_key = &leaf->items[path->slots[0]].key; - found_type = btrfs_disk_key_type(found_key); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + found_type = btrfs_key_type(&found_key); - if (btrfs_disk_key_objectid(found_key) != inode->i_ino) + if (found_key.objectid != inode->i_ino) break; + if (found_type != BTRFS_CSUM_ITEM_KEY && found_type != BTRFS_DIR_ITEM_KEY && found_type != BTRFS_DIR_INDEX_KEY && found_type != BTRFS_EXTENT_DATA_KEY) break; - item_end = btrfs_disk_key_offset(found_key); + item_end = found_key.offset; if (found_type == BTRFS_EXTENT_DATA_KEY) { - fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], + fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - if (btrfs_file_extent_type(fi) != + if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_INLINE) { - item_end += btrfs_file_extent_num_blocks(fi) << - inode->i_blkbits; + item_end += + btrfs_file_extent_num_blocks(leaf, fi) << + inode->i_blkbits; } } if (found_type == BTRFS_CSUM_ITEM_KEY) { @@ -583,7 +615,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, btrfs_set_key_type(&key, found_type); continue; } - if (btrfs_disk_key_offset(found_key) >= inode->i_size) + if (found_key.offset >= inode->i_size) del_item = 1; else del_item = 0; @@ -591,30 +623,31 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, /* FIXME, shrink the extent if the ref count is only 1 */ if 
(found_type == BTRFS_EXTENT_DATA_KEY && - btrfs_file_extent_type(fi) != + btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_INLINE) { u64 num_dec; - extent_start = btrfs_file_extent_disk_blocknr(fi); + extent_start = btrfs_file_extent_disk_blocknr(leaf, fi); if (!del_item) { u64 orig_num_blocks = - btrfs_file_extent_num_blocks(fi); + btrfs_file_extent_num_blocks(leaf, fi); extent_num_blocks = inode->i_size - - btrfs_disk_key_offset(found_key) + - root->blocksize - 1; + found_key.offset + root->sectorsize - 1; extent_num_blocks >>= inode->i_blkbits; - btrfs_set_file_extent_num_blocks(fi, + btrfs_set_file_extent_num_blocks(leaf, fi, extent_num_blocks); num_dec = (orig_num_blocks - extent_num_blocks) << 3; if (extent_start != 0) { inode->i_blocks -= num_dec; } - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(leaf); } else { extent_num_blocks = - btrfs_file_extent_disk_num_blocks(fi); + btrfs_file_extent_disk_num_blocks(leaf, + fi); /* FIXME blocksize != 4096 */ - num_dec = btrfs_file_extent_num_blocks(fi) << 3; + num_dec = btrfs_file_extent_num_blocks(leaf, + fi) << 3; if (extent_start != 0) { found_extent = 1; inode->i_blocks -= num_dec; @@ -725,7 +758,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - u64 mask = root->blocksize - 1; + u64 mask = root->sectorsize - 1; u64 pos = (inode->i_size + mask) & ~mask; u64 block_end = attr->ia_size | mask; u64 hole_size; @@ -771,9 +804,11 @@ void btrfs_delete_inode(struct inode *inode) if (is_bad_inode(inode)) { goto no_delete; } + inode->i_size = 0; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); ret = btrfs_truncate_in_trans(trans, root, inode); if (ret) @@ -782,6 +817,7 @@ void btrfs_delete_inode(struct inode *inode) if (ret) goto no_delete_lock; nr = trans->blocks_used; + 
btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); @@ -819,7 +855,7 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, ret = 0; goto out; } - btrfs_disk_key_to_cpu(location, &di->location); + btrfs_dir_item_key_to_cpu(path->nodes[0], di, location); out: btrfs_release_path(root, path); btrfs_free_path(path); @@ -856,7 +892,6 @@ static int fixup_tree_root_location(struct btrfs_root *root, ri = &(*sub_root)->root_item; location->objectid = btrfs_root_dirid(ri); - location->flags = 0; btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); location->offset = 0; @@ -908,11 +943,14 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); + mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_inode_by_name(dir, dentry, &location); mutex_unlock(&root->fs_info->fs_mutex); + if (ret < 0) return ERR_PTR(ret); + inode = NULL; if (location.objectid) { ret = fixup_tree_root_location(root, &location, &sub_root, @@ -952,10 +990,11 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) struct btrfs_item *item; struct btrfs_dir_item *di; struct btrfs_key key; + struct btrfs_key found_key; struct btrfs_path *path; int ret; u32 nritems; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; int slot; int advance; unsigned char d_type; @@ -964,15 +1003,19 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) u32 di_total; u32 di_len; int key_type = BTRFS_DIR_INDEX_KEY; + char tmp_name[32]; + char *name_ptr; + int name_len; /* FIXME, use a real flag for deciding about the key type */ if (root->fs_info->tree_root == root) key_type = BTRFS_DIR_ITEM_KEY; + mutex_lock(&root->fs_info->fs_mutex); key.objectid = inode->i_ino; - key.flags = 0; btrfs_set_key_type(&key, key_type); key.offset = filp->f_pos; + path = btrfs_alloc_path(); path->reada = 2; ret = 
btrfs_search_slot(NULL, root, &key, path, 0, 0); @@ -980,16 +1023,16 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) goto err; advance = 0; while(1) { - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); slot = path->slots[0]; if (advance || slot >= nritems) { if (slot >= nritems -1) { ret = btrfs_next_leaf(root, path); if (ret) break; - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); slot = path->slots[0]; } else { slot++; @@ -997,28 +1040,48 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) } } advance = 1; - item = leaf->items + slot; - if (btrfs_disk_key_objectid(&item->key) != key.objectid) + item = btrfs_item_nr(leaf, slot); + btrfs_item_key_to_cpu(leaf, &found_key, slot); + + if (found_key.objectid != key.objectid) break; - if (btrfs_disk_key_type(&item->key) != key_type) + if (btrfs_key_type(&found_key) != key_type) break; - if (btrfs_disk_key_offset(&item->key) < filp->f_pos) + if (found_key.offset < filp->f_pos) continue; - filp->f_pos = btrfs_disk_key_offset(&item->key); + + filp->f_pos = found_key.offset; advance = 1; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); di_cur = 0; - di_total = btrfs_item_size(leaf->items + slot); + di_total = btrfs_item_size(leaf, item); while(di_cur < di_total) { - d_type = btrfs_filetype_table[btrfs_dir_type(di)]; - over = filldir(dirent, (const char *)(di + 1), - btrfs_dir_name_len(di), - btrfs_disk_key_offset(&item->key), - btrfs_disk_key_objectid(&di->location), + struct btrfs_key location; + + name_len = btrfs_dir_name_len(leaf, di); + if (name_len < 32) { + name_ptr = tmp_name; + } else { + name_ptr = kmalloc(name_len, GFP_NOFS); + BUG_ON(!name_ptr); + } + read_extent_buffer(leaf, name_ptr, + (unsigned long)(di + 1), name_len); + + d_type 
= btrfs_filetype_table[btrfs_dir_type(leaf, di)]; + btrfs_dir_item_key_to_cpu(leaf, di, &location); + + over = filldir(dirent, name_ptr, name_len, + found_key.offset, + location.objectid, d_type); + + if (name_ptr != tmp_name) + kfree(name_ptr); + if (over) goto nopos; - di_len = btrfs_dir_name_len(di) + sizeof(*di); + di_len = btrfs_dir_name_len(leaf, di) + sizeof(*di); di_cur += di_len; di = (struct btrfs_dir_item *)((char *)di + di_len); } @@ -1075,11 +1138,15 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, int mode) { struct inode *inode; - struct btrfs_inode_item inode_item; + struct btrfs_inode_item *inode_item; struct btrfs_key *location; + struct btrfs_path *path; int ret; int owner; + path = btrfs_alloc_path(); + BUG_ON(!path); + inode = new_inode(root->fs_info->sb); if (!inode) return ERR_PTR(-ENOMEM); @@ -1095,24 +1162,32 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, group = btrfs_find_block_group(root, group, 0, 0, owner); BTRFS_I(inode)->block_group = group; + ret = btrfs_insert_empty_inode(trans, root, path, objectid); + if (ret) + goto fail; + inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; inode->i_mode = mode; inode->i_ino = objectid; inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - fill_inode_item(&inode_item, inode); + inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + fill_inode_item(path->nodes[0], inode_item, inode); + btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_free_path(path); + location = &BTRFS_I(inode)->location; location->objectid = objectid; - location->flags = 0; location->offset = 0; btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); - ret = btrfs_insert_inode(trans, root, objectid, &inode_item); - if (ret) - return ERR_PTR(ret); insert_inode_hash(inode); return inode; +fail: + btrfs_free_path(path); + return ERR_PTR(ret); } static inline u8 btrfs_inode_type(struct inode *inode) @@ 
-1127,8 +1202,8 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, struct btrfs_key key; struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root; struct inode *parent_inode; + key.objectid = inode->i_ino; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); key.offset = 0; @@ -1285,14 +1360,18 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, inc_nlink(inode); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); atomic_inc(&inode->i_count); err = btrfs_add_nondir(trans, dentry, inode); + if (err) drop_inode = 1; + dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, dir); err = btrfs_update_inode(trans, root, inode); + if (err) drop_inode = 1; @@ -1321,13 +1400,13 @@ static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans, key.objectid = objectid; key.offset = 0; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid, &key, BTRFS_FT_DIR); if (ret) goto error; + key.objectid = dirid; ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid, &key, BTRFS_FT_DIR); @@ -1350,6 +1429,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); + if (IS_ERR(trans)) { err = PTR_ERR(trans); goto out_unlock; @@ -1367,6 +1447,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) err = PTR_ERR(inode); goto out_fail; } + drop_on_err = 1; inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; @@ -1380,9 +1461,11 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) err = btrfs_update_inode(trans, root, inode); if (err) goto out_fail; + err = btrfs_add_link(trans, dentry, inode); if (err) goto out_fail; + d_instantiate(dentry, inode); drop_on_err = 0; 
dir->i_sb->s_dirt = 1; @@ -1392,6 +1475,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) out_fail: nr = trans->blocks_used; btrfs_end_transaction(trans, root); + out_unlock: mutex_unlock(&root->fs_info->fs_mutex); if (drop_on_err) @@ -1415,8 +1499,8 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, struct btrfs_path *path; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_file_extent_item *item; - struct btrfs_leaf *leaf; - struct btrfs_disk_key *found_key; + struct extent_buffer *leaf; + struct btrfs_key found_key; struct extent_map *em = NULL; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct btrfs_trans_handle *trans = NULL; @@ -1436,8 +1520,8 @@ again: err = -ENOMEM; goto out; } - em->start = 0; - em->end = 0; + em->start = EXTENT_MAP_HOLE; + em->end = EXTENT_MAP_HOLE; } em->bdev = inode->i_sb->s_bdev; ret = btrfs_lookup_file_extent(NULL, root, path, @@ -1453,25 +1537,27 @@ again: path->slots[0]--; } - item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - leaf = btrfs_buffer_leaf(path->nodes[0]); - blocknr = btrfs_file_extent_disk_blocknr(item); - blocknr += btrfs_file_extent_offset(item); + + blocknr = btrfs_file_extent_disk_blocknr(leaf, item); + blocknr += btrfs_file_extent_offset(leaf, item); /* are we inside the extent that was found? 
*/ - found_key = &leaf->items[path->slots[0]].key; - found_type = btrfs_disk_key_type(found_key); - if (btrfs_disk_key_objectid(found_key) != objectid || + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + found_type = btrfs_key_type(&found_key); + if (found_key.objectid != objectid || found_type != BTRFS_EXTENT_DATA_KEY) { goto not_found; } - found_type = btrfs_file_extent_type(item); - extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key); + found_type = btrfs_file_extent_type(leaf, item); + extent_start = found_key.offset; if (found_type == BTRFS_FILE_EXTENT_REG) { extent_end = extent_start + - (btrfs_file_extent_num_blocks(item) << inode->i_blkbits); + (btrfs_file_extent_num_blocks(leaf, item) << + inode->i_blkbits); err = 0; if (start < extent_start || start >= extent_end) { em->start = start; @@ -1484,28 +1570,29 @@ again: } goto not_found_em; } - if (btrfs_file_extent_disk_blocknr(item) == 0) { + if (btrfs_file_extent_disk_blocknr(leaf, item) == 0) { em->start = extent_start; em->end = extent_end - 1; - em->block_start = 0; - em->block_end = 0; + em->block_start = EXTENT_MAP_HOLE; + em->block_end = EXTENT_MAP_HOLE; goto insert; } em->block_start = blocknr << inode->i_blkbits; em->block_end = em->block_start + - (btrfs_file_extent_num_blocks(item) << + (btrfs_file_extent_num_blocks(leaf, item) << inode->i_blkbits) - 1; em->start = extent_start; em->end = extent_end - 1; goto insert; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { - char *ptr; + unsigned long ptr; char *map; u32 size; - size = btrfs_file_extent_inline_len(leaf->items + - path->slots[0]); - extent_end = extent_start | ((u64)root->blocksize - 1); + size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf, + path->slots[0])); + + extent_end = extent_start | ((u64)root->sectorsize - 1); if (start < extent_start || start >= extent_end) { em->start = start; if (start < extent_start) { @@ -1517,18 +1604,21 @@ again: } goto not_found_em; } + em->block_start = 
EXTENT_MAP_INLINE; em->block_end = EXTENT_MAP_INLINE; em->start = extent_start; em->end = extent_end; + if (!page) { goto insert; } + ptr = btrfs_file_extent_inline_start(item); map = kmap(page); - memcpy(map + page_offset, ptr, size); + read_extent_buffer(leaf, map + page_offset, ptr, size); memset(map + page_offset + size, 0, - root->blocksize - (page_offset + size)); + root->sectorsize - (page_offset + size)); flush_dcache_page(page); kunmap(page); set_extent_uptodate(em_tree, extent_start, @@ -1542,8 +1632,8 @@ not_found: em->start = start; em->end = end; not_found_em: - em->block_start = 0; - em->block_end = 0; + em->block_start = EXTENT_MAP_HOLE; + em->block_end = EXTENT_MAP_HOLE; insert: btrfs_release_path(root, path); if (em->start > start || em->end < start) { @@ -1712,6 +1802,7 @@ static void btrfs_truncate(struct inode *inode) ret = btrfs_truncate_in_trans(trans, root, inode); btrfs_update_inode(trans, root, inode); nr = trans->blocks_used; + ret = btrfs_end_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); @@ -1731,8 +1822,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) struct btrfs_key key; struct btrfs_root_item root_item; struct btrfs_inode_item *inode_item; - struct buffer_head *subvol; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; struct btrfs_root *new_root; struct inode *inode; struct inode *dir; @@ -1746,34 +1836,37 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); - subvol = btrfs_alloc_free_block(trans, root, 0, 0); - if (IS_ERR(subvol)) - return PTR_ERR(subvol); - leaf = btrfs_buffer_leaf(subvol); - btrfs_set_header_nritems(&leaf->header, 0); - btrfs_set_header_level(&leaf->header, 0); - btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol)); - btrfs_set_header_generation(&leaf->header, trans->transid); - btrfs_set_header_owner(&leaf->header, root->root_key.objectid); - 
memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid, - sizeof(leaf->header.fsid)); - btrfs_mark_buffer_dirty(subvol); + leaf = btrfs_alloc_free_block(trans, root, 0, 0); + if (IS_ERR(leaf)) + return PTR_ERR(leaf); + + btrfs_set_header_nritems(leaf, 0); + btrfs_set_header_level(leaf, 0); + btrfs_set_header_blocknr(leaf, extent_buffer_blocknr(leaf)); + btrfs_set_header_generation(leaf, trans->transid); + btrfs_set_header_owner(leaf, root->root_key.objectid); + write_extent_buffer(leaf, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(leaf), + BTRFS_FSID_SIZE); + btrfs_mark_buffer_dirty(leaf); inode_item = &root_item.inode; memset(inode_item, 0, sizeof(*inode_item)); - btrfs_set_inode_generation(inode_item, 1); - btrfs_set_inode_size(inode_item, 3); - btrfs_set_inode_nlink(inode_item, 1); - btrfs_set_inode_nblocks(inode_item, 1); - btrfs_set_inode_mode(inode_item, S_IFDIR | 0755); + inode_item->generation = cpu_to_le64(1); + inode_item->size = cpu_to_le64(3); + inode_item->nlink = cpu_to_le32(1); + inode_item->nblocks = cpu_to_le64(1); + inode_item->mode = cpu_to_le32(S_IFDIR | 0755); - btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol)); + btrfs_set_root_blocknr(&root_item, extent_buffer_blocknr(leaf)); btrfs_set_root_refs(&root_item, 1); - btrfs_set_root_blocks_used(&root_item, 0); + btrfs_set_root_used(&root_item, 0); + memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); root_item.drop_level = 0; - brelse(subvol); - subvol = NULL; + + free_extent_buffer(leaf); + leaf = NULL; ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, 0, &objectid); @@ -1784,7 +1877,6 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) key.objectid = objectid; key.offset = 1; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &root_item); @@ -1845,7 +1937,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) struct 
btrfs_trans_handle *trans; struct btrfs_key key; struct btrfs_root_item new_root_item; - struct buffer_head *tmp; + struct extent_buffer *tmp; int ret; int err; u64 objectid; @@ -1876,10 +1968,11 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) key.objectid = objectid; key.offset = 1; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); - btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node)); + btrfs_set_root_blocknr(&new_root_item, + extent_buffer_blocknr(root->node)); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &new_root_item); @@ -1904,8 +1997,10 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) fail: nr = trans->blocks_used; err = btrfs_commit_transaction(trans, root); + if (err && !ret) ret = err; + mutex_unlock(&root->fs_info->fs_mutex); up_write(&root->snap_sem); btrfs_btree_balance_dirty(root, nr); @@ -1986,7 +2081,7 @@ static int btrfs_ioctl_snap_create(struct btrfs_root *root, void __user *arg) if (copy_from_user(&vol_args, arg, sizeof(vol_args))) return -EFAULT; - + namelen = strlen(vol_args.name); if (namelen > BTRFS_VOL_NAME_MAX) return -EINVAL; @@ -2164,8 +2259,10 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) { return -ENOTEMPTY; } + mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, new_dir); path = btrfs_alloc_path(); if (!path) { @@ -2177,9 +2274,10 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, old_dir->i_ctime = old_dir->i_mtime = ctime; new_dir->i_ctime = new_dir->i_mtime = ctime; old_inode->i_ctime = ctime; + if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) { struct btrfs_key *location = &BTRFS_I(new_dir)->location; - u64 old_parent_oid; + struct btrfs_key old_parent_key; di = btrfs_lookup_dir_item(trans, root, 
path, old_inode->i_ino, "..", 2, -1); if (IS_ERR(di)) { @@ -2190,7 +2288,7 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, ret = -ENOENT; goto out_fail; } - old_parent_oid = btrfs_disk_key_objectid(&di->location); + btrfs_dir_item_key_to_cpu(path->nodes[0], di, &old_parent_key); ret = btrfs_del_item(trans, root, path); if (ret) { goto out_fail; @@ -2199,7 +2297,7 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, di = btrfs_lookup_dir_index_item(trans, root, path, old_inode->i_ino, - old_parent_oid, + old_parent_key.objectid, "..", 2, -1); if (IS_ERR(di)) { ret = PTR_ERR(di); @@ -2257,8 +2355,9 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, u64 objectid; int name_len; int datasize; - char *ptr; + unsigned long ptr; struct btrfs_file_extent_item *ei; + struct extent_buffer *leaf; unsigned long nr; name_len = strlen(symname) + 1; @@ -2302,7 +2401,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, BUG_ON(!path); key.objectid = inode->i_ino; key.offset = 0; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); datasize = btrfs_file_extent_calc_inline_size(name_len); err = btrfs_insert_empty_item(trans, root, path, &key, @@ -2311,16 +2409,17 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, drop_inode = 1; goto out_unlock; } - ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_generation(ei, trans->transid); - btrfs_set_file_extent_type(ei, + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(leaf, ei, trans->transid); + btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); ptr = btrfs_file_extent_inline_start(ei); - btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, symname, name_len); - btrfs_mark_buffer_dirty(path->nodes[0]); + write_extent_buffer(leaf, symname, ptr, 
name_len); + btrfs_mark_buffer_dirty(leaf); btrfs_free_path(path); + inode->i_op = &btrfs_symlink_inode_operations; inode->i_mapping->a_ops = &btrfs_symlink_aops; inode->i_size = name_len - 1; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index bafa78603bd..a825ce078a5 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -20,10 +20,10 @@ #include "disk-io.h" #include "print-tree.h" -void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) +void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) { int i; - u32 nr = btrfs_header_nritems(&l->header); + u32 nr = btrfs_header_nritems(l); struct btrfs_item *item; struct btrfs_extent_item *ei; struct btrfs_root_item *ri; @@ -31,119 +31,113 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) struct btrfs_inode_item *ii; struct btrfs_block_group_item *bi; struct btrfs_file_extent_item *fi; + struct btrfs_key key; + struct btrfs_key found_key; u32 type; printk("leaf %llu total ptrs %d free space %d\n", - (unsigned long long)btrfs_header_blocknr(&l->header), nr, + (unsigned long long)btrfs_header_blocknr(l), nr, btrfs_leaf_free_space(root, l)); for (i = 0 ; i < nr ; i++) { - item = l->items + i; - type = btrfs_disk_key_type(&item->key); + item = btrfs_item_nr(l, i); + btrfs_item_key_to_cpu(l, &key, i); + type = btrfs_key_type(&key); printk("\titem %d key (%llu %x %llu) itemoff %d itemsize %d\n", i, - (unsigned long long)btrfs_disk_key_objectid(&item->key), - btrfs_disk_key_flags(&item->key), - (unsigned long long)btrfs_disk_key_offset(&item->key), - btrfs_item_offset(item), - btrfs_item_size(item)); + (unsigned long long)key.objectid, type, + (unsigned long long)key.offset, + btrfs_item_offset(l, item), btrfs_item_size(l, item)); switch (type) { case BTRFS_INODE_ITEM_KEY: ii = btrfs_item_ptr(l, i, struct btrfs_inode_item); printk("\t\tinode generation %llu size %llu mode %o\n", - (unsigned long long)btrfs_inode_generation(ii), - (unsigned long 
long)btrfs_inode_size(ii), - btrfs_inode_mode(ii)); + (unsigned long long)btrfs_inode_generation(l, ii), + (unsigned long long)btrfs_inode_size(l, ii), + btrfs_inode_mode(l, ii)); break; case BTRFS_DIR_ITEM_KEY: di = btrfs_item_ptr(l, i, struct btrfs_dir_item); + btrfs_dir_item_key_to_cpu(l, di, &found_key); printk("\t\tdir oid %llu flags %u type %u\n", - (unsigned long long)btrfs_disk_key_objectid( - &di->location), - btrfs_dir_flags(di), - btrfs_dir_type(di)); - printk("\t\tname %.*s\n", - btrfs_dir_name_len(di),(char *)(di + 1)); + (unsigned long long)found_key.objectid, + btrfs_dir_flags(l, di), + btrfs_dir_type(l, di)); break; case BTRFS_ROOT_ITEM_KEY: ri = btrfs_item_ptr(l, i, struct btrfs_root_item); printk("\t\troot data blocknr %llu refs %u\n", - (unsigned long long)btrfs_root_blocknr(ri), - btrfs_root_refs(ri)); + (unsigned long long)btrfs_disk_root_blocknr(l, ri), + btrfs_disk_root_refs(l, ri)); break; case BTRFS_EXTENT_ITEM_KEY: ei = btrfs_item_ptr(l, i, struct btrfs_extent_item); printk("\t\textent data refs %u\n", - btrfs_extent_refs(ei)); + btrfs_extent_refs(l, ei)); break; case BTRFS_EXTENT_DATA_KEY: fi = btrfs_item_ptr(l, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(fi) == + if (btrfs_file_extent_type(l, fi) == BTRFS_FILE_EXTENT_INLINE) { printk("\t\tinline extent data size %u\n", - btrfs_file_extent_inline_len(l->items + i)); + btrfs_file_extent_inline_len(l, item)); break; } printk("\t\textent data disk block %llu nr %llu\n", - (unsigned long long)btrfs_file_extent_disk_blocknr(fi), - (unsigned long long)btrfs_file_extent_disk_num_blocks(fi)); + (unsigned long long)btrfs_file_extent_disk_blocknr(l, fi), + (unsigned long long)btrfs_file_extent_disk_num_blocks(l, fi)); printk("\t\textent data offset %llu nr %llu\n", - (unsigned long long)btrfs_file_extent_offset(fi), - (unsigned long long)btrfs_file_extent_num_blocks(fi)); + (unsigned long long)btrfs_file_extent_offset(l, fi), + (unsigned long long)btrfs_file_extent_num_blocks(l, 
fi)); break; case BTRFS_BLOCK_GROUP_ITEM_KEY: bi = btrfs_item_ptr(l, i, struct btrfs_block_group_item); printk("\t\tblock group used %llu\n", - (unsigned long long)btrfs_block_group_used(bi)); - break; - case BTRFS_STRING_ITEM_KEY: - printk("\t\titem data %.*s\n", btrfs_item_size(item), - btrfs_leaf_data(l) + btrfs_item_offset(item)); + (unsigned long long)btrfs_disk_block_group_used(l, bi)); break; }; } } -void btrfs_print_tree(struct btrfs_root *root, struct buffer_head *t) +void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) { int i; u32 nr; - struct btrfs_node *c; + struct btrfs_key key; - if (!t) + if (!c) return; - c = btrfs_buffer_node(t); - nr = btrfs_header_nritems(&c->header); + nr = btrfs_header_nritems(c); if (btrfs_is_leaf(c)) { - btrfs_print_leaf(root, (struct btrfs_leaf *)c); + btrfs_print_leaf(root, c); return; } printk("node %llu level %d total ptrs %d free spc %u\n", - (unsigned long long)btrfs_header_blocknr(&c->header), - btrfs_header_level(&c->header), nr, + (unsigned long long)btrfs_header_blocknr(c), + btrfs_header_level(c), nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); for (i = 0; i < nr; i++) { + btrfs_node_key_to_cpu(c, &key, i); printk("\tkey %d (%llu %u %llu) block %llu\n", i, - (unsigned long long)c->ptrs[i].key.objectid, - c->ptrs[i].key.flags, - (unsigned long long)c->ptrs[i].key.offset, + (unsigned long long)key.objectid, + key.type, + (unsigned long long)key.offset, (unsigned long long)btrfs_node_blockptr(c, i)); } for (i = 0; i < nr; i++) { - struct buffer_head *next_buf = read_tree_block(root, + struct extent_buffer *next = read_tree_block(root, btrfs_node_blockptr(c, i)); - struct btrfs_node *next = btrfs_buffer_node(next_buf); if (btrfs_is_leaf(next) && - btrfs_header_level(&c->header) != 1) + btrfs_header_level(c) != 1) BUG(); - if (btrfs_header_level(&next->header) != - btrfs_header_level(&c->header) - 1) + if (btrfs_header_level(next) != + btrfs_header_level(c) - 1) BUG(); - btrfs_print_tree(root, 
next_buf); - btrfs_block_release(root, next_buf); + btrfs_print_tree(root, next); + free_extent_buffer(next); } } diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index 205cd03601a..da75efe534d 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -18,6 +18,6 @@ #ifndef __PRINT_TREE_ #define __PRINT_TREE_ -void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l); -void btrfs_print_tree(struct btrfs_root *root, struct buffer_head *t); +void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l); +void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *t); #endif diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 3b5926dfbeb..88bcdd33f56 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -26,12 +26,13 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, { struct btrfs_path *path; struct btrfs_key search_key; - struct btrfs_leaf *l; + struct btrfs_key found_key; + struct extent_buffer *l; int ret; int slot; search_key.objectid = objectid; - search_key.flags = (u32)-1; + search_key.type = (u8)-1; search_key.offset = (u64)-1; path = btrfs_alloc_path(); @@ -39,17 +40,19 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); if (ret < 0) goto out; + BUG_ON(ret == 0); - l = btrfs_buffer_leaf(path->nodes[0]); + l = path->nodes[0]; BUG_ON(path->slots[0] == 0); slot = path->slots[0] - 1; - if (btrfs_disk_key_objectid(&l->items[slot].key) != objectid) { + btrfs_item_key_to_cpu(l, &found_key, slot); + if (found_key.objectid != objectid) { ret = 1; goto out; } - memcpy(item, btrfs_item_ptr(l, slot, struct btrfs_root_item), - sizeof(*item)); - btrfs_disk_key_to_cpu(key, &l->items[slot].key); + read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot), + sizeof(*item)); + memcpy(key, &found_key, sizeof(found_key)); ret = 0; out: btrfs_release_path(root, path); @@ -62,10 +65,10 @@ int btrfs_update_root(struct 
btrfs_trans_handle *trans, struct btrfs_root *item) { struct btrfs_path *path; - struct btrfs_leaf *l; + struct extent_buffer *l; int ret; int slot; - struct btrfs_root_item *update_item; + unsigned long ptr; path = btrfs_alloc_path(); BUG_ON(!path); @@ -73,10 +76,10 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root if (ret < 0) goto out; BUG_ON(ret != 0); - l = btrfs_buffer_leaf(path->nodes[0]); + l = path->nodes[0]; slot = path->slots[0]; - update_item = btrfs_item_ptr(l, slot, struct btrfs_root_item); - btrfs_memcpy(root, l, update_item, item, sizeof(*item)); + ptr = btrfs_item_ptr_offset(l, slot); + write_extent_buffer(l, item, ptr, sizeof(*item)); btrfs_mark_buffer_dirty(path->nodes[0]); out: btrfs_release_path(root, path); @@ -103,11 +106,10 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, struct btrfs_path *path; int ret; u32 nritems; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; int slot; key.objectid = objectid; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); key.offset = 0; path = btrfs_alloc_path(); @@ -117,19 +119,19 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, if (ret < 0) goto err; while(1) { - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); slot = path->slots[0]; if (slot >= nritems) { ret = btrfs_next_leaf(root, path); if (ret) break; - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); slot = path->slots[0]; } - item = leaf->items + slot; - btrfs_disk_key_to_cpu(&key, &item->key); + item = btrfs_item_nr(leaf, slot); + btrfs_item_key_to_cpu(leaf, &key, slot); if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY) goto next; @@ -140,7 +142,7 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, break; ri = btrfs_item_ptr(leaf, slot, struct 
btrfs_root_item); - if (btrfs_root_refs(ri) != 0) + if (btrfs_disk_root_refs(leaf, ri) != 0) goto next; dead_root = btrfs_read_fs_root_no_radix(root->fs_info, &key); @@ -170,6 +172,7 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, int ret; u32 refs; struct btrfs_root_item *ri; + struct extent_buffer *leaf; path = btrfs_alloc_path(); BUG_ON(!path); @@ -177,10 +180,10 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (ret < 0) goto out; BUG_ON(ret != 0); - ri = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], struct btrfs_root_item); + leaf = path->nodes[0]; + ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item); - refs = btrfs_root_refs(ri); + refs = btrfs_disk_root_refs(leaf, ri); BUG_ON(refs != 0); ret = btrfs_del_item(trans, root, path); out: diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 66a01cbbbea..39a1435c68f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -41,7 +41,7 @@ #include "ioctl.h" #include "print-tree.h" -#define BTRFS_SUPER_MAGIC 0x9123682E +#define BTRFS_SUPER_MAGIC 0x9123683E static struct super_operations btrfs_super_ops; @@ -115,13 +115,12 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) return -EIO; } sb->s_fs_info = tree_root; - disk_super = tree_root->fs_info->disk_super; + disk_super = &tree_root->fs_info->super_copy; inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super), tree_root); bi = BTRFS_I(inode); bi->location.objectid = inode->i_ino; bi->location.offset = 0; - bi->location.flags = 0; bi->root = tree_root; btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY); @@ -281,6 +280,7 @@ error_s: error_bdev: close_bdev_excl(bdev); error: +printk("get_sb failed\n"); return error; } /* end copy & paste */ @@ -295,6 +295,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, ret = btrfs_get_sb_bdev(fs_type, flags, dev_name, data, btrfs_fill_super, mnt, subvol_name ? 
subvol_name : "default"); +printk("btrfs_get_sb returns %d\n", ret); return ret; } diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 2058783373e..9654e90eec8 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -31,31 +31,31 @@ static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_root_blocks_used(&root->root_item)); + (unsigned long long)btrfs_root_used(&root->root_item)); } static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_root_block_limit(&root->root_item)); + (unsigned long long)btrfs_root_limit(&root->root_item)); } static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_super_blocks_used(fs->disk_super)); + (unsigned long long)btrfs_super_blocks_used(&fs->super_copy)); } static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_super_total_blocks(fs->disk_super)); + (unsigned long long)btrfs_super_total_blocks(&fs->super_copy)); } static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_super_blocksize(fs->disk_super)); + (unsigned long long)btrfs_super_sectorsize(&fs->super_copy)); } /* this is for root attrs (subvols/snapshots) */ diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 087074db0bd..750f35a37aa 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -66,7 +67,9 @@ static int join_transaction(struct btrfs_root *root) cur_trans->commit_done = 0; cur_trans->start_time = get_seconds(); list_add_tail(&cur_trans->list, &root->fs_info->trans_list); - 
init_bit_radix(&cur_trans->dirty_pages); + extent_map_tree_init(&cur_trans->dirty_pages, + root->fs_info->btree_inode->i_mapping, + GFP_NOFS); } else { cur_trans->num_writers++; cur_trans->num_joined++; @@ -88,7 +91,7 @@ static int record_root_in_trans(struct btrfs_root *root) (unsigned long)root->root_key.objectid, BTRFS_ROOT_DEFRAG_TAG); root->commit_root = root->node; - get_bh(root->node); + extent_buffer_get(root->node); } else { WARN_ON(1); } @@ -144,29 +147,30 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - unsigned long gang[16]; int ret; - int i; int err; int werr = 0; + struct extent_map_tree *dirty_pages; struct page *page; - struct radix_tree_root *dirty_pages; struct inode *btree_inode = root->fs_info->btree_inode; + u64 start; + u64 end; + unsigned long index; if (!trans || !trans->transaction) { return filemap_write_and_wait(btree_inode->i_mapping); } dirty_pages = &trans->transaction->dirty_pages; while(1) { - ret = find_first_radix_bit(dirty_pages, gang, - 0, ARRAY_SIZE(gang)); - if (!ret) + ret = find_first_extent_bit(dirty_pages, 0, &start, &end, + EXTENT_DIRTY); + if (ret) break; - for (i = 0; i < ret; i++) { - /* FIXME EIO */ - clear_radix_bit(dirty_pages, gang[i]); - page = find_lock_page(btree_inode->i_mapping, - gang[i]); + clear_extent_dirty(dirty_pages, start, end, GFP_NOFS); + while(start <= end) { + index = start >> PAGE_CACHE_SHIFT; + start = (index + 1) << PAGE_CACHE_SHIFT; + page = find_lock_page(btree_inode->i_mapping, index); if (!page) continue; if (PageWriteback(page)) { @@ -202,10 +206,11 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, btrfs_write_dirty_block_groups(trans, extent_root); while(1) { old_extent_block = btrfs_root_blocknr(&extent_root->root_item); - if (old_extent_block == bh_blocknr(extent_root->node)) + if (old_extent_block == + extent_buffer_blocknr(extent_root->node)) break; 
btrfs_set_root_blocknr(&extent_root->root_item, - bh_blocknr(extent_root->node)); + extent_buffer_blocknr(extent_root->node)); ret = btrfs_update_root(trans, tree_root, &extent_root->root_key, &extent_root->root_item); @@ -279,9 +284,9 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); if (root->commit_root == root->node) { - WARN_ON(bh_blocknr(root->node) != + WARN_ON(extent_buffer_blocknr(root->node) != btrfs_root_blocknr(&root->root_item)); - brelse(root->commit_root); + free_extent_buffer(root->commit_root); root->commit_root = NULL; /* make sure to update the root on disk @@ -310,7 +315,7 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, root->root_key.offset = root->fs_info->generation; btrfs_set_root_blocknr(&root->root_item, - bh_blocknr(root->node)); + extent_buffer_blocknr(root->node)); err = btrfs_insert_root(trans, root->fs_info->tree_root, &root->root_key, &root->root_item); @@ -389,10 +394,10 @@ int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) for (i = 0; i < ret; i++) { root = gang[i]; last = root->root_key.objectid + 1; - btrfs_defrag_root(root, 1); + // btrfs_defrag_root(root, 1); } } - btrfs_defrag_root(info->extent_root, 1); + // btrfs_defrag_root(info->extent_root, 1); return err; } @@ -414,7 +419,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, dirty = list_entry(list->next, struct dirty_root, list); list_del_init(&dirty->list); - num_blocks = btrfs_root_blocks_used(&dirty->root->root_item); + num_blocks = btrfs_root_used(&dirty->root->root_item); root = dirty->latest_root; while(1) { @@ -441,11 +446,11 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, } BUG_ON(ret); - num_blocks -= btrfs_root_blocks_used(&dirty->root->root_item); - blocks_used = btrfs_root_blocks_used(&root->root_item); + num_blocks -= btrfs_root_used(&dirty->root->root_item); + blocks_used = btrfs_root_used(&root->root_item); if (num_blocks) { 
record_root_in_trans(root); - btrfs_set_root_blocks_used(&root->root_item, + btrfs_set_root_used(&root->root_item, blocks_used - num_blocks); } ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key); @@ -553,9 +558,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_set_super_generation(&root->fs_info->super_copy, cur_trans->transid); btrfs_set_super_root(&root->fs_info->super_copy, - bh_blocknr(root->fs_info->tree_root->node)); - memcpy(root->fs_info->disk_super, &root->fs_info->super_copy, - sizeof(root->fs_info->super_copy)); + extent_buffer_blocknr(root->fs_info->tree_root->node)); + + write_extent_buffer(root->fs_info->sb_buffer, + &root->fs_info->super_copy, 0, + sizeof(root->fs_info->super_copy)); btrfs_copy_pinned(root, &pinned_copy); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 4bc328cbb24..ae39fcfc169 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -28,7 +28,7 @@ struct btrfs_transaction { int use_count; int commit_done; struct list_head list; - struct radix_tree_root dirty_pages; + struct extent_map_tree dirty_pages; unsigned long start_time; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; @@ -83,5 +83,6 @@ int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest, int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info); int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); int btrfs_clean_old_snapshots(struct btrfs_root *root); - +int btrfs_commit_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root); #endif diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 420597127ed..daf019afa0a 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -23,14 +23,14 @@ #include "transaction.h" static void reada_defrag(struct btrfs_root *root, - struct btrfs_node *node) + struct extent_buffer *node) { int i; u32 nritems; u64 blocknr; int ret; - nritems = btrfs_header_nritems(&node->header); + nritems = 
btrfs_header_nritems(node); for (i = 0; i < nritems; i++) { blocknr = btrfs_node_blockptr(node, i); ret = readahead_tree_block(root, blocknr); @@ -44,8 +44,8 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, struct btrfs_path *path, int *level, int cache_only, u64 *last_ret) { - struct buffer_head *next; - struct buffer_head *cur; + struct extent_buffer *next; + struct extent_buffer *cur; u64 blocknr; int ret = 0; int is_extent = 0; @@ -62,13 +62,13 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, cur = path->nodes[*level]; if (!cache_only && *level > 1 && path->slots[*level] == 0) - reada_defrag(root, btrfs_buffer_node(cur)); + reada_defrag(root, cur); - if (btrfs_header_level(btrfs_buffer_header(cur)) != *level) + if (btrfs_header_level(cur) != *level) WARN_ON(1); if (path->slots[*level] >= - btrfs_header_nritems(btrfs_buffer_header(cur))) + btrfs_header_nritems(cur)) break; if (*level == 1) { @@ -80,14 +80,13 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, break; } - blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur), - path->slots[*level]); + blocknr = btrfs_node_blockptr(cur, path->slots[*level]); if (cache_only) { next = btrfs_find_tree_block(root, blocknr); - if (!next || !buffer_uptodate(next) || - buffer_locked(next) || !buffer_defrag(next)) { - brelse(next); + /* FIXME, test for defrag */ + if (!next || !btrfs_buffer_uptodate(next)) { + free_extent_buffer(next); path->slots[*level]++; continue; } @@ -106,16 +105,18 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, WARN_ON(*level <= 0); if (path->nodes[*level-1]) - btrfs_block_release(root, path->nodes[*level-1]); + free_extent_buffer(path->nodes[*level-1]); path->nodes[*level-1] = next; - *level = btrfs_header_level(btrfs_buffer_header(next)); + *level = btrfs_header_level(next); path->slots[*level] = 0; } WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); +#if 0 clear_buffer_defrag(path->nodes[*level]); 
clear_buffer_defrag_done(path->nodes[*level]); - btrfs_block_release(root, path->nodes[*level]); +#endif + free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; WARN_ON(ret); @@ -129,24 +130,25 @@ static int defrag_walk_up(struct btrfs_trans_handle *trans, { int i; int slot; - struct btrfs_node *node; + struct extent_buffer *node; for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { slot = path->slots[i]; - if (slot < btrfs_header_nritems( - btrfs_buffer_header(path->nodes[i])) - 1) { + if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { path->slots[i]++; *level = i; - node = btrfs_buffer_node(path->nodes[i]); + node = path->nodes[i]; WARN_ON(i == 0); - btrfs_disk_key_to_cpu(&root->defrag_progress, - &node->ptrs[path->slots[i]].key); + btrfs_node_key_to_cpu(node, &root->defrag_progress, + path->slots[i]); root->defrag_level = i; return 0; } else { + /* clear_buffer_defrag(path->nodes[*level]); clear_buffer_defrag_done(path->nodes[*level]); - btrfs_block_release(root, path->nodes[*level]); + */ + free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level = i + 1; } @@ -158,7 +160,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, struct btrfs_root *root, int cache_only) { struct btrfs_path *path = NULL; - struct buffer_head *tmp; + struct extent_buffer *tmp; int ret = 0; int wret; int level; @@ -172,17 +174,18 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, if (root->ref_cows == 0 && !is_extent) goto out; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; - level = btrfs_header_level(btrfs_buffer_header(root->node)); + level = btrfs_header_level(root->node); orig_level = level; if (level == 0) { goto out; } if (root->defrag_progress.objectid == 0) { - get_bh(root->node); + extent_buffer_get(root->node); ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); BUG_ON(ret); ret = btrfs_realloc_node(trans, root, root->node, cache_only, @@ -200,12 +203,15 @@ int 
btrfs_defrag_leaves(struct btrfs_trans_handle *trans, if (is_extent) btrfs_extent_post_op(trans, root); + if (wret < 0) { ret = wret; goto out; } + while(level > 0 && !path->nodes[level]) level--; + if (!path->nodes[level]) { ret = 0; goto out; @@ -230,7 +236,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { - btrfs_block_release(root, path->nodes[i]); + free_extent_buffer(path->nodes[i]); path->nodes[i] = 0; } } -- cgit v1.2.3-70-g09d2 From 6da6abae027e2dbc59bca5f4168b0760f25068c7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 18 Dec 2007 16:15:09 -0500 Subject: Btrfs: Back port to 2.6.18-el kernels Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 13 ++++ fs/btrfs/disk-io.c | 5 +- fs/btrfs/extent_map.c | 168 ++++++++++++++++++++++++++++++++++++++++++++++--- fs/btrfs/file.c | 15 +++-- fs/btrfs/inode.c | 21 +++++-- fs/btrfs/super.c | 1 + fs/btrfs/sysfs.c | 23 ++++++- fs/btrfs/transaction.c | 8 +++ fs/btrfs/transaction.h | 5 ++ 9 files changed, 239 insertions(+), 20 deletions(-) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 32b24460ec8..2559f5d5d00 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -19,6 +19,7 @@ #ifndef __BTRFS__ #define __BTRFS__ +#include #include #include #include @@ -334,7 +335,11 @@ struct btrfs_fs_info { struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + struct work_struct trans_work; +#else struct delayed_work trans_work; +#endif struct kobject super_kobj; struct completion kobj_unregister; int do_barriers; @@ -916,6 +921,14 @@ static inline u32 btrfs_level_size(struct btrfs_root *root, int level) { ((unsigned long)(btrfs_leaf_data(leaf) + \ btrfs_item_offset_nr(leaf, slot))) +static inline struct dentry *fdentry(struct file *file) { +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + return file->f_dentry; +#else + return 
file->f_path.dentry; +#endif +} + /* extent-tree.c */ u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, struct btrfs_path *count_path, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7213012c27d..ebb2db624fd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -593,8 +593,11 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->do_barriers = 1; fs_info->closing = 0; fs_info->total_pinned = 0; - +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); +#else INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); +#endif BTRFS_I(fs_info->btree_inode)->root = tree_root; memset(&BTRFS_I(fs_info->btree_inode)->location, 0, sizeof(struct btrfs_key)); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 8326a18413f..0d1e59a86e4 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1585,8 +1585,18 @@ extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, static int submit_one_bio(int rw, struct bio *bio) { + u64 maxsector; int ret = 0; + bio_get(bio); + + maxsector = bio->bi_bdev->bd_inode->i_size >> 9; + if (maxsector < bio->bi_sector) { + printk("sector too large max %Lu got %llu\n", maxsector, + (unsigned long long)bio->bi_sector); + WARN_ON(1); + } + submit_bio(rw, bio); if (bio_flagged(bio, BIO_EOPNOTSUPP)) ret = -EOPNOTSUPP; @@ -1678,8 +1688,12 @@ static int __extent_read_full_page(struct extent_map_tree *tree, while (cur <= end) { if (cur >= last_byte) { + char *userpage; iosize = PAGE_CACHE_SIZE - page_offset; - zero_user_page(page, page_offset, iosize, KM_USER0); + userpage = kmap_atomic(page, KM_USER0); + memset(userpage + page_offset, 0, iosize); + flush_dcache_page(page); + kunmap_atomic(userpage, KM_USER0); set_extent_uptodate(tree, cur, cur + iosize - 1, GFP_NOFS); unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); @@ -1707,7 +1721,12 @@ static int __extent_read_full_page(struct extent_map_tree 
*tree, /* we've found a hole, just zero and go on */ if (block_start == EXTENT_MAP_HOLE) { - zero_user_page(page, page_offset, iosize, KM_USER0); + char *userpage; + userpage = kmap_atomic(page, KM_USER0); + memset(userpage + page_offset, 0, iosize); + flush_dcache_page(page); + kunmap_atomic(userpage, KM_USER0); + set_extent_uptodate(tree, cur, cur + iosize - 1, GFP_NOFS); unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); @@ -1804,9 +1823,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, } if (page->index == end_index) { + char *userpage; + size_t offset = i_size & (PAGE_CACHE_SIZE - 1); - zero_user_page(page, offset, - PAGE_CACHE_SIZE - offset, KM_USER0); + + userpage = kmap_atomic(page, KM_USER0); + memset(userpage + offset, 0, PAGE_CACHE_SIZE - offset); + flush_dcache_page(page); + kunmap_atomic(userpage, KM_USER0); } set_page_extent_mapped(page); @@ -1921,6 +1945,129 @@ done: return 0; } +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + +/* Taken directly from 2.6.23 for 2.6.18 back port */ +typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, + void *data); + +/** + * write_cache_pages - walk the list of dirty pages of the given address space + * and write all of them. + * @mapping: address space structure to write + * @wbc: subtract the number of written pages from *@wbc->nr_to_write + * @writepage: function called for each page + * @data: data passed to writepage function + * + * If a page is already under I/O, write_cache_pages() skips it, even + * if it's dirty. This is desirable behaviour for memory-cleaning writeback, + * but it is INCORRECT for data-integrity system calls such as fsync(). fsync() + * and msync() need to guarantee that all the data which was dirty at the time + * the call was made get new I/O started against them. If wbc->sync_mode is + * WB_SYNC_ALL then we were called for data integrity and we must wait for + * existing IO to complete. 
+ */ +static int write_cache_pages(struct address_space *mapping, + struct writeback_control *wbc, writepage_t writepage, + void *data) +{ + struct backing_dev_info *bdi = mapping->backing_dev_info; + int ret = 0; + int done = 0; + struct pagevec pvec; + int nr_pages; + pgoff_t index; + pgoff_t end; /* Inclusive */ + int scanned = 0; + int range_whole = 0; + + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + return 0; + } + + pagevec_init(&pvec, 0); + if (wbc->range_cyclic) { + index = mapping->writeback_index; /* Start from prev offset */ + end = -1; + } else { + index = wbc->range_start >> PAGE_CACHE_SHIFT; + end = wbc->range_end >> PAGE_CACHE_SHIFT; + if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) + range_whole = 1; + scanned = 1; + } +retry: + while (!done && (index <= end) && + (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { + unsigned i; + + scanned = 1; + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + /* + * At this point we hold neither mapping->tree_lock nor + * lock on the page itself: the page may be truncated or + * invalidated (changing page->mapping to NULL), or even + * swizzled back from swapper_space to tmpfs file + * mapping + */ + lock_page(page); + + if (unlikely(page->mapping != mapping)) { + unlock_page(page); + continue; + } + + if (!wbc->range_cyclic && page->index > end) { + done = 1; + unlock_page(page); + continue; + } + + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + + if (PageWriteback(page) || + !clear_page_dirty_for_io(page)) { + unlock_page(page); + continue; + } + + ret = (*writepage)(page, wbc, data); + + if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) { + unlock_page(page); + ret = 0; + } + if (ret || (--(wbc->nr_to_write) <= 0)) + done = 1; + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + done = 1; + } + } + 
pagevec_release(&pvec); + cond_resched(); + } + if (!scanned && !done) { + /* + * We hit the last page and there is more work to be done: wrap + * back to the start of the file + */ + scanned = 1; + index = 0; + goto retry; + } + if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) + mapping->writeback_index = index; + return ret; +} +#endif + int extent_write_full_page(struct extent_map_tree *tree, struct page *page, get_extent_t *get_extent, struct writeback_control *wbc) @@ -1945,18 +2092,20 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, ret = __extent_writepage(page, wbc, &epd); write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd); - if (epd.bio) + if (epd.bio) { submit_one_bio(WRITE, epd.bio); + } return ret; } EXPORT_SYMBOL(extent_write_full_page); + int extent_writepages(struct extent_map_tree *tree, struct address_space *mapping, get_extent_t *get_extent, struct writeback_control *wbc) { - int ret; + int ret = 0; struct extent_page_data epd = { .bio = NULL, .tree = tree, @@ -1964,8 +2113,9 @@ int extent_writepages(struct extent_map_tree *tree, }; ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd); - if (epd.bio) + if (epd.bio) { submit_one_bio(WRITE, epd.bio); + } return ret; } EXPORT_SYMBOL(extent_writepages); @@ -2106,7 +2256,9 @@ int extent_prepare_write(struct extent_map_tree *tree, flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); } - if (!isnew && !PageUptodate(page) && + if ((em->block_start != EXTENT_MAP_HOLE && + em->block_start != EXTENT_MAP_INLINE) && + !isnew && !PageUptodate(page) && (block_off_end > to || block_off_start < from) && !test_range_bit(tree, block_start, cur_end, EXTENT_UPTODATE, 1)) { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 62fcd79d8ab..461b09663fe 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -231,7 +231,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, { int err = 0; int i; - struct inode *inode = 
file->f_path.dentry->d_inode; + struct inode *inode = fdentry(file)->d_inode; struct extent_map *em; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; u64 hint_byte; @@ -652,7 +652,7 @@ static int prepare_pages(struct btrfs_root *root, { int i; unsigned long index = pos >> PAGE_CACHE_SHIFT; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = fdentry(file)->d_inode; int err = 0; u64 start_pos; @@ -666,7 +666,11 @@ static int prepare_pages(struct btrfs_root *root, err = -ENOMEM; BUG_ON(1); } +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + ClearPageDirty(pages[i]); +#else cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); +#endif wait_on_page_writeback(pages[i]); set_page_extent_mapped(pages[i]); WARN_ON(!PageLocked(pages[i])); @@ -682,7 +686,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, ssize_t num_written = 0; ssize_t err = 0; int ret = 0; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct page **pages = NULL; int nrptrs; @@ -707,7 +711,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, goto out; if (count == 0) goto out; - err = remove_suid(file->f_path.dentry); + err = remove_suid(fdentry(file)); if (err) goto out; file_update_time(file); @@ -862,6 +866,9 @@ struct file_operations btrfs_file_operations = { .read = do_sync_read, .aio_read = generic_file_aio_read, .splice_read = generic_file_splice_read, +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + .sendfile = generic_file_sendfile, +#endif .write = btrfs_file_write, .mmap = btrfs_file_mmap, .open = generic_file_open, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 686dd03f34f..6a7d9160df2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -532,7 +532,11 @@ err: dir->i_size -= name_len * 2; dir->i_mtime = dir->i_ctime = CURRENT_TIME; btrfs_update_inode(trans, root, dir); +#if 
LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + dentry->d_inode->i_nlink--; +#else drop_nlink(dentry->d_inode); +#endif ret = btrfs_update_inode(trans, root, dentry->d_inode); dir->i_sb->s_dirt = 1; } @@ -1139,7 +1143,7 @@ static unsigned char btrfs_filetype_table[] = { static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = filp->f_dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_item *item; struct btrfs_dir_item *di; @@ -1554,7 +1558,11 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (inode->i_nlink == 0) return -ENOENT; +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + inode->i_nlink++; +#else inc_nlink(inode); +#endif mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); @@ -1825,6 +1833,9 @@ insert: if (ret == -EEXIST) { free_extent_map(em); em = NULL; + if (0 && failed_insert == 1) { + btrfs_drop_extent_cache(inode, start, end); + } failed_insert++; if (failed_insert > 5) { printk("failing to insert %Lu %Lu\n", start, end); @@ -1942,7 +1953,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) */ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) { - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct inode *inode = fdentry(vma->vm_file)->d_inode; unsigned long end; loff_t size; int ret = -EINVAL; @@ -2243,7 +2254,7 @@ static unsigned long force_ra(struct address_space *mapping, } int btrfs_defrag_file(struct file *file) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = fdentry(file)->d_inode; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct page *page; unsigned long last_index; @@ -2329,7 +2340,7 @@ static int btrfs_ioctl_snap_create(struct btrfs_root *root, void __user *arg) static int btrfs_ioctl_defrag(struct file *file) { - struct inode *inode = 
file->f_path.dentry->d_inode; + struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; switch (inode->i_mode & S_IFMT) { @@ -2350,7 +2361,7 @@ static int btrfs_ioctl_defrag(struct file *file) long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct btrfs_root *root = BTRFS_I(file->f_path.dentry->d_inode)->root; + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; switch (cmd) { case BTRFS_IOC_SNAP_CREATE: diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b7323914bf5..7cab680418a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 1671c731800..cd673ca9bb9 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -189,12 +189,29 @@ static struct kset btrfs_kset; int btrfs_sysfs_add_super(struct btrfs_fs_info *fs) { int error; + char *name; + char c; + int len = strlen(fs->sb->s_id) + 1; + int i; + + name = kmalloc(len, GFP_NOFS); + if (!name) { + error = -ENOMEM; + goto fail; + } + + for (i = 0; i < len; i++) { + c = fs->sb->s_id[i]; + if (c == '/' || c == '\\') + c = '!'; + name[i] = c; + } + name[len] = '\0'; fs->super_kobj.kset = &btrfs_kset; fs->super_kobj.ktype = &btrfs_super_ktype; - error = kobject_set_name(&fs->super_kobj, "%s", - fs->sb->s_id); + error = kobject_set_name(&fs->super_kobj, "%s", name); if (error) goto fail; @@ -202,9 +219,11 @@ int btrfs_sysfs_add_super(struct btrfs_fs_info *fs) if (error) goto fail; + kfree(name); return 0; fail: + kfree(name); printk(KERN_ERR "btrfs: sysfs creation for super failed\n"); return error; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 0229e69bd2f..1ad611b9f61 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -614,12 +614,20 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) } return 0; } +#if LINUX_VERSION_CODE 
<= KERNEL_VERSION(2,6,18) +void btrfs_transaction_cleaner(void *p) +#else void btrfs_transaction_cleaner(struct work_struct *work) +#endif { +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + struct btrfs_fs_info *fs_info = p; +#else struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info, trans_work.work); +#endif struct btrfs_root *root = fs_info->tree_root; struct btrfs_transaction *cur; struct btrfs_trans_handle *trans; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index ae39fcfc169..eef840bca91 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -73,7 +73,12 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_root *root); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) +void btrfs_transaction_cleaner(void *p); +#else void btrfs_transaction_cleaner(struct work_struct *work); +#endif + void btrfs_transaction_flush_work(struct btrfs_root *root); void btrfs_transaction_queue_work(struct btrfs_root *root, int delay); void btrfs_init_transaction_sys(void); -- cgit v1.2.3-70-g09d2 From dc17ff8f11d129db9e83ab7244769e4eae05e14d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 8 Jan 2008 15:46:30 -0500 Subject: Btrfs: Add data=ordered support This forces file data extents down the disk along with the metadata that references them. The current implementation is fairly simple, and just writes out all of the dirty pages in an inode before the commit. 
Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/btrfs_inode.h | 1 + fs/btrfs/ctree.c | 6 +- fs/btrfs/ctree.h | 7 +- fs/btrfs/disk-io.c | 16 +++- fs/btrfs/disk-io.h | 2 + fs/btrfs/extent-tree.c | 4 +- fs/btrfs/file.c | 5 +- fs/btrfs/inode.c | 46 ++++++---- fs/btrfs/ordered-data.c | 221 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/ordered-data.h | 39 +++++++++ fs/btrfs/transaction.c | 58 +++++++++++++ fs/btrfs/transaction.h | 8 +- 13 files changed, 387 insertions(+), 28 deletions(-) create mode 100644 fs/btrfs/ordered-data.c create mode 100644 fs/btrfs/ordered-data.h (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index d5804c5ca07..ab9a9f8edbf 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -5,7 +5,7 @@ obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ - extent_map.o sysfs.o struct-funcs.o xattr.o acl.o + extent_map.o sysfs.o struct-funcs.o xattr.o acl.o ordered-data.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index d1d5af471c3..f27e633f174 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -29,6 +29,7 @@ struct btrfs_inode { struct extent_map_tree extent_tree; struct inode vfs_inode; + u64 ordered_trans; /* * transid of the trans_handle that last modified this inode */ diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 35c57074a37..43d23148a4f 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -221,7 +221,9 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, struct extent_buffer **cow_ret) { u64 search_start; + u64 header_trans; int ret; + if (trans->transaction != root->fs_info->running_transaction) { printk(KERN_CRIT "trans %Lu running %Lu\n", 
trans->transid, root->fs_info->running_transaction->transid); @@ -232,7 +234,9 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, root->fs_info->generation); WARN_ON(1); } - if (btrfs_header_generation(buf) == trans->transid) { + + header_trans = btrfs_header_generation(buf); + if (header_trans == trans->transid) { *cow_ret = buf; return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9873975ce0e..b55dba58dfa 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -16,8 +16,8 @@ * Boston, MA 021110-1307, USA. */ -#ifndef __BTRFS__ -#define __BTRFS__ +#ifndef __BTRFS_CTREE__ +#define __BTRFS_CTREE__ #include #include @@ -363,7 +363,6 @@ struct btrfs_root { struct inode *inode; struct kobject root_kobj; struct completion kobj_unregister; - struct rw_semaphore snap_sem; u64 objectid; u64 last_trans; @@ -1142,6 +1141,8 @@ void btrfs_destroy_cachep(void); long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, struct btrfs_root *root); +struct inode *btrfs_ilookup(struct super_block *s, u64 objectid, + u64 root_objectid); int btrfs_commit_write(struct file *file, struct page *page, unsigned from, unsigned to); struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a6170ff19e7..34cf1f1f47b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -406,7 +406,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); memset(&root->root_kobj, 0, sizeof(root->root_kobj)); init_completion(&root->kobj_unregister); - init_rwsem(&root->snap_sem); root->defrag_running = 0; root->defrag_level = 0; root->root_key.objectid = objectid; @@ -498,6 +497,21 @@ insert: return root; } +struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, + u64 root_objectid) +{ + struct btrfs_root *root; + + if (root_objectid 
== BTRFS_ROOT_TREE_OBJECTID) + return fs_info->tree_root; + if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID) + return fs_info->extent_root; + + root = radix_tree_lookup(&fs_info->fs_roots_radix, + (unsigned long)root_objectid); + return root; +} + struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, struct btrfs_key *location) { diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 8c3cfd02901..dae9fba8efc 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -34,6 +34,8 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); +struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, + u64 root_objectid); struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_key *location, const char *name, int namelen); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c906bb19b21..68137cd8506 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1195,7 +1195,9 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, if (btrfs_buffer_uptodate(buf)) { u64 transid = root->fs_info->running_transaction->transid; - if (btrfs_header_generation(buf) == transid) { + u64 header_transid = + btrfs_header_generation(buf); + if (header_transid == transid) { free_extent_buffer(buf); return 1; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 94c93373cb7..0a5f4defe59 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -34,6 +34,7 @@ #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" +#include "ordered-data.h" #include "ioctl.h" #include "print-tree.h" @@ -329,6 +330,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, root->fs_info->delalloc_bytes += (end_of_last_block + 1 - start_pos) - existing_delalloc; spin_unlock(&root->fs_info->delalloc_lock); + btrfs_add_ordered_inode(inode); } 
else { u64 aligned_end; /* step one, delete the existing extents in this range */ @@ -724,8 +726,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); - down_read(&BTRFS_I(inode)->root->snap_sem); - mutex_lock(&inode->i_mutex); first_index = pos >> PAGE_CACHE_SHIFT; last_index = (pos + count) >> PAGE_CACHE_SHIFT; @@ -804,7 +804,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } out: mutex_unlock(&inode->i_mutex); - up_read(&BTRFS_I(inode)->root->snap_sem); out_nolock: kfree(pages); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6d0cd9a7a61..6d6e1ac0a9a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -135,6 +135,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) alloc_hint = ins.objectid + ins.offset; start += cur_alloc_size; } + btrfs_add_ordered_inode(inode); out: btrfs_end_transaction(trans, root); return ret; @@ -367,8 +368,8 @@ void btrfs_read_locked_inode(struct inode *inode) path = btrfs_alloc_path(); BUG_ON(!path); mutex_lock(&root->fs_info->fs_mutex); - memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); + ret = btrfs_lookup_inode(NULL, root, path, &location, 0); if (ret) goto make_bad; @@ -898,7 +899,6 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) if ((offset & (blocksize - 1)) == 0) goto out; - down_read(&root->snap_sem); ret = -ENOMEM; page = grab_cache_page(mapping, index); if (!page) @@ -917,7 +917,6 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) unlock_page(page); page_cache_release(page); - up_read(&BTRFS_I(inode)->root->snap_sem); out: return ret; } @@ -1146,6 +1145,19 @@ static int btrfs_find_actor(struct inode *inode, void *opaque) args->root == BTRFS_I(inode)->root); } +struct inode *btrfs_ilookup(struct super_block *s, u64 objectid, + u64 root_objectid) +{ + struct btrfs_iget_args args; + args.ino = objectid; + 
args.root = btrfs_lookup_fs_root(btrfs_sb(s)->fs_info, root_objectid); + + if (!args.root) + return NULL; + + return ilookup5(s, objectid, btrfs_find_actor, (void *)&args); +} + struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, struct btrfs_root *root) { @@ -1336,7 +1348,6 @@ read_dir_items: d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; btrfs_dir_item_key_to_cpu(leaf, di, &location); - over = filldir(dirent, name_ptr, name_len, found_key.offset, location.objectid, @@ -2054,7 +2065,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) ret = -EINVAL; - down_read(&BTRFS_I(inode)->root->snap_sem); lock_page(page); wait_on_page_writeback(page); size = i_size_read(inode); @@ -2075,7 +2085,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) ret = btrfs_cow_one_page(inode, page, end); out_unlock: - up_read(&BTRFS_I(inode)->root->snap_sem); unlock_page(page); out: return ret; @@ -2118,7 +2127,7 @@ static int noinline create_subvol(struct btrfs_root *root, char *name, struct btrfs_root_item root_item; struct btrfs_inode_item *inode_item; struct extent_buffer *leaf; - struct btrfs_root *new_root; + struct btrfs_root *new_root = root; struct inode *inode; struct inode *dir; int ret; @@ -2230,7 +2239,7 @@ static int noinline create_subvol(struct btrfs_root *root, char *name, goto fail; fail: nr = trans->blocks_used; - err = btrfs_commit_transaction(trans, root); + err = btrfs_commit_transaction(trans, new_root); if (err && !ret) ret = err; fail_commit: @@ -2253,10 +2262,6 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) if (!root->ref_cows) return -EINVAL; - down_write(&root->snap_sem); - freeze_bdev(root->fs_info->sb->s_bdev); - thaw_bdev(root->fs_info->sb->s_bdev, root->fs_info->sb); - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, 1, 0); if (ret) @@ -2264,6 +2269,9 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) 
trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); + err = btrfs_commit_transaction(trans, root); + + trans = btrfs_start_transaction(root, 1); ret = btrfs_update_inode(trans, root, root->inode); if (ret) @@ -2272,9 +2280,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, 0, &objectid); if (ret) - goto fail; - - memcpy(&new_root_item, &root->root_item, + goto fail; memcpy(&new_root_item, &root->root_item, sizeof(new_root_item)); key.objectid = objectid; @@ -2285,12 +2291,20 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); free_extent_buffer(tmp); + /* write the ordered inodes to force all delayed allocations to + * be filled. Once this is done, we can copy the root + */ + mutex_lock(&root->fs_info->trans_mutex); + btrfs_write_ordered_inodes(trans, root); + mutex_unlock(&root->fs_info->trans_mutex); + btrfs_copy_root(trans, root, root->node, &tmp, objectid); btrfs_set_root_bytenr(&new_root_item, tmp->start); btrfs_set_root_level(&new_root_item, btrfs_header_level(tmp)); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &new_root_item); +printk("new root %Lu node %Lu\n", objectid, tmp->start); free_extent_buffer(tmp); if (ret) goto fail; @@ -2321,7 +2335,6 @@ fail: ret = err; fail_unlock: mutex_unlock(&root->fs_info->fs_mutex); - up_write(&root->snap_sem); btrfs_btree_balance_dirty(root, nr); return ret; } @@ -2608,6 +2621,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) if (!ei) return NULL; ei->last_trans = 0; + ei->ordered_trans = 0; return &ei->vfs_inode; } diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c new file mode 100644 index 00000000000..411aba84d30 --- /dev/null +++ b/fs/btrfs/ordered-data.c @@ -0,0 +1,221 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include +#include +#include "ctree.h" +#include "transaction.h" +#include "btrfs_inode.h" + +struct tree_entry { + u64 root_objectid; + u64 objectid; + struct rb_node rb_node; +}; + +/* + * returns > 0 if entry passed (root, objectid) is > entry, + * < 0 if (root, objectid) < entry and zero if they are equal + */ +static int comp_entry(struct tree_entry *entry, u64 root_objectid, + u64 objectid) +{ + if (root_objectid < entry->root_objectid) + return -1; + if (root_objectid > entry->root_objectid) + return 1; + if (objectid < entry->objectid) + return -1; + if (objectid > entry->objectid) + return 1; + return 0; +} + +static struct rb_node *tree_insert(struct rb_root *root, u64 root_objectid, + u64 objectid, struct rb_node *node) +{ + struct rb_node ** p = &root->rb_node; + struct rb_node * parent = NULL; + struct tree_entry *entry; + int comp; + + while(*p) { + parent = *p; + entry = rb_entry(parent, struct tree_entry, rb_node); + + comp = comp_entry(entry, root_objectid, objectid); + if (comp < 0) + p = &(*p)->rb_left; + else if (comp > 0) + p = &(*p)->rb_right; + else + return parent; + } + + rb_link_node(node, parent, p); + rb_insert_color(node, root); + return NULL; +} + +static struct rb_node *__tree_search(struct rb_root *root, u64 root_objectid, + u64 objectid, struct rb_node **prev_ret) +{ + struct 
rb_node * n = root->rb_node; + struct rb_node *prev = NULL; + struct tree_entry *entry; + struct tree_entry *prev_entry = NULL; + int comp; + + while(n) { + entry = rb_entry(n, struct tree_entry, rb_node); + prev = n; + prev_entry = entry; + comp = comp_entry(entry, root_objectid, objectid); + + if (comp < 0) + n = n->rb_left; + else if (comp > 0) + n = n->rb_right; + else + return n; + } + if (!prev_ret) + return NULL; + + while(prev && comp_entry(prev_entry, root_objectid, objectid) >= 0) { + prev = rb_next(prev); + prev_entry = rb_entry(prev, struct tree_entry, rb_node); + } + *prev_ret = prev; + return NULL; +} + +static inline struct rb_node *tree_search(struct rb_root *root, + u64 root_objectid, u64 objectid) +{ + struct rb_node *prev; + struct rb_node *ret; + ret = __tree_search(root, root_objectid, objectid, &prev); + if (!ret) + return prev; + return ret; +} + +int btrfs_add_ordered_inode(struct inode *inode) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 root_objectid = root->root_key.objectid; + u64 transid = root->fs_info->running_transaction->transid; + struct tree_entry *entry; + struct rb_node *node; + struct btrfs_ordered_inode_tree *tree; + + if (transid <= BTRFS_I(inode)->ordered_trans) + return 0; + + tree = &root->fs_info->running_transaction->ordered_inode_tree; + + read_lock(&tree->lock); + node = __tree_search(&tree->tree, root_objectid, inode->i_ino, NULL); + read_unlock(&tree->lock); + if (node) { + return 0; + } + + entry = kmalloc(sizeof(*entry), GFP_NOFS); + if (!entry) + return -ENOMEM; + + write_lock(&tree->lock); + entry->objectid = inode->i_ino; + entry->root_objectid = root_objectid; + + node = tree_insert(&tree->tree, root_objectid, + inode->i_ino, &entry->rb_node); + + BTRFS_I(inode)->ordered_trans = transid; + + write_unlock(&tree->lock); + if (node) + kfree(entry); + return 0; +} + +int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, + u64 *root_objectid, u64 *objectid) +{ + struct tree_entry 
*entry; + struct rb_node *node; + + write_lock(&tree->lock); + node = tree_search(&tree->tree, *root_objectid, *objectid); + if (!node) { + write_unlock(&tree->lock); + return 0; + } + entry = rb_entry(node, struct tree_entry, rb_node); + + while(comp_entry(entry, *root_objectid, *objectid) >= 0) { + node = rb_next(node); + if (!node) + break; + entry = rb_entry(node, struct tree_entry, rb_node); + } + if (!node) { + write_unlock(&tree->lock); + return 0; + } + + *root_objectid = entry->root_objectid; + *objectid = entry->objectid; + write_unlock(&tree->lock); + return 1; +} + +int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, + u64 *root_objectid, u64 *objectid) +{ + struct tree_entry *entry; + struct rb_node *node; + + write_lock(&tree->lock); + node = tree_search(&tree->tree, *root_objectid, *objectid); + if (!node) { + write_unlock(&tree->lock); + return 0; + } + + entry = rb_entry(node, struct tree_entry, rb_node); + while(comp_entry(entry, *root_objectid, *objectid) >= 0) { + node = rb_next(node); + if (!node) + break; + entry = rb_entry(node, struct tree_entry, rb_node); + } + if (!node) { + write_unlock(&tree->lock); + return 0; + } + + *root_objectid = entry->root_objectid; + *objectid = entry->objectid; + rb_erase(node, &tree->tree); + write_unlock(&tree->lock); + kfree(entry); + return 1; +} diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h new file mode 100644 index 00000000000..aaf9eb14271 --- /dev/null +++ b/fs/btrfs/ordered-data.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __BTRFS_ORDERED_DATA__ +#define __BTRFS_ORDERED_DATA__ + +struct btrfs_ordered_inode_tree { + rwlock_t lock; + struct rb_root tree; +}; + +static inline void +btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t) +{ + rwlock_init(&t->lock); + t->tree.rb_node = NULL; +} + +int btrfs_add_ordered_inode(struct inode *inode); +int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, + u64 *root_objectid, u64 *objectid); +int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, + u64 *root_objectid, u64 *objectid); +#endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 02721eea9a7..3ed5868e7c0 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -67,6 +67,7 @@ static int join_transaction(struct btrfs_root *root) cur_trans->commit_done = 0; cur_trans->start_time = get_seconds(); list_add_tail(&cur_trans->list, &root->fs_info->trans_list); + btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree); extent_map_tree_init(&cur_trans->dirty_pages, root->fs_info->btree_inode->i_mapping, GFP_NOFS); @@ -473,6 +474,60 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, return ret; } +int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_transaction *cur_trans = trans->transaction; + struct inode *inode; + u64 root_objectid = 0; + u64 objectid = 0; + u64 transid = trans->transid; + int ret; + +printk("write ordered trans %Lu\n", transid); + while(1) { + ret = btrfs_find_first_ordered_inode( + &cur_trans->ordered_inode_tree, + &root_objectid, &objectid); + if (!ret) + break; + + mutex_unlock(&root->fs_info->trans_mutex); + 
mutex_unlock(&root->fs_info->fs_mutex); + inode = btrfs_ilookup(root->fs_info->sb, objectid, + root_objectid); + if (inode) { + if (S_ISREG(inode->i_mode)) + filemap_fdatawrite(inode->i_mapping); + iput(inode); + } + mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->trans_mutex); + } + while(1) { + root_objectid = 0; + objectid = 0; + ret = btrfs_find_del_first_ordered_inode( + &cur_trans->ordered_inode_tree, + &root_objectid, &objectid); + if (!ret) + break; + mutex_unlock(&root->fs_info->trans_mutex); + mutex_unlock(&root->fs_info->fs_mutex); + inode = btrfs_ilookup(root->fs_info->sb, objectid, + root_objectid); + if (inode) { + if (S_ISREG(inode->i_mode)) + filemap_write_and_wait(inode->i_mapping); + iput(inode); + } + mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->trans_mutex); + } +printk("done write ordered trans %Lu\n", transid); + return 0; +} + int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -550,10 +605,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); finish_wait(&cur_trans->writer_wait, &wait); + ret = btrfs_write_ordered_inodes(trans, root); + } while (cur_trans->num_writers > 1 || (cur_trans->num_joined != joined)); WARN_ON(cur_trans != trans->transaction); + ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, &dirty_fs_roots); BUG_ON(ret); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index eef840bca91..c157ddbe9d1 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -16,9 +16,10 @@ * Boston, MA 021110-1307, USA. 
*/ -#ifndef __TRANSACTION__ -#define __TRANSACTION__ +#ifndef __BTRFS_TRANSACTION__ +#define __BTRFS_TRANSACTION__ #include "btrfs_inode.h" +#include "ordered-data.h" struct btrfs_transaction { u64 transid; @@ -30,6 +31,7 @@ struct btrfs_transaction { struct list_head list; struct extent_map_tree dirty_pages; unsigned long start_time; + struct btrfs_ordered_inode_tree ordered_inode_tree; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; }; @@ -90,4 +92,6 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); int btrfs_clean_old_snapshots(struct btrfs_root *root); int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); +int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, + struct btrfs_root *root); #endif -- cgit v1.2.3-70-g09d2 From 3063d29f2a4d4a4e9fa1ec77c124514f287c6da7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 8 Jan 2008 15:46:30 -0500 Subject: Btrfs: Move snapshot creation to commit time It is very difficult to create a consistent snapshot of the btree when other writers may update the btree before the commit is done. This changes the snapshot creation to happen during the commit, while no other updates are possible. 
Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 79 ++++++++++-------------------------------------- fs/btrfs/transaction.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++-- fs/btrfs/transaction.h | 7 +++++ 3 files changed, 100 insertions(+), 67 deletions(-) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6d6e1ac0a9a..10cece11dbd 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2250,13 +2250,10 @@ fail_commit: static int create_snapshot(struct btrfs_root *root, char *name, int namelen) { + struct btrfs_pending_snapshot *pending_snapshot; struct btrfs_trans_handle *trans; - struct btrfs_key key; - struct btrfs_root_item new_root_item; - struct extent_buffer *tmp; int ret; int err; - u64 objectid; unsigned long nr = 0; if (!root->ref_cows) @@ -2267,72 +2264,26 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) if (ret) goto fail_unlock; + pending_snapshot = kmalloc(sizeof(*pending_snapshot), GFP_NOFS); + if (!pending_snapshot) { + ret = -ENOMEM; + goto fail_unlock; + } + pending_snapshot->name = kstrndup(name, namelen, GFP_NOFS); + if (!pending_snapshot->name) { + ret = -ENOMEM; + kfree(pending_snapshot); + goto fail_unlock; + } trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); - err = btrfs_commit_transaction(trans, root); - - trans = btrfs_start_transaction(root, 1); + pending_snapshot->root = root; + list_add(&pending_snapshot->list, + &trans->transaction->pending_snapshots); ret = btrfs_update_inode(trans, root, root->inode); - if (ret) - goto fail; - - ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, - 0, &objectid); - if (ret) - goto fail; memcpy(&new_root_item, &root->root_item, - sizeof(new_root_item)); - - key.objectid = objectid; - key.offset = 1; - btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); - - extent_buffer_get(root->node); - btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); - free_extent_buffer(tmp); - - /* write the ordered inodes 
to force all delayed allocations to - * be filled. Once this is done, we can copy the root - */ - mutex_lock(&root->fs_info->trans_mutex); - btrfs_write_ordered_inodes(trans, root); - mutex_unlock(&root->fs_info->trans_mutex); - - btrfs_copy_root(trans, root, root->node, &tmp, objectid); - - btrfs_set_root_bytenr(&new_root_item, tmp->start); - btrfs_set_root_level(&new_root_item, btrfs_header_level(tmp)); - ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, - &new_root_item); -printk("new root %Lu node %Lu\n", objectid, tmp->start); - free_extent_buffer(tmp); - if (ret) - goto fail; - - /* - * insert the directory item - */ - key.offset = (u64)-1; - ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, - name, namelen, - root->fs_info->sb->s_root->d_inode->i_ino, - &key, BTRFS_FT_DIR); - - if (ret) - goto fail; - - ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root, - name, namelen, objectid, - root->fs_info->sb->s_root->d_inode->i_ino); - - if (ret) - goto fail; -fail: - nr = trans->blocks_used; err = btrfs_commit_transaction(trans, root); - if (err && !ret) - ret = err; fail_unlock: mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 3ed5868e7c0..dc9865323e3 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -66,6 +66,7 @@ static int join_transaction(struct btrfs_root *root) cur_trans->use_count = 1; cur_trans->commit_done = 0; cur_trans->start_time = get_seconds(); + INIT_LIST_HEAD(&cur_trans->pending_snapshots); list_add_tail(&cur_trans->list, &root->fs_info->trans_list); btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree); extent_map_tree_init(&cur_trans->dirty_pages, @@ -481,10 +482,8 @@ int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, struct inode *inode; u64 root_objectid = 0; u64 objectid = 0; - u64 transid = trans->transid; int ret; -printk("write ordered trans %Lu\n", transid); while(1) { ret 
= btrfs_find_first_ordered_inode( &cur_trans->ordered_inode_tree, @@ -524,7 +523,80 @@ printk("write ordered trans %Lu\n", transid); mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); } -printk("done write ordered trans %Lu\n", transid); + return 0; +} + +static int create_pending_snapshot(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_pending_snapshot *pending) +{ + struct btrfs_key key; + struct btrfs_root_item new_root_item; + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_root *root = pending->root; + struct extent_buffer *tmp; + int ret; + u64 objectid; + + ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); + if (ret) + goto fail; + + memcpy(&new_root_item, &root->root_item, sizeof(new_root_item)); + + key.objectid = objectid; + key.offset = 1; + btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + + extent_buffer_get(root->node); + btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); + free_extent_buffer(tmp); + + btrfs_copy_root(trans, root, root->node, &tmp, objectid); + + btrfs_set_root_bytenr(&new_root_item, tmp->start); + btrfs_set_root_level(&new_root_item, btrfs_header_level(tmp)); + ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, + &new_root_item); + free_extent_buffer(tmp); + if (ret) + goto fail; + + /* + * insert the directory item + */ + key.offset = (u64)-1; + ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, + pending->name, strlen(pending->name), + root->fs_info->sb->s_root->d_inode->i_ino, + &key, BTRFS_FT_DIR); + + if (ret) + goto fail; + + ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root, + pending->name, strlen(pending->name), objectid, + root->fs_info->sb->s_root->d_inode->i_ino); +fail: + return ret; +} + +static int create_pending_snapshots(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) +{ + struct btrfs_pending_snapshot *pending; + struct list_head *head = 
&trans->transaction->pending_snapshots; + int ret; + + while(!list_empty(head)) { + pending = list_entry(head->next, + struct btrfs_pending_snapshot, list); + ret = create_pending_snapshot(trans, fs_info, pending); + BUG_ON(ret); + list_del(&pending->list); + kfree(pending->name); + kfree(pending); + } return 0; } @@ -610,6 +682,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, } while (cur_trans->num_writers > 1 || (cur_trans->num_joined != joined)); + ret = create_pending_snapshots(trans, root->fs_info); + BUG_ON(ret); + WARN_ON(cur_trans != trans->transaction); ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index c157ddbe9d1..fd52e9b2392 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -34,6 +34,7 @@ struct btrfs_transaction { struct btrfs_ordered_inode_tree ordered_inode_tree; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; + struct list_head pending_snapshots; }; struct btrfs_trans_handle { @@ -46,6 +47,12 @@ struct btrfs_trans_handle { u64 alloc_exclude_nr; }; +struct btrfs_pending_snapshot { + struct btrfs_root *root; + char *name; + struct list_head list; +}; + static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans, struct inode *inode) -- cgit v1.2.3-70-g09d2 From d1310b2e0cd98eb1348553e69b73827b436dca7b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 24 Jan 2008 16:13:08 -0500 Subject: Btrfs: Split the extent_map code into two parts There is now extent_map for mapping offsets in the file to disk and extent_io for state tracking, IO submission and extent_buffers. The new extent_map code shifts from [start,end] pairs to [start,len], and pushes the locking out into the caller. This allows a few performance optimizations and is easier to use. A number of extent_map usage bugs were fixed, mostly with failing to remove extent_map entries when changing the file. 
Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 3 +- fs/btrfs/btrfs_inode.h | 2 + fs/btrfs/ctree.h | 15 +- fs/btrfs/disk-io.c | 123 +- fs/btrfs/extent-tree.c | 43 +- fs/btrfs/extent_io.c | 3089 +++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/extent_io.h | 193 +++ fs/btrfs/extent_map.c | 3119 ++---------------------------------------------- fs/btrfs/extent_map.h | 214 +--- fs/btrfs/file.c | 29 +- fs/btrfs/inode.c | 175 +-- fs/btrfs/super.c | 10 +- fs/btrfs/transaction.c | 8 +- fs/btrfs/transaction.h | 2 +- 14 files changed, 3612 insertions(+), 3413 deletions(-) create mode 100644 fs/btrfs/extent_io.c create mode 100644 fs/btrfs/extent_io.h (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 72cc3e16741..9c61609f371 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -5,7 +5,8 @@ obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ - extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o + extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ + extent_io.o ifeq ($(CONFIG_FS_POSIX_ACL),y) btrfs-y += acl.o diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 9fc99883c70..9a0647dc5a0 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -20,6 +20,7 @@ #define __BTRFS_I__ #include "extent_map.h" +#include "extent_io.h" /* in memory btrfs inode */ struct btrfs_inode { @@ -27,6 +28,7 @@ struct btrfs_inode { struct btrfs_block_group_cache *block_group; struct btrfs_key location; struct extent_map_tree extent_tree; + struct extent_io_tree io_tree; struct inode vfs_inode; u64 ordered_trans; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b9f2975b55c..6c65473e0fe 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -27,6 +27,7 @@ #include #include #include "bit-radix.h" +#include "extent_io.h" #include 
"extent_map.h" struct btrfs_trans_handle; @@ -314,11 +315,11 @@ struct btrfs_fs_info { struct btrfs_root *tree_root; struct radix_tree_root fs_roots_radix; - struct extent_map_tree free_space_cache; - struct extent_map_tree block_group_cache; - struct extent_map_tree pinned_extents; - struct extent_map_tree pending_del; - struct extent_map_tree extent_ins; + struct extent_io_tree free_space_cache; + struct extent_io_tree block_group_cache; + struct extent_io_tree pinned_extents; + struct extent_io_tree pending_del; + struct extent_io_tree extent_ins; u64 generation; u64 last_trans_committed; @@ -956,7 +957,7 @@ u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, u64 first_extent); int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy); +int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 bytenr); @@ -1001,7 +1002,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root u64 owner_objectid, u64 owner_offset, int pin); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_map_tree *unpin); + struct extent_io_tree *unpin); int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5d1f9bca271..4c4ebea0b2a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -43,14 +43,14 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) } #endif -static struct extent_map_ops btree_extent_map_ops; +static struct extent_io_ops btree_extent_io_ops; struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { struct inode *btree_inode = root->fs_info->btree_inode; struct extent_buffer *eb; - eb = 
find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, bytenr, blocksize, GFP_NOFS); return eb; } @@ -61,13 +61,13 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, struct inode *btree_inode = root->fs_info->btree_inode; struct extent_buffer *eb; - eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, bytenr, blocksize, NULL, GFP_NOFS); return eb; } struct extent_map *btree_get_extent(struct inode *inode, struct page *page, - size_t page_offset, u64 start, u64 end, + size_t page_offset, u64 start, u64 len, int create) { struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; @@ -75,7 +75,9 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page, int ret; again: - em = lookup_extent_mapping(em_tree, start, end); + spin_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, start, len); + spin_unlock(&em_tree->lock); if (em) { goto out; } @@ -85,11 +87,14 @@ again: goto out; } em->start = 0; - em->end = (i_size_read(inode) & ~((u64)PAGE_CACHE_SIZE -1)) - 1; + em->len = i_size_read(inode); em->block_start = 0; - em->block_end = em->end; em->bdev = inode->i_sb->s_bdev; + + spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); + spin_unlock(&em_tree->lock); + if (ret == -EEXIST) { free_extent_map(em); em = NULL; @@ -175,13 +180,13 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, int csum_dirty_buffer(struct btrfs_root *root, struct page *page) { - struct extent_map_tree *tree; + struct extent_io_tree *tree; u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 found_start; int found_level; unsigned long len; struct extent_buffer *eb; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; if (page->private == EXTENT_PAGE_PRIVATE) goto out; @@ -230,16 +235,16 @@ static int 
btree_writepage_io_hook(struct page *page, u64 start, u64 end) static int btree_writepage(struct page *page, struct writeback_control *wbc) { - struct extent_map_tree *tree; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; return extent_write_full_page(tree, page, btree_get_extent, wbc); } static int btree_writepages(struct address_space *mapping, struct writeback_control *wbc) { - struct extent_map_tree *tree; - tree = &BTRFS_I(mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(mapping->host)->io_tree; if (wbc->sync_mode == WB_SYNC_NONE) { u64 num_dirty; u64 start = 0; @@ -264,18 +269,20 @@ static int btree_writepages(struct address_space *mapping, int btree_readpage(struct file *file, struct page *page) { - struct extent_map_tree *tree; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; return extent_read_full_page(tree, page, btree_get_extent); } static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags) { - struct extent_map_tree *tree; + struct extent_io_tree *tree; + struct extent_map_tree *map; int ret; - tree = &BTRFS_I(page->mapping->host)->extent_tree; - ret = try_release_extent_mapping(tree, page); + tree = &BTRFS_I(page->mapping->host)->io_tree; + map = &BTRFS_I(page->mapping->host)->extent_tree; + ret = try_release_extent_mapping(map, tree, page); if (ret == 1) { ClearPagePrivate(page); set_page_private(page, 0); @@ -286,8 +293,8 @@ static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags) static void btree_invalidatepage(struct page *page, unsigned long offset) { - struct extent_map_tree *tree; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; extent_invalidatepage(tree, page, offset); btree_releasepage(page, GFP_NOFS); } @@ -331,7 +338,7 @@ int 
readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return 0; - read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, + read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 0); free_extent_buffer(buf); return ret; @@ -342,40 +349,39 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; - struct extent_map_tree *extent_tree; + struct extent_io_tree *io_tree; u64 end; int ret; - extent_tree = &BTRFS_I(btree_inode)->extent_tree; + io_tree = &BTRFS_I(btree_inode)->io_tree; buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return NULL; - read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, - buf, 0, 1); + read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1); if (buf->flags & EXTENT_CSUM) return buf; end = buf->start + PAGE_CACHE_SIZE - 1; - if (test_range_bit(extent_tree, buf->start, end, EXTENT_CSUM, 1)) { + if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) { buf->flags |= EXTENT_CSUM; return buf; } - lock_extent(extent_tree, buf->start, end, GFP_NOFS); + lock_extent(io_tree, buf->start, end, GFP_NOFS); - if (test_range_bit(extent_tree, buf->start, end, EXTENT_CSUM, 1)) { + if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) { buf->flags |= EXTENT_CSUM; goto out_unlock; } ret = csum_tree_block(root, buf, 1); - set_extent_bits(extent_tree, buf->start, end, EXTENT_CSUM, GFP_NOFS); + set_extent_bits(io_tree, buf->start, end, EXTENT_CSUM, GFP_NOFS); buf->flags |= EXTENT_CSUM; out_unlock: - unlock_extent(extent_tree, buf->start, end, GFP_NOFS); + unlock_extent(io_tree, buf->start, end, GFP_NOFS); return buf; } @@ -385,7 +391,7 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *btree_inode = root->fs_info->btree_inode; if 
(btrfs_header_generation(buf) == root->fs_info->running_transaction->transid) - clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, + clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); return 0; } @@ -394,7 +400,7 @@ int wait_on_tree_block_writeback(struct btrfs_root *root, struct extent_buffer *buf) { struct inode *btree_inode = root->fs_info->btree_inode; - wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->extent_tree, + wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->io_tree, buf); return 0; } @@ -659,20 +665,23 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_nlink = 1; fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; - extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree, + extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, fs_info->btree_inode->i_mapping, GFP_NOFS); - BTRFS_I(fs_info->btree_inode)->extent_tree.ops = &btree_extent_map_ops; + extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree, + GFP_NOFS); + + BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; - extent_map_tree_init(&fs_info->free_space_cache, + extent_io_tree_init(&fs_info->free_space_cache, fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_map_tree_init(&fs_info->block_group_cache, + extent_io_tree_init(&fs_info->block_group_cache, fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_map_tree_init(&fs_info->pinned_extents, + extent_io_tree_init(&fs_info->pinned_extents, fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_map_tree_init(&fs_info->pending_del, + extent_io_tree_init(&fs_info->pending_del, fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_map_tree_init(&fs_info->extent_ins, + extent_io_tree_init(&fs_info->extent_ins, fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; fs_info->closing = 0; @@ -787,7 +796,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct 
btrfs_root if (!btrfs_test_opt(root, NOBARRIER)) blkdev_issue_flush(sb->s_bdev, NULL); - set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, super); + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, super); ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping, super->start, super->len); if (!btrfs_test_opt(root, NOBARRIER)) @@ -864,12 +873,12 @@ int close_ctree(struct btrfs_root *root) filemap_write_and_wait(fs_info->btree_inode->i_mapping); - extent_map_tree_empty_lru(&fs_info->free_space_cache); - extent_map_tree_empty_lru(&fs_info->block_group_cache); - extent_map_tree_empty_lru(&fs_info->pinned_extents); - extent_map_tree_empty_lru(&fs_info->pending_del); - extent_map_tree_empty_lru(&fs_info->extent_ins); - extent_map_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->extent_tree); + extent_io_tree_empty_lru(&fs_info->free_space_cache); + extent_io_tree_empty_lru(&fs_info->block_group_cache); + extent_io_tree_empty_lru(&fs_info->pinned_extents); + extent_io_tree_empty_lru(&fs_info->pending_del); + extent_io_tree_empty_lru(&fs_info->extent_ins); + extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); @@ -892,13 +901,13 @@ int close_ctree(struct btrfs_root *root) int btrfs_buffer_uptodate(struct extent_buffer *buf) { struct inode *btree_inode = buf->first_page->mapping->host; - return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); + return extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf); } int btrfs_set_buffer_uptodate(struct extent_buffer *buf) { struct inode *btree_inode = buf->first_page->mapping->host; - return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, + return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf); } @@ -914,7 +923,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) transid, root->fs_info->generation); WARN_ON(1); } - set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, 
buf); + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); } void btrfs_throttle(struct btrfs_root *root) @@ -941,7 +950,7 @@ void btrfs_set_buffer_defrag(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, + set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS); } @@ -949,7 +958,7 @@ void btrfs_set_buffer_defrag_done(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, + set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, GFP_NOFS); } @@ -958,7 +967,7 @@ int btrfs_buffer_defrag(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, + return test_range_bit(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0); } @@ -966,7 +975,7 @@ int btrfs_buffer_defrag_done(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, + return test_range_bit(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, 0); } @@ -975,7 +984,7 @@ int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, + return 
clear_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, GFP_NOFS); } @@ -984,7 +993,7 @@ int btrfs_clear_buffer_defrag(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, + return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS); } @@ -993,10 +1002,10 @@ int btrfs_read_buffer(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, + return read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1); } -static struct extent_map_ops btree_extent_map_ops = { +static struct extent_io_ops btree_extent_io_ops = { .writepage_io_hook = btree_writepage_io_hook, }; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b69a46691a9..1cf125ab782 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -63,7 +63,7 @@ static int cache_block_group(struct btrfs_root *root, int ret; struct btrfs_key key; struct extent_buffer *leaf; - struct extent_map_tree *free_space_cache; + struct extent_io_tree *free_space_cache; int slot; u64 last = 0; u64 hole_size; @@ -158,7 +158,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 bytenr) { - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_block_group_cache *block_group = NULL; u64 ptr; u64 start; @@ -281,7 +281,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int data, int owner) { struct btrfs_block_group_cache *cache; - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct 
btrfs_block_group_cache *found_group = NULL; struct btrfs_fs_info *info = root->fs_info; u64 used; @@ -951,7 +951,7 @@ fail: int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_block_group_cache *cache; int ret; int err = 0; @@ -1107,12 +1107,12 @@ static int update_pinned_extents(struct btrfs_root *root, return 0; } -int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy) +int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) { u64 last = 0; u64 start; u64 end; - struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents; + struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; int ret; while(1) { @@ -1128,12 +1128,12 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy) int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_map_tree *unpin) + struct extent_io_tree *unpin) { u64 start; u64 end; int ret; - struct extent_map_tree *free_space_cache; + struct extent_io_tree *free_space_cache; free_space_cache = &root->fs_info->free_space_cache; while(1) { @@ -1329,8 +1329,8 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct int err = 0; u64 start; u64 end; - struct extent_map_tree *pending_del; - struct extent_map_tree *pinned_extents; + struct extent_io_tree *pending_del; + struct extent_io_tree *pinned_extents; pending_del = &extent_root->fs_info->pending_del; pinned_extents = &extent_root->fs_info->pinned_extents; @@ -1802,7 +1802,7 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); - set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree, + set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->io_tree, 
buf->start, buf->start + buf->len - 1, EXTENT_CSUM, GFP_NOFS); buf->flags |= EXTENT_CSUM; @@ -2166,7 +2166,7 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, unsigned long i; struct page *page; struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct file_ra_state *ra; ra = kzalloc(sizeof(*ra), GFP_NOFS); @@ -2195,15 +2195,14 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, page_start = (u64)page->index << PAGE_CACHE_SHIFT; page_end = page_start + PAGE_CACHE_SIZE - 1; - lock_extent(em_tree, page_start, page_end, GFP_NOFS); + lock_extent(io_tree, page_start, page_end, GFP_NOFS); delalloc_start = page_start; - existing_delalloc = - count_range_bits(&BTRFS_I(inode)->extent_tree, - &delalloc_start, page_end, - PAGE_CACHE_SIZE, EXTENT_DELALLOC); + existing_delalloc = count_range_bits(io_tree, + &delalloc_start, page_end, + PAGE_CACHE_SIZE, EXTENT_DELALLOC); - set_extent_delalloc(em_tree, page_start, + set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS); spin_lock(&root->fs_info->delalloc_lock); @@ -2211,7 +2210,7 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, existing_delalloc; spin_unlock(&root->fs_info->delalloc_lock); - unlock_extent(em_tree, page_start, page_end, GFP_NOFS); + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); set_page_dirty(page); unlock_page(page); page_cache_release(page); @@ -2379,7 +2378,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) u64 cur_byte; u64 total_found; struct btrfs_fs_info *info = root->fs_info; - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; @@ -2561,7 +2560,7 @@ int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *cache; struct 
btrfs_block_group_item *item; struct btrfs_fs_info *info = root->fs_info; - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_key key; struct extent_buffer *leaf; int ret; @@ -2645,7 +2644,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) int bit; struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c new file mode 100644 index 00000000000..15cc158a049 --- /dev/null +++ b/fs/btrfs/extent_io.c @@ -0,0 +1,3089 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "extent_io.h" +#include "extent_map.h" + +/* temporary define until extent_map moves out of btrfs */ +struct kmem_cache *btrfs_cache_create(const char *name, size_t size, + unsigned long extra_flags, + void (*ctor)(void *, struct kmem_cache *, + unsigned long)); + +static struct kmem_cache *extent_state_cache; +static struct kmem_cache *extent_buffer_cache; + +static LIST_HEAD(buffers); +static LIST_HEAD(states); + +static spinlock_t state_lock = SPIN_LOCK_UNLOCKED; +#define BUFFER_LRU_MAX 64 + +struct tree_entry { + u64 start; + u64 end; + int in_tree; + struct rb_node rb_node; +}; + +struct extent_page_data { + struct bio *bio; + struct extent_io_tree *tree; + get_extent_t *get_extent; +}; + +int __init extent_io_init(void) +{ + extent_state_cache = btrfs_cache_create("extent_state", + sizeof(struct extent_state), 0, + NULL); + if (!extent_state_cache) + return -ENOMEM; + + extent_buffer_cache = btrfs_cache_create("extent_buffers", + sizeof(struct extent_buffer), 0, + NULL); + if (!extent_buffer_cache) + goto free_state_cache; + return 0; + +free_state_cache: + kmem_cache_destroy(extent_state_cache); + 
return -ENOMEM; +} + +void extent_io_exit(void) +{ + struct extent_state *state; + + while (!list_empty(&states)) { + state = list_entry(states.next, struct extent_state, list); + printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs)); + list_del(&state->list); + kmem_cache_free(extent_state_cache, state); + + } + + if (extent_state_cache) + kmem_cache_destroy(extent_state_cache); + if (extent_buffer_cache) + kmem_cache_destroy(extent_buffer_cache); +} + +void extent_io_tree_init(struct extent_io_tree *tree, + struct address_space *mapping, gfp_t mask) +{ + tree->state.rb_node = NULL; + tree->ops = NULL; + tree->dirty_bytes = 0; + rwlock_init(&tree->lock); + spin_lock_init(&tree->lru_lock); + tree->mapping = mapping; + INIT_LIST_HEAD(&tree->buffer_lru); + tree->lru_size = 0; +} +EXPORT_SYMBOL(extent_io_tree_init); + +void extent_io_tree_empty_lru(struct extent_io_tree *tree) +{ + struct extent_buffer *eb; + while(!list_empty(&tree->buffer_lru)) { + eb = list_entry(tree->buffer_lru.next, struct extent_buffer, + lru); + list_del_init(&eb->lru); + free_extent_buffer(eb); + } +} +EXPORT_SYMBOL(extent_io_tree_empty_lru); + +struct extent_state *alloc_extent_state(gfp_t mask) +{ + struct extent_state *state; + unsigned long flags; + + state = kmem_cache_alloc(extent_state_cache, mask); + if (!state || IS_ERR(state)) + return state; + state->state = 0; + state->in_tree = 0; + state->private = 0; + + spin_lock_irqsave(&state_lock, flags); + list_add(&state->list, &states); + spin_unlock_irqrestore(&state_lock, flags); + + atomic_set(&state->refs, 1); + init_waitqueue_head(&state->wq); + return state; +} +EXPORT_SYMBOL(alloc_extent_state); + +void free_extent_state(struct extent_state *state) +{ + unsigned long flags; + if (!state) + return; + if (atomic_dec_and_test(&state->refs)) { + WARN_ON(state->in_tree); + spin_lock_irqsave(&state_lock, flags); + list_del(&state->list); 
+ spin_unlock_irqrestore(&state_lock, flags); + kmem_cache_free(extent_state_cache, state); + } +} +EXPORT_SYMBOL(free_extent_state); + +static struct rb_node *tree_insert(struct rb_root *root, u64 offset, + struct rb_node *node) +{ + struct rb_node ** p = &root->rb_node; + struct rb_node * parent = NULL; + struct tree_entry *entry; + + while(*p) { + parent = *p; + entry = rb_entry(parent, struct tree_entry, rb_node); + + if (offset < entry->start) + p = &(*p)->rb_left; + else if (offset > entry->end) + p = &(*p)->rb_right; + else + return parent; + } + + entry = rb_entry(node, struct tree_entry, rb_node); + entry->in_tree = 1; + rb_link_node(node, parent, p); + rb_insert_color(node, root); + return NULL; +} + +static struct rb_node *__tree_search(struct rb_root *root, u64 offset, + struct rb_node **prev_ret, + struct rb_node **next_ret) +{ + struct rb_node * n = root->rb_node; + struct rb_node *prev = NULL; + struct rb_node *orig_prev = NULL; + struct tree_entry *entry; + struct tree_entry *prev_entry = NULL; + + while(n) { + entry = rb_entry(n, struct tree_entry, rb_node); + prev = n; + prev_entry = entry; + + if (offset < entry->start) + n = n->rb_left; + else if (offset > entry->end) + n = n->rb_right; + else + return n; + } + + if (prev_ret) { + orig_prev = prev; + while(prev && offset > prev_entry->end) { + prev = rb_next(prev); + prev_entry = rb_entry(prev, struct tree_entry, rb_node); + } + *prev_ret = prev; + prev = orig_prev; + } + + if (next_ret) { + prev_entry = rb_entry(prev, struct tree_entry, rb_node); + while(prev && offset < prev_entry->start) { + prev = rb_prev(prev); + prev_entry = rb_entry(prev, struct tree_entry, rb_node); + } + *next_ret = prev; + } + return NULL; +} + +static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) +{ + struct rb_node *prev; + struct rb_node *ret; + ret = __tree_search(root, offset, &prev, NULL); + if (!ret) + return prev; + return ret; +} + +/* + * utility function to look for merge candidates 
inside a given range. + * Any extents with matching state are merged together into a single + * extent in the tree. Extents with EXTENT_IO in their state field + * are not merged because the end_io handlers need to be able to do + * operations on them without sleeping (or doing allocations/splits). + * + * This should be called with the tree lock held. + */ +static int merge_state(struct extent_io_tree *tree, + struct extent_state *state) +{ + struct extent_state *other; + struct rb_node *other_node; + + if (state->state & EXTENT_IOBITS) + return 0; + + other_node = rb_prev(&state->rb_node); + if (other_node) { + other = rb_entry(other_node, struct extent_state, rb_node); + if (other->end == state->start - 1 && + other->state == state->state) { + state->start = other->start; + other->in_tree = 0; + rb_erase(&other->rb_node, &tree->state); + free_extent_state(other); + } + } + other_node = rb_next(&state->rb_node); + if (other_node) { + other = rb_entry(other_node, struct extent_state, rb_node); + if (other->start == state->end + 1 && + other->state == state->state) { + other->start = state->start; + state->in_tree = 0; + rb_erase(&state->rb_node, &tree->state); + free_extent_state(state); + } + } + return 0; +} + +/* + * insert an extent_state struct into the tree. 'bits' are set on the + * struct before it is inserted. + * + * This may return -EEXIST if the extent is already there, in which case the + * state struct is freed. + * + * The tree lock is not taken internally. This is a utility function and + * probably isn't what you want to call (see set/clear_extent_bit). 
+ */ +static int insert_state(struct extent_io_tree *tree, + struct extent_state *state, u64 start, u64 end, + int bits) +{ + struct rb_node *node; + + if (end < start) { + printk("end < start %Lu %Lu\n", end, start); + WARN_ON(1); + } + if (bits & EXTENT_DIRTY) + tree->dirty_bytes += end - start + 1; + state->state |= bits; + state->start = start; + state->end = end; + node = tree_insert(&tree->state, end, &state->rb_node); + if (node) { + struct extent_state *found; + found = rb_entry(node, struct extent_state, rb_node); + printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end); + free_extent_state(state); + return -EEXIST; + } + merge_state(tree, state); + return 0; +} + +/* + * split a given extent state struct in two, inserting the preallocated + * struct 'prealloc' as the newly created second half. 'split' indicates an + * offset inside 'orig' where it should be split. + * + * Before calling, + * the tree has 'orig' at [orig->start, orig->end]. After calling, there + * are two extent state structs in the tree: + * prealloc: [orig->start, split - 1] + * orig: [ split, orig->end ] + * + * The tree locks are not taken by this function. They need to be held + * by the caller. + */ +static int split_state(struct extent_io_tree *tree, struct extent_state *orig, + struct extent_state *prealloc, u64 split) +{ + struct rb_node *node; + prealloc->start = orig->start; + prealloc->end = split - 1; + prealloc->state = orig->state; + orig->start = split; + + node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node); + if (node) { + struct extent_state *found; + found = rb_entry(node, struct extent_state, rb_node); + printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end); + free_extent_state(prealloc); + return -EEXIST; + } + return 0; +} + +/* + * utility function to clear some bits in an extent state struct. 
+ * it will optionally wake up any one waiting on this state (wake == 1), or + * forcibly remove the state from the tree (delete == 1). + * + * If no bits are set on the state struct after clearing things, the + * struct is freed and removed from the tree + */ +static int clear_state_bit(struct extent_io_tree *tree, + struct extent_state *state, int bits, int wake, + int delete) +{ + int ret = state->state & bits; + + if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { + u64 range = state->end - state->start + 1; + WARN_ON(range > tree->dirty_bytes); + tree->dirty_bytes -= range; + } + state->state &= ~bits; + if (wake) + wake_up(&state->wq); + if (delete || state->state == 0) { + if (state->in_tree) { + rb_erase(&state->rb_node, &tree->state); + state->in_tree = 0; + free_extent_state(state); + } else { + WARN_ON(1); + } + } else { + merge_state(tree, state); + } + return ret; +} + +/* + * clear some bits on a range in the tree. This may require splitting + * or inserting elements in the tree, so the gfp mask is used to + * indicate which allocations or sleeping are allowed. + * + * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove + * the given range from the tree regardless of state (ie for truncate). + * + * the range [start, end] is inclusive. + * + * This takes the tree lock, and returns < 0 on error, > 0 if any of the + * bits were already set, or zero if none of the bits were already set. 
+ */ +int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int wake, int delete, gfp_t mask) +{ + struct extent_state *state; + struct extent_state *prealloc = NULL; + struct rb_node *node; + unsigned long flags; + int err; + int set = 0; + +again: + if (!prealloc && (mask & __GFP_WAIT)) { + prealloc = alloc_extent_state(mask); + if (!prealloc) + return -ENOMEM; + } + + write_lock_irqsave(&tree->lock, flags); + /* + * this search will find the extents that end after + * our range starts + */ + node = tree_search(&tree->state, start); + if (!node) + goto out; + state = rb_entry(node, struct extent_state, rb_node); + if (state->start > end) + goto out; + WARN_ON(state->end < start); + + /* + * | ---- desired range ---- | + * | state | or + * | ------------- state -------------- | + * + * We need to split the extent we found, and may flip + * bits on second half. + * + * If the extent we found extends past our range, we + * just split and search again. It'll get split again + * the next time though. + * + * If the extent we found is inside our range, we clear + * the desired bit on it. 
+ */ + + if (state->start < start) { + err = split_state(tree, state, prealloc, start); + BUG_ON(err == -EEXIST); + prealloc = NULL; + if (err) + goto out; + if (state->end <= end) { + start = state->end + 1; + set |= clear_state_bit(tree, state, bits, + wake, delete); + } else { + start = state->start; + } + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | + * We need to split the extent, and clear the bit + * on the first half + */ + if (state->start <= end && state->end > end) { + err = split_state(tree, state, prealloc, end + 1); + BUG_ON(err == -EEXIST); + + if (wake) + wake_up(&state->wq); + set |= clear_state_bit(tree, prealloc, bits, + wake, delete); + prealloc = NULL; + goto out; + } + + start = state->end + 1; + set |= clear_state_bit(tree, state, bits, wake, delete); + goto search_again; + +out: + write_unlock_irqrestore(&tree->lock, flags); + if (prealloc) + free_extent_state(prealloc); + + return set; + +search_again: + if (start > end) + goto out; + write_unlock_irqrestore(&tree->lock, flags); + if (mask & __GFP_WAIT) + cond_resched(); + goto again; +} +EXPORT_SYMBOL(clear_extent_bit); + +static int wait_on_state(struct extent_io_tree *tree, + struct extent_state *state) +{ + DEFINE_WAIT(wait); + prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); + read_unlock_irq(&tree->lock); + schedule(); + read_lock_irq(&tree->lock); + finish_wait(&state->wq, &wait); + return 0; +} + +/* + * waits for one or more bits to clear on a range in the state tree. + * The range [start, end] is inclusive. 
+ * The tree lock is taken by this function + */ +int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits) +{ + struct extent_state *state; + struct rb_node *node; + + read_lock_irq(&tree->lock); +again: + while (1) { + /* + * this search will find all the extents that end after + * our range starts + */ + node = tree_search(&tree->state, start); + if (!node) + break; + + state = rb_entry(node, struct extent_state, rb_node); + + if (state->start > end) + goto out; + + if (state->state & bits) { + start = state->start; + atomic_inc(&state->refs); + wait_on_state(tree, state); + free_extent_state(state); + goto again; + } + start = state->end + 1; + + if (start > end) + break; + + if (need_resched()) { + read_unlock_irq(&tree->lock); + cond_resched(); + read_lock_irq(&tree->lock); + } + } +out: + read_unlock_irq(&tree->lock); + return 0; +} +EXPORT_SYMBOL(wait_extent_bit); + +static void set_state_bits(struct extent_io_tree *tree, + struct extent_state *state, + int bits) +{ + if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { + u64 range = state->end - state->start + 1; + tree->dirty_bytes += range; + } + state->state |= bits; +} + +/* + * set some bits on a range in the tree. This may require allocations + * or sleeping, so the gfp mask is used to indicate what is allowed. + * + * If 'exclusive' == 1, this will fail with -EEXIST if some part of the + * range already has the desired bits set. The start of the existing + * range is returned in failed_start in this case. + * + * [start, end] is inclusive + * This takes the tree lock. 
+ */ +int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, + int exclusive, u64 *failed_start, gfp_t mask) +{ + struct extent_state *state; + struct extent_state *prealloc = NULL; + struct rb_node *node; + unsigned long flags; + int err = 0; + int set; + u64 last_start; + u64 last_end; +again: + if (!prealloc && (mask & __GFP_WAIT)) { + prealloc = alloc_extent_state(mask); + if (!prealloc) + return -ENOMEM; + } + + write_lock_irqsave(&tree->lock, flags); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, start); + if (!node) { + err = insert_state(tree, prealloc, start, end, bits); + prealloc = NULL; + BUG_ON(err == -EEXIST); + goto out; + } + + state = rb_entry(node, struct extent_state, rb_node); + last_start = state->start; + last_end = state->end; + + /* + * | ---- desired range ---- | + * | state | + * + * Just lock what we found and keep going + */ + if (state->start == start && state->end <= end) { + set = state->state & bits; + if (set && exclusive) { + *failed_start = state->start; + err = -EEXIST; + goto out; + } + set_state_bits(tree, state, bits); + start = state->end + 1; + merge_state(tree, state); + goto search_again; + } + + /* + * | ---- desired range ---- | + * | state | + * or + * | ------------- state -------------- | + * + * We need to split the extent we found, and may flip bits on + * second half. + * + * If the extent we found extends past our + * range, we just split and search again. It'll get split + * again the next time though. + * + * If the extent we found is inside our range, we set the + * desired bit on it. 
+ */ + if (state->start < start) { + set = state->state & bits; + if (exclusive && set) { + *failed_start = start; + err = -EEXIST; + goto out; + } + err = split_state(tree, state, prealloc, start); + BUG_ON(err == -EEXIST); + prealloc = NULL; + if (err) + goto out; + if (state->end <= end) { + set_state_bits(tree, state, bits); + start = state->end + 1; + merge_state(tree, state); + } else { + start = state->start; + } + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | or | state | + * + * There's a hole, we need to insert something in it and + * ignore the extent we found. + */ + if (state->start > start) { + u64 this_end; + if (end < last_start) + this_end = end; + else + this_end = last_start -1; + err = insert_state(tree, prealloc, start, this_end, + bits); + prealloc = NULL; + BUG_ON(err == -EEXIST); + if (err) + goto out; + start = this_end + 1; + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | + * We need to split the extent, and set the bit + * on the first half + */ + if (state->start <= end && state->end > end) { + set = state->state & bits; + if (exclusive && set) { + *failed_start = start; + err = -EEXIST; + goto out; + } + err = split_state(tree, state, prealloc, end + 1); + BUG_ON(err == -EEXIST); + + set_state_bits(tree, prealloc, bits); + merge_state(tree, prealloc); + prealloc = NULL; + goto out; + } + + goto search_again; + +out: + write_unlock_irqrestore(&tree->lock, flags); + if (prealloc) + free_extent_state(prealloc); + + return err; + +search_again: + if (start > end) + goto out; + write_unlock_irqrestore(&tree->lock, flags); + if (mask & __GFP_WAIT) + cond_resched(); + goto again; +} +EXPORT_SYMBOL(set_extent_bit); + +/* wrappers around set/clear extent bit */ +int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_dirty); + +int set_extent_bits(struct 
extent_io_tree *tree, u64 start, u64 end, + int bits, gfp_t mask) +{ + return set_extent_bit(tree, start, end, bits, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_bits); + +int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, + int bits, gfp_t mask) +{ + return clear_extent_bit(tree, start, end, bits, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_bits); + +int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, + EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_delalloc); + +int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, + EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_dirty); + +int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_new); + +int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_new); + +int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_uptodate); + +int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_uptodate); + +int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_WRITEBACK, + 0, NULL, mask); +} +EXPORT_SYMBOL(set_extent_writeback); + +int clear_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, 
EXTENT_WRITEBACK, 1, 0, mask); +} +EXPORT_SYMBOL(clear_extent_writeback); + +int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) +{ + return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK); +} +EXPORT_SYMBOL(wait_on_extent_writeback); + +/* + * locks a range in ascending order, waiting for any locked regions + * it hits on the way. [start,end] are inclusive, and this will sleep. + */ +int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) +{ + int err; + u64 failed_start; + while (1) { + err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, + &failed_start, mask); + if (err == -EEXIST && (mask & __GFP_WAIT)) { + wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); + start = failed_start; + } else { + break; + } + WARN_ON(start > end); + } + return err; +} +EXPORT_SYMBOL(lock_extent); + +int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask); +} +EXPORT_SYMBOL(unlock_extent); + +/* + * helper function to set pages and extents in the tree dirty + */ +int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(tree->mapping, index); + BUG_ON(!page); + __set_page_dirty_nobuffers(page); + page_cache_release(page); + index++; + } + set_extent_dirty(tree, start, end, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(set_range_dirty); + +/* + * helper function to set both pages and extents in the tree writeback + */ +int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(tree->mapping, index); + BUG_ON(!page); + set_page_writeback(page); 
+ page_cache_release(page); + index++; + } + set_extent_writeback(tree, start, end, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(set_range_writeback); + +int find_first_extent_bit(struct extent_io_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, int bits) +{ + struct rb_node *node; + struct extent_state *state; + int ret = 1; + + read_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, start); + if (!node || IS_ERR(node)) { + goto out; + } + + while(1) { + state = rb_entry(node, struct extent_state, rb_node); + if (state->end >= start && (state->state & bits)) { + *start_ret = state->start; + *end_ret = state->end; + ret = 0; + break; + } + node = rb_next(node); + if (!node) + break; + } +out: + read_unlock_irq(&tree->lock); + return ret; +} +EXPORT_SYMBOL(find_first_extent_bit); + +u64 find_lock_delalloc_range(struct extent_io_tree *tree, + u64 *start, u64 *end, u64 max_bytes) +{ + struct rb_node *node; + struct extent_state *state; + u64 cur_start = *start; + u64 found = 0; + u64 total_bytes = 0; + + write_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. 
+ */ +search_again: + node = tree_search(&tree->state, cur_start); + if (!node || IS_ERR(node)) { + *end = (u64)-1; + goto out; + } + + while(1) { + state = rb_entry(node, struct extent_state, rb_node); + if (found && state->start != cur_start) { + goto out; + } + if (!(state->state & EXTENT_DELALLOC)) { + if (!found) + *end = state->end; + goto out; + } + if (!found) { + struct extent_state *prev_state; + struct rb_node *prev_node = node; + while(1) { + prev_node = rb_prev(prev_node); + if (!prev_node) + break; + prev_state = rb_entry(prev_node, + struct extent_state, + rb_node); + if (!(prev_state->state & EXTENT_DELALLOC)) + break; + state = prev_state; + node = prev_node; + } + } + if (state->state & EXTENT_LOCKED) { + DEFINE_WAIT(wait); + atomic_inc(&state->refs); + prepare_to_wait(&state->wq, &wait, + TASK_UNINTERRUPTIBLE); + write_unlock_irq(&tree->lock); + schedule(); + write_lock_irq(&tree->lock); + finish_wait(&state->wq, &wait); + free_extent_state(state); + goto search_again; + } + state->state |= EXTENT_LOCKED; + if (!found) + *start = state->start; + found++; + *end = state->end; + cur_start = state->end + 1; + node = rb_next(node); + if (!node) + break; + total_bytes += state->end - state->start + 1; + if (total_bytes >= max_bytes) + break; + } +out: + write_unlock_irq(&tree->lock); + return found; +} + +u64 count_range_bits(struct extent_io_tree *tree, + u64 *start, u64 search_end, u64 max_bytes, + unsigned long bits) +{ + struct rb_node *node; + struct extent_state *state; + u64 cur_start = *start; + u64 total_bytes = 0; + int found = 0; + + if (search_end <= cur_start) { + printk("search_end %Lu start %Lu\n", search_end, cur_start); + WARN_ON(1); + return 0; + } + + write_lock_irq(&tree->lock); + if (cur_start == 0 && bits == EXTENT_DIRTY) { + total_bytes = tree->dirty_bytes; + goto out; + } + /* + * this search will find all the extents that end after + * our range starts. 
+ */ + node = tree_search(&tree->state, cur_start); + if (!node || IS_ERR(node)) { + goto out; + } + + while(1) { + state = rb_entry(node, struct extent_state, rb_node); + if (state->start > search_end) + break; + if (state->end >= cur_start && (state->state & bits)) { + total_bytes += min(search_end, state->end) + 1 - + max(cur_start, state->start); + if (total_bytes >= max_bytes) + break; + if (!found) { + *start = state->start; + found = 1; + } + } + node = rb_next(node); + if (!node) + break; + } +out: + write_unlock_irq(&tree->lock); + return total_bytes; +} +/* + * helper function to lock both pages and extents in the tree. + * pages must be locked first. + */ +int lock_range(struct extent_io_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + int err; + + while (index <= end_index) { + page = grab_cache_page(tree->mapping, index); + if (!page) { + err = -ENOMEM; + goto failed; + } + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto failed; + } + index++; + } + lock_extent(tree, start, end, GFP_NOFS); + return 0; + +failed: + /* + * we failed above in getting the page at 'index', so we undo here + * up to but not including the page at 'index' + */ + end_index = index; + index = start >> PAGE_CACHE_SHIFT; + while (index < end_index) { + page = find_get_page(tree->mapping, index); + unlock_page(page); + page_cache_release(page); + index++; + } + return err; +} +EXPORT_SYMBOL(lock_range); + +/* + * helper function to unlock both pages and extents in the tree. 
+ */ +int unlock_range(struct extent_io_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(tree->mapping, index); + unlock_page(page); + page_cache_release(page); + index++; + } + unlock_extent(tree, start, end, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(unlock_range); + +int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) +{ + struct rb_node *node; + struct extent_state *state; + int ret = 0; + + write_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, start); + if (!node || IS_ERR(node)) { + ret = -ENOENT; + goto out; + } + state = rb_entry(node, struct extent_state, rb_node); + if (state->start != start) { + ret = -ENOENT; + goto out; + } + state->private = private; +out: + write_unlock_irq(&tree->lock); + return ret; +} + +int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) +{ + struct rb_node *node; + struct extent_state *state; + int ret = 0; + + read_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, start); + if (!node || IS_ERR(node)) { + ret = -ENOENT; + goto out; + } + state = rb_entry(node, struct extent_state, rb_node); + if (state->start != start) { + ret = -ENOENT; + goto out; + } + *private = state->private; +out: + read_unlock_irq(&tree->lock); + return ret; +} + +/* + * searches a range in the state tree for a given mask. + * If 'filled' == 1, this returns 1 only if ever extent in the tree + * has the bits set. Otherwise, 1 is returned if any bit in the + * range is found set. 
+ */ +int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int filled) +{ + struct extent_state *state = NULL; + struct rb_node *node; + int bitset = 0; + unsigned long flags; + + read_lock_irqsave(&tree->lock, flags); + node = tree_search(&tree->state, start); + while (node && start <= end) { + state = rb_entry(node, struct extent_state, rb_node); + + if (filled && state->start > start) { + bitset = 0; + break; + } + + if (state->start > end) + break; + + if (state->state & bits) { + bitset = 1; + if (!filled) + break; + } else if (filled) { + bitset = 0; + break; + } + start = state->end + 1; + if (start > end) + break; + node = rb_next(node); + if (!node) { + if (filled) + bitset = 0; + break; + } + } + read_unlock_irqrestore(&tree->lock, flags); + return bitset; +} +EXPORT_SYMBOL(test_range_bit); + +/* + * helper function to set a given page up to date if all the + * extents in the tree for that page are up to date + */ +static int check_page_uptodate(struct extent_io_tree *tree, + struct page *page) +{ + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1)) + SetPageUptodate(page); + return 0; +} + +/* + * helper function to unlock a page if all the extents in the tree + * for that page are unlocked + */ +static int check_page_locked(struct extent_io_tree *tree, + struct page *page) +{ + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0)) + unlock_page(page); + return 0; +} + +/* + * helper function to end page writeback if all the extents + * in the tree for that page are done with writeback + */ +static int check_page_writeback(struct extent_io_tree *tree, + struct page *page) +{ + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0)) + 
end_page_writeback(page); + return 0; +} + +/* lots and lots of room for performance fixes in the end_bio funcs */ + +/* + * after a writepage IO is done, we need to: + * clear the uptodate bits on error + * clear the writeback bits in the extent tree for this IO + * end_page_writeback if the page has no more pending IO + * + * Scheduling is not allowed, so the extent state tree is expected + * to have one and only one object corresponding to this IO. + */ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) +static void end_bio_extent_writepage(struct bio *bio, int err) +#else +static int end_bio_extent_writepage(struct bio *bio, + unsigned int bytes_done, int err) +#endif +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct extent_io_tree *tree = bio->bi_private; + u64 start; + u64 end; + int whole_page; + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + if (bio->bi_size) + return 1; +#endif + + do { + struct page *page = bvec->bv_page; + start = ((u64)page->index << PAGE_CACHE_SHIFT) + + bvec->bv_offset; + end = start + bvec->bv_len - 1; + + if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) + whole_page = 1; + else + whole_page = 0; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (!uptodate) { + clear_extent_uptodate(tree, start, end, GFP_ATOMIC); + ClearPageUptodate(page); + SetPageError(page); + } + clear_extent_writeback(tree, start, end, GFP_ATOMIC); + + if (whole_page) + end_page_writeback(page); + else + check_page_writeback(tree, page); + if (tree->ops && tree->ops->writepage_end_io_hook) + tree->ops->writepage_end_io_hook(page, start, end); + } while (bvec >= bio->bi_io_vec); + + bio_put(bio); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + return 0; +#endif +} + +/* + * after a readpage IO is done, we need to: + * clear the uptodate bits on error + * set the uptodate bits if things worked + * set the page up to date if all extents 
in the tree are uptodate + * clear the lock bit in the extent tree + * unlock the page if there are no other extents locked for it + * + * Scheduling is not allowed, so the extent state tree is expected + * to have one and only one object corresponding to this IO. + */ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) +static void end_bio_extent_readpage(struct bio *bio, int err) +#else +static int end_bio_extent_readpage(struct bio *bio, + unsigned int bytes_done, int err) +#endif +{ + int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct extent_io_tree *tree = bio->bi_private; + u64 start; + u64 end; + int whole_page; + int ret; + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + if (bio->bi_size) + return 1; +#endif + + do { + struct page *page = bvec->bv_page; + start = ((u64)page->index << PAGE_CACHE_SHIFT) + + bvec->bv_offset; + end = start + bvec->bv_len - 1; + + if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) + whole_page = 1; + else + whole_page = 0; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { + ret = tree->ops->readpage_end_io_hook(page, start, end); + if (ret) + uptodate = 0; + } + if (uptodate) { + set_extent_uptodate(tree, start, end, GFP_ATOMIC); + if (whole_page) + SetPageUptodate(page); + else + check_page_uptodate(tree, page); + } else { + ClearPageUptodate(page); + SetPageError(page); + } + + unlock_extent(tree, start, end, GFP_ATOMIC); + + if (whole_page) + unlock_page(page); + else + check_page_locked(tree, page); + } while (bvec >= bio->bi_io_vec); + + bio_put(bio); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + return 0; +#endif +} + +/* + * IO done from prepare_write is pretty simple, we just unlock + * the structs in the extent tree when done, and set the uptodate bits + * as appropriate. 
+ */ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) +static void end_bio_extent_preparewrite(struct bio *bio, int err) +#else +static int end_bio_extent_preparewrite(struct bio *bio, + unsigned int bytes_done, int err) +#endif +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct extent_io_tree *tree = bio->bi_private; + u64 start; + u64 end; + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + if (bio->bi_size) + return 1; +#endif + + do { + struct page *page = bvec->bv_page; + start = ((u64)page->index << PAGE_CACHE_SHIFT) + + bvec->bv_offset; + end = start + bvec->bv_len - 1; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (uptodate) { + set_extent_uptodate(tree, start, end, GFP_ATOMIC); + } else { + ClearPageUptodate(page); + SetPageError(page); + } + + unlock_extent(tree, start, end, GFP_ATOMIC); + + } while (bvec >= bio->bi_io_vec); + + bio_put(bio); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + return 0; +#endif +} + +static struct bio * +extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, + gfp_t gfp_flags) +{ + struct bio *bio; + + bio = bio_alloc(gfp_flags, nr_vecs); + + if (bio == NULL && (current->flags & PF_MEMALLOC)) { + while (!bio && (nr_vecs /= 2)) + bio = bio_alloc(gfp_flags, nr_vecs); + } + + if (bio) { + bio->bi_bdev = bdev; + bio->bi_sector = first_sector; + } + return bio; +} + +static int submit_one_bio(int rw, struct bio *bio) +{ + u64 maxsector; + int ret = 0; + + bio_get(bio); + + maxsector = bio->bi_bdev->bd_inode->i_size >> 9; + if (maxsector < bio->bi_sector) { + printk("sector too large max %Lu got %llu\n", maxsector, + (unsigned long long)bio->bi_sector); + WARN_ON(1); + } + + submit_bio(rw, bio); + if (bio_flagged(bio, BIO_EOPNOTSUPP)) + ret = -EOPNOTSUPP; + bio_put(bio); + return ret; +} + +static int submit_extent_page(int rw, struct extent_io_tree *tree, + struct page *page, sector_t sector, 
+ size_t size, unsigned long offset, + struct block_device *bdev, + struct bio **bio_ret, + unsigned long max_pages, + bio_end_io_t end_io_func) +{ + int ret = 0; + struct bio *bio; + int nr; + + if (bio_ret && *bio_ret) { + bio = *bio_ret; + if (bio->bi_sector + (bio->bi_size >> 9) != sector || + bio_add_page(bio, page, size, offset) < size) { + ret = submit_one_bio(rw, bio); + bio = NULL; + } else { + return 0; + } + } + nr = min_t(int, max_pages, bio_get_nr_vecs(bdev)); + bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); + if (!bio) { + printk("failed to allocate bio nr %d\n", nr); + } + bio_add_page(bio, page, size, offset); + bio->bi_end_io = end_io_func; + bio->bi_private = tree; + if (bio_ret) { + *bio_ret = bio; + } else { + ret = submit_one_bio(rw, bio); + } + + return ret; +} + +void set_page_extent_mapped(struct page *page) +{ + if (!PagePrivate(page)) { + SetPagePrivate(page); + WARN_ON(!page->mapping->a_ops->invalidatepage); + set_page_private(page, EXTENT_PAGE_PRIVATE); + page_cache_get(page); + } +} + +void set_page_extent_head(struct page *page, unsigned long len) +{ + set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); +} + +/* + * basic readpage implementation. 
Locked extent state structs are inserted + * into the tree that are removed when the IO is done (by the end_io + * handlers) + */ +static int __extent_read_full_page(struct extent_io_tree *tree, + struct page *page, + get_extent_t *get_extent, + struct bio **bio) +{ + struct inode *inode = page->mapping->host; + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 page_end = start + PAGE_CACHE_SIZE - 1; + u64 end; + u64 cur = start; + u64 extent_offset; + u64 last_byte = i_size_read(inode); + u64 block_start; + u64 cur_end; + sector_t sector; + struct extent_map *em; + struct block_device *bdev; + int ret; + int nr = 0; + size_t page_offset = 0; + size_t iosize; + size_t blocksize = inode->i_sb->s_blocksize; + + set_page_extent_mapped(page); + + end = page_end; + lock_extent(tree, start, end, GFP_NOFS); + + while (cur <= end) { + if (cur >= last_byte) { + char *userpage; + iosize = PAGE_CACHE_SIZE - page_offset; + userpage = kmap_atomic(page, KM_USER0); + memset(userpage + page_offset, 0, iosize); + flush_dcache_page(page); + kunmap_atomic(userpage, KM_USER0); + set_extent_uptodate(tree, cur, cur + iosize - 1, + GFP_NOFS); + unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); + break; + } + em = get_extent(inode, page, page_offset, cur, + end - cur + 1, 0); + if (IS_ERR(em) || !em) { + SetPageError(page); + unlock_extent(tree, cur, end, GFP_NOFS); + break; + } + + extent_offset = cur - em->start; + BUG_ON(extent_map_end(em) <= cur); + BUG_ON(end < cur); + + iosize = min(extent_map_end(em) - cur, end - cur + 1); + cur_end = min(extent_map_end(em) - 1, end); + iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); + sector = (em->block_start + extent_offset) >> 9; + bdev = em->bdev; + block_start = em->block_start; + free_extent_map(em); + em = NULL; + + /* we've found a hole, just zero and go on */ + if (block_start == EXTENT_MAP_HOLE) { + char *userpage; + userpage = kmap_atomic(page, KM_USER0); + memset(userpage + page_offset, 0, iosize); + 
flush_dcache_page(page); + kunmap_atomic(userpage, KM_USER0); + + set_extent_uptodate(tree, cur, cur + iosize - 1, + GFP_NOFS); + unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); + cur = cur + iosize; + page_offset += iosize; + continue; + } + /* the get_extent function already copied into the page */ + if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) { + unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); + cur = cur + iosize; + page_offset += iosize; + continue; + } + + ret = 0; + if (tree->ops && tree->ops->readpage_io_hook) { + ret = tree->ops->readpage_io_hook(page, cur, + cur + iosize - 1); + } + if (!ret) { + unsigned long nr = (last_byte >> PAGE_CACHE_SHIFT) + 1; + nr -= page->index; + ret = submit_extent_page(READ, tree, page, + sector, iosize, page_offset, + bdev, bio, nr, + end_bio_extent_readpage); + } + if (ret) + SetPageError(page); + cur = cur + iosize; + page_offset += iosize; + nr++; + } + if (!nr) { + if (!PageError(page)) + SetPageUptodate(page); + unlock_page(page); + } + return 0; +} + +int extent_read_full_page(struct extent_io_tree *tree, struct page *page, + get_extent_t *get_extent) +{ + struct bio *bio = NULL; + int ret; + + ret = __extent_read_full_page(tree, page, get_extent, &bio); + if (bio) + submit_one_bio(READ, bio); + return ret; +} +EXPORT_SYMBOL(extent_read_full_page); + +/* + * the writepage semantics are similar to regular writepage. extent + * records are inserted to lock ranges in the tree, and as dirty areas + * are found, they are marked writeback. 
Then the lock bits are removed + * and the end_io handler clears the writeback ranges + */ +static int __extent_writepage(struct page *page, struct writeback_control *wbc, + void *data) +{ + struct inode *inode = page->mapping->host; + struct extent_page_data *epd = data; + struct extent_io_tree *tree = epd->tree; + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 delalloc_start; + u64 page_end = start + PAGE_CACHE_SIZE - 1; + u64 end; + u64 cur = start; + u64 extent_offset; + u64 last_byte = i_size_read(inode); + u64 block_start; + u64 iosize; + sector_t sector; + struct extent_map *em; + struct block_device *bdev; + int ret; + int nr = 0; + size_t page_offset = 0; + size_t blocksize; + loff_t i_size = i_size_read(inode); + unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; + u64 nr_delalloc; + u64 delalloc_end; + + WARN_ON(!PageLocked(page)); + if (page->index > end_index) { + clear_extent_dirty(tree, start, page_end, GFP_NOFS); + unlock_page(page); + return 0; + } + + if (page->index == end_index) { + char *userpage; + + size_t offset = i_size & (PAGE_CACHE_SIZE - 1); + + userpage = kmap_atomic(page, KM_USER0); + memset(userpage + offset, 0, PAGE_CACHE_SIZE - offset); + flush_dcache_page(page); + kunmap_atomic(userpage, KM_USER0); + } + + set_page_extent_mapped(page); + + delalloc_start = start; + delalloc_end = 0; + while(delalloc_end < page_end) { + nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start, + &delalloc_end, + 128 * 1024 * 1024); + if (nr_delalloc == 0) { + delalloc_start = delalloc_end + 1; + continue; + } + tree->ops->fill_delalloc(inode, delalloc_start, + delalloc_end); + clear_extent_bit(tree, delalloc_start, + delalloc_end, + EXTENT_LOCKED | EXTENT_DELALLOC, + 1, 0, GFP_NOFS); + delalloc_start = delalloc_end + 1; + } + lock_extent(tree, start, page_end, GFP_NOFS); + + end = page_end; + if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { + printk("found delalloc bits after lock_extent\n"); + } + + if (last_byte <= 
start) { + clear_extent_dirty(tree, start, page_end, GFP_NOFS); + goto done; + } + + set_extent_uptodate(tree, start, page_end, GFP_NOFS); + blocksize = inode->i_sb->s_blocksize; + + while (cur <= end) { + if (cur >= last_byte) { + clear_extent_dirty(tree, cur, page_end, GFP_NOFS); + break; + } + em = epd->get_extent(inode, page, page_offset, cur, + end - cur + 1, 1); + if (IS_ERR(em) || !em) { + SetPageError(page); + break; + } + + extent_offset = cur - em->start; + BUG_ON(extent_map_end(em) <= cur); + BUG_ON(end < cur); + iosize = min(extent_map_end(em) - cur, end - cur + 1); + iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); + sector = (em->block_start + extent_offset) >> 9; + bdev = em->bdev; + block_start = em->block_start; + free_extent_map(em); + em = NULL; + + if (block_start == EXTENT_MAP_HOLE || + block_start == EXTENT_MAP_INLINE) { + clear_extent_dirty(tree, cur, + cur + iosize - 1, GFP_NOFS); + cur = cur + iosize; + page_offset += iosize; + continue; + } + + /* leave this out until we have a page_mkwrite call */ + if (0 && !test_range_bit(tree, cur, cur + iosize - 1, + EXTENT_DIRTY, 0)) { + cur = cur + iosize; + page_offset += iosize; + continue; + } + clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); + if (tree->ops && tree->ops->writepage_io_hook) { + ret = tree->ops->writepage_io_hook(page, cur, + cur + iosize - 1); + } else { + ret = 0; + } + if (ret) + SetPageError(page); + else { + unsigned long max_nr = end_index + 1; + set_range_writeback(tree, cur, cur + iosize - 1); + if (!PageWriteback(page)) { + printk("warning page %lu not writeback, " + "cur %llu end %llu\n", page->index, + (unsigned long long)cur, + (unsigned long long)end); + } + + ret = submit_extent_page(WRITE, tree, page, sector, + iosize, page_offset, bdev, + &epd->bio, max_nr, + end_bio_extent_writepage); + if (ret) + SetPageError(page); + } + cur = cur + iosize; + page_offset += iosize; + nr++; + } +done: + if (nr == 0) { + /* make sure the mapping tag for page 
dirty gets cleared */ + set_page_writeback(page); + end_page_writeback(page); + } + unlock_extent(tree, start, page_end, GFP_NOFS); + unlock_page(page); + return 0; +} + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + +/* Taken directly from 2.6.23 for 2.6.18 back port */ +typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, + void *data); + +/** + * write_cache_pages - walk the list of dirty pages of the given address space + * and write all of them. + * @mapping: address space structure to write + * @wbc: subtract the number of written pages from *@wbc->nr_to_write + * @writepage: function called for each page + * @data: data passed to writepage function + * + * If a page is already under I/O, write_cache_pages() skips it, even + * if it's dirty. This is desirable behaviour for memory-cleaning writeback, + * but it is INCORRECT for data-integrity system calls such as fsync(). fsync() + * and msync() need to guarantee that all the data which was dirty at the time + * the call was made get new I/O started against them. If wbc->sync_mode is + * WB_SYNC_ALL then we were called for data integrity and we must wait for + * existing IO to complete. 
+ */ +static int write_cache_pages(struct address_space *mapping, + struct writeback_control *wbc, writepage_t writepage, + void *data) +{ + struct backing_dev_info *bdi = mapping->backing_dev_info; + int ret = 0; + int done = 0; + struct pagevec pvec; + int nr_pages; + pgoff_t index; + pgoff_t end; /* Inclusive */ + int scanned = 0; /* becomes 1 once a lookup pass has run (or up front for explicit ranges); gates the wrap-around retry */ + int range_whole = 0; /* 1 when wbc asks for the range 0..LLONG_MAX */ + + if (wbc->nonblocking && bdi_write_congested(bdi)) { /* non-blocking caller, congested device: record it and write nothing */ + wbc->encountered_congestion = 1; + return 0; + } + + pagevec_init(&pvec, 0); + if (wbc->range_cyclic) { + index = mapping->writeback_index; /* Start from prev offset */ + end = -1; /* presumably wraps to the largest pgoff_t, i.e. no upper bound - confirm that pgoff_t is unsigned */ + } else { + index = wbc->range_start >> PAGE_CACHE_SHIFT; + end = wbc->range_end >> PAGE_CACHE_SHIFT; + if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) + range_whole = 1; + scanned = 1; /* explicit range: never wrap back to index 0 */ + } +retry: + while (!done && (index <= end) && + (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { + unsigned i; + + scanned = 1; + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + /* + * At this point we hold neither mapping->tree_lock nor + * lock on the page itself: the page may be truncated or + * invalidated (changing page->mapping to NULL), or even + * swizzled back from swapper_space to tmpfs file + * mapping + */ + lock_page(page); + + if (unlikely(page->mapping != mapping)) { /* the page left this mapping while it was unlocked: skip it */ + unlock_page(page); + continue; + } + + if (!wbc->range_cyclic && page->index > end) { /* past the requested range: stop once this batch has been walked */ + done = 1; + unlock_page(page); + continue; + } + + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); /* data-integrity mode: wait for I/O that is already in flight */ + + if (PageWriteback(page) || + !clear_page_dirty_for_io(page)) { /* still under writeback, or no longer dirty: nothing to write */ + unlock_page(page); + continue; + } + + ret = (*writepage)(page, wbc, data); + + if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) { /* not treated as an error; the page is unlocked here */ + unlock_page(page); + ret = 0; + } + if (ret || (--(wbc->nr_to_write) <= 0)) /* stop on error or once the write budget is used up */ + done = 1; + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + done = 1; + } + } + 
pagevec_release(&pvec); + cond_resched(); + } + if (!scanned && !done) { + /* + * We hit the last page and there is more work to be done: wrap + * back to the start of the file + */ + scanned = 1; + index = 0; + goto retry; + } + if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) + mapping->writeback_index = index; /* cyclic scans start from this value next time */ + return ret; +} +#endif + /* Write @page through __extent_writepage(), then opportunistically push up to 64 more dirty pages of the same mapping that start after it (WB_SYNC_NONE), and submit the bio left pending in epd. Returns the result for @page only; the result of the follow-on write_cache_pages() call is ignored. */ +int extent_write_full_page(struct extent_io_tree *tree, struct page *page, + get_extent_t *get_extent, + struct writeback_control *wbc) +{ + int ret; + struct address_space *mapping = page->mapping; + struct extent_page_data epd = { + .bio = NULL, + .tree = tree, + .get_extent = get_extent, + }; + struct writeback_control wbc_writepages = { + .bdi = wbc->bdi, + .sync_mode = WB_SYNC_NONE, + .older_than_this = NULL, + .nr_to_write = 64, + .range_start = page_offset(page) + PAGE_CACHE_SIZE, + .range_end = (loff_t)-1, + }; /* private control block for the follow-on pages; the caller's wbc is only used for @page itself */ + + + ret = __extent_writepage(page, wbc, &epd); + + write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd); + if (epd.bio) { /* a bio may still be held in epd; send it now */ + submit_one_bio(WRITE, epd.bio); + } + return ret; +} +EXPORT_SYMBOL(extent_write_full_page); + + /* Write the dirty pages of @mapping selected by @wbc, one __extent_writepage() call per page via write_cache_pages(), then submit the bio still held in epd, if any. Returns the write_cache_pages() result. */ +int extent_writepages(struct extent_io_tree *tree, + struct address_space *mapping, + get_extent_t *get_extent, + struct writeback_control *wbc) +{ + int ret = 0; + struct extent_page_data epd = { + .bio = NULL, + .tree = tree, + .get_extent = get_extent, + }; + + ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd); + if (epd.bio) { + submit_one_bio(WRITE, epd.bio); + } + return ret; +} +EXPORT_SYMBOL(extent_writepages); + /* Take @nr_pages pages off the tail of @pages, insert each into the page cache and the LRU, and start reading it with __extent_read_full_page(); the reads share one bio that is submitted at the end. A page that cannot be added to the page cache is only released. Always returns 0. */ +int extent_readpages(struct extent_io_tree *tree, + struct address_space *mapping, + struct list_head *pages, unsigned nr_pages, + get_extent_t get_extent) +{ + struct bio *bio = NULL; + unsigned page_idx; + struct pagevec pvec; + + pagevec_init(&pvec, 0); + for (page_idx = 0; page_idx < nr_pages; page_idx++) { + struct page *page = list_entry(pages->prev, struct page, lru); + + prefetchw(&page->flags); + list_del(&page->lru); + /* 
+ * what we want to do here is call add_to_page_cache_lru, + * but that isn't exported, so we reproduce it here + */ + if (!add_to_page_cache(page, mapping, + page->index, GFP_KERNEL)) { + + /* open coding of lru_cache_add, also not exported */ + page_cache_get(page); + if (!pagevec_add(&pvec, page)) + __pagevec_lru_add(&pvec); + __extent_read_full_page(tree, page, get_extent, &bio); + } + page_cache_release(page); /* drop one reference; a page queued for the LRU got its own from page_cache_get() above */ + } + if (pagevec_count(&pvec)) + __pagevec_lru_add(&pvec); /* flush whatever is still batched in the pagevec */ + BUG_ON(!list_empty(pages)); + if (bio) + submit_one_bio(READ, bio); + return 0; +} +EXPORT_SYMBOL(extent_readpages); + +/* + * basic invalidatepage code, this waits on any locked or writeback + * ranges corresponding to the page, and then deletes any extent state + * records from the tree + */ +int extent_invalidatepage(struct extent_io_tree *tree, + struct page *page, unsigned long offset) +{ + u64 start = ((u64)page->index << PAGE_CACHE_SHIFT); + u64 end = start + PAGE_CACHE_SIZE - 1; + size_t blocksize = page->mapping->host->i_sb->s_blocksize; + + start += (offset + blocksize -1) & ~(blocksize - 1); /* round @offset up to a block boundary; only whole blocks from there to the end of the page are touched */ + if (start > end) + return 0; + + lock_extent(tree, start, end, GFP_NOFS); + wait_on_extent_writeback(tree, start, end); + clear_extent_bit(tree, start, end, + EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, + 1, 1, GFP_NOFS); /* no unlock_extent() follows: EXTENT_LOCKED is among the bits cleared, which presumably is what drops the lock taken above */ + return 0; +} +EXPORT_SYMBOL(extent_invalidatepage); + +/* + * simple commit_write call, set_range_dirty is used to mark both + * the pages and the extent records as dirty + */ +int extent_commit_write(struct extent_io_tree *tree, + struct inode *inode, struct page *page, + unsigned from, unsigned to) +{ + loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; /* file offset just past the last byte written */ + + set_page_extent_mapped(page); + set_page_dirty(page); /* NOTE(review): the comment above mentions set_range_dirty, but this body only calls set_page_dirty(); @tree and @from are unused here */ + + if (pos > inode->i_size) { /* the write extended the file */ + i_size_write(inode, pos); + mark_inode_dirty(inode); + } + return 0; +} +EXPORT_SYMBOL(extent_commit_write); + /* Prepare the block-aligned range covering bytes [from, to) of @page for a write: the unwritten parts of new blocks are zeroed and mapped blocks that are only partly overwritten are read in first. */ +int extent_prepare_write(struct extent_io_tree *tree, + struct inode *inode, struct page *page, + unsigned 
from, unsigned to, get_extent_t *get_extent) +{ + u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 page_end = page_start + PAGE_CACHE_SIZE - 1; + u64 block_start; + u64 orig_block_start; + u64 block_end; + u64 cur_end; + struct extent_map *em; + unsigned blocksize = 1 << inode->i_blkbits; + size_t page_offset = 0; + size_t block_off_start; + size_t block_off_end; + int err = 0; + int iocount = 0; + int ret = 0; + int isnew; + + set_page_extent_mapped(page); + + block_start = (page_start + from) & ~((u64)blocksize - 1); + block_end = (page_start + to - 1) | (blocksize - 1); /* widen [from, to) to whole blocks: start rounded down, inclusive end rounded up */ + orig_block_start = block_start; + + lock_extent(tree, page_start, page_end, GFP_NOFS); + while(block_start <= block_end) { + em = get_extent(inode, page, page_offset, block_start, + block_end - block_start + 1, 1); + if (IS_ERR(em) || !em) { + goto err; /* NOTE(review): leaves with err still 0 and without unlocking what is still locked of the page range */ + } + cur_end = min(block_end, extent_map_end(em) - 1); + block_off_start = block_start & (PAGE_CACHE_SIZE - 1); + block_off_end = block_off_start + blocksize; + isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS); + + if (!PageUptodate(page) && isnew && + (block_off_end > to || block_off_start < from)) { /* new block only partly covered by the write: zero the bytes outside [from, to) */ + void *kaddr; + + kaddr = kmap_atomic(page, KM_USER0); + if (block_off_end > to) + memset(kaddr + to, 0, block_off_end - to); + if (block_off_start < from) + memset(kaddr + block_off_start, 0, + from - block_off_start); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + } + if ((em->block_start != EXTENT_MAP_HOLE && + em->block_start != EXTENT_MAP_INLINE) && + !isnew && !PageUptodate(page) && + (block_off_end > to || block_off_start < from) && + !test_range_bit(tree, block_start, cur_end, + EXTENT_UPTODATE, 1)) { /* mapped (not hole or inline), not new, partly overwritten and not yet uptodate: read it in first */ + u64 sector; + u64 extent_offset = block_start - em->start; + size_t iosize; + sector = (em->block_start + extent_offset) >> 9; /* byte address to 512-byte sector */ + iosize = (cur_end - block_start + blocksize) & + ~((u64)blocksize - 1); + /* + * we've already got the extent locked, but we + * need to split the state such that our 
end_bio + * handler can clear the lock. + */ + set_extent_bit(tree, block_start, + block_start + iosize - 1, + EXTENT_LOCKED, 0, NULL, GFP_NOFS); + ret = submit_extent_page(READ, tree, page, + sector, iosize, page_offset, em->bdev, + NULL, 1, + end_bio_extent_preparewrite); /* NOTE(review): ret is stored but never examined in this function */ + iocount++; + block_start = block_start + iosize; + } else { + set_extent_uptodate(tree, block_start, cur_end, + GFP_NOFS); + unlock_extent(tree, block_start, cur_end, GFP_NOFS); /* no read needed: this sub-range is finished */ + block_start = cur_end + 1; + } + page_offset = block_start & (PAGE_CACHE_SIZE - 1); + free_extent_map(em); + } + if (iocount) { /* wait until the reads submitted above have had EXTENT_LOCKED cleared */ + wait_extent_bit(tree, orig_block_start, + block_end, EXTENT_LOCKED); + } + check_page_uptodate(tree, page); +err: + /* FIXME, zero out newly allocated blocks on error */ + return err; +} +EXPORT_SYMBOL(extent_prepare_write); + +/* + * a helper for releasepage. As long as there are no locked extents + * in the range corresponding to the page, both state records and extent + * map records are removed + */ +int try_release_extent_mapping(struct extent_map_tree *map, + struct extent_io_tree *tree, struct page *page) +{ + struct extent_map *em; + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + u64 orig_start = start; + int ret = 1; /* stays 1 unless part of the page range is still locked at the end */ + + while (start <= end) { + spin_lock(&map->lock); + em = lookup_extent_mapping(map, start, end); + if (!em || IS_ERR(em)) { + spin_unlock(&map->lock); + break; + } + if (!test_range_bit(tree, em->start, extent_map_end(em) - 1, + EXTENT_LOCKED, 0)) { + remove_extent_mapping(map, em); + /* once for the rb tree */ + free_extent_map(em); + } + start = extent_map_end(em); /* em is still usable: the lookup reference is only dropped below */ + spin_unlock(&map->lock); + + /* once for us */ + free_extent_map(em); + } + if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0)) + ret = 0; + else + clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE, + 1, 1, GFP_NOFS); + return ret; +} +EXPORT_SYMBOL(try_release_extent_mapping); + /* Translate file block @iblock through @get_extent into (em->block_start + offset within the extent) >> i_blkbits; returns 0 when the lookup fails or the block is a hole or inline. */ +sector_t extent_bmap(struct address_space *mapping, sector_t 
iblock, + get_extent_t *get_extent) +{ + struct inode *inode = mapping->host; + u64 start = iblock << inode->i_blkbits; /* byte offset of the requested file block */ + sector_t sector = 0; /* value returned for holes and inline extents */ + struct extent_map *em; + + em = get_extent(inode, NULL, 0, start, (1 << inode->i_blkbits), 0); + if (!em || IS_ERR(em)) + return 0; + + if (em->block_start == EXTENT_MAP_INLINE || + em->block_start == EXTENT_MAP_HOLE) + goto out; + + sector = (em->block_start + start - em->start) >> inode->i_blkbits; +printk("bmap finds %Lu %Lu block %Lu\n", em->start, em->len, em->block_start); /* NOTE(review): unconditional, unindented printk - looks like leftover debug output */ +out: + free_extent_map(em); + return sector; +} + /* Move @eb to the head of the tree's buffer LRU. A buffer that was not on the list is added after extent_buffer_get(); once lru_size reaches BUFFER_LRU_MAX the tail entry is unlinked and passed to free_extent_buffer(). The callers in this file hold tree->lru_lock. Always returns 0. */ +static int add_lru(struct extent_io_tree *tree, struct extent_buffer *eb) +{ + if (list_empty(&eb->lru)) { + extent_buffer_get(eb); + list_add(&eb->lru, &tree->buffer_lru); + tree->lru_size++; + if (tree->lru_size >= BUFFER_LRU_MAX) { + struct extent_buffer *rm; + rm = list_entry(tree->buffer_lru.prev, + struct extent_buffer, lru); + tree->lru_size--; + list_del_init(&rm->lru); + free_extent_buffer(rm); + } + } else + list_move(&eb->lru, &tree->buffer_lru); + return 0; +} +static struct extent_buffer *find_lru(struct extent_io_tree *tree, + u64 start, unsigned long len) +{ /* Linear scan of the buffer LRU for an exact (start, len) match; a hit is returned after extent_buffer_get(), otherwise NULL. */ + struct list_head *lru = &tree->buffer_lru; + struct list_head *cur = lru->next; + struct extent_buffer *eb; + + if (list_empty(lru)) + return NULL; + + do { + eb = list_entry(cur, struct extent_buffer, lru); + if (eb->start == start && eb->len == len) { + extent_buffer_get(eb); + return eb; + } + cur = cur->next; + } while (cur != lru); + return NULL; +} + /* Number of page-cache pages spanned by the byte range [start, start + len). */ +static inline unsigned long num_extent_pages(u64 start, u64 len) +{ + return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - + (start >> PAGE_CACHE_SHIFT); +} + /* Return page @i of @eb: the cached first_page for i == 0, otherwise a radix-tree lookup in the first page's mapping. No page reference is taken here. */ +static inline struct page *extent_buffer_page(struct extent_buffer *eb, + unsigned long i) +{ + struct page *p; + struct address_space *mapping; + + if (i == 0) + return eb->first_page; + i += eb->start >> PAGE_CACHE_SHIFT; /* turn the buffer-relative page number into a page-cache index */ + mapping = eb->first_page->mapping; + read_lock_irq(&mapping->tree_lock); + p = 
radix_tree_lookup(&mapping->page_tree, i); + read_unlock_irq(&mapping->tree_lock); + return p; +} + /* Return an extent_buffer for (start, len): a match from the tree's LRU (find_lru() already called extent_buffer_get() on it) or a freshly zeroed one with refs set to 1. Returns NULL when the slab allocation fails. */ +static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, + u64 start, + unsigned long len, + gfp_t mask) +{ + struct extent_buffer *eb = NULL; + + spin_lock(&tree->lru_lock); + eb = find_lru(tree, start, len); + spin_unlock(&tree->lru_lock); + if (eb) { + return eb; + } + + eb = kmem_cache_zalloc(extent_buffer_cache, mask); + if (!eb) /* allocation can fail; both callers already treat a NULL result as failure */ + return NULL; + INIT_LIST_HEAD(&eb->lru); + eb->start = start; + eb->len = len; + atomic_set(&eb->refs, 1); + + return eb; +} + /* Hand @eb back to the extent_buffer slab cache; only the structure is freed, page references are dropped by the callers. */ +static void __free_extent_buffer(struct extent_buffer *eb) +{ + kmem_cache_free(extent_buffer_cache, eb); +} + /* Return an extent_buffer covering (start, len), attaching page-cache pages created on demand with find_or_create_page(); @page0, when given, is used as the first page. A buffer found already filled on the LRU is returned as is. The buffer is put on the LRU before returning; NULL is returned on failure. */ +struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, + u64 start, unsigned long len, + struct page *page0, + gfp_t mask) +{ + unsigned long num_pages = num_extent_pages(start, len); + unsigned long i; + unsigned long index = start >> PAGE_CACHE_SHIFT; + struct extent_buffer *eb; + struct page *p; + struct address_space *mapping = tree->mapping; + int uptodate = 1; + + eb = __alloc_extent_buffer(tree, start, len, mask); + if (!eb || IS_ERR(eb)) + return NULL; + + if (eb->flags & EXTENT_BUFFER_FILLED) /* came from the LRU with its pages already attached */ + goto lru_add; + + if (page0) { + eb->first_page = page0; + i = 1; + index++; + page_cache_get(page0); + mark_page_accessed(page0); + set_page_extent_mapped(page0); + WARN_ON(!PageUptodate(page0)); + set_page_extent_head(page0, len); + } else { + i = 0; + } + for (; i < num_pages; i++, index++) { + p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); + if (!p) { + WARN_ON(1); + goto fail; + } + set_page_extent_mapped(p); + mark_page_accessed(p); + if (i == 0) { + eb->first_page = p; + set_page_extent_head(p, len); + } else { + set_page_private(p, EXTENT_PAGE_PRIVATE); + } + if (!PageUptodate(p)) + uptodate = 0; + unlock_page(p); + } + if (uptodate) + eb->flags |= EXTENT_UPTODATE; + eb->flags |= EXTENT_BUFFER_FILLED; + +lru_add: + spin_lock(&tree->lru_lock); + add_lru(tree, eb); 
+ spin_unlock(&tree->lru_lock); + return eb; + +fail: + spin_lock(&tree->lru_lock); + list_del_init(&eb->lru); + spin_unlock(&tree->lru_lock); + if (!atomic_dec_and_test(&eb->refs)) + return NULL; /* other references remain: the buffer is not torn down here */ + for (index = 1; index < i; index++) { + page_cache_release(extent_buffer_page(eb, index)); + } + if (i > 0) + page_cache_release(extent_buffer_page(eb, 0)); /* page 0 goes last: extent_buffer_page() needs eb->first_page to look up the others */ + __free_extent_buffer(eb); + return NULL; +} +EXPORT_SYMBOL(alloc_extent_buffer); + /* Same flow as alloc_extent_buffer(), except that pages are only looked up with find_lock_page() and never created: if any page of the range is missing from the page cache, NULL is returned. @mask is only passed on to the extent_buffer allocation. */ +struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, + u64 start, unsigned long len, + gfp_t mask) +{ + unsigned long num_pages = num_extent_pages(start, len); + unsigned long i; + unsigned long index = start >> PAGE_CACHE_SHIFT; + struct extent_buffer *eb; + struct page *p; + struct address_space *mapping = tree->mapping; + int uptodate = 1; + + eb = __alloc_extent_buffer(tree, start, len, mask); + if (!eb || IS_ERR(eb)) + return NULL; + + if (eb->flags & EXTENT_BUFFER_FILLED) /* came from the LRU with its pages already attached */ + goto lru_add; + + for (i = 0; i < num_pages; i++, index++) { + p = find_lock_page(mapping, index); + if (!p) { + goto fail; + } + set_page_extent_mapped(p); + mark_page_accessed(p); + + if (i == 0) { + eb->first_page = p; + set_page_extent_head(p, len); + } else { + set_page_private(p, EXTENT_PAGE_PRIVATE); + } + + if (!PageUptodate(p)) + uptodate = 0; + unlock_page(p); + } + if (uptodate) + eb->flags |= EXTENT_UPTODATE; + eb->flags |= EXTENT_BUFFER_FILLED; + +lru_add: + spin_lock(&tree->lru_lock); + add_lru(tree, eb); + spin_unlock(&tree->lru_lock); + return eb; +fail: + spin_lock(&tree->lru_lock); + list_del_init(&eb->lru); + spin_unlock(&tree->lru_lock); + if (!atomic_dec_and_test(&eb->refs)) + return NULL; + for (index = 1; index < i; index++) { + page_cache_release(extent_buffer_page(eb, index)); + } + if (i > 0) + page_cache_release(extent_buffer_page(eb, 0)); + __free_extent_buffer(eb); + return NULL; +} +EXPORT_SYMBOL(find_extent_buffer); + /* Drop one reference on @eb (NULL is ignored); when the count reaches zero the page references are released and the structure is freed. */ +void free_extent_buffer(struct extent_buffer *eb) +{ + unsigned long i; + unsigned long 
num_pages; + + if (!eb) + return; + + if (!atomic_dec_and_test(&eb->refs)) + return; + + WARN_ON(!list_empty(&eb->lru)); /* add_lru() calls extent_buffer_get() for listed buffers, so one that is still on the list is not expected here */ + num_pages = num_extent_pages(eb->start, eb->len); + + for (i = 1; i < num_pages; i++) { + page_cache_release(extent_buffer_page(eb, i)); + } + page_cache_release(extent_buffer_page(eb, 0)); /* first page last: the lookups above go through eb->first_page */ + __free_extent_buffer(eb); +} +EXPORT_SYMBOL(free_extent_buffer); + /* Clear EXTENT_DIRTY on the buffer's byte range and clean each of its pages, skipping a first or last page that the buffer only partly covers while other dirty ranges remain on it. Always returns 0. */ +int clear_extent_buffer_dirty(struct extent_io_tree *tree, + struct extent_buffer *eb) +{ + int set; /* NOTE(review): assigned below but never read */ + unsigned long i; + unsigned long num_pages; + struct page *page; + + u64 start = eb->start; + u64 end = start + eb->len - 1; + + set = clear_extent_dirty(tree, start, end, GFP_NOFS); + num_pages = num_extent_pages(eb->start, eb->len); + + for (i = 0; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + lock_page(page); + if (i == 0) + set_page_extent_head(page, eb->len); + else + set_page_private(page, EXTENT_PAGE_PRIVATE); + + /* + * if we're on the last page or the first page and the + * block isn't aligned on a page boundary, do extra checks + * to make sure we don't clean page that is partially dirty + */ + if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || + ((i == num_pages - 1) && + ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { + start = (u64)page->index << PAGE_CACHE_SHIFT; /* from here on start/end describe this page, not the buffer */ + end = start + PAGE_CACHE_SIZE - 1; + if (test_range_bit(tree, start, end, + EXTENT_DIRTY, 0)) { + unlock_page(page); + continue; + } + } + clear_page_dirty_for_io(page); + write_lock_irq(&page->mapping->tree_lock); + if (!PageDirty(page)) { /* also drop the radix-tree dirty tag, unless the page is dirty again by now */ + radix_tree_tag_clear(&page->mapping->page_tree, + page_index(page), + PAGECACHE_TAG_DIRTY); + } + write_unlock_irq(&page->mapping->tree_lock); + unlock_page(page); + } + return 0; +} +EXPORT_SYMBOL(clear_extent_buffer_dirty); + /* Wait for writeback on the byte range covered by @eb; returns the wait_on_extent_writeback() result. */ +int wait_on_extent_buffer_writeback(struct extent_io_tree *tree, + struct extent_buffer *eb) +{ + return wait_on_extent_writeback(tree, eb->start, + eb->start + eb->len - 1); +} +EXPORT_SYMBOL(wait_on_extent_buffer_writeback); + 
+int set_extent_buffer_dirty(struct extent_io_tree *tree, + struct extent_buffer *eb) +{ /* Mark every page of @eb dirty and set EXTENT_DIRTY on its byte range; returns the set_extent_dirty() result. */ + unsigned long i; + unsigned long num_pages; + + num_pages = num_extent_pages(eb->start, eb->len); + for (i = 0; i < num_pages; i++) { + struct page *page = extent_buffer_page(eb, i); + /* writepage may need to do something special for the + * first page, we have to make sure page->private is + * properly set. releasepage may drop page->private + * on us if the page isn't already dirty. + */ + if (i == 0) { + lock_page(page); /* the first page stays locked until it has been dirtied below */ + set_page_extent_head(page, eb->len); + } else if (PagePrivate(page) && + page->private != EXTENT_PAGE_PRIVATE) { + lock_page(page); + set_page_extent_mapped(page); + unlock_page(page); + } + __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); + if (i == 0) + unlock_page(page); + } + return set_extent_dirty(tree, eb->start, + eb->start + eb->len - 1, GFP_NOFS); +} +EXPORT_SYMBOL(set_extent_buffer_dirty); + /* Set EXTENT_UPTODATE on the buffer's byte range and mark its pages uptodate; a first or last page that the buffer only partly covers is handed to check_page_uptodate() instead. Always returns 0. */ +int set_extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb) +{ + unsigned long i; + struct page *page; + unsigned long num_pages; + + num_pages = num_extent_pages(eb->start, eb->len); + + set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, + GFP_NOFS); + for (i = 0; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || + ((i == num_pages - 1) && + ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { + check_page_uptodate(tree, page); + continue; + } + SetPageUptodate(page); + } + return 0; +} +EXPORT_SYMBOL(set_extent_buffer_uptodate); + /* Returns 1 when the flag cached in eb->flags is set, otherwise the result of test_range_bit() for EXTENT_UPTODATE over the buffer's range (presumably the final argument 1 asks for the whole range - confirm). */ +int extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb) +{ + if (eb->flags & EXTENT_UPTODATE) + return 1; + return test_range_bit(tree, eb->start, eb->start + eb->len - 1, + EXTENT_UPTODATE, 1); +} +EXPORT_SYMBOL(extent_buffer_uptodate); + /* Start reads for the pages of @eb that are not uptodate, beginning with the page that contains @start (page 0 when @start is 0). With @wait set, wait for the pages and return -EIO if any is still not uptodate; without it, return right after submitting. */ +int read_extent_buffer_pages(struct extent_io_tree *tree, + struct extent_buffer *eb, + u64 start, + int wait) +{ + unsigned long i; + unsigned long 
start_i; + struct page *page; + int err; + int ret = 0; + unsigned long num_pages; + + if (eb->flags & EXTENT_UPTODATE) + return 0; + + if (0 && test_range_bit(tree, eb->start, eb->start + eb->len - 1, + EXTENT_UPTODATE, 1)) { /* disabled: the leading 0 short-circuits this check */ + return 0; + } + + if (start) { + WARN_ON(start < eb->start); + start_i = (start >> PAGE_CACHE_SHIFT) - + (eb->start >> PAGE_CACHE_SHIFT); + } else { + start_i = 0; + } + + num_pages = num_extent_pages(eb->start, eb->len); + for (i = start_i; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + if (PageUptodate(page)) { + continue; + } + if (!wait) { + if (TestSetPageLocked(page)) { /* non-blocking mode: skip pages that are already locked */ + continue; + } + } else { + lock_page(page); + } + if (!PageUptodate(page)) { /* re-check now that the page lock is held */ + err = page->mapping->a_ops->readpage(NULL, page); /* the page is not unlocked here; presumably readpage or its completion does that, see wait_on_page_locked() below */ + if (err) { + ret = err; + } + } else { + unlock_page(page); + } + } + + if (ret || !wait) { + return ret; + } + + for (i = start_i; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + wait_on_page_locked(page); + if (!PageUptodate(page)) { + ret = -EIO; + } + } + if (!ret) + eb->flags |= EXTENT_UPTODATE; /* cache the result so later calls return early */ + return ret; +} +EXPORT_SYMBOL(read_extent_buffer_pages); + /* Copy @len bytes starting at buffer offset @start out of @eb into @dstv, one page at a time. */ +void read_extent_buffer(struct extent_buffer *eb, void *dstv, + unsigned long start, + unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char *dst = (char *)dstv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); /* offset of the buffer's first byte inside its first page */ + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + unsigned long num_pages = num_extent_pages(eb->start, eb->len); + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); /* NOTE(review): compares a buffer-relative end with an absolute one; start + len > eb->len looks like the intended bound - confirm */ + + offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + + while(len > 0) { + page = extent_buffer_page(eb, i); + if (!PageUptodate(page)) { + printk("page %lu not up to date i %lu, total %lu, len %lu\n", page->index, i, num_pages, eb->len); + WARN_ON(1); + } + WARN_ON(!PageUptodate(page)); + + cur = min(len, (PAGE_CACHE_SIZE - offset)); /* never cross a page boundary in one copy */ + kaddr = kmap_atomic(page, 
KM_USER1); + memcpy(dst, kaddr + offset, cur); + kunmap_atomic(kaddr, KM_USER1); + + dst += cur; + len -= cur; + offset = 0; /* every page after the first is read from its start */ + i++; + } +} +EXPORT_SYMBOL(read_extent_buffer); + /* kmap_atomic() the single page of @eb that holds buffer bytes [start, start + min_len) using slot @km. Returns -EINVAL when that range crosses a page boundary, else 0 with: *token = the kmap address to hand to unmap_extent_buffer(), *map = address of the first buffer byte inside that page, *map_start = buffer offset that *map corresponds to, *map_len = bytes from *map to the end of the page. */ +int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, + unsigned long min_len, char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km) +{ + size_t offset = start & (PAGE_CACHE_SIZE - 1); /* this initial value is always overwritten below */ + char *kaddr; + struct page *p; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + unsigned long end_i = (start_offset + start + min_len - 1) >> + PAGE_CACHE_SHIFT; + + if (i != end_i) + return -EINVAL; + + if (i == 0) { /* first page: the buffer begins start_offset bytes into it */ + offset = start_offset; + *map_start = 0; + } else { + offset = 0; + *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset; + } + if (start + min_len > eb->len) { +printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len); + WARN_ON(1); + } + + p = extent_buffer_page(eb, i); + WARN_ON(!PageUptodate(p)); + kaddr = kmap_atomic(p, km); + *token = kaddr; + *map = kaddr + offset; + *map_len = PAGE_CACHE_SIZE - offset; + return 0; +} +EXPORT_SYMBOL(map_private_extent_buffer); + /* Like map_private_extent_buffer(), but when @eb already carries a cached mapping (eb->map_token) that one is unmapped first and, on success, replaced by the new mapping. */ +int map_extent_buffer(struct extent_buffer *eb, unsigned long start, + unsigned long min_len, + char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km) +{ + int err; + int save = 0; /* set only when a cached mapping was replaced */ + if (eb->map_token) { + unmap_extent_buffer(eb, eb->map_token, km); + eb->map_token = NULL; + save = 1; + } + err = map_private_extent_buffer(eb, start, min_len, token, map, + map_start, map_len, km); + if (!err && save) { + eb->map_token = *token; + eb->kaddr = *map; + eb->map_start = *map_start; + eb->map_len = *map_len; + } + return err; +} +EXPORT_SYMBOL(map_extent_buffer); + /* Undo a mapping: kunmap_atomic() on @token with slot @km; @eb itself is not touched. */ +void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) +{ + kunmap_atomic(token, km); +} +EXPORT_SYMBOL(unmap_extent_buffer); + +int 
memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, + unsigned long start, + unsigned long len) +{ /* Compare @len bytes at @ptrv with the buffer contents from offset @start, page by page; returns the first non-zero memcmp(ptr, buffer) result, or 0 when everything matches. */ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char *ptr = (char *)ptrv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); /* offset of the buffer's first byte inside its first page */ + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + int ret = 0; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + + while(len > 0) { + page = extent_buffer_page(eb, i); + WARN_ON(!PageUptodate(page)); + + cur = min(len, (PAGE_CACHE_SIZE - offset)); + + kaddr = kmap_atomic(page, KM_USER0); + ret = memcmp(ptr, kaddr + offset, cur); + kunmap_atomic(kaddr, KM_USER0); + if (ret) + break; + + ptr += cur; + len -= cur; + offset = 0; + i++; + } + return ret; +} +EXPORT_SYMBOL(memcmp_extent_buffer); + /* Copy @len bytes from @srcv into @eb at buffer offset @start, one page at a time. */ +void write_extent_buffer(struct extent_buffer *eb, const void *srcv, + unsigned long start, unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char *src = (char *)srcv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + + while(len > 0) { + page = extent_buffer_page(eb, i); + WARN_ON(!PageUptodate(page)); + + cur = min(len, PAGE_CACHE_SIZE - offset); /* never cross a page boundary in one copy */ + kaddr = kmap_atomic(page, KM_USER1); + memcpy(kaddr + offset, src, cur); + kunmap_atomic(kaddr, KM_USER1); + + src += cur; + len -= cur; + offset = 0; + i++; + } +} +EXPORT_SYMBOL(write_extent_buffer); + /* Fill @len bytes of @eb, starting at buffer offset @start, with the byte @c. */ +void memset_extent_buffer(struct extent_buffer *eb, char c, + unsigned long start, unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) 
>> PAGE_CACHE_SHIFT; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + + while(len > 0) { + page = extent_buffer_page(eb, i); + WARN_ON(!PageUptodate(page)); + + cur = min(len, PAGE_CACHE_SIZE - offset); + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + offset, c, cur); + kunmap_atomic(kaddr, KM_USER0); + + len -= cur; + offset = 0; + i++; + } +} +EXPORT_SYMBOL(memset_extent_buffer); + /* Copy @len bytes from @src at @src_offset into @dst at @dst_offset. The destination is walked page by page; each chunk is filled with read_extent_buffer(), which handles the page crossings on the source side. */ +void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, + unsigned long dst_offset, unsigned long src_offset, + unsigned long len) +{ + u64 dst_len = dst->len; + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; + + WARN_ON(src->len != dst_len); + + offset = (start_offset + dst_offset) & + ((unsigned long)PAGE_CACHE_SIZE - 1); + + while(len > 0) { + page = extent_buffer_page(dst, i); + WARN_ON(!PageUptodate(page)); + + cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); + + kaddr = kmap_atomic(page, KM_USER0); + read_extent_buffer(src, kaddr + offset, src_offset, cur); /* read_extent_buffer() maps with KM_USER1, a different slot from the KM_USER0 mapping held here */ + kunmap_atomic(kaddr, KM_USER0); + + src_offset += cur; + len -= cur; + offset = 0; + i++; + } +} +EXPORT_SYMBOL(copy_extent_buffer); + /* Copy @len bytes between two page offsets with overlap-safe semantics: memmove() when source and destination are the same page, otherwise a byte-wise copy that runs from the end of the range backwards. */ +static void move_pages(struct page *dst_page, struct page *src_page, + unsigned long dst_off, unsigned long src_off, + unsigned long len) +{ + char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); + if (dst_page == src_page) { + memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); + } else { + char *src_kaddr = kmap_atomic(src_page, KM_USER1); + char *p = dst_kaddr + dst_off + len; + char *s = src_kaddr + src_off + len; + + while (len--) + *--p = *--s; + + kunmap_atomic(src_kaddr, KM_USER1); + } + kunmap_atomic(dst_kaddr, KM_USER0); +} + +static void copy_pages(struct page *dst_page, struct page 
*src_page, + unsigned long dst_off, unsigned long src_off, + unsigned long len) +{ /* Copy @len bytes from (src_page, src_off) to (dst_page, dst_off). Source and destination may be the same page with overlapping ranges, because memmove_extent_buffer() delegates to memcpy_extent_buffer() when dst_offset < src_offset; that case must not go through memcpy(). */ + char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); + char *src_kaddr; + + if (dst_page != src_page) { + src_kaddr = kmap_atomic(src_page, KM_USER1); + memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); /* two distinct pages: the ranges cannot overlap */ + } else { + src_kaddr = dst_kaddr; + memmove(dst_kaddr + dst_off, src_kaddr + src_off, len); /* same page: the ranges may overlap, which memcpy() does not support */ + } + kunmap_atomic(dst_kaddr, KM_USER0); + if (dst_page != src_page) + kunmap_atomic(src_kaddr, KM_USER1); +} + /* Copy @len bytes inside @dst from @src_offset to @dst_offset, walking forward in chunks that stay within one source page and one destination page. Both ranges must lie inside the buffer, otherwise BUG. */ +void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len) +{ + size_t cur; + size_t dst_off_in_page; + size_t src_off_in_page; + size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); /* offset of the buffer's first byte inside its first page */ + unsigned long dst_i; + unsigned long src_i; + + if (src_offset + len > dst->len) { + printk("memmove bogus src_offset %lu move len %lu len %lu\n", + src_offset, len, dst->len); + BUG_ON(1); + } + if (dst_offset + len > dst->len) { + printk("memmove bogus dst_offset %lu move len %lu len %lu\n", + dst_offset, len, dst->len); + BUG_ON(1); + } + + while(len > 0) { + dst_off_in_page = (start_offset + dst_offset) & + ((unsigned long)PAGE_CACHE_SIZE - 1); + src_off_in_page = (start_offset + src_offset) & + ((unsigned long)PAGE_CACHE_SIZE - 1); + + dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; + src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; + + cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - + src_off_in_page)); + cur = min_t(unsigned long, cur, + (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page)); /* chunk limited by what is left in both the source and the destination page */ + + copy_pages(extent_buffer_page(dst, dst_i), + extent_buffer_page(dst, src_i), + dst_off_in_page, src_off_in_page, cur); + + src_offset += cur; + dst_offset += cur; + len -= cur; + } +} +EXPORT_SYMBOL(memcpy_extent_buffer); + +void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len) +{ + size_t cur; + size_t dst_off_in_page; + size_t src_off_in_page; + unsigned long dst_end = dst_offset + len - 1; + 
unsigned long src_end = src_offset + len - 1; + size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long dst_i; + unsigned long src_i; + + if (src_offset + len > dst->len) { + printk("memmove bogus src_offset %lu move len %lu len %lu\n", + src_offset, len, dst->len); + BUG_ON(1); + } + if (dst_offset + len > dst->len) { + printk("memmove bogus dst_offset %lu move len %lu len %lu\n", + dst_offset, len, dst->len); + BUG_ON(1); + } + if (dst_offset < src_offset) { + memcpy_extent_buffer(dst, dst_offset, src_offset, len); + return; + } + while(len > 0) { + dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT; + src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; + + dst_off_in_page = (start_offset + dst_end) & + ((unsigned long)PAGE_CACHE_SIZE - 1); + src_off_in_page = (start_offset + src_end) & + ((unsigned long)PAGE_CACHE_SIZE - 1); + + cur = min_t(unsigned long, len, src_off_in_page + 1); + cur = min(cur, dst_off_in_page + 1); + move_pages(extent_buffer_page(dst, dst_i), + extent_buffer_page(dst, src_i), + dst_off_in_page - cur + 1, + src_off_in_page - cur + 1, cur); + + dst_end -= cur; + src_end -= cur; + len -= cur; + } +} +EXPORT_SYMBOL(memmove_extent_buffer); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h new file mode 100644 index 00000000000..06be1fe84b2 --- /dev/null +++ b/fs/btrfs/extent_io.h @@ -0,0 +1,193 @@ +#ifndef __EXTENTIO__ +#define __EXTENTIO__ + +#include + +/* bits for the extent state */ +#define EXTENT_DIRTY 1 +#define EXTENT_WRITEBACK (1 << 1) +#define EXTENT_UPTODATE (1 << 2) +#define EXTENT_LOCKED (1 << 3) +#define EXTENT_NEW (1 << 4) +#define EXTENT_DELALLOC (1 << 5) +#define EXTENT_DEFRAG (1 << 6) +#define EXTENT_DEFRAG_DONE (1 << 7) +#define EXTENT_BUFFER_FILLED (1 << 8) +#define EXTENT_CSUM (1 << 9) +#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) + +/* + * page->private values. Every page that is controlled by the extent + * map has page->private set to one. 
+ */ +#define EXTENT_PAGE_PRIVATE 1 +#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3 + +struct extent_io_ops { + int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); + int (*writepage_io_hook)(struct page *page, u64 start, u64 end); + int (*readpage_io_hook)(struct page *page, u64 start, u64 end); + int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end); + void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end); +}; + +struct extent_io_tree { + struct rb_root state; + struct address_space *mapping; + u64 dirty_bytes; + rwlock_t lock; + struct extent_io_ops *ops; + spinlock_t lru_lock; + struct list_head buffer_lru; + int lru_size; +}; + +struct extent_state { + u64 start; + u64 end; /* inclusive */ + int in_tree; + struct rb_node rb_node; + wait_queue_head_t wq; + atomic_t refs; + unsigned long state; + + /* for use by the FS */ + u64 private; + + struct list_head list; +}; + +struct extent_buffer { + u64 start; + unsigned long len; + char *map_token; + char *kaddr; + unsigned long map_start; + unsigned long map_len; + struct page *first_page; + struct list_head lru; + atomic_t refs; + int flags; +}; + +struct extent_map_tree; + +typedef struct extent_map *(get_extent_t)(struct inode *inode, + struct page *page, + size_t page_offset, + u64 start, u64 len, + int create); + +void extent_io_tree_init(struct extent_io_tree *tree, + struct address_space *mapping, gfp_t mask); +void extent_io_tree_empty_lru(struct extent_io_tree *tree); +int try_release_extent_mapping(struct extent_map_tree *map, + struct extent_io_tree *tree, struct page *page); +int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); +int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); +int extent_read_full_page(struct extent_io_tree *tree, struct page *page, + get_extent_t *get_extent); +int __init extent_io_init(void); +void extent_io_exit(void); + +u64 count_range_bits(struct extent_io_tree *tree, + u64 *start, u64 
search_end, + u64 max_bytes, unsigned long bits); + +int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int filled); +int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, + int bits, gfp_t mask); +int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, + int bits, gfp_t mask); +int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int find_first_extent_bit(struct extent_io_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, int bits); +int extent_invalidatepage(struct extent_io_tree *tree, + struct page *page, unsigned long offset); +int extent_write_full_page(struct extent_io_tree *tree, struct page *page, + get_extent_t *get_extent, + struct writeback_control *wbc); +int extent_writepages(struct extent_io_tree *tree, + struct address_space *mapping, + get_extent_t *get_extent, + struct writeback_control *wbc); +int extent_readpages(struct extent_io_tree *tree, + struct address_space *mapping, + struct list_head *pages, unsigned nr_pages, + get_extent_t get_extent); +int extent_prepare_write(struct extent_io_tree *tree, + struct inode *inode, struct page *page, + unsigned from, unsigned to, get_extent_t *get_extent); +int extent_commit_write(struct extent_io_tree *tree, + struct inode *inode, struct page *page, + unsigned from, unsigned to); +sector_t extent_bmap(struct address_space *mapping, sector_t iblock, + get_extent_t *get_extent); +int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end); +int set_state_private(struct extent_io_tree *tree, u64 start, u64 private); +int get_state_private(struct extent_io_tree 
*tree, u64 start, u64 *private); +void set_page_extent_mapped(struct page *page); + +struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, + u64 start, unsigned long len, + struct page *page0, + gfp_t mask); +struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, + u64 start, unsigned long len, + gfp_t mask); +void free_extent_buffer(struct extent_buffer *eb); +int read_extent_buffer_pages(struct extent_io_tree *tree, + struct extent_buffer *eb, u64 start, int wait); + +static inline void extent_buffer_get(struct extent_buffer *eb) +{ + atomic_inc(&eb->refs); +} + +int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, + unsigned long start, + unsigned long len); +void read_extent_buffer(struct extent_buffer *eb, void *dst, + unsigned long start, + unsigned long len); +void write_extent_buffer(struct extent_buffer *eb, const void *src, + unsigned long start, unsigned long len); +void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, + unsigned long dst_offset, unsigned long src_offset, + unsigned long len); +void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len); +void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len); +void memset_extent_buffer(struct extent_buffer *eb, char c, + unsigned long start, unsigned long len); +int wait_on_extent_buffer_writeback(struct extent_io_tree *tree, + struct extent_buffer *eb); +int clear_extent_buffer_dirty(struct extent_io_tree *tree, + struct extent_buffer *eb); +int set_extent_buffer_dirty(struct extent_io_tree *tree, + struct extent_buffer *eb); +int set_extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb); +int extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb); +int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, + unsigned long min_len, 
char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km); +int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, + unsigned long min_len, char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km); +void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); +#endif diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 010a287fbd7..268ad8facf6 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1,17 +1,10 @@ -#include -#include -#include -#include +#include #include -#include -#include +#include #include #include -#include -#include #include -#include -#include +#include #include "extent_map.h" /* temporary define until extent_map moves out of btrfs */ @@ -21,27 +14,6 @@ struct kmem_cache *btrfs_cache_create(const char *name, size_t size, unsigned long)); static struct kmem_cache *extent_map_cache; -static struct kmem_cache *extent_state_cache; -static struct kmem_cache *extent_buffer_cache; - -static LIST_HEAD(buffers); -static LIST_HEAD(states); - -static spinlock_t state_lock = SPIN_LOCK_UNLOCKED; -#define BUFFER_LRU_MAX 64 - -struct tree_entry { - u64 start; - u64 end; - int in_tree; - struct rb_node rb_node; -}; - -struct extent_page_data { - struct bio *bio; - struct extent_map_tree *tree; - get_extent_t *get_extent; -}; int __init extent_map_init(void) { @@ -50,72 +22,23 @@ int __init extent_map_init(void) NULL); if (!extent_map_cache) return -ENOMEM; - extent_state_cache = btrfs_cache_create("extent_state", - sizeof(struct extent_state), 0, - NULL); - if (!extent_state_cache) - goto free_map_cache; - extent_buffer_cache = btrfs_cache_create("extent_buffers", - sizeof(struct extent_buffer), 0, - NULL); - if (!extent_buffer_cache) - goto free_state_cache; return 0; - -free_state_cache: - kmem_cache_destroy(extent_state_cache); -free_map_cache: - kmem_cache_destroy(extent_map_cache); - return -ENOMEM; } void extent_map_exit(void) { - 
struct extent_state *state; - - while (!list_empty(&states)) { - state = list_entry(states.next, struct extent_state, list); - printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs)); - list_del(&state->list); - kmem_cache_free(extent_state_cache, state); - - } - if (extent_map_cache) kmem_cache_destroy(extent_map_cache); - if (extent_state_cache) - kmem_cache_destroy(extent_state_cache); - if (extent_buffer_cache) - kmem_cache_destroy(extent_buffer_cache); } -void extent_map_tree_init(struct extent_map_tree *tree, - struct address_space *mapping, gfp_t mask) +void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) { tree->map.rb_node = NULL; - tree->state.rb_node = NULL; - tree->ops = NULL; - tree->dirty_bytes = 0; - rwlock_init(&tree->lock); - spin_lock_init(&tree->lru_lock); - tree->mapping = mapping; - INIT_LIST_HEAD(&tree->buffer_lru); - tree->lru_size = 0; + tree->last = NULL; + spin_lock_init(&tree->lock); } EXPORT_SYMBOL(extent_map_tree_init); -void extent_map_tree_empty_lru(struct extent_map_tree *tree) -{ - struct extent_buffer *eb; - while(!list_empty(&tree->buffer_lru)) { - eb = list_entry(tree->buffer_lru.next, struct extent_buffer, - lru); - list_del_init(&eb->lru); - free_extent_buffer(eb); - } -} -EXPORT_SYMBOL(extent_map_tree_empty_lru); - struct extent_map *alloc_extent_map(gfp_t mask) { struct extent_map *em; @@ -123,6 +46,7 @@ struct extent_map *alloc_extent_map(gfp_t mask) if (!em || IS_ERR(em)) return em; em->in_tree = 0; + em->flags = 0; atomic_set(&em->refs, 1); return em; } @@ -132,6 +56,7 @@ void free_extent_map(struct extent_map *em) { if (!em) return; + WARN_ON(atomic_read(&em->refs) == 0); if (atomic_dec_and_test(&em->refs)) { WARN_ON(em->in_tree); kmem_cache_free(extent_map_cache, em); @@ -139,64 +64,28 @@ void free_extent_map(struct extent_map *em) } EXPORT_SYMBOL(free_extent_map); - -struct extent_state 
*alloc_extent_state(gfp_t mask) -{ - struct extent_state *state; - unsigned long flags; - - state = kmem_cache_alloc(extent_state_cache, mask); - if (!state || IS_ERR(state)) - return state; - state->state = 0; - state->in_tree = 0; - state->private = 0; - - spin_lock_irqsave(&state_lock, flags); - list_add(&state->list, &states); - spin_unlock_irqrestore(&state_lock, flags); - - atomic_set(&state->refs, 1); - init_waitqueue_head(&state->wq); - return state; -} -EXPORT_SYMBOL(alloc_extent_state); - -void free_extent_state(struct extent_state *state) -{ - unsigned long flags; - if (!state) - return; - if (atomic_dec_and_test(&state->refs)) { - WARN_ON(state->in_tree); - spin_lock_irqsave(&state_lock, flags); - list_del(&state->list); - spin_unlock_irqrestore(&state_lock, flags); - kmem_cache_free(extent_state_cache, state); - } -} -EXPORT_SYMBOL(free_extent_state); - static struct rb_node *tree_insert(struct rb_root *root, u64 offset, struct rb_node *node) { struct rb_node ** p = &root->rb_node; struct rb_node * parent = NULL; - struct tree_entry *entry; + struct extent_map *entry; while(*p) { parent = *p; - entry = rb_entry(parent, struct tree_entry, rb_node); + entry = rb_entry(parent, struct extent_map, rb_node); + + WARN_ON(!entry->in_tree); if (offset < entry->start) p = &(*p)->rb_left; - else if (offset > entry->end) + else if (offset >= extent_map_end(entry)) p = &(*p)->rb_right; else return parent; } - entry = rb_entry(node, struct tree_entry, rb_node); + entry = rb_entry(node, struct extent_map, rb_node); entry->in_tree = 1; rb_link_node(node, parent, p); rb_insert_color(node, root); @@ -210,17 +99,19 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, struct rb_node * n = root->rb_node; struct rb_node *prev = NULL; struct rb_node *orig_prev = NULL; - struct tree_entry *entry; - struct tree_entry *prev_entry = NULL; + struct extent_map *entry; + struct extent_map *prev_entry = NULL; while(n) { - entry = rb_entry(n, struct tree_entry, 
rb_node); + entry = rb_entry(n, struct extent_map, rb_node); prev = n; prev_entry = entry; + WARN_ON(!entry->in_tree); + if (offset < entry->start) n = n->rb_left; - else if (offset > entry->end) + else if (offset >= extent_map_end(entry)) n = n->rb_right; else return n; @@ -228,19 +119,19 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, if (prev_ret) { orig_prev = prev; - while(prev && offset > prev_entry->end) { + while(prev && offset >= extent_map_end(prev_entry)) { prev = rb_next(prev); - prev_entry = rb_entry(prev, struct tree_entry, rb_node); + prev_entry = rb_entry(prev, struct extent_map, rb_node); } *prev_ret = prev; prev = orig_prev; } if (next_ret) { - prev_entry = rb_entry(prev, struct tree_entry, rb_node); + prev_entry = rb_entry(prev, struct extent_map, rb_node); while(prev && offset < prev_entry->start) { prev = rb_prev(prev); - prev_entry = rb_entry(prev, struct tree_entry, rb_node); + prev_entry = rb_entry(prev, struct extent_map, rb_node); } *next_ret = prev; } @@ -257,22 +148,26 @@ static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) return ret; } -static int tree_delete(struct rb_root *root, u64 offset) +static int mergable_maps(struct extent_map *prev, struct extent_map *next) { - struct rb_node *node; - struct tree_entry *entry; - - node = __tree_search(root, offset, NULL, NULL); - if (!node) - return -ENOENT; - entry = rb_entry(node, struct tree_entry, rb_node); - entry->in_tree = 0; - rb_erase(node, root); + if (extent_map_end(prev) == next->start && + prev->flags == next->flags && + prev->bdev == next->bdev && + ((next->block_start == EXTENT_MAP_HOLE && + prev->block_start == EXTENT_MAP_HOLE) || + (next->block_start == EXTENT_MAP_INLINE && + prev->block_start == EXTENT_MAP_INLINE) || + (next->block_start == EXTENT_MAP_DELALLOC && + prev->block_start == EXTENT_MAP_DELALLOC) || + (next->block_start < EXTENT_MAP_LAST_BYTE - 1 && + next->block_start == extent_map_block_end(prev)))) { + return 1; + 
} return 0; } /* - * add_extent_mapping tries a simple backward merge with existing + * add_extent_mapping tries a simple forward/backward merge with existing * mappings. The extent_map struct passed in will be inserted into * the tree directly (no copies made, just a reference taken). */ @@ -280,13 +175,12 @@ int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) { int ret = 0; - struct extent_map *prev = NULL; + struct extent_map *merge = NULL; struct rb_node *rb; - write_lock_irq(&tree->lock); - rb = tree_insert(&tree->map, em->end, &em->rb_node); + rb = tree_insert(&tree->map, em->start, &em->rb_node); if (rb) { - prev = rb_entry(rb, struct extent_map, rb_node); + merge = rb_entry(rb, struct extent_map, rb_node); ret = -EEXIST; goto out; } @@ -294,53 +188,60 @@ int add_extent_mapping(struct extent_map_tree *tree, if (em->start != 0) { rb = rb_prev(&em->rb_node); if (rb) - prev = rb_entry(rb, struct extent_map, rb_node); - if (prev && prev->end + 1 == em->start && - ((em->block_start == EXTENT_MAP_HOLE && - prev->block_start == EXTENT_MAP_HOLE) || - (em->block_start == EXTENT_MAP_INLINE && - prev->block_start == EXTENT_MAP_INLINE) || - (em->block_start == EXTENT_MAP_DELALLOC && - prev->block_start == EXTENT_MAP_DELALLOC) || - (em->block_start < EXTENT_MAP_DELALLOC - 1 && - em->block_start == prev->block_end + 1))) { - em->start = prev->start; - em->block_start = prev->block_start; - rb_erase(&prev->rb_node, &tree->map); - prev->in_tree = 0; - free_extent_map(prev); + merge = rb_entry(rb, struct extent_map, rb_node); + if (rb && mergable_maps(merge, em)) { + em->start = merge->start; + em->len += merge->len; + em->block_start = merge->block_start; + merge->in_tree = 0; + rb_erase(&merge->rb_node, &tree->map); + free_extent_map(merge); } } + rb = rb_next(&em->rb_node); + if (rb) + merge = rb_entry(rb, struct extent_map, rb_node); + if (rb && mergable_maps(em, merge)) { + em->len += merge->len; + rb_erase(&merge->rb_node, &tree->map); + 
merge->in_tree = 0; + free_extent_map(merge); + } + tree->last = em; out: - write_unlock_irq(&tree->lock); return ret; } EXPORT_SYMBOL(add_extent_mapping); +static u64 range_end(u64 start, u64 len) +{ + if (start + len < start) + return (u64)-1; + return start + len; +} + /* * lookup_extent_mapping returns the first extent_map struct in the - * tree that intersects the [start, end] (inclusive) range. There may + * tree that intersects the [start, len] range. There may * be additional objects in the tree that intersect, so check the object * returned carefully to make sure you don't need additional lookups. */ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, - u64 start, u64 end) + u64 start, u64 len) { struct extent_map *em; struct rb_node *rb_node; - struct rb_node *prev = NULL; - struct rb_node *next = NULL; + struct rb_node *prev = NULL; struct rb_node *next = NULL; u64 end = range_end(start, len); em = tree->last; if (em && end > em->start && start < extent_map_end(em)) goto found; - read_lock_irq(&tree->lock); rb_node = __tree_search(&tree->map, start, &prev, &next); if (!rb_node && prev) { em = rb_entry(prev, struct extent_map, rb_node); - if (em->start <= end && em->end >= start) + if (end > em->start && start < extent_map_end(em)) goto found; } if (!rb_node && next) { em = rb_entry(next, struct extent_map, rb_node); - if (em->start <= end && em->end >= start) + if (end > em->start && start < extent_map_end(em)) goto found; } if (!rb_node) { @@ -352,14 +253,16 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, goto out; } em = rb_entry(rb_node, struct extent_map, rb_node); - if (em->end < start || em->start > end) { - em = NULL; - goto out; - } + if (end > em->start && start < extent_map_end(em)) + goto found; + + em = NULL; + goto out; + found: atomic_inc(&em->refs); + tree->last = em; out: - read_unlock_irq(&tree->lock); return em; } EXPORT_SYMBOL(lookup_extent_mapping); @@ -370,2866 +273,12 @@ 
EXPORT_SYMBOL(lookup_extent_mapping); */ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) { - int ret; + int ret = 0; - write_lock_irq(&tree->lock); - ret = tree_delete(&tree->map, em->end); - write_unlock_irq(&tree->lock); + rb_erase(&em->rb_node, &tree->map); + em->in_tree = 0; + if (tree->last == em) + tree->last = NULL; return ret; } EXPORT_SYMBOL(remove_extent_mapping); - -/* - * utility function to look for merge candidates inside a given range. - * Any extents with matching state are merged together into a single - * extent in the tree. Extents with EXTENT_IO in their state field - * are not merged because the end_io handlers need to be able to do - * operations on them without sleeping (or doing allocations/splits). - * - * This should be called with the tree lock held. - */ -static int merge_state(struct extent_map_tree *tree, - struct extent_state *state) -{ - struct extent_state *other; - struct rb_node *other_node; - - if (state->state & EXTENT_IOBITS) - return 0; - - other_node = rb_prev(&state->rb_node); - if (other_node) { - other = rb_entry(other_node, struct extent_state, rb_node); - if (other->end == state->start - 1 && - other->state == state->state) { - state->start = other->start; - other->in_tree = 0; - rb_erase(&other->rb_node, &tree->state); - free_extent_state(other); - } - } - other_node = rb_next(&state->rb_node); - if (other_node) { - other = rb_entry(other_node, struct extent_state, rb_node); - if (other->start == state->end + 1 && - other->state == state->state) { - other->start = state->start; - state->in_tree = 0; - rb_erase(&state->rb_node, &tree->state); - free_extent_state(state); - } - } - return 0; -} - -/* - * insert an extent_state struct into the tree. 'bits' are set on the - * struct before it is inserted. - * - * This may return -EEXIST if the extent is already there, in which case the - * state struct is freed. - * - * The tree lock is not taken internally. 
This is a utility function and - * probably isn't what you want to call (see set/clear_extent_bit). - */ -static int insert_state(struct extent_map_tree *tree, - struct extent_state *state, u64 start, u64 end, - int bits) -{ - struct rb_node *node; - - if (end < start) { - printk("end < start %Lu %Lu\n", end, start); - WARN_ON(1); - } - if (bits & EXTENT_DIRTY) - tree->dirty_bytes += end - start + 1; - state->state |= bits; - state->start = start; - state->end = end; - node = tree_insert(&tree->state, end, &state->rb_node); - if (node) { - struct extent_state *found; - found = rb_entry(node, struct extent_state, rb_node); - printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end); - free_extent_state(state); - return -EEXIST; - } - merge_state(tree, state); - return 0; -} - -/* - * split a given extent state struct in two, inserting the preallocated - * struct 'prealloc' as the newly created second half. 'split' indicates an - * offset inside 'orig' where it should be split. - * - * Before calling, - * the tree has 'orig' at [orig->start, orig->end]. After calling, there - * are two extent state structs in the tree: - * prealloc: [orig->start, split - 1] - * orig: [ split, orig->end ] - * - * The tree locks are not taken by this function. They need to be held - * by the caller. 
- */ -static int split_state(struct extent_map_tree *tree, struct extent_state *orig, - struct extent_state *prealloc, u64 split) -{ - struct rb_node *node; - prealloc->start = orig->start; - prealloc->end = split - 1; - prealloc->state = orig->state; - orig->start = split; - - node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node); - if (node) { - struct extent_state *found; - found = rb_entry(node, struct extent_state, rb_node); - printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end); - free_extent_state(prealloc); - return -EEXIST; - } - return 0; -} - -/* - * utility function to clear some bits in an extent state struct. - * it will optionally wake up any one waiting on this state (wake == 1), or - * forcibly remove the state from the tree (delete == 1). - * - * If no bits are set on the state struct after clearing things, the - * struct is freed and removed from the tree - */ -static int clear_state_bit(struct extent_map_tree *tree, - struct extent_state *state, int bits, int wake, - int delete) -{ - int ret = state->state & bits; - - if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { - u64 range = state->end - state->start + 1; - WARN_ON(range > tree->dirty_bytes); - tree->dirty_bytes -= range; - } - state->state &= ~bits; - if (wake) - wake_up(&state->wq); - if (delete || state->state == 0) { - if (state->in_tree) { - rb_erase(&state->rb_node, &tree->state); - state->in_tree = 0; - free_extent_state(state); - } else { - WARN_ON(1); - } - } else { - merge_state(tree, state); - } - return ret; -} - -/* - * clear some bits on a range in the tree. This may require splitting - * or inserting elements in the tree, so the gfp mask is used to - * indicate which allocations or sleeping are allowed. - * - * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove - * the given range from the tree regardless of state (ie for truncate). - * - * the range [start, end] is inclusive. 
- * - * This takes the tree lock, and returns < 0 on error, > 0 if any of the - * bits were already set, or zero if none of the bits were already set. - */ -int clear_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, - int bits, int wake, int delete, gfp_t mask) -{ - struct extent_state *state; - struct extent_state *prealloc = NULL; - struct rb_node *node; - unsigned long flags; - int err; - int set = 0; - -again: - if (!prealloc && (mask & __GFP_WAIT)) { - prealloc = alloc_extent_state(mask); - if (!prealloc) - return -ENOMEM; - } - - write_lock_irqsave(&tree->lock, flags); - /* - * this search will find the extents that end after - * our range starts - */ - node = tree_search(&tree->state, start); - if (!node) - goto out; - state = rb_entry(node, struct extent_state, rb_node); - if (state->start > end) - goto out; - WARN_ON(state->end < start); - - /* - * | ---- desired range ---- | - * | state | or - * | ------------- state -------------- | - * - * We need to split the extent we found, and may flip - * bits on second half. - * - * If the extent we found extends past our range, we - * just split and search again. It'll get split again - * the next time though. - * - * If the extent we found is inside our range, we clear - * the desired bit on it. 
- */ - - if (state->start < start) { - err = split_state(tree, state, prealloc, start); - BUG_ON(err == -EEXIST); - prealloc = NULL; - if (err) - goto out; - if (state->end <= end) { - start = state->end + 1; - set |= clear_state_bit(tree, state, bits, - wake, delete); - } else { - start = state->start; - } - goto search_again; - } - /* - * | ---- desired range ---- | - * | state | - * We need to split the extent, and clear the bit - * on the first half - */ - if (state->start <= end && state->end > end) { - err = split_state(tree, state, prealloc, end + 1); - BUG_ON(err == -EEXIST); - - if (wake) - wake_up(&state->wq); - set |= clear_state_bit(tree, prealloc, bits, - wake, delete); - prealloc = NULL; - goto out; - } - - start = state->end + 1; - set |= clear_state_bit(tree, state, bits, wake, delete); - goto search_again; - -out: - write_unlock_irqrestore(&tree->lock, flags); - if (prealloc) - free_extent_state(prealloc); - - return set; - -search_again: - if (start > end) - goto out; - write_unlock_irqrestore(&tree->lock, flags); - if (mask & __GFP_WAIT) - cond_resched(); - goto again; -} -EXPORT_SYMBOL(clear_extent_bit); - -static int wait_on_state(struct extent_map_tree *tree, - struct extent_state *state) -{ - DEFINE_WAIT(wait); - prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); - read_unlock_irq(&tree->lock); - schedule(); - read_lock_irq(&tree->lock); - finish_wait(&state->wq, &wait); - return 0; -} - -/* - * waits for one or more bits to clear on a range in the state tree. - * The range [start, end] is inclusive. 
- * The tree lock is taken by this function - */ -int wait_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits) -{ - struct extent_state *state; - struct rb_node *node; - - read_lock_irq(&tree->lock); -again: - while (1) { - /* - * this search will find all the extents that end after - * our range starts - */ - node = tree_search(&tree->state, start); - if (!node) - break; - - state = rb_entry(node, struct extent_state, rb_node); - - if (state->start > end) - goto out; - - if (state->state & bits) { - start = state->start; - atomic_inc(&state->refs); - wait_on_state(tree, state); - free_extent_state(state); - goto again; - } - start = state->end + 1; - - if (start > end) - break; - - if (need_resched()) { - read_unlock_irq(&tree->lock); - cond_resched(); - read_lock_irq(&tree->lock); - } - } -out: - read_unlock_irq(&tree->lock); - return 0; -} -EXPORT_SYMBOL(wait_extent_bit); - -static void set_state_bits(struct extent_map_tree *tree, - struct extent_state *state, - int bits) -{ - if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { - u64 range = state->end - state->start + 1; - tree->dirty_bytes += range; - } - state->state |= bits; -} - -/* - * set some bits on a range in the tree. This may require allocations - * or sleeping, so the gfp mask is used to indicate what is allowed. - * - * If 'exclusive' == 1, this will fail with -EEXIST if some part of the - * range already has the desired bits set. The start of the existing - * range is returned in failed_start in this case. - * - * [start, end] is inclusive - * This takes the tree lock. 
- */ -int set_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits, - int exclusive, u64 *failed_start, gfp_t mask) -{ - struct extent_state *state; - struct extent_state *prealloc = NULL; - struct rb_node *node; - unsigned long flags; - int err = 0; - int set; - u64 last_start; - u64 last_end; -again: - if (!prealloc && (mask & __GFP_WAIT)) { - prealloc = alloc_extent_state(mask); - if (!prealloc) - return -ENOMEM; - } - - write_lock_irqsave(&tree->lock, flags); - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(&tree->state, start); - if (!node) { - err = insert_state(tree, prealloc, start, end, bits); - prealloc = NULL; - BUG_ON(err == -EEXIST); - goto out; - } - - state = rb_entry(node, struct extent_state, rb_node); - last_start = state->start; - last_end = state->end; - - /* - * | ---- desired range ---- | - * | state | - * - * Just lock what we found and keep going - */ - if (state->start == start && state->end <= end) { - set = state->state & bits; - if (set && exclusive) { - *failed_start = state->start; - err = -EEXIST; - goto out; - } - set_state_bits(tree, state, bits); - start = state->end + 1; - merge_state(tree, state); - goto search_again; - } - - /* - * | ---- desired range ---- | - * | state | - * or - * | ------------- state -------------- | - * - * We need to split the extent we found, and may flip bits on - * second half. - * - * If the extent we found extends past our - * range, we just split and search again. It'll get split - * again the next time though. - * - * If the extent we found is inside our range, we set the - * desired bit on it. 
- */ - if (state->start < start) { - set = state->state & bits; - if (exclusive && set) { - *failed_start = start; - err = -EEXIST; - goto out; - } - err = split_state(tree, state, prealloc, start); - BUG_ON(err == -EEXIST); - prealloc = NULL; - if (err) - goto out; - if (state->end <= end) { - set_state_bits(tree, state, bits); - start = state->end + 1; - merge_state(tree, state); - } else { - start = state->start; - } - goto search_again; - } - /* - * | ---- desired range ---- | - * | state | or | state | - * - * There's a hole, we need to insert something in it and - * ignore the extent we found. - */ - if (state->start > start) { - u64 this_end; - if (end < last_start) - this_end = end; - else - this_end = last_start -1; - err = insert_state(tree, prealloc, start, this_end, - bits); - prealloc = NULL; - BUG_ON(err == -EEXIST); - if (err) - goto out; - start = this_end + 1; - goto search_again; - } - /* - * | ---- desired range ---- | - * | state | - * We need to split the extent, and set the bit - * on the first half - */ - if (state->start <= end && state->end > end) { - set = state->state & bits; - if (exclusive && set) { - *failed_start = start; - err = -EEXIST; - goto out; - } - err = split_state(tree, state, prealloc, end + 1); - BUG_ON(err == -EEXIST); - - set_state_bits(tree, prealloc, bits); - merge_state(tree, prealloc); - prealloc = NULL; - goto out; - } - - goto search_again; - -out: - write_unlock_irqrestore(&tree->lock, flags); - if (prealloc) - free_extent_state(prealloc); - - return err; - -search_again: - if (start > end) - goto out; - write_unlock_irqrestore(&tree->lock, flags); - if (mask & __GFP_WAIT) - cond_resched(); - goto again; -} -EXPORT_SYMBOL(set_extent_bit); - -/* wrappers around set/clear extent bit */ -int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, - mask); -} -EXPORT_SYMBOL(set_extent_dirty); - -int set_extent_bits(struct 
extent_map_tree *tree, u64 start, u64 end, - int bits, gfp_t mask) -{ - return set_extent_bit(tree, start, end, bits, 0, NULL, - mask); -} -EXPORT_SYMBOL(set_extent_bits); - -int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, - int bits, gfp_t mask) -{ - return clear_extent_bit(tree, start, end, bits, 0, 0, mask); -} -EXPORT_SYMBOL(clear_extent_bits); - -int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, - EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL, - mask); -} -EXPORT_SYMBOL(set_extent_delalloc); - -int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, - EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask); -} -EXPORT_SYMBOL(clear_extent_dirty); - -int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, - mask); -} -EXPORT_SYMBOL(set_extent_new); - -int clear_extent_new(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask); -} -EXPORT_SYMBOL(clear_extent_new); - -int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, - mask); -} -EXPORT_SYMBOL(set_extent_uptodate); - -int clear_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); -} -EXPORT_SYMBOL(clear_extent_uptodate); - -int set_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, EXTENT_WRITEBACK, - 0, NULL, mask); -} -EXPORT_SYMBOL(set_extent_writeback); - -int clear_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, 
EXTENT_WRITEBACK, 1, 0, mask); -} -EXPORT_SYMBOL(clear_extent_writeback); - -int wait_on_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end) -{ - return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK); -} -EXPORT_SYMBOL(wait_on_extent_writeback); - -/* - * locks a range in ascending order, waiting for any locked regions - * it hits on the way. [start,end] are inclusive, and this will sleep. - */ -int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask) -{ - int err; - u64 failed_start; - while (1) { - err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, - &failed_start, mask); - if (err == -EEXIST && (mask & __GFP_WAIT)) { - wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); - start = failed_start; - } else { - break; - } - WARN_ON(start > end); - } - return err; -} -EXPORT_SYMBOL(lock_extent); - -int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask); -} -EXPORT_SYMBOL(unlock_extent); - -/* - * helper function to set pages and extents in the tree dirty - */ -int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end) -{ - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; - struct page *page; - - while (index <= end_index) { - page = find_get_page(tree->mapping, index); - BUG_ON(!page); - __set_page_dirty_nobuffers(page); - page_cache_release(page); - index++; - } - set_extent_dirty(tree, start, end, GFP_NOFS); - return 0; -} -EXPORT_SYMBOL(set_range_dirty); - -/* - * helper function to set both pages and extents in the tree writeback - */ -int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end) -{ - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; - struct page *page; - - while (index <= end_index) { - page = find_get_page(tree->mapping, index); - BUG_ON(!page); - 
set_page_writeback(page); - page_cache_release(page); - index++; - } - set_extent_writeback(tree, start, end, GFP_NOFS); - return 0; -} -EXPORT_SYMBOL(set_range_writeback); - -int find_first_extent_bit(struct extent_map_tree *tree, u64 start, - u64 *start_ret, u64 *end_ret, int bits) -{ - struct rb_node *node; - struct extent_state *state; - int ret = 1; - - read_lock_irq(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(&tree->state, start); - if (!node || IS_ERR(node)) { - goto out; - } - - while(1) { - state = rb_entry(node, struct extent_state, rb_node); - if (state->end >= start && (state->state & bits)) { - *start_ret = state->start; - *end_ret = state->end; - ret = 0; - break; - } - node = rb_next(node); - if (!node) - break; - } -out: - read_unlock_irq(&tree->lock); - return ret; -} -EXPORT_SYMBOL(find_first_extent_bit); - -u64 find_lock_delalloc_range(struct extent_map_tree *tree, - u64 *start, u64 *end, u64 max_bytes) -{ - struct rb_node *node; - struct extent_state *state; - u64 cur_start = *start; - u64 found = 0; - u64 total_bytes = 0; - - write_lock_irq(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. 
- */ -search_again: - node = tree_search(&tree->state, cur_start); - if (!node || IS_ERR(node)) { - *end = (u64)-1; - goto out; - } - - while(1) { - state = rb_entry(node, struct extent_state, rb_node); - if (found && state->start != cur_start) { - goto out; - } - if (!(state->state & EXTENT_DELALLOC)) { - if (!found) - *end = state->end; - goto out; - } - if (!found) { - struct extent_state *prev_state; - struct rb_node *prev_node = node; - while(1) { - prev_node = rb_prev(prev_node); - if (!prev_node) - break; - prev_state = rb_entry(prev_node, - struct extent_state, - rb_node); - if (!(prev_state->state & EXTENT_DELALLOC)) - break; - state = prev_state; - node = prev_node; - } - } - if (state->state & EXTENT_LOCKED) { - DEFINE_WAIT(wait); - atomic_inc(&state->refs); - prepare_to_wait(&state->wq, &wait, - TASK_UNINTERRUPTIBLE); - write_unlock_irq(&tree->lock); - schedule(); - write_lock_irq(&tree->lock); - finish_wait(&state->wq, &wait); - free_extent_state(state); - goto search_again; - } - state->state |= EXTENT_LOCKED; - if (!found) - *start = state->start; - found++; - *end = state->end; - cur_start = state->end + 1; - node = rb_next(node); - if (!node) - break; - total_bytes += state->end - state->start + 1; - if (total_bytes >= max_bytes) - break; - } -out: - write_unlock_irq(&tree->lock); - return found; -} - -u64 count_range_bits(struct extent_map_tree *tree, - u64 *start, u64 search_end, u64 max_bytes, - unsigned long bits) -{ - struct rb_node *node; - struct extent_state *state; - u64 cur_start = *start; - u64 total_bytes = 0; - int found = 0; - - if (search_end <= cur_start) { - printk("search_end %Lu start %Lu\n", search_end, cur_start); - WARN_ON(1); - return 0; - } - - write_lock_irq(&tree->lock); - if (cur_start == 0 && bits == EXTENT_DIRTY) { - total_bytes = tree->dirty_bytes; - goto out; - } - /* - * this search will find all the extents that end after - * our range starts. 
- */ - node = tree_search(&tree->state, cur_start); - if (!node || IS_ERR(node)) { - goto out; - } - - while(1) { - state = rb_entry(node, struct extent_state, rb_node); - if (state->start > search_end) - break; - if (state->end >= cur_start && (state->state & bits)) { - total_bytes += min(search_end, state->end) + 1 - - max(cur_start, state->start); - if (total_bytes >= max_bytes) - break; - if (!found) { - *start = state->start; - found = 1; - } - } - node = rb_next(node); - if (!node) - break; - } -out: - write_unlock_irq(&tree->lock); - return total_bytes; -} -/* - * helper function to lock both pages and extents in the tree. - * pages must be locked first. - */ -int lock_range(struct extent_map_tree *tree, u64 start, u64 end) -{ - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; - struct page *page; - int err; - - while (index <= end_index) { - page = grab_cache_page(tree->mapping, index); - if (!page) { - err = -ENOMEM; - goto failed; - } - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto failed; - } - index++; - } - lock_extent(tree, start, end, GFP_NOFS); - return 0; - -failed: - /* - * we failed above in getting the page at 'index', so we undo here - * up to but not including the page at 'index' - */ - end_index = index; - index = start >> PAGE_CACHE_SHIFT; - while (index < end_index) { - page = find_get_page(tree->mapping, index); - unlock_page(page); - page_cache_release(page); - index++; - } - return err; -} -EXPORT_SYMBOL(lock_range); - -/* - * helper function to unlock both pages and extents in the tree. 
- */ -int unlock_range(struct extent_map_tree *tree, u64 start, u64 end) -{ - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; - struct page *page; - - while (index <= end_index) { - page = find_get_page(tree->mapping, index); - unlock_page(page); - page_cache_release(page); - index++; - } - unlock_extent(tree, start, end, GFP_NOFS); - return 0; -} -EXPORT_SYMBOL(unlock_range); - -int set_state_private(struct extent_map_tree *tree, u64 start, u64 private) -{ - struct rb_node *node; - struct extent_state *state; - int ret = 0; - - write_lock_irq(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(&tree->state, start); - if (!node || IS_ERR(node)) { - ret = -ENOENT; - goto out; - } - state = rb_entry(node, struct extent_state, rb_node); - if (state->start != start) { - ret = -ENOENT; - goto out; - } - state->private = private; -out: - write_unlock_irq(&tree->lock); - return ret; -} - -int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private) -{ - struct rb_node *node; - struct extent_state *state; - int ret = 0; - - read_lock_irq(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(&tree->state, start); - if (!node || IS_ERR(node)) { - ret = -ENOENT; - goto out; - } - state = rb_entry(node, struct extent_state, rb_node); - if (state->start != start) { - ret = -ENOENT; - goto out; - } - *private = state->private; -out: - read_unlock_irq(&tree->lock); - return ret; -} - -/* - * searches a range in the state tree for a given mask. - * If 'filled' == 1, this returns 1 only if ever extent in the tree - * has the bits set. Otherwise, 1 is returned if any bit in the - * range is found set. 
- */ -int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, - int bits, int filled) -{ - struct extent_state *state = NULL; - struct rb_node *node; - int bitset = 0; - - read_lock_irq(&tree->lock); - node = tree_search(&tree->state, start); - while (node && start <= end) { - state = rb_entry(node, struct extent_state, rb_node); - - if (filled && state->start > start) { - bitset = 0; - break; - } - - if (state->start > end) - break; - - if (state->state & bits) { - bitset = 1; - if (!filled) - break; - } else if (filled) { - bitset = 0; - break; - } - start = state->end + 1; - if (start > end) - break; - node = rb_next(node); - if (!node) { - if (filled) - bitset = 0; - break; - } - } - read_unlock_irq(&tree->lock); - return bitset; -} -EXPORT_SYMBOL(test_range_bit); - -/* - * helper function to set a given page up to date if all the - * extents in the tree for that page are up to date - */ -static int check_page_uptodate(struct extent_map_tree *tree, - struct page *page) -{ - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 end = start + PAGE_CACHE_SIZE - 1; - if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1)) - SetPageUptodate(page); - return 0; -} - -/* - * helper function to unlock a page if all the extents in the tree - * for that page are unlocked - */ -static int check_page_locked(struct extent_map_tree *tree, - struct page *page) -{ - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 end = start + PAGE_CACHE_SIZE - 1; - if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0)) - unlock_page(page); - return 0; -} - -/* - * helper function to end page writeback if all the extents - * in the tree for that page are done with writeback - */ -static int check_page_writeback(struct extent_map_tree *tree, - struct page *page) -{ - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 end = start + PAGE_CACHE_SIZE - 1; - if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0)) - end_page_writeback(page); - return 0; -} - -/* 
lots and lots of room for performance fixes in the end_bio funcs */ - -/* - * after a writepage IO is done, we need to: - * clear the uptodate bits on error - * clear the writeback bits in the extent tree for this IO - * end_page_writeback if the page has no more pending IO - * - * Scheduling is not allowed, so the extent state tree is expected - * to have one and only one object corresponding to this IO. - */ -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) -static void end_bio_extent_writepage(struct bio *bio, int err) -#else -static int end_bio_extent_writepage(struct bio *bio, - unsigned int bytes_done, int err) -#endif -{ - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct extent_map_tree *tree = bio->bi_private; - u64 start; - u64 end; - int whole_page; - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - if (bio->bi_size) - return 1; -#endif - - do { - struct page *page = bvec->bv_page; - start = ((u64)page->index << PAGE_CACHE_SHIFT) + - bvec->bv_offset; - end = start + bvec->bv_len - 1; - - if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) - whole_page = 1; - else - whole_page = 0; - - if (--bvec >= bio->bi_io_vec) - prefetchw(&bvec->bv_page->flags); - - if (!uptodate) { - clear_extent_uptodate(tree, start, end, GFP_ATOMIC); - ClearPageUptodate(page); - SetPageError(page); - } - clear_extent_writeback(tree, start, end, GFP_ATOMIC); - - if (whole_page) - end_page_writeback(page); - else - check_page_writeback(tree, page); - if (tree->ops && tree->ops->writepage_end_io_hook) - tree->ops->writepage_end_io_hook(page, start, end); - } while (bvec >= bio->bi_io_vec); - - bio_put(bio); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - return 0; -#endif -} - -/* - * after a readpage IO is done, we need to: - * clear the uptodate bits on error - * set the uptodate bits if things worked - * set the page up to date if all extents in the tree are uptodate - * clear the lock 
bit in the extent tree - * unlock the page if there are no other extents locked for it - * - * Scheduling is not allowed, so the extent state tree is expected - * to have one and only one object corresponding to this IO. - */ -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) -static void end_bio_extent_readpage(struct bio *bio, int err) -#else -static int end_bio_extent_readpage(struct bio *bio, - unsigned int bytes_done, int err) -#endif -{ - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct extent_map_tree *tree = bio->bi_private; - u64 start; - u64 end; - int whole_page; - int ret; - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - if (bio->bi_size) - return 1; -#endif - - do { - struct page *page = bvec->bv_page; - start = ((u64)page->index << PAGE_CACHE_SHIFT) + - bvec->bv_offset; - end = start + bvec->bv_len - 1; - - if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) - whole_page = 1; - else - whole_page = 0; - - if (--bvec >= bio->bi_io_vec) - prefetchw(&bvec->bv_page->flags); - - if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { - ret = tree->ops->readpage_end_io_hook(page, start, end); - if (ret) - uptodate = 0; - } - if (uptodate) { - set_extent_uptodate(tree, start, end, GFP_ATOMIC); - if (whole_page) - SetPageUptodate(page); - else - check_page_uptodate(tree, page); - } else { - ClearPageUptodate(page); - SetPageError(page); - } - - unlock_extent(tree, start, end, GFP_ATOMIC); - - if (whole_page) - unlock_page(page); - else - check_page_locked(tree, page); - } while (bvec >= bio->bi_io_vec); - - bio_put(bio); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - return 0; -#endif -} - -/* - * IO done from prepare_write is pretty simple, we just unlock - * the structs in the extent tree when done, and set the uptodate bits - * as appropriate. 
- */ -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) -static void end_bio_extent_preparewrite(struct bio *bio, int err) -#else -static int end_bio_extent_preparewrite(struct bio *bio, - unsigned int bytes_done, int err) -#endif -{ - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct extent_map_tree *tree = bio->bi_private; - u64 start; - u64 end; - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - if (bio->bi_size) - return 1; -#endif - - do { - struct page *page = bvec->bv_page; - start = ((u64)page->index << PAGE_CACHE_SHIFT) + - bvec->bv_offset; - end = start + bvec->bv_len - 1; - - if (--bvec >= bio->bi_io_vec) - prefetchw(&bvec->bv_page->flags); - - if (uptodate) { - set_extent_uptodate(tree, start, end, GFP_ATOMIC); - } else { - ClearPageUptodate(page); - SetPageError(page); - } - - unlock_extent(tree, start, end, GFP_ATOMIC); - - } while (bvec >= bio->bi_io_vec); - - bio_put(bio); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - return 0; -#endif -} - -static struct bio * -extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, - gfp_t gfp_flags) -{ - struct bio *bio; - - bio = bio_alloc(gfp_flags, nr_vecs); - - if (bio == NULL && (current->flags & PF_MEMALLOC)) { - while (!bio && (nr_vecs /= 2)) - bio = bio_alloc(gfp_flags, nr_vecs); - } - - if (bio) { - bio->bi_bdev = bdev; - bio->bi_sector = first_sector; - } - return bio; -} - -static int submit_one_bio(int rw, struct bio *bio) -{ - u64 maxsector; - int ret = 0; - - bio_get(bio); - - maxsector = bio->bi_bdev->bd_inode->i_size >> 9; - if (maxsector < bio->bi_sector) { - printk("sector too large max %Lu got %llu\n", maxsector, - (unsigned long long)bio->bi_sector); - WARN_ON(1); - } - - submit_bio(rw, bio); - if (bio_flagged(bio, BIO_EOPNOTSUPP)) - ret = -EOPNOTSUPP; - bio_put(bio); - return ret; -} - -static int submit_extent_page(int rw, struct extent_map_tree *tree, - struct page *page, sector_t 
sector, - size_t size, unsigned long offset, - struct block_device *bdev, - struct bio **bio_ret, - unsigned long max_pages, - bio_end_io_t end_io_func) -{ - int ret = 0; - struct bio *bio; - int nr; - - if (bio_ret && *bio_ret) { - bio = *bio_ret; - if (bio->bi_sector + (bio->bi_size >> 9) != sector || - bio_add_page(bio, page, size, offset) < size) { - ret = submit_one_bio(rw, bio); - bio = NULL; - } else { - return 0; - } - } - nr = min_t(int, max_pages, bio_get_nr_vecs(bdev)); - bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); - if (!bio) { - printk("failed to allocate bio nr %d\n", nr); - } - bio_add_page(bio, page, size, offset); - bio->bi_end_io = end_io_func; - bio->bi_private = tree; - if (bio_ret) { - *bio_ret = bio; - } else { - ret = submit_one_bio(rw, bio); - } - - return ret; -} - -void set_page_extent_mapped(struct page *page) -{ - if (!PagePrivate(page)) { - SetPagePrivate(page); - WARN_ON(!page->mapping->a_ops->invalidatepage); - set_page_private(page, EXTENT_PAGE_PRIVATE); - page_cache_get(page); - } -} - -void set_page_extent_head(struct page *page, unsigned long len) -{ - set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); -} - -/* - * basic readpage implementation. 
Locked extent state structs are inserted - * into the tree that are removed when the IO is done (by the end_io - * handlers) - */ -static int __extent_read_full_page(struct extent_map_tree *tree, - struct page *page, - get_extent_t *get_extent, - struct bio **bio) -{ - struct inode *inode = page->mapping->host; - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 page_end = start + PAGE_CACHE_SIZE - 1; - u64 end; - u64 cur = start; - u64 extent_offset; - u64 last_byte = i_size_read(inode); - u64 block_start; - u64 cur_end; - sector_t sector; - struct extent_map *em; - struct block_device *bdev; - int ret; - int nr = 0; - size_t page_offset = 0; - size_t iosize; - size_t blocksize = inode->i_sb->s_blocksize; - - set_page_extent_mapped(page); - - end = page_end; - lock_extent(tree, start, end, GFP_NOFS); - - while (cur <= end) { - if (cur >= last_byte) { - char *userpage; - iosize = PAGE_CACHE_SIZE - page_offset; - userpage = kmap_atomic(page, KM_USER0); - memset(userpage + page_offset, 0, iosize); - flush_dcache_page(page); - kunmap_atomic(userpage, KM_USER0); - set_extent_uptodate(tree, cur, cur + iosize - 1, - GFP_NOFS); - unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); - break; - } - em = get_extent(inode, page, page_offset, cur, end, 0); - if (IS_ERR(em) || !em) { - SetPageError(page); - unlock_extent(tree, cur, end, GFP_NOFS); - break; - } - - extent_offset = cur - em->start; - BUG_ON(em->end < cur); - BUG_ON(end < cur); - - iosize = min(em->end - cur, end - cur) + 1; - cur_end = min(em->end, end); - iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); - sector = (em->block_start + extent_offset) >> 9; - bdev = em->bdev; - block_start = em->block_start; - free_extent_map(em); - em = NULL; - - /* we've found a hole, just zero and go on */ - if (block_start == EXTENT_MAP_HOLE) { - char *userpage; - userpage = kmap_atomic(page, KM_USER0); - memset(userpage + page_offset, 0, iosize); - flush_dcache_page(page); - kunmap_atomic(userpage, KM_USER0); - 
- set_extent_uptodate(tree, cur, cur + iosize - 1, - GFP_NOFS); - unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); - cur = cur + iosize; - page_offset += iosize; - continue; - } - /* the get_extent function already copied into the page */ - if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) { - unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); - cur = cur + iosize; - page_offset += iosize; - continue; - } - - ret = 0; - if (tree->ops && tree->ops->readpage_io_hook) { - ret = tree->ops->readpage_io_hook(page, cur, - cur + iosize - 1); - } - if (!ret) { - unsigned long nr = (last_byte >> PAGE_CACHE_SHIFT) + 1; - nr -= page->index; - ret = submit_extent_page(READ, tree, page, - sector, iosize, page_offset, - bdev, bio, nr, - end_bio_extent_readpage); - } - if (ret) - SetPageError(page); - cur = cur + iosize; - page_offset += iosize; - nr++; - } - if (!nr) { - if (!PageError(page)) - SetPageUptodate(page); - unlock_page(page); - } - return 0; -} - -int extent_read_full_page(struct extent_map_tree *tree, struct page *page, - get_extent_t *get_extent) -{ - struct bio *bio = NULL; - int ret; - - ret = __extent_read_full_page(tree, page, get_extent, &bio); - if (bio) - submit_one_bio(READ, bio); - return ret; -} -EXPORT_SYMBOL(extent_read_full_page); - -/* - * the writepage semantics are similar to regular writepage. extent - * records are inserted to lock ranges in the tree, and as dirty areas - * are found, they are marked writeback. 
Then the lock bits are removed - * and the end_io handler clears the writeback ranges - */ -static int __extent_writepage(struct page *page, struct writeback_control *wbc, - void *data) -{ - struct inode *inode = page->mapping->host; - struct extent_page_data *epd = data; - struct extent_map_tree *tree = epd->tree; - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 delalloc_start; - u64 page_end = start + PAGE_CACHE_SIZE - 1; - u64 end; - u64 cur = start; - u64 extent_offset; - u64 last_byte = i_size_read(inode); - u64 block_start; - u64 iosize; - sector_t sector; - struct extent_map *em; - struct block_device *bdev; - int ret; - int nr = 0; - size_t page_offset = 0; - size_t blocksize; - loff_t i_size = i_size_read(inode); - unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; - u64 nr_delalloc; - u64 delalloc_end; - - WARN_ON(!PageLocked(page)); - if (page->index > end_index) { - clear_extent_dirty(tree, start, page_end, GFP_NOFS); - unlock_page(page); - return 0; - } - - if (page->index == end_index) { - char *userpage; - - size_t offset = i_size & (PAGE_CACHE_SIZE - 1); - - userpage = kmap_atomic(page, KM_USER0); - memset(userpage + offset, 0, PAGE_CACHE_SIZE - offset); - flush_dcache_page(page); - kunmap_atomic(userpage, KM_USER0); - } - - set_page_extent_mapped(page); - - delalloc_start = start; - delalloc_end = 0; - while(delalloc_end < page_end) { - nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start, - &delalloc_end, - 128 * 1024 * 1024); - if (nr_delalloc == 0) { - delalloc_start = delalloc_end + 1; - continue; - } - tree->ops->fill_delalloc(inode, delalloc_start, - delalloc_end); - clear_extent_bit(tree, delalloc_start, - delalloc_end, - EXTENT_LOCKED | EXTENT_DELALLOC, - 1, 0, GFP_NOFS); - delalloc_start = delalloc_end + 1; - } - lock_extent(tree, start, page_end, GFP_NOFS); - - end = page_end; - if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { - printk("found delalloc bits after lock_extent\n"); - } - - if (last_byte <= 
start) { - clear_extent_dirty(tree, start, page_end, GFP_NOFS); - goto done; - } - - set_extent_uptodate(tree, start, page_end, GFP_NOFS); - blocksize = inode->i_sb->s_blocksize; - - while (cur <= end) { - if (cur >= last_byte) { - clear_extent_dirty(tree, cur, page_end, GFP_NOFS); - break; - } - em = epd->get_extent(inode, page, page_offset, cur, end, 1); - if (IS_ERR(em) || !em) { - SetPageError(page); - break; - } - - extent_offset = cur - em->start; - BUG_ON(em->end < cur); - BUG_ON(end < cur); - iosize = min(em->end - cur, end - cur) + 1; - iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); - sector = (em->block_start + extent_offset) >> 9; - bdev = em->bdev; - block_start = em->block_start; - free_extent_map(em); - em = NULL; - - if (block_start == EXTENT_MAP_HOLE || - block_start == EXTENT_MAP_INLINE) { - clear_extent_dirty(tree, cur, - cur + iosize - 1, GFP_NOFS); - cur = cur + iosize; - page_offset += iosize; - continue; - } - - /* leave this out until we have a page_mkwrite call */ - if (0 && !test_range_bit(tree, cur, cur + iosize - 1, - EXTENT_DIRTY, 0)) { - cur = cur + iosize; - page_offset += iosize; - continue; - } - clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); - if (tree->ops && tree->ops->writepage_io_hook) { - ret = tree->ops->writepage_io_hook(page, cur, - cur + iosize - 1); - } else { - ret = 0; - } - if (ret) - SetPageError(page); - else { - unsigned long max_nr = end_index + 1; - set_range_writeback(tree, cur, cur + iosize - 1); - if (!PageWriteback(page)) { - printk("warning page %lu not writeback, " - "cur %llu end %llu\n", page->index, - (unsigned long long)cur, - (unsigned long long)end); - } - - ret = submit_extent_page(WRITE, tree, page, sector, - iosize, page_offset, bdev, - &epd->bio, max_nr, - end_bio_extent_writepage); - if (ret) - SetPageError(page); - } - cur = cur + iosize; - page_offset += iosize; - nr++; - } -done: - if (nr == 0) { - /* make sure the mapping tag for page dirty gets cleared */ - 
set_page_writeback(page); - end_page_writeback(page); - } - unlock_extent(tree, start, page_end, GFP_NOFS); - unlock_page(page); - return 0; -} - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - -/* Taken directly from 2.6.23 for 2.6.18 back port */ -typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, - void *data); - -/** - * write_cache_pages - walk the list of dirty pages of the given address space - * and write all of them. - * @mapping: address space structure to write - * @wbc: subtract the number of written pages from *@wbc->nr_to_write - * @writepage: function called for each page - * @data: data passed to writepage function - * - * If a page is already under I/O, write_cache_pages() skips it, even - * if it's dirty. This is desirable behaviour for memory-cleaning writeback, - * but it is INCORRECT for data-integrity system calls such as fsync(). fsync() - * and msync() need to guarantee that all the data which was dirty at the time - * the call was made get new I/O started against them. If wbc->sync_mode is - * WB_SYNC_ALL then we were called for data integrity and we must wait for - * existing IO to complete. 
- */ -static int write_cache_pages(struct address_space *mapping, - struct writeback_control *wbc, writepage_t writepage, - void *data) -{ - struct backing_dev_info *bdi = mapping->backing_dev_info; - int ret = 0; - int done = 0; - struct pagevec pvec; - int nr_pages; - pgoff_t index; - pgoff_t end; /* Inclusive */ - int scanned = 0; - int range_whole = 0; - - if (wbc->nonblocking && bdi_write_congested(bdi)) { - wbc->encountered_congestion = 1; - return 0; - } - - pagevec_init(&pvec, 0); - if (wbc->range_cyclic) { - index = mapping->writeback_index; /* Start from prev offset */ - end = -1; - } else { - index = wbc->range_start >> PAGE_CACHE_SHIFT; - end = wbc->range_end >> PAGE_CACHE_SHIFT; - if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) - range_whole = 1; - scanned = 1; - } -retry: - while (!done && (index <= end) && - (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_DIRTY, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { - unsigned i; - - scanned = 1; - for (i = 0; i < nr_pages; i++) { - struct page *page = pvec.pages[i]; - - /* - * At this point we hold neither mapping->tree_lock nor - * lock on the page itself: the page may be truncated or - * invalidated (changing page->mapping to NULL), or even - * swizzled back from swapper_space to tmpfs file - * mapping - */ - lock_page(page); - - if (unlikely(page->mapping != mapping)) { - unlock_page(page); - continue; - } - - if (!wbc->range_cyclic && page->index > end) { - done = 1; - unlock_page(page); - continue; - } - - if (wbc->sync_mode != WB_SYNC_NONE) - wait_on_page_writeback(page); - - if (PageWriteback(page) || - !clear_page_dirty_for_io(page)) { - unlock_page(page); - continue; - } - - ret = (*writepage)(page, wbc, data); - - if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) { - unlock_page(page); - ret = 0; - } - if (ret || (--(wbc->nr_to_write) <= 0)) - done = 1; - if (wbc->nonblocking && bdi_write_congested(bdi)) { - wbc->encountered_congestion = 1; - done = 1; - } - } - 
pagevec_release(&pvec); - cond_resched(); - } - if (!scanned && !done) { - /* - * We hit the last page and there is more work to be done: wrap - * back to the start of the file - */ - scanned = 1; - index = 0; - goto retry; - } - if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) - mapping->writeback_index = index; - return ret; -} -#endif - -int extent_write_full_page(struct extent_map_tree *tree, struct page *page, - get_extent_t *get_extent, - struct writeback_control *wbc) -{ - int ret; - struct address_space *mapping = page->mapping; - struct extent_page_data epd = { - .bio = NULL, - .tree = tree, - .get_extent = get_extent, - }; - struct writeback_control wbc_writepages = { - .bdi = wbc->bdi, - .sync_mode = WB_SYNC_NONE, - .older_than_this = NULL, - .nr_to_write = 64, - .range_start = page_offset(page) + PAGE_CACHE_SIZE, - .range_end = (loff_t)-1, - }; - - - ret = __extent_writepage(page, wbc, &epd); - - write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd); - if (epd.bio) { - submit_one_bio(WRITE, epd.bio); - } - return ret; -} -EXPORT_SYMBOL(extent_write_full_page); - - -int extent_writepages(struct extent_map_tree *tree, - struct address_space *mapping, - get_extent_t *get_extent, - struct writeback_control *wbc) -{ - int ret = 0; - struct extent_page_data epd = { - .bio = NULL, - .tree = tree, - .get_extent = get_extent, - }; - - ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd); - if (epd.bio) { - submit_one_bio(WRITE, epd.bio); - } - return ret; -} -EXPORT_SYMBOL(extent_writepages); - -int extent_readpages(struct extent_map_tree *tree, - struct address_space *mapping, - struct list_head *pages, unsigned nr_pages, - get_extent_t get_extent) -{ - struct bio *bio = NULL; - unsigned page_idx; - struct pagevec pvec; - - pagevec_init(&pvec, 0); - for (page_idx = 0; page_idx < nr_pages; page_idx++) { - struct page *page = list_entry(pages->prev, struct page, lru); - - prefetchw(&page->flags); - list_del(&page->lru); - 
/* - * what we want to do here is call add_to_page_cache_lru, - * but that isn't exported, so we reproduce it here - */ - if (!add_to_page_cache(page, mapping, - page->index, GFP_KERNEL)) { - - /* open coding of lru_cache_add, also not exported */ - page_cache_get(page); - if (!pagevec_add(&pvec, page)) - __pagevec_lru_add(&pvec); - __extent_read_full_page(tree, page, get_extent, &bio); - } - page_cache_release(page); - } - if (pagevec_count(&pvec)) - __pagevec_lru_add(&pvec); - BUG_ON(!list_empty(pages)); - if (bio) - submit_one_bio(READ, bio); - return 0; -} -EXPORT_SYMBOL(extent_readpages); - -/* - * basic invalidatepage code, this waits on any locked or writeback - * ranges corresponding to the page, and then deletes any extent state - * records from the tree - */ -int extent_invalidatepage(struct extent_map_tree *tree, - struct page *page, unsigned long offset) -{ - u64 start = ((u64)page->index << PAGE_CACHE_SHIFT); - u64 end = start + PAGE_CACHE_SIZE - 1; - size_t blocksize = page->mapping->host->i_sb->s_blocksize; - - start += (offset + blocksize -1) & ~(blocksize - 1); - if (start > end) - return 0; - - lock_extent(tree, start, end, GFP_NOFS); - wait_on_extent_writeback(tree, start, end); - clear_extent_bit(tree, start, end, - EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, - 1, 1, GFP_NOFS); - return 0; -} -EXPORT_SYMBOL(extent_invalidatepage); - -/* - * simple commit_write call, set_range_dirty is used to mark both - * the pages and the extent records as dirty - */ -int extent_commit_write(struct extent_map_tree *tree, - struct inode *inode, struct page *page, - unsigned from, unsigned to) -{ - loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; - - set_page_extent_mapped(page); - set_page_dirty(page); - - if (pos > inode->i_size) { - i_size_write(inode, pos); - mark_inode_dirty(inode); - } - return 0; -} -EXPORT_SYMBOL(extent_commit_write); - -int extent_prepare_write(struct extent_map_tree *tree, - struct inode *inode, struct page *page, - 
unsigned from, unsigned to, get_extent_t *get_extent) -{ - u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 page_end = page_start + PAGE_CACHE_SIZE - 1; - u64 block_start; - u64 orig_block_start; - u64 block_end; - u64 cur_end; - struct extent_map *em; - unsigned blocksize = 1 << inode->i_blkbits; - size_t page_offset = 0; - size_t block_off_start; - size_t block_off_end; - int err = 0; - int iocount = 0; - int ret = 0; - int isnew; - - set_page_extent_mapped(page); - - block_start = (page_start + from) & ~((u64)blocksize - 1); - block_end = (page_start + to - 1) | (blocksize - 1); - orig_block_start = block_start; - - lock_extent(tree, page_start, page_end, GFP_NOFS); - while(block_start <= block_end) { - em = get_extent(inode, page, page_offset, block_start, - block_end, 1); - if (IS_ERR(em) || !em) { - goto err; - } - cur_end = min(block_end, em->end); - block_off_start = block_start & (PAGE_CACHE_SIZE - 1); - block_off_end = block_off_start + blocksize; - isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS); - - if (!PageUptodate(page) && isnew && - (block_off_end > to || block_off_start < from)) { - void *kaddr; - - kaddr = kmap_atomic(page, KM_USER0); - if (block_off_end > to) - memset(kaddr + to, 0, block_off_end - to); - if (block_off_start < from) - memset(kaddr + block_off_start, 0, - from - block_off_start); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - } - if ((em->block_start != EXTENT_MAP_HOLE && - em->block_start != EXTENT_MAP_INLINE) && - !isnew && !PageUptodate(page) && - (block_off_end > to || block_off_start < from) && - !test_range_bit(tree, block_start, cur_end, - EXTENT_UPTODATE, 1)) { - u64 sector; - u64 extent_offset = block_start - em->start; - size_t iosize; - sector = (em->block_start + extent_offset) >> 9; - iosize = (cur_end - block_start + blocksize) & - ~((u64)blocksize - 1); - /* - * we've already got the extent locked, but we - * need to split the state such that our end_bio - * handler can 
clear the lock. - */ - set_extent_bit(tree, block_start, - block_start + iosize - 1, - EXTENT_LOCKED, 0, NULL, GFP_NOFS); - ret = submit_extent_page(READ, tree, page, - sector, iosize, page_offset, em->bdev, - NULL, 1, - end_bio_extent_preparewrite); - iocount++; - block_start = block_start + iosize; - } else { - set_extent_uptodate(tree, block_start, cur_end, - GFP_NOFS); - unlock_extent(tree, block_start, cur_end, GFP_NOFS); - block_start = cur_end + 1; - } - page_offset = block_start & (PAGE_CACHE_SIZE - 1); - free_extent_map(em); - } - if (iocount) { - wait_extent_bit(tree, orig_block_start, - block_end, EXTENT_LOCKED); - } - check_page_uptodate(tree, page); -err: - /* FIXME, zero out newly allocated blocks on error */ - return err; -} -EXPORT_SYMBOL(extent_prepare_write); - -/* - * a helper for releasepage. As long as there are no locked extents - * in the range corresponding to the page, both state records and extent - * map records are removed - */ -int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page) -{ - struct extent_map *em; - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 end = start + PAGE_CACHE_SIZE - 1; - u64 orig_start = start; - int ret = 1; - - while (start <= end) { - em = lookup_extent_mapping(tree, start, end); - if (!em || IS_ERR(em)) - break; - if (!test_range_bit(tree, em->start, em->end, - EXTENT_LOCKED, 0)) { - remove_extent_mapping(tree, em); - /* once for the rb tree */ - free_extent_map(em); - } - start = em->end + 1; - /* once for us */ - free_extent_map(em); - } - if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0)) - ret = 0; - else - clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE, - 1, 1, GFP_NOFS); - return ret; -} -EXPORT_SYMBOL(try_release_extent_mapping); - -sector_t extent_bmap(struct address_space *mapping, sector_t iblock, - get_extent_t *get_extent) -{ - struct inode *inode = mapping->host; - u64 start = iblock << inode->i_blkbits; - u64 end = start + (1 << 
inode->i_blkbits) - 1; - sector_t sector = 0; - struct extent_map *em; - - em = get_extent(inode, NULL, 0, start, end, 0); - if (!em || IS_ERR(em)) - return 0; - - if (em->block_start == EXTENT_MAP_INLINE || - em->block_start == EXTENT_MAP_HOLE) - goto out; - - sector = (em->block_start + start - em->start) >> inode->i_blkbits; -out: - free_extent_map(em); - return sector; -} - -static int add_lru(struct extent_map_tree *tree, struct extent_buffer *eb) -{ - if (list_empty(&eb->lru)) { - extent_buffer_get(eb); - list_add(&eb->lru, &tree->buffer_lru); - tree->lru_size++; - if (tree->lru_size >= BUFFER_LRU_MAX) { - struct extent_buffer *rm; - rm = list_entry(tree->buffer_lru.prev, - struct extent_buffer, lru); - tree->lru_size--; - list_del_init(&rm->lru); - free_extent_buffer(rm); - } - } else - list_move(&eb->lru, &tree->buffer_lru); - return 0; -} -static struct extent_buffer *find_lru(struct extent_map_tree *tree, - u64 start, unsigned long len) -{ - struct list_head *lru = &tree->buffer_lru; - struct list_head *cur = lru->next; - struct extent_buffer *eb; - - if (list_empty(lru)) - return NULL; - - do { - eb = list_entry(cur, struct extent_buffer, lru); - if (eb->start == start && eb->len == len) { - extent_buffer_get(eb); - return eb; - } - cur = cur->next; - } while (cur != lru); - return NULL; -} - -static inline unsigned long num_extent_pages(u64 start, u64 len) -{ - return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - - (start >> PAGE_CACHE_SHIFT); -} - -static inline struct page *extent_buffer_page(struct extent_buffer *eb, - unsigned long i) -{ - struct page *p; - struct address_space *mapping; - - if (i == 0) - return eb->first_page; - i += eb->start >> PAGE_CACHE_SHIFT; - mapping = eb->first_page->mapping; - read_lock_irq(&mapping->tree_lock); - p = radix_tree_lookup(&mapping->page_tree, i); - read_unlock_irq(&mapping->tree_lock); - return p; -} - -static struct extent_buffer *__alloc_extent_buffer(struct extent_map_tree *tree, - u64 start, 
- unsigned long len, - gfp_t mask) -{ - struct extent_buffer *eb = NULL; - - spin_lock(&tree->lru_lock); - eb = find_lru(tree, start, len); - spin_unlock(&tree->lru_lock); - if (eb) { - return eb; - } - - eb = kmem_cache_zalloc(extent_buffer_cache, mask); - INIT_LIST_HEAD(&eb->lru); - eb->start = start; - eb->len = len; - atomic_set(&eb->refs, 1); - - return eb; -} - -static void __free_extent_buffer(struct extent_buffer *eb) -{ - kmem_cache_free(extent_buffer_cache, eb); -} - -struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, - u64 start, unsigned long len, - struct page *page0, - gfp_t mask) -{ - unsigned long num_pages = num_extent_pages(start, len); - unsigned long i; - unsigned long index = start >> PAGE_CACHE_SHIFT; - struct extent_buffer *eb; - struct page *p; - struct address_space *mapping = tree->mapping; - int uptodate = 1; - - eb = __alloc_extent_buffer(tree, start, len, mask); - if (!eb || IS_ERR(eb)) - return NULL; - - if (eb->flags & EXTENT_BUFFER_FILLED) - goto lru_add; - - if (page0) { - eb->first_page = page0; - i = 1; - index++; - page_cache_get(page0); - mark_page_accessed(page0); - set_page_extent_mapped(page0); - WARN_ON(!PageUptodate(page0)); - set_page_extent_head(page0, len); - } else { - i = 0; - } - for (; i < num_pages; i++, index++) { - p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); - if (!p) { - WARN_ON(1); - goto fail; - } - set_page_extent_mapped(p); - mark_page_accessed(p); - if (i == 0) { - eb->first_page = p; - set_page_extent_head(p, len); - } else { - set_page_private(p, EXTENT_PAGE_PRIVATE); - } - if (!PageUptodate(p)) - uptodate = 0; - unlock_page(p); - } - if (uptodate) - eb->flags |= EXTENT_UPTODATE; - eb->flags |= EXTENT_BUFFER_FILLED; - -lru_add: - spin_lock(&tree->lru_lock); - add_lru(tree, eb); - spin_unlock(&tree->lru_lock); - return eb; - -fail: - spin_lock(&tree->lru_lock); - list_del_init(&eb->lru); - spin_unlock(&tree->lru_lock); - if (!atomic_dec_and_test(&eb->refs)) - return 
NULL; - for (index = 1; index < i; index++) { - page_cache_release(extent_buffer_page(eb, index)); - } - if (i > 0) - page_cache_release(extent_buffer_page(eb, 0)); - __free_extent_buffer(eb); - return NULL; -} -EXPORT_SYMBOL(alloc_extent_buffer); - -struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, - u64 start, unsigned long len, - gfp_t mask) -{ - unsigned long num_pages = num_extent_pages(start, len); - unsigned long i; - unsigned long index = start >> PAGE_CACHE_SHIFT; - struct extent_buffer *eb; - struct page *p; - struct address_space *mapping = tree->mapping; - int uptodate = 1; - - eb = __alloc_extent_buffer(tree, start, len, mask); - if (!eb || IS_ERR(eb)) - return NULL; - - if (eb->flags & EXTENT_BUFFER_FILLED) - goto lru_add; - - for (i = 0; i < num_pages; i++, index++) { - p = find_lock_page(mapping, index); - if (!p) { - goto fail; - } - set_page_extent_mapped(p); - mark_page_accessed(p); - - if (i == 0) { - eb->first_page = p; - set_page_extent_head(p, len); - } else { - set_page_private(p, EXTENT_PAGE_PRIVATE); - } - - if (!PageUptodate(p)) - uptodate = 0; - unlock_page(p); - } - if (uptodate) - eb->flags |= EXTENT_UPTODATE; - eb->flags |= EXTENT_BUFFER_FILLED; - -lru_add: - spin_lock(&tree->lru_lock); - add_lru(tree, eb); - spin_unlock(&tree->lru_lock); - return eb; -fail: - spin_lock(&tree->lru_lock); - list_del_init(&eb->lru); - spin_unlock(&tree->lru_lock); - if (!atomic_dec_and_test(&eb->refs)) - return NULL; - for (index = 1; index < i; index++) { - page_cache_release(extent_buffer_page(eb, index)); - } - if (i > 0) - page_cache_release(extent_buffer_page(eb, 0)); - __free_extent_buffer(eb); - return NULL; -} -EXPORT_SYMBOL(find_extent_buffer); - -void free_extent_buffer(struct extent_buffer *eb) -{ - unsigned long i; - unsigned long num_pages; - - if (!eb) - return; - - if (!atomic_dec_and_test(&eb->refs)) - return; - - WARN_ON(!list_empty(&eb->lru)); - num_pages = num_extent_pages(eb->start, eb->len); - - for (i = 1; i < 
num_pages; i++) { - page_cache_release(extent_buffer_page(eb, i)); - } - page_cache_release(extent_buffer_page(eb, 0)); - __free_extent_buffer(eb); -} -EXPORT_SYMBOL(free_extent_buffer); - -int clear_extent_buffer_dirty(struct extent_map_tree *tree, - struct extent_buffer *eb) -{ - int set; - unsigned long i; - unsigned long num_pages; - struct page *page; - - u64 start = eb->start; - u64 end = start + eb->len - 1; - - set = clear_extent_dirty(tree, start, end, GFP_NOFS); - num_pages = num_extent_pages(eb->start, eb->len); - - for (i = 0; i < num_pages; i++) { - page = extent_buffer_page(eb, i); - lock_page(page); - if (i == 0) - set_page_extent_head(page, eb->len); - else - set_page_private(page, EXTENT_PAGE_PRIVATE); - - /* - * if we're on the last page or the first page and the - * block isn't aligned on a page boundary, do extra checks - * to make sure we don't clean page that is partially dirty - */ - if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || - ((i == num_pages - 1) && - ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { - start = (u64)page->index << PAGE_CACHE_SHIFT; - end = start + PAGE_CACHE_SIZE - 1; - if (test_range_bit(tree, start, end, - EXTENT_DIRTY, 0)) { - unlock_page(page); - continue; - } - } - clear_page_dirty_for_io(page); - write_lock_irq(&page->mapping->tree_lock); - if (!PageDirty(page)) { - radix_tree_tag_clear(&page->mapping->page_tree, - page_index(page), - PAGECACHE_TAG_DIRTY); - } - write_unlock_irq(&page->mapping->tree_lock); - unlock_page(page); - } - return 0; -} -EXPORT_SYMBOL(clear_extent_buffer_dirty); - -int wait_on_extent_buffer_writeback(struct extent_map_tree *tree, - struct extent_buffer *eb) -{ - return wait_on_extent_writeback(tree, eb->start, - eb->start + eb->len - 1); -} -EXPORT_SYMBOL(wait_on_extent_buffer_writeback); - -int set_extent_buffer_dirty(struct extent_map_tree *tree, - struct extent_buffer *eb) -{ - unsigned long i; - unsigned long num_pages; - - num_pages = num_extent_pages(eb->start, eb->len); - 
for (i = 0; i < num_pages; i++) { - struct page *page = extent_buffer_page(eb, i); - /* writepage may need to do something special for the - * first page, we have to make sure page->private is - * properly set. releasepage may drop page->private - * on us if the page isn't already dirty. - */ - if (i == 0) { - lock_page(page); - set_page_extent_head(page, eb->len); - } else if (PagePrivate(page) && - page->private != EXTENT_PAGE_PRIVATE) { - lock_page(page); - set_page_extent_mapped(page); - unlock_page(page); - } - __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); - if (i == 0) - unlock_page(page); - } - return set_extent_dirty(tree, eb->start, - eb->start + eb->len - 1, GFP_NOFS); -} -EXPORT_SYMBOL(set_extent_buffer_dirty); - -int set_extent_buffer_uptodate(struct extent_map_tree *tree, - struct extent_buffer *eb) -{ - unsigned long i; - struct page *page; - unsigned long num_pages; - - num_pages = num_extent_pages(eb->start, eb->len); - - set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, - GFP_NOFS); - for (i = 0; i < num_pages; i++) { - page = extent_buffer_page(eb, i); - if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || - ((i == num_pages - 1) && - ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { - check_page_uptodate(tree, page); - continue; - } - SetPageUptodate(page); - } - return 0; -} -EXPORT_SYMBOL(set_extent_buffer_uptodate); - -int extent_buffer_uptodate(struct extent_map_tree *tree, - struct extent_buffer *eb) -{ - if (eb->flags & EXTENT_UPTODATE) - return 1; - return test_range_bit(tree, eb->start, eb->start + eb->len - 1, - EXTENT_UPTODATE, 1); -} -EXPORT_SYMBOL(extent_buffer_uptodate); - -int read_extent_buffer_pages(struct extent_map_tree *tree, - struct extent_buffer *eb, - u64 start, - int wait) -{ - unsigned long i; - unsigned long start_i; - struct page *page; - int err; - int ret = 0; - unsigned long num_pages; - - if (eb->flags & EXTENT_UPTODATE) - return 0; - - if (0 && test_range_bit(tree, eb->start, eb->start + 
eb->len - 1, - EXTENT_UPTODATE, 1)) { - return 0; - } - - if (start) { - WARN_ON(start < eb->start); - start_i = (start >> PAGE_CACHE_SHIFT) - - (eb->start >> PAGE_CACHE_SHIFT); - } else { - start_i = 0; - } - - num_pages = num_extent_pages(eb->start, eb->len); - for (i = start_i; i < num_pages; i++) { - page = extent_buffer_page(eb, i); - if (PageUptodate(page)) { - continue; - } - if (!wait) { - if (TestSetPageLocked(page)) { - continue; - } - } else { - lock_page(page); - } - if (!PageUptodate(page)) { - err = page->mapping->a_ops->readpage(NULL, page); - if (err) { - ret = err; - } - } else { - unlock_page(page); - } - } - - if (ret || !wait) { - return ret; - } - - for (i = start_i; i < num_pages; i++) { - page = extent_buffer_page(eb, i); - wait_on_page_locked(page); - if (!PageUptodate(page)) { - ret = -EIO; - } - } - if (!ret) - eb->flags |= EXTENT_UPTODATE; - return ret; -} -EXPORT_SYMBOL(read_extent_buffer_pages); - -void read_extent_buffer(struct extent_buffer *eb, void *dstv, - unsigned long start, - unsigned long len) -{ - size_t cur; - size_t offset; - struct page *page; - char *kaddr; - char *dst = (char *)dstv; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - unsigned long num_pages = num_extent_pages(eb->start, eb->len); - - WARN_ON(start > eb->len); - WARN_ON(start + len > eb->start + eb->len); - - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); - - while(len > 0) { - page = extent_buffer_page(eb, i); - if (!PageUptodate(page)) { - printk("page %lu not up to date i %lu, total %lu, len %lu\n", page->index, i, num_pages, eb->len); - WARN_ON(1); - } - WARN_ON(!PageUptodate(page)); - - cur = min(len, (PAGE_CACHE_SIZE - offset)); - kaddr = kmap_atomic(page, KM_USER1); - memcpy(dst, kaddr + offset, cur); - kunmap_atomic(kaddr, KM_USER1); - - dst += cur; - len -= cur; - offset = 0; - i++; - } -} -EXPORT_SYMBOL(read_extent_buffer); - -int 
map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, - unsigned long min_len, char **token, char **map, - unsigned long *map_start, - unsigned long *map_len, int km) -{ - size_t offset = start & (PAGE_CACHE_SIZE - 1); - char *kaddr; - struct page *p; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - unsigned long end_i = (start_offset + start + min_len - 1) >> - PAGE_CACHE_SHIFT; - - if (i != end_i) - return -EINVAL; - - if (i == 0) { - offset = start_offset; - *map_start = 0; - } else { - offset = 0; - *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset; - } - if (start + min_len > eb->len) { -printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len); - WARN_ON(1); - } - - p = extent_buffer_page(eb, i); - WARN_ON(!PageUptodate(p)); - kaddr = kmap_atomic(p, km); - *token = kaddr; - *map = kaddr + offset; - *map_len = PAGE_CACHE_SIZE - offset; - return 0; -} -EXPORT_SYMBOL(map_private_extent_buffer); - -int map_extent_buffer(struct extent_buffer *eb, unsigned long start, - unsigned long min_len, - char **token, char **map, - unsigned long *map_start, - unsigned long *map_len, int km) -{ - int err; - int save = 0; - if (eb->map_token) { - unmap_extent_buffer(eb, eb->map_token, km); - eb->map_token = NULL; - save = 1; - } - err = map_private_extent_buffer(eb, start, min_len, token, map, - map_start, map_len, km); - if (!err && save) { - eb->map_token = *token; - eb->kaddr = *map; - eb->map_start = *map_start; - eb->map_len = *map_len; - } - return err; -} -EXPORT_SYMBOL(map_extent_buffer); - -void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) -{ - kunmap_atomic(token, km); -} -EXPORT_SYMBOL(unmap_extent_buffer); - -int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, - unsigned long start, - unsigned long len) -{ - size_t cur; - size_t offset; - struct page *page; - char *kaddr; - char *ptr 
= (char *)ptrv; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - int ret = 0; - - WARN_ON(start > eb->len); - WARN_ON(start + len > eb->start + eb->len); - - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); - - while(len > 0) { - page = extent_buffer_page(eb, i); - WARN_ON(!PageUptodate(page)); - - cur = min(len, (PAGE_CACHE_SIZE - offset)); - - kaddr = kmap_atomic(page, KM_USER0); - ret = memcmp(ptr, kaddr + offset, cur); - kunmap_atomic(kaddr, KM_USER0); - if (ret) - break; - - ptr += cur; - len -= cur; - offset = 0; - i++; - } - return ret; -} -EXPORT_SYMBOL(memcmp_extent_buffer); - -void write_extent_buffer(struct extent_buffer *eb, const void *srcv, - unsigned long start, unsigned long len) -{ - size_t cur; - size_t offset; - struct page *page; - char *kaddr; - char *src = (char *)srcv; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - - WARN_ON(start > eb->len); - WARN_ON(start + len > eb->start + eb->len); - - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); - - while(len > 0) { - page = extent_buffer_page(eb, i); - WARN_ON(!PageUptodate(page)); - - cur = min(len, PAGE_CACHE_SIZE - offset); - kaddr = kmap_atomic(page, KM_USER1); - memcpy(kaddr + offset, src, cur); - kunmap_atomic(kaddr, KM_USER1); - - src += cur; - len -= cur; - offset = 0; - i++; - } -} -EXPORT_SYMBOL(write_extent_buffer); - -void memset_extent_buffer(struct extent_buffer *eb, char c, - unsigned long start, unsigned long len) -{ - size_t cur; - size_t offset; - struct page *page; - char *kaddr; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - - WARN_ON(start > eb->len); - WARN_ON(start + len > eb->start + eb->len); - - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); - - while(len > 0) 
{ - page = extent_buffer_page(eb, i); - WARN_ON(!PageUptodate(page)); - - cur = min(len, PAGE_CACHE_SIZE - offset); - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, c, cur); - kunmap_atomic(kaddr, KM_USER0); - - len -= cur; - offset = 0; - i++; - } -} -EXPORT_SYMBOL(memset_extent_buffer); - -void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, - unsigned long dst_offset, unsigned long src_offset, - unsigned long len) -{ - u64 dst_len = dst->len; - size_t cur; - size_t offset; - struct page *page; - char *kaddr; - size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; - - WARN_ON(src->len != dst_len); - - offset = (start_offset + dst_offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); - - while(len > 0) { - page = extent_buffer_page(dst, i); - WARN_ON(!PageUptodate(page)); - - cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); - - kaddr = kmap_atomic(page, KM_USER0); - read_extent_buffer(src, kaddr + offset, src_offset, cur); - kunmap_atomic(kaddr, KM_USER0); - - src_offset += cur; - len -= cur; - offset = 0; - i++; - } -} -EXPORT_SYMBOL(copy_extent_buffer); - -static void move_pages(struct page *dst_page, struct page *src_page, - unsigned long dst_off, unsigned long src_off, - unsigned long len) -{ - char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); - if (dst_page == src_page) { - memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); - } else { - char *src_kaddr = kmap_atomic(src_page, KM_USER1); - char *p = dst_kaddr + dst_off + len; - char *s = src_kaddr + src_off + len; - - while (len--) - *--p = *--s; - - kunmap_atomic(src_kaddr, KM_USER1); - } - kunmap_atomic(dst_kaddr, KM_USER0); -} - -static void copy_pages(struct page *dst_page, struct page *src_page, - unsigned long dst_off, unsigned long src_off, - unsigned long len) -{ - char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); - char *src_kaddr; - - if (dst_page != src_page) - src_kaddr 
= kmap_atomic(src_page, KM_USER1); - else - src_kaddr = dst_kaddr; - - memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); - kunmap_atomic(dst_kaddr, KM_USER0); - if (dst_page != src_page) - kunmap_atomic(src_kaddr, KM_USER1); -} - -void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, - unsigned long src_offset, unsigned long len) -{ - size_t cur; - size_t dst_off_in_page; - size_t src_off_in_page; - size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long dst_i; - unsigned long src_i; - - if (src_offset + len > dst->len) { - printk("memmove bogus src_offset %lu move len %lu len %lu\n", - src_offset, len, dst->len); - BUG_ON(1); - } - if (dst_offset + len > dst->len) { - printk("memmove bogus dst_offset %lu move len %lu len %lu\n", - dst_offset, len, dst->len); - BUG_ON(1); - } - - while(len > 0) { - dst_off_in_page = (start_offset + dst_offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); - src_off_in_page = (start_offset + src_offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); - - dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; - src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; - - cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - - src_off_in_page)); - cur = min_t(unsigned long, cur, - (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page)); - - copy_pages(extent_buffer_page(dst, dst_i), - extent_buffer_page(dst, src_i), - dst_off_in_page, src_off_in_page, cur); - - src_offset += cur; - dst_offset += cur; - len -= cur; - } -} -EXPORT_SYMBOL(memcpy_extent_buffer); - -void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, - unsigned long src_offset, unsigned long len) -{ - size_t cur; - size_t dst_off_in_page; - size_t src_off_in_page; - unsigned long dst_end = dst_offset + len - 1; - unsigned long src_end = src_offset + len - 1; - size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long dst_i; - unsigned long src_i; - - if (src_offset + len > dst->len) { - 
printk("memmove bogus src_offset %lu move len %lu len %lu\n", - src_offset, len, dst->len); - BUG_ON(1); - } - if (dst_offset + len > dst->len) { - printk("memmove bogus dst_offset %lu move len %lu len %lu\n", - dst_offset, len, dst->len); - BUG_ON(1); - } - if (dst_offset < src_offset) { - memcpy_extent_buffer(dst, dst_offset, src_offset, len); - return; - } - while(len > 0) { - dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT; - src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; - - dst_off_in_page = (start_offset + dst_end) & - ((unsigned long)PAGE_CACHE_SIZE - 1); - src_off_in_page = (start_offset + src_end) & - ((unsigned long)PAGE_CACHE_SIZE - 1); - - cur = min_t(unsigned long, len, src_off_in_page + 1); - cur = min(cur, dst_off_in_page + 1); - move_pages(extent_buffer_page(dst, dst_i), - extent_buffer_page(dst, src_i), - dst_off_in_page - cur + 1, - src_off_in_page - cur + 1, cur); - - dst_end -= cur; - src_end -= cur; - len -= cur; - } -} -EXPORT_SYMBOL(memmove_extent_buffer); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index ea60f5447b5..56314217cfc 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -3,215 +3,53 @@ #include +#define EXTENT_MAP_LAST_BYTE (u64)-4 #define EXTENT_MAP_HOLE (u64)-3 #define EXTENT_MAP_INLINE (u64)-2 #define EXTENT_MAP_DELALLOC (u64)-1 -/* bits for the extent state */ -#define EXTENT_DIRTY 1 -#define EXTENT_WRITEBACK (1 << 1) -#define EXTENT_UPTODATE (1 << 2) -#define EXTENT_LOCKED (1 << 3) -#define EXTENT_NEW (1 << 4) -#define EXTENT_DELALLOC (1 << 5) -#define EXTENT_DEFRAG (1 << 6) -#define EXTENT_DEFRAG_DONE (1 << 7) -#define EXTENT_BUFFER_FILLED (1 << 8) -#define EXTENT_CSUM (1 << 9) -#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) - -/* - * page->private values. Every page that is controlled by the extent - * map has page->private set to one. 
- */ -#define EXTENT_PAGE_PRIVATE 1 -#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3 - - -struct extent_map_ops { - int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); - int (*writepage_io_hook)(struct page *page, u64 start, u64 end); - int (*readpage_io_hook)(struct page *page, u64 start, u64 end); - int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end); - void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end); -}; - -struct extent_map_tree { - struct rb_root map; - struct rb_root state; - struct address_space *mapping; - u64 dirty_bytes; - rwlock_t lock; - struct extent_map_ops *ops; - spinlock_t lru_lock; - struct list_head buffer_lru; - int lru_size; -}; - -/* note, this must start with the same fields as fs/extent_map.c:tree_entry */ struct extent_map { - u64 start; - u64 end; /* inclusive */ - int in_tree; struct rb_node rb_node; - /* block_start and block_end are in bytes */ + + /* all of these are in bytes */ + u64 start; + u64 len; u64 block_start; - u64 block_end; /* inclusive */ + unsigned long flags; struct block_device *bdev; atomic_t refs; -}; - -/* note, this must start with the same fields as fs/extent_map.c:tree_entry */ -struct extent_state { - u64 start; - u64 end; /* inclusive */ int in_tree; - struct rb_node rb_node; - wait_queue_head_t wq; - atomic_t refs; - unsigned long state; - - /* for use by the FS */ - u64 private; - - struct list_head list; }; -struct extent_buffer { - u64 start; - unsigned long len; - char *map_token; - char *kaddr; - unsigned long map_start; - unsigned long map_len; - struct page *first_page; - struct list_head lru; - atomic_t refs; - int flags; +struct extent_map_tree { + struct rb_root map; + struct extent_map *last; + spinlock_t lock; }; -typedef struct extent_map *(get_extent_t)(struct inode *inode, - struct page *page, - size_t page_offset, - u64 start, u64 end, - int create); +static inline u64 extent_map_end(struct extent_map *em) +{ + if (em->start + em->len < em->start) + return 
(u64)-1; + return em->start + em->len; +} + +static inline u64 extent_map_block_end(struct extent_map *em) +{ + if (em->block_start + em->len < em->block_start) + return (u64)-1; + return em->block_start + em->len; +} -void extent_map_tree_init(struct extent_map_tree *tree, - struct address_space *mapping, gfp_t mask); -void extent_map_tree_empty_lru(struct extent_map_tree *tree); +void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask); struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, - u64 start, u64 end); + u64 start, u64 len); int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); -int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page); -int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); -int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); + struct extent_map *alloc_extent_map(gfp_t mask); void free_extent_map(struct extent_map *em); -int extent_read_full_page(struct extent_map_tree *tree, struct page *page, - get_extent_t *get_extent); int __init extent_map_init(void); void extent_map_exit(void); - -u64 count_range_bits(struct extent_map_tree *tree, - u64 *start, u64 search_end, - u64 max_bytes, unsigned long bits); - -int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, - int bits, int filled); -int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, - int bits, gfp_t mask); -int set_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, - int bits, gfp_t mask); -int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask); -int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask); -int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask); -int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask); 
-int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask); -int find_first_extent_bit(struct extent_map_tree *tree, u64 start, - u64 *start_ret, u64 *end_ret, int bits); -int extent_invalidatepage(struct extent_map_tree *tree, - struct page *page, unsigned long offset); -int extent_write_full_page(struct extent_map_tree *tree, struct page *page, - get_extent_t *get_extent, - struct writeback_control *wbc); -int extent_writepages(struct extent_map_tree *tree, - struct address_space *mapping, - get_extent_t *get_extent, - struct writeback_control *wbc); -int extent_readpages(struct extent_map_tree *tree, - struct address_space *mapping, - struct list_head *pages, unsigned nr_pages, - get_extent_t get_extent); -int extent_prepare_write(struct extent_map_tree *tree, - struct inode *inode, struct page *page, - unsigned from, unsigned to, get_extent_t *get_extent); -int extent_commit_write(struct extent_map_tree *tree, - struct inode *inode, struct page *page, - unsigned from, unsigned to); -sector_t extent_bmap(struct address_space *mapping, sector_t iblock, - get_extent_t *get_extent); -int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end); -int set_state_private(struct extent_map_tree *tree, u64 start, u64 private); -int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private); -void set_page_extent_mapped(struct page *page); - -struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, - u64 start, unsigned long len, - struct page *page0, - gfp_t mask); -struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, - u64 start, unsigned long len, - gfp_t mask); -void free_extent_buffer(struct extent_buffer *eb); -int read_extent_buffer_pages(struct extent_map_tree *tree, - struct extent_buffer *eb, u64 start, int wait); - -static inline void extent_buffer_get(struct extent_buffer *eb) -{ - atomic_inc(&eb->refs); -} - -int memcmp_extent_buffer(struct extent_buffer *eb, const void 
*ptrv, - unsigned long start, - unsigned long len); -void read_extent_buffer(struct extent_buffer *eb, void *dst, - unsigned long start, - unsigned long len); -void write_extent_buffer(struct extent_buffer *eb, const void *src, - unsigned long start, unsigned long len); -void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, - unsigned long dst_offset, unsigned long src_offset, - unsigned long len); -void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, - unsigned long src_offset, unsigned long len); -void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, - unsigned long src_offset, unsigned long len); -void memset_extent_buffer(struct extent_buffer *eb, char c, - unsigned long start, unsigned long len); -int wait_on_extent_buffer_writeback(struct extent_map_tree *tree, - struct extent_buffer *eb); -int clear_extent_buffer_dirty(struct extent_map_tree *tree, - struct extent_buffer *eb); -int set_extent_buffer_dirty(struct extent_map_tree *tree, - struct extent_buffer *eb); -int set_extent_buffer_uptodate(struct extent_map_tree *tree, - struct extent_buffer *eb); -int extent_buffer_uptodate(struct extent_map_tree *tree, - struct extent_buffer *eb); -int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, - unsigned long min_len, char **token, char **map, - unsigned long *map_start, - unsigned long *map_len, int km); -int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, - unsigned long min_len, char **token, char **map, - unsigned long *map_start, - unsigned long *map_len, int km); -void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); #endif diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1cd8c908811..c5bb00f9239 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -233,8 +233,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, int err = 0; int i; struct inode *inode = fdentry(file)->d_inode; - struct 
extent_map *em; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; u64 hint_byte; u64 num_bytes; u64 start_pos; @@ -242,11 +241,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, u64 end_pos = pos + write_bytes; u64 inline_size; loff_t isize = i_size_read(inode); - em = alloc_extent_map(GFP_NOFS); - if (!em) - return -ENOMEM; - - em->bdev = inode->i_sb->s_bdev; start_pos = pos & ~((u64)root->sectorsize - 1); num_bytes = (write_bytes + pos - start_pos + @@ -254,7 +248,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, end_of_last_block = start_pos + num_bytes - 1; - lock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); + lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); if (!trans) { @@ -268,7 +262,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, if ((end_of_last_block & 4095) == 0) { printk("strange end of last %Lu %zu %Lu\n", start_pos, write_bytes, end_of_last_block); } - set_extent_uptodate(em_tree, start_pos, end_of_last_block, GFP_NOFS); + set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS); /* FIXME...EIEIO, ENOSPC and more */ @@ -293,6 +287,8 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, inode->i_ino, last_pos_in_file, 0, 0, hole_size); + btrfs_drop_extent_cache(inode, last_pos_in_file, + last_pos_in_file + hole_size -1); btrfs_check_file(root, inode); } if (err) @@ -320,12 +316,12 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, last_end += PAGE_CACHE_SIZE - 1; if (start_pos < isize) { u64 delalloc_start = start_pos; - existing_delalloc = count_range_bits(em_tree, + existing_delalloc = count_range_bits(io_tree, &delalloc_start, end_of_last_block, (u64)-1, EXTENT_DELALLOC); } - set_extent_delalloc(em_tree, 
start_pos, end_of_last_block, + set_extent_delalloc(io_tree, start_pos, end_of_last_block, GFP_NOFS); spin_lock(&root->fs_info->delalloc_lock); root->fs_info->delalloc_bytes += (end_of_last_block + 1 - @@ -346,6 +342,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, inline_size -= start_pos; err = insert_inline_extent(trans, root, inode, start_pos, inline_size, pages, 0, num_pages); + btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1); BUG_ON(err); } if (end_pos > isize) { @@ -356,8 +353,7 @@ failed: err = btrfs_end_transaction(trans, root); out_unlock: mutex_unlock(&root->fs_info->fs_mutex); - unlock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); - free_extent_map(em); + unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); return err; } @@ -367,10 +363,15 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; while(1) { + spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, end); - if (!em) + if (!em) { + spin_unlock(&em_tree->lock); break; + } remove_extent_mapping(em_tree, em); + spin_unlock(&em_tree->lock); + /* once for us */ free_extent_map(em); /* once for the tree*/ diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 67005480e13..16d3aef45d1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -53,7 +53,7 @@ static struct inode_operations btrfs_file_inode_operations; static struct address_space_operations btrfs_aops; static struct address_space_operations btrfs_symlink_aops; static struct file_operations btrfs_dir_file_operations; -static struct extent_map_ops btrfs_extent_map_ops; +static struct extent_io_ops btrfs_extent_io_ops; static struct kmem_cache *btrfs_inode_cachep; struct kmem_cache *btrfs_trans_handle_cachep; @@ -104,6 +104,8 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) u64 num_bytes; u64 cur_alloc_size; u64 blocksize = root->sectorsize; + u64 orig_start = 
start; + u64 orig_num_bytes; struct btrfs_key ins; int ret; @@ -115,6 +117,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) num_bytes = max(blocksize, num_bytes); ret = btrfs_drop_extents(trans, root, inode, start, start + num_bytes, start, &alloc_hint); + orig_num_bytes = num_bytes; if (alloc_hint == EXTENT_MAP_INLINE) goto out; @@ -138,6 +141,8 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) alloc_hint = ins.objectid + ins.offset; start += cur_alloc_size; } + btrfs_drop_extent_cache(inode, orig_start, + orig_start + orig_num_bytes - 1); btrfs_add_ordered_inode(inode); out: btrfs_end_transaction(trans, root); @@ -297,7 +302,7 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) int ret = 0; struct inode *inode = page->mapping->host; struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_csum_item *item; struct btrfs_path *path = NULL; u32 csum; @@ -317,7 +322,7 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) } read_extent_buffer(path->nodes[0], &csum, (unsigned long)item, BTRFS_CRC32_SIZE); - set_state_private(em_tree, start, csum); + set_state_private(io_tree, start, csum); out: if (path) btrfs_free_path(path); @@ -329,17 +334,19 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) { size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT); struct inode *inode = page->mapping->host; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; char *kaddr; u64 private; int ret; struct btrfs_root *root = BTRFS_I(inode)->root; u32 csum = ~(u32)0; unsigned long flags; + if (btrfs_test_opt(root, NODATASUM) || btrfs_test_flag(inode, NODATASUM)) return 0; - ret = get_state_private(em_tree, start, &private); + + ret = get_state_private(io_tree, start, &private); 
local_irq_save(flags); kaddr = kmap_atomic(page, KM_IRQ0); if (ret) { @@ -428,7 +435,7 @@ void btrfs_read_locked_inode(struct inode *inode) switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_mapping->a_ops = &btrfs_aops; - BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops; + BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; break; @@ -873,7 +880,7 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, size_t zero_start) { char *kaddr; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_root *root = BTRFS_I(inode)->root; u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; u64 page_end = page_start + PAGE_CACHE_SIZE - 1; @@ -884,12 +891,12 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, WARN_ON(!PageLocked(page)); set_page_extent_mapped(page); - lock_extent(em_tree, page_start, page_end, GFP_NOFS); + lock_extent(io_tree, page_start, page_end, GFP_NOFS); delalloc_start = page_start; - existing_delalloc = count_range_bits(&BTRFS_I(inode)->extent_tree, + existing_delalloc = count_range_bits(&BTRFS_I(inode)->io_tree, &delalloc_start, page_end, PAGE_CACHE_SIZE, EXTENT_DELALLOC); - set_extent_delalloc(&BTRFS_I(inode)->extent_tree, page_start, + set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); spin_lock(&root->fs_info->delalloc_lock); @@ -903,7 +910,7 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, kunmap(page); } set_page_dirty(page); - unlock_extent(em_tree, page_start, page_end, GFP_NOFS); + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); return ret; } @@ -961,7 +968,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; - 
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; u64 mask = root->sectorsize - 1; u64 pos = (inode->i_size + mask) & ~mask; @@ -986,7 +993,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) btrfs_truncate_page(inode->i_mapping, inode->i_size); - lock_extent(em_tree, pos, block_end, GFP_NOFS); + lock_extent(io_tree, pos, block_end, GFP_NOFS); hole_size = block_end - hole_start; mutex_lock(&root->fs_info->fs_mutex); @@ -1001,11 +1008,13 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) inode->i_ino, hole_start, 0, 0, hole_size); + btrfs_drop_extent_cache(inode, hole_start, + hole_size - 1); btrfs_check_file(root, inode); } btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); - unlock_extent(em_tree, pos, block_end, GFP_NOFS); + unlock_extent(io_tree, pos, block_end, GFP_NOFS); if (err) return err; } @@ -1189,7 +1198,8 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) struct btrfs_iget_args *args = p; inode->i_ino = args->ino; BTRFS_I(inode)->root = args->root; - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); + extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); return 0; } @@ -1485,7 +1495,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, if (!inode) return ERR_PTR(-ENOMEM); - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); + extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); BTRFS_I(inode)->root = root; @@ -1672,9 +1683,10 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_mapping->a_ops = &btrfs_aops; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + 
extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); + extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); - BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops; + BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); @@ -1816,7 +1828,7 @@ out_unlock: } struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, - size_t page_offset, u64 start, u64 end, + size_t page_offset, u64 start, u64 len, int create) { int ret; @@ -1826,7 +1838,6 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, u64 extent_end = 0; u64 objectid = inode->i_ino; u32 found_type; - int failed_insert = 0; struct btrfs_path *path; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_file_extent_item *item; @@ -1834,6 +1845,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, struct btrfs_key found_key; struct extent_map *em = NULL; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_trans_handle *trans = NULL; path = btrfs_alloc_path(); @@ -1841,24 +1853,26 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, mutex_lock(&root->fs_info->fs_mutex); again: - em = lookup_extent_mapping(em_tree, start, end); + spin_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, start, len); + spin_unlock(&em_tree->lock); + if (em) { if (em->start > start) { - printk("get_extent start %Lu em start %Lu\n", - start, em->start); + printk("get_extent lookup [%Lu %Lu] em [%Lu %Lu]\n", + start, len, em->start, em->len); WARN_ON(1); } goto out; } + em = alloc_extent_map(GFP_NOFS); if (!em) { - em = alloc_extent_map(GFP_NOFS); - if (!em) { - err = -ENOMEM; - goto out; - } - em->start = EXTENT_MAP_HOLE; - em->end = EXTENT_MAP_HOLE; + err = -ENOMEM; + goto out; } + + em->start = EXTENT_MAP_HOLE; + em->len = (u64)-1; em->bdev 
= inode->i_sb->s_bdev; ret = btrfs_lookup_file_extent(trans, root, path, objectid, start, trans != NULL); @@ -1893,28 +1907,25 @@ again: if (start < extent_start || start >= extent_end) { em->start = start; if (start < extent_start) { - if (end < extent_start) + if (start + len <= extent_start) goto not_found; - em->end = extent_end - 1; + em->len = extent_end - extent_start; } else { - em->end = end; + em->len = len; } goto not_found_em; } bytenr = btrfs_file_extent_disk_bytenr(leaf, item); if (bytenr == 0) { em->start = extent_start; - em->end = extent_end - 1; + em->len = extent_end - extent_start; em->block_start = EXTENT_MAP_HOLE; - em->block_end = EXTENT_MAP_HOLE; goto insert; } bytenr += btrfs_file_extent_offset(leaf, item); em->block_start = bytenr; - em->block_end = em->block_start + - btrfs_file_extent_num_bytes(leaf, item) - 1; em->start = extent_start; - em->end = extent_end - 1; + em->len = extent_end - extent_start; goto insert; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { unsigned long ptr; @@ -1925,25 +1936,24 @@ again: size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf, path->slots[0])); - extent_end = (extent_start + size - 1) | - ((u64)root->sectorsize - 1); + extent_end = (extent_start + size + root->sectorsize - 1) & + ~((u64)root->sectorsize - 1); if (start < extent_start || start >= extent_end) { em->start = start; if (start < extent_start) { - if (end < extent_start) + if (start + len <= extent_start) goto not_found; - em->end = extent_end; + em->len = extent_end - extent_start; } else { - em->end = end; + em->len = len; } goto not_found_em; } em->block_start = EXTENT_MAP_INLINE; - em->block_end = EXTENT_MAP_INLINE; if (!page) { em->start = extent_start; - em->end = extent_start + size - 1; + em->len = size; goto out; } @@ -1952,8 +1962,7 @@ again: copy_size = min_t(u64, PAGE_CACHE_SIZE - page_offset, size - extent_offset); em->start = extent_start + extent_offset; - em->end = (em->start + copy_size -1) | - 
((u64)root->sectorsize -1); + em->len = copy_size; map = kmap(page); ptr = btrfs_file_extent_inline_start(item) + extent_offset; if (create == 0 && !PageUptodate(page)) { @@ -1974,7 +1983,8 @@ again: btrfs_mark_buffer_dirty(leaf); } kunmap(page); - set_extent_uptodate(em_tree, em->start, em->end, GFP_NOFS); + set_extent_uptodate(io_tree, em->start, + extent_map_end(em) - 1, GFP_NOFS); goto insert; } else { printk("unkknown found_type %d\n", found_type); @@ -1982,33 +1992,29 @@ again: } not_found: em->start = start; - em->end = end; + em->len = len; not_found_em: em->block_start = EXTENT_MAP_HOLE; - em->block_end = EXTENT_MAP_HOLE; insert: btrfs_release_path(root, path); - if (em->start > start || em->end < start) { - printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->end, start, end); + if (em->start > start || extent_map_end(em) <= start) { + printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->len, start, len); err = -EIO; goto out; } + + err = 0; + spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); if (ret == -EEXIST) { free_extent_map(em); - em = NULL; - if (0 && failed_insert == 1) { - btrfs_drop_extent_cache(inode, start, end); - } - failed_insert++; - if (failed_insert > 5) { - printk("failing to insert %Lu %Lu\n", start, end); + em = lookup_extent_mapping(em_tree, start, len); + if (!em) { err = -EIO; - goto out; + printk("failing to insert %Lu %Lu\n", start, len); } - goto again; } - err = 0; + spin_unlock(&em_tree->lock); out: btrfs_free_path(path); if (trans) { @@ -2032,14 +2038,14 @@ static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock) int btrfs_readpage(struct file *file, struct page *page) { - struct extent_map_tree *tree; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; return extent_read_full_page(tree, page, btrfs_get_extent); } static int btrfs_writepage(struct page *page, struct 
writeback_control *wbc) { - struct extent_map_tree *tree; + struct extent_io_tree *tree; if (current->flags & PF_MEMALLOC) { @@ -2047,15 +2053,15 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc) unlock_page(page); return 0; } - tree = &BTRFS_I(page->mapping->host)->extent_tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; return extent_write_full_page(tree, page, btrfs_get_extent, wbc); } static int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { - struct extent_map_tree *tree; - tree = &BTRFS_I(mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(mapping->host)->io_tree; return extent_writepages(tree, mapping, btrfs_get_extent, wbc); } @@ -2063,19 +2069,21 @@ static int btrfs_readpages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { - struct extent_map_tree *tree; - tree = &BTRFS_I(mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(mapping->host)->io_tree; return extent_readpages(tree, mapping, pages, nr_pages, btrfs_get_extent); } static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags) { - struct extent_map_tree *tree; + struct extent_io_tree *tree; + struct extent_map_tree *map; int ret; - tree = &BTRFS_I(page->mapping->host)->extent_tree; - ret = try_release_extent_mapping(tree, page); + tree = &BTRFS_I(page->mapping->host)->io_tree; + map = &BTRFS_I(page->mapping->host)->extent_tree; + ret = try_release_extent_mapping(map, tree, page); if (ret == 1) { ClearPagePrivate(page); set_page_private(page, 0); @@ -2086,9 +2094,9 @@ static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags) static void btrfs_invalidatepage(struct page *page, unsigned long offset) { - struct extent_map_tree *tree; + struct extent_io_tree *tree; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; extent_invalidatepage(tree, page, offset); 
btrfs_releasepage(page, GFP_NOFS); } @@ -2374,7 +2382,7 @@ unsigned long btrfs_force_ra(struct address_space *mapping, int btrfs_defrag_file(struct file *file) { struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct page *page; unsigned long last_index; unsigned long ra_index = 0; @@ -2414,13 +2422,13 @@ int btrfs_defrag_file(struct file *file) { page_start = (u64)page->index << PAGE_CACHE_SHIFT; page_end = page_start + PAGE_CACHE_SIZE - 1; - lock_extent(em_tree, page_start, page_end, GFP_NOFS); + lock_extent(io_tree, page_start, page_end, GFP_NOFS); delalloc_start = page_start; existing_delalloc = - count_range_bits(&BTRFS_I(inode)->extent_tree, + count_range_bits(&BTRFS_I(inode)->io_tree, &delalloc_start, page_end, PAGE_CACHE_SIZE, EXTENT_DELALLOC); - set_extent_delalloc(em_tree, page_start, + set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS); spin_lock(&root->fs_info->delalloc_lock); @@ -2428,7 +2436,7 @@ int btrfs_defrag_file(struct file *file) { existing_delalloc; spin_unlock(&root->fs_info->delalloc_lock); - unlock_extent(em_tree, page_start, page_end, GFP_NOFS); + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); set_page_dirty(page); unlock_page(page); page_cache_release(page); @@ -2842,9 +2850,10 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_mapping->a_ops = &btrfs_aops; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); + extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); - BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops; + BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); @@ -2934,7 
+2943,7 @@ static struct file_operations btrfs_dir_file_operations = { #endif }; -static struct extent_map_ops btrfs_extent_map_ops = { +static struct extent_io_ops btrfs_extent_io_ops = { .fill_delalloc = run_delalloc_range, .writepage_io_hook = btrfs_writepage_io_hook, .readpage_io_hook = btrfs_readpage_io_hook, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8b52c69fda2..f8a1016600b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -468,10 +468,15 @@ static int __init init_btrfs_fs(void) err = btrfs_init_cachep(); if (err) goto free_transaction_sys; - err = extent_map_init(); + + err = extent_io_init(); if (err) goto free_cachep; + err = extent_map_init(); + if (err) + goto free_extent_io; + err = register_filesystem(&btrfs_fs_type); if (err) goto free_extent_map; @@ -479,6 +484,8 @@ static int __init init_btrfs_fs(void) free_extent_map: extent_map_exit(); +free_extent_io: + extent_io_exit(); free_cachep: btrfs_destroy_cachep(); free_transaction_sys: @@ -492,6 +499,7 @@ static void __exit exit_btrfs_fs(void) btrfs_exit_transaction_sys(); btrfs_destroy_cachep(); extent_map_exit(); + extent_io_exit(); unregister_filesystem(&btrfs_fs_type); btrfs_exit_sysfs(); } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 163c01a2449..b4a1bc62a78 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -70,7 +70,7 @@ static int join_transaction(struct btrfs_root *root) INIT_LIST_HEAD(&cur_trans->pending_snapshots); list_add_tail(&cur_trans->list, &root->fs_info->trans_list); btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree); - extent_map_tree_init(&cur_trans->dirty_pages, + extent_io_tree_init(&cur_trans->dirty_pages, root->fs_info->btree_inode->i_mapping, GFP_NOFS); } else { @@ -153,7 +153,7 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, int ret; int err; int werr = 0; - struct extent_map_tree *dirty_pages; + struct extent_io_tree *dirty_pages; struct page *page; struct inode *btree_inode = 
root->fs_info->btree_inode; u64 start; @@ -610,7 +610,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_transaction *cur_trans; struct btrfs_transaction *prev_trans = NULL; struct list_head dirty_fs_roots; - struct extent_map_tree *pinned_copy; + struct extent_io_tree *pinned_copy; DEFINE_WAIT(wait); int ret; @@ -639,7 +639,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, if (!pinned_copy) return -ENOMEM; - extent_map_tree_init(pinned_copy, + extent_io_tree_init(pinned_copy, root->fs_info->btree_inode->i_mapping, GFP_NOFS); trans->transaction->in_commit = 1; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index fd52e9b2392..c3172ddb332 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -29,7 +29,7 @@ struct btrfs_transaction { int use_count; int commit_done; struct list_head list; - struct extent_map_tree dirty_pages; + struct extent_io_tree dirty_pages; unsigned long start_time; struct btrfs_ordered_inode_tree ordered_inode_tree; wait_queue_head_t writer_wait; -- cgit v1.2.3-70-g09d2 From 89ce8a63d0c761fbb02089850605360f389477d8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:31 -0400 Subject: Add btrfs_end_transaction_throttle to force writers to wait for pending commits The existing throttle mechanism was often not sufficient to prevent new writers from coming in and making a given transaction run forever. This adds an explicit wait at the end of most operations so they will allow the current transaction to close. There is no wait inside file_write, inode updates, or cow filling, all of which have different deadlock possibilities. This is a temporary measure until better asynchronous commit support is added. This code leads to stalls as it waits for data=ordered writeback, and it really needs to be fixed. 
Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 18 -------------- fs/btrfs/disk-io.h | 1 - fs/btrfs/file.c | 1 - fs/btrfs/inode.c | 24 ++++++------------ fs/btrfs/ioctl.c | 2 -- fs/btrfs/transaction.c | 66 ++++++++++++++++++++++++++++++++++---------------- fs/btrfs/transaction.h | 2 ++ 7 files changed, 55 insertions(+), 59 deletions(-) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3cc480b8381..52569b57692 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1672,24 +1672,6 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); } -void btrfs_throttle(struct btrfs_root *root) -{ -#if 0 - struct backing_dev_info *bdi; - - bdi = &root->fs_info->bdi; - if (atomic_read(&root->fs_info->throttles) && - bdi_write_congested(bdi)) { -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) - congestion_wait(WRITE, HZ/20); -#else - blk_congestion_wait(WRITE, HZ/20); -#endif - - } -#endif -} - void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) { /* diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 2bc64fefe6e..deff6b4815a 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -70,7 +70,6 @@ int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf); int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); void btrfs_csum_final(u32 crc, char *result); -void btrfs_throttle(struct btrfs_root *root); int btrfs_open_device(struct btrfs_device *dev); int btrfs_verify_block_csum(struct btrfs_root *root, struct extent_buffer *buf); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b7f8f92daf8..ece221cba90 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -934,7 +934,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); if (num_pages < 
(root->leafsize >> PAGE_CACHE_SHIFT) + 1) btrfs_btree_balance_dirty(root, 1); - btrfs_throttle(root); cond_resched(); } out: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index cf27b598462..bbba3350d02 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -855,10 +855,9 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) btrfs_del_ordered_inode(inode, 1); } - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); fail: btrfs_btree_balance_dirty(root, nr); - btrfs_throttle(root); return ret; } @@ -889,10 +888,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) } nr = trans->blocks_used; - ret = btrfs_end_transaction(trans, root); + ret = btrfs_end_transaction_throttle(trans, root); fail: btrfs_btree_balance_dirty(root, nr); - btrfs_throttle(root); if (ret && !err) err = ret; @@ -1871,14 +1869,13 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, btrfs_update_inode_block_group(trans, dir); out_unlock: nr = trans->blocks_used; - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); fail: if (drop_inode) { inode_dec_link_count(inode); iput(inode); } btrfs_btree_balance_dirty(root, nr); - btrfs_throttle(root); return err; } @@ -1936,14 +1933,13 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, btrfs_update_inode_block_group(trans, dir); out_unlock: nr = trans->blocks_used; - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); fail: if (drop_inode) { inode_dec_link_count(inode); iput(inode); } btrfs_btree_balance_dirty(root, nr); - btrfs_throttle(root); return err; } @@ -1985,14 +1981,13 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, drop_inode = 1; nr = trans->blocks_used; - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); fail: if (drop_inode) { inode_dec_link_count(inode); iput(inode); } btrfs_btree_balance_dirty(root, nr); - btrfs_throttle(root); 
return err; } @@ -2055,13 +2050,12 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) out_fail: nr = trans->blocks_used; - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); out_unlock: if (drop_on_err) iput(inode); btrfs_btree_balance_dirty(root, nr); - btrfs_throttle(root); return err; } @@ -2587,10 +2581,9 @@ static void btrfs_truncate(struct inode *inode) btrfs_update_inode(trans, root, inode); nr = trans->blocks_used; - ret = btrfs_end_transaction(trans, root); + ret = btrfs_end_transaction_throttle(trans, root); BUG_ON(ret); btrfs_btree_balance_dirty(root, nr); - btrfs_throttle(root); } /* @@ -2912,14 +2905,13 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, out_unlock: nr = trans->blocks_used; - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); out_fail: if (drop_inode) { inode_dec_link_count(inode); iput(inode); } btrfs_btree_balance_dirty(root, nr); - btrfs_throttle(root); return err; } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 6002eb64daf..026039a2ac5 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -164,7 +164,6 @@ fail: ret = err; fail_commit: btrfs_btree_balance_dirty(root, nr); - btrfs_throttle(root); return ret; } @@ -206,7 +205,6 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) fail_unlock: btrfs_btree_balance_dirty(root, nr); - btrfs_throttle(root); return ret; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 5a1ee0665ae..69ed5f85a38 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -130,8 +130,27 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, return h; } -int btrfs_end_transaction(struct btrfs_trans_handle *trans, - struct btrfs_root *root) +static noinline int wait_for_commit(struct btrfs_root *root, + struct btrfs_transaction *commit) +{ + DEFINE_WAIT(wait); + mutex_lock(&root->fs_info->trans_mutex); + 
while(!commit->commit_done) { + prepare_to_wait(&commit->commit_wait, &wait, + TASK_UNINTERRUPTIBLE); + if (commit->commit_done) + break; + mutex_unlock(&root->fs_info->trans_mutex); + schedule(); + mutex_lock(&root->fs_info->trans_mutex); + } + mutex_unlock(&root->fs_info->trans_mutex); + finish_wait(&commit->commit_wait, &wait); + return 0; +} + +static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root, int throttle) { struct btrfs_transaction *cur_trans; @@ -140,8 +159,18 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, WARN_ON(cur_trans != trans->transaction); WARN_ON(cur_trans->num_writers < 1); cur_trans->num_writers--; + if (waitqueue_active(&cur_trans->writer_wait)) wake_up(&cur_trans->writer_wait); + + if (cur_trans->in_commit && throttle) { + int ret; + mutex_unlock(&root->fs_info->trans_mutex); + ret = wait_for_commit(root, cur_trans); + BUG_ON(ret); + mutex_lock(&root->fs_info->trans_mutex); + } + put_transaction(cur_trans); mutex_unlock(&root->fs_info->trans_mutex); memset(trans, 0, sizeof(*trans)); @@ -149,6 +178,18 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, return 0; } +int btrfs_end_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + return __btrfs_end_transaction(trans, root, 0); +} + +int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + return __btrfs_end_transaction(trans, root, 1); +} + int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) @@ -240,25 +281,6 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, return 0; } -static noinline int wait_for_commit(struct btrfs_root *root, - struct btrfs_transaction *commit) -{ - DEFINE_WAIT(wait); - mutex_lock(&root->fs_info->trans_mutex); - while(!commit->commit_done) { - prepare_to_wait(&commit->commit_wait, &wait, - TASK_UNINTERRUPTIBLE); - if (commit->commit_done) - break; - 
mutex_unlock(&root->fs_info->trans_mutex); - schedule(); - mutex_lock(&root->fs_info->trans_mutex); - } - mutex_unlock(&root->fs_info->trans_mutex); - finish_wait(&commit->commit_wait, &wait); - return 0; -} - struct dirty_root { struct list_head list; struct btrfs_root *root; @@ -680,6 +702,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, root->fs_info->btree_inode->i_mapping, GFP_NOFS); trans->transaction->in_commit = 1; +printk("trans %Lu in commit\n", trans->transid); cur_trans = trans->transaction; if (cur_trans->list.prev != &root->fs_info->trans_list) { prev_trans = list_entry(cur_trans->list.prev, @@ -760,6 +783,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, kfree(pinned_copy); cur_trans->commit_done = 1; +printk("trans %Lu done in commit\n", cur_trans->transid); root->fs_info->last_trans_committed = cur_trans->transid; wake_up(&cur_trans->commit_wait); put_transaction(cur_trans); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index c3172ddb332..52559b51b18 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -101,4 +101,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, struct btrfs_root *root); +int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, + struct btrfs_root *root); #endif -- cgit v1.2.3-70-g09d2 From a74a4b97b61beede185b4b3ad359d7d378b0d312 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:31 -0400 Subject: Btrfs: Replace the transaction work queue with kthreads This creates one kthread for commits and one kthread for deleting old snapshots. All the work queues are removed. 
Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 5 ++- fs/btrfs/ctree.h | 13 ++---- fs/btrfs/disk-io.c | 116 +++++++++++++++++++++++++++++++++++++++++++++---- fs/btrfs/extent-tree.c | 10 ++--- fs/btrfs/super.c | 16 +++---- fs/btrfs/transaction.c | 72 +----------------------------- fs/btrfs/transaction.h | 10 ----- fs/btrfs/volumes.c | 12 +++-- 8 files changed, 136 insertions(+), 118 deletions(-) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 5edbcc09b3c..40f0e0cb804 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1352,6 +1352,8 @@ again: free_extent_buffer(tmp); goto again; } else { + if (tmp) + free_extent_buffer(tmp); b = read_node_slot(root, b, slot); } } @@ -3048,7 +3050,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) free_extent_buffer(c); path->nodes[level] = next; path->slots[level] = 0; - path->locks[level] = 1; + if (!path->skip_locking) + path->locks[level] = 1; if (!level) break; if (level == 1 && path->locks[1] && path->reada) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e9bbb53eda6..244fe86bcc5 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -519,15 +518,14 @@ struct btrfs_fs_info { struct backing_dev_info bdi; spinlock_t hash_lock; struct mutex trans_mutex; + struct mutex transaction_kthread_mutex; + struct mutex cleaner_mutex; struct mutex alloc_mutex; struct mutex chunk_mutex; struct mutex drop_mutex; struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; - struct list_head end_io_work_list; - struct work_struct end_io_work; - spinlock_t end_io_work_lock; atomic_t nr_async_submits; /* @@ -543,13 +541,10 @@ struct btrfs_fs_info { struct btrfs_workers workers; struct btrfs_workers endio_workers; struct btrfs_workers submit_workers; + struct task_struct *transaction_kthread; + struct task_struct *cleaner_kthread; int thread_pool_size; -#if 
LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - struct work_struct trans_work; -#else - struct delayed_work trans_work; -#endif struct kobject super_kobj; struct completion kobj_unregister; int do_barriers; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 52569b57692..31ca9f89388 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -16,6 +16,7 @@ * Boston, MA 021110-1307, USA. */ +#include #include #include #include @@ -24,6 +25,12 @@ #include #include // for block_sync_page #include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) +# include +#else +# include +#endif #include "crc32c.h" #include "ctree.h" #include "disk-io.h" @@ -1100,6 +1107,87 @@ static void end_workqueue_fn(struct btrfs_work *work) #endif } +static int cleaner_kthread(void *arg) +{ + struct btrfs_root *root = arg; + + do { + smp_mb(); + if (root->fs_info->closing) + break; + + vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); + mutex_lock(&root->fs_info->cleaner_mutex); +printk("cleaner awake\n"); + btrfs_clean_old_snapshots(root); +printk("cleaner done\n"); + mutex_unlock(&root->fs_info->cleaner_mutex); + + if (freezing(current)) { + refrigerator(); + } else { + smp_mb(); + if (root->fs_info->closing) + break; + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + __set_current_state(TASK_RUNNING); + } + } while (!kthread_should_stop()); + return 0; +} + +static int transaction_kthread(void *arg) +{ + struct btrfs_root *root = arg; + struct btrfs_trans_handle *trans; + struct btrfs_transaction *cur; + unsigned long now; + unsigned long delay; + int ret; + + do { + smp_mb(); + if (root->fs_info->closing) + break; + + delay = HZ * 30; + vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); + mutex_lock(&root->fs_info->transaction_kthread_mutex); + + mutex_lock(&root->fs_info->trans_mutex); + cur = root->fs_info->running_transaction; + if (!cur) { + mutex_unlock(&root->fs_info->trans_mutex); + goto sleep; + } + now = get_seconds(); + if (now < cur->start_time || 
now - cur->start_time < 30) { + mutex_unlock(&root->fs_info->trans_mutex); + delay = HZ * 5; + goto sleep; + } + mutex_unlock(&root->fs_info->trans_mutex); + btrfs_defrag_dirty_roots(root->fs_info); + trans = btrfs_start_transaction(root, 1); + ret = btrfs_commit_transaction(trans, root); +sleep: + wake_up_process(root->fs_info->cleaner_kthread); + mutex_unlock(&root->fs_info->transaction_kthread_mutex); + + if (freezing(current)) { + refrigerator(); + } else { + if (root->fs_info->closing) + break; + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(delay); + __set_current_state(TASK_RUNNING); + } + } while (!kthread_should_stop()); + return 0; +} + struct btrfs_root *open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices, char *options) @@ -1189,11 +1277,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); -#else - INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); -#endif BTRFS_I(fs_info->btree_inode)->root = tree_root; memset(&BTRFS_I(fs_info->btree_inode)->location, 0, sizeof(struct btrfs_key)); @@ -1204,6 +1287,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->drop_mutex); mutex_init(&fs_info->alloc_mutex); mutex_init(&fs_info->chunk_mutex); + mutex_init(&fs_info->transaction_kthread_mutex); + mutex_init(&fs_info->cleaner_mutex); #if 0 ret = add_hasher(fs_info, "crc32c"); @@ -1247,7 +1332,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_start_workers(&fs_info->submit_workers, 1); btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); - err = -EINVAL; if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) { printk("Btrfs: wanted %llu devices, but found %llu\n", @@ -1341,9 +1425,22 @@ struct btrfs_root *open_ctree(struct super_block *sb, 
fs_info->data_alloc_profile = (u64)-1; fs_info->metadata_alloc_profile = (u64)-1; fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; + fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, + "btrfs-cleaner"); + if (!fs_info->cleaner_kthread) + goto fail_extent_root; + + fs_info->transaction_kthread = kthread_run(transaction_kthread, + tree_root, + "btrfs-transaction"); + if (!fs_info->transaction_kthread) + goto fail_trans_kthread; + return tree_root; +fail_trans_kthread: + kthread_stop(fs_info->cleaner_kthread); fail_extent_root: free_extent_buffer(extent_root->node); fail_tree_root: @@ -1562,8 +1659,11 @@ int close_ctree(struct btrfs_root *root) fs_info->closing = 1; smp_mb(); - btrfs_transaction_flush_work(root); + kthread_stop(root->fs_info->transaction_kthread); + kthread_stop(root->fs_info->cleaner_kthread); + btrfs_defrag_dirty_roots(root->fs_info); + btrfs_clean_old_snapshots(root); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); /* run commit again to drop the original snapshot */ @@ -1574,8 +1674,6 @@ int close_ctree(struct btrfs_root *root) write_ctree_super(NULL, root); - btrfs_transaction_flush_work(root); - if (fs_info->delalloc_bytes) { printk("btrfs: at unmount delalloc count %Lu\n", fs_info->delalloc_bytes); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6274f30031d..89cc4f61186 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1216,15 +1216,16 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, if (ret == -ENOSPC) { printk("space info full %Lu\n", flags); space_info->full = 1; - goto out; + goto out_unlock; } BUG_ON(ret); ret = btrfs_make_block_group(trans, extent_root, 0, flags, BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes); BUG_ON(ret); -out: +out_unlock: mutex_unlock(&extent_root->fs_info->chunk_mutex); +out: return 0; } @@ -2274,7 +2275,8 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, 
free_extent_buffer(next); mutex_unlock(&root->fs_info->alloc_mutex); - reada_walk_down(root, cur, path->slots[*level]); + if (path->slots[*level] == 0) + reada_walk_down(root, cur, path->slots[*level]); next = read_tree_block(root, bytenr, blocksize, ptr_gen); @@ -2446,8 +2448,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root break; if (wret < 0) ret = wret; - ret = -EAGAIN; - break; } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b61ded7a20c..726d6871fa1 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -340,7 +340,6 @@ static int btrfs_fill_super(struct super_block * sb, goto fail_close; sb->s_root = root_dentry; - btrfs_transaction_queue_work(tree_root, HZ * 30); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25) save_mount_options(sb, data); @@ -416,9 +415,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, goto error_free_subvol_name; bdev = fs_devices->latest_bdev; - btrfs_lock_volumes(); s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices); - btrfs_unlock_volumes(); if (IS_ERR(s)) goto error_s; @@ -530,13 +527,15 @@ out: static void btrfs_write_super_lockfs(struct super_block *sb) { struct btrfs_root *root = btrfs_sb(sb); - btrfs_transaction_flush_work(root); + mutex_lock(&root->fs_info->transaction_kthread_mutex); + mutex_lock(&root->fs_info->cleaner_mutex); } static void btrfs_unlockfs(struct super_block *sb) { struct btrfs_root *root = btrfs_sb(sb); - btrfs_transaction_queue_work(root, HZ * 30); + mutex_unlock(&root->fs_info->cleaner_mutex); + mutex_unlock(&root->fs_info->transaction_kthread_mutex); } static struct super_operations btrfs_super_ops = { @@ -589,10 +588,9 @@ static int __init init_btrfs_fs(void) if (err) return err; - btrfs_init_transaction_sys(); err = btrfs_init_cachep(); if (err) - goto free_transaction_sys; + goto free_sysfs; err = extent_io_init(); if (err) @@ -618,15 +616,13 @@ free_extent_io: 
extent_io_exit(); free_cachep: btrfs_destroy_cachep(); -free_transaction_sys: - btrfs_exit_transaction_sys(); +free_sysfs: btrfs_exit_sysfs(); return err; } static void __exit exit_btrfs_fs(void) { - btrfs_exit_transaction_sys(); btrfs_destroy_cachep(); extent_map_exit(); extent_io_exit(); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 69ed5f85a38..0c53ff775b9 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -29,8 +29,6 @@ static int total_trans = 0; extern struct kmem_cache *btrfs_trans_handle_cachep; extern struct kmem_cache *btrfs_transaction_cachep; -static struct workqueue_struct *trans_wq; - #define BTRFS_ROOT_TRANS_TAG 0 #define BTRFS_ROOT_DEFRAG_TAG 1 @@ -807,81 +805,15 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) { struct list_head dirty_roots; INIT_LIST_HEAD(&dirty_roots); - +again: mutex_lock(&root->fs_info->trans_mutex); list_splice_init(&root->fs_info->dead_roots, &dirty_roots); mutex_unlock(&root->fs_info->trans_mutex); if (!list_empty(&dirty_roots)) { drop_dirty_roots(root, &dirty_roots); + goto again; } return 0; } -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) -void btrfs_transaction_cleaner(void *p) -#else -void btrfs_transaction_cleaner(struct work_struct *work) -#endif -{ -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - struct btrfs_fs_info *fs_info = p; -#else - struct btrfs_fs_info *fs_info = container_of(work, - struct btrfs_fs_info, - trans_work.work); - -#endif - struct btrfs_root *root = fs_info->tree_root; - struct btrfs_transaction *cur; - struct btrfs_trans_handle *trans; - unsigned long now; - unsigned long delay = HZ * 30; - int ret; - - smp_mb(); - if (root->fs_info->closing) - goto out; - - mutex_lock(&root->fs_info->trans_mutex); - cur = root->fs_info->running_transaction; - if (!cur) { - mutex_unlock(&root->fs_info->trans_mutex); - goto out; - } - now = get_seconds(); - if (now < cur->start_time || now - cur->start_time < 30) { - mutex_unlock(&root->fs_info->trans_mutex); - 
delay = HZ * 5; - goto out; - } - mutex_unlock(&root->fs_info->trans_mutex); - btrfs_defrag_dirty_roots(root->fs_info); - trans = btrfs_start_transaction(root, 1); - ret = btrfs_commit_transaction(trans, root); -out: - btrfs_clean_old_snapshots(root); - btrfs_transaction_queue_work(root, delay); -} - -void btrfs_transaction_queue_work(struct btrfs_root *root, int delay) -{ - if (!root->fs_info->closing) - queue_delayed_work(trans_wq, &root->fs_info->trans_work, delay); -} - -void btrfs_transaction_flush_work(struct btrfs_root *root) -{ - cancel_delayed_work(&root->fs_info->trans_work); - flush_workqueue(trans_wq); -} - -void __init btrfs_init_transaction_sys(void) -{ - trans_wq = create_workqueue("btrfs-transaction"); -} - -void btrfs_exit_transaction_sys(void) -{ - destroy_workqueue(trans_wq); -} diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 52559b51b18..e1e5a06b65f 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -82,16 +82,6 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_root *root); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) -void btrfs_transaction_cleaner(void *p); -#else -void btrfs_transaction_cleaner(struct work_struct *work); -#endif - -void btrfs_transaction_flush_work(struct btrfs_root *root); -void btrfs_transaction_queue_work(struct btrfs_root *root, int delay); -void btrfs_init_transaction_sys(void); -void btrfs_exit_transaction_sys(void); int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest, struct list_head *dead_list); int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 869864ddcc2..4e7cee27aab 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -271,13 +271,17 @@ again: list_for_each(cur, head) { device = list_entry(cur, struct btrfs_device, dev_list); if (!device->in_fs_metadata) { - if (device->bdev) { - 
close_bdev_excl(device->bdev); - fs_devices->open_devices--; - } + struct block_device *bdev; list_del(&device->dev_list); list_del(&device->dev_alloc_list); fs_devices->num_devices--; + if (device->bdev) { + bdev = device->bdev; + fs_devices->open_devices--; + mutex_unlock(&uuid_mutex); + close_bdev_excl(bdev); + mutex_lock(&uuid_mutex); + } kfree(device->name); kfree(device); goto again; -- cgit v1.2.3-70-g09d2 From 3f157a2fd2ad731e1ed9964fecdc5f459f04a4a4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:31 -0400 Subject: Btrfs: Online btree defragmentation fixes The btree defragger wasn't making forward progress because the new key wasn't being saved by the btrfs_search_forward function. This also disables the automatic btree defrag, it wasn't scaling well to huge filesystems. The auto-defrag needs to be done differently. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 170 ++++++++++++++++++++++++++++++++++++++++++++++--- fs/btrfs/ctree.h | 7 +- fs/btrfs/disk-io.c | 61 +----------------- fs/btrfs/disk-io.h | 6 -- fs/btrfs/extent-tree.c | 2 - fs/btrfs/super.c | 1 - fs/btrfs/transaction.c | 35 +--------- fs/btrfs/transaction.h | 1 - fs/btrfs/tree-defrag.c | 36 ++++++----- 9 files changed, 190 insertions(+), 129 deletions(-) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7f4cc2b88d0..0cb80f32a9c 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -63,10 +63,9 @@ void btrfs_free_path(struct btrfs_path *p) void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) { int i; - int keep = p->keep_locks; - int skip = p->skip_locking; for (i = 0; i < BTRFS_MAX_LEVEL; i++) { + p->slots[i] = 0; if (!p->nodes[i]) continue; if (p->locks[i]) { @@ -74,10 +73,8 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) p->locks[i] = 0; } free_extent_buffer(p->nodes[i]); + p->nodes[i] = NULL; } - memset(p, 0, sizeof(*p)); - p->keep_locks = keep; - p->skip_locking = skip; } struct 
extent_buffer *btrfs_root_node(struct btrfs_root *root) @@ -463,8 +460,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, search_start = cur->start; last_block = cur->start; *last_ret = search_start; - if (parent_level == 1) - btrfs_clear_buffer_defrag(cur); btrfs_tree_unlock(cur); free_extent_buffer(cur); } @@ -2969,8 +2964,138 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) return 1; } +/* + * A helper function to walk down the tree starting at min_key, and looking + * for nodes or leaves that are either in cache or have a minimum + * transaction id. This is used by the btree defrag code, but could + * also be used to search for blocks that have changed since a given + * transaction id. + * + * This does not cow, but it does stuff the starting key it finds back + * into min_key, so you can call btrfs_search_slot with cow=1 on the + * key and get a writable path. + * + * This does lock as it descends, and path->keep_locks should be set + * to 1 by the caller. + * + * This honors path->lowest_level to prevent descent past a given level + * of the tree. + * + * returns zero if something useful was found, < 0 on error and 1 if there + * was nothing in the tree that matched the search criteria. 
+ */ +int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, + struct btrfs_path *path, int cache_only, + u64 min_trans) +{ + struct extent_buffer *cur; + struct btrfs_key found_key; + int slot; + u32 nritems; + int level; + int ret = 1; + +again: + cur = btrfs_lock_root_node(root); + level = btrfs_header_level(cur); + path->nodes[level] = cur; + path->locks[level] = 1; + + if (btrfs_header_generation(cur) < min_trans) { + ret = 1; + goto out; + } + while(1) { + nritems = btrfs_header_nritems(cur); + level = btrfs_header_level(cur); + bin_search(cur, min_key, level, &slot); + + /* at level = 0, we're done, setup the path and exit */ + if (level == 0) { + ret = 0; + path->slots[level] = slot; + btrfs_item_key_to_cpu(cur, &found_key, slot); + goto out; + } + /* + * check this node pointer against the cache_only and + * min_trans parameters. If it isn't in cache or is too + * old, skip to the next one. + */ + while(slot < nritems) { + u64 blockptr; + u64 gen; + struct extent_buffer *tmp; + blockptr = btrfs_node_blockptr(cur, slot); + gen = btrfs_node_ptr_generation(cur, slot); + if (gen < min_trans) { + slot++; + continue; + } + if (!cache_only) + break; + + tmp = btrfs_find_tree_block(root, blockptr, + btrfs_level_size(root, level - 1)); + + if (tmp && btrfs_buffer_uptodate(tmp, gen)) { + free_extent_buffer(tmp); + break; + } + if (tmp) + free_extent_buffer(tmp); + slot++; + } + /* + * we didn't find a candidate key in this node, walk forward + * and find another one + */ + if (slot >= nritems) { + ret = btrfs_find_next_key(root, path, min_key, level, + cache_only, min_trans); + if (ret == 0) { + btrfs_release_path(root, path); + goto again; + } else { + goto out; + } + } + /* save our key for returning back */ + btrfs_node_key_to_cpu(cur, &found_key, slot); + path->slots[level] = slot; + if (level == path->lowest_level) { + ret = 0; + unlock_up(path, level, 1); + goto out; + } + cur = read_node_slot(root, cur, slot); + + btrfs_tree_lock(cur); + 
path->locks[level - 1] = 1; + path->nodes[level - 1] = cur; + unlock_up(path, level, 1); + } +out: + if (ret == 0) + memcpy(min_key, &found_key, sizeof(found_key)); + return ret; +} + +/* + * this is similar to btrfs_next_leaf, but does not try to preserve + * and fixup the path. It looks for and returns the next key in the + * tree based on the current path and the cache_only and min_trans + * parameters. + * + * 0 is returned if another key is found, < 0 if there are any errors + * and 1 is returned if there are no higher keys in the tree + * + * path->keep_locks should be set to 1 on the search made before + * calling this function. + */ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, - struct btrfs_key *key, int lowest_level) + struct btrfs_key *key, int lowest_level, + int cache_only, u64 min_trans) { int level = lowest_level; int slot; @@ -2982,6 +3107,7 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, slot = path->slots[level] + 1; c = path->nodes[level]; +next: if (slot >= btrfs_header_nritems(c)) { level++; if (level == BTRFS_MAX_LEVEL) { @@ -2991,8 +3117,28 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, } if (level == 0) btrfs_item_key_to_cpu(c, key, slot); - else + else { + u64 blockptr = btrfs_node_blockptr(c, slot); + u64 gen = btrfs_node_ptr_generation(c, slot); + + if (cache_only) { + struct extent_buffer *cur; + cur = btrfs_find_tree_block(root, blockptr, + btrfs_level_size(root, level - 1)); + if (!cur || !btrfs_buffer_uptodate(cur, gen)) { + slot++; + if (cur) + free_extent_buffer(cur); + goto next; + } + free_extent_buffer(cur); + } + if (gen < min_trans) { + slot++; + goto next; + } btrfs_node_key_to_cpu(c, key, slot); + } return 0; } return 1; @@ -3095,6 +3241,12 @@ done: return 0; } +/* + * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps + * searching until it gets past min_objectid or finds an item of 'type' + * + * returns 0 if something is 
found, 1 if nothing was found and < 0 on error + */ int btrfs_previous_item(struct btrfs_root *root, struct btrfs_path *path, u64 min_objectid, int type) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ca8e6f15859..a28796482b4 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -609,6 +609,7 @@ struct btrfs_root { u64 last_inode_alloc; int ref_cows; int track_dirty; + u64 defrag_trans_start; struct btrfs_key defrag_progress; struct btrfs_key defrag_max; int defrag_running; @@ -1412,7 +1413,11 @@ int btrfs_previous_item(struct btrfs_root *root, struct extent_buffer *btrfs_root_node(struct btrfs_root *root); struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root); int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, - struct btrfs_key *key, int lowest_level); + struct btrfs_key *key, int lowest_level, + int cache_only, u64 min_trans); +int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, + struct btrfs_path *path, int cache_only, + u64 min_trans); int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 31ca9f89388..4cdc0b6a267 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -295,7 +295,6 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, btrfs_header_generation(eb)); BUG_ON(ret); - btrfs_clear_buffer_defrag(eb); found_start = btrfs_header_bytenr(eb); if (found_start != start) { printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n", @@ -355,7 +354,6 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, } eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); - btrfs_clear_buffer_defrag(eb); found_start = btrfs_header_bytenr(eb); if (found_start != start) { ret = -EIO; @@ -736,6 +734,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, 
memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); memset(&root->root_kobj, 0, sizeof(root->root_kobj)); + root->defrag_trans_start = fs_info->generation; init_completion(&root->kobj_unregister); root->defrag_running = 0; root->defrag_level = 0; @@ -1168,7 +1167,6 @@ static int transaction_kthread(void *arg) goto sleep; } mutex_unlock(&root->fs_info->trans_mutex); - btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); sleep: @@ -1434,12 +1432,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, tree_root, "btrfs-transaction"); if (!fs_info->transaction_kthread) - goto fail_trans_kthread; + goto fail_cleaner; return tree_root; -fail_trans_kthread: +fail_cleaner: kthread_stop(fs_info->cleaner_kthread); fail_extent_root: free_extent_buffer(extent_root->node); @@ -1662,7 +1660,6 @@ int close_ctree(struct btrfs_root *root) kthread_stop(root->fs_info->transaction_kthread); kthread_stop(root->fs_info->cleaner_kthread); - btrfs_defrag_dirty_roots(root->fs_info); btrfs_clean_old_snapshots(root); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); @@ -1794,58 +1791,6 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) return; } -void btrfs_set_buffer_defrag(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, - buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS); -} - -void btrfs_set_buffer_defrag_done(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, - buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, - GFP_NOFS); -} - -int 
btrfs_buffer_defrag(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - return test_range_bit(&BTRFS_I(btree_inode)->io_tree, - buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0); -} - -int btrfs_buffer_defrag_done(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - return test_range_bit(&BTRFS_I(btree_inode)->io_tree, - buf->start, buf->start + buf->len - 1, - EXTENT_DEFRAG_DONE, 0); -} - -int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree, - buf->start, buf->start + buf->len - 1, - EXTENT_DEFRAG_DONE, GFP_NOFS); -} - -int btrfs_clear_buffer_defrag(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree, - buf->start, buf->start + buf->len - 1, - EXTENT_DEFRAG, GFP_NOFS); -} - int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index deff6b4815a..353c3c50c95 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -61,12 +61,6 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); int btrfs_set_buffer_uptodate(struct extent_buffer *buf); int wait_on_tree_block_writeback(struct btrfs_root *root, struct extent_buffer *buf); -void btrfs_set_buffer_defrag(struct extent_buffer *buf); -void btrfs_set_buffer_defrag_done(struct extent_buffer *buf); -int btrfs_buffer_defrag(struct extent_buffer *buf); -int 
btrfs_buffer_defrag_done(struct extent_buffer *buf); -int btrfs_clear_buffer_defrag(struct extent_buffer *buf); -int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf); int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); void btrfs_csum_final(u32 crc, char *result); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index dc3c03c6612..5e0857ffbc3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2095,8 +2095,6 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); - if (!btrfs_test_opt(root, SSD)) - btrfs_set_buffer_defrag(buf); trans->blocks_used++; return buf; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 726d6871fa1..5e28cf5c2e8 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -365,7 +365,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait) return 0; } btrfs_clean_old_snapshots(root); - btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); sb->s_dirt = 0; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8e909cb97c6..98f422d9ab0 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -30,7 +30,6 @@ extern struct kmem_cache *btrfs_trans_handle_cachep; extern struct kmem_cache *btrfs_transaction_cachep; #define BTRFS_ROOT_TRANS_TAG 0 -#define BTRFS_ROOT_DEFRAG_TAG 1 static noinline void put_transaction(struct btrfs_transaction *transaction) { @@ -92,9 +91,6 @@ static noinline int record_root_in_trans(struct btrfs_root *root) radix_tree_tag_set(&root->fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); - radix_tree_tag_set(&root->fs_info->fs_roots_radix, - (unsigned long)root->root_key.objectid, - BTRFS_ROOT_DEFRAG_TAG); root->commit_root = 
btrfs_root_node(root); } else { WARN_ON(1); @@ -403,44 +399,15 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) cond_resched(); trans = btrfs_start_transaction(root, 1); - if (ret != -EAGAIN) + if (root->fs_info->closing || ret != -EAGAIN) break; } root->defrag_running = 0; smp_mb(); - radix_tree_tag_clear(&info->fs_roots_radix, - (unsigned long)root->root_key.objectid, - BTRFS_ROOT_DEFRAG_TAG); btrfs_end_transaction(trans, root); return 0; } -int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) -{ - struct btrfs_root *gang[1]; - struct btrfs_root *root; - int i; - int ret; - int err = 0; - u64 last = 0; - - while(1) { - ret = radix_tree_gang_lookup_tag(&info->fs_roots_radix, - (void **)gang, last, - ARRAY_SIZE(gang), - BTRFS_ROOT_DEFRAG_TAG); - if (ret == 0) - break; - for (i = 0; i < ret; i++) { - root = gang[i]; - last = root->root_key.objectid + 1; - btrfs_defrag_root(root, 1); - } - } - btrfs_defrag_root(info->extent_root, 1); - return err; -} - static noinline int drop_dirty_roots(struct btrfs_root *tree_root, struct list_head *list) { diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index e1e5a06b65f..9ccd5a5b170 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -84,7 +84,6 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest, struct list_head *dead_list); -int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info); int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); int btrfs_clean_old_snapshots(struct btrfs_root *root); int btrfs_commit_transaction(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index b17693f61fb..cc2650b0695 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -32,10 +32,13 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, int wret; int level; int orig_level; - int i; int is_extent = 0; int next_key_ret = 0; u64 last_ret = 
0; + u64 min_trans = 0; + + if (cache_only) + goto out; if (root->fs_info->extent_root == root) { /* @@ -43,10 +46,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, * we can't defrag the extent root without deadlock */ goto out; -#if 0 - mutex_lock(&root->fs_info->alloc_mutex); - is_extent = 1; -#endif } if (root->ref_cows == 0 && !is_extent) @@ -84,6 +83,17 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, path->lowest_level = 1; path->keep_locks = 1; + if (cache_only) + min_trans = root->defrag_trans_start; + + ret = btrfs_search_forward(root, &key, path, cache_only, min_trans); + if (ret < 0) + goto out; + if (ret > 0) { + ret = 0; + goto out; + } + btrfs_release_path(root, path); wret = btrfs_search_slot(trans, root, &key, path, 0, 1); if (wret < 0) { @@ -95,7 +105,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, goto out; } path->slots[1] = btrfs_header_nritems(path->nodes[1]); - next_key_ret = btrfs_find_next_key(root, path, &key, 1); + next_key_ret = btrfs_find_next_key(root, path, &key, 1, cache_only, + min_trans); ret = btrfs_realloc_node(trans, root, path->nodes[1], 0, cache_only, &last_ret, @@ -106,19 +117,9 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, ret = -EAGAIN; } - for (i = 1; i < BTRFS_MAX_LEVEL; i++) { - if (path->locks[i]) { - btrfs_tree_unlock(path->nodes[i]); - path->locks[i] = 0; - } - if (path->nodes[i]) { - free_extent_buffer(path->nodes[i]); - path->nodes[i] = NULL; - } - } + btrfs_release_path(root, path); if (is_extent) btrfs_extent_post_op(trans, root); - out: if (is_extent) mutex_unlock(&root->fs_info->alloc_mutex); @@ -138,6 +139,7 @@ done: if (ret != -EAGAIN) { memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); + root->defrag_trans_start = trans->transid; } return ret; } -- cgit v1.2.3-70-g09d2 From e6dcd2dc9c489108648e2ed543315dd134d50a9a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Jul 2008 12:53:50 -0400 Subject: Btrfs: New data=ordered implementation 
The old data=ordered code would force commit to wait until all the data extents from the transaction were fully on disk. This introduced large latencies into the commit and stalled new writers in the transaction for a long time. The new code changes the way data allocations and extents work: * When delayed allocation is filled, data extents are reserved, and the extent bit EXTENT_ORDERED is set on the entire range of the extent. A struct btrfs_ordered_extent is allocated and inserted into a per-inode rbtree to track the pending extents. * As each page is written EXTENT_ORDERED is cleared on the bytes corresponding to that page. * When all of the bytes corresponding to a single struct btrfs_ordered_extent are written, the previously reserved extent is inserted into the FS btree and into the extent allocation trees. The checksums for the file data are also updated. Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 4 +- fs/btrfs/ctree.h | 19 +- fs/btrfs/disk-io.c | 13 +- fs/btrfs/extent-tree.c | 132 +++++++++----- fs/btrfs/extent_io.c | 52 +++++- fs/btrfs/extent_io.h | 14 +- fs/btrfs/extent_map.c | 5 +- fs/btrfs/file-item.c | 62 ++++--- fs/btrfs/file.c | 67 ++++--- fs/btrfs/inode.c | 447 ++++++++++++++++++++++++++++++----------------- fs/btrfs/ordered-data.c | 455 ++++++++++++++++++++++++++++++------------------ fs/btrfs/ordered-data.h | 71 ++++++-- fs/btrfs/transaction.c | 67 +------ fs/btrfs/transaction.h | 4 - 14 files changed, 910 insertions(+), 502 deletions(-) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 40b4e0c9cd0..8d03687510e 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -21,6 +21,7 @@ #include "extent_map.h" #include "extent_io.h" +#include "ordered-data.h" /* in memory btrfs inode */ struct btrfs_inode { @@ -32,9 +33,8 @@ struct btrfs_inode { struct extent_io_tree io_failure_tree; struct mutex csum_mutex; struct inode vfs_inode; - atomic_t ordered_writeback; + struct
btrfs_ordered_inode_tree ordered_tree; - u64 ordered_trans; /* * transid of the trans_handle that last modified this inode */ diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f3783dbd9b6..ceebc052ddc 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "bit-radix.h" #include "extent_io.h" @@ -37,6 +38,7 @@ extern struct kmem_cache *btrfs_trans_handle_cachep; extern struct kmem_cache *btrfs_transaction_cachep; extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; +struct btrfs_ordered_sum; #define BTRFS_MAGIC "_B5RfS_M" @@ -510,6 +512,7 @@ struct btrfs_fs_info { u64 max_inline; u64 alloc_start; struct btrfs_transaction *running_transaction; + wait_queue_head_t transaction_throttle; struct btrfs_super_block super_copy; struct btrfs_super_block super_for_commit; struct block_device *__bdev; @@ -541,6 +544,7 @@ struct btrfs_fs_info { */ struct btrfs_workers workers; struct btrfs_workers endio_workers; + struct btrfs_workers endio_write_workers; struct btrfs_workers submit_workers; struct task_struct *transaction_kthread; struct task_struct *cleaner_kthread; @@ -1384,6 +1388,17 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, u64 owner, u64 owner_offset, u64 empty_size, u64 hint_byte, u64 search_end, struct btrfs_key *ins, u64 data); +int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset, + struct btrfs_key *ins); +int btrfs_reserve_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 num_bytes, u64 min_alloc_size, + u64 empty_size, u64 hint_byte, + u64 search_end, struct btrfs_key *ins, + u64 data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root @@ -1556,9 +1571,9 @@ int 
btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, u64 bytenr, int mod); int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - struct bio *bio, char *sums); + struct btrfs_ordered_sum *sums); int btrfs_csum_one_bio(struct btrfs_root *root, - struct bio *bio, char **sums_ret); + struct bio *bio, struct btrfs_ordered_sum **sums_ret); struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b01b3f4f92a..4a5ebafb935 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -407,7 +407,11 @@ static int end_workqueue_bio(struct bio *bio, end_io_wq->error = err; end_io_wq->work.func = end_workqueue_fn; end_io_wq->work.flags = 0; - btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work); + if (bio->bi_rw & (1 << BIO_RW)) + btrfs_queue_worker(&fs_info->endio_write_workers, + &end_io_wq->work); + else + btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work); #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) return 0; @@ -1286,6 +1290,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->transaction_kthread_mutex); mutex_init(&fs_info->cleaner_mutex); mutex_init(&fs_info->volume_mutex); + init_waitqueue_head(&fs_info->transaction_throttle); #if 0 ret = add_hasher(fs_info, "crc32c"); @@ -1325,9 +1330,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size); btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size); btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size); + btrfs_init_workers(&fs_info->endio_write_workers, + fs_info->thread_pool_size); btrfs_start_workers(&fs_info->workers, 1); btrfs_start_workers(&fs_info->submit_workers, 1); btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); + 
btrfs_start_workers(&fs_info->endio_write_workers, + fs_info->thread_pool_size); err = -EINVAL; if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) { @@ -1447,6 +1456,7 @@ fail_sb_buffer: extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); + btrfs_stop_workers(&fs_info->endio_write_workers); btrfs_stop_workers(&fs_info->submit_workers); fail_iput: iput(fs_info->btree_inode); @@ -1702,6 +1712,7 @@ int close_ctree(struct btrfs_root *root) btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); + btrfs_stop_workers(&fs_info->endio_write_workers); btrfs_stop_workers(&fs_info->submit_workers); iput(fs_info->btree_inode); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8ebfa6be079..343d1101c31 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1895,36 +1895,17 @@ error: return ret; } -/* - * finds a free extent and does all the dirty work required for allocation - * returns the key for the extent through ins, and a tree buffer for - * the first block of the extent through buf. - * - * returns 0 if everything worked, non-zero otherwise. 
- */ -int btrfs_alloc_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 num_bytes, u64 min_alloc_size, - u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset, - u64 empty_size, u64 hint_byte, - u64 search_end, struct btrfs_key *ins, u64 data) +static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 num_bytes, u64 min_alloc_size, + u64 empty_size, u64 hint_byte, + u64 search_end, struct btrfs_key *ins, + u64 data) { int ret; - int pending_ret; - u64 super_used; - u64 root_used; u64 search_start = 0; u64 alloc_profile; - u32 sizes[2]; struct btrfs_fs_info *info = root->fs_info; - struct btrfs_root *extent_root = info->extent_root; - struct btrfs_extent_item *extent_item; - struct btrfs_extent_ref *ref; - struct btrfs_path *path; - struct btrfs_key keys[2]; - - maybe_lock_mutex(root); if (data) { alloc_profile = info->avail_data_alloc_bits & @@ -1974,11 +1955,48 @@ again: } if (ret) { printk("allocation failed flags %Lu\n", data); - } - if (ret) { BUG(); - goto out; } + clear_extent_dirty(&root->fs_info->free_space_cache, + ins->objectid, ins->objectid + ins->offset - 1, + GFP_NOFS); + return 0; +} + +int btrfs_reserve_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 num_bytes, u64 min_alloc_size, + u64 empty_size, u64 hint_byte, + u64 search_end, struct btrfs_key *ins, + u64 data) +{ + int ret; + maybe_lock_mutex(root); + ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, + empty_size, hint_byte, search_end, ins, + data); + maybe_unlock_mutex(root); + return ret; +} + +static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset, + struct btrfs_key *ins) +{ + int ret; + int pending_ret; + u64 super_used; + u64 root_used; + u64 num_bytes = ins->offset; + u32 sizes[2]; + struct btrfs_fs_info *info = root->fs_info; + struct btrfs_root 
*extent_root = info->extent_root; + struct btrfs_extent_item *extent_item; + struct btrfs_extent_ref *ref; + struct btrfs_path *path; + struct btrfs_key keys[2]; /* block accounting for super block */ spin_lock_irq(&info->delalloc_lock); @@ -1990,10 +2008,6 @@ again: root_used = btrfs_root_used(&root->root_item); btrfs_set_root_used(&root->root_item, root_used + num_bytes); - clear_extent_dirty(&root->fs_info->free_space_cache, - ins->objectid, ins->objectid + ins->offset - 1, - GFP_NOFS); - if (root == extent_root) { set_extent_bits(&root->fs_info->extent_ins, ins->objectid, ins->objectid + ins->offset - 1, @@ -2001,10 +2015,6 @@ again: goto update_block; } - WARN_ON(trans->alloc_exclude_nr); - trans->alloc_exclude_start = ins->objectid; - trans->alloc_exclude_nr = ins->offset; - memcpy(&keys[0], ins, sizeof(*ins)); keys[1].offset = hash_extent_ref(root_objectid, ref_generation, owner, owner_offset); @@ -2054,6 +2064,51 @@ update_block: BUG(); } out: + return ret; +} + +int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset, + struct btrfs_key *ins) +{ + int ret; + maybe_lock_mutex(root); + ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid, + ref_generation, owner, + owner_offset, ins); + maybe_unlock_mutex(root); + return ret; +} +/* + * finds a free extent and does all the dirty work required for allocation + * returns the key for the extent through ins, and a tree buffer for + * the first block of the extent through buf. + * + * returns 0 if everything worked, non-zero otherwise. 
+ */ +int btrfs_alloc_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 num_bytes, u64 min_alloc_size, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset, + u64 empty_size, u64 hint_byte, + u64 search_end, struct btrfs_key *ins, u64 data) +{ + int ret; + + maybe_lock_mutex(root); + + ret = __btrfs_reserve_extent(trans, root, num_bytes, + min_alloc_size, empty_size, hint_byte, + search_end, ins, data); + BUG_ON(ret); + ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid, + ref_generation, owner, + owner_offset, ins); + BUG_ON(ret); + maybe_unlock_mutex(root); return ret; } @@ -2288,8 +2343,8 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->alloc_mutex); /* we've dropped the lock, double check */ - ret = drop_snap_lookup_refcount(root, bytenr, - blocksize, &refs); + ret = lookup_extent_ref(NULL, root, bytenr, blocksize, + &refs); BUG_ON(ret); if (refs != 1) { parent = path->nodes[*level]; @@ -2584,7 +2639,6 @@ out_unlock: kfree(ra); trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); if (trans) { - btrfs_add_ordered_inode(inode); btrfs_end_transaction(trans, BTRFS_I(inode)->root); mark_inode_dirty(inode); } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 40a5f53cb04..3f82a6e9ca4 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -793,6 +793,13 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, } EXPORT_SYMBOL(set_extent_dirty); +int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask); +} +EXPORT_SYMBOL(set_extent_ordered); + int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits, gfp_t mask) { @@ -812,8 +819,8 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { return set_extent_bit(tree, start, end, - EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL, - 
mask); + EXTENT_DELALLOC | EXTENT_DIRTY, + 0, NULL, mask); } EXPORT_SYMBOL(set_extent_delalloc); @@ -825,6 +832,13 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, } EXPORT_SYMBOL(clear_extent_dirty); +int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask); +} +EXPORT_SYMBOL(clear_extent_ordered); + int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { @@ -1395,10 +1409,9 @@ static int end_bio_extent_writepage(struct bio *bio, if (--bvec >= bio->bi_io_vec) prefetchw(&bvec->bv_page->flags); - if (tree->ops && tree->ops->writepage_end_io_hook) { ret = tree->ops->writepage_end_io_hook(page, start, - end, state); + end, state, uptodate); if (ret) uptodate = 0; } @@ -1868,9 +1881,14 @@ static int __extent_read_full_page(struct extent_io_tree *tree, unlock_extent(tree, cur, end, GFP_NOFS); break; } - extent_offset = cur - em->start; + if (extent_map_end(em) <= cur) { +printk("bad mapping em [%Lu %Lu] cur %Lu\n", em->start, extent_map_end(em), cur); + } BUG_ON(extent_map_end(em) <= cur); + if (end < cur) { +printk("2bad mapping end %Lu cur %Lu\n", end, cur); + } BUG_ON(end < cur); iosize = min(extent_map_end(em) - cur, end - cur + 1); @@ -1976,6 +1994,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, u64 last_byte = i_size_read(inode); u64 block_start; u64 iosize; + u64 unlock_start; sector_t sector; struct extent_map *em; struct block_device *bdev; @@ -1988,7 +2007,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, u64 nr_delalloc; u64 delalloc_end; - WARN_ON(!PageLocked(page)); page_offset = i_size & (PAGE_CACHE_SIZE - 1); if (page->index > end_index || @@ -2030,6 +2048,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, delalloc_start = delalloc_end + 1; } lock_extent(tree, start, page_end, GFP_NOFS); + 
unlock_start = start; end = page_end; if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { @@ -2038,6 +2057,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, if (last_byte <= start) { clear_extent_dirty(tree, start, page_end, GFP_NOFS); + unlock_extent(tree, start, page_end, GFP_NOFS); + if (tree->ops && tree->ops->writepage_end_io_hook) + tree->ops->writepage_end_io_hook(page, start, + page_end, NULL, 1); + unlock_start = page_end + 1; goto done; } @@ -2047,6 +2071,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, while (cur <= end) { if (cur >= last_byte) { clear_extent_dirty(tree, cur, page_end, GFP_NOFS); + unlock_extent(tree, unlock_start, page_end, GFP_NOFS); + if (tree->ops && tree->ops->writepage_end_io_hook) + tree->ops->writepage_end_io_hook(page, cur, + page_end, NULL, 1); + unlock_start = page_end + 1; break; } em = epd->get_extent(inode, page, page_offset, cur, @@ -2071,8 +2100,16 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, block_start == EXTENT_MAP_INLINE) { clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); + + unlock_extent(tree, unlock_start, cur + iosize -1, + GFP_NOFS); + if (tree->ops && tree->ops->writepage_end_io_hook) + tree->ops->writepage_end_io_hook(page, cur, + cur + iosize - 1, + NULL, 1); cur = cur + iosize; page_offset += iosize; + unlock_start = cur; continue; } @@ -2119,7 +2156,8 @@ done: set_page_writeback(page); end_page_writeback(page); } - unlock_extent(tree, start, page_end, GFP_NOFS); + if (unlock_start <= page_end) + unlock_extent(tree, unlock_start, page_end, GFP_NOFS); unlock_page(page); return 0; } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index f1960dafaa1..2268a799589 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -13,6 +13,8 @@ #define EXTENT_DEFRAG (1 << 6) #define EXTENT_DEFRAG_DONE (1 << 7) #define EXTENT_BUFFER_FILLED (1 << 8) +#define EXTENT_ORDERED (1 << 
9) +#define EXTENT_ORDERED_METADATA (1 << 10) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) /* @@ -42,7 +44,7 @@ struct extent_io_ops { int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state); int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, - struct extent_state *state); + struct extent_state *state, int uptodate); int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, unsigned long old, unsigned long bits); int (*clear_bit_hook)(struct inode *inode, u64 start, u64 end, @@ -131,6 +133,8 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, int filled); int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits, gfp_t mask); +int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int wake, int delete, gfp_t mask); int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits, gfp_t mask); int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, @@ -141,8 +145,14 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); +int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int clear_extent_ordered_metadata(struct extent_io_tree *tree, u64 start, + u64 end, gfp_t mask); int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); +int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); int find_first_extent_bit(struct extent_io_tree *tree, u64 start, u64 *start_ret, u64 *end_ret, int bits); struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, @@ -209,6 +219,8 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, unsigned long start, unsigned long len); int wait_on_extent_buffer_writeback(struct extent_io_tree *tree, struct extent_buffer *eb); +int 
wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end); +int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits); int clear_extent_buffer_dirty(struct extent_io_tree *tree, struct extent_buffer *eb); int set_extent_buffer_dirty(struct extent_io_tree *tree, diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index f5a04eb9a2a..81123277c2b 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -206,10 +206,11 @@ int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *merge = NULL; struct rb_node *rb; + BUG_ON(spin_trylock(&tree->lock)); rb = tree_insert(&tree->map, em->start, &em->rb_node); if (rb) { - merge = rb_entry(rb, struct extent_map, rb_node); ret = -EEXIST; + free_extent_map(merge); goto out; } atomic_inc(&em->refs); @@ -268,6 +269,7 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, struct rb_node *next = NULL; u64 end = range_end(start, len); + BUG_ON(spin_trylock(&tree->lock)); em = tree->last; if (em && end > em->start && start < extent_map_end(em)) goto found; @@ -318,6 +320,7 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) { int ret = 0; + BUG_ON(spin_trylock(&tree->lock)); rb_erase(&em->rb_node, &tree->map); em->in_tree = 0; if (tree->last == em) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index f537eb43c2c..345caf8ff51 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -135,26 +135,37 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, } int btrfs_csum_one_bio(struct btrfs_root *root, - struct bio *bio, char **sums_ret) + struct bio *bio, struct btrfs_ordered_sum **sums_ret) { - u32 *sums; + struct btrfs_ordered_sum *sums; + struct btrfs_sector_sum *sector_sum; char *data; struct bio_vec *bvec = bio->bi_io_vec; int bio_index = 0; - sums = kmalloc(bio->bi_vcnt * BTRFS_CRC32_SIZE, GFP_NOFS); + WARN_ON(bio->bi_vcnt <= 0); + sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), 
GFP_NOFS); if (!sums) return -ENOMEM; - *sums_ret = (char *)sums; + *sums_ret = sums; + sector_sum = &sums->sums; + sums->file_offset = page_offset(bvec->bv_page); + sums->len = bio->bi_size; + INIT_LIST_HEAD(&sums->list); while(bio_index < bio->bi_vcnt) { data = kmap_atomic(bvec->bv_page, KM_USER0); - *sums = ~(u32)0; - *sums = btrfs_csum_data(root, data + bvec->bv_offset, - *sums, bvec->bv_len); + sector_sum->sum = ~(u32)0; + sector_sum->sum = btrfs_csum_data(root, + data + bvec->bv_offset, + sector_sum->sum, + bvec->bv_len); kunmap_atomic(data, KM_USER0); - btrfs_csum_final(*sums, (char *)sums); - sums++; + btrfs_csum_final(sector_sum->sum, + (char *)&sector_sum->sum); + sector_sum->offset = page_offset(bvec->bv_page) + + bvec->bv_offset; + sector_sum++; bio_index++; bvec++; } @@ -163,7 +174,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - struct bio *bio, char *sums) + struct btrfs_ordered_sum *sums) { u64 objectid = inode->i_ino; u64 offset; @@ -171,17 +182,16 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_key file_key; struct btrfs_key found_key; u64 next_offset; + u64 total_bytes = 0; int found_next; struct btrfs_path *path; struct btrfs_csum_item *item; struct btrfs_csum_item *item_end; struct extent_buffer *leaf = NULL; u64 csum_offset; - u32 *sums32 = (u32 *)sums; + struct btrfs_sector_sum *sector_sum; u32 nritems; u32 ins_size; - int bio_index = 0; - struct bio_vec *bvec = bio->bi_io_vec; char *eb_map; char *eb_token; unsigned long map_len; @@ -189,10 +199,11 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); + sector_sum = &sums->sums; again: next_offset = (u64)-1; found_next = 0; - offset = page_offset(bvec->bv_page) + bvec->bv_offset; + offset = sector_sum->offset; file_key.objectid = objectid; file_key.offset = offset; btrfs_set_key_type(&file_key,
BTRFS_CSUM_ITEM_KEY); @@ -303,7 +314,7 @@ found: item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + btrfs_item_size_nr(leaf, path->slots[0])); eb_token = NULL; -next_bvec: +next_sector: if (!eb_token || (unsigned long)item + BTRFS_CRC32_SIZE >= map_start + map_len) { @@ -321,21 +332,20 @@ next_bvec: } if (eb_token) { memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)), - sums32, BTRFS_CRC32_SIZE); + &sector_sum->sum, BTRFS_CRC32_SIZE); } else { - write_extent_buffer(leaf, sums32, (unsigned long)item, - BTRFS_CRC32_SIZE); + write_extent_buffer(leaf, &sector_sum->sum, + (unsigned long)item, BTRFS_CRC32_SIZE); } - bio_index++; - bvec++; - sums32++; - if (bio_index < bio->bi_vcnt) { + total_bytes += root->sectorsize; + sector_sum++; + if (total_bytes < sums->len) { item = (struct btrfs_csum_item *)((char *)item + BTRFS_CRC32_SIZE); if (item < item_end && offset + PAGE_CACHE_SIZE == - page_offset(bvec->bv_page)) { - offset = page_offset(bvec->bv_page); - goto next_bvec; + sector_sum->offset) { + offset = sector_sum->offset; + goto next_sector; } } if (eb_token) { @@ -343,7 +353,7 @@ next_bvec: eb_token = NULL; } btrfs_mark_buffer_dirty(path->nodes[0]); - if (bio_index < bio->bi_vcnt) { + if (total_bytes < sums->len) { btrfs_release_path(root, path); goto again; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8037792f878..12e765f7e0d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -34,7 +34,6 @@ #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" -#include "ordered-data.h" #include "ioctl.h" #include "print-tree.h" #include "compat.h" @@ -273,7 +272,9 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, u64 mask = root->sectorsize - 1; last_pos_in_file = (isize + mask) & ~mask; hole_size = (start_pos - last_pos_in_file + mask) & ~mask; - if (last_pos_in_file < start_pos) { + if (hole_size > 0) { + btrfs_wait_ordered_range(inode, last_pos_in_file, + last_pos_in_file + hole_size); err =
btrfs_drop_extents(trans, root, inode, last_pos_in_file, last_pos_in_file + hole_size, @@ -303,19 +304,17 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, inline_size > root->fs_info->max_inline || (inline_size & (root->sectorsize -1)) == 0 || inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { - u64 last_end; - + /* check for reserved extents on each page, we don't want + * to reset the delalloc bit on things that already have + * extents reserved. + */ + set_extent_delalloc(io_tree, start_pos, + end_of_last_block, GFP_NOFS); for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; SetPageUptodate(p); set_page_dirty(p); } - last_end = (u64)(pages[num_pages -1]->index) << - PAGE_CACHE_SHIFT; - last_end += PAGE_CACHE_SIZE - 1; - set_extent_delalloc(io_tree, start_pos, end_of_last_block, - GFP_NOFS); - btrfs_add_ordered_inode(inode); } else { u64 aligned_end; /* step one, delete the existing extents in this range */ @@ -350,10 +349,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) struct extent_map *split = NULL; struct extent_map *split2 = NULL; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_map *tmp; u64 len = end - start + 1; + u64 next_start; int ret; int testend = 1; + WARN_ON(end < start); if (end == (u64)-1) { len = (u64)-1; testend = 0; @@ -370,6 +372,8 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) spin_unlock(&em_tree->lock); break; } + tmp = rb_entry(&em->rb_node, struct extent_map, rb_node); + next_start = tmp->start; remove_extent_mapping(em_tree, em); if (em->block_start < EXTENT_MAP_LAST_BYTE && @@ -778,37 +782,58 @@ static int prepare_pages(struct btrfs_root *root, struct file *file, struct inode *inode = fdentry(file)->d_inode; int err = 0; u64 start_pos; + u64 last_pos; start_pos = pos & ~((u64)root->sectorsize - 1); + last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; memset(pages, 0, num_pages * sizeof(struct page *)); - 
+again: for (i = 0; i < num_pages; i++) { pages[i] = grab_cache_page(inode->i_mapping, index + i); if (!pages[i]) { err = -ENOMEM; BUG_ON(1); } -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - ClearPageDirty(pages[i]); -#else - cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); -#endif wait_on_page_writeback(pages[i]); - set_page_extent_mapped(pages[i]); - WARN_ON(!PageLocked(pages[i])); } if (start_pos < inode->i_size) { - u64 last_pos; - last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; + struct btrfs_ordered_extent *ordered; lock_extent(&BTRFS_I(inode)->io_tree, start_pos, last_pos - 1, GFP_NOFS); + ordered = btrfs_lookup_first_ordered_extent(inode, last_pos -1); + if (ordered && + ordered->file_offset + ordered->len > start_pos && + ordered->file_offset < last_pos) { + btrfs_put_ordered_extent(ordered); + unlock_extent(&BTRFS_I(inode)->io_tree, + start_pos, last_pos - 1, GFP_NOFS); + for (i = 0; i < num_pages; i++) { + unlock_page(pages[i]); + page_cache_release(pages[i]); + } + btrfs_wait_ordered_range(inode, start_pos, + last_pos - start_pos); + goto again; + } + if (ordered) + btrfs_put_ordered_extent(ordered); + clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC, GFP_NOFS); unlock_extent(&BTRFS_I(inode)->io_tree, start_pos, last_pos - 1, GFP_NOFS); } + for (i = 0; i < num_pages; i++) { +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + ClearPageDirty(pages[i]); +#else + cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); +#endif + set_page_extent_mapped(pages[i]); + WARN_ON(!PageLocked(pages[i])); + } return 0; } @@ -969,13 +994,11 @@ out_nolock: (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); } current->backing_dev_info = NULL; - btrfs_ordered_throttle(root, inode); return num_written ? 
num_written : err; } int btrfs_release_file(struct inode * inode, struct file * filp) { - btrfs_del_ordered_inode(inode, 0); if (filp->private_data) btrfs_ioctl_trans_end(filp); return 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d39433dfb2c..c5a62f0b959 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -43,6 +43,7 @@ #include "ioctl.h" #include "print-tree.h" #include "volumes.h" +#include "ordered-data.h" struct btrfs_iget_args { u64 ino; @@ -109,10 +110,11 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) u64 num_bytes; u64 cur_alloc_size; u64 blocksize = root->sectorsize; - u64 orig_start = start; u64 orig_num_bytes; struct btrfs_key ins; - int ret; + struct extent_map *em; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + int ret = 0; trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); @@ -120,33 +122,44 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) num_bytes = (end - start + blocksize) & ~(blocksize - 1); num_bytes = max(blocksize, num_bytes); - ret = btrfs_drop_extents(trans, root, inode, - start, start + num_bytes, start, &alloc_hint); orig_num_bytes = num_bytes; if (alloc_hint == EXTENT_MAP_INLINE) goto out; BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy)); + btrfs_drop_extent_cache(inode, start, start + num_bytes - 1); while(num_bytes > 0) { cur_alloc_size = min(num_bytes, root->fs_info->max_extent); - ret = btrfs_alloc_extent(trans, root, cur_alloc_size, - root->sectorsize, - root->root_key.objectid, - trans->transid, - inode->i_ino, start, 0, - alloc_hint, (u64)-1, &ins, 1); + ret = btrfs_reserve_extent(trans, root, cur_alloc_size, + root->sectorsize, 0, 0, + (u64)-1, &ins, 1); if (ret) { WARN_ON(1); goto out; } + em = alloc_extent_map(GFP_NOFS); + em->start = start; + em->len = ins.offset; + em->block_start = ins.objectid; + em->bdev = root->fs_info->fs_devices->latest_bdev; + while(1) { + spin_lock(&em_tree->lock); + ret = 
add_extent_mapping(em_tree, em); + spin_unlock(&em_tree->lock); + if (ret != -EEXIST) { + free_extent_map(em); + break; + } + btrfs_drop_extent_cache(inode, start, + start + ins.offset - 1); + } + cur_alloc_size = ins.offset; - ret = btrfs_insert_file_extent(trans, root, inode->i_ino, - start, ins.objectid, ins.offset, - ins.offset, 0); - inode->i_blocks += ins.offset >> 9; - btrfs_check_file(root, inode); + ret = btrfs_add_ordered_extent(inode, start, ins.objectid, + ins.offset); + BUG_ON(ret); if (num_bytes < cur_alloc_size) { printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes, cur_alloc_size); @@ -156,10 +169,6 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) alloc_hint = ins.objectid + ins.offset; start += cur_alloc_size; } - btrfs_drop_extent_cache(inode, orig_start, - orig_start + orig_num_bytes - 1); - btrfs_add_ordered_inode(inode); - btrfs_update_inode(trans, root, inode); out: btrfs_end_transaction(trans, root); return ret; @@ -341,25 +350,15 @@ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, int mirror_num) { struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; int ret = 0; - char *sums = NULL; + struct btrfs_ordered_sum *sums; ret = btrfs_csum_one_bio(root, bio, &sums); BUG_ON(ret); - trans = btrfs_start_transaction(root, 1); - - btrfs_set_trans_block_group(trans, inode); - mutex_lock(&BTRFS_I(inode)->csum_mutex); - btrfs_csum_file_blocks(trans, root, inode, bio, sums); - mutex_unlock(&BTRFS_I(inode)->csum_mutex); - - ret = btrfs_end_transaction(trans, root); + ret = btrfs_add_ordered_sum(inode, sums); BUG_ON(ret); - kfree(sums); - return btrfs_map_bio(root, rw, bio, mirror_num, 1); } @@ -369,14 +368,10 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, struct btrfs_root *root = BTRFS_I(inode)->root; int ret = 0; - if (!(rw & (1 << BIO_RW))) { - ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); - BUG_ON(ret); - goto mapit; - } + ret = 
btrfs_bio_wq_end_io(root->fs_info, bio, 0); + BUG_ON(ret); - if (btrfs_test_opt(root, NODATASUM) || - btrfs_test_flag(inode, NODATASUM)) { + if (!(rw & (1 << BIO_RW))) { goto mapit; } @@ -387,6 +382,96 @@ mapit: return btrfs_map_bio(root, rw, bio, mirror_num, 0); } +static int add_pending_csums(struct btrfs_trans_handle *trans, + struct inode *inode, u64 file_offset, + struct list_head *list) +{ + struct list_head *cur; + struct btrfs_ordered_sum *sum; + + btrfs_set_trans_block_group(trans, inode); + while(!list_empty(list)) { + cur = list->next; + sum = list_entry(cur, struct btrfs_ordered_sum, list); + mutex_lock(&BTRFS_I(inode)->csum_mutex); + btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root, + inode, sum); + mutex_unlock(&BTRFS_I(inode)->csum_mutex); + list_del(&sum->list); + kfree(sum); + } + return 0; +} + +int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, + struct extent_state *state, int uptodate) +{ + struct inode *inode = page->mapping->host; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + struct btrfs_ordered_extent *ordered_extent; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + u64 alloc_hint = 0; + struct list_head list; + struct btrfs_key ins; + int ret; + + ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1); + if (!ret) { + return 0; + } + + trans = btrfs_start_transaction(root, 1); + + ordered_extent = btrfs_lookup_ordered_extent(inode, start); + BUG_ON(!ordered_extent); + + lock_extent(io_tree, ordered_extent->file_offset, + ordered_extent->file_offset + ordered_extent->len - 1, + GFP_NOFS); + + INIT_LIST_HEAD(&list); + + ins.objectid = ordered_extent->start; + ins.offset = ordered_extent->len; + ins.type = BTRFS_EXTENT_ITEM_KEY; + ret = btrfs_alloc_reserved_extent(trans, root, root->root_key.objectid, + trans->transid, inode->i_ino, + ordered_extent->file_offset, &ins); + BUG_ON(ret); + ret = btrfs_drop_extents(trans, root, inode, + 
ordered_extent->file_offset, + ordered_extent->file_offset + + ordered_extent->len, + ordered_extent->file_offset, &alloc_hint); + BUG_ON(ret); + ret = btrfs_insert_file_extent(trans, root, inode->i_ino, + ordered_extent->file_offset, + ordered_extent->start, + ordered_extent->len, + ordered_extent->len, 0); + BUG_ON(ret); + btrfs_drop_extent_cache(inode, ordered_extent->file_offset, + ordered_extent->file_offset + + ordered_extent->len - 1); + inode->i_blocks += ordered_extent->len >> 9; + unlock_extent(io_tree, ordered_extent->file_offset, + ordered_extent->file_offset + ordered_extent->len - 1, + GFP_NOFS); + add_pending_csums(trans, inode, ordered_extent->file_offset, + &ordered_extent->list); + + btrfs_remove_ordered_extent(inode, ordered_extent); + /* once for us */ + btrfs_put_ordered_extent(ordered_extent); + /* once for the tree */ + btrfs_put_ordered_extent(ordered_extent); + + btrfs_update_inode(trans, root, inode); + btrfs_end_transaction(trans, root); + return 0; +} + int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) { int ret = 0; @@ -409,7 +494,8 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) if (ret == -ENOENT || ret == -EFBIG) ret = 0; csum = 0; - printk("no csum found for inode %lu start %Lu\n", inode->i_ino, start); + printk("no csum found for inode %lu start %Lu\n", inode->i_ino, + start); goto out; } read_extent_buffer(path->nodes[0], &csum, (unsigned long)item, @@ -833,7 +919,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) { struct btrfs_root *root; struct btrfs_trans_handle *trans; - struct inode *inode = dentry->d_inode; int ret; unsigned long nr = 0; @@ -849,14 +934,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) ret = btrfs_unlink_trans(trans, root, dir, dentry); nr = trans->blocks_used; - if (inode->i_nlink == 0) { - /* if the inode isn't linked anywhere, - * we don't need to worry about - * data=ordered - */ - btrfs_del_ordered_inode(inode, 1); - } - 
btrfs_end_transaction_throttle(trans, root); fail: btrfs_btree_balance_dirty(root, nr); @@ -931,6 +1008,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, int extent_type = -1; u64 mask = root->sectorsize - 1; + btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); btrfs_drop_extent_cache(inode, inode->i_size & (~mask), (u64)-1); path = btrfs_alloc_path(); path->reada = -1; @@ -1117,34 +1195,6 @@ error: return ret; } -static int btrfs_cow_one_page(struct inode *inode, struct page *page, - size_t zero_start) -{ - char *kaddr; - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; - u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 page_end = page_start + PAGE_CACHE_SIZE - 1; - int ret = 0; - - WARN_ON(!PageLocked(page)); - set_page_extent_mapped(page); - - lock_extent(io_tree, page_start, page_end, GFP_NOFS); - set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, - page_end, GFP_NOFS); - - if (zero_start != PAGE_CACHE_SIZE) { - kaddr = kmap(page); - memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start); - flush_dcache_page(page); - kunmap(page); - } - set_page_dirty(page); - unlock_extent(io_tree, page_start, page_end, GFP_NOFS); - - return ret; -} - /* * taken from block_truncate_page, but does cow as it zeros out * any bytes left in the last page in the file. 
@@ -1153,12 +1203,16 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) { struct inode *inode = mapping->host; struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct btrfs_ordered_extent *ordered; + char *kaddr; u32 blocksize = root->sectorsize; pgoff_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); struct page *page; int ret = 0; u64 page_start; + u64 page_end; if ((offset & (blocksize - 1)) == 0) goto out; @@ -1168,6 +1222,10 @@ again: page = grab_cache_page(mapping, index); if (!page) goto out; + + page_start = page_offset(page); + page_end = page_start + PAGE_CACHE_SIZE - 1; + if (!PageUptodate(page)) { ret = btrfs_readpage(NULL, page); lock_page(page); @@ -1181,10 +1239,32 @@ again: goto out; } } - - page_start = (u64)page->index << PAGE_CACHE_SHIFT; wait_on_page_writeback(page); - ret = btrfs_cow_one_page(inode, page, offset); + + lock_extent(io_tree, page_start, page_end, GFP_NOFS); + set_page_extent_mapped(page); + + ordered = btrfs_lookup_ordered_extent(inode, page_start); + if (ordered) { + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + unlock_page(page); + page_cache_release(page); + btrfs_wait_ordered_extent(inode, ordered); + btrfs_put_ordered_extent(ordered); + goto again; + } + + set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, + page_end, GFP_NOFS); + ret = 0; + if (offset != PAGE_CACHE_SIZE) { + kaddr = kmap(page); + memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); + flush_dcache_page(page); + kunmap(page); + } + set_page_dirty(page); + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); unlock_page(page); page_cache_release(page); @@ -1222,8 +1302,9 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) btrfs_truncate_page(inode->i_mapping, inode->i_size); - lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); hole_size = block_end - hole_start; + 
btrfs_wait_ordered_range(inode, hole_start, hole_size); + lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); @@ -1258,6 +1339,7 @@ void btrfs_delete_inode(struct inode *inode) unsigned long nr; int ret; + btrfs_wait_ordered_range(inode, 0, (u64)-1); truncate_inode_pages(&inode->i_data, 0); if (is_bad_inode(inode)) { goto no_delete; @@ -1403,7 +1485,6 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); mutex_init(&BTRFS_I(inode)->csum_mutex); - atomic_set(&BTRFS_I(inode)->ordered_writeback, 0); return 0; } @@ -1705,7 +1786,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); mutex_init(&BTRFS_I(inode)->csum_mutex); - atomic_set(&BTRFS_I(inode)->ordered_writeback, 0); BTRFS_I(inode)->delalloc_bytes = 0; BTRFS_I(inode)->root = root; @@ -1930,7 +2010,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_mapping, GFP_NOFS); mutex_init(&BTRFS_I(inode)->csum_mutex); BTRFS_I(inode)->delalloc_bytes = 0; - atomic_set(&BTRFS_I(inode)->ordered_writeback, 0); BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } dir->i_sb->s_dirt = 1; @@ -2066,64 +2145,18 @@ out_unlock: static int merge_extent_mapping(struct extent_map_tree *em_tree, struct extent_map *existing, - struct extent_map *em) + struct extent_map *em, + u64 map_start, u64 map_len) { u64 start_diff; - u64 new_end; - int ret = 0; - int real_blocks = existing->block_start < EXTENT_MAP_LAST_BYTE; - - if (real_blocks && em->block_start >= EXTENT_MAP_LAST_BYTE) - goto invalid; - - if (!real_blocks && em->block_start != existing->block_start) - goto invalid; - - new_end = max(existing->start + existing->len, em->start + em->len); - - if (existing->start >= em->start) { - if (em->start + em->len < 
existing->start) - goto invalid; - start_diff = existing->start - em->start; - if (real_blocks && em->block_start + start_diff != - existing->block_start) - goto invalid; - - em->len = new_end - em->start; - - remove_extent_mapping(em_tree, existing); - /* free for the tree */ - free_extent_map(existing); - ret = add_extent_mapping(em_tree, em); - - } else if (em->start > existing->start) { - - if (existing->start + existing->len < em->start) - goto invalid; - - start_diff = em->start - existing->start; - if (real_blocks && existing->block_start + start_diff != - em->block_start) - goto invalid; - - remove_extent_mapping(em_tree, existing); - em->block_start = existing->block_start; - em->start = existing->start; - em->len = new_end - existing->start; - free_extent_map(existing); - - ret = add_extent_mapping(em_tree, em); - } else { - goto invalid; - } - return ret; - -invalid: - printk("invalid extent map merge [%Lu %Lu %Lu] [%Lu %Lu %Lu]\n", - existing->start, existing->len, existing->block_start, - em->start, em->len, em->block_start); - return -EIO; + BUG_ON(map_start < em->start || map_start >= extent_map_end(em)); + start_diff = map_start - em->start; + em->start = map_start; + em->len = map_len; + if (em->block_start < EXTENT_MAP_LAST_BYTE) + em->block_start += start_diff; + return add_extent_mapping(em_tree, em); } struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, @@ -2170,10 +2203,9 @@ again: err = -ENOMEM; goto out; } - + em->bdev = root->fs_info->fs_devices->latest_bdev; em->start = EXTENT_MAP_HOLE; em->len = (u64)-1; - em->bdev = root->fs_info->fs_devices->latest_bdev; ret = btrfs_lookup_file_extent(trans, root, path, objectid, start, trans != NULL); if (ret < 0) { @@ -2314,6 +2346,9 @@ insert: */ if (ret == -EEXIST) { struct extent_map *existing; + + ret = 0; + existing = lookup_extent_mapping(em_tree, start, len); if (existing && (existing->start > start || existing->start + existing->len <= start)) { @@ -2325,7 +2360,8 @@ 
insert: em->len); if (existing) { err = merge_extent_mapping(em_tree, existing, - em); + em, start, + root->sectorsize); free_extent_map(existing); if (err) { free_extent_map(em); @@ -2341,6 +2377,7 @@ insert: } else { free_extent_map(em); em = existing; + err = 0; } } spin_unlock(&em_tree->lock); @@ -2348,8 +2385,9 @@ out: btrfs_free_path(path); if (trans) { ret = btrfs_end_transaction(trans, root); - if (!err) + if (!err) { err = ret; + } } if (err) { free_extent_map(em); @@ -2474,8 +2512,7 @@ btrfs_readpages(struct file *file, struct address_space *mapping, return extent_readpages(tree, mapping, pages, nr_pages, btrfs_get_extent); } - -static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) +static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags) { struct extent_io_tree *tree; struct extent_map_tree *map; @@ -2493,15 +2530,54 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) return ret; } +static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) +{ + struct btrfs_ordered_extent *ordered; + + ordered = btrfs_lookup_ordered_extent(page->mapping->host, + page_offset(page)); + if (ordered) { + btrfs_put_ordered_extent(ordered); + return 0; + } + return __btrfs_releasepage(page, gfp_flags); +} + static void btrfs_invalidatepage(struct page *page, unsigned long offset) { struct extent_io_tree *tree; + struct btrfs_ordered_extent *ordered; + u64 page_start = page_offset(page); + u64 page_end = page_start + PAGE_CACHE_SIZE - 1; + wait_on_page_writeback(page); tree = &BTRFS_I(page->mapping->host)->io_tree; - extent_invalidatepage(tree, page, offset); - btrfs_releasepage(page, GFP_NOFS); + if (offset) { + btrfs_releasepage(page, GFP_NOFS); + return; + } + + lock_extent(tree, page_start, page_end, GFP_NOFS); + ordered = btrfs_lookup_ordered_extent(page->mapping->host, + page_offset(page)); + if (ordered) { + clear_extent_bit(tree, page_start, page_end, + EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_LOCKED, 1, 0, GFP_NOFS); + 
btrfs_writepage_end_io_hook(page, page_start, + page_end, NULL, 1); + btrfs_put_ordered_extent(ordered); + lock_extent(tree, page_start, page_end, GFP_NOFS); + } + clear_extent_bit(tree, page_start, page_end, + EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_ORDERED, + 1, 1, GFP_NOFS); + __btrfs_releasepage(page, GFP_NOFS); + if (PagePrivate(page)) { - invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE); + invalidate_extent_lru(tree, page_offset(page), + PAGE_CACHE_SIZE); ClearPagePrivate(page); set_page_private(page, 0); page_cache_release(page); @@ -2527,35 +2603,63 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) { struct inode *inode = fdentry(vma->vm_file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; - unsigned long end; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct btrfs_ordered_extent *ordered; + char *kaddr; + unsigned long zero_start; loff_t size; int ret; u64 page_start; + u64 page_end; ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0); if (ret) goto out; ret = -EINVAL; - +again: lock_page(page); - wait_on_page_writeback(page); size = i_size_read(inode); - page_start = (u64)page->index << PAGE_CACHE_SHIFT; + page_start = page_offset(page); + page_end = page_start + PAGE_CACHE_SIZE - 1; if ((page->mapping != inode->i_mapping) || - (page_start > size)) { + (page_start >= size)) { /* page got truncated out from underneath us */ goto out_unlock; } + wait_on_page_writeback(page); + + lock_extent(io_tree, page_start, page_end, GFP_NOFS); + set_page_extent_mapped(page); + + ordered = btrfs_lookup_ordered_extent(inode, page_start); + if (ordered) { + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + unlock_page(page); + btrfs_wait_ordered_extent(inode, ordered); + btrfs_put_ordered_extent(ordered); + goto again; + } + + set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, + page_end, GFP_NOFS); + ret = 0; /* page is wholly or partially inside EOF */ if (page_start 
+ PAGE_CACHE_SIZE > size) - end = size & ~PAGE_CACHE_MASK; + zero_start = size & ~PAGE_CACHE_MASK; else - end = PAGE_CACHE_SIZE; + zero_start = PAGE_CACHE_SIZE; - ret = btrfs_cow_one_page(inode, page, end); + if (zero_start != PAGE_CACHE_SIZE) { + kaddr = kmap(page); + memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start); + flush_dcache_page(page); + kunmap(page); + } + set_page_dirty(page); + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); out_unlock: unlock_page(page); @@ -2662,15 +2766,28 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) if (!ei) return NULL; ei->last_trans = 0; - ei->ordered_trans = 0; + btrfs_ordered_inode_tree_init(&ei->ordered_tree); return &ei->vfs_inode; } void btrfs_destroy_inode(struct inode *inode) { + struct btrfs_ordered_extent *ordered; WARN_ON(!list_empty(&inode->i_dentry)); WARN_ON(inode->i_data.nrpages); + while(1) { + ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); + if (!ordered) + break; + else { + printk("found ordered extent %Lu %Lu\n", + ordered->file_offset, ordered->len); + btrfs_remove_ordered_extent(inode, ordered); + btrfs_put_ordered_extent(ordered); + btrfs_put_ordered_extent(ordered); + } + } btrfs_drop_extent_cache(inode, 0, (u64)-1); kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); } @@ -2869,7 +2986,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_mapping, GFP_NOFS); mutex_init(&BTRFS_I(inode)->csum_mutex); BTRFS_I(inode)->delalloc_bytes = 0; - atomic_set(&BTRFS_I(inode)->ordered_writeback, 0); BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } dir->i_sb->s_dirt = 1; @@ -2921,6 +3037,20 @@ out_fail: return err; } +static int btrfs_set_page_dirty(struct page *page) +{ + struct inode *inode = page->mapping->host; + u64 page_start = page_offset(page); + u64 page_end = page_start + PAGE_CACHE_SIZE - 1; + + if (!test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, + EXTENT_DELALLOC, 0)) { +printk("inode %lu page %Lu not delalloc\n", 
inode->i_ino, page_offset(page)); +WARN_ON(1); + } + return __set_page_dirty_nobuffers(page); +} + static int btrfs_permission(struct inode *inode, int mask, struct nameidata *nd) { @@ -2967,6 +3097,7 @@ static struct extent_io_ops btrfs_extent_io_ops = { .merge_bio_hook = btrfs_merge_bio_hook, .readpage_io_hook = btrfs_readpage_io_hook, .readpage_end_io_hook = btrfs_readpage_end_io_hook, + .writepage_end_io_hook = btrfs_writepage_end_io_hook, .readpage_io_failed_hook = btrfs_io_failed_hook, .set_bit_hook = btrfs_set_bit_hook, .clear_bit_hook = btrfs_clear_bit_hook, @@ -2982,7 +3113,7 @@ static struct address_space_operations btrfs_aops = { .direct_IO = btrfs_direct_IO, .invalidatepage = btrfs_invalidatepage, .releasepage = btrfs_releasepage, - .set_page_dirty = __set_page_dirty_nobuffers, + .set_page_dirty = btrfs_set_page_dirty, }; static struct address_space_operations btrfs_symlink_aops = { diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 254da822566..6513270f054 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -22,48 +22,30 @@ #include "ctree.h" #include "transaction.h" #include "btrfs_inode.h" +#include "extent_io.h" -struct tree_entry { - u64 root_objectid; - u64 objectid; - struct inode *inode; - struct rb_node rb_node; -}; -/* - * returns > 0 if entry passed (root, objectid) is > entry, - * < 0 if (root, objectid) < entry and zero if they are equal - */ -static int comp_entry(struct tree_entry *entry, u64 root_objectid, - u64 objectid) +static u64 entry_end(struct btrfs_ordered_extent *entry) { - if (root_objectid < entry->root_objectid) - return -1; - if (root_objectid > entry->root_objectid) - return 1; - if (objectid < entry->objectid) - return -1; - if (objectid > entry->objectid) - return 1; - return 0; + if (entry->file_offset + entry->len < entry->file_offset) + return (u64)-1; + return entry->file_offset + entry->len; } -static struct rb_node *tree_insert(struct rb_root *root, u64 root_objectid, - u64 
objectid, struct rb_node *node) +static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset, + struct rb_node *node) { struct rb_node ** p = &root->rb_node; struct rb_node * parent = NULL; - struct tree_entry *entry; - int comp; + struct btrfs_ordered_extent *entry; while(*p) { parent = *p; - entry = rb_entry(parent, struct tree_entry, rb_node); + entry = rb_entry(parent, struct btrfs_ordered_extent, rb_node); - comp = comp_entry(entry, root_objectid, objectid); - if (comp < 0) + if (file_offset < entry->file_offset) p = &(*p)->rb_left; - else if (comp > 0) + else if (file_offset >= entry_end(entry)) p = &(*p)->rb_right; else return parent; @@ -74,24 +56,23 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 root_objectid, return NULL; } -static struct rb_node *__tree_search(struct rb_root *root, u64 root_objectid, - u64 objectid, struct rb_node **prev_ret) +static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset, + struct rb_node **prev_ret) { struct rb_node * n = root->rb_node; struct rb_node *prev = NULL; - struct tree_entry *entry; - struct tree_entry *prev_entry = NULL; - int comp; + struct rb_node *test; + struct btrfs_ordered_extent *entry; + struct btrfs_ordered_extent *prev_entry = NULL; while(n) { - entry = rb_entry(n, struct tree_entry, rb_node); + entry = rb_entry(n, struct btrfs_ordered_extent, rb_node); prev = n; prev_entry = entry; - comp = comp_entry(entry, root_objectid, objectid); - if (comp < 0) + if (file_offset < entry->file_offset) n = n->rb_left; - else if (comp > 0) + else if (file_offset >= entry_end(entry)) n = n->rb_right; else return n; @@ -99,195 +80,329 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 root_objectid, if (!prev_ret) return NULL; - while(prev && comp_entry(prev_entry, root_objectid, objectid) >= 0) { - prev = rb_next(prev); - prev_entry = rb_entry(prev, struct tree_entry, rb_node); + while(prev && file_offset >= entry_end(prev_entry)) { + test = rb_next(prev); 
+ if (!test) + break; + prev_entry = rb_entry(test, struct btrfs_ordered_extent, + rb_node); + if (file_offset < entry_end(prev_entry)) + break; + + prev = test; + } + if (prev) + prev_entry = rb_entry(prev, struct btrfs_ordered_extent, + rb_node); + while(prev && file_offset < entry_end(prev_entry)) { + test = rb_prev(prev); + if (!test) + break; + prev_entry = rb_entry(test, struct btrfs_ordered_extent, + rb_node); + prev = test; } *prev_ret = prev; return NULL; } -static inline struct rb_node *tree_search(struct rb_root *root, - u64 root_objectid, u64 objectid) +static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset) +{ + if (file_offset < entry->file_offset || + entry->file_offset + entry->len <= file_offset) + return 0; + return 1; +} + +static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, + u64 file_offset) { + struct rb_root *root = &tree->tree; struct rb_node *prev; struct rb_node *ret; - ret = __tree_search(root, root_objectid, objectid, &prev); + struct btrfs_ordered_extent *entry; + + if (tree->last) { + entry = rb_entry(tree->last, struct btrfs_ordered_extent, + rb_node); + if (offset_in_entry(entry, file_offset)) + return tree->last; + } + ret = __tree_search(root, file_offset, &prev); if (!ret) - return prev; + ret = prev; + if (ret) + tree->last = ret; return ret; } -int btrfs_add_ordered_inode(struct inode *inode) +int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, + u64 start, u64 len) { - struct btrfs_root *root = BTRFS_I(inode)->root; - u64 root_objectid = root->root_key.objectid; - u64 transid = root->fs_info->running_transaction->transid; - struct tree_entry *entry; - struct rb_node *node; struct btrfs_ordered_inode_tree *tree; + struct rb_node *node; + struct btrfs_ordered_extent *entry; - if (transid <= BTRFS_I(inode)->ordered_trans) - return 0; - - tree = &root->fs_info->running_transaction->ordered_inode_tree; - - read_lock(&tree->lock); - node = __tree_search(&tree->tree, 
root_objectid, inode->i_ino, NULL); - read_unlock(&tree->lock); - if (node) { - return 0; - } - - entry = kmalloc(sizeof(*entry), GFP_NOFS); + tree = &BTRFS_I(inode)->ordered_tree; + entry = kzalloc(sizeof(*entry), GFP_NOFS); if (!entry) return -ENOMEM; - write_lock(&tree->lock); - entry->objectid = inode->i_ino; - entry->root_objectid = root_objectid; + mutex_lock(&tree->mutex); + entry->file_offset = file_offset; + entry->start = start; + entry->len = len; entry->inode = inode; + /* one ref for the tree */ + atomic_set(&entry->refs, 1); + init_waitqueue_head(&entry->wait); + INIT_LIST_HEAD(&entry->list); - node = tree_insert(&tree->tree, root_objectid, - inode->i_ino, &entry->rb_node); - - BTRFS_I(inode)->ordered_trans = transid; - if (!node) - igrab(inode); - - write_unlock(&tree->lock); + node = tree_insert(&tree->tree, file_offset, + &entry->rb_node); + if (node) { + entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); + atomic_inc(&entry->refs); + } + set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset, + entry_end(entry) - 1, GFP_NOFS); - if (node) - kfree(entry); + set_bit(BTRFS_ORDERED_START, &entry->flags); + mutex_unlock(&tree->mutex); + BUG_ON(node); return 0; } -int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, - u64 *root_objectid, u64 *objectid, - struct inode **inode) +int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum) { - struct tree_entry *entry; + struct btrfs_ordered_inode_tree *tree; struct rb_node *node; + struct btrfs_ordered_extent *entry; - write_lock(&tree->lock); - node = tree_search(&tree->tree, *root_objectid, *objectid); + tree = &BTRFS_I(inode)->ordered_tree; + mutex_lock(&tree->mutex); + node = tree_search(tree, sum->file_offset); if (!node) { - write_unlock(&tree->lock); - return 0; +search_fail: +printk("add ordered sum failed to find a node for inode %lu offset %Lu\n", inode->i_ino, sum->file_offset); + node = rb_first(&tree->tree); + while(node) { + entry = 
rb_entry(node, struct btrfs_ordered_extent, rb_node); + printk("entry %Lu %Lu %Lu\n", entry->file_offset, entry->file_offset + entry->len, entry->start); + node = rb_next(node); + } + BUG(); } - entry = rb_entry(node, struct tree_entry, rb_node); + BUG_ON(!node); - while(comp_entry(entry, *root_objectid, *objectid) >= 0) { - node = rb_next(node); - if (!node) - break; - entry = rb_entry(node, struct tree_entry, rb_node); - } - if (!node) { - write_unlock(&tree->lock); - return 0; + entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); + if (!offset_in_entry(entry, sum->file_offset)) { + goto search_fail; } - *root_objectid = entry->root_objectid; - *inode = entry->inode; - atomic_inc(&entry->inode->i_count); - *objectid = entry->objectid; - write_unlock(&tree->lock); - return 1; + list_add_tail(&sum->list, &entry->list); + mutex_unlock(&tree->mutex); + return 0; } -int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, - u64 *root_objectid, u64 *objectid, - struct inode **inode) +int btrfs_dec_test_ordered_pending(struct inode *inode, + u64 file_offset, u64 io_size) { - struct tree_entry *entry; + struct btrfs_ordered_inode_tree *tree; struct rb_node *node; - - write_lock(&tree->lock); - node = tree_search(&tree->tree, *root_objectid, *objectid); + struct btrfs_ordered_extent *entry; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + int ret; + + tree = &BTRFS_I(inode)->ordered_tree; + mutex_lock(&tree->mutex); + clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1, + GFP_NOFS); + node = tree_search(tree, file_offset); if (!node) { - write_unlock(&tree->lock); - return 0; + ret = 1; + goto out; } - entry = rb_entry(node, struct tree_entry, rb_node); - while(comp_entry(entry, *root_objectid, *objectid) >= 0) { - node = rb_next(node); - if (!node) - break; - entry = rb_entry(node, struct tree_entry, rb_node); + entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); + if (!offset_in_entry(entry, 
file_offset)) { + ret = 1; + goto out; } - if (!node) { - write_unlock(&tree->lock); - return 0; + + ret = test_range_bit(io_tree, entry->file_offset, + entry->file_offset + entry->len - 1, + EXTENT_ORDERED, 0); + if (!test_bit(BTRFS_ORDERED_START, &entry->flags)) { +printk("inode %lu not ready yet for extent %Lu %Lu\n", inode->i_ino, entry->file_offset, entry_end(entry)); } + if (ret == 0) + ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); +out: + mutex_unlock(&tree->mutex); + return ret == 0; +} - *root_objectid = entry->root_objectid; - *objectid = entry->objectid; - *inode = entry->inode; - atomic_inc(&entry->inode->i_count); - rb_erase(node, &tree->tree); - write_unlock(&tree->lock); - kfree(entry); - return 1; +int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) +{ + if (atomic_dec_and_test(&entry->refs)) + kfree(entry); + return 0; } -static void __btrfs_del_ordered_inode(struct btrfs_ordered_inode_tree *tree, - struct inode *inode, - u64 root_objectid, u64 objectid) +int btrfs_remove_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry) { - struct tree_entry *entry; + struct btrfs_ordered_inode_tree *tree; struct rb_node *node; - struct rb_node *prev; - write_lock(&tree->lock); - node = __tree_search(&tree->tree, root_objectid, objectid, &prev); - if (!node) { - write_unlock(&tree->lock); - return; - } + tree = &BTRFS_I(inode)->ordered_tree; + mutex_lock(&tree->mutex); + node = &entry->rb_node; rb_erase(node, &tree->tree); - BTRFS_I(inode)->ordered_trans = 0; - write_unlock(&tree->lock); - atomic_dec(&inode->i_count); - entry = rb_entry(node, struct tree_entry, rb_node); - kfree(entry); - return; + tree->last = NULL; + set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); + mutex_unlock(&tree->mutex); + wake_up(&entry->wait); + return 0; } -void btrfs_del_ordered_inode(struct inode *inode, int force) +void btrfs_wait_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry) { - struct btrfs_root *root 
= BTRFS_I(inode)->root; - u64 root_objectid = root->root_key.objectid; + u64 start = entry->file_offset; + u64 end = start + entry->len - 1; +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) + do_sync_file_range(file, start, end, SYNC_FILE_RANGE_WRITE); +#else + do_sync_mapping_range(inode->i_mapping, start, end, + SYNC_FILE_RANGE_WRITE); +#endif + wait_event(entry->wait, + test_bit(BTRFS_ORDERED_COMPLETE, &entry->flags)); +} - if (!BTRFS_I(inode)->ordered_trans) { - return; - } +static void btrfs_start_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry, int wait) +{ + u64 start = entry->file_offset; + u64 end = start + entry->len - 1; - if (!force && (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) || - mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))) - return; +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) + do_sync_file_range(file, start, end, SYNC_FILE_RANGE_WRITE); +#else + do_sync_mapping_range(inode->i_mapping, start, end, + SYNC_FILE_RANGE_WRITE); +#endif + if (wait) + wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, + &entry->flags)); +} - spin_lock(&root->fs_info->new_trans_lock); - if (root->fs_info->running_transaction) { - struct btrfs_ordered_inode_tree *tree; - tree = &root->fs_info->running_transaction->ordered_inode_tree; - __btrfs_del_ordered_inode(tree, inode, root_objectid, - inode->i_ino); +void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) +{ + u64 end; + struct btrfs_ordered_extent *ordered; + int found; + int should_wait = 0; + +again: + if (start + len < start) + end = (u64)-1; + else + end = start + len - 1; + found = 0; + while(1) { + ordered = btrfs_lookup_first_ordered_extent(inode, end); + if (!ordered) { + break; + } + if (ordered->file_offset >= start + len) { + btrfs_put_ordered_extent(ordered); + break; + } + if (ordered->file_offset + ordered->len < start) { + btrfs_put_ordered_extent(ordered); + break; + } + btrfs_start_ordered_extent(inode, ordered, 
should_wait); + found++; + end = ordered->file_offset; + btrfs_put_ordered_extent(ordered); + if (end == 0) + break; + end--; + } + if (should_wait && found) { + should_wait = 0; + goto again; } - spin_unlock(&root->fs_info->new_trans_lock); } -int btrfs_ordered_throttle(struct btrfs_root *root, struct inode *inode) +int btrfs_add_ordered_pending(struct inode *inode, + struct btrfs_ordered_extent *ordered, + u64 start, u64 len) { - struct btrfs_transaction *cur = root->fs_info->running_transaction; - while(cur == root->fs_info->running_transaction && - atomic_read(&BTRFS_I(inode)->ordered_writeback)) { -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) - congestion_wait(WRITE, HZ/20); -#else - blk_congestion_wait(WRITE, HZ/20); -#endif - } + WARN_ON(1); return 0; +#if 0 + int ret; + struct btrfs_ordered_inode_tree *tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + + tree = &BTRFS_I(inode)->ordered_tree; + mutex_lock(&tree->mutex); + if (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)) { + ret = -EAGAIN; + goto out; + } + set_extent_ordered(io_tree, start, start + len - 1, GFP_NOFS); + ret = 0; +out: + mutex_unlock(&tree->mutex); + return ret; +#endif +} + +struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, + u64 file_offset) +{ + struct btrfs_ordered_inode_tree *tree; + struct rb_node *node; + struct btrfs_ordered_extent *entry = NULL; + + tree = &BTRFS_I(inode)->ordered_tree; + mutex_lock(&tree->mutex); + node = tree_search(tree, file_offset); + if (!node) + goto out; + + entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); + if (!offset_in_entry(entry, file_offset)) + entry = NULL; + if (entry) + atomic_inc(&entry->refs); +out: + mutex_unlock(&tree->mutex); + return entry; +} + +struct btrfs_ordered_extent * +btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset) +{ + struct btrfs_ordered_inode_tree *tree; + struct rb_node *node; + struct btrfs_ordered_extent *entry = NULL; + + tree = 
&BTRFS_I(inode)->ordered_tree; + mutex_lock(&tree->mutex); + node = tree_search(tree, file_offset); + if (!node) + goto out; + + entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); + atomic_inc(&entry->refs); +out: + mutex_unlock(&tree->mutex); + return entry; } diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 4fa78736423..33292c5fe90 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -20,24 +20,73 @@ #define __BTRFS_ORDERED_DATA__ struct btrfs_ordered_inode_tree { - rwlock_t lock; + struct mutex mutex; struct rb_root tree; + struct rb_node *last; }; +struct btrfs_sector_sum { + u64 offset; + u32 sum; +}; + +struct btrfs_ordered_sum { + u64 file_offset; + u64 len; + struct list_head list; + struct btrfs_sector_sum sums; +}; + +/* bits for the flags field */ +#define BTRFS_ORDERED_IO_DONE 0 /* set when all the pages are written */ +#define BTRFS_ORDERED_COMPLETE 1 /* set when removed from the tree */ +#define BTRFS_ORDERED_START 2 /* set when tree setup */ + +struct btrfs_ordered_extent { + u64 file_offset; + u64 start; + u64 len; + unsigned long flags; + atomic_t refs; + struct list_head list; + struct inode *inode; + wait_queue_head_t wait; + struct rb_node rb_node; +}; + + +static inline int btrfs_ordered_sum_size(struct btrfs_root *root, u64 bytes) +{ + unsigned long num_sectors = (bytes + root->sectorsize - 1) / + root->sectorsize; + return sizeof(struct btrfs_ordered_sum) + + num_sectors * sizeof(struct btrfs_sector_sum); +} + static inline void btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t) { - rwlock_init(&t->lock); + mutex_init(&t->mutex); t->tree.rb_node = NULL; + t->last = NULL; } -int btrfs_add_ordered_inode(struct inode *inode); -int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, - u64 *root_objectid, u64 *objectid, - struct inode **inode); -int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, - u64 *root_objectid, u64 *objectid, - struct 
inode **inode); -void btrfs_del_ordered_inode(struct inode *inode, int force); -int btrfs_ordered_throttle(struct btrfs_root *root, struct inode *inode); +int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry); +int btrfs_remove_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry); +int btrfs_dec_test_ordered_pending(struct inode *inode, + u64 file_offset, u64 io_size); +int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, + u64 start, u64 len); +int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum); +struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, + u64 file_offset); +void btrfs_wait_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry); +void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); +struct btrfs_ordered_extent * +btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); +int btrfs_add_ordered_pending(struct inode *inode, + struct btrfs_ordered_extent *ordered, + u64 start, u64 len); #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index a8a3cb03de5..86a5acc19ce 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -67,7 +67,6 @@ static noinline int join_transaction(struct btrfs_root *root) cur_trans->start_time = get_seconds(); INIT_LIST_HEAD(&cur_trans->pending_snapshots); list_add_tail(&cur_trans->list, &root->fs_info->trans_list); - btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree); extent_io_tree_init(&cur_trans->dirty_pages, root->fs_info->btree_inode->i_mapping, GFP_NOFS); @@ -158,10 +157,12 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, wake_up(&cur_trans->writer_wait); if (cur_trans->in_commit && throttle) { - int ret; + DEFINE_WAIT(wait); mutex_unlock(&root->fs_info->trans_mutex); - ret = wait_for_commit(root, cur_trans); - BUG_ON(ret); + prepare_to_wait(&root->fs_info->transaction_throttle, &wait, + TASK_UNINTERRUPTIBLE); + 
schedule(); + finish_wait(&root->fs_info->transaction_throttle, &wait); mutex_lock(&root->fs_info->trans_mutex); } @@ -486,58 +487,6 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, return ret; } -int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, - struct btrfs_root *root) -{ - struct btrfs_transaction *cur_trans = trans->transaction; - struct inode *inode; - u64 root_objectid = 0; - u64 objectid = 0; - int ret; - - atomic_inc(&root->fs_info->throttles); - while(1) { - ret = btrfs_find_first_ordered_inode( - &cur_trans->ordered_inode_tree, - &root_objectid, &objectid, &inode); - if (!ret) - break; - - mutex_unlock(&root->fs_info->trans_mutex); - - if (S_ISREG(inode->i_mode)) { - atomic_inc(&BTRFS_I(inode)->ordered_writeback); - filemap_fdatawrite(inode->i_mapping); - atomic_dec(&BTRFS_I(inode)->ordered_writeback); - } - iput(inode); - - mutex_lock(&root->fs_info->trans_mutex); - } - while(1) { - root_objectid = 0; - objectid = 0; - ret = btrfs_find_del_first_ordered_inode( - &cur_trans->ordered_inode_tree, - &root_objectid, &objectid, &inode); - if (!ret) - break; - mutex_unlock(&root->fs_info->trans_mutex); - - if (S_ISREG(inode->i_mode)) { - atomic_inc(&BTRFS_I(inode)->ordered_writeback); - filemap_write_and_wait(inode->i_mapping); - atomic_dec(&BTRFS_I(inode)->ordered_writeback); - } - atomic_dec(&inode->i_count); - iput(inode); - - mutex_lock(&root->fs_info->trans_mutex); - } - atomic_dec(&root->fs_info->throttles); - return 0; -} - static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, struct btrfs_pending_snapshot *pending) @@ -666,6 +615,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, extent_io_tree_init(pinned_copy, root->fs_info->btree_inode->i_mapping, GFP_NOFS); +printk("commit trans %Lu\n", trans->transid); trans->transaction->in_commit = 1; cur_trans = trans->transaction; if (cur_trans->list.prev != &root->fs_info->trans_list) { @@ -699,8 
+649,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->trans_mutex); finish_wait(&cur_trans->writer_wait, &wait); - ret = btrfs_write_ordered_inodes(trans, root); - } while (cur_trans->num_writers > 1 || (cur_trans->num_joined != joined)); @@ -736,6 +684,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_copy_pinned(root, pinned_copy); + wake_up(&root->fs_info->transaction_throttle); + mutex_unlock(&root->fs_info->trans_mutex); ret = btrfs_write_and_wait_transaction(trans, root); BUG_ON(ret); @@ -758,6 +708,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots); mutex_unlock(&root->fs_info->trans_mutex); +printk("done commit trans %Lu\n", trans->transid); kmem_cache_free(btrfs_trans_handle_cachep, trans); if (root->fs_info->closing) { diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 9ccd5a5b170..910350cd4cf 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -19,7 +19,6 @@ #ifndef __BTRFS_TRANSACTION__ #define __BTRFS_TRANSACTION__ #include "btrfs_inode.h" -#include "ordered-data.h" struct btrfs_transaction { u64 transid; @@ -31,7 +30,6 @@ struct btrfs_transaction { struct list_head list; struct extent_io_tree dirty_pages; unsigned long start_time; - struct btrfs_ordered_inode_tree ordered_inode_tree; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; struct list_head pending_snapshots; @@ -88,8 +86,6 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); int btrfs_clean_old_snapshots(struct btrfs_root *root); int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, - struct btrfs_root *root); int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, struct btrfs_root *root); #endif -- cgit v1.2.3-70-g09d2 From f9295749388f82c8d2f485e99c72cd7c7876a99b Mon Sep 17 
00:00:00 2001 From: Chris Mason Date: Thu, 17 Jul 2008 12:54:14 -0400 Subject: btrfs_start_transaction: wait for commits in progress to finish btrfs_commit_transaction has to loop waiting for any writers in the transaction to finish before it can proceed. btrfs_start_transaction should be polite and not join a transaction that is in the process of being finished off. There are a few places that can't wait, basically the ones doing IO that might be needed to finish the transaction. For them, btrfs_join_transaction is added. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 10 +++++----- fs/btrfs/transaction.c | 43 ++++++++++++++++++++++++++++++++++++++++--- fs/btrfs/transaction.h | 3 +++ 6 files changed, 51 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4ddc8a8f82c..acbce542d29 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -513,6 +513,7 @@ struct btrfs_fs_info { u64 alloc_start; struct btrfs_transaction *running_transaction; wait_queue_head_t transaction_throttle; + wait_queue_head_t transaction_wait; struct btrfs_super_block super_copy; struct btrfs_super_block super_for_commit; struct block_device *__bdev; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 66466d125c0..99bb385c298 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1291,6 +1291,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->cleaner_mutex); mutex_init(&fs_info->volume_mutex); init_waitqueue_head(&fs_info->transaction_throttle); + init_waitqueue_head(&fs_info->transaction_wait); #if 0 ret = add_hasher(fs_info, "crc32c"); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 3e4e5c227c0..d6505892cd5 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -251,7 +251,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, end_of_last_block = start_pos + num_bytes - 1; 
lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); - trans = btrfs_start_transaction(root, 1); + trans = btrfs_join_transaction(root, 1); if (!trans) { err = -ENOMEM; goto out_unlock; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index baf46017d0d..0a687326c0b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -116,7 +116,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; int ret = 0; - trans = btrfs_start_transaction(root, 1); + trans = btrfs_join_transaction(root, 1); BUG_ON(!trans); btrfs_set_trans_block_group(trans, inode); @@ -502,7 +502,7 @@ int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, return 0; } - trans = btrfs_start_transaction(root, 1); + trans = btrfs_join_transaction(root, 1); ordered_extent = btrfs_lookup_ordered_extent(inode, start); BUG_ON(!ordered_extent); @@ -1812,7 +1812,7 @@ int btrfs_write_inode(struct inode *inode, int wait) int ret = 0; if (wait) { - trans = btrfs_start_transaction(root, 1); + trans = btrfs_join_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); ret = btrfs_commit_transaction(trans, root); } @@ -1830,7 +1830,7 @@ void btrfs_dirty_inode(struct inode *inode) struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; - trans = btrfs_start_transaction(root, 1); + trans = btrfs_join_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); btrfs_update_inode(trans, root, inode); btrfs_end_transaction(trans, root); @@ -2395,7 +2395,7 @@ again: free_extent_map(em); em = NULL; btrfs_release_path(root, path); - trans = btrfs_start_transaction(root, 1); + trans = btrfs_join_transaction(root, 1); goto again; } write_extent_buffer(leaf, map + pg_offset, ptr, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 86a5acc19ce..05823904ecb 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -62,6 +62,7 @@ static noinline int 
join_transaction(struct btrfs_root *root) init_waitqueue_head(&cur_trans->writer_wait); init_waitqueue_head(&cur_trans->commit_wait); cur_trans->in_commit = 0; + cur_trans->blocked = 0; cur_trans->use_count = 1; cur_trans->commit_done = 0; cur_trans->start_time = get_seconds(); @@ -99,14 +100,36 @@ static noinline int record_root_in_trans(struct btrfs_root *root) return 0; } -struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, - int num_blocks) +struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, + int num_blocks, int join) { struct btrfs_trans_handle *h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); + struct btrfs_transaction *cur_trans; int ret; mutex_lock(&root->fs_info->trans_mutex); + cur_trans = root->fs_info->running_transaction; + if (cur_trans && cur_trans->blocked && !join) { + DEFINE_WAIT(wait); + cur_trans->use_count++; + while(1) { + prepare_to_wait(&root->fs_info->transaction_wait, &wait, + TASK_UNINTERRUPTIBLE); + if (cur_trans->blocked) { + mutex_unlock(&root->fs_info->trans_mutex); + schedule(); + mutex_lock(&root->fs_info->trans_mutex); + finish_wait(&root->fs_info->transaction_wait, + &wait); + } else { + finish_wait(&root->fs_info->transaction_wait, + &wait); + break; + } + } + put_transaction(cur_trans); + } ret = join_transaction(root); BUG_ON(ret); @@ -123,6 +146,17 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, return h; } +struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, + int num_blocks) +{ + return start_transaction(root, num_blocks, 0); +} +struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, + int num_blocks) +{ + return start_transaction(root, num_blocks, 1); +} + static noinline int wait_for_commit(struct btrfs_root *root, struct btrfs_transaction *commit) { @@ -156,7 +190,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, if (waitqueue_active(&cur_trans->writer_wait)) 
wake_up(&cur_trans->writer_wait); - if (cur_trans->in_commit && throttle) { + if (0 && cur_trans->in_commit && throttle) { DEFINE_WAIT(wait); mutex_unlock(&root->fs_info->trans_mutex); prepare_to_wait(&root->fs_info->transaction_throttle, &wait, @@ -617,6 +651,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, printk("commit trans %Lu\n", trans->transid); trans->transaction->in_commit = 1; + trans->transaction->blocked = 1; cur_trans = trans->transaction; if (cur_trans->list.prev != &root->fs_info->trans_list) { prev_trans = list_entry(cur_trans->list.prev, @@ -684,7 +719,9 @@ printk("commit trans %Lu\n", trans->transid); btrfs_copy_pinned(root, pinned_copy); + trans->transaction->blocked = 0; wake_up(&root->fs_info->transaction_throttle); + wake_up(&root->fs_info->transaction_wait); mutex_unlock(&root->fs_info->trans_mutex); ret = btrfs_write_and_wait_transaction(trans, root); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 910350cd4cf..11fbdeceb26 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -27,6 +27,7 @@ struct btrfs_transaction { int in_commit; int use_count; int commit_done; + int blocked; struct list_head list; struct extent_io_tree dirty_pages; unsigned long start_time; @@ -75,6 +76,8 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, int num_blocks); +struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, + int num_blocks); int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, -- cgit v1.2.3-70-g09d2 From ab78c84de1ce4db1b2a2cef361625ad80abbab3f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 29 Jul 2008 16:15:18 -0400 Subject: Btrfs: Throttle operations if the reference cache gets too large A large reference cache is directly related to a lot of work 
pending for the cleaner thread. This throttles back new operations based on the size of the reference cache so the cleaner thread will be able to keep up. Overall, this actually makes the FS faster because the cleaner thread will be more likely to find things in cache. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 7 ++++-- fs/btrfs/extent-tree.c | 1 + fs/btrfs/file.c | 1 + fs/btrfs/inode.c | 10 ++++----- fs/btrfs/transaction.c | 59 +++++++++++++++++++++++++++++++++++++------------- fs/btrfs/transaction.h | 1 + 7 files changed, 58 insertions(+), 22 deletions(-) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4eca0aa1ce7..5517dfc6f71 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -580,6 +580,7 @@ struct btrfs_fs_info { int do_barriers; int closing; atomic_t throttles; + atomic_t throttle_gen; u64 total_pinned; struct list_head dirty_cowonly_roots; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index eccdf13a95a..27ffa9b7ddc 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1171,8 +1171,10 @@ static int transaction_kthread(void *arg) vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); mutex_lock(&root->fs_info->transaction_kthread_mutex); - printk("btrfs: total reference cache size %Lu\n", - root->fs_info->total_ref_cache_size); + if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) { + printk("btrfs: total reference cache size %Lu\n", + root->fs_info->total_ref_cache_size); + } mutex_lock(&root->fs_info->trans_mutex); cur = root->fs_info->running_transaction; @@ -1256,6 +1258,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_mapping_init(&fs_info->mapping_tree); atomic_set(&fs_info->nr_async_submits, 0); atomic_set(&fs_info->throttles, 0); + atomic_set(&fs_info->throttle_gen, 0); fs_info->sb = sb; fs_info->max_extent = (u64)-1; fs_info->max_inline = 8192 * 1024; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 
0e294cfaa60..6290cf41d64 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2650,6 +2650,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root } } while(1) { + atomic_inc(&root->fs_info->throttle_gen); wret = walk_down_tree(trans, root, path, &level); if (wret > 0) break; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 3efec25e34b..ded5281f846 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -974,6 +974,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); if (num_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) btrfs_btree_balance_dirty(root, 1); + btrfs_throttle(root); cond_resched(); } out: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4f977ea5497..7c87f863d6f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2482,7 +2482,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, btrfs_update_inode_block_group(trans, dir); out_unlock: nr = trans->blocks_used; - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); fail: if (drop_inode) { inode_dec_link_count(inode); @@ -2535,7 +2535,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, drop_inode = 1; nr = trans->blocks_used; - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); fail: if (drop_inode) { inode_dec_link_count(inode); @@ -2609,7 +2609,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) out_fail: nr = trans->blocks_used; - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); out_unlock: if (drop_on_err) @@ -3434,7 +3434,7 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, goto out_fail; out_fail: - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); out_unlock: return ret; } @@ -3548,7 +3548,7 @@ static int btrfs_symlink(struct inode 
*dir, struct dentry *dentry, out_unlock: nr = trans->blocks_used; - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); out_fail: if (drop_inode) { inode_dec_link_count(inode); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index fcef3cae0c9..b8be6703189 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -202,35 +202,64 @@ static noinline int wait_for_commit(struct btrfs_root *root, return 0; } +void btrfs_throttle(struct btrfs_root *root) +{ + struct btrfs_fs_info *info = root->fs_info; + +harder: + if (atomic_read(&info->throttles)) { + DEFINE_WAIT(wait); + int thr; + int harder_count = 0; + thr = atomic_read(&info->throttle_gen); + + do { + prepare_to_wait(&info->transaction_throttle, + &wait, TASK_UNINTERRUPTIBLE); + if (!atomic_read(&info->throttles)) { + finish_wait(&info->transaction_throttle, &wait); + break; + } + schedule(); + finish_wait(&info->transaction_throttle, &wait); + } while (thr == atomic_read(&info->throttle_gen)); + + if (harder_count < 5 && + info->total_ref_cache_size > 5 * 1024 * 1024) { + harder_count++; + goto harder; + } + + if (harder_count < 10 && + info->total_ref_cache_size > 10 * 1024 * 1024) { + harder_count++; + goto harder; + } + } +} + static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root, int throttle) { struct btrfs_transaction *cur_trans; + struct btrfs_fs_info *info = root->fs_info; - mutex_lock(&root->fs_info->trans_mutex); - cur_trans = root->fs_info->running_transaction; + mutex_lock(&info->trans_mutex); + cur_trans = info->running_transaction; WARN_ON(cur_trans != trans->transaction); WARN_ON(cur_trans->num_writers < 1); cur_trans->num_writers--; if (waitqueue_active(&cur_trans->writer_wait)) wake_up(&cur_trans->writer_wait); - - if (throttle && atomic_read(&root->fs_info->throttles)) { - DEFINE_WAIT(wait); - mutex_unlock(&root->fs_info->trans_mutex); - prepare_to_wait(&root->fs_info->transaction_throttle, &wait, - 
TASK_UNINTERRUPTIBLE); - if (atomic_read(&root->fs_info->throttles)) - schedule(); - finish_wait(&root->fs_info->transaction_throttle, &wait); - mutex_lock(&root->fs_info->trans_mutex); - } - put_transaction(cur_trans); - mutex_unlock(&root->fs_info->trans_mutex); + mutex_unlock(&info->trans_mutex); memset(trans, 0, sizeof(*trans)); kmem_cache_free(btrfs_trans_handle_cachep, trans); + + if (throttle) + btrfs_throttle(root); + return 0; } diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 11fbdeceb26..df2ca2aad1c 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -91,4 +91,5 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, struct btrfs_root *root); +void btrfs_throttle(struct btrfs_root *root); #endif -- cgit v1.2.3-70-g09d2 From f321e4910398cf7922265d269fb17fd26f312571 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Wed, 30 Jul 2008 09:26:11 -0400 Subject: Btrfs: Update and fix mount -o nodatacow To check whether a given file extent is referenced by multiple snapshots, the checker walks down the fs tree through the dead root and checks all tree blocks in the path. We can easily detect whether a given tree block is directly referenced by another snapshot. We can also detect any indirect reference from another snapshot by checking the reference's generation. The checker can always detect multiple references, but can't reliably detect cases of a single reference. So btrfs may do file data cow even if there is only one reference.
Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 9 +-- fs/btrfs/extent-tree.c | 202 ++++++++++++++++++++++++++++++------------------- fs/btrfs/inode.c | 6 +- fs/btrfs/transaction.c | 16 ++-- fs/btrfs/transaction.h | 5 ++ 5 files changed, 142 insertions(+), 96 deletions(-) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5517dfc6f71..83422088c62 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -617,7 +617,7 @@ struct btrfs_leaf_ref_tree { * in ram representation of the tree. extent_root is used for all allocations * and for the extent tree extent_root root. */ -struct dirty_root; +struct btrfs_dirty_root; struct btrfs_root { struct extent_buffer *node; @@ -627,7 +627,7 @@ struct btrfs_root { struct extent_buffer *commit_root; struct btrfs_leaf_ref_tree *ref_tree; struct btrfs_leaf_ref_tree ref_tree_struct; - struct dirty_root *dirty_root; + struct btrfs_dirty_root *dirty_root; struct btrfs_root_item root_item; struct btrfs_key root_key; @@ -1399,9 +1399,8 @@ static inline struct dentry *fdentry(struct file *file) { } /* extent-tree.c */ -u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, - struct btrfs_path *count_path, - u64 expected_owner, u64 first_extent); +int btrfs_cross_ref_exists(struct btrfs_root *root, + struct btrfs_key *key, u64 bytenr); int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6290cf41d64..fe1ddbd2bfd 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -802,70 +802,57 @@ out: return 0; } -u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, - struct btrfs_path *count_path, - u64 expected_owner, - u64 first_extent) + +static int get_reference_status(struct btrfs_root *root, u64 bytenr, + u64 parent_gen, u64 ref_objectid, + u64 *min_generation, u32 *ref_count) { struct btrfs_root 
*extent_root = root->fs_info->extent_root; struct btrfs_path *path; - u64 bytenr; - u64 found_objectid; - u64 found_owner; + struct extent_buffer *leaf; + struct btrfs_extent_ref *ref_item; + struct btrfs_key key; + struct btrfs_key found_key; u64 root_objectid = root->root_key.objectid; - u32 total_count = 0; - u32 extent_refs; - u32 cur_count; + u64 ref_generation; u32 nritems; int ret; - struct btrfs_key key; - struct btrfs_key found_key; - struct extent_buffer *l; - struct btrfs_extent_item *item; - struct btrfs_extent_ref *ref_item; - int level = -1; - /* FIXME, needs locking */ - BUG(); - - mutex_lock(&root->fs_info->alloc_mutex); - path = btrfs_alloc_path(); -again: - if (level == -1) - bytenr = first_extent; - else - bytenr = count_path->nodes[level]->start; - - cur_count = 0; key.objectid = bytenr; key.offset = 0; + key.type = BTRFS_EXTENT_ITEM_KEY; - btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); + path = btrfs_alloc_path(); + mutex_lock(&root->fs_info->alloc_mutex); ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); if (ret < 0) goto out; BUG_ON(ret == 0); - l = path->nodes[0]; - btrfs_item_key_to_cpu(l, &found_key, path->slots[0]); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (found_key.objectid != bytenr || found_key.type != BTRFS_EXTENT_ITEM_KEY) { + ret = 1; goto out; } - item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); - extent_refs = btrfs_extent_refs(l, item); + *ref_count = 0; + *min_generation = (u64)-1; + while (1) { - l = path->nodes[0]; - nritems = btrfs_header_nritems(l); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); if (path->slots[0] >= nritems) { ret = btrfs_next_leaf(extent_root, path); + if (ret < 0) + goto out; if (ret == 0) continue; break; } - btrfs_item_key_to_cpu(l, &found_key, path->slots[0]); + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (found_key.objectid != bytenr) break; @@ -874,57 +861,120 @@ again: continue; } - 
cur_count++; - ref_item = btrfs_item_ptr(l, path->slots[0], + ref_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); - found_objectid = btrfs_ref_root(l, ref_item); - - if (found_objectid != root_objectid) { - total_count = 2; - goto out; - } - if (level == -1) { - found_owner = btrfs_ref_objectid(l, ref_item); - if (found_owner != expected_owner) { - total_count = 2; - goto out; - } - /* - * nasty. we don't count a reference held by - * the running transaction. This allows nodatacow - * to avoid cow most of the time - */ - if (found_owner >= BTRFS_FIRST_FREE_OBJECTID && - btrfs_ref_generation(l, ref_item) == - root->fs_info->generation) { - extent_refs--; - } + ref_generation = btrfs_ref_generation(leaf, ref_item); + /* + * For (parent_gen > 0 && parent_gen > ref_gen): + * + * we reach here through the oldest root, therefore + * all other reference from same snapshot should have + * a larger generation. + */ + if ((root_objectid != btrfs_ref_root(leaf, ref_item)) || + (parent_gen > 0 && parent_gen > ref_generation) || + (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID && + ref_objectid != btrfs_ref_objectid(leaf, ref_item))) { + if (ref_count) + *ref_count = 2; + break; } - total_count = 1; + + *ref_count = 1; + if (*min_generation > ref_generation) + *min_generation = ref_generation; + path->slots[0]++; } - /* - * if there is more than one reference against a data extent, - * we have to assume the other ref is another snapshot - */ - if (level == -1 && extent_refs > 1) { - total_count = 2; + ret = 0; +out: + mutex_unlock(&root->fs_info->alloc_mutex); + btrfs_free_path(path); + return ret; +} + +int btrfs_cross_ref_exists(struct btrfs_root *root, + struct btrfs_key *key, u64 bytenr) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *old_root; + struct btrfs_path *path = NULL; + struct extent_buffer *eb; + struct btrfs_file_extent_item *item; + u64 ref_generation; + u64 min_generation; + u64 extent_start; + u32 ref_count; + int level; + int 
ret; + + BUG_ON(key->type != BTRFS_EXTENT_DATA_KEY); + ret = get_reference_status(root, bytenr, 0, key->objectid, + &min_generation, &ref_count); + if (ret) + return ret; + + if (ref_count != 1) + return 1; + + trans = btrfs_start_transaction(root, 0); + old_root = root->dirty_root->root; + ref_generation = old_root->root_key.offset; + + /* all references are created in running transaction */ + if (min_generation > ref_generation) { + ret = 0; goto out; } - if (cur_count == 0) { - total_count = 0; + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; goto out; } - if (level >= 0 && root->node == count_path->nodes[level]) + + path->skip_locking = 1; + /* if no item found, the extent is referenced by other snapshot */ + ret = btrfs_search_slot(NULL, old_root, key, path, 0, 0); + if (ret) goto out; - level++; - btrfs_release_path(root, path); - goto again; + eb = path->nodes[0]; + item = btrfs_item_ptr(eb, path->slots[0], + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(eb, item) != BTRFS_FILE_EXTENT_REG || + btrfs_file_extent_disk_bytenr(eb, item) != bytenr) { + ret = 1; + goto out; + } + + for (level = BTRFS_MAX_LEVEL - 1; level >= -1; level--) { + if (level >= 0) { + eb = path->nodes[level]; + if (!eb) + continue; + extent_start = eb->start; + } else + extent_start = bytenr; + + ret = get_reference_status(root, extent_start, ref_generation, + 0, &min_generation, &ref_count); + if (ret) + goto out; + + if (ref_count != 1) { + ret = 1; + goto out; + } + if (level >= 0) + ref_generation = btrfs_header_generation(eb); + } + ret = 0; out: - btrfs_free_path(path); - mutex_unlock(&root->fs_info->alloc_mutex); - return total_count; + if (path) + btrfs_free_path(path); + btrfs_end_transaction(trans, root); + return ret; } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7c87f863d6f..3aa82cec6bf 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -249,11 +249,8 @@ 
again: if (bytenr == 0) goto not_found; - if (btrfs_count_snapshots_in_path(root, path, inode->i_ino, - bytenr) != 1) { + if (btrfs_cross_ref_exists(root, &found_key, bytenr)) goto not_found; - } - /* * we may be called by the resizer, make sure we're inside * the limits of the FS @@ -277,6 +274,7 @@ loop: goto again; not_found: + btrfs_release_path(root, path); cow_file_range(inode, start, end); start = end + 1; goto loop; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b8be6703189..216f3157162 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -32,12 +32,6 @@ extern struct kmem_cache *btrfs_transaction_cachep; #define BTRFS_ROOT_TRANS_TAG 0 -struct dirty_root { - struct list_head list; - struct btrfs_root *root; - struct btrfs_root *latest_root; -}; - static noinline void put_transaction(struct btrfs_transaction *transaction) { WARN_ON(transaction->use_count == 0); @@ -91,7 +85,7 @@ static noinline int join_transaction(struct btrfs_root *root) static noinline int record_root_in_trans(struct btrfs_root *root) { - struct dirty_root *dirty; + struct btrfs_dirty_root *dirty; u64 running_trans_id = root->fs_info->running_transaction->transid; if (root->ref_cows && root->last_trans < running_trans_id) { WARN_ON(root == root->fs_info->extent_root); @@ -372,7 +366,7 @@ int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest, struct list_head *dead_list) { - struct dirty_root *dirty; + struct btrfs_dirty_root *dirty; dirty = kmalloc(sizeof(*dirty), GFP_NOFS); if (!dirty) @@ -387,7 +381,7 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, struct radix_tree_root *radix, struct list_head *list) { - struct dirty_root *dirty; + struct btrfs_dirty_root *dirty; struct btrfs_root *gang[8]; struct btrfs_root *root; int i; @@ -498,7 +492,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) static noinline int drop_dirty_roots(struct btrfs_root *tree_root, struct list_head *list) { - struct 
dirty_root *dirty; + struct btrfs_dirty_root *dirty; struct btrfs_trans_handle *trans; unsigned long nr; u64 num_bytes; @@ -509,7 +503,7 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, while(!list_empty(list)) { struct btrfs_root *root; - dirty = list_entry(list->prev, struct dirty_root, list); + dirty = list_entry(list->prev, struct btrfs_dirty_root, list); list_del_init(&dirty->list); num_bytes = btrfs_root_used(&dirty->root->root_item); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index df2ca2aad1c..f5adb23151f 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -52,6 +52,11 @@ struct btrfs_pending_snapshot { struct list_head list; }; +struct btrfs_dirty_root { + struct list_head list; + struct btrfs_root *root; + struct btrfs_root *latest_root; +}; static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans, struct inode *inode) -- cgit v1.2.3-70-g09d2 From 9ca9ee09c176a814189063c8b88f75c8f8e4ad19 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 4 Aug 2008 10:41:27 -0400 Subject: Btrfs: fix ioctl-initiated transactions vs wait_current_trans() Commit 597:466b27332893 (btrfs_start_transaction: wait for commits in progress) breaks the transaction start/stop ioctls by making btrfs_start_transaction conditionally wait for the next transaction to start. If an application artificially is holding a transaction open, things deadlock. This workaround maintains a count of open ioctl-initiated transactions in fs_info, and avoids wait_current_trans() if any are currently open (in start_transaction() and btrfs_throttle()). The start transaction ioctl uses a new btrfs_start_ioctl_transaction() that _does_ call wait_current_trans(), effectively pushing the join/wait decision to the outer ioctl-initiated transaction. 
This more or less neuters btrfs_throttle() when ioctl-initiated transactions are in use, but that seems like a pretty fundamental consequence of wrapping lots of write()'s in a transaction. Btrfs has no way to tell if the application considers a given operation as part of its transaction. Obviously, if the transaction start/stop ioctls aren't being used, there is no effect on current behavior. Signed-off-by: Sage Weil --- ctree.h | 1 + ioctl.c | 12 +++++++++++- transaction.c | 18 +++++++++++++----- transaction.h | 2 ++ 4 files changed, 27 insertions(+), 6 deletions(-) Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/ioctl.c | 12 +++++++++++- fs/btrfs/transaction.c | 18 +++++++++++++----- fs/btrfs/transaction.h | 2 ++ 4 files changed, 27 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9b025960bbd..62499dd761b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -518,6 +518,7 @@ struct btrfs_fs_info { u64 generation; u64 last_trans_committed; + u64 open_ioctl_trans; unsigned long mount_opt; u64 max_extent; u64 max_inline; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 224da287b3e..0b63c3c77cf 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -715,7 +715,12 @@ long btrfs_ioctl_trans_start(struct file *file) ret = -EINPROGRESS; goto out; } - trans = btrfs_start_transaction(root, 0); + + mutex_lock(&root->fs_info->trans_mutex); + root->fs_info->open_ioctl_trans++; + mutex_unlock(&root->fs_info->trans_mutex); + + trans = btrfs_start_ioctl_transaction(root, 0); if (trans) file->private_data = trans; else @@ -745,6 +750,11 @@ long btrfs_ioctl_trans_end(struct file *file) } btrfs_end_transaction(trans, root); file->private_data = 0; + + mutex_lock(&root->fs_info->trans_mutex); + root->fs_info->open_ioctl_trans--; + mutex_unlock(&root->fs_info->trans_mutex); + out: return ret; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index cf73342e821..a2c821e3c3a 
100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -152,14 +152,14 @@ static void wait_current_trans(struct btrfs_root *root) } struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, - int num_blocks, int join) + int num_blocks, int wait) { struct btrfs_trans_handle *h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); int ret; mutex_lock(&root->fs_info->trans_mutex); - if (!join) + if ((wait == 1 && !root->fs_info->open_ioctl_trans) || wait == 2) wait_current_trans(root); ret = join_transaction(root); BUG_ON(ret); @@ -180,14 +180,21 @@ struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, int num_blocks) { - return start_transaction(root, num_blocks, 0); + return start_transaction(root, num_blocks, 1); } struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, int num_blocks) { - return start_transaction(root, num_blocks, 1); + return start_transaction(root, num_blocks, 0); } +struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, + int num_blocks) +{ + return start_transaction(r, num_blocks, 2); +} + + static noinline int wait_for_commit(struct btrfs_root *root, struct btrfs_transaction *commit) { @@ -247,7 +254,8 @@ harder: void btrfs_throttle(struct btrfs_root *root) { mutex_lock(&root->fs_info->trans_mutex); - wait_current_trans(root); + if (!root->fs_info->open_ioctl_trans) + wait_current_trans(root); mutex_unlock(&root->fs_info->trans_mutex); throttle_on_drops(root); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index f5adb23151f..2c73caeebb2 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -83,6 +83,8 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, int num_blocks); struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, int num_blocks); +struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct 
btrfs_root *r, + int num_blocks); int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, -- cgit v1.2.3-70-g09d2 From b48652c101cce7a54379a49cc0cf854cec2c94e2 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Mon, 4 Aug 2008 23:23:47 -0400 Subject: Btrfs: Various small fixes. This trivial patch contains two locking fixes and an off-by-one fix. --- Signed-off-by: Chris Mason --- fs/btrfs/ordered-data.c | 4 ++-- fs/btrfs/root-tree.c | 3 +-- fs/btrfs/super.c | 2 ++ fs/btrfs/transaction.c | 9 +++++---- fs/btrfs/transaction.h | 3 +-- 5 files changed, 11 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 676e4bd65c5..db200e6baf7 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -548,7 +548,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, */ test = rb_entry(node, struct btrfs_ordered_extent, rb_node); if (test->file_offset > entry_end(ordered)) { - i_size_test = test->file_offset - 1; + i_size_test = test->file_offset; } } else { i_size_test = i_size_read(inode); @@ -561,7 +561,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, * disk_i_size to the end of the region. 
*/ if (i_size_test > entry_end(ordered) && - !test_range_bit(io_tree, entry_end(ordered), i_size_test, + !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, EXTENT_DELALLOC, 0)) { new_i_size = min_t(u64, i_size_test, i_size_read(inode)); } diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index a5c0e98b5ae..36726696e58 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -209,8 +209,7 @@ again: goto err; } - ret = btrfs_add_dead_root(dead_root, latest, - &root->fs_info->dead_roots); + ret = btrfs_add_dead_root(dead_root, latest); if (ret) goto err; goto again; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a6a418b6894..eb4b357d05e 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -449,7 +449,9 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, s->s_flags |= MS_ACTIVE; } + mutex_lock(&s->s_root->d_inode->i_mutex); root = lookup_one_len(subvol_name, s->s_root, strlen(subvol_name)); + mutex_unlock(&s->s_root->d_inode->i_mutex); if (IS_ERR(root)) { up_write(&s->s_umount); deactivate_super(s); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index a2c821e3c3a..ebf5362da1d 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -389,9 +389,7 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, return 0; } -int btrfs_add_dead_root(struct btrfs_root *root, - struct btrfs_root *latest, - struct list_head *dead_list) +int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest) { struct btrfs_dirty_root *dirty; @@ -400,7 +398,10 @@ int btrfs_add_dead_root(struct btrfs_root *root, return -ENOMEM; dirty->root = root; dirty->latest_root = latest; - list_add(&dirty->list, dead_list); + + mutex_lock(&root->fs_info->trans_mutex); + list_add(&dirty->list, &latest->fs_info->dead_roots); + mutex_unlock(&root->fs_info->trans_mutex); return 0; } diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 2c73caeebb2..598baa31241 100644 --- a/fs/btrfs/transaction.h 
+++ b/fs/btrfs/transaction.h @@ -90,8 +90,7 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest, - struct list_head *dead_list); +int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest); int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); int btrfs_clean_old_snapshots(struct btrfs_root *root); int btrfs_commit_transaction(struct btrfs_trans_handle *trans, -- cgit v1.2.3-70-g09d2 From e02119d5a7b4396c5a872582fddc8bd6d305a70a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 5 Sep 2008 16:13:11 -0400 Subject: Btrfs: Add a write ahead tree log to optimize synchronous operations File syncs and directory syncs are optimized by copying their items into a special (copy-on-write) log tree. There is one log tree per subvolume and the btrfs super block points to a tree of log tree roots. After a crash, items are copied out of the log tree and back into the subvolume. See tree-log.c for all the details. 
Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 3 +- fs/btrfs/btrfs_inode.h | 8 + fs/btrfs/compat.h | 15 + fs/btrfs/ctree.c | 73 +- fs/btrfs/ctree.h | 72 +- fs/btrfs/dir-item.c | 3 + fs/btrfs/disk-io.c | 138 ++- fs/btrfs/disk-io.h | 8 +- fs/btrfs/extent-tree.c | 93 +- fs/btrfs/file.c | 39 +- fs/btrfs/inode.c | 261 ++--- fs/btrfs/root-tree.c | 5 +- fs/btrfs/transaction.c | 45 +- fs/btrfs/transaction.h | 1 + fs/btrfs/tree-defrag.c | 4 +- fs/btrfs/tree-log.c | 2804 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/tree-log.h | 41 + 17 files changed, 3408 insertions(+), 205 deletions(-) create mode 100644 fs/btrfs/tree-log.c create mode 100644 fs/btrfs/tree-log.h (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 3a01065d4ef..b7addbfd8c2 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -7,8 +7,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ - ref-cache.o acl.o export.o - + ref-cache.o export.o tree-log.o acl.o else # Normal Makefile diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 111f90524ae..fcc8cf27e90 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -33,6 +33,7 @@ struct btrfs_inode { struct extent_io_tree io_failure_tree; struct mutex csum_mutex; struct mutex extent_mutex; + struct mutex log_mutex; struct inode vfs_inode; struct btrfs_ordered_inode_tree ordered_tree; @@ -44,10 +45,17 @@ struct btrfs_inode { struct list_head delalloc_inodes; + /* full 64 bit generation number */ + u64 generation; + /* * transid of the trans_handle that last modified this inode */ u64 last_trans; + /* + * transid that last logged this inode + */ + u64 logged_trans; u64 delalloc_bytes; u64 disk_i_size; u32 flags; diff --git a/fs/btrfs/compat.h b/fs/btrfs/compat.h index 
d45fb37887b..b0ed1887d9b 100644 --- a/fs/btrfs/compat.h +++ b/fs/btrfs/compat.h @@ -22,6 +22,21 @@ static inline struct dentry *d_obtain_alias(struct inode *inode) } #endif +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) +static inline void btrfs_drop_nlink(struct inode *inode) +{ + inode->i_nlink--; +} + +static inline void btrfs_inc_nlink(struct inode *inode) +{ + inode->i_nlink++; +} +#else +# define btrfs_drop_nlink(inode) drop_nlink(inode) +# define btrfs_inc_nlink(inode) inc_nlink(inode) +#endif + /* * Even if AppArmor isn't enabled, it still has different prototypes. * Add more distro/version pairs here to declare which has AppArmor applied. diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7114faafa9d..579124043d9 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -60,7 +60,7 @@ void btrfs_free_path(struct btrfs_path *p) kmem_cache_free(btrfs_path_cachep, p); } -void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) +void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) { int i; @@ -176,7 +176,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, return 0; } -int __btrfs_cow_block(struct btrfs_trans_handle *trans, +int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, @@ -294,7 +294,7 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, return 0; } -int btrfs_cow_block(struct btrfs_trans_handle *trans, +int noinline btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, struct extent_buffer **cow_ret, u64 prealloc_dest) @@ -677,9 +677,10 @@ static int noinline check_block(struct btrfs_root *root, * * slot may point to max if the key is bigger than all of the keys */ -static int generic_bin_search(struct extent_buffer *eb, unsigned long p, - int item_size, struct btrfs_key *key, - int max, int 
*slot) +static noinline int generic_bin_search(struct extent_buffer *eb, + unsigned long p, + int item_size, struct btrfs_key *key, + int max, int *slot) { int low = 0; int high = max; @@ -765,7 +766,7 @@ static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, return -1; } -static struct extent_buffer *read_node_slot(struct btrfs_root *root, +static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, struct extent_buffer *parent, int slot) { int level = btrfs_header_level(parent); @@ -781,7 +782,7 @@ static struct extent_buffer *read_node_slot(struct btrfs_root *root, btrfs_node_ptr_generation(parent, slot)); } -static int balance_level(struct btrfs_trans_handle *trans, +static noinline int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { @@ -1128,8 +1129,9 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, /* * readahead one full node of leaves */ -static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, - int level, int slot, u64 objectid) +static noinline void reada_for_search(struct btrfs_root *root, + struct btrfs_path *path, + int level, int slot, u64 objectid) { struct extent_buffer *node; struct btrfs_disk_key disk_key; @@ -1201,7 +1203,8 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, } } -static void unlock_up(struct btrfs_path *path, int level, int lowest_unlock) +static noinline void unlock_up(struct btrfs_path *path, int level, + int lowest_unlock) { int i; int skip_level = level; @@ -1759,8 +1762,9 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root * * returns 0 on success and < 0 on failure */ -static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, int level) +static noinline int split_node(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int level) { u64 
root_gen; struct extent_buffer *c; @@ -1874,7 +1878,8 @@ static int leaf_space_used(struct extent_buffer *l, int start, int nr) * the start of the leaf data. IOW, how much room * the leaf has left for both items and data */ -int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf) +int noinline btrfs_leaf_free_space(struct btrfs_root *root, + struct extent_buffer *leaf) { int nritems = btrfs_header_nritems(leaf); int ret; @@ -2283,9 +2288,11 @@ out: * * returns 0 if all went well and < 0 on failure. */ -static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_key *ins_key, - struct btrfs_path *path, int data_size, int extend) +static noinline int split_leaf(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_key *ins_key, + struct btrfs_path *path, int data_size, + int extend) { u64 root_gen; struct extent_buffer *l; @@ -3079,6 +3086,7 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) * was nothing in the tree that matched the search criteria. 
*/ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, + struct btrfs_key *max_key, struct btrfs_path *path, int cache_only, u64 min_trans) { @@ -3093,6 +3101,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, again: cur = btrfs_lock_root_node(root); level = btrfs_header_level(cur); + WARN_ON(path->nodes[level]); path->nodes[level] = cur; path->locks[level] = 1; @@ -3107,6 +3116,8 @@ again: /* at level = 0, we're done, setup the path and exit */ if (level == 0) { + if (slot >= nritems) + goto find_next_key; ret = 0; path->slots[level] = slot; btrfs_item_key_to_cpu(cur, &found_key, slot); @@ -3123,6 +3134,8 @@ again: u64 blockptr; u64 gen; struct extent_buffer *tmp; + struct btrfs_disk_key disk_key; + blockptr = btrfs_node_blockptr(cur, slot); gen = btrfs_node_ptr_generation(cur, slot); if (gen < min_trans) { @@ -3132,6 +3145,14 @@ again: if (!cache_only) break; + if (max_key) { + btrfs_node_key(cur, &disk_key, slot); + if (comp_keys(&disk_key, max_key) >= 0) { + ret = 1; + goto out; + } + } + tmp = btrfs_find_tree_block(root, blockptr, btrfs_level_size(root, level - 1)); @@ -3143,14 +3164,16 @@ again: free_extent_buffer(tmp); slot++; } +find_next_key: /* * we didn't find a candidate key in this node, walk forward * and find another one */ if (slot >= nritems) { - ret = btrfs_find_next_key(root, path, min_key, level, + path->slots[level] = slot; + sret = btrfs_find_next_key(root, path, min_key, level, cache_only, min_trans); - if (ret == 0) { + if (sret == 0) { btrfs_release_path(root, path); goto again; } else { @@ -3351,6 +3374,7 @@ int btrfs_previous_item(struct btrfs_root *root, { struct btrfs_key found_key; struct extent_buffer *leaf; + u32 nritems; int ret; while(1) { @@ -3362,9 +3386,20 @@ int btrfs_previous_item(struct btrfs_root *root, path->slots[0]--; } leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + if (nritems == 0) + return 1; + if (path->slots[0] == nritems) + path->slots[0]--; + 
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (found_key.type == type) return 0; + if (found_key.objectid < min_objectid) + break; + if (found_key.objectid == min_objectid && + found_key.type < type) + break; } return 1; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b305ae7e10b..6532b60683e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -77,6 +77,10 @@ struct btrfs_ordered_sum; /* orhpan objectid for tracking unlinked/truncated files */ #define BTRFS_ORPHAN_OBJECTID -5ULL +/* does write ahead logging to speed up fsyncs */ +#define BTRFS_TREE_LOG_OBJECTID -6ULL +#define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL + /* * All files have objectids higher than this. */ @@ -276,6 +280,7 @@ struct btrfs_super_block { __le64 generation; __le64 root; __le64 chunk_root; + __le64 log_root; __le64 total_bytes; __le64 bytes_used; __le64 root_dir_objectid; @@ -287,6 +292,7 @@ struct btrfs_super_block { __le32 sys_chunk_array_size; u8 root_level; u8 chunk_root_level; + u8 log_root_level; struct btrfs_dev_item dev_item; char label[BTRFS_LABEL_SIZE]; u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; @@ -392,7 +398,10 @@ struct btrfs_timespec { * make a new item type */ struct btrfs_inode_item { + /* nfs style generation number */ __le64 generation; + /* transid that last touched this inode */ + __le64 transid; __le64 size; __le64 nblocks; __le64 block_group; @@ -409,8 +418,13 @@ struct btrfs_inode_item { struct btrfs_timespec otime; } __attribute__ ((__packed__)); +struct btrfs_dir_log_item { + __le64 end; +} __attribute__ ((__packed__)); + struct btrfs_dir_item { struct btrfs_disk_key location; + __le64 transid; __le16 data_len; __le16 name_len; u8 type; @@ -505,6 +519,9 @@ struct btrfs_fs_info { struct btrfs_root *tree_root; struct btrfs_root *chunk_root; struct btrfs_root *dev_root; + + /* the log root tree is a directory of all the other log roots */ + struct btrfs_root *log_root_tree; struct radix_tree_root fs_roots_radix; struct extent_io_tree 
free_space_cache; @@ -518,6 +535,7 @@ struct btrfs_fs_info { u64 generation; u64 last_trans_committed; + u64 last_trans_new_blockgroup; u64 open_ioctl_trans; unsigned long mount_opt; u64 max_extent; @@ -527,6 +545,9 @@ struct btrfs_fs_info { wait_queue_head_t transaction_throttle; wait_queue_head_t transaction_wait; wait_queue_head_t async_submit_wait; + + wait_queue_head_t tree_log_wait; + struct btrfs_super_block super_copy; struct btrfs_super_block super_for_commit; struct block_device *__bdev; @@ -535,6 +556,7 @@ struct btrfs_fs_info { struct backing_dev_info bdi; spinlock_t hash_lock; struct mutex trans_mutex; + struct mutex tree_log_mutex; struct mutex transaction_kthread_mutex; struct mutex cleaner_mutex; struct mutex alloc_mutex; @@ -544,8 +566,13 @@ struct btrfs_fs_info { struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; + atomic_t nr_async_submits; atomic_t nr_async_bios; + atomic_t tree_log_writers; + atomic_t tree_log_commit; + unsigned long tree_log_batch; + u64 tree_log_transid; /* * this is used by the balancing code to wait for all the pending @@ -583,6 +610,7 @@ struct btrfs_fs_info { struct completion kobj_unregister; int do_barriers; int closing; + int log_root_recovering; atomic_t throttles; atomic_t throttle_gen; @@ -596,6 +624,7 @@ struct btrfs_fs_info { u64 delalloc_bytes; u64 last_alloc; u64 last_data_alloc; + u64 last_log_alloc; spinlock_t ref_cache_lock; u64 total_ref_cache_size; @@ -632,6 +661,7 @@ struct btrfs_root { struct btrfs_leaf_ref_tree *ref_tree; struct btrfs_leaf_ref_tree ref_tree_struct; struct btrfs_dirty_root *dirty_root; + struct btrfs_root *log_root; struct btrfs_root_item root_item; struct btrfs_key root_key; @@ -640,6 +670,7 @@ struct btrfs_root { struct kobject root_kobj; struct completion kobj_unregister; struct mutex objectid_mutex; + struct mutex log_mutex; u64 objectid; u64 last_trans; @@ -692,6 +723,8 @@ struct btrfs_root { * dir items are the name -> inode pointers in a directory. 
There is one * for every name in a directory. */ +#define BTRFS_DIR_LOG_ITEM_KEY 14 +#define BTRFS_DIR_LOG_INDEX_KEY 15 #define BTRFS_DIR_ITEM_KEY 16 #define BTRFS_DIR_INDEX_KEY 17 /* @@ -703,7 +736,8 @@ struct btrfs_root { */ #define BTRFS_CSUM_ITEM_KEY 19 -/* reserve 20-31 for other file stuff */ + +/* reserve 21-31 for other file/dir stuff */ /* * root items point to tree roots. There are typically in the root @@ -938,6 +972,7 @@ BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64); /* struct btrfs_inode_item */ BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64); +BTRFS_SETGET_FUNCS(inode_transid, struct btrfs_inode_item, transid, 64); BTRFS_SETGET_FUNCS(inode_size, struct btrfs_inode_item, size, 64); BTRFS_SETGET_FUNCS(inode_nblocks, struct btrfs_inode_item, nblocks, 64); BTRFS_SETGET_FUNCS(inode_block_group, struct btrfs_inode_item, block_group, 64); @@ -1126,10 +1161,13 @@ static inline void btrfs_set_item_key(struct extent_buffer *eb, write_eb_member(eb, item, struct btrfs_item, key, disk_key); } +BTRFS_SETGET_FUNCS(dir_log_end, struct btrfs_dir_log_item, end, 64); + /* struct btrfs_dir_item */ BTRFS_SETGET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16); BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8); BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16); +BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64); static inline void btrfs_dir_item_key(struct extent_buffer *eb, struct btrfs_dir_item *item, @@ -1301,7 +1339,11 @@ BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block, BTRFS_SETGET_STACK_FUNCS(super_chunk_root, struct btrfs_super_block, chunk_root, 64); BTRFS_SETGET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block, - chunk_root_level, 64); + chunk_root_level, 8); +BTRFS_SETGET_STACK_FUNCS(super_log_root, struct btrfs_super_block, + log_root, 64); +BTRFS_SETGET_STACK_FUNCS(super_log_root_level, struct btrfs_super_block, + 
log_root_level, 8); BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block, total_bytes, 64); BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block, @@ -1405,6 +1447,12 @@ static inline struct dentry *fdentry(struct file *file) { } /* extent-tree.c */ +int btrfs_lookup_extent(struct btrfs_root *root, struct btrfs_path *path, + u64 start, u64 len); +int btrfs_update_pinned_extents(struct btrfs_root *root, + u64 bytenr, u64 num, int pin); +int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *leaf); int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, u64 bytenr); @@ -1448,6 +1496,11 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset, struct btrfs_key *ins); +int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset, + struct btrfs_key *ins); int btrfs_reserve_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 num_bytes, u64 min_alloc_size, @@ -1488,9 +1541,9 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *key, int lowest_level, int cache_only, u64 min_trans); int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, + struct btrfs_key *max_key, struct btrfs_path *path, int cache_only, u64 min_trans); - int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, @@ -1656,6 +1709,18 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, #define PageChecked PageFsMisc #endif +int btrfs_unlink_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *dir, struct inode *inode, + const char *name, int name_len); +int btrfs_add_link(struct 
btrfs_trans_handle *trans, + struct inode *parent_inode, struct inode *inode, + const char *name, int name_len, int add_backref, u64 index); +int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, u64 new_size, + u32 min_type); + int btrfs_start_delalloc_inodes(struct btrfs_root *root); int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); int btrfs_writepages(struct address_space *mapping, @@ -1715,6 +1780,7 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); /* file.c */ +int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync); int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end); int btrfs_check_file(struct btrfs_root *root, struct inode *inode); extern struct file_operations btrfs_file_operations; diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 125094617fe..e4f30090d64 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -96,6 +96,7 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, btrfs_set_dir_item_key(leaf, dir_item, &disk_key); btrfs_set_dir_type(leaf, dir_item, BTRFS_FT_XATTR); btrfs_set_dir_name_len(leaf, dir_item, name_len); + btrfs_set_dir_transid(leaf, dir_item, trans->transid); btrfs_set_dir_data_len(leaf, dir_item, data_len); name_ptr = (unsigned long)(dir_item + 1); data_ptr = (unsigned long)((char *)name_ptr + name_len); @@ -142,6 +143,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_dir_type(leaf, dir_item, type); btrfs_set_dir_data_len(leaf, dir_item, 0); btrfs_set_dir_name_len(leaf, dir_item, name_len); + btrfs_set_dir_transid(leaf, dir_item, trans->transid); name_ptr = (unsigned long)(dir_item + 1); write_extent_buffer(leaf, name, name_ptr, name_len); @@ -169,6 +171,7 @@ second_insert: btrfs_set_dir_type(leaf, dir_item, type); btrfs_set_dir_data_len(leaf, dir_item, 0); 
btrfs_set_dir_name_len(leaf, dir_item, name_len); + btrfs_set_dir_transid(leaf, dir_item, trans->transid); name_ptr = (unsigned long)(dir_item + 1); write_extent_buffer(leaf, name, name_ptr, name_len); btrfs_mark_buffer_dirty(leaf); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8e7a938bfbc..a4373db5967 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -41,6 +41,7 @@ #include "async-thread.h" #include "locking.h" #include "ref-cache.h" +#include "tree-log.h" #if 0 static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) @@ -694,6 +695,18 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, } +int btrfs_write_tree_block(struct extent_buffer *buf) +{ + return btrfs_fdatawrite_range(buf->first_page->mapping, buf->start, + buf->start + buf->len - 1, WB_SYNC_NONE); +} + +int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) +{ + return btrfs_wait_on_page_writeback_range(buf->first_page->mapping, + buf->start, buf->start + buf->len -1); +} + struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, u64 parent_transid) { @@ -732,15 +745,6 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, return 0; } -int wait_on_tree_block_writeback(struct btrfs_root *root, - struct extent_buffer *buf) -{ - struct inode *btree_inode = root->fs_info->btree_inode; - wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->io_tree, - buf); - return 0; -} - static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, u32 stripesize, struct btrfs_root *root, struct btrfs_fs_info *fs_info, @@ -771,6 +775,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, spin_lock_init(&root->node_lock); spin_lock_init(&root->list_lock); mutex_init(&root->objectid_mutex); + mutex_init(&root->log_mutex); btrfs_leaf_ref_tree_init(&root->ref_tree_struct); root->ref_tree = &root->ref_tree_struct; @@ -809,11 +814,74 @@ static int 
find_and_setup_root(struct btrfs_root *tree_root, return 0; } -struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, - struct btrfs_key *location) +int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) +{ + struct extent_buffer *eb; + int ret; + + if (!fs_info->log_root_tree) + return 0; + + eb = fs_info->log_root_tree->node; + + WARN_ON(btrfs_header_level(eb) != 0); + WARN_ON(btrfs_header_nritems(eb) != 0); + + ret = btrfs_free_extent(trans, fs_info->tree_root, + eb->start, eb->len, + BTRFS_TREE_LOG_OBJECTID, 0, 0, 0, 1); + BUG_ON(ret); + + free_extent_buffer(eb); + kfree(fs_info->log_root_tree); + fs_info->log_root_tree = NULL; + return 0; +} + +int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) { struct btrfs_root *root; struct btrfs_root *tree_root = fs_info->tree_root; + + root = kzalloc(sizeof(*root), GFP_NOFS); + if (!root) + return -ENOMEM; + + __setup_root(tree_root->nodesize, tree_root->leafsize, + tree_root->sectorsize, tree_root->stripesize, + root, fs_info, BTRFS_TREE_LOG_OBJECTID); + + root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; + root->root_key.type = BTRFS_ROOT_ITEM_KEY; + root->root_key.offset = BTRFS_TREE_LOG_OBJECTID; + root->ref_cows = 0; + + root->node = btrfs_alloc_free_block(trans, root, root->leafsize, + BTRFS_TREE_LOG_OBJECTID, + 0, 0, 0, 0, 0); + + btrfs_set_header_nritems(root->node, 0); + btrfs_set_header_level(root->node, 0); + btrfs_set_header_bytenr(root->node, root->node->start); + btrfs_set_header_generation(root->node, trans->transid); + btrfs_set_header_owner(root->node, BTRFS_TREE_LOG_OBJECTID); + + write_extent_buffer(root->node, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(root->node), + BTRFS_FSID_SIZE); + btrfs_mark_buffer_dirty(root->node); + btrfs_tree_unlock(root->node); + fs_info->log_root_tree = root; + return 0; +} + +struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, 
+ struct btrfs_key *location) +{ + struct btrfs_root *root; + struct btrfs_fs_info *fs_info = tree_root->fs_info; struct btrfs_path *path; struct extent_buffer *l; u64 highest_inode; @@ -863,11 +931,13 @@ out: blocksize, 0); BUG_ON(!root->node); insert: - root->ref_cows = 1; - ret = btrfs_find_highest_inode(root, &highest_inode); - if (ret == 0) { - root->highest_inode = highest_inode; - root->last_inode_alloc = highest_inode; + if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { + root->ref_cows = 1; + ret = btrfs_find_highest_inode(root, &highest_inode); + if (ret == 0) { + root->highest_inode = highest_inode; + root->last_inode_alloc = highest_inode; + } } return root; } @@ -907,7 +977,7 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, if (root) return root; - root = btrfs_read_fs_root_no_radix(fs_info, location); + root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); if (IS_ERR(root)) return root; ret = radix_tree_insert(&fs_info->fs_roots_radix, @@ -1250,16 +1320,18 @@ struct btrfs_root *open_ctree(struct super_block *sb, u32 blocksize; u32 stripesize; struct buffer_head *bh; - struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), + struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); - struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), + struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info), GFP_NOFS); - struct btrfs_root *chunk_root = kmalloc(sizeof(struct btrfs_root), + struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); - struct btrfs_root *dev_root = kmalloc(sizeof(struct btrfs_root), + struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); + struct btrfs_root *log_tree_root; + int ret; int err = -EINVAL; @@ -1343,6 +1415,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, 
GFP_NOFS); mutex_init(&fs_info->trans_mutex); + mutex_init(&fs_info->tree_log_mutex); mutex_init(&fs_info->drop_mutex); mutex_init(&fs_info->alloc_mutex); mutex_init(&fs_info->chunk_mutex); @@ -1352,6 +1425,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, init_waitqueue_head(&fs_info->transaction_throttle); init_waitqueue_head(&fs_info->transaction_wait); init_waitqueue_head(&fs_info->async_submit_wait); + init_waitqueue_head(&fs_info->tree_log_wait); + atomic_set(&fs_info->tree_log_commit, 0); + atomic_set(&fs_info->tree_log_writers, 0); + fs_info->tree_log_transid = 0; #if 0 ret = add_hasher(fs_info, "crc32c"); @@ -1532,7 +1609,26 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!fs_info->transaction_kthread) goto fail_cleaner; + if (btrfs_super_log_root(disk_super) != 0) { + u32 blocksize; + u64 bytenr = btrfs_super_log_root(disk_super); + + blocksize = + btrfs_level_size(tree_root, + btrfs_super_log_root_level(disk_super)); + log_tree_root = kzalloc(sizeof(struct btrfs_root), + GFP_NOFS); + + __setup_root(nodesize, leafsize, sectorsize, stripesize, + log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); + + log_tree_root->node = read_tree_block(tree_root, bytenr, + blocksize, 0); + ret = btrfs_recover_log_trees(log_tree_root); + BUG_ON(ret); + } + fs_info->last_trans_committed = btrfs_super_generation(disk_super); return tree_root; fail_cleaner: diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 2562a273ae1..6b6fdc697f3 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -45,7 +45,7 @@ struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_key *location, const char *name, int namelen); -struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, +struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, struct btrfs_key *location); struct btrfs_root *btrfs_read_fs_root_no_name(struct 
btrfs_fs_info *fs_info, struct btrfs_key *location); @@ -74,4 +74,10 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, extent_submit_bio_hook_t *submit_bio_hook); int btrfs_congested_async(struct btrfs_fs_info *info, int iodone); unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); +int btrfs_write_tree_block(struct extent_buffer *buf); +int btrfs_wait_tree_block_writeback(struct extent_buffer *buf); +int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info); +int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e63b3b4bed7..646b9148ca2 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -496,6 +496,23 @@ static int match_extent_ref(struct extent_buffer *leaf, return ret == 0; } +/* simple helper to search for an existing extent at a given offset */ +int btrfs_lookup_extent(struct btrfs_root *root, struct btrfs_path *path, + u64 start, u64 len) +{ + int ret; + struct btrfs_key key; + + maybe_lock_mutex(root); + key.objectid = start; + key.offset = len; + btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); + ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, + 0, 0); + maybe_unlock_mutex(root); + return ret; +} + static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 bytenr, @@ -1409,7 +1426,7 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) } -static int update_pinned_extents(struct btrfs_root *root, +int btrfs_update_pinned_extents(struct btrfs_root *root, u64 bytenr, u64 num, int pin) { u64 len; @@ -1492,7 +1509,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, EXTENT_DIRTY); if (ret) break; - update_pinned_extents(root, start, end + 1 - start, 0); + btrfs_update_pinned_extents(root, start, end + 1 - start, 0); 
clear_extent_dirty(unpin, start, end, GFP_NOFS); set_extent_dirty(free_space_cache, start, end, GFP_NOFS); if (need_resched()) { @@ -1538,14 +1555,11 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, GFP_NOFS); - eb = btrfs_find_tree_block(extent_root, ins.objectid, + eb = btrfs_find_create_tree_block(extent_root, ins.objectid, ins.offset); - if (!btrfs_buffer_uptodate(eb, trans->transid)) { - mutex_unlock(&extent_root->fs_info->alloc_mutex); + if (!btrfs_buffer_uptodate(eb, trans->transid)) btrfs_read_buffer(eb, trans->transid); - mutex_lock(&extent_root->fs_info->alloc_mutex); - } btrfs_tree_lock(eb); level = btrfs_header_level(eb); @@ -1585,13 +1599,20 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, struct extent_buffer *buf; buf = btrfs_find_tree_block(root, bytenr, num_bytes); if (buf) { + /* we can reuse a block if it hasn't been written + * and it is from this transaction. We can't + * reuse anything from the tree log root because + * it has tiny sub-transactions. 
+ */ if (btrfs_buffer_uptodate(buf, 0) && btrfs_try_tree_lock(buf)) { u64 transid = root->fs_info->running_transaction->transid; u64 header_transid = btrfs_header_generation(buf); - if (header_transid == transid && + if (btrfs_header_owner(buf) != + BTRFS_TREE_LOG_OBJECTID && + header_transid == transid && !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { clean_tree_block(NULL, root, buf); @@ -1603,7 +1624,7 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, } free_extent_buffer(buf); } - update_pinned_extents(root, bytenr, num_bytes, 1); + btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); } else { set_extent_bits(&root->fs_info->pending_del, bytenr, bytenr + num_bytes - 1, @@ -1801,7 +1822,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct GFP_NOFS); if (!test_range_bit(&extent_root->fs_info->extent_ins, start, end, EXTENT_LOCKED, 0)) { - update_pinned_extents(extent_root, start, + btrfs_update_pinned_extents(extent_root, start, end + 1 - start, 1); ret = __free_extent(trans, extent_root, start, end + 1 - start, @@ -1919,6 +1940,12 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { last_ptr = &root->fs_info->last_data_alloc; } + if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { + last_ptr = &root->fs_info->last_log_alloc; + if (!last_ptr == 0 && root->fs_info->last_alloc) { + *last_ptr = root->fs_info->last_alloc + empty_cluster; + } + } if (last_ptr) { if (*last_ptr) @@ -2268,6 +2295,35 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, maybe_unlock_mutex(root); return ret; } + +/* + * this is used by the tree logging recovery code. 
It records that + * an extent has been allocated and makes sure to clear the free + * space cache bits as well + */ +int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset, + struct btrfs_key *ins) +{ + int ret; + struct btrfs_block_group_cache *block_group; + + maybe_lock_mutex(root); + block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); + cache_block_group(root, block_group); + + clear_extent_dirty(&root->fs_info->free_space_cache, + ins->objectid, ins->objectid + ins->offset - 1, + GFP_NOFS); + ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid, + ref_generation, owner, + owner_offset, ins); + maybe_unlock_mutex(root); + return ret; +} + /* * finds a free extent and does all the dirty work required for allocation * returns the key for the extent through ins, and a tree buffer for @@ -2350,9 +2406,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return buf; } -static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct extent_buffer *leaf) +int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *leaf) { u64 leaf_owner; u64 leaf_generation; @@ -2402,9 +2457,9 @@ static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans, return 0; } -static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_leaf_ref *ref) +static int noinline cache_drop_leaf_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_leaf_ref *ref) { int i; int ret; @@ -2512,7 +2567,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, btrfs_header_nritems(cur)) break; if (*level == 0) { - ret = drop_leaf_ref_no_cache(trans, root, cur); + ret = btrfs_drop_leaf_ref(trans, root, cur); BUG_ON(ret); break; } @@ -2552,7 
+2607,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, btrfs_node_key_to_cpu(cur, &key, path->slots[*level]); ref = btrfs_lookup_leaf_ref(root, bytenr); if (ref) { - ret = drop_leaf_ref(trans, root, ref); + ret = cache_drop_leaf_ref(trans, root, ref); BUG_ON(ret); btrfs_remove_leaf_ref(root, ref); btrfs_free_leaf_ref(root, ref); @@ -3628,6 +3683,8 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, extent_root = root->fs_info->extent_root; block_group_cache = &root->fs_info->block_group_cache; + root->fs_info->last_trans_new_blockgroup = trans->transid; + cache = kzalloc(sizeof(*cache), GFP_NOFS); BUG_ON(!cache); cache->key.objectid = chunk_offset; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e9e86fbaa24..84ecf3ab851 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -36,6 +36,8 @@ #include "btrfs_inode.h" #include "ioctl.h" #include "print-tree.h" +#include "tree-log.h" +#include "locking.h" #include "compat.h" @@ -988,10 +990,27 @@ out_nolock: *ppos = pos; if (num_written > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { - err = sync_page_range(inode, inode->i_mapping, - start_pos, num_written); + struct btrfs_trans_handle *trans; + + err = btrfs_fdatawrite_range(inode->i_mapping, start_pos, + start_pos + num_written -1, + WB_SYNC_NONE); + if (err < 0) + num_written = err; + + err = btrfs_wait_on_page_writeback_range(inode->i_mapping, + start_pos, start_pos + num_written - 1); if (err < 0) num_written = err; + + trans = btrfs_start_transaction(root, 1); + ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); + if (ret == 0) { + btrfs_sync_log(trans, root); + btrfs_end_transaction(trans, root); + } else { + btrfs_commit_transaction(trans, root); + } } else if (num_written > 0 && (file->f_flags & O_DIRECT)) { #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) do_sync_file_range(file, start_pos, @@ -1019,8 +1038,7 @@ int btrfs_release_file(struct inode * inode, struct file * filp) return 0; } -static int 
btrfs_sync_file(struct file *file, - struct dentry *dentry, int datasync) +int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -1043,6 +1061,8 @@ static int btrfs_sync_file(struct file *file, } mutex_unlock(&root->fs_info->trans_mutex); + filemap_fdatawait(inode->i_mapping); + /* * ok we haven't committed the transaction yet, lets do a commit */ @@ -1054,7 +1074,16 @@ static int btrfs_sync_file(struct file *file, ret = -ENOMEM; goto out; } - ret = btrfs_commit_transaction(trans, root); + + ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); + if (ret < 0) + goto out; + if (ret > 0) { + ret = btrfs_commit_transaction(trans, root); + } else { + btrfs_sync_log(trans, root); + ret = btrfs_end_transaction(trans, root); + } out: return ret > 0 ? EIO : ret; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 43d3f2649ca..65df9d83023 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -46,6 +46,8 @@ #include "volumes.h" #include "ordered-data.h" #include "xattr.h" +#include "compat.h" +#include "tree-log.h" struct btrfs_iget_args { u64 ino; @@ -586,6 +588,7 @@ nocow: &ordered_extent->list); btrfs_ordered_update_i_size(inode, ordered_extent); + btrfs_update_inode(trans, root, inode); btrfs_remove_ordered_extent(inode, ordered_extent); /* once for us */ @@ -593,7 +596,6 @@ nocow: /* once for the tree */ btrfs_put_ordered_extent(ordered_extent); - btrfs_update_inode(trans, root, inode); btrfs_end_transaction(trans, root); return 0; } @@ -1007,7 +1009,8 @@ void btrfs_read_locked_inode(struct inode *inode) inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec); inode->i_blocks = btrfs_inode_nblocks(leaf, inode_item); - inode->i_generation = btrfs_inode_generation(leaf, inode_item); + BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); + inode->i_generation = BTRFS_I(inode)->generation; inode->i_rdev = 0; rdev = 
btrfs_inode_rdev(leaf, inode_item); @@ -1056,7 +1059,8 @@ make_bad: make_bad_inode(inode); } -static void fill_inode_item(struct extent_buffer *leaf, +static void fill_inode_item(struct btrfs_trans_handle *trans, + struct extent_buffer *leaf, struct btrfs_inode_item *item, struct inode *inode) { @@ -1082,7 +1086,8 @@ static void fill_inode_item(struct extent_buffer *leaf, inode->i_ctime.tv_nsec); btrfs_set_inode_nblocks(leaf, item, inode->i_blocks); - btrfs_set_inode_generation(leaf, item, inode->i_generation); + btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation); + btrfs_set_inode_transid(leaf, item, trans->transid); btrfs_set_inode_rdev(leaf, item, inode->i_rdev); btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); btrfs_set_inode_block_group(leaf, item, @@ -1112,7 +1117,7 @@ int noinline btrfs_update_inode(struct btrfs_trans_handle *trans, inode_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); - fill_inode_item(leaf, inode_item, inode); + fill_inode_item(trans, leaf, inode_item, inode); btrfs_mark_buffer_dirty(leaf); btrfs_set_inode_last_trans(trans, inode); ret = 0; @@ -1122,14 +1127,12 @@ failed: } -static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *dir, - struct dentry *dentry) +int btrfs_unlink_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *dir, struct inode *inode, + const char *name, int name_len) { struct btrfs_path *path; - const char *name = dentry->d_name.name; - int name_len = dentry->d_name.len; int ret = 0; struct extent_buffer *leaf; struct btrfs_dir_item *di; @@ -1160,13 +1163,12 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, btrfs_release_path(root, path); ret = btrfs_del_inode_ref(trans, root, name, name_len, - dentry->d_inode->i_ino, - dentry->d_parent->d_inode->i_ino, &index); + inode->i_ino, + dir->i_ino, &index); if (ret) { printk("failed to delete reference to %.*s, " "inode %lu parent 
%lu\n", name_len, name, - dentry->d_inode->i_ino, - dentry->d_parent->d_inode->i_ino); + inode->i_ino, dir->i_ino); goto err; } @@ -1183,21 +1185,25 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, ret = btrfs_delete_one_dir_name(trans, root, path, di); btrfs_release_path(root, path); - dentry->d_inode->i_ctime = dir->i_ctime; + ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, + inode, dir->i_ino); + BUG_ON(ret); + + ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, + dir, index); + BUG_ON(ret); err: btrfs_free_path(path); - if (!ret) { - btrfs_i_size_write(dir, dir->i_size - name_len * 2); - dir->i_mtime = dir->i_ctime = CURRENT_TIME; - btrfs_update_inode(trans, root, dir); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - dentry->d_inode->i_nlink--; -#else - drop_nlink(dentry->d_inode); -#endif - ret = btrfs_update_inode(trans, root, dentry->d_inode); - dir->i_sb->s_dirt = 1; - } + if (ret) + goto out; + + btrfs_i_size_write(dir, dir->i_size - name_len * 2); + inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; + btrfs_update_inode(trans, root, dir); + btrfs_drop_nlink(inode); + ret = btrfs_update_inode(trans, root, inode); + dir->i_sb->s_dirt = 1; +out: return ret; } @@ -1218,7 +1224,8 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); - ret = btrfs_unlink_trans(trans, root, dir, dentry); + ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, + dentry->d_name.name, dentry->d_name.len); if (inode->i_nlink == 0) ret = btrfs_orphan_add(trans, inode); @@ -1256,7 +1263,8 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) goto fail_trans; /* now the directory is empty */ - err = btrfs_unlink_trans(trans, root, dir, dentry); + err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, + dentry->d_name.name, dentry->d_name.len); if (!err) { btrfs_i_size_write(inode, 0); } @@ -1283,10 
+1291,10 @@ fail: * min_type is the minimum key type to truncate down to. If set to 0, this * will kill all the items on this inode, including the INODE_ITEM_KEY. */ -static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode, - u32 min_type) +noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, + u64 new_size, u32 min_type) { int ret; struct btrfs_path *path; @@ -1307,7 +1315,9 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, int extent_type = -1; u64 mask = root->sectorsize - 1; - btrfs_drop_extent_cache(inode, inode->i_size & (~mask), (u64)-1); + if (root->ref_cows) + btrfs_drop_extent_cache(inode, + new_size & (~mask), (u64)-1); path = btrfs_alloc_path(); path->reada = -1; BUG_ON(!path); @@ -1324,7 +1334,13 @@ search_again: goto error; } if (ret > 0) { - BUG_ON(path->slots[0] == 0); + /* there are no items in the tree for us to truncate, we're + * done + */ + if (path->slots[0] == 0) { + ret = 0; + goto error; + } path->slots[0]--; } @@ -1358,10 +1374,10 @@ search_again: } if (found_type == BTRFS_CSUM_ITEM_KEY) { ret = btrfs_csum_truncate(trans, root, path, - inode->i_size); + new_size); BUG_ON(ret); } - if (item_end < inode->i_size) { + if (item_end < new_size) { if (found_type == BTRFS_DIR_ITEM_KEY) { found_type = BTRFS_INODE_ITEM_KEY; } else if (found_type == BTRFS_EXTENT_ITEM_KEY) { @@ -1378,7 +1394,7 @@ search_again: btrfs_set_key_type(&key, found_type); goto next; } - if (found_key.offset >= inode->i_size) + if (found_key.offset >= new_size) del_item = 1; else del_item = 0; @@ -1394,7 +1410,7 @@ search_again: if (!del_item) { u64 orig_num_bytes = btrfs_file_extent_num_bytes(leaf, fi); - extent_num_bytes = inode->i_size - + extent_num_bytes = new_size - found_key.offset + root->sectorsize - 1; extent_num_bytes = extent_num_bytes & ~((u64)root->sectorsize - 1); @@ -1402,7 +1418,7 @@ search_again: 
extent_num_bytes); num_dec = (orig_num_bytes - extent_num_bytes); - if (extent_start != 0) + if (root->ref_cows && extent_start != 0) dec_i_blocks(inode, num_dec); btrfs_mark_buffer_dirty(leaf); } else { @@ -1413,22 +1429,29 @@ search_again: num_dec = btrfs_file_extent_num_bytes(leaf, fi); if (extent_start != 0) { found_extent = 1; - dec_i_blocks(inode, num_dec); + if (root->ref_cows) + dec_i_blocks(inode, num_dec); + } + if (root->ref_cows) { + root_gen = + btrfs_header_generation(leaf); } - root_gen = btrfs_header_generation(leaf); root_owner = btrfs_header_owner(leaf); } } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { if (!del_item) { - u32 newsize = inode->i_size - found_key.offset; - dec_i_blocks(inode, item_end + 1 - - found_key.offset - newsize); - newsize = - btrfs_file_extent_calc_inline_size(newsize); + u32 size = new_size - found_key.offset; + + if (root->ref_cows) { + dec_i_blocks(inode, item_end + 1 - + found_key.offset - size); + } + size = + btrfs_file_extent_calc_inline_size(size); ret = btrfs_truncate_item(trans, root, path, - newsize, 1); + size, 1); BUG_ON(ret); - } else { + } else if (root->ref_cows) { dec_i_blocks(inode, item_end + 1 - found_key.offset); } @@ -1666,7 +1689,7 @@ void btrfs_delete_inode(struct inode *inode) trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); - ret = btrfs_truncate_in_trans(trans, root, inode, 0); + ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 0); if (ret) { btrfs_orphan_del(NULL, inode); goto no_delete_lock; @@ -1753,15 +1776,20 @@ static int fixup_tree_root_location(struct btrfs_root *root, return 0; } -static int btrfs_init_locked_inode(struct inode *inode, void *p) +static noinline void init_btrfs_i(struct inode *inode) { - struct btrfs_iget_args *args = p; - inode->i_ino = args->ino; - BTRFS_I(inode)->root = args->root; - BTRFS_I(inode)->delalloc_bytes = 0; - inode->i_mapping->writeback_index = 0; - BTRFS_I(inode)->disk_i_size = 0; - 
BTRFS_I(inode)->index_cnt = (u64)-1; + struct btrfs_inode *bi = BTRFS_I(inode); + + bi->i_acl = NULL; + bi->i_default_acl = NULL; + + bi->generation = 0; + bi->last_trans = 0; + bi->logged_trans = 0; + bi->delalloc_bytes = 0; + bi->disk_i_size = 0; + bi->flags = 0; + bi->index_cnt = (u64)-1; extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); @@ -1771,6 +1799,15 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); mutex_init(&BTRFS_I(inode)->csum_mutex); mutex_init(&BTRFS_I(inode)->extent_mutex); + mutex_init(&BTRFS_I(inode)->log_mutex); +} + +static int btrfs_init_locked_inode(struct inode *inode, void *p) +{ + struct btrfs_iget_args *args = p; + inode->i_ino = args->ino; + init_btrfs_i(inode); + BTRFS_I(inode)->root = args->root; return 0; } @@ -2263,21 +2300,10 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, * btrfs_get_inode_index_count has an explanation for the magic * number */ + init_btrfs_i(inode); BTRFS_I(inode)->index_cnt = 2; - - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); - extent_io_tree_init(&BTRFS_I(inode)->io_tree, - inode->i_mapping, GFP_NOFS); - extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, - inode->i_mapping, GFP_NOFS); - btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); - INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); - mutex_init(&BTRFS_I(inode)->csum_mutex); - mutex_init(&BTRFS_I(inode)->extent_mutex); - BTRFS_I(inode)->delalloc_bytes = 0; - inode->i_mapping->writeback_index = 0; - BTRFS_I(inode)->disk_i_size = 0; BTRFS_I(inode)->root = root; + BTRFS_I(inode)->generation = trans->transid; if (mode & S_IFDIR) owner = 0; @@ -2290,7 +2316,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, new_inode_group = group; } BTRFS_I(inode)->block_group = new_inode_group; - BTRFS_I(inode)->flags = 0; 
key[0].objectid = objectid; btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); @@ -2318,7 +2343,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_inode_item); - fill_inode_item(path->nodes[0], inode_item, inode); + fill_inode_item(trans, path->nodes[0], inode_item, inode); ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, struct btrfs_inode_ref); @@ -2349,38 +2374,34 @@ static inline u8 btrfs_inode_type(struct inode *inode) return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT]; } -static int btrfs_add_link(struct btrfs_trans_handle *trans, - struct dentry *dentry, struct inode *inode, - int add_backref, u64 index) +int btrfs_add_link(struct btrfs_trans_handle *trans, + struct inode *parent_inode, struct inode *inode, + const char *name, int name_len, int add_backref, u64 index) { int ret; struct btrfs_key key; - struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root; - struct inode *parent_inode = dentry->d_parent->d_inode; + struct btrfs_root *root = BTRFS_I(parent_inode)->root; key.objectid = inode->i_ino; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); key.offset = 0; - ret = btrfs_insert_dir_item(trans, root, - dentry->d_name.name, dentry->d_name.len, - dentry->d_parent->d_inode->i_ino, + ret = btrfs_insert_dir_item(trans, root, name, name_len, + parent_inode->i_ino, &key, btrfs_inode_type(inode), index); if (ret == 0) { if (add_backref) { ret = btrfs_insert_inode_ref(trans, root, - dentry->d_name.name, - dentry->d_name.len, - inode->i_ino, - parent_inode->i_ino, - index); + name, name_len, + inode->i_ino, + parent_inode->i_ino, + index); } btrfs_i_size_write(parent_inode, parent_inode->i_size + - dentry->d_name.len * 2); + name_len * 2); parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; - ret = btrfs_update_inode(trans, root, - dentry->d_parent->d_inode); + 
ret = btrfs_update_inode(trans, root, parent_inode); } return ret; } @@ -2389,7 +2410,9 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans, struct dentry *dentry, struct inode *inode, int backref, u64 index) { - int err = btrfs_add_link(trans, dentry, inode, backref, index); + int err = btrfs_add_link(trans, dentry->d_parent->d_inode, + inode, dentry->d_name.name, + dentry->d_name.len, backref, index); if (!err) { d_instantiate(dentry, inode); return 0; @@ -2513,19 +2536,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_mapping->backing_dev_info = &root->fs_info->bdi; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); - extent_io_tree_init(&BTRFS_I(inode)->io_tree, - inode->i_mapping, GFP_NOFS); - extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, - inode->i_mapping, GFP_NOFS); - INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); - mutex_init(&BTRFS_I(inode)->csum_mutex); - mutex_init(&BTRFS_I(inode)->extent_mutex); - BTRFS_I(inode)->delalloc_bytes = 0; - BTRFS_I(inode)->disk_i_size = 0; - inode->i_mapping->writeback_index = 0; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; - btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); @@ -2556,11 +2567,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (inode->i_nlink == 0) return -ENOENT; -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - inode->i_nlink++; -#else - inc_nlink(inode); -#endif + btrfs_inc_nlink(inode); err = btrfs_check_free_space(root, 1, 0); if (err) goto fail; @@ -2650,7 +2657,9 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (err) goto out_fail; - err = btrfs_add_link(trans, dentry, inode, 0, index); + err = btrfs_add_link(trans, dentry->d_parent->d_inode, + inode, dentry->d_name.name, + dentry->d_name.len, 0, index); if 
(err) goto out_fail; @@ -3221,7 +3230,7 @@ static void btrfs_truncate(struct inode *inode) if (ret) goto out; /* FIXME, add redo link to tree so we don't leak on crash */ - ret = btrfs_truncate_in_trans(trans, root, inode, + ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, BTRFS_EXTENT_DATA_KEY); btrfs_update_inode(trans, root, inode); @@ -3304,6 +3313,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) if (!ei) return NULL; ei->last_trans = 0; + ei->logged_trans = 0; btrfs_ordered_inode_tree_init(&ei->ordered_tree); ei->i_acl = BTRFS_ACL_NOT_CACHED; ei->i_default_acl = BTRFS_ACL_NOT_CACHED; @@ -3463,31 +3473,39 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, btrfs_set_trans_block_group(trans, new_dir); - old_dentry->d_inode->i_nlink++; + btrfs_inc_nlink(old_dentry->d_inode); old_dir->i_ctime = old_dir->i_mtime = ctime; new_dir->i_ctime = new_dir->i_mtime = ctime; old_inode->i_ctime = ctime; - ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry); + ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode, + old_dentry->d_name.name, + old_dentry->d_name.len); if (ret) goto out_fail; if (new_inode) { new_inode->i_ctime = CURRENT_TIME; - ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry); + ret = btrfs_unlink_inode(trans, root, new_dir, + new_dentry->d_inode, + new_dentry->d_name.name, + new_dentry->d_name.len); if (ret) goto out_fail; if (new_inode->i_nlink == 0) { - ret = btrfs_orphan_add(trans, new_inode); + ret = btrfs_orphan_add(trans, new_dentry->d_inode); if (ret) goto out_fail; } + } ret = btrfs_set_inode_index(new_dir, old_inode, &index); if (ret) goto out_fail; - ret = btrfs_add_link(trans, new_dentry, old_inode, 1, index); + ret = btrfs_add_link(trans, new_dentry->d_parent->d_inode, + old_inode, new_dentry->d_name.name, + new_dentry->d_name.len, 1, index); if (ret) goto out_fail; @@ -3577,19 +3595,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, 
inode->i_mapping->backing_dev_info = &root->fs_info->bdi; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); - extent_io_tree_init(&BTRFS_I(inode)->io_tree, - inode->i_mapping, GFP_NOFS); - extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, - inode->i_mapping, GFP_NOFS); - INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); - mutex_init(&BTRFS_I(inode)->csum_mutex); - mutex_init(&BTRFS_I(inode)->extent_mutex); - BTRFS_I(inode)->delalloc_bytes = 0; - BTRFS_I(inode)->disk_i_size = 0; - inode->i_mapping->writeback_index = 0; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; - btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); @@ -3691,6 +3697,7 @@ static struct file_operations btrfs_dir_file_operations = { .compat_ioctl = btrfs_ioctl, #endif .release = btrfs_release_file, + .fsync = btrfs_sync_file, }; static struct extent_io_ops btrfs_extent_io_ops = { diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 36726696e58..e3984f902e7 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -202,8 +202,9 @@ again: memcpy(&found_key, &key, sizeof(key)); key.offset++; btrfs_release_path(root, path); - dead_root = btrfs_read_fs_root_no_radix(root->fs_info, - &found_key); + dead_root = + btrfs_read_fs_root_no_radix(root->fs_info->tree_root, + &found_key); if (IS_ERR(dead_root)) { ret = PTR_ERR(dead_root); goto err; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index eff3ad72991..49c4f5b40ed 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -25,6 +25,7 @@ #include "transaction.h" #include "locking.h" #include "ref-cache.h" +#include "tree-log.h" static int total_trans = 0; extern struct kmem_cache *btrfs_trans_handle_cachep; @@ -57,6 +58,7 @@ static noinline int join_transaction(struct btrfs_root *root) root->fs_info->generation++; 
root->fs_info->last_alloc = 0; root->fs_info->last_data_alloc = 0; + root->fs_info->last_log_alloc = 0; cur_trans->num_writers = 1; cur_trans->num_joined = 0; cur_trans->transid = root->fs_info->generation; @@ -83,7 +85,7 @@ static noinline int join_transaction(struct btrfs_root *root) return 0; } -static noinline int record_root_in_trans(struct btrfs_root *root) +noinline int btrfs_record_root_in_trans(struct btrfs_root *root) { struct btrfs_dirty_root *dirty; u64 running_trans_id = root->fs_info->running_transaction->transid; @@ -151,7 +153,7 @@ static void wait_current_trans(struct btrfs_root *root) } } -struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, +static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, int num_blocks, int wait) { struct btrfs_trans_handle *h = @@ -164,7 +166,7 @@ struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, ret = join_transaction(root); BUG_ON(ret); - record_root_in_trans(root); + btrfs_record_root_in_trans(root); h->transid = root->fs_info->running_transaction->transid; h->transaction = root->fs_info->running_transaction; h->blocks_reserved = num_blocks; @@ -456,6 +458,8 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, BUG_ON(!root->ref_tree); dirty = root->dirty_root; + btrfs_free_log(trans, root); + if (root->commit_root == root->node) { WARN_ON(root->node->start != btrfs_root_bytenr(&root->root_item)); @@ -600,7 +604,7 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, num_bytes -= btrfs_root_used(&dirty->root->root_item); bytes_used = btrfs_root_used(&root->root_item); if (num_bytes) { - record_root_in_trans(root); + btrfs_record_root_in_trans(root); btrfs_set_root_used(&root->root_item, bytes_used - num_bytes); } @@ -745,7 +749,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, int ret; INIT_LIST_HEAD(&dirty_fs_roots); - mutex_lock(&root->fs_info->trans_mutex); if (trans->transaction->in_commit) { cur_trans = 
trans->transaction; @@ -821,10 +824,30 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, WARN_ON(cur_trans != trans->transaction); + /* btrfs_commit_tree_roots is responsible for getting the + * various roots consistent with each other. Every pointer + * in the tree of tree roots has to point to the most up to date + * root for every subvolume and other tree. So, we have to keep + * the tree logging code from jumping in and changing any + * of the trees. + * + * At this point in the commit, there can't be any tree-log + * writers, but a little lower down we drop the trans mutex + * and let new people in. By holding the tree_log_mutex + * from now until after the super is written, we avoid races + * with the tree-log code. + */ + mutex_lock(&root->fs_info->tree_log_mutex); + ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, &dirty_fs_roots); BUG_ON(ret); + /* add_dirty_roots gets rid of all the tree log roots, it is now + * safe to free the root of tree log roots + */ + btrfs_free_log_root_tree(trans, root->fs_info); + ret = btrfs_commit_tree_roots(trans, root); BUG_ON(ret); @@ -843,6 +866,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, chunk_root->node->start); btrfs_set_super_chunk_root_level(&root->fs_info->super_copy, btrfs_header_level(chunk_root->node)); + + if (!root->fs_info->log_root_recovering) { + btrfs_set_super_log_root(&root->fs_info->super_copy, 0); + btrfs_set_super_log_root_level(&root->fs_info->super_copy, 0); + } + memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy, sizeof(root->fs_info->super_copy)); @@ -857,6 +886,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, BUG_ON(ret); write_ctree_super(trans, root); + /* + * the super is written, we can safely allow the tree-loggers + * to go about their business + */ + mutex_unlock(&root->fs_info->tree_log_mutex); + btrfs_finish_extent_commit(trans, root, pinned_copy); mutex_lock(&root->fs_info->trans_mutex); diff --git 
a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 598baa31241..cc63650d60d 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -98,4 +98,5 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, struct btrfs_root *root); void btrfs_throttle(struct btrfs_root *root); +int btrfs_record_root_in_trans(struct btrfs_root *root); #endif diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index cc2650b0695..b3bb5bbad76 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -81,12 +81,12 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, memcpy(&key, &root->defrag_progress, sizeof(key)); } - path->lowest_level = 1; path->keep_locks = 1; if (cache_only) min_trans = root->defrag_trans_start; - ret = btrfs_search_forward(root, &key, path, cache_only, min_trans); + ret = btrfs_search_forward(root, &key, NULL, path, + cache_only, min_trans); if (ret < 0) goto out; if (ret > 0) { diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c new file mode 100644 index 00000000000..d1ce8314b94 --- /dev/null +++ b/fs/btrfs/tree-log.c @@ -0,0 +1,2804 @@ +/* + * Copyright (C) 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#include <linux/sched.h> +#include "ctree.h" +#include "transaction.h" +#include "disk-io.h" +#include "locking.h" +#include "print-tree.h" +#include "compat.h" + +/* magic values for the inode_only field in btrfs_log_inode: + * + * LOG_INODE_ALL means to log everything + * LOG_INODE_EXISTS means to log just enough to recreate the inode + * during log replay + */ +#define LOG_INODE_ALL 0 +#define LOG_INODE_EXISTS 1 + +/* + * stages for the tree walking. The first + * stage (0) is to only pin down the blocks we find + * the second stage (1) is to make sure that all the inodes + * we find in the log are created in the subvolume. + * + * The last stage is to deal with directories and links and extents + * and all the other fun semantics + */ +#define LOG_WALK_PIN_ONLY 0 +#define LOG_WALK_REPLAY_INODES 1 +#define LOG_WALK_REPLAY_ALL 2 + +static int __btrfs_log_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + int inode_only); + +/* + * tree logging is a special write ahead log used to make sure that + * fsyncs and O_SYNCs can happen without doing full tree commits. + * + * Full tree commits are expensive because they require commonly + * modified blocks to be recowed, creating many dirty pages in the + * extent tree and a 4x-6x higher write load than ext3. + * + * Instead of doing a tree commit on every fsync, we use the + * key ranges and transaction ids to find items for a given file or directory + * that have changed in this transaction. Those items are copied into + * a special tree (one per subvolume root), that tree is written to disk + * and then the fsync is considered complete. + * + * After a crash, items are copied out of the log-tree back into the + * subvolume tree. Any file data extents found are recorded in the extent + * allocation tree, and the log-tree freed. 
+ * + * The log tree is read three times, once to pin down all the extents it is + * using in ram and once, once to create all the inodes logged in the tree + * and once to do all the other items. + */ + +/* + * btrfs_add_log_tree adds a new per-subvolume log tree into the + * tree of log tree roots. This must be called with a tree log transaction + * running (see start_log_trans). + */ +int btrfs_add_log_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_key key; + struct btrfs_root_item root_item; + struct btrfs_inode_item *inode_item; + struct extent_buffer *leaf; + struct btrfs_root *new_root = root; + int ret; + u64 objectid = root->root_key.objectid; + + leaf = btrfs_alloc_free_block(trans, root, root->leafsize, + BTRFS_TREE_LOG_OBJECTID, + 0, 0, 0, 0, 0); + if (IS_ERR(leaf)) { + ret = PTR_ERR(leaf); + return ret; + } + + btrfs_set_header_nritems(leaf, 0); + btrfs_set_header_level(leaf, 0); + btrfs_set_header_bytenr(leaf, leaf->start); + btrfs_set_header_generation(leaf, trans->transid); + btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID); + + write_extent_buffer(leaf, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(leaf), + BTRFS_FSID_SIZE); + btrfs_mark_buffer_dirty(leaf); + + inode_item = &root_item.inode; + memset(inode_item, 0, sizeof(*inode_item)); + inode_item->generation = cpu_to_le64(1); + inode_item->size = cpu_to_le64(3); + inode_item->nlink = cpu_to_le32(1); + inode_item->nblocks = cpu_to_le64(1); + inode_item->mode = cpu_to_le32(S_IFDIR | 0755); + + btrfs_set_root_bytenr(&root_item, leaf->start); + btrfs_set_root_level(&root_item, 0); + btrfs_set_root_refs(&root_item, 0); + btrfs_set_root_used(&root_item, 0); + + memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); + root_item.drop_level = 0; + + btrfs_tree_unlock(leaf); + free_extent_buffer(leaf); + leaf = NULL; + + btrfs_set_root_dirid(&root_item, 0); + + key.objectid = BTRFS_TREE_LOG_OBJECTID; + key.offset = objectid; + 
btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + ret = btrfs_insert_root(trans, root->fs_info->log_root_tree, &key, + &root_item); + if (ret) + goto fail; + + new_root = btrfs_read_fs_root_no_radix(root->fs_info->log_root_tree, + &key); + BUG_ON(!new_root); + + WARN_ON(root->log_root); + root->log_root = new_root; + + /* + * log trees do not get reference counted because they go away + * before a real commit is actually done. They do store pointers + * to file data extents, and those reference counts still get + * updated (along with back refs to the log tree). + */ + new_root->ref_cows = 0; + new_root->last_trans = trans->transid; +fail: + return ret; +} + +/* + * start a sub transaction and setup the log tree + * this increments the log tree writer count to make the people + * syncing the tree wait for us to finish + */ +static int start_log_trans(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + int ret; + mutex_lock(&root->fs_info->tree_log_mutex); + if (!root->fs_info->log_root_tree) { + ret = btrfs_init_log_root_tree(trans, root->fs_info); + BUG_ON(ret); + } + if (!root->log_root) { + ret = btrfs_add_log_tree(trans, root); + BUG_ON(ret); + } + atomic_inc(&root->fs_info->tree_log_writers); + root->fs_info->tree_log_batch++; + mutex_unlock(&root->fs_info->tree_log_mutex); + return 0; +} + +/* + * returns 0 if there was a log transaction running and we were able + * to join, or returns -ENOENT if there were not transactions + * in progress + */ +static int join_running_log_trans(struct btrfs_root *root) +{ + int ret = -ENOENT; + + smp_mb(); + if (!root->log_root) + return -ENOENT; + + mutex_lock(&root->fs_info->tree_log_mutex); + if (root->log_root) { + ret = 0; + atomic_inc(&root->fs_info->tree_log_writers); + root->fs_info->tree_log_batch++; + } + mutex_unlock(&root->fs_info->tree_log_mutex); + return ret; +} + +/* + * indicate we're done making changes to the log tree + * and wake up anyone waiting to do a sync + */ +static int 
end_log_trans(struct btrfs_root *root) +{ + atomic_dec(&root->fs_info->tree_log_writers); + smp_mb(); + if (waitqueue_active(&root->fs_info->tree_log_wait)) + wake_up(&root->fs_info->tree_log_wait); + return 0; +} + + +/* + * the walk control struct is used to pass state down the chain when + * processing the log tree. The stage field tells us which part + * of the log tree processing we are currently doing. The others + * are state fields used for that specific part + */ +struct walk_control { + /* should we free the extent on disk when done? This is used + * at transaction commit time while freeing a log tree + */ + int free; + + /* should we write out the extent buffer? This is used + * while flushing the log tree to disk during a sync + */ + int write; + + /* should we wait for the extent buffer io to finish? Also used + * while flushing the log tree to disk for a sync + */ + int wait; + + /* pin only walk, we record which extents on disk belong to the + * log trees + */ + int pin; + + /* what stage of the replay code we're currently in */ + int stage; + + /* the root we are currently replaying */ + struct btrfs_root *replay_dest; + + /* the trans handle for the current replay */ + struct btrfs_trans_handle *trans; + + /* the function that gets used to process blocks we find in the + * tree. 
Note the extent_buffer might not be up to date when it is + * passed in, and it must be checked or read if you need the data + * inside it + */ + int (*process_func)(struct btrfs_root *log, struct extent_buffer *eb, + struct walk_control *wc, u64 gen); +}; + +/* + * process_func used to pin down extents, write them or wait on them + */ +static int process_one_buffer(struct btrfs_root *log, + struct extent_buffer *eb, + struct walk_control *wc, u64 gen) +{ + if (wc->pin) { + mutex_lock(&log->fs_info->alloc_mutex); + btrfs_update_pinned_extents(log->fs_info->extent_root, + eb->start, eb->len, 1); + mutex_unlock(&log->fs_info->alloc_mutex); + } + + if (btrfs_buffer_uptodate(eb, gen)) { + if (wc->write) + btrfs_write_tree_block(eb); + if (wc->wait) + btrfs_wait_tree_block_writeback(eb); + } + return 0; +} + +/* + * Item overwrite used by replay and tree logging. eb, slot and key all refer + * to the src data we are copying out. + * + * root is the tree we are copying into, and path is a scratch + * path for use in this function (it should be released on entry and + * will be released on exit). + * + * If the key is already in the destination tree the existing item is + * overwritten. If the existing item isn't big enough, it is extended. + * If it is too large, it is truncated. + * + * If the key isn't in the destination yet, a new item is inserted. 
+ */ +static noinline int overwrite_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct extent_buffer *eb, int slot, + struct btrfs_key *key) +{ + int ret; + u32 item_size; + u64 saved_i_size = 0; + int save_old_i_size = 0; + unsigned long src_ptr; + unsigned long dst_ptr; + int overwrite_root = 0; + + if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) + overwrite_root = 1; + + item_size = btrfs_item_size_nr(eb, slot); + src_ptr = btrfs_item_ptr_offset(eb, slot); + + /* look for the key in the destination tree */ + ret = btrfs_search_slot(NULL, root, key, path, 0, 0); + if (ret == 0) { + char *src_copy; + char *dst_copy; + u32 dst_size = btrfs_item_size_nr(path->nodes[0], + path->slots[0]); + if (dst_size != item_size) + goto insert; + + if (item_size == 0) { + btrfs_release_path(root, path); + return 0; + } + dst_copy = kmalloc(item_size, GFP_NOFS); + src_copy = kmalloc(item_size, GFP_NOFS); + + read_extent_buffer(eb, src_copy, src_ptr, item_size); + + dst_ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); + read_extent_buffer(path->nodes[0], dst_copy, dst_ptr, + item_size); + ret = memcmp(dst_copy, src_copy, item_size); + + kfree(dst_copy); + kfree(src_copy); + /* + * they have the same contents, just return, this saves + * us from cowing blocks in the destination tree and doing + * extra writes that may not have been done by a previous + * sync + */ + if (ret == 0) { + btrfs_release_path(root, path); + return 0; + } + + } +insert: + btrfs_release_path(root, path); + /* try to insert the key into the destination tree */ + ret = btrfs_insert_empty_item(trans, root, path, + key, item_size); + + /* make sure any existing item is the correct size */ + if (ret == -EEXIST) { + u32 found_size; + found_size = btrfs_item_size_nr(path->nodes[0], + path->slots[0]); + if (found_size > item_size) { + btrfs_truncate_item(trans, root, path, item_size, 1); + } else if (found_size < item_size) { + ret = 
btrfs_del_item(trans, root, + path); + BUG_ON(ret); + + btrfs_release_path(root, path); + ret = btrfs_insert_empty_item(trans, + root, path, key, item_size); + BUG_ON(ret); + } + } else if (ret) { + BUG(); + } + dst_ptr = btrfs_item_ptr_offset(path->nodes[0], + path->slots[0]); + + /* don't overwrite an existing inode if the generation number + * was logged as zero. This is done when the tree logging code + * is just logging an inode to make sure it exists after recovery. + * + * Also, don't overwrite i_size on directories during replay. + * log replay inserts and removes directory items based on the + * state of the tree found in the subvolume, and i_size is modified + * as it goes + */ + if (key->type == BTRFS_INODE_ITEM_KEY && ret == -EEXIST) { + struct btrfs_inode_item *src_item; + struct btrfs_inode_item *dst_item; + + src_item = (struct btrfs_inode_item *)src_ptr; + dst_item = (struct btrfs_inode_item *)dst_ptr; + + if (btrfs_inode_generation(eb, src_item) == 0) + goto no_copy; + + if (overwrite_root && + S_ISDIR(btrfs_inode_mode(eb, src_item)) && + S_ISDIR(btrfs_inode_mode(path->nodes[0], dst_item))) { + save_old_i_size = 1; + saved_i_size = btrfs_inode_size(path->nodes[0], + dst_item); + } + } + + copy_extent_buffer(path->nodes[0], eb, dst_ptr, + src_ptr, item_size); + + if (save_old_i_size) { + struct btrfs_inode_item *dst_item; + dst_item = (struct btrfs_inode_item *)dst_ptr; + btrfs_set_inode_size(path->nodes[0], dst_item, saved_i_size); + } + + /* make sure the generation is filled in */ + if (key->type == BTRFS_INODE_ITEM_KEY) { + struct btrfs_inode_item *dst_item; + dst_item = (struct btrfs_inode_item *)dst_ptr; + if (btrfs_inode_generation(path->nodes[0], dst_item) == 0) { + btrfs_set_inode_generation(path->nodes[0], dst_item, + trans->transid); + } + } +no_copy: + btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_release_path(root, path); + return 0; +} + +/* + * simple helper to read an inode off the disk from a given root + * This can only be 
called for subvolume roots and not for the log
 *
 * Returns the inode with a reference held (drop it with iput), or NULL
 * when no usable inode could be obtained.
 */
static noinline struct inode *read_one_inode(struct btrfs_root *root,
					     u64 objectid)
{
	struct inode *inode;

	inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
	/*
	 * NOTE(review): presumably btrfs_iget_locked can return NULL when
	 * the inode cannot be allocated -- confirm against its definition.
	 * Every caller already treats NULL as "no inode".
	 */
	if (!inode)
		return NULL;

	/* a freshly allocated inode still has to be filled in from disk */
	if (inode->i_state & I_NEW) {
		BTRFS_I(inode)->root = root;
		BTRFS_I(inode)->location.objectid = objectid;
		BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
		BTRFS_I(inode)->location.offset = 0;
		btrfs_read_locked_inode(inode);
		unlock_new_inode(inode);

	}
	if (is_bad_inode(inode)) {
		iput(inode);
		inode = NULL;
	}
	return inode;
}

/* replays a single extent in 'eb' at 'slot' with 'key' into the
 * subvolume 'root'.  path is released on entry and should be released
 * on exit.
 *
 * extents in the log tree have not been allocated out of the extent
 * tree yet.  So, this completes the allocation, taking a reference
 * as required if the extent already exists or creating a new extent
 * if it isn't in the extent allocation tree yet.
 *
 * The extent is inserted into the file, dropping any existing extents
 * from the file that overlap the new one.
+ */ +static noinline int replay_one_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct extent_buffer *eb, int slot, + struct btrfs_key *key) +{ + int found_type; + u64 mask = root->sectorsize - 1; + u64 extent_end; + u64 alloc_hint; + u64 start = key->offset; + struct btrfs_file_extent_item *item; + struct inode *inode = NULL; + unsigned long size; + int ret = 0; + + item = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); + found_type = btrfs_file_extent_type(eb, item); + + if (found_type == BTRFS_FILE_EXTENT_REG) + extent_end = start + btrfs_file_extent_num_bytes(eb, item); + else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + size = btrfs_file_extent_inline_len(eb, + btrfs_item_nr(eb, slot)); + extent_end = (start + size + mask) & ~mask; + } else { + ret = 0; + goto out; + } + + inode = read_one_inode(root, key->objectid); + if (!inode) { + ret = -EIO; + goto out; + } + + /* + * first check to see if we already have this extent in the + * file. This must be done before the btrfs_drop_extents run + * so we don't try to drop this extent. 
+ */ + ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, + start, 0); + + if (ret == 0 && found_type == BTRFS_FILE_EXTENT_REG) { + struct btrfs_file_extent_item cmp1; + struct btrfs_file_extent_item cmp2; + struct btrfs_file_extent_item *existing; + struct extent_buffer *leaf; + + leaf = path->nodes[0]; + existing = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + + read_extent_buffer(eb, &cmp1, (unsigned long)item, + sizeof(cmp1)); + read_extent_buffer(leaf, &cmp2, (unsigned long)existing, + sizeof(cmp2)); + + /* + * we already have a pointer to this exact extent, + * we don't have to do anything + */ + if (memcmp(&cmp1, &cmp2, sizeof(cmp1)) == 0) { + btrfs_release_path(root, path); + goto out; + } + } + btrfs_release_path(root, path); + + /* drop any overlapping extents */ + ret = btrfs_drop_extents(trans, root, inode, + start, extent_end, start, &alloc_hint); + BUG_ON(ret); + + BUG_ON(ret); + if (found_type == BTRFS_FILE_EXTENT_REG) { + struct btrfs_key ins; + + ins.objectid = btrfs_file_extent_disk_bytenr(eb, item); + ins.offset = btrfs_file_extent_disk_num_bytes(eb, item); + ins.type = BTRFS_EXTENT_ITEM_KEY; + + /* insert the extent pointer in the file */ + ret = overwrite_item(trans, root, path, eb, slot, key); + BUG_ON(ret); + + /* + * is this extent already allocated in the extent + * allocation tree? 
If so, just add a reference + */ + ret = btrfs_lookup_extent(root, path, ins.objectid, ins.offset); + btrfs_release_path(root, path); + if (ret == 0) { + ret = btrfs_inc_extent_ref(trans, root, + ins.objectid, ins.offset, + root->root_key.objectid, + trans->transid, key->objectid, start); + } else { + /* + * insert the extent pointer in the extent + * allocation tree + */ + ret = btrfs_alloc_logged_extent(trans, root, + root->root_key.objectid, + trans->transid, key->objectid, + start, &ins); + BUG_ON(ret); + } + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + /* inline extents are easy, we just overwrite them */ + ret = overwrite_item(trans, root, path, eb, slot, key); + BUG_ON(ret); + } + /* btrfs_drop_extents changes i_blocks, update it here */ + inode->i_blocks += (extent_end - start) >> 9; + btrfs_update_inode(trans, root, inode); +out: + if (inode) + iput(inode); + return ret; +} + +/* + * when cleaning up conflicts between the directory names in the + * subvolume, directory names in the log and directory names in the + * inode back references, we may have to unlink inodes from directories. 
+ * + * This is a helper function to do the unlink of a specific directory + * item + */ +static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct inode *dir, + struct btrfs_dir_item *di) +{ + struct inode *inode; + char *name; + int name_len; + struct extent_buffer *leaf; + struct btrfs_key location; + int ret; + + leaf = path->nodes[0]; + + btrfs_dir_item_key_to_cpu(leaf, di, &location); + name_len = btrfs_dir_name_len(leaf, di); + name = kmalloc(name_len, GFP_NOFS); + read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len); + btrfs_release_path(root, path); + + inode = read_one_inode(root, location.objectid); + BUG_ON(!inode); + + btrfs_inc_nlink(inode); + ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); + kfree(name); + + iput(inode); + return ret; +} + +/* + * helper function to see if a given name and sequence number found + * in an inode back reference are already in a directory and correctly + * point to this inode + */ +static noinline int inode_in_dir(struct btrfs_root *root, + struct btrfs_path *path, + u64 dirid, u64 objectid, u64 index, + const char *name, int name_len) +{ + struct btrfs_dir_item *di; + struct btrfs_key location; + int match = 0; + + di = btrfs_lookup_dir_index_item(NULL, root, path, dirid, + index, name, name_len, 0); + if (di && !IS_ERR(di)) { + btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); + if (location.objectid != objectid) + goto out; + } else + goto out; + btrfs_release_path(root, path); + + di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0); + if (di && !IS_ERR(di)) { + btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); + if (location.objectid != objectid) + goto out; + } else + goto out; + match = 1; +out: + btrfs_release_path(root, path); + return match; +} + +/* + * helper function to check a log tree for a named back reference in + * an inode. 
This is used to decide if a back reference that is + * found in the subvolume conflicts with what we find in the log. + * + * inode backreferences may have multiple refs in a single item, + * during replay we process one reference at a time, and we don't + * want to delete valid links to a file from the subvolume if that + * link is also in the log. + */ +static noinline int backref_in_log(struct btrfs_root *log, + struct btrfs_key *key, + char *name, int namelen) +{ + struct btrfs_path *path; + struct btrfs_inode_ref *ref; + unsigned long ptr; + unsigned long ptr_end; + unsigned long name_ptr; + int found_name_len; + int item_size; + int ret; + int match = 0; + + path = btrfs_alloc_path(); + ret = btrfs_search_slot(NULL, log, key, path, 0, 0); + if (ret != 0) + goto out; + + item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); + ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); + ptr_end = ptr + item_size; + while (ptr < ptr_end) { + ref = (struct btrfs_inode_ref *)ptr; + found_name_len = btrfs_inode_ref_name_len(path->nodes[0], ref); + if (found_name_len == namelen) { + name_ptr = (unsigned long)(ref + 1); + ret = memcmp_extent_buffer(path->nodes[0], name, + name_ptr, namelen); + if (ret == 0) { + match = 1; + goto out; + } + } + ptr = (unsigned long)(ref + 1) + found_name_len; + } +out: + btrfs_free_path(path); + return match; +} + + +/* + * replay one inode back reference item found in the log tree. + * eb, slot and key refer to the buffer and key found in the log tree. + * root is the destination we are replaying into, and path is for temp + * use by this function. (it should be released on return). 
+ */ +static noinline int add_inode_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_root *log, + struct btrfs_path *path, + struct extent_buffer *eb, int slot, + struct btrfs_key *key) +{ + struct inode *dir; + int ret; + struct btrfs_key location; + struct btrfs_inode_ref *ref; + struct btrfs_dir_item *di; + struct inode *inode; + char *name; + int namelen; + unsigned long ref_ptr; + unsigned long ref_end; + + location.objectid = key->objectid; + location.type = BTRFS_INODE_ITEM_KEY; + location.offset = 0; + + /* + * it is possible that we didn't log all the parent directories + * for a given inode. If we don't find the dir, just don't + * copy the back ref in. The link count fixup code will take + * care of the rest + */ + dir = read_one_inode(root, key->offset); + if (!dir) + return -ENOENT; + + inode = read_one_inode(root, key->objectid); + BUG_ON(!dir); + + ref_ptr = btrfs_item_ptr_offset(eb, slot); + ref_end = ref_ptr + btrfs_item_size_nr(eb, slot); + +again: + ref = (struct btrfs_inode_ref *)ref_ptr; + + namelen = btrfs_inode_ref_name_len(eb, ref); + name = kmalloc(namelen, GFP_NOFS); + BUG_ON(!name); + + read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen); + + /* if we already have a perfect match, we're done */ + if (inode_in_dir(root, path, dir->i_ino, inode->i_ino, + btrfs_inode_ref_index(eb, ref), + name, namelen)) { + goto out; + } + + /* + * look for a conflicting back reference in the metadata. + * if we find one we have to unlink that name of the file + * before we add our new link. Later on, we overwrite any + * existing back reference, and we don't want to create + * dangling pointers in the directory. 
+ */ +conflict_again: + ret = btrfs_search_slot(NULL, root, key, path, 0, 0); + if (ret == 0) { + char *victim_name; + int victim_name_len; + struct btrfs_inode_ref *victim_ref; + unsigned long ptr; + unsigned long ptr_end; + struct extent_buffer *leaf = path->nodes[0]; + + /* are we trying to overwrite a back ref for the root directory + * if so, just jump out, we're done + */ + if (key->objectid == key->offset) + goto out_nowrite; + + /* check all the names in this back reference to see + * if they are in the log. if so, we allow them to stay + * otherwise they must be unlinked as a conflict + */ + ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); + ptr_end = ptr + btrfs_item_size_nr(leaf, path->slots[0]); + while(ptr < ptr_end) { + victim_ref = (struct btrfs_inode_ref *)ptr; + victim_name_len = btrfs_inode_ref_name_len(leaf, + victim_ref); + victim_name = kmalloc(victim_name_len, GFP_NOFS); + BUG_ON(!victim_name); + + read_extent_buffer(leaf, victim_name, + (unsigned long)(victim_ref + 1), + victim_name_len); + + if (!backref_in_log(log, key, victim_name, + victim_name_len)) { + btrfs_inc_nlink(inode); + btrfs_release_path(root, path); + ret = btrfs_unlink_inode(trans, root, dir, + inode, victim_name, + victim_name_len); + kfree(victim_name); + btrfs_release_path(root, path); + goto conflict_again; + } + kfree(victim_name); + ptr = (unsigned long)(victim_ref + 1) + victim_name_len; + } + BUG_ON(ret); + } + btrfs_release_path(root, path); + + /* look for a conflicting sequence number */ + di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, + btrfs_inode_ref_index(eb, ref), + name, namelen, 0); + if (di && !IS_ERR(di)) { + ret = drop_one_dir_item(trans, root, path, dir, di); + BUG_ON(ret); + } + btrfs_release_path(root, path); + + + /* look for a conflicting name */ + di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, + name, namelen, 0); + if (di && !IS_ERR(di)) { + ret = drop_one_dir_item(trans, root, path, dir, di); + BUG_ON(ret); + } + 
btrfs_release_path(root, path); + + /* insert our name */ + ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, + btrfs_inode_ref_index(eb, ref)); + BUG_ON(ret); + + btrfs_update_inode(trans, root, inode); + +out: + ref_ptr = (unsigned long)(ref + 1) + namelen; + kfree(name); + if (ref_ptr < ref_end) + goto again; + + /* finally write the back reference in the inode */ + ret = overwrite_item(trans, root, path, eb, slot, key); + BUG_ON(ret); + +out_nowrite: + btrfs_release_path(root, path); + iput(dir); + iput(inode); + return 0; +} + +/* + * replay one csum item from the log tree into the subvolume 'root' + * eb, slot and key all refer to the log tree + * path is for temp use by this function and should be released on return + * + * This copies the checksums out of the log tree and inserts them into + * the subvolume. Any existing checksums for this range in the file + * are overwritten, and new items are added where required. + * + * We keep this simple by reusing the btrfs_ordered_sum code from + * the data=ordered mode. This basically means making a copy + * of all the checksums in ram, which we have to do anyway for kmap + * rules. + * + * The copy is then sent down to btrfs_csum_file_blocks, which + * does all the hard work of finding existing items in the file + * or adding new ones. 
+ */ +static noinline int replay_one_csum(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct extent_buffer *eb, int slot, + struct btrfs_key *key) +{ + int ret; + u32 item_size = btrfs_item_size_nr(eb, slot); + u64 cur_offset; + unsigned long file_bytes; + struct btrfs_ordered_sum *sums; + struct btrfs_sector_sum *sector_sum; + struct inode *inode; + unsigned long ptr; + + file_bytes = (item_size / BTRFS_CRC32_SIZE) * root->sectorsize; + inode = read_one_inode(root, key->objectid); + if (!inode) { + return -EIO; + } + + sums = kzalloc(btrfs_ordered_sum_size(root, file_bytes), GFP_NOFS); + if (!sums) { + iput(inode); + return -ENOMEM; + } + + INIT_LIST_HEAD(&sums->list); + sums->len = file_bytes; + sums->file_offset = key->offset; + + /* + * copy all the sums into the ordered sum struct + */ + sector_sum = sums->sums; + cur_offset = key->offset; + ptr = btrfs_item_ptr_offset(eb, slot); + while(item_size > 0) { + sector_sum->offset = cur_offset; + read_extent_buffer(eb, §or_sum->sum, ptr, BTRFS_CRC32_SIZE); + sector_sum++; + item_size -= BTRFS_CRC32_SIZE; + ptr += BTRFS_CRC32_SIZE; + cur_offset += root->sectorsize; + } + + /* let btrfs_csum_file_blocks add them into the file */ + ret = btrfs_csum_file_blocks(trans, root, inode, sums); + BUG_ON(ret); + kfree(sums); + iput(inode); + + return 0; +} +/* + * There are a few corners where the link count of the file can't + * be properly maintained during replay. So, instead of adding + * lots of complexity to the log code, we just scan the backrefs + * for any file that has been through replay. + * + * The scan will update the link count on the inode to reflect the + * number of back refs found. If it goes down to zero, the iput + * will free the inode. 
+ */ +static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode) +{ + struct btrfs_path *path; + int ret; + struct btrfs_key key; + u64 nlink = 0; + unsigned long ptr; + unsigned long ptr_end; + int name_len; + + key.objectid = inode->i_ino; + key.type = BTRFS_INODE_REF_KEY; + key.offset = (u64)-1; + + path = btrfs_alloc_path(); + + while(1) { + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + break; + if (ret > 0) { + if (path->slots[0] == 0) + break; + path->slots[0]--; + } + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid != inode->i_ino || + key.type != BTRFS_INODE_REF_KEY) + break; + ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); + ptr_end = ptr + btrfs_item_size_nr(path->nodes[0], + path->slots[0]); + while(ptr < ptr_end) { + struct btrfs_inode_ref *ref; + + ref = (struct btrfs_inode_ref *)ptr; + name_len = btrfs_inode_ref_name_len(path->nodes[0], + ref); + ptr = (unsigned long)(ref + 1) + name_len; + nlink++; + } + + if (key.offset == 0) + break; + key.offset--; + btrfs_release_path(root, path); + } + btrfs_free_path(path); + if (nlink != inode->i_nlink) { + inode->i_nlink = nlink; + btrfs_update_inode(trans, root, inode); + } + + return 0; +} + +static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path) +{ + int ret; + struct btrfs_key key; + struct inode *inode; + + key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; + key.type = BTRFS_ORPHAN_ITEM_KEY; + key.offset = (u64)-1; + while(1) { + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) + break; + + if (ret == 1) { + if (path->slots[0] == 0) + break; + path->slots[0]--; + } + + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + if (key.objectid != BTRFS_TREE_LOG_FIXUP_OBJECTID || + key.type != BTRFS_ORPHAN_ITEM_KEY) + break; + + ret = btrfs_del_item(trans, root, 
path); + BUG_ON(ret); + + btrfs_release_path(root, path); + inode = read_one_inode(root, key.offset); + BUG_ON(!inode); + + ret = fixup_inode_link_count(trans, root, inode); + BUG_ON(ret); + + iput(inode); + + if (key.offset == 0) + break; + key.offset--; + } + btrfs_release_path(root, path); + return 0; +} + + +/* + * record a given inode in the fixup dir so we can check its link + * count when replay is done. The link count is incremented here + * so the inode won't go away until we check it + */ +static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 objectid) +{ + struct btrfs_key key; + int ret = 0; + struct inode *inode; + + inode = read_one_inode(root, objectid); + BUG_ON(!inode); + + key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; + btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); + key.offset = objectid; + + ret = btrfs_insert_empty_item(trans, root, path, &key, 0); + + btrfs_release_path(root, path); + if (ret == 0) { + btrfs_inc_nlink(inode); + btrfs_update_inode(trans, root, inode); + } else if (ret == -EEXIST) { + ret = 0; + } else { + BUG(); + } + iput(inode); + + return ret; +} + +/* + * when replaying the log for a directory, we only insert names + * for inodes that actually exist. 
This means an fsync on a directory
 * does not implicitly fsync all the new files in it
 *
 * Returns -ENOENT when the inode named by 'location' cannot be read,
 * -EIO when the directory cannot be read, and otherwise whatever
 * btrfs_add_link returns.
 */
static noinline int insert_one_name(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root,
				    struct btrfs_path *path,
				    u64 dirid, u64 index,
				    char *name, int name_len, u8 type,
				    struct btrfs_key *location)
{
	struct inode *target;
	struct inode *parent;
	int err;

	target = read_one_inode(root, location->objectid);
	if (!target)
		return -ENOENT;

	parent = read_one_inode(root, dirid);
	if (!parent) {
		err = -EIO;
		goto put_target;
	}

	err = btrfs_add_link(trans, parent, target, name, name_len, 1, index);

	/* FIXME, put inode into FIXUP list */

	iput(target);
	iput(parent);
	return err;

put_target:
	iput(target);
	return err;
}

/*
 * take a single entry in a log directory item and replay it into
 * the subvolume.
 *
 * if a conflicting item exists in the subdirectory already,
 * the inode it points to is unlinked and put into the link count
 * fix up tree.
 *
 * If a name from the log points to a file or directory that does
 * not exist in the FS, it is skipped.  fsyncs on directories
 * do not force down inodes inside that directory, just changes to the
 * names or unlinks in a directory.
+ */ +static noinline int replay_one_name(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct extent_buffer *eb, + struct btrfs_dir_item *di, + struct btrfs_key *key) +{ + char *name; + int name_len; + struct btrfs_dir_item *dst_di; + struct btrfs_key found_key; + struct btrfs_key log_key; + struct inode *dir; + struct inode *inode; + u8 log_type; + int ret; + + dir = read_one_inode(root, key->objectid); + BUG_ON(!dir); + + name_len = btrfs_dir_name_len(eb, di); + name = kmalloc(name_len, GFP_NOFS); + log_type = btrfs_dir_type(eb, di); + read_extent_buffer(eb, name, (unsigned long)(di + 1), + name_len); + + btrfs_dir_item_key_to_cpu(eb, di, &log_key); + if (key->type == BTRFS_DIR_ITEM_KEY) { + dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid, + name, name_len, 1); + } + else if (key->type == BTRFS_DIR_INDEX_KEY) { + dst_di = btrfs_lookup_dir_index_item(trans, root, path, + key->objectid, + key->offset, name, + name_len, 1); + } else { + BUG(); + } + if (!dst_di || IS_ERR(dst_di)) { + /* we need a sequence number to insert, so we only + * do inserts for the BTRFS_DIR_INDEX_KEY types + */ + if (key->type != BTRFS_DIR_INDEX_KEY) + goto out; + goto insert; + } + + btrfs_dir_item_key_to_cpu(path->nodes[0], dst_di, &found_key); + /* the existing item matches the logged item */ + if (found_key.objectid == log_key.objectid && + found_key.type == log_key.type && + found_key.offset == log_key.offset && + btrfs_dir_type(path->nodes[0], dst_di) == log_type) { + goto out; + } + + /* + * don't drop the conflicting directory entry if the inode + * for the new entry doesn't exist + */ + inode = read_one_inode(root, log_key.objectid); + if (!inode) + goto out; + + iput(inode); + ret = drop_one_dir_item(trans, root, path, dir, dst_di); + BUG_ON(ret); + + if (key->type == BTRFS_DIR_INDEX_KEY) + goto insert; +out: + btrfs_release_path(root, path); + kfree(name); + iput(dir); + return 0; + +insert: + btrfs_release_path(root, 
path); + ret = insert_one_name(trans, root, path, key->objectid, key->offset, + name, name_len, log_type, &log_key); + + if (ret && ret != -ENOENT) + BUG(); + goto out; +} + +/* + * find all the names in a directory item and reconcile them into + * the subvolume. Only BTRFS_DIR_ITEM_KEY types will have more than + * one name in a directory item, but the same code gets used for + * both directory index types + */ +static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct extent_buffer *eb, int slot, + struct btrfs_key *key) +{ + int ret; + u32 item_size = btrfs_item_size_nr(eb, slot); + struct btrfs_dir_item *di; + int name_len; + unsigned long ptr; + unsigned long ptr_end; + + ptr = btrfs_item_ptr_offset(eb, slot); + ptr_end = ptr + item_size; + while(ptr < ptr_end) { + di = (struct btrfs_dir_item *)ptr; + name_len = btrfs_dir_name_len(eb, di); + ret = replay_one_name(trans, root, path, eb, di, key); + BUG_ON(ret); + ptr = (unsigned long)(di + 1); + ptr += name_len; + } + return 0; +} + +/* + * directory replay has two parts. There are the standard directory + * items in the log copied from the subvolume, and range items + * created in the log while the subvolume was logged. + * + * The range items tell us which parts of the key space the log + * is authoritative for. During replay, if a key in the subvolume + * directory is in a logged range item, but not actually in the log + * that means it was deleted from the directory before the fsync + * and should be removed. 
+ */ +static noinline int find_dir_range(struct btrfs_root *root, + struct btrfs_path *path, + u64 dirid, int key_type, + u64 *start_ret, u64 *end_ret) +{ + struct btrfs_key key; + u64 found_end; + struct btrfs_dir_log_item *item; + int ret; + int nritems; + + if (*start_ret == (u64)-1) + return 1; + + key.objectid = dirid; + key.type = key_type; + key.offset = *start_ret; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + if (ret > 0) { + if (path->slots[0] == 0) + goto out; + path->slots[0]--; + } + if (ret != 0) + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + + if (key.type != key_type || key.objectid != dirid) { + ret = 1; + goto next; + } + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_dir_log_item); + found_end = btrfs_dir_log_end(path->nodes[0], item); + + if (*start_ret >= key.offset && *start_ret <= found_end) { + ret = 0; + *start_ret = key.offset; + *end_ret = found_end; + goto out; + } + ret = 1; +next: + /* check the next slot in the tree to see if it is a valid item */ + nritems = btrfs_header_nritems(path->nodes[0]); + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret) + goto out; + } else { + path->slots[0]++; + } + + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + + if (key.type != key_type || key.objectid != dirid) { + ret = 1; + goto out; + } + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_dir_log_item); + found_end = btrfs_dir_log_end(path->nodes[0], item); + *start_ret = key.offset; + *end_ret = found_end; + ret = 0; +out: + btrfs_release_path(root, path); + return ret; +} + +/* + * this looks for a given directory item in the log. 
If the directory + * item is not in the log, the item is removed and the inode it points + * to is unlinked + */ +static noinline int check_item_in_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_root *log, + struct btrfs_path *path, + struct btrfs_path *log_path, + struct inode *dir, + struct btrfs_key *dir_key) +{ + int ret; + struct extent_buffer *eb; + int slot; + u32 item_size; + struct btrfs_dir_item *di; + struct btrfs_dir_item *log_di; + int name_len; + unsigned long ptr; + unsigned long ptr_end; + char *name; + struct inode *inode; + struct btrfs_key location; + +again: + eb = path->nodes[0]; + slot = path->slots[0]; + item_size = btrfs_item_size_nr(eb, slot); + ptr = btrfs_item_ptr_offset(eb, slot); + ptr_end = ptr + item_size; + while(ptr < ptr_end) { + di = (struct btrfs_dir_item *)ptr; + name_len = btrfs_dir_name_len(eb, di); + name = kmalloc(name_len, GFP_NOFS); + if (!name) { + ret = -ENOMEM; + goto out; + } + read_extent_buffer(eb, name, (unsigned long)(di + 1), + name_len); + log_di = NULL; + if (dir_key->type == BTRFS_DIR_ITEM_KEY) { + log_di = btrfs_lookup_dir_item(trans, log, log_path, + dir_key->objectid, + name, name_len, 0); + } else if (dir_key->type == BTRFS_DIR_INDEX_KEY) { + log_di = btrfs_lookup_dir_index_item(trans, log, + log_path, + dir_key->objectid, + dir_key->offset, + name, name_len, 0); + } + if (!log_di || IS_ERR(log_di)) { + btrfs_dir_item_key_to_cpu(eb, di, &location); + btrfs_release_path(root, path); + btrfs_release_path(log, log_path); + inode = read_one_inode(root, location.objectid); + BUG_ON(!inode); + + ret = link_to_fixup_dir(trans, root, + path, location.objectid); + BUG_ON(ret); + btrfs_inc_nlink(inode); + ret = btrfs_unlink_inode(trans, root, dir, inode, + name, name_len); + BUG_ON(ret); + kfree(name); + iput(inode); + + /* there might still be more names under this key + * check and repeat if required + */ + ret = btrfs_search_slot(NULL, root, dir_key, path, + 0, 0); + if (ret == 0) 
+ goto again; + ret = 0; + goto out; + } + btrfs_release_path(log, log_path); + kfree(name); + + ptr = (unsigned long)(di + 1); + ptr += name_len; + } + ret = 0; +out: + btrfs_release_path(root, path); + btrfs_release_path(log, log_path); + return ret; +} + +/* + * deletion replay happens before we copy any new directory items + * out of the log or out of backreferences from inodes. It + * scans the log to find ranges of keys that log is authoritative for, + * and then scans the directory to find items in those ranges that are + * not present in the log. + * + * Anything we don't find in the log is unlinked and removed from the + * directory. + */ +static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_root *log, + struct btrfs_path *path, + u64 dirid) +{ + u64 range_start; + u64 range_end; + int key_type = BTRFS_DIR_LOG_ITEM_KEY; + int ret = 0; + struct btrfs_key dir_key; + struct btrfs_key found_key; + struct btrfs_path *log_path; + struct inode *dir; + + dir_key.objectid = dirid; + dir_key.type = BTRFS_DIR_ITEM_KEY; + log_path = btrfs_alloc_path(); + if (!log_path) + return -ENOMEM; + + dir = read_one_inode(root, dirid); + /* it isn't an error if the inode isn't there, that can happen + * because we replay the deletes before we copy in the inode item + * from the log + */ + if (!dir) { + btrfs_free_path(log_path); + return 0; + } +again: + range_start = 0; + range_end = 0; + while(1) { + ret = find_dir_range(log, path, dirid, key_type, + &range_start, &range_end); + if (ret != 0) + break; + + dir_key.offset = range_start; + while(1) { + int nritems; + ret = btrfs_search_slot(NULL, root, &dir_key, path, + 0, 0); + if (ret < 0) + goto out; + + nritems = btrfs_header_nritems(path->nodes[0]); + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret) + break; + } + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + path->slots[0]); + if (found_key.objectid != dirid || + 
found_key.type != dir_key.type) + goto next_type; + + if (found_key.offset > range_end) + break; + + ret = check_item_in_log(trans, root, log, path, + log_path, dir, &found_key); + BUG_ON(ret); + if (found_key.offset == (u64)-1) + break; + dir_key.offset = found_key.offset + 1; + } + btrfs_release_path(root, path); + if (range_end == (u64)-1) + break; + range_start = range_end + 1; + } + +next_type: + ret = 0; + if (key_type == BTRFS_DIR_LOG_ITEM_KEY) { + key_type = BTRFS_DIR_LOG_INDEX_KEY; + dir_key.type = BTRFS_DIR_INDEX_KEY; + btrfs_release_path(root, path); + goto again; + } +out: + btrfs_release_path(root, path); + btrfs_free_path(log_path); + iput(dir); + return ret; +} + +/* + * the process_func used to replay items from the log tree. This + * gets called in two different stages. The first stage just looks + * for inodes and makes sure they are all copied into the subvolume. + * + * The second stage copies all the other item types from the log into + * the subvolume. The two stage approach is slower, but gets rid of + * lots of complexity around inodes referencing other inodes that exist + * only in the log (references come from either directory items or inode + * back refs). 
+ */ +static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, + struct walk_control *wc, u64 gen) +{ + int nritems; + struct btrfs_path *path; + struct btrfs_root *root = wc->replay_dest; + struct btrfs_key key; + u32 item_size; + int level; + int i; + int ret; + + btrfs_read_buffer(eb, gen); + + level = btrfs_header_level(eb); + + if (level != 0) + return 0; + + path = btrfs_alloc_path(); + BUG_ON(!path); + + nritems = btrfs_header_nritems(eb); + for (i = 0; i < nritems; i++) { + btrfs_item_key_to_cpu(eb, &key, i); + item_size = btrfs_item_size_nr(eb, i); + + /* inode keys are done during the first stage */ + if (key.type == BTRFS_INODE_ITEM_KEY && + wc->stage == LOG_WALK_REPLAY_INODES) { + struct inode *inode; + struct btrfs_inode_item *inode_item; + u32 mode; + + inode_item = btrfs_item_ptr(eb, i, + struct btrfs_inode_item); + mode = btrfs_inode_mode(eb, inode_item); + if (S_ISDIR(mode)) { + ret = replay_dir_deletes(wc->trans, + root, log, path, key.objectid); + BUG_ON(ret); + } + ret = overwrite_item(wc->trans, root, path, + eb, i, &key); + BUG_ON(ret); + + /* for regular files, truncate away + * extents past the new EOF + */ + if (S_ISREG(mode)) { + inode = read_one_inode(root, + key.objectid); + BUG_ON(!inode); + + ret = btrfs_truncate_inode_items(wc->trans, + root, inode, inode->i_size, + BTRFS_EXTENT_DATA_KEY); + BUG_ON(ret); + iput(inode); + } + ret = link_to_fixup_dir(wc->trans, root, + path, key.objectid); + BUG_ON(ret); + } + if (wc->stage < LOG_WALK_REPLAY_ALL) + continue; + + /* these keys are simply copied */ + if (key.type == BTRFS_XATTR_ITEM_KEY) { + ret = overwrite_item(wc->trans, root, path, + eb, i, &key); + BUG_ON(ret); + } else if (key.type == BTRFS_INODE_REF_KEY) { + ret = add_inode_ref(wc->trans, root, log, path, + eb, i, &key); + BUG_ON(ret && ret != -ENOENT); + } else if (key.type == BTRFS_EXTENT_DATA_KEY) { + ret = replay_one_extent(wc->trans, root, path, + eb, i, &key); + BUG_ON(ret); + } else if (key.type == 
BTRFS_CSUM_ITEM_KEY) { + ret = replay_one_csum(wc->trans, root, path, + eb, i, &key); + BUG_ON(ret); + } else if (key.type == BTRFS_DIR_ITEM_KEY || + key.type == BTRFS_DIR_INDEX_KEY) { + ret = replay_one_dir_item(wc->trans, root, path, + eb, i, &key); + BUG_ON(ret); + } + } + btrfs_free_path(path); + return 0; +} + +static int noinline walk_down_log_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int *level, + struct walk_control *wc) +{ + u64 root_owner; + u64 root_gen; + u64 bytenr; + u64 ptr_gen; + struct extent_buffer *next; + struct extent_buffer *cur; + struct extent_buffer *parent; + u32 blocksize; + int ret = 0; + + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); + + while(*level > 0) { + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); + cur = path->nodes[*level]; + + if (btrfs_header_level(cur) != *level) + WARN_ON(1); + + if (path->slots[*level] >= + btrfs_header_nritems(cur)) + break; + + bytenr = btrfs_node_blockptr(cur, path->slots[*level]); + ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); + blocksize = btrfs_level_size(root, *level - 1); + + parent = path->nodes[*level]; + root_owner = btrfs_header_owner(parent); + root_gen = btrfs_header_generation(parent); + + next = btrfs_find_create_tree_block(root, bytenr, blocksize); + + wc->process_func(root, next, wc, ptr_gen); + + if (*level == 1) { + path->slots[*level]++; + if (wc->free) { + btrfs_read_buffer(next, ptr_gen); + + btrfs_tree_lock(next); + clean_tree_block(trans, root, next); + btrfs_wait_tree_block_writeback(next); + btrfs_tree_unlock(next); + + ret = btrfs_drop_leaf_ref(trans, root, next); + BUG_ON(ret); + + WARN_ON(root_owner != + BTRFS_TREE_LOG_OBJECTID); + ret = btrfs_free_extent(trans, root, bytenr, + blocksize, root_owner, + root_gen, 0, 0, 1); + BUG_ON(ret); + } + free_extent_buffer(next); + continue; + } + btrfs_read_buffer(next, ptr_gen); + + WARN_ON(*level <= 0); + if (path->nodes[*level-1]) + 
free_extent_buffer(path->nodes[*level-1]); + path->nodes[*level-1] = next; + *level = btrfs_header_level(next); + path->slots[*level] = 0; + cond_resched(); + } + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); + + if (path->nodes[*level] == root->node) { + parent = path->nodes[*level]; + } else { + parent = path->nodes[*level + 1]; + } + bytenr = path->nodes[*level]->start; + + blocksize = btrfs_level_size(root, *level); + root_owner = btrfs_header_owner(parent); + root_gen = btrfs_header_generation(parent); + + wc->process_func(root, path->nodes[*level], wc, + btrfs_header_generation(path->nodes[*level])); + + if (wc->free) { + next = path->nodes[*level]; + btrfs_tree_lock(next); + clean_tree_block(trans, root, next); + btrfs_wait_tree_block_writeback(next); + btrfs_tree_unlock(next); + + if (*level == 0) { + ret = btrfs_drop_leaf_ref(trans, root, next); + BUG_ON(ret); + } + WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); + ret = btrfs_free_extent(trans, root, bytenr, blocksize, + root_owner, root_gen, 0, 0, 1); + BUG_ON(ret); + } + free_extent_buffer(path->nodes[*level]); + path->nodes[*level] = NULL; + *level += 1; + + cond_resched(); + return 0; +} + +static int noinline walk_up_log_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int *level, + struct walk_control *wc) +{ + u64 root_owner; + u64 root_gen; + int i; + int slot; + int ret; + + for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { + slot = path->slots[i]; + if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { + struct extent_buffer *node; + node = path->nodes[i]; + path->slots[i]++; + *level = i; + WARN_ON(*level == 0); + return 0; + } else { + if (path->nodes[*level] == root->node) { + root_owner = root->root_key.objectid; + root_gen = + btrfs_header_generation(path->nodes[*level]); + } else { + struct extent_buffer *node; + node = path->nodes[*level + 1]; + root_owner = btrfs_header_owner(node); + root_gen = 
btrfs_header_generation(node); + } + wc->process_func(root, path->nodes[*level], wc, + btrfs_header_generation(path->nodes[*level])); + if (wc->free) { + struct extent_buffer *next; + + next = path->nodes[*level]; + + btrfs_tree_lock(next); + clean_tree_block(trans, root, next); + btrfs_wait_tree_block_writeback(next); + btrfs_tree_unlock(next); + + if (*level == 0) { + ret = btrfs_drop_leaf_ref(trans, root, + next); + BUG_ON(ret); + } + + WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); + ret = btrfs_free_extent(trans, root, + path->nodes[*level]->start, + path->nodes[*level]->len, + root_owner, root_gen, 0, 0, 1); + BUG_ON(ret); + } + free_extent_buffer(path->nodes[*level]); + path->nodes[*level] = NULL; + *level = i + 1; + } + } + return 1; +} + +/* + * drop the reference count on the tree rooted at 'snap'. This traverses + * the tree freeing any blocks that have a ref count of zero after being + * decremented. + */ +static int walk_log_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *log, struct walk_control *wc) +{ + int ret = 0; + int wret; + int level; + struct btrfs_path *path; + int i; + int orig_level; + + path = btrfs_alloc_path(); + BUG_ON(!path); + + level = btrfs_header_level(log->node); + orig_level = level; + path->nodes[level] = log->node; + extent_buffer_get(log->node); + path->slots[level] = 0; + + while(1) { + wret = walk_down_log_tree(trans, log, path, &level, wc); + if (wret > 0) + break; + if (wret < 0) + ret = wret; + + wret = walk_up_log_tree(trans, log, path, &level, wc); + if (wret > 0) + break; + if (wret < 0) + ret = wret; + } + + /* was the root node processed? 
if not, catch it here */ + if (path->nodes[orig_level]) { + wc->process_func(log, path->nodes[orig_level], wc, + btrfs_header_generation(path->nodes[orig_level])); + if (wc->free) { + struct extent_buffer *next; + + next = path->nodes[orig_level]; + + btrfs_tree_lock(next); + clean_tree_block(trans, log, next); + btrfs_wait_tree_block_writeback(next); + btrfs_tree_unlock(next); + + if (orig_level == 0) { + ret = btrfs_drop_leaf_ref(trans, log, + next); + BUG_ON(ret); + } + WARN_ON(log->root_key.objectid != + BTRFS_TREE_LOG_OBJECTID); + ret = btrfs_free_extent(trans, log, + next->start, next->len, + log->root_key.objectid, + btrfs_header_generation(next), + 0, 0, 1); + BUG_ON(ret); + } + } + + for (i = 0; i <= orig_level; i++) { + if (path->nodes[i]) { + free_extent_buffer(path->nodes[i]); + path->nodes[i] = NULL; + } + } + btrfs_free_path(path); + if (wc->free) + free_extent_buffer(log->node); + return ret; +} + +int wait_log_commit(struct btrfs_root *log) +{ + DEFINE_WAIT(wait); + u64 transid = log->fs_info->tree_log_transid; + + do { + prepare_to_wait(&log->fs_info->tree_log_wait, &wait, + TASK_UNINTERRUPTIBLE); + mutex_unlock(&log->fs_info->tree_log_mutex); + if (atomic_read(&log->fs_info->tree_log_commit)) + schedule(); + finish_wait(&log->fs_info->tree_log_wait, &wait); + mutex_lock(&log->fs_info->tree_log_mutex); + } while(transid == log->fs_info->tree_log_transid && + atomic_read(&log->fs_info->tree_log_commit)); + return 0; +} + +/* + * btrfs_sync_log does sends a given tree log down to the disk and + * updates the super blocks to record it. 
When this call is done, + * you know that any inodes previously logged are safely on disk + */ +int btrfs_sync_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + int ret; + unsigned long batch; + struct btrfs_root *log = root->log_root; + struct walk_control wc = { + .write = 1, + .process_func = process_one_buffer + }; + + mutex_lock(&log->fs_info->tree_log_mutex); + if (atomic_read(&log->fs_info->tree_log_commit)) { + wait_log_commit(log); + goto out; + } + atomic_set(&log->fs_info->tree_log_commit, 1); + + while(1) { + mutex_unlock(&log->fs_info->tree_log_mutex); + schedule_timeout_uninterruptible(1); + mutex_lock(&log->fs_info->tree_log_mutex); + batch = log->fs_info->tree_log_batch; + + while(atomic_read(&log->fs_info->tree_log_writers)) { + DEFINE_WAIT(wait); + prepare_to_wait(&log->fs_info->tree_log_wait, &wait, + TASK_UNINTERRUPTIBLE); + batch = log->fs_info->tree_log_batch; + mutex_unlock(&log->fs_info->tree_log_mutex); + if (atomic_read(&log->fs_info->tree_log_writers)) + schedule(); + mutex_lock(&log->fs_info->tree_log_mutex); + finish_wait(&log->fs_info->tree_log_wait, &wait); + } + if (batch == log->fs_info->tree_log_batch) + break; + } + ret = walk_log_tree(trans, log, &wc); + BUG_ON(ret); + + ret = walk_log_tree(trans, log->fs_info->log_root_tree, &wc); + BUG_ON(ret); + + wc.wait = 1; + + ret = walk_log_tree(trans, log, &wc); + BUG_ON(ret); + + ret = walk_log_tree(trans, log->fs_info->log_root_tree, &wc); + BUG_ON(ret); + + btrfs_set_super_log_root(&root->fs_info->super_for_commit, + log->fs_info->log_root_tree->node->start); + btrfs_set_super_log_root_level(&root->fs_info->super_for_commit, + btrfs_header_level(log->fs_info->log_root_tree->node)); + + write_ctree_super(trans, log->fs_info->tree_root); + log->fs_info->tree_log_transid++; + log->fs_info->tree_log_batch = 0; + atomic_set(&log->fs_info->tree_log_commit, 0); + smp_mb(); + if (waitqueue_active(&log->fs_info->tree_log_wait)) + wake_up(&log->fs_info->tree_log_wait); +out: 
+ mutex_unlock(&log->fs_info->tree_log_mutex); + return 0; + +} + +/* + * free all the extents used by the tree log. This should be called + * at commit time of the full transaction + */ +int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) +{ + int ret; + struct btrfs_root *log; + struct key; + struct walk_control wc = { + .free = 1, + .process_func = process_one_buffer + }; + + if (!root->log_root) + return 0; + + log = root->log_root; + ret = walk_log_tree(trans, log, &wc); + BUG_ON(ret); + + log = root->log_root; + ret = btrfs_del_root(trans, root->fs_info->log_root_tree, + &log->root_key); + BUG_ON(ret); + root->log_root = NULL; + kfree(root->log_root); + return 0; +} + +/* + * helper function to update the item for a given subvolumes log root + * in the tree of log roots + */ +static int update_log_root(struct btrfs_trans_handle *trans, + struct btrfs_root *log) +{ + u64 bytenr = btrfs_root_bytenr(&log->root_item); + int ret; + + if (log->node->start == bytenr) + return 0; + + btrfs_set_root_bytenr(&log->root_item, log->node->start); + btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node)); + ret = btrfs_update_root(trans, log->fs_info->log_root_tree, + &log->root_key, &log->root_item); + BUG_ON(ret); + return ret; +} + +/* + * If both a file and directory are logged, and unlinks or renames are + * mixed in, we have a few interesting corners: + * + * create file X in dir Y + * link file X to X.link in dir Y + * fsync file X + * unlink file X but leave X.link + * fsync dir Y + * + * After a crash we would expect only X.link to exist. But file X + * didn't get fsync'd again so the log has back refs for X and X.link. + * + * We solve this by removing directory entries and inode backrefs from the + * log when a file that was logged in the current transaction is + * unlinked. Any later fsync will include the updated log entries, and + * we'll be able to reconstruct the proper directory items from backrefs. 
+ * + * This optimizations allows us to avoid relogging the entire inode + * or the entire directory. + */ +int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + const char *name, int name_len, + struct inode *dir, u64 index) +{ + struct btrfs_root *log; + struct btrfs_dir_item *di; + struct btrfs_path *path; + int ret; + int bytes_del = 0; + + ret = join_running_log_trans(root); + if (ret) + return 0; + + mutex_lock(&BTRFS_I(dir)->log_mutex); + + log = root->log_root; + path = btrfs_alloc_path(); + di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, + name, name_len, -1); + if (di && !IS_ERR(di)) { + ret = btrfs_delete_one_dir_name(trans, log, path, di); + bytes_del += name_len; + BUG_ON(ret); + } + btrfs_release_path(log, path); + di = btrfs_lookup_dir_index_item(trans, log, path, dir->i_ino, + index, name, name_len, -1); + if (di && !IS_ERR(di)) { + ret = btrfs_delete_one_dir_name(trans, log, path, di); + bytes_del += name_len; + BUG_ON(ret); + } + + /* update the directory size in the log to reflect the names + * we have removed + */ + if (bytes_del) { + struct btrfs_key key; + + key.objectid = dir->i_ino; + key.offset = 0; + key.type = BTRFS_INODE_ITEM_KEY; + btrfs_release_path(log, path); + + ret = btrfs_search_slot(trans, log, &key, path, 0, 1); + if (ret == 0) { + struct btrfs_inode_item *item; + u64 i_size; + + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + i_size = btrfs_inode_size(path->nodes[0], item); + if (i_size > bytes_del) + i_size -= bytes_del; + else + i_size = 0; + btrfs_set_inode_size(path->nodes[0], item, i_size); + btrfs_mark_buffer_dirty(path->nodes[0]); + } else + ret = 0; + btrfs_release_path(log, path); + } + + btrfs_free_path(path); + mutex_unlock(&BTRFS_I(dir)->log_mutex); + end_log_trans(root); + + return 0; +} + +/* see comments for btrfs_del_dir_entries_in_log */ +int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, + struct btrfs_root 
*root, + const char *name, int name_len, + struct inode *inode, u64 dirid) +{ + struct btrfs_root *log; + u64 index; + int ret; + + ret = join_running_log_trans(root); + if (ret) + return 0; + log = root->log_root; + mutex_lock(&BTRFS_I(inode)->log_mutex); + + ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, + dirid, &index); + mutex_unlock(&BTRFS_I(inode)->log_mutex); + end_log_trans(root); + + if (ret == 0 || ret == -ENOENT) + return 0; + return ret; +} + +/* + * creates a range item in the log for 'dirid'. first_offset and + * last_offset tell us which parts of the key space the log should + * be considered authoritative for. + */ +static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, + struct btrfs_root *log, + struct btrfs_path *path, + int key_type, u64 dirid, + u64 first_offset, u64 last_offset) +{ + int ret; + struct btrfs_key key; + struct btrfs_dir_log_item *item; + + key.objectid = dirid; + key.offset = first_offset; + if (key_type == BTRFS_DIR_ITEM_KEY) + key.type = BTRFS_DIR_LOG_ITEM_KEY; + else + key.type = BTRFS_DIR_LOG_INDEX_KEY; + ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item)); + BUG_ON(ret); + + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_dir_log_item); + btrfs_set_dir_log_end(path->nodes[0], item, last_offset); + btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_release_path(log, path); + return 0; +} + +/* + * log all the items included in the current transaction for a given + * directory. 
This also creates the range items in the log tree required + * to replay anything deleted before the fsync + */ +static noinline int log_dir_items(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + struct btrfs_path *path, + struct btrfs_path *dst_path, int key_type, + u64 min_offset, u64 *last_offset_ret) +{ + struct btrfs_key min_key; + struct btrfs_key max_key; + struct btrfs_root *log = root->log_root; + struct extent_buffer *src; + int ret; + int i; + int nritems; + u64 first_offset = min_offset; + u64 last_offset = (u64)-1; + + log = root->log_root; + max_key.objectid = inode->i_ino; + max_key.offset = (u64)-1; + max_key.type = key_type; + + min_key.objectid = inode->i_ino; + min_key.type = key_type; + min_key.offset = min_offset; + + path->keep_locks = 1; + + ret = btrfs_search_forward(root, &min_key, &max_key, + path, 0, trans->transid); + + /* + * we didn't find anything from this transaction, see if there + * is anything at all + */ + if (ret != 0 || min_key.objectid != inode->i_ino || + min_key.type != key_type) { + min_key.objectid = inode->i_ino; + min_key.type = key_type; + min_key.offset = (u64)-1; + btrfs_release_path(root, path); + ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); + if (ret < 0) { + btrfs_release_path(root, path); + return ret; + } + ret = btrfs_previous_item(root, path, inode->i_ino, key_type); + + /* if ret == 0 there are items for this type, + * create a range to tell us the last key of this type. + * otherwise, there are no items in this directory after + * *min_offset, and we create a range to indicate that. 
+ */ + if (ret == 0) { + struct btrfs_key tmp; + btrfs_item_key_to_cpu(path->nodes[0], &tmp, + path->slots[0]); + if (key_type == tmp.type) { + first_offset = max(min_offset, tmp.offset) + 1; + } + } + goto done; + } + + /* go backward to find any previous key */ + ret = btrfs_previous_item(root, path, inode->i_ino, key_type); + if (ret == 0) { + struct btrfs_key tmp; + btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); + if (key_type == tmp.type) { + first_offset = tmp.offset; + ret = overwrite_item(trans, log, dst_path, + path->nodes[0], path->slots[0], + &tmp); + } + } + btrfs_release_path(root, path); + + /* find the first key from this transaction again */ + ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); + if (ret != 0) { + WARN_ON(1); + goto done; + } + + /* + * we have a block from this transaction, log every item in it + * from our directory + */ + while(1) { + struct btrfs_key tmp; + src = path->nodes[0]; + nritems = btrfs_header_nritems(src); + for (i = path->slots[0]; i < nritems; i++) { + btrfs_item_key_to_cpu(src, &min_key, i); + + if (min_key.objectid != inode->i_ino || + min_key.type != key_type) + goto done; + ret = overwrite_item(trans, log, dst_path, src, i, + &min_key); + BUG_ON(ret); + } + path->slots[0] = nritems; + + /* + * look ahead to the next item and see if it is also + * from this directory and from this transaction + */ + ret = btrfs_next_leaf(root, path); + if (ret == 1) { + last_offset = (u64)-1; + goto done; + } + btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); + if (tmp.objectid != inode->i_ino || tmp.type != key_type) { + last_offset = (u64)-1; + goto done; + } + if (btrfs_header_generation(path->nodes[0]) != trans->transid) { + ret = overwrite_item(trans, log, dst_path, + path->nodes[0], path->slots[0], + &tmp); + + BUG_ON(ret); + last_offset = tmp.offset; + goto done; + } + } +done: + *last_offset_ret = last_offset; + btrfs_release_path(root, path); + btrfs_release_path(log, dst_path); + + /* 
insert the log range keys to indicate where the log is valid */ + ret = insert_dir_log_key(trans, log, path, key_type, inode->i_ino, + first_offset, last_offset); + BUG_ON(ret); + return 0; +} + +/* + * logging directories is very similar to logging inodes, We find all the items + * from the current transaction and write them to the log. + * + * The recovery code scans the directory in the subvolume, and if it finds a + * key in the range logged that is not present in the log tree, then it means + * that dir entry was unlinked during the transaction. + * + * In order for that scan to work, we must include one key smaller than + * the smallest logged by this transaction and one key larger than the largest + * key logged by this transaction. + */ +static noinline int log_directory_changes(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + struct btrfs_path *path, + struct btrfs_path *dst_path) +{ + u64 min_key; + u64 max_key; + int ret; + int key_type = BTRFS_DIR_ITEM_KEY; + +again: + min_key = 0; + max_key = 0; + while(1) { + ret = log_dir_items(trans, root, inode, path, + dst_path, key_type, min_key, + &max_key); + BUG_ON(ret); + if (max_key == (u64)-1) + break; + min_key = max_key + 1; + } + + if (key_type == BTRFS_DIR_ITEM_KEY) { + key_type = BTRFS_DIR_INDEX_KEY; + goto again; + } + return 0; +} + +/* + * a helper function to drop items from the log before we relog an + * inode. max_key_type indicates the highest item type to remove. + * This cannot be run for file data extents because it does not + * free the extents they point to. 
+ */ +static int drop_objectid_items(struct btrfs_trans_handle *trans, + struct btrfs_root *log, + struct btrfs_path *path, + u64 objectid, int max_key_type) +{ + int ret; + struct btrfs_key key; + struct btrfs_key found_key; + + key.objectid = objectid; + key.type = max_key_type; + key.offset = (u64)-1; + + while(1) { + ret = btrfs_search_slot(trans, log, &key, path, -1, 1); + + if (ret != 1) + break; + + if (path->slots[0] == 0) + break; + + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + path->slots[0]); + + if (found_key.objectid != objectid) + break; + + ret = btrfs_del_item(trans, log, path); + BUG_ON(ret); + btrfs_release_path(log, path); + } + btrfs_release_path(log, path); + return 0; +} + +/* log a single inode in the tree log. + * At least one parent directory for this inode must exist in the tree + * or be logged already. + * + * Any items from this inode changed by the current transaction are copied + * to the log tree. An extra reference is taken on any extents in this + * file, allowing us to avoid a whole pile of corner cases around logging + * blocks that have been removed from the tree. + * + * See LOG_INODE_ALL and related defines for a description of what inode_only + * does. + * + * This handles both files and directories. 
+ */ +static int __btrfs_log_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + int inode_only) +{ + struct btrfs_path *path; + struct btrfs_path *dst_path; + struct btrfs_key min_key; + struct btrfs_key max_key; + struct btrfs_root *log = root->log_root; + unsigned long src_offset; + unsigned long dst_offset; + struct extent_buffer *src; + struct btrfs_file_extent_item *extent; + struct btrfs_inode_item *inode_item; + u32 size; + int ret; + + log = root->log_root; + + path = btrfs_alloc_path(); + dst_path = btrfs_alloc_path(); + + min_key.objectid = inode->i_ino; + min_key.type = BTRFS_INODE_ITEM_KEY; + min_key.offset = 0; + + max_key.objectid = inode->i_ino; + if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode)) + max_key.type = BTRFS_XATTR_ITEM_KEY; + else + max_key.type = (u8)-1; + max_key.offset = (u64)-1; + + /* + * if this inode has already been logged and we're in inode_only + * mode, we don't want to delete the things that have already + * been written to the log. + * + * But, if the inode has been through an inode_only log, + * the logged_trans field is not set. This allows us to catch + * any new names for this inode in the backrefs by logging it + * again + */ + if (inode_only == LOG_INODE_EXISTS && + BTRFS_I(inode)->logged_trans == trans->transid) { + btrfs_free_path(path); + btrfs_free_path(dst_path); + goto out; + } + mutex_lock(&BTRFS_I(inode)->log_mutex); + + /* + * a brute force approach to making sure we get the most uptodate + * copies of everything. 
+ */ + if (S_ISDIR(inode->i_mode)) { + int max_key_type = BTRFS_DIR_LOG_INDEX_KEY; + + if (inode_only == LOG_INODE_EXISTS) + max_key_type = BTRFS_XATTR_ITEM_KEY; + ret = drop_objectid_items(trans, log, path, + inode->i_ino, max_key_type); + } else { + ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); + } + BUG_ON(ret); + path->keep_locks = 1; + + while(1) { + ret = btrfs_search_forward(root, &min_key, &max_key, + path, 0, trans->transid); + if (ret != 0) + break; + + if (min_key.objectid != inode->i_ino) + break; + if (min_key.type > max_key.type) + break; + + src = path->nodes[0]; + size = btrfs_item_size_nr(src, path->slots[0]); + ret = btrfs_insert_empty_item(trans, log, dst_path, &min_key, + size); + if (ret) + BUG(); + + dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], + dst_path->slots[0]); + + src_offset = btrfs_item_ptr_offset(src, path->slots[0]); + + copy_extent_buffer(dst_path->nodes[0], src, dst_offset, + src_offset, size); + + if (inode_only == LOG_INODE_EXISTS && + min_key.type == BTRFS_INODE_ITEM_KEY) { + inode_item = btrfs_item_ptr(dst_path->nodes[0], + dst_path->slots[0], + struct btrfs_inode_item); + btrfs_set_inode_size(dst_path->nodes[0], inode_item, 0); + + /* set the generation to zero so the recover code + * can tell the difference between an logging + * just to say 'this inode exists' and a logging + * to say 'update this inode with these values' + */ + btrfs_set_inode_generation(dst_path->nodes[0], + inode_item, 0); + } + /* take a reference on file data extents so that truncates + * or deletes of this inode don't have to relog the inode + * again + */ + if (btrfs_key_type(&min_key) == BTRFS_EXTENT_DATA_KEY) { + int found_type; + extent = btrfs_item_ptr(src, path->slots[0], + struct btrfs_file_extent_item); + + found_type = btrfs_file_extent_type(src, extent); + if (found_type == BTRFS_FILE_EXTENT_REG) { + u64 ds = btrfs_file_extent_disk_bytenr(src, + extent); + u64 dl = btrfs_file_extent_disk_num_bytes(src, + extent); + /* 
ds == 0 is a hole */ + if (ds != 0) { + ret = btrfs_inc_extent_ref(trans, log, + ds, dl, + log->root_key.objectid, + 0, + inode->i_ino, + min_key.offset); + BUG_ON(ret); + } + } + } + + btrfs_mark_buffer_dirty(dst_path->nodes[0]); + btrfs_release_path(root, path); + btrfs_release_path(log, dst_path); + + if (min_key.offset < (u64)-1) + min_key.offset++; + else if (min_key.type < (u8)-1) + min_key.type++; + else if (min_key.objectid < (u64)-1) + min_key.objectid++; + else + break; + } + if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { + btrfs_release_path(root, path); + btrfs_release_path(log, dst_path); + ret = log_directory_changes(trans, root, inode, path, dst_path); + BUG_ON(ret); + } + mutex_unlock(&BTRFS_I(inode)->log_mutex); + + btrfs_free_path(path); + btrfs_free_path(dst_path); + + mutex_lock(&root->fs_info->tree_log_mutex); + ret = update_log_root(trans, log); + BUG_ON(ret); + mutex_unlock(&root->fs_info->tree_log_mutex); +out: + return 0; +} + +int btrfs_log_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + int inode_only) +{ + int ret; + + start_log_trans(trans, root); + ret = __btrfs_log_inode(trans, root, inode, inode_only); + end_log_trans(root); + return ret; +} + +/* + * helper function around btrfs_log_inode to make sure newly created + * parent directories also end up in the log. 
A minimal inode and backref + * only logging is done of any parent directories that are older than + * the last committed transaction + */ +int btrfs_log_dentry(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct dentry *dentry) +{ + int inode_only = LOG_INODE_ALL; + struct super_block *sb; + int ret; + + start_log_trans(trans, root); + sb = dentry->d_inode->i_sb; + while(1) { + ret = __btrfs_log_inode(trans, root, dentry->d_inode, + inode_only); + BUG_ON(ret); + inode_only = LOG_INODE_EXISTS; + + dentry = dentry->d_parent; + if (!dentry || !dentry->d_inode || sb != dentry->d_inode->i_sb) + break; + + if (BTRFS_I(dentry->d_inode)->generation <= + root->fs_info->last_trans_committed) + break; + } + end_log_trans(root); + return 0; +} + +/* + * it is not safe to log dentry if the chunk root has added new + * chunks. This returns 0 if the dentry was logged, and 1 otherwise. + * If this returns 1, you must commit the transaction to safely get your + * data on disk. + */ +int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct dentry *dentry) +{ + u64 gen; + gen = root->fs_info->last_trans_new_blockgroup; + if (gen > root->fs_info->last_trans_committed) + return 1; + else + return btrfs_log_dentry(trans, root, dentry); +} + +/* + * should be called during mount to recover and replay any log trees + * from the FS + */ +int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) +{ + int ret; + struct btrfs_path *path; + struct btrfs_trans_handle *trans; + struct btrfs_key key; + struct btrfs_key found_key; + struct btrfs_key tmp_key; + struct btrfs_root *log; + struct btrfs_fs_info *fs_info = log_root_tree->fs_info; + struct walk_control wc = { + .process_func = process_one_buffer, + .stage = 0, + }; + + fs_info->log_root_recovering = 1; + path = btrfs_alloc_path(); + BUG_ON(!path); + + trans = btrfs_start_transaction(fs_info->tree_root, 1); + + wc.trans = trans; + wc.pin = 1; + + walk_log_tree(trans, log_root_tree, 
&wc); + +again: + key.objectid = BTRFS_TREE_LOG_OBJECTID; + key.offset = (u64)-1; + btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + + while(1) { + ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0); + if (ret < 0) + break; + if (ret > 0) { + if (path->slots[0] == 0) + break; + path->slots[0]--; + } + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + path->slots[0]); + btrfs_release_path(log_root_tree, path); + if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID) + break; + + log = btrfs_read_fs_root_no_radix(log_root_tree, + &found_key); + BUG_ON(!log); + + + tmp_key.objectid = found_key.offset; + tmp_key.type = BTRFS_ROOT_ITEM_KEY; + tmp_key.offset = (u64)-1; + + wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); + + BUG_ON(!wc.replay_dest); + + btrfs_record_root_in_trans(wc.replay_dest); + ret = walk_log_tree(trans, log, &wc); + BUG_ON(ret); + + if (wc.stage == LOG_WALK_REPLAY_ALL) { + ret = fixup_inode_link_counts(trans, wc.replay_dest, + path); + BUG_ON(ret); + } + + key.offset = found_key.offset - 1; + free_extent_buffer(log->node); + kfree(log); + + if (found_key.offset == 0) + break; + } + btrfs_release_path(log_root_tree, path); + + /* step one is to pin it all, step two is to replay just inodes */ + if (wc.pin) { + wc.pin = 0; + wc.process_func = replay_one_buffer; + wc.stage = LOG_WALK_REPLAY_INODES; + goto again; + } + /* step three is to replay everything */ + if (wc.stage < LOG_WALK_REPLAY_ALL) { + wc.stage++; + goto again; + } + + btrfs_free_path(path); + + free_extent_buffer(log_root_tree->node); + log_root_tree->log_root = NULL; + fs_info->log_root_recovering = 0; + + /* step 4: commit the transaction, which also unpins the blocks */ + btrfs_commit_transaction(trans, fs_info->tree_root); + + kfree(log_root_tree); + return 0; +} diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h new file mode 100644 index 00000000000..b9409b32ed0 --- /dev/null +++ b/fs/btrfs/tree-log.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2008 Oracle. 
All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __TREE_LOG_ +#define __TREE_LOG_ + +int btrfs_sync_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root); +int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); +int btrfs_log_dentry(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct dentry *dentry); +int btrfs_recover_log_trees(struct btrfs_root *tree_root); +int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct dentry *dentry); +int btrfs_log_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + int inode_only); +int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + const char *name, int name_len, + struct inode *dir, u64 index); +int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + const char *name, int name_len, + struct inode *inode, u64 dirid); +#endif -- cgit v1.2.3-70-g09d2 From d0c803c4049c5ca322d4795d8b74f28768603e0e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 11 Sep 2008 16:17:57 -0400 Subject: Btrfs: Record dirty pages tree-log pages in an extent_io tree This is the same way the transaction code makes sure that all the other tree blocks are safely on disk. 
There's an extent_io tree for each root, and any blocks allocated to the tree logs are recorded in that tree. At tree-log sync, the extent_io tree is walked to flush down the dirty pages and wait for them. The main benefit is less time spent walking the tree log and skipping clean pages, and getting sequential IO down to the drive. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/disk-io.c | 17 +++++++++++++++-- fs/btrfs/extent-tree.c | 7 ++++++- fs/btrfs/transaction.c | 21 ++++++++++++++------- fs/btrfs/transaction.h | 2 ++ fs/btrfs/tree-log.c | 29 +++++++++++++++-------------- 6 files changed, 54 insertions(+), 24 deletions(-) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2ed6918f32e..eb65fd80888 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -668,6 +668,8 @@ struct btrfs_root { struct btrfs_key root_key; struct btrfs_fs_info *fs_info; struct inode *inode; + struct extent_io_tree dirty_log_pages; + struct kobject root_kobj; struct completion kobj_unregister; struct mutex objectid_mutex; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5edb7f88579..57fbf107e59 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -777,6 +777,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, spin_lock_init(&root->list_lock); mutex_init(&root->objectid_mutex); mutex_init(&root->log_mutex); + extent_io_tree_init(&root->dirty_log_pages, + fs_info->btree_inode->i_mapping, GFP_NOFS); btrfs_leaf_ref_tree_init(&root->ref_tree_struct); root->ref_tree = &root->ref_tree_struct; @@ -819,11 +821,23 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info) { struct extent_buffer *eb; + struct btrfs_root *log_root_tree = fs_info->log_root_tree; + u64 start = 0; + u64 end = 0; int ret; - if (!fs_info->log_root_tree) + if (!log_root_tree) return 0; + while(1) { + ret = find_first_extent_bit(&log_root_tree->dirty_log_pages, + 0, &start, &end, 
EXTENT_DIRTY); + if (ret) + break; + + clear_extent_dirty(&log_root_tree->dirty_log_pages, + start, end, GFP_NOFS); + } eb = fs_info->log_root_tree->node; WARN_ON(btrfs_header_level(eb) != 0); @@ -1412,7 +1426,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, memset(&BTRFS_I(fs_info->btree_inode)->location, 0, sizeof(struct btrfs_key)); insert_inode_hash(fs_info->btree_inode); - mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->tree_log_mutex); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c479d71e286..c0bb6b9ac4c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2392,8 +2392,13 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, btrfs_tree_lock(buf); clean_tree_block(trans, root, buf); btrfs_set_buffer_uptodate(buf); - set_extent_dirty(&trans->transaction->dirty_pages, buf->start, + if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { + set_extent_dirty(&root->dirty_log_pages, buf->start, + buf->start + buf->len - 1, GFP_NOFS); + } else { + set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); + } trans->blocks_used++; return buf; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 61a377bcb2f..151b00d5259 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -302,23 +302,18 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, } -int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, - struct btrfs_root *root) +int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, + struct extent_io_tree *dirty_pages) { int ret; int err = 0; int werr = 0; - struct extent_io_tree *dirty_pages; struct page *page; struct inode *btree_inode = root->fs_info->btree_inode; u64 start = 0; u64 end; unsigned long index; - if (!trans || !trans->transaction) { - return filemap_write_and_wait(btree_inode->i_mapping); - } 
- dirty_pages = &trans->transaction->dirty_pages; while(1) { ret = find_first_extent_bit(dirty_pages, start, &start, &end, EXTENT_DIRTY); @@ -385,6 +380,18 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, return werr; } +int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + if (!trans || !trans->transaction) { + struct inode *btree_inode; + btree_inode = root->fs_info->btree_inode; + return filemap_write_and_wait(btree_inode->i_mapping); + } + return btrfs_write_and_wait_marked_extents(root, + &trans->transaction->dirty_pages); +} + static int update_cowonly_root(struct btrfs_trans_handle *trans, struct btrfs_root *root) { diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index cc63650d60d..eef2cb7d7e7 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -99,4 +99,6 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, struct btrfs_root *root); void btrfs_throttle(struct btrfs_root *root); int btrfs_record_root_in_trans(struct btrfs_root *root); +int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, + struct extent_io_tree *dirty_pages); #endif diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index ae96451bc22..bfa71080096 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1954,10 +1954,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, int ret; unsigned long batch; struct btrfs_root *log = root->log_root; - struct walk_control wc = { - .write = 1, - .process_func = process_one_buffer - }; mutex_lock(&log->fs_info->tree_log_mutex); if (atomic_read(&log->fs_info->tree_log_commit)) { @@ -1985,18 +1981,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, if (batch == log->fs_info->tree_log_batch) break; } - ret = walk_log_tree(trans, log, &wc); - BUG_ON(ret); - - ret = walk_log_tree(trans, log->fs_info->log_root_tree, &wc); - BUG_ON(ret); - - wc.wait = 1; - ret = walk_log_tree(trans, log, &wc); + ret = 
btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); BUG_ON(ret); - - ret = walk_log_tree(trans, log->fs_info->log_root_tree, &wc); + ret = btrfs_write_and_wait_marked_extents(root->fs_info->log_root_tree, + &root->fs_info->log_root_tree->dirty_log_pages); BUG_ON(ret); btrfs_set_super_log_root(&root->fs_info->super_for_commit, @@ -2025,6 +2014,8 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) int ret; struct btrfs_root *log; struct key; + u64 start; + u64 end; struct walk_control wc = { .free = 1, .process_func = process_one_buffer @@ -2037,6 +2028,16 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) ret = walk_log_tree(trans, log, &wc); BUG_ON(ret); + while(1) { + ret = find_first_extent_bit(&log->dirty_log_pages, + 0, &start, &end, EXTENT_DIRTY); + if (ret) + break; + + clear_extent_dirty(&log->dirty_log_pages, + start, end, GFP_NOFS); + } + log = root->log_root; ret = btrfs_del_root(trans, root->fs_info->log_root_tree, &log->root_key); -- cgit v1.2.3-70-g09d2 From 3de4586c5278a28107030c336956381f69ff7a9d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 17 Nov 2008 21:02:50 -0500 Subject: Btrfs: Allow subvolumes and snapshots anywhere in the directory tree Before, all snapshots and subvolumes lived in a single flat directory. This was awkward and confusing because the single flat directory was only writable with the ioctls. This commit changes the ioctls to create subvols and snapshots at any point in the directory tree. This requires making separate ioctls for snapshot and subvol creation instead of combining them into one. The subvol ioctl does: btrfsctl -S subvol_name parent_dir After the ioctl is done subvol_name lives inside parent_dir. The snapshot ioctl does: btrfsctl -s path_for_snapshot root_to_snapshot path_for_snapshot can be an absolute or relative path. btrfsctl breaks it up into directory and basename components. 
root_to_snapshot can be any file or directory in the FS. The snapshot is taken of the entire root where that file lives. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 7 +++-- fs/btrfs/disk-io.c | 14 +++++++--- fs/btrfs/inode.c | 50 ++++++++++++++++++++++------------- fs/btrfs/ioctl.c | 71 ++++++++++++++++++++++++++++++++++++-------------- fs/btrfs/ioctl.h | 7 +++-- fs/btrfs/super.c | 10 +++---- fs/btrfs/transaction.c | 66 +++++++++++++++++++++++++++++++++++++--------- fs/btrfs/transaction.h | 2 ++ 8 files changed, 162 insertions(+), 65 deletions(-) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5ff74282a62..5611f8e035a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -606,6 +606,7 @@ struct btrfs_fs_info { struct btrfs_root *tree_root; struct btrfs_root *chunk_root; struct btrfs_root *dev_root; + struct btrfs_root *fs_root; /* the log root tree is a directory of all the other log roots */ struct btrfs_root *log_root_tree; @@ -758,7 +759,6 @@ struct btrfs_root { struct btrfs_root_item root_item; struct btrfs_key root_key; struct btrfs_fs_info *fs_info; - struct inode *inode; struct extent_io_tree dirty_log_pages; struct kobject root_kobj; @@ -1876,6 +1876,8 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, #define PageChecked PageFsMisc #endif +struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); +int btrfs_set_inode_index(struct inode *dir, u64 *index); int btrfs_unlink_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *dir, struct inode *inode, @@ -1896,9 +1898,6 @@ int btrfs_create_subvol_root(struct btrfs_root *new_root, struct dentry *dentry, struct btrfs_trans_handle *trans, u64 new_dirid, struct btrfs_block_group_cache *block_group); -void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name, - int namelen); - int btrfs_merge_bio_hook(struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags); 
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 82833e5d84b..0a5350573f6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -838,7 +838,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, u64 objectid) { root->node = NULL; - root->inode = NULL; root->commit_root = NULL; root->ref_tree = NULL; root->sectorsize = sectorsize; @@ -1430,6 +1429,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, u32 blocksize; u32 stripesize; u64 generation; + struct btrfs_key location; struct buffer_head *bh; struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); @@ -1729,7 +1729,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, goto fail_cleaner; if (sb->s_flags & MS_RDONLY) - return tree_root; + goto read_fs_root; if (btrfs_super_log_root(disk_super) != 0) { u32 blocksize; @@ -1755,6 +1755,14 @@ struct btrfs_root *open_ctree(struct super_block *sb, ret = btrfs_cleanup_reloc_trees(tree_root); BUG_ON(ret); + location.objectid = BTRFS_FS_TREE_OBJECTID; + location.type = BTRFS_ROOT_ITEM_KEY; + location.offset = (u64)-1; + +read_fs_root: + fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); + if (!fs_info->fs_root) + goto fail_cleaner; return tree_root; fail_cleaner: @@ -1944,8 +1952,6 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) (unsigned long)root->root_key.objectid); if (root->in_sysfs) btrfs_sysfs_del_root(root); - if (root->inode) - iput(root->inode); if (root->node) free_extent_buffer(root->node); if (root->commit_root) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3e3620e69bb..e163b1b7470 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3038,8 +3038,7 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, return inode; } -static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) +struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) { struct inode 
* inode; struct btrfs_inode *bi = BTRFS_I(dir); @@ -3067,13 +3066,21 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, inode = btrfs_iget(dir->i_sb, &location, sub_root, &new); if (IS_ERR(inode)) return ERR_CAST(inode); - - /* the inode and parent dir are two different roots */ - if (new && root != sub_root) { - igrab(inode); - sub_root->inode = inode; - } } + return inode; +} + +static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct inode *inode; + + if (dentry->d_name.len > BTRFS_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + + inode = btrfs_lookup_dentry(dir, dentry); + if (IS_ERR(inode)) + return ERR_CAST(inode); return d_splice_alias(inode, dentry); } @@ -3129,7 +3136,6 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, return 0; filp->f_pos = 2; } - path = btrfs_alloc_path(); path->reada = 2; @@ -3159,6 +3165,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, path->slots[0]++; } } + advance = 1; item = btrfs_item_nr(leaf, slot); btrfs_item_key_to_cpu(leaf, &found_key, slot); @@ -3194,16 +3201,25 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; btrfs_dir_item_key_to_cpu(leaf, di, &location); + + /* is this a reference to our own snapshot? If so + * skip it + */ + if (location.type == BTRFS_ROOT_ITEM_KEY && + location.objectid == root->root_key.objectid) { + over = 0; + goto skip; + } over = filldir(dirent, name_ptr, name_len, found_key.offset, location.objectid, d_type); +skip: if (name_ptr != tmp_name) kfree(name_ptr); if (over) goto nopos; - di_len = btrfs_dir_name_len(leaf, di) + btrfs_dir_data_len(leaf, di) + sizeof(*di); di_cur += di_len; @@ -3318,8 +3334,7 @@ out: * helper to find a free sequence number in a given directory. 
This current * code is very simple, later versions will do smarter things in the btree */ -static int btrfs_set_inode_index(struct inode *dir, struct inode *inode, - u64 *index) +int btrfs_set_inode_index(struct inode *dir, u64 *index) { int ret = 0; @@ -3365,7 +3380,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, return ERR_PTR(-ENOMEM); if (dir) { - ret = btrfs_set_inode_index(dir, inode, index); + ret = btrfs_set_inode_index(dir, index); if (ret) return ERR_PTR(ret); } @@ -3651,7 +3666,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, err = btrfs_check_free_space(root, 1, 0); if (err) goto fail; - err = btrfs_set_inode_index(dir, inode, &index); + err = btrfs_set_inode_index(dir, &index); if (err) goto fail; @@ -4349,13 +4364,13 @@ out: * Invalidate a single dcache entry at the root of the filesystem. * Needed after creation of snapshot or subvolume. */ -void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name, +void btrfs_invalidate_dcache_root(struct inode *dir, char *name, int namelen) { struct dentry *alias, *entry; struct qstr qstr; - alias = d_find_alias(root->fs_info->sb->s_root->d_inode); + alias = d_find_alias(dir); if (alias) { qstr.name = name; qstr.len = namelen; @@ -4387,7 +4402,6 @@ int btrfs_create_subvol_root(struct btrfs_root *new_root, struct dentry *dentry, return PTR_ERR(inode); inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; - new_root->inode = inode; inode->i_nlink = 1; btrfs_i_size_write(inode, 0); @@ -4590,7 +4604,7 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, } } - ret = btrfs_set_inode_index(new_dir, old_inode, &index); + ret = btrfs_set_inode_index(new_dir, &index); if (ret) goto out_fail; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f43df72b0e1..ec45b308613 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -67,6 +67,7 @@ static noinline int create_subvol(struct btrfs_root *root, int 
err; u64 objectid; u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; + u64 index = 0; unsigned long nr = 1; ret = btrfs_check_free_space(root, 1, 0); @@ -126,6 +127,7 @@ static noinline int create_subvol(struct btrfs_root *root, key.objectid = objectid; key.offset = 1; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); +printk("inserting root objectid %Lu\n", objectid); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &root_item); if (ret) @@ -135,24 +137,27 @@ static noinline int create_subvol(struct btrfs_root *root, * insert the directory item */ key.offset = (u64)-1; - dir = root->fs_info->sb->s_root->d_inode; - ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, + dir = dentry->d_parent->d_inode; + ret = btrfs_set_inode_index(dir, &index); + BUG_ON(ret); + + ret = btrfs_insert_dir_item(trans, root, name, namelen, dir->i_ino, &key, - BTRFS_FT_DIR, 0); + BTRFS_FT_DIR, index); if (ret) goto fail; - +#if 0 ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root, name, namelen, objectid, root->fs_info->sb->s_root->d_inode->i_ino, 0); if (ret) goto fail; - +#endif ret = btrfs_commit_transaction(trans, root); if (ret) goto fail_commit; - new_root = btrfs_read_fs_root(root->fs_info, &key, name, namelen); + new_root = btrfs_read_fs_root_no_name(root->fs_info, &key); BUG_ON(!new_root); trans = btrfs_start_transaction(new_root, 1); @@ -170,14 +175,16 @@ fail: ret = err; fail_commit: btrfs_btree_balance_dirty(root, nr); +printk("all done ret %d\n", ret); return ret; } -static int create_snapshot(struct btrfs_root *root, char *name, int namelen) +static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, + char *name, int namelen) { struct btrfs_pending_snapshot *pending_snapshot; struct btrfs_trans_handle *trans; - int ret; + int ret = 0; int err; unsigned long nr = 0; @@ -188,7 +195,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) if (ret) goto fail_unlock; - pending_snapshot = 
kmalloc(sizeof(*pending_snapshot), GFP_NOFS); + pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); if (!pending_snapshot) { ret = -ENOMEM; goto fail_unlock; @@ -201,12 +208,12 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) } memcpy(pending_snapshot->name, name, namelen); pending_snapshot->name[namelen] = '\0'; + pending_snapshot->dentry = dentry; trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); pending_snapshot->root = root; list_add(&pending_snapshot->list, &trans->transaction->pending_snapshots); - ret = btrfs_update_inode(trans, root, root->inode); err = btrfs_commit_transaction(trans, root); fail_unlock: @@ -230,7 +237,8 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child) * inside this filesystem so it's quite a bit simpler. */ static noinline int btrfs_mksubvol(struct path *parent, char *name, - int mode, int namelen) + int mode, int namelen, + struct btrfs_root *snap_src) { struct dentry *dentry; int error; @@ -248,6 +256,7 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name, if (!IS_POSIXACL(parent->dentry->d_inode)) mode &= ~current->fs->umask; + error = mnt_want_write(parent->mnt); if (error) goto out_dput; @@ -266,8 +275,12 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name, * Also we should pass on the mode eventually to allow creating new * subvolume with specific mode bits. 
*/ - error = create_subvol(BTRFS_I(parent->dentry->d_inode)->root, dentry, - name, namelen); + if (snap_src) { + error = create_snapshot(snap_src, dentry, name, namelen); + } else { + error = create_subvol(BTRFS_I(parent->dentry->d_inode)->root, + dentry, name, namelen); + } if (error) goto out_drop_write; @@ -471,15 +484,16 @@ out: } static noinline int btrfs_ioctl_snap_create(struct file *file, - void __user *arg) + void __user *arg, int subvol) { struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; struct btrfs_ioctl_vol_args *vol_args; struct btrfs_dir_item *di; struct btrfs_path *path; + struct file *src_file; u64 root_dirid; int namelen; - int ret; + int ret = 0; if (root->fs_info->sb->s_flags & MS_RDONLY) return -EROFS; @@ -523,12 +537,29 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, goto out; } - if (root == root->fs_info->tree_root) { + if (subvol) { ret = btrfs_mksubvol(&file->f_path, vol_args->name, file->f_path.dentry->d_inode->i_mode, - namelen); + namelen, NULL); } else { - ret = create_snapshot(root, vol_args->name, namelen); + struct inode *src_inode; + src_file = fget(vol_args->fd); + if (!src_file) { + ret = -EINVAL; + goto out; + } + + src_inode = src_file->f_path.dentry->d_inode; + if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) { + printk("btrfs: Snapshot src from another FS\n"); + ret = -EINVAL; + fput(src_file); + goto out; + } + ret = btrfs_mksubvol(&file->f_path, vol_args->name, + file->f_path.dentry->d_inode->i_mode, + namelen, BTRFS_I(src_inode)->root); + fput(src_file); } out: @@ -1030,7 +1061,9 @@ long btrfs_ioctl(struct file *file, unsigned int switch (cmd) { case BTRFS_IOC_SNAP_CREATE: - return btrfs_ioctl_snap_create(file, (void __user *)arg); + return btrfs_ioctl_snap_create(file, (void __user *)arg, 0); + case BTRFS_IOC_SUBVOL_CREATE: + return btrfs_ioctl_snap_create(file, (void __user *)arg, 1); case BTRFS_IOC_DEFRAG: return btrfs_ioctl_defrag(file); case BTRFS_IOC_RESIZE: diff --git 
a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 989ba8a0121..78049ea208d 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -22,9 +22,10 @@ #define BTRFS_IOCTL_MAGIC 0x94 #define BTRFS_VOL_NAME_MAX 255 -#define BTRFS_PATH_NAME_MAX 4095 +#define BTRFS_PATH_NAME_MAX 3072 struct btrfs_ioctl_vol_args { + __s64 fd; char name[BTRFS_PATH_NAME_MAX + 1]; }; @@ -51,7 +52,6 @@ struct btrfs_ioctl_vol_args { struct btrfs_ioctl_vol_args) #define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \ struct btrfs_ioctl_vol_args) - struct btrfs_ioctl_clone_range_args { __s64 src_fd; __u64 src_offset, src_length; @@ -61,4 +61,7 @@ struct btrfs_ioctl_clone_range_args { #define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \ struct btrfs_ioctl_clone_range_args) +#define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \ + struct btrfs_ioctl_vol_args) + #endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 92393cc60d0..77c5eff3e20 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -285,11 +285,11 @@ static int btrfs_parse_early_options(const char *options, int flags, out: /* * If no subvolume name is specified we use the default one. Allocate - * a copy of the string "default" here so that code later in the + * a copy of the string "." here so that code later in the * mount path doesn't care if it's the default volume or another one. 
*/ if (!*subvol_name) { - *subvol_name = kstrdup("default", GFP_KERNEL); + *subvol_name = kstrdup(".", GFP_KERNEL); if (!*subvol_name) return -ENOMEM; } @@ -323,12 +323,12 @@ static int btrfs_fill_super(struct super_block * sb, } sb->s_fs_info = tree_root; disk_super = &tree_root->fs_info->super_copy; - inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super), - tree_root); + inode = btrfs_iget_locked(sb, BTRFS_FIRST_FREE_OBJECTID, + tree_root->fs_info->fs_root); bi = BTRFS_I(inode); bi->location.objectid = inode->i_ino; bi->location.offset = 0; - bi->root = tree_root; + bi->root = tree_root->fs_info->fs_root; btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 202c1b6df4a..eec8b246503 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -779,7 +779,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct extent_buffer *tmp; struct extent_buffer *old; int ret; - int namelen; u64 objectid; new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); @@ -816,28 +815,48 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, if (ret) goto fail; + key.offset = (u64)-1; + memcpy(&pending->root_key, &key, sizeof(key)); +fail: + kfree(new_root_item); + return ret; +} + +static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info, + struct btrfs_pending_snapshot *pending) +{ + int ret; + int namelen; + u64 index = 0; + struct btrfs_trans_handle *trans; + struct inode *parent_inode; + struct inode *inode; + + trans = btrfs_start_transaction(fs_info->fs_root, 1); + /* * insert the directory item */ - key.offset = (u64)-1; namelen = strlen(pending->name); - ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, - pending->name, namelen, - root->fs_info->sb->s_root->d_inode->i_ino, - &key, BTRFS_FT_DIR, 0); + parent_inode = pending->dentry->d_parent->d_inode; + ret = btrfs_set_inode_index(parent_inode, &index); 
+ ret = btrfs_insert_dir_item(trans, + BTRFS_I(parent_inode)->root, + pending->name, namelen, + parent_inode->i_ino, + &pending->root_key, BTRFS_FT_DIR, index); if (ret) goto fail; - +#if 0 ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root, pending->name, strlen(pending->name), objectid, root->fs_info->sb->s_root->d_inode->i_ino, 0); - - /* Invalidate existing dcache entry for new snapshot. */ - btrfs_invalidate_dcache_root(root, pending->name, namelen); - +#endif + inode = btrfs_lookup_dentry(parent_inode, pending->dentry); + d_instantiate(pending->dentry, inode); fail: - kfree(new_root_item); + btrfs_end_transaction(trans, fs_info->fs_root); return ret; } @@ -846,6 +865,22 @@ fail: */ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info) +{ + struct btrfs_pending_snapshot *pending; + struct list_head *head = &trans->transaction->pending_snapshots; + struct list_head *cur; + int ret; + + list_for_each(cur, head) { + pending = list_entry(cur, struct btrfs_pending_snapshot, list); + ret = create_pending_snapshot(trans, fs_info, pending); + BUG_ON(ret); + } + return 0; +} + +static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) { struct btrfs_pending_snapshot *pending; struct list_head *head = &trans->transaction->pending_snapshots; @@ -854,7 +889,7 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, while(!list_empty(head)) { pending = list_entry(head->next, struct btrfs_pending_snapshot, list); - ret = create_pending_snapshot(trans, fs_info, pending); + ret = finish_pending_snapshot(fs_info, pending); BUG_ON(ret); list_del(&pending->list); kfree(pending->name); @@ -1033,11 +1068,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_drop_dead_reloc_roots(root); mutex_unlock(&root->fs_info->tree_reloc_mutex); + /* do the directory inserts of any pending snapshot creations */ + 
finish_pending_snapshots(trans, root->fs_info); + mutex_lock(&root->fs_info->trans_mutex); cur_trans->commit_done = 1; root->fs_info->last_trans_committed = cur_trans->transid; wake_up(&cur_trans->commit_wait); + put_transaction(cur_trans); put_transaction(cur_trans); @@ -1046,6 +1085,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots); mutex_unlock(&root->fs_info->trans_mutex); + kmem_cache_free(btrfs_trans_handle_cachep, trans); if (root->fs_info->closing) { diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index eef2cb7d7e7..202c8be6c05 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -47,8 +47,10 @@ struct btrfs_trans_handle { }; struct btrfs_pending_snapshot { + struct dentry *dentry; struct btrfs_root *root; char *name; + struct btrfs_key root_key; struct list_head list; }; -- cgit v1.2.3-70-g09d2 From d2fb3437e4d8d12c73c587615ad187d5288547ec Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Thu, 11 Dec 2008 16:30:39 -0500 Subject: Btrfs: fix leaking block group on balance The block group structs are referenced in many different places, and it's not safe to free while balancing. So, those block group structs were simply leaked instead. This patch replaces the block group pointer in the inode with the starting byte offset of the block group and adds reference counting to the block group struct. 
Signed-off-by: Yan Zheng --- fs/btrfs/btrfs_inode.h | 8 ++- fs/btrfs/ctree.h | 17 ++++--- fs/btrfs/extent-tree.c | 132 ++++++++++++++++++++++--------------------------- fs/btrfs/inode.c | 43 +++++----------- fs/btrfs/ioctl.c | 2 +- fs/btrfs/transaction.c | 2 +- fs/btrfs/transaction.h | 2 +- 7 files changed, 88 insertions(+), 118 deletions(-) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 1b9ec1ab1f6..a8c9693b75a 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -28,11 +28,6 @@ struct btrfs_inode { /* which subvolume this inode belongs to */ struct btrfs_root *root; - /* the block group preferred for allocations. This pointer is buggy - * and needs to be replaced with a bytenr instead - */ - struct btrfs_block_group_cache *block_group; - /* key used to find this inode on disk. This is used by the code * to read in roots of subvolumes */ @@ -115,6 +110,9 @@ struct btrfs_inode { */ u64 index_cnt; + /* the start of block group preferred for allocations. 
*/ + u64 block_group; + struct inode vfs_inode; }; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5b0c79d22c0..8733081d97a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -653,6 +653,9 @@ struct btrfs_block_group_cache { /* for block groups in the same raid type */ struct list_head list; + + /* usage count */ + atomic_t count; }; struct btrfs_leaf_ref_tree { @@ -1706,10 +1709,8 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 bytenr); -struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, - struct btrfs_block_group_cache - *hint, u64 search_start, - int data, int owner); +u64 btrfs_find_block_group(struct btrfs_root *root, + u64 search_start, u64 search_hint, int owner); struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u32 blocksize, u64 parent, @@ -1770,6 +1771,7 @@ int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, u64 owner_objectid); int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root); +int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr); int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_read_block_groups(struct btrfs_root *root); int btrfs_make_block_group(struct btrfs_trans_handle *trans, @@ -2019,10 +2021,9 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root); int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc); -int btrfs_create_subvol_root(struct btrfs_root *new_root, struct dentry *dentry, - struct btrfs_trans_handle *trans, u64 new_dirid, - struct btrfs_block_group_cache *block_group); - +int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, + struct btrfs_root *new_root, struct dentry *dentry, + u64 new_dirid, u64 alloc_hint); int 
btrfs_merge_bio_hook(struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 673ff59c288..1cc89246ee2 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -53,10 +53,6 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, int all); static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, int all); -static struct btrfs_block_group_cache * -__btrfs_find_block_group(struct btrfs_root *root, - struct btrfs_block_group_cache *hint, - u64 search_start, int data, int owner); static int pin_down_bytes(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int is_data); @@ -142,6 +138,8 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, break; } } + if (ret) + atomic_inc(&ret->count); spin_unlock(&info->block_group_cache_lock); return ret; @@ -318,6 +316,12 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct return cache; } +static inline void put_block_group(struct btrfs_block_group_cache *cache) +{ + if (atomic_dec_and_test(&cache->count)) + kfree(cache); +} + static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, u64 flags) { @@ -341,54 +345,16 @@ static u64 div_factor(u64 num, int factor) return num; } -static struct btrfs_block_group_cache * -__btrfs_find_block_group(struct btrfs_root *root, - struct btrfs_block_group_cache *hint, - u64 search_start, int data, int owner) +u64 btrfs_find_block_group(struct btrfs_root *root, + u64 search_start, u64 search_hint, int owner) { struct btrfs_block_group_cache *cache; - struct btrfs_block_group_cache *found_group = NULL; - struct btrfs_fs_info *info = root->fs_info; u64 used; - u64 last = 0; - u64 free_check; + u64 last = max(search_hint, search_start); + u64 group_start = 0; int full_search = 0; - int factor = 10; + int factor = 
9; int wrapped = 0; - - if (data & BTRFS_BLOCK_GROUP_METADATA) - factor = 9; - - if (search_start) { - struct btrfs_block_group_cache *shint; - shint = btrfs_lookup_first_block_group(info, search_start); - if (shint && block_group_bits(shint, data)) { - spin_lock(&shint->lock); - used = btrfs_block_group_used(&shint->item); - if (used + shint->pinned + shint->reserved < - div_factor(shint->key.offset, factor)) { - spin_unlock(&shint->lock); - return shint; - } - spin_unlock(&shint->lock); - } - } - if (hint && block_group_bits(hint, data)) { - spin_lock(&hint->lock); - used = btrfs_block_group_used(&hint->item); - if (used + hint->pinned + hint->reserved < - div_factor(hint->key.offset, factor)) { - spin_unlock(&hint->lock); - return hint; - } - spin_unlock(&hint->lock); - last = hint->key.objectid + hint->key.offset; - } else { - if (hint) - last = max(hint->key.objectid, search_start); - else - last = search_start; - } again: while (1) { cache = btrfs_lookup_first_block_group(root->fs_info, last); @@ -399,16 +365,18 @@ again: last = cache->key.objectid + cache->key.offset; used = btrfs_block_group_used(&cache->item); - if (block_group_bits(cache, data)) { - free_check = div_factor(cache->key.offset, factor); + if ((full_search || !cache->ro) && + block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) { if (used + cache->pinned + cache->reserved < - free_check) { - found_group = cache; + div_factor(cache->key.offset, factor)) { + group_start = cache->key.objectid; spin_unlock(&cache->lock); + put_block_group(cache); goto found; } } spin_unlock(&cache->lock); + put_block_group(cache); cond_resched(); } if (!wrapped) { @@ -423,18 +391,7 @@ again: goto again; } found: - return found_group; -} - -struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, - struct btrfs_block_group_cache - *hint, u64 search_start, - int data, int owner) -{ - - struct btrfs_block_group_cache *ret; - ret = __btrfs_find_block_group(root, hint, search_start, data, 
owner); - return ret; + return group_start; } /* simple helper to search for an existing extent at a given offset */ @@ -1809,6 +1766,19 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, return werr; } +int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) +{ + struct btrfs_block_group_cache *block_group; + int readonly = 0; + + block_group = btrfs_lookup_block_group(root->fs_info, bytenr); + if (!block_group || block_group->ro) + readonly = 1; + if (block_group) + put_block_group(block_group); + return readonly; +} + static int update_space_info(struct btrfs_fs_info *info, u64 flags, u64 total_bytes, u64 bytes_used, struct btrfs_space_info **space_info) @@ -1995,10 +1965,10 @@ static int update_block_group(struct btrfs_trans_handle *trans, int ret; ret = btrfs_add_free_space(cache, bytenr, num_bytes); - if (ret) - return -1; + WARN_ON(ret); } } + put_block_group(cache); total -= num_bytes; bytenr += num_bytes; } @@ -2008,12 +1978,16 @@ static int update_block_group(struct btrfs_trans_handle *trans, static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) { struct btrfs_block_group_cache *cache; + u64 bytenr; cache = btrfs_lookup_first_block_group(root->fs_info, search_start); if (!cache) return 0; - return cache->key.objectid; + bytenr = cache->key.objectid; + put_block_group(cache); + + return bytenr; } int btrfs_update_pinned_extents(struct btrfs_root *root, @@ -2055,6 +2029,7 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, if (cache->cached) btrfs_add_free_space(cache, bytenr, len); } + put_block_group(cache); bytenr += len; num -= len; } @@ -2085,6 +2060,7 @@ static int update_reserved_extents(struct btrfs_root *root, } spin_unlock(&cache->lock); spin_unlock(&cache->space_info->lock); + put_block_group(cache); bytenr += len; num -= len; } @@ -2724,6 +2700,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, cache = btrfs_lookup_block_group(root->fs_info, bytenr); BUG_ON(!cache); 
btrfs_add_free_space(cache, bytenr, num_bytes); + put_block_group(cache); update_reserved_extents(root, bytenr, num_bytes, 0); return 0; } @@ -2928,6 +2905,8 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, } new_group: mutex_unlock(&block_group->alloc_mutex); + put_block_group(block_group); + block_group = NULL; new_group_no_lock: /* don't try to compare new allocations against the * last allocation any more @@ -2997,6 +2976,8 @@ loop_check: block_group = list_entry(cur, struct btrfs_block_group_cache, list); + atomic_inc(&block_group->count); + search_start = block_group->key.objectid; cur = cur->next; } @@ -3004,7 +2985,7 @@ loop_check: /* we found what we needed */ if (ins->objectid) { if (!(data & BTRFS_BLOCK_GROUP_DATA)) - trans->block_group = block_group; + trans->block_group = block_group->key.objectid; if (last_ptr) *last_ptr = ins->objectid + ins->offset; @@ -3015,6 +2996,8 @@ loop_check: loop, allowed_chunk_alloc); ret = -ENOSPC; } + if (block_group) + put_block_group(block_group); up_read(&space_info->groups_sem); return ret; @@ -3124,6 +3107,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) return -ENOSPC; } btrfs_add_free_space(cache, start, len); + put_block_group(cache); update_reserved_extents(root, start, len, 0); return 0; } @@ -3288,6 +3272,7 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, ret = btrfs_remove_free_space(block_group, ins->objectid, ins->offset); BUG_ON(ret); + put_block_group(block_group); ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, ref_generation, owner, ins); return ret; @@ -5703,6 +5688,7 @@ next: WARN_ON(block_group->reserved > 0); WARN_ON(btrfs_block_group_used(&block_group->item) > 0); spin_unlock(&block_group->lock); + put_block_group(block_group); ret = 0; out: btrfs_free_path(path); @@ -5763,6 +5749,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) down_write(&block_group->space_info->groups_sem); 
list_del(&block_group->list); up_write(&block_group->space_info->groups_sem); + + WARN_ON(atomic_read(&block_group->count) != 1); kfree(block_group); spin_lock(&info->block_group_cache_lock); @@ -5807,6 +5795,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) break; } + atomic_set(&cache->count, 1); spin_lock_init(&cache->lock); mutex_init(&cache->alloc_mutex); mutex_init(&cache->cache_mutex); @@ -5861,11 +5850,12 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, cache->key.objectid = chunk_offset; cache->key.offset = size; + cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; + atomic_set(&cache->count, 1); spin_lock_init(&cache->lock); mutex_init(&cache->alloc_mutex); mutex_init(&cache->cache_mutex); INIT_LIST_HEAD(&cache->list); - btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); btrfs_set_block_group_used(&cache->item, bytes_used); btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); @@ -5926,10 +5916,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, spin_unlock(&block_group->space_info->lock); block_group->space_info->full = 0; - /* - memset(shrink_block_group, 0, sizeof(*shrink_block_group)); - kfree(shrink_block_group); - */ + put_block_group(block_group); + put_block_group(block_group); ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret > 0) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 932d8c0b2c0..0a28b770631 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -989,7 +989,6 @@ next_slot: if (extent_type == BTRFS_FILE_EXTENT_REG || extent_type == BTRFS_FILE_EXTENT_PREALLOC) { - struct btrfs_block_group_cache *block_group; disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); extent_end = found_key.offset + btrfs_file_extent_num_bytes(leaf, fi); @@ -1007,9 +1006,7 @@ next_slot: goto out_check; if (btrfs_cross_ref_exist(trans, root, disk_bytenr)) goto out_check; - block_group = btrfs_lookup_block_group(root->fs_info, - disk_bytenr); - if (!block_group || block_group->ro) + 
if (btrfs_extent_readonly(root, disk_bytenr)) goto out_check; disk_bytenr += btrfs_file_extent_offset(leaf, fi); nocow = 1; @@ -1969,16 +1966,11 @@ void btrfs_read_locked_inode(struct inode *inode) rdev = btrfs_inode_rdev(leaf, inode_item); BTRFS_I(inode)->index_cnt = (u64)-1; + BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); alloc_group_block = btrfs_inode_block_group(leaf, inode_item); - BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info, - alloc_group_block); - BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); - if (!BTRFS_I(inode)->block_group) { - BTRFS_I(inode)->block_group = btrfs_find_block_group(root, - NULL, 0, - BTRFS_BLOCK_GROUP_METADATA, 0); - } + BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, + alloc_group_block, 0); btrfs_free_path(path); inode_item = NULL; @@ -2048,8 +2040,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, btrfs_set_inode_transid(leaf, item, trans->transid); btrfs_set_inode_rdev(leaf, item, inode->i_rdev); btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); - btrfs_set_inode_block_group(leaf, item, - BTRFS_I(inode)->block_group->key.objectid); + btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group); } /* @@ -3358,14 +3349,11 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *dir, const char *name, int name_len, - u64 ref_objectid, - u64 objectid, - struct btrfs_block_group_cache *group, - int mode, u64 *index) + u64 ref_objectid, u64 objectid, + u64 alloc_hint, int mode, u64 *index) { struct inode *inode; struct btrfs_inode_item *inode_item; - struct btrfs_block_group_cache *new_inode_group; struct btrfs_key *location; struct btrfs_path *path; struct btrfs_inode_ref *ref; @@ -3401,13 +3389,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, owner = 0; else owner = 1; - new_inode_group = btrfs_find_block_group(root, group, 0, - BTRFS_BLOCK_GROUP_METADATA, 
owner); - if (!new_inode_group) { - printk("find_block group failed\n"); - new_inode_group = group; - } - BTRFS_I(inode)->block_group = new_inode_group; + BTRFS_I(inode)->block_group = + btrfs_find_block_group(root, 0, alloc_hint, owner); key[0].objectid = objectid; btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); @@ -4366,16 +4349,16 @@ out: /* * create a new subvolume directory/inode (helper for the ioctl). */ -int btrfs_create_subvol_root(struct btrfs_root *new_root, struct dentry *dentry, - struct btrfs_trans_handle *trans, u64 new_dirid, - struct btrfs_block_group_cache *block_group) +int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, + struct btrfs_root *new_root, struct dentry *dentry, + u64 new_dirid, u64 alloc_hint) { struct inode *inode; int error; u64 index = 0; inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, - new_dirid, block_group, S_IFDIR | 0700, &index); + new_dirid, alloc_hint, S_IFDIR | 0700, &index); if (IS_ERR(inode)) return PTR_ERR(inode); inode->i_op = &btrfs_dir_inode_operations; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 69c4a07f586..5d67858ce99 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -173,7 +173,7 @@ static noinline int create_subvol(struct btrfs_root *root, trans = btrfs_start_transaction(new_root, 1); BUG_ON(!trans); - ret = btrfs_create_subvol_root(new_root, dentry, trans, new_dirid, + ret = btrfs_create_subvol_root(trans, new_root, dentry, new_dirid, BTRFS_I(dir)->block_group); if (ret) goto fail; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 47cd5fcad2c..4604178a43a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -182,7 +182,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, h->transaction = root->fs_info->running_transaction; h->blocks_reserved = num_blocks; h->blocks_used = 0; - h->block_group = NULL; + h->block_group = 0; h->alloc_exclude_nr = 0; h->alloc_exclude_start = 0; 
root->fs_info->running_transaction->use_count++; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 202c8be6c05..ffe7f639732 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -41,7 +41,7 @@ struct btrfs_trans_handle { unsigned long blocks_reserved; unsigned long blocks_used; struct btrfs_transaction *transaction; - struct btrfs_block_group_cache *block_group; + u64 block_group; u64 alloc_exclude_start; u64 alloc_exclude_nr; }; -- cgit v1.2.3-70-g09d2 From d397712bcc6a759a560fd247e6053ecae091f958 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 5 Jan 2009 21:25:51 -0500 Subject: Btrfs: Fix checkpatch.pl warnings There were many, most are fixed now. struct-funcs.c generates some warnings but these are bogus. Signed-off-by: Chris Mason --- fs/btrfs/acl.c | 5 +- fs/btrfs/async-thread.c | 6 +- fs/btrfs/compat.h | 4 +- fs/btrfs/compression.c | 12 +-- fs/btrfs/ctree.c | 217 ++++++++++++++++-------------------------- fs/btrfs/ctree.h | 30 +++--- fs/btrfs/dir-item.c | 2 +- fs/btrfs/disk-io.c | 187 +++++++++++++------------------------ fs/btrfs/export.c | 8 +- fs/btrfs/extent-tree.c | 223 +++++++++++++++++++++++--------------------- fs/btrfs/extent_io.c | 213 ++++++++++++++++++++---------------------- fs/btrfs/extent_map.c | 14 +-- fs/btrfs/file-item.c | 18 ++-- fs/btrfs/file.c | 49 +++++----- fs/btrfs/free-space-cache.c | 37 ++++---- fs/btrfs/inode-map.c | 1 - fs/btrfs/inode.c | 173 +++++++++++++++++----------------- fs/btrfs/ioctl.c | 37 ++++---- fs/btrfs/locking.c | 5 +- fs/btrfs/ordered-data.c | 34 +++---- fs/btrfs/print-tree.c | 73 +++++++++------ fs/btrfs/ref-cache.c | 12 +-- fs/btrfs/root-tree.c | 17 ++-- fs/btrfs/struct-funcs.c | 4 +- fs/btrfs/super.c | 25 +++-- fs/btrfs/sysfs.c | 6 +- fs/btrfs/transaction.c | 45 ++++----- fs/btrfs/transaction.h | 6 +- fs/btrfs/tree-defrag.c | 9 +- fs/btrfs/tree-log.c | 70 +++++++------- fs/btrfs/volumes.c | 78 ++++++++-------- fs/btrfs/xattr.c | 3 +- fs/btrfs/zlib.c | 45 ++++----- 33 files 
changed, 770 insertions(+), 898 deletions(-) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 867eaf1f8ef..1d53b62dbba 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -161,8 +161,7 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) ret = __btrfs_setxattr(inode, name, value, size, 0); out: - if (value) - kfree(value); + kfree(value); if (!ret) btrfs_update_cached_acl(inode, p_acl, acl); @@ -213,7 +212,7 @@ static int btrfs_xattr_acl_default_get(struct inode *inode, const char *name, } static int btrfs_xattr_acl_default_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) + const void *value, size_t size, int flags) { return btrfs_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); } diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 4229450b759..8e2fec05dbe 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -104,7 +104,7 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers, spin_lock_irqsave(&workers->lock, flags); - while(!list_empty(&workers->order_list)) { + while (!list_empty(&workers->order_list)) { work = list_entry(workers->order_list.next, struct btrfs_work, order_list); @@ -143,7 +143,7 @@ static int worker_loop(void *arg) struct btrfs_work *work; do { spin_lock_irq(&worker->lock); - while(!list_empty(&worker->pending)) { + while (!list_empty(&worker->pending)) { cur = worker->pending.next; work = list_entry(cur, struct btrfs_work, list); list_del(&work->list); @@ -188,7 +188,7 @@ int btrfs_stop_workers(struct btrfs_workers *workers) struct btrfs_worker_thread *worker; list_splice_init(&workers->idle_list, &workers->worker_list); - while(!list_empty(&workers->worker_list)) { + while (!list_empty(&workers->worker_list)) { cur = workers->worker_list.next; worker = list_entry(cur, struct btrfs_worker_thread, worker_list); diff --git a/fs/btrfs/compat.h b/fs/btrfs/compat.h index 
75e4426d6fb..594d60bdd3c 100644 --- a/fs/btrfs/compat.h +++ b/fs/btrfs/compat.h @@ -4,7 +4,7 @@ #define btrfs_drop_nlink(inode) drop_nlink(inode) #define btrfs_inc_nlink(inode) inc_nlink(inode) -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,27) +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 27) static inline struct dentry *d_obtain_alias(struct inode *inode) { struct dentry *d; @@ -21,7 +21,7 @@ static inline struct dentry *d_obtain_alias(struct inode *inode) } #endif -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28) # define __pagevec_lru_add_file __pagevec_lru_add # define open_bdev_exclusive open_bdev_excl # define close_bdev_exclusive(bdev, mode) close_bdev_excl(bdev) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 2436163d543..ee848d8585d 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -137,7 +137,8 @@ static int check_compressed_csum(struct inode *inode, kunmap_atomic(kaddr, KM_USER0); if (csum != *cb_sum) { - printk("btrfs csum failed ino %lu extent %llu csum %u " + printk(KERN_INFO "btrfs csum failed ino %lu " + "extent %llu csum %u " "wanted %u mirror %d\n", inode->i_ino, (unsigned long long)disk_start, csum, *cb_sum, cb->mirror_num); @@ -217,7 +218,7 @@ csum_failed: * we have verified the checksum already, set page * checked so the end_io handlers know about it */ - while(bio_index < cb->orig_bio->bi_vcnt) { + while (bio_index < cb->orig_bio->bi_vcnt) { SetPageChecked(bvec->bv_page); bvec++; bio_index++; @@ -246,7 +247,7 @@ static noinline int end_compressed_writeback(struct inode *inode, u64 start, int i; int ret; - while(nr_pages > 0) { + while (nr_pages > 0) { ret = find_get_pages_contig(inode->i_mapping, index, min_t(unsigned long, nr_pages, ARRAY_SIZE(pages)), pages); @@ -463,7 +464,7 @@ static noinline int add_ra_bio_pages(struct inode *inode, end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; pagevec_init(&pvec, 0); - while(last_offset < compressed_end) 
{ + while (last_offset < compressed_end) { page_index = last_offset >> PAGE_CACHE_SHIFT; if (page_index > end_index) @@ -697,9 +698,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0); BUG_ON(ret); - if (!btrfs_test_flag(inode, NODATASUM)) { + if (!btrfs_test_flag(inode, NODATASUM)) btrfs_lookup_bio_sums(root, inode, comp_bio, sums); - } ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0); BUG_ON(ret); diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7fad2e3ad6f..9e46c077681 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -67,7 +67,7 @@ void btrfs_free_path(struct btrfs_path *p) * * It is safe to call this on paths that no locks or extent buffers held. */ -void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) +noinline void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) { int i; @@ -112,7 +112,7 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) { struct extent_buffer *eb; - while(1) { + while (1) { eb = btrfs_root_node(root); btrfs_tree_lock(eb); @@ -202,22 +202,22 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, } /* - * does the dirty work in cow of a single block. The parent block - * (if supplied) is updated to point to the new cow copy. The new - * buffer is marked dirty and returned locked. If you modify the block - * it needs to be marked dirty again. + * does the dirty work in cow of a single block. The parent block (if + * supplied) is updated to point to the new cow copy. The new buffer is marked + * dirty and returned locked. If you modify the block it needs to be marked + * dirty again. * * search_start -- an allocation hint for the new block * - * empty_size -- a hint that you plan on doing more cow. This is the size in bytes - * the allocator should try to find free next to the block it returns. This is - * just a hint and may be ignored by the allocator. 
+ * empty_size -- a hint that you plan on doing more cow. This is the size in + * bytes the allocator should try to find free next to the block it returns. + * This is just a hint and may be ignored by the allocator. * * prealloc_dest -- if you have already reserved a destination for the cow, - * this uses that block instead of allocating a new one. btrfs_alloc_reserved_extent - * is used to finish the allocation. + * this uses that block instead of allocating a new one. + * btrfs_alloc_reserved_extent is used to finish the allocation. */ -static int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, +static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, @@ -366,7 +366,7 @@ static int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, * This version of it has extra checks so that a block isn't cow'd more than * once per transaction, as long as it hasn't been written yet */ -int noinline btrfs_cow_block(struct btrfs_trans_handle *trans, +noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, struct extent_buffer **cow_ret, u64 prealloc_dest) @@ -375,13 +375,16 @@ int noinline btrfs_cow_block(struct btrfs_trans_handle *trans, int ret; if (trans->transaction != root->fs_info->running_transaction) { - printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, + printk(KERN_CRIT "trans %llu running %llu\n", + (unsigned long long)trans->transid, + (unsigned long long) root->fs_info->running_transaction->transid); WARN_ON(1); } if (trans->transid != root->fs_info->generation) { - printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, - root->fs_info->generation); + printk(KERN_CRIT "trans %llu running %llu\n", + (unsigned long long)trans->transid, + (unsigned long long)root->fs_info->generation); WARN_ON(1); } @@ -489,16 
+492,10 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, if (cache_only && parent_level != 1) return 0; - if (trans->transaction != root->fs_info->running_transaction) { - printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, - root->fs_info->running_transaction->transid); + if (trans->transaction != root->fs_info->running_transaction) WARN_ON(1); - } - if (trans->transid != root->fs_info->generation) { - printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, - root->fs_info->generation); + if (trans->transid != root->fs_info->generation) WARN_ON(1); - } parent_nritems = btrfs_header_nritems(parent); blocksize = btrfs_level_size(root, parent_level - 1); @@ -681,51 +678,18 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, BUG_ON(btrfs_node_blockptr(parent, parent_slot) != btrfs_header_bytenr(leaf)); } -#if 0 - for (i = 0; nritems > 1 && i < nritems - 2; i++) { - btrfs_item_key_to_cpu(leaf, &cpukey, i + 1); - btrfs_item_key(leaf, &leaf_key, i); - if (comp_keys(&leaf_key, &cpukey) >= 0) { - btrfs_print_leaf(root, leaf); - printk("slot %d offset bad key\n", i); - BUG_ON(1); - } - if (btrfs_item_offset_nr(leaf, i) != - btrfs_item_end_nr(leaf, i + 1)) { - btrfs_print_leaf(root, leaf); - printk("slot %d offset bad\n", i); - BUG_ON(1); - } - if (i == 0) { - if (btrfs_item_offset_nr(leaf, i) + - btrfs_item_size_nr(leaf, i) != - BTRFS_LEAF_DATA_SIZE(root)) { - btrfs_print_leaf(root, leaf); - printk("slot %d first offset bad\n", i); - BUG_ON(1); - } - } - } - if (nritems > 0) { - if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) { - btrfs_print_leaf(root, leaf); - printk("slot %d bad size \n", nritems - 1); - BUG_ON(1); - } - } -#endif if (slot != 0 && slot < nritems - 1) { btrfs_item_key(leaf, &leaf_key, slot); btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1); if (comp_keys(&leaf_key, &cpukey) <= 0) { btrfs_print_leaf(root, leaf); - printk("slot %d offset bad key\n", slot); + printk(KERN_CRIT "slot %d offset bad key\n", slot); 
BUG_ON(1); } if (btrfs_item_offset_nr(leaf, slot - 1) != btrfs_item_end_nr(leaf, slot)) { btrfs_print_leaf(root, leaf); - printk("slot %d offset bad\n", slot); + printk(KERN_CRIT "slot %d offset bad\n", slot); BUG_ON(1); } } @@ -736,7 +700,7 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, if (btrfs_item_offset_nr(leaf, slot) != btrfs_item_end_nr(leaf, slot + 1)) { btrfs_print_leaf(root, leaf); - printk("slot %d offset bad\n", slot); + printk(KERN_CRIT "slot %d offset bad\n", slot); BUG_ON(1); } } @@ -745,30 +709,10 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, return 0; } -static int noinline check_block(struct btrfs_root *root, +static noinline int check_block(struct btrfs_root *root, struct btrfs_path *path, int level) { - u64 found_start; return 0; - if (btrfs_header_level(path->nodes[level]) != level) - printk("warning: bad level %Lu wanted %d found %d\n", - path->nodes[level]->start, level, - btrfs_header_level(path->nodes[level])); - found_start = btrfs_header_bytenr(path->nodes[level]); - if (found_start != path->nodes[level]->start) { - printk("warning: bad bytentr %Lu found %Lu\n", - path->nodes[level]->start, found_start); - } -#if 0 - struct extent_buffer *buf = path->nodes[level]; - - if (memcmp_extent_buffer(buf, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(buf), - BTRFS_FSID_SIZE)) { - printk("warning bad block %Lu\n", buf->start); - return 1; - } -#endif if (level == 0) return check_leaf(root, path, level); return check_node(root, path, level); @@ -802,7 +746,7 @@ static noinline int generic_bin_search(struct extent_buffer *eb, unsigned long map_len = 0; int err; - while(low < high) { + while (low < high) { mid = (low + high) / 2; offset = p + mid * item_size; @@ -1130,7 +1074,7 @@ enospc: * when they are completely full. This is also done top down, so we * have to be pessimistic. 
*/ -static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, +static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { @@ -1296,7 +1240,7 @@ static noinline void reada_for_search(struct btrfs_root *root, nritems = btrfs_header_nritems(node); nr = slot; - while(1) { + while (1) { if (direction < 0) { if (nr == 0) break; @@ -1322,7 +1266,8 @@ static noinline void reada_for_search(struct btrfs_root *root, nscan++; if (path->reada < 2 && (nread > (64 * 1024) || nscan > 32)) break; - if(nread > (256 * 1024) || nscan > 128) + + if (nread > (256 * 1024) || nscan > 128) break; if (search < lowest_read) @@ -1333,17 +1278,17 @@ static noinline void reada_for_search(struct btrfs_root *root, } /* - * when we walk down the tree, it is usually safe to unlock the higher layers in - * the tree. The exceptions are when our path goes through slot 0, because operations - * on the tree might require changing key pointers higher up in the tree. + * when we walk down the tree, it is usually safe to unlock the higher layers + * in the tree. The exceptions are when our path goes through slot 0, because + * operations on the tree might require changing key pointers higher up in the + * tree. * - * callers might also have set path->keep_locks, which tells this code to - * keep the lock if the path points to the last slot in the block. This is - * part of walking through the tree, and selecting the next slot in the higher - * block. + * callers might also have set path->keep_locks, which tells this code to keep + * the lock if the path points to the last slot in the block. This is part of + * walking through the tree, and selecting the next slot in the higher block. * - * lowest_unlock sets the lowest level in the tree we're allowed to unlock. - * so if lowest_unlock is 1, level 0 won't be unlocked + * lowest_unlock sets the lowest level in the tree we're allowed to unlock. 
so + * if lowest_unlock is 1, level 0 won't be unlocked */ static noinline void unlock_up(struct btrfs_path *path, int level, int lowest_unlock) @@ -1832,9 +1777,8 @@ static int push_node_left(struct btrfs_trans_handle *trans, if (!empty && src_nritems <= 8) return 1; - if (push_items <= 0) { + if (push_items <= 0) return 1; - } if (empty) { push_items = min(src_nritems, push_items); @@ -1854,7 +1798,7 @@ static int push_node_left(struct btrfs_trans_handle *trans, copy_extent_buffer(dst, src, btrfs_node_key_ptr_offset(dst_nritems), btrfs_node_key_ptr_offset(0), - push_items * sizeof(struct btrfs_key_ptr)); + push_items * sizeof(struct btrfs_key_ptr)); if (push_items < src_nritems) { memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), @@ -1899,19 +1843,16 @@ static int balance_node_right(struct btrfs_trans_handle *trans, src_nritems = btrfs_header_nritems(src); dst_nritems = btrfs_header_nritems(dst); push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; - if (push_items <= 0) { + if (push_items <= 0) return 1; - } - if (src_nritems < 4) { + if (src_nritems < 4) return 1; - } max_push = src_nritems / 2 + 1; /* don't try to empty the node */ - if (max_push >= src_nritems) { + if (max_push >= src_nritems) return 1; - } if (max_push < push_items) push_items = max_push; @@ -1924,7 +1865,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans, copy_extent_buffer(dst, src, btrfs_node_key_ptr_offset(0), btrfs_node_key_ptr_offset(src_nritems - push_items), - push_items * sizeof(struct btrfs_key_ptr)); + push_items * sizeof(struct btrfs_key_ptr)); btrfs_set_header_nritems(src, src_nritems - push_items); btrfs_set_header_nritems(dst, dst_nritems + push_items); @@ -1945,7 +1886,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans, * * returns zero on success or < 0 on failure. 
*/ -static int noinline insert_new_root(struct btrfs_trans_handle *trans, +static noinline int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { @@ -2176,14 +2117,15 @@ static int leaf_space_used(struct extent_buffer *l, int start, int nr) * the start of the leaf data. IOW, how much room * the leaf has left for both items and data */ -int noinline btrfs_leaf_free_space(struct btrfs_root *root, +noinline int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf) { int nritems = btrfs_header_nritems(leaf); int ret; ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); if (ret < 0) { - printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n", + printk(KERN_CRIT "leaf free space ret %d, leaf data size %lu, " + "used %d nritems %d\n", ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root), leaf_space_used(leaf, 0, nritems), nritems); } @@ -2219,9 +2161,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root int ret; slot = path->slots[1]; - if (!path->nodes[1]) { + if (!path->nodes[1]) return 1; - } + upper = path->nodes[1]; if (slot >= btrfs_header_nritems(upper) - 1) return 1; @@ -2418,9 +2360,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root return 1; right_nritems = btrfs_header_nritems(right); - if (right_nritems == 0) { + if (right_nritems == 0) return 1; - } WARN_ON(!btrfs_tree_locked(path->nodes[1])); @@ -2502,7 +2443,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root push_items * sizeof(struct btrfs_item)); push_space = BTRFS_LEAF_DATA_SIZE(root) - - btrfs_item_offset_nr(right, push_items -1); + btrfs_item_offset_nr(right, push_items - 1); copy_extent_buffer(left, right, btrfs_leaf_data(left) + leaf_data_end(root, left) - push_space, @@ -2537,7 +2478,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root /* fixup right node */ if 
(push_items > right_nritems) { - printk("push items %d nr %u\n", push_items, right_nritems); + printk(KERN_CRIT "push items %d nr %u\n", push_items, + right_nritems); WARN_ON(1); } @@ -2640,9 +2582,8 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, /* first try to make some room by pushing left and right */ if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { wret = push_leaf_right(trans, root, path, data_size, 0); - if (wret < 0) { + if (wret < 0) return wret; - } if (wret) { wret = push_leaf_left(trans, root, path, data_size, 0); if (wret < 0) @@ -2665,7 +2606,7 @@ again: l = path->nodes[0]; slot = path->slots[0]; nritems = btrfs_header_nritems(l); - mid = (nritems + 1)/ 2; + mid = (nritems + 1) / 2; right = btrfs_alloc_free_block(trans, root, root->leafsize, path->nodes[1]->start, @@ -2734,7 +2675,7 @@ again: path->slots[0] = 0; if (path->slots[1] == 0) { wret = fixup_low_keys(trans, root, - path, &disk_key, 1); + path, &disk_key, 1); if (wret) ret = wret; } @@ -3033,8 +2974,8 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, BTRFS_FILE_EXTENT_INLINE) { ptr = btrfs_item_ptr_offset(leaf, slot); memmove_extent_buffer(leaf, ptr, - (unsigned long)fi, - offsetof(struct btrfs_file_extent_item, + (unsigned long)fi, + offsetof(struct btrfs_file_extent_item, disk_bytenr)); } } @@ -3096,7 +3037,8 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, BUG_ON(slot < 0); if (slot >= nritems) { btrfs_print_leaf(root, leaf); - printk("slot %d too large, nritems %d\n", slot, nritems); + printk(KERN_CRIT "slot %d too large, nritems %d\n", + slot, nritems); BUG_ON(1); } @@ -3218,7 +3160,7 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans, if (old_data < data_end) { btrfs_print_leaf(root, leaf); - printk("slot %d old_data %d data_end %d\n", + printk(KERN_CRIT "slot %d old_data %d data_end %d\n", slot, old_data, data_end); BUG_ON(1); } @@ -3317,9 +3259,8 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, unsigned int 
data_end; struct btrfs_disk_key disk_key; - for (i = 0; i < nr; i++) { + for (i = 0; i < nr; i++) total_data += data_size[i]; - } total_size = total_data + (nr * sizeof(struct btrfs_item)); ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); @@ -3336,7 +3277,7 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, if (btrfs_leaf_free_space(root, leaf) < total_size) { btrfs_print_leaf(root, leaf); - printk("not enough freespace need %u have %d\n", + printk(KERN_CRIT "not enough freespace need %u have %d\n", total_size, btrfs_leaf_free_space(root, leaf)); BUG(); } @@ -3349,7 +3290,7 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, if (old_data < data_end) { btrfs_print_leaf(root, leaf); - printk("slot %d old_data %d data_end %d\n", + printk(KERN_CRIT "slot %d old_data %d data_end %d\n", slot, old_data, data_end); BUG_ON(1); } @@ -3457,7 +3398,7 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, int wret; nritems = btrfs_header_nritems(parent); - if (slot != nritems -1) { + if (slot != nritems - 1) { memmove_extent_buffer(parent, btrfs_node_key_ptr_offset(slot), btrfs_node_key_ptr_offset(slot + 1), @@ -3614,7 +3555,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (btrfs_header_nritems(leaf) == 0) { path->slots[1] = slot; - ret = btrfs_del_leaf(trans, root, path, leaf->start); + ret = btrfs_del_leaf(trans, root, path, + leaf->start); BUG_ON(ret); free_extent_buffer(leaf); } else { @@ -3717,7 +3659,7 @@ again: ret = 1; goto out; } - while(1) { + while (1) { nritems = btrfs_header_nritems(cur); level = btrfs_header_level(cur); sret = bin_search(cur, min_key, level, &slot); @@ -3738,7 +3680,7 @@ again: * min_trans parameters. If it isn't in cache or is too * old, skip to the next one. 
*/ - while(slot < nritems) { + while (slot < nritems) { u64 blockptr; u64 gen; struct extent_buffer *tmp; @@ -3830,7 +3772,7 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, struct extent_buffer *c; WARN_ON(!path->keep_locks); - while(level < BTRFS_MAX_LEVEL) { + while (level < BTRFS_MAX_LEVEL) { if (!path->nodes[level]) return 1; @@ -3839,9 +3781,8 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, next: if (slot >= btrfs_header_nritems(c)) { level++; - if (level == BTRFS_MAX_LEVEL) { + if (level == BTRFS_MAX_LEVEL) return 1; - } continue; } if (level == 0) @@ -3889,9 +3830,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) int ret; nritems = btrfs_header_nritems(path->nodes[0]); - if (nritems == 0) { + if (nritems == 0) return 1; - } btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); @@ -3915,7 +3855,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) goto done; } - while(level < BTRFS_MAX_LEVEL) { + while (level < BTRFS_MAX_LEVEL) { if (!path->nodes[level]) return 1; @@ -3923,9 +3863,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) c = path->nodes[level]; if (slot >= btrfs_header_nritems(c)) { level++; - if (level == BTRFS_MAX_LEVEL) { + if (level == BTRFS_MAX_LEVEL) return 1; - } continue; } @@ -3946,7 +3885,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) break; } path->slots[level] = slot; - while(1) { + while (1) { level--; c = path->nodes[level]; if (path->locks[level]) @@ -3986,7 +3925,7 @@ int btrfs_previous_item(struct btrfs_root *root, u32 nritems; int ret; - while(1) { + while (1) { if (path->slots[0] == 0) { ret = btrfs_prev_leaf(root, path); if (ret != 0) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ccea0648e10..eee060f8811 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -126,7 +126,6 @@ struct btrfs_ordered_sum; static int btrfs_csum_sizes[] = { 4, 0 }; /* four bytes for 
CRC32 */ -//#define BTRFS_CRC32_SIZE 4 #define BTRFS_EMPTY_DIR_SIZE 0 #define BTRFS_FT_UNKNOWN 0 @@ -283,8 +282,8 @@ struct btrfs_header { } __attribute__ ((__packed__)); #define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->nodesize - \ - sizeof(struct btrfs_header)) / \ - sizeof(struct btrfs_key_ptr)) + sizeof(struct btrfs_header)) / \ + sizeof(struct btrfs_key_ptr)) #define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) #define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->leafsize)) #define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ @@ -1512,7 +1511,7 @@ static inline struct btrfs_header *btrfs_buffer_header(struct extent_buffer *eb) static inline int btrfs_is_leaf(struct extent_buffer *eb) { - return (btrfs_header_level(eb) == 0); + return btrfs_header_level(eb) == 0; } /* struct btrfs_root_item */ @@ -1597,8 +1596,8 @@ static inline unsigned long btrfs_leaf_data(struct extent_buffer *l) /* struct btrfs_file_extent_item */ BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8); -static inline unsigned long btrfs_file_extent_inline_start(struct - btrfs_file_extent_item *e) +static inline unsigned long +btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e) { unsigned long offset = (unsigned long)e; offset += offsetof(struct btrfs_file_extent_item, disk_bytenr); @@ -1660,20 +1659,20 @@ static inline int btrfs_set_root_name(struct btrfs_root *root, const char *name, int len) { /* if we already have a name just free it */ - if (root->name) - kfree(root->name); + kfree(root->name); root->name = kmalloc(len+1, GFP_KERNEL); if (!root->name) return -ENOMEM; memcpy(root->name, name, len); - root->name[len] ='\0'; + root->name[len] = '\0'; return 0; } -static inline u32 btrfs_level_size(struct btrfs_root *root, int level) { +static inline u32 btrfs_level_size(struct btrfs_root *root, int level) +{ if (level == 0) return root->leafsize; return root->nodesize; @@ -1707,9 +1706,9 @@ int btrfs_cross_ref_exist(struct 
btrfs_trans_handle *trans, int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); -struct btrfs_block_group_cache *btrfs_lookup_block_group(struct - btrfs_fs_info *info, - u64 bytenr); +struct btrfs_block_group_cache *btrfs_lookup_block_group( + struct btrfs_fs_info *info, + u64 bytenr); u64 btrfs_find_block_group(struct btrfs_root *root, u64 search_start, u64 search_hint, int owner); struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, @@ -1908,8 +1907,9 @@ int btrfs_search_root(struct btrfs_root *root, u64 search_start, int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, struct btrfs_root *latest_root); /* dir-item.c */ -int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, const char *name, int name_len, u64 dir, +int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, const char *name, + int name_len, u64 dir, struct btrfs_key *location, u8 type, u64 index); struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 5040b71f190..926a0b287a7 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -333,7 +333,7 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, leaf = path->nodes[0]; dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item); total_len = btrfs_item_size_nr(leaf, path->slots[0]); - while(cur < total_len) { + while (cur < total_len) { this_len = sizeof(*dir_item) + btrfs_dir_name_len(leaf, dir_item) + btrfs_dir_data_len(leaf, dir_item); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index dae25e78a6b..81a313874ae 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -23,7 +23,7 @@ #include #include #include -#include // for block_sync_page +#include #include #include #include @@ 
-40,19 +40,6 @@ #include "ref-cache.h" #include "tree-log.h" -#if 0 -static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) -{ - if (extent_buffer_blocknr(buf) != btrfs_header_blocknr(buf)) { - printk(KERN_CRIT "buf blocknr(buf) is %llu, header is %llu\n", - (unsigned long long)extent_buffer_blocknr(buf), - (unsigned long long)btrfs_header_blocknr(buf)); - return 1; - } - return 0; -} -#endif - static struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); @@ -128,23 +115,13 @@ static struct extent_map *btree_get_extent(struct inode *inode, u64 failed_start = em->start; u64 failed_len = em->len; - printk("failed to insert %Lu %Lu -> %Lu into tree\n", - em->start, em->len, em->block_start); free_extent_map(em); em = lookup_extent_mapping(em_tree, start, len); if (em) { - printk("after failing, found %Lu %Lu %Lu\n", - em->start, em->len, em->block_start); ret = 0; } else { em = lookup_extent_mapping(em_tree, failed_start, failed_len); - if (em) { - printk("double failure lookup gives us " - "%Lu %Lu -> %Lu\n", em->start, - em->len, em->block_start); - free_extent_map(em); - } ret = -EIO; } } else if (ret) { @@ -191,15 +168,12 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, unsigned long inline_result; len = buf->len - offset; - while(len > 0) { + while (len > 0) { err = map_private_extent_buffer(buf, offset, 32, &map_token, &kaddr, &map_start, &map_len, KM_USER0); - if (err) { - printk("failed to map extent buffer! 
%lu\n", - offset); + if (err) return 1; - } cur_len = min(len, map_len - (offset - map_start)); crc = btrfs_csum_data(root, kaddr + offset - map_start, crc, cur_len); @@ -218,15 +192,14 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, btrfs_csum_final(crc, result); if (verify) { - /* FIXME, this is not good */ if (memcmp_extent_buffer(buf, result, 0, csum_size)) { u32 val; u32 found = 0; memcpy(&found, result, csum_size); read_extent_buffer(buf, &val, 0, csum_size); - printk("btrfs: %s checksum verify failed on %llu " - "wanted %X found %X level %d\n", + printk(KERN_INFO "btrfs: %s checksum verify failed " + "on %llu wanted %X found %X level %d\n", root->fs_info->sb->s_id, buf->start, val, found, btrfs_header_level(buf)); if (result != (char *)&inline_result) @@ -293,7 +266,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, if (!ret && !verify_parent_transid(io_tree, eb, parent_transid)) return ret; -printk("read extent buffer pages failed with ret %d mirror no %d\n", ret, mirror_num); + num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, eb->start, eb->len); if (num_copies == 1) @@ -307,9 +280,10 @@ printk("read extent buffer pages failed with ret %d mirror no %d\n", ret, mirror } /* - * checksum a dirty tree block before IO. This has extra checks to make - * sure we only fill in the checksum field in the first page of a multi-page block + * checksum a dirty tree block before IO. 
This has extra checks to make sure + * we only fill in the checksum field in the first page of a multi-page block */ + static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) { struct extent_io_tree *tree; @@ -327,28 +301,22 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) if (!page->private) goto out; len = page->private >> 2; - if (len == 0) { - WARN_ON(1); - } + WARN_ON(len == 0); + eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, btrfs_header_generation(eb)); BUG_ON(ret); found_start = btrfs_header_bytenr(eb); if (found_start != start) { - printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n", - start, found_start, len); WARN_ON(1); goto err; } if (eb->first_page != page) { - printk("bad first page %lu %lu\n", eb->first_page->index, - page->index); WARN_ON(1); goto err; } if (!PageUptodate(page)) { - printk("csum not up to date page %lu\n", page->index); WARN_ON(1); goto err; } @@ -396,29 +364,30 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, goto out; if (!page->private) goto out; + len = page->private >> 2; - if (len == 0) { - WARN_ON(1); - } + WARN_ON(len == 0); + eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); found_start = btrfs_header_bytenr(eb); if (found_start != start) { - printk("bad tree block start %llu %llu\n", + printk(KERN_INFO "btrfs bad tree block start %llu %llu\n", (unsigned long long)found_start, (unsigned long long)eb->start); ret = -EIO; goto err; } if (eb->first_page != page) { - printk("bad first page %lu %lu\n", eb->first_page->index, - page->index); + printk(KERN_INFO "btrfs bad first page %lu %lu\n", + eb->first_page->index, page->index); WARN_ON(1); ret = -EIO; goto err; } if (check_tree_block_fsid(root, eb)) { - printk("bad fsid on block %Lu\n", eb->start); + printk(KERN_INFO "btrfs bad fsid on block %llu\n", + (unsigned long long)eb->start); ret = -EIO; 
goto err; } @@ -578,7 +547,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, HZ/10); } #endif - while(atomic_read(&fs_info->async_submit_draining) && + while (atomic_read(&fs_info->async_submit_draining) && atomic_read(&fs_info->nr_async_submits)) { wait_event(fs_info->async_submit_wait, (atomic_read(&fs_info->nr_async_submits) == 0)); @@ -594,7 +563,7 @@ static int btree_csum_one_bio(struct bio *bio) struct btrfs_root *root; WARN_ON(bio->bi_vcnt <= 0); - while(bio_index < bio->bi_vcnt) { + while (bio_index < bio->bi_vcnt) { root = BTRFS_I(bvec->bv_page->mapping->host)->root; csum_dirty_buffer(root, bvec->bv_page); bio_index++; @@ -680,9 +649,8 @@ static int btree_writepages(struct address_space *mapping, num_dirty = count_range_bits(tree, &start, (u64)-1, thresh, EXTENT_DIRTY); - if (num_dirty < thresh) { + if (num_dirty < thresh) return 0; - } } return extent_writepages(tree, mapping, btree_get_extent, wbc); } @@ -701,15 +669,14 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags) int ret; if (PageWriteback(page) || PageDirty(page)) - return 0; + return 0; tree = &BTRFS_I(page->mapping->host)->io_tree; map = &BTRFS_I(page->mapping->host)->extent_tree; ret = try_release_extent_state(map, tree, page, gfp_flags); - if (!ret) { + if (!ret) return 0; - } ret = try_release_extent_buffer(tree, page); if (ret == 1) { @@ -728,8 +695,8 @@ static void btree_invalidatepage(struct page *page, unsigned long offset) extent_invalidatepage(tree, page, offset); btree_releasepage(page, GFP_NOFS); if (PagePrivate(page)) { - printk("warning page private not zero on page %Lu\n", - page_offset(page)); + printk(KERN_WARNING "btrfs warning page private not zero " + "on page %llu\n", (unsigned long long)page_offset(page)); ClearPagePrivate(page); set_page_private(page, 0); page_cache_release(page); @@ -813,7 +780,7 @@ int btrfs_write_tree_block(struct extent_buffer *buf) int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) { return 
btrfs_wait_on_page_writeback_range(buf->first_page->mapping, - buf->start, buf->start + buf->len -1); + buf->start, buf->start + buf->len - 1); } struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, @@ -832,11 +799,10 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); - if (ret == 0) { + if (ret == 0) buf->flags |= EXTENT_UPTODATE; - } else { + else WARN_ON(1); - } return buf; } @@ -944,7 +910,7 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, if (!log_root_tree) return 0; - while(1) { + while (1) { ret = find_first_extent_bit(&log_root_tree->dirty_log_pages, 0, &start, &end, EXTENT_DIRTY); if (ret) @@ -1165,24 +1131,6 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, root->in_sysfs = 1; return root; } -#if 0 -static int add_hasher(struct btrfs_fs_info *info, char *type) { - struct btrfs_hasher *hasher; - - hasher = kmalloc(sizeof(*hasher), GFP_NOFS); - if (!hasher) - return -ENOMEM; - hasher->hash_tfm = crypto_alloc_hash(type, 0, CRYPTO_ALG_ASYNC); - if (!hasher->hash_tfm) { - kfree(hasher); - return -EINVAL; - } - spin_lock(&info->hash_lock); - list_add(&hasher->list, &info->hashers); - spin_unlock(&info->hash_lock); - return 0; -} -#endif static int btrfs_congested_fn(void *congested_data, int bdi_bits) { @@ -1226,9 +1174,8 @@ static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page) continue; bdi = blk_get_backing_dev_info(device->bdev); - if (bdi->unplug_io_fn) { + if (bdi->unplug_io_fn) bdi->unplug_io_fn(bdi, page); - } } } @@ -1420,8 +1367,9 @@ static int transaction_kthread(void *arg) mutex_lock(&root->fs_info->transaction_kthread_mutex); if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) { - printk("btrfs: total reference cache size %Lu\n", - root->fs_info->total_ref_cache_size); + printk(KERN_INFO "btrfs: total reference cache " + "size %llu\n", + 
root->fs_info->total_ref_cache_size); } mutex_lock(&root->fs_info->trans_mutex); @@ -1592,14 +1540,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, atomic_set(&fs_info->tree_log_writers, 0); fs_info->tree_log_transid = 0; -#if 0 - ret = add_hasher(fs_info, "crc32c"); - if (ret) { - printk("btrfs: failed hash setup, modprobe cryptomgr?\n"); - err = -ENOMEM; - goto fail_iput; - } -#endif __setup_root(4096, 4096, 4096, 4096, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); @@ -1720,7 +1660,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, sizeof(disk_super->magic))) { - printk("btrfs: valid FS not found on %s\n", sb->s_id); + printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id); goto fail_sb_buffer; } @@ -1728,8 +1668,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, ret = btrfs_read_sys_array(tree_root); mutex_unlock(&fs_info->chunk_mutex); if (ret) { - printk("btrfs: failed to read the system array on %s\n", - sb->s_id); + printk(KERN_WARNING "btrfs: failed to read the system " + "array on %s\n", sb->s_id); goto fail_sys_array; } @@ -1746,14 +1686,15 @@ struct btrfs_root *open_ctree(struct super_block *sb, BUG_ON(!chunk_root->node); read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), - BTRFS_UUID_SIZE); + (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), + BTRFS_UUID_SIZE); mutex_lock(&fs_info->chunk_mutex); ret = btrfs_read_chunk_tree(chunk_root); mutex_unlock(&fs_info->chunk_mutex); if (ret) { - printk("btrfs: failed to read chunk tree on %s\n", sb->s_id); + printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n", + sb->s_id); goto fail_chunk_root; } @@ -1812,7 +1753,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, u64 bytenr = btrfs_super_log_root(disk_super); if (fs_devices->rw_devices == 0) { - printk("Btrfs log replay required on RO media\n"); + printk(KERN_WARNING 
"Btrfs log replay required " + "on RO media\n"); err = -EIO; goto fail_trans_kthread; } @@ -2097,7 +2039,8 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) total_errors++; } if (total_errors > max_errors) { - printk("btrfs: %d errors while writing supers\n", total_errors); + printk(KERN_ERR "btrfs: %d errors while writing supers\n", + total_errors); BUG(); } @@ -2114,7 +2057,8 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) total_errors++; } if (total_errors > max_errors) { - printk("btrfs: %d errors while writing supers\n", total_errors); + printk(KERN_ERR "btrfs: %d errors while writing supers\n", + total_errors); BUG(); } return 0; @@ -2137,16 +2081,11 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) down_write(&root->anon_super.s_umount); kill_anon_super(&root->anon_super); } -#if 0 - if (root->in_sysfs) - btrfs_sysfs_del_root(root); -#endif if (root->node) free_extent_buffer(root->node); if (root->commit_root) free_extent_buffer(root->commit_root); - if (root->name) - kfree(root->name); + kfree(root->name); kfree(root); return 0; } @@ -2157,7 +2096,7 @@ static int del_fs_roots(struct btrfs_fs_info *fs_info) struct btrfs_root *gang[8]; int i; - while(1) { + while (1) { ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, (void **)gang, 0, ARRAY_SIZE(gang)); @@ -2228,18 +2167,17 @@ int close_ctree(struct btrfs_root *root) if (!(fs_info->sb->s_flags & MS_RDONLY)) { ret = btrfs_commit_super(root); - if (ret) { - printk("btrfs: commit super returns %d\n", ret); - } + if (ret) + printk(KERN_ERR "btrfs: commit super ret %d\n", ret); } if (fs_info->delalloc_bytes) { - printk("btrfs: at unmount delalloc count %Lu\n", + printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", fs_info->delalloc_bytes); } if (fs_info->total_ref_cache_size) { - printk("btrfs: at umount reference cache size %Lu\n", - fs_info->total_ref_cache_size); + printk(KERN_INFO "btrfs: at umount reference cache size %llu\n", + 
(unsigned long long)fs_info->total_ref_cache_size); } if (fs_info->extent_root->node) @@ -2248,13 +2186,13 @@ int close_ctree(struct btrfs_root *root) if (fs_info->tree_root->node) free_extent_buffer(fs_info->tree_root->node); - if (root->fs_info->chunk_root->node); + if (root->fs_info->chunk_root->node) free_extent_buffer(root->fs_info->chunk_root->node); - if (root->fs_info->dev_root->node); + if (root->fs_info->dev_root->node) free_extent_buffer(root->fs_info->dev_root->node); - if (root->fs_info->csum_root->node); + if (root->fs_info->csum_root->node) free_extent_buffer(root->fs_info->csum_root->node); btrfs_free_block_groups(root->fs_info); @@ -2273,7 +2211,7 @@ int close_ctree(struct btrfs_root *root) btrfs_stop_workers(&fs_info->submit_workers); #if 0 - while(!list_empty(&fs_info->hashers)) { + while (!list_empty(&fs_info->hashers)) { struct btrfs_hasher *hasher; hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher, hashers); @@ -2324,9 +2262,11 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) WARN_ON(!btrfs_tree_locked(buf)); if (transid != root->fs_info->generation) { - printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", + printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " + "found %llu running %llu\n", (unsigned long long)buf->start, - transid, root->fs_info->generation); + (unsigned long long)transid, + (unsigned long long)root->fs_info->generation); WARN_ON(1); } set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); @@ -2361,9 +2301,8 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; int ret; ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); - if (ret == 0) { + if (ret == 0) buf->flags |= EXTENT_UPTODATE; - } return ret; } diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 48b82cd7583..85315d2c90d 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -7,9 +7,11 @@ #include 
"export.h" #include "compat.h" -#define BTRFS_FID_SIZE_NON_CONNECTABLE (offsetof(struct btrfs_fid, parent_objectid)/4) -#define BTRFS_FID_SIZE_CONNECTABLE (offsetof(struct btrfs_fid, parent_root_objectid)/4) -#define BTRFS_FID_SIZE_CONNECTABLE_ROOT (sizeof(struct btrfs_fid)/4) +#define BTRFS_FID_SIZE_NON_CONNECTABLE (offsetof(struct btrfs_fid, \ + parent_objectid) / 4) +#define BTRFS_FID_SIZE_CONNECTABLE (offsetof(struct btrfs_fid, \ + parent_root_objectid) / 4) +#define BTRFS_FID_SIZE_CONNECTABLE_ROOT (sizeof(struct btrfs_fid) / 4) static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, int connectable) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 780c1eeb829..ec43fa526d7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -49,10 +49,10 @@ struct pending_extent_op { int del; }; -static int finish_current_insert(struct btrfs_trans_handle *trans, struct - btrfs_root *extent_root, int all); -static int del_pending_extents(struct btrfs_trans_handle *trans, struct - btrfs_root *extent_root, int all); +static int finish_current_insert(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, int all); +static int del_pending_extents(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, int all); static int pin_down_bytes(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int is_data); @@ -247,7 +247,7 @@ static int cache_block_group(struct btrfs_root *root, if (ret < 0) goto err; - while(1) { + while (1) { leaf = path->nodes[0]; slot = path->slots[0]; if (slot >= btrfs_header_nritems(leaf)) { @@ -292,9 +292,8 @@ err: /* * return the block group that starts at or after bytenr */ -static struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct - btrfs_fs_info *info, - u64 bytenr) +static struct btrfs_block_group_cache * +btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr) { struct btrfs_block_group_cache *cache; @@ -306,9 +305,9 
@@ static struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct /* * return the block group that contains teh given bytenr */ -struct btrfs_block_group_cache *btrfs_lookup_block_group(struct - btrfs_fs_info *info, - u64 bytenr) +struct btrfs_block_group_cache *btrfs_lookup_block_group( + struct btrfs_fs_info *info, + u64 bytenr) { struct btrfs_block_group_cache *cache; @@ -492,7 +491,7 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) * to the key objectid. */ -static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans, +static noinline int lookup_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 bytenr, u64 parent, @@ -537,7 +536,7 @@ out: * updates all the backrefs that are pending on update_list for the * extent_root */ -static int noinline update_backrefs(struct btrfs_trans_handle *trans, +static noinline int update_backrefs(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, struct btrfs_path *path, struct list_head *update_list) @@ -573,9 +572,11 @@ loop: btrfs_ref_generation(leaf, ref) != op->orig_generation || (ref_objectid != op->level && ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) { - printk(KERN_ERR "couldn't find %Lu, parent %Lu, root %Lu, " - "owner %u\n", op->bytenr, op->orig_parent, - ref_root, op->level); + printk(KERN_ERR "btrfs couldn't find %llu, parent %llu, " + "root %llu, owner %u\n", + (unsigned long long)op->bytenr, + (unsigned long long)op->orig_parent, + (unsigned long long)ref_root, op->level); btrfs_print_leaf(extent_root, leaf); BUG(); } @@ -620,7 +621,7 @@ out: return 0; } -static int noinline insert_extents(struct btrfs_trans_handle *trans, +static noinline int insert_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, struct btrfs_path *path, struct list_head *insert_list, int nr) @@ -781,7 +782,7 @@ static int noinline insert_extents(struct btrfs_trans_handle *trans, return ret; } -static int 
noinline insert_extent_backref(struct btrfs_trans_handle *trans, +static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 bytenr, u64 parent, @@ -840,7 +841,7 @@ out: return ret; } -static int noinline remove_extent_backref(struct btrfs_trans_handle *trans, +static noinline int remove_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path) { @@ -868,7 +869,7 @@ static int noinline remove_extent_backref(struct btrfs_trans_handle *trans, static void btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len) { -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28) blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL); #else blkdev_issue_discard(bdev, start >> 9, len >> 9); @@ -908,7 +909,7 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, #endif } -static int noinline free_extents(struct btrfs_trans_handle *trans, +static noinline int free_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, struct list_head *del_list) { @@ -937,10 +938,11 @@ search: extent_root->root_key.objectid, op->orig_generation, op->level, 1); if (ret) { - printk("Unable to find backref byte nr %Lu root %Lu gen %Lu " - "owner %u\n", op->bytenr, - extent_root->root_key.objectid, op->orig_generation, - op->level); + printk(KERN_ERR "btrfs unable to find backref byte nr %llu " + "root %llu gen %llu owner %u\n", + (unsigned long long)op->bytenr, + (unsigned long long)extent_root->root_key.objectid, + (unsigned long long)op->orig_generation, op->level); btrfs_print_leaf(extent_root, path->nodes[0]); WARN_ON(1); goto out; @@ -1282,7 +1284,9 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_item_key_to_cpu(l, &key, path->slots[0]); if (key.objectid != bytenr) { btrfs_print_leaf(root->fs_info->extent_root, path->nodes[0]); - printk("wanted %Lu found %Lu\n", 
bytenr, key.objectid); + printk(KERN_ERR "btrfs wanted %llu found %llu\n", + (unsigned long long)bytenr, + (unsigned long long)key.objectid); BUG(); } BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY); @@ -1353,7 +1357,8 @@ int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, goto out; if (ret != 0) { btrfs_print_leaf(root, path->nodes[0]); - printk("failed to find block number %Lu\n", bytenr); + printk(KERN_INFO "btrfs failed to find block number %llu\n", + (unsigned long long)bytenr); BUG(); } l = path->nodes[0]; @@ -1738,7 +1743,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - while(1) { + while (1) { cache = NULL; spin_lock(&root->fs_info->block_group_cache_lock); for (n = rb_first(&root->fs_info->block_group_cache_tree); @@ -1921,10 +1926,8 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, spin_unlock(&space_info->lock); ret = btrfs_alloc_chunk(trans, extent_root, flags); - if (ret) { -printk("space info full %Lu\n", flags); + if (ret) space_info->full = 1; - } out: mutex_unlock(&extent_root->fs_info->chunk_mutex); return ret; @@ -1941,7 +1944,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, u64 old_val; u64 byte_in_group; - while(total) { + while (total) { cache = btrfs_lookup_block_group(info, bytenr); if (!cache) return -1; @@ -2089,7 +2092,7 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) int ret; mutex_lock(&root->fs_info->pinned_mutex); - while(1) { + while (1) { ret = find_first_extent_bit(pinned_extents, last, &start, &end, EXTENT_DIRTY); if (ret) @@ -2110,7 +2113,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, int ret; mutex_lock(&root->fs_info->pinned_mutex); - while(1) { + while (1) { ret = find_first_extent_bit(unpin, 0, &start, &end, EXTENT_DIRTY); if (ret) @@ -2400,7 +2403,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, if (ret == 0) { struct btrfs_key found_key; extent_slot = path->slots[0]; 
- while(extent_slot > 0) { + while (extent_slot > 0) { extent_slot--; btrfs_item_key_to_cpu(path->nodes[0], &found_key, extent_slot); @@ -2422,8 +2425,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, &key, path, -1, 1); if (ret) { printk(KERN_ERR "umm, got %d back from search" - ", was looking for %Lu\n", ret, - bytenr); + ", was looking for %llu\n", ret, + (unsigned long long)bytenr); btrfs_print_leaf(extent_root, path->nodes[0]); } BUG_ON(ret); @@ -2432,9 +2435,12 @@ static int __free_extent(struct btrfs_trans_handle *trans, } else { btrfs_print_leaf(extent_root, path->nodes[0]); WARN_ON(1); - printk("Unable to find ref byte nr %Lu root %Lu " - "gen %Lu owner %Lu\n", bytenr, - root_objectid, ref_generation, owner_objectid); + printk(KERN_ERR "btrfs unable to find ref byte nr %llu " + "root %llu gen %llu owner %llu\n", + (unsigned long long)bytenr, + (unsigned long long)root_objectid, + (unsigned long long)ref_generation, + (unsigned long long)owner_objectid); } leaf = path->nodes[0]; @@ -2517,8 +2523,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, * find all the blocks marked as pending in the radix tree and remove * them from the extent map */ -static int del_pending_extents(struct btrfs_trans_handle *trans, struct - btrfs_root *extent_root, int all) +static int del_pending_extents(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, int all) { int ret; int err = 0; @@ -2539,7 +2545,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct again: mutex_lock(&info->extent_ins_mutex); - while(1) { + while (1) { ret = find_first_extent_bit(pending_del, search, &start, &end, EXTENT_WRITEBACK); if (ret) { @@ -2753,7 +2759,7 @@ static u64 stripe_align(struct btrfs_root *root, u64 val) * ins->offset == number of blocks * Any available blocks before search_start are skipped. 
*/ -static int noinline find_free_extent(struct btrfs_trans_handle *trans, +static noinline int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *orig_root, u64 num_bytes, u64 empty_size, u64 search_start, u64 search_end, @@ -2762,7 +2768,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, int data) { int ret = 0; - struct btrfs_root * root = orig_root->fs_info->extent_root; + struct btrfs_root *root = orig_root->fs_info->extent_root; u64 total_needed = num_bytes; u64 *last_ptr = NULL; u64 last_wanted = 0; @@ -2995,8 +3001,10 @@ loop_check: *last_ptr = ins->objectid + ins->offset; ret = 0; } else if (!ret) { - printk(KERN_ERR "we were searching for %Lu bytes, num_bytes %Lu," - " loop %d, allowed_alloc %d\n", total_needed, num_bytes, + printk(KERN_ERR "btrfs searching for %llu bytes, " + "num_bytes %llu, loop %d, allowed_alloc %d\n", + (unsigned long long)total_needed, + (unsigned long long)num_bytes, loop, allowed_chunk_alloc); ret = -ENOSPC; } @@ -3012,19 +3020,22 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes) struct btrfs_block_group_cache *cache; struct list_head *l; - printk(KERN_INFO "space_info has %Lu free, is %sfull\n", - info->total_bytes - info->bytes_used - info->bytes_pinned - - info->bytes_reserved, (info->full) ? "" : "not "); + printk(KERN_INFO "space_info has %llu free, is %sfull\n", + (unsigned long long)(info->total_bytes - info->bytes_used - + info->bytes_pinned - info->bytes_reserved), + (info->full) ? 
"" : "not "); down_read(&info->groups_sem); list_for_each(l, &info->block_groups) { cache = list_entry(l, struct btrfs_block_group_cache, list); spin_lock(&cache->lock); - printk(KERN_INFO "block group %Lu has %Lu bytes, %Lu used " - "%Lu pinned %Lu reserved\n", - cache->key.objectid, cache->key.offset, - btrfs_block_group_used(&cache->item), - cache->pinned, cache->reserved); + printk(KERN_INFO "block group %llu has %llu bytes, %llu used " + "%llu pinned %llu reserved\n", + (unsigned long long)cache->key.objectid, + (unsigned long long)cache->key.offset, + (unsigned long long)btrfs_block_group_used(&cache->item), + (unsigned long long)cache->pinned, + (unsigned long long)cache->reserved); btrfs_dump_free_space(cache, bytes); spin_unlock(&cache->lock); } @@ -3045,15 +3056,15 @@ static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, if (data) { alloc_profile = info->avail_data_alloc_bits & - info->data_alloc_profile; + info->data_alloc_profile; data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; } else if (root == root->fs_info->chunk_root) { alloc_profile = info->avail_system_alloc_bits & - info->system_alloc_profile; + info->system_alloc_profile; data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; } else { alloc_profile = info->avail_metadata_alloc_bits & - info->metadata_alloc_profile; + info->metadata_alloc_profile; data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; } again: @@ -3092,8 +3103,9 @@ again: struct btrfs_space_info *sinfo; sinfo = __find_space_info(root->fs_info, data); - printk("allocation failed flags %Lu, wanted %Lu\n", - data, num_bytes); + printk(KERN_ERR "btrfs allocation failed flags %llu, " + "wanted %llu\n", (unsigned long long)data, + (unsigned long long)num_bytes); dump_space_info(sinfo, num_bytes); BUG(); } @@ -3108,7 +3120,8 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) cache = btrfs_lookup_block_group(root->fs_info, start); if (!cache) { - printk(KERN_ERR "Unable to find block group for %Lu\n", 
start); + printk(KERN_ERR "Unable to find block group for %llu\n", + (unsigned long long)start); return -ENOSPC; } @@ -3235,10 +3248,12 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, } update_block: - ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0); + ret = update_block_group(trans, root, ins->objectid, + ins->offset, 1, 0); if (ret) { - printk("update block group failed for %Lu %Lu\n", - ins->objectid, ins->offset); + printk(KERN_ERR "btrfs update block group failed for %llu " + "%llu\n", (unsigned long long)ins->objectid, + (unsigned long long)ins->offset); BUG(); } out: @@ -3420,7 +3435,7 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, return 0; } -static int noinline cache_drop_leaf_ref(struct btrfs_trans_handle *trans, +static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_leaf_ref *ref) { @@ -3445,15 +3460,15 @@ static int noinline cache_drop_leaf_ref(struct btrfs_trans_handle *trans, return 0; } -static int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len, - u32 *refs) +static int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, + u64 len, u32 *refs) { int ret; ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs); BUG_ON(ret); -#if 0 // some debugging code in case we see problems here +#if 0 /* some debugging code in case we see problems here */ /* if the refs count is one, it won't get increased again. But * if the ref count is > 1, someone may be decreasing it at * the same time we are. 
@@ -3474,8 +3489,8 @@ static int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len free_extent_buffer(eb); } if (*refs == 1) { - printk("block %llu went down to one during drop_snap\n", - (unsigned long long)start); + printk(KERN_ERR "btrfs block %llu went down to one " + "during drop_snap\n", (unsigned long long)start); } } @@ -3489,7 +3504,7 @@ static int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len * helper function for drop_snapshot, this walks down the tree dropping ref * counts as it goes. */ -static int noinline walk_down_tree(struct btrfs_trans_handle *trans, +static noinline int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int *level) { @@ -3516,7 +3531,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, /* * walk down to the last node level and free all the leaves */ - while(*level >= 0) { + while (*level >= 0) { WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); cur = path->nodes[*level]; @@ -3576,10 +3591,6 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, *level = 0; break; } - if (printk_ratelimit()) { - printk("leaf ref miss for bytenr %llu\n", - (unsigned long long)bytenr); - } } next = btrfs_find_tree_block(root, bytenr, blocksize); if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { @@ -3641,7 +3652,7 @@ out: * walk_down_tree. The main difference is that it checks reference * counts while tree blocks are locked. 
*/ -static int noinline walk_down_subtree(struct btrfs_trans_handle *trans, +static noinline int walk_down_subtree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int *level) { @@ -3730,7 +3741,7 @@ out: * to find the first node higher up where we haven't yet gone through * all the slots */ -static int noinline walk_up_tree(struct btrfs_trans_handle *trans, +static noinline int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int *level, int max_level) @@ -3839,7 +3850,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root } } } - while(1) { + while (1) { wret = walk_down_tree(trans, root, path, &level); if (wret > 0) break; @@ -3920,7 +3931,7 @@ static unsigned long calc_ra(unsigned long start, unsigned long last, return min(last, start + nr - 1); } -static int noinline relocate_inode_pages(struct inode *inode, u64 start, +static noinline int relocate_inode_pages(struct inode *inode, u64 start, u64 len) { u64 page_start; @@ -4011,7 +4022,7 @@ out_unlock: return ret; } -static int noinline relocate_data_extent(struct inode *reloc_inode, +static noinline int relocate_data_extent(struct inode *reloc_inode, struct btrfs_key *extent_key, u64 offset) { @@ -4087,7 +4098,7 @@ static int is_cowonly_root(u64 root_objectid) return 0; } -static int noinline __next_ref_path(struct btrfs_trans_handle *trans, +static noinline int __next_ref_path(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, struct btrfs_ref_path *ref_path, int first_time) @@ -4119,11 +4130,10 @@ walk_down: if (level < ref_path->lowest_level) break; - if (level >= 0) { + if (level >= 0) bytenr = ref_path->nodes[level]; - } else { + else bytenr = ref_path->extent_start; - } BUG_ON(bytenr == 0); parent = ref_path->nodes[level + 1]; @@ -4170,11 +4180,12 @@ walk_up: level = ref_path->current_level; while (level < BTRFS_MAX_LEVEL - 1) { u64 ref_objectid; - if (level >= 0) { + + if (level >= 
0) bytenr = ref_path->nodes[level]; - } else { + else bytenr = ref_path->extent_start; - } + BUG_ON(bytenr == 0); key.objectid = bytenr; @@ -4299,7 +4310,7 @@ static int btrfs_next_ref_path(struct btrfs_trans_handle *trans, return __next_ref_path(trans, extent_root, ref_path, 0); } -static int noinline get_new_locations(struct inode *reloc_inode, +static noinline int get_new_locations(struct inode *reloc_inode, struct btrfs_key *extent_key, u64 offset, int no_fragment, struct disk_extent **extents, @@ -4420,7 +4431,7 @@ out: return ret; } -static int noinline replace_one_extent(struct btrfs_trans_handle *trans, +static noinline int replace_one_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *extent_key, @@ -4778,7 +4789,7 @@ int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, return 0; } -static int noinline invalidate_extent_cache(struct btrfs_root *root, +static noinline int invalidate_extent_cache(struct btrfs_root *root, struct extent_buffer *leaf, struct btrfs_block_group_cache *group, struct btrfs_root *target_root) @@ -4826,7 +4837,7 @@ static int noinline invalidate_extent_cache(struct btrfs_root *root, return 0; } -static int noinline replace_extents_in_leaf(struct btrfs_trans_handle *trans, +static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *leaf, struct btrfs_block_group_cache *group, @@ -5035,7 +5046,7 @@ int btrfs_cleanup_reloc_trees(struct btrfs_root *root) return 0; } -static int noinline init_reloc_tree(struct btrfs_trans_handle *trans, +static noinline int init_reloc_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_root *reloc_root; @@ -5102,7 +5113,7 @@ static int noinline init_reloc_tree(struct btrfs_trans_handle *trans, * tree blocks are shared between reloc trees, so they are also shared * between subvols. 
*/ -static int noinline relocate_one_path(struct btrfs_trans_handle *trans, +static noinline int relocate_one_path(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *first_key, @@ -5199,7 +5210,7 @@ static int noinline relocate_one_path(struct btrfs_trans_handle *trans, return 0; } -static int noinline relocate_tree_block(struct btrfs_trans_handle *trans, +static noinline int relocate_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *first_key, @@ -5217,7 +5228,7 @@ static int noinline relocate_tree_block(struct btrfs_trans_handle *trans, return 0; } -static int noinline del_extent_zero(struct btrfs_trans_handle *trans, +static noinline int del_extent_zero(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, struct btrfs_path *path, struct btrfs_key *extent_key) @@ -5233,7 +5244,7 @@ out: return ret; } -static struct btrfs_root noinline *read_ref_root(struct btrfs_fs_info *fs_info, +static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info, struct btrfs_ref_path *ref_path) { struct btrfs_key root_key; @@ -5248,7 +5259,7 @@ static struct btrfs_root noinline *read_ref_root(struct btrfs_fs_info *fs_info, return btrfs_read_fs_root_no_name(fs_info, &root_key); } -static int noinline relocate_one_extent(struct btrfs_root *extent_root, +static noinline int relocate_one_extent(struct btrfs_root *extent_root, struct btrfs_path *path, struct btrfs_key *extent_key, struct btrfs_block_group_cache *group, @@ -5276,8 +5287,8 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root, ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS); if (!ref_path) { - ret = -ENOMEM; - goto out; + ret = -ENOMEM; + goto out; } for (loops = 0; ; loops++) { @@ -5497,7 +5508,7 @@ out: return ret; } -static struct inode noinline *create_reloc_inode(struct btrfs_fs_info *fs_info, +static noinline struct inode *create_reloc_inode(struct 
btrfs_fs_info *fs_info, struct btrfs_block_group_cache *group) { struct inode *inode = NULL; @@ -5617,7 +5628,7 @@ int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start) block_group = btrfs_lookup_block_group(info, group_start); BUG_ON(!block_group); - printk("btrfs relocating block group %llu flags %llu\n", + printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n", (unsigned long long)block_group->key.objectid, (unsigned long long)block_group->flags); @@ -5649,7 +5660,7 @@ again: btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); mutex_unlock(&root->fs_info->cleaner_mutex); - while(1) { + while (1) { ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto out; @@ -5712,7 +5723,7 @@ next: } if (total_found > 0) { - printk("btrfs found %llu extents in pass %d\n", + printk(KERN_INFO "btrfs found %llu extents in pass %d\n", (unsigned long long)total_found, pass); pass++; if (total_found == skipped && pass > 2) { @@ -5754,7 +5765,7 @@ static int find_first_block_group(struct btrfs_root *root, if (ret < 0) goto out; - while(1) { + while (1) { slot = path->slots[0]; leaf = path->nodes[0]; if (slot >= btrfs_header_nritems(leaf)) { @@ -5825,7 +5836,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) if (!path) return -ENOMEM; - while(1) { + while (1) { ret = find_first_block_group(root, path, &key); if (ret > 0) { ret = 0; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 0bf7684207a..39edb551dca 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -32,7 +32,7 @@ static LIST_HEAD(states); #define LEAK_DEBUG 0 #ifdef LEAK_DEBUG -static spinlock_t leak_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(leak_lock); #endif #define BUFFER_LRU_MAX 64 @@ -81,7 +81,11 @@ void extent_io_exit(void) while (!list_empty(&states)) { state = list_entry(states.next, struct extent_state, leak_list); - printk("state leak: start %Lu end %Lu state %lu in tree %p refs %d\n", state->start, state->end, state->state, 
state->tree, atomic_read(&state->refs)); + printk(KERN_ERR "btrfs state leak: start %llu end %llu " + "state %lu in tree %p refs %d\n", + (unsigned long long)state->start, + (unsigned long long)state->end, + state->state, state->tree, atomic_read(&state->refs)); list_del(&state->leak_list); kmem_cache_free(extent_state_cache, state); @@ -89,7 +93,9 @@ void extent_io_exit(void) while (!list_empty(&buffers)) { eb = list_entry(buffers.next, struct extent_buffer, leak_list); - printk("buffer leak start %Lu len %lu refs %d\n", eb->start, eb->len, atomic_read(&eb->refs)); + printk(KERN_ERR "btrfs buffer leak start %llu len %lu " + "refs %d\n", (unsigned long long)eb->start, + eb->len, atomic_read(&eb->refs)); list_del(&eb->leak_list); kmem_cache_free(extent_buffer_cache, eb); } @@ -158,11 +164,11 @@ EXPORT_SYMBOL(free_extent_state); static struct rb_node *tree_insert(struct rb_root *root, u64 offset, struct rb_node *node) { - struct rb_node ** p = &root->rb_node; - struct rb_node * parent = NULL; + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; struct tree_entry *entry; - while(*p) { + while (*p) { parent = *p; entry = rb_entry(parent, struct tree_entry, rb_node); @@ -185,13 +191,13 @@ static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset, struct rb_node **next_ret) { struct rb_root *root = &tree->state; - struct rb_node * n = root->rb_node; + struct rb_node *n = root->rb_node; struct rb_node *prev = NULL; struct rb_node *orig_prev = NULL; struct tree_entry *entry; struct tree_entry *prev_entry = NULL; - while(n) { + while (n) { entry = rb_entry(n, struct tree_entry, rb_node); prev = n; prev_entry = entry; @@ -200,14 +206,13 @@ static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset, n = n->rb_left; else if (offset > entry->end) n = n->rb_right; - else { + else return n; - } } if (prev_ret) { orig_prev = prev; - while(prev && offset > prev_entry->end) { + while (prev && offset > prev_entry->end) { prev 
= rb_next(prev); prev_entry = rb_entry(prev, struct tree_entry, rb_node); } @@ -217,7 +222,7 @@ static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset, if (next_ret) { prev_entry = rb_entry(prev, struct tree_entry, rb_node); - while(prev && offset < prev_entry->start) { + while (prev && offset < prev_entry->start) { prev = rb_prev(prev); prev_entry = rb_entry(prev, struct tree_entry, rb_node); } @@ -233,9 +238,8 @@ static inline struct rb_node *tree_search(struct extent_io_tree *tree, struct rb_node *ret; ret = __etree_search(tree, offset, &prev, NULL); - if (!ret) { + if (!ret) return prev; - } return ret; } @@ -243,11 +247,11 @@ static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree, u64 offset, struct rb_node *node) { struct rb_root *root = &tree->buffer; - struct rb_node ** p = &root->rb_node; - struct rb_node * parent = NULL; + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; struct extent_buffer *eb; - while(*p) { + while (*p) { parent = *p; eb = rb_entry(parent, struct extent_buffer, rb_node); @@ -268,10 +272,10 @@ static struct extent_buffer *buffer_search(struct extent_io_tree *tree, u64 offset) { struct rb_root *root = &tree->buffer; - struct rb_node * n = root->rb_node; + struct rb_node *n = root->rb_node; struct extent_buffer *eb; - while(n) { + while (n) { eb = rb_entry(n, struct extent_buffer, rb_node); if (offset < eb->start) n = n->rb_left; @@ -363,7 +367,9 @@ static int insert_state(struct extent_io_tree *tree, struct rb_node *node; if (end < start) { - printk("end < start %Lu %Lu\n", end, start); + printk(KERN_ERR "btrfs end < start %llu %llu\n", + (unsigned long long)end, + (unsigned long long)start); WARN_ON(1); } if (bits & EXTENT_DIRTY) @@ -376,7 +382,10 @@ static int insert_state(struct extent_io_tree *tree, if (node) { struct extent_state *found; found = rb_entry(node, struct extent_state, rb_node); - printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, 
start, end); + printk(KERN_ERR "btrfs found node %llu %llu on insert of " + "%llu %llu\n", (unsigned long long)found->start, + (unsigned long long)found->end, + (unsigned long long)start, (unsigned long long)end); free_extent_state(state); return -EEXIST; } @@ -412,7 +421,6 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, if (node) { struct extent_state *found; found = rb_entry(node, struct extent_state, rb_node); - printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end); free_extent_state(prealloc); return -EEXIST; } @@ -661,8 +669,9 @@ static void set_state_bits(struct extent_io_tree *tree, * [start, end] is inclusive * This takes the tree lock. */ -static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, - int exclusive, u64 *failed_start, gfp_t mask) +static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int exclusive, u64 *failed_start, + gfp_t mask) { struct extent_state *state; struct extent_state *prealloc = NULL; @@ -763,7 +772,7 @@ again: if (end < last_start) this_end = end; else - this_end = last_start -1; + this_end = last_start - 1; err = insert_state(tree, prealloc, start, this_end, bits); prealloc = NULL; @@ -891,8 +900,8 @@ int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, } EXPORT_SYMBOL(set_extent_uptodate); -static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask) +static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, + u64 end, gfp_t mask) { return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); } @@ -904,8 +913,8 @@ static int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, 0, NULL, mask); } -static int clear_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask) +static int clear_extent_writeback(struct extent_io_tree *tree, u64 start, + u64 end, gfp_t 
mask) { return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask); } @@ -1025,11 +1034,10 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start, * our range starts. */ node = tree_search(tree, start); - if (!node) { + if (!node) goto out; - } - while(1) { + while (1) { state = rb_entry(node, struct extent_state, rb_node); if (state->end >= start && (state->state & bits)) { *start_ret = state->start; @@ -1062,15 +1070,14 @@ struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, * our range starts. */ node = tree_search(tree, start); - if (!node) { + if (!node) goto out; - } - while(1) { + while (1) { state = rb_entry(node, struct extent_state, rb_node); - if (state->end >= start && (state->state & bits)) { + if (state->end >= start && (state->state & bits)) return state; - } + node = rb_next(node); if (!node) break; @@ -1108,7 +1115,7 @@ static noinline u64 find_delalloc_range(struct extent_io_tree *tree, goto out; } - while(1) { + while (1) { state = rb_entry(node, struct extent_state, rb_node); if (found && (state->start != cur_start || (state->state & EXTENT_BOUNDARY))) { @@ -1150,7 +1157,7 @@ static noinline int __unlock_for_delalloc(struct inode *inode, if (index == locked_page->index && end_index == index) return 0; - while(nr_pages > 0) { + while (nr_pages > 0) { ret = find_get_pages_contig(inode->i_mapping, index, min_t(unsigned long, nr_pages, ARRAY_SIZE(pages)), pages); @@ -1186,7 +1193,7 @@ static noinline int lock_delalloc_pages(struct inode *inode, /* skip the page at the start index */ nrpages = end_index - index + 1; - while(nrpages > 0) { + while (nrpages > 0) { ret = find_get_pages_contig(inode->i_mapping, index, min_t(unsigned long, nrpages, ARRAY_SIZE(pages)), pages); @@ -1263,17 +1270,16 @@ again: * pages in order, so we can't process delalloc bytes before * locked_page */ - if (delalloc_start < *start) { + if (delalloc_start < *start) delalloc_start = *start; - } /* * make sure to limit the 
number of pages we try to lock down * if we're looping. */ - if (delalloc_end + 1 - delalloc_start > max_bytes && loops) { + if (delalloc_end + 1 - delalloc_start > max_bytes && loops) delalloc_end = delalloc_start + PAGE_CACHE_SIZE - 1; - } + /* step two, lock all the pages after the page that has start */ ret = lock_delalloc_pages(inode, locked_page, delalloc_start, delalloc_end); @@ -1341,7 +1347,7 @@ int extent_clear_unlock_delalloc(struct inode *inode, if (!(unlock_pages || clear_dirty || set_writeback || end_writeback)) return 0; - while(nr_pages > 0) { + while (nr_pages > 0) { ret = find_get_pages_contig(inode->i_mapping, index, min_t(unsigned long, nr_pages, ARRAY_SIZE(pages)), pages); @@ -1384,7 +1390,6 @@ u64 count_range_bits(struct extent_io_tree *tree, int found = 0; if (search_end <= cur_start) { - printk("search_end %Lu start %Lu\n", search_end, cur_start); WARN_ON(1); return 0; } @@ -1399,11 +1404,10 @@ u64 count_range_bits(struct extent_io_tree *tree, * our range starts. 
*/ node = tree_search(tree, cur_start); - if (!node) { + if (!node) goto out; - } - while(1) { + while (1) { state = rb_entry(node, struct extent_state, rb_node); if (state->start > search_end) break; @@ -1927,19 +1931,15 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, nr = bio_get_nr_vecs(bdev); bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); - if (!bio) { - printk("failed to allocate bio nr %d\n", nr); - } bio_add_page(bio, page, page_size, offset); bio->bi_end_io = end_io_func; bio->bi_private = tree; - if (bio_ret) { + if (bio_ret) *bio_ret = bio; - } else { + else ret = submit_one_bio(rw, bio, mirror_num, bio_flags); - } return ret; } @@ -2028,13 +2028,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, break; } extent_offset = cur - em->start; - if (extent_map_end(em) <= cur) { -printk("bad mapping em [%Lu %Lu] cur %Lu\n", em->start, extent_map_end(em), cur); - } BUG_ON(extent_map_end(em) <= cur); - if (end < cur) { -printk("2bad mapping end %Lu cur %Lu\n", end, cur); - } BUG_ON(end < cur); if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) @@ -2199,7 +2193,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, delalloc_end = 0; page_started = 0; if (!epd->extent_locked) { - while(delalloc_end < page_end) { + while (delalloc_end < page_end) { nr_delalloc = find_lock_delalloc_range(inode, tree, page, &delalloc_start, @@ -2242,9 +2236,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, nr_written++; end = page_end; - if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { - printk("found delalloc bits after lock_extent\n"); - } + if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) + printk(KERN_ERR "btrfs delalloc bits after lock_extent\n"); if (last_byte <= start) { clear_extent_dirty(tree, start, page_end, GFP_NOFS); @@ -2297,7 +2290,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, 
clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); - unlock_extent(tree, unlock_start, cur + iosize -1, + unlock_extent(tree, unlock_start, cur + iosize - 1, GFP_NOFS); /* @@ -2344,9 +2337,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, set_range_writeback(tree, cur, cur + iosize - 1); if (!PageWriteback(page)) { - printk("warning page %lu not writeback, " - "cur %llu end %llu\n", page->index, - (unsigned long long)cur, + printk(KERN_ERR "btrfs warning page %lu not " + "writeback, cur %llu end %llu\n", + page->index, (unsigned long long)cur, (unsigned long long)end); } @@ -2430,8 +2423,8 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, retry: while (!done && (index <= end) && (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_DIRTY, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { + PAGECACHE_TAG_DIRTY, min(end - index, + (pgoff_t)PAGEVEC_SIZE-1) + 1))) { unsigned i; scanned = 1; @@ -2536,9 +2529,8 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, extent_write_cache_pages(tree, mapping, &wbc_writepages, __extent_writepage, &epd, flush_write_bio); - if (epd.bio) { + if (epd.bio) submit_one_bio(WRITE, epd.bio, 0, 0); - } return ret; } EXPORT_SYMBOL(extent_write_full_page); @@ -2568,7 +2560,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, .range_end = end + 1, }; - while(start <= end) { + while (start <= end) { page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); if (clear_page_dirty_for_io(page)) ret = __extent_writepage(page, &wbc_writepages, &epd); @@ -2606,9 +2598,8 @@ int extent_writepages(struct extent_io_tree *tree, ret = extent_write_cache_pages(tree, mapping, wbc, __extent_writepage, &epd, flush_write_bio); - if (epd.bio) { + if (epd.bio) submit_one_bio(WRITE, epd.bio, 0, 0); - } return ret; } EXPORT_SYMBOL(extent_writepages); @@ -2666,7 +2657,7 @@ int extent_invalidatepage(struct extent_io_tree *tree, 
u64 end = start + PAGE_CACHE_SIZE - 1; size_t blocksize = page->mapping->host->i_sb->s_blocksize; - start += (offset + blocksize -1) & ~(blocksize - 1); + start += (offset + blocksize - 1) & ~(blocksize - 1); if (start > end) return 0; @@ -2727,12 +2718,12 @@ int extent_prepare_write(struct extent_io_tree *tree, orig_block_start = block_start; lock_extent(tree, page_start, page_end, GFP_NOFS); - while(block_start <= block_end) { + while (block_start <= block_end) { em = get_extent(inode, page, page_offset, block_start, block_end - block_start + 1, 1); - if (IS_ERR(em) || !em) { + if (IS_ERR(em) || !em) goto err; - } + cur_end = min(block_end, extent_map_end(em) - 1); block_off_start = block_start & (PAGE_CACHE_SIZE - 1); block_off_end = block_off_start + blocksize; @@ -3170,7 +3161,7 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree, } __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); set_extent_dirty(tree, page_offset(page), - page_offset(page) + PAGE_CACHE_SIZE -1, + page_offset(page) + PAGE_CACHE_SIZE - 1, GFP_NOFS); unlock_page(page); } @@ -3235,7 +3226,7 @@ int extent_range_uptodate(struct extent_io_tree *tree, ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1); if (ret) return 1; - while(start <= end) { + while (start <= end) { index = start >> PAGE_CACHE_SHIFT; page = find_get_page(tree->mapping, index); uptodate = PageUptodate(page); @@ -3321,16 +3312,12 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, lock_page(page); } locked_pages++; - if (!PageUptodate(page)) { + if (!PageUptodate(page)) all_uptodate = 0; - } } if (all_uptodate) { if (start_i == 0) eb->flags |= EXTENT_UPTODATE; - if (ret) { - printk("all up to date but ret is %d\n", ret); - } goto unlock_exit; } @@ -3345,10 +3332,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, err = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num, &bio_flags); - if (err) { + if (err) ret = err; - printk("err %d from __extent_read_full_page\n", ret); - } } 
else { unlock_page(page); } @@ -3357,26 +3342,23 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, if (bio) submit_one_bio(READ, bio, mirror_num, bio_flags); - if (ret || !wait) { - if (ret) - printk("ret %d wait %d returning\n", ret, wait); + if (ret || !wait) return ret; - } + for (i = start_i; i < num_pages; i++) { page = extent_buffer_page(eb, i); wait_on_page_locked(page); - if (!PageUptodate(page)) { - printk("page not uptodate after wait_on_page_locked\n"); + if (!PageUptodate(page)) ret = -EIO; - } } + if (!ret) eb->flags |= EXTENT_UPTODATE; return ret; unlock_exit: i = start_i; - while(locked_pages > 0) { + while (locked_pages > 0) { page = extent_buffer_page(eb, i); i++; unlock_page(page); @@ -3403,7 +3385,7 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); - while(len > 0) { + while (len > 0) { page = extent_buffer_page(eb, i); cur = min(len, (PAGE_CACHE_SIZE - offset)); @@ -3442,8 +3424,11 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, offset = 0; *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset; } + if (start + min_len > eb->len) { -printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len); + printk(KERN_ERR "btrfs bad mapping eb start %llu len %lu, " + "wanted %lu %lu\n", (unsigned long long)eb->start, + eb->len, start, min_len); WARN_ON(1); } @@ -3506,7 +3491,7 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); - while(len > 0) { + while (len > 0) { page = extent_buffer_page(eb, i); cur = min(len, (PAGE_CACHE_SIZE - offset)); @@ -3542,7 +3527,7 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv, offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); - while(len > 0) { + while (len > 0) { page = extent_buffer_page(eb, i); 
WARN_ON(!PageUptodate(page)); @@ -3574,7 +3559,7 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); - while(len > 0) { + while (len > 0) { page = extent_buffer_page(eb, i); WARN_ON(!PageUptodate(page)); @@ -3607,7 +3592,7 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, offset = (start_offset + dst_offset) & ((unsigned long)PAGE_CACHE_SIZE - 1); - while(len > 0) { + while (len > 0) { page = extent_buffer_page(dst, i); WARN_ON(!PageUptodate(page)); @@ -3674,17 +3659,17 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, unsigned long src_i; if (src_offset + len > dst->len) { - printk("memmove bogus src_offset %lu move len %lu len %lu\n", - src_offset, len, dst->len); + printk(KERN_ERR "btrfs memmove bogus src_offset %lu move " + "len %lu dst len %lu\n", src_offset, len, dst->len); BUG_ON(1); } if (dst_offset + len > dst->len) { - printk("memmove bogus dst_offset %lu move len %lu len %lu\n", - dst_offset, len, dst->len); + printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move " + "len %lu dst len %lu\n", dst_offset, len, dst->len); BUG_ON(1); } - while(len > 0) { + while (len > 0) { dst_off_in_page = (start_offset + dst_offset) & ((unsigned long)PAGE_CACHE_SIZE - 1); src_off_in_page = (start_offset + src_offset) & @@ -3722,20 +3707,20 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, unsigned long src_i; if (src_offset + len > dst->len) { - printk("memmove bogus src_offset %lu move len %lu len %lu\n", - src_offset, len, dst->len); + printk(KERN_ERR "btrfs memmove bogus src_offset %lu move " + "len %lu len %lu\n", src_offset, len, dst->len); BUG_ON(1); } if (dst_offset + len > dst->len) { - printk("memmove bogus dst_offset %lu move len %lu len %lu\n", - dst_offset, len, dst->len); + printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move " + "len %lu len %lu\n", dst_offset, len, 
dst->len); BUG_ON(1); } if (dst_offset < src_offset) { memcpy_extent_buffer(dst, dst_offset, src_offset, len); return; } - while(len > 0) { + while (len > 0) { dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT; src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index fd3ebfb8c3c..4a83e33ada3 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -89,11 +89,11 @@ EXPORT_SYMBOL(free_extent_map); static struct rb_node *tree_insert(struct rb_root *root, u64 offset, struct rb_node *node) { - struct rb_node ** p = &root->rb_node; - struct rb_node * parent = NULL; + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; struct extent_map *entry; - while(*p) { + while (*p) { parent = *p; entry = rb_entry(parent, struct extent_map, rb_node); @@ -122,13 +122,13 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, struct rb_node **prev_ret, struct rb_node **next_ret) { - struct rb_node * n = root->rb_node; + struct rb_node *n = root->rb_node; struct rb_node *prev = NULL; struct rb_node *orig_prev = NULL; struct extent_map *entry; struct extent_map *prev_entry = NULL; - while(n) { + while (n) { entry = rb_entry(n, struct extent_map, rb_node); prev = n; prev_entry = entry; @@ -145,7 +145,7 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, if (prev_ret) { orig_prev = prev; - while(prev && offset >= extent_map_end(prev_entry)) { + while (prev && offset >= extent_map_end(prev_entry)) { prev = rb_next(prev); prev_entry = rb_entry(prev, struct extent_map, rb_node); } @@ -155,7 +155,7 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, if (next_ret) { prev_entry = rb_entry(prev, struct extent_map, rb_node); - while(prev && offset < prev_entry->start) { + while (prev && offset < prev_entry->start) { prev = rb_prev(prev); prev_entry = rb_entry(prev, struct extent_map, rb_node); } diff --git a/fs/btrfs/file-item.c 
b/fs/btrfs/file-item.c index cc6e0b6de94..b11abfad81a 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -24,7 +24,7 @@ #include "transaction.h" #include "print-tree.h" -#define MAX_CSUM_ITEMS(r,size) ((((BTRFS_LEAF_DATA_SIZE(r) - \ +#define MAX_CSUM_ITEMS(r, size) ((((BTRFS_LEAF_DATA_SIZE(r) - \ sizeof(struct btrfs_item) * 2) / \ size) - 1)) int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, @@ -166,7 +166,7 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, WARN_ON(bio->bi_vcnt <= 0); disk_bytenr = (u64)bio->bi_sector << 9; - while(bio_index < bio->bi_vcnt) { + while (bio_index < bio->bi_vcnt) { offset = page_offset(bvec->bv_page) + bvec->bv_offset; ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum); if (ret == 0) @@ -192,8 +192,9 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, offset + bvec->bv_len - 1, EXTENT_NODATASUM, GFP_NOFS); } else { - printk("no csum found for inode %lu " - "start %llu\n", inode->i_ino, + printk(KERN_INFO "btrfs no csum found " + "for inode %lu start %llu\n", + inode->i_ino, (unsigned long long)offset); } item = NULL; @@ -373,7 +374,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, BUG_ON(!ordered); sums->bytenr = ordered->start; - while(bio_index < bio->bi_vcnt) { + while (bio_index < bio->bi_vcnt) { if (!contig) offset = page_offset(bvec->bv_page) + bvec->bv_offset; @@ -507,7 +508,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); - while(1) { + while (1) { key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; key.offset = end_byte - 1; key.type = BTRFS_EXTENT_CSUM_KEY; @@ -715,9 +716,8 @@ again: goto csum; diff = diff - btrfs_item_size_nr(leaf, path->slots[0]); - if (diff != csum_size) { + if (diff != csum_size) goto insert; - } ret = btrfs_extend_item(trans, root, path, diff); BUG_ON(ret); @@ -732,7 +732,7 @@ insert: u64 next_sector = sector_sum->bytenr; struct btrfs_sector_sum *next = sector_sum + 
1; - while(tmp < sums->len) { + while (tmp < sums->len) { if (next_sector + root->sectorsize != next->bytenr) break; tmp += root->sectorsize; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5908521922f..0e3a13a4565 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -44,10 +44,10 @@ /* simple helper to fault in pages and copy. This should go away * and be replaced with calls into generic code. */ -static int noinline btrfs_copy_from_user(loff_t pos, int num_pages, +static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, struct page **prepared_pages, - const char __user * buf) + const char __user *buf) { long page_fault = 0; int i; @@ -78,7 +78,7 @@ static int noinline btrfs_copy_from_user(loff_t pos, int num_pages, /* * unlocks pages after btrfs_file_write is done with them */ -static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages) +static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages) { size_t i; for (i = 0; i < num_pages; i++) { @@ -103,7 +103,7 @@ static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages) * this also makes the decision about creating an inline extent vs * doing real data extents, marking pages dirty and delalloc as required. 
*/ -static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, +static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct file *file, struct page **pages, @@ -137,9 +137,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, btrfs_set_trans_block_group(trans, inode); hint_byte = 0; - if ((end_of_last_block & 4095) == 0) { - printk("strange end of last %Lu %zu %Lu\n", start_pos, write_bytes, end_of_last_block); - } set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS); /* check for reserved extents on each page, we don't want @@ -185,7 +182,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, len = (u64)-1; testend = 0; } - while(1) { + while (1) { if (!split) split = alloc_extent_map(GFP_NOFS); if (!split2) @@ -295,7 +292,7 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode) path = btrfs_alloc_path(); ret = btrfs_lookup_file_extent(NULL, root, path, inode->i_ino, last_offset, 0); - while(1) { + while (1) { nritems = btrfs_header_nritems(path->nodes[0]); if (path->slots[0] >= nritems) { ret = btrfs_next_leaf(root, path); @@ -314,8 +311,10 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode) if (found_key.offset < last_offset) { WARN_ON(1); btrfs_print_leaf(root, leaf); - printk("inode %lu found offset %Lu expected %Lu\n", - inode->i_ino, found_key.offset, last_offset); + printk(KERN_ERR "inode %lu found offset %llu " + "expected %llu\n", inode->i_ino, + (unsigned long long)found_key.offset, + (unsigned long long)last_offset); err = 1; goto out; } @@ -331,7 +330,7 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode) extent_end = found_key.offset + btrfs_file_extent_inline_len(leaf, extent); extent_end = (extent_end + root->sectorsize - 1) & - ~((u64)root->sectorsize -1 ); + ~((u64)root->sectorsize - 1); } last_offset = extent_end; path->slots[0]++; @@ -339,8 +338,9 @@ int 
btrfs_check_file(struct btrfs_root *root, struct inode *inode) if (0 && last_offset < inode->i_size) { WARN_ON(1); btrfs_print_leaf(root, leaf); - printk("inode %lu found offset %Lu size %Lu\n", inode->i_ino, - last_offset, inode->i_size); + printk(KERN_ERR "inode %lu found offset %llu size %llu\n", + inode->i_ino, (unsigned long long)last_offset, + (unsigned long long)inode->i_size); err = 1; } @@ -362,7 +362,7 @@ out: * inline_limit is used to tell this code which offsets in the file to keep * if they contain inline extents. */ -int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, +noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, u64 end, u64 inline_limit, u64 *hint_byte) { @@ -398,7 +398,7 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) return -ENOMEM; - while(1) { + while (1) { recow = 0; btrfs_release_path(root, path); ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, @@ -649,16 +649,15 @@ next_slot: if (disk_bytenr != 0) { ret = btrfs_update_extent_ref(trans, root, disk_bytenr, orig_parent, - leaf->start, + leaf->start, root->root_key.objectid, trans->transid, ins.objectid); BUG_ON(ret); } btrfs_release_path(root, path); - if (disk_bytenr != 0) { + if (disk_bytenr != 0) inode_add_bytes(inode, extent_end - end); - } } if (found_extent && !keep) { @@ -944,7 +943,7 @@ done: * waits for data=ordered extents to finish before allowing the pages to be * modified. 
*/ -static int noinline prepare_pages(struct btrfs_root *root, struct file *file, +static noinline int prepare_pages(struct btrfs_root *root, struct file *file, struct page **pages, size_t num_pages, loff_t pos, unsigned long first_index, unsigned long last_index, size_t write_bytes) @@ -979,7 +978,8 @@ again: struct btrfs_ordered_extent *ordered; lock_extent(&BTRFS_I(inode)->io_tree, start_pos, last_pos - 1, GFP_NOFS); - ordered = btrfs_lookup_first_ordered_extent(inode, last_pos -1); + ordered = btrfs_lookup_first_ordered_extent(inode, + last_pos - 1); if (ordered && ordered->file_offset + ordered->len > start_pos && ordered->file_offset < last_pos) { @@ -1085,7 +1085,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } } - while(count > 0) { + while (count > 0) { size_t offset = pos & (PAGE_CACHE_SIZE - 1); size_t write_bytes = min(count, nrptrs * (size_t)PAGE_CACHE_SIZE - @@ -1178,7 +1178,7 @@ out_nolock: return num_written ? num_written : err; } -int btrfs_release_file(struct inode * inode, struct file * filp) +int btrfs_release_file(struct inode *inode, struct file *filp) { if (filp->private_data) btrfs_ioctl_trans_end(filp); @@ -1237,9 +1237,8 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) } ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); - if (ret < 0) { + if (ret < 0) goto out; - } /* we've logged all the items and now have a consistent * version of the file in the log. It is possible that diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 2e69b9c3043..d1e5f0e84c5 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -213,10 +213,13 @@ static int __btrfs_add_free_space(struct btrfs_block_group_cache *block_group, info->offset = offset; info->bytes += bytes; } else if (right_info && right_info->offset != offset+bytes) { - printk(KERN_ERR "adding space in the middle of an existing " - "free space area. existing: offset=%Lu, bytes=%Lu. 
" - "new: offset=%Lu, bytes=%Lu\n", right_info->offset, - right_info->bytes, offset, bytes); + printk(KERN_ERR "btrfs adding space in the middle of an " + "existing free space area. existing: " + "offset=%llu, bytes=%llu. new: offset=%llu, " + "bytes=%llu\n", (unsigned long long)right_info->offset, + (unsigned long long)right_info->bytes, + (unsigned long long)offset, + (unsigned long long)bytes); BUG(); } @@ -225,11 +228,14 @@ static int __btrfs_add_free_space(struct btrfs_block_group_cache *block_group, if (unlikely((left_info->offset + left_info->bytes) != offset)) { - printk(KERN_ERR "free space to the left of new free " - "space isn't quite right. existing: offset=%Lu," - " bytes=%Lu. new: offset=%Lu, bytes=%Lu\n", - left_info->offset, left_info->bytes, offset, - bytes); + printk(KERN_ERR "btrfs free space to the left " + "of new free space isn't " + "quite right. existing: offset=%llu, " + "bytes=%llu. new: offset=%llu, bytes=%llu\n", + (unsigned long long)left_info->offset, + (unsigned long long)left_info->bytes, + (unsigned long long)offset, + (unsigned long long)bytes); BUG(); } @@ -265,8 +271,7 @@ out: BUG(); } - if (alloc_info) - kfree(alloc_info); + kfree(alloc_info); return ret; } @@ -283,9 +288,11 @@ __btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, if (info && info->offset == offset) { if (info->bytes < bytes) { - printk(KERN_ERR "Found free space at %Lu, size %Lu," - "trying to use %Lu\n", - info->offset, info->bytes, bytes); + printk(KERN_ERR "Found free space at %llu, size %llu," + "trying to use %llu\n", + (unsigned long long)info->offset, + (unsigned long long)info->bytes, + (unsigned long long)bytes); WARN_ON(1); ret = -EINVAL; goto out; @@ -401,8 +408,6 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, info = rb_entry(n, struct btrfs_free_space, offset_index); if (info->bytes >= bytes) count++; - //printk(KERN_INFO "offset=%Lu, bytes=%Lu\n", info->offset, - // info->bytes); } printk(KERN_INFO "%d 
blocks of free space at or bigger than bytes is" "\n", count); diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 80038c5ef7c..2aa79873eb4 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -129,7 +129,6 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, last_ino = key.objectid + 1; path->slots[0]++; } - // FIXME -ENOSPC BUG_ON(1); found: btrfs_release_path(root, path); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 068bad46338..1b35ea63b6c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -124,7 +124,7 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, * the btree. The caller should have done a btrfs_drop_extents so that * no overlapping inline items exist in the btree */ -static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, +static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, size_t size, size_t compressed_size, struct page **compressed_pages) @@ -148,7 +148,8 @@ static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, cur_size = compressed_size; } - path = btrfs_alloc_path(); if (!path) + path = btrfs_alloc_path(); + if (!path) return -ENOMEM; btrfs_set_trans_block_group(trans, inode); @@ -165,7 +166,6 @@ static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, BUG_ON(ret); if (ret) { err = ret; - printk("got bad ret %d\n", ret); goto fail; } leaf = path->nodes[0]; @@ -181,7 +181,7 @@ static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, if (use_compress) { struct page *cpage; int i = 0; - while(compressed_size > 0) { + while (compressed_size > 0) { cpage = compressed_pages[i]; cur_size = min_t(unsigned long, compressed_size, PAGE_CACHE_SIZE); @@ -519,8 +519,7 @@ free_pages_out: WARN_ON(pages[i]->mapping); page_cache_release(pages[i]); } - if (pages) - kfree(pages); + kfree(pages); goto out; } @@ -549,7 +548,7 @@ static noinline 
int submit_compressed_extents(struct inode *inode, trans = btrfs_join_transaction(root, 1); - while(!list_empty(&async_cow->extents)) { + while (!list_empty(&async_cow->extents)) { async_extent = list_entry(async_cow->extents.next, struct async_extent, list); list_del(&async_extent->list); @@ -562,8 +561,8 @@ static noinline int submit_compressed_extents(struct inode *inode, unsigned long nr_written = 0; lock_extent(io_tree, async_extent->start, - async_extent->start + async_extent->ram_size - 1, - GFP_NOFS); + async_extent->start + + async_extent->ram_size - 1, GFP_NOFS); /* allocate blocks */ cow_file_range(inode, async_cow->locked_page, @@ -581,7 +580,7 @@ static noinline int submit_compressed_extents(struct inode *inode, if (!page_started) extent_write_locked_range(io_tree, inode, async_extent->start, - async_extent->start + + async_extent->start + async_extent->ram_size - 1, btrfs_get_extent, WB_SYNC_ALL); @@ -618,7 +617,7 @@ static noinline int submit_compressed_extents(struct inode *inode, set_bit(EXTENT_FLAG_PINNED, &em->flags); set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); - while(1) { + while (1) { spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); spin_unlock(&em_tree->lock); @@ -651,11 +650,11 @@ static noinline int submit_compressed_extents(struct inode *inode, NULL, 1, 1, 0, 1, 1, 0); ret = btrfs_submit_compressed_write(inode, - async_extent->start, - async_extent->ram_size, - ins.objectid, - ins.offset, async_extent->pages, - async_extent->nr_pages); + async_extent->start, + async_extent->ram_size, + ins.objectid, + ins.offset, async_extent->pages, + async_extent->nr_pages); BUG_ON(ret); trans = btrfs_join_transaction(root, 1); @@ -735,14 +734,13 @@ static noinline int cow_file_range(struct inode *inode, btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); - while(disk_num_bytes > 0) { + while (disk_num_bytes > 0) { cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent); ret = btrfs_reserve_extent(trans, root, 
cur_alloc_size, root->sectorsize, 0, alloc_hint, (u64)-1, &ins, 1); - if (ret) { - BUG(); - } + BUG_ON(ret); + em = alloc_extent_map(GFP_NOFS); em->start = start; em->orig_start = em->start; @@ -755,7 +753,7 @@ static noinline int cow_file_range(struct inode *inode, em->bdev = root->fs_info->fs_devices->latest_bdev; set_bit(EXTENT_FLAG_PINNED, &em->flags); - while(1) { + while (1) { spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); spin_unlock(&em_tree->lock); @@ -779,11 +777,9 @@ static noinline int cow_file_range(struct inode *inode, BUG_ON(ret); } - if (disk_num_bytes < cur_alloc_size) { - printk("num_bytes %Lu cur_alloc %Lu\n", disk_num_bytes, - cur_alloc_size); + if (disk_num_bytes < cur_alloc_size) break; - } + /* we're not doing compressed IO, don't unlock the first * page (which the caller expects to stay locked), don't * clear any dirty bits and don't set any writeback bits @@ -842,9 +838,8 @@ static noinline void async_cow_submit(struct btrfs_work *work) waitqueue_active(&root->fs_info->async_submit_wait)) wake_up(&root->fs_info->async_submit_wait); - if (async_cow->inode) { + if (async_cow->inode) submit_compressed_extents(async_cow->inode, async_cow); - } } static noinline void async_cow_free(struct btrfs_work *work) @@ -871,7 +866,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED | EXTENT_DELALLOC, 1, 0, GFP_NOFS); - while(start < end) { + while (start < end) { async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); async_cow->inode = inode; async_cow->root = root; @@ -904,7 +899,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, limit)); } - while(atomic_read(&root->fs_info->async_submit_draining) && + while (atomic_read(&root->fs_info->async_submit_draining) && atomic_read(&root->fs_info->async_delalloc_pages)) { wait_event(root->fs_info->async_submit_wait, 
(atomic_read(&root->fs_info->async_delalloc_pages) == @@ -918,7 +913,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, return 0; } -static int noinline csum_exist_in_range(struct btrfs_root *root, +static noinline int csum_exist_in_range(struct btrfs_root *root, u64 bytenr, u64 num_bytes) { int ret; @@ -1146,13 +1141,13 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, if (btrfs_test_flag(inode, NODATACOW)) ret = run_delalloc_nocow(inode, locked_page, start, end, - page_started, 1, nr_written); + page_started, 1, nr_written); else if (btrfs_test_flag(inode, PREALLOC)) ret = run_delalloc_nocow(inode, locked_page, start, end, - page_started, 0, nr_written); + page_started, 0, nr_written); else ret = cow_file_range_async(inode, locked_page, start, end, - page_started, nr_written); + page_started, nr_written); return ret; } @@ -1200,8 +1195,11 @@ static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, spin_lock(&root->fs_info->delalloc_lock); if (end - start + 1 > root->fs_info->delalloc_bytes) { - printk("warning: delalloc account %Lu %Lu\n", - end - start + 1, root->fs_info->delalloc_bytes); + printk(KERN_INFO "btrfs warning: delalloc account " + "%llu %llu\n", + (unsigned long long)end - start + 1, + (unsigned long long) + root->fs_info->delalloc_bytes); root->fs_info->delalloc_bytes = 0; BTRFS_I(inode)->delalloc_bytes = 0; } else { @@ -1241,9 +1239,8 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, ret = btrfs_map_block(map_tree, READ, logical, &map_length, NULL, 0); - if (map_length < length + size) { + if (map_length < length + size) return 1; - } return 0; } @@ -1255,8 +1252,9 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, * At IO completion time the cums attached on the ordered extent record * are inserted into the btree */ -static int __btrfs_submit_bio_start(struct inode *inode, int rw, struct bio *bio, - int mirror_num, unsigned long 
bio_flags) +static int __btrfs_submit_bio_start(struct inode *inode, int rw, + struct bio *bio, int mirror_num, + unsigned long bio_flags) { struct btrfs_root *root = BTRFS_I(inode)->root; int ret = 0; @@ -1341,9 +1339,8 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end) { - if ((end & (PAGE_CACHE_SIZE - 1)) == 0) { + if ((end & (PAGE_CACHE_SIZE - 1)) == 0) WARN_ON(1); - } return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); } @@ -1755,14 +1752,14 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, } local_irq_save(flags); kaddr = kmap_atomic(page, KM_IRQ0); - if (ret) { + if (ret) goto zeroit; - } + csum = btrfs_csum_data(root, kaddr + offset, csum, end - start + 1); btrfs_csum_final(csum, (char *)&csum); - if (csum != private) { + if (csum != private) goto zeroit; - } + kunmap_atomic(kaddr, KM_IRQ0); local_irq_restore(flags); good: @@ -1773,9 +1770,10 @@ good: return 0; zeroit: - printk("btrfs csum failed ino %lu off %llu csum %u private %Lu\n", - page->mapping->host->i_ino, (unsigned long long)start, csum, - private); + printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u " + "private %llu\n", page->mapping->host->i_ino, + (unsigned long long)start, csum, + (unsigned long long)private); memset(kaddr + offset, 1, end - start + 1); flush_dcache_page(page); kunmap_atomic(kaddr, KM_IRQ0); @@ -2097,9 +2095,8 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, /* * copy everything in the in-memory inode into the btree. 
*/ -int noinline btrfs_update_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode) +noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode) { struct btrfs_inode_item *inode_item; struct btrfs_path *path; @@ -2174,7 +2171,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, inode->i_ino, dir->i_ino, &index); if (ret) { - printk("failed to delete reference to %.*s, " + printk(KERN_INFO "btrfs failed to delete reference to %.*s, " "inode %lu parent %lu\n", name_len, name, inode->i_ino, dir->i_ino); goto err; @@ -2280,9 +2277,8 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) /* now the directory is empty */ err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, dentry->d_name.name, dentry->d_name.len); - if (!err) { + if (!err) btrfs_i_size_write(inode, 0); - } fail_trans: nr = trans->blocks_used; @@ -2516,9 +2512,9 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, search_again: ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) { + if (ret < 0) goto error; - } + if (ret > 0) { /* there are no items in the tree for us to truncate, we're * done @@ -2530,7 +2526,7 @@ search_again: path->slots[0]--; } - while(1) { + while (1) { fi = NULL; leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); @@ -2562,19 +2558,18 @@ search_again: item_end--; } if (item_end < new_size) { - if (found_type == BTRFS_DIR_ITEM_KEY) { + if (found_type == BTRFS_DIR_ITEM_KEY) found_type = BTRFS_INODE_ITEM_KEY; - } else if (found_type == BTRFS_EXTENT_ITEM_KEY) { + else if (found_type == BTRFS_EXTENT_ITEM_KEY) found_type = BTRFS_EXTENT_DATA_KEY; - } else if (found_type == BTRFS_EXTENT_DATA_KEY) { + else if (found_type == BTRFS_EXTENT_DATA_KEY) found_type = BTRFS_XATTR_ITEM_KEY; - } else if (found_type == BTRFS_XATTR_ITEM_KEY) { + else if (found_type == BTRFS_XATTR_ITEM_KEY) found_type = 
BTRFS_INODE_REF_KEY; - } else if (found_type) { + else if (found_type) found_type--; - } else { + else break; - } btrfs_set_key_type(&key, found_type); goto next; } @@ -2656,7 +2651,7 @@ delete: pending_del_nr++; pending_del_slot = path->slots[0]; } else { - printk("bad pending slot %d pending_del_nr %d pending_del_slot %d\n", path->slots[0], pending_del_nr, pending_del_slot); + BUG(); } } else { break; @@ -2938,9 +2933,10 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, namelen, 0); if (IS_ERR(di)) ret = PTR_ERR(di); - if (!di || IS_ERR(di)) { + + if (!di || IS_ERR(di)) goto out_err; - } + btrfs_dir_item_key_to_cpu(path->nodes[0], di, location); out: btrfs_free_path(path); @@ -3020,8 +3016,8 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) static int btrfs_find_actor(struct inode *inode, void *opaque) { struct btrfs_iget_args *args = opaque; - return (args->ino == inode->i_ino && - args->root == BTRFS_I(inode)->root); + return args->ino == inode->i_ino && + args->root == BTRFS_I(inode)->root; } struct inode *btrfs_ilookup(struct super_block *s, u64 objectid, @@ -3085,7 +3081,7 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) { - struct inode * inode; + struct inode *inode; struct btrfs_inode *bi = BTRFS_I(dir); struct btrfs_root *root = bi->root; struct btrfs_root *sub_root = root; @@ -3385,9 +3381,8 @@ int btrfs_set_inode_index(struct inode *dir, u64 *index) if (BTRFS_I(dir)->index_cnt == (u64)-1) { ret = btrfs_set_inode_index_count(dir); - if (ret) { + if (ret) return ret; - } } *index = BTRFS_I(dir)->index_cnt; @@ -3879,12 +3874,13 @@ static noinline int uncompress_inline(struct btrfs_path *path, /* * a bit scary, this does extent mapping from logical file offset to the disk. - * the ugly parts come from merging extents from the disk with the - * in-ram representation. 
This gets more complex because of the data=ordered code, + * the ugly parts come from merging extents from the disk with the in-ram + * representation. This gets more complex because of the data=ordered code, * where the in-ram extents might be locked pending data=ordered completion. * * This also copies inline extents directly into the page. */ + struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, size_t pg_offset, u64 start, u64 len, int create) @@ -4081,7 +4077,7 @@ again: extent_map_end(em) - 1, GFP_NOFS); goto insert; } else { - printk("unkknown found_type %d\n", found_type); + printk(KERN_ERR "btrfs unknown found_type %d\n", found_type); WARN_ON(1); } not_found: @@ -4093,7 +4089,11 @@ not_found_em: insert: btrfs_release_path(root, path); if (em->start > start || extent_map_end(em) <= start) { - printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->len, start, len); + printk(KERN_ERR "Btrfs: bad extent! em: [%llu %llu] passed " + "[%llu %llu]\n", (unsigned long long)em->start, + (unsigned long long)em->len, + (unsigned long long)start, + (unsigned long long)len); err = -EIO; goto out; } @@ -4130,8 +4130,6 @@ insert: } } else { err = -EIO; - printk("failing to insert %Lu %Lu\n", - start, len); free_extent_map(em); em = NULL; } @@ -4147,9 +4145,8 @@ out: btrfs_free_path(path); if (trans) { ret = btrfs_end_transaction(trans, root); - if (!err) { + if (!err) err = ret; - } } if (err) { free_extent_map(em); @@ -4482,13 +4479,15 @@ void btrfs_destroy_inode(struct inode *inode) } spin_unlock(&BTRFS_I(inode)->root->list_lock); - while(1) { + while (1) { ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); if (!ordered) break; else { - printk("found ordered extent %Lu %Lu\n", - ordered->file_offset, ordered->len); + printk(KERN_ERR "btrfs found ordered " + "extent %llu %llu on inode cleanup\n", + (unsigned long long)ordered->file_offset, + (unsigned long long)ordered->len); btrfs_remove_ordered_extent(inode, ordered); 
btrfs_put_ordered_extent(ordered); btrfs_put_ordered_extent(ordered); @@ -4572,8 +4571,8 @@ static int btrfs_getattr(struct vfsmount *mnt, return 0; } -static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, - struct inode * new_dir,struct dentry *new_dentry) +static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(old_dir)->root; @@ -4663,7 +4662,7 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root) return -EROFS; spin_lock(&root->fs_info->delalloc_lock); - while(!list_empty(head)) { + while (!list_empty(head)) { binode = list_entry(head->next, struct btrfs_inode, delalloc_inodes); inode = igrab(&binode->vfs_inode); @@ -4684,7 +4683,7 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root) * ordered extents get created before we return */ atomic_inc(&root->fs_info->async_submit_draining); - while(atomic_read(&root->fs_info->nr_async_submits) || + while (atomic_read(&root->fs_info->nr_async_submits) || atomic_read(&root->fs_info->async_delalloc_pages)) { wait_event(root->fs_info->async_submit_wait, (atomic_read(&root->fs_info->nr_async_submits) == 0 && diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ba484aac1b9..c2aa33e3feb 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -311,7 +311,7 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name, * to see if is references the subvolume where we are * placing this new snapshot. 
*/ - while(1) { + while (1) { if (!test || dir == snap_src->fs_info->sb->s_root || test == snap_src->fs_info->sb->s_root || @@ -319,7 +319,8 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name, break; } if (S_ISLNK(test->d_inode->i_mode)) { - printk("Symlink in snapshot path, failed\n"); + printk(KERN_INFO "Btrfs symlink in snapshot " + "path, failed\n"); error = -EMLINK; btrfs_free_path(path); goto out_drop_write; @@ -329,7 +330,8 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name, ret = btrfs_find_root_ref(snap_src->fs_info->tree_root, path, test_oid, parent_oid); if (ret == 0) { - printk("Snapshot creation failed, looping\n"); + printk(KERN_INFO "Btrfs snapshot creation " + "failed, looping\n"); error = -EMLINK; btrfs_free_path(path); goto out_drop_write; @@ -617,7 +619,8 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, src_inode = src_file->f_path.dentry->d_inode; if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) { - printk("btrfs: Snapshot src from another FS\n"); + printk(KERN_INFO "btrfs: Snapshot src from " + "another FS\n"); ret = -EINVAL; fput(src_file); goto out; @@ -810,9 +813,6 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, ((off + len) & (bs-1))) goto out_unlock; - printk("final src extent is %llu~%llu\n", off, len); - printk("final dst extent is %llu~%llu\n", destoff, len); - /* do any pending delalloc/csum calc on src, one way or another, and lock file content */ while (1) { @@ -883,10 +883,13 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, comp = btrfs_file_extent_compression(leaf, extent); type = btrfs_file_extent_type(leaf, extent); if (type == BTRFS_FILE_EXTENT_REG) { - disko = btrfs_file_extent_disk_bytenr(leaf, extent); - diskl = btrfs_file_extent_disk_num_bytes(leaf, extent); + disko = btrfs_file_extent_disk_bytenr(leaf, + extent); + diskl = btrfs_file_extent_disk_num_bytes(leaf, + extent); datao = btrfs_file_extent_offset(leaf, 
extent); - datal = btrfs_file_extent_num_bytes(leaf, extent); + datal = btrfs_file_extent_num_bytes(leaf, + extent); } else if (type == BTRFS_FILE_EXTENT_INLINE) { /* take upper bound, may be compressed */ datal = btrfs_file_extent_ram_bytes(leaf, @@ -916,8 +919,6 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); - printk(" orig disk %llu~%llu data %llu~%llu\n", - disko, diskl, datao, datal); if (off > key.offset) { datao += off - key.offset; @@ -929,8 +930,6 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, /* disko == 0 means it's a hole */ if (!disko) datao = 0; - printk(" final disk %llu~%llu data %llu~%llu\n", - disko, diskl, datao, datal); btrfs_set_file_extent_offset(leaf, extent, datao); @@ -952,12 +951,11 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, skip = off - key.offset; new_key.offset += skip; } + if (key.offset + datal > off+len) trim = key.offset + datal - (off+len); - printk("len %lld skip %lld trim %lld\n", - datal, skip, trim); + if (comp && (skip || trim)) { - printk("btrfs clone_range can't split compressed inline extents yet\n"); ret = -EINVAL; goto out; } @@ -969,7 +967,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, goto out; if (skip) { - u32 start = btrfs_file_extent_calc_inline_size(0); + u32 start = + btrfs_file_extent_calc_inline_size(0); memmove(buf+start, buf+start+skip, datal); } @@ -985,7 +984,7 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, btrfs_mark_buffer_dirty(leaf); } - next: +next: btrfs_release_path(root, path); key.offset++; } diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index e30aa6e2958..39bae7761db 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -31,9 +31,10 @@ * difference in almost every workload, but spinning for the right amount of * time needs some help. 
* - * In general, we want to spin as long as the lock holder is doing btree searches, - * and we should give up if they are in more expensive code. + * In general, we want to spin as long as the lock holder is doing btree + * searches, and we should give up if they are in more expensive code. */ + int btrfs_tree_lock(struct extent_buffer *eb) { int i; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index d9e232227da..a2094017027 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -39,11 +39,11 @@ static u64 entry_end(struct btrfs_ordered_extent *entry) static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset, struct rb_node *node) { - struct rb_node ** p = &root->rb_node; - struct rb_node * parent = NULL; + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; struct btrfs_ordered_extent *entry; - while(*p) { + while (*p) { parent = *p; entry = rb_entry(parent, struct btrfs_ordered_extent, rb_node); @@ -67,13 +67,13 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset, static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset, struct rb_node **prev_ret) { - struct rb_node * n = root->rb_node; + struct rb_node *n = root->rb_node; struct rb_node *prev = NULL; struct rb_node *test; struct btrfs_ordered_extent *entry; struct btrfs_ordered_extent *prev_entry = NULL; - while(n) { + while (n) { entry = rb_entry(n, struct btrfs_ordered_extent, rb_node); prev = n; prev_entry = entry; @@ -88,7 +88,7 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset, if (!prev_ret) return NULL; - while(prev && file_offset >= entry_end(prev_entry)) { + while (prev && file_offset >= entry_end(prev_entry)) { test = rb_next(prev); if (!test) break; @@ -102,7 +102,7 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset, if (prev) prev_entry = rb_entry(prev, struct btrfs_ordered_extent, rb_node); - while(prev && file_offset < 
entry_end(prev_entry)) { + while (prev && file_offset < entry_end(prev_entry)) { test = rb_prev(prev); if (!test) break; @@ -193,10 +193,8 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, node = tree_insert(&tree->tree, file_offset, &entry->rb_node); - if (node) { - printk("warning dup entry from add_ordered_extent\n"); - BUG(); - } + BUG_ON(node); + set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset, entry_end(entry) - 1, GFP_NOFS); @@ -282,7 +280,7 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) struct btrfs_ordered_sum *sum; if (atomic_dec_and_test(&entry->refs)) { - while(!list_empty(&entry->list)) { + while (!list_empty(&entry->list)) { cur = entry->list.next; sum = list_entry(cur, struct btrfs_ordered_sum, list); list_del(&sum->list); @@ -432,11 +430,10 @@ again: orig_end >> PAGE_CACHE_SHIFT); end = orig_end; - while(1) { + while (1) { ordered = btrfs_lookup_first_ordered_extent(inode, end); - if (!ordered) { + if (!ordered) break; - } if (ordered->file_offset > orig_end) { btrfs_put_ordered_extent(ordered); break; @@ -492,7 +489,7 @@ out: * if none is found */ struct btrfs_ordered_extent * -btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset) +btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset) { struct btrfs_ordered_inode_tree *tree; struct rb_node *node; @@ -553,7 +550,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, * yet */ node = &ordered->rb_node; - while(1) { + while (1) { node = rb_prev(node); if (!node) break; @@ -581,9 +578,8 @@ int btrfs_ordered_update_i_size(struct inode *inode, * between our ordered extent and the next one. 
*/ test = rb_entry(node, struct btrfs_ordered_extent, rb_node); - if (test->file_offset > entry_end(ordered)) { + if (test->file_offset > entry_end(ordered)) i_size_test = test->file_offset; - } } else { i_size_test = i_size_read(inode); } diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 64725c13aa1..5f8f218c100 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -24,13 +24,14 @@ static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk) { int num_stripes = btrfs_chunk_num_stripes(eb, chunk); int i; - printk("\t\tchunk length %llu owner %llu type %llu num_stripes %d\n", + printk(KERN_INFO "\t\tchunk length %llu owner %llu type %llu " + "num_stripes %d\n", (unsigned long long)btrfs_chunk_length(eb, chunk), (unsigned long long)btrfs_chunk_owner(eb, chunk), (unsigned long long)btrfs_chunk_type(eb, chunk), num_stripes); for (i = 0 ; i < num_stripes ; i++) { - printk("\t\t\tstripe %d devid %llu offset %llu\n", i, + printk(KERN_INFO "\t\t\tstripe %d devid %llu offset %llu\n", i, (unsigned long long)btrfs_stripe_devid_nr(eb, chunk, i), (unsigned long long)btrfs_stripe_offset_nr(eb, chunk, i)); } @@ -38,8 +39,8 @@ static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk) static void print_dev_item(struct extent_buffer *eb, struct btrfs_dev_item *dev_item) { - printk("\t\tdev item devid %llu " - "total_bytes %llu bytes used %Lu\n", + printk(KERN_INFO "\t\tdev item devid %llu " + "total_bytes %llu bytes used %llu\n", (unsigned long long)btrfs_device_id(eb, dev_item), (unsigned long long)btrfs_device_total_bytes(eb, dev_item), (unsigned long long)btrfs_device_bytes_used(eb, dev_item)); @@ -61,14 +62,15 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) struct btrfs_dev_extent *dev_extent; u32 type; - printk("leaf %llu total ptrs %d free space %d\n", + printk(KERN_INFO "leaf %llu total ptrs %d free space %d\n", (unsigned long long)btrfs_header_bytenr(l), nr, btrfs_leaf_free_space(root, 
l)); for (i = 0 ; i < nr ; i++) { item = btrfs_item_nr(l, i); btrfs_item_key_to_cpu(l, &key, i); type = btrfs_key_type(&key); - printk("\titem %d key (%llu %x %llu) itemoff %d itemsize %d\n", + printk(KERN_INFO "\titem %d key (%llu %x %llu) itemoff %d " + "itemsize %d\n", i, (unsigned long long)key.objectid, type, (unsigned long long)key.offset, @@ -76,33 +78,36 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) switch (type) { case BTRFS_INODE_ITEM_KEY: ii = btrfs_item_ptr(l, i, struct btrfs_inode_item); - printk("\t\tinode generation %llu size %llu mode %o\n", - (unsigned long long)btrfs_inode_generation(l, ii), + printk(KERN_INFO "\t\tinode generation %llu size %llu " + "mode %o\n", + (unsigned long long) + btrfs_inode_generation(l, ii), (unsigned long long)btrfs_inode_size(l, ii), btrfs_inode_mode(l, ii)); break; case BTRFS_DIR_ITEM_KEY: di = btrfs_item_ptr(l, i, struct btrfs_dir_item); btrfs_dir_item_key_to_cpu(l, di, &found_key); - printk("\t\tdir oid %llu type %u\n", + printk(KERN_INFO "\t\tdir oid %llu type %u\n", (unsigned long long)found_key.objectid, btrfs_dir_type(l, di)); break; case BTRFS_ROOT_ITEM_KEY: ri = btrfs_item_ptr(l, i, struct btrfs_root_item); - printk("\t\troot data bytenr %llu refs %u\n", - (unsigned long long)btrfs_disk_root_bytenr(l, ri), + printk(KERN_INFO "\t\troot data bytenr %llu refs %u\n", + (unsigned long long) + btrfs_disk_root_bytenr(l, ri), btrfs_disk_root_refs(l, ri)); break; case BTRFS_EXTENT_ITEM_KEY: ei = btrfs_item_ptr(l, i, struct btrfs_extent_item); - printk("\t\textent data refs %u\n", + printk(KERN_INFO "\t\textent data refs %u\n", btrfs_extent_refs(l, ei)); break; case BTRFS_EXTENT_REF_KEY: ref = btrfs_item_ptr(l, i, struct btrfs_extent_ref); - printk("\t\textent back ref root %llu gen %llu " - "owner %llu num_refs %lu\n", + printk(KERN_INFO "\t\textent back ref root %llu " + "gen %llu owner %llu num_refs %lu\n", (unsigned long long)btrfs_ref_root(l, ref), (unsigned long 
long)btrfs_ref_generation(l, ref), (unsigned long long)btrfs_ref_objectid(l, ref), @@ -114,26 +119,36 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) struct btrfs_file_extent_item); if (btrfs_file_extent_type(l, fi) == BTRFS_FILE_EXTENT_INLINE) { - printk("\t\tinline extent data size %u\n", - btrfs_file_extent_inline_len(l, fi)); + printk(KERN_INFO "\t\tinline extent data " + "size %u\n", + btrfs_file_extent_inline_len(l, fi)); break; } - printk("\t\textent data disk bytenr %llu nr %llu\n", - (unsigned long long)btrfs_file_extent_disk_bytenr(l, fi), - (unsigned long long)btrfs_file_extent_disk_num_bytes(l, fi)); - printk("\t\textent data offset %llu nr %llu ram %llu\n", - (unsigned long long)btrfs_file_extent_offset(l, fi), - (unsigned long long)btrfs_file_extent_num_bytes(l, fi), - (unsigned long long)btrfs_file_extent_ram_bytes(l, fi)); + printk(KERN_INFO "\t\textent data disk bytenr %llu " + "nr %llu\n", + (unsigned long long) + btrfs_file_extent_disk_bytenr(l, fi), + (unsigned long long) + btrfs_file_extent_disk_num_bytes(l, fi)); + printk(KERN_INFO "\t\textent data offset %llu " + "nr %llu ram %llu\n", + (unsigned long long) + btrfs_file_extent_offset(l, fi), + (unsigned long long) + btrfs_file_extent_num_bytes(l, fi), + (unsigned long long) + btrfs_file_extent_ram_bytes(l, fi)); break; case BTRFS_BLOCK_GROUP_ITEM_KEY: bi = btrfs_item_ptr(l, i, struct btrfs_block_group_item); - printk("\t\tblock group used %llu\n", - (unsigned long long)btrfs_disk_block_group_used(l, bi)); + printk(KERN_INFO "\t\tblock group used %llu\n", + (unsigned long long) + btrfs_disk_block_group_used(l, bi)); break; case BTRFS_CHUNK_ITEM_KEY: - print_chunk(l, btrfs_item_ptr(l, i, struct btrfs_chunk)); + print_chunk(l, btrfs_item_ptr(l, i, + struct btrfs_chunk)); break; case BTRFS_DEV_ITEM_KEY: print_dev_item(l, btrfs_item_ptr(l, i, @@ -142,7 +157,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) case BTRFS_DEV_EXTENT_KEY: dev_extent 
= btrfs_item_ptr(l, i, struct btrfs_dev_extent); - printk("\t\tdev extent chunk_tree %llu\n" + printk(KERN_INFO "\t\tdev extent chunk_tree %llu\n" "\t\tchunk objectid %llu chunk offset %llu " "length %llu\n", (unsigned long long) @@ -171,13 +186,13 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) btrfs_print_leaf(root, c); return; } - printk("node %llu level %d total ptrs %d free spc %u\n", + printk(KERN_INFO "node %llu level %d total ptrs %d free spc %u\n", (unsigned long long)btrfs_header_bytenr(c), btrfs_header_level(c), nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); for (i = 0; i < nr; i++) { btrfs_node_key_to_cpu(c, &key, i); - printk("\tkey %d (%llu %u %llu) block %llu\n", + printk(KERN_INFO "\tkey %d (%llu %u %llu) block %llu\n", i, (unsigned long long)key.objectid, key.type, diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c index a50ebb67055..6f0acc4c9ea 100644 --- a/fs/btrfs/ref-cache.c +++ b/fs/btrfs/ref-cache.c @@ -74,11 +74,11 @@ void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, struct rb_node *node) { - struct rb_node ** p = &root->rb_node; - struct rb_node * parent = NULL; + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; struct btrfs_leaf_ref *entry; - while(*p) { + while (*p) { parent = *p; entry = rb_entry(parent, struct btrfs_leaf_ref, rb_node); @@ -98,10 +98,10 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, static struct rb_node *tree_search(struct rb_root *root, u64 bytenr) { - struct rb_node * n = root->rb_node; + struct rb_node *n = root->rb_node; struct btrfs_leaf_ref *entry; - while(n) { + while (n) { entry = rb_entry(n, struct btrfs_leaf_ref, rb_node); WARN_ON(!entry->in_tree); @@ -127,7 +127,7 @@ int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen, return 0; spin_lock(&tree->lock); - while(!list_empty(&tree->list)) { + while 
(!list_empty(&tree->list)) { ref = list_entry(tree->list.next, struct btrfs_leaf_ref, list); BUG_ON(ref->tree != tree); if (ref->root_gen > max_root_gen) diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index f99335a999d..b48650de447 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -132,8 +132,9 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root if (ret != 0) { btrfs_print_leaf(root, path->nodes[0]); - printk("unable to update root key %Lu %u %Lu\n", - key->objectid, key->type, key->offset); + printk(KERN_CRIT "unable to update root key %llu %u %llu\n", + (unsigned long long)key->objectid, key->type, + (unsigned long long)key->offset); BUG_ON(1); } @@ -159,9 +160,9 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root /* * at mount time we want to find all the old transaction snapshots that were in - * the process of being deleted if we crashed. This is any root item with an offset - * lower than the latest root. They need to be queued for deletion to finish - * what was happening when we crashed. + * the process of being deleted if we crashed. This is any root item with an + * offset lower than the latest root. They need to be queued for deletion to + * finish what was happening when we crashed. 
*/ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, struct btrfs_root *latest) @@ -188,7 +189,7 @@ again: ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto err; - while(1) { + while (1) { leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); slot = path->slots[0]; @@ -258,11 +259,7 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = btrfs_search_slot(trans, root, key, path, -1, 1); if (ret < 0) goto out; - if (ret) { -btrfs_print_leaf(root, path->nodes[0]); -printk("failed to del %Lu %u %Lu\n", key->objectid, key->type, key->offset); - } BUG_ON(ret != 0); leaf = path->nodes[0]; ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item); diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c index 8d7f568009c..c0f7ecaf1e7 100644 --- a/fs/btrfs/struct-funcs.c +++ b/fs/btrfs/struct-funcs.c @@ -66,7 +66,7 @@ u##bits btrfs_##name(struct extent_buffer *eb, \ unsigned long map_len; \ u##bits res; \ err = map_extent_buffer(eb, offset, \ - sizeof(((type *)0)->member), \ + sizeof(((type *)0)->member), \ &map_token, &kaddr, \ &map_start, &map_len, KM_USER1); \ if (err) { \ @@ -103,7 +103,7 @@ void btrfs_set_##name(struct extent_buffer *eb, \ unsigned long map_start; \ unsigned long map_len; \ err = map_extent_buffer(eb, offset, \ - sizeof(((type *)0)->member), \ + sizeof(((type *)0)->member), \ &map_token, &kaddr, \ &map_start, &map_len, KM_USER1); \ if (err) { \ diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ccdcb7bb7ad..b4c101d9322 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -55,18 +55,12 @@ static struct super_operations btrfs_super_ops; -static void btrfs_put_super (struct super_block * sb) +static void btrfs_put_super(struct super_block *sb) { struct btrfs_root *root = btrfs_sb(sb); int ret; ret = close_ctree(root); - if (ret) { - printk("close ctree returns %d\n", ret); - } -#if 0 - btrfs_sysfs_del_super(root->fs_info); -#endif sb->s_fs_info = NULL; } @@ 
-299,12 +293,12 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, return error; } -static int btrfs_fill_super(struct super_block * sb, +static int btrfs_fill_super(struct super_block *sb, struct btrfs_fs_devices *fs_devices, - void * data, int silent) + void *data, int silent) { - struct inode * inode; - struct dentry * root_dentry; + struct inode *inode; + struct dentry *root_dentry; struct btrfs_super_block *disk_super; struct btrfs_root *tree_root; struct btrfs_inode *bi; @@ -479,8 +473,10 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, root = dget(s->s_root); else { mutex_lock(&s->s_root->d_inode->i_mutex); - root = lookup_one_len(subvol_name, s->s_root, strlen(subvol_name)); + root = lookup_one_len(subvol_name, s->s_root, + strlen(subvol_name)); mutex_unlock(&s->s_root->d_inode->i_mutex); + if (IS_ERR(root)) { up_write(&s->s_umount); deactivate_super(s); @@ -557,8 +553,9 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_bavail = buf->f_bfree; buf->f_bsize = dentry->d_sb->s_blocksize; buf->f_type = BTRFS_SUPER_MAGIC; + /* We treat it as constant endianness (it doesn't matter _which_) - because we want the fsid to come out the same whether mounted + because we want the fsid to come out the same whether mounted on a big-endian or little-endian host */ buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]); buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]); @@ -658,7 +655,7 @@ static int btrfs_interface_init(void) static void btrfs_interface_exit(void) { if (misc_deregister(&btrfs_misc) < 0) - printk("misc_deregister failed for control device"); + printk(KERN_INFO "misc_deregister failed for control device"); } static int __init init_btrfs_fs(void) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 04087c02084..a240b6fa81d 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -67,7 +67,8 @@ struct btrfs_root_attr { }; #define ROOT_ATTR(name, mode, show, 
store) \ -static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, show, store) +static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, \ + show, store) ROOT_ATTR(blocks_used, 0444, root_blocks_used_show, NULL); ROOT_ATTR(block_limit, 0644, root_block_limit_show, NULL); @@ -86,7 +87,8 @@ struct btrfs_super_attr { }; #define SUPER_ATTR(name, mode, show, store) \ -static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, show, store) +static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, \ + show, store) SUPER_ATTR(blocks_used, 0444, super_blocks_used_show, NULL); SUPER_ATTR(total_blocks, 0444, super_total_blocks_show, NULL); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4e7b56e9d3a..56ab1f5ea11 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -28,9 +28,6 @@ #include "ref-cache.h" #include "tree-log.h" -extern struct kmem_cache *btrfs_trans_handle_cachep; -extern struct kmem_cache *btrfs_transaction_cachep; - #define BTRFS_ROOT_TRANS_TAG 0 static noinline void put_transaction(struct btrfs_transaction *transaction) @@ -85,10 +82,10 @@ static noinline int join_transaction(struct btrfs_root *root) } /* - * this does all the record keeping required to make sure that a - * reference counted root is properly recorded in a given transaction. - * This is required to make sure the old root from before we joined the transaction - * is deleted when the transaction commits + * this does all the record keeping required to make sure that a reference + * counted root is properly recorded in a given transaction. 
This is required + * to make sure the old root from before we joined the transaction is deleted + * when the transaction commits */ noinline int btrfs_record_root_in_trans(struct btrfs_root *root) { @@ -144,7 +141,7 @@ static void wait_current_trans(struct btrfs_root *root) if (cur_trans && cur_trans->blocked) { DEFINE_WAIT(wait); cur_trans->use_count++; - while(1) { + while (1) { prepare_to_wait(&root->fs_info->transaction_wait, &wait, TASK_UNINTERRUPTIBLE); if (cur_trans->blocked) { @@ -213,7 +210,7 @@ static noinline int wait_for_commit(struct btrfs_root *root, { DEFINE_WAIT(wait); mutex_lock(&root->fs_info->trans_mutex); - while(!commit->commit_done) { + while (!commit->commit_done) { prepare_to_wait(&commit->commit_wait, &wait, TASK_UNINTERRUPTIBLE); if (commit->commit_done) @@ -228,8 +225,8 @@ static noinline int wait_for_commit(struct btrfs_root *root, } /* - * rate limit against the drop_snapshot code. This helps to slow down new operations - * if the drop_snapshot code isn't able to keep up. + * rate limit against the drop_snapshot code. This helps to slow down new + * operations if the drop_snapshot code isn't able to keep up. 
*/ static void throttle_on_drops(struct btrfs_root *root) { @@ -332,12 +329,12 @@ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, u64 end; unsigned long index; - while(1) { + while (1) { ret = find_first_extent_bit(dirty_pages, start, &start, &end, EXTENT_DIRTY); if (ret) break; - while(start <= end) { + while (start <= end) { cond_resched(); index = start >> PAGE_CACHE_SHIFT; @@ -368,14 +365,14 @@ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, page_cache_release(page); } } - while(1) { + while (1) { ret = find_first_extent_bit(dirty_pages, 0, &start, &end, EXTENT_DIRTY); if (ret) break; clear_extent_dirty(dirty_pages, start, end, GFP_NOFS); - while(start <= end) { + while (start <= end) { index = start >> PAGE_CACHE_SHIFT; start = (u64)(index + 1) << PAGE_CACHE_SHIFT; page = find_get_page(btree_inode->i_mapping, index); @@ -431,7 +428,7 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, btrfs_write_dirty_block_groups(trans, root); btrfs_extent_post_op(trans, root); - while(1) { + while (1) { old_root_bytenr = btrfs_root_bytenr(&root->root_item); if (old_root_bytenr == root->node->start) break; @@ -472,7 +469,7 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, btrfs_extent_post_op(trans, fs_info->tree_root); - while(!list_empty(&fs_info->dirty_cowonly_roots)) { + while (!list_empty(&fs_info->dirty_cowonly_roots)) { next = fs_info->dirty_cowonly_roots.next; list_del_init(next); root = list_entry(next, struct btrfs_root, dirty_list); @@ -521,7 +518,7 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, int err = 0; u32 refs; - while(1) { + while (1) { ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0, ARRAY_SIZE(gang), BTRFS_ROOT_TRANS_TAG); @@ -653,7 +650,7 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, int ret = 0; int err; - while(!list_empty(list)) { + while (!list_empty(list)) { struct btrfs_root *root; dirty = list_entry(list->prev, struct 
btrfs_dirty_root, list); @@ -663,13 +660,12 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, root = dirty->latest_root; atomic_inc(&root->fs_info->throttles); - while(1) { + while (1) { trans = btrfs_start_transaction(tree_root, 1); mutex_lock(&root->fs_info->drop_mutex); ret = btrfs_drop_snapshot(trans, dirty->root); - if (ret != -EAGAIN) { + if (ret != -EAGAIN) break; - } mutex_unlock(&root->fs_info->drop_mutex); err = btrfs_update_root(trans, @@ -874,7 +870,7 @@ static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans, struct list_head *head = &trans->transaction->pending_snapshots; int ret; - while(!list_empty(head)) { + while (!list_empty(head)) { pending = list_entry(head->next, struct btrfs_pending_snapshot, list); ret = finish_pending_snapshot(fs_info, pending); @@ -1076,9 +1072,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, kmem_cache_free(btrfs_trans_handle_cachep, trans); - if (root->fs_info->closing) { + if (root->fs_info->closing) drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots); - } return ret; } diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index ffe7f639732..ea292117f88 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -66,9 +66,9 @@ static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans, trans->block_group = BTRFS_I(inode)->block_group; } -static inline void btrfs_update_inode_block_group(struct - btrfs_trans_handle *trans, - struct inode *inode) +static inline void btrfs_update_inode_block_group( + struct btrfs_trans_handle *trans, + struct inode *inode) { BTRFS_I(inode)->block_group = trans->block_group; } diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index a6a3956cedf..3e8358c3616 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -23,10 +23,11 @@ #include "transaction.h" #include "locking.h" -/* defrag all the leaves in a given btree. 
If cache_only == 1, don't read things - * from disk, otherwise read all the leaves and try to get key order to +/* defrag all the leaves in a given btree. If cache_only == 1, don't read + * things from disk, otherwise read all the leaves and try to get key order to * better reflect disk order */ + int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, struct btrfs_root *root, int cache_only) { @@ -65,9 +66,9 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, level = btrfs_header_level(root->node); orig_level = level; - if (level == 0) { + if (level == 0) goto out; - } + if (root->defrag_progress.objectid == 0) { struct extent_buffer *root_node; u32 nritems; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index b1c2921f5be..3a72a1b6c24 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -829,7 +829,7 @@ conflict_again: */ ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); ptr_end = ptr + btrfs_item_size_nr(leaf, path->slots[0]); - while(ptr < ptr_end) { + while (ptr < ptr_end) { victim_ref = (struct btrfs_inode_ref *)ptr; victim_name_len = btrfs_inode_ref_name_len(leaf, victim_ref); @@ -938,9 +938,8 @@ static noinline int replay_one_csum(struct btrfs_trans_handle *trans, file_bytes = (item_size / csum_size) * root->sectorsize; sums = kzalloc(btrfs_ordered_sum_size(root, file_bytes), GFP_NOFS); - if (!sums) { + if (!sums) return -ENOMEM; - } INIT_LIST_HEAD(&sums->list); sums->len = file_bytes; @@ -952,7 +951,7 @@ static noinline int replay_one_csum(struct btrfs_trans_handle *trans, sector_sum = sums->sums; cur_offset = key->offset; ptr = btrfs_item_ptr_offset(eb, slot); - while(item_size > 0) { + while (item_size > 0) { sector_sum->bytenr = cur_offset; read_extent_buffer(eb, &sector_sum->sum, ptr, csum_size); sector_sum++; @@ -995,7 +994,7 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); - while(1) { + while (1) { ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) 
break; @@ -1012,7 +1011,7 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); ptr_end = ptr + btrfs_item_size_nr(path->nodes[0], path->slots[0]); - while(ptr < ptr_end) { + while (ptr < ptr_end) { struct btrfs_inode_ref *ref; ref = (struct btrfs_inode_ref *)ptr; @@ -1048,7 +1047,7 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; key.type = BTRFS_ORPHAN_ITEM_KEY; key.offset = (u64)-1; - while(1) { + while (1) { ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) break; @@ -1206,8 +1205,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, if (key->type == BTRFS_DIR_ITEM_KEY) { dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid, name, name_len, 1); - } - else if (key->type == BTRFS_DIR_INDEX_KEY) { + } else if (key->type == BTRFS_DIR_INDEX_KEY) { dst_di = btrfs_lookup_dir_index_item(trans, root, path, key->objectid, key->offset, name, @@ -1282,7 +1280,7 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans, ptr = btrfs_item_ptr_offset(eb, slot); ptr_end = ptr + item_size; - while(ptr < ptr_end) { + while (ptr < ptr_end) { di = (struct btrfs_dir_item *)ptr; name_len = btrfs_dir_name_len(eb, di); ret = replay_one_name(trans, root, path, eb, di, key); @@ -1408,7 +1406,7 @@ again: item_size = btrfs_item_size_nr(eb, slot); ptr = btrfs_item_ptr_offset(eb, slot); ptr_end = ptr + item_size; - while(ptr < ptr_end) { + while (ptr < ptr_end) { di = (struct btrfs_dir_item *)ptr; name_len = btrfs_dir_name_len(eb, di); name = kmalloc(name_len, GFP_NOFS); @@ -1513,14 +1511,14 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, again: range_start = 0; range_end = 0; - while(1) { + while (1) { ret = find_dir_range(log, path, dirid, key_type, &range_start, &range_end); if (ret != 0) break; dir_key.offset = range_start; - 
while(1) { + while (1) { int nritems; ret = btrfs_search_slot(NULL, root, &dir_key, path, 0, 0); @@ -1676,7 +1674,7 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, return 0; } -static int noinline walk_down_log_tree(struct btrfs_trans_handle *trans, +static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int *level, struct walk_control *wc) @@ -1694,7 +1692,7 @@ static int noinline walk_down_log_tree(struct btrfs_trans_handle *trans, WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); - while(*level > 0) { + while (*level > 0) { WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); cur = path->nodes[*level]; @@ -1753,11 +1751,11 @@ static int noinline walk_down_log_tree(struct btrfs_trans_handle *trans, WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); - if (path->nodes[*level] == root->node) { + if (path->nodes[*level] == root->node) parent = path->nodes[*level]; - } else { + else parent = path->nodes[*level + 1]; - } + bytenr = path->nodes[*level]->start; blocksize = btrfs_level_size(root, *level); @@ -1790,7 +1788,7 @@ static int noinline walk_down_log_tree(struct btrfs_trans_handle *trans, return 0; } -static int noinline walk_up_log_tree(struct btrfs_trans_handle *trans, +static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int *level, struct walk_control *wc) @@ -1801,7 +1799,7 @@ static int noinline walk_up_log_tree(struct btrfs_trans_handle *trans, int slot; int ret; - for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { + for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { slot = path->slots[i]; if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { struct extent_buffer *node; @@ -1875,7 +1873,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, extent_buffer_get(log->node); path->slots[level] = 0; - while(1) { + while (1) { wret = 
walk_down_log_tree(trans, log, path, &level, wc); if (wret > 0) break; @@ -1941,7 +1939,7 @@ static int wait_log_commit(struct btrfs_root *log) schedule(); finish_wait(&log->fs_info->tree_log_wait, &wait); mutex_lock(&log->fs_info->tree_log_mutex); - } while(transid == log->fs_info->tree_log_transid && + } while (transid == log->fs_info->tree_log_transid && atomic_read(&log->fs_info->tree_log_commit)); return 0; } @@ -1965,13 +1963,13 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, } atomic_set(&log->fs_info->tree_log_commit, 1); - while(1) { + while (1) { batch = log->fs_info->tree_log_batch; mutex_unlock(&log->fs_info->tree_log_mutex); schedule_timeout_uninterruptible(1); mutex_lock(&log->fs_info->tree_log_mutex); - while(atomic_read(&log->fs_info->tree_log_writers)) { + while (atomic_read(&log->fs_info->tree_log_writers)) { DEFINE_WAIT(wait); prepare_to_wait(&log->fs_info->tree_log_wait, &wait, TASK_UNINTERRUPTIBLE); @@ -2030,7 +2028,7 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) ret = walk_log_tree(trans, log, &wc); BUG_ON(ret); - while(1) { + while (1) { ret = find_first_extent_bit(&log->dirty_log_pages, 0, &start, &end, EXTENT_DIRTY); if (ret) @@ -2287,9 +2285,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, struct btrfs_key tmp; btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); - if (key_type == tmp.type) { + if (key_type == tmp.type) first_offset = max(min_offset, tmp.offset) + 1; - } } goto done; } @@ -2319,7 +2316,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, * we have a block from this transaction, log every item in it * from our directory */ - while(1) { + while (1) { struct btrfs_key tmp; src = path->nodes[0]; nritems = btrfs_header_nritems(src); @@ -2396,7 +2393,7 @@ static noinline int log_directory_changes(struct btrfs_trans_handle *trans, again: min_key = 0; max_key = 0; - while(1) { + while (1) { ret = log_dir_items(trans, root, inode, path, 
dst_path, key_type, min_key, &max_key); @@ -2432,7 +2429,7 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, key.type = max_key_type; key.offset = (u64)-1; - while(1) { + while (1) { ret = btrfs_search_slot(trans, log, &key, path, -1, 1); if (ret != 1) @@ -2481,7 +2478,7 @@ static noinline int copy_extent_csums(struct btrfs_trans_handle *trans, list_add_tail(&sums->list, list); path = btrfs_alloc_path(); - while(disk_bytenr < end) { + while (disk_bytenr < end) { if (!item || disk_bytenr < item_start_offset || disk_bytenr >= item_last_offset) { struct btrfs_key found_key; @@ -2496,7 +2493,8 @@ static noinline int copy_extent_csums(struct btrfs_trans_handle *trans, if (ret == -ENOENT || ret == -EFBIG) ret = 0; sum = 0; - printk("log no csum found for byte %llu\n", + printk(KERN_INFO "log no csum found for " + "byte %llu\n", (unsigned long long)disk_bytenr); item = NULL; btrfs_release_path(root, path); @@ -2643,7 +2641,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, * we have to do this after the loop above to avoid changing the * log tree while trying to change the log tree. 
*/ - while(!list_empty(&ordered_sums)) { + while (!list_empty(&ordered_sums)) { struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, struct btrfs_ordered_sum, list); @@ -2736,7 +2734,7 @@ static int __btrfs_log_inode(struct btrfs_trans_handle *trans, BUG_ON(ret); path->keep_locks = 1; - while(1) { + while (1) { ins_nr = 0; ret = btrfs_search_forward(root, &min_key, &max_key, path, 0, trans->transid); @@ -2848,7 +2846,7 @@ int btrfs_log_dentry(struct btrfs_trans_handle *trans, start_log_trans(trans, root); sb = dentry->d_inode->i_sb; - while(1) { + while (1) { ret = __btrfs_log_inode(trans, root, dentry->d_inode, inode_only); BUG_ON(ret); @@ -2919,7 +2917,7 @@ again: key.offset = (u64)-1; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); - while(1) { + while (1) { ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0); if (ret < 0) break; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 6672adcec9f..b187b537888 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -140,7 +140,7 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid) * the list if the block device is congested. This way, multiple devices * can make progress from a single worker thread. 
*/ -static int noinline run_scheduled_bios(struct btrfs_device *device) +static noinline int run_scheduled_bios(struct btrfs_device *device) { struct bio *pending; struct backing_dev_info *bdi; @@ -187,7 +187,7 @@ loop: } spin_unlock(&device->io_lock); - while(pending) { + while (pending) { cur = pending; pending = pending->bi_next; cur->bi_next = NULL; @@ -458,7 +458,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, bdev = open_bdev_exclusive(device->name, flags, holder); if (IS_ERR(bdev)) { - printk("open %s failed\n", device->name); + printk(KERN_INFO "open %s failed\n", device->name); goto error; } set_blocksize(bdev, 4096); @@ -570,14 +570,15 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, devid = le64_to_cpu(disk_super->dev_item.devid); transid = btrfs_super_generation(disk_super); if (disk_super->label[0]) - printk("device label %s ", disk_super->label); + printk(KERN_INFO "device label %s ", disk_super->label); else { /* FIXME, make a readl uuid parser */ - printk("device fsid %llx-%llx ", + printk(KERN_INFO "device fsid %llx-%llx ", *(unsigned long long *)disk_super->fsid, *(unsigned long long *)(disk_super->fsid + 8)); } - printk("devid %Lu transid %Lu %s\n", devid, transid, path); + printk(KERN_INFO "devid %llu transid %llu %s\n", + (unsigned long long)devid, (unsigned long long)transid, path); ret = device_list_add(path, disk_super, devid, fs_devices_ret); brelse(bh); @@ -683,9 +684,8 @@ no_more_items: goto check_pending; } } - if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) { + if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) goto next; - } start_found = 1; dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); @@ -1001,14 +1001,16 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && root->fs_info->fs_devices->rw_devices <= 4) { - printk("btrfs: unable to go below four devices on raid10\n"); + printk(KERN_ERR "btrfs: unable to go 
below four devices " + "on raid10\n"); ret = -EINVAL; goto out; } if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && root->fs_info->fs_devices->rw_devices <= 2) { - printk("btrfs: unable to go below two devices on raid1\n"); + printk(KERN_ERR "btrfs: unable to go below two " + "devices on raid1\n"); ret = -EINVAL; goto out; } @@ -1031,7 +1033,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) bh = NULL; disk_super = NULL; if (!device) { - printk("btrfs: no missing devices found to remove\n"); + printk(KERN_ERR "btrfs: no missing devices found to " + "remove\n"); goto out; } } else { @@ -1060,7 +1063,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) } if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) { - printk("btrfs: unable to remove the only writeable device\n"); + printk(KERN_ERR "btrfs: unable to remove the only writeable " + "device\n"); ret = -EINVAL; goto error_brelse; } @@ -1286,9 +1290,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) return -EINVAL; bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder); - if (!bdev) { + if (!bdev) return -EIO; - } if (root->fs_info->fs_devices->seeding) { seeding_dev = 1; @@ -1401,8 +1404,8 @@ error: goto out; } -static int noinline btrfs_update_device(struct btrfs_trans_handle *trans, - struct btrfs_device *device) +static noinline int btrfs_update_device(struct btrfs_trans_handle *trans, + struct btrfs_device *device) { int ret; struct btrfs_path *path; @@ -1563,7 +1566,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, int ret; int i; - printk("btrfs relocating chunk %llu\n", + printk(KERN_INFO "btrfs relocating chunk %llu\n", (unsigned long long)chunk_offset); root = root->fs_info->chunk_root; extent_root = root->fs_info->extent_root; @@ -1748,7 +1751,7 @@ int btrfs_balance(struct btrfs_root *dev_root) key.offset = (u64)-1; key.type = BTRFS_CHUNK_ITEM_KEY; - while(1) { + while (1) { ret = btrfs_search_slot(NULL, 
chunk_root, &key, path, 0, 0); if (ret < 0) goto error; @@ -1916,7 +1919,7 @@ static int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, return 0; } -static u64 noinline chunk_bytes_by_type(u64 type, u64 calc_size, +static noinline u64 chunk_bytes_by_type(u64 type, u64 calc_size, int num_stripes, int sub_stripes) { if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP)) @@ -2041,7 +2044,7 @@ again: min_free += 1024 * 1024; INIT_LIST_HEAD(&private_devs); - while(index < num_stripes) { + while (index < num_stripes) { device = list_entry(cur, struct btrfs_device, dev_alloc_list); BUG_ON(!device->writeable); if (device->total_bytes > device->bytes_used) @@ -2242,7 +2245,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, return 0; } -static int noinline init_first_rw_device(struct btrfs_trans_handle *trans, +static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_device *device) { @@ -2338,7 +2341,7 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) { struct extent_map *em; - while(1) { + while (1) { spin_lock(&tree->map_tree.lock); em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1); if (em) @@ -2413,9 +2416,8 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, int max_errors = 0; struct btrfs_multi_bio *multi = NULL; - if (multi_ret && !(rw & (1 << BIO_RW))) { + if (multi_ret && !(rw & (1 << BIO_RW))) stripes_allocated = 1; - } again: if (multi_ret) { multi = kzalloc(btrfs_multi_bio_size(stripes_allocated), @@ -2434,7 +2436,9 @@ again: return 0; if (!em) { - printk("unable to find logical %Lu len %Lu\n", logical, *length); + printk(KERN_CRIT "unable to find logical %llu len %llu\n", + (unsigned long long)logical, + (unsigned long long)*length); BUG(); } @@ -2541,9 +2545,8 @@ again: device = map->stripes[stripe_index].dev; if (device->bdev) { bdi = blk_get_backing_dev_info(device->bdev); - if (bdi->unplug_io_fn) { + if (bdi->unplug_io_fn) 
bdi->unplug_io_fn(bdi, unplug_page); - } } } else { multi->stripes[i].physical = @@ -2717,7 +2720,7 @@ struct async_sched { * This will add one bio to the pending list for a device and make sure * the work struct is scheduled. */ -static int noinline schedule_bio(struct btrfs_root *root, +static noinline int schedule_bio(struct btrfs_root *root, struct btrfs_device *device, int rw, struct bio *bio) { @@ -2785,8 +2788,10 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, total_devs = multi->num_stripes; if (map_length < length) { - printk("mapping failed logical %Lu bio len %Lu " - "len %Lu\n", logical, length, map_length); + printk(KERN_CRIT "mapping failed logical %llu bio len %llu " + "len %llu\n", (unsigned long long)logical, + (unsigned long long)length, + (unsigned long long)map_length); BUG(); } multi->end_io = first_bio->bi_end_io; @@ -2794,7 +2799,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, multi->orig_bio = first_bio; atomic_set(&multi->stripes_pending, multi->num_stripes); - while(dev_nr < total_devs) { + while (dev_nr < total_devs) { if (total_devs > 1) { if (dev_nr < total_devs - 1) { bio = bio_clone(first_bio, GFP_NOFS); @@ -3058,7 +3063,8 @@ static int read_one_dev(struct btrfs_root *root, return -EIO; if (!device) { - printk("warning devid %Lu missing\n", devid); + printk(KERN_WARNING "warning devid %llu missing\n", + (unsigned long long)devid); device = add_missing_dev(root, devid, dev_uuid); if (!device) return -ENOMEM; @@ -3078,12 +3084,6 @@ static int read_one_dev(struct btrfs_root *root, if (device->writeable) device->fs_devices->total_rw_bytes += device->total_bytes; ret = 0; -#if 0 - ret = btrfs_open_device(device); - if (ret) { - kfree(device); - } -#endif return ret; } @@ -3174,7 +3174,7 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) key.type = 0; again: ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - while(1) { + while (1) { leaf = path->nodes[0]; slot = path->slots[0]; if 
(slot >= btrfs_header_nritems(leaf)) { diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 4146f0710e6..7f332e27089 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -264,7 +264,8 @@ struct xattr_handler *btrfs_xattr_handlers[] = { */ static bool btrfs_is_valid_xattr(const char *name) { - return !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) || + return !strncmp(name, XATTR_SECURITY_PREFIX, + XATTR_SECURITY_PREFIX_LEN) || !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) || !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index c4617cde6c7..ecfbce836d3 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -152,7 +152,7 @@ static int free_workspace(struct workspace *workspace) static void free_workspaces(void) { struct workspace *workspace; - while(!list_empty(&idle_workspace)) { + while (!list_empty(&idle_workspace)) { workspace = list_entry(idle_workspace.next, struct workspace, list); list_del(&workspace->list); @@ -397,12 +397,10 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, ret = -1; goto out; } - while(workspace->inf_strm.total_in < srclen) { + while (workspace->inf_strm.total_in < srclen) { ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); - if (ret != Z_OK && ret != Z_STREAM_END) { + if (ret != Z_OK && ret != Z_STREAM_END) break; - } - /* * buf start is the byte offset we're of the start of * our workspace buffer @@ -424,16 +422,14 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, /* we didn't make progress in this inflate * call, we're done */ - if (ret != Z_STREAM_END) { + if (ret != Z_STREAM_END) ret = -1; - } break; } /* we haven't yet hit data corresponding to this page */ - if (total_out <= start_byte) { + if (total_out <= start_byte) goto next; - } /* * the start of the data we care about is offset into @@ -448,7 +444,7 @@ int btrfs_zlib_decompress_biovec(struct page 
**pages_in, current_buf_start = buf_start; /* copy bytes from the working buffer into the pages */ - while(working_bytes > 0) { + while (working_bytes > 0) { bytes = min(PAGE_CACHE_SIZE - pg_offset, PAGE_CACHE_SIZE - buf_offset); bytes = min(bytes, working_bytes); @@ -471,6 +467,7 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, ret = 0; goto done; } + page_out = bvec[page_out_index].bv_page; pg_offset = 0; page_bytes_left = PAGE_CACHE_SIZE; @@ -480,9 +477,8 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, * make sure our new page is covered by this * working buffer */ - if (total_out <= start_byte) { + if (total_out <= start_byte) goto next; - } /* the next page in the biovec might not * be adjacent to the last page, but it @@ -517,11 +513,10 @@ next: PAGE_CACHE_SIZE); } } - if (ret != Z_STREAM_END) { + if (ret != Z_STREAM_END) ret = -1; - } else { + else ret = 0; - } done: zlib_inflateEnd(&workspace->inf_strm); if (data_in) @@ -579,16 +574,15 @@ int btrfs_zlib_decompress(unsigned char *data_in, goto out; } - while(bytes_left > 0) { + while (bytes_left > 0) { unsigned long buf_start; unsigned long buf_offset; unsigned long bytes; unsigned long pg_offset = 0; ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); - if (ret != Z_OK && ret != Z_STREAM_END) { + if (ret != Z_OK && ret != Z_STREAM_END) break; - } buf_start = total_out; total_out = workspace->inf_strm.total_out; @@ -598,15 +592,13 @@ int btrfs_zlib_decompress(unsigned char *data_in, break; } - if (total_out <= start_byte) { + if (total_out <= start_byte) goto next; - } - if (total_out > start_byte && buf_start < start_byte) { + if (total_out > start_byte && buf_start < start_byte) buf_offset = start_byte - buf_start; - } else { + else buf_offset = 0; - } bytes = min(PAGE_CACHE_SIZE - pg_offset, PAGE_CACHE_SIZE - buf_offset); @@ -622,11 +614,12 @@ next: workspace->inf_strm.next_out = workspace->buf; workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; } - if (ret != Z_STREAM_END && 
bytes_left != 0) { + + if (ret != Z_STREAM_END && bytes_left != 0) ret = -1; - } else { + else ret = 0; - } + zlib_inflateEnd(&workspace->inf_strm); out: free_workspace(workspace); -- cgit v1.2.3-70-g09d2